upload.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. package oss
import (
	"crypto/md5"
	"encoding/base64"
	"encoding/json"
	"errors"
	"io"
	"io/ioutil"
	"os"
	"time"
)
  11. //
  12. // UploadFile 分片上传文件
  13. //
  14. // objectKey object名称。
  15. // filePath 本地文件。需要上传的文件。
  16. // partSize 本次上传文件片的大小,字节数。比如100 * 1024为每片100KB。
  17. // options 上传Object时可以指定Object的属性。详见InitiateMultipartUpload。
  18. //
  19. // error 操作成功为nil,非nil为错误信息。
  20. //
  21. func (bucket Bucket) UploadFile(objectKey, filePath string, partSize int64, options ...Option) error {
  22. if partSize < MinPartSize || partSize > MaxPartSize {
  23. return errors.New("oss: part size invalid range (1024KB, 5GB]")
  24. }
  25. cpConf, err := getCpConfig(options, filePath)
  26. if err != nil {
  27. return err
  28. }
  29. routines := getRoutines(options)
  30. if cpConf.IsEnable {
  31. return bucket.uploadFileWithCp(objectKey, filePath, partSize, options, cpConf.FilePath, routines)
  32. }
  33. return bucket.uploadFile(objectKey, filePath, partSize, options, routines)
  34. }
  35. // ----- 并发无断点的上传 -----
  36. // 获取Checkpoint配置
  37. func getCpConfig(options []Option, filePath string) (*cpConfig, error) {
  38. cpc := &cpConfig{}
  39. cpcOpt, err := findOption(options, checkpointConfig, nil)
  40. if err != nil || cpcOpt == nil {
  41. return cpc, err
  42. }
  43. cpc = cpcOpt.(*cpConfig)
  44. if cpc.IsEnable && cpc.FilePath == "" {
  45. cpc.FilePath = filePath + CheckpointFileSuffix
  46. }
  47. return cpc, nil
  48. }
  49. // 获取并发数,默认并发数1
  50. func getRoutines(options []Option) int {
  51. rtnOpt, err := findOption(options, routineNum, nil)
  52. if err != nil || rtnOpt == nil {
  53. return 1
  54. }
  55. rs := rtnOpt.(int)
  56. if rs < 1 {
  57. rs = 1
  58. } else if rs > 100 {
  59. rs = 100
  60. }
  61. return rs
  62. }
  63. // 获取进度回调
  64. func getProgressListener(options []Option) ProgressListener {
  65. isSet, listener, _ := isOptionSet(options, progressListener)
  66. if !isSet {
  67. return nil
  68. }
  69. return listener.(ProgressListener)
  70. }
// Test hook: invoked before each part upload so tests can inject failures.
type uploadPartHook func(id int, chunk FileChunk) error

// uploadPartHooker is the active hook; tests swap it out, production code
// keeps the no-op default.
var uploadPartHooker uploadPartHook = defaultUploadPart

// defaultUploadPart is the default hook and always succeeds.
func defaultUploadPart(id int, chunk FileChunk) error {
	return nil
}
// workerArg bundles the read-only state shared by the upload worker goroutines.
type workerArg struct {
	bucket   *Bucket                       // bucket to upload to
	filePath string                        // local source file
	imur     InitiateMultipartUploadResult // the initiated multipart upload
	hook     uploadPartHook                // test hook, called before each part upload
}
  84. // 工作协程
  85. func worker(id int, arg workerArg, jobs <-chan FileChunk, results chan<- UploadPart, failed chan<- error, die <-chan bool) {
  86. for chunk := range jobs {
  87. if err := arg.hook(id, chunk); err != nil {
  88. failed <- err
  89. break
  90. }
  91. part, err := arg.bucket.UploadPartFromFile(arg.imur, arg.filePath, chunk.Offset, chunk.Size, chunk.Number)
  92. if err != nil {
  93. failed <- err
  94. break
  95. }
  96. select {
  97. case <-die:
  98. return
  99. default:
  100. }
  101. results <- part
  102. }
  103. }
  104. // 调度协程
  105. func scheduler(jobs chan FileChunk, chunks []FileChunk) {
  106. for _, chunk := range chunks {
  107. jobs <- chunk
  108. }
  109. close(jobs)
  110. }
  111. func getTotalBytes(chunks []FileChunk) int64 {
  112. var tb int64
  113. for _, chunk := range chunks {
  114. tb += chunk.Size
  115. }
  116. return tb
  117. }
  118. // 并发上传,不带断点续传功能
  119. func (bucket Bucket) uploadFile(objectKey, filePath string, partSize int64, options []Option, routines int) error {
  120. listener := getProgressListener(options)
  121. chunks, err := SplitFileByPartSize(filePath, partSize)
  122. if err != nil {
  123. return err
  124. }
  125. // 初始化上传任务
  126. imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
  127. if err != nil {
  128. return err
  129. }
  130. jobs := make(chan FileChunk, len(chunks))
  131. results := make(chan UploadPart, len(chunks))
  132. failed := make(chan error)
  133. die := make(chan bool)
  134. var completedBytes int64
  135. totalBytes := getTotalBytes(chunks)
  136. event := newProgressEvent(TransferStartedEvent, 0, totalBytes)
  137. publishProgress(listener, event)
  138. // 启动工作协程
  139. arg := workerArg{&bucket, filePath, imur, uploadPartHooker}
  140. for w := 1; w <= routines; w++ {
  141. go worker(w, arg, jobs, results, failed, die)
  142. }
  143. // 并发上传分片
  144. go scheduler(jobs, chunks)
  145. // 等待分配分片上传完成
  146. completed := 0
  147. parts := make([]UploadPart, len(chunks))
  148. for completed < len(chunks) {
  149. select {
  150. case part := <-results:
  151. completed++
  152. parts[part.PartNumber-1] = part
  153. completedBytes += chunks[part.PartNumber-1].Size
  154. event = newProgressEvent(TransferDataEvent, completedBytes, totalBytes)
  155. publishProgress(listener, event)
  156. case err := <-failed:
  157. close(die)
  158. event = newProgressEvent(TransferFailedEvent, completedBytes, totalBytes)
  159. publishProgress(listener, event)
  160. bucket.AbortMultipartUpload(imur)
  161. return err
  162. }
  163. if completed >= len(chunks) {
  164. break
  165. }
  166. }
  167. event = newProgressEvent(TransferStartedEvent, completedBytes, totalBytes)
  168. publishProgress(listener, event)
  169. // 提交任务
  170. _, err = bucket.CompleteMultipartUpload(imur, parts)
  171. if err != nil {
  172. bucket.AbortMultipartUpload(imur)
  173. return err
  174. }
  175. return nil
  176. }
// ----- concurrent resumable (checkpoint) upload -----

// uploadCpMagic identifies a valid upload checkpoint file.
const uploadCpMagic = "FE8BB4EA-B593-4FAC-AD7A-2459A36E2E62"

// uploadCheckpoint is the persisted state of a resumable upload.
type uploadCheckpoint struct {
	Magic     string   // magic constant; must equal uploadCpMagic
	MD5       string   // MD5 of the checkpoint content (computed with this field blanked)
	FilePath  string   // local file being uploaded
	FileStat  cpStat   // snapshot of the local file's state when the upload started
	ObjectKey string   // destination object key
	UploadID  string   // multipart upload id
	Parts     []cpPart // all parts of the local file
}

// cpStat is a snapshot of a local file, used to detect modification between runs.
type cpStat struct {
	Size         int64     // file size in bytes
	LastModified time.Time // last modification time of the local file
	MD5          string    // MD5 of the local file content
}

// cpPart tracks the upload state of a single part.
type cpPart struct {
	Chunk       FileChunk  // the file chunk for this part
	Part        UploadPart // result of the part upload (valid once IsCompleted)
	IsCompleted bool       // whether this part has finished uploading
}
  198. // CP数据是否有效,CP有效且文件没有更新时有效
  199. func (cp uploadCheckpoint) isValid(filePath string) (bool, error) {
  200. // 比较CP的Magic及MD5
  201. cpb := cp
  202. cpb.MD5 = ""
  203. js, _ := json.Marshal(cpb)
  204. sum := md5.Sum(js)
  205. b64 := base64.StdEncoding.EncodeToString(sum[:])
  206. if cp.Magic != uploadCpMagic || b64 != cp.MD5 {
  207. return false, nil
  208. }
  209. // 确认本地文件是否更新
  210. fd, err := os.Open(filePath)
  211. if err != nil {
  212. return false, err
  213. }
  214. defer fd.Close()
  215. st, err := fd.Stat()
  216. if err != nil {
  217. return false, err
  218. }
  219. md, err := calcFileMD5(filePath)
  220. if err != nil {
  221. return false, err
  222. }
  223. // 比较文件大小/文件最后更新时间/文件MD5
  224. if cp.FileStat.Size != st.Size() ||
  225. cp.FileStat.LastModified != st.ModTime() ||
  226. cp.FileStat.MD5 != md {
  227. return false, nil
  228. }
  229. return true, nil
  230. }
  231. // 从文件中load
  232. func (cp *uploadCheckpoint) load(filePath string) error {
  233. contents, err := ioutil.ReadFile(filePath)
  234. if err != nil {
  235. return err
  236. }
  237. err = json.Unmarshal(contents, cp)
  238. return err
  239. }
  240. // dump到文件
  241. func (cp *uploadCheckpoint) dump(filePath string) error {
  242. bcp := *cp
  243. // 计算MD5
  244. bcp.MD5 = ""
  245. js, err := json.Marshal(bcp)
  246. if err != nil {
  247. return err
  248. }
  249. sum := md5.Sum(js)
  250. b64 := base64.StdEncoding.EncodeToString(sum[:])
  251. bcp.MD5 = b64
  252. // 序列化
  253. js, err = json.Marshal(bcp)
  254. if err != nil {
  255. return err
  256. }
  257. // dump
  258. return ioutil.WriteFile(filePath, js, FilePermMode)
  259. }
  260. // 更新分片状态
  261. func (cp *uploadCheckpoint) updatePart(part UploadPart) {
  262. cp.Parts[part.PartNumber-1].Part = part
  263. cp.Parts[part.PartNumber-1].IsCompleted = true
  264. }
  265. // 未完成的分片
  266. func (cp *uploadCheckpoint) todoParts() []FileChunk {
  267. fcs := []FileChunk{}
  268. for _, part := range cp.Parts {
  269. if !part.IsCompleted {
  270. fcs = append(fcs, part.Chunk)
  271. }
  272. }
  273. return fcs
  274. }
  275. // 所有的分片
  276. func (cp *uploadCheckpoint) allParts() []UploadPart {
  277. ps := []UploadPart{}
  278. for _, part := range cp.Parts {
  279. ps = append(ps, part.Part)
  280. }
  281. return ps
  282. }
  283. // 完成的字节数
  284. func (cp *uploadCheckpoint) getCompletedBytes() int64 {
  285. var completedBytes int64
  286. for _, part := range cp.Parts {
  287. if part.IsCompleted {
  288. completedBytes += part.Chunk.Size
  289. }
  290. }
  291. return completedBytes
  292. }
  293. // 计算文件文件MD5
  294. func calcFileMD5(filePath string) (string, error) {
  295. return "", nil
  296. }
  297. // 初始化分片上传
  298. func prepare(cp *uploadCheckpoint, objectKey, filePath string, partSize int64, bucket *Bucket, options []Option) error {
  299. // cp
  300. cp.Magic = uploadCpMagic
  301. cp.FilePath = filePath
  302. cp.ObjectKey = objectKey
  303. // localfile
  304. fd, err := os.Open(filePath)
  305. if err != nil {
  306. return err
  307. }
  308. defer fd.Close()
  309. st, err := fd.Stat()
  310. if err != nil {
  311. return err
  312. }
  313. cp.FileStat.Size = st.Size()
  314. cp.FileStat.LastModified = st.ModTime()
  315. md, err := calcFileMD5(filePath)
  316. if err != nil {
  317. return err
  318. }
  319. cp.FileStat.MD5 = md
  320. // chunks
  321. parts, err := SplitFileByPartSize(filePath, partSize)
  322. if err != nil {
  323. return err
  324. }
  325. cp.Parts = make([]cpPart, len(parts))
  326. for i, part := range parts {
  327. cp.Parts[i].Chunk = part
  328. cp.Parts[i].IsCompleted = false
  329. }
  330. // init load
  331. imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
  332. if err != nil {
  333. return err
  334. }
  335. cp.UploadID = imur.UploadID
  336. return nil
  337. }
  338. // 提交分片上传,删除CP文件
  339. func complete(cp *uploadCheckpoint, bucket *Bucket, parts []UploadPart, cpFilePath string) error {
  340. imur := InitiateMultipartUploadResult{Bucket: bucket.BucketName,
  341. Key: cp.ObjectKey, UploadID: cp.UploadID}
  342. _, err := bucket.CompleteMultipartUpload(imur, parts)
  343. if err != nil {
  344. return err
  345. }
  346. os.Remove(cpFilePath)
  347. return err
  348. }
// uploadFileWithCp does a concurrent multipart upload with checkpoint
// (resume) support: after every uploaded part the checkpoint is persisted to
// cpFilePath, so an interrupted upload can pick up where it left off.
func (bucket Bucket) uploadFileWithCp(objectKey, filePath string, partSize int64, options []Option, cpFilePath string, routines int) error {
	listener := getProgressListener(options)

	// Load the checkpoint; an unreadable/corrupt file is simply discarded.
	ucp := uploadCheckpoint{}
	err := ucp.load(cpFilePath)
	if err != nil {
		os.Remove(cpFilePath)
	}

	// If loading failed or the data is stale, start a brand-new upload.
	valid, err := ucp.isValid(filePath)
	if err != nil || !valid {
		if err = prepare(&ucp, objectKey, filePath, partSize, &bucket, options); err != nil {
			return err
		}
		os.Remove(cpFilePath)
	}

	// Only the parts not yet marked completed need to be uploaded.
	chunks := ucp.todoParts()
	imur := InitiateMultipartUploadResult{
		Bucket: bucket.BucketName,
		Key: objectKey,
		UploadID: ucp.UploadID}

	jobs := make(chan FileChunk, len(chunks))
	results := make(chan UploadPart, len(chunks))
	failed := make(chan error)
	die := make(chan bool)

	// Progress starts from whatever earlier runs already uploaded.
	completedBytes := ucp.getCompletedBytes()
	event := newProgressEvent(TransferStartedEvent, completedBytes, ucp.FileStat.Size)
	publishProgress(listener, event)

	// Start the upload worker goroutines.
	arg := workerArg{&bucket, filePath, imur, uploadPartHooker}
	for w := 1; w <= routines; w++ {
		go worker(w, arg, jobs, results, failed, die)
	}

	// Hand the pending chunks to the workers.
	go scheduler(jobs, chunks)

	// Wait until every pending part is uploaded; persist the checkpoint
	// after each one so progress survives an interruption.
	// NOTE(review): the error from ucp.dump is ignored — a failed dump only
	// degrades resumability, it does not affect this upload.
	completed := 0
	for completed < len(chunks) {
		select {
		case part := <-results:
			completed++
			ucp.updatePart(part)
			ucp.dump(cpFilePath)
			completedBytes += ucp.Parts[part.PartNumber-1].Chunk.Size
			event = newProgressEvent(TransferDataEvent, completedBytes, ucp.FileStat.Size)
			publishProgress(listener, event)
		case err := <-failed:
			// Unlike uploadFile, the upload is deliberately NOT aborted
			// here: the checkpoint file lets a later call resume from the
			// parts already uploaded.
			close(die)
			event = newProgressEvent(TransferFailedEvent, completedBytes, ucp.FileStat.Size)
			publishProgress(listener, event)
			return err
		}

		if completed >= len(chunks) {
			break
		}
	}

	event = newProgressEvent(TransferCompletedEvent, completedBytes, ucp.FileStat.Size)
	publishProgress(listener, event)

	// Commit the upload and delete the checkpoint file.
	err = complete(&ucp, &bucket, ucp.allParts(), cpFilePath)
	return err
}