package oss

import (
	"crypto/md5"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"time"
)
// UploadFile uploads a local file to OSS with multipart upload.
//
// objectKey    the object name.
// filePath     the local file path to upload.
// partSize     the part size in bytes.
// options      the options for uploading the object.
//
// error    it's nil if the operation succeeds, otherwise it's an error object.
//
func (bucket Bucket) UploadFile(objectKey, filePath string, partSize int64, options ...Option) error {
	if partSize < MinPartSize || partSize > MaxPartSize {
		return errors.New("oss: part size invalid range [100KB, 5GB]")
	}

	cpConf := getCpConfig(options)
	routines := getRoutines(options)

	if cpConf != nil && cpConf.IsEnable {
		cpFilePath := getUploadCpFilePath(cpConf, filePath, bucket.BucketName, objectKey)
		if cpFilePath != "" {
			return bucket.uploadFileWithCp(objectKey, filePath, partSize, options, cpFilePath, routines)
		}
	}

	return bucket.uploadFile(objectKey, filePath, partSize, options, routines)
}
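// A hedged usage sketch; the endpoint, credentials, names and checkpoint path
// below are illustrative placeholders, not values from this file:
//
//	client, err := New("endpoint", "accessKeyID", "accessKeySecret")
//	if err != nil {
//		// handle error
//	}
//	bucket, err := client.Bucket("my-bucket")
//	if err != nil {
//		// handle error
//	}
//	// 3 concurrent workers; resumable via an explicit checkpoint file.
//	err = bucket.UploadFile("my-object", "/tmp/my-file", 100*1024,
//		Routines(3), Checkpoint(true, "/tmp/my-file.cp"))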
func getUploadCpFilePath(cpConf *cpConfig, srcFile, destBucket, destObject string) string {
	if cpConf.FilePath == "" && cpConf.DirPath != "" {
		dest := fmt.Sprintf("oss://%v/%v", destBucket, destObject)
		absPath, _ := filepath.Abs(srcFile)
		cpFileName := getCpFileName(absPath, dest, "")
		cpConf.FilePath = cpConf.DirPath + string(os.PathSeparator) + cpFileName
	}
	return cpConf.FilePath
}
// ----- concurrent upload without checkpoint -----

// getCpConfig gets the checkpoint configuration
func getCpConfig(options []Option) *cpConfig {
	cpcOpt, err := findOption(options, checkpointConfig, nil)
	if err != nil || cpcOpt == nil {
		return nil
	}
	return cpcOpt.(*cpConfig)
}

// getCpFileName returns the name of the checkpoint file
func getCpFileName(src, dest, versionId string) string {
	md5Ctx := md5.New()
	md5Ctx.Write([]byte(src))
	srcCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))

	md5Ctx.Reset()
	md5Ctx.Write([]byte(dest))
	destCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))

	if versionId == "" {
		return fmt.Sprintf("%v-%v.cp", srcCheckSum, destCheckSum)
	}

	md5Ctx.Reset()
	md5Ctx.Write([]byte(versionId))
	versionCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))
	return fmt.Sprintf("%v-%v-%v.cp", srcCheckSum, destCheckSum, versionCheckSum)
}
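// For example (values illustrative), uploading /data/a.txt to oss://bkt/obj
// yields a name of the form "<md5hex(abs(/data/a.txt))>-<md5hex(oss://bkt/obj)>.cp",
// so the same source/destination pair always maps to the same checkpoint file.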
// getRoutines gets the worker routine count; it defaults to 1 and is clamped to [1, 100].
func getRoutines(options []Option) int {
	rtnOpt, err := findOption(options, routineNum, nil)
	if err != nil || rtnOpt == nil {
		return 1
	}

	rs := rtnOpt.(int)
	if rs < 1 {
		rs = 1
	} else if rs > 100 {
		rs = 100
	}

	return rs
}
// getPayer returns the payer of the request
func getPayer(options []Option) string {
	payerOpt, err := findOption(options, HTTPHeaderOssRequester, nil)
	if err != nil || payerOpt == nil {
		return ""
	}
	return payerOpt.(string)
}

// getProgressListener gets the progress callback
func getProgressListener(options []Option) ProgressListener {
	isSet, listener, _ := isOptionSet(options, progressListener)
	if !isSet {
		return nil
	}
	return listener.(ProgressListener)
}
// uploadPartHook is for testing usage
type uploadPartHook func(id int, chunk FileChunk) error

var uploadPartHooker uploadPartHook = defaultUploadPart

func defaultUploadPart(id int, chunk FileChunk) error {
	return nil
}

// workerArg defines the worker argument structure
type workerArg struct {
	bucket   *Bucket
	filePath string
	imur     InitiateMultipartUploadResult
	options  []Option
	hook     uploadPartHook
}

// defaultUploadProgressListener shields the caller's ProgressListener from the
// per-part progress events emitted by UploadPartFromFile.
type defaultUploadProgressListener struct {
}

// ProgressChanged no-ops
func (listener *defaultUploadProgressListener) ProgressChanged(event *ProgressEvent) {
}
// worker is the worker coroutine function
func worker(id int, arg workerArg, jobs <-chan FileChunk, results chan<- UploadPart, failed chan<- error, die <-chan bool) {
	for chunk := range jobs {
		if err := arg.hook(id, chunk); err != nil {
			failed <- err
			break
		}
		// Override any caller-supplied listener with the no-op
		// defaultUploadProgressListener for the per-part request.
		p := Progress(&defaultUploadProgressListener{})
		opts := make([]Option, 0, len(arg.options)+1)
		opts = append(opts, arg.options...)
		opts = append(opts, p)
		part, err := arg.bucket.UploadPartFromFile(arg.imur, arg.filePath, chunk.Offset, chunk.Size, chunk.Number, opts...)
		if err != nil {
			failed <- err
			break
		}
		select {
		case <-die:
			return
		default:
		}
		results <- part
	}
}
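// Note on shutdown: workers exit when the scheduler closes jobs (a normal
// drain) or when the consumer closes die after a failure. The non-blocking
// die check is best-effort: a part that is already past the check may still
// be sent, but results is buffered to len(chunks), so the send never blocks
// and the stray part is simply never read.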
// scheduler feeds the chunks into the jobs channel and closes it when done
func scheduler(jobs chan FileChunk, chunks []FileChunk) {
	for _, chunk := range chunks {
		jobs <- chunk
	}
	close(jobs)
}

// getTotalBytes sums the sizes of all chunks
func getTotalBytes(chunks []FileChunk) int64 {
	var tb int64
	for _, chunk := range chunks {
		tb += chunk.Size
	}
	return tb
}
// uploadFile is a concurrent upload, without checkpoint
func (bucket Bucket) uploadFile(objectKey, filePath string, partSize int64, options []Option, routines int) error {
	listener := getProgressListener(options)

	chunks, err := SplitFileByPartSize(filePath, partSize)
	if err != nil {
		return err
	}

	partOptions := ChoiceTransferPartOption(options)
	completeOptions := ChoiceCompletePartOption(options)
	abortOptions := ChoiceAbortPartOption(options)

	// Initialize the multipart upload
	imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
	if err != nil {
		return err
	}

	// jobs and results are buffered to len(chunks) so neither the scheduler
	// nor the workers ever block on them.
	jobs := make(chan FileChunk, len(chunks))
	results := make(chan UploadPart, len(chunks))
	failed := make(chan error)
	die := make(chan bool)

	var completedBytes int64
	totalBytes := getTotalBytes(chunks)
	event := newProgressEvent(TransferStartedEvent, 0, totalBytes, 0)
	publishProgress(listener, event)

	// Start the worker coroutines
	arg := workerArg{&bucket, filePath, imur, partOptions, uploadPartHooker}
	for w := 1; w <= routines; w++ {
		go worker(w, arg, jobs, results, failed, die)
	}

	// Schedule the jobs
	go scheduler(jobs, chunks)

	// Wait for the uploads to finish
	completed := 0
	parts := make([]UploadPart, len(chunks))
	for completed < len(chunks) {
		select {
		case part := <-results:
			completed++
			parts[part.PartNumber-1] = part
			completedBytes += chunks[part.PartNumber-1].Size

			// RwBytes in the ProgressEvent carries this part's size; byte-level
			// read/write progress has already been reported via teeReader.Read().
			event = newProgressEvent(TransferDataEvent, completedBytes, totalBytes, chunks[part.PartNumber-1].Size)
			publishProgress(listener, event)
		case err := <-failed:
			close(die)
			event = newProgressEvent(TransferFailedEvent, completedBytes, totalBytes, 0)
			publishProgress(listener, event)
			bucket.AbortMultipartUpload(imur, abortOptions...)
			return err
		}
	}

	event = newProgressEvent(TransferCompletedEvent, completedBytes, totalBytes, 0)
	publishProgress(listener, event)

	// Complete the multipart upload
	_, err = bucket.CompleteMultipartUpload(imur, parts, completeOptions...)
	if err != nil {
		bucket.AbortMultipartUpload(imur, abortOptions...)
		return err
	}
	return nil
}
// ----- concurrent upload with checkpoint -----

const uploadCpMagic = "FE8BB4EA-B593-4FAC-AD7A-2459A36E2E62"

type uploadCheckpoint struct {
	Magic     string   // Magic
	MD5       string   // Checkpoint file content's MD5
	FilePath  string   // Local file path
	FileStat  cpStat   // File state
	ObjectKey string   // Key
	UploadID  string   // Upload ID
	Parts     []cpPart // All parts of the local file
}

type cpStat struct {
	Size         int64     // File size
	LastModified time.Time // File's last modified time
	MD5          string    // Local file's MD5
}

type cpPart struct {
	Chunk       FileChunk  // File chunk
	Part        UploadPart // Uploaded part
	IsCompleted bool       // Upload complete flag
}
// isValid checks whether the checkpoint data is still usable: it's valid only
// if the checkpoint itself is intact and the local file has not changed since.
func (cp uploadCheckpoint) isValid(filePath string) (bool, error) {
	// Compare the checkpoint's magic number and MD5
	cpb := cp
	cpb.MD5 = ""
	js, _ := json.Marshal(cpb)
	sum := md5.Sum(js)
	b64 := base64.StdEncoding.EncodeToString(sum[:])

	if cp.Magic != uploadCpMagic || b64 != cp.MD5 {
		return false, nil
	}

	// Make sure the local file has not been updated
	fd, err := os.Open(filePath)
	if err != nil {
		return false, err
	}
	defer fd.Close()

	st, err := fd.Stat()
	if err != nil {
		return false, err
	}

	md, err := calcFileMD5(filePath)
	if err != nil {
		return false, err
	}

	// Compare the file size, last modified time and MD5
	if cp.FileStat.Size != st.Size() ||
		!cp.FileStat.LastModified.Equal(st.ModTime()) ||
		cp.FileStat.MD5 != md {
		return false, nil
	}

	return true, nil
}
// load loads the checkpoint from the local file
func (cp *uploadCheckpoint) load(filePath string) error {
	contents, err := ioutil.ReadFile(filePath)
	if err != nil {
		return err
	}
	return json.Unmarshal(contents, cp)
}

// dump dumps the checkpoint to the local file
func (cp *uploadCheckpoint) dump(filePath string) error {
	bcp := *cp

	// Calculate MD5 over the JSON with the MD5 field zeroed
	bcp.MD5 = ""
	js, err := json.Marshal(bcp)
	if err != nil {
		return err
	}
	sum := md5.Sum(js)
	b64 := base64.StdEncoding.EncodeToString(sum[:])
	bcp.MD5 = b64

	// Serialize
	js, err = json.Marshal(bcp)
	if err != nil {
		return err
	}

	// Dump
	return ioutil.WriteFile(filePath, js, FilePermMode)
}
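// Note: WriteFile truncates in place, so a crash mid-dump can leave a
// truncated checkpoint. That case is self-healing: load fails (or isValid's
// magic/MD5 check fails), and uploadFileWithCp falls back to prepare() and
// starts over rather than resuming from corrupt state.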
// updatePart updates the part status
func (cp *uploadCheckpoint) updatePart(part UploadPart) {
	cp.Parts[part.PartNumber-1].Part = part
	cp.Parts[part.PartNumber-1].IsCompleted = true
}

// todoParts returns the unfinished parts
func (cp *uploadCheckpoint) todoParts() []FileChunk {
	fcs := []FileChunk{}
	for _, part := range cp.Parts {
		if !part.IsCompleted {
			fcs = append(fcs, part.Chunk)
		}
	}
	return fcs
}

// allParts returns all parts
func (cp *uploadCheckpoint) allParts() []UploadPart {
	ps := []UploadPart{}
	for _, part := range cp.Parts {
		ps = append(ps, part.Part)
	}
	return ps
}

// getCompletedBytes returns the completed bytes count
func (cp *uploadCheckpoint) getCompletedBytes() int64 {
	var completedBytes int64
	for _, part := range cp.Parts {
		if part.IsCompleted {
			completedBytes += part.Chunk.Size
		}
	}
	return completedBytes
}
// calcFileMD5 calculates the MD5 for the specified local file.
// It is currently a stub: it always returns "", so the MD5 comparison in
// isValid degenerates to comparing two empty strings, and file changes are
// detected via size and modification time only.
func calcFileMD5(filePath string) (string, error) {
	return "", nil
}
// prepare initializes the multipart upload
func prepare(cp *uploadCheckpoint, objectKey, filePath string, partSize int64, bucket *Bucket, options []Option) error {
	// Checkpoint
	cp.Magic = uploadCpMagic
	cp.FilePath = filePath
	cp.ObjectKey = objectKey

	// Local file
	fd, err := os.Open(filePath)
	if err != nil {
		return err
	}
	defer fd.Close()

	st, err := fd.Stat()
	if err != nil {
		return err
	}
	cp.FileStat.Size = st.Size()
	cp.FileStat.LastModified = st.ModTime()
	md, err := calcFileMD5(filePath)
	if err != nil {
		return err
	}
	cp.FileStat.MD5 = md

	// Chunks
	parts, err := SplitFileByPartSize(filePath, partSize)
	if err != nil {
		return err
	}
	cp.Parts = make([]cpPart, len(parts))
	for i, part := range parts {
		cp.Parts[i].Chunk = part
		cp.Parts[i].IsCompleted = false
	}

	// Initiate the multipart upload
	imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
	if err != nil {
		return err
	}
	cp.UploadID = imur.UploadID

	return nil
}
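// Note: when a stale checkpoint fails validation, prepare initiates a
// brand-new multipart upload; the UploadID recorded in the old checkpoint is
// never aborted, so its parts remain on the service until cleaned up out of
// band (for example, by a bucket lifecycle rule for incomplete multipart
// uploads).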
// complete completes the multipart upload and deletes the local checkpoint file
func complete(cp *uploadCheckpoint, bucket *Bucket, parts []UploadPart, cpFilePath string, options []Option) error {
	imur := InitiateMultipartUploadResult{Bucket: bucket.BucketName,
		Key: cp.ObjectKey, UploadID: cp.UploadID}
	_, err := bucket.CompleteMultipartUpload(imur, parts, options...)
	if err != nil {
		return err
	}
	os.Remove(cpFilePath)
	return nil
}
// uploadFileWithCp handles the concurrent upload with checkpoint
func (bucket Bucket) uploadFileWithCp(objectKey, filePath string, partSize int64, options []Option, cpFilePath string, routines int) error {
	listener := getProgressListener(options)

	partOptions := ChoiceTransferPartOption(options)
	completeOptions := ChoiceCompletePartOption(options)

	// Load checkpoint data
	ucp := uploadCheckpoint{}
	err := ucp.load(cpFilePath)
	if err != nil {
		os.Remove(cpFilePath)
	}

	// On a load error or invalid checkpoint data, start from scratch
	valid, err := ucp.isValid(filePath)
	if err != nil || !valid {
		if err = prepare(&ucp, objectKey, filePath, partSize, &bucket, options); err != nil {
			return err
		}
		os.Remove(cpFilePath)
	}

	chunks := ucp.todoParts()
	imur := InitiateMultipartUploadResult{
		Bucket:   bucket.BucketName,
		Key:      objectKey,
		UploadID: ucp.UploadID}

	jobs := make(chan FileChunk, len(chunks))
	results := make(chan UploadPart, len(chunks))
	failed := make(chan error)
	die := make(chan bool)

	completedBytes := ucp.getCompletedBytes()

	// RwBytes in the ProgressEvent is 0 here because byte-level read/write
	// progress has already been reported via teeReader.Read()
	event := newProgressEvent(TransferStartedEvent, completedBytes, ucp.FileStat.Size, 0)
	publishProgress(listener, event)

	// Start the workers
	arg := workerArg{&bucket, filePath, imur, partOptions, uploadPartHooker}
	for w := 1; w <= routines; w++ {
		go worker(w, arg, jobs, results, failed, die)
	}

	// Schedule the jobs
	go scheduler(jobs, chunks)

	// Wait for the uploads to finish
	completed := 0
	for completed < len(chunks) {
		select {
		case part := <-results:
			completed++
			ucp.updatePart(part)
			ucp.dump(cpFilePath)
			completedBytes += ucp.Parts[part.PartNumber-1].Chunk.Size
			event = newProgressEvent(TransferDataEvent, completedBytes, ucp.FileStat.Size, ucp.Parts[part.PartNumber-1].Chunk.Size)
			publishProgress(listener, event)
		case err := <-failed:
			close(die)
			event = newProgressEvent(TransferFailedEvent, completedBytes, ucp.FileStat.Size, 0)
			publishProgress(listener, event)
			return err
		}
	}

	event = newProgressEvent(TransferCompletedEvent, completedBytes, ucp.FileStat.Size, 0)
	publishProgress(listener, event)

	// Complete the multipart upload
	err = complete(&ucp, &bucket, ucp.allParts(), cpFilePath, completeOptions)
	return err
}
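// On a part failure, uploadFileWithCp returns the error but leaves both the
// checkpoint file and the multipart upload in place, so a subsequent
// UploadFile call with the same checkpoint option resumes from the last
// dumped state instead of re-uploading the completed parts.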