upload.go

package oss

import (
	"crypto/md5"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"time"
)

// UploadFile uploads a file in multipart mode, splitting it into parts and
// uploading them concurrently.
//
// objectKey    the object name.
// filePath     the local file path to upload.
// partSize     the part size in bytes.
// options      the options for uploading the object.
//
// error    it's nil if the operation succeeds, otherwise it's an error object.
//
func (bucket Bucket) UploadFile(objectKey, filePath string, partSize int64, options ...Option) error {
	if partSize < MinPartSize || partSize > MaxPartSize {
		return errors.New("oss: part size invalid range (100KB, 5GB]")
	}

	cpConf := getCpConfig(options)
	routines := getRoutines(options)

	if cpConf != nil && cpConf.IsEnable {
		cpFilePath := getUploadCpFilePath(cpConf, filePath, bucket.BucketName, objectKey)
		if cpFilePath != "" {
			return bucket.uploadFileWithCp(objectKey, filePath, partSize, options, cpFilePath, routines)
		}
	}

	return bucket.uploadFile(objectKey, filePath, partSize, options, routines)
}

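// A minimal usage sketch (illustrative, assuming a configured client and the
// Routines/Checkpoint option helpers defined elsewhere in this package;
// endpoint, credentials, bucket and paths are placeholders):
//
//	client, err := oss.New("yourEndpoint", "yourAccessKeyID", "yourAccessKeySecret")
//	if err != nil {
//		// handle error
//	}
//	bucket, err := client.Bucket("your-bucket")
//	if err != nil {
//		// handle error
//	}
//	// 100 KB parts, 3 concurrent routines, resumable via a checkpoint file.
//	err = bucket.UploadFile("your-object", "/tmp/your-file", 100*1024,
//		oss.Routines(3), oss.Checkpoint(true, "/tmp/your-file.cp"))
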
func getUploadCpFilePath(cpConf *cpConfig, srcFile, destBucket, destObject string) string {
	if cpConf.FilePath == "" && cpConf.DirPath != "" {
		dest := fmt.Sprintf("oss://%v/%v", destBucket, destObject)
		absPath, _ := filepath.Abs(srcFile)
		cpFileName := getCpFileName(absPath, dest)
		cpConf.FilePath = cpConf.DirPath + string(os.PathSeparator) + cpFileName
	}
	return cpConf.FilePath
}

// ----- concurrent upload without checkpoint -----

// getCpConfig gets checkpoint configuration
func getCpConfig(options []Option) *cpConfig {
	cpcOpt, err := findOption(options, checkpointConfig, nil)
	if err != nil || cpcOpt == nil {
		return nil
	}
	return cpcOpt.(*cpConfig)
}

// getCpFileName returns the name of the checkpoint file, in the form
// "<md5hex(srcAbsPath)>-<md5hex(oss://bucket/object)>.cp".
func getCpFileName(src, dest string) string {
	md5Ctx := md5.New()
	md5Ctx.Write([]byte(src))
	srcCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))
	md5Ctx.Reset()
	md5Ctx.Write([]byte(dest))
	destCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))
	return fmt.Sprintf("%v-%v.cp", srcCheckSum, destCheckSum)
}

// getRoutines gets the routine count. By default it's 1; values are clamped
// to the range [1, 100].
func getRoutines(options []Option) int {
	rtnOpt, err := findOption(options, routineNum, nil)
	if err != nil || rtnOpt == nil {
		return 1
	}
	rs := rtnOpt.(int)
	if rs < 1 {
		rs = 1
	} else if rs > 100 {
		rs = 100
	}
	return rs
}

// getPayer returns the payer of the request
func getPayer(options []Option) string {
	payerOpt, err := findOption(options, HTTPHeaderOssRequester, nil)
	if err != nil || payerOpt == nil {
		return ""
	}
	return payerOpt.(string)
}

// getProgressListener gets the progress callback
func getProgressListener(options []Option) ProgressListener {
	isSet, listener, _ := isOptionSet(options, progressListener)
	if !isSet {
		return nil
	}
	return listener.(ProgressListener)
}

// uploadPartHook is a hook for testing part uploads
type uploadPartHook func(id int, chunk FileChunk) error

var uploadPartHooker uploadPartHook = defaultUploadPart

func defaultUploadPart(id int, chunk FileChunk) error {
	return nil
}

// workerArg defines the worker argument structure
type workerArg struct {
	bucket   *Bucket
	filePath string
	imur     InitiateMultipartUploadResult
	options  []Option
	hook     uploadPartHook
}

// worker is the worker goroutine body: it uploads each chunk received from
// jobs and reports the finished part on results, or the first error on
// failed. A closed die channel stops it before it posts further results.
func worker(id int, arg workerArg, jobs <-chan FileChunk, results chan<- UploadPart, failed chan<- error, die <-chan bool) {
	for chunk := range jobs {
		if err := arg.hook(id, chunk); err != nil {
			failed <- err
			break
		}
		part, err := arg.bucket.UploadPartFromFile(arg.imur, arg.filePath, chunk.Offset, chunk.Size, chunk.Number, arg.options...)
		if err != nil {
			failed <- err
			break
		}
		select {
		case <-die:
			return
		default:
		}
		results <- part
	}
}

// scheduler feeds all chunks into the jobs channel and then closes it
func scheduler(jobs chan FileChunk, chunks []FileChunk) {
	for _, chunk := range chunks {
		jobs <- chunk
	}
	close(jobs)
}

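// Note on the pattern (descriptive, not normative): scheduler and worker form
// a simple fan-out/fan-in pool. The callers buffer jobs and results to the
// chunk count, so the scheduler never blocks; workers stop when jobs is
// drained and closed, when they hit an error, or when the consumer closes
// die after observing a failure.
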
// getTotalBytes returns the total size of all chunks
func getTotalBytes(chunks []FileChunk) int64 {
	var tb int64
	for _, chunk := range chunks {
		tb += chunk.Size
	}
	return tb
}

// uploadFile is a concurrent upload, without checkpoint
func (bucket Bucket) uploadFile(objectKey, filePath string, partSize int64, options []Option, routines int) error {
	listener := getProgressListener(options)

	chunks, err := SplitFileByPartSize(filePath, partSize)
	if err != nil {
		return err
	}

	// Initialize the multipart upload
	imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
	if err != nil {
		return err
	}

	jobs := make(chan FileChunk, len(chunks))
	results := make(chan UploadPart, len(chunks))
	failed := make(chan error)
	die := make(chan bool)

	var completedBytes int64
	totalBytes := getTotalBytes(chunks)
	event := newProgressEvent(TransferStartedEvent, 0, totalBytes, 0)
	publishProgress(listener, event)

	// Start the worker goroutines
	arg := workerArg{&bucket, filePath, imur, options, uploadPartHooker}
	for w := 1; w <= routines; w++ {
		go worker(w, arg, jobs, results, failed, die)
	}

	// Schedule the jobs
	go scheduler(jobs, chunks)

	// Wait for the uploads to finish
	completed := 0
	parts := make([]UploadPart, len(chunks))
	for completed < len(chunks) {
		select {
		case part := <-results:
			completed++
			parts[part.PartNumber-1] = part
			completedBytes += chunks[part.PartNumber-1].Size
			// RwBytes in the ProgressEvent is 0 because the read/write
			// progress has already been reported in teeReader.Read().
			event = newProgressEvent(TransferDataEvent, completedBytes, totalBytes, 0)
			publishProgress(listener, event)
		case err := <-failed:
			close(die)
			event = newProgressEvent(TransferFailedEvent, completedBytes, totalBytes, 0)
			publishProgress(listener, event)
			bucket.AbortMultipartUpload(imur, options...)
			return err
		}

		if completed >= len(chunks) {
			break
		}
	}

	event = newProgressEvent(TransferCompletedEvent, completedBytes, totalBytes, 0)
	publishProgress(listener, event)

	// Complete the multipart upload
	_, err = bucket.CompleteMultipartUpload(imur, parts, options...)
	if err != nil {
		bucket.AbortMultipartUpload(imur, options...)
		return err
	}
	return nil
}

// ----- concurrent upload with checkpoint -----

const uploadCpMagic = "FE8BB4EA-B593-4FAC-AD7A-2459A36E2E62"

type uploadCheckpoint struct {
	Magic     string   // Magic
	MD5       string   // Checkpoint file content's MD5
	FilePath  string   // Local file path
	FileStat  cpStat   // File state
	ObjectKey string   // Key
	UploadID  string   // Upload ID
	Parts     []cpPart // All parts of the local file
}

type cpStat struct {
	Size         int64     // File size
	LastModified time.Time // File's last modified time
	MD5          string    // Local file's MD5
}

type cpPart struct {
	Chunk       FileChunk  // File chunk
	Part        UploadPart // Uploaded part
	IsCompleted bool       // Upload complete flag
}

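// For illustration only: a dumped checkpoint file is the plain JSON encoding
// of uploadCheckpoint (there are no json tags, so field names appear as
// declared). Values below are hypothetical and abridged:
//
//	{
//	  "Magic": "FE8BB4EA-B593-4FAC-AD7A-2459A36E2E62",
//	  "MD5": "<base64 MD5 of this JSON with the MD5 field cleared>",
//	  "FilePath": "/tmp/your-file",
//	  "FileStat": {"Size": 524288, "LastModified": "2006-01-02T15:04:05Z", "MD5": ""},
//	  "ObjectKey": "your-object",
//	  "UploadID": "<upload id from InitiateMultipartUpload>",
//	  "Parts": [{"Chunk": {...}, "Part": {...}, "IsCompleted": true}, ...]
//	}
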
// isValid checks whether the checkpoint data is valid: it is valid when its
// magic number and MD5 match and the local file has not been modified since
// the checkpoint was written.
func (cp uploadCheckpoint) isValid(filePath string) (bool, error) {
	// Compare the CP's magic number and MD5.
	cpb := cp
	cpb.MD5 = ""
	js, _ := json.Marshal(cpb)
	sum := md5.Sum(js)
	b64 := base64.StdEncoding.EncodeToString(sum[:])

	if cp.Magic != uploadCpMagic || b64 != cp.MD5 {
		return false, nil
	}

	// Make sure the local file has not been updated.
	fd, err := os.Open(filePath)
	if err != nil {
		return false, err
	}
	defer fd.Close()

	st, err := fd.Stat()
	if err != nil {
		return false, err
	}

	md, err := calcFileMD5(filePath)
	if err != nil {
		return false, err
	}

	// Compare the file size, last modified time and MD5
	if cp.FileStat.Size != st.Size() ||
		cp.FileStat.LastModified != st.ModTime() ||
		cp.FileStat.MD5 != md {
		return false, nil
	}

	return true, nil
}

// load loads the checkpoint from the local file
func (cp *uploadCheckpoint) load(filePath string) error {
	contents, err := ioutil.ReadFile(filePath)
	if err != nil {
		return err
	}
	return json.Unmarshal(contents, cp)
}

// dump writes the checkpoint to the local file
func (cp *uploadCheckpoint) dump(filePath string) error {
	bcp := *cp

	// Calculate the MD5 over the JSON with the MD5 field cleared
	bcp.MD5 = ""
	js, err := json.Marshal(bcp)
	if err != nil {
		return err
	}
	sum := md5.Sum(js)
	b64 := base64.StdEncoding.EncodeToString(sum[:])
	bcp.MD5 = b64

	// Serialize
	js, err = json.Marshal(bcp)
	if err != nil {
		return err
	}

	// Dump
	return ioutil.WriteFile(filePath, js, FilePermMode)
}

// updatePart updates the part status
func (cp *uploadCheckpoint) updatePart(part UploadPart) {
	cp.Parts[part.PartNumber-1].Part = part
	cp.Parts[part.PartNumber-1].IsCompleted = true
}

// todoParts returns the unfinished parts
func (cp *uploadCheckpoint) todoParts() []FileChunk {
	fcs := []FileChunk{}
	for _, part := range cp.Parts {
		if !part.IsCompleted {
			fcs = append(fcs, part.Chunk)
		}
	}
	return fcs
}

// allParts returns all parts
func (cp *uploadCheckpoint) allParts() []UploadPart {
	ps := []UploadPart{}
	for _, part := range cp.Parts {
		ps = append(ps, part.Part)
	}
	return ps
}

// getCompletedBytes returns the completed bytes count
func (cp *uploadCheckpoint) getCompletedBytes() int64 {
	var completedBytes int64
	for _, part := range cp.Parts {
		if part.IsCompleted {
			completedBytes += part.Chunk.Size
		}
	}
	return completedBytes
}

// calcFileMD5 calculates the MD5 for the specified local file. It is
// currently a stub that always returns an empty string; this matches the
// empty FileStat.MD5 recorded by prepare, so isValid's MD5 comparison stays
// self-consistent.
func calcFileMD5(filePath string) (string, error) {
	return "", nil
}

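// If real content verification were wanted, a sketch of one possible
// implementation (an assumption, not what this file currently does; it would
// also need the "io" import):
//
//	func calcFileMD5(filePath string) (string, error) {
//		f, err := os.Open(filePath)
//		if err != nil {
//			return "", err
//		}
//		defer f.Close()
//		h := md5.New()
//		if _, err := io.Copy(h, f); err != nil {
//			return "", err
//		}
//		return hex.EncodeToString(h.Sum(nil)), nil
//	}
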
// prepare initializes the multipart upload and the checkpoint data
func prepare(cp *uploadCheckpoint, objectKey, filePath string, partSize int64, bucket *Bucket, options []Option) error {
	// Checkpoint data
	cp.Magic = uploadCpMagic
	cp.FilePath = filePath
	cp.ObjectKey = objectKey

	// Local file
	fd, err := os.Open(filePath)
	if err != nil {
		return err
	}
	defer fd.Close()

	st, err := fd.Stat()
	if err != nil {
		return err
	}
	cp.FileStat.Size = st.Size()
	cp.FileStat.LastModified = st.ModTime()
	md, err := calcFileMD5(filePath)
	if err != nil {
		return err
	}
	cp.FileStat.MD5 = md

	// Chunks
	parts, err := SplitFileByPartSize(filePath, partSize)
	if err != nil {
		return err
	}
	cp.Parts = make([]cpPart, len(parts))
	for i, part := range parts {
		cp.Parts[i].Chunk = part
		cp.Parts[i].IsCompleted = false
	}

	// Initiate the multipart upload
	imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
	if err != nil {
		return err
	}
	cp.UploadID = imur.UploadID

	return nil
}

// complete completes the multipart upload and deletes the local CP file
func complete(cp *uploadCheckpoint, bucket *Bucket, parts []UploadPart, cpFilePath string, options []Option) error {
	imur := InitiateMultipartUploadResult{Bucket: bucket.BucketName,
		Key: cp.ObjectKey, UploadID: cp.UploadID}
	_, err := bucket.CompleteMultipartUpload(imur, parts, options...)
	if err != nil {
		return err
	}
	os.Remove(cpFilePath)
	return err
}

// uploadFileWithCp handles concurrent upload with checkpoint
func (bucket Bucket) uploadFileWithCp(objectKey, filePath string, partSize int64, options []Option, cpFilePath string, routines int) error {
	listener := getProgressListener(options)

	// Load CP data
	ucp := uploadCheckpoint{}
	err := ucp.load(cpFilePath)
	if err != nil {
		os.Remove(cpFilePath)
	}

	// If the CP data failed to load or is invalid, re-initialize the upload
	valid, err := ucp.isValid(filePath)
	if err != nil || !valid {
		if err = prepare(&ucp, objectKey, filePath, partSize, &bucket, options); err != nil {
			return err
		}
		os.Remove(cpFilePath)
	}

	chunks := ucp.todoParts()
	imur := InitiateMultipartUploadResult{
		Bucket:   bucket.BucketName,
		Key:      objectKey,
		UploadID: ucp.UploadID}

	jobs := make(chan FileChunk, len(chunks))
	results := make(chan UploadPart, len(chunks))
	failed := make(chan error)
	die := make(chan bool)

	completedBytes := ucp.getCompletedBytes()

	// RwBytes in the ProgressEvent is 0 because the read/write progress has
	// already been reported in teeReader.Read().
	event := newProgressEvent(TransferStartedEvent, completedBytes, ucp.FileStat.Size, 0)
	publishProgress(listener, event)

	// Start the workers
	arg := workerArg{&bucket, filePath, imur, options, uploadPartHooker}
	for w := 1; w <= routines; w++ {
		go worker(w, arg, jobs, results, failed, die)
	}

	// Schedule the jobs
	go scheduler(jobs, chunks)

	// Wait for the jobs to finish
	completed := 0
	for completed < len(chunks) {
		select {
		case part := <-results:
			completed++
			ucp.updatePart(part)
			ucp.dump(cpFilePath)
			completedBytes += ucp.Parts[part.PartNumber-1].Chunk.Size
			event = newProgressEvent(TransferDataEvent, completedBytes, ucp.FileStat.Size, 0)
			publishProgress(listener, event)
		case err := <-failed:
			close(die)
			event = newProgressEvent(TransferFailedEvent, completedBytes, ucp.FileStat.Size, 0)
			publishProgress(listener, event)
			return err
		}

		if completed >= len(chunks) {
			break
		}
	}

	event = newProgressEvent(TransferCompletedEvent, completedBytes, ucp.FileStat.Size, 0)
	publishProgress(listener, event)

	// Complete the multipart upload
	err = complete(&ucp, &bucket, ucp.allParts(), cpFilePath, options)
	return err
}
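
// Resume semantics (summary of the flow above): if UploadFile is called again
// with the same checkpoint file and the local file is unchanged, load and
// isValid succeed, todoParts returns only the parts not yet marked
// IsCompleted, and the upload resumes under the recorded UploadID instead of
// starting over.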