upload.go

package oss

import (
	"crypto/md5"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"time"
)
// UploadFile uploads a file as a multipart upload.
//
// objectKey    the object name.
// filePath     the local file path to upload.
// partSize     the part size in bytes.
// options      the options for uploading the object.
//
// error    nil if the operation succeeds, otherwise an error object.
//
func (bucket Bucket) UploadFile(objectKey, filePath string, partSize int64, options ...Option) error {
	if partSize < MinPartSize || partSize > MaxPartSize {
		return errors.New("oss: part size invalid range (1024KB, 5GB]")
	}

	cpConf := getCpConfig(options)
	routines := getRoutines(options)

	if cpConf != nil && cpConf.IsEnable && cpConf.cpDir != "" {
		dest := fmt.Sprintf("oss://%v/%v", bucket.BucketName, objectKey)
		absPath, _ := filepath.Abs(filePath)
		cpFileName := getCpFileName(absPath, dest)
		cpFilePath := cpConf.cpDir + string(os.PathSeparator) + cpFileName
		return bucket.uploadFileWithCp(objectKey, filePath, partSize, options, cpFilePath, routines)
	}

	return bucket.uploadFile(objectKey, filePath, partSize, options, routines)
}
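
// Illustrative usage sketch (not part of the original file): a plain
// concurrent multipart upload with several workers and no checkpoint.
// Routines is assumed to be the option constructor this package exposes for
// routineNum; the object key and file path are placeholders.
func exampleConcurrentUpload(bucket Bucket) error {
	// 1MB parts uploaded by 3 concurrent worker goroutines.
	return bucket.UploadFile("my-object-key", "/tmp/local-file.bin", 1024*1024, Routines(3))
}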
// ----- concurrent upload without checkpoint -----

// getCpConfig gets checkpoint configuration
func getCpConfig(options []Option) *cpConfig {
	cpcOpt, err := findOption(options, checkpointConfig, nil)
	if err != nil || cpcOpt == nil {
		return nil
	}

	return cpcOpt.(*cpConfig)
}
// getCpFileName returns the name of the checkpoint file
func getCpFileName(src, dest string) string {
	md5Ctx := md5.New()
	md5Ctx.Write([]byte(src))
	srcCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))

	md5Ctx.Reset()
	md5Ctx.Write([]byte(dest))
	destCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))

	return fmt.Sprintf("%v-%v.cp", srcCheckSum, destCheckSum)
}
// getRoutines gets the routine count. By default it's 1, and it is clamped to [1, 100].
func getRoutines(options []Option) int {
	rtnOpt, err := findOption(options, routineNum, nil)
	if err != nil || rtnOpt == nil {
		return 1
	}

	rs := rtnOpt.(int)
	if rs < 1 {
		rs = 1
	} else if rs > 100 {
		rs = 100
	}

	return rs
}
// getProgressListener gets the progress callback
func getProgressListener(options []Option) ProgressListener {
	isSet, listener, _ := isOptionSet(options, progressListener)
	if !isSet {
		return nil
	}
	return listener.(ProgressListener)
}

// uploadPartHook is for testing usage
type uploadPartHook func(id int, chunk FileChunk) error

var uploadPartHooker uploadPartHook = defaultUploadPart

func defaultUploadPart(id int, chunk FileChunk) error {
	return nil
}
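
// Illustrative test sketch (not part of the original file): because
// uploadPartHooker is a package-level variable, a test can swap it to inject
// a failure for one part and exercise the abort/cleanup path below. The
// chosen part number is arbitrary.
func exampleInjectPartFailure() func() {
	old := uploadPartHooker
	uploadPartHooker = func(id int, chunk FileChunk) error {
		if chunk.Number == 2 {
			return errors.New("injected failure for part 2")
		}
		return nil
	}
	// The caller restores the default hook when done.
	return func() { uploadPartHooker = old }
}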
// workerArg defines the worker argument structure
type workerArg struct {
	bucket   *Bucket
	filePath string
	imur     InitiateMultipartUploadResult
	hook     uploadPartHook
}

// worker is the worker goroutine function
func worker(id int, arg workerArg, jobs <-chan FileChunk, results chan<- UploadPart, failed chan<- error, die <-chan bool) {
	for chunk := range jobs {
		if err := arg.hook(id, chunk); err != nil {
			failed <- err
			break
		}
		part, err := arg.bucket.UploadPartFromFile(arg.imur, arg.filePath, chunk.Offset, chunk.Size, chunk.Number)
		if err != nil {
			failed <- err
			break
		}
		select {
		case <-die:
			return
		default:
		}
		results <- part
	}
}
// scheduler feeds the chunks into the jobs channel and then closes it
func scheduler(jobs chan FileChunk, chunks []FileChunk) {
	for _, chunk := range chunks {
		jobs <- chunk
	}
	close(jobs)
}

// getTotalBytes returns the total size of all chunks
func getTotalBytes(chunks []FileChunk) int64 {
	var tb int64
	for _, chunk := range chunks {
		tb += chunk.Size
	}
	return tb
}
// uploadFile is a concurrent upload, without checkpoint
func (bucket Bucket) uploadFile(objectKey, filePath string, partSize int64, options []Option, routines int) error {
	listener := getProgressListener(options)

	chunks, err := SplitFileByPartSize(filePath, partSize)
	if err != nil {
		return err
	}

	// Initialize the multipart upload
	imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
	if err != nil {
		return err
	}

	jobs := make(chan FileChunk, len(chunks))
	results := make(chan UploadPart, len(chunks))
	failed := make(chan error)
	die := make(chan bool)

	var completedBytes int64
	totalBytes := getTotalBytes(chunks)
	event := newProgressEvent(TransferStartedEvent, 0, totalBytes)
	publishProgress(listener, event)

	// Start the worker goroutines
	arg := workerArg{&bucket, filePath, imur, uploadPartHooker}
	for w := 1; w <= routines; w++ {
		go worker(w, arg, jobs, results, failed, die)
	}

	// Schedule the jobs
	go scheduler(jobs, chunks)

	// Wait for the uploads to finish
	completed := 0
	parts := make([]UploadPart, len(chunks))
	for completed < len(chunks) {
		select {
		case part := <-results:
			completed++
			parts[part.PartNumber-1] = part
			completedBytes += chunks[part.PartNumber-1].Size
			event = newProgressEvent(TransferDataEvent, completedBytes, totalBytes)
			publishProgress(listener, event)
		case err := <-failed:
			close(die)
			event = newProgressEvent(TransferFailedEvent, completedBytes, totalBytes)
			publishProgress(listener, event)
			bucket.AbortMultipartUpload(imur)
			return err
		}

		if completed >= len(chunks) {
			break
		}
	}

	event = newProgressEvent(TransferCompletedEvent, completedBytes, totalBytes)
	publishProgress(listener, event)

	// Complete the multipart upload
	_, err = bucket.CompleteMultipartUpload(imur, parts)
	if err != nil {
		bucket.AbortMultipartUpload(imur)
		return err
	}
	return nil
}
// ----- concurrent upload with checkpoint -----

const uploadCpMagic = "FE8BB4EA-B593-4FAC-AD7A-2459A36E2E62"

type uploadCheckpoint struct {
	Magic     string   // Magic
	MD5       string   // Checkpoint file content's MD5
	FilePath  string   // Local file path
	FileStat  cpStat   // File state
	ObjectKey string   // Key
	UploadID  string   // Upload ID
	Parts     []cpPart // All parts of the local file
}

type cpStat struct {
	Size         int64     // File size
	LastModified time.Time // File's last modified time
	MD5          string    // Local file's MD5
}

type cpPart struct {
	Chunk       FileChunk  // File chunk
	Part        UploadPart // Uploaded part
	IsCompleted bool       // Upload complete flag
}
// isValid checks if the uploaded data is valid: it's valid when the file has not been updated and the checkpoint data is valid.
func (cp uploadCheckpoint) isValid(filePath string) (bool, error) {
	// Compare the CP's magic number and MD5.
	cpb := cp
	cpb.MD5 = ""
	js, _ := json.Marshal(cpb)
	sum := md5.Sum(js)
	b64 := base64.StdEncoding.EncodeToString(sum[:])

	if cp.Magic != uploadCpMagic || b64 != cp.MD5 {
		return false, nil
	}

	// Make sure the local file has not been updated.
	fd, err := os.Open(filePath)
	if err != nil {
		return false, err
	}
	defer fd.Close()

	st, err := fd.Stat()
	if err != nil {
		return false, err
	}

	md, err := calcFileMD5(filePath)
	if err != nil {
		return false, err
	}

	// Compare the file size, the file's last modified time and the file's MD5
	if cp.FileStat.Size != st.Size() ||
		cp.FileStat.LastModified != st.ModTime() ||
		cp.FileStat.MD5 != md {
		return false, nil
	}

	return true, nil
}
// load loads from the file
func (cp *uploadCheckpoint) load(filePath string) error {
	contents, err := ioutil.ReadFile(filePath)
	if err != nil {
		return err
	}
	err = json.Unmarshal(contents, cp)
	return err
}
// dump dumps to the local file
func (cp *uploadCheckpoint) dump(filePath string) error {
	bcp := *cp

	// Calculate MD5
	bcp.MD5 = ""
	js, err := json.Marshal(bcp)
	if err != nil {
		return err
	}
	sum := md5.Sum(js)
	b64 := base64.StdEncoding.EncodeToString(sum[:])
	bcp.MD5 = b64

	// Serialization
	js, err = json.Marshal(bcp)
	if err != nil {
		return err
	}

	// Dump
	return ioutil.WriteFile(filePath, js, FilePermMode)
}
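
// Illustrative round-trip sketch (not part of the original file): dump writes
// the checkpoint JSON with an MD5 of its own contents embedded, and isValid
// recomputes and compares that MD5 after a load. The path below is a
// placeholder.
func exampleCheckpointRoundTrip(cp *uploadCheckpoint) error {
	const cpPath = "/tmp/example.cp" // placeholder checkpoint path
	if err := cp.dump(cpPath); err != nil {
		return err
	}
	loaded := uploadCheckpoint{}
	if err := loaded.load(cpPath); err != nil {
		return err
	}
	ok, err := loaded.isValid(cp.FilePath)
	if err != nil || !ok {
		return fmt.Errorf("checkpoint invalid after reload: %v", err)
	}
	return nil
}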
// updatePart updates the part status
func (cp *uploadCheckpoint) updatePart(part UploadPart) {
	cp.Parts[part.PartNumber-1].Part = part
	cp.Parts[part.PartNumber-1].IsCompleted = true
}

// todoParts returns unfinished parts
func (cp *uploadCheckpoint) todoParts() []FileChunk {
	fcs := []FileChunk{}
	for _, part := range cp.Parts {
		if !part.IsCompleted {
			fcs = append(fcs, part.Chunk)
		}
	}
	return fcs
}

// allParts returns all parts
func (cp *uploadCheckpoint) allParts() []UploadPart {
	ps := []UploadPart{}
	for _, part := range cp.Parts {
		ps = append(ps, part.Part)
	}
	return ps
}

// getCompletedBytes returns the completed bytes count
func (cp *uploadCheckpoint) getCompletedBytes() int64 {
	var completedBytes int64
	for _, part := range cp.Parts {
		if part.IsCompleted {
			completedBytes += part.Chunk.Size
		}
	}
	return completedBytes
}
// calcFileMD5 calculates the MD5 for the specified local file. Note: this is
// a stub that always returns an empty string, so the MD5 fields recorded in
// the checkpoint are effectively unused.
func calcFileMD5(filePath string) (string, error) {
	return "", nil
}
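
// Illustrative sketch (not part of the original file): a real implementation
// of the MD5 check could look like the following. It reuses only the imports
// already present in this file; a production version would stream the file
// with io.Copy instead of reading it fully into memory.
func calcFileMD5Sketch(filePath string) (string, error) {
	contents, err := ioutil.ReadFile(filePath)
	if err != nil {
		return "", err
	}
	sum := md5.Sum(contents)
	return hex.EncodeToString(sum[:]), nil
}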
// prepare initializes the multipart upload
func prepare(cp *uploadCheckpoint, objectKey, filePath string, partSize int64, bucket *Bucket, options []Option) error {
	// CP
	cp.Magic = uploadCpMagic
	cp.FilePath = filePath
	cp.ObjectKey = objectKey

	// Local file
	fd, err := os.Open(filePath)
	if err != nil {
		return err
	}
	defer fd.Close()

	st, err := fd.Stat()
	if err != nil {
		return err
	}
	cp.FileStat.Size = st.Size()
	cp.FileStat.LastModified = st.ModTime()
	md, err := calcFileMD5(filePath)
	if err != nil {
		return err
	}
	cp.FileStat.MD5 = md

	// Chunks
	parts, err := SplitFileByPartSize(filePath, partSize)
	if err != nil {
		return err
	}
	cp.Parts = make([]cpPart, len(parts))
	for i, part := range parts {
		cp.Parts[i].Chunk = part
		cp.Parts[i].IsCompleted = false
	}

	// Initiate the multipart upload
	imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
	if err != nil {
		return err
	}
	cp.UploadID = imur.UploadID

	return nil
}
// complete completes the multipart upload and deletes the local CP files
func complete(cp *uploadCheckpoint, bucket *Bucket, parts []UploadPart, cpFilePath string) error {
	imur := InitiateMultipartUploadResult{Bucket: bucket.BucketName,
		Key: cp.ObjectKey, UploadID: cp.UploadID}
	_, err := bucket.CompleteMultipartUpload(imur, parts)
	if err != nil {
		return err
	}
	os.Remove(cpFilePath)
	return err
}
// uploadFileWithCp handles concurrent upload with checkpoint
func (bucket Bucket) uploadFileWithCp(objectKey, filePath string, partSize int64, options []Option, cpFilePath string, routines int) error {
	listener := getProgressListener(options)

	// Load CP data
	ucp := uploadCheckpoint{}
	err := ucp.load(cpFilePath)
	if err != nil {
		os.Remove(cpFilePath)
	}

	// Load error or the CP data is invalid.
	valid, err := ucp.isValid(filePath)
	if err != nil || !valid {
		if err = prepare(&ucp, objectKey, filePath, partSize, &bucket, options); err != nil {
			return err
		}
		os.Remove(cpFilePath)
	}

	chunks := ucp.todoParts()
	imur := InitiateMultipartUploadResult{
		Bucket:   bucket.BucketName,
		Key:      objectKey,
		UploadID: ucp.UploadID}

	jobs := make(chan FileChunk, len(chunks))
	results := make(chan UploadPart, len(chunks))
	failed := make(chan error)
	die := make(chan bool)

	completedBytes := ucp.getCompletedBytes()
	event := newProgressEvent(TransferStartedEvent, completedBytes, ucp.FileStat.Size)
	publishProgress(listener, event)

	// Start the workers
	arg := workerArg{&bucket, filePath, imur, uploadPartHooker}
	for w := 1; w <= routines; w++ {
		go worker(w, arg, jobs, results, failed, die)
	}

	// Schedule the jobs
	go scheduler(jobs, chunks)

	// Wait for the jobs to finish
	completed := 0
	for completed < len(chunks) {
		select {
		case part := <-results:
			completed++
			ucp.updatePart(part)
			ucp.dump(cpFilePath)
			completedBytes += ucp.Parts[part.PartNumber-1].Chunk.Size
			event = newProgressEvent(TransferDataEvent, completedBytes, ucp.FileStat.Size)
			publishProgress(listener, event)
		case err := <-failed:
			close(die)
			event = newProgressEvent(TransferFailedEvent, completedBytes, ucp.FileStat.Size)
			publishProgress(listener, event)
			return err
		}

		if completed >= len(chunks) {
			break
		}
	}

	event = newProgressEvent(TransferCompletedEvent, completedBytes, ucp.FileStat.Size)
	publishProgress(listener, event)

	// Complete the multipart upload
	err = complete(&ucp, &bucket, ucp.allParts(), cpFilePath)
	return err
}
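
// Illustrative resume sketch (not part of the original file): with
// checkpoints enabled, rerunning the same UploadFile call after an
// interruption reloads the .cp file and uploads only the parts still marked
// incomplete. CheckpointDir is assumed to be the option constructor for
// checkpointConfig in this snapshot; the paths are placeholders.
func exampleResumeUpload(bucket Bucket) error {
	opts := []Option{Routines(3), CheckpointDir(true, "/tmp/cp")}
	// First attempt; suppose it is interrupted partway through.
	if err := bucket.UploadFile("my-object-key", "/tmp/local-file.bin", 1024*1024, opts...); err != nil {
		// Second attempt resumes from the checkpoint instead of restarting.
		return bucket.UploadFile("my-object-key", "/tmp/local-file.bin", 1024*1024, opts...)
	}
	return nil
}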