AWS S3 performance with the Node.js SDK

I am trying to push my program's upload (and download) performance to its limit. When uploading a 256 MB file with the aws command-line interface, I get about 1000 Mbps, but the upload gets stuck at around 600 Mbps with the following program, which uses the Node.js SDK:

if (process.argv.length < 7) { 
    console.log ("usage: " + process.argv [0] + " " + process.argv[1] + " <config> <region> <bucket> <key> <file>") 
    return -1 
} 

var config = process.argv[2] 
var region = process.argv[3] 
var bucketName = process.argv[4] 
var key = process.argv[5] 
var file = process.argv[6] 

var multipartMap = { Parts: [] } 
var uploadStartTime // = new Date() 
var partSize = 1024 * 1024 * 8   // at least 5MB, specified by amazon 
var partNum 
var multipartParams = { 
    Bucket: bucketName, 
    Key: key, 
    ContentType: "binary", 
    StorageClass: "REDUCED_REDUNDANCY", 
} 
var part = 0 
var maxRetry = 3 

var fs = require ('fs') 
var aws = require ('aws-sdk') 

// Upload one part, retrying up to maxRetry times on failure; when the last
// outstanding part succeeds, complete the multipart upload.
function upload (bucket, multipart, partParams, trial) { 
    trial = trial || 1;   // attempt counter, starts at 1 
    bucket.uploadPart (partParams, function (err, data) { 
     if (err) { 
      console.log ("failed: ", err) 
      if (trial < maxRetry) { 
       console.log ("retrying part: ", partParams.PartNumber) 
       upload (bucket, multipart, partParams, trial + 1) 
      } else { 
       console.log ("failed: ", err, " unable to upload part: ", partParams.PartNumber) 
      } 
      return; 
     } 
     multipartMap.Parts[this.request.params.PartNumber - 1] = { 
      ETag: data.ETag, 
      PartNumber: Number (this.request.params.PartNumber) 
     } 

     if (--partNum > 0) return; 

     var doneParams = { 
      Bucket: bucketName, 
      Key: key, 
      MultipartUpload: multipartMap, 
      UploadId: multipart.UploadId 
     } 

     console.log ("success") 
     bucket.completeMultipartUpload (doneParams, function (err, data){ 
      if (err) { 
       console.log("An error occurred while completing the multipart upload"); 
       console.log(err); 
      } else { 
       var delta = (new Date() - uploadStartTime)/1000; 
       console.log('Completed upload in', delta, 'seconds'); 
       console.log('Final upload data:', data); 
      } 
     }) 
    }) 
} 

var kickoffTime = new Date() 
aws.config.loadFromPath (config) 
aws.config.region = region 

var bucket = new aws.S3 ({params: {Bucket: bucketName}}) 

console.log ("filename: ", file) 
var buffer = fs.readFileSync (file)   // read the whole file into memory 
partNum = Math.ceil (buffer.length/partSize) // number of parts 
var totalPart = partNum 

uploadStartTime = new Date() 
bucket.createMultipartUpload (multipartParams, function (err, multipart) { 
    if (err) { 
     console.log ("cannot create multipart upload: ", err) 
     return -1 
    } 

    for (var i = 0; i < buffer.length; i += partSize) { 
     ++part 
     var end = Math.min (i + partSize, buffer.length) 
     var body = buffer.slice (i, end) 
     var partParams = { 
      Body: body, 
      Bucket: bucketName, 
      Key: key, 
      PartNumber: String (part), 
      UploadId: multipart.UploadId, 
      ContentLength: end - i 
     } 

     upload (bucket, multipart, partParams); 
    } 
}) 
var kickoffTimeDelta = (new Date() - kickoffTime)/1000 
console.log ("Kickoff time: ", kickoffTimeDelta) 

The upload program will not work for empty files, but please ignore that case. It was coded with reference to this. The corresponding download program:

if (process.argv.length < 7) { 
    console.log ("usage: " + process.argv [0] + " " + process.argv[1] + " <config> <region> <bucket> <key> <file>") 
    return -1 
} 

var config = process.argv[2] 
var region = process.argv[3] 
var bucketName = process.argv[4] 
var key = process.argv[5] 
var file = process.argv[6] 

var fs = require ('fs') 
var aws = require ('aws-sdk') 
fs.readFile (config, "utf8", function (err, configFile) { 
    if (err) { 
     console.log ("Config file cannot be read: ", err) 
     return -1 
    } 
    aws.config = JSON.parse (configFile) 
    aws.config.region = region 

    var bucket = new aws.S3 ({params: {Bucket: bucketName}}) 

    bucket.createBucket (function() { 
     var data = {Key: key} 
     bucket.getObject (data, function (err, fileData) { 
      if (err) { 
       console.log ("Error downloading data: ", err) 
      } else { 
       fs.writeFile (file, fileData.Body, function (err) { 
        if (err) { 
         console.log ("Error writing data: ", err) 
        } else { 
         console.log ("Successfully downloaded!") 
        } 
       }) 
      } 
     }) 
    }) 
}) 
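
On the download side, here is a minimal sketch (also not part of the original program) that streams the object body straight to disk with createReadStream() instead of buffering it in memory first, which may help with larger objects:

var fs = require ('fs') 
var aws = require ('aws-sdk') 

var config = process.argv[2] 
var region = process.argv[3] 
var bucketName = process.argv[4] 
var key = process.argv[5] 
var file = process.argv[6] 

aws.config.loadFromPath (config) 
aws.config.region = region 

var s3 = new aws.S3 () 
// Pipe the object body directly into the destination file 
var readStream = s3.getObject ({Bucket: bucketName, Key: key}).createReadStream () 
var writeStream = fs.createWriteStream (file) 

readStream.on ('error', function (err) { console.log ("Error downloading data: ", err) }) 
writeStream.on ('error', function (err) { console.log ("Error writing data: ", err) }) 
writeStream.on ('finish', function () { console.log ("Successfully downloaded!") }) 

readStream.pipe (writeStream) 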

I am new to Node.js and the AWS SDK. Is there anything I am missing that would let me achieve better throughput? Thanks.

Answer


Hmm... I have a clarifying question, but not the reputation to ask it.

How many requests per second are you seeing on each end? If you are hitting S3 with more than 100 requests per second, you may get better performance by randomizing the beginning of your key names.

See this article for an explanation and some suggestions: http://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html

Basically, if you have a bunch of files under one key (subdirectory) whose names start with the same characters, you can overwhelm the index partition... so for high-volume reads and writes, randomized key names can improve performance.
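
To illustrate the idea, here is a small sketch (a hypothetical helper, not from the linked article) that prepends a few random hex characters to the key so that heavily used keys no longer share a common prefix:

var crypto = require ('crypto') 

// Prepend a short random hex prefix so that keys are spread across 
// S3's index partitions, e.g. "9c2e-myfile.bin" instead of "myfile.bin" 
function randomizedKey (key) { 
    return crypto.randomBytes (2).toString ('hex') + '-' + key 
} 

console.log (randomizedKey ('myfile.bin'))   // e.g. "4fa1-myfile.bin" 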


Thanks, that will be a useful resource, but at the moment I am mostly concerned with getting the best performance when putting a small number of files onto S3 at a time. It may well matter later, as the system scales up to handle a large number of requests. – 2014-10-12 16:16:31