1
我使用aio在一個線程的不同磁盤上寫入多個文件。當我使用緩衝寫入時,IO處理是併發的。但CPU負載非常高。當我用DIRECT標誌打開文件時,IO處理不是併發的。如何使用DMA在一個線程中同時寫入不同磁盤上的多個文件?
如何使用DMA在一個線程中同時寫入不同磁盤上的多個文件?
#include <malloc.h>
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <sstream>
#include <inttypes.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>
using namespace std;
long double timeDiff(timespec start, timespec end) {
const long double s = start.tv_sec + start.tv_nsec * 1.0e-9;
const long double e = end.tv_sec + end.tv_nsec * 1.0e-9;
return e - s;
}
// nr: maximum number of requests that can simultaneously reside in the context.
inline int io_setup(unsigned nr, aio_context_t *ctxp) {
return syscall(__NR_io_setup, nr, ctxp);
}
inline int io_destroy(aio_context_t ctx) {
return syscall(__NR_io_destroy, ctx);
}
// Every I/O request that is submitted to
inline int io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp) {
return syscall(__NR_io_submit, ctx, nr, iocbpp);
}
// For every completed I/O request kernel creates an io_event structure.
// minimal number of events one wants to get.
// maximum number of events one wants to get.
inline int io_getevents(aio_context_t ctx, long min_nr, long max_nr,
struct io_event *events, struct timespec *timeout) {
return syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout);
}
int main(int argc, char *argv[]) {
// prepare data
const unsigned int kAlignment = 4096;
const long data_size = 1600 * 1024 * 12/8;
//const long data_size = 2448 * 1344 * 12/8;
void * data = memalign(kAlignment, data_size);
memset(data, 0, data_size);
//for (int i = 0; i < data_size; ++i)
// data[i] = 'A';
// prepare fd
//const int file_num = 3;
const int file_num = 2;
int fd_arr[file_num];
for (int i = 0; i < file_num; ++i) {
ostringstream filename;
if (i == 0) {
//filename << "/data/test";
filename << "/test";
} else {
filename << "/data" << i << "/test";
}
//filename << "/data/test" << i;
int fd = open(filename.str().c_str(), O_WRONLY | O_NONBLOCK | O_CREAT | O_DIRECT | O_APPEND, 0644);
//int fd = open(filename.str().c_str(), O_WRONLY | O_NONBLOCK | O_CREAT | O_DIRECT, 0644);
//int fd = open(filename.str().c_str(), O_WRONLY | O_NONBLOCK | O_CREAT, 0644);
if (fd < 0) {
perror("open");
return -1;
}
fd_arr[i] = fd;
}
aio_context_t ctx;
struct io_event events[file_num];
int ret;
ctx = 0;
ret = io_setup(1000, &ctx);
if (ret < 0) {
perror("io_setup");
return -1;
}
struct iocb cbs[file_num];
for (int i = 0; i < file_num; ++i) {
memset(&cbs[i], 0, sizeof(cbs[i]));
}
struct iocb * cbs_pointer[file_num];
for (int i = 0; i < file_num; ++i) {
/* setup I/O control block */
cbs_pointer[i] = &cbs[i];
cbs[i].aio_fildes = fd_arr[i];
cbs[i].aio_lio_opcode = IOCB_CMD_PWRITE; // IOCV_CMD
cbs[i].aio_nbytes = data_size;
}
timespec tStart, tCurr;
clock_gettime(CLOCK_REALTIME, &tStart);
const int frame_num = 10000;
for (int k = 0; k < frame_num; ++k) {
for (int i = 0; i < file_num; ++i) {
/* setup I/O control block */
cbs[i].aio_buf = (uint64_t)data;
//cbs[i].aio_offset = k * data_size;
}
ret = io_submit(ctx, file_num, cbs_pointer);
if (ret < 0) {
perror("io_submit");
return -1;
}
/* get reply */
ret = io_getevents(ctx, file_num, file_num, events, NULL);
//printf("events: %d, k: %d\n", ret, k);
}
clock_gettime(CLOCK_REALTIME, &tCurr);
cout << "frame: " << frame_num << " time: " << timeDiff(tStart, tCurr) << endl;
ret = io_destroy(ctx);
if (ret < 0) {
perror("io_destroy");
return -1;
}
// close fd
for (int i = 0; i < file_num; ++i) {
fsync(fd_arr[i]);
close(fd_arr[i]);
}
return 0;
}
謝謝!是否用O_DIRECT標誌打開文件有什麼區別?沒有它,總帶寬大於一個磁盤寫入帶寬,但CPU負載很高。有了它,總帶寬與一個磁盤帶寬相同。 – ceys
如果沒有'O_DIRECT',所有的I/O都會首先進入內核頁面緩存,即'memcpy()',然後在稍後的階段,它會被DMA寫入光盤。打開「O_DIRECT」後,如果你正在寫的塊是4Kb多邊形,與4Kb邊界對齊,並且在I/O **期間你不能訪問該內存,那麼機會很大,它將直接DMA介入'memcpy()'。當然,如果你正在分配新的擴展盤區,正如我所提到的那樣,這是一個全局互斥鎖,所以你會串行化並失去併發性。 –