我開發了一個程序,它讀取.txt文件中的數字,它將存儲到一個矢量中,以進行一系列組合和計算,以確定結果是否與數字匹配我想要的。這些過程將在多個線程中完成,其中每個線程將負責處理parallel for循環內的各種迭代次數。OpenMP,並行for循環,處理時間差別很大
長話短說,當處理時間可能短到3分鐘或可能超過10分鐘時,處理時間會有很大變化(例如9個數字)。
這裏的基準,我試過到目前爲止:
8 numbers serial : 18.119 seconds
8 numbers multithread (first-try): 10.238 seconds
8 numbers multithread (second-try): 18.943 seconds
9 numbers serial : 458.980 seconds
9 numbers multithread (first-try): 172.347 seconds
9 numbers multithread (second-try): 519.532 seconds //Seriously?
//Another try after suggested modifications
9 numbers multithread (first-try): 297.017 seconds
9 numbers multithread (second-try): 297.85 seconds
9 numbers multithread (third-try): 304.755 seconds
9 numbers multithread (fourth-try): 396.391 seconds
所以現在的問題是,是否有利於改善項目(多線程),因此只需要最少的任何可能的方式洗牌/計算數量的時間?
下面的代碼的一部分,其中用於循環並行發生(更新略有修改):
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <stdlib.h>
#include <algorithm>
#include <stdio.h>
#include <Windows.h>
#include <omp.h>
#define OPERATORSIZE 3
using namespace std;
int cur_target;
ofstream outFile;
string get_operator(int i) {
switch (i) {
case 0:
return "+";
case 1:
return "-";
case 2:
return "*";
case 3:
return "/";
default:
return "";
}
}
int prev_num_pos(vector<int> &cur_equation, int count) {
for (int i = count - 1; i >= 0; i--) {
if (cur_equation[i] != -1) return i + 1;
}
return 0;
}
bool nextoperator(int k, vector<int> &operator_array) {
for (int i = k - 2; i >= 0; i--) {
if (operator_array[i] < OPERATORSIZE) {
operator_array[i] += 1;
break;
}
else
operator_array[i] = 0;
switch (i) {
case 0:
return false;
}
}
return true;
}
void vector_combination(vector<int> int_list) { // Generate the number combinations from the number list
bool div_remainder = false;
int count = 0;
#pragma omp parallel for schedule(dynamic) firstprivate(div_remainder) reduction(+:count)
for (int i = 0; i < int_list.size(); ++i) {
vector<int> cur_equation, cur_temp, cur_list, operator_array;
auto list = int_list;
rotate(list.begin(), list.begin() + i, list.begin() + i + 1);
do
{
cur_list.clear();
operator_array.clear();
for (auto x : list)
cur_list.push_back(x);
for (int i = 0; i < cur_list.size() - 1; i++)
operator_array.push_back(0);
do
{
div_remainder = false;
count = 0;
cur_equation = operator_array;
cur_temp = cur_list;
for (int i = 0; i < cur_equation.size(); ++i) { // Check for equation priorities
if (cur_equation[i] == 3) {
count = i;
if (cur_temp[count] % cur_temp[count + 1] != 0) {
div_remainder = true;
break;
}
}
}
if (div_remainder)
continue;
for (int i = 0; i < cur_temp.size() - 1; ++i) {
count = -1;
if (cur_equation[i] == 2 || cur_equation[i] == 3) {
count = prev_num_pos(cur_equation, i);
}
else
continue;
if (cur_equation[i] == 2) {
cur_temp[count] *= cur_temp[i + 1];
cur_equation[i] = -1;
}
else if (cur_equation[i] == 3) {
if (cur_temp[i + 1] != 0) {
cur_temp[count] /= cur_temp[i + 1];
cur_equation[i] = -1;
}
else {
div_remainder = true;
break;
}
}
}
if (div_remainder)
continue;
for (int i = 0; i < cur_temp.size() - 1; ++i) {
switch (cur_equation[i]) {
case 0: {
cur_temp[0] += cur_temp[i + 1]; // Addition
cur_equation[i] = -1;
break;
}
case 1: { // Subtraction
cur_temp[0] -= cur_temp[i + 1];
cur_equation[i] = -i;
break;
}
}
}
if (cur_temp[0] == cur_target) {
#pragma omp critical
{
for (int i = 0; i < cur_list.size(); ++i) {
outFile << cur_list[i];
if (i < cur_list.size() - 1) { outFile << get_operator(operator_array[i]); }
}
outFile << "\n";
}
}
} while (nextoperator(cur_list.size(), operator_array));
// Send to function to undergone a list of operator combinations
} while (next_permutation(list.begin() + 1, list.end()));
}
}
int main(void) {
SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS);
vector<int> int_list;
string line;
ifstream myfile("Problem.txt");
if (myfile.is_open()) {
while (getline(myfile, line)) {
int num = stoi(line);
int_list.push_back(num);
cur_target = num;
}
}
else
cout << "Unable to open file." << endl;
myfile.close();
int_list.pop_back();
sort(int_list.begin(), int_list.end());
outFile.open("answer.txt");
vector_combination(int_list);
outFile.close();
int answer_count = 0;
myfile.open("answer.txt");
if (myfile.is_open()) {
while (getline(myfile, line)) {
++answer_count;
if (answer_count > 1)
break;
}
}
myfile.close();
if (answer_count == 0) {
outFile.open("answer.txt");
outFile << "-1" << endl;
}
outFile.close();
return 0;
}
至於樣品輸入,創建名爲「Problem.txt」利用隨機數等.txt文件所以(最後一個數字是目標的結果)(與用於基準當前樣本輸入更新):
28
55
78
77
33
65
35
62
19
221
該程序上運行的硬件/軟件規範: 處理器:酷睿i5的Sandy Bridge 2500K, 拉姆:8GB, OS:視窗10名專業人員, IDE:的Visual Studio 2015企業版,
嘗試衡量程序完成了多少工作,以及它自己的運行時間是多少。在linux中有'/ usr/bin/time。/ your_program'和'perf stat。/ your_program'。檢查是你的循環佔用大部分時間,或者由內核或硬盤驅動器進行長時間的I/O工作。還要檢查你的CPU核心是否空閒,CPU頻率是否好(不要在電池供電的筆記本電腦上進行基準測試)。如果不知道數據的大小(以及每個循環數),完整的代碼+數據示例/分析數據,操作系統和硬件信息,則很難回答。 – osgx
@osgx我通過使用探查器進行了檢查。到目前爲止,消耗大部分資源的唯一部分是向量複製,循環花費不大。無論如何,我會盡我所能以您提到的要求更新問題。 –
是在openmp循環內複製的向量嗎?該問題中包含片段中的矢量複製代碼?什麼時間openmp循環(<5%,20%50%或更多)?你能發佈性能分析結果嗎?矢量的大小,循環次數,內存消耗是多少? – osgx