AfxBeginThread是MFC的全局函数,是对CreateThread的封装。
CreateThread是Win32 API函数,AfxBeginThread最终要调到CreateThread。
而_beginthread是C的运行库函数,后台也是调用CreateThread来实现。
在 Windows 下,只要包含头文件 process.h(即 #include <process.h>),就可以使用 _beginthread 创建线程。
矩阵乘法的并行算法(CreateThread)
https://blog.csdn.net/qq_40515692/article/details/106749232
// Excerpt: launch m worker threads, each with its own handle and its own
// MYDATA argument block (static so that the variables' lifetime is not
// limited to the main function), then wait for all of them.
const int m = 4;
HANDLE hThread[m];
static MYDATA mydt[m];
// temp is the number of output elements assigned to each thread.
int temp = (M * P) / m;
for (int i = 0; i < m; ++i) {
    mydt[i].A = A, mydt[i].B = B, mydt[i].C = C;
    mydt[i].begin = i * temp, mydt[i].end = i * temp + temp, mydt[i].P = P, mydt[i].N = N;
    if (i == m - 1) // the last thread also takes the remainder
        mydt[i].end = M * P;
    hThread[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) ThreadProc, &mydt[i], 0, NULL);
}
WaitForMultipleObjects(m, hThread, TRUE, INFINITE);
// Each handle returned by CreateThread must be closed, otherwise it leaks
// until the process exits.
for (int i = 0; i < m; ++i) {
    if (hThread[i] != NULL)
        CloseHandle(hThread[i]);
}
#include <windows.h>
#include <cstddef>
#include <ctime>
#include <iostream>
#include <vector>
using namespace std;
// Argument block handed to one worker thread.
// Member order is significant for aggregate initialisation.
struct MYDATA {
    int begin;  // first flat index into C this worker computes (inclusive)
    int end;    // one past the last flat index into C this worker computes
    int *A;     // left operand, indexed as A[i * N + k]
    int *B;     // right operand, indexed as B[k * P + j]
    int *C;     // output, indexed as C[i * P + j]
    int P;      // number of columns of B and C
    int N;      // shared inner dimension (columns of A, rows of B)
};
// Thread entry point: computes the elements of C = A * B whose flat index
// lies in [begin, end), where A is (rows x N), B is (N x P) and C is
// (rows x P), all stored row-major.
//
// IpParam points to the MYDATA block describing this worker's share.
// Always returns 0.
//
// Declared WINAPI so the signature really matches LPTHREAD_START_ROUTINE;
// the cast at the CreateThread call site would otherwise hide a
// calling-convention mismatch on 32-bit x86.
DWORD WINAPI ThreadProc(LPVOID IpParam) {
    MYDATA *pmd = static_cast<MYDATA *>(IpParam);
    const int *A = pmd->A;
    const int *B = pmd->B;
    int *C = pmd->C;
    const int begin = pmd->begin;
    const int end = pmd->end;
    const int P = pmd->P;
    const int N = pmd->N;

    // Transposed copy of B (P x N) so the inner product below walks both
    // operands contiguously. With many threads this would be better built
    // once by the caller and shared, instead of once per worker.
    std::vector<int> revB(static_cast<std::size_t>(N) * static_cast<std::size_t>(P));
    for (int k = 0; k < N; ++k) {
        for (int j = 0; j < P; ++j) {
            // The transposed matrix has row stride N (not P); using P here
            // is only correct when N == P and runs out of bounds otherwise.
            revB[j * N + k] = B[k * P + j];
        }
    }

    for (int index = begin; index < end; ++index) {
        const int i = index / P;
        const int j = index % P;
        int sum = 0;
        for (int k = 0; k < N; ++k) {
            sum += A[i * N + k] * revB[j * N + k];
        }
        C[index] = sum; // index == i * P + j
    }
    return 0;
}
// Serial reference multiply: C(M,P) = A(M,N) * B(N,P).
// All three matrices are flat row-major arrays; every element of C is
// overwritten.
void func(int *A, int *B, int *C, int M, int P, int N) {
    for (int row = 0; row < M; ++row) {
        const int lhsBase = row * N;
        const int outBase = row * P;
        for (int col = 0; col < P; ++col) {
            int &cell = C[outBase + col];
            cell = 0;
            for (int inner = 0; inner < N; ++inner) {
                cell += A[lhsBase + inner] * B[inner * P + col];
            }
        }
    }
}
// Writes the M x N row-major matrix A to standard output: one row per line,
// every element followed by a single space, then one blank line at the end.
void printM(int *A, int M, int N) {
    for (int row = 0; row < M; ++row) {
        const int base = row * N;
        for (int col = 0; col < N; ++col) {
            std::cout << A[base + col] << " ";
        }
        std::cout << std::endl;
    }
    std::cout << std::endl;
}
// Matrix dimensions: A is M x N, B is N x P, C is M x P.
const int M = 1000;
const int N = 1000;
const int P = 1000;
// Row-major matrix storage at namespace scope (static storage duration),
// one million ints per matrix.
int A[M * N];
int B[N * P];
int C[M * P];
int main() {
clock_t startTime, endTime;
startTime = clock();//计时开始
for (int i = 0; i < M * N; i++) A[i] = i;
for (int i = 0; i < N * P; i++) B[i] = i;
// ----------------------------------- 多线程
const int m = 4;
HANDLE hThread[m];
static MYDATA mydt[m];
int temp = (M * P) / m;
for (int i = 0; i < m; ++i) {
mydt[i].A = A, mydt[i].B = B, mydt[i].C = C;
mydt[i].begin = i * temp, mydt[i].end = i * temp + temp, mydt[i].P = P, mydt[i].N = N;
if (i == m - 1) // 最后一个线程计算剩余的
mydt[i].end = M * P;
hThread[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) ThreadProc, &mydt[i], 0, NULL);
}
WaitForMultipleObjects(m, hThread, TRUE, INFINITE);
// ----------------------------------- 串行
// func(A, B, C, M, P, N);
// printM(A, M, N);
// printM(B, N, P);
// printM(C, M, P);
endTime = clock();//计时结束
cout << "use time: " << (double) (endTime - startTime) / CLOCKS_PER_SEC << endl;
return 0;
}
SetThreadAffinityMask用于指定线程在哪个CPU核心上运行。
SetThreadAffinityMask:The SetThreadAffinityMask function sets a processor affinity mask for the specified thread.
DWORD_PTR SetThreadAffinityMask(HANDLE hThread, DWORD_PTR dwThreadAffinityMask);
调用SetThreadAffinityMask,能为各个线程设置亲缘性屏蔽:该函数中的hThread参数用于指明要限制哪个线程, dwThreadAffinityMask用于指明该线程能够在哪个CPU上运行。
dwThreadAffinityMask必须是进程的亲缘性屏蔽的相应子集。返回值是线程的前一个亲缘性屏蔽。因此,若要将3个线程限制到CPU1、2和3上去运行,可以这样操作:
// Usage example: each set bit n in the mask allows the thread to run on CPU n.
// Thread 0 can only run on CPU 0.
SetThreadAffinityMask(hThread0, 0x00000001); // bit 0 is set
// Threads 1, 2 and 3 may each run on any of CPUs 1, 2 and 3
// (mask 0x0E has bits 1, 2 and 3 set).
SetThreadAffinityMask(hThread1, 0x0000000E);
SetThreadAffinityMask(hThread2, 0x0000000E);
SetThreadAffinityMask(hThread3, 0x0000000E);