From: Peter Schaefer Date: Fri, 3 Aug 2012 10:06:38 +0000 (+0200) Subject: [src] openMP funzt... aber leider nicht schneller X-Git-Url: https://git.leopard-lacewing.eu/?a=commitdiff_plain;h=d229e614b77f4b818140a8f3c311c0881f19a5a5;p=bacc.git [src] openMP funzt... aber leider nicht schneller --- diff --git a/src/mex_build_V.cpp b/src/mex_build_V.cpp index 91d6d96..d02a66b 100644 --- a/src/mex_build_V.cpp +++ b/src/mex_build_V.cpp @@ -21,6 +21,10 @@ * * Peter Schaefer */ + +//#define DEBUG +#define PARALLEL + //#include #include //#include @@ -98,6 +102,30 @@ void inline getSCorner(double* d, double x[4][3]) { } } +double inline distT(double b, double d, double t, double v, double d1, + double d2, double d3, double * dummy) { + double dis1 = 0, dis2 = 0; + if (d1 < 0) + dis1 = -d1 - t; + else + dis1 = d1 - b; + if (dis1 < 0) + dis1 = 0; + + if (d2 < 0) + dis2 = -d2 - v; + else + dis2 = d2 - d; + if (dis2 < 0) + dis2 = 0; + +// if(d3!=0) +// mexErrMsgTxt("expected d3 == 0"); + +// return dis1 * dis1 + dis2 * dis2 + d3 * d3; + return (dis1 * dis1 + dis2 * dis2 + d3 * d3) * (4 * M_PI); +} + void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { /* mexPrintf("%d",omp_get_max_threads()); @@ -105,18 +133,8 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { mexPrintf("%d",omp_get_num_threads()); */ - int j, k; //Schleifenindizes + int i, j, k; //Schleifenindizes int count; - double tmp; - - double x[4][3] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - double y[4][3] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } }; - double xa[3] = { 0, 0, 0 }; - double xb[3] = { 0, 0, 0 }; - double ya[3] = { 0, 0, 0 }; - double yb[3] = { 0, 0, 0 }; - double d[3] = { 0, 0, 0 }; - double dt[3] = { 0, 0, 0 }; //Sicherheitsabfragen zu Datengroessen if ((nrhs != 4)) @@ -172,48 +190,99 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { ctypeP = cParO4; ctypeO = cOrtO1; break; + case 0: + ctypeP = distT; + ctypeO = distT; + break; } count = 0; - omp_set_dynamic(1); +// omp_set_dynamic(1); //LageInformationen int rx, rxa, rxb, ry, rya, ryb; + double tmp; + double x[4][3] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + double y[4][3] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } }; + double xa[3] = { 0, 0, 0 }; + double xb[3] = { 0, 0, 0 }; + double ya[3] = { 0, 0, 0 }; + double yb[3] = { 0, 0, 0 }; + double d[3] = { 0, 0, 0 }; + double dt[3] = { 0, 0, 0 }; + +#define V_MINROW_PER_WORKER 3 + + int actualNumberOfThreads = omp_get_max_threads(); + int firstRow = 0, lastRow = -1; + int targetSize = em * (em + 1) / (2 * actualNumberOfThreads); + + if (targetSize < V_MINROW_PER_WORKER) { + actualNumberOfThreads = (int) em * (em + 1) / (2 * V_MINROW_PER_WORKER); + if (actualNumberOfThreads < 1) { + actualNumberOfThreads = 1; + targetSize = em; + } else { + targetSize = V_MINROW_PER_WORKER; + } + } +#ifdef DEBUG + mexPrintf("matrixSize (em) : %d\n" + "targetSize : %d\n" + "actualNumberOfThreads: %d\n", em, targetSize, + actualNumberOfThreads); +#endif //Ausrechnen -#pragma omp parallel private(j,k,tmp,x,y,xa,xb,ya,yb,d,dt,rx, rxa, rxb, ry, rya, ryb) shared(C,E,ctypeO,ctypeP) +#ifndef DEBUG +#ifdef PARALLEL +#pragma omp parallel private(i,j,k,tmp,x,y,xa,xb,ya,yb,d,dt,rx, rxa, rxb, ry, rya, ryb,lastRow,firstRow) shared(C,E,ctypeO,ctypeP,em,targetSize,actualNumberOfThreads) { #pragma omp parallel for - for (j = 0; j < em; ++j) { - x[0][0] = C[(int) E[j] - 1]; - x[0][1] = C[cm + (int) E[j] - 1]; - x[0][2] = C[2 * cm + (int) E[j] - 1]; +#endif +#endif + for (i = 0; i < actualNumberOfThreads; ++i) { + firstRow = (int) sqrt(2*i*targetSize); + if (i == actualNumberOfThreads - 1) { + lastRow = em - 1; + } else { + lastRow = (int) sqrt(2*(i+1)*targetSize)-1; + } +#ifdef DEBUG + mexPrintf("thread Nr. : %d\n" + "firstRow : %d\n" + "lastRow : %d\n", i, firstRow, lastRow); +#endif - x[1][0] = C[(int) E[em + j] - 1]; - x[1][1] = C[cm + (int) E[em + j] - 1]; - x[1][2] = C[2 * cm + (int) E[em + j] - 1]; + for (j = firstRow; j <= lastRow; ++j) { + for (k = 0; k <= j; ++k) { + x[0][0] = C[(int) E[j] - 1]; + x[0][1] = C[cm + (int) E[j] - 1]; + x[0][2] = C[2 * cm + (int) E[j] - 1]; - x[2][0] = C[(int) E[2 * em + j] - 1]; - x[2][1] = C[cm + (int) E[2 * em + j] - 1]; - x[2][2] = C[2 * cm + (int) E[2 * em + j] - 1]; + x[1][0] = C[(int) E[em + j] - 1]; + x[1][1] = C[cm + (int) E[em + j] - 1]; + x[1][2] = C[2 * cm + (int) E[em + j] - 1]; - x[3][0] = C[(int) E[3 * em + j] - 1]; - x[3][1] = C[cm + (int) E[3 * em + j] - 1]; - x[3][2] = C[2 * cm + (int) E[3 * em + j] - 1]; + x[2][0] = C[(int) E[2 * em + j] - 1]; + x[2][1] = C[cm + (int) E[2 * em + j] - 1]; + x[2][2] = C[2 * cm + (int) E[2 * em + j] - 1]; - // Seitenvektoren aufbauen - sub(xa, x[1], x[0]); - sub(xb, x[3], x[0]); + x[3][0] = C[(int) E[3 * em + j] - 1]; + x[3][1] = C[cm + (int) E[3 * em + j] - 1]; + x[3][2] = C[2 * cm + (int) E[3 * em + j] - 1]; - // Lageeigenschaften des Flaechenstuecks - rxa = dimOfVec(xa); - rxb = dimOfVec(xb); - rx = dimOfThird(rxa, rxb); + // Seitenvektoren aufbauen + sub(xa, x[1], x[0]); + sub(xb, x[3], x[0]); - //kleinste Ecke finden und fuer \delta verwenden - getSCorner(dt, x); + // Lageeigenschaften des Flaechenstuecks + rxa = dimOfVec(xa); + rxb = dimOfVec(xb); + rx = dimOfThird(rxa, rxb); - for (k = 0; k <= j; ++k) { + //kleinste Ecke finden und fuer \delta verwenden + getSCorner(dt, x); y[0][0] = C[(int) E[k] - 1]; y[0][1] = C[cm + (int) E[k] - 1]; @@ -245,33 +314,41 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { //\delta berechnen sub(d, dt); - - // mexPrintf("(%f,%f)(%f,%f)+(%f,%f,%f)\n",fabs(xa[rxa]), fabs(xb[rxb]), fabs(ya[rxa]), - // fabs(yb[rxb]), d[rxa], d[rxb], d[rx]); +#ifdef DEBUG + mexPrintf("[%d %d](%f,%f)(%f,%f)+(%f,%f,%f)\n", j, k, + fabs(xa[rxa]), fabs(xb[rxb]), fabs(ya[rxa]), + fabs(yb[rxb]), d[rxa], d[rxb], d[rx]); +#endif if (rx == ry) { if (rxa == rya) { - tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]), fabs(ya[rxa]), - fabs(yb[rxb]), d[rxa], d[rxb], d[rx], zeta); + tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]), + fabs(ya[rxa]), fabs(yb[rxb]), d[rxa], d[rxb], + d[rx], zeta); } else { - tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]), fabs(yb[rxa]), - fabs(ya[rxb]), d[rxa], d[rxb], d[rx], zeta); + tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]), + fabs(yb[rxa]), fabs(ya[rxb]), d[rxa], d[rxb], + d[rx], zeta); } } else { if (rxa == rya) { - tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]), fabs(ya[rya]), - fabs(yb[ryb]), d[rxb], d[rxa], d[rx], zeta); + tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]), + fabs(ya[rya]), fabs(yb[ryb]), d[rxb], d[rxa], + d[rx], zeta); } else if (rxa == ryb) { - tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]), fabs(yb[ryb]), - fabs(ya[rya]), d[rxb], d[rxa], d[rx], zeta); + tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]), + fabs(yb[ryb]), fabs(ya[rya]), d[rxb], d[rxa], + d[rx], zeta); } else if (rxb == rya) { - tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]), fabs(ya[rya]), - fabs(yb[ryb]), d[rxa], d[rxb], d[rx], zeta); + tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]), + fabs(ya[rya]), fabs(yb[ryb]), d[rxa], d[rxb], + d[rx], zeta); } else { - tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]), fabs(yb[ryb]), - fabs(ya[rya]), d[rxa], d[rxb], d[rx], zeta); + tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]), + fabs(yb[ryb]), fabs(ya[rya]), d[rxa], d[rxb], + d[rx], zeta); } } @@ -280,8 +357,12 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { if (k != j) A[(j * em) + k] = 1. / (4 * M_PI) * tmp; } - } } +#ifndef DEBUG +#ifdef PARALLEL +} +#endif +#endif return; } diff --git a/src/slpRectangle.cpp b/src/slpRectangle.cpp index be3928d..aad61cb 100644 --- a/src/slpRectangle.cpp +++ b/src/slpRectangle.cpp @@ -566,14 +566,3 @@ double cParO4(double b, double d, double t, double v, double d1, double d2, return calcParIntA(b, d, t, v, d1, d2, d3); } - -double evalInt(double* C, double * E, int cm, int em, int j, int k, - double (*ctypeP)(double, double, double, double, double, double, double, - double*), - double (*ctypeO)(double, double, double, double, double, double, double, - double*), double *zeta) { - - - -return 0; -} diff --git a/src/slpRectangle.hpp b/src/slpRectangle.hpp index 4c3a22d..1bfd66b 100644 --- a/src/slpRectangle.hpp +++ b/src/slpRectangle.hpp @@ -50,11 +50,5 @@ double cParO3(double, double, double, double, double, double, double, double*); // A oder Q oder Qx1x2 double cParO4(double, double, double, double, double, double, double, double*); -double evalInt(double *, double *, int, int, int, int, - double (*)(double, double, double, double, double, double, double, - double*), - double (*)(double, double, double, double, double, double, double, - double*), double *); - //} #endif diff --git a/src/test_sol.m b/src/test_sol.m index 82836a1..16bf4e4 100644 --- a/src/test_sol.m +++ b/src/test_sol.m @@ -2,7 +2,7 @@ format longG % Matrix MEX Funktion neu Compilieren -mex -v mex_build_V.cpp slpRectangle.cpp CFLAGS="\$CFLAGS -fopenmp" CXXFLAGS="\$CXXFLAGS -fopenmp" LDFLAGS="\$LDFLAGS -fopenmp" +mex mex_build_V.cpp slpRectangle.cpp CFLAGS="\$CFLAGS -fopenmp" CXXFLAGS="\$CXXFLAGS -fopenmp" LDFLAGS="\$LDFLAGS -fopenmp" % mex mex_build_V.cpp slpRectangle.cpp