]> git.leopard-lacewing.eu Git - bacc.git/commitdiff
[src] openMP passt erst mal so
authorPeter Schaefer <peter.schaefer@tuwien.ac.at>
Fri, 3 Aug 2012 12:18:21 +0000 (14:18 +0200)
committerPeter Schaefer <peter.schaefer@tuwien.ac.at>
Fri, 3 Aug 2012 12:18:21 +0000 (14:18 +0200)
src/mex_build_V.cpp
src/test_sol.m

index 00e0d56a8fd003017006808b981807422abed310..501b9832c997f9533a163d658325fd066b61bf2f 100644 (file)
@@ -22,7 +22,7 @@
  * Peter Schaefer\r
  */\r
 \r
-//#define DEBUG\r
+#define DEBUG 0\r
 #define PARALLEL\r
 \r
 //#include <iostream>\r
@@ -216,7 +216,7 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
 \r
        int actualNumberOfThreads = omp_get_max_threads();\r
 \r
-       if(MAX_WORKER<actualNumberOfThreads)\r
+       if (MAX_WORKER < actualNumberOfThreads)\r
                actualNumberOfThreads = MAX_WORKER;\r
 \r
        int firstRow = 0, lastRow = -1;\r
@@ -231,7 +231,7 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
                        targetSize = MINSIZE_PER_WORKER;\r
                }\r
        }\r
-#ifdef DEBUG\r
+#if DEBUG >= 3\r
        mexPrintf("matrixSize (em)      : %d\n"\r
                        "targetSize           : %d\n"\r
                        "actualNumberOfThreads: %d\n", em, targetSize,\r
@@ -240,137 +240,147 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
 \r
 //Ausrechnen\r
 #ifdef PARALLEL\r
-#pragma omp parallel private(i,j,k,tmp,x,y,xa,xb,ya,yb,d,dt,rx, rxa, rxb, ry, rya, ryb,lastRow,firstRow) shared(C,E,ctypeO,ctypeP,em,targetSize,actualNumberOfThreads)\r
+//#pragma omp parallel p\r
        {\r
-#pragma omp for\r
-#endif\r
+#pragma omp parallel for schedule(static,1) private(i,j,k,tmp,x,y,xa,xb,ya,yb,d,dt,rx, rxa, rxb, ry, rya, ryb,lastRow,firstRow) shared(C,E,ctypeO,ctypeP,em,targetSize,actualNumberOfThreads)\r
 \r
-       for (i = 0; i < actualNumberOfThreads; ++i) {\r
-               firstRow = (int) sqrt(2*i*targetSize);\r
-               if (i == actualNumberOfThreads - 1) {\r
-                       lastRow = em - 1;\r
-               } else {\r
-                       lastRow = (int) sqrt(2*(i+1)*targetSize)-1;\r
-               }\r
-#ifdef DEBUG\r
-               mexPrintf("thread Nr.  : %d\n"\r
-                               "firstRow    : %d\n"\r
-                               "lastRow     : %d\n", i, firstRow, lastRow);\r
 #endif\r
+               for (i = 0; i < actualNumberOfThreads; ++i) {\r
+                       firstRow = (int) sqrt(2 * i * targetSize);\r
+                       if (i == actualNumberOfThreads - 1) {\r
+                               lastRow = em - 1;\r
+                       } else {\r
+                               lastRow = (int) sqrt(2 * (i + 1) * targetSize) - 1;\r
+                       }\r
 \r
-               for (j = firstRow; j <= lastRow; ++j) {\r
-                       for (k = 0; k <= j; ++k) {\r
-                               x[0][0] = C[(int) E[j] - 1];\r
-                               x[0][1] = C[cm + (int) E[j] - 1];\r
-                               x[0][2] = C[2 * cm + (int) E[j] - 1];\r
-\r
-                               x[1][0] = C[(int) E[em + j] - 1];\r
-                               x[1][1] = C[cm + (int) E[em + j] - 1];\r
-                               x[1][2] = C[2 * cm + (int) E[em + j] - 1];\r
-\r
-                               x[2][0] = C[(int) E[2 * em + j] - 1];\r
-                               x[2][1] = C[cm + (int) E[2 * em + j] - 1];\r
-                               x[2][2] = C[2 * cm + (int) E[2 * em + j] - 1];\r
-\r
-                               x[3][0] = C[(int) E[3 * em + j] - 1];\r
-                               x[3][1] = C[cm + (int) E[3 * em + j] - 1];\r
-                               x[3][2] = C[2 * cm + (int) E[3 * em + j] - 1];\r
-\r
-                               // Seitenvektoren aufbauen\r
-                               sub(xa, x[1], x[0]);\r
-                               sub(xb, x[3], x[0]);\r
-\r
-                               // Lageeigenschaften des Flaechenstuecks\r
-                               rxa = dimOfVec(xa);\r
-                               rxb = dimOfVec(xb);\r
-                               rx = dimOfThird(rxa, rxb);\r
-\r
-                               //kleinste Ecke finden und fuer \delta verwenden\r
-                               getSCorner(dt, x);\r
-\r
-                               y[0][0] = C[(int) E[k] - 1];\r
-                               y[0][1] = C[cm + (int) E[k] - 1];\r
-                               y[0][2] = C[2 * cm + (int) E[k] - 1];\r
-\r
-                               y[1][0] = C[(int) E[em + k] - 1];\r
-                               y[1][1] = C[cm + (int) E[em + k] - 1];\r
-                               y[1][2] = C[2 * cm + (int) E[em + k] - 1];\r
-\r
-                               y[2][0] = C[(int) E[2 * em + k] - 1];\r
-                               y[2][1] = C[cm + (int) E[2 * em + k] - 1];\r
-                               y[2][2] = C[2 * cm + (int) E[2 * em + k] - 1];\r
-\r
-                               y[3][0] = C[(int) E[3 * em + k] - 1];\r
-                               y[3][1] = C[cm + (int) E[3 * em + k] - 1];\r
-                               y[3][2] = C[2 * cm + (int) E[3 * em + k] - 1];\r
-\r
-                               // Seiten Vektoren aufbauen\r
-                               sub(ya, y[1], y[0]);\r
-                               sub(yb, y[3], y[0]);\r
-\r
-                               // Lageeigenschaften des Flaechenstuecks\r
-                               rya = dimOfVec(ya);\r
-                               ryb = dimOfVec(yb);\r
-                               ry = dimOfThird(rya, ryb);\r
-\r
-                               //kleinste Ecke finden\r
-                               getSCorner(d, y);\r
-\r
-                               //\delta berechnen\r
-                               sub(d, dt);\r
-#ifdef DEBUG\r
-                               mexPrintf("[%d %d](%f,%f)(%f,%f)+(%f,%f,%f)\n", j, k,\r
-                                               fabs(xa[rxa]), fabs(xb[rxb]), fabs(ya[rxa]),\r
-                                               fabs(yb[rxb]), d[rxa], d[rxb], d[rx]);\r
-#endif\r
-                               if (rx == ry) {\r
-                                       if (rxa == rya) {\r
-                                               tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]),\r
-                                                               fabs(ya[rxa]), fabs(yb[rxb]), d[rxa], d[rxb],\r
-                                                               d[rx], zeta);\r
-\r
-                                       } else {\r
-                                               tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]),\r
-                                                               fabs(yb[rxa]), fabs(ya[rxb]), d[rxa], d[rxb],\r
-                                                               d[rx], zeta);\r
-                                       }\r
-\r
-                               } else {\r
-\r
-                                       if (rxa == rya) {\r
-                                               tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]),\r
-                                                               fabs(ya[rya]), fabs(yb[ryb]), d[rxb], d[rxa],\r
-                                                               d[rx], zeta);\r
-                                       } else if (rxa == ryb) {\r
-                                               tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]),\r
-                                                               fabs(yb[ryb]), fabs(ya[rya]), d[rxb], d[rxa],\r
-                                                               d[rx], zeta);\r
-                                       } else if (rxb == rya) {\r
-                                               tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]),\r
-                                                               fabs(ya[rya]), fabs(yb[ryb]), d[rxa], d[rxb],\r
-                                                               d[rx], zeta);\r
-                                       } else {\r
-                                               tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]),\r
-                                                               fabs(yb[ryb]), fabs(ya[rya]), d[rxa], d[rxb],\r
-                                                               d[rx], zeta);\r
-                                       }\r
+#if DEBUG >= 4\r
+#pragma omp critical\r
+                       mexPrintf("thread Nr.: (%d) | firstRow/lastRow: [%d / %d] \n",\r
 \r
-                               }\r
 #ifdef PARALLEL\r
-#pragma omp critical\r
+                                       omp_get_thread_num()\r
+#else\r
+                                       i\r
 #endif\r
-                               A[(k * em) + j] = 1. / (4 * M_PI) * tmp;\r
-                               if (k != j)\r
-                                       A[(j * em) + k] = 1. / (4 * M_PI) * tmp;\r
-#ifdef PARALLEL\r
+                                       , firstRow, lastRow);\r
 #pragma omp end critical\r
 #endif\r
+                       {\r
+                               for (j = firstRow; j <= lastRow; ++j) {\r
+                                       for (k = 0; k <= j; ++k) {\r
+                                               x[0][0] = C[(int) E[j] - 1];\r
+                                               x[0][1] = C[cm + (int) E[j] - 1];\r
+                                               x[0][2] = C[2 * cm + (int) E[j] - 1];\r
+\r
+                                               x[1][0] = C[(int) E[em + j] - 1];\r
+                                               x[1][1] = C[cm + (int) E[em + j] - 1];\r
+                                               x[1][2] = C[2 * cm + (int) E[em + j] - 1];\r
+\r
+                                               x[2][0] = C[(int) E[2 * em + j] - 1];\r
+                                               x[2][1] = C[cm + (int) E[2 * em + j] - 1];\r
+                                               x[2][2] = C[2 * cm + (int) E[2 * em + j] - 1];\r
+\r
+                                               x[3][0] = C[(int) E[3 * em + j] - 1];\r
+                                               x[3][1] = C[cm + (int) E[3 * em + j] - 1];\r
+                                               x[3][2] = C[2 * cm + (int) E[3 * em + j] - 1];\r
+\r
+                                               // Seitenvektoren aufbauen\r
+                                               sub(xa, x[1], x[0]);\r
+                                               sub(xb, x[3], x[0]);\r
+\r
+                                               // Lageeigenschaften des Flaechenstuecks\r
+                                               rxa = dimOfVec(xa);\r
+                                               rxb = dimOfVec(xb);\r
+                                               rx = dimOfThird(rxa, rxb);\r
+\r
+                                               //kleinste Ecke finden und fuer \delta verwenden\r
+                                               getSCorner(dt, x);\r
+\r
+                                               y[0][0] = C[(int) E[k] - 1];\r
+                                               y[0][1] = C[cm + (int) E[k] - 1];\r
+                                               y[0][2] = C[2 * cm + (int) E[k] - 1];\r
+\r
+                                               y[1][0] = C[(int) E[em + k] - 1];\r
+                                               y[1][1] = C[cm + (int) E[em + k] - 1];\r
+                                               y[1][2] = C[2 * cm + (int) E[em + k] - 1];\r
+\r
+                                               y[2][0] = C[(int) E[2 * em + k] - 1];\r
+                                               y[2][1] = C[cm + (int) E[2 * em + k] - 1];\r
+                                               y[2][2] = C[2 * cm + (int) E[2 * em + k] - 1];\r
+\r
+                                               y[3][0] = C[(int) E[3 * em + k] - 1];\r
+                                               y[3][1] = C[cm + (int) E[3 * em + k] - 1];\r
+                                               y[3][2] = C[2 * cm + (int) E[3 * em + k] - 1];\r
+\r
+                                               // Seiten Vektoren aufbauen\r
+                                               sub(ya, y[1], y[0]);\r
+                                               sub(yb, y[3], y[0]);\r
+\r
+                                               // Lageeigenschaften des Flaechenstuecks\r
+                                               rya = dimOfVec(ya);\r
+                                               ryb = dimOfVec(yb);\r
+                                               ry = dimOfThird(rya, ryb);\r
+\r
+                                               //kleinste Ecke finden\r
+                                               getSCorner(d, y);\r
+\r
+                                               //\delta berechnen\r
+                                               sub(d, dt);\r
+#if DEBUG >= 5\r
+#pragma omp critical\r
+                                               mexPrintf("[%d %d](%f,%f)(%f,%f)+(%f,%f,%f)\n", j, k,\r
+                                                               fabs(xa[rxa]), fabs(xb[rxb]), fabs(ya[rxa]),\r
+                                                               fabs(yb[rxb]), d[rxa], d[rxb], d[rx]);\r
+#pragma omp end critical\r
+#endif\r
+                                               if (rx == ry) {\r
+                                                       if (rxa == rya) {\r
+                                                               tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]),\r
+                                                                               fabs(ya[rxa]), fabs(yb[rxb]), d[rxa],\r
+                                                                               d[rxb], d[rx], zeta);\r
+\r
+                                                       } else {\r
+                                                               tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]),\r
+                                                                               fabs(yb[rxa]), fabs(ya[rxb]), d[rxa],\r
+                                                                               d[rxb], d[rx], zeta);\r
+                                                       }\r
+\r
+                                               } else {\r
+\r
+                                                       if (rxa == rya) {\r
+                                                               tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]),\r
+                                                                               fabs(ya[rya]), fabs(yb[ryb]), d[rxb],\r
+                                                                               d[rxa], d[rx], zeta);\r
+                                                       } else if (rxa == ryb) {\r
+                                                               tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]),\r
+                                                                               fabs(yb[ryb]), fabs(ya[rya]), d[rxb],\r
+                                                                               d[rxa], d[rx], zeta);\r
+                                                       } else if (rxb == rya) {\r
+                                                               tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]),\r
+                                                                               fabs(ya[rya]), fabs(yb[ryb]), d[rxa],\r
+                                                                               d[rxb], d[rx], zeta);\r
+                                                       } else {\r
+                                                               tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]),\r
+                                                                               fabs(yb[ryb]), fabs(ya[rya]), d[rxa],\r
+                                                                               d[rxb], d[rx], zeta);\r
+                                                       }\r
+\r
+                                               }\r
+\r
+#pragma omp critical\r
+\r
+                                               A[(k * em) + j] = 1. / (4 * M_PI) * tmp;\r
+                                               if (k != j)\r
+                                                       A[(j * em) + k] = 1. / (4 * M_PI) * tmp;\r
+\r
+#pragma omp end critical\r
+\r
+                                       }\r
+                               }\r
                        }\r
                }\r
-       }\r
-\r
 #ifdef PARALLEL\r
-}\r
+       }\r
 #endif\r
 \r
        return;\r
index 2a8e9739901b9b4e79d2827b8c65e595d5cd4f47..9168d75f57d4c9190ec6429c901ad9fa5bd60087 100644 (file)
@@ -9,7 +9,7 @@ mex mex_build_V.cpp slpRectangle.cpp CFLAGS="\$CFLAGS -fopenmp" CXXFLAGS="\$CXXF
 % Test ausführen
 
 %Anzahl der Schritte
-steps = 16;
+steps = 2;
 
 %Art der Berechnungen
 type = [1];