* Peter Schaefer\r
*/\r
\r
-//#define DEBUG\r
+#define DEBUG 0\r
#define PARALLEL\r
\r
//#include <iostream>\r
\r
int actualNumberOfThreads = omp_get_max_threads();\r
\r
- if(MAX_WORKER<actualNumberOfThreads)\r
+ if (MAX_WORKER < actualNumberOfThreads)\r
actualNumberOfThreads = MAX_WORKER;\r
\r
int firstRow = 0, lastRow = -1;\r
targetSize = MINSIZE_PER_WORKER;\r
}\r
}\r
-#ifdef DEBUG\r
+#if DEBUG >= 3\r
mexPrintf("matrixSize (em) : %d\n"\r
"targetSize : %d\n"\r
"actualNumberOfThreads: %d\n", em, targetSize,\r
\r
//Ausrechnen\r
#ifdef PARALLEL\r
-#pragma omp parallel private(i,j,k,tmp,x,y,xa,xb,ya,yb,d,dt,rx, rxa, rxb, ry, rya, ryb,lastRow,firstRow) shared(C,E,ctypeO,ctypeP,em,targetSize,actualNumberOfThreads)\r
+//#pragma omp parallel p\r
{\r
-#pragma omp for\r
-#endif\r
+#pragma omp parallel for schedule(static,1) private(i,j,k,tmp,x,y,xa,xb,ya,yb,d,dt,rx, rxa, rxb, ry, rya, ryb,lastRow,firstRow) shared(C,E,ctypeO,ctypeP,em,targetSize,actualNumberOfThreads)\r
\r
- for (i = 0; i < actualNumberOfThreads; ++i) {\r
- firstRow = (int) sqrt(2*i*targetSize);\r
- if (i == actualNumberOfThreads - 1) {\r
- lastRow = em - 1;\r
- } else {\r
- lastRow = (int) sqrt(2*(i+1)*targetSize)-1;\r
- }\r
-#ifdef DEBUG\r
- mexPrintf("thread Nr. : %d\n"\r
- "firstRow : %d\n"\r
- "lastRow : %d\n", i, firstRow, lastRow);\r
#endif\r
+ for (i = 0; i < actualNumberOfThreads; ++i) {\r
+ firstRow = (int) sqrt(2 * i * targetSize);\r
+ if (i == actualNumberOfThreads - 1) {\r
+ lastRow = em - 1;\r
+ } else {\r
+ lastRow = (int) sqrt(2 * (i + 1) * targetSize) - 1;\r
+ }\r
\r
- for (j = firstRow; j <= lastRow; ++j) {\r
- for (k = 0; k <= j; ++k) {\r
- x[0][0] = C[(int) E[j] - 1];\r
- x[0][1] = C[cm + (int) E[j] - 1];\r
- x[0][2] = C[2 * cm + (int) E[j] - 1];\r
-\r
- x[1][0] = C[(int) E[em + j] - 1];\r
- x[1][1] = C[cm + (int) E[em + j] - 1];\r
- x[1][2] = C[2 * cm + (int) E[em + j] - 1];\r
-\r
- x[2][0] = C[(int) E[2 * em + j] - 1];\r
- x[2][1] = C[cm + (int) E[2 * em + j] - 1];\r
- x[2][2] = C[2 * cm + (int) E[2 * em + j] - 1];\r
-\r
- x[3][0] = C[(int) E[3 * em + j] - 1];\r
- x[3][1] = C[cm + (int) E[3 * em + j] - 1];\r
- x[3][2] = C[2 * cm + (int) E[3 * em + j] - 1];\r
-\r
- // Seitenvektoren aufbauen\r
- sub(xa, x[1], x[0]);\r
- sub(xb, x[3], x[0]);\r
-\r
- // Lageeigenschaften des Flaechenstuecks\r
- rxa = dimOfVec(xa);\r
- rxb = dimOfVec(xb);\r
- rx = dimOfThird(rxa, rxb);\r
-\r
- //kleinste Ecke finden und fuer \delta verwenden\r
- getSCorner(dt, x);\r
-\r
- y[0][0] = C[(int) E[k] - 1];\r
- y[0][1] = C[cm + (int) E[k] - 1];\r
- y[0][2] = C[2 * cm + (int) E[k] - 1];\r
-\r
- y[1][0] = C[(int) E[em + k] - 1];\r
- y[1][1] = C[cm + (int) E[em + k] - 1];\r
- y[1][2] = C[2 * cm + (int) E[em + k] - 1];\r
-\r
- y[2][0] = C[(int) E[2 * em + k] - 1];\r
- y[2][1] = C[cm + (int) E[2 * em + k] - 1];\r
- y[2][2] = C[2 * cm + (int) E[2 * em + k] - 1];\r
-\r
- y[3][0] = C[(int) E[3 * em + k] - 1];\r
- y[3][1] = C[cm + (int) E[3 * em + k] - 1];\r
- y[3][2] = C[2 * cm + (int) E[3 * em + k] - 1];\r
-\r
- // Seiten Vektoren aufbauen\r
- sub(ya, y[1], y[0]);\r
- sub(yb, y[3], y[0]);\r
-\r
- // Lageeigenschaften des Flaechenstuecks\r
- rya = dimOfVec(ya);\r
- ryb = dimOfVec(yb);\r
- ry = dimOfThird(rya, ryb);\r
-\r
- //kleinste Ecke finden\r
- getSCorner(d, y);\r
-\r
- //\delta berechnen\r
- sub(d, dt);\r
-#ifdef DEBUG\r
- mexPrintf("[%d %d](%f,%f)(%f,%f)+(%f,%f,%f)\n", j, k,\r
- fabs(xa[rxa]), fabs(xb[rxb]), fabs(ya[rxa]),\r
- fabs(yb[rxb]), d[rxa], d[rxb], d[rx]);\r
-#endif\r
- if (rx == ry) {\r
- if (rxa == rya) {\r
- tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]),\r
- fabs(ya[rxa]), fabs(yb[rxb]), d[rxa], d[rxb],\r
- d[rx], zeta);\r
-\r
- } else {\r
- tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]),\r
- fabs(yb[rxa]), fabs(ya[rxb]), d[rxa], d[rxb],\r
- d[rx], zeta);\r
- }\r
-\r
- } else {\r
-\r
- if (rxa == rya) {\r
- tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]),\r
- fabs(ya[rya]), fabs(yb[ryb]), d[rxb], d[rxa],\r
- d[rx], zeta);\r
- } else if (rxa == ryb) {\r
- tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]),\r
- fabs(yb[ryb]), fabs(ya[rya]), d[rxb], d[rxa],\r
- d[rx], zeta);\r
- } else if (rxb == rya) {\r
- tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]),\r
- fabs(ya[rya]), fabs(yb[ryb]), d[rxa], d[rxb],\r
- d[rx], zeta);\r
- } else {\r
- tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]),\r
- fabs(yb[ryb]), fabs(ya[rya]), d[rxa], d[rxb],\r
- d[rx], zeta);\r
- }\r
+#if DEBUG >= 4\r
+#pragma omp critical\r
+ mexPrintf("thread Nr.: (%d) | firstRow/lastRow: [%d / %d] \n",\r
\r
- }\r
#ifdef PARALLEL\r
-#pragma omp critical\r
+ omp_get_thread_num()\r
+#else\r
+ i\r
#endif\r
- A[(k * em) + j] = 1. / (4 * M_PI) * tmp;\r
- if (k != j)\r
- A[(j * em) + k] = 1. / (4 * M_PI) * tmp;\r
-#ifdef PARALLEL\r
+ , firstRow, lastRow);\r
#pragma omp end critical\r
#endif\r
+ {\r
+ for (j = firstRow; j <= lastRow; ++j) {\r
+ for (k = 0; k <= j; ++k) {\r
+ x[0][0] = C[(int) E[j] - 1];\r
+ x[0][1] = C[cm + (int) E[j] - 1];\r
+ x[0][2] = C[2 * cm + (int) E[j] - 1];\r
+\r
+ x[1][0] = C[(int) E[em + j] - 1];\r
+ x[1][1] = C[cm + (int) E[em + j] - 1];\r
+ x[1][2] = C[2 * cm + (int) E[em + j] - 1];\r
+\r
+ x[2][0] = C[(int) E[2 * em + j] - 1];\r
+ x[2][1] = C[cm + (int) E[2 * em + j] - 1];\r
+ x[2][2] = C[2 * cm + (int) E[2 * em + j] - 1];\r
+\r
+ x[3][0] = C[(int) E[3 * em + j] - 1];\r
+ x[3][1] = C[cm + (int) E[3 * em + j] - 1];\r
+ x[3][2] = C[2 * cm + (int) E[3 * em + j] - 1];\r
+\r
+ // Seitenvektoren aufbauen\r
+ sub(xa, x[1], x[0]);\r
+ sub(xb, x[3], x[0]);\r
+\r
+ // Lageeigenschaften des Flaechenstuecks\r
+ rxa = dimOfVec(xa);\r
+ rxb = dimOfVec(xb);\r
+ rx = dimOfThird(rxa, rxb);\r
+\r
+ //kleinste Ecke finden und fuer \delta verwenden\r
+ getSCorner(dt, x);\r
+\r
+ y[0][0] = C[(int) E[k] - 1];\r
+ y[0][1] = C[cm + (int) E[k] - 1];\r
+ y[0][2] = C[2 * cm + (int) E[k] - 1];\r
+\r
+ y[1][0] = C[(int) E[em + k] - 1];\r
+ y[1][1] = C[cm + (int) E[em + k] - 1];\r
+ y[1][2] = C[2 * cm + (int) E[em + k] - 1];\r
+\r
+ y[2][0] = C[(int) E[2 * em + k] - 1];\r
+ y[2][1] = C[cm + (int) E[2 * em + k] - 1];\r
+ y[2][2] = C[2 * cm + (int) E[2 * em + k] - 1];\r
+\r
+ y[3][0] = C[(int) E[3 * em + k] - 1];\r
+ y[3][1] = C[cm + (int) E[3 * em + k] - 1];\r
+ y[3][2] = C[2 * cm + (int) E[3 * em + k] - 1];\r
+\r
+ // Seiten Vektoren aufbauen\r
+ sub(ya, y[1], y[0]);\r
+ sub(yb, y[3], y[0]);\r
+\r
+ // Lageeigenschaften des Flaechenstuecks\r
+ rya = dimOfVec(ya);\r
+ ryb = dimOfVec(yb);\r
+ ry = dimOfThird(rya, ryb);\r
+\r
+ //kleinste Ecke finden\r
+ getSCorner(d, y);\r
+\r
+ //\delta berechnen\r
+ sub(d, dt);\r
+#if DEBUG >= 5\r
+#pragma omp critical\r
+ mexPrintf("[%d %d](%f,%f)(%f,%f)+(%f,%f,%f)\n", j, k,\r
+ fabs(xa[rxa]), fabs(xb[rxb]), fabs(ya[rxa]),\r
+ fabs(yb[rxb]), d[rxa], d[rxb], d[rx]);\r
+#pragma omp end critical\r
+#endif\r
+ if (rx == ry) {\r
+ if (rxa == rya) {\r
+ tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]),\r
+ fabs(ya[rxa]), fabs(yb[rxb]), d[rxa],\r
+ d[rxb], d[rx], zeta);\r
+\r
+ } else {\r
+ tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]),\r
+ fabs(yb[rxa]), fabs(ya[rxb]), d[rxa],\r
+ d[rxb], d[rx], zeta);\r
+ }\r
+\r
+ } else {\r
+\r
+ if (rxa == rya) {\r
+ tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]),\r
+ fabs(ya[rya]), fabs(yb[ryb]), d[rxb],\r
+ d[rxa], d[rx], zeta);\r
+ } else if (rxa == ryb) {\r
+ tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]),\r
+ fabs(yb[ryb]), fabs(ya[rya]), d[rxb],\r
+ d[rxa], d[rx], zeta);\r
+ } else if (rxb == rya) {\r
+ tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]),\r
+ fabs(ya[rya]), fabs(yb[ryb]), d[rxa],\r
+ d[rxb], d[rx], zeta);\r
+ } else {\r
+ tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]),\r
+ fabs(yb[ryb]), fabs(ya[rya]), d[rxa],\r
+ d[rxb], d[rx], zeta);\r
+ }\r
+\r
+ }\r
+\r
+#pragma omp critical\r
+\r
+ A[(k * em) + j] = 1. / (4 * M_PI) * tmp;\r
+ if (k != j)\r
+ A[(j * em) + k] = 1. / (4 * M_PI) * tmp;\r
+\r
+#pragma omp end critical\r
+\r
+ }\r
+ }\r
}\r
}\r
- }\r
-\r
#ifdef PARALLEL\r
-}\r
+ }\r
#endif\r
\r
return;\r