*\r
* Peter Schaefer\r
*/\r
+\r
+//#define DEBUG\r
+#define PARALLEL\r
+\r
//#include <iostream>\r
#include <cmath>\r
//#include <cassert>\r
}\r
}\r
\r
+double inline distT(double b, double d, double t, double v, double d1,\r
+ double d2, double d3, double * dummy) {\r
+ double dis1 = 0, dis2 = 0;\r
+ if (d1 < 0)\r
+ dis1 = -d1 - t;\r
+ else\r
+ dis1 = d1 - b;\r
+ if (dis1 < 0)\r
+ dis1 = 0;\r
+\r
+ if (d2 < 0)\r
+ dis2 = -d2 - v;\r
+ else\r
+ dis2 = d2 - d;\r
+ if (dis2 < 0)\r
+ dis2 = 0;\r
+\r
+// if(d3!=0)\r
+// mexErrMsgTxt("expected d3 == 0");\r
+\r
+// return dis1 * dis1 + dis2 * dis2 + d3 * d3;\r
+ return (dis1 * dis1 + dis2 * dis2 + d3 * d3) * (4 * M_PI);\r
+}\r
+\r
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {\r
/*\r
mexPrintf("%d",omp_get_max_threads());\r
mexPrintf("%d",omp_get_num_threads());\r
*/\r
\r
- int j, k; //Schleifenindizes\r
+ int i, j, k; //Schleifenindizes\r
int count;\r
- double tmp;\r
-\r
- double x[4][3] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };\r
- double y[4][3] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } };\r
- double xa[3] = { 0, 0, 0 };\r
- double xb[3] = { 0, 0, 0 };\r
- double ya[3] = { 0, 0, 0 };\r
- double yb[3] = { 0, 0, 0 };\r
- double d[3] = { 0, 0, 0 };\r
- double dt[3] = { 0, 0, 0 };\r
\r
//Sicherheitsabfragen zu Datengroessen\r
if ((nrhs != 4))\r
ctypeP = cParO4;\r
ctypeO = cOrtO1;\r
break;\r
+ case 0:\r
+ ctypeP = distT;\r
+ ctypeO = distT;\r
+ break;\r
}\r
\r
count = 0;\r
- omp_set_dynamic(1);\r
+// omp_set_dynamic(1);\r
\r
//LageInformationen\r
int rx, rxa, rxb, ry, rya, ryb;\r
+ double tmp;\r
+ double x[4][3] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };\r
+ double y[4][3] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } };\r
+ double xa[3] = { 0, 0, 0 };\r
+ double xb[3] = { 0, 0, 0 };\r
+ double ya[3] = { 0, 0, 0 };\r
+ double yb[3] = { 0, 0, 0 };\r
+ double d[3] = { 0, 0, 0 };\r
+ double dt[3] = { 0, 0, 0 };\r
+\r
+#define V_MINROW_PER_WORKER 3\r
+\r
+ int actualNumberOfThreads = omp_get_max_threads();\r
+ int firstRow = 0, lastRow = -1;\r
+ int targetSize = em * (em + 1) / (2 * actualNumberOfThreads);\r
+\r
+ if (targetSize < V_MINROW_PER_WORKER) {\r
+ actualNumberOfThreads = (int) em * (em + 1) / (2 * V_MINROW_PER_WORKER);\r
+ if (actualNumberOfThreads < 1) {\r
+ actualNumberOfThreads = 1;\r
+ targetSize = em;\r
+ } else {\r
+ targetSize = V_MINROW_PER_WORKER;\r
+ }\r
+ }\r
+#ifdef DEBUG\r
+ mexPrintf("matrixSize (em) : %d\n"\r
+ "targetSize : %d\n"\r
+ "actualNumberOfThreads: %d\n", em, targetSize,\r
+ actualNumberOfThreads);\r
+#endif\r
\r
//Ausrechnen\r
-#pragma omp parallel private(j,k,tmp,x,y,xa,xb,ya,yb,d,dt,rx, rxa, rxb, ry, rya, ryb) shared(C,E,ctypeO,ctypeP)\r
+#ifndef DEBUG\r
+#ifdef PARALLEL\r
+#pragma omp parallel private(i,j,k,tmp,x,y,xa,xb,ya,yb,d,dt,rx, rxa, rxb, ry, rya, ryb,lastRow,firstRow) shared(C,E,ctypeO,ctypeP,em,targetSize,actualNumberOfThreads)\r
{\r
#pragma omp parallel for\r
- for (j = 0; j < em; ++j) {\r
- x[0][0] = C[(int) E[j] - 1];\r
- x[0][1] = C[cm + (int) E[j] - 1];\r
- x[0][2] = C[2 * cm + (int) E[j] - 1];\r
+#endif\r
+#endif\r
+ for (i = 0; i < actualNumberOfThreads; ++i) {\r
+ firstRow = (int) sqrt(2*i*targetSize);\r
+ if (i == actualNumberOfThreads - 1) {\r
+ lastRow = em - 1;\r
+ } else {\r
+ lastRow = (int) sqrt(2*(i+1)*targetSize)-1;\r
+ }\r
+#ifdef DEBUG\r
+ mexPrintf("thread Nr. : %d\n"\r
+ "firstRow : %d\n"\r
+ "lastRow : %d\n", i, firstRow, lastRow);\r
+#endif\r
\r
- x[1][0] = C[(int) E[em + j] - 1];\r
- x[1][1] = C[cm + (int) E[em + j] - 1];\r
- x[1][2] = C[2 * cm + (int) E[em + j] - 1];\r
+ for (j = firstRow; j <= lastRow; ++j) {\r
+ for (k = 0; k <= j; ++k) {\r
+ x[0][0] = C[(int) E[j] - 1];\r
+ x[0][1] = C[cm + (int) E[j] - 1];\r
+ x[0][2] = C[2 * cm + (int) E[j] - 1];\r
\r
- x[2][0] = C[(int) E[2 * em + j] - 1];\r
- x[2][1] = C[cm + (int) E[2 * em + j] - 1];\r
- x[2][2] = C[2 * cm + (int) E[2 * em + j] - 1];\r
+ x[1][0] = C[(int) E[em + j] - 1];\r
+ x[1][1] = C[cm + (int) E[em + j] - 1];\r
+ x[1][2] = C[2 * cm + (int) E[em + j] - 1];\r
\r
- x[3][0] = C[(int) E[3 * em + j] - 1];\r
- x[3][1] = C[cm + (int) E[3 * em + j] - 1];\r
- x[3][2] = C[2 * cm + (int) E[3 * em + j] - 1];\r
+ x[2][0] = C[(int) E[2 * em + j] - 1];\r
+ x[2][1] = C[cm + (int) E[2 * em + j] - 1];\r
+ x[2][2] = C[2 * cm + (int) E[2 * em + j] - 1];\r
\r
- // Seitenvektoren aufbauen\r
- sub(xa, x[1], x[0]);\r
- sub(xb, x[3], x[0]);\r
+ x[3][0] = C[(int) E[3 * em + j] - 1];\r
+ x[3][1] = C[cm + (int) E[3 * em + j] - 1];\r
+ x[3][2] = C[2 * cm + (int) E[3 * em + j] - 1];\r
\r
- // Lageeigenschaften des Flaechenstuecks\r
- rxa = dimOfVec(xa);\r
- rxb = dimOfVec(xb);\r
- rx = dimOfThird(rxa, rxb);\r
+ // Seitenvektoren aufbauen\r
+ sub(xa, x[1], x[0]);\r
+ sub(xb, x[3], x[0]);\r
\r
- //kleinste Ecke finden und fuer \delta verwenden\r
- getSCorner(dt, x);\r
+ // Lageeigenschaften des Flaechenstuecks\r
+ rxa = dimOfVec(xa);\r
+ rxb = dimOfVec(xb);\r
+ rx = dimOfThird(rxa, rxb);\r
\r
- for (k = 0; k <= j; ++k) {\r
+ //kleinste Ecke finden und fuer \delta verwenden\r
+ getSCorner(dt, x);\r
\r
y[0][0] = C[(int) E[k] - 1];\r
y[0][1] = C[cm + (int) E[k] - 1];\r
\r
//\delta berechnen\r
sub(d, dt);\r
-\r
- // mexPrintf("(%f,%f)(%f,%f)+(%f,%f,%f)\n",fabs(xa[rxa]), fabs(xb[rxb]), fabs(ya[rxa]),\r
- // fabs(yb[rxb]), d[rxa], d[rxb], d[rx]);\r
+#ifdef DEBUG\r
+ mexPrintf("[%d %d](%f,%f)(%f,%f)+(%f,%f,%f)\n", j, k,\r
+ fabs(xa[rxa]), fabs(xb[rxb]), fabs(ya[rxa]),\r
+ fabs(yb[rxb]), d[rxa], d[rxb], d[rx]);\r
+#endif\r
if (rx == ry) {\r
if (rxa == rya) {\r
- tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]), fabs(ya[rxa]),\r
- fabs(yb[rxb]), d[rxa], d[rxb], d[rx], zeta);\r
+ tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]),\r
+ fabs(ya[rxa]), fabs(yb[rxb]), d[rxa], d[rxb],\r
+ d[rx], zeta);\r
\r
} else {\r
- tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]), fabs(yb[rxa]),\r
- fabs(ya[rxb]), d[rxa], d[rxb], d[rx], zeta);\r
+ tmp = ctypeP(fabs(xa[rxa]), fabs(xb[rxb]),\r
+ fabs(yb[rxa]), fabs(ya[rxb]), d[rxa], d[rxb],\r
+ d[rx], zeta);\r
}\r
\r
} else {\r
\r
if (rxa == rya) {\r
- tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]), fabs(ya[rya]),\r
- fabs(yb[ryb]), d[rxb], d[rxa], d[rx], zeta);\r
+ tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]),\r
+ fabs(ya[rya]), fabs(yb[ryb]), d[rxb], d[rxa],\r
+ d[rx], zeta);\r
} else if (rxa == ryb) {\r
- tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]), fabs(yb[ryb]),\r
- fabs(ya[rya]), d[rxb], d[rxa], d[rx], zeta);\r
+ tmp = ctypeO(fabs(xb[rxb]), fabs(xa[rxa]),\r
+ fabs(yb[ryb]), fabs(ya[rya]), d[rxb], d[rxa],\r
+ d[rx], zeta);\r
} else if (rxb == rya) {\r
- tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]), fabs(ya[rya]),\r
- fabs(yb[ryb]), d[rxa], d[rxb], d[rx], zeta);\r
+ tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]),\r
+ fabs(ya[rya]), fabs(yb[ryb]), d[rxa], d[rxb],\r
+ d[rx], zeta);\r
} else {\r
- tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]), fabs(yb[ryb]),\r
- fabs(ya[rya]), d[rxa], d[rxb], d[rx], zeta);\r
+ tmp = ctypeO(fabs(xa[rxa]), fabs(xb[rxb]),\r
+ fabs(yb[ryb]), fabs(ya[rya]), d[rxa], d[rxb],\r
+ d[rx], zeta);\r
}\r
\r
}\r
if (k != j)\r
A[(j * em) + k] = 1. / (4 * M_PI) * tmp;\r
}\r
-\r
}\r
}\r
+#ifndef DEBUG\r
+#ifdef PARALLEL\r
+}\r
+#endif\r
+#endif\r
return;\r
}\r