Codebase list getfem++ / f18aaf2
fix blas and lapack interface Closes: #943943. Konstantinos Poulios 4 years ago
3 changed file(s) with 288 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
11
22 * Team upload.
33 * Remove redundant dependencies on muparser and several boost packages.
4 * Fix blas and lapack interface. Closes #943943.
45
56 -- Konstantinos Poulios <poulios.konstantinos@gmail.com> Wed, 25 Dec 2019 19:08:00 +0100
67
0 --- a/src/gmm/gmm_blas_interface.h
1 +++ b/src/gmm/gmm_blas_interface.h
2 @@ -52,6 +52,12 @@
3 #define GMMLAPACK_TRACE(f)
4 // #define GMMLAPACK_TRACE(f) cout << "function " << f << " called" << endl;
5
6 +#ifdef WeirdNEC // Rarely defined parameter (name taken from cblas_f77.h)
7 + #define F77_INT long
8 +#else // By default F77_INT will just be int in C
9 + #define F77_INT int
10 +#endif
11 +
12 /* ********************************************************************* */
13 /* Operations interfaced for T = float, double, std::complex<float> */
14 /* or std::complex<double> : */
15 @@ -151,13 +157,17 @@
16 /* BLAS functions used. */
17 /* ********************************************************************* */
18 extern "C" {
19 - void daxpy_(const long *n, const double *alpha, const double *x,
20 - const long *incx, double *y, const long *incy);
21 - void dgemm_(const char *tA, const char *tB, const long *m,
22 - const long *n, const long *k, const double *alpha,
23 - const double *A, const long *ldA, const double *B,
24 - const long *ldB, const double *beta, double *C,
25 - const long *ldC);
26 + void daxpy_(const F77_INT *n,
27 + const double *alpha,
28 + const double *x, const F77_INT *incx,
29 + double *y, const F77_INT *incy);
30 + void dgemm_(const char *tA, const char *tB,
31 + const F77_INT *m, const F77_INT *n, const F77_INT *k,
32 + const double *alpha,
33 + const double *A, const F77_INT *ldA,
34 + const double *B, const F77_INT *ldB,
35 + const double *beta,
36 + double *C, const F77_INT *ldC);
37 void sgemm_(...); void cgemm_(...); void zgemm_(...);
38 void sgemv_(...); void dgemv_(...); void cgemv_(...); void zgemv_(...);
39 void strsv_(...); void dtrsv_(...); void ctrsv_(...); void ztrsv_(...);
40 @@ -180,7 +190,8 @@
41 inline number_traits<base_type >::magnitude_type \
42 vect_norm2(param1(base_type)) { \
43 GMMLAPACK_TRACE("nrm2_interface"); \
44 - long inc(1), n(long(vect_size(x))); trans1(base_type); \
45 + F77_INT inc(1), n(F77_INT(vect_size(x))); \
46 + trans1(base_type); \
47 return blas_name(&n, &x[0], &inc); \
48 }
49
50 @@ -200,7 +211,8 @@
51 blas_name, base_type) \
52 inline base_type vect_sp(param1(base_type), param2(base_type)) { \
53 GMMLAPACK_TRACE("dot_interface"); \
54 - trans1(base_type); trans2(base_type); long inc(1), n(long(vect_size(y)));\
55 + trans1(base_type); trans2(base_type); \
56 + F77_INT inc(1), n(F77_INT(vect_size(y))); \
57 return mult1 mult2 blas_name(&n, &x[0], &inc, &y[0], &inc); \
58 }
59
60 @@ -267,7 +279,8 @@
61 blas_name, base_type) \
62 inline base_type vect_hp(param1(base_type), param2(base_type)) { \
63 GMMLAPACK_TRACE("dotc_interface"); \
64 - trans1(base_type); trans2(base_type); long inc(1), n(long(vect_size(y)));\
65 + trans1(base_type); trans2(base_type); \
66 + F77_INT inc(1), n(F77_INT(vect_size(y))); \
67 return mult1 mult2 blas_name(&n, &x[0], &inc, &y[0], &inc); \
68 }
69
70 @@ -332,7 +345,8 @@
71 # define axpy_interface(param1, trans1, blas_name, base_type) \
72 inline void add(param1(base_type), std::vector<base_type > &y) { \
73 GMMLAPACK_TRACE("axpy_interface"); \
74 - long inc(1), n(long(vect_size(y))); trans1(base_type); \
75 + F77_INT inc(1), n(F77_INT(vect_size(y))); \
76 + trans1(base_type); \
77 if (n == 0) return; \
78 blas_name(&n, &a, &x[0], &inc, &y[0], &inc); \
79 }
80 @@ -367,7 +381,8 @@
81 std::vector<base_type > &z, orien) { \
82 GMMLAPACK_TRACE("gemv_interface"); \
83 trans1(base_type); trans2(base_type); base_type beta(1); \
84 - long m(long(mat_nrows(A))), lda(m), n(long(mat_ncols(A))), inc(1); \
85 + F77_INT m(F77_INT(mat_nrows(A))), lda(m); \
86 + F77_INT n(F77_INT(mat_ncols(A))), inc(1); \
87 if (m && n) blas_name(&t, &m, &n, &alpha, &A(0,0), &lda, &x[0], &inc, \
88 &beta, &z[0], &inc); \
89 else gmm::clear(z); \
90 @@ -489,7 +504,8 @@
91 std::vector<base_type > &z, orien) { \
92 GMMLAPACK_TRACE("gemv_interface2"); \
93 trans1(base_type); trans2(base_type); base_type beta(0); \
94 - long m(long(mat_nrows(A))), lda(m), n(long(mat_ncols(A))), inc(1); \
95 + F77_INT m(F77_INT(mat_nrows(A))), lda(m); \
96 + F77_INT n(F77_INT(mat_ncols(A))), inc(1); \
97 if (m && n) \
98 blas_name(&t, &m, &n, &alpha, &A(0,0), &lda, &x[0], &inc, &beta, \
99 &z[0], &inc); \
100 @@ -586,8 +602,8 @@
101 const std::vector<base_type > &V, \
102 const std::vector<base_type > &W) { \
103 GMMLAPACK_TRACE("ger_interface"); \
104 - long m(long(mat_nrows(A))), lda = m, n(long(mat_ncols(A))); \
105 - long incx = 1, incy = 1; \
106 + F77_INT m(F77_INT(mat_nrows(A))), lda = m; \
107 + F77_INT n(F77_INT(mat_ncols(A))), incx = 1, incy = 1; \
108 base_type alpha(1); \
109 if (m && n) \
110 blas_name(&m, &n, &alpha, &V[0], &incx, &W[0], &incy, &A(0,0), &lda);\
111 @@ -604,8 +620,8 @@
112 const std::vector<base_type > &W) { \
113 GMMLAPACK_TRACE("ger_interface"); \
114 gemv_trans2_s(base_type); \
115 - long m(long(mat_nrows(A))), lda = m, n(long(mat_ncols(A))); \
116 - long incx = 1, incy = 1; \
117 + F77_INT m(F77_INT(mat_nrows(A))), lda = m; \
118 + F77_INT n(F77_INT(mat_ncols(A))), incx = 1, incy = 1; \
119 if (m && n) \
120 blas_name(&m, &n, &alpha, &x[0], &incx, &W[0], &incy, &A(0,0), &lda);\
121 }
122 @@ -621,8 +637,8 @@
123 gemv_p2_s(base_type)) { \
124 GMMLAPACK_TRACE("ger_interface"); \
125 gemv_trans2_s(base_type); \
126 - long m(long(mat_nrows(A))), lda = m, n(long(mat_ncols(A))); \
127 - long incx = 1, incy = 1; \
128 + F77_INT m(F77_INT(mat_nrows(A))), lda = m; \
129 + F77_INT n(F77_INT(mat_ncols(A))), incx = 1, incy = 1; \
130 base_type al2 = gmm::conj(alpha); \
131 if (m && n) \
132 blas_name(&m, &n, &al2, &V[0], &incx, &x[0], &incy, &A(0,0), &lda); \
133 @@ -643,9 +659,8 @@
134 dense_matrix<base_type > &C, c_mult) { \
135 GMMLAPACK_TRACE("gemm_interface_nn"); \
136 const char t = 'N'; \
137 - long m(long(mat_nrows(A))), lda = m, k(long(mat_ncols(A))); \
138 - long n(long(mat_ncols(B))); \
139 - long ldb = k, ldc = m; \
140 + F77_INT m(F77_INT(mat_nrows(A))), lda = m, k(F77_INT(mat_ncols(A))); \
141 + F77_INT n(F77_INT(mat_ncols(B))), ldb = k, ldc = m; \
142 base_type alpha(1), beta(0); \
143 if (m && k && n) \
144 blas_name(&t, &t, &m, &n, &k, &alpha, \
145 @@ -671,8 +686,8 @@
146 dense_matrix<base_type > &A \
147 = const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_))); \
148 const char t = 'T', u = 'N'; \
149 - long m(long(mat_ncols(A))), k(long(mat_nrows(A))), n(long(mat_ncols(B))); \
150 - long lda = k, ldb = k, ldc = m; \
151 + F77_INT m(F77_INT(mat_ncols(A))), k(F77_INT(mat_nrows(A))); \
152 + F77_INT n(F77_INT(mat_ncols(B))), lda = k, ldb = k, ldc = m; \
153 base_type alpha(1), beta(0); \
154 if (m && k && n) \
155 blas_name(&t, &u, &m, &n, &k, &alpha, \
156 @@ -701,9 +716,8 @@
157 dense_matrix<base_type > &B \
158 = const_cast<dense_matrix<base_type > &>(*(linalg_origin(B_))); \
159 const char t = 'N', u = 'T'; \
160 - long m(long(mat_nrows(A))), lda = m, k(long(mat_ncols(A))); \
161 - long n(long(mat_nrows(B))); \
162 - long ldb = n, ldc = m; \
163 + F77_INT m(F77_INT(mat_nrows(A))), lda = m, k(F77_INT(mat_ncols(A))); \
164 + F77_INT n(F77_INT(mat_nrows(B))), ldb = n, ldc = m; \
165 base_type alpha(1), beta(0); \
166 if (m && k && n) \
167 blas_name(&t, &u, &m, &n, &k, &alpha, \
168 @@ -735,8 +749,8 @@
169 dense_matrix<base_type > &B \
170 = const_cast<dense_matrix<base_type > &>(*(linalg_origin(B_))); \
171 const char t = 'T', u = 'T'; \
172 - long m(long(mat_ncols(A))), k(long(mat_nrows(A))), n(long(mat_nrows(B))); \
173 - long lda = k, ldb = n, ldc = m; \
174 + F77_INT m(F77_INT(mat_ncols(A))), k(F77_INT(mat_nrows(A))); \
175 + F77_INT n(F77_INT(mat_nrows(B))), lda = k, ldb = n, ldc = m; \
176 base_type alpha(1), beta(0); \
177 if (m && k && n) \
178 blas_name(&t, &u, &m, &n, &k, &alpha, \
179 @@ -775,8 +789,8 @@
180 dense_matrix<base_type > &A \
181 = const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_))); \
182 const char t = 'C', u = 'N'; \
183 - long m(long(mat_ncols(A))), k(long(mat_nrows(A))), n(long(mat_ncols(B))); \
184 - long lda = k, ldb = k, ldc = m; \
185 + F77_INT m(F77_INT(mat_ncols(A))), k(F77_INT(mat_nrows(A))); \
186 + F77_INT n(F77_INT(mat_ncols(B))), lda = k, ldb = k, ldc = m; \
187 base_type alpha(1), beta(0); \
188 if (m && k && n) \
189 blas_name(&t, &u, &m, &n, &k, &alpha, \
190 @@ -801,8 +815,8 @@
191 dense_matrix<base_type > &B \
192 = const_cast<dense_matrix<base_type > &>(*(linalg_origin(B_))); \
193 const char t = 'N', u = 'C'; \
194 - long m(long(mat_nrows(A))), lda = m, k(long(mat_ncols(A))); \
195 - long n(long(mat_nrows(B))), ldb = n, ldc = m; \
196 + F77_INT m(F77_INT(mat_nrows(A))), lda = m, k(F77_INT(mat_ncols(A))); \
197 + F77_INT n(F77_INT(mat_nrows(B))), ldb = n, ldc = m; \
198 base_type alpha(1), beta(0); \
199 if (m && k && n) \
200 blas_name(&t, &u, &m, &n, &k, &alpha, \
201 @@ -830,8 +844,8 @@
202 dense_matrix<base_type > &B \
203 = const_cast<dense_matrix<base_type > &>(*(linalg_origin(B_))); \
204 const char t = 'C', u = 'C'; \
205 - long m(long(mat_ncols(A))), k(long(mat_nrows(A))), lda = k; \
206 - long n(long(mat_nrows(B))), ldb = n, ldc = m; \
207 + F77_INT m(F77_INT(mat_ncols(A))), k(F77_INT(mat_nrows(A))), lda = k; \
208 + F77_INT n(F77_INT(mat_nrows(B))), ldb = n, ldc = m; \
209 base_type alpha(1), beta(0); \
210 if (m && k && n) \
211 blas_name(&t, &u, &m, &n, &k, &alpha, \
212 @@ -853,7 +867,7 @@
213 size_type k, bool is_unit) { \
214 GMMLAPACK_TRACE("trsv_interface"); \
215 loru; trans1(base_type); char d = is_unit ? 'U' : 'N'; \
216 - long lda(long(mat_nrows(A))), inc(1), n = long(k); \
217 + F77_INT lda(F77_INT(mat_nrows(A))), inc(1), n = F77_INT(k); \
218 if (lda) blas_name(&l, &t, &d, &n, &A(0,0), &lda, &x[0], &inc); \
219 }
220
221 --- a/src/bgeot_sparse_tensors.cc
222 +++ b/src/bgeot_sparse_tensors.cc
223 @@ -907,10 +907,10 @@
224 }
225
226 static bool do_reduction2v(bgeot::multi_tensor_iterator &mti) {
227 - long n = mti.vectorized_size();
228 + F77_INT n = F77_INT(mti.vectorized_size());
229 const std::vector<stride_type> &s = mti.vectorized_strides();
230 if (n && s[0] && s[1] && s[2] == 0) {
231 - long incx = s[1], incy = s[0];
232 + F77_INT incx = F77_INT(s[1]), incy = F77_INT(s[0]);
233 /*mti.print();
234 scalar_type *b[3];
235 for (int i=0; i < 3; ++i) b[i] = &mti.p(i);*/
236 @@ -945,10 +945,10 @@
237 }
238
239 static bool do_reduction3v(bgeot::multi_tensor_iterator &mti) {
240 - long n = mti.vectorized_size();
241 + F77_INT n = F77_INT(mti.vectorized_size());
242 const std::vector<stride_type> &s = mti.vectorized_strides();
243 if (n && s[0] && s[1] && s[2] == 0 && s[3] == 0) {
244 - long incx = s[1], incy = s[0];
245 + F77_INT incx = F77_INT(s[1]), incy = F77_INT(s[0]);
246 do {
247 double v = mti.p(2)*mti.p(3);
248 gmm::daxpy_(&n, &v, &mti.p(1), &incx, &mti.p(0), &incy);
249 --- a/src/getfem_assembling_tensors.cc
250 +++ b/src/getfem_assembling_tensors.cc
251 @@ -354,8 +354,8 @@
252 tensor_bases[k] = const_cast<TDIter>(&(*eltm[k]->begin()));
253 }
254 red.do_reduction();
255 - long one = 1, n = int(red.out_data.size()); assert(n);
256 - gmm::daxpy_(&n, &c, const_cast<double*>(&(red.out_data[0])),
257 + F77_INT one = 1, n = F77_INT(red.out_data.size()); assert(n);
258 + gmm::daxpy_(&n, &c, const_cast<double*>(&(red.out_data[0])),
259 &one, (double*)&(t[0]), &one);
260 }
261 void resize_t(bgeot::base_tensor &t) {
262 --- a/src/getfem_mat_elem.cc
263 +++ b/src/getfem_mat_elem.cc
264 @@ -381,18 +381,18 @@
265 if (nm == 0) {
266 t[0] += J;
267 } else {
268 - long n0 = int(es_end[0] - es_beg[0]);
269 + F77_INT n0 = F77_INT(es_end[0] - es_beg[0]);
270 base_tensor::const_iterator pts0 = pts[0];
271
272 /* very heavy reduction .. takes much time */
273 k = nm-1; Vtab[k] = J;
274 - long one = 1;
275 + F77_INT one = 1;
276 scalar_type V;
277 do {
278 for (V = Vtab[k]; k; --k)
279 Vtab[k-1] = V = *pts[k] * V;
280 GMM_ASSERT1(pt+n0 <= t.end(), "Internal error");
281 - gmm::daxpy_(&n0, &V, const_cast<double*>(&(pts0[0])), &one,
282 + gmm::daxpy_(&n0, &V, const_cast<double*>(&(pts0[0])), &one,
283 (double*)&(*pt), &one);
284 pt+=n0;
285 for (k=1; k != nm && ++pts[k] == es_end[k]; ++k)
66 fix_python3_tests_4fe9963c.patch
77 fix_python3_tests_8dcd729f.patch
88 scripts_python3.patch
9 fix_blas_interface.patch