ergo
template_blas_gemm.h
Go to the documentation of this file.
1 /* Ergo, version 3.4, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2014 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_BLAS_GEMM_HEADER
36 #define TEMPLATE_BLAS_GEMM_HEADER
37 
38 #include "template_blas_common.h"
39 
40 template<class Treal>
41 int template_blas_gemm(const char *transa, const char *transb, const integer *m, const integer *
42  n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda,
43  const Treal *b, const integer *ldb, const Treal *beta, Treal *c__,
44  const integer *ldc)
45 {
46  /* System generated locals */
47  integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
48  i__3;
49  /* Local variables */
50  integer info;
51  logical nota, notb;
52  Treal temp;
53  integer i__, j, l;
54  integer nrowa, nrowb;
55 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
56 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
57 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
58 /* Purpose
59  =======
60  DGEMM performs one of the matrix-matrix operations
61  C := alpha*op( A )*op( B ) + beta*C,
62  where op( X ) is one of
63  op( X ) = X or op( X ) = X',
64  alpha and beta are scalars, and A, B and C are matrices, with op( A )
65  an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
66  Parameters
67  ==========
68  TRANSA - CHARACTER*1.
69  On entry, TRANSA specifies the form of op( A ) to be used in
70  the matrix multiplication as follows:
71  TRANSA = 'N' or 'n', op( A ) = A.
72  TRANSA = 'T' or 't', op( A ) = A'.
73  TRANSA = 'C' or 'c', op( A ) = A'.
74  Unchanged on exit.
75  TRANSB - CHARACTER*1.
76  On entry, TRANSB specifies the form of op( B ) to be used in
77  the matrix multiplication as follows:
78  TRANSB = 'N' or 'n', op( B ) = B.
79  TRANSB = 'T' or 't', op( B ) = B'.
80  TRANSB = 'C' or 'c', op( B ) = B'.
81  Unchanged on exit.
82  M - INTEGER.
83  On entry, M specifies the number of rows of the matrix
84  op( A ) and of the matrix C. M must be at least zero.
85  Unchanged on exit.
86  N - INTEGER.
87  On entry, N specifies the number of columns of the matrix
88  op( B ) and the number of columns of the matrix C. N must be
89  at least zero.
90  Unchanged on exit.
91  K - INTEGER.
92  On entry, K specifies the number of columns of the matrix
93  op( A ) and the number of rows of the matrix op( B ). K must
94  be at least zero.
95  Unchanged on exit.
96  ALPHA - DOUBLE PRECISION.
97  On entry, ALPHA specifies the scalar alpha.
98  Unchanged on exit.
99  A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
100  k when TRANSA = 'N' or 'n', and is m otherwise.
101  Before entry with TRANSA = 'N' or 'n', the leading m by k
102  part of the array A must contain the matrix A, otherwise
103  the leading k by m part of the array A must contain the
104  matrix A.
105  Unchanged on exit.
106  LDA - INTEGER.
107  On entry, LDA specifies the first dimension of A as declared
108  in the calling (sub) program. When TRANSA = 'N' or 'n' then
109  LDA must be at least max( 1, m ), otherwise LDA must be at
110  least max( 1, k ).
111  Unchanged on exit.
112  B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
113  n when TRANSB = 'N' or 'n', and is k otherwise.
114  Before entry with TRANSB = 'N' or 'n', the leading k by n
115  part of the array B must contain the matrix B, otherwise
116  the leading n by k part of the array B must contain the
117  matrix B.
118  Unchanged on exit.
119  LDB - INTEGER.
120  On entry, LDB specifies the first dimension of B as declared
121  in the calling (sub) program. When TRANSB = 'N' or 'n' then
122  LDB must be at least max( 1, k ), otherwise LDB must be at
123  least max( 1, n ).
124  Unchanged on exit.
125  BETA - DOUBLE PRECISION.
126  On entry, BETA specifies the scalar beta. When BETA is
127  supplied as zero then C need not be set on input.
128  Unchanged on exit.
129  C - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
130  Before entry, the leading m by n part of the array C must
131  contain the matrix C, except when beta is zero, in which
132  case C need not be set on entry.
133  On exit, the array C is overwritten by the m by n matrix
134  ( alpha*op( A )*op( B ) + beta*C ).
135  LDC - INTEGER.
136  On entry, LDC specifies the first dimension of C as declared
137  in the calling (sub) program. LDC must be at least
138  max( 1, m ).
139  Unchanged on exit.
140  Level 3 Blas routine.
141  -- Written on 8-February-1989.
142  Jack Dongarra, Argonne National Laboratory.
143  Iain Duff, AERE Harwell.
144  Jeremy Du Croz, Numerical Algorithms Group Ltd.
145  Sven Hammarling, Numerical Algorithms Group Ltd.
146  Set NOTA and NOTB as true if A and B respectively are not
147  transposed and set NROWA, NCOLA and NROWB as the number of rows
148  and columns of A and the number of rows of B respectively.
149  Parameter adjustments */
150  a_dim1 = *lda;
151  a_offset = 1 + a_dim1 * 1;
152  a -= a_offset;
153  b_dim1 = *ldb;
154  b_offset = 1 + b_dim1 * 1;
155  b -= b_offset;
156  c_dim1 = *ldc;
157  c_offset = 1 + c_dim1 * 1;
158  c__ -= c_offset;
159  /* Function Body */
160  nota = template_blas_lsame(transa, "N");
161  notb = template_blas_lsame(transb, "N");
162  if (nota) {
163  nrowa = *m;
164  } else {
165  nrowa = *k;
166  }
167  if (notb) {
168  nrowb = *k;
169  } else {
170  nrowb = *n;
171  }
172 /* Test the input parameters. */
173  info = 0;
174  if (! nota && ! template_blas_lsame(transa, "C") && ! template_blas_lsame(
175  transa, "T")) {
176  info = 1;
177  } else if (! notb && ! template_blas_lsame(transb, "C") && !
178  template_blas_lsame(transb, "T")) {
179  info = 2;
180  } else if (*m < 0) {
181  info = 3;
182  } else if (*n < 0) {
183  info = 4;
184  } else if (*k < 0) {
185  info = 5;
186  } else if (*lda < maxMACRO(1,nrowa)) {
187  info = 8;
188  } else if (*ldb < maxMACRO(1,nrowb)) {
189  info = 10;
190  } else if (*ldc < maxMACRO(1,*m)) {
191  info = 13;
192  }
193  if (info != 0) {
194  template_blas_erbla("DGEMM ", &info);
195  return 0;
196  }
197 /* Quick return if possible. */
198  if (*m == 0 || *n == 0 || ( (*alpha == 0. || *k == 0) && *beta == 1.) ) {
199  return 0;
200  }
201 /* And if alpha.eq.zero. */
202  if (*alpha == 0.) {
203  if (*beta == 0.) {
204  i__1 = *n;
205  for (j = 1; j <= i__1; ++j) {
206  i__2 = *m;
207  for (i__ = 1; i__ <= i__2; ++i__) {
208  c___ref(i__, j) = 0.;
209 /* L10: */
210  }
211 /* L20: */
212  }
213  } else {
214  i__1 = *n;
215  for (j = 1; j <= i__1; ++j) {
216  i__2 = *m;
217  for (i__ = 1; i__ <= i__2; ++i__) {
218  c___ref(i__, j) = *beta * c___ref(i__, j);
219 /* L30: */
220  }
221 /* L40: */
222  }
223  }
224  return 0;
225  }
226 /* Start the operations. */
227  if (notb) {
228  if (nota) {
229 /* Form C := alpha*A*B + beta*C. */
230  i__1 = *n;
231  for (j = 1; j <= i__1; ++j) {
232  if (*beta == 0.) {
233  i__2 = *m;
234  for (i__ = 1; i__ <= i__2; ++i__) {
235  c___ref(i__, j) = 0.;
236 /* L50: */
237  }
238  } else if (*beta != 1.) {
239  i__2 = *m;
240  for (i__ = 1; i__ <= i__2; ++i__) {
241  c___ref(i__, j) = *beta * c___ref(i__, j);
242 /* L60: */
243  }
244  }
245  i__2 = *k;
246  for (l = 1; l <= i__2; ++l) {
247  if (b_ref(l, j) != 0.) {
248  temp = *alpha * b_ref(l, j);
249  i__3 = *m;
250  for (i__ = 1; i__ <= i__3; ++i__) {
251  c___ref(i__, j) = c___ref(i__, j) + temp * a_ref(
252  i__, l);
253 /* L70: */
254  }
255  }
256 /* L80: */
257  }
258 /* L90: */
259  }
260  } else {
261 /* Form C := alpha*A'*B + beta*C */
262  i__1 = *n;
263  for (j = 1; j <= i__1; ++j) {
264  i__2 = *m;
265  for (i__ = 1; i__ <= i__2; ++i__) {
266  temp = 0.;
267  i__3 = *k;
268  for (l = 1; l <= i__3; ++l) {
269  temp += a_ref(l, i__) * b_ref(l, j);
270 /* L100: */
271  }
272  if (*beta == 0.) {
273  c___ref(i__, j) = *alpha * temp;
274  } else {
275  c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__,
276  j);
277  }
278 /* L110: */
279  }
280 /* L120: */
281  }
282  }
283  } else {
284  if (nota) {
285 /* Form C := alpha*A*B' + beta*C */
286  i__1 = *n;
287  for (j = 1; j <= i__1; ++j) {
288  if (*beta == 0.) {
289  i__2 = *m;
290  for (i__ = 1; i__ <= i__2; ++i__) {
291  c___ref(i__, j) = 0.;
292 /* L130: */
293  }
294  } else if (*beta != 1.) {
295  i__2 = *m;
296  for (i__ = 1; i__ <= i__2; ++i__) {
297  c___ref(i__, j) = *beta * c___ref(i__, j);
298 /* L140: */
299  }
300  }
301  i__2 = *k;
302  for (l = 1; l <= i__2; ++l) {
303  if (b_ref(j, l) != 0.) {
304  temp = *alpha * b_ref(j, l);
305  i__3 = *m;
306  for (i__ = 1; i__ <= i__3; ++i__) {
307  c___ref(i__, j) = c___ref(i__, j) + temp * a_ref(
308  i__, l);
309 /* L150: */
310  }
311  }
312 /* L160: */
313  }
314 /* L170: */
315  }
316  } else {
317 /* Form C := alpha*A'*B' + beta*C */
318  i__1 = *n;
319  for (j = 1; j <= i__1; ++j) {
320  i__2 = *m;
321  for (i__ = 1; i__ <= i__2; ++i__) {
322  temp = 0.;
323  i__3 = *k;
324  for (l = 1; l <= i__3; ++l) {
325  temp += a_ref(l, i__) * b_ref(j, l);
326 /* L180: */
327  }
328  if (*beta == 0.) {
329  c___ref(i__, j) = *alpha * temp;
330  } else {
331  c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__,
332  j);
333  }
334 /* L190: */
335  }
336 /* L200: */
337  }
338  }
339  }
340  return 0;
341 /* End of DGEMM . */
342 } /* dgemm_ */
343 #undef c___ref
344 #undef b_ref
345 #undef a_ref
346 
347 #endif
#define b_ref(a_1, a_2)
int integer
Definition: template_blas_common.h:38
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:144
#define c___ref(a_1, a_2)
#define a_ref(a_1, a_2)
bool logical
Definition: template_blas_common.h:39
int template_blas_gemm(const char *transa, const char *transb, const integer *m, const integer *n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda, const Treal *b, const integer *ldb, const Treal *beta, Treal *c__, const integer *ldc)
Definition: template_blas_gemm.h:41
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:44