ergo
template_lapack_orgqr.h
Go to the documentation of this file.
1 /* Ergo, version 3.4, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2014 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_ORGQR_HEADER
36 #define TEMPLATE_LAPACK_ORGQR_HEADER
37 
38 #include "template_lapack_common.h"
39 
40 template<class Treal>
42  const integer *m,
43  const integer *n,
44  const integer *k,
45  Treal * a,
46  const integer *lda,
47  const Treal *tau,
48  Treal *work,
49  const integer *lwork,
50  integer *info
51  )
52 {
53 /* -- LAPACK routine (version 3.0) --
54  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
55  Courant Institute, Argonne National Lab, and Rice University
56  June 30, 1999
57 
58 
59  Purpose
60  =======
61 
62  DORGQR generates an M-by-N real matrix Q with orthonormal columns,
63  which is defined as the first N columns of a product of K elementary
64  reflectors of order M
65 
66  Q = H(1) H(2) . . . H(k)
67 
68  as returned by DGEQRF.
69 
70  Arguments
71  =========
72 
73  M (input) INTEGER
74  The number of rows of the matrix Q. M >= 0.
75 
76  N (input) INTEGER
77  The number of columns of the matrix Q. M >= N >= 0.
78 
79  K (input) INTEGER
80  The number of elementary reflectors whose product defines the
81  matrix Q. N >= K >= 0.
82 
83  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
84  On entry, the i-th column must contain the vector which
85  defines the elementary reflector H(i), for i = 1,2,...,k, as
86  returned by DGEQRF in the first k columns of its array
87  argument A.
88  On exit, the M-by-N matrix Q.
89 
90  LDA (input) INTEGER
91  The first dimension of the array A. LDA >= max(1,M).
92 
93  TAU (input) DOUBLE PRECISION array, dimension (K)
94  TAU(i) must contain the scalar factor of the elementary
95  reflector H(i), as returned by DGEQRF.
96 
97  WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
98  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
99 
100  LWORK (input) INTEGER
101  The dimension of the array WORK. LWORK >= max(1,N).
102  For optimum performance LWORK >= N*NB, where NB is the
103  optimal blocksize.
104 
105  If LWORK = -1, then a workspace query is assumed; the routine
106  only calculates the optimal size of the WORK array, returns
107  this value as the first entry of the WORK array, and no error
108  message related to LWORK is issued by XERBLA.
109 
110  INFO (output) INTEGER
111  = 0: successful exit
112  < 0: if INFO = -i, the i-th argument has an illegal value
113 
114  =====================================================================
115 
116 
117  Test the input arguments
118 
119  Parameter adjustments */
120  /* Table of constant values */
121  integer c__1 = 1;
122  integer c_n1 = -1;
123  integer c__3 = 3;
124  integer c__2 = 2;
125 
126  /* System generated locals */
127  integer a_dim1, a_offset, i__1, i__2, i__3;
128  /* Local variables */
129  integer i__, j, l, nbmin, iinfo;
130  integer ib, nb, ki, kk;
131  integer nx;
132  integer ldwork, lwkopt;
133  logical lquery;
134  integer iws;
135 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
136 
137 
138  a_dim1 = *lda;
139  a_offset = 1 + a_dim1 * 1;
140  a -= a_offset;
141  --tau;
142  --work;
143 
144  /* Initialization added by Elias to get rid of compiler warnings. */
145  ki = 0;
146  /* Function Body */
147  *info = 0;
148  nb = template_lapack_ilaenv(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
149  lwkopt = maxMACRO(1,*n) * nb;
150  work[1] = (Treal) lwkopt;
151  lquery = *lwork == -1;
152  if (*m < 0) {
153  *info = -1;
154  } else if (*n < 0 || *n > *m) {
155  *info = -2;
156  } else if (*k < 0 || *k > *n) {
157  *info = -3;
158  } else if (*lda < maxMACRO(1,*m)) {
159  *info = -5;
160  } else if (*lwork < maxMACRO(1,*n) && ! lquery) {
161  *info = -8;
162  }
163  if (*info != 0) {
164  i__1 = -(*info);
165  template_blas_erbla("ORGQR ", &i__1);
166  return 0;
167  } else if (lquery) {
168  return 0;
169  }
170 
171 /* Quick return if possible */
172 
173  if (*n <= 0) {
174  work[1] = 1.;
175  return 0;
176  }
177 
178  nbmin = 2;
179  nx = 0;
180  iws = *n;
181  if (nb > 1 && nb < *k) {
182 
183 /* Determine when to cross over from blocked to unblocked code.
184 
185  Computing MAX */
186  i__1 = 0, i__2 = template_lapack_ilaenv(&c__3, "DORGQR", " ", m, n, k, &c_n1, (
187  ftnlen)6, (ftnlen)1);
188  nx = maxMACRO(i__1,i__2);
189  if (nx < *k) {
190 
191 /* Determine if workspace is large enough for blocked code. */
192 
193  ldwork = *n;
194  iws = ldwork * nb;
195  if (*lwork < iws) {
196 
197 /* Not enough workspace to use optimal NB: reduce NB and
198  determine the minimum value of NB. */
199 
200  nb = *lwork / ldwork;
201 /* Computing MAX */
202  i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORGQR", " ", m, n, k, &c_n1,
203  (ftnlen)6, (ftnlen)1);
204  nbmin = maxMACRO(i__1,i__2);
205  }
206  }
207  }
208 
209  if (nb >= nbmin && nb < *k && nx < *k) {
210 
211 /* Use blocked code after the last block.
212  The first kk columns are handled by the block method. */
213 
214  ki = (*k - nx - 1) / nb * nb;
215 /* Computing MIN */
216  i__1 = *k, i__2 = ki + nb;
217  kk = minMACRO(i__1,i__2);
218 
219 /* Set A(1:kk,kk+1:n) to zero. */
220 
221  i__1 = *n;
222  for (j = kk + 1; j <= i__1; ++j) {
223  i__2 = kk;
224  for (i__ = 1; i__ <= i__2; ++i__) {
225  a_ref(i__, j) = 0.;
226 /* L10: */
227  }
228 /* L20: */
229  }
230  } else {
231  kk = 0;
232  }
233 
234 /* Use unblocked code for the last or only block. */
235 
236  if (kk < *n) {
237  i__1 = *m - kk;
238  i__2 = *n - kk;
239  i__3 = *k - kk;
240  template_lapack_org2r(&i__1, &i__2, &i__3, &a_ref(kk + 1, kk + 1), lda, &tau[kk + 1]
241  , &work[1], &iinfo);
242  }
243 
244  if (kk > 0) {
245 
246 /* Use blocked code */
247 
248  i__1 = -nb;
249  for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
250 /* Computing MIN */
251  i__2 = nb, i__3 = *k - i__ + 1;
252  ib = minMACRO(i__2,i__3);
253  if (i__ + ib <= *n) {
254 
255 /* Form the triangular factor of the block reflector
256  H = H(i) H(i+1) . . . H(i+ib-1) */
257 
258  i__2 = *m - i__ + 1;
259  template_lapack_larft("Forward", "Columnwise", &i__2, &ib, &a_ref(i__, i__),
260  lda, &tau[i__], &work[1], &ldwork);
261 
262 /* Apply H to A(i:m,i+ib:n) from the left */
263 
264  i__2 = *m - i__ + 1;
265  i__3 = *n - i__ - ib + 1;
266  template_lapack_larfb("Left", "No transpose", "Forward", "Columnwise", &
267  i__2, &i__3, &ib, &a_ref(i__, i__), lda, &work[1], &
268  ldwork, &a_ref(i__, i__ + ib), lda, &work[ib + 1], &
269  ldwork);
270  }
271 
272 /* Apply H to rows i:m of current block */
273 
274  i__2 = *m - i__ + 1;
275  template_lapack_org2r(&i__2, &ib, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[
276  1], &iinfo);
277 
278 /* Set rows 1:i-1 of current block to zero */
279 
280  i__2 = i__ + ib - 1;
281  for (j = i__; j <= i__2; ++j) {
282  i__3 = i__ - 1;
283  for (l = 1; l <= i__3; ++l) {
284  a_ref(l, j) = 0.;
285 /* L30: */
286  }
287 /* L40: */
288  }
289 /* L50: */
290  }
291  }
292 
293  work[1] = (Treal) iws;
294  return 0;
295 
296 /* End of DORGQR */
297 
298 } /* dorgqr_ */
299 
300 #undef a_ref
301 
302 
303 #endif
int template_lapack_orgqr(const integer *m, const integer *n, const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *work, const integer *lwork, integer *info)
Definition: template_lapack_orgqr.h:41
int integer
Definition: template_blas_common.h:38
integer template_lapack_ilaenv(const integer *ispec, const char *name__, const char *opts, const integer *n1, const integer *n2, const integer *n3, const integer *n4, ftnlen name_len, ftnlen opts_len)
Definition: template_lapack_common.cc:279
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
#define minMACRO(a, b)
Definition: template_blas_common.h:44
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:144
int template_lapack_larfb(const char *side, const char *trans, const char *direct, const char *storev, const integer *m, const integer *n, const integer *k, const Treal *v, const integer *ldv, const Treal *t, const integer *ldt, Treal *c__, const integer *ldc, Treal *work, const integer *ldwork)
Definition: template_lapack_larfb.h:40
#define a_ref(a_1, a_2)
bool logical
Definition: template_blas_common.h:39
int ftnlen
Definition: template_blas_common.h:40
int template_lapack_org2r(const integer *m, const integer *n, const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *work, integer *info)
Definition: template_lapack_org2r.h:40
int template_lapack_larft(const char *direct, const char *storev, const integer *n, const integer *k, Treal *v, const integer *ldv, const Treal *tau, Treal *t, const integer *ldt)
Definition: template_lapack_larft.h:40