Actual source code: dsnhep.c
slepc-3.11.0 2019-03-29
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: PetscErrorCode DSAllocate_NHEP(DS ds,PetscInt ld)
15: {
19: DSAllocateMat_Private(ds,DS_MAT_A);
20: DSAllocateMat_Private(ds,DS_MAT_Q);
21: PetscFree(ds->perm);
22: PetscMalloc1(ld,&ds->perm);
23: PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
24: return(0);
25: }
27: PetscErrorCode DSView_NHEP(DS ds,PetscViewer viewer)
28: {
32: DSViewMat(ds,viewer,DS_MAT_A);
33: if (ds->state>DS_STATE_INTERMEDIATE) {
34: DSViewMat(ds,viewer,DS_MAT_Q);
35: }
36: if (ds->mat[DS_MAT_X]) {
37: DSViewMat(ds,viewer,DS_MAT_X);
38: }
39: if (ds->mat[DS_MAT_Y]) {
40: DSViewMat(ds,viewer,DS_MAT_Y);
41: }
42: return(0);
43: }
45: static PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
46: {
47: #if defined(PETSC_MISSING_LAPACK_GESVD)
49: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
50: #else
52: PetscInt i,j;
53: PetscBLASInt info,ld,n,n1,lwork,inc=1;
54: PetscScalar sdummy,done=1.0,zero=0.0;
55: PetscReal *sigma;
56: PetscBool iscomplex = PETSC_FALSE;
57: PetscScalar *A = ds->mat[DS_MAT_A];
58: PetscScalar *Q = ds->mat[DS_MAT_Q];
59: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
60: PetscScalar *W;
63: if (left) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for left vectors");
64: PetscBLASIntCast(ds->n,&n);
65: PetscBLASIntCast(ds->ld,&ld);
66: n1 = n+1;
67: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
68: if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
69: DSAllocateWork_Private(ds,5*ld,6*ld,0);
70: DSAllocateMat_Private(ds,DS_MAT_W);
71: W = ds->mat[DS_MAT_W];
72: lwork = 5*ld;
73: sigma = ds->rwork+5*ld;
75: /* build A-w*I in W */
76: for (j=0;j<n;j++)
77: for (i=0;i<=n;i++)
78: W[i+j*ld] = A[i+j*ld];
79: for (i=0;i<n;i++)
80: W[i+i*ld] -= A[(*k)+(*k)*ld];
82: /* compute SVD of W */
83: #if !defined(PETSC_USE_COMPLEX)
84: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
85: #else
86: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
87: #endif
88: SlepcCheckLapackInfo("gesvd",info);
90: /* the smallest singular value is the new error estimate */
91: if (rnorm) *rnorm = sigma[n-1];
93: /* update vector with right singular vector associated to smallest singular value,
94: accumulating the transformation matrix Q */
95: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
96: return(0);
97: #endif
98: }
100: static PetscErrorCode DSVectors_NHEP_Refined_All(DS ds,PetscBool left)
101: {
103: PetscInt i;
106: for (i=0;i<ds->n;i++) {
107: DSVectors_NHEP_Refined_Some(ds,&i,NULL,left);
108: }
109: return(0);
110: }
112: static PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
113: {
114: #if defined(SLEPC_MISSING_LAPACK_TREVC)
116: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
117: #else
119: PetscInt i;
120: PetscBLASInt mm=1,mout,info,ld,n,*select,inc = 1;
121: PetscScalar tmp,done=1.0,zero=0.0;
122: PetscReal norm;
123: PetscBool iscomplex = PETSC_FALSE;
124: PetscScalar *A = ds->mat[DS_MAT_A];
125: PetscScalar *Q = ds->mat[DS_MAT_Q];
126: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
127: PetscScalar *Y;
130: PetscBLASIntCast(ds->n,&n);
131: PetscBLASIntCast(ds->ld,&ld);
132: DSAllocateWork_Private(ds,0,0,ld);
133: select = ds->iwork;
134: for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;
136: /* compute k-th eigenvector Y of A */
137: Y = X+(*k)*ld;
138: select[*k] = (PetscBLASInt)PETSC_TRUE;
139: #if !defined(PETSC_USE_COMPLEX)
140: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
141: mm = iscomplex? 2: 1;
142: if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
143: DSAllocateWork_Private(ds,3*ld,0,0);
144: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
145: #else
146: DSAllocateWork_Private(ds,2*ld,ld,0);
147: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
148: #endif
149: SlepcCheckLapackInfo("trevc",info);
150: if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");
152: /* accumulate and normalize eigenvectors */
153: if (ds->state>=DS_STATE_CONDENSED) {
154: PetscMemcpy(ds->work,Y,mout*ld*sizeof(PetscScalar));
155: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
156: #if !defined(PETSC_USE_COMPLEX)
157: if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
158: #endif
159: norm = BLASnrm2_(&n,Y,&inc);
160: #if !defined(PETSC_USE_COMPLEX)
161: if (iscomplex) {
162: tmp = BLASnrm2_(&n,Y+ld,&inc);
163: norm = SlepcAbsEigenvalue(norm,tmp);
164: }
165: #endif
166: tmp = 1.0 / norm;
167: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
168: #if !defined(PETSC_USE_COMPLEX)
169: if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
170: #endif
171: }
173: /* set output arguments */
174: if (iscomplex) (*k)++;
175: if (rnorm) {
176: if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
177: else *rnorm = PetscAbsScalar(Y[n-1]);
178: }
179: return(0);
180: #endif
181: }
183: static PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
184: {
185: #if defined(SLEPC_MISSING_LAPACK_TREVC)
187: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
188: #else
190: PetscInt i;
191: PetscBLASInt n,ld,mout,info,inc = 1;
192: PetscBool iscomplex;
193: PetscScalar *X,*Y,*Z,*A = ds->mat[DS_MAT_A],tmp;
194: PetscReal norm;
195: const char *side,*back;
198: PetscBLASIntCast(ds->n,&n);
199: PetscBLASIntCast(ds->ld,&ld);
200: if (left) {
201: X = NULL;
202: Y = ds->mat[DS_MAT_Y];
203: side = "L";
204: } else {
205: X = ds->mat[DS_MAT_X];
206: Y = NULL;
207: side = "R";
208: }
209: Z = left? Y: X;
210: if (ds->state>=DS_STATE_CONDENSED) {
211: /* DSSolve() has been called, backtransform with matrix Q */
212: back = "B";
213: PetscMemcpy(Z,ds->mat[DS_MAT_Q],ld*ld*sizeof(PetscScalar));
214: } else back = "A";
215: #if !defined(PETSC_USE_COMPLEX)
216: DSAllocateWork_Private(ds,3*ld,0,0);
217: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
218: #else
219: DSAllocateWork_Private(ds,2*ld,ld,0);
220: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
221: #endif
222: SlepcCheckLapackInfo("trevc",info);
224: /* normalize eigenvectors */
225: for (i=0;i<n;i++) {
226: iscomplex = (i<n-1 && A[i+1+i*ld]!=0.0)? PETSC_TRUE: PETSC_FALSE;
227: norm = BLASnrm2_(&n,Z+i*ld,&inc);
228: #if !defined(PETSC_USE_COMPLEX)
229: if (iscomplex) {
230: tmp = BLASnrm2_(&n,Z+(i+1)*ld,&inc);
231: norm = SlepcAbsEigenvalue(norm,tmp);
232: }
233: #endif
234: tmp = 1.0 / norm;
235: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+i*ld,&inc));
236: #if !defined(PETSC_USE_COMPLEX)
237: if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+(i+1)*ld,&inc));
238: #endif
239: if (iscomplex) i++;
240: }
241: return(0);
242: #endif
243: }
245: PetscErrorCode DSVectors_NHEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
246: {
250: switch (mat) {
251: case DS_MAT_X:
252: if (ds->refined) {
253: if (!ds->extrarow) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Refined vectors require activating the extra row");
254: if (j) {
255: DSVectors_NHEP_Refined_Some(ds,j,rnorm,PETSC_FALSE);
256: } else {
257: DSVectors_NHEP_Refined_All(ds,PETSC_FALSE);
258: }
259: } else {
260: if (j) {
261: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_FALSE);
262: } else {
263: DSVectors_NHEP_Eigen_All(ds,PETSC_FALSE);
264: }
265: }
266: break;
267: case DS_MAT_Y:
268: if (ds->refined) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
269: if (j) {
270: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_TRUE);
271: } else {
272: DSVectors_NHEP_Eigen_All(ds,PETSC_TRUE);
273: }
274: break;
275: case DS_MAT_U:
276: case DS_MAT_VT:
277: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
278: break;
279: default:
280: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
281: }
282: if (ds->state < DS_STATE_CONDENSED) {
283: DSSetState(ds,DS_STATE_CONDENSED);
284: }
285: return(0);
286: }
288: static PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
289: {
290: #if defined(PETSC_MISSING_LAPACK_TRSEN)
292: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable");
293: #else
295: PetscInt i;
296: PetscBLASInt info,n,ld,mout,lwork,*selection;
297: PetscScalar *T = ds->mat[DS_MAT_A],*Q = ds->mat[DS_MAT_Q],*work;
298: #if !defined(PETSC_USE_COMPLEX)
299: PetscBLASInt *iwork,liwork;
300: #endif
303: if (!k) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_WRONG,"Must supply argument k");
304: PetscBLASIntCast(ds->n,&n);
305: PetscBLASIntCast(ds->ld,&ld);
306: #if !defined(PETSC_USE_COMPLEX)
307: lwork = n;
308: liwork = 1;
309: DSAllocateWork_Private(ds,lwork,0,liwork+n);
310: work = ds->work;
311: lwork = ds->lwork;
312: selection = ds->iwork;
313: iwork = ds->iwork + n;
314: liwork = ds->liwork - n;
315: #else
316: lwork = 1;
317: DSAllocateWork_Private(ds,lwork,0,n);
318: work = ds->work;
319: selection = ds->iwork;
320: #endif
321: /* Compute the selected eigenvalue to be in the leading position */
322: DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);
323: PetscMemzero(selection,n*sizeof(PetscBLASInt));
324: for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
325: #if !defined(PETSC_USE_COMPLEX)
326: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,NULL,NULL,work,&lwork,iwork,&liwork,&info));
327: #else
328: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,NULL,NULL,work,&lwork,&info));
329: #endif
330: SlepcCheckLapackInfo("trsen",info);
331: *k = mout;
332: return(0);
333: #endif
334: }
336: static PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
337: {
338: #if defined(SLEPC_MISSING_LAPACK_TREXC)
340: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
341: #else
343: PetscScalar re;
344: PetscInt i,j,pos,result;
345: PetscBLASInt ifst,ilst,info,n,ld;
346: PetscScalar *T = ds->mat[DS_MAT_A];
347: PetscScalar *Q = ds->mat[DS_MAT_Q];
348: #if !defined(PETSC_USE_COMPLEX)
349: PetscScalar *work,im;
350: #endif
353: PetscBLASIntCast(ds->n,&n);
354: PetscBLASIntCast(ds->ld,&ld);
355: #if !defined(PETSC_USE_COMPLEX)
356: DSAllocateWork_Private(ds,ld,0,0);
357: work = ds->work;
358: #endif
359: /* selection sort */
360: for (i=ds->l;i<n-1;i++) {
361: re = wr[i];
362: #if !defined(PETSC_USE_COMPLEX)
363: im = wi[i];
364: #endif
365: pos = 0;
366: j=i+1; /* j points to the next eigenvalue */
367: #if !defined(PETSC_USE_COMPLEX)
368: if (im != 0) j=i+2;
369: #endif
370: /* find minimum eigenvalue */
371: for (;j<n;j++) {
372: #if !defined(PETSC_USE_COMPLEX)
373: SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);
374: #else
375: SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);
376: #endif
377: if (result > 0) {
378: re = wr[j];
379: #if !defined(PETSC_USE_COMPLEX)
380: im = wi[j];
381: #endif
382: pos = j;
383: }
384: #if !defined(PETSC_USE_COMPLEX)
385: if (wi[j] != 0) j++;
386: #endif
387: }
388: if (pos) {
389: /* interchange blocks */
390: PetscBLASIntCast(pos+1,&ifst);
391: PetscBLASIntCast(i+1,&ilst);
392: #if !defined(PETSC_USE_COMPLEX)
393: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
394: #else
395: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
396: #endif
397: SlepcCheckLapackInfo("trexc",info);
398: /* recover original eigenvalues from T matrix */
399: for (j=i;j<n;j++) {
400: wr[j] = T[j+j*ld];
401: #if !defined(PETSC_USE_COMPLEX)
402: if (j<n-1 && T[j+1+j*ld] != 0.0) {
403: /* complex conjugate eigenvalue */
404: wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
405: PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
406: wr[j+1] = wr[j];
407: wi[j+1] = -wi[j];
408: j++;
409: } else {
410: wi[j] = 0.0;
411: }
412: #endif
413: }
414: }
415: #if !defined(PETSC_USE_COMPLEX)
416: if (wi[i] != 0) i++;
417: #endif
418: }
419: return(0);
420: #endif
421: }
423: PetscErrorCode DSSort_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
424: {
428: if (!rr || wr == rr) {
429: DSSort_NHEP_Total(ds,wr,wi);
430: } else {
431: DSSort_NHEP_Arbitrary(ds,wr,wi,rr,ri,k);
432: }
433: return(0);
434: }
436: static PetscErrorCode DSSortWithPermutation_NHEP(DS ds,PetscInt *perm,PetscScalar *wr,PetscScalar *wi)
437: {
438: #if defined(SLEPC_MISSING_LAPACK_TREXC)
440: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
441: #else
443: PetscInt i,j,pos,inc=1;
444: PetscBLASInt ifst,ilst,info,n,ld;
445: PetscScalar *T = ds->mat[DS_MAT_A];
446: PetscScalar *Q = ds->mat[DS_MAT_Q];
447: #if !defined(PETSC_USE_COMPLEX)
448: PetscScalar *work;
449: #endif
452: PetscBLASIntCast(ds->n,&n);
453: PetscBLASIntCast(ds->ld,&ld);
454: #if !defined(PETSC_USE_COMPLEX)
455: DSAllocateWork_Private(ds,ld,0,0);
456: work = ds->work;
457: #endif
458: for (i=ds->l;i<n-1;i++) {
459: pos = perm[i];
460: #if !defined(PETSC_USE_COMPLEX)
461: inc = (pos<n-1 && T[pos+1+pos*ld] != 0.0)? 2: 1;
462: #endif
463: if (pos!=i) {
464: #if !defined(PETSC_USE_COMPLEX)
465: if ((T[pos+(pos-1)*ld] != 0.0 && perm[i+1]!=pos-1) || (T[pos+1+pos*ld] != 0.0 && perm[i+1]!=pos+1))
466: SETERRQ1(PETSC_COMM_SELF,1,"Invalid permutation due to a 2x2 block at position %D",pos);
467: #endif
469: /* interchange blocks */
470: PetscBLASIntCast(pos+1,&ifst);
471: PetscBLASIntCast(i+1,&ilst);
472: #if !defined(PETSC_USE_COMPLEX)
473: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
474: #else
475: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
476: #endif
477: SlepcCheckLapackInfo("trexc",info);
478: for (j=i+1;j<n;j++) {
479: if (perm[j]>=i && perm[j]<pos) perm[j]+=inc;
480: }
481: perm[i] = i;
482: if (inc==2) perm[i+1] = i+1;
483: }
484: if (inc==2) i++;
485: }
486: /* recover original eigenvalues from T matrix */
487: for (j=ds->l;j<n;j++) {
488: wr[j] = T[j+j*ld];
489: #if !defined(PETSC_USE_COMPLEX)
490: if (j<n-1 && T[j+1+j*ld] != 0.0) {
491: /* complex conjugate eigenvalue */
492: wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) * PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
493: wr[j+1] = wr[j];
494: wi[j+1] = -wi[j];
495: j++;
496: } else {
497: wi[j] = 0.0;
498: }
499: #endif
500: }
501: return(0);
502: #endif
503: }
505: PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
506: {
508: PetscInt i;
509: PetscBLASInt n,ld,incx=1;
510: PetscScalar *A,*Q,*x,*y,one=1.0,zero=0.0;
513: PetscBLASIntCast(ds->n,&n);
514: PetscBLASIntCast(ds->ld,&ld);
515: A = ds->mat[DS_MAT_A];
516: Q = ds->mat[DS_MAT_Q];
517: DSAllocateWork_Private(ds,2*ld,0,0);
518: x = ds->work;
519: y = ds->work+ld;
520: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
521: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
522: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
523: ds->k = n;
524: return(0);
525: }
527: /*
528: Reduce a matrix A to upper Hessenberg form Q'*A*Q, where Q is an orthogonal matrix.
529: The result overwrites A. Matrix A has the form
531: [ S | * ]
532: A = [-------]
533: [ R | H ]
535: where S is an upper (quasi-)triangular matrix of order k, H is an upper Hessenberg
536: matrix of order n-k, and R is all zeros except the first row (the arrow).
537: The algorithm uses elementary reflectors to annihilate entries in the arrow, and
538: then proceeds upwards.
539: If ilo>1, then it is assumed that the first ilo-1 entries of the arrow are zero, and
540: hence the first ilo-1 rows and columns of Q are set to the identity matrix.
542: Required workspace is 2*n.
543: */
544: static PetscErrorCode ArrowHessenberg(PetscBLASInt n,PetscBLASInt k,PetscBLASInt ilo,PetscScalar *A,PetscBLASInt lda,PetscScalar *Q,PetscBLASInt ldq,PetscScalar *work)
545: {
546: #if defined(SLEPC_MISSING_LAPACK_LARFG) || defined(SLEPC_MISSING_LAPACK_LARF)
548: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
549: #else
550: PetscBLASInt i,j,n0,m,inc=1,incn=-1;
551: PetscScalar t,*v=work,*w=work+n,tau,tauc;
554: m = n-ilo+1;
555: for (i=k;i>ilo;i--) {
556: for (j=0;j<=i-ilo;j++) v[j] = A[i+(i-j-1)*lda]; /* _larfg does not allow negative inc, so use buffer */
557: n0 = i-ilo+1;
558: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,v,v+1,&inc,&tau));
559: for (j=1;j<=i-ilo;j++) v[j] = PetscConj(v[j]);
560: tauc = PetscConj(tau);
561: A[i+(i-1)*lda] = v[0];
562: v[0] = 1.0;
563: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&i,&n0,v,&incn,&tauc,A+(ilo-1)*lda,&lda,w));
564: for (j=1;j<=i-ilo;j++) A[i+(i-j-1)*lda] = 0.0;
565: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,A+ilo-1+(ilo-1)*lda,&lda,w));
566: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,Q+ilo-1+(ilo-1)*ldq,&ldq,w));
567: }
568: /* trivial in-place transposition of Q */
569: for (j=ilo-1;j<k;j++) {
570: for (i=j;i<k;i++) {
571: t = Q[i+j*ldq];
572: if (i!=j) Q[i+j*ldq] = PetscConj(Q[j+i*ldq]);
573: Q[j+i*ldq] = PetscConj(t);
574: }
575: }
576: return(0);
577: #endif
578: }
580: PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
581: {
582: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
584: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
585: #else
587: PetscScalar *work,*tau;
588: PetscInt i,j;
589: PetscBLASInt ilo,lwork,info,n,k,ld;
590: PetscScalar *A = ds->mat[DS_MAT_A];
591: PetscScalar *Q = ds->mat[DS_MAT_Q];
594: #if !defined(PETSC_USE_COMPLEX)
596: #endif
597: PetscBLASIntCast(ds->n,&n);
598: PetscBLASIntCast(ds->ld,&ld);
599: PetscBLASIntCast(ds->l+1,&ilo);
600: PetscBLASIntCast(ds->k,&k);
601: DSAllocateWork_Private(ds,ld+6*ld,0,0);
602: tau = ds->work;
603: work = ds->work+ld;
604: lwork = 6*ld;
606: /* initialize orthogonal matrix */
607: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
608: for (i=0;i<n;i++) Q[i+i*ld] = 1.0;
609: if (n==1) { /* quick return */
610: wr[0] = A[0];
611: wi[0] = 0.0;
612: return(0);
613: }
615: /* reduce to upper Hessenberg form */
616: if (ds->state<DS_STATE_INTERMEDIATE) {
617: if (PETSC_FALSE && k>0) {
618: ArrowHessenberg(n,k,ilo,A,ld,Q,ld,work);
619: } else {
620: PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
621: SlepcCheckLapackInfo("gehrd",info);
622: for (j=0;j<n-1;j++) {
623: for (i=j+2;i<n;i++) {
624: Q[i+j*ld] = A[i+j*ld];
625: A[i+j*ld] = 0.0;
626: }
627: }
628: PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
629: SlepcCheckLapackInfo("orghr",info);
630: }
631: }
633: /* compute the (real) Schur form */
634: #if !defined(PETSC_USE_COMPLEX)
635: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
636: for (j=0;j<ds->l;j++) {
637: if (j==n-1 || A[j+1+j*ld] == 0.0) {
638: /* real eigenvalue */
639: wr[j] = A[j+j*ld];
640: wi[j] = 0.0;
641: } else {
642: /* complex eigenvalue */
643: wr[j] = A[j+j*ld];
644: wr[j+1] = A[j+j*ld];
645: wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
646: PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
647: wi[j+1] = -wi[j];
648: j++;
649: }
650: }
651: #else
652: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
653: if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
654: #endif
655: SlepcCheckLapackInfo("hseqr",info);
656: return(0);
657: #endif
658: }
660: PetscErrorCode DSSynchronize_NHEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
661: {
663: PetscInt ld=ds->ld,l=ds->l,k;
664: PetscMPIInt n,rank,off=0,size,ldn;
667: k = (ds->n-l)*ld;
668: if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
669: if (eigr) k += ds->n-l;
670: if (eigi) k += ds->n-l;
671: DSAllocateWork_Private(ds,k,0,0);
672: PetscMPIIntCast(k*sizeof(PetscScalar),&size);
673: PetscMPIIntCast(ds->n-l,&n);
674: PetscMPIIntCast(ld*(ds->n-l),&ldn);
675: MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
676: if (!rank) {
677: MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
678: if (ds->state>DS_STATE_RAW) {
679: MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
680: }
681: if (eigr) {
682: MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
683: }
684: if (eigi) {
685: MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
686: }
687: }
688: MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
689: if (rank) {
690: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
691: if (ds->state>DS_STATE_RAW) {
692: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
693: }
694: if (eigr) {
695: MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
696: }
697: if (eigi) {
698: MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
699: }
700: }
701: return(0);
702: }
704: PetscErrorCode DSTruncate_NHEP(DS ds,PetscInt n)
705: {
706: PetscInt i,newn,ld=ds->ld,l=ds->l;
707: PetscScalar *A;
710: if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
711: A = ds->mat[DS_MAT_A];
712: /* be careful not to break a diagonal 2x2 block */
713: if (A[n+(n-1)*ld]==0.0) newn = n;
714: else {
715: if (n<ds->n-1) newn = n+1;
716: else newn = n-1;
717: }
718: if (ds->extrarow && ds->k==ds->n) {
719: /* copy entries of extra row to the new position, then clean last row */
720: for (i=l;i<newn;i++) A[newn+i*ld] = A[ds->n+i*ld];
721: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
722: }
723: ds->k = 0;
724: ds->n = newn;
725: return(0);
726: }
728: PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
729: {
730: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE) || defined(SLEPC_MISSING_LAPACK_LANHS)
732: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE/LANHS - Lapack routines are unavailable");
733: #else
735: PetscScalar *work;
736: PetscReal *rwork;
737: PetscBLASInt *ipiv;
738: PetscBLASInt lwork,info,n,ld;
739: PetscReal hn,hin;
740: PetscScalar *A;
743: PetscBLASIntCast(ds->n,&n);
744: PetscBLASIntCast(ds->ld,&ld);
745: lwork = 8*ld;
746: DSAllocateWork_Private(ds,lwork,ld,ld);
747: work = ds->work;
748: rwork = ds->rwork;
749: ipiv = ds->iwork;
751: /* use workspace matrix W to avoid overwriting A */
752: DSAllocateMat_Private(ds,DS_MAT_W);
753: A = ds->mat[DS_MAT_W];
754: PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);
756: /* norm of A */
757: if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
758: else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);
760: /* norm of inv(A) */
761: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
762: SlepcCheckLapackInfo("getrf",info);
763: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
764: SlepcCheckLapackInfo("getri",info);
765: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
767: *cond = hn*hin;
768: return(0);
769: #endif
770: }
772: PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
773: {
774: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
776: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable");
777: #else
779: PetscInt i,j;
780: PetscBLASInt *ipiv,info,n,ld,one=1,ncol;
781: PetscScalar *A,*B,*Q,*g=gin,*ghat;
782: PetscScalar done=1.0,dmone=-1.0,dzero=0.0;
783: PetscReal gnorm;
786: PetscBLASIntCast(ds->n,&n);
787: PetscBLASIntCast(ds->ld,&ld);
788: A = ds->mat[DS_MAT_A];
790: if (!recover) {
792: DSAllocateWork_Private(ds,0,0,ld);
793: ipiv = ds->iwork;
794: if (!g) {
795: DSAllocateWork_Private(ds,ld,0,0);
796: g = ds->work;
797: }
798: /* use workspace matrix W to factor A-tau*eye(n) */
799: DSAllocateMat_Private(ds,DS_MAT_W);
800: B = ds->mat[DS_MAT_W];
801: PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);
803: /* Vector g initialy stores b = beta*e_n^T */
804: PetscMemzero(g,n*sizeof(PetscScalar));
805: g[n-1] = beta;
807: /* g = (A-tau*eye(n))'\b */
808: for (i=0;i<n;i++) B[i+i*ld] -= tau;
809: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
810: SlepcCheckLapackInfo("getrf",info);
811: PetscLogFlops(2.0*n*n*n/3.0);
812: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
813: SlepcCheckLapackInfo("getrs",info);
814: PetscLogFlops(2.0*n*n-n);
816: /* A = A + g*b' */
817: for (i=0;i<n;i++) A[i+(n-1)*ld] += g[i]*beta;
819: } else { /* recover */
822: DSAllocateWork_Private(ds,ld,0,0);
823: ghat = ds->work;
824: Q = ds->mat[DS_MAT_Q];
826: /* g^ = -Q(:,idx)'*g */
827: PetscBLASIntCast(ds->l+ds->k,&ncol);
828: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));
830: /* A = A + g^*b' */
831: for (i=0;i<ds->l+ds->k;i++)
832: for (j=ds->l;j<ds->l+ds->k;j++)
833: A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;
835: /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
836: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
837: }
839: /* Compute gamma factor */
840: if (gamma) {
841: gnorm = 0.0;
842: for (i=0;i<n;i++) gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
843: *gamma = PetscSqrtReal(1.0+gnorm);
844: }
845: return(0);
846: #endif
847: }
849: SLEPC_EXTERN PetscErrorCode DSCreate_NHEP(DS ds)
850: {
852: ds->ops->allocate = DSAllocate_NHEP;
853: ds->ops->view = DSView_NHEP;
854: ds->ops->vectors = DSVectors_NHEP;
855: ds->ops->solve[0] = DSSolve_NHEP;
856: ds->ops->sort = DSSort_NHEP;
857: ds->ops->sortperm = DSSortWithPermutation_NHEP;
858: ds->ops->synchronize = DSSynchronize_NHEP;
859: ds->ops->truncate = DSTruncate_NHEP;
860: ds->ops->update = DSUpdateExtraRow_NHEP;
861: ds->ops->cond = DSCond_NHEP;
862: ds->ops->transharm = DSTranslateHarmonic_NHEP;
863: return(0);
864: }