Actual source code: dsnhep.c

slepc-3.11.0 2019-03-29
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: PetscErrorCode DSAllocate_NHEP(DS ds,PetscInt ld)
 15: {

 19:   DSAllocateMat_Private(ds,DS_MAT_A);
 20:   DSAllocateMat_Private(ds,DS_MAT_Q);
 21:   PetscFree(ds->perm);
 22:   PetscMalloc1(ld,&ds->perm);
 23:   PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
 24:   return(0);
 25: }

 27: PetscErrorCode DSView_NHEP(DS ds,PetscViewer viewer)
 28: {

 32:   DSViewMat(ds,viewer,DS_MAT_A);
 33:   if (ds->state>DS_STATE_INTERMEDIATE) {
 34:     DSViewMat(ds,viewer,DS_MAT_Q);
 35:   }
 36:   if (ds->mat[DS_MAT_X]) {
 37:     DSViewMat(ds,viewer,DS_MAT_X);
 38:   }
 39:   if (ds->mat[DS_MAT_Y]) {
 40:     DSViewMat(ds,viewer,DS_MAT_Y);
 41:   }
 42:   return(0);
 43: }

 45: static PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
 46: {
 47: #if defined(PETSC_MISSING_LAPACK_GESVD)
 49:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
 50: #else
 52:   PetscInt       i,j;
 53:   PetscBLASInt   info,ld,n,n1,lwork,inc=1;
 54:   PetscScalar    sdummy,done=1.0,zero=0.0;
 55:   PetscReal      *sigma;
 56:   PetscBool      iscomplex = PETSC_FALSE;
 57:   PetscScalar    *A = ds->mat[DS_MAT_A];
 58:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
 59:   PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
 60:   PetscScalar    *W;

 63:   if (left) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for left vectors");
 64:   PetscBLASIntCast(ds->n,&n);
 65:   PetscBLASIntCast(ds->ld,&ld);
 66:   n1 = n+1;
 67:   if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
 68:   if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
 69:   DSAllocateWork_Private(ds,5*ld,6*ld,0);
 70:   DSAllocateMat_Private(ds,DS_MAT_W);
 71:   W = ds->mat[DS_MAT_W];
 72:   lwork = 5*ld;
 73:   sigma = ds->rwork+5*ld;

 75:   /* build A-w*I in W */
 76:   for (j=0;j<n;j++)
 77:     for (i=0;i<=n;i++)
 78:       W[i+j*ld] = A[i+j*ld];
 79:   for (i=0;i<n;i++)
 80:     W[i+i*ld] -= A[(*k)+(*k)*ld];

 82:   /* compute SVD of W */
 83: #if !defined(PETSC_USE_COMPLEX)
 84:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
 85: #else
 86:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
 87: #endif
 88:   SlepcCheckLapackInfo("gesvd",info);

 90:   /* the smallest singular value is the new error estimate */
 91:   if (rnorm) *rnorm = sigma[n-1];

 93:   /* update vector with right singular vector associated to smallest singular value,
 94:      accumulating the transformation matrix Q */
 95:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
 96:   return(0);
 97: #endif
 98: }

100: static PetscErrorCode DSVectors_NHEP_Refined_All(DS ds,PetscBool left)
101: {
103:   PetscInt       i;

106:   for (i=0;i<ds->n;i++) {
107:     DSVectors_NHEP_Refined_Some(ds,&i,NULL,left);
108:   }
109:   return(0);
110: }

112: static PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
113: {
114: #if defined(SLEPC_MISSING_LAPACK_TREVC)
116:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
117: #else
119:   PetscInt       i;
120:   PetscBLASInt   mm=1,mout,info,ld,n,*select,inc = 1;
121:   PetscScalar    tmp,done=1.0,zero=0.0;
122:   PetscReal      norm;
123:   PetscBool      iscomplex = PETSC_FALSE;
124:   PetscScalar    *A = ds->mat[DS_MAT_A];
125:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
126:   PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
127:   PetscScalar    *Y;

130:   PetscBLASIntCast(ds->n,&n);
131:   PetscBLASIntCast(ds->ld,&ld);
132:   DSAllocateWork_Private(ds,0,0,ld);
133:   select = ds->iwork;
134:   for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;

136:   /* compute k-th eigenvector Y of A */
137:   Y = X+(*k)*ld;
138:   select[*k] = (PetscBLASInt)PETSC_TRUE;
139: #if !defined(PETSC_USE_COMPLEX)
140:   if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
141:   mm = iscomplex? 2: 1;
142:   if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
143:   DSAllocateWork_Private(ds,3*ld,0,0);
144:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
145: #else
146:   DSAllocateWork_Private(ds,2*ld,ld,0);
147:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
148: #endif
149:   SlepcCheckLapackInfo("trevc",info);
150:   if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");

152:   /* accumulate and normalize eigenvectors */
153:   if (ds->state>=DS_STATE_CONDENSED) {
154:     PetscMemcpy(ds->work,Y,mout*ld*sizeof(PetscScalar));
155:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
156: #if !defined(PETSC_USE_COMPLEX)
157:     if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
158: #endif
159:     norm = BLASnrm2_(&n,Y,&inc);
160: #if !defined(PETSC_USE_COMPLEX)
161:     if (iscomplex) {
162:       tmp = BLASnrm2_(&n,Y+ld,&inc);
163:       norm = SlepcAbsEigenvalue(norm,tmp);
164:     }
165: #endif
166:     tmp = 1.0 / norm;
167:     PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
168: #if !defined(PETSC_USE_COMPLEX)
169:     if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
170: #endif
171:   }

173:   /* set output arguments */
174:   if (iscomplex) (*k)++;
175:   if (rnorm) {
176:     if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
177:     else *rnorm = PetscAbsScalar(Y[n-1]);
178:   }
179:   return(0);
180: #endif
181: }

183: static PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
184: {
185: #if defined(SLEPC_MISSING_LAPACK_TREVC)
187:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
188: #else
190:   PetscInt       i;
191:   PetscBLASInt   n,ld,mout,info,inc = 1;
192:   PetscBool      iscomplex;
193:   PetscScalar    *X,*Y,*Z,*A = ds->mat[DS_MAT_A],tmp;
194:   PetscReal      norm;
195:   const char     *side,*back;

198:   PetscBLASIntCast(ds->n,&n);
199:   PetscBLASIntCast(ds->ld,&ld);
200:   if (left) {
201:     X = NULL;
202:     Y = ds->mat[DS_MAT_Y];
203:     side = "L";
204:   } else {
205:     X = ds->mat[DS_MAT_X];
206:     Y = NULL;
207:     side = "R";
208:   }
209:   Z = left? Y: X;
210:   if (ds->state>=DS_STATE_CONDENSED) {
211:     /* DSSolve() has been called, backtransform with matrix Q */
212:     back = "B";
213:     PetscMemcpy(Z,ds->mat[DS_MAT_Q],ld*ld*sizeof(PetscScalar));
214:   } else back = "A";
215: #if !defined(PETSC_USE_COMPLEX)
216:   DSAllocateWork_Private(ds,3*ld,0,0);
217:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
218: #else
219:   DSAllocateWork_Private(ds,2*ld,ld,0);
220:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
221: #endif
222:   SlepcCheckLapackInfo("trevc",info);

224:   /* normalize eigenvectors */
225:   for (i=0;i<n;i++) {
226:     iscomplex = (i<n-1 && A[i+1+i*ld]!=0.0)? PETSC_TRUE: PETSC_FALSE;
227:     norm = BLASnrm2_(&n,Z+i*ld,&inc);
228: #if !defined(PETSC_USE_COMPLEX)
229:     if (iscomplex) {
230:       tmp = BLASnrm2_(&n,Z+(i+1)*ld,&inc);
231:       norm = SlepcAbsEigenvalue(norm,tmp);
232:     }
233: #endif
234:     tmp = 1.0 / norm;
235:     PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+i*ld,&inc));
236: #if !defined(PETSC_USE_COMPLEX)
237:     if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+(i+1)*ld,&inc));
238: #endif
239:     if (iscomplex) i++;
240:   }
241:   return(0);
242: #endif
243: }

245: PetscErrorCode DSVectors_NHEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
246: {

250:   switch (mat) {
251:     case DS_MAT_X:
252:       if (ds->refined) {
253:         if (!ds->extrarow) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Refined vectors require activating the extra row");
254:         if (j) {
255:           DSVectors_NHEP_Refined_Some(ds,j,rnorm,PETSC_FALSE);
256:         } else {
257:           DSVectors_NHEP_Refined_All(ds,PETSC_FALSE);
258:         }
259:       } else {
260:         if (j) {
261:           DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_FALSE);
262:         } else {
263:           DSVectors_NHEP_Eigen_All(ds,PETSC_FALSE);
264:         }
265:       }
266:       break;
267:     case DS_MAT_Y:
268:       if (ds->refined) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
269:       if (j) {
270:         DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_TRUE);
271:       } else {
272:         DSVectors_NHEP_Eigen_All(ds,PETSC_TRUE);
273:       }
274:       break;
275:     case DS_MAT_U:
276:     case DS_MAT_VT:
277:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
278:       break;
279:     default:
280:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
281:   }
282:   if (ds->state < DS_STATE_CONDENSED) {
283:     DSSetState(ds,DS_STATE_CONDENSED);
284:   }
285:   return(0);
286: }

288: static PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
289: {
290: #if defined(PETSC_MISSING_LAPACK_TRSEN)
292:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable");
293: #else
295:   PetscInt       i;
296:   PetscBLASInt   info,n,ld,mout,lwork,*selection;
297:   PetscScalar    *T = ds->mat[DS_MAT_A],*Q = ds->mat[DS_MAT_Q],*work;
298: #if !defined(PETSC_USE_COMPLEX)
299:   PetscBLASInt   *iwork,liwork;
300: #endif

303:   if (!k) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_WRONG,"Must supply argument k");
304:   PetscBLASIntCast(ds->n,&n);
305:   PetscBLASIntCast(ds->ld,&ld);
306: #if !defined(PETSC_USE_COMPLEX)
307:   lwork = n;
308:   liwork = 1;
309:   DSAllocateWork_Private(ds,lwork,0,liwork+n);
310:   work = ds->work;
311:   lwork = ds->lwork;
312:   selection = ds->iwork;
313:   iwork = ds->iwork + n;
314:   liwork = ds->liwork - n;
315: #else
316:   lwork = 1;
317:   DSAllocateWork_Private(ds,lwork,0,n);
318:   work = ds->work;
319:   selection = ds->iwork;
320: #endif
321:   /* Compute the selected eigenvalue to be in the leading position */
322:   DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);
323:   PetscMemzero(selection,n*sizeof(PetscBLASInt));
324:   for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
325: #if !defined(PETSC_USE_COMPLEX)
326:   PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,NULL,NULL,work,&lwork,iwork,&liwork,&info));
327: #else
328:   PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,NULL,NULL,work,&lwork,&info));
329: #endif
330:   SlepcCheckLapackInfo("trsen",info);
331:   *k = mout;
332:   return(0);
333: #endif
334: }

336: static PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
337: {
338: #if defined(SLEPC_MISSING_LAPACK_TREXC)
340:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
341: #else
343:   PetscScalar    re;
344:   PetscInt       i,j,pos,result;
345:   PetscBLASInt   ifst,ilst,info,n,ld;
346:   PetscScalar    *T = ds->mat[DS_MAT_A];
347:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
348: #if !defined(PETSC_USE_COMPLEX)
349:   PetscScalar    *work,im;
350: #endif

353:   PetscBLASIntCast(ds->n,&n);
354:   PetscBLASIntCast(ds->ld,&ld);
355: #if !defined(PETSC_USE_COMPLEX)
356:   DSAllocateWork_Private(ds,ld,0,0);
357:   work = ds->work;
358: #endif
359:   /* selection sort */
360:   for (i=ds->l;i<n-1;i++) {
361:     re = wr[i];
362: #if !defined(PETSC_USE_COMPLEX)
363:     im = wi[i];
364: #endif
365:     pos = 0;
366:     j=i+1; /* j points to the next eigenvalue */
367: #if !defined(PETSC_USE_COMPLEX)
368:     if (im != 0) j=i+2;
369: #endif
370:     /* find minimum eigenvalue */
371:     for (;j<n;j++) {
372: #if !defined(PETSC_USE_COMPLEX)
373:       SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);
374: #else
375:       SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);
376: #endif
377:       if (result > 0) {
378:         re = wr[j];
379: #if !defined(PETSC_USE_COMPLEX)
380:         im = wi[j];
381: #endif
382:         pos = j;
383:       }
384: #if !defined(PETSC_USE_COMPLEX)
385:       if (wi[j] != 0) j++;
386: #endif
387:     }
388:     if (pos) {
389:       /* interchange blocks */
390:       PetscBLASIntCast(pos+1,&ifst);
391:       PetscBLASIntCast(i+1,&ilst);
392: #if !defined(PETSC_USE_COMPLEX)
393:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
394: #else
395:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
396: #endif
397:       SlepcCheckLapackInfo("trexc",info);
398:       /* recover original eigenvalues from T matrix */
399:       for (j=i;j<n;j++) {
400:         wr[j] = T[j+j*ld];
401: #if !defined(PETSC_USE_COMPLEX)
402:         if (j<n-1 && T[j+1+j*ld] != 0.0) {
403:           /* complex conjugate eigenvalue */
404:           wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
405:                   PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
406:           wr[j+1] = wr[j];
407:           wi[j+1] = -wi[j];
408:           j++;
409:         } else {
410:           wi[j] = 0.0;
411:         }
412: #endif
413:       }
414:     }
415: #if !defined(PETSC_USE_COMPLEX)
416:     if (wi[i] != 0) i++;
417: #endif
418:   }
419:   return(0);
420: #endif
421: }

423: PetscErrorCode DSSort_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
424: {

428:   if (!rr || wr == rr) {
429:     DSSort_NHEP_Total(ds,wr,wi);
430:   } else {
431:     DSSort_NHEP_Arbitrary(ds,wr,wi,rr,ri,k);
432:   }
433:   return(0);
434: }

436: static PetscErrorCode DSSortWithPermutation_NHEP(DS ds,PetscInt *perm,PetscScalar *wr,PetscScalar *wi)
437: {
438: #if defined(SLEPC_MISSING_LAPACK_TREXC)
440:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
441: #else
443:   PetscInt       i,j,pos,inc=1;
444:   PetscBLASInt   ifst,ilst,info,n,ld;
445:   PetscScalar    *T = ds->mat[DS_MAT_A];
446:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
447: #if !defined(PETSC_USE_COMPLEX)
448:   PetscScalar    *work;
449: #endif

452:   PetscBLASIntCast(ds->n,&n);
453:   PetscBLASIntCast(ds->ld,&ld);
454: #if !defined(PETSC_USE_COMPLEX)
455:   DSAllocateWork_Private(ds,ld,0,0);
456:   work = ds->work;
457: #endif
458:   for (i=ds->l;i<n-1;i++) {
459:     pos = perm[i];
460: #if !defined(PETSC_USE_COMPLEX)
461:     inc = (pos<n-1 && T[pos+1+pos*ld] != 0.0)? 2: 1;
462: #endif
463:     if (pos!=i) {
464: #if !defined(PETSC_USE_COMPLEX)
465:       if ((T[pos+(pos-1)*ld] != 0.0 && perm[i+1]!=pos-1) || (T[pos+1+pos*ld] != 0.0 && perm[i+1]!=pos+1)) 
466:  SETERRQ1(PETSC_COMM_SELF,1,"Invalid permutation due to a 2x2 block at position %D",pos);
467: #endif

469:       /* interchange blocks */
470:       PetscBLASIntCast(pos+1,&ifst);
471:       PetscBLASIntCast(i+1,&ilst);
472: #if !defined(PETSC_USE_COMPLEX)
473:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
474: #else
475:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
476: #endif
477:       SlepcCheckLapackInfo("trexc",info);
478:       for (j=i+1;j<n;j++) {
479:         if (perm[j]>=i && perm[j]<pos) perm[j]+=inc;
480:       }
481:       perm[i] = i;
482:       if (inc==2) perm[i+1] = i+1;
483:     }
484:     if (inc==2) i++;
485:   }
486:   /* recover original eigenvalues from T matrix */
487:   for (j=ds->l;j<n;j++) {
488:     wr[j] = T[j+j*ld];
489: #if !defined(PETSC_USE_COMPLEX)
490:     if (j<n-1 && T[j+1+j*ld] != 0.0) {
491:       /* complex conjugate eigenvalue */
492:       wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) * PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
493:       wr[j+1] = wr[j];
494:       wi[j+1] = -wi[j];
495:       j++;
496:     } else {
497:       wi[j] = 0.0;
498:     }
499: #endif
500:   }
501:   return(0);
502: #endif
503: }

505: PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
506: {
508:   PetscInt       i;
509:   PetscBLASInt   n,ld,incx=1;
510:   PetscScalar    *A,*Q,*x,*y,one=1.0,zero=0.0;

513:   PetscBLASIntCast(ds->n,&n);
514:   PetscBLASIntCast(ds->ld,&ld);
515:   A  = ds->mat[DS_MAT_A];
516:   Q  = ds->mat[DS_MAT_Q];
517:   DSAllocateWork_Private(ds,2*ld,0,0);
518:   x = ds->work;
519:   y = ds->work+ld;
520:   for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
521:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
522:   for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
523:   ds->k = n;
524:   return(0);
525: }

527: /*
528:    Reduce a matrix A to upper Hessenberg form Q'*A*Q, where Q is an orthogonal matrix.
529:    The result overwrites A. Matrix A has the form

531:          [ S | * ]
532:      A = [-------]
533:          [ R | H ]

535:   where S is an upper (quasi-)triangular matrix of order k, H is an upper Hessenberg
536:   matrix of order n-k, and R is all zeros except the first row (the arrow).
537:   The algorithm uses elementary reflectors to annihilate entries in the arrow, and
538:   then proceeds upwards. 
539:   If ilo>1, then it is assumed that the first ilo-1 entries of the arrow are zero, and
540:   hence the first ilo-1 rows and columns of Q are set to the identity matrix.

542:   Required workspace is 2*n.
543: */
544: static PetscErrorCode ArrowHessenberg(PetscBLASInt n,PetscBLASInt k,PetscBLASInt ilo,PetscScalar *A,PetscBLASInt lda,PetscScalar *Q,PetscBLASInt ldq,PetscScalar *work)
545: {
546: #if defined(SLEPC_MISSING_LAPACK_LARFG) || defined(SLEPC_MISSING_LAPACK_LARF)
548:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
549: #else
550:   PetscBLASInt i,j,n0,m,inc=1,incn=-1;
551:   PetscScalar  t,*v=work,*w=work+n,tau,tauc;

554:   m = n-ilo+1;
555:   for (i=k;i>ilo;i--) {
556:     for (j=0;j<=i-ilo;j++) v[j] = A[i+(i-j-1)*lda];  /* _larfg does not allow negative inc, so use buffer */
557:     n0 = i-ilo+1;
558:     PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,v,v+1,&inc,&tau));
559:     for (j=1;j<=i-ilo;j++) v[j] = PetscConj(v[j]);
560:     tauc = PetscConj(tau);
561:     A[i+(i-1)*lda] = v[0];
562:     v[0] = 1.0;
563:     PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&i,&n0,v,&incn,&tauc,A+(ilo-1)*lda,&lda,w));
564:     for (j=1;j<=i-ilo;j++) A[i+(i-j-1)*lda] = 0.0;
565:     PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,A+ilo-1+(ilo-1)*lda,&lda,w));
566:     PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,Q+ilo-1+(ilo-1)*ldq,&ldq,w));
567:   }
568:   /* trivial in-place transposition of Q */
569:   for (j=ilo-1;j<k;j++) {
570:     for (i=j;i<k;i++) {
571:       t = Q[i+j*ldq];
572:       if (i!=j) Q[i+j*ldq] = PetscConj(Q[j+i*ldq]);
573:       Q[j+i*ldq] = PetscConj(t);
574:     }
575:   }
576:   return(0);
577: #endif
578: }

580: PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
581: {
582: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
584:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
585: #else
587:   PetscScalar    *work,*tau;
588:   PetscInt       i,j;
589:   PetscBLASInt   ilo,lwork,info,n,k,ld;
590:   PetscScalar    *A = ds->mat[DS_MAT_A];
591:   PetscScalar    *Q = ds->mat[DS_MAT_Q];

594: #if !defined(PETSC_USE_COMPLEX)
596: #endif
597:   PetscBLASIntCast(ds->n,&n);
598:   PetscBLASIntCast(ds->ld,&ld);
599:   PetscBLASIntCast(ds->l+1,&ilo);
600:   PetscBLASIntCast(ds->k,&k);
601:   DSAllocateWork_Private(ds,ld+6*ld,0,0);
602:   tau  = ds->work;
603:   work = ds->work+ld;
604:   lwork = 6*ld;

606:   /* initialize orthogonal matrix */
607:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
608:   for (i=0;i<n;i++) Q[i+i*ld] = 1.0;
609:   if (n==1) { /* quick return */
610:     wr[0] = A[0];
611:     wi[0] = 0.0;
612:     return(0);
613:   }

615:   /* reduce to upper Hessenberg form */
616:   if (ds->state<DS_STATE_INTERMEDIATE) {
617:     if (PETSC_FALSE && k>0) {
618:       ArrowHessenberg(n,k,ilo,A,ld,Q,ld,work);
619:     } else {
620:       PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
621:       SlepcCheckLapackInfo("gehrd",info);
622:       for (j=0;j<n-1;j++) {
623:         for (i=j+2;i<n;i++) {
624:           Q[i+j*ld] = A[i+j*ld];
625:           A[i+j*ld] = 0.0;
626:         }
627:       }
628:       PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
629:       SlepcCheckLapackInfo("orghr",info);
630:     }
631:   }

633:   /* compute the (real) Schur form */
634: #if !defined(PETSC_USE_COMPLEX)
635:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
636:   for (j=0;j<ds->l;j++) {
637:     if (j==n-1 || A[j+1+j*ld] == 0.0) {
638:       /* real eigenvalue */
639:       wr[j] = A[j+j*ld];
640:       wi[j] = 0.0;
641:     } else {
642:       /* complex eigenvalue */
643:       wr[j] = A[j+j*ld];
644:       wr[j+1] = A[j+j*ld];
645:       wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
646:               PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
647:       wi[j+1] = -wi[j];
648:       j++;
649:     }
650:   }
651: #else
652:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
653:   if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
654: #endif
655:   SlepcCheckLapackInfo("hseqr",info);
656:   return(0);
657: #endif
658: }

660: PetscErrorCode DSSynchronize_NHEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
661: {
663:   PetscInt       ld=ds->ld,l=ds->l,k;
664:   PetscMPIInt    n,rank,off=0,size,ldn;

667:   k = (ds->n-l)*ld;
668:   if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
669:   if (eigr) k += ds->n-l; 
670:   if (eigi) k += ds->n-l; 
671:   DSAllocateWork_Private(ds,k,0,0);
672:   PetscMPIIntCast(k*sizeof(PetscScalar),&size);
673:   PetscMPIIntCast(ds->n-l,&n);
674:   PetscMPIIntCast(ld*(ds->n-l),&ldn);
675:   MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
676:   if (!rank) {
677:     MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
678:     if (ds->state>DS_STATE_RAW) {
679:       MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
680:     }
681:     if (eigr) {
682:       MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
683:     }
684:     if (eigi) {
685:       MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
686:     }
687:   }
688:   MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
689:   if (rank) {
690:     MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
691:     if (ds->state>DS_STATE_RAW) {
692:       MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
693:     }
694:     if (eigr) {
695:       MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
696:     }
697:     if (eigi) {
698:       MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
699:     }
700:   }
701:   return(0);
702: }

704: PetscErrorCode DSTruncate_NHEP(DS ds,PetscInt n)
705: {
706:   PetscInt    i,newn,ld=ds->ld,l=ds->l;
707:   PetscScalar *A;

710:   if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
711:   A = ds->mat[DS_MAT_A];
712:   /* be careful not to break a diagonal 2x2 block */
713:   if (A[n+(n-1)*ld]==0.0) newn = n;
714:   else {
715:     if (n<ds->n-1) newn = n+1;
716:     else newn = n-1;
717:   }
718:   if (ds->extrarow && ds->k==ds->n) {
719:     /* copy entries of extra row to the new position, then clean last row */
720:     for (i=l;i<newn;i++) A[newn+i*ld] = A[ds->n+i*ld];
721:     for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
722:   }
723:   ds->k = 0;
724:   ds->n = newn;
725:   return(0);
726: }

728: PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
729: {
730: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE) || defined(SLEPC_MISSING_LAPACK_LANHS)
732:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE/LANHS - Lapack routines are unavailable");
733: #else
735:   PetscScalar    *work;
736:   PetscReal      *rwork;
737:   PetscBLASInt   *ipiv;
738:   PetscBLASInt   lwork,info,n,ld;
739:   PetscReal      hn,hin;
740:   PetscScalar    *A;

743:   PetscBLASIntCast(ds->n,&n);
744:   PetscBLASIntCast(ds->ld,&ld);
745:   lwork = 8*ld;
746:   DSAllocateWork_Private(ds,lwork,ld,ld);
747:   work  = ds->work;
748:   rwork = ds->rwork;
749:   ipiv  = ds->iwork;

751:   /* use workspace matrix W to avoid overwriting A */
752:   DSAllocateMat_Private(ds,DS_MAT_W);
753:   A = ds->mat[DS_MAT_W];
754:   PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);

756:   /* norm of A */
757:   if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
758:   else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);

760:   /* norm of inv(A) */
761:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
762:   SlepcCheckLapackInfo("getrf",info);
763:   PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
764:   SlepcCheckLapackInfo("getri",info);
765:   hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);

767:   *cond = hn*hin;
768:   return(0);
769: #endif
770: }

772: PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
773: {
774: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
776:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable");
777: #else
779:   PetscInt       i,j;
780:   PetscBLASInt   *ipiv,info,n,ld,one=1,ncol;
781:   PetscScalar    *A,*B,*Q,*g=gin,*ghat;
782:   PetscScalar    done=1.0,dmone=-1.0,dzero=0.0;
783:   PetscReal      gnorm;

786:   PetscBLASIntCast(ds->n,&n);
787:   PetscBLASIntCast(ds->ld,&ld);
788:   A  = ds->mat[DS_MAT_A];

790:   if (!recover) {

792:     DSAllocateWork_Private(ds,0,0,ld);
793:     ipiv = ds->iwork;
794:     if (!g) {
795:       DSAllocateWork_Private(ds,ld,0,0);
796:       g = ds->work;
797:     }
798:     /* use workspace matrix W to factor A-tau*eye(n) */
799:     DSAllocateMat_Private(ds,DS_MAT_W);
800:     B = ds->mat[DS_MAT_W];
801:     PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);

803:     /* Vector g initialy stores b = beta*e_n^T */
804:     PetscMemzero(g,n*sizeof(PetscScalar));
805:     g[n-1] = beta;

807:     /* g = (A-tau*eye(n))'\b */
808:     for (i=0;i<n;i++) B[i+i*ld] -= tau;
809:     PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
810:     SlepcCheckLapackInfo("getrf",info);
811:     PetscLogFlops(2.0*n*n*n/3.0);
812:     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
813:     SlepcCheckLapackInfo("getrs",info);
814:     PetscLogFlops(2.0*n*n-n);

816:     /* A = A + g*b' */
817:     for (i=0;i<n;i++) A[i+(n-1)*ld] += g[i]*beta;

819:   } else { /* recover */

822:     DSAllocateWork_Private(ds,ld,0,0);
823:     ghat = ds->work;
824:     Q    = ds->mat[DS_MAT_Q];

826:     /* g^ = -Q(:,idx)'*g */
827:     PetscBLASIntCast(ds->l+ds->k,&ncol);
828:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));

830:     /* A = A + g^*b' */
831:     for (i=0;i<ds->l+ds->k;i++)
832:       for (j=ds->l;j<ds->l+ds->k;j++)
833:         A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;

835:     /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
836:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
837:   }

839:   /* Compute gamma factor */
840:   if (gamma) {
841:     gnorm = 0.0;
842:     for (i=0;i<n;i++) gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
843:     *gamma = PetscSqrtReal(1.0+gnorm);
844:   }
845:   return(0);
846: #endif
847: }

849: SLEPC_EXTERN PetscErrorCode DSCreate_NHEP(DS ds)
850: {
852:   ds->ops->allocate      = DSAllocate_NHEP;
853:   ds->ops->view          = DSView_NHEP;
854:   ds->ops->vectors       = DSVectors_NHEP;
855:   ds->ops->solve[0]      = DSSolve_NHEP;
856:   ds->ops->sort          = DSSort_NHEP;
857:   ds->ops->sortperm      = DSSortWithPermutation_NHEP;
858:   ds->ops->synchronize   = DSSynchronize_NHEP;
859:   ds->ops->truncate      = DSTruncate_NHEP;
860:   ds->ops->update        = DSUpdateExtraRow_NHEP;
861:   ds->ops->cond          = DSCond_NHEP;
862:   ds->ops->transharm     = DSTranslateHarmonic_NHEP;
863:   return(0);
864: }