diff --git a/.gitignore b/.gitignore
index 852d05fcb4..568e67b33f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@ CBLAS/examples/cblas_ex2
 # LAPACK testing
 TESTING/LIN/xlintst*
 TESTING/EIG/xeigtst*
+TESTING/EIG/xdmd*
 TESTING/*.out
 TESTING/*.txt
 !TESTING/CMakeLists.txt
diff --git a/LAPACKE/include/lapack.h b/LAPACKE/include/lapack.h
index 6c847147d4..0b637afb23 100644
--- a/LAPACKE/include/lapack.h
+++ b/LAPACKE/include/lapack.h
@@ -3319,6 +3319,138 @@ void LAPACK_zgesdd_base(
     #define LAPACK_zgesdd(...) LAPACK_zgesdd_base(__VA_ARGS__)
 #endif
 
+#define LAPACK_cgedmd LAPACK_GLOBAL(cgedmd,CGEDMD)
+void LAPACK_cgedmd(
+    char const* jobs, char const* jobz, char const* jobf,
+    lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n,
+    lapack_complex_float* x, lapack_int const* ldx,
+    lapack_complex_float* y, lapack_int const* ldy, lapack_int const* k,
+    lapack_complex_float* reig, lapack_complex_float* imeig,
+    lapack_complex_float* z, lapack_int const* ldz, lapack_complex_float* res,
+    lapack_complex_float* b, lapack_int const* ldb,
+    lapack_complex_float* w, lapack_int const* ldw,
+    lapack_complex_float* s, lapack_int const* lds,
+    lapack_complex_float* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_dgedmd LAPACK_GLOBAL(dgedmd,DGEDMD)
+void LAPACK_dgedmd(
+    char const* jobs, char const* jobz, char const* jobf,
+    lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n,
+    double* x, lapack_int const* ldx,
+    double* y, lapack_int const* ldy, lapack_int const* k,
+    double* reig, double* imeig,
+    double* z, lapack_int const* ldz, double* res,
+    double* b, lapack_int const* ldb,
+    double* w, lapack_int const* ldw,
+    double* s, lapack_int const* lds,
+    double* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_sgedmd LAPACK_GLOBAL(sgedmd,SGEDMD)
+void LAPACK_sgedmd(
+    char const* jobs, char const* jobz, char const* jobf,
+    lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n,
+    float* x, lapack_int const* ldx,
+    float* y, lapack_int const* ldy, lapack_int const* k,
+    float* reig, float* imeig,
+    float* z, lapack_int const* ldz, float* res,
+    float* b, lapack_int const* ldb,
+    float* w, lapack_int const* ldw,
+    float* s, lapack_int const* lds,
+    float* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_zgedmd LAPACK_GLOBAL(zgedmd,ZGEDMD)
+void LAPACK_zgedmd(
+    char const* jobs, char const* jobz, char const* jobf,
+    lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n,
+    lapack_complex_double* x, lapack_int const* ldx,
+    lapack_complex_double* y, lapack_int const* ldy, lapack_int const* k,
+    lapack_complex_double* reig, lapack_complex_double* imeig,
+    lapack_complex_double* z, lapack_int const* ldz, lapack_complex_double* res,
+    lapack_complex_double* b, lapack_int const* ldb,
+    lapack_complex_double* w, lapack_int const* ldw,
+    lapack_complex_double* s, lapack_int const* lds,
+    lapack_complex_double* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_cgedmdq LAPACK_GLOBAL(cgedmdq,CGEDMDQ)
+void LAPACK_cgedmdq(
+    char const* jobs, char const* jobz, char const* jobr, char const* jobq,
+    char const* jobt, char const* jobf, lapack_int const* whtsvd,
+    lapack_int const* m, lapack_int const* n,
+    lapack_complex_float* f, lapack_int const* ldf,
+    lapack_complex_float* x, lapack_int const* ldx,
+    lapack_complex_float* y, lapack_int const* ldy, lapack_int const* nrnk,
+    float const* tol, lapack_int const* k,
+    lapack_complex_float* reig, lapack_complex_float* imeig,
+    lapack_complex_float* z, lapack_int const* ldz, lapack_complex_float* res,
+    lapack_complex_float* b, lapack_int const* ldb,
+    lapack_complex_float* v, lapack_int const* ldv,
+    lapack_complex_float* s, lapack_int const* lds,
+    lapack_complex_float* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_dgedmdq LAPACK_GLOBAL(dgedmdq,DGEDMDQ)
+void LAPACK_dgedmdq(
+    char const* jobs, char const* jobz, char const* jobr, char const* jobq,
+    char const* jobt, char const* jobf, lapack_int const* whtsvd,
+    lapack_int const* m, lapack_int const* n,
+    double* f, lapack_int const* ldf,
+    double* x, lapack_int const* ldx,
+    double* y, lapack_int const* ldy, lapack_int const* nrnk,
+    double const* tol, lapack_int const* k,
+    double* reig, double* imeig,
+    double* z, lapack_int const* ldz, double* res,
+    double* b, lapack_int const* ldb,
+    double* v, lapack_int const* ldv,
+    double* s, lapack_int const* lds,
+    double* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_sgedmdq LAPACK_GLOBAL(sgedmdq,SGEDMDQ)
+void LAPACK_sgedmdq(
+    char const* jobs, char const* jobz, char const* jobr, char const* jobq,
+    char const* jobt, char const* jobf, lapack_int const* whtsvd,
+    lapack_int const* m, lapack_int const* n,
+    float* f, lapack_int const* ldf,
+    float* x, lapack_int const* ldx,
+    float* y, lapack_int const* ldy, lapack_int const* nrnk,
+    float const* tol, lapack_int const* k,
+    float* reig, float* imeig,
+    float* z, lapack_int const* ldz, float* res,
+    float* b, lapack_int const* ldb,
+    float* v, lapack_int const* ldv,
+    float* s, lapack_int const* lds,
+    float* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
+#define LAPACK_zgedmdq LAPACK_GLOBAL(zgedmdq,ZGEDMDQ)
+void LAPACK_zgedmdq(
+    char const* jobs, char const* jobz, char const* jobr, char const* jobq,
+    char const* jobt, char const* jobf, lapack_int const* whtsvd,
+    lapack_int const* m, lapack_int const* n,
+    lapack_complex_double* f, lapack_int const* ldf,
+    lapack_complex_double* x, lapack_int const* ldx,
+    lapack_complex_double* y, lapack_int const* ldy, lapack_int const* nrnk,
+    double const* tol, lapack_int const* k,
+    lapack_complex_double* reig, lapack_complex_double* imeig,
+    lapack_complex_double* z, lapack_int const* ldz, lapack_complex_double* res,
+    lapack_complex_double* b, lapack_int const* ldb,
+    lapack_complex_double* v, lapack_int const* ldv,
+    lapack_complex_double* s, lapack_int const* lds,
+    lapack_complex_double* work, lapack_int const* lwork,
+    lapack_int* iwork, lapack_int const* liwork,
+    lapack_int* info );
+
 #define LAPACK_cgesv LAPACK_GLOBAL(cgesv,CGESV)
 void LAPACK_cgesv(
     lapack_int const* n, lapack_int const* nrhs,
diff --git a/LAPACKE/include/lapacke.h b/LAPACKE/include/lapacke.h
index 9bd228064f..9a9ab47538 100644
--- a/LAPACKE/include/lapacke.h
+++ b/LAPACKE/include/lapacke.h
@@ -956,7 +956,7 @@ lapack_int LAPACKE_zgesvdq( int matrix_layout, char joba, char jobp, char jobr,
                            lapack_int lda, double* s, lapack_complex_double* u,
                            lapack_int ldu, lapack_complex_double* v,
                            lapack_int ldv, lapack_int* numrank );
-                           
+
 lapack_int LAPACKE_sgesvj( int matrix_layout, char joba, char jobu, char jobv,
                            lapack_int m, lapack_int n, float* a, lapack_int lda,
                            float* sva, lapack_int mv, float* v, lapack_int ldv,
@@ -5712,6 +5712,120 @@ lapack_int LAPACKE_zgesdd_work( int matrix_layout, char jobz, lapack_int m,
                                 lapack_complex_double* work, lapack_int lwork,
                                 double* rwork, lapack_int* iwork );
 
+lapack_int LAPACKE_sgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, float* x, lapack_int ldx,
+                                float* y, lapack_int ldy, lapack_int k,
+                                float* reig, float* imeig, float* z,
+                                lapack_int ldz, float* res, float* b,
+                                lapack_int ldb, float* w, lapack_int ldw,
+                                float* s, lapack_int lds, float* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+
+lapack_int LAPACKE_dgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, double* x, lapack_int ldx,
+                                double* y, lapack_int ldy, lapack_int k,
+                                double* reig, double* imeig, double* z,
+                                lapack_int ldz, double* res, double* b,
+                                lapack_int ldb, double* w, lapack_int ldw,
+                                double* s, lapack_int lds, double* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+
+lapack_int LAPACKE_cgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, lapack_complex_float* x,
+                                lapack_int ldx, lapack_complex_float* y,
+                                lapack_int ldy, lapack_int k,
+                                lapack_complex_float* reig,
+                                lapack_complex_float* imeig,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_complex_float* res,
+                                lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* w, lapack_int ldw,
+                                lapack_complex_float* s, lapack_int lds,
+                                lapack_complex_float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_zgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, lapack_complex_double* x,
+                                lapack_int ldx, lapack_complex_double* y,
+                                lapack_int ldy, lapack_int k,
+                                lapack_complex_double* reig,
+                                lapack_complex_double* imeig,
+                                lapack_complex_double* z, lapack_int ldz,
+                                lapack_complex_double* res,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* w, lapack_int ldw,
+                                lapack_complex_double* s, lapack_int lds,
+                                lapack_complex_double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_sgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 float* f, lapack_int ldf, float* x,
+                                 lapack_int ldx, float* y, lapack_int ldy,
+                                 lapack_int nrnk, float tol, lapack_int k,
+                                 float* reig, float* imeig, float* z,
+                                 lapack_int ldz, float* res, float* b,
+                                 lapack_int ldb, float* v, lapack_int ldv,
+                                 float* s, lapack_int lds, float* work,
+                                 lapack_int lwork, lapack_int* iwork,
+                                 lapack_int liwork );
+
+lapack_int LAPACKE_dgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 double* f, lapack_int ldf, double* x,
+                                 lapack_int ldx, double* y, lapack_int ldy,
+                                 lapack_int nrnk, double tol, lapack_int k,
+                                 double* reig, double* imeig, double* z,
+                                 lapack_int ldz, double* res, double* b,
+                                 lapack_int ldb, double* v, lapack_int ldv,
+                                 double* s, lapack_int lds, double* work,
+                                 lapack_int lwork, lapack_int* iwork,
+                                 lapack_int liwork );
+
+lapack_int LAPACKE_cgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 lapack_complex_float* f, lapack_int ldf,
+                                 lapack_complex_float* x, lapack_int ldx,
+                                 lapack_complex_float* y, lapack_int ldy,
+                                 lapack_int nrnk, float tol, lapack_int k,
+                                 lapack_complex_float* reig,
+                                 lapack_complex_float* imeig,
+                                 lapack_complex_float* z, lapack_int ldz,
+                                 lapack_complex_float* res,
+                                 lapack_complex_float* b, lapack_int ldb,
+                                 lapack_complex_float* v, lapack_int ldv,
+                                 lapack_complex_float* s, lapack_int lds,
+                                 lapack_complex_float* work, lapack_int lwork,
+                                 lapack_int* iwork,
+                                 lapack_int liwork );
+
+lapack_int LAPACKE_zgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 lapack_complex_double* f, lapack_int ldf,
+                                 lapack_complex_double* x, lapack_int ldx,
+                                 lapack_complex_double* y, lapack_int ldy,
+                                 lapack_int nrnk, double tol, lapack_int k,
+                                 lapack_complex_double* reig,
+                                 lapack_complex_double* imeig,
+                                 lapack_complex_double* z, lapack_int ldz,
+                                 lapack_complex_double* res,
+                                 lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* v, lapack_int ldv,
+                                 lapack_complex_double* s, lapack_int lds,
+                                 lapack_complex_double* work, lapack_int lwork,
+                                 lapack_int* iwork,
+                                 lapack_int liwork );
+
 lapack_int LAPACKE_sgesv_work( int matrix_layout, lapack_int n, lapack_int nrhs,
                                float* a, lapack_int lda, lapack_int* ipiv,
                                float* b, lapack_int ldb );
@@ -5833,7 +5947,7 @@ lapack_int LAPACKE_zgesvdq_work( int matrix_layout, char joba, char jobp,
                                 lapack_int* iwork, lapack_int liwork,
                                 lapack_complex_double* cwork, lapack_int lcwork,
                                 double* rwork, lapack_int lrwork);
-                                
+
 lapack_int LAPACKE_sgesvj_work( int matrix_layout, char joba, char jobu,
                                 char jobv, lapack_int m, lapack_int n, float* a,
                                 lapack_int lda, float* sva, lapack_int mv,
@@ -12550,7 +12664,7 @@ lapack_int LAPACKE_zhegv_2stage_work( int matrix_layout, lapack_int itype, char
 //LAPACK 3.8.0
 lapack_int LAPACKE_ssysv_aa_2stage( int matrix_layout, char uplo, lapack_int n,
                           lapack_int nrhs, float* a, lapack_int lda,
-                          float* tb, lapack_int ltb, lapack_int* ipiv, 
+                          float* tb, lapack_int ltb, lapack_int* ipiv,
                           lapack_int* ipiv2, float* b, lapack_int ldb );
 lapack_int LAPACKE_ssysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n,
                                lapack_int nrhs, float* a, lapack_int lda,
@@ -12560,7 +12674,7 @@ lapack_int LAPACKE_ssysv_aa_2stage_work( int matrix_layout, char uplo, lapack_in
 lapack_int LAPACKE_dsysv_aa_2stage( int matrix_layout, char uplo, lapack_int n,
                           lapack_int nrhs, double* a, lapack_int lda,
                           double* tb, lapack_int ltb,
-                          lapack_int* ipiv, lapack_int* ipiv2, 
+                          lapack_int* ipiv, lapack_int* ipiv2,
                           double* b, lapack_int ldb );
 lapack_int LAPACKE_dsysv_aa_2stage_work( int matrix_layout, char uplo, lapack_int n,
                                lapack_int nrhs, double* a, lapack_int lda,
@@ -12612,10 +12726,10 @@ lapack_int LAPACKE_zhesv_aa_2stage_work( int matrix_layout, char uplo, lapack_in
                                lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2,
                                lapack_complex_double* b, lapack_int ldb,
                                lapack_complex_double* work, lapack_int lwork );
-                               
+
 lapack_int LAPACKE_ssytrf_aa_2stage( int matrix_layout, char uplo, lapack_int n,
                           float* a, lapack_int lda,
-                          float* tb, lapack_int ltb, lapack_int* ipiv, 
+                          float* tb, lapack_int ltb, lapack_int* ipiv,
                           lapack_int* ipiv2 );
 lapack_int LAPACKE_ssytrf_aa_2stage_work( int matrix_layout, char uplo, lapack_int n,
                                float* a, lapack_int lda,
@@ -12671,7 +12785,7 @@ lapack_int LAPACKE_zhetrf_aa_2stage_work( int matrix_layout, char uplo, lapack_i
 
 lapack_int LAPACKE_ssytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n,
                           lapack_int nrhs, float* a, lapack_int lda,
-                          float* tb, lapack_int ltb, lapack_int* ipiv, 
+                          float* tb, lapack_int ltb, lapack_int* ipiv,
                           lapack_int* ipiv2, float* b, lapack_int ldb );
 lapack_int LAPACKE_ssytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n,
                                lapack_int nrhs, float* a, lapack_int lda,
@@ -12680,7 +12794,7 @@ lapack_int LAPACKE_ssytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_i
 lapack_int LAPACKE_dsytrs_aa_2stage( int matrix_layout, char uplo, lapack_int n,
                           lapack_int nrhs, double* a, lapack_int lda,
                           double* tb, lapack_int ltb,
-                          lapack_int* ipiv, lapack_int* ipiv2, 
+                          lapack_int* ipiv, lapack_int* ipiv2,
                           double* b, lapack_int ldb );
 lapack_int LAPACKE_dsytrs_aa_2stage_work( int matrix_layout, char uplo, lapack_int n,
                                lapack_int nrhs, double* a, lapack_int lda,
@@ -12727,7 +12841,6 @@ lapack_int LAPACKE_zhetrs_aa_2stage_work( int matrix_layout, char uplo, lapack_i
                                lapack_int lda, lapack_complex_double* tb,
                                lapack_int ltb, lapack_int* ipiv, lapack_int* ipiv2,
                                lapack_complex_double* b, lapack_int ldb );
-
 //LAPACK 3.10.0
 lapack_int LAPACKE_sorhr_col( int matrix_layout, lapack_int m, lapack_int n,
                               lapack_int nb, float* a,
diff --git a/LAPACKE/src/CMakeLists.txt b/LAPACKE/src/CMakeLists.txt
index 115c0e8e8b..eebc5f869f 100644
--- a/LAPACKE/src/CMakeLists.txt
+++ b/LAPACKE/src/CMakeLists.txt
@@ -89,6 +89,10 @@ lapacke_cgerqf.c
 lapacke_cgerqf_work.c
 lapacke_cgesdd.c
 lapacke_cgesdd_work.c
+lapacke_cgedmd.c
+lapacke_cgedmd_work.c
+lapacke_cgedmdq.c
+lapacke_cgedmdq_work.c
 lapacke_cgesv.c
 lapacke_cgesv_work.c
 lapacke_cgesvd.c
@@ -713,6 +717,10 @@ lapacke_dgerqf.c
 lapacke_dgerqf_work.c
 lapacke_dgesdd.c
 lapacke_dgesdd_work.c
+lapacke_dgedmd.c
+lapacke_dgedmd_work.c
+lapacke_dgedmdq.c
+lapacke_dgedmdq_work.c
 lapacke_dgesv.c
 lapacke_dgesv_work.c
 lapacke_dgesvd.c
@@ -1294,6 +1302,10 @@ lapacke_sgerqf.c
 lapacke_sgerqf_work.c
 lapacke_sgesdd.c
 lapacke_sgesdd_work.c
+lapacke_sgedmd.c
+lapacke_sgedmd_work.c
+lapacke_sgedmdq.c
+lapacke_sgedmdq_work.c
 lapacke_sgesv.c
 lapacke_sgesv_work.c
 lapacke_sgesvd.c
@@ -1864,6 +1876,10 @@ lapacke_zgerqf.c
 lapacke_zgerqf_work.c
 lapacke_zgesdd.c
 lapacke_zgesdd_work.c
+lapacke_zgedmd.c
+lapacke_zgedmd_work.c
+lapacke_zgedmdq.c
+lapacke_zgedmdq_work.c
 lapacke_zgesv.c
 lapacke_zgesv_work.c
 lapacke_zgesvd.c
diff --git a/LAPACKE/src/Makefile b/LAPACKE/src/Makefile
index 5c6495ed60..fece21af48 100644
--- a/LAPACKE/src/Makefile
+++ b/LAPACKE/src/Makefile
@@ -136,6 +136,10 @@ lapacke_cgerqf.o \
 lapacke_cgerqf_work.o \
 lapacke_cgesdd.o \
 lapacke_cgesdd_work.o \
+lapacke_cgedmd.o \
+lapacke_cgedmd_work.o \
+lapacke_cgedmdq.o \
+lapacke_cgedmdq_work.o \
 lapacke_cgesv.o \
 lapacke_cgesv_work.o \
 lapacke_cgesvd.o \
@@ -760,6 +764,10 @@ lapacke_dgerqf.o \
 lapacke_dgerqf_work.o \
 lapacke_dgesdd.o \
 lapacke_dgesdd_work.o \
+lapacke_dgedmd.o \
+lapacke_dgedmd_work.o \
+lapacke_dgedmdq.o \
+lapacke_dgedmdq_work.o \
 lapacke_dgesv.o \
 lapacke_dgesv_work.o \
 lapacke_dgesvd.o \
@@ -1336,6 +1344,10 @@ lapacke_sgerqf.o \
 lapacke_sgerqf_work.o \
 lapacke_sgesdd.o \
 lapacke_sgesdd_work.o \
+lapacke_sgedmd.o \
+lapacke_sgedmd_work.o \
+lapacke_sgedmdq.o \
+lapacke_sgedmdq_work.o \
 lapacke_sgesv.o \
 lapacke_sgesv_work.o \
 lapacke_sgesvd.o \
@@ -1906,6 +1918,10 @@ lapacke_zgerqf.o \
 lapacke_zgerqf_work.o \
 lapacke_zgesdd.o \
 lapacke_zgesdd_work.o \
+lapacke_zgedmd.o \
+lapacke_zgedmd_work.o \
+lapacke_zgedmdq.o \
+lapacke_zgedmdq_work.o \
 lapacke_zgesv.o \
 lapacke_zgesv_work.o \
 lapacke_zgesvd.o \
diff --git a/LAPACKE/src/lapacke_cgedmd.c b/LAPACKE/src/lapacke_cgedmd.c
new file mode 100644
index 0000000000..a269b0dafc
--- /dev/null
+++ b/LAPACKE/src/lapacke_cgedmd.c
@@ -0,0 +1,115 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function cgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/*
+ * High-level wrapper for cgedmd. Validates matrix_layout, optionally scans
+ * the inputs x and y for NaNs, asks LAPACKE_cgedmd_work for the work/iwork
+ * sizes (lwork = liwork = -1), allocates both buffers and runs the
+ * computation through the same middle-level routine.
+ * Returns the info of LAPACKE_cgedmd_work, -1 for an invalid matrix_layout,
+ * -8 or -10 when x or y contains a NaN, and LAPACK_WORK_MEMORY_ERROR
+ * (also reported via LAPACKE_xerbla) when an allocation fails.
+ */
+lapack_int LAPACKE_cgedmd( int matrix_layout, char jobs, char jobz, char jobf,
+                           lapack_int whtsvd, lapack_int m, lapack_int n,
+                           lapack_complex_float* x, lapack_int ldx,
+                           lapack_complex_float* y, lapack_int ldy, lapack_int k,
+                           lapack_complex_float* reig, lapack_complex_float* imeig,
+                           lapack_complex_float* z, lapack_int ldz,
+                           lapack_complex_float* res, lapack_complex_float* b,
+                           lapack_int ldb, lapack_complex_float* w,
+                           lapack_int ldw, lapack_complex_float* s, lapack_int lds)
+{
+    lapack_int info = 0;
+    lapack_int lwork = -1;
+    lapack_int liwork = -1;
+    lapack_complex_float* work = NULL;
+    lapack_int* iwork = NULL;
+    lapack_complex_float work_query;
+    lapack_int iwork_query;
+    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_cgedmd", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* Optionally check the input matrices for NaNs. Only the snapshot
+         * matrices x and y are inputs; z, b, w and s are output/workspace
+         * arrays (w and s are n-by-n, not m-by-n) whose entry contents are
+         * unspecified, so scanning them would read garbage. */
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -8;
+        }
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -10;
+        }
+    }
+#endif
+    /* Query optimal working array(s) size */
+    info = LAPACKE_cgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, &work_query, lwork,
+                                &iwork_query, liwork );
+
+    if( info != 0 ) {
+        goto exit_level_0;
+    }
+    lwork  = LAPACK_C2INT( work_query );
+    liwork = iwork_query;
+    /* Allocate memory for work arrays */
+    work  = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lwork );
+    if( work == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_0;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( iwork == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_1;
+    }
+    /* Call middle-level interface */
+    info = LAPACKE_cgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, work, lwork, iwork,
+                                liwork );
+    /* Release memory and exit */
+    LAPACKE_free( iwork );
+exit_level_1:
+    LAPACKE_free( work );
+exit_level_0:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_cgedmd", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_cgedmd_work.c b/LAPACKE/src/lapacke_cgedmd_work.c
new file mode 100644
index 0000000000..534934efb4
--- /dev/null
+++ b/LAPACKE/src/lapacke_cgedmd_work.c
@@ -0,0 +1,180 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function cgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/* Middle-level C wrapper for the LAPACK routine cgedmd.
+ *
+ * Column-major data is passed straight through. Row-major data is copied
+ * into column-major temporaries, processed there, and copied back.
+ * lwork == -1 or liwork == -1 requests a workspace-size query only.
+ *
+ * Returns -1 for an unknown matrix_layout, -i if the i-th argument of this
+ * function is rejected, LAPACK_TRANSPOSE_MEMORY_ERROR if a temporary cannot
+ * be allocated, and otherwise the info value set by cgedmd.
+ */
+lapack_int LAPACKE_cgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, lapack_complex_float* x, lapack_int ldx,
+                                lapack_complex_float* y, lapack_int ldy, lapack_int k,
+                                lapack_complex_float* reig, lapack_complex_float* imeig,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_complex_float* res, lapack_complex_float* b,
+                                lapack_int ldb, lapack_complex_float* w,
+                                lapack_int ldw, lapack_complex_float* s, lapack_int lds,
+                                lapack_complex_float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_cgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy,
+                       &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds,
+                       work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldw_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        lapack_complex_float* x_t = NULL;
+        lapack_complex_float* y_t = NULL;
+        lapack_complex_float* z_t = NULL;
+        lapack_complex_float* b_t = NULL;
+        lapack_complex_float* w_t = NULL;
+        lapack_complex_float* s_t = NULL;
+        /* Check leading dimension(s) */
+        if( ldx < n ) {
+            info = -9;
+        } else if( ldy < n ) {
+            info = -11;
+        } else if( ldz < n ) {
+            info = -16;
+        } else if( ldb < n ) {
+            info = -19;
+        } else if( ldw < n ) {
+            info = -21;
+        } else if( lds < n ) {
+            info = -23;
+        }
+        if( info != 0 ) {
+            LAPACKE_xerbla( "LAPACKE_cgedmd_work", info );
+            return info;
+        }
+        /* Query optimal working array(s) size if requested (either lwork or
+         * liwork equal to -1). The column-major leading dimensions are passed
+         * because the row-major ones may be smaller than m. */
+        if( lwork == -1 || liwork == -1 ) {
+            LAPACK_cgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx_t, y,
+                           &ldy_t, &k, reig, imeig, z, &ldz_t, res, b, &ldb_t,
+                           w, &ldw_t, s, &lds_t, work, &lwork, iwork, &liwork,
+                           &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        x_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        y_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        z_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        b_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        w_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldw_t * MAX(1,n) );
+        if( w_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        s_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        /* Transpose input matrices */
+        LAPACKE_cge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function and adjust info */
+        LAPACK_cgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t,
+                       &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t,
+                       w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit */
+        LAPACKE_free( s_t );
+exit_level_5:
+        LAPACKE_free( w_t );
+exit_level_4:
+        LAPACKE_free( b_t );
+exit_level_3:
+        LAPACKE_free( z_t );
+exit_level_2:
+        LAPACKE_free( y_t );
+exit_level_1:
+        LAPACKE_free( x_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_cgedmd_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_cgedmd_work", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_cgedmdq.c b/LAPACKE/src/lapacke_cgedmdq.c
new file mode 100644
index 0000000000..60e83729bd
--- /dev/null
+++ b/LAPACKE/src/lapacke_cgedmdq.c
@@ -0,0 +1,123 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function cgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/* High-level C interface to the LAPACK routine cgedmdq.
+ *
+ * The workspace sizes are obtained from LAPACKE_cgedmdq_work with
+ * lwork = liwork = -1; the work and iwork buffers are then allocated, used
+ * for the actual call and released before returning.
+ *
+ * Returns -1 for an invalid matrix_layout, the negated argument position if
+ * NaN checking is enabled and a matrix contains a NaN, LAPACK_WORK_MEMORY_ERROR
+ * if a workspace allocation fails, else the info of LAPACKE_cgedmdq_work.
+ */
+lapack_int LAPACKE_cgedmdq( int matrix_layout, char jobs, char jobz, char jobr,
+                            char jobq, char jobt, char jobf, lapack_int whtsvd,
+                            lapack_int m, lapack_int n, lapack_complex_float* f,
+                            lapack_int ldf, lapack_complex_float* x,
+                            lapack_int ldx, lapack_complex_float* y,
+                            lapack_int ldy, lapack_int nrnk, float tol,
+                            lapack_int k, lapack_complex_float* reig,
+                            lapack_complex_float* imeig,
+                            lapack_complex_float* z, lapack_int ldz,
+                            lapack_complex_float* res, lapack_complex_float* b,
+                            lapack_int ldb, lapack_complex_float* v,
+                            lapack_int ldv, lapack_complex_float* s, lapack_int lds)
+{
+    lapack_int info = 0;
+    lapack_int work_len = -1;
+    lapack_int iwork_len = -1;
+    lapack_complex_float* cwork = NULL;
+    lapack_int* int_work = NULL;
+    lapack_complex_float cwork_opt;
+    lapack_int int_work_opt;
+    if( matrix_layout != LAPACK_ROW_MAJOR && matrix_layout != LAPACK_COL_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_cgedmdq", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* NaN screening: report the first matrix argument that fails */
+        if( LAPACKE_cge_nancheck( matrix_layout, m, n, f, ldf ) ) {
+            return -11;
+        } else if( LAPACKE_cge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -13;
+        } else if( LAPACKE_cge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -15;
+        } else if( LAPACKE_cge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -22;
+        } else if( LAPACKE_cge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -25;
+        } else if( LAPACKE_cge_nancheck( matrix_layout, m, n, v, ldv ) ) {
+            return -27;
+        } else if( LAPACKE_cge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -29;
+        }
+    }
+#endif
+    /* Workspace query: both length arguments are still -1 at this point */
+    info = LAPACKE_cgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                 jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                 nrnk, tol, k, reig, imeig, z, ldz, res,
+                                 b, ldb, v, ldv, s, lds, &cwork_opt, work_len,
+                                 &int_work_opt, iwork_len );
+    if( info == 0 ) {
+        work_len  = LAPACK_C2INT( cwork_opt );
+        iwork_len = int_work_opt;
+        /* Allocate memory for work arrays */
+        cwork = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * work_len );
+        if( cwork == NULL ) {
+            info = LAPACK_WORK_MEMORY_ERROR;
+        } else {
+            int_work = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * iwork_len );
+            if( int_work == NULL ) {
+                info = LAPACK_WORK_MEMORY_ERROR;
+            } else {
+                /* Call middle-level interface */
+                info = LAPACKE_cgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq,
+                                             jobt, jobf, whtsvd, m, n, f, ldf, x, ldx,
+                                             y, ldy, nrnk, tol, k, reig, imeig, z, ldz,
+                                             res, b, ldb, v, ldv, s, lds, cwork,
+                                             work_len, int_work, iwork_len );
+                /* Release memory and exit */
+                LAPACKE_free( int_work );
+            }
+            LAPACKE_free( cwork );
+        }
+    }
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_cgedmdq", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_cgedmdq_work.c b/LAPACKE/src/lapacke_cgedmdq_work.c
new file mode 100644
index 0000000000..5bdbd3f564
--- /dev/null
+++ b/LAPACKE/src/lapacke_cgedmdq_work.c
@@ -0,0 +1,205 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function cgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/* Middle-level C wrapper for the LAPACK routine cgedmdq.
+ *
+ * Column-major data is passed straight through. Row-major data is copied
+ * into column-major temporaries, processed there, and copied back.
+ * lwork == -1 or liwork == -1 requests a workspace-size query only; the
+ * caller provides work and iwork in every other case.
+ *
+ * Returns -1 for an unknown matrix_layout, -i if the i-th argument of this
+ * function is rejected, LAPACK_TRANSPOSE_MEMORY_ERROR if a temporary cannot
+ * be allocated, and otherwise the info value set by cgedmdq (negative values
+ * are shifted by one to account for the leading matrix_layout argument).
+ */
+lapack_int LAPACKE_cgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 lapack_complex_float* f, lapack_int ldf,
+                                 lapack_complex_float* x, lapack_int ldx,
+                                 lapack_complex_float* y, lapack_int ldy,
+                                 lapack_int nrnk, float tol, lapack_int k,
+                                 lapack_complex_float* reig,
+                                 lapack_complex_float* imeig,
+                                 lapack_complex_float* z,
+                                 lapack_int ldz, lapack_complex_float* res,
+                                 lapack_complex_float* b,
+                                 lapack_int ldb, lapack_complex_float* v,
+                                 lapack_int ldv, lapack_complex_float* s,
+                                 lapack_int lds, lapack_complex_float* work,
+                                 lapack_int lwork, lapack_int* iwork,
+                                 lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_cgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                        imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                        work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldf_t = MAX(1,m);
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldv_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        lapack_complex_float* f_t = NULL;
+        lapack_complex_float* x_t = NULL;
+        lapack_complex_float* y_t = NULL;
+        lapack_complex_float* z_t = NULL;
+        lapack_complex_float* b_t = NULL;
+        lapack_complex_float* v_t = NULL;
+        lapack_complex_float* s_t = NULL;
+        /* Check leading dimension(s) */
+        if( ldf < n ) {
+            info = -12;
+        } else if( ldx < n ) {
+            info = -14;
+        } else if( ldy < n ) {
+            info = -16;
+        } else if( ldz < n ) {
+            info = -23;
+        } else if( ldb < n ) {
+            info = -26;
+        } else if( ldv < n ) {
+            info = -28;
+        } else if( lds < n ) {
+            info = -30;
+        }
+        if( info != 0 ) {
+            LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info );
+            return info;
+        }
+        /* Query optimal working array(s) size if requested (either lwork or
+         * liwork equal to -1). The column-major leading dimensions are passed
+         * because the row-major ones may be smaller than m. */
+        if( lwork == -1 || liwork == -1 ) {
+            LAPACK_cgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                            &n, f, &ldf_t, x, &ldx_t, y, &ldy_t, &nrnk, &tol, &k,
+                            reig, imeig, z, &ldz_t, res, b, &ldb_t, v, &ldv_t, s,
+                            &lds_t, work, &lwork, iwork, &liwork, &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        f_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldf_t * MAX(1,n) );
+        if( f_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        x_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        y_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        z_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        b_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        v_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,n) );
+        if( v_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        s_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_6;
+        }
+        /* Transpose input matrices */
+        LAPACKE_cge_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t );
+        LAPACKE_cge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function on the column-major copies and adjust info.
+         * The caller's row-major arrays must not be passed here: they are
+         * overwritten from the temporaries right after the call. */
+        LAPACK_cgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f_t, &ldf_t, x_t, &ldx_t, y_t, &ldy_t, &nrnk, &tol,
+                        &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t, v_t,
+                        &ldv_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv );
+        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit */
+        LAPACKE_free( s_t );
+exit_level_6:
+        LAPACKE_free( v_t );
+exit_level_5:
+        LAPACKE_free( b_t );
+exit_level_4:
+        LAPACKE_free( z_t );
+exit_level_3:
+        LAPACKE_free( y_t );
+exit_level_2:
+        LAPACKE_free( x_t );
+exit_level_1:
+        LAPACKE_free( f_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_cgedmdq_work", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_dgedmd.c b/LAPACKE/src/lapacke_dgedmd.c
new file mode 100644
index 0000000000..246d7f649b
--- /dev/null
+++ b/LAPACKE/src/lapacke_dgedmd.c
@@ -0,0 +1,112 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function dgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/* High-level C interface to the LAPACK routine dgedmd: queries the workspace
+ * sizes, allocates work/iwork, calls LAPACKE_dgedmd_work and frees them again.
+ * Returns -1 for an invalid matrix_layout, -i if NaN checking is enabled and
+ * the i-th argument contains a NaN, LAPACK_WORK_MEMORY_ERROR if a workspace
+ * allocation fails, and otherwise the info of LAPACKE_dgedmd_work. */
+lapack_int LAPACKE_dgedmd( int matrix_layout, char jobs, char jobz, char jobf,
+                           lapack_int whtsvd, lapack_int m, lapack_int n,
+                           double* x, lapack_int ldx, double* y, lapack_int ldy,
+                           lapack_int k, double* reig, double* imeig, double* z,
+                           lapack_int ldz, double* res, double* b, lapack_int ldb,
+                           double* w, lapack_int ldw, double* s, lapack_int lds)
+{
+    lapack_int info = 0;
+    lapack_int lwork = -1;
+    lapack_int liwork = -1;
+    double* work = NULL;
+    lapack_int* iwork = NULL;
+    double work_query;
+    lapack_int iwork_query;
+    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_dgedmd", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* Optionally check input matrices for NaNs; each code is minus the
+         * position of the offending argument (w is 20th, s is 22nd) */
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -8;
+        } else if( LAPACKE_dge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -10;
+        } else if( LAPACKE_dge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -15;
+        } else if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -18;
+        } else if( LAPACKE_dge_nancheck( matrix_layout, m, n, w, ldw ) ) {
+            return -20;
+        } else if( LAPACKE_dge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -22;
+        }
+    }
+#endif
+    /* Query optimal working array(s) size */
+    info = LAPACKE_dgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, &work_query, lwork,
+                                &iwork_query, liwork );
+    if( info != 0 ) {
+        goto exit_level_0;
+    }
+    lwork  = (lapack_int) work_query;
+    liwork = iwork_query;
+    /* Allocate memory for work arrays */
+    work  = (double*)LAPACKE_malloc( sizeof(double) * lwork );
+    if( work == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_0;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( iwork == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_1;
+    }
+    /* Call middle-level interface */
+    info = LAPACKE_dgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, work, lwork, iwork,
+                                liwork );
+    /* Release memory and exit */
+    LAPACKE_free( iwork );
+exit_level_1:
+    LAPACKE_free( work );
+exit_level_0:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_dgedmd", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_dgedmd_work.c b/LAPACKE/src/lapacke_dgedmd_work.c
new file mode 100644
index 0000000000..4d1169de92
--- /dev/null
+++ b/LAPACKE/src/lapacke_dgedmd_work.c
@@ -0,0 +1,179 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function dgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/*
+ * Middle-level C interface to the LAPACK routine dgedmd.
+ * LAPACK_COL_MAJOR: every argument is handed to LAPACK_dgedmd unchanged.
+ * LAPACK_ROW_MAJOR: x, y, z, b, w and s are copied into temporary
+ * column-major m-by-n buffers, LAPACK_dgedmd runs on the copies and all six
+ * are transposed back.  A workspace query (lwork or liwork equal to -1) is
+ * forwarded without allocating or transposing anything.
+ * Returns the LAPACK info (negative values shifted by one for the extra
+ * matrix_layout argument), -1 for an unknown matrix_layout, the negated
+ * position of a row-major leading dimension below n, or
+ * LAPACK_TRANSPOSE_MEMORY_ERROR when a temporary buffer cannot be allocated.
+ */
+lapack_int LAPACKE_dgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, double* x, lapack_int ldx,
+                                double* y, lapack_int ldy, lapack_int k,
+                                double* reig, double* imeig, double* z,
+                                lapack_int ldz, double* res, double* b,
+                                lapack_int ldb, double* w, lapack_int ldw,
+                                double* s, lapack_int lds, double* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_dgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy,
+                       &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds,
+                       work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldw_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        double* x_t = NULL;
+        double* y_t = NULL;
+        double* z_t = NULL;
+        double* b_t = NULL;
+        double* w_t = NULL;
+        double* s_t = NULL;
+        /* Check leading dimension(s); the first violation found is reported */
+        if( ldx < n ) {
+            info = -9;
+        } else if( ldy < n ) {
+            info = -11;
+        } else if( ldz < n ) {
+            info = -16;
+        } else if( ldb < n ) {
+            info = -19;
+        } else if( ldw < n ) {
+            info = -21;
+        } else if( lds < n ) {
+            info = -23;
+        }
+        if( info != 0 ) {
+            LAPACKE_xerbla( "LAPACKE_dgedmd_work", info );
+            return info;
+        }
+        /* Workspace query: no copies needed; pass the column-major ld values */
+        if( lwork == -1 || liwork == -1 ) {
+            LAPACK_dgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx_t,
+                           y, &ldy_t, &k, reig, imeig, z, &ldz_t, res, b,
+                           &ldb_t, w, &ldw_t, s, &lds_t, work, &lwork, iwork,
+                           &liwork, &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        x_t = (double*)LAPACKE_malloc( sizeof(double) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        y_t = (double*)LAPACKE_malloc( sizeof(double) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        z_t = (double*)LAPACKE_malloc( sizeof(double) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        b_t = (double*)LAPACKE_malloc( sizeof(double) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        w_t = (double*)LAPACKE_malloc( sizeof(double) * ldw_t * MAX(1,n) );
+        if( w_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        s_t = (double*)LAPACKE_malloc( sizeof(double) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        /* Transpose input matrices */
+        LAPACKE_dge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function on the column-major copies and adjust info */
+        LAPACK_dgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t,
+                       &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t,
+                       w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit; each label frees what was allocated so far */
+        LAPACKE_free( s_t );
+exit_level_5:
+        LAPACKE_free( w_t );
+exit_level_4:
+        LAPACKE_free( b_t );
+exit_level_3:
+        LAPACKE_free( z_t );
+exit_level_2:
+        LAPACKE_free( y_t );
+exit_level_1:
+        LAPACKE_free( x_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_dgedmd_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_dgedmd_work", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_dgedmdq.c b/LAPACKE/src/lapacke_dgedmdq.c
new file mode 100644
index 0000000000..f3d621ba93
--- /dev/null
+++ b/LAPACKE/src/lapacke_dgedmdq.c
@@ -0,0 +1,119 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function dgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/* High-level C interface to dgedmdq.  After validating matrix_layout and,
+ * when NaN checking is enabled, screening f, x, y, z, b, v and s, it asks
+ * LAPACKE_dgedmdq_work for the work/iwork sizes, allocates both buffers,
+ * performs the computation and releases the buffers.  Returns -1 for a bad
+ * matrix_layout, the negated argument position of a matrix holding a NaN,
+ * LAPACK_WORK_MEMORY_ERROR, or the info reported by LAPACKE_dgedmdq_work.
+ */
+lapack_int LAPACKE_dgedmdq( int matrix_layout, char jobs, char jobz, char jobr,
+                            char jobq, char jobt, char jobf, lapack_int whtsvd,
+                            lapack_int m, lapack_int n, double* f, lapack_int ldf,
+                            double* x, lapack_int ldx, double* y, lapack_int ldy,
+                            lapack_int nrnk, double tol, lapack_int k,
+                            double* reig, double* imeig, double* z,
+                            lapack_int ldz, double* res, double* b, lapack_int ldb,
+                            double* v, lapack_int ldv, double* s, lapack_int lds)
+{
+    double work_opt;
+    lapack_int iwork_opt;
+    double* work = NULL;
+    lapack_int* iwork = NULL;
+    lapack_int lwork = -1;
+    lapack_int liwork = -1;
+    lapack_int info = 0;
+    if( matrix_layout != LAPACK_ROW_MAJOR && matrix_layout != LAPACK_COL_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_dgedmdq", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* NaN screening: return -(argument position) of the offending matrix */
+        if( LAPACKE_dge_nancheck( matrix_layout, m, n, f, ldf ) ) {
+            return -11;
+        } else if( LAPACKE_dge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -13;
+        } else if( LAPACKE_dge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -15;
+        } else if( LAPACKE_dge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -22;
+        } else if( LAPACKE_dge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -25;
+        } else if( LAPACKE_dge_nancheck( matrix_layout, m, n, v, ldv ) ) {
+            return -27;
+        } else if( LAPACKE_dge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -29;
+        }
+    }
+#endif
+    /* First pass: lwork == liwork == -1 asks only for the buffer sizes */
+    info = LAPACKE_dgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                 jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                 nrnk, tol, k, reig, imeig, z, ldz, res,
+                                 b, ldb, v, ldv, s, lds, &work_opt, lwork,
+                                 &iwork_opt, liwork );
+    if( info != 0 ) {
+        goto done;
+    }
+    liwork = iwork_opt;
+    lwork  = (lapack_int) work_opt;
+    /* Obtain both work buffers with the sizes just reported */
+    work = (double*)LAPACKE_malloc( sizeof(double) * lwork );
+    if( !work ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto done;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( !iwork ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto free_work;
+    }
+    /* Second pass: the actual computation via the middle-level interface */
+    info = LAPACKE_dgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                 jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                 nrnk, tol, k, reig, imeig, z, ldz, res,
+                                 b, ldb, v, ldv, s, lds, work, lwork, iwork,
+                                 liwork );
+    /* Unwind in reverse order of allocation */
+    LAPACKE_free( iwork );
+free_work:
+    LAPACKE_free( work );
+done:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_dgedmdq", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_dgedmdq_work.c b/LAPACKE/src/lapacke_dgedmdq_work.c
new file mode 100644
index 0000000000..51b2a66d85
--- /dev/null
+++ b/LAPACKE/src/lapacke_dgedmdq_work.c
@@ -0,0 +1,200 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function dgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/*
+ * Middle-level C interface to the LAPACK routine dgedmdq.
+ *
+ * LAPACK_COL_MAJOR: every argument is handed to LAPACK_dgedmdq unchanged.
+ * LAPACK_ROW_MAJOR: f, x, y, z, b, v and s are copied into temporary
+ * column-major m-by-n buffers, LAPACK_dgedmdq runs on the copies and all
+ * seven are transposed back.  A workspace query (lwork or liwork equal to
+ * -1) is forwarded without allocating or transposing anything.
+ * Returns the LAPACK info (negative values shifted by one for the extra
+ * matrix_layout argument), -1 for an unknown matrix_layout, the negated
+ * position of a row-major leading dimension below n, or
+ * LAPACK_TRANSPOSE_MEMORY_ERROR when a temporary buffer cannot be allocated.
+ */
+lapack_int LAPACKE_dgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 double* f, lapack_int ldf, double* x,
+                                 lapack_int ldx, double* y, lapack_int ldy,
+                                 lapack_int nrnk, double tol, lapack_int k,
+                                 double* reig, double* imeig, double* z,
+                                 lapack_int ldz, double* res, double* b,
+                                 lapack_int ldb, double* v, lapack_int ldv,
+                                 double* s, lapack_int lds, double* work,
+                                 lapack_int lwork, lapack_int* iwork,
+                                 lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_dgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                        imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                        work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldf_t = MAX(1,m);
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldv_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        double* f_t = NULL;
+        double* x_t = NULL;
+        double* y_t = NULL;
+        double* z_t = NULL;
+        double* b_t = NULL;
+        double* v_t = NULL;
+        double* s_t = NULL;
+        /* Check leading dimension(s); the first violation found is reported */
+        if( ldf < n ) {
+            info = -12;
+        } else if( ldx < n ) {
+            info = -14;
+        } else if( ldy < n ) {
+            info = -16;
+        } else if( ldz < n ) {
+            info = -23;
+        } else if( ldb < n ) {
+            info = -26;
+        } else if( ldv < n ) {
+            info = -28;
+        } else if( lds < n ) {
+            info = -30;
+        }
+        if( info != 0 ) {
+            LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info );
+            return info;
+        }
+        /* Workspace query: no copies are needed.  Pass the leading dimensions
+         * of the column-major copies, as the computational call below does. */
+        if( lwork == -1 || liwork == -1 ) {
+            LAPACK_dgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                            &n, f, &ldf_t, x, &ldx_t, y, &ldy_t, &nrnk, &tol, &k,
+                            reig, imeig, z, &ldz_t, res, b, &ldb_t, v, &ldv_t,
+                            s, &lds_t, work, &lwork, iwork, &liwork, &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        f_t = (double*)LAPACKE_malloc( sizeof(double) * ldf_t * MAX(1,n) );
+        if( f_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        x_t = (double*)LAPACKE_malloc( sizeof(double) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        y_t = (double*)LAPACKE_malloc( sizeof(double) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        z_t = (double*)LAPACKE_malloc( sizeof(double) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        b_t = (double*)LAPACKE_malloc( sizeof(double) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        v_t = (double*)LAPACKE_malloc( sizeof(double) * ldv_t * MAX(1,n) );
+        if( v_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        s_t = (double*)LAPACKE_malloc( sizeof(double) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_6;
+        }
+        /* Transpose input matrices */
+        LAPACKE_dge_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t );
+        LAPACKE_dge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function on the column-major copies (not on the caller's
+         * row-major arrays, which are only refreshed below) and adjust info */
+        LAPACK_dgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f_t, &ldf_t, x_t, &ldx_t, y_t, &ldy_t, &nrnk, &tol,
+                        &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t, v_t,
+                        &ldv_t, s_t, &lds_t, work, &lwork, iwork, &liwork,
+                        &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv );
+        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit; each label frees what was allocated so far */
+        LAPACKE_free( s_t );
+exit_level_6:
+        LAPACKE_free( v_t );
+exit_level_5:
+        LAPACKE_free( b_t );
+exit_level_4:
+        LAPACKE_free( z_t );
+exit_level_3:
+        LAPACKE_free( y_t );
+exit_level_2:
+        LAPACKE_free( x_t );
+exit_level_1:
+        LAPACKE_free( f_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_dgedmdq_work", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_sgedmd.c b/LAPACKE/src/lapacke_sgedmd.c
new file mode 100644
index 0000000000..879631b1d0
--- /dev/null
+++ b/LAPACKE/src/lapacke_sgedmd.c
@@ -0,0 +1,112 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function sgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/* High-level C interface to sgedmd.  Optionally screens the matrix arguments
+ * for NaNs, queries LAPACKE_sgedmd_work for the work/iwork sizes, allocates
+ * both buffers, runs the computation and frees them.  Returns -1 for a bad
+ * matrix_layout, the negated position of a matrix holding a NaN,
+ * LAPACK_WORK_MEMORY_ERROR, or the info reported by LAPACKE_sgedmd_work.
+ */
+lapack_int LAPACKE_sgedmd( int matrix_layout, char jobs, char jobz, char jobf,
+                           lapack_int whtsvd, lapack_int m, lapack_int n,
+                           float* x, lapack_int ldx, float* y, lapack_int ldy,
+                           lapack_int k, float* reig, float* imeig, float* z,
+                           lapack_int ldz, float* res, float* b, lapack_int ldb,
+                           float* w, lapack_int ldw, float* s, lapack_int lds)
+{
+    lapack_int info = 0;
+    lapack_int lwork = -1;
+    lapack_int liwork = -1;
+    float* work = NULL;
+    lapack_int* iwork = NULL;
+    float work_query;
+    lapack_int iwork_query;
+    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_sgedmd", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* NaN screening: return -(argument position) of the offending matrix */
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -8;
+        } else if( LAPACKE_sge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -10;
+        } else if( LAPACKE_sge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -15;
+        } else if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -18;
+        } else if( LAPACKE_sge_nancheck( matrix_layout, m, n, w, ldw ) ) {
+            return -20;
+        } else if( LAPACKE_sge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -22;
+        }
+    }
+#endif
+    /* Query optimal working array(s) size (lwork and liwork are still -1) */
+    info = LAPACKE_sgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, &work_query, lwork,
+                                &iwork_query, liwork );
+    if( info != 0 ) {
+        goto exit_level_0;
+    }
+    lwork  = (lapack_int) work_query;
+    liwork = iwork_query;
+    /* Allocate memory for work arrays */
+    work  = (float*)LAPACKE_malloc( sizeof(float) * lwork );
+    if( work == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_0;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( iwork == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_1;
+    }
+    /* Call middle-level interface */
+    info = LAPACKE_sgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, work, lwork, iwork,
+                                liwork );
+    /* Release memory and exit */
+    LAPACKE_free( iwork );
+exit_level_1:
+    LAPACKE_free( work );
+exit_level_0:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_sgedmd", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_sgedmd_work.c b/LAPACKE/src/lapacke_sgedmd_work.c
new file mode 100644
index 0000000000..762a9b2717
--- /dev/null
+++ b/LAPACKE/src/lapacke_sgedmd_work.c
@@ -0,0 +1,179 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function sgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/* Middle-level C interface to the Fortran routine sgedmd; the caller supplies
+ * work/iwork.  Column-major data goes straight to Fortran.  Row-major matrices
+ * are copied into column-major temporaries (leading dimension MAX(1,m)), the
+ * routine runs on those copies, and the results are transposed back.  Negative
+ * Fortran info is shifted by one to account for the matrix_layout argument,
+ * which the Fortran routine does not have. */
+lapack_int LAPACKE_sgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, float* x, lapack_int ldx,
+                                float* y, lapack_int ldy, lapack_int k,
+                                float* reig, float* imeig, float* z,
+                                lapack_int ldz, float* res, float* b,
+                                lapack_int ldb, float* w, lapack_int ldw,
+                                float* s, lapack_int lds, float* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_sgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy,
+                       &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds,
+                       work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldx_t = MAX(1,m), ldy_t = MAX(1,m), ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m), ldw_t = MAX(1,m), lds_t = MAX(1,m);
+        float *x_t = NULL, *y_t = NULL, *z_t = NULL;
+        float *b_t = NULL, *w_t = NULL, *s_t = NULL;
+        /* Check leading dimension(s): a row-major row holds n entries */
+        if( ldx < n ) {
+            info = -9;
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+            return info;
+        }
+        if( ldy < n ) {
+            info = -11;
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+            return info;
+        }
+        if( ldz < n ) {
+            info = -16;
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+            return info;
+        }
+        if( ldb < n ) {
+            info = -19;
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+            return info;
+        }
+        if( ldw < n ) {
+            info = -21;
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+            return info;
+        }
+        if( lds < n ) {
+            info = -23;
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+            return info;
+        }
+        /* Workspace query (lwork or liwork is -1): no data is transposed, but
+         * Fortran still gets the column-major leading dimensions */
+        if( lwork == -1 || liwork == -1 ) {
+            LAPACK_sgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx_t, y,
+                           &ldy_t, &k, reig, imeig, z, &ldz_t, res, b, &ldb_t,
+                           w, &ldw_t, s, &lds_t, work, &lwork, iwork, &liwork,
+                           &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        x_t = (float*)LAPACKE_malloc( sizeof(float) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        y_t = (float*)LAPACKE_malloc( sizeof(float) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        z_t = (float*)LAPACKE_malloc( sizeof(float) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        b_t = (float*)LAPACKE_malloc( sizeof(float) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        w_t = (float*)LAPACKE_malloc( sizeof(float) * ldw_t * MAX(1,n) );
+        if( w_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        s_t = (float*)LAPACKE_malloc( sizeof(float) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        /* Transpose input matrices */
+        LAPACKE_sge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function on the transposed copies and adjust info */
+        LAPACK_sgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t,
+                       &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t,
+                       w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit; each label frees what was allocated before */
+        LAPACKE_free( s_t );
+exit_level_5:
+        LAPACKE_free( w_t );
+exit_level_4:
+        LAPACKE_free( b_t );
+exit_level_3:
+        LAPACKE_free( z_t );
+exit_level_2:
+        LAPACKE_free( y_t );
+exit_level_1:
+        LAPACKE_free( x_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_sgedmd_work", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_sgedmdq.c b/LAPACKE/src/lapacke_sgedmdq.c
new file mode 100644
index 0000000000..e202d7fbdd
--- /dev/null
+++ b/LAPACKE/src/lapacke_sgedmdq.c
@@ -0,0 +1,119 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function sgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/* High-level C interface to sgedmdq.  The caller supplies no scratch space:
+ * the workspace sizes are obtained from a query call to LAPACKE_sgedmdq_work,
+ * the real and integer work arrays are allocated, the computation is run and
+ * the arrays are freed.  With NaN checking enabled, the matrix arguments are
+ * screened first and minus the position of the first offender is returned. */
+lapack_int LAPACKE_sgedmdq( int matrix_layout, char jobs, char jobz, char jobr,
+                            char jobq, char jobt, char jobf, lapack_int whtsvd,
+                            lapack_int m, lapack_int n, float* f, lapack_int ldf,
+                            float* x, lapack_int ldx, float* y, lapack_int ldy,
+                            lapack_int nrnk, float tol, lapack_int k,
+                            float* reig, float* imeig, float* z,
+                            lapack_int ldz, float* res, float* b, lapack_int ldb,
+                            float* v, lapack_int ldv, float* s, lapack_int lds)
+{
+    lapack_int status = 0;
+    lapack_int lwork = -1;    /* -1 on both sizes requests a size query */
+    lapack_int liwork = -1;
+    float work_size;
+    lapack_int iwork_size;
+    float* work = NULL;
+    lapack_int* iwork = NULL;
+    if( !( matrix_layout == LAPACK_COL_MAJOR ||
+           matrix_layout == LAPACK_ROW_MAJOR ) ) {
+        LAPACKE_xerbla( "LAPACKE_sgedmdq", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* Screen the matrices in argument order; stop at the first NaN */
+        lapack_int nan_arg = 0;
+        if( LAPACKE_sge_nancheck( matrix_layout, m, n, f, ldf ) ) {
+            nan_arg = -11;
+        } else if( LAPACKE_sge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            nan_arg = -13;
+        } else if( LAPACKE_sge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            nan_arg = -15;
+        } else if( LAPACKE_sge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            nan_arg = -22;
+        } else if( LAPACKE_sge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            nan_arg = -25;
+        } else if( LAPACKE_sge_nancheck( matrix_layout, m, n, v, ldv ) ) {
+            nan_arg = -27;
+        } else if( LAPACKE_sge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            nan_arg = -29;
+        }
+        if( nan_arg != 0 ) {
+            return nan_arg;
+        }
+    }
+#endif
+    /* First pass: size query only; results land in work_size / iwork_size */
+    status = LAPACKE_sgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                   jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                   nrnk, tol, k, reig, imeig, z, ldz, res,
+                                   b, ldb, v, ldv, s, lds, &work_size, lwork,
+                                   &iwork_size, liwork );
+    if( status == 0 ) {
+        /* Adopt the sizes reported by the query */
+        lwork  = (lapack_int) work_size;
+        liwork = iwork_size;
+        /* Allocate the real work array first, then the integer one */
+        work = (float*)LAPACKE_malloc( sizeof(float) * lwork );
+        if( work != NULL ) {
+            iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+            if( iwork != NULL ) {
+                /* Second pass: the actual computation */
+                status = LAPACKE_sgedmdq_work( matrix_layout, jobs, jobz, jobr,
+                    jobq, jobt, jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                    nrnk, tol, k, reig, imeig, z, ldz, res, b, ldb, v, ldv,
+                    s, lds, work, lwork, iwork, liwork );
+                LAPACKE_free( iwork );
+            } else {
+                status = LAPACK_WORK_MEMORY_ERROR;
+            }
+            LAPACKE_free( work );
+        } else {
+            status = LAPACK_WORK_MEMORY_ERROR;
+        }
+    }
+    /* A work-memory error, from any stage, is reported via LAPACKE_xerbla */
+    if( status == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_sgedmdq", status );
+    }
+    return status;
+}
diff --git a/LAPACKE/src/lapacke_sgedmdq_work.c b/LAPACKE/src/lapacke_sgedmdq_work.c
new file mode 100644
index 0000000000..9039898d26
--- /dev/null
+++ b/LAPACKE/src/lapacke_sgedmdq_work.c
@@ -0,0 +1,200 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function sgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/* Middle-level C interface to the Fortran routine sgedmdq; the caller supplies
+ * work/iwork.  Column-major data goes straight to Fortran.  Row-major matrices
+ * are copied into column-major temporaries (leading dimension MAX(1,m)), the
+ * routine runs on those copies, and the results are transposed back.  Negative
+ * Fortran info is shifted by one to account for the matrix_layout argument. */
+lapack_int LAPACKE_sgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 float* f, lapack_int ldf, float* x,
+                                 lapack_int ldx, float* y, lapack_int ldy,
+                                 lapack_int nrnk, float tol, lapack_int k,
+                                 float* reig, float* imeig, float* z,
+                                 lapack_int ldz, float* res, float* b,
+                                 lapack_int ldb, float* v, lapack_int ldv,
+                                 float* s, lapack_int lds, float* work,
+                                 lapack_int lwork, lapack_int* iwork,
+                                 lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_sgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                        imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                        work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldf_t = MAX(1,m), ldx_t = MAX(1,m), ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m), ldb_t = MAX(1,m), ldv_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        float *f_t = NULL, *x_t = NULL, *y_t = NULL;
+        float *z_t = NULL, *b_t = NULL, *v_t = NULL;
+        float *s_t = NULL;
+        /* Check leading dimension(s): a row-major row holds n entries */
+        if( ldf < n ) {
+            info = -12;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        if( ldx < n ) {
+            info = -14;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        if( ldy < n ) {
+            info = -16;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        if( ldz < n ) {
+            info = -23;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        if( ldb < n ) {
+            info = -26;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        if( ldv < n ) {
+            info = -28;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        if( lds < n ) {
+            info = -30;
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+            return info;
+        }
+        /* Workspace query (lwork or liwork is -1): no data is transposed, but
+         * Fortran still gets the column-major leading dimensions */
+        if( lwork == -1 || liwork == -1 ) {
+            LAPACK_sgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd,
+                            &m, &n, f, &ldf_t, x, &ldx_t, y, &ldy_t, &nrnk,
+                            &tol, &k, reig, imeig, z, &ldz_t, res, b, &ldb_t,
+                            v, &ldv_t, s, &lds_t, work, &lwork, iwork, &liwork,
+                            &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        f_t = (float*)LAPACKE_malloc( sizeof(float) * ldf_t * MAX(1,n) );
+        if( f_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        x_t = (float*)LAPACKE_malloc( sizeof(float) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        y_t = (float*)LAPACKE_malloc( sizeof(float) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        z_t = (float*)LAPACKE_malloc( sizeof(float) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        b_t = (float*)LAPACKE_malloc( sizeof(float) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        v_t = (float*)LAPACKE_malloc( sizeof(float) * ldv_t * MAX(1,n) );
+        if( v_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        s_t = (float*)LAPACKE_malloc( sizeof(float) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_6;
+        }
+        /* Transpose input matrices */
+        LAPACKE_sge_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t );
+        LAPACKE_sge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function on the transposed copies and adjust info */
+        LAPACK_sgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f_t, &ldf_t, x_t, &ldx_t, y_t, &ldy_t, &nrnk, &tol,
+                        &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t, v_t,
+                        &ldv_t, s_t, &lds_t, work, &lwork, iwork, &liwork,
+                        &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv );
+        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit; each label frees what was allocated before */
+        LAPACKE_free( s_t );
+exit_level_6:
+        LAPACKE_free( v_t );
+exit_level_5:
+        LAPACKE_free( b_t );
+exit_level_4:
+        LAPACKE_free( z_t );
+exit_level_3:
+        LAPACKE_free( y_t );
+exit_level_2:
+        LAPACKE_free( x_t );
+exit_level_1:
+        LAPACKE_free( f_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_sgedmdq_work", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_zgedmd.c b/LAPACKE/src/lapacke_zgedmd.c
new file mode 100644
index 0000000000..f3f421c54d
--- /dev/null
+++ b/LAPACKE/src/lapacke_zgedmd.c
@@ -0,0 +1,116 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function zgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/* High-level C interface to zgedmd: optionally screens the matrix arguments
+ * for NaNs, queries the workspace sizes, allocates work/iwork and performs the
+ * computation through LAPACKE_zgedmd_work, releasing the workspace on exit. */
+lapack_int LAPACKE_zgedmd( int matrix_layout, char jobs, char jobz, char jobf,
+                           lapack_int whtsvd, lapack_int m, lapack_int n,
+                           lapack_complex_double* x, lapack_int ldx,
+                           lapack_complex_double* y, lapack_int ldy,
+                           lapack_int k, lapack_complex_double* reig,
+                           lapack_complex_double* imeig, lapack_complex_double* z,
+                           lapack_int ldz, lapack_complex_double* res,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* w, lapack_int ldw,
+                           lapack_complex_double* s, lapack_int lds)
+{
+    lapack_int info = 0, lwork = -1, liwork = -1; /* -1: size query */
+    lapack_complex_double* work = NULL;
+    lapack_int* iwork = NULL;
+    lapack_complex_double work_query;
+    lapack_int iwork_query;
+    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_zgedmd", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* Each code is minus the argument's position in this signature */
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -8;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -10;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -15;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -18;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, w, ldw ) ) {
+            return -20; /* w is argument 20 */
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -22; /* s is argument 22 */
+        }
+    }
+#endif
+    /* Query optimal working array(s) size */
+    info = LAPACKE_zgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, &work_query, lwork,
+                                &iwork_query, liwork );
+    if( info != 0 ) {
+        goto exit_level_0;
+    }
+    lwork  = LAPACK_Z2INT( work_query );
+    liwork = iwork_query;
+    /* Allocate memory for work arrays, sized by the query results */
+    work  = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lwork );
+    if( work == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_0;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( iwork == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_1;
+    }
+    /* Call middle-level interface */
+    info = LAPACKE_zgedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n,
+                                x, ldx, y, ldy, k, reig, imeig, z, ldz, res,
+                                b, ldb, w, ldw, s, lds, work, lwork, iwork,
+                                liwork );
+    /* Release memory and exit; each label frees what was allocated before */
+    LAPACKE_free( iwork );
+exit_level_1:
+    LAPACKE_free( work );
+exit_level_0:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_zgedmd", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_zgedmd_work.c b/LAPACKE/src/lapacke_zgedmd_work.c
new file mode 100644
index 0000000000..2554411eca
--- /dev/null
+++ b/LAPACKE/src/lapacke_zgedmd_work.c
@@ -0,0 +1,211 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function zgedmd
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/*
+ * LAPACKE_zgedmd_work: middle-level C wrapper around LAPACK_zgedmd.
+ *
+ * The caller owns the workspace (work/lwork and iwork/liwork). Passing
+ * lwork == -1 or liwork == -1 forwards a workspace-size query and returns
+ * without touching the matrices in this wrapper.
+ *
+ * LAPACK_COL_MAJOR: all arguments are forwarded unchanged.
+ * LAPACK_ROW_MAJOR: x, y, z, b, w and s are copied into column-major
+ * temporaries with leading dimension MAX(1,m), LAPACK_zgedmd runs on the
+ * copies, and the copies are transposed back into the caller's arrays.
+ *
+ * Returns 0 on success and LAPACK_TRANSPOSE_MEMORY_ERROR if a temporary
+ * cannot be allocated. Argument errors are reported as -i, where i counts
+ * the arguments of this function (matrix_layout is argument 1): a negative
+ * LAPACK info is shifted by one, and -1 reports an unknown matrix_layout.
+ * Positive info values are returned as produced by LAPACK_zgedmd.
+ *
+ * NOTE(review): every matrix is treated as m-by-n here. If w and s are
+ * n-by-n, the row-major transposes read m rows from them - confirm against
+ * the ZGEDMD documentation.
+ * NOTE(review): the CGEDMD Fortran signature added by this patch also takes
+ * JOBR, NRNK, TOL and RWORK/LRWORK, none of which are passed here - confirm
+ * that the LAPACK_zgedmd prototype matches ZGEDMD.
+ */
+lapack_int LAPACKE_zgedmd_work( int matrix_layout, char jobs, char jobz,
+                                char jobf, lapack_int whtsvd, lapack_int m,
+                                lapack_int n, lapack_complex_double* x,
+                                lapack_int ldx, lapack_complex_double* y,
+                                lapack_int ldy, lapack_int k,
+                                lapack_complex_double* reig,
+                                lapack_complex_double* imeig, lapack_complex_double* z,
+                                lapack_int ldz, lapack_complex_double* res,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* w, lapack_int ldw,
+                                lapack_complex_double* s, lapack_int lds,
+                                lapack_complex_double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_zgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy,
+                       &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds,
+                       work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldw_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        lapack_complex_double* x_t = NULL;
+        lapack_complex_double* y_t = NULL;
+        lapack_complex_double* z_t = NULL;
+        lapack_complex_double* b_t = NULL;
+        lapack_complex_double* w_t = NULL;
+        lapack_complex_double* s_t = NULL;
+        /* Check leading dimension(s) */
+        if( ldx < n ) {
+            info = -9;
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+            return info;
+        }
+        if( ldy < n ) {
+            info = -11;
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+            return info;
+        }
+        if( ldz < n ) {
+            info = -16;
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+            return info;
+        }
+        if( ldb < n ) {
+            info = -19;
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+            return info;
+        }
+        if( ldw < n ) {
+            info = -21;
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+            return info;
+        }
+        if( lds < n ) {
+            info = -23;
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+            return info;
+        }
+        /* Query optimal working array(s) size if requested */
+        if( lwork == -1 || liwork == -1 ) {
+            /* The row-major ld* were only checked against n above. Hand the
+             * routine the same column-major leading dimensions as the
+             * compute call below so both calls are validated alike. */
+            LAPACK_zgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx_t, y,
+                           &ldy_t, &k, reig, imeig, z, &ldz_t, res, b, &ldb_t,
+                           w, &ldw_t, s, &lds_t, work, &lwork, iwork, &liwork,
+                           &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        x_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        y_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        z_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        b_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        w_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldw_t * MAX(1,n) );
+        if( w_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        s_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        /* Transpose input matrices */
+        LAPACKE_zge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function and adjust info */
+        LAPACK_zgedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t,
+                       &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t,
+                       w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit */
+        LAPACKE_free( s_t );
+exit_level_5:
+        LAPACKE_free( w_t );
+exit_level_4:
+        LAPACKE_free( b_t );
+exit_level_3:
+        LAPACKE_free( z_t );
+exit_level_2:
+        LAPACKE_free( y_t );
+exit_level_1:
+        LAPACKE_free( x_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_zgedmd_work", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_zgedmdq.c b/LAPACKE/src/lapacke_zgedmdq.c
new file mode 100644
index 0000000000..3648ffdf20
--- /dev/null
+++ b/LAPACKE/src/lapacke_zgedmdq.c
@@ -0,0 +1,139 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native high-level C interface to LAPACK function zgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/*
+ * LAPACKE_zgedmdq: high-level C wrapper around LAPACK function zgedmdq.
+ *
+ * Optionally screens f, x, y, z, b, v and s for NaNs, asks
+ * LAPACKE_zgedmdq_work for the workspace sizes (lwork = liwork = -1),
+ * allocates work and iwork, runs the computation and frees the workspace.
+ *
+ * Returns -1 for an unknown matrix_layout, the negative argument index of
+ * the first matrix found to contain a NaN, LAPACK_WORK_MEMORY_ERROR if a
+ * workspace allocation fails, and otherwise the value returned by
+ * LAPACKE_zgedmdq_work.
+ *
+ * NOTE(review): z, b, v and s are NaN-checked together with the inputs. If
+ * they are output-only, stale NaNs in caller memory would be rejected
+ * here - confirm against the ZGEDMDQ documentation.
+ */
+lapack_int LAPACKE_zgedmdq( int matrix_layout, char jobs, char jobz, char jobr,
+                            char jobq, char jobt, char jobf, lapack_int whtsvd,
+                            lapack_int m, lapack_int n, lapack_complex_double* f,
+                            lapack_int ldf, lapack_complex_double* x,
+                            lapack_int ldx, lapack_complex_double* y,
+                            lapack_int ldy, lapack_int nrnk, double tol,
+                            lapack_int k, lapack_complex_double* reig,
+                            lapack_complex_double* imeig,
+                            lapack_complex_double* z, lapack_int ldz,
+                            lapack_complex_double* res, lapack_complex_double* b,
+                            lapack_int ldb, lapack_complex_double* v,
+                            lapack_int ldv, lapack_complex_double* s, lapack_int lds)
+{
+    lapack_int info = 0;
+    lapack_int lwork = -1;
+    lapack_int liwork = -1;
+    lapack_complex_double* work = NULL;
+    lapack_int* iwork = NULL;
+    lapack_complex_double work_query;
+    lapack_int iwork_query;
+    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
+        LAPACKE_xerbla( "LAPACKE_zgedmdq", -1 );
+        return -1;
+    }
+#ifndef LAPACK_DISABLE_NAN_CHECK
+    if( LAPACKE_get_nancheck() ) {
+        /* Optionally check input matrices for NaNs */
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, f, ldf ) ) {
+            return -11;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, x, ldx ) ) {
+            return -13;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, y, ldy ) ) {
+            return -15;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, z, ldz ) ) {
+            return -22;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, b, ldb ) ) {
+            return -25;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, v, ldv ) ) {
+            return -27;
+        }
+        if( LAPACKE_zge_nancheck( matrix_layout, m, n, s, lds ) ) {
+            return -29;
+        }
+    }
+#endif
+    /* Query optimal working array(s) size */
+    info = LAPACKE_zgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                 jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                 nrnk, tol, k, reig, imeig, z, ldz, res,
+                                 b, ldb, v, ldv, s, lds, &work_query, lwork,
+                                 &iwork_query, liwork );
+
+    if( info != 0 ) {
+        goto exit_level_0;
+    }
+    lwork  = LAPACK_Z2INT( work_query );
+    liwork = iwork_query;
+    /* Allocate memory for work arrays */
+    work  = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lwork );
+    if( work == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_0;
+    }
+    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
+    if( iwork == NULL ) {
+        info = LAPACK_WORK_MEMORY_ERROR;
+        goto exit_level_1;
+    }
+    /* Call middle-level interface */
+    info = LAPACKE_zgedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt,
+                                 jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy,
+                                 nrnk, tol, k, reig, imeig, z, ldz, res,
+                                 b, ldb, v, ldv, s, lds, work, lwork, iwork,
+                                 liwork );
+    /* Release memory and exit */
+    LAPACKE_free( iwork );
+exit_level_1:
+    LAPACKE_free( work );
+exit_level_0:
+    if( info == LAPACK_WORK_MEMORY_ERROR ) {
+        LAPACKE_xerbla( "LAPACKE_zgedmdq", info );
+    }
+    return info;
+}
diff --git a/LAPACKE/src/lapacke_zgedmdq_work.c b/LAPACKE/src/lapacke_zgedmdq_work.c
new file mode 100644
index 0000000000..9afceba07d
--- /dev/null
+++ b/LAPACKE/src/lapacke_zgedmdq_work.c
@@ -0,0 +1,231 @@
+/*****************************************************************************
+  Copyright (c) 2014, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************
+* Contents: Native middle-level C interface to LAPACK function zgedmdq
+* Author: Intel Corporation
+*****************************************************************************/
+
+#include "lapacke_utils.h"
+
+/*
+ * LAPACKE_zgedmdq_work: middle-level C wrapper around LAPACK_zgedmdq.
+ *
+ * The caller owns the workspace (work/lwork and iwork/liwork). Passing
+ * lwork == -1 or liwork == -1 forwards a workspace-size query and returns
+ * without touching the matrices in this wrapper.
+ *
+ * LAPACK_COL_MAJOR: all arguments are forwarded unchanged.
+ * LAPACK_ROW_MAJOR: f, x, y, z, b, v and s are copied into column-major
+ * temporaries with leading dimension MAX(1,m), LAPACK_zgedmdq runs on the
+ * copies, and the copies are transposed back into the caller's arrays.
+ *
+ * Returns 0 on success and LAPACK_TRANSPOSE_MEMORY_ERROR if a temporary
+ * cannot be allocated. Argument errors are reported as -i, where i counts
+ * the arguments of this function (matrix_layout is argument 1): a negative
+ * LAPACK info is shifted by one, and -1 reports an unknown matrix_layout.
+ * Positive info values are returned as produced by LAPACK_zgedmdq.
+ *
+ * NOTE(review): every matrix is treated as m-by-n here; confirm the actual
+ * shapes of x, y, v and s against the ZGEDMDQ documentation.
+ */
+lapack_int LAPACKE_zgedmdq_work( int matrix_layout, char jobs, char jobz,
+                                 char jobr, char jobq, char jobt, char jobf,
+                                 lapack_int whtsvd, lapack_int m, lapack_int n,
+                                 lapack_complex_double* f, lapack_int ldf,
+                                 lapack_complex_double* x, lapack_int ldx,
+                                 lapack_complex_double* y, lapack_int ldy,
+                                 lapack_int nrnk, double tol, lapack_int k,
+                                 lapack_complex_double* reig,
+                                 lapack_complex_double* imeig,
+                                 lapack_complex_double* z,
+                                 lapack_int ldz, lapack_complex_double* res,
+                                 lapack_complex_double* b,
+                                 lapack_int ldb, lapack_complex_double* v,
+                                 lapack_int ldv, lapack_complex_double* s,
+                                 lapack_int lds, lapack_complex_double* work,
+                                 lapack_int lwork, lapack_int* iwork,
+                                 lapack_int liwork )
+{
+    lapack_int info = 0;
+    if( matrix_layout == LAPACK_COL_MAJOR ) {
+        /* Call LAPACK function and adjust info */
+        LAPACK_zgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig,
+                        imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds,
+                        work, &lwork, iwork, &liwork, &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
+        lapack_int ldf_t = MAX(1,m);
+        lapack_int ldx_t = MAX(1,m);
+        lapack_int ldy_t = MAX(1,m);
+        lapack_int ldz_t = MAX(1,m);
+        lapack_int ldb_t = MAX(1,m);
+        lapack_int ldv_t = MAX(1,m);
+        lapack_int lds_t = MAX(1,m);
+        lapack_complex_double* f_t = NULL;
+        lapack_complex_double* x_t = NULL;
+        lapack_complex_double* y_t = NULL;
+        lapack_complex_double* z_t = NULL;
+        lapack_complex_double* b_t = NULL;
+        lapack_complex_double* v_t = NULL;
+        lapack_complex_double* s_t = NULL;
+        /* Check leading dimension(s) */
+        if( ldf < n ) {
+            info = -12;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        if( ldx < n ) {
+            info = -14;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        if( ldy < n ) {
+            info = -16;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        if( ldz < n ) {
+            info = -23;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        if( ldb < n ) {
+            info = -26;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        if( ldv < n ) {
+            info = -28;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        if( lds < n ) {
+            info = -30;
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+            return info;
+        }
+        /* Query optimal working array(s) size if requested */
+        if( lwork == -1 || liwork == -1 ) {
+            /* The row-major ld* were only checked against n above. Hand the
+             * routine the same column-major leading dimensions as the
+             * compute call below so both calls are validated alike. */
+            LAPACK_zgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd,
+                            &m, &n, f, &ldf_t, x, &ldx_t, y, &ldy_t, &nrnk,
+                            &tol, &k, reig, imeig, z, &ldz_t, res, b, &ldb_t,
+                            v, &ldv_t, s, &lds_t, work, &lwork, iwork, &liwork,
+                            &info );
+            return (info < 0) ? (info - 1) : info;
+        }
+        /* Allocate memory for temporary array(s) */
+        f_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldf_t * MAX(1,n) );
+        if( f_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_0;
+        }
+        x_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldx_t * MAX(1,n) );
+        if( x_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_1;
+        }
+        y_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldy_t * MAX(1,n) );
+        if( y_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_2;
+        }
+        z_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldz_t * MAX(1,n) );
+        if( z_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_3;
+        }
+        b_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,n) );
+        if( b_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_4;
+        }
+        v_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldv_t * MAX(1,n) );
+        if( v_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_5;
+        }
+        s_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lds_t * MAX(1,n) );
+        if( s_t == NULL ) {
+            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
+            goto exit_level_6;
+        }
+        /* Transpose input matrices */
+        LAPACKE_zge_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t );
+        LAPACKE_zge_trans( matrix_layout, m, n, s, lds, s_t, lds_t );
+        /* Call LAPACK function on the column-major copies and adjust info */
+        LAPACK_zgedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m,
+                        &n, f_t, &ldf_t, x_t, &ldx_t, y_t, &ldy_t, &nrnk, &tol,
+                        &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t, v_t,
+                        &ldv_t, s_t, &lds_t, work, &lwork, iwork, &liwork,
+                        &info );
+        if( info < 0 ) {
+            info = info - 1;
+        }
+        /* Transpose output matrices */
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv );
+        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds );
+        /* Release memory and exit */
+        LAPACKE_free( s_t );
+exit_level_6:
+        LAPACKE_free( v_t );
+exit_level_5:
+        LAPACKE_free( b_t );
+exit_level_4:
+        LAPACKE_free( z_t );
+exit_level_3:
+        LAPACKE_free( y_t );
+exit_level_2:
+        LAPACKE_free( x_t );
+exit_level_1:
+        LAPACKE_free( f_t );
+exit_level_0:
+        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
+            LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+        }
+    } else {
+        info = -1;
+        LAPACKE_xerbla( "LAPACKE_zgedmdq_work", info );
+    }
+    return info;
+}
diff --git a/SRC/CMakeLists.txt b/SRC/CMakeLists.txt
index 5d2e072584..8d3d2bcb8e 100644
--- a/SRC/CMakeLists.txt
+++ b/SRC/CMakeLists.txt
@@ -158,7 +158,7 @@ set(SLASRC
    ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
    ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
    ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
-   sgesvdq.f)
+   sgesvdq.f sgedmd.f90 sgedmdq.f90)
 
 set(DSLASRC
     sgetrf.f sgetrf2.f sgetrs.f sisnan.f slaisnan.f slaswp.f spotrf.f spotrf2.f
@@ -262,7 +262,7 @@ set(CLASRC
    chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f
    cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
    chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
-   cgesvdq.f)
+   cgesvdq.f cgedmd.f90 cgedmdq.f90)
 
 set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
    cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
@@ -360,7 +360,7 @@ set(DLASRC
    dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f
    dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
    dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
-   dgesvdq.f)
+   dgesvdq.f dgedmd.f90 dgedmdq.f90)
 
 set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
    dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
@@ -464,7 +464,7 @@ set(ZLASRC
    zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f
    zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
    zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
-   zgesvdq.f)
+   zgesvdq.f zgedmd.f90 zgedmdq.f90)
 
 set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
    zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f
diff --git a/SRC/Makefile b/SRC/Makefile
index 35b8c64aea..e89ffeeab1 100644
--- a/SRC/Makefile
+++ b/SRC/Makefile
@@ -191,7 +191,7 @@ SLASRC = \
    ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \
    ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \
    ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \
-   sgesvdq.o
+   sgesvdq.o sgedmd.o sgedmdq.o
 
 DSLASRC = spotrs.o sgetrs.o spotrf.o sgetrf.o
 
@@ -295,7 +295,7 @@ CLASRC = \
    chetrd_2stage.o chetrd_he2hb.o chetrd_hb2st.o chb2st_kernels.o \
    cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \
    chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \
-   cgesvdq.o
+   cgesvdq.o cgedmd.o cgedmdq.o
 
 ifdef USEXBLAS
 CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
@@ -394,7 +394,7 @@ DLASRC = \
    dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \
    dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \
    dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \
-   dgesvdq.o
+   dgesvdq.o dgedmd.o dgedmdq.o
 
 ifdef USEXBLAS
 DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \
@@ -501,7 +501,7 @@ ZLASRC = \
    zhetrd_2stage.o zhetrd_he2hb.o zhetrd_hb2st.o zhb2st_kernels.o \
    zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \
    zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \
-   zgesvdq.o
+   zgesvdq.o zgedmd.o zgedmdq.o
 
 ifdef USEXBLAS
 ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \
diff --git a/SRC/cgedmd.f90 b/SRC/cgedmd.f90
new file mode 100644
index 0000000000..499489270d
--- /dev/null
+++ b/SRC/cgedmd.f90
@@ -0,0 +1,995 @@
+      SUBROUTINE CGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
+                         M, N, X, LDX, Y, LDY, NRNK, TOL,   &
+                         K, EIGS, Z, LDZ, RES, B,    LDB,   &
+                         W, LDW,  S, LDS, ZWORK,  LZWORK,   &
+                         RWORK, LRWORK, IWORK, LIWORK, INFO )
+!   March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real32
+!.....
+!     Scalar arguments
+      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+                                 NRNK, LDZ, LDB, LDW,  LDS, &
+                                 LIWORK, LRWORK, LZWORK
+      INTEGER,       INTENT(OUT)  :: K, INFO
+      REAL(KIND=WP), INTENT(IN)   ::    TOL
+!     Array arguments
+      COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+                                         W(LDW,*), S(LDS,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+      REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
+      REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
+      INTEGER,          INTENT(OUT)   :: IWORK(*)
+!............................................................
+!     Purpose
+!     =======
+!     CGEDMD computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices. For the input matrices
+!     X and Y such that Y = A*X with an inaccessible matrix
+!     A, CGEDMD computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular
+!     vectors of X. Optionally, CGEDMD returns the residuals
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391-421, 2014.
+!
+!......................................................................
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations
+!     expressed in this material are those of the author and
+!     do not necessarily reflect the views of the DARPA SBIR
+!     Program Office
+!============================================================
+!     Distribution Statement A:
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022
+!============================================================
+!......................................................................
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix.
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'N' :: No data scaling.
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product X(:,1:K)*W, where X
+!            contains a POD basis (leading left singular vectors
+!            of the data matrix X) and W contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of K, X, W, Z.
+!     'N' :: The eigenvectors are not computed.
+!.....
+!     JOBR (input) CHARACTER*1
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will be
+!            computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.
+!.....
+!     WHTSVD (input) INTEGER, WHTSVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: CGESVD (the QR SVD algorithm)
+!     2 :: CGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: CGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M>= 0
+!     The state space dimension (the row dimension of X, Y).
+!.....
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshot pairs
+!     (the number of columns of X and Y).
+!.....
+!     X (input/output) COMPLEX(KIND=WP) M-by-N array
+!   > On entry, X contains the data snapshot matrix X. It is
+!     assumed that the column norms of X are in the range of
+!     the normalized floating point numbers.
+!   < On exit, the leading K columns of X contain a POD basis,
+!     i.e. the leading K left singular vectors of the input
+!     data matrix X, U(:,1:K). All N columns of X contain all
+!     left singular vectors of the input matrix X.
+!     See the descriptions of K, Z and W.
+!.....
+!     LDX (input) INTEGER, LDX >= M
+!     The leading dimension of the array X.
+!.....
+!     Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array
+!   > On entry, Y contains the data snapshot matrix Y
+!   < On exit,
+!     If JOBR == 'R', the leading K columns of Y  contain
+!     the residual vectors for the computed Ritz pairs.
+!     See the description of RES.
+!     If JOBR == 'N', Y contains the original input data,
+!                     scaled according to the value of JOBS.
+!.....
+!     LDY (input) INTEGER , LDY >= M
+!     The leading dimension of the array Y.
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.
+!     The numerical rank can be enforced by using positive
+!     value of NRNK as follows:
+!     0 < NRNK <= N :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the descriptions of TOL and  K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.
+!.....
+!     K (output) INTEGER,  0 <= K <= N
+!     The dimension of the POD basis for the data snapshot
+!     matrix X and the number of the computed Ritz pairs.
+!     The value of K is determined according to the rule set
+!     by the parameters NRNK and TOL.
+!     See the descriptions of NRNK and TOL.
+!.....
+!     EIGS (output) COMPLEX(KIND=WP) N-by-1 array
+!     The leading K (K<=N) entries of EIGS contain
+!     the computed eigenvalues (Ritz values).
+!     See the descriptions of K, and Z.
+!.....
+!     Z (workspace/output) COMPLEX(KIND=WP)  M-by-N array
+!     If JOBZ =='V' then Z contains the  Ritz vectors.  Z(:,i)
+!     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
+!     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
+!     the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i)
+!     is an eigenvector corresponding to EIGS(i). The columns
+!     of W(1:k,1:K) are the computed eigenvectors of the
+!     K-by-K Rayleigh quotient.
+!     See the descriptions of EIGS, X and W.
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) N-by-1 array
+!     RES(1:K) contains the residuals for the K computed
+!     Ritz pairs,
+!     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i) ||_2.
+!     See the description of EIGS and Z.
+!.....
+!     B (output) COMPLEX(KIND=WP)  M-by-N array.
+!     IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further
+!     details in the provided references.
+!     If JOBF == 'E', B(1:M,1:K) contains
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.
+!     If JOBF =='N', then B is not referenced.
+!     See the descriptions of X, W, K.
+!.....
+!     LDB (input) INTEGER, LDB >= M
+!     The leading dimension of the array B.
+!.....
+!     W (workspace/output) COMPLEX(KIND=WP) N-by-N array
+!     On exit, W(1:K,1:K) contains the K computed
+!     eigenvectors of the matrix Rayleigh quotient.
+!     The Ritz vectors (returned in Z) are the
+!     product of X (containing a POD basis for the input
+!     matrix X) and W. See the descriptions of K, S, X and Z.
+!     W is also used as a workspace to temporarily store the
+!     right singular vectors of X.
+!.....
+!     LDW (input) INTEGER, LDW >= N
+!     The leading dimension of the array W.
+!.....
+!     S (workspace/output) COMPLEX(KIND=WP) N-by-N array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by CGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N
+!     The leading dimension of the array S.
+!.....
+!     ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array
+!     ZWORK is used as complex workspace in the complex SVD, as
+!     specified by WHTSVD (1,2, 3 or 4) and for CGEEV for computing
+!     the eigenvalues of a Rayleigh quotient.
+!     If the call to CGEDMD is only workspace query, then
+!     ZWORK(1) contains the minimal complex workspace length and
+!     ZWORK(2) is the optimal complex workspace length.
+!     Hence, the length of ZWORK is at least 2.
+!     See the description of LZWORK.
+!.....
+!     LZWORK (input) INTEGER
+!     The minimal length of the workspace vector ZWORK.
+!     LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_CGEEV),
+!     where LZWORK_CGEEV = MAX( 1, 2*N )  and the minimal
+!     LZWORK_SVD is calculated as follows
+!     If WHTSVD == 1 :: CGESVD ::
+!        LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+!     If WHTSVD == 2 :: CGESDD ::
+!        LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
+!     If WHTSVD == 3 :: CGESVDQ ::
+!        LZWORK_SVD = obtainable by a query
+!     If WHTSVD == 4 :: CGEJSV ::
+!        LZWORK_SVD = obtainable by a query
+!     If on entry LZWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths and returns them in
+!     ZWORK(1) and ZWORK(2), respectively.
+!.....
+!     RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array
+!     On exit, RWORK(1:N) contains the singular values of
+!     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
+!     If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain
+!     scaling factor RWORK(N+2)/RWORK(N+1) used to scale X
+!     and Y to avoid overflow in the SVD of X.
+!     This may be of interest if the scaling option is off
+!     and as many as possible smallest eigenvalues are
+!     desired to the highest feasible accuracy.
+!     If the call to CGEDMD is only workspace query, then
+!     RWORK(1) contains the minimal workspace length.
+!     See the description of LRWORK.
+!.....
+!     LRWORK (input) INTEGER
+!     The minimal length of the workspace vector RWORK.
+!     LRWORK is calculated as follows:
+!     LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_CGEEV), where
+!     LRWORK_CGEEV = MAX(1,2*N) and LRWORK_SVD is the real workspace
+!     for the SVD subroutine determined by the input parameter
+!     WHTSVD.
+!     If WHTSVD == 1 :: CGESVD ::
+!        LRWORK_SVD = 5*MIN(M,N)
+!     If WHTSVD == 2 :: CGESDD ::
+!        LRWORK_SVD =  MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N),
+!        2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) )
+!     If WHTSVD == 3 :: CGESVDQ ::
+!        LRWORK_SVD = obtainable by a query
+!     If WHTSVD == 4 :: CGEJSV ::
+!        LRWORK_SVD = obtainable by a query
+!     If on entry LRWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     real workspace length and returns it in RWORK(1).
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LZWORK = -1, LRWORK = -1 or LIWORK = -1,
+!     then the minimal length of IWORK is computed and
+!     returned in IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for  ZWORK, RWORK and
+!     IWORK. See the descriptions of ZWORK, RWORK and IWORK.
+!.....
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (M=0 or N=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+      COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,   &
+                       SSUM,  XSCL1,  XSCL2
+      INTEGER       ::  i,  j, IMINWR,  INFO1, INFO2,   &
+                        LWRKEV, LWRSDD, LWRSVD, LWRSVJ, &
+                       LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
+                       MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
+                       OLWORK, MLRWRK
+      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY, &
+                        WNTEX, WNTREF, WNTRES, WNTVEC
+      CHARACTER     ::  JOBZL, T_OR_N
+      CHARACTER     ::  JSVOPT
+!
+!     Local arrays
+!     ~~~~~~~~~~~~
+      REAL(KIND=WP) :: RDUMMY(2)
+
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      REAL(KIND=WP) CLANGE, SLAMCH, SCNRM2
+      EXTERNAL      CLANGE, SLAMCH, SCNRM2, ICAMAX
+      INTEGER                               ICAMAX
+      LOGICAL       SISNAN, LSAME
+      EXTERNAL      SISNAN, LSAME
+
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      CAXPY,  CGEMM,  CSSCAL
+      EXTERNAL      CGEEV,  CGEJSV, CGESDD, CGESVD, CGESVDQ, &
+                    CLACPY, CLASCL, CLASSQ, XERBLA
+
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC     FLOAT, INT, MAX, SQRT
+!............................................................
+!
+!    Test the input arguments
+!
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      INFO   = 0
+      LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) &
+                                  .OR. ( LRWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
+                                  LSAME(JOBS,'N')) )   THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
+                              .OR. LSAME(JOBZ,'F')) )  THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
+                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -4
+      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
+                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
+          INFO = -5
+      ELSE IF ( M < 0 )   THEN
+          INFO = -6
+      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
+          INFO = -7
+      ELSE IF ( LDX < M ) THEN
+          INFO = -9
+      ELSE IF ( LDY < M ) THEN
+          INFO = -11
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
+                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
+          INFO = -12
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
+          INFO = -13
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -17
+      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
+          INFO = -20
+      ELSE IF ( LDW < N ) THEN
+          INFO = -22
+      ELSE IF ( LDS < N ) THEN
+          INFO = -24
+      END IF
+!
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( N == 0 ) THEN
+             ! Quick return. All output except K is void.
+             ! INFO=1 signals the void input.
+             ! In case of a workspace query, the default
+             ! minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+                IWORK(1) = 1
+                RWORK(1) = 1
+                ZWORK(1) = 2
+                ZWORK(2) = 2
+            ELSE
+               K   =  0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+
+         IMINWR = 1
+         MLRWRK = MAX(1,N)
+         MLWORK = 2
+         OLWORK = 2
+         SELECT CASE ( WHTSVD )
+         CASE (1)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of CGESVD:
+             ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+             MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+             MLWORK = MAX(MLWORK,MWRSVD)
+             MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N))
+             IF ( LQUERY ) THEN
+                CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, &
+                     B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 )
+                LWRSVD = INT( ZWORK(1) )
+                OLWORK = MAX(OLWORK,LWRSVD)
+             END IF
+         CASE (2)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of CGESDD:
+             ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+             ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N)
+             ! In LAPACK 3.10.1 RWORK is defined differently.
+             ! Below we take max over the two versions.
+             ! IMINWR = 8*MIN(M,N)
+             MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
+             MLWORK = MAX(MLWORK,MWRSDD)
+             IMINWR = 8*MIN(M,N)
+             MLRWRK = MAX( MLRWRK,  N +                    &
+                      MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), &
+                           5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), &
+                           2*MAX(M,N)*MIN(M,N)+            &
+                           2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
+             IF ( LQUERY ) THEN
+                CALL CGESDD( 'O', M, N, X, LDX, RWORK, B,     &
+                     LDB, W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 )
+                LWRSDD = MAX(MWRSDD,INT( ZWORK(1) ))
+                OLWORK = MAX(OLWORK,LWRSDD)
+             END IF
+         CASE (3)
+             CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                  X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK,  &
+                  IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVQ = INT(ZWORK(2))
+             MLWORK = MAX(MLWORK,MWRSVQ)
+             MLRWRK = MAX(MLRWRK,N + INT(RDUMMY(1)))
+             IF ( LQUERY ) THEN
+                LWRSVQ = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,LWRSVQ)
+             END IF
+         CASE (4)
+             JSVOPT = 'J'
+             CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
+                   N, X, LDX, RWORK, Z, LDZ, W, LDW,       &
+                   ZWORK, -1, RDUMMY, -1, IWORK, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVJ = INT(ZWORK(2))
+             MLWORK = MAX(MLWORK,MWRSVJ)
+             MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1))))
+             IF ( LQUERY ) THEN
+                LWRSVJ = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,LWRSVJ)
+             END IF
+         END SELECT
+         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
+             JOBZL = 'V'
+         ELSE
+             JOBZL = 'N'
+         END IF
+         ! Workspace calculation to the CGEEV call
+         MWRKEV = MAX( 1, 2*N )
+         MLWORK = MAX(MLWORK,MWRKEV)
+         MLRWRK = MAX(MLRWRK,N+2*N)
+         IF ( LQUERY ) THEN
+             CALL CGEEV( 'N', JOBZL, N, S, LDS, EIGS, &
+              W, LDW, W, LDW, ZWORK, -1, RWORK, INFO1 ) ! LAPACK CALL
+                LWRKEV = INT(ZWORK(1))
+                OLWORK = MAX( OLWORK, LWRKEV )
+                OLWORK = MAX( 2, OLWORK )
+         END IF
+!
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30
+         IF ( LRWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -28
+         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -26
+
+      END IF
+!
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'CGEDMD', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          RWORK(1) = MLRWRK
+          ZWORK(1) = MLWORK
+          ZWORK(2) = OLWORK
+          RETURN
+      END IF
+!............................................................
+!
+      OFL   = SLAMCH('O')*SLAMCH('P')
+      SMALL = SLAMCH('S')
+      BADXY = .FALSE.
+!
+!     <1> Optional scaling of the snapshots (columns of X, Y)
+!     ==========================================================
+      IF ( SCCOLX ) THEN
+          ! The columns of X will be normalized.
+          ! To prevent overflows, the column norms of X are
+          ! carefully computed using CLASSQ.
+          K = 0
+          DO i = 1, N
+            !RWORK(i) = SCNRM2( M, X(1,i), 1 )
+            SCALE  = ZERO
+            CALL CLASSQ( M, X(1,i), 1, SCALE, SSUM )
+            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -8
+                CALL XERBLA('CGEDMD',-INFO)
+            END IF
+            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of X(:,i) overflows. First, X(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
+!                 Next, the norm of X(:,i) is stored without
+!                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of X(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, X(1,i), LDX, INFO2 )
+                  RWORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  RWORK(i) =   SCALE * ROOTSC
+                  CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
+                               X(1,i), LDX, INFO2 )             ! LAPACK CALL
+!                 X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               RWORK(i) = ZERO
+               K = K + 1
+            END IF
+          END DO
+          IF ( K == N ) THEN
+          ! All columns of X are zero. Return error code -8.
+          ! (the 8th input variable had an illegal value)
+          K = 0
+          INFO = -8
+          CALL XERBLA('CGEDMD',-INFO)
+          RETURN
+          END IF
+          DO i = 1, N
+!           Now, apply the same scaling to the columns of Y.
+            IF ( RWORK(i) >  ZERO ) THEN
+                CALL CSSCAL( M, ONE/RWORK(i), Y(1,i), 1 ) ! BLAS CALL
+!               Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)      ! INTRINSIC
+            ELSE IF ( RWORK(i) < ZERO ) THEN
+                CALL CLASCL( 'G', 0, 0, -RWORK(i),          &
+                     ONE/FLOAT(M), M, 1, Y(1,i), LDY, INFO2 ) ! LAPACK CALL
+            ELSE IF ( ABS(Y(ICAMAX(M, Y(1,i),1),i ))  &
+                                            /= ZERO ) THEN
+!               X(:,i) is zero vector. For consistency,
+!               Y(:,i) should also be zero. If Y(:,i) is not
+!               zero, then the data might be inconsistent or
+!               corrupted. If JOBS == 'C', Y(:,i) is set to
+!               zero and a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+                IF ( LSAME(JOBS,'C')) &
+                CALL CSSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
+            END IF
+          END DO
+      END IF
+  !
+      IF ( SCCOLY ) THEN
+          ! The columns of Y will be normalized.
+          ! To prevent overflows, the column norms of Y are
+          ! carefully computed using CLASSQ.
+          DO i = 1, N
+            !RWORK(i) = SCNRM2( M, Y(1,i), 1 )
+            SCALE  = ZERO
+            CALL CLASSQ( M, Y(1,i), 1, SCALE, SSUM )
+            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -10
+                CALL XERBLA('CGEDMD',-INFO)
+            END IF
+            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of Y(:,i) overflows. First, Y(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
+!                 Next, the norm of Y(:,i) is stored without
+!                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of Y(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, Y(1,i), LDY, INFO2 )
+                  RWORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
+               ELSE
+!                 Y(:,i) will be scaled to unit 2-norm
+                  RWORK(i) =   SCALE * ROOTSC
+                  CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
+                               Y(1,i), LDY, INFO2 )              ! LAPACK CALL
+!                 Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               RWORK(i) = ZERO
+            END IF
+         END DO
+         DO i = 1, N
+!           Now, apply the same scaling to the columns of X.
+            IF ( RWORK(i) >  ZERO ) THEN
+                CALL CSSCAL( M, ONE/RWORK(i), X(1,i), 1 )  ! BLAS CALL
+!               X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)      ! INTRINSIC
+            ELSE IF ( RWORK(i) < ZERO ) THEN
+                CALL CLASCL( 'G', 0, 0, -RWORK(i),          &
+                     ONE/FLOAT(M), M, 1, X(1,i), LDX, INFO2 ) ! LAPACK CALL
+            ELSE IF ( ABS(X(ICAMAX(M, X(1,i),1),i ))  &
+                                           /= ZERO ) THEN
+!               Y(:,i) is zero vector.  If X(:,i) is not
+!               zero, then a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+            END IF
+         END DO
+       END IF
+!
+!     <2> SVD of the data snapshot matrix X.
+!     =====================================
+!     The left singular vectors are stored in the array X.
+!     The right singular vectors are in the array W.
+!     The array W will later on contain the eigenvectors
+!     of a Rayleigh quotient.
+      NUMRNK = N
+      SELECT CASE ( WHTSVD )
+         CASE (1)
+             CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, &
+                  LDB, W, LDW, ZWORK, LZWORK,  RWORK(N+1), INFO1 ) ! LAPACK CALL
+             T_OR_N = 'C'
+         CASE (2)
+            CALL CGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, &
+                 LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 )   ! LAPACK CALL
+            T_OR_N = 'C'
+         CASE (3)
+              CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                   X, LDX, RWORK, Z, LDZ, W, LDW, &
+                   NUMRNK, IWORK, LIWORK, ZWORK,     &
+                   LZWORK, RWORK(N+1), LRWORK-N, INFO1)     ! LAPACK CALL
+              CALL CLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
+         T_OR_N = 'C'
+         CASE (4)
+              CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
+                   N, X, LDX, RWORK, Z, LDZ, W, LDW, &
+                   ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 )    ! LAPACK CALL
+              CALL CLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
+              T_OR_N = 'N'
+              XSCL1 = RWORK(N+1)
+              XSCL2 = RWORK(N+2)
+              IF ( XSCL1 /=  XSCL2 ) THEN
+                 ! This is an exceptional situation: if the
+                 ! data matrices are not scaled and the
+                 ! largest singular value of X overflows,
+                 ! then CGEJSV can return the SVD in scaled
+                 ! form. The scaling factor can be used
+                 ! to rescale the data (X and Y).
+                 CALL CLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
+              END IF
+      END SELECT
+!
+      IF ( INFO1 > 0 ) THEN
+         ! The SVD selected subroutine did not converge.
+         ! Return with an error code.
+         INFO = 2
+         RETURN
+      END IF
+!
+      IF ( RWORK(1) == ZERO ) THEN
+          ! The largest computed singular value of (scaled)
+          ! X is zero. Return error code -8
+          ! (the 8th input variable had an illegal value).
+          K = 0
+          INFO = -8
+          CALL XERBLA('CGEDMD',-INFO)
+          RETURN
+      END IF
+!
+      !<3> Determine the numerical rank of the data
+      !    snapshots matrix X. This depends on the
+      !    parameters NRNK and TOL.
+
+      SELECT CASE ( NRNK )
+          CASE ( -1 )
+               K = 1
+               DO i = 2, NUMRNK
+                 IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. &
+                      ( RWORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE ( -2 )
+               K = 1
+               DO i = 1, NUMRNK-1
+                 IF ( ( RWORK(i+1) <= RWORK(i)*TOL  ) .OR. &
+                      ( RWORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE DEFAULT
+               K = 1
+               DO i = 2, NRNK
+                  IF ( RWORK(i) <= SMALL ) EXIT
+                  K = K + 1
+               END DO
+          END SELECT
+      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
+      !   snapshot data in the input matrix X.
+
+      !<4> Compute the Rayleigh quotient S = U^H * A * U.
+      !    Depending on the requested outputs, the computation
+      !    is organized to compute additional auxiliary
+      !    matrices (for the residuals and refinements).
+      !
+      !    In all formulas below, we need V_k*Sigma_k^(-1)
+      !    where either V_k is in W(1:N,1:K), or V_k^H is in
+      !    W(1:K,1:N). Here Sigma_k=diag(RWORK(1:K)).
+      IF ( LSAME(T_OR_N, 'N') ) THEN
+          DO i = 1, K
+           CALL CSSCAL( N, ONE/RWORK(i), W(1,i), 1 )   ! BLAS CALL
+           ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i)      ! INTRINSIC
+          END DO
+      ELSE
+          ! This non-unit stride access is due to the fact
+          ! that CGESVD, CGESVDQ and CGESDD return the
+          ! adjoint matrix of the right singular vectors.
+          !DO i = 1, K
+          ! CALL DSCAL( N, ONE/RWORK(i), W(i,1), LDW )  ! BLAS CALL
+          ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N)      ! INTRINSIC
+          !END DO
+          DO i = 1, K
+              RWORK(N+i) = ONE/RWORK(i)
+          END DO
+          DO j = 1, N
+             DO i = 1, K
+                 W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j)
+             END DO
+          END DO
+      END IF
+!
+      IF ( WNTREF ) THEN
+         !
+         ! Need A*U(:,1:K)=Y*V_k*inv(diag(RWORK(1:K)))
+         ! for computing the refined Ritz vectors
+         ! (optionally, outside CGEDMD).
+          CALL CGEMM( 'N', T_OR_N, M, K, N, ZONE, Y, LDY, W, &
+                      LDW, ZZERO, Z, LDZ )                       ! BLAS CALL
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRINSIC, for T_OR_N=='T'
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRINSIC, for T_OR_N=='N'
+          !
+          ! At this point Z contains
+          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
+          ! this is needed for computing the residuals.
+          ! This matrix is  returned in the array B and
+          ! it can be used to compute refined Ritz vectors.
+          CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
+          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
+
+          CALL CGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, &
+                      LDZ, ZZERO, S, LDS )                       ! BLAS CALL
+          ! S(1:K,1:K) = MATMUL(CONJG(TRANSPOSE(X(1:M,1:K))),Z(1:M,1:K)) ! INTRINSIC
+          ! At this point S = U^H * A * U is the Rayleigh quotient.
+      ELSE
+        ! A * U(:,1:K) is not explicitly needed and the
+        ! computation is organized differently. The Rayleigh
+        ! quotient is computed more efficiently.
+        CALL CGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, &
+                   ZZERO, Z, LDZ )                                  ! BLAS CALL
+        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! INTRINSIC
+        !
+        CALL CGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, &
+                    LDW, ZZERO, S, LDS )                        ! BLAS CALL
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T'
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRINSIC, for T_OR_N=='N'
+        ! At this point S = U^H * A * U is the Rayleigh quotient.
+        ! If the residuals are requested, save scaled V_k into Z.
+        ! Recall that V_k or V_k^H is stored in W.
+        IF ( WNTRES .OR. WNTEX ) THEN
+          IF ( LSAME(T_OR_N, 'N') ) THEN
+              CALL CLACPY( 'A', N, K, W, LDW, Z, LDZ )
+          ELSE
+              CALL CLACPY( 'A', K, N, W, LDW, Z, LDZ )
+          END IF
+        END IF
+      END IF
+!
+      !<5> Compute the Ritz values and (if requested) the
+      !   right eigenvectors of the Rayleigh quotient.
+      !
+      CALL CGEEV( 'N', JOBZL, K, S, LDS, EIGS, W, &
+           LDW, W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 )  ! LAPACK CALL
+      !
+      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
+      ! quotient.  See the description of Z.
+      ! Also, see the description of CGEEV.
+      IF ( INFO1 > 0 ) THEN
+         ! CGEEV failed to compute the eigenvalues and
+         ! eigenvectors of the Rayleigh quotient.
+         INFO = 3
+         RETURN
+      END IF
+!
+      ! <6> Compute the eigenvectors (if requested) and,
+      ! the residuals (if requested).
+      !
+      IF ( WNTVEC .OR. WNTEX ) THEN
+          IF ( WNTRES ) THEN
+              IF ( WNTREF ) THEN
+                ! Here, if the refinement is requested, we have
+                ! A*U(:,1:K) already computed and stored in Z.
+                ! For the residuals, need Y = A * U(:,1:K) * W.
+                CALL CGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, &
+                           LDW, ZZERO, Y, LDY )              ! BLAS CALL
+                ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC
+                ! This frees Z; Y contains A * U(:,1:K) * W.
+              ELSE
+                ! Compute S = V_k * Sigma_k^(-1) * W, where
+                ! V_k * Sigma_k^(-1) (or its adjoint) is stored in Z
+                CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
+                           W, LDW, ZZERO, S, LDS)
+                ! Then, compute Z = Y * S =
+                ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+                ! = A * U(:,1:K) * W(1:K,1:K)
+                CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                           LDS, ZZERO, Z, LDZ)
+                ! Save a copy of Z into Y and free Z for holding
+                ! the Ritz vectors.
+                CALL CLACPY( 'A', M, K, Z, LDZ, Y, LDY )
+                IF ( WNTEX ) CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )
+              END IF
+          ELSE IF ( WNTEX ) THEN
+              ! Compute S = V_k * Sigma_k^(-1) * W, where
+                ! V_k * Sigma_k^(-1) is stored in Z
+                CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
+                           W, LDW, ZZERO, S, LDS)
+                ! Then, compute Z = Y * S =
+                ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+                ! = A * U(:,1:K) * W(1:K,1:K)
+                CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                           LDS, ZZERO, B, LDB)
+                ! The above call replaces the following two calls
+                ! that were used in the developing-testing phase.
+                ! CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                !           LDS, ZZERO, Z, LDZ)
+                ! Save a copy of Z into Y and free Z for holding
+                ! the Ritz vectors.
+                ! CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )
+          END IF
+!
+          ! Compute the Ritz vectors
+          IF ( WNTVEC ) CALL CGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, &
+                       ZZERO, Z, LDZ )                          ! BLAS CALL
+          ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
+!
+          IF ( WNTRES ) THEN
+             DO i = 1, K
+                CALL CAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
+                ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i)            ! INTRINSIC
+                RES(i) = SCNRM2( M, Y(1,i), 1)                        ! BLAS CALL
+             END DO
+          END IF
+      END IF
+!
+      IF ( WHTSVD == 4 ) THEN
+          RWORK(N+1) = XSCL1
+          RWORK(N+2) = XSCL2
+      END IF
+!
+!     Successful exit.
+      IF ( .NOT. BADXY ) THEN
+         INFO = 0
+      ELSE
+         ! A warning on possible data inconsistency.
+         ! This should be a rare event.
+         INFO = 4
+      END IF
+!............................................................
+      RETURN
+!     ......
+      END SUBROUTINE CGEDMD
+
diff --git a/SRC/cgedmdq.f90 b/SRC/cgedmdq.f90
new file mode 100644
index 0000000000..52c1669c7d
--- /dev/null
+++ b/SRC/cgedmdq.f90
@@ -0,0 +1,689 @@
+SUBROUTINE CGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+                    LDY,   NRNK,  TOL,   K,  EIGS,         &
+                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
+                    S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
+                    IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE 
+      INTEGER, PARAMETER :: WP = real32
+!.....      
+!     Scalar arguments       
+      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+                                JOBT, JOBF
+      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+                                LDY, NRNK, LDZ, LDB, LDV,  &
+                                LDS, LZWORK,  LWORK, LIWORK
+      INTEGER,   INTENT(OUT) :: INFO,   K      
+      REAL(KIND=WP), INTENT(IN)    ::   TOL     
+!     Array arguments      
+      COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
+                                         Z(LDZ,*), B(LDB,*), &
+                                         V(LDV,*), S(LDS,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+      REAL(KIND=WP), INTENT(OUT)   :: RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!.....      
+!     Purpose  
+!     =======
+!     CGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices, using a QR factorization
+!     based compression of the data. For the input matrices
+!     X and Y such that Y = A*X with an inaccessible matrix
+!     A, CGEDMDQ computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular 
+!     vectors of X. Optionally, CGEDMDQ returns the residuals 
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].      
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.      
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391 -421, 2014.
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations 
+!     expressed in this material are those of the author and 
+!     do not necessarily reflect the views of the DARPA SBIR 
+!     Program Office.      
+!============================================================
+!     Distribution Statement A: 
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022      
+!============================================================      
+!......................................................................      
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix. The data snapshots are the columns
+!     of F. The leading N-1 columns of F are denoted X and the
+!     trailing N-1 columns are denoted Y. 
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)    
+!     'N' :: No data scaling.   
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Z*V, where Z
+!            is orthonormal and V contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of F, V, Z.
+!     'Q' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Q*Z, where Z
+!            contains the eigenvectors of the compression of the
+!            underlying discretised operator onto the span of
+!            the data snapshots. See the descriptions of F, V, Z.   
+!            Q is from the initial QR factorization.
+!     'N' :: The eigenvectors are not computed.  
+!.....      
+!     JOBR (input) CHARACTER*1 
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will
+!            be computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBQ (input) CHARACTER*1 
+!     Specifies whether to explicitly compute and return the
+!     unitary matrix from the QR factorization.
+!     'Q' :: The matrix Q of the QR factorization of the data
+!            snapshot matrix is computed and stored in the
+!            array F. See the description of F.       
+!     'N' :: The matrix Q is not explicitly computed.
+!.....
+!     JOBT (input) CHARACTER*1 
+!     Specifies whether to return the upper triangular factor
+!     from the QR factorization.
+!     'R' :: The matrix R of the QR factorization of the data 
+!            snapshot matrix F is returned in the array Y.
+!            See the description of Y and Further details.       
+!     'N' :: The matrix R is not returned. 
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are 
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.   
+!     To be useful on exit, this option needs JOBQ='Q'.    
+!.....
+!     WHTSVD (input) INTEGER, WHTSVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: CGESVD (the QR SVD algorithm)
+!     2 :: CGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: CGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger 
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M >= 0 
+!     The state space dimension (the number of rows of F).
+!.....      
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshots from a single trajectory,
+!     taken at equidistant discrete times. This is the 
+!     number of columns of F.
+!.....
+!     F (input/output) COMPLEX(KIND=WP) M-by-N array
+!     > On entry,
+!     the columns of F are the sequence of data snapshots 
+!     from a single trajectory, taken at equidistant discrete
+!     times. It is assumed that the column norms of F are 
+!     in the range of the normalized floating point numbers. 
+!     < On exit,
+!     If JOBQ == 'Q', the array F contains the orthogonal 
+!     matrix/factor of the QR factorization of the initial 
+!     data snapshots matrix F. See the description of JOBQ. 
+!     If JOBQ == 'N', the entries in F strictly below the main
+!     diagonal contain, column-wise, the information on the 
+!     Householder vectors, as returned by CGEQRF. The 
+!     remaining information to restore the orthogonal matrix
+!     of the initial QR factorization is stored in ZWORK(1:MIN(M,N)). 
+!     See the description of ZWORK.
+!.....
+!     LDF (input) INTEGER, LDF >= M 
+!     The leading dimension of the array F.
+!.....
+!     X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array
+!     X is used as workspace to hold representations of the
+!     leading N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, the leading K columns of X contain the leading
+!     K left singular vectors of the above described content
+!     of X. To lift them to the space of the left singular
+!     vectors U(:,1:K) of the input data, pre-multiply with the 
+!     Q factor from the initial QR factorization. 
+!     See the descriptions of F, K, V  and Z.
+!.....      
+!     LDX (input) INTEGER, LDX >= N  
+!     The leading dimension of the array X. 
+!.....
+!     Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array
+!     Y is used as workspace to hold representations of the
+!     trailing N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, 
+!     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
+!     triangular factor from the QR factorization of the data
+!     snapshot matrix F.
+!.....      
+!     LDY (input) INTEGER , LDY >= N
+!     The leading dimension of the array Y.   
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.      
+!     The numerical rank can be enforced by using positive 
+!     value of NRNK as follows: 
+!     0 < NRNK <= N-1 :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the description of K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.  
+!.....
+!     K (output) INTEGER,  0 <= K <= N 
+!     The dimension of the SVD/POD basis for the leading N-1
+!     data snapshots (columns of F) and the number of the 
+!     computed Ritz pairs. The value of K is determined
+!     according to the rule set by the parameters NRNK and 
+!     TOL. See the descriptions of NRNK and TOL. 
+!.....
+!     EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array
+!     The leading K (K<=N-1) entries of EIGS contain
+!     the computed eigenvalues (Ritz values).
+!     See the descriptions of K, and Z.
+!.....
+!     Z (workspace/output) COMPLEX(KIND=WP)  M-by-(N-1) array
+!     If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i)
+!     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
+!     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
+!     Z*V, where Z contains orthonormal matrix (the product of
+!     Q from the initial QR factorization and the SVD/POD_basis
+!     returned by CGEDMD in X) and the second factor (the 
+!     eigenvectors of the Rayleigh quotient) is in the array V, 
+!     as returned by CGEDMD. That is,  X(:,1:K)*V(:,i)
+!     is an eigenvector corresponding to EIGS(i). The columns 
+!     of V(1:K,1:K) are the computed eigenvectors of the 
+!     K-by-K Rayleigh quotient.  
+!     See the descriptions of EIGS, X and V.      
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) (N-1)-by-1 array
+!     RES(1:K) contains the residuals for the K computed 
+!     Ritz pairs, 
+!     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i) ||_2.
+!     See the description of EIGS and Z.      
+!.....
+!     B (output) COMPLEX(KIND=WP)  MIN(M,N)-by-(N-1) array.
+!     If JOBF == 'R', B(1:N,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further
+!     details in the provided references.
+!     If JOBF == 'E', B(1:N,1:K) contains
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.   
+!     In both cases, the content of B can be lifted to the 
+!     original dimension of the input data by pre-multiplying
+!     with the Q factor from the initial QR factorization. 
+!     Here A denotes a compression of the underlying operator.      
+!     See the descriptions of F and X.
+!     If JOBF =='N', then B is not referenced.
+!.....
+!     LDB (input) INTEGER, LDB >= MIN(M,N)
+!     The leading dimension of the array B.
+!.....
+!     V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
+!     On exit, V(1:K,1:K) contains the K eigenvectors of
+!     the Rayleigh quotient. The Ritz vectors
+!     (returned in Z) are the product of Q from the initial QR
+!     factorization (see the description of F), X (see the
+!     description of X), and V.
+!.....
+!     LDV (input) INTEGER, LDV >= N-1
+!     The leading dimension of the array V.
+!.....      
+!     S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by CGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N-1        
+!     The leading dimension of the array S.
+!.....
+!     ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array
+!     On exit,
+!     ZWORK(1:MIN(M,N)) contains the scalar factors of the
+!     elementary reflectors as returned by CGEQRF of the
+!     M-by-N input matrix F.
+!     If the call to CGEDMDQ is only a workspace query, then
+!     ZWORK(1) contains the minimal complex workspace length and
+!     ZWORK(2) is the optimal complex workspace length.
+!     Hence, the length of ZWORK is at least 2.
+!     See the description of LZWORK.      
+!.....      
+!     LZWORK (input) INTEGER
+!     The minimal length of the  workspace vector ZWORK.
+!     LZWORK is calculated as follows:
+!     Let MLWQR  = N (minimal workspace for CGEQRF[M,N])
+!         MLWDMD = minimal workspace for CGEDMD (see the
+!                  description of LZWORK in CGEDMD)
+!         MLWMQR = N (minimal workspace for
+!                    CUNMQR['L','N',M,N,N])
+!         MLWGQR = N (minimal workspace for ZUNGQR[M,N,N])
+!         MINMN  = MIN(M,N); MLWGQR above refers to CUNGQR
+!     Then
+!     LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD)
+!     is further updated as follows:
+!        if   JOBZ == 'V' or JOBZ == 'F' THEN 
+!             LZWORK = MAX( LZWORK, MINMN+MLWMQR )
+!        if   JOBQ == 'Q' THEN
+!             LZWORK = MAX( LZWORK, MINMN+MLWGQR)
+!
+!.....      
+!     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!     On exit,
+!     WORK(1:N-1) contains the singular values of 
+!     the input submatrix F(1:M,1:N-1).
+!     If the call to CGEDMDQ is only a workspace query, then
+!     WORK(1) contains the minimal workspace length and
+!     WORK(2) is the optimal workspace length. Hence, the
+!     length of WORK is at least 2.
+!     See the description of LWORK.
+!.....
+!     LWORK (input) INTEGER
+!     The minimal length of the  workspace vector WORK.
+!     LWORK is the same as in CGEDMD, because in CGEDMDQ
+!     only CGEDMD requires real workspace for snapshots
+!     of dimensions MIN(M,N)-by-(N-1).
+!     If on entry LWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.          
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LWORK =-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     Let M1=MIN(M,N), N1=N-1. Then      
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!..... 
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (N=0 or N=1, which
+!               includes the case M=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.  
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~      
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!     COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+!      
+!     Local scalars      
+!     ~~~~~~~~~~~~~
+      INTEGER           :: IMINWR, INFO1,  MINMN, MLRWRK,   &
+                           MLWDMD, MLWGQR, MLWMQR, MLWORK,  & 
+                           MLWQR,  OLWDMD, OLWGQR, OLWMQR,  &
+                           OLWORK, OLWQR
+      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
+                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
+                           WNTVCQ, WNTREF, WNTEX
+      CHARACTER(LEN=1)  :: JOBVL
+!      
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      LOGICAL       LSAME
+      EXTERNAL      LSAME 
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      CGEQRF, CLACPY, CLASET, CUNGQR, & 
+                    CUNMQR, XERBLA
+
+!     External subroutines
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      CGEDMD 
+      
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC      MAX, MIN, INT         
+ !..........................................................  
+ !
+ !    Test the input arguments    
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTVCF = LSAME(JOBZ,'F')
+      WNTVCQ = LSAME(JOBZ,'Q')
+      WNTREF = LSAME(JOBF,'R') 
+      WNTEX  = LSAME(JOBF,'E')
+      WANTQ  = LSAME(JOBQ,'Q')
+      WNTTRF = LSAME(JOBT,'R')     
+      MINMN  = MIN(M,N)
+      INFO = 0 
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!       
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
+                                  LSAME(JOBS,'N')) )  THEN 
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ       &
+                              .OR. LSAME(JOBZ,'N')) ) THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
+          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
+           INFO = -4                 
+      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
+          INFO = -5
+       ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
+                LSAME(JOBF,'N') ) )                     THEN
+          INFO = -6    
+      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
+                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
+          INFO = -7
+      ELSE IF ( M < 0 ) THEN
+          INFO = -8
+      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
+          INFO = -9
+      ELSE IF ( LDF < M ) THEN
+          INFO = -11
+      ELSE IF ( LDX < MINMN ) THEN
+          INFO = -13
+      ELSE IF ( LDY < MINMN ) THEN
+          INFO = -15
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
+                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
+          INFO = -16
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
+          INFO = -17
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -21
+      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
+          INFO = -24
+      ELSE IF ( LDV < N-1 ) THEN
+          INFO = -26
+      ELSE IF ( LDS < N-1 ) THEN
+          INFO = -28
+      END IF
+!      
+      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
+          JOBVL = 'V'
+      ELSE
+          JOBVL = 'N'
+      END IF     
+      IF ( INFO == 0 ) THEN  
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and 
+          ! determine minimal and optimal sizes of the 
+          ! workspace at any moment of the run.         
+         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
+             ! All output except K is void. INFO=1 signals
+             ! the void input. In case of a workspace query,
+             ! the minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN  
+               IWORK(1) = 1
+                WORK(1) = 2
+                WORK(2) = 2
+            ELSE                
+               K = 0
+            END IF             
+            INFO = 1  
+            RETURN
+         END IF     
+         
+         MLRWRK = 2
+         MLWORK = 2
+         OLWORK = 2 
+         IMINWR = 1
+         MLWQR  = MAX(1,N)  ! Minimal workspace length for CGEQRF.
+         MLWORK = MAX(MLWORK,MINMN + MLWQR) 
+
+         IF ( LQUERY ) THEN 
+             CALL CGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, &
+                          INFO1 )
+             OLWQR  = INT(ZWORK(1))
+             OLWORK = MAX(OLWORK,MINMN + OLWQR)           
+         END IF
+         CALL CGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
+                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
+                      EIGS, Z, LDZ, RES,  B, LDB, V, LDV,    & 
+                      S, LDS, ZWORK, LZWORK, WORK, -1, IWORK,&
+                      LIWORK, INFO1 )
+         MLWDMD = INT(ZWORK(1))
+         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
+         MLRWRK = MAX(MLRWRK, INT(WORK(1)))
+         IMINWR = MAX(IMINWR, IWORK(1))
+         IF ( LQUERY ) THEN 
+             OLWDMD = INT(ZWORK(2))
+             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
+         END IF
+         IF ( WNTVEC .OR. WNTVCF ) THEN
+            MLWMQR = MAX(1,N) 
+            MLWORK = MAX(MLWORK, MINMN+MLWMQR)
+            IF ( LQUERY ) THEN
+               CALL CUNMQR( 'L','N', M, N, MINMN, F, LDF,  & 
+                            ZWORK, Z, LDZ, ZWORK, -1, INFO1 )
+               OLWMQR = INT(ZWORK(1))
+               OLWORK = MAX(OLWORK, MINMN+OLWMQR)
+            END IF
+         END IF  
+         IF ( WANTQ ) THEN
+            MLWGQR = MAX(1,N)
+            MLWORK = MAX(MLWORK, MINMN+MLWGQR)
+            IF ( LQUERY ) THEN 
+                CALL CUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, &
+                             ZWORK, -1, INFO1 )               
+                OLWGQR = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK, MINMN+OLWGQR)
+            END IF            
+         END IF          
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34
+         IF ( LWORK  < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -32
+         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -30
+      END IF  
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'CGEDMDQ', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          ZWORK(1) = MLWORK
+          ZWORK(2) = OLWORK
+          WORK(1)  = MLRWRK
+          WORK(2)  = MLRWRK
+          RETURN
+      END IF   
+!.....	  
+!     Initial QR factorization that is used to represent the
+!     snapshots as elements of lower dimensional subspace.
+!     For large scale computation with M >>N , at this place 
+!     one can use an out of core QRF.
+!   
+      CALL CGEQRF( M, N, F, LDF, ZWORK,                & 
+                   ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+!      
+!     Define X and Y as the snapshots representations in the
+!     orthogonal basis computed in the QR factorization.
+!     X corresponds to the leading N-1 and Y to the trailing
+!     N-1 snapshots.
+      CALL CLASET( 'L', MINMN, N-1, ZZERO,  ZZERO, X, LDX )
+      CALL CLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
+      CALL CLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
+      IF ( M >= 3 ) THEN
+          CALL CLASET( 'L', MINMN-2, N-2, ZZERO,  ZZERO, &
+                       Y(3,1), LDY )  
+      END IF
+!
+!     Compute the DMD of the projected snapshot pairs (X,Y)   
+      CALL CGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, &
+                  N-1,  X, LDX, Y, LDY, NRNK,   TOL, K,    &
+                  EIGS, Z, LDZ, RES, B,  LDB,   V, LDV,    &
+                  S, LDS, ZWORK(MINMN+1), LZWORK-MINMN,    & 
+                  WORK,   LWORK, IWORK, LIWORK, INFO1 )
+      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
+          ! Return with error code. See CGEDMD for details.
+          INFO = INFO1
+          RETURN
+      ELSE
+          INFO = INFO1
+      END IF    
+!      
+!     The Ritz vectors (Koopman modes) can be explicitly 
+!     formed or returned in factored form.
+      IF ( WNTVEC ) THEN
+        ! Compute the eigenvectors explicitly.  
+        IF ( M > MINMN ) CALL CLASET( 'A', M-MINMN, K, ZZERO, &
+                                     ZZERO, Z(MINMN+1,1), LDZ )
+        CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z,  &
+             LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      ELSE IF ( WNTVCF ) THEN   
+        !   Return the Ritz vectors (eigenvectors) in factored
+        !   form Z*V, where Z contains orthonormal matrix (the
+        !   product of Q from the initial QR factorization and 
+        !   the SVD/POD_basis returned by CGEDMD in X) and the 
+        !   second factor (the eigenvectors of the Rayleigh 
+        !   quotient) is in the array V, as returned by CGEDMD.
+        CALL CLACPY( 'A', N, K, X, LDX, Z, LDZ )
+        IF ( M > N ) CALL CLASET( 'A', M-N, K, ZZERO, ZZERO, & 
+                                 Z(N+1,1), LDZ )
+        CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, &
+                    LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      END IF
+!     
+!     Some optional output variables:
+!
+!     The upper triangular factor R in the initial QR 
+!     factorization is optionally returned in the array Y.
+!     This is useful if this call to CGEDMDQ is to be 
+      
+!     followed by a streaming DMD that is implemented in a 
+!     QR compressed form.
+      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
+         CALL CLASET( 'A', MINMN, N, ZZERO,  ZZERO, Y, LDY )
+         CALL CLACPY( 'U', MINMN, N, F, LDF,        Y, LDY )
+      END IF    
+!
+!     The orthonormal/unitary factor Q in the initial QR 
+!     factorization is optionally returned in the array F. 
+!     Same as with the triangular factor above, this is 
+!     useful in a streaming DMD.
+      IF ( WANTQ ) THEN                   ! Q overwrites F 
+         CALL CUNGQR( M, MINMN, MINMN, F, LDF, ZWORK,     &
+                      ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )  
+      END IF
+!      
+      RETURN
+!      
+      END SUBROUTINE CGEDMDQ
+    
\ No newline at end of file
diff --git a/SRC/dgedmd.f90 b/SRC/dgedmd.f90
new file mode 100644
index 0000000000..20424808f9
--- /dev/null
+++ b/SRC/dgedmd.f90
@@ -0,0 +1,1054 @@
+      SUBROUTINE DGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,  &
+                         M, N, X, LDX, Y, LDY, NRNK, TOL,  &
+                         K, REIG,  IMEIG,   Z, LDZ,  RES,  &
+                         B, LDB, W,  LDW,   S, LDS,        &
+                         WORK, LWORK, IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real64
+!.....
+!     Scalar arguments
+      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+                                 NRNK, LDZ, LDB, LDW,  LDS, &
+                                 LWORK,  LIWORK
+      INTEGER,   INTENT(OUT)  :: K, INFO
+      REAL(KIND=WP), INTENT(IN)  :: TOL
+!     Array arguments
+      REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+      REAL(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+                                      W(LDW,*), S(LDS,*)
+      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*), &
+                                      RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!............................................................
+!     Purpose
+!     =======
+!     DGEDMD computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices. For the input matrices
+!     X and Y such that Y = A*X with an inaccessible matrix
+!     A, DGEDMD computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular
+!     vectors of X. Optionally, DGEDMD returns the residuals
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391 -421, 2014.
+!
+!......................................................................
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations
+!     expressed in this material are those of the author and
+!     do not necessarily reflect the views of the DARPA SBIR
+!     Program Office
+!============================================================
+!     Distribution Statement A:
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022
+!============================================================
+!............................................................
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix.
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'N' :: No data scaling.
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product X(:,1:K)*W, where X
+!            contains a POD basis (leading left singular vectors
+!            of the data matrix X) and W contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of K, X, W, Z.
+!     'N' :: The eigenvectors are not computed.
+!.....
+!     JOBR (input) CHARACTER*1
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will be
+!            computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.
+!.....
+!     WHTSVD (input) INTEGER, WHTSVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: DGESVD (the QR SVD algorithm)
+!     2 :: DGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: DGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M>= 0
+!     The state space dimension (the row dimension of X, Y).
+!.....
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshot pairs
+!     (the number of columns of X and Y).
+!.....
+!     X (input/output) REAL(KIND=WP) M-by-N array
+!     > On entry, X contains the data snapshot matrix X. It is
+!     assumed that the column norms of X are in the range of
+!     the normalized floating point numbers.
+!     < On exit, the leading K columns of X contain a POD basis,
+!     i.e. the leading K left singular vectors of the input
+!     data matrix X, U(:,1:K). All N columns of X contain all
+!     left singular vectors of the input matrix X.
+!     See the descriptions of K, Z and W.
+!.....
+!     LDX (input) INTEGER, LDX >= M
+!     The leading dimension of the array X.
+!.....
+!     Y (input/workspace/output) REAL(KIND=WP) M-by-N array
+!     > On entry, Y contains the data snapshot matrix Y
+!     < On exit,
+!     If JOBR == 'R', the leading K columns of Y  contain
+!     the residual vectors for the computed Ritz pairs.
+!     See the description of RES.
+!     If JOBR == 'N', Y contains the original input data,
+!                     scaled according to the value of JOBS.
+!.....
+!     LDY (input) INTEGER , LDY >= M
+!     The leading dimension of the array Y.
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1).
+!                  This option is recommended.
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.
+!
+!     The numerical rank can be enforced by using positive
+!     value of NRNK as follows:
+!     0 < NRNK <= N :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the descriptions of TOL and  K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.
+!.....
+!     K (output) INTEGER,  0 <= K <= N
+!     The dimension of the POD basis for the data snapshot
+!     matrix X and the number of the computed Ritz pairs.
+!     The value of K is determined according to the rule set
+!     by the parameters NRNK and TOL.
+!     See the descriptions of NRNK and TOL.
+!.....
+!     REIG (output) REAL(KIND=WP) N-by-1 array
+!     The leading K (K<=N) entries of REIG contain
+!     the real parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     See the descriptions of K, IMEIG, and Z.
+!.....
+!     IMEIG (output) REAL(KIND=WP) N-by-1 array
+!     The leading K (K<=N) entries of IMEIG contain
+!     the imaginary parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     The eigenvalues are determined as follows:
+!     If IMEIG(i) == 0, then the corresponding eigenvalue is
+!     real, LAMBDA(i) = REIG(i).
+!     If IMEIG(i)>0, then the corresponding complex
+!     conjugate pair of eigenvalues reads
+!     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMEIG(i)
+!     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMEIG(i)
+!     That is, complex conjugate pairs have consecutive
+!     indices (i,i+1), with the positive imaginary part
+!     listed first.
+!     See the descriptions of K, REIG, and Z.
+!.....
+!     Z (workspace/output) REAL(KIND=WP)  M-by-N array
+!     If JOBZ =='V' then
+!        Z contains real Ritz vectors as follows:
+!        If IMEIG(i)=0, then Z(:,i) is an eigenvector of
+!        the i-th Ritz value; ||Z(:,i)||_2=1.
+!        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
+!        [Z(:,i) Z(:,i+1)] span an invariant subspace and
+!        the Ritz values extracted from this subspace are
+!        REIG(i) + sqrt(-1)*IMEIG(i) and
+!        REIG(i) - sqrt(-1)*IMEIG(i).
+!        The corresponding eigenvectors are
+!        Z(:,i) + sqrt(-1)*Z(:,i+1) and
+!        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
+!        || Z(:,i:i+1)||_F = 1.
+!     If JOBZ == 'F', then the above descriptions hold for
+!     the columns of X(:,1:K)*W(1:K,1:K), where the columns
+!     of W(1:k,1:K) are the computed eigenvectors of the
+!     K-by-K Rayleigh quotient. The columns of W(1:K,1:K)
+!     are similarly structured: If IMEIG(i) == 0 then
+!     X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0
+!     then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and
+!          X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1)
+!     are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
+!     See the descriptions of REIG, IMEIG, X and W.
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) N-by-1 array
+!     RES(1:K) contains the residuals for the K computed
+!     Ritz pairs.
+!     If LAMBDA(i) is real, then
+!        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i) ||_2.
+!     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
+!     then
+!     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
+!     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
+!               [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
+!     It holds that
+!     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
+!     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
+!     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
+!           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
+!     See the description of REIG, IMEIG and Z.
+!.....
+!     B (output) REAL(KIND=WP)  M-by-N array.
+!     IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further
+!     details in the provided references.
+!     If JOBF == 'E', B(1:M,1:K) contains
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.
+!     If JOBF =='N', then B is not referenced.
+!     See the descriptions of X, W, K.
+!.....
+!     LDB (input) INTEGER, LDB >= M
+!     The leading dimension of the array B.
+!.....
+!     W (workspace/output) REAL(KIND=WP) N-by-N array
+!     On exit, W(1:K,1:K) contains the K computed
+!     eigenvectors of the matrix Rayleigh quotient (real and
+!     imaginary parts for each complex conjugate pair of the
+!     eigenvalues). The Ritz vectors (returned in Z) are the
+!     product of X (containing a POD basis for the input
+!     matrix X) and W. See the descriptions of K, S, X and Z.
+!     W is also used as a workspace to temporarily store the
+!     right singular vectors of X.
+!.....
+!     LDW (input) INTEGER, LDW >= N
+!     The leading dimension of the array W.
+!.....
+!     S (workspace/output) REAL(KIND=WP) N-by-N array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by DGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N
+!     The leading dimension of the array S.
+!.....
+!     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!     On exit, WORK(1:N) contains the singular values of
+!     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
+!     If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain
+!     scaling factor WORK(N+2)/WORK(N+1) used to scale X
+!     and Y to avoid overflow in the SVD of X.
+!     This may be of interest if the scaling option is off
+!     and as many as possible smallest eigenvalues are
+!     desired to the highest feasible accuracy.
+!     If the call to DGEDMD is only workspace query, then
+!     WORK(1) contains the minimal workspace length and
+!     WORK(2) is the optimal workspace length. Hence, the
+!     length of WORK is at least 2.
+!     See the description of LWORK.
+!.....
+!     LWORK (input) INTEGER
+!     The minimal length of the workspace vector WORK.
+!     LWORK is calculated as follows:
+!     If WHTSVD == 1 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)).
+!        If JOBZ == 'N'  then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)).
+!        Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal
+!        workspace length of DGESVD.
+!     If WHTSVD == 2 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N))
+!        If JOBZ == 'N', then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N))
+!        Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the
+!        minimal workspace length of DGESDD.
+!     If WHTSVD == 3 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
+!        If JOBZ == 'N', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
+!        Here LWORK_SVD = N+M+MAX(3*N+1,
+!                        MAX(1,3*N+M,5*N),MAX(1,N))
+!        is the minimal workspace length of DGESVDQ.
+!     If WHTSVD == 4 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
+!        If JOBZ == 'N', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
+!        Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the
+!        minimal workspace length of DGEJSV.
+!     The above expressions are not simplified in order to
+!     make the usage of WORK more transparent, and for
+!     easier checking. In any case, LWORK >= 2.
+!     If on entry LWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LWORK =-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!.....
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (M=0 or N=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      REAL(KIND=WP) :: OFL,    ROOTSC, SCALE,  SMALL,  &
+                       SSUM,   XSCL1,  XSCL2
+      INTEGER       :: i,   j, IMINWR,  INFO1, INFO2,  &
+                       LWRKEV, LWRSDD, LWRSVD,         &
+                       LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
+                       MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
+                       OLWORK
+      LOGICAL       :: BADXY,  LQUERY, SCCOLX, SCCOLY, &
+                       WNTEX,  WNTREF, WNTRES, WNTVEC
+      CHARACTER     :: JOBZL,  T_OR_N
+      CHARACTER     :: JSVOPT
+
+!     Local arrays
+!     ~~~~~~~~~~~~
+      REAL(KIND=WP) :: AB(2,2), RDUMMY(2), RDUMMY2(2)
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      REAL(KIND=WP) DLANGE, DLAMCH, DNRM2
+      EXTERNAL      DLANGE, DLAMCH, DNRM2, IDAMAX
+      INTEGER       IDAMAX
+      LOGICAL       DISNAN, LSAME
+      EXTERNAL      DISNAN, LSAME
+
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      DAXPY,  DGEMM,  DSCAL
+      EXTERNAL      DGEEV,  DGEJSV, DGESDD, DGESVD, DGESVDQ, &
+                    DLACPY, DLASCL, DLASSQ, XERBLA
+
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC     DBLE, INT, MAX, SQRT
+!............................................................
+!
+!    Test the input arguments
+!
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      INFO   = 0
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
+                                  LSAME(JOBS,'N')) )   THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
+                              .OR. LSAME(JOBZ,'F')) )  THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
+                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -4
+      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
+                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
+          INFO = -5
+      ELSE IF ( M < 0 )   THEN
+          INFO = -6
+      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
+          INFO = -7
+      ELSE IF ( LDX < M ) THEN
+          INFO = -9
+      ELSE IF ( LDY < M ) THEN
+          INFO = -11
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
+                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
+          INFO = -12
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
+          INFO = -13
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -18
+      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
+          INFO = -21
+      ELSE IF ( LDW < N ) THEN
+          INFO = -23
+      ELSE IF ( LDS < N ) THEN
+          INFO = -25
+      END IF
+!
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( N == 0 ) THEN
+             ! Quick return. All output except K is void.
+             ! INFO=1 signals the void input.
+             ! In case of a workspace query, the default
+             ! minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+                IWORK(1) = 1
+                WORK(1)  = 2
+                WORK(2)  = 2
+            ELSE
+               K = 0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+         MLWORK = MAX(2,N)
+         OLWORK = MAX(2,N)
+         IMINWR = 1
+         SELECT CASE ( WHTSVD )
+         CASE (1)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of DGESVD:
+             ! MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             MLWORK = MAX(MLWORK,N + MWRSVD)
+             IF ( LQUERY ) THEN
+                CALL DGESVD( 'O', 'S', M, N, X, LDX, WORK, &
+                           B, LDB, W, LDW, RDUMMY, -1, INFO1 )
+                LWRSVD = MAX( MWRSVD, INT( RDUMMY(1) ) )
+                OLWORK = MAX(OLWORK,N + LWRSVD)
+             END IF
+         CASE (2)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of DGESDD:
+             ! MWRSDD = 3*MIN(M,N)*MIN(M,N) +
+             ! MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
+             ! IMINWR = 8*MIN(M,N)
+             MWRSDD = 3*MIN(M,N)*MIN(M,N) +                &
+              MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
+             MLWORK = MAX(MLWORK,N + MWRSDD)
+             IMINWR = 8*MIN(M,N)
+             IF ( LQUERY ) THEN
+                CALL DGESDD( 'O', M, N, X, LDX, WORK, B,     &
+                     LDB, W, LDW, RDUMMY, -1, IWORK, INFO1 )
+                LWRSDD = MAX( MWRSDD, INT( RDUMMY(1) ) )
+                OLWORK = MAX(OLWORK,N + LWRSDD)
+             END IF
+         CASE (3)
+             !LWQP3 = 3*N+1
+             !LWORQ = MAX(N, 1)
+             !MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             !MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ ) + MAX(M,2)
+             !MLWORK = N +  MWRSVQ
+             !IMINWR = M+N-1
+             CALL DGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                             X, LDX, WORK, Z, LDZ, W, LDW,   &
+                             NUMRNK, IWORK, LIWORK, RDUMMY,  &
+                             -1, RDUMMY2, -1, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVQ = INT(RDUMMY(2))
+             MLWORK = MAX(MLWORK,N+MWRSVQ+INT(RDUMMY2(1)))
+             IF ( LQUERY ) THEN
+                LWRSVQ = MAX( MWRSVQ, INT(RDUMMY(1)) )
+                OLWORK = MAX(OLWORK,N+LWRSVQ+INT(RDUMMY2(1)))
+             END IF
+         CASE (4)
+             JSVOPT = 'J'
+             !MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N ) ! for JSVOPT='V'
+             MWRSVJ = MAX( 7, 2*M+N, 4*N+N*N, 2*N+N*N+6 )
+             MLWORK = MAX(MLWORK,N+MWRSVJ)
+             IMINWR = MAX( 3, M+3*N )
+             IF ( LQUERY ) THEN
+                OLWORK =  MAX(OLWORK,N+MWRSVJ)
+             END IF
+         END SELECT
+         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
+             JOBZL = 'V'
+         ELSE
+             JOBZL = 'N'
+         END IF
+         ! Workspace calculation to the DGEEV call
+         IF ( LSAME(JOBZL,'V') ) THEN
+             MWRKEV = MAX( 1, 4*N )
+         ELSE
+             MWRKEV = MAX( 1, 3*N )
+         END IF
+         MLWORK = MAX(MLWORK,N+MWRKEV)
+         IF ( LQUERY ) THEN
+                CALL DGEEV( 'N', JOBZL, N, S, LDS, REIG, &
+                    IMEIG, W, LDW, W, LDW, RDUMMY, -1, INFO1 )
+                LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) )
+                OLWORK = MAX( OLWORK, N+LWRKEV )
+         END IF
+!
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -29
+         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -27
+      END IF
+!
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'DGEDMD', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          WORK(1)  = MLWORK
+          WORK(2)  = OLWORK
+          RETURN
+      END IF
+!............................................................
+!
+      OFL   = DLAMCH('O')
+      SMALL = DLAMCH('S')
+      BADXY = .FALSE.
+!
+!     <1> Optional scaling of the snapshots (columns of X, Y)
+!     ==========================================================
+      IF ( SCCOLX ) THEN
+          ! The columns of X will be normalized.
+          ! To prevent overflows, the column norms of X are
+          ! carefully computed using DLASSQ.
+          K = 0
+          DO i = 1, N
+            !WORK(i) = DNRM2( M, X(1,i), 1 )
+            SCALE  = ZERO
+            CALL DLASSQ( M, X(1,i), 1, SCALE, SSUM )
+            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -8
+                CALL XERBLA('DGEDMD',-INFO)
+            END IF
+            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of X(:,i) overflows. First, X(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
+!                 Next, the norm of X(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of X(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL DLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, X(1,i), M, INFO2 )
+                  WORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  WORK(i) =   SCALE * ROOTSC
+                  CALL DLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
+                               X(1,i), M, INFO2 )              ! LAPACK CALL
+!                 X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               WORK(i) = ZERO
+               K = K + 1
+            END IF
+          END DO
+          IF ( K == N ) THEN
+          ! All columns of X are zero. Return error code -8.
+          ! (the 8th input variable had an illegal value)
+          K = 0
+          INFO = -8
+          CALL XERBLA('DGEDMD',-INFO)
+          RETURN
+          END IF
+          DO i = 1, N
+!           Now, apply the same scaling to the columns of Y.
+            IF ( WORK(i) >  ZERO ) THEN
+                CALL DSCAL( M, ONE/WORK(i), Y(1,i), 1 )  ! BLAS CALL
+!               Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)      ! INTRINSIC
+            ELSE IF ( WORK(i) < ZERO ) THEN
+                CALL DLASCL( 'G', 0, 0, -WORK(i),          &
+                     ONE/DBLE(M), M, 1, Y(1,i), M, INFO2 ) ! LAPACK CALL
+            ELSE IF ( Y(IDAMAX(M, Y(1,i),1),i )  &
+                                            /= ZERO ) THEN
+!               X(:,i) is zero vector. For consistency,
+!               Y(:,i) should also be zero. If Y(:,i) is not
+!               zero, then the data might be inconsistent or
+!               corrupted. If JOBS == 'C', Y(:,i) is set to
+!               zero and a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+                IF ( LSAME(JOBS,'C')) &
+                CALL DSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
+            END IF
+          END DO
+      END IF
+  !
+      IF ( SCCOLY ) THEN
+          ! The columns of Y will be normalized.
+          ! To prevent overflows, the column norms of Y are
+          ! carefully computed using DLASSQ.
+          DO i = 1, N
+            !WORK(i) = DNRM2( M, Y(1,i), 1 )
+            SCALE  = ZERO
+            CALL DLASSQ( M, Y(1,i), 1, SCALE, SSUM )
+            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -10
+                CALL XERBLA('DGEDMD',-INFO)
+            END IF
+            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of Y(:,i) overflows. First, Y(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
+!                 Next, the norm of Y(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of Y(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL DLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, Y(1,i), M, INFO2 )
+                  WORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
+               ELSE
+!                 Y(:,i) will be scaled to unit 2-norm
+                  WORK(i) =   SCALE * ROOTSC
+                  CALL DLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
+                               Y(1,i), M, INFO2 )              ! LAPACK CALL
+!                 Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               WORK(i) = ZERO
+            END IF
+         END DO
+         DO i = 1, N
+!           Now, apply the same scaling to the columns of X.
+            IF ( WORK(i) >  ZERO ) THEN
+                CALL DSCAL( M, ONE/WORK(i), X(1,i), 1 )  ! BLAS CALL
+!               X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)      ! INTRINSIC
+            ELSE IF ( WORK(i) < ZERO ) THEN
+                CALL DLASCL( 'G', 0, 0, -WORK(i),          &
+                     ONE/DBLE(M), M, 1, X(1,i), M, INFO2 ) ! LAPACK CALL
+            ELSE IF ( X(IDAMAX(M, X(1,i),1),i )  &
+                                           /= ZERO ) THEN
+!               Y(:,i) is zero vector.  If X(:,i) is not
+!               zero, then a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+            END IF
+         END DO
+       END IF
+!
+!     <2> SVD of the data snapshot matrix X.
+!     =====================================
+!     The left singular vectors are stored in the array X.
+!     The right singular vectors are in the array W.
+!     The array W will later on contain the eigenvectors
+!     of a Rayleigh quotient.
+      NUMRNK = N
+      SELECT CASE ( WHTSVD )
+         CASE (1)
+             CALL DGESVD( 'O', 'S', M, N, X, LDX, WORK, B, &
+                  LDB, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL
+             T_OR_N = 'T'
+         CASE (2)
+            CALL DGESDD( 'O', M, N, X, LDX, WORK, B, LDB, W, &
+                 LDW, WORK(N+1), LWORK-N, IWORK, INFO1 )   ! LAPACK CALL
+            T_OR_N = 'T'
+         CASE (3)
+              CALL DGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                   X, LDX, WORK, Z, LDZ, W, LDW, &
+                   NUMRNK, IWORK, LIWORK, WORK(N+MAX(2,M)+1),&
+                   LWORK-N-MAX(2,M), WORK(N+1), MAX(2,M), INFO1)     ! LAPACK CALL
+              CALL DLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
+         T_OR_N = 'T'
+         CASE (4)
+              CALL DGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
+                   N, X, LDX, WORK, Z, LDZ, W, LDW, &
+                   WORK(N+1), LWORK-N, IWORK, INFO1 )    ! LAPACK CALL
+              CALL DLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
+              T_OR_N = 'N'
+              XSCL1 = WORK(N+1)
+              XSCL2 = WORK(N+2)
+              IF ( XSCL1 /=  XSCL2 ) THEN
+                 ! This is an exceptional situation: it occurs
+                 ! if the data matrices are not scaled and the
+                 ! largest singular value of X overflows.
+                 ! In that case DGEJSV can return the SVD
+                 ! in scaled form. The scaling factor can be used
+                 ! to rescale the data (X and Y).
+                 CALL DLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
+              END IF
+      END SELECT
+!
+      IF ( INFO1 > 0 ) THEN
+         ! The SVD selected subroutine did not converge.
+         ! Return with an error code.
+         INFO = 2
+         RETURN
+      END IF
+!
+      IF ( WORK(1) == ZERO ) THEN
+          ! The largest computed singular value of (scaled)
+          ! X is zero. Return error code -8
+          ! (the 8th input variable had an illegal value).
+          K = 0
+          INFO = -8
+          CALL XERBLA('DGEDMD',-INFO)
+          RETURN
+      END IF
+!
+      !<3> Determine the numerical rank of the data
+      !    snapshots matrix X. This depends on the
+      !    parameters NRNK and TOL.
+
+      SELECT CASE ( NRNK )
+          CASE ( -1 )
+               K = 1
+               DO i = 2, NUMRNK
+                 IF ( ( WORK(i) <= WORK(1)*TOL ) .OR. &
+                      ( WORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE ( -2 )
+               K = 1
+               DO i = 1, NUMRNK-1
+                 IF ( ( WORK(i+1) <= WORK(i)*TOL  ) .OR. &
+                      ( WORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE DEFAULT
+               K = 1
+               DO i = 2, NRNK
+                  IF ( WORK(i) <= SMALL ) EXIT
+                  K = K + 1
+               END DO
+          END SELECT
+      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
+      !   snapshot data in the input matrix X.
+
+      !<4> Compute the Rayleigh quotient S = U^T * A * U.
+      !    Depending on the requested outputs, the computation
+      !    is organized to compute additional auxiliary
+      !    matrices (for the residuals and refinements).
+      !
+      !    In all formulas below, we need V_k*Sigma_k^(-1)
+      !    where either V_k is in W(1:N,1:K), or V_k^T is in
+      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
+      IF ( LSAME(T_OR_N, 'N') ) THEN
+          DO i = 1, K
+           CALL DSCAL( N, ONE/WORK(i), W(1,i), 1 )    ! BLAS CALL
+           ! W(1:N,i) = (ONE/WORK(i)) * W(1:N,i)      ! INTRINSIC
+          END DO
+      ELSE
+          ! This non-unit stride access is due to the fact
+          ! that DGESVD, DGESVDQ and DGESDD return the
+          ! transposed matrix of the right singular vectors.
+          !DO i = 1, K
+          ! CALL DSCAL( N, ONE/WORK(i), W(i,1), LDW )    ! BLAS CALL
+          ! ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N)      ! INTRINSIC
+          !END DO
+          DO i = 1, K
+              WORK(N+i) = ONE/WORK(i)
+          END DO
+          DO j = 1, N
+             DO i = 1, K
+                 W(i,j) = (WORK(N+i))*W(i,j)
+             END DO
+          END DO
+      END IF
+!
+      IF ( WNTREF ) THEN
+         !
+         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
+         ! for computing the refined Ritz vectors
+         ! (optionally, outside DGEDMD).
+          CALL DGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, &
+                      LDW, ZERO, Z, LDZ )                        ! BLAS CALL
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRINSIC, for T_OR_N=='T'
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRINSIC, for T_OR_N=='N'
+          !
+          ! At this point Z contains
+          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
+          ! this is needed for computing the residuals.
+          ! This matrix is  returned in the array B and
+          ! it can be used to compute refined Ritz vectors.
+          CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
+          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
+
+          CALL DGEMM( 'T', 'N', K, K, M, ONE, X, LDX, Z, &
+                      LDZ, ZERO, S, LDS )                        ! BLAS CALL
+          ! S(1:K,1:K) = MATMUL(TRANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC
+          ! At this point S = U^T * A * U is the Rayleigh quotient.
+      ELSE
+        ! A * U(:,1:K) is not explicitly needed and the
+        ! computation is organized differently. The Rayleigh
+        ! quotient is computed more efficiently.
+        CALL DGEMM( 'T', 'N', K, N, M, ONE, X, LDX, Y, LDY, &
+                   ZERO, Z, LDZ )                                   ! BLAS CALL
+        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! INTRINSIC
+        ! In the two DGEMM calls here, can use K for LDZ.
+        CALL DGEMM( 'N', T_OR_N, K, K, N, ONE, Z, LDZ, W, &
+                    LDW, ZERO, S, LDS )                         ! BLAS CALL
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T'
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRINSIC, for T_OR_N=='N'
+        ! At this point S = U^T * A * U is the Rayleigh quotient.
+        ! If the residuals are requested, save scaled V_k into Z.
+        ! Recall that V_k or V_k^T is stored in W.
+        IF ( WNTRES .OR. WNTEX ) THEN
+          IF ( LSAME(T_OR_N, 'N') ) THEN
+              CALL DLACPY( 'A', N, K, W, LDW, Z, LDZ )
+          ELSE
+              CALL DLACPY( 'A', K, N, W, LDW, Z, LDZ )
+          END IF
+        END IF
+      END IF
+!
+      !<5> Compute the Ritz values and (if requested) the
+      !   right eigenvectors of the Rayleigh quotient.
+      !
+      CALL DGEEV( 'N', JOBZL, K, S, LDS, REIG, IMEIG, W, &
+                  LDW, W, LDW, WORK(N+1), LWORK-N, INFO1 )   ! LAPACK CALL
+      !
+      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
+      ! quotient. Even in the case of complex spectrum, all
+      ! computation is done in real arithmetic. REIG and
+      ! IMEIG are the real and the imaginary parts of the
+      ! eigenvalues, so that the spectrum is given as
+      ! REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs
+      ! are listed at consecutive positions. For such a
+      ! complex conjugate pair of the eigenvalues, the
+      ! corresponding eigenvectors are also a complex
+      ! conjugate pair with the real and imaginary parts
+      ! stored column-wise in W at the corresponding
+      ! consecutive column indices. See the description of Z.
+      ! Also, see the description of DGEEV.
+      IF ( INFO1 > 0 ) THEN
+         ! DGEEV failed to compute the eigenvalues and
+         ! eigenvectors of the Rayleigh quotient.
+         INFO = 3
+         RETURN
+      END IF
+!
+      ! <6> Compute the eigenvectors (if requested) and,
+      ! the residuals (if requested).
+      !
+      IF ( WNTVEC .OR. WNTEX ) THEN
+      IF ( WNTRES ) THEN
+          IF ( WNTREF ) THEN
+            ! Here, if the refinement is requested, we have
+            ! A*U(:,1:K) already computed and stored in Z.
+            ! For the residuals, need Y = A * U(:,1:K) * W.
+            CALL DGEMM( 'N', 'N', M, K, K, ONE, Z, LDZ, W, &
+                       LDW, ZERO, Y, LDY )               ! BLAS CALL
+            ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC
+            ! This frees Z; Y contains A * U(:,1:K) * W.
+          ELSE
+            ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL DGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
+                       W, LDW, ZERO, S, LDS)
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+                       LDS, ZERO, Z, LDZ)
+            ! Save a copy of Z into Y and free Z for holding
+            ! the Ritz vectors.
+            CALL DLACPY( 'A', M, K, Z, LDZ, Y, LDY )
+            IF ( WNTEX ) CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB )
+          END IF
+      ELSE IF ( WNTEX ) THEN
+          ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL DGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
+                       W, LDW, ZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+                       LDS, ZERO, B, LDB )
+            ! The above call replaces the following two calls
+            ! that were used in the developing-testing phase.
+            ! CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+            !           LDS, ZERO, Z, LDZ)
+            ! Save a copy of Z into B and free Z for holding
+            ! the Ritz vectors.
+            ! CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB )
+      END IF
+!
+      ! Compute the real form of the Ritz vectors
+      IF ( WNTVEC ) CALL DGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
+                   ZERO, Z, LDZ )                           ! BLAS CALL
+      ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
+!
+      IF ( WNTRES ) THEN
+         i = 1
+         DO WHILE ( i <= K )
+            IF ( IMEIG(i) == ZERO ) THEN
+                ! have a real eigenvalue with real eigenvector
+                CALL DAXPY( M, -REIG(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
+                ! Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i)            ! INTRINSIC
+                RES(i) = DNRM2( M, Y(1,i), 1)                         ! BLAS CALL
+                i = i + 1
+            ELSE
+               ! Have a complex conjugate pair
+               ! REIG(i) +- sqrt(-1)*IMEIG(i).
+               ! Since all computation is done in real
+               ! arithmetic, the formula for the residual
+               ! is recast for real representation of the
+               ! complex conjugate eigenpair. See the
+               ! description of RES.
+               AB(1,1) =  REIG(i)
+               AB(2,1) = -IMEIG(i)
+               AB(1,2) =  IMEIG(i)
+               AB(2,2) =  REIG(i)
+               CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                           LDZ, AB, 2, ONE, Y(1,i), LDY )          ! BLAS CALL
+               ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INTRINSIC
+               RES(i)   = DLANGE( 'F', M, 2, Y(1,i), LDY, &
+                                  WORK(N+1) )                      ! LAPACK CALL
+               RES(i+1) = RES(i)
+               i = i + 2
+            END IF
+         END DO
+      END IF
+      END IF
+!
+      IF ( WHTSVD == 4 ) THEN
+          WORK(N+1) = XSCL1
+          WORK(N+2) = XSCL2
+      END IF
+!
+!     Successful exit.
+      IF ( .NOT. BADXY ) THEN
+         INFO = 0
+      ELSE
+         ! A warning on possible data inconsistency.
+         ! This should be a rare event.
+         INFO = 4
+      END IF
+!............................................................
+      RETURN
+!     ......
+      END SUBROUTINE DGEDMD
+
diff --git a/SRC/dgedmdq.f90 b/SRC/dgedmdq.f90
new file mode 100644
index 0000000000..bedfba4720
--- /dev/null
+++ b/SRC/dgedmdq.f90
@@ -0,0 +1,704 @@
+SUBROUTINE DGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+                    LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
+                    Z, LDZ, RES,  B,     LDB,   V, LDV,    &
+                    S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real64
+!.....
+!     Scalar arguments
+      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+                                JOBT, JOBF
+      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+                                LDY, NRNK, LDZ, LDB, LDV,  &
+                                LDS, LWORK,  LIWORK
+      INTEGER,   INTENT(OUT) :: INFO,    K
+      REAL(KIND=WP), INTENT(IN)    ::   TOL
+!     Array arguments
+      REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+      REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
+                                      Z(LDZ,*), B(LDB,*),  &
+                                      V(LDV,*), S(LDS,*)
+      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
+                                      RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!.....
+!     Purpose
+!     =======
+!     DGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices, using a QR factorization
+!     based compression of the data. For the input matrices
+!     X and Y such that Y = A*X with an unaccessible matrix
+!     A, DGEDMDQ computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular
+!     vectors of X. Optionally, DGEDMDQ returns the residuals
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391 -421, 2014.
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations
+!     expressed in this material are those of the author and
+!     do not necessarily reflect the views of the DARPA SBIR
+!     Program Office.
+!============================================================
+!     Distribution Statement A:
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022
+!============================================================
+!......................................................................
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix. The data snapshots are the columns
+!     of F. The leading N-1 columns of F are denoted X and the
+!     trailing N-1 columns are denoted Y.
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'N' :: No data scaling.
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Z*V, where Z
+!            is orthonormal and V contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of F, V, Z.
+!     'Q' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Q*Z, where Z
+!            contains the eigenvectors of the compression of the
+!            underlying discretized operator onto the span of
+!            the data snapshots. See the descriptions of F, V, Z.
+!            Q is from the initial QR factorization.
+!     'N' :: The eigenvectors are not computed.
+!.....
+!     JOBR (input) CHARACTER*1
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will
+!            be computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must not be 'N'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBQ (input) CHARACTER*1
+!     Specifies whether to explicitly compute and return the
+!     orthogonal matrix from the QR factorization.
+!     'Q' :: The matrix Q of the QR factorization of the data
+!            snapshot matrix is computed and stored in the
+!            array F. See the description of F.
+!     'N' :: The matrix Q is not explicitly computed.
+!.....
+!     JOBT (input) CHARACTER*1
+!     Specifies whether to return the upper triangular factor
+!     from the QR factorization.
+!     'R' :: The matrix R of the QR factorization of the data
+!            snapshot matrix F is returned in the array Y.
+!            See the description of Y and Further details.
+!     'N' :: The matrix R is not returned.
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.
+!     To be useful on exit, this option needs JOBQ='Q'.
+!.....
+!     WHTSVD (input) INTEGER, WHTSVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: DGESVD (the QR SVD algorithm)
+!     2 :: DGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: DGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M >= 0
+!     The state space dimension (the number of rows of F).
+!.....
+!     N (input) INTEGER, 0 <= N <= M+1
+!     The number of data snapshots from a single trajectory,
+!     taken at equidistant discrete times. This is the
+!     number of columns of F.
+!.....
+!     F (input/output) REAL(KIND=WP) M-by-N array
+!     > On entry,
+!     the columns of F are the sequence of data snapshots
+!     from a single trajectory, taken at equidistant discrete
+!     times. It is assumed that the column norms of F are
+!     in the range of the normalized floating point numbers.
+!     < On exit,
+!     If JOBQ == 'Q', the array F contains the orthogonal
+!     matrix/factor of the QR factorization of the initial
+!     data snapshots matrix F. See the description of JOBQ.
+!     If JOBQ == 'N', the entries in F strictly below the main
+!     diagonal contain, column-wise, the information on the
+!     Householder vectors, as returned by DGEQRF. The
+!     remaining information to restore the orthogonal matrix
+!     of the initial QR factorization is stored in WORK(1:N).
+!     See the description of WORK.
+!.....
+!     LDF (input) INTEGER, LDF >= M
+!     The leading dimension of the array F.
+!.....
+!     X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
+!     X is used as workspace to hold representations of the
+!     leading N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, the leading K columns of X contain the leading
+!     K left singular vectors of the above described content
+!     of X. To lift them to the space of the left singular
+!     vectors U(:,1:K) of the input data, pre-multiply with the
+!     Q factor from the initial QR factorization.
+!     See the descriptions of F, K, V  and Z.
+!.....
+!     LDX (input) INTEGER, LDX >= MIN(M,N)
+!     The leading dimension of the array X.
+!.....
+!     Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
+!     Y is used as workspace to hold representations of the
+!     trailing N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit,
+!     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
+!     triangular factor from the QR factorization of the data
+!     snapshot matrix F; Y must then have N columns.
+!.....
+!     LDY (input) INTEGER , LDY >= MIN(M,N)
+!     The leading dimension of the array Y.
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.
+!     The numerical rank can be enforced by using positive
+!     value of NRNK as follows:
+!     0 < NRNK <= N-1 :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the description of K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.
+!.....
+!     K (output) INTEGER,  0 <= K <= N-1
+!     The dimension of the SVD/POD basis for the leading N-1
+!     data snapshots (columns of F) and the number of the
+!     computed Ritz pairs. The value of K is determined
+!     according to the rule set by the parameters NRNK and
+!     TOL. See the descriptions of NRNK and TOL.
+!.....
+!     REIG (output) REAL(KIND=WP) (N-1)-by-1 array
+!     The leading K (K<=N-1) entries of REIG contain
+!     the real parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     See the descriptions of K, IMEIG, Z.
+!.....
+!     IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array
+!     The leading K (K<=N-1) entries of IMEIG contain
+!     the imaginary parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     The eigenvalues are determined as follows:
+!     If IMEIG(i) == 0, then the corresponding eigenvalue is
+!     real, LAMBDA(i) = REIG(i).
+!     If IMEIG(i)>0, then the corresponding complex
+!     conjugate pair of eigenvalues reads
+!     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMEIG(i)
+!     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMEIG(i)
+!     That is, complex conjugate pairs have consecutive
+!     indices (i,i+1), with the positive imaginary part
+!     listed first.
+!     See the descriptions of K, REIG, Z.
+!.....
+!     Z (workspace/output) REAL(KIND=WP)  M-by-(N-1) array
+!     If JOBZ =='V' then
+!        Z contains real Ritz vectors as follows:
+!        If IMEIG(i)=0, then Z(:,i) is an eigenvector of
+!        the i-th Ritz value.
+!        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
+!        [Z(:,i) Z(:,i+1)] span an invariant subspace and
+!        the Ritz values extracted from this subspace are
+!        REIG(i) + sqrt(-1)*IMEIG(i) and
+!        REIG(i) - sqrt(-1)*IMEIG(i).
+!        The corresponding eigenvectors are
+!        Z(:,i) + sqrt(-1)*Z(:,i+1) and
+!        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
+!     If JOBZ == 'F', then the above descriptions hold for
+!     the columns of Z*V, where the columns of V are the
+!     eigenvectors of the K-by-K Rayleigh quotient, and Z is
+!     orthonormal. The columns of V are similarly structured:
+!     If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if
+!     IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and
+!                       Z*V(:,i)-sqrt(-1)*Z*V(:,i+1)
+!     are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
+!     See the descriptions of REIG, IMEIG, X and V.
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) (N-1)-by-1 array
+!     RES(1:K) contains the residuals for the K computed
+!     Ritz pairs.
+!     If LAMBDA(i) is real, then
+!        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i)||_2.
+!     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
+!     then
+!     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
+!     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
+!               [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
+!     It holds that
+!     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
+!     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
+!     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
+!           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
+!     See the description of Z.
+!.....
+!     B (output) REAL(KIND=WP)  MIN(M,N)-by-(N-1) array.
+!     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further
+!     details in the provided references.
+!     If JOBF == 'E', B(1:N,1:K) contains
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.
+!     In both cases, the content of B can be lifted to the
+!     original dimension of the input data by pre-multiplying
+!     with the Q factor from the initial QR factorization.
+!     Here A denotes a compression of the underlying operator.
+!     See the descriptions of F and X.
+!     If JOBF =='N', then B is not referenced.
+!.....
+!     LDB (input) INTEGER, LDB >= MIN(M,N)
+!     The leading dimension of the array B.
+!.....
+!     V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array
+!     On exit, V(1:K,1:K) contains the K eigenvectors of
+!     the Rayleigh quotient. The eigenvectors of a complex
+!     conjugate pair of eigenvalues are returned in real form
+!     as explained in the description of Z. The Ritz vectors
+!     (returned in Z) are the product of X and V; see
+!     the descriptions of X and Z.
+!.....
+!     LDV (input) INTEGER, LDV >= N-1
+!     The leading dimension of the array V.
+!.....
+!     S (output) REAL(KIND=WP) (N-1)-by-(N-1) array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by DGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N-1
+!     The leading dimension of the array S.
+!.....
+!     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!     On exit,
+!     WORK(1:MIN(M,N)) contains the scalar factors of the
+!     elementary reflectors as returned by DGEQRF of the
+!     M-by-N input matrix F.
+!     WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of
+!     the input submatrix F(1:M,1:N-1).
+!     If the call to DGEDMDQ is only workspace query, then
+!     WORK(1) contains the minimal workspace length and
+!     WORK(2) is the optimal workspace length. Hence, the
+!     length of work is at least 2.
+!     See the description of LWORK.
+!.....
+!     LWORK (input) INTEGER
+!     The minimal length of the  workspace vector WORK.
+!     LWORK is calculated as follows:
+!     Let MLWQR  = N (minimal workspace for DGEQRF[M,N])
+!         MLWDMD = minimal workspace for DGEDMD (see the
+!                  description of LWORK in DGEDMD) for
+!                  snapshots of dimensions MIN(M,N)-by-(N-1)
+!         MLWMQR = N (minimal workspace for
+!                    DORMQR['L','N',M,N,N])
+!         MLWGQR = N (minimal workspace for DORGQR[M,N,N])
+!     Then
+!     LWORK = MAX(MIN(M,N)+MLWQR, MIN(M,N)+MLWDMD)
+!     is updated as follows:
+!        if   JOBZ == 'V' or JOBZ == 'F' THEN
+!             LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWMQR )
+!        if   JOBQ == 'Q' THEN
+!             LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWGQR)
+!     If on entry LWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LWORK =-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     Let M1=MIN(M,N), N1=N-1. Then
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!.....
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (M=0 or N=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      INTEGER           :: IMINWR, INFO1,  MLWDMD, MLWGQR, &
+                           MLWMQR, MLWORK, MLWQR,  MINMN,  &
+                           OLWDMD, OLWGQR, OLWMQR, OLWORK, &
+                           OLWQR
+      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
+                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
+                           WNTVCQ, WNTREF, WNTEX
+      CHARACTER(LEN=1)  :: JOBVL
+!
+!     Local array
+!     ~~~~~~~~~~~
+      REAL(KIND=WP) :: RDUMMY(2)
+!
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      LOGICAL       LSAME
+      EXTERNAL      LSAME
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      DGEMM
+      EXTERNAL      DGEQRF, DLACPY, DLASET, DORGQR, &
+                    DORMQR, XERBLA
+
+!     External subroutines
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      DGEDMD
+
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC      MAX, MIN, INT
+ !..........................................................
+ !
+ !    Test the input arguments
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTVCF = LSAME(JOBZ,'F')
+      WNTVCQ = LSAME(JOBZ,'Q')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      WANTQ  = LSAME(JOBQ,'Q')
+      WNTTRF = LSAME(JOBT,'R')
+      MINMN  = MIN(M,N)
+      INFO = 0
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
+                                  LSAME(JOBS,'N')) )  THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ    &
+                              .OR. LSAME(JOBZ,'N')) ) THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    &
+          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
+          INFO = -4
+      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
+          INFO = -5
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -6
+      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
+                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
+          INFO = -7
+      ELSE IF ( M < 0 ) THEN
+          INFO = -8
+      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
+          INFO = -9
+      ELSE IF ( LDF < M ) THEN
+          INFO = -11
+      ELSE IF ( LDX < MINMN ) THEN
+          INFO = -13
+      ELSE IF ( LDY < MINMN ) THEN
+          INFO = -15
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    &
+                       ((NRNK >= 1).AND.(NRNK <= N-1))) ) THEN
+          INFO = -16  ! DGEDMD is given N-1 snapshots, hence NRNK <= N-1
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
+          INFO = -17
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -22
+      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
+          INFO = -25
+      ELSE IF ( LDV < N-1 ) THEN
+          INFO = -27
+      ELSE IF ( LDS < N-1 ) THEN
+          INFO = -29
+      END IF
+!
+      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
+          JOBVL = 'V'
+      ELSE
+          JOBVL = 'N'
+      END IF
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
+             ! All output except K is void. INFO=1 signals
+             ! the void input. In case of a workspace query,
+             ! the minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+               IWORK(1) = 1
+                WORK(1) = 2
+                WORK(2) = 2
+            ELSE
+               K = 0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+         MLWQR  = MAX(1,N)  ! Minimal workspace length for DGEQRF.
+         MLWORK = MINMN + MLWQR
+         IF ( LQUERY ) THEN
+             CALL DGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, &
+                          INFO1 )
+             OLWQR  = INT(RDUMMY(1))
+             OLWORK = MIN(M,N) + OLWQR
+         END IF
+         CALL DGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,&
+                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     &
+                      REIG, IMEIG, Z, LDZ, RES,  B, LDB,     &
+                      V, LDV, S, LDS, WORK, -1, IWORK,       &
+                      LIWORK, INFO1 )
+         MLWDMD = INT(WORK(1))
+         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
+         IMINWR = IWORK(1)
+         IF ( LQUERY ) THEN
+             OLWDMD = INT(WORK(2))
+             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
+         END IF
+         IF ( WNTVEC .OR. WNTVCF ) THEN
+            MLWMQR = MAX(1,N)
+            MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR)
+            IF ( LQUERY ) THEN
+               CALL DORMQR( 'L','N', M, N, MINMN, F, LDF,  &
+                            WORK, Z, LDZ, WORK, -1, INFO1 )
+               OLWMQR = INT(WORK(1))
+               OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR)
+            END IF
+         END IF
+         IF ( WANTQ ) THEN
+            MLWGQR = N
+            MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR)
+            IF ( LQUERY ) THEN
+                CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, &
+                             WORK, -1, INFO1 )
+                OLWGQR = INT(WORK(1))
+                OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR)
+            END IF
+         END IF
+         IMINWR = MAX( 1, IMINWR )
+         MLWORK = MAX( 2, MLWORK )
+         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -31
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -33
+      END IF
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'DGEDMDQ', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          WORK(1)  = MLWORK
+          WORK(2)  = OLWORK
+          RETURN
+      END IF
+!.....
+!     Initial QR factorization that is used to represent the
+!     snapshots as elements of lower dimensional subspace.
+!     For large scale computation with M >>N , at this place
+!     one can use an out of core QRF.
+!
+      CALL DGEQRF( M, N, F, LDF, WORK,               &
+                   WORK(MINMN+1), LWORK-MINMN, INFO1 )
+!
+!     Define X and Y as the snapshots representations in the
+!     orthogonal basis computed in the QR factorization.
+!     X corresponds to the leading N-1 and Y to the trailing
+!     N-1 snapshots.
+      CALL DLASET( 'L', MINMN, N-1, ZERO,  ZERO, X, LDX )
+      CALL DLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
+      CALL DLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
+      IF ( MINMN >= 3 ) THEN  ! clear DGEQRF reflector data below the subdiagonal of Y
+          CALL DLASET( 'L', MINMN-2, N-2, ZERO,  ZERO, &
+                       Y(3,1), LDY )
+      END IF
+!
+!     Compute the DMD of the projected snapshot pairs (X,Y)
+      CALL DGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,  &
+                   N-1, X, LDX, Y, LDY, NRNK,   TOL, K,     &
+                   REIG, IMEIG, Z, LDZ, RES, B, LDB, V,     &
+                   LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, &
+                   IWORK, LIWORK, INFO1 )
+      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
+          ! Return with error code. See DGEDMD for details.
+          INFO = INFO1
+          RETURN
+      ELSE
+          INFO = INFO1
+      END IF
+!
+!     The Ritz vectors (Koopman modes) can be explicitly
+!     formed or returned in factored form.
+      IF ( WNTVEC ) THEN
+        ! Compute the eigenvectors explicitly.
+        IF ( M > MINMN ) CALL DLASET( 'A', M-MINMN, K, ZERO, &
+                                     ZERO, Z(MINMN+1,1), LDZ )
+        CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
+             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      ELSE IF ( WNTVCF ) THEN
+        !   Return the Ritz vectors (eigenvectors) in factored
+        !   form Z*V, where Z contains orthonormal matrix (the
+        !   product of Q from the initial QR factorization and
+        !   the SVD/POD_basis returned by DGEDMD in X) and the
+        !   second factor (the eigenvectors of the Rayleigh
+        !   quotient) is in the array V, as returned by DGEDMD.
+        CALL DLACPY( 'A', MINMN, K, X, LDX, Z, LDZ )  ! X has MINMN rows (N may be M+1)
+        IF ( M > MINMN ) CALL DLASET( 'A', M-MINMN, K, ZERO, &
+                                  ZERO, Z(MINMN+1,1), LDZ )
+        CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
+             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      END IF
+!
+!     Some optional output variables:
+!
+!     The upper triangular factor R in the initial QR
+!     factorization is optionally returned in the array Y.
+!     This is useful if this call to DGEDMDQ is to be
+!     followed by a streaming DMD that is implemented in a
+!     QR compressed form.
+      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y
+         CALL DLASET( 'A', MINMN, N, ZERO,  ZERO, Y, LDY )
+         CALL DLACPY( 'U', MINMN, N, F, LDF,      Y, LDY )
+      END IF
+!
+!     The orthonormal/orthogonal factor Q in the initial QR
+!     factorization is optionally returned in the array F.
+!     Same as with the triangular factor above, this is
+!     useful in a streaming DMD.
+      IF ( WANTQ ) THEN               ! Q overwrites F
+         CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, &
+              WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      END IF
+!
+      RETURN
+!
+      END SUBROUTINE DGEDMDQ
+    
\ No newline at end of file
diff --git a/SRC/sgedmd.f90 b/SRC/sgedmd.f90
new file mode 100644
index 0000000000..49cb11527c
--- /dev/null
+++ b/SRC/sgedmd.f90
@@ -0,0 +1,1054 @@
+      SUBROUTINE SGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,  &
+                         M, N, X, LDX, Y, LDY, NRNK, TOL,  &
+                         K, REIG,  IMEIG,   Z, LDZ,  RES,  &
+                         B, LDB, W,  LDW,   S, LDS,        &
+                         WORK, LWORK, IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real32
+!.....
+!     Scalar arguments
+      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+                                 NRNK, LDZ, LDB, LDW,  LDS, &
+                                 LWORK,  LIWORK
+      INTEGER,   INTENT(OUT)  :: K, INFO
+      REAL(KIND=WP), INTENT(IN) ::  TOL
+!     Array arguments
+      REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+      REAL(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+                                      W(LDW,*), S(LDS,*)
+      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*), &
+                                      RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!............................................................
+!     Purpose
+!     =======
+!     SGEDMD computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices. For the input matrices
+!     X and Y such that Y = A*X with an inaccessible matrix
+!     A, SGEDMD computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular
+!     vectors of X. Optionally, SGEDMD returns the residuals
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391-421, 2014.
+!
+!......................................................................
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations
+!     expressed in this material are those of the author and
+!     do not necessarily reflect the views of the DARPA SBIR
+!     Program Office
+!============================================================
+!     Distribution Statement A:
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022
+!============================================================
+!......................................................................
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix.
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'N' :: No data scaling.
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product X(:,1:K)*W, where X
+!            contains a POD basis (leading left singular vectors
+!            of the data matrix X) and W contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of K, X, W, Z.
+!     'N' :: The eigenvectors are not computed.
+!.....
+!     JOBR (input) CHARACTER*1
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will be
+!            computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.
+!.....
+!     WHTSVD (input) INTEGER, WHTSVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: SGESVD (the QR SVD algorithm)
+!     2 :: SGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: SGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M>= 0
+!     The state space dimension (the row dimension of X, Y).
+!.....
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshot pairs
+!     (the number of columns of X and Y).
+!.....
+!     X (input/output) REAL(KIND=WP) M-by-N array
+!     > On entry, X contains the data snapshot matrix X. It is
+!     assumed that the column norms of X are in the range of
+!     the normalized floating point numbers.
+!     < On exit, the leading K columns of X contain a POD basis,
+!     i.e. the leading K left singular vectors of the input
+!     data matrix X, U(:,1:K). All N columns of X contain all
+!     left singular vectors of the input matrix X.
+!     See the descriptions of K, Z and W.
+!.....
+!     LDX (input) INTEGER, LDX >= M
+!     The leading dimension of the array X.
+!.....
+!     Y (input/workspace/output) REAL(KIND=WP) M-by-N array
+!     > On entry, Y contains the data snapshot matrix Y
+!     < On exit,
+!     If JOBR == 'R', the leading K columns of Y  contain
+!     the residual vectors for the computed Ritz pairs.
+!     See the description of RES.
+!     If JOBR == 'N', Y contains the original input data,
+!                     scaled according to the value of JOBS.
+!.....
+!     LDY (input) INTEGER , LDY >= M
+!     The leading dimension of the array Y.
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode for computing the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.
+!     The numerical rank can be enforced by using positive
+!     value of NRNK as follows:
+!     0 < NRNK <= N :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the descriptions of TOL and  K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.
+!.....
+!     K (output) INTEGER,  0 <= K <= N
+!     The dimension of the POD basis for the data snapshot
+!     matrix X and the number of the computed Ritz pairs.
+!     The value of K is determined according to the rule set
+!     by the parameters NRNK and TOL.
+!     See the descriptions of NRNK and TOL.
+!.....
+!     REIG (output) REAL(KIND=WP) N-by-1 array
+!     The leading K (K<=N) entries of REIG contain
+!     the real parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     See the descriptions of K, IMEIG, and Z.
+!.....
+!     IMEIG (output) REAL(KIND=WP) N-by-1 array
+!     The leading K (K<=N) entries of IMEIG contain
+!     the imaginary parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     The eigenvalues are determined as follows:
+!     If IMEIG(i) == 0, then the corresponding eigenvalue is
+!     real, LAMBDA(i) = REIG(i).
+!     If IMEIG(i)>0, then the corresponding complex
+!     conjugate pair of eigenvalues reads
+!     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMEIG(i)
+!     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMEIG(i)
+!     That is, complex conjugate pairs have consecutive
+!     indices (i,i+1), with the positive imaginary part
+!     listed first.
+!     See the descriptions of K, REIG, and Z.
+!.....
+!     Z (workspace/output) REAL(KIND=WP)  M-by-N array
+!     If JOBZ =='V' then
+!        Z contains real Ritz vectors as follows:
+!        If IMEIG(i)=0, then Z(:,i) is an eigenvector of
+!        the i-th Ritz value; ||Z(:,i)||_2=1.
+!        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
+!        [Z(:,i) Z(:,i+1)] span an invariant subspace and
+!        the Ritz values extracted from this subspace are
+!        REIG(i) + sqrt(-1)*IMEIG(i) and
+!        REIG(i) - sqrt(-1)*IMEIG(i).
+!        The corresponding eigenvectors are
+!        Z(:,i) + sqrt(-1)*Z(:,i+1) and
+!        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
+!        || Z(:,i:i+1)||_F = 1.
+!     If JOBZ == 'F', then the above descriptions hold for
+!     the columns of X(:,1:K)*W(1:K,1:K), where the columns
+!     of W(1:K,1:K) are the computed eigenvectors of the
+!     K-by-K Rayleigh quotient. The columns of W(1:K,1:K)
+!     are similarly structured: If IMEIG(i) == 0 then
+!     X(:,1:K)*W(:,i) is an eigenvector, and if IMEIG(i)>0
+!     then X(:,1:K)*W(:,i)+sqrt(-1)*X(:,1:K)*W(:,i+1) and
+!          X(:,1:K)*W(:,i)-sqrt(-1)*X(:,1:K)*W(:,i+1)
+!     are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
+!     See the descriptions of REIG, IMEIG, X and W.
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) N-by-1 array
+!     RES(1:K) contains the residuals for the K computed
+!     Ritz pairs.
+!     If LAMBDA(i) is real, then
+!        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i) ||_2.
+!     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
+!     then
+!     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
+!     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
+!               [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
+!     It holds that
+!     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
+!     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
+!     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
+!           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
+!     See the description of REIG, IMEIG and Z.
+!.....
+!     B (output) REAL(KIND=WP)  M-by-N array.
+!     If JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further
+!     details in the provided references.
+!     If JOBF == 'E', B(1:M,1:K) contains
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.
+!     If JOBF =='N', then B is not referenced.
+!     See the descriptions of X, W, K.
+!.....
+!     LDB (input) INTEGER, LDB >= M
+!     The leading dimension of the array B.
+!.....
+!     W (workspace/output) REAL(KIND=WP) N-by-N array
+!     On exit, W(1:K,1:K) contains the K computed
+!     eigenvectors of the matrix Rayleigh quotient (real and
+!     imaginary parts for each complex conjugate pair of the
+!     eigenvalues). The Ritz vectors (returned in Z) are the
+!     product of X (containing a POD basis for the input
+!     matrix X) and W. See the descriptions of K, S, X and Z.
+!     W is also used as a workspace to temporarily store the
+!     right singular vectors of X.
+!.....
+!     LDW (input) INTEGER, LDW >= N
+!     The leading dimension of the array W.
+!.....
+!     S (workspace/output) REAL(KIND=WP) N-by-N array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by SGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N
+!     The leading dimension of the array S.
+!.....
+!     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!     On exit, WORK(1:N) contains the singular values of
+!     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
+!     If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain
+!     scaling factor WORK(N+2)/WORK(N+1) used to scale X
+!     and Y to avoid overflow in the SVD of X.
+!     This may be of interest if the scaling option is off
+!     and as many as possible smallest eigenvalues are
+!     desired to the highest feasible accuracy.
+!     If the call to SGEDMD is only workspace query, then
+!     WORK(1) contains the minimal workspace length and
+!     WORK(2) is the optimal workspace length. Hence, the
+!     length of work is at least 2.
+!     See the description of LWORK.
+!.....
+!     LWORK (input) INTEGER
+!     The minimal length of the workspace vector WORK.
+!     LWORK is calculated as follows:
+!     If WHTSVD == 1 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N)).
+!        If JOBZ == 'N'  then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N)).
+!        Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal
+!        workspace length of SGESVD.
+!     If WHTSVD == 2 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,4*N))
+!        If JOBZ == 'N', then
+!        LWORK >= MAX(2, N + LWORK_SVD, N+MAX(1,3*N))
+!        Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the
+!        minimal workspace length of SGESDD.
+!     If WHTSVD == 3 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
+!        If JOBZ == 'N', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
+!        Here LWORK_SVD = N+M+MAX(3*N+1,
+!                        MAX(1,3*N+M,5*N),MAX(1,N))
+!        is the minimal workspace length of SGESVDQ.
+!     If WHTSVD == 4 ::
+!        If JOBZ == 'V', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,4*N))
+!        If JOBZ == 'N', then
+!        LWORK >= MAX(2, N+LWORK_SVD,N+MAX(1,3*N))
+!        Here LWORK_SVD = MAX(7,2*M+N,4*N+N*N,2*N+N*N+6)
+!        is the minimal workspace length of SGEJSV.
+!     The above expressions are not simplified in order to
+!     make the usage of WORK more transparent, and for
+!     easier checking. In any case, LWORK >= 2.
+!     If on entry LWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LWORK =-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!.....
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (M=0 or N=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,   &
+                       SSUM,  XSCL1,  XSCL2
+      INTEGER       ::  i,  j, IMINWR,  INFO1, INFO2,  &
+                       LWRKEV, LWRSDD, LWRSVD, &
+                       LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
+                       MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
+                       OLWORK
+      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY, &
+                        WNTEX, WNTREF, WNTRES, WNTVEC
+      CHARACTER     ::  JOBZL, T_OR_N
+      CHARACTER     ::  JSVOPT
+
+!     Local arrays
+!     ~~~~~~~~~~~~
+      REAL(KIND=WP) :: AB(2,2), RDUMMY(2), RDUMMY2(2)
+
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      REAL(KIND=WP) SLANGE, SLAMCH, SNRM2
+      EXTERNAL      SLANGE, SLAMCH, SNRM2, ISAMAX
+      INTEGER       ISAMAX
+      LOGICAL       SISNAN, LSAME
+      EXTERNAL      SISNAN, LSAME
+
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      SAXPY,  SGEMM,  SSCAL
+      EXTERNAL      SGEEV,  SGEJSV, SGESDD, SGESVD, SGESVDQ, &
+                    SLACPY, SLASCL, SLASSQ, XERBLA
+
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC     INT, FLOAT, MAX, SQRT
+!............................................................
+!
+!    Test the input arguments
+!
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      INFO   = 0
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
+                                  LSAME(JOBS,'N')) )   THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
+                              .OR. LSAME(JOBZ,'F')) )  THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
+                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -4
+      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
+                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
+          INFO = -5
+      ELSE IF ( M < 0 )   THEN
+          INFO = -6
+      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
+          INFO = -7
+      ELSE IF ( LDX < M ) THEN
+          INFO = -9
+      ELSE IF ( LDY < M ) THEN
+          INFO = -11
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
+                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
+          INFO = -12
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
+          INFO = -13
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -18
+      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
+          INFO = -21
+      ELSE IF ( LDW < N ) THEN
+          INFO = -23
+      ELSE IF ( LDS < N ) THEN
+          INFO = -25
+      END IF
+!
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( N == 0 ) THEN
+             ! Quick return. All output except K is void.
+             ! INFO=1 signals the void input.
+             ! In case of a workspace query, the default
+             ! minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+                IWORK(1) = 1
+                WORK(1)  = 2
+                WORK(2)  = 2
+            ELSE
+               K = 0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+         MLWORK = MAX(2,N)
+         OLWORK = MAX(2,N)
+         IMINWR = 1
+         SELECT CASE ( WHTSVD )
+         CASE (1)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of SGESVD:
+             ! MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             MLWORK = MAX(MLWORK,N + MWRSVD)
+             IF ( LQUERY ) THEN
+                CALL SGESVD( 'O', 'S', M, N, X, LDX, WORK, &
+                           B, LDB, W, LDW, RDUMMY, -1, INFO1 )
+                LWRSVD = MAX( MWRSVD, INT( RDUMMY(1) ) )
+                OLWORK = MAX(OLWORK,N + LWRSVD)
+             END IF
+         CASE (2)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of SGESDD:
+             ! MWRSDD = 3*MIN(M,N)*MIN(M,N) +
+             ! MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
+             ! IMINWR = 8*MIN(M,N)
+             MWRSDD = 3*MIN(M,N)*MIN(M,N) +                &
+              MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) )
+             MLWORK = MAX(MLWORK,N + MWRSDD)
+             IMINWR = 8*MIN(M,N)
+             IF ( LQUERY ) THEN
+                CALL SGESDD( 'O', M, N, X, LDX, WORK, B,     &
+                     LDB, W, LDW, RDUMMY, -1, IWORK, INFO1 )
+                LWRSDD = MAX( MWRSDD, INT( RDUMMY(1) ) )
+                OLWORK = MAX(OLWORK,N + LWRSDD)
+             END IF
+         CASE (3)
+             !LWQP3 = 3*N+1
+             !LWORQ = MAX(N, 1)
+             !MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
+             !MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ )+ MAX(M,2)
+             !MLWORK = N + MWRSVQ
+             !IMINWR = M+N-1
+             CALL SGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                          X, LDX, WORK, Z, LDZ, W, LDW,   &
+                             NUMRNK, IWORK, -1, RDUMMY,   &
+                             -1, RDUMMY2, -1, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVQ = INT(RDUMMY(2))
+             MLWORK = MAX(MLWORK,N+MWRSVQ+INT(RDUMMY2(1)))
+             IF ( LQUERY ) THEN
+                LWRSVQ = INT(RDUMMY(1))
+                OLWORK = MAX(OLWORK,N+LWRSVQ+INT(RDUMMY2(1)))
+             END IF
+         CASE (4)
+             JSVOPT = 'J'
+             !MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N )! for JSVOPT='V'
+             MWRSVJ = MAX( 7, 2*M+N, 4*N+N*N, 2*N+N*N+6 )
+             MLWORK = MAX(MLWORK,N+MWRSVJ)
+             IMINWR = MAX( 3, M+3*N )
+             IF ( LQUERY ) THEN
+                OLWORK = MAX(OLWORK,N+MWRSVJ)
+             END IF
+         END SELECT
+         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
+             JOBZL = 'V'
+         ELSE
+             JOBZL = 'N'
+         END IF
+         ! Workspace calculation for the SGEEV call
+         IF ( LSAME(JOBZL,'V') ) THEN
+             MWRKEV = MAX( 1, 4*N )
+         ELSE
+             MWRKEV = MAX( 1, 3*N )
+         END IF
+         MLWORK = MAX(MLWORK,N+MWRKEV)
+         IF ( LQUERY ) THEN
+                CALL SGEEV( 'N', JOBZL, N, S, LDS, REIG, &
+                    IMEIG, W, LDW, W, LDW, RDUMMY, -1, INFO1 )
+                LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) )
+                OLWORK = MAX( OLWORK, N+LWRKEV )
+         END IF
+!
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -29
+         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -27
+      END IF
+!
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'SGEDMD', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          WORK(1)  = MLWORK
+          WORK(2)  = OLWORK
+          RETURN
+      END IF
+!............................................................
+!
+      OFL   = SLAMCH('O')
+      SMALL = SLAMCH('S')
+      BADXY = .FALSE.
+!
+!     <1> Optional scaling of the snapshots (columns of X, Y)
+!     ==========================================================
+      IF ( SCCOLX ) THEN
+          ! The columns of X will be normalized.
+          ! To prevent overflows, the column norms of X are
+          ! carefully computed using SLASSQ.
+          K = 0
+          DO i = 1, N
+            !WORK(i) = SNRM2( M, X(1,i), 1 )
+            SCALE  = ZERO
+            CALL SLASSQ( M, X(1,i), 1, SCALE, SSUM )
+            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -8
+                CALL XERBLA('SGEDMD',-INFO)
+            END IF
+            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of X(:,i) overflows. First, X(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
+!                 Next, the norm of X(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of X(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL SLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, X(1,i), M, INFO2 )
+                  WORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  WORK(i) =   SCALE * ROOTSC
+                  CALL SLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
+                               X(1,i), M, INFO2 )              ! LAPACK CALL
+!                 X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               WORK(i) = ZERO
+               K = K + 1
+            END IF
+          END DO
+          IF ( K == N ) THEN
+          ! All columns of X are zero. Return error code -8.
+          ! (the 8th input variable had an illegal value)
+          K = 0
+          INFO = -8
+          CALL XERBLA('SGEDMD',-INFO)
+          RETURN
+          END IF
+          DO i = 1, N
+!           Now, apply the same scaling to the columns of Y.
+            IF ( WORK(i) >  ZERO ) THEN
+                CALL SSCAL( M, ONE/WORK(i), Y(1,i), 1 )  ! BLAS CALL
+!               Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)      ! INTRINSIC
+            ELSE IF ( WORK(i) < ZERO ) THEN
+                CALL SLASCL( 'G', 0, 0, -WORK(i),          &
+                     ONE/FLOAT(M), M, 1, Y(1,i), M, INFO2 ) ! LAPACK CALL
+            ELSE IF ( Y(ISAMAX(M, Y(1,i),1),i )  &
+                                            /= ZERO ) THEN
+!               X(:,i) is zero vector. For consistency,
+!               Y(:,i) should also be zero. If Y(:,i) is not
+!               zero, then the data might be inconsistent or
+!               corrupted. If JOBS == 'C', Y(:,i) is set to
+!               zero and a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+                IF ( LSAME(JOBS,'C')) &
+                CALL SSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
+            END IF
+          END DO
+      END IF
+  !
+      IF ( SCCOLY ) THEN
+          ! The columns of Y will be normalized.
+          ! To prevent overflows, the column norms of Y are
+          ! carefully computed using SLASSQ.
+          DO i = 1, N
+            !WORK(i) = SNRM2( M, Y(1,i), 1 )
+            SCALE  = ZERO
+            CALL SLASSQ( M, Y(1,i), 1, SCALE, SSUM )
+            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -10
+                CALL XERBLA('SGEDMD',-INFO)
+            END IF
+            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of Y(:,i) overflows. First, Y(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
+!                 Next, the norm of Y(:,i) is stored without
+!                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of Y(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL SLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, Y(1,i), M, INFO2 )
+                  WORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
+               ELSE
+!                 Y(:,i) will be scaled to unit 2-norm
+                  WORK(i) =   SCALE * ROOTSC
+                  CALL SLASCL( 'G',0, 0, WORK(i), ONE, M, 1, &
+                               Y(1,i), M, INFO2 )              ! LAPACK CALL
+!                 Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               WORK(i) = ZERO
+            END IF
+         END DO
+         DO i = 1, N
+!           Now, apply the same scaling to the columns of X.
+            IF ( WORK(i) >  ZERO ) THEN
+                CALL SSCAL( M, ONE/WORK(i), X(1,i), 1 )  ! BLAS CALL
+!               X(1:M,i) = (ONE/WORK(i)) * X(1:M,i)      ! INTRINSIC
+            ELSE IF ( WORK(i) < ZERO ) THEN
+                CALL SLASCL( 'G', 0, 0, -WORK(i),          &
+                     ONE/FLOAT(M), M, 1, X(1,i), M, INFO2 ) ! LAPACK CALL
+            ELSE IF ( X(ISAMAX(M, X(1,i),1),i )  &
+                                           /= ZERO ) THEN
+!               Y(:,i) is zero vector.  If X(:,i) is not
+!               zero, then a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+            END IF
+         END DO
+       END IF
+!
+!     <2> SVD of the data snapshot matrix X.
+!     =====================================
+!     The left singular vectors are stored in the array X.
+!     The right singular vectors are in the array W.
+!     The array W will later on contain the eigenvectors
+!     of a Rayleigh quotient.
+      NUMRNK = N
+      SELECT CASE ( WHTSVD )
+         CASE (1)
+             CALL SGESVD( 'O', 'S', M, N, X, LDX, WORK, B, &
+                  LDB, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL
+             T_OR_N = 'T'
+         CASE (2)
+            CALL SGESDD( 'O', M, N, X, LDX, WORK, B, LDB, W, &
+                 LDW, WORK(N+1), LWORK-N, IWORK, INFO1 )   ! LAPACK CALL
+            T_OR_N = 'T'
+         CASE (3)
+              CALL SGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                   X, LDX, WORK, Z, LDZ, W, LDW, &
+                   NUMRNK, IWORK, LIWORK, WORK(N+MAX(2,M)+1),&
+                   LWORK-N-MAX(2,M), WORK(N+1), MAX(2,M), INFO1)     ! LAPACK CALL
+              CALL SLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
+         T_OR_N = 'T'
+         CASE (4)
+              CALL SGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
+                   N, X, LDX, WORK, Z, LDZ, W, LDW, &
+                   WORK(N+1), LWORK-N, IWORK, INFO1 )    ! LAPACK CALL
+              CALL SLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
+              T_OR_N = 'N'
+              XSCL1 = WORK(N+1)
+              XSCL2 = WORK(N+2)
+              IF ( XSCL1 /=  XSCL2 ) THEN
+                 ! This is an exceptional situation: if the
+                 ! data matrices are not scaled and the
+                 ! largest singular value of X overflows,
+                 ! then SGEJSV can return the SVD in scaled
+                 ! form. The scaling factor can be used
+                 ! to rescale the data (X and Y).
+                 CALL SLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
+              END IF
+      END SELECT
+!
+      IF ( INFO1 > 0 ) THEN
+         ! The SVD selected subroutine did not converge.
+         ! Return with an error code.
+         INFO = 2
+         RETURN
+      END IF
+!
+      IF ( WORK(1) == ZERO ) THEN
+          ! The largest computed singular value of (scaled)
+          ! X is zero. Return error code -8
+          ! (the 8th input variable had an illegal value).
+          K = 0
+          INFO = -8
+          CALL XERBLA('SGEDMD',-INFO)
+          RETURN
+      END IF
+!
+      !<3> Determine the numerical rank of the data
+      !    snapshots matrix X. This depends on the
+      !    parameters NRNK and TOL.
+
+      SELECT CASE ( NRNK )
+          CASE ( -1 )
+               K = 1
+               DO i = 2, NUMRNK
+                 IF ( ( WORK(i) <= WORK(1)*TOL ) .OR. &
+                      ( WORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE ( -2 )
+               K = 1
+               DO i = 1, NUMRNK-1
+                 IF ( ( WORK(i+1) <= WORK(i)*TOL  ) .OR. &
+                      ( WORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE DEFAULT
+               K = 1
+               DO i = 2, NRNK
+                  IF ( WORK(i) <= SMALL ) EXIT
+                  K = K + 1
+               END DO
+          END SELECT
+      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
+      !   snapshot data in the input matrix X.
+
+      !<4> Compute the Rayleigh quotient S = U^T * A * U.
+      !    Depending on the requested outputs, the computation
+      !    is organized to compute additional auxiliary
+      !    matrices (for the residuals and refinements).
+      !
+      !    In all formulas below, we need V_k*Sigma_k^(-1)
+      !    where either V_k is in W(1:N,1:K), or V_k^T is in
+      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
+      IF ( LSAME(T_OR_N, 'N') ) THEN
+          DO i = 1, K
+           CALL SSCAL( N, ONE/WORK(i), W(1,i), 1 )    ! BLAS CALL
+           ! W(1:N,i) = (ONE/WORK(i)) * W(1:N,i)      ! INTRINSIC
+          END DO
+      ELSE
+          ! This non-unit stride access is due to the fact
+          ! that SGESVD, SGESVDQ and SGESDD return the
+          ! transposed matrix of the right singular vectors.
+          !DO i = 1, K
+          ! CALL SSCAL( N, ONE/WORK(i), W(i,1), LDW )    ! BLAS CALL
+          ! ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N)      ! INTRINSIC
+          !END DO
+          DO i = 1, K
+              WORK(N+i) = ONE/WORK(i)
+          END DO
+          DO j = 1, N
+             DO i = 1, K
+                 W(i,j) = (WORK(N+i))*W(i,j)
+             END DO
+          END DO
+      END IF
+!
+      IF ( WNTREF ) THEN
+         !
+         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
+         ! for computing the refined Ritz vectors
+         ! (optionally, outside SGEDMD).
+          CALL SGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, &
+                      LDW, ZERO, Z, LDZ )                        ! BLAS CALL
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRINSIC, for T_OR_N=='T'
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRINSIC, for T_OR_N=='N'
+          !
+          ! At this point Z contains
+          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
+          ! this is needed for computing the residuals.
+          ! This matrix is  returned in the array B and
+          ! it can be used to compute refined Ritz vectors.
+          CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! LAPACK CALL
+          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
+
+          CALL SGEMM( 'T', 'N', K, K, M, ONE, X, LDX, Z, &
+                      LDZ, ZERO, S, LDS )                        ! BLAS CALL
+          ! S(1:K,1:K) = MATMUL(TRANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC
+          ! At this point S = U^T * A * U is the Rayleigh quotient.
+      ELSE
+        ! A * U(:,1:K) is not explicitly needed and the
+        ! computation is organized differently. The Rayleigh
+        ! quotient is computed more efficiently.
+        CALL SGEMM( 'T', 'N', K, N, M, ONE, X, LDX, Y, LDY, &
+                   ZERO, Z, LDZ )                                   ! BLAS CALL
+        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! INTRINSIC
+        ! In the two SGEMM calls here, can use K for LDZ
+        CALL SGEMM( 'N', T_OR_N, K, K, N, ONE, Z, LDZ, W, &
+                    LDW, ZERO, S, LDS )                         ! BLAS CALL
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T'
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRINSIC, for T_OR_N=='N'
+        ! At this point S = U^T * A * U is the Rayleigh quotient.
+        ! If the residuals are requested, save scaled V_k into Z.
+        ! Recall that V_k or V_k^T is stored in W.
+        IF ( WNTRES .OR. WNTEX ) THEN
+          IF ( LSAME(T_OR_N, 'N') ) THEN
+              CALL SLACPY( 'A', N, K, W, LDW, Z, LDZ )
+          ELSE
+              CALL SLACPY( 'A', K, N, W, LDW, Z, LDZ )
+          END IF
+        END IF
+      END IF
+!
+      !<5> Compute the Ritz values and (if requested) the
+      !   right eigenvectors of the Rayleigh quotient.
+      !
+      CALL SGEEV( 'N', JOBZL, K, S, LDS, REIG, IMEIG, W, &
+                  LDW, W, LDW, WORK(N+1), LWORK-N, INFO1 )   ! LAPACK CALL
+      !
+      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
+      ! quotient. Even in the case of complex spectrum, all
+      ! computation is done in real arithmetic. REIG and
+      ! IMEIG are the real and the imaginary parts of the
+      ! eigenvalues, so that the spectrum is given as
+      ! REIG(:) + sqrt(-1)*IMEIG(:). Complex conjugate pairs
+      ! are listed at consecutive positions. For such a
+      ! complex conjugate pair of the eigenvalues, the
+      ! corresponding eigenvectors are also a complex
+      ! conjugate pair with the real and imaginary parts
+      ! stored column-wise in W at the corresponding
+      ! consecutive column indices. See the description of Z.
+      ! Also, see the description of SGEEV.
+      IF ( INFO1 > 0 ) THEN
+         ! SGEEV failed to compute the eigenvalues and
+         ! eigenvectors of the Rayleigh quotient.
+         INFO = 3
+         RETURN
+      END IF
+!
+      ! <6> Compute the eigenvectors (if requested) and,
+      ! the residuals (if requested).
+      !
+      IF ( WNTVEC .OR. WNTEX ) THEN
+      IF ( WNTRES ) THEN
+          IF ( WNTREF ) THEN
+            ! Here, if the refinement is requested, we have
+            ! A*U(:,1:K) already computed and stored in Z.
+            ! For the residuals, need Y = A * U(:,1:K) * W.
+            CALL SGEMM( 'N', 'N', M, K, K, ONE, Z, LDZ, W, &
+                       LDW, ZERO, Y, LDY )               ! BLAS CALL
+            ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC
+            ! This frees Z; Y contains A * U(:,1:K) * W.
+          ELSE
+            ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL SGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
+                       W, LDW, ZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+                       LDS, ZERO, Z, LDZ )
+            ! Save a copy of Z into Y and free Z for holding
+            ! the Ritz vectors.
+            CALL SLACPY( 'A', M, K, Z, LDZ, Y, LDY )
+            IF ( WNTEX ) CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB )
+          END IF
+      ELSE IF ( WNTEX ) THEN
+          ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL SGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, &
+                       W, LDW, ZERO, S, LDS )
+            ! Then, compute B = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+                       LDS, ZERO, B, LDB )
+            ! The above call replaces the following two calls
+            ! that were used in the developing-testing phase.
+            ! CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, &
+            !           LDS, ZERO, Z, LDZ)
+            ! Save a copy of Z into B and free Z for holding
+            ! the Ritz vectors.
+            ! CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB )
+      END IF
+!
+      ! Compute the real form of the Ritz vectors
+      IF ( WNTVEC ) CALL SGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
+                   ZERO, Z, LDZ )                           ! BLAS CALL
+      ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
+!
+      IF ( WNTRES ) THEN
+         i = 1
+         DO WHILE ( i <= K )
+            IF ( IMEIG(i) == ZERO ) THEN
+                ! have a real eigenvalue with real eigenvector
+                CALL SAXPY( M, -REIG(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
+                ! Y(1:M,i) = Y(1:M,i) - REIG(i) * Z(1:M,i)            ! INTRINSIC
+                RES(i) = SNRM2( M, Y(1,i), 1 )                         ! BLAS CALL
+                i = i + 1
+            ELSE
+               ! Have a complex conjugate pair
+               ! REIG(i) +- sqrt(-1)*IMEIG(i).
+               ! Since all computation is done in real
+               ! arithmetic, the formula for the residual
+               ! is recast for real representation of the
+               ! complex conjugate eigenpair. See the
+               ! description of RES.
+               AB(1,1) =  REIG(i)
+               AB(2,1) = -IMEIG(i)
+               AB(1,2) =  IMEIG(i)
+               AB(2,2) =  REIG(i)
+               CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                           LDZ, AB, 2, ONE, Y(1,i), LDY )          ! BLAS CALL
+               ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INTRINSIC
+               RES(i)   = SLANGE( 'F', M, 2, Y(1,i), LDY, &
+                                  WORK(N+1) )                      ! LAPACK CALL
+               RES(i+1) = RES(i)
+               i = i + 2
+            END IF
+         END DO
+      END IF
+      END IF
+!
+      IF ( WHTSVD == 4 ) THEN
+          WORK(N+1) = XSCL1
+          WORK(N+2) = XSCL2
+      END IF
+!
+!     Successful exit.
+      IF ( .NOT. BADXY ) THEN
+         INFO = 0
+      ELSE
+         ! A warning on possible data inconsistency.
+         ! This should be a rare event.
+         INFO = 4
+      END IF
+!............................................................
+      RETURN
+!     ......
+      END SUBROUTINE SGEDMD
+
diff --git a/SRC/sgedmdq.f90 b/SRC/sgedmdq.f90
new file mode 100644
index 0000000000..acd5d56c61
--- /dev/null
+++ b/SRC/sgedmdq.f90
@@ -0,0 +1,703 @@
+SUBROUTINE SGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+                    LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
+                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
+                    S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env 
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real32     
+!.....      
+!     Scalar arguments       
+      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+                                JOBT, JOBF
+      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+                                LDY, NRNK, LDZ, LDB, LDV,  &
+                                LDS, LWORK,  LIWORK
+      INTEGER,   INTENT(OUT) :: INFO,   K      
+      REAL(KIND=WP), INTENT(IN)    ::   TOL     
+!     Array arguments      
+      REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+      REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
+                                      Z(LDZ,*), B(LDB,*),  &
+                                      V(LDV,*), S(LDS,*)
+      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
+                                      RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!.....      
+!     Purpose  
+!     =======
+!     SGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices, using a QR factorization
+!     based compression of the data. For the input matrices
+!     X and Y such that Y = A*X with an inaccessible matrix
+!     A, SGEDMDQ computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular 
+!     vectors of X. Optionally, SGEDMDQ returns the residuals 
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].      
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.      
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391-421, 2014.
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations 
+!     expressed in this material are those of the author and 
+!     do not necessarily reflect the views of the DARPA SBIR 
+!     Program Office.      
+!============================================================
+!     Distribution Statement A: 
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022
+!============================================================      
+!......................................................................      
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix. The data snapshots are the columns
+!     of F. The leading N-1 columns of F are denoted X and the
+!     trailing N-1 columns are denoted Y. 
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)    
+!     'N' :: No data scaling.   
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Z*V, where Z
+!            is orthonormal and V contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of F, V, Z.
+!     'Q' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Q*Z, where Z
+!            contains the eigenvectors of the compression of the
+!            underlying discretized operator onto the span of
+!            the data snapshots. See the descriptions of F, V, Z. 
+!            Q is from the initial QR factorization.  
+!     'N' :: The eigenvectors are not computed.  
+!.....      
+!     JOBR (input) CHARACTER*1 
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will
+!            be computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBQ (input) CHARACTER*1 
+!     Specifies whether to explicitly compute and return the
+!     orthogonal matrix from the QR factorization.
+!     'Q' :: The matrix Q of the QR factorization of the data
+!            snapshot matrix is computed and stored in the
+!            array F. See the description of F.       
+!     'N' :: The matrix Q is not explicitly computed.
+!.....
+!     JOBT (input) CHARACTER*1 
+!     Specifies whether to return the upper triangular factor
+!     from the QR factorization.
+!     'R' :: The matrix R of the QR factorization of the data 
+!            snapshot matrix F is returned in the array Y.
+!            See the description of Y and Further details.       
+!     'N' :: The matrix R is not returned.    
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are 
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.   
+!     To be useful on exit, this option needs JOBQ='Q'.      
+!.....
+!     WHTSVD (input) INTEGER, WHTSVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: SGESVD (the QR SVD algorithm)
+!     2 :: SGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: SGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger 
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M >= 0 
+!     The state space dimension (the number of rows of F)
+!.....      
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshots from a single trajectory,
+!     taken at equidistant discrete times. This is the 
+!     number of columns of F.
+!.....
+!     F (input/output) REAL(KIND=WP) M-by-N array
+!     > On entry,
+!     the columns of F are the sequence of data snapshots 
+!     from a single trajectory, taken at equidistant discrete
+!     times. It is assumed that the column norms of F are 
+!     in the range of the normalized floating point numbers. 
+!     < On exit,
+!     If JOBQ == 'Q', the array F contains the orthogonal 
+!     matrix/factor of the QR factorization of the initial 
+!     data snapshots matrix F. See the description of JOBQ. 
+!     If JOBQ == 'N', the entries in F strictly below the main
+!     diagonal contain, column-wise, the information on the 
+!     Householder vectors, as returned by SGEQRF. The 
+!     remaining information to restore the orthogonal matrix
+!     of the initial QR factorization is stored in WORK(1:N). 
+!     See the description of WORK.
+!.....
+!     LDF (input) INTEGER, LDF >= M 
+!     The leading dimension of the array F.
+!.....
+!     X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
+!     X is used as workspace to hold representations of the
+!     leading N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, the leading K columns of X contain the leading
+!     K left singular vectors of the above described content
+!     of X. To lift them to the space of the left singular
+!     vectors U(:,1:K) of the input data, pre-multiply with the
+!     Q factor from the initial QR factorization. 
+!     See the descriptions of F, K, V  and Z.
+!.....      
+!     LDX (input) INTEGER, LDX >= N  
+!     The leading dimension of the array X 
+!.....
+!     Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
+!     Y is used as workspace to hold representations of the
+!     trailing N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, 
+!     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
+!     triangular factor from the QR factorization of the data
+!     snapshot matrix F.
+!.....      
+!     LDY (input) INTEGER , LDY >= N
+!     The leading dimension of the array Y   
+!.....
+!     NRNK (input) INTEGER
+!     Determines how the numerical rank is computed,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.   
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.     
+!     The numerical rank can be enforced by using positive 
+!     value of NRNK as follows: 
+!     0 < NRNK <= N-1 :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the description of K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.  
+!.....
+!     K (output) INTEGER,  0 <= K <= N 
+!     The dimension of the SVD/POD basis for the leading N-1
+!     data snapshots (columns of F) and the number of the 
+!     computed Ritz pairs. The value of K is determined
+!     according to the rule set by the parameters NRNK and 
+!     TOL. See the descriptions of NRNK and TOL. 
+!.....
+!     REIG (output) REAL(KIND=WP) (N-1)-by-1 array
+!     The leading K (K<=N) entries of REIG contain
+!     the real parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     See the descriptions of K, IMEIG, Z.
+!.....
+!     IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array
+!     The leading K (K<N) entries of IMEIG contain
+!     the imaginary parts of the computed eigenvalues
+!     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
+!     The eigenvalues are determined as follows:
+!     If IMEIG(i) == 0, then the corresponding eigenvalue is
+!     real, LAMBDA(i) = REIG(i).
+!     If IMEIG(i)>0, then the corresponding complex
+!     conjugate pair of eigenvalues reads
+!     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMEIG(i)
+!     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMEIG(i)
+!     That is, complex conjugate pairs have consecutive
+!     indices (i,i+1), with the positive imaginary part
+!     listed first.
+!     See the descriptions of K, REIG, Z.     
+!.....      
+!     Z (workspace/output) REAL(KIND=WP)  M-by-(N-1) array
+!     If JOBZ =='V' then
+!        Z contains real Ritz vectors as follows:
+!        If IMEIG(i)=0, then Z(:,i) is an eigenvector of
+!        the i-th Ritz value.
+!        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
+!        [Z(:,i) Z(:,i+1)] span an invariant subspace and
+!        the Ritz values extracted from this subspace are
+!        REIG(i) + sqrt(-1)*IMEIG(i) and
+!        REIG(i) - sqrt(-1)*IMEIG(i).
+!        The corresponding eigenvectors are
+!        Z(:,i) + sqrt(-1)*Z(:,i+1) and
+!        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
+!     If JOBZ == 'F', then the above descriptions hold for
+!     the columns of Z*V, where the columns of V are the
+!     eigenvectors of the K-by-K Rayleigh quotient, and Z is
+!     orthonormal. The columns of V are similarly structured:
+!     If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if 
+!     IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and
+!                       Z*V(:,i)-sqrt(-1)*Z*V(:,i+1)
+!     are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
+!     See the descriptions of REIG, IMEIG, X and V.
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) (N-1)-by-1 array
+!     RES(1:K) contains the residuals for the K computed 
+!     Ritz pairs.       
+!     If LAMBDA(i) is real, then
+!        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i) ||_2.
+!     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
+!     then
+!     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
+!     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
+!               [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
+!     It holds that
+!     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
+!     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
+!     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
+!           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
+!     See the description of Z.
+!.....
+!     B (output) REAL(KIND=WP)  MIN(M,N)-by-(N-1) array.
+!     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further 
+!     details in the provided references. 
+!     If JOBF == 'E', B(1:N,1:K) contains
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.   
+!     In both cases, the content of B can be lifted to the 
+!     original dimension of the input data by pre-multiplying
+!     with the Q factor from the initial QR factorization.     
+!     Here A denotes a compression of the underlying operator.      
+!     See the descriptions of F and X.
+!     If JOBF =='N', then B is not referenced.
+!.....
+!     LDB (input) INTEGER, LDB >= MIN(M,N)
+!     The leading dimension of the array B.
+!.....      
+!     V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array
+!     On exit, V(1:K,1:K) contains the K eigenvectors of
+!     the Rayleigh quotient. The eigenvectors of a complex
+!     conjugate pair of eigenvalues are returned in real form
+!     as explained in the description of Z. The Ritz vectors
+!     (returned in Z) are the product of X and V; see
+!     the descriptions of X and Z.
+!.....
+!     LDV (input) INTEGER, LDV >= N-1
+!     The leading dimension of the array V.
+!.....      
+!     S (output) REAL(KIND=WP) (N-1)-by-(N-1) array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by SGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N-1        
+!     The leading dimension of the array S.
+!.....
+!     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!     On exit, 
+!     WORK(1:MIN(M,N)) contains the scalar factors of the 
+!     elementary reflectors as returned by SGEQRF of the 
+!     M-by-N input matrix F.
+!     WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of 
+!     the input submatrix F(1:M,1:N-1).
+!     If the call to SGEDMDQ is only workspace query, then
+!     WORK(1) contains the minimal workspace length and
+!     WORK(2) is the optimal workspace length. Hence, the
+!     length of work is at least 2.
+!     See the description of LWORK.
+!.....
+!     LWORK (input) INTEGER
+!     The minimal length of the  workspace vector WORK.
+!     LWORK is calculated as follows:
+!     Let MLWQR  = N (minimal workspace for SGEQRF[M,N])
+!         MLWDMD = minimal workspace for SGEDMD (see the
+!                  description of LWORK in SGEDMD) for 
+!                  snapshots of dimensions MIN(M,N)-by-(N-1)
+!         MLWMQR = N (minimal workspace for 
+!                    SORMQR['L','N',M,N,N])
+!         MLWGQR = N (minimal workspace for SORGQR[M,N,N])
+!     Then
+!     LWORK = MAX(N+MLWQR, N+MLWDMD)
+!     is updated as follows:
+!        if   JOBZ == 'V' or JOBZ == 'F' THEN 
+!             LWORK = MAX( LWORK,MIN(M,N)+N-1 +MLWMQR )
+!        if   JOBQ == 'Q' THEN
+!             LWORK = MAX( LWORK,MIN(M,N)+N-1+MLWGQR)
+!     If on entry LWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.          
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LWORK =-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     Let M1=MIN(M,N), N1=N-1. Then
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!..... 
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (N=0 or N=1).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.  
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~      
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!      
+!     Local scalars      
+!     ~~~~~~~~~~~~~
+      INTEGER           :: IMINWR, INFO1,  MLWDMD, MLWGQR, &
+                           MLWMQR, MLWORK, MLWQR,  MINMN,  & 
+                           OLWDMD, OLWGQR, OLWMQR, OLWORK, &
+                           OLWQR
+      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
+                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
+                           WNTVCQ, WNTREF, WNTEX
+      CHARACTER(LEN=1)  :: JOBVL
+!      
+!     Local array      
+!     ~~~~~~~~~~~      
+      REAL(KIND=WP) :: RDUMMY(2)
+!      
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      LOGICAL       LSAME
+      EXTERNAL      LSAME 
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      SGEMM 
+      EXTERNAL      SGEQRF, SLACPY, SLASET, SORGQR, & 
+                    SORMQR, XERBLA
+
+!     External subroutines
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      SGEDMD 
+      
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC      MAX, MIN, INT         
+ !..........................................................  
+ !
+ !    Test the input arguments    
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V') 
+      WNTVCF = LSAME(JOBZ,'F')
+      WNTVCQ = LSAME(JOBZ,'Q')
+      WNTREF = LSAME(JOBF,'R') 
+      WNTEX  = LSAME(JOBF,'E')
+      WANTQ  = LSAME(JOBQ,'Q')
+      WNTTRF = LSAME(JOBT,'R')     
+      MINMN  = MIN(M,N)
+      INFO = 0 
+      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
+!       
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. LSAME(JOBS,'N')) )  THEN 
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ       &
+                              .OR. LSAME(JOBZ,'N')) ) THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
+          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
+          INFO = -4                 
+      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
+          INFO = -5
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -6    
+      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
+                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
+          INFO = -7
+      ELSE IF ( M < 0 ) THEN
+          INFO = -8
+      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
+          INFO = -9
+      ELSE IF ( LDF < M ) THEN
+          INFO = -11
+      ELSE IF ( LDX < MINMN ) THEN
+          INFO = -13
+      ELSE IF ( LDY < MINMN ) THEN
+          INFO = -15
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
+                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
+          INFO = -16
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
+          INFO = -17
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -22
+      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
+          INFO = -25
+      ELSE IF ( LDV < N-1 ) THEN
+          INFO = -27
+      ELSE IF ( LDS < N-1 ) THEN
+          INFO = -29
+      END IF
+!      
+      IF ( WNTVEC .OR. WNTVCF ) THEN
+          JOBVL = 'V'
+      ELSE
+          JOBVL = 'N'
+      END IF     
+      IF ( INFO == 0 ) THEN  
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and 
+          ! determine minimal and optimal sizes of the 
+          ! workspace at any moment of the run.         
+         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
+             ! All output except K is void. INFO=1 signals
+             ! the void input. In case of a workspace query,
+             ! the minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN  
+               IWORK(1) = 1
+                WORK(1) = 2
+                WORK(2) = 2
+            ELSE                
+               K = 0
+            END IF             
+            INFO = 1  
+            RETURN
+         END IF     
+         MLWQR  = MAX(1,N)  ! Minimal workspace length for SGEQRF.
+         MLWORK = MIN(M,N) + MLWQR 
+         IF ( LQUERY ) THEN 
+             CALL SGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, &
+                          INFO1 )
+             OLWQR = INT(RDUMMY(1))
+             OLWORK = MIN(M,N) + OLWQR           
+         END IF
+         CALL SGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
+                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
+                      REIG, IMEIG, Z, LDZ, RES,  B, LDB,     & 
+                      V, LDV, S, LDS, WORK, -1, IWORK,       &
+                      LIWORK, INFO1 )
+         MLWDMD = INT(WORK(1))
+         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
+         IMINWR = IWORK(1)
+         IF ( LQUERY ) THEN 
+             OLWDMD = INT(WORK(2))
+             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
+         END IF
+         IF ( WNTVEC .OR. WNTVCF ) THEN
+            MLWMQR = MAX(1,N) 
+            MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR)
+            IF ( LQUERY ) THEN
+               CALL SORMQR( 'L','N', M, N, MINMN, F, LDF,  & 
+                            WORK, Z, LDZ, WORK, -1, INFO1 )
+               OLWMQR = INT(WORK(1))
+               OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR)
+            END IF
+         END IF  
+         IF ( WANTQ ) THEN
+            MLWGQR = N
+            MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR)
+            IF ( LQUERY ) THEN 
+                CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, &
+                             WORK, -1, INFO1 )        
+                OLWGQR = INT(WORK(1))
+                OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR)
+            END IF            
+         END IF   
+         IMINWR = MAX( 1, IMINWR )
+         MLWORK = MAX( 2, MLWORK )      
+         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -31
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -33
+      END IF  
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'SGEDMDQ', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          WORK(1)  = MLWORK
+          WORK(2)  = OLWORK
+          RETURN
+      END IF   
+!.....	  
+!     Initial QR factorization that is used to represent the
+!     snapshots as elements of lower dimensional subspace.
+!     For large scale computation with M >>N , at this place 
+!     one can use an out of core QRF.
+!   
+      CALL SGEQRF( M, N, F, LDF, WORK,         & 
+                   WORK(MINMN+1), LWORK-MINMN, INFO1 )
+!      
+!     Define X and Y as the snapshots representations in the
+!     orthogonal basis computed in the QR factorization.
+!     X corresponds to the leading N-1 and Y to the trailing
+!     N-1 snapshots.
+      CALL SLASET( 'L', MINMN, N-1, ZERO,  ZERO, X, LDX )
+      CALL SLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
+      CALL SLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
+      IF ( M >= 3 ) THEN
+          CALL SLASET( 'L', MINMN-2, N-2, ZERO,  ZERO, &
+                       Y(3,1), LDY )  
+      END IF
+!
+!     Compute the DMD of the projected snapshot pairs (X,Y)   
+      CALL SGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,  &
+                   N-1, X, LDX, Y, LDY, NRNK,   TOL, K,     &
+                   REIG, IMEIG, Z, LDZ, RES, B, LDB, V,     &
+                   LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, IWORK,  & 
+                   LIWORK, INFO1 )
+      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
+          ! Return with error code.
+          INFO = INFO1
+          RETURN
+      ELSE
+          INFO = INFO1
+      END IF    
+!      
+!     The Ritz vectors (Koopman modes) can be explicitly 
+!     formed or returned in factored form.
+      IF ( WNTVEC ) THEN
+        ! Compute the eigenvectors explicitly.  
+        IF ( M > MINMN ) CALL SLASET( 'A', M-MINMN, K, ZERO, &
+                                     ZERO, Z(MINMN+1,1), LDZ )
+        CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
+                     LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      ELSE IF ( WNTVCF ) THEN   
+        !   Return the Ritz vectors (eigenvectors) in factored
+        !   form Z*V, where Z contains orthonormal matrix (the
+        !   product of Q from the initial QR factorization and 
+        !   the SVD/POD_basis returned by SGEDMD in X) and the 
+        !   second factor (the eigenvectors of the Rayleigh 
+        !   quotient) is in the array V, as returned by SGEDMD.
+        CALL SLACPY( 'A', N, K, X, LDX, Z, LDZ )
+        IF ( M > N ) CALL SLASET( 'A', M-N, K, ZERO, ZERO,   & 
+                                  Z(N+1,1), LDZ )
+        CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
+             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
+      END IF
+!     
+!     Some optional output variables:
+!
+!     The upper triangular factor in the initial QR 
+!     factorization is optionally returned in the array Y.
+!     This is useful if this call to SGEDMDQ is to be 
+!     followed by a streaming DMD that is implemented in a 
+!     QR compressed form.
+      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
+         CALL SLASET( 'A', MINMN, N, ZERO,  ZERO, Y, LDY )
+         CALL SLACPY( 'U', MINMN, N, F, LDF,      Y, LDY )
+      END IF    
+!
+!     The orthonormal/orthogonal factor in the initial QR 
+!     factorization is optionally returned in the array F. 
+!     Same as with the triangular factor above, this is 
+!     useful in a streaming DMD.
+      IF ( WANTQ ) THEN  ! Q overwrites F 
+         CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, &
+              WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )  
+      END IF
+!      
+      RETURN
+!      
+      END SUBROUTINE SGEDMDQ
+    
\ No newline at end of file
diff --git a/SRC/zgedmd.f90 b/SRC/zgedmd.f90
new file mode 100644
index 0000000000..090641ad84
--- /dev/null
+++ b/SRC/zgedmd.f90
@@ -0,0 +1,996 @@
+      SUBROUTINE ZGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
+                         M, N, X, LDX, Y, LDY, NRNK, TOL,   &
+                         K, EIGS, Z, LDZ, RES, B,    LDB,   &
+                         W, LDW,  S, LDS, ZWORK,  LZWORK,   &
+                         RWORK, LRWORK, IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real64
+
+!.....
+!     Scalar arguments
+      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
+      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
+                                 NRNK, LDZ, LDB, LDW,  LDS, &
+                                 LIWORK, LRWORK, LZWORK
+      INTEGER,       INTENT(OUT)  :: K, INFO
+      REAL(KIND=WP), INTENT(IN)   ::    TOL
+!     Array arguments
+      COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
+                                         W(LDW,*), S(LDS,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+      REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
+      REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
+      INTEGER,          INTENT(OUT)   :: IWORK(*)
+!............................................................
+!     Purpose
+!     =======
+!     ZGEDMD computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices. For the input matrices
+!     X and Y such that Y = A*X with an inaccessible matrix
+!     A, ZGEDMD computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular
+!     vectors of X. Optionally, ZGEDMD returns the residuals
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391 -421, 2014.
+!
+!......................................................................
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations
+!     expressed in this material are those of the author and
+!     do not necessarily reflect the views of the DARPA SBIR
+!     Program Office
+!============================================================
+!     Distribution Statement A:
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022
+!============================================================
+!............................................................
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix.
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'N' :: No data scaling.
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product X(:,1:K)*W, where X
+!            contains a POD basis (leading left singular vectors
+!            of the data matrix X) and W contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of K, X, W, Z.
+!     'N' :: The eigenvectors are not computed.
+!.....
+!     JOBR (input) CHARACTER*1
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will be
+!            computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.
+!.....
+!     WHTSVD (input) INTEGER, WHTSVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: ZGESVD (the QR SVD algorithm)
+!     2 :: ZGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: ZGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M>= 0
+!     The state space dimension (the row dimension of X, Y).
+!.....
+!     N (input) INTEGER, 0 <= N <= M
+!     The number of data snapshot pairs
+!     (the number of columns of X and Y).
+!.....
+!     X (input/output) COMPLEX(KIND=WP) M-by-N array
+!   > On entry, X contains the data snapshot matrix X. It is
+!     assumed that the column norms of X are in the range of
+!     the normalized floating point numbers.
+!   < On exit, the leading K columns of X contain a POD basis,
+!     i.e. the leading K left singular vectors of the input
+!     data matrix X, U(:,1:K). All N columns of X contain all
+!     left singular vectors of the input matrix X.
+!     See the descriptions of K, Z and W.
+!.....
+!     LDX (input) INTEGER, LDX >= M
+!     The leading dimension of the array X.
+!.....
+!     Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array
+!   > On entry, Y contains the data snapshot matrix Y
+!   < On exit,
+!     If JOBR == 'R', the leading K columns of Y  contain
+!     the residual vectors for the computed Ritz pairs.
+!     See the description of RES.
+!     If JOBR == 'N', Y contains the original input data,
+!                     scaled according to the value of JOBS.
+!.....
+!     LDY (input) INTEGER , LDY >= M
+!     The leading dimension of the array Y.
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.
+!     The numerical rank can be enforced by using positive
+!     value of NRNK as follows:
+!     0 < NRNK <= N :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the descriptions of TOL and  K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.
+!.....
+!     K (output) INTEGER,  0 <= K <= N
+!     The dimension of the POD basis for the data snapshot
+!     matrix X and the number of the computed Ritz pairs.
+!     The value of K is determined according to the rule set
+!     by the parameters NRNK and TOL.
+!     See the descriptions of NRNK and TOL.
+!.....
+!     EIGS (output) COMPLEX(KIND=WP) N-by-1 array
+!     The leading K (K<=N) entries of EIGS contain
+!     the computed eigenvalues (Ritz values).
+!     See the descriptions of K, and Z.
+!.....
+!     Z (workspace/output) COMPLEX(KIND=WP)  M-by-N array
+!     If JOBZ =='V' then Z contains the  Ritz vectors.  Z(:,i)
+!     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
+!     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
+!     the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i)
+!     is an eigenvector corresponding to EIGS(i). The columns
+!     of W(1:k,1:K) are the computed eigenvectors of the
+!     K-by-K Rayleigh quotient.
+!     See the descriptions of EIGS, X and W.
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) N-by-1 array
+!     RES(1:K) contains the residuals for the K computed
+!     Ritz pairs,
+!     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i) ||_2.
+!     See the description of EIGS and Z.
+!.....
+!     B (output) COMPLEX(KIND=WP)  M-by-N array.
+!     IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further
+!     details in the provided references.
+!     If JOBF == 'E', B(1:M,1:K) contains
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.
+!     If JOBF =='N', then B is not referenced.
+!     See the descriptions of X, W, K.
+!.....
+!     LDB (input) INTEGER, LDB >= M
+!     The leading dimension of the array B.
+!.....
+!     W (workspace/output) COMPLEX(KIND=WP) N-by-N array
+!     On exit, W(1:K,1:K) contains the K computed
+!     eigenvectors of the matrix Rayleigh quotient.
+!     The Ritz vectors (returned in Z) are the
+!     product of X (containing a POD basis for the input
+!     matrix X) and W. See the descriptions of K, S, X and Z.
+!     W is also used as a workspace to temporarily store the
+!     right singular vectors of X.
+!.....
+!     LDW (input) INTEGER, LDW >= N
+!     The leading dimension of the array W.
+!.....
+!     S (workspace/output) COMPLEX(KIND=WP) N-by-N array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by ZGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N
+!     The leading dimension of the array S.
+!.....
+!     ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array
+!     ZWORK is used as complex workspace in the complex SVD, as
+!     specified by WHTSVD (1,2, 3 or 4) and for ZGEEV for computing
+!     the eigenvalues of a Rayleigh quotient.
+!     If the call to ZGEDMD is only workspace query, then
+!     ZWORK(1) contains the minimal complex workspace length and
+!     ZWORK(2) is the optimal complex workspace length.
+!     Hence, the length of ZWORK is at least 2.
+!     See the description of LZWORK.
+!.....
+!     LZWORK (input) INTEGER
+!     The minimal length of the workspace vector ZWORK.
+!     LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_ZGEEV),
+!     where LZWORK_ZGEEV = MAX( 1, 2*N )  and the minimal
+!     LZWORK_SVD is calculated as follows
+!     If WHTSVD == 1 :: ZGESVD ::
+!        LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+!     If WHTSVD == 2 :: ZGESDD ::
+!        LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
+!     If WHTSVD == 3 :: ZGESVDQ ::
+!        LZWORK_SVD = obtainable by a query
+!     If WHTSVD == 4 :: ZGEJSV ::
+!        LZWORK_SVD = obtainable by a query
+!     If on entry LZWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths and returns them in
+!     ZWORK(1) and ZWORK(2), respectively.
+!.....
+!     RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array
+!     On exit, RWORK(1:N) contains the singular values of
+!     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
+!     If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain
+!     scaling factor RWORK(N+2)/RWORK(N+1) used to scale X
+!     and Y to avoid overflow in the SVD of X.
+!     This may be of interest if the scaling option is off
+!     and as many as possible smallest eigenvalues are
+!     desired to the highest feasible accuracy.
+!     If the call to ZGEDMD is only workspace query, then
+!     RWORK(1) contains the minimal workspace length.
+!     See the description of LRWORK.
+!.....
+!     LRWORK (input) INTEGER
+!     The minimal length of the workspace vector RWORK.
+!     LRWORK is calculated as follows:
+!     LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_ZGEEV), where
+!     LRWORK_ZGEEV = MAX(1,2*N) and LRWORK_SVD is the real workspace
+!     for the SVD subroutine determined by the input parameter
+!     WHTSVD.
+!     If WHTSVD == 1 :: ZGESVD ::
+!        LRWORK_SVD = 5*MIN(M,N)
+!     If WHTSVD == 2 :: ZGESDD ::
+!        LRWORK_SVD =  MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N),
+!        2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) )
+!     If WHTSVD == 3 :: ZGESVDQ ::
+!        LRWORK_SVD = obtainable by a query
+!     If WHTSVD == 4 :: ZGEJSV ::
+!        LRWORK_SVD = obtainable by a query
+!     If on entry LRWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     real workspace length and returns it in RWORK(1).
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LZWORK = -1, LRWORK = -1 or LIWORK = -1,
+!     then the minimal length of IWORK is computed and
+!     returned in IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for  ZWORK, RWORK and
+!     IWORK. See the descriptions of ZWORK, RWORK and IWORK.
+!.....
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (M=0 or N=0).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP),    PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP),    PARAMETER :: ZERO = 0.0_WP
+      COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,    &
+                       SSUM,  XSCL1,  XSCL2
+      INTEGER       ::  i,  j,  IMINWR,  INFO1, INFO2,  &
+                        LWRKEV, LWRSDD, LWRSVD, LWRSVJ, &
+                        LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
+                        MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
+                        OLWORK, MLRWRK
+      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY,  &
+                        WNTEX, WNTREF, WNTRES, WNTVEC
+      CHARACTER     ::  JOBZL, T_OR_N
+      CHARACTER     ::  JSVOPT
+!
+!     Local arrays
+!     ~~~~~~~~~~~~
+      REAL(KIND=WP) :: RDUMMY(2)
+
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      REAL(KIND=WP) ZLANGE, DLAMCH, DZNRM2
+      EXTERNAL      ZLANGE, DLAMCH, DZNRM2, IZAMAX
+      INTEGER                               IZAMAX
+      LOGICAL       DISNAN, LSAME
+      EXTERNAL      DISNAN, LSAME
+
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      ZAXPY,  ZGEMM,  ZDSCAL
+      EXTERNAL      ZGEEV,  ZGEJSV, ZGESDD, ZGESVD, ZGESVDQ, &
+                    ZLACPY, ZLASCL, ZLASSQ, XERBLA
+
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC     DBLE, INT, MAX, SQRT
+!............................................................
+!
+!    Test the input arguments
+!
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      INFO   = 0
+      LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) &
+                                  .OR. ( LRWORK == -1 ) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
+                                  LSAME(JOBS,'N')) )   THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
+                              .OR. LSAME(JOBZ,'F')) )  THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
+                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                    THEN
+          INFO = -4
+      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
+                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
+          INFO = -5
+      ELSE IF ( M < 0 )   THEN
+          INFO = -6
+      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
+          INFO = -7
+      ELSE IF ( LDX < M ) THEN
+          INFO = -9
+      ELSE IF ( LDY < M ) THEN
+          INFO = -11
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
+                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
+          INFO = -12
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
+          INFO = -13
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -17
+      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
+          INFO = -20
+      ELSE IF ( LDW < N ) THEN
+          INFO = -22
+      ELSE IF ( LDS < N ) THEN
+          INFO = -24
+      END IF
+!
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( N == 0 ) THEN
+             ! Quick return. All output except K is void.
+             ! INFO=1 signals the void input.
+             ! In case of a workspace query, the default
+             ! minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+                IWORK(1) = 1
+                RWORK(1) = 1
+                ZWORK(1) = 2
+                ZWORK(2) = 2
+            ELSE
+               K   =  0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+
+         IMINWR = 1
+         MLRWRK = MAX(1,N)
+         MLWORK = 2
+         OLWORK = 2
+         SELECT CASE ( WHTSVD )
+         CASE (1)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of ZGESVD:
+             ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+             MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
+             MLWORK = MAX(MLWORK,MWRSVD)
+             MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N))
+             IF ( LQUERY ) THEN
+                CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, &
+                     B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 )
+                LWRSVD = INT( ZWORK(1) )
+                OLWORK = MAX(OLWORK,LWRSVD)
+             END IF
+         CASE (2)
+             ! The following is specified as the minimal
+             ! length of WORK in the definition of ZGESDD:
+             ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+             ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N)
+             ! In LAPACK 3.10.1 RWORK is defined differently.
+             ! Below we take max over the two versions.
+             ! IMINWR = 8*MIN(M,N)
+             MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
+             MLWORK = MAX(MLWORK,MWRSDD)
+             IMINWR = 8*MIN(M,N)
+             MLRWRK = MAX( MLRWRK,  N +                    &
+                      MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), &
+                           5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), &
+                           2*MAX(M,N)*MIN(M,N)+            &
+                           2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
+             IF ( LQUERY ) THEN
+                CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B,LDB,&
+                     W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 )
+                LWRSDD = MAX( MWRSDD,INT( ZWORK(1) ))
+                ! Possible bug in ZGESDD optimal workspace size.
+                OLWORK = MAX(OLWORK,LWRSDD)
+             END IF
+         CASE (3)
+             CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                  X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK,  &
+                  IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVQ = INT(ZWORK(2))
+             MLWORK = MAX(MLWORK,MWRSVQ)
+             MLRWRK  = MAX(MLRWRK,N + INT(RDUMMY(1)))
+             IF ( LQUERY ) THEN
+                LWRSVQ = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,LWRSVQ)
+             END IF
+         CASE (4)
+             JSVOPT = 'J'
+             CALL ZGEJSV( 'F', 'U', JSVOPT, 'R', 'N', 'P', M, &
+                   N, X, LDX, RWORK, Z, LDZ, W, LDW,       &
+                   ZWORK, -1, RDUMMY, -1, IWORK, INFO1 )
+             IMINWR = IWORK(1)
+             MWRSVJ = INT(ZWORK(2))
+             MLWORK = MAX(MLWORK,MWRSVJ)
+             MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1))))
+             IF ( LQUERY ) THEN
+                LWRSVJ = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,LWRSVJ)
+             END IF
+         END SELECT
+         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
+             JOBZL = 'V'
+         ELSE
+             JOBZL = 'N'
+         END IF
+         ! Workspace calculation to the ZGEEV call
+         MWRKEV = MAX( 1, 2*N )
+         MLWORK = MAX(MLWORK,MWRKEV)
+         MLRWRK = MAX(MLRWRK,N+2*N)
+         IF ( LQUERY ) THEN
+             CALL ZGEEV( 'N', JOBZL, N, S, LDS, EIGS, &
+              W, LDW, W, LDW, ZWORK, -1, RWORK, INFO1 )
+                LWRKEV = INT(ZWORK(1))
+                OLWORK = MAX( OLWORK, LWRKEV )
+         END IF
+!
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30
+         IF ( LRWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -28
+         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -26
+
+      END IF
+!
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'ZGEDMD', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          RWORK(1) = MLRWRK
+          ZWORK(1) = MLWORK
+          ZWORK(2) = OLWORK
+          RETURN
+      END IF
+!............................................................
+!
+      OFL   = DLAMCH('O')
+      SMALL = DLAMCH('S')
+      BADXY = .FALSE.
+!
+!     <1> Optional scaling of the snapshots (columns of X, Y)
+!     ==========================================================
+      IF ( SCCOLX ) THEN
+          ! The columns of X will be normalized.
+          ! To prevent overflows, the column norms of X are
+          ! carefully computed using ZLASSQ.
+          K = 0
+          DO i = 1, N
+            !RWORK(i) = DZNRM2( M, X(1,i), 1 )
+            SCALE  = ZERO
+            CALL ZLASSQ( M, X(1,i), 1, SCALE, SSUM )
+            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -8
+                CALL XERBLA('ZGEDMD',-INFO)
+            END IF
+            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of X(:,i) overflows. First, X(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
+!                 Next, the norm of X(:,i) is stored without
+!                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of X(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, X(1,i), LDX, INFO2 )
+                  RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
+               ELSE
+!                 X(:,i) will be scaled to unit 2-norm
+                  RWORK(i) =   SCALE * ROOTSC
+                  CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
+                               X(1,i), LDX, INFO2 )      ! LAPACK CALL
+!                 X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)   ! INTRINSIC
+               END IF
+            ELSE
+               RWORK(i) = ZERO
+               K = K + 1
+            END IF
+          END DO
+          IF ( K == N ) THEN
+             ! All columns of X are zero. Return error code -8.
+             ! (the 8th input variable had an illegal value)
+             K = 0
+             INFO = -8
+             CALL XERBLA('ZGEDMD',-INFO)
+             RETURN
+          END IF
+          DO i = 1, N
+!           Now, apply the same scaling to the columns of Y.
+            IF ( RWORK(i) >  ZERO ) THEN
+                CALL ZDSCAL( M, ONE/RWORK(i), Y(1,i), 1 )  ! BLAS CALL
+!               Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)       ! INTRINSIC
+            ELSE IF ( RWORK(i) < ZERO ) THEN
+                CALL ZLASCL( 'G', 0, 0, -RWORK(i),          &
+                     ONE/DBLE(M), M, 1, Y(1,i), LDY, INFO2 ) ! LAPACK CALL
+            ELSE IF ( ABS(Y(IZAMAX(M, Y(1,i),1),i ))  &
+                                            /= ZERO ) THEN
+!               X(:,i) is zero vector. For consistency,
+!               Y(:,i) should also be zero. If Y(:,i) is not
+!               zero, then the data might be inconsistent or
+!               corrupted. If JOBS == 'C', Y(:,i) is set to
+!               zero and a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+                IF ( LSAME(JOBS,'C')) &
+                CALL ZDSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
+            END IF
+          END DO
+      END IF
+  !
+      IF ( SCCOLY ) THEN
+          ! The columns of Y will be normalized.
+          ! To prevent overflows, the column norms of Y are
+          ! carefully computed using ZLASSQ.
+          DO i = 1, N
+            !RWORK(i) = DZNRM2( M, Y(1,i), 1 )
+            SCALE  = ZERO
+            CALL ZLASSQ( M, Y(1,i), 1, SCALE, SSUM )
+            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
+                K    =  0
+                INFO = -10
+                CALL XERBLA('ZGEDMD',-INFO)
+            END IF
+            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
+               ROOTSC = SQRT(SSUM)
+               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
+!                 Norm of Y(:,i) overflows. First, Y(:,i)
+!                 is scaled by
+!                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
+!                 Next, the norm of Y(:,i) is stored without
+!                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
+!                 the minus sign indicating the 1/M factor.
+!                 Scaling is performed without overflow, and
+!                 underflow may occur in the smallest entries
+!                 of Y(:,i). The relative backward and forward
+!                 errors are small in the ell_2 norm.
+                  CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
+                               M, 1, Y(1,i), LDY, INFO2 )
+                  RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
+               ELSE
+!                 Y(:,i) will be scaled to unit 2-norm
+                  RWORK(i) =   SCALE * ROOTSC
+                  CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
+                               Y(1,i), LDY, INFO2 )             ! LAPACK CALL
+!                 Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)          ! INTRINSIC
+               END IF
+            ELSE
+               RWORK(i) = ZERO
+            END IF
+         END DO
+         DO i = 1, N
+!           Now, apply the same scaling to the columns of X.
+            IF ( RWORK(i) >  ZERO ) THEN
+                CALL ZDSCAL( M, ONE/RWORK(i), X(1,i), 1 ) ! BLAS CALL
+!               X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)      ! INTRINSIC
+            ELSE IF ( RWORK(i) < ZERO ) THEN
+                CALL ZLASCL( 'G', 0, 0, -RWORK(i),          &
+                     ONE/DBLE(M), M, 1, X(1,i), LDX, INFO2 ) ! LAPACK CALL
+            ELSE IF ( ABS(X(IZAMAX(M, X(1,i),1),i ))  &
+                                           /= ZERO ) THEN
+!               Y(:,i) is zero vector.  If X(:,i) is not
+!               zero, then a warning flag is raised.
+!               The computation continues but the
+!               situation will be reported in the output.
+                BADXY = .TRUE.
+            END IF
+         END DO
+       END IF
+!
+!     <2> SVD of the data snapshot matrix X.
+!     =====================================
+!     The left singular vectors are stored in the array X.
+!     The right singular vectors are in the array W.
+!     The array W will later on contain the eigenvectors
+!     of a Rayleigh quotient.
+      NUMRNK = N
+      SELECT CASE ( WHTSVD )
+         CASE (1)
+             CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, &
+                  LDB, W, LDW, ZWORK, LZWORK,  RWORK(N+1), INFO1 ) ! LAPACK CALL
+             T_OR_N = 'C'
+         CASE (2)
+            CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, &
+                 LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 )   ! LAPACK CALL
+            T_OR_N = 'C'
+         CASE (3)
+              CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
+                   X, LDX, RWORK, Z, LDZ, W, LDW, &
+                   NUMRNK, IWORK, LIWORK, ZWORK,     &
+                   LZWORK, RWORK(N+1), LRWORK-N, INFO1)     ! LAPACK CALL
+              CALL ZLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
+         T_OR_N = 'C'
+         CASE (4)
+              CALL ZGEJSV( 'F', 'U', JSVOPT, 'R', 'N', 'P', M, &
+                   N, X, LDX, RWORK, Z, LDZ, W, LDW, &
+                   ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 )    ! LAPACK CALL
+              CALL ZLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
+              T_OR_N = 'N'
+              XSCL1 = RWORK(N+1)
+              XSCL2 = RWORK(N+2)
+              IF ( XSCL1 /=  XSCL2 ) THEN
+                 ! This is an exceptional situation: if the
+                 ! data matrices are not scaled and the
+                 ! largest singular value of X overflows,
+                 ! then ZGEJSV can return the SVD in scaled
+                 ! form. The scaling factor can be used
+                 ! to rescale the data (X and Y).
+                 CALL ZLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
+              END IF
+      END SELECT
+!
+      IF ( INFO1 > 0 ) THEN
+         ! The SVD selected subroutine did not converge.
+         ! Return with an error code.
+         INFO = 2
+         RETURN
+      END IF
+!
+      IF ( RWORK(1) == ZERO ) THEN
+          ! The largest computed singular value of (scaled)
+          ! X is zero. Return error code -8
+          ! (the 8th input variable had an illegal value).
+          K = 0
+          INFO = -8
+          CALL XERBLA('ZGEDMD',-INFO)
+          RETURN
+      END IF
+!
+      !<3> Determine the numerical rank of the data
+      !    snapshots matrix X. This depends on the
+      !    parameters NRNK and TOL.
+
+      SELECT CASE ( NRNK )
+          CASE ( -1 )
+               K = 1
+               DO i = 2, NUMRNK
+                 IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. &
+                      ( RWORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE ( -2 )
+               K = 1
+               DO i = 1, NUMRNK-1
+                 IF ( ( RWORK(i+1) <= RWORK(i)*TOL  ) .OR. &
+                      ( RWORK(i) <= SMALL ) ) EXIT
+                 K = K + 1
+               END DO
+          CASE DEFAULT
+               K = 1
+               DO i = 2, NRNK
+                  IF ( RWORK(i) <= SMALL ) EXIT
+                  K = K + 1
+               END DO
+          END SELECT
+      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
+      !   snapshot data in the input matrix X.
+
+      !<4> Compute the Rayleigh quotient S = U^H * A * U.
+      !    Depending on the requested outputs, the computation
+      !    is organized to compute additional auxiliary
+      !    matrices (for the residuals and refinements).
+      !
+      !    In all formulas below, we need V_k*Sigma_k^(-1)
+      !    where either V_k is in W(1:N,1:K), or V_k^H is in
+      !    W(1:K,1:N). Here Sigma_k=diag(RWORK(1:K)).
+      IF ( LSAME(T_OR_N, 'N') ) THEN
+          DO i = 1, K
+           CALL ZDSCAL( N, ONE/RWORK(i), W(1,i), 1 )    ! BLAS CALL
+           ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i)      ! INTRINSIC
+          END DO
+      ELSE
+          ! This non-unit stride access is due to the fact
+          ! that ZGESVD, ZGESVDQ and ZGESDD return the
+          ! adjoint matrix of the right singular vectors.
+          !DO i = 1, K
+          ! CALL ZDSCAL( N, ONE/RWORK(i), W(i,1), LDW )    ! BLAS CALL
+          ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N)      ! INTRINSIC
+          !END DO
+          DO i = 1, K
+              RWORK(N+i) = ONE/RWORK(i)
+          END DO
+          DO j = 1, N
+             DO i = 1, K
+                 W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j)
+             END DO
+          END DO
+      END IF
+!
+      IF ( WNTREF ) THEN
+         !
+         ! Need A*U(:,1:K)=Y*V_k*inv(diag(RWORK(1:K)))
+         ! for computing the refined Ritz vectors
+         ! (optionally, outside ZGEDMD).
+          CALL ZGEMM( 'N', T_OR_N, M, K, N, ZONE, Y, LDY, W, &
+                      LDW, ZZERO, Z, LDZ )                       ! BLAS CALL
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='C'
+          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))                   ! INTRINSIC, for T_OR_N=='N'
+          !
+          ! At this point Z contains
+          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
+          ! this is needed for computing the residuals.
+          ! This matrix is  returned in the array B and
+          ! it can be used to compute refined Ritz vectors.
+          CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
+          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC
+
+          CALL ZGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, &
+                      LDZ, ZZERO, S, LDS )                        ! BLAS CALL
+          ! S(1:K,1:K) = MATMUL(TRANSPOSE(CONJG(X(1:M,1:K))),Z(1:M,1:K)) ! INTRINSIC
+          ! At this point S = U^H * A * U is the Rayleigh quotient.
+      ELSE
+        ! A * U(:,1:K) is not explicitly needed and the
+        ! computation is organized differently. The Rayleigh
+        ! quotient is computed more efficiently.
+        CALL ZGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, &
+                   ZZERO, Z, LDZ )                                         ! BLAS CALL
+        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(CONJG(X(1:M,1:K))), Y(1:M,1:N) )  ! INTRINSIC
+        !
+        CALL ZGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, &
+                    LDW, ZZERO, S, LDS )                         ! BLAS CALL
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='C'
+        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))                 ! INTRINSIC, for T_OR_N=='N'
+        ! At this point S = U^H * A * U is the Rayleigh quotient.
+        ! If the residuals are requested, save scaled V_k into Z.
+        ! Recall that V_k or V_k^H is stored in W.
+        IF ( WNTRES .OR. WNTEX ) THEN
+          IF ( LSAME(T_OR_N, 'N') ) THEN
+              CALL ZLACPY( 'A', N, K, W, LDW, Z, LDZ )
+          ELSE
+              CALL ZLACPY( 'A', K, N, W, LDW, Z, LDZ )
+          END IF
+        END IF
+      END IF
+!
+      !<5> Compute the Ritz values and (if requested) the
+      !   right eigenvectors of the Rayleigh quotient.
+      !
+      CALL ZGEEV( 'N', JOBZL, K, S, LDS, EIGS, W, LDW, &
+            W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 )  ! LAPACK CALL
+      !
+      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
+      ! quotient.  See the description of Z.
+      ! Also, see the description of ZGEEV.
+      IF ( INFO1 > 0 ) THEN
+         ! ZGEEV failed to compute the eigenvalues and
+         ! eigenvectors of the Rayleigh quotient.
+         INFO = 3
+         RETURN
+      END IF
+!
+      ! <6> Compute the eigenvectors (if requested) and,
+      ! the residuals (if requested).
+      !
+      IF ( WNTVEC .OR. WNTEX ) THEN
+      IF ( WNTRES ) THEN
+          IF ( WNTREF ) THEN
+            ! Here, if the refinement is requested, we have
+            ! A*U(:,1:K) already computed and stored in Z.
+            ! For the residuals, need Y = A * U(:,1:K) * W.
+            CALL ZGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, &
+                       LDW, ZZERO, Y, LDY )               ! BLAS CALL
+            ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)        ! INTRINSIC
+            ! This frees Z; Y contains A * U(:,1:K) * W.
+          ELSE
+            ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) (or its adjoint) is stored in Z
+            CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
+                       W, LDW, ZZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                       LDS, ZZERO, Z, LDZ )
+            ! Save a copy of Z into Y and free Z for holding
+            ! the Ritz vectors.
+            CALL ZLACPY( 'A', M, K, Z, LDZ, Y, LDY )
+            IF ( WNTEX ) CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )
+          END IF
+      ELSE IF ( WNTEX ) THEN
+          ! Compute S = V_k * Sigma_k^(-1) * W, where
+            ! V_k * Sigma_k^(-1) is stored in Z
+            CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
+                       W, LDW, ZZERO, S, LDS )
+            ! Then, compute Z = Y * S =
+            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
+            ! = A * U(:,1:K) * W(1:K,1:K)
+            CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+                       LDS, ZZERO, B, LDB )
+            ! The above call replaces the following two calls
+            ! that were used in the developing-testing phase.
+            ! CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
+            !           LDS, ZZERO, Z, LDZ)
+            ! Save a copy of Z into B and free Z for holding
+            ! the Ritz vectors.
+            ! CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )
+      END IF
+!
+      ! Compute the Ritz vectors
+      IF ( WNTVEC ) CALL ZGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, &
+                   ZZERO, Z, LDZ )                          ! BLAS CALL
+      ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
+!
+      IF ( WNTRES ) THEN
+         DO i = 1, K
+            CALL ZAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
+            ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i)            ! INTRINSIC
+            RES(i) = DZNRM2( M, Y(1,i), 1 )                       ! BLAS CALL
+         END DO
+      END IF
+      END IF
+!
+      IF ( WHTSVD == 4 ) THEN
+          RWORK(N+1) = XSCL1
+          RWORK(N+2) = XSCL2
+      END IF
+!
+!     Successful exit.
+      IF ( .NOT. BADXY ) THEN
+         INFO = 0
+      ELSE
+         ! A warning on possible data inconsistency.
+         ! This should be a rare event.
+         INFO = 4
+      END IF
+!............................................................
+      RETURN
+!     ......
+      END SUBROUTINE ZGEDMD
+
diff --git a/SRC/zgedmdq.f90 b/SRC/zgedmdq.f90
new file mode 100644
index 0000000000..51be72a32b
--- /dev/null
+++ b/SRC/zgedmdq.f90
@@ -0,0 +1,689 @@
+SUBROUTINE ZGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
+                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
+                    LDY,   NRNK,  TOL,   K,  EIGS,         &
+                    Z, LDZ, RES,  B,     LDB,   V, LDV,    &
+                    S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
+                    IWORK, LIWORK, INFO )
+! March 2023
+!.....
+      USE                   iso_fortran_env
+      IMPLICIT NONE
+      INTEGER, PARAMETER :: WP = real64
+!.....
+!     Scalar arguments
+      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
+                                JOBT, JOBF
+      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
+                                LDY, NRNK, LDZ, LDB, LDV,  &
+                                LDS, LZWORK,  LWORK, LIWORK
+      INTEGER,   INTENT(OUT) :: INFO,   K
+      REAL(KIND=WP), INTENT(IN)    ::   TOL
+!     Array arguments
+      COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
+                                         Z(LDZ,*), B(LDB,*), &
+                                         V(LDV,*), S(LDS,*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
+      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
+      REAL(KIND=WP), INTENT(OUT)   :: RES(*)
+      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)
+      INTEGER,       INTENT(OUT)   :: IWORK(*)
+!.....
+!     Purpose
+!     =======
+!     ZGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
+!     a pair of data snapshot matrices, using a QR factorization
+!     based compression of the data. For the input matrices
+!     X and Y such that Y = A*X with an inaccessible matrix
+!     A, ZGEDMDQ computes a certain number of Ritz pairs of A using
+!     the standard Rayleigh-Ritz extraction from a subspace of
+!     range(X) that is determined using the leading left singular
+!     vectors of X. Optionally, ZGEDMDQ returns the residuals
+!     of the computed Ritz pairs, the information needed for
+!     a refinement of the Ritz vectors, or the eigenvectors of
+!     the Exact DMD.
+!     For further details see the references listed
+!     below. For more details of the implementation see [3].
+!
+!     References
+!     ==========
+!     [1] P. Schmid: Dynamic mode decomposition of numerical
+!         and experimental data,
+!         Journal of Fluid Mechanics 656, 5-28, 2010.
+!     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
+!         decompositions: analysis and enhancements,
+!         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
+!     [3] Z. Drmac: A LAPACK implementation of the Dynamic
+!         Mode Decomposition I. Technical report. AIMDyn Inc.
+!         and LAPACK Working Note 298.
+!     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
+!         Brunton, N. Kutz: On Dynamic Mode Decomposition:
+!         Theory and Applications, Journal of Computational
+!         Dynamics 1(2), 391 -421, 2014.
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     and supported by
+!     - DARPA SBIR project "Koopman Operator-Based Forecasting
+!     for Nonstationary Processes from Near-Term, Limited
+!     Observational Data" Contract No: W31P4Q-21-C-0007
+!     - DARPA PAI project "Physics-Informed Machine Learning
+!     Methodologies" Contract No: HR0011-18-9-0033
+!     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
+!     Framework for Space-Time Analysis of Process Dynamics"
+!     Contract No: HR0011-16-C-0116
+!     Any opinions, findings and conclusions or recommendations
+!     expressed in this material are those of the author and
+!     do not necessarily reflect the views of the DARPA SBIR
+!     Program Office.
+!============================================================
+!     Distribution Statement A:
+!     Approved for Public Release, Distribution Unlimited.
+!     Cleared by DARPA on September 29, 2022
+!============================================================
+!......................................................................
+!     Arguments
+!     =========
+!     JOBS (input) CHARACTER*1
+!     Determines whether the initial data snapshots are scaled
+!     by a diagonal matrix. The data snapshots are the columns
+!     of F. The leading N-1 columns of F are denoted X and the
+!     trailing N-1 columns are denoted Y.
+!     'S' :: The data snapshots matrices X and Y are multiplied
+!            with a diagonal matrix D so that X*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'C' :: The snapshots are scaled as with the 'S' option.
+!            If it is found that an i-th column of X is zero
+!            vector and the corresponding i-th column of Y is
+!            non-zero, then the i-th column of Y is set to
+!            zero and a warning flag is raised.
+!     'Y' :: The data snapshots matrices X and Y are multiplied
+!            by a diagonal matrix D so that Y*D has unit
+!            nonzero columns (in the Euclidean 2-norm)
+!     'N' :: No data scaling.
+!.....
+!     JOBZ (input) CHARACTER*1
+!     Determines whether the eigenvectors (Koopman modes) will
+!     be computed.
+!     'V' :: The eigenvectors (Koopman modes) will be computed
+!            and returned in the matrix Z.
+!            See the description of Z.
+!     'F' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Z*V, where Z
+!            is orthonormal and V contains the eigenvectors
+!            of the corresponding Rayleigh quotient.
+!            See the descriptions of F, V, Z.
+!     'Q' :: The eigenvectors (Koopman modes) will be returned
+!            in factored form as the product Q*Z, where Z
+!            contains the eigenvectors of the compression of the
+!            underlying discretized operator onto the span of
+!            the data snapshots. See the descriptions of F, V, Z.
+!            Q is from the initial QR factorization.
+!     'N' :: The eigenvectors are not computed.
+!.....
+!     JOBR (input) CHARACTER*1
+!     Determines whether to compute the residuals.
+!     'R' :: The residuals for the computed eigenpairs will
+!            be computed and stored in the array RES.
+!            See the description of RES.
+!            For this option to be legal, JOBZ must be 'V'.
+!     'N' :: The residuals are not computed.
+!.....
+!     JOBQ (input) CHARACTER*1
+!     Specifies whether to explicitly compute and return the
+!     unitary matrix from the QR factorization.
+!     'Q' :: The matrix Q of the QR factorization of the data
+!            snapshot matrix is computed and stored in the
+!            array F. See the description of F.
+!     'N' :: The matrix Q is not explicitly computed.
+!.....
+!     JOBT (input) CHARACTER*1
+!     Specifies whether to return the upper triangular factor
+!     from the QR factorization.
+!     'R' :: The matrix R of the QR factorization of the data
+!            snapshot matrix F is returned in the array Y.
+!            See the description of Y and Further details.
+!     'N' :: The matrix R is not returned.
+!.....
+!     JOBF (input) CHARACTER*1
+!     Specifies whether to store information needed for post-
+!     processing (e.g. computing refined Ritz vectors)
+!     'R' :: The matrix needed for the refinement of the Ritz
+!            vectors is computed and stored in the array B.
+!            See the description of B.
+!     'E' :: The unscaled eigenvectors of the Exact DMD are
+!            computed and returned in the array B. See the
+!            description of B.
+!     'N' :: No eigenvector refinement data is computed.
+!     To be useful on exit, this option needs JOBQ='Q'.
+!.....
+!     WHTSVD (input) INTEGER, WHTSVD in { 1, 2, 3, 4 }
+!     Allows for a selection of the SVD algorithm from the
+!     LAPACK library.
+!     1 :: ZGESVD (the QR SVD algorithm)
+!     2 :: ZGESDD (the Divide and Conquer algorithm; if enough
+!          workspace available, this is the fastest option)
+!     3 :: ZGESVDQ (the preconditioned QR SVD  ; this and 4
+!          are the most accurate options)
+!     4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3
+!          are the most accurate options)
+!     For the four methods above, a significant difference in
+!     the accuracy of small singular values is possible if
+!     the snapshots vary in norm so that X is severely
+!     ill-conditioned. If small (smaller than EPS*||X||)
+!     singular values are of interest and JOBS=='N',  then
+!     the options (3, 4) give the most accurate results, where
+!     the option 4 is slightly better and with stronger
+!     theoretical background.
+!     If JOBS=='S', i.e. the columns of X will be normalized,
+!     then all methods give nearly equally accurate results.
+!.....
+!     M (input) INTEGER, M >= 0
+!     The state space dimension (the number of rows of F).
+!.....
+!     N (input) INTEGER, 0 <= N <= M+1
+!     The number of data snapshots from a single trajectory,
+!     taken at equidistant discrete times. This is the
+!     number of columns of F.
+!.....
+!     F (input/output) COMPLEX(KIND=WP) M-by-N array
+!     > On entry,
+!     the columns of F are the sequence of data snapshots
+!     from a single trajectory, taken at equidistant discrete
+!     times. It is assumed that the column norms of F are
+!     in the range of the normalized floating point numbers.
+!     < On exit,
+!     If JOBQ == 'Q', the array F contains the orthogonal
+!     matrix/factor of the QR factorization of the initial
+!     data snapshots matrix F. See the description of JOBQ.
+!     If JOBQ == 'N', the entries in F strictly below the main
+!     diagonal contain, column-wise, the information on the
+!     Householder vectors, as returned by ZGEQRF. The
+!     remaining information to restore the orthogonal matrix
+!     of the initial QR factorization is stored in ZWORK(1:MIN(M,N)).
+!     See the description of ZWORK.
+!.....
+!     LDF (input) INTEGER, LDF >= M
+!     The leading dimension of the array F.
+!.....
+!     X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array
+!     X is used as workspace to hold representations of the
+!     leading N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit, the leading K columns of X contain the leading
+!     K left singular vectors of the above described content
+!     of X. To lift them to the space of the left singular
+!     vectors U(:,1:K) of the input data, pre-multiply with the
+!     Q factor from the initial QR factorization.
+!     See the descriptions of F, K, V  and Z.
+!.....
+!     LDX (input) INTEGER, LDX >= MIN(M,N)
+!     The leading dimension of the array X.
+!.....
+!     Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array
+!     Y is used as workspace to hold representations of the
+!     trailing N-1 snapshots in the orthonormal basis computed
+!     in the QR factorization of F.
+!     On exit,
+!     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
+!     triangular factor from the QR factorization of the data
+!     snapshot matrix F.
+!.....
+!     LDY (input) INTEGER , LDY >= MIN(M,N)
+!     The leading dimension of the array Y.
+!.....
+!     NRNK (input) INTEGER
+!     Determines the mode how to compute the numerical rank,
+!     i.e. how to truncate small singular values of the input
+!     matrix X. On input, if
+!     NRNK = -1 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(1)
+!                  This option is recommended.
+!     NRNK = -2 :: i-th singular value sigma(i) is truncated
+!                  if sigma(i) <= TOL*sigma(i-1)
+!                  This option is included for R&D purposes.
+!                  It requires highly accurate SVD, which
+!                  may not be feasible.
+!     The numerical rank can be enforced by using positive
+!     value of NRNK as follows:
+!     0 < NRNK <= N-1 :: at most NRNK largest singular values
+!     will be used. If the number of the computed nonzero
+!     singular values is less than NRNK, then only those
+!     nonzero values will be used and the actually used
+!     dimension is less than NRNK. The actual number of
+!     the nonzero singular values is returned in the variable
+!     K. See the description of K.
+!.....
+!     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
+!     The tolerance for truncating small singular values.
+!     See the description of NRNK.
+!.....
+!     K (output) INTEGER,  0 <= K <= N
+!     The dimension of the SVD/POD basis for the leading N-1
+!     data snapshots (columns of F) and the number of the
+!     computed Ritz pairs. The value of K is determined
+!     according to the rule set by the parameters NRNK and
+!     TOL. See the descriptions of NRNK and TOL.
+!.....
+!     EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array
+!     The leading K (K<=N-1) entries of EIGS contain
+!     the computed eigenvalues (Ritz values).
+!     See the descriptions of K, and Z.
+!.....
+!     Z (workspace/output) COMPLEX(KIND=WP)  M-by-(N-1) array
+!     If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i)
+!     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
+!     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
+!     Z*V, where Z contains orthonormal matrix (the product of
+!     Q from the initial QR factorization and the SVD/POD_basis
+!     returned by ZGEDMD in X) and the second factor (the
+!     eigenvectors of the Rayleigh quotient) is in the array V,
+!     as returned by ZGEDMD. That is,  Z(:,1:K)*V(:,i)
+!     is an eigenvector corresponding to EIGS(i). The columns
+!     of V(1:K,1:K) are the computed eigenvectors of the
+!     K-by-K Rayleigh quotient.
+!     See the descriptions of EIGS, X and V.
+!.....
+!     LDZ (input) INTEGER , LDZ >= M
+!     The leading dimension of the array Z.
+!.....
+!     RES (output) REAL(KIND=WP) (N-1)-by-1 array
+!     RES(1:K) contains the residuals for the K computed
+!     Ritz pairs,
+!     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
+!     See the description of EIGS and Z.
+!.....
+!     B (output) COMPLEX(KIND=WP)  MIN(M,N)-by-(N-1) array.
+!     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
+!     be used for computing the refined vectors; see further
+!     details in the provided references.
+!     If JOBF == 'E', B(1:N,1:K) contains
+!     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
+!     Exact DMD, up to scaling by the inverse eigenvalues.
+!     In both cases, the content of B can be lifted to the
+!     original dimension of the input data by pre-multiplying
+!     with the Q factor from the initial QR factorization.
+!     Here A denotes a compression of the underlying operator.
+!     See the descriptions of F and X.
+!     If JOBF =='N', then B is not referenced.
+!.....
+!     LDB (input) INTEGER, LDB >= MIN(M,N)
+!     The leading dimension of the array B.
+!.....
+!     V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
+!     On exit, V(1:K,1:K) contains the K eigenvectors of
+!     the Rayleigh quotient. The Ritz vectors
+!     (returned in Z) are the product of Q from the initial QR
+!     factorization (see the description of F) X (see the
+!     description of X) and V.
+!.....
+!     LDV (input) INTEGER, LDV >= N-1
+!     The leading dimension of the array V.
+!.....
+!     S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
+!     The array S(1:K,1:K) is used for the matrix Rayleigh
+!     quotient. This content is overwritten during
+!     the eigenvalue decomposition by ZGEEV.
+!     See the description of K.
+!.....
+!     LDS (input) INTEGER, LDS >= N-1
+!     The leading dimension of the array S.
+!.....
+!     ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array
+!     On exit,
+!     ZWORK(1:MIN(M,N)) contains the scalar factors of the
+!     elementary reflectors as returned by ZGEQRF of the
+!     M-by-N input matrix F.
+!     If the call to ZGEDMDQ is only workspace query, then
+!     ZWORK(1) contains the minimal complex workspace length and
+!     ZWORK(2) is the optimal complex workspace length.
+!     Hence, the length of ZWORK is at least 2.
+!     See the description of LZWORK.
+!.....
+!     LZWORK (input) INTEGER
+!     The minimal length of the  workspace vector ZWORK.
+!     LZWORK is calculated as follows:
+!     Let MLWQR  = N (minimal workspace for ZGEQRF[M,N])
+!         MLWDMD = minimal workspace for ZGEDMD (see the
+!                  description of LWORK in ZGEDMD)
+!         MLWMQR = N (minimal workspace for
+!                    ZUNMQR['L','N',M,N,N])
+!         MLWGQR = N (minimal workspace for ZUNGQR[M,N,N])
+!         MINMN  = MIN(M,N)
+!     Then
+!     LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD)
+!     is further updated as follows:
+!        if   JOBZ == 'V' or JOBZ == 'F' THEN
+!             LZWORK = MAX(LZWORK, MINMN+MLWMQR)
+!        if   JOBQ == 'Q' THEN
+!             LZWORK = MAX(LZWORK, MINMN+MLWGQR)
+!     If on entry LZWORK = -1, a workspace query is assumed.
+!.....
+!     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
+!     On exit,
+!     WORK(1:N-1) contains the singular values of
+!     the input submatrix F(1:M,1:N-1).
+!     If the call to ZGEDMDQ is only workspace query, then
+!     WORK(1) contains the minimal workspace length and
+!     WORK(2) is the optimal workspace length. Hence, the
+!     length of WORK is at least 2.
+!     See the description of LWORK.
+!.....
+!     LWORK (input) INTEGER
+!     The minimal length of the  workspace vector WORK.
+!     LWORK is the same as in ZGEDMD, because in ZGEDMDQ
+!     only ZGEDMD requires real workspace for snapshots
+!     of dimensions MIN(M,N)-by-(N-1).
+!     If on entry LWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace length for WORK.
+!.....
+!     IWORK (workspace/output) INTEGER LIWORK-by-1 array
+!     Workspace that is required only if WHTSVD equals
+!     2 , 3 or 4. (See the description of WHTSVD).
+!     If on entry LZWORK=-1, LWORK=-1 or LIWORK=-1, then the
+!     minimal length of IWORK is computed and returned in
+!     IWORK(1). See the description of LIWORK.
+!.....
+!     LIWORK (input) INTEGER
+!     The minimal length of the workspace vector IWORK.
+!     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
+!     Let M1=MIN(M,N), N1=N-1. Then
+!     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
+!     If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
+!     If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
+!     If on entry LIWORK = -1, then a workspace query is
+!     assumed and the procedure only computes the minimal
+!     and the optimal workspace lengths for both WORK and
+!     IWORK. See the descriptions of WORK and IWORK.
+!.....
+!     INFO (output) INTEGER
+!     -i < 0 :: On entry, the i-th argument had an
+!               illegal value
+!        = 0 :: Successful return.
+!        = 1 :: Void input. Quick exit (N=0 or N=1).
+!        = 2 :: The SVD computation of X did not converge.
+!               Suggestion: Check the input data and/or
+!               repeat with different WHTSVD.
+!        = 3 :: The computation of the eigenvalues did not
+!               converge.
+!        = 4 :: If data scaling was requested on input and
+!               the procedure found inconsistency in the data
+!               such that for some column index i,
+!               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
+!               to zero if JOBS=='C'. The computation proceeds
+!               with original or modified data and warning
+!               flag is set with INFO=4.
+!.............................................................
+!.............................................................
+!     Parameters
+!     ~~~~~~~~~~
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!     COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+!
+!     Local scalars
+!     ~~~~~~~~~~~~~
+      INTEGER           :: IMINWR, INFO1,  MINMN, MLRWRK,   &
+                           MLWDMD, MLWGQR, MLWMQR, MLWORK,  &
+                           MLWQR,  OLWDMD, OLWGQR, OLWMQR,  &
+                           OLWORK, OLWQR
+      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
+                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
+                           WNTVCQ, WNTREF, WNTEX
+      CHARACTER(LEN=1)  :: JOBVL
+!
+!     External functions (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~
+      LOGICAL       LSAME
+      EXTERNAL      LSAME
+!
+!     External subroutines (BLAS and LAPACK)
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      ZGEQRF, ZLACPY, ZLASET, ZUNGQR, &
+                    ZUNMQR, XERBLA
+
+!     External subroutines
+!     ~~~~~~~~~~~~~~~~~~~~
+      EXTERNAL      ZGEDMD
+
+!     Intrinsic functions
+!     ~~~~~~~~~~~~~~~~~~~
+      INTRINSIC      MAX, MIN, INT
+ !..........................................................
+ !
+ !    Test the input arguments
+      WNTRES = LSAME(JOBR,'R')
+      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
+      SCCOLY = LSAME(JOBS,'Y')
+      WNTVEC = LSAME(JOBZ,'V')
+      WNTVCF = LSAME(JOBZ,'F')
+      WNTVCQ = LSAME(JOBZ,'Q')
+      WNTREF = LSAME(JOBF,'R')
+      WNTEX  = LSAME(JOBF,'E')
+      WANTQ  = LSAME(JOBQ,'Q')
+      WNTTRF = LSAME(JOBT,'R')
+      MINMN  = MIN(M,N)
+      INFO = 0
+      LQUERY = ( (LZWORK == -1) .OR. (LWORK == -1) .OR. (LIWORK == -1) )
+!
+      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
+                                  LSAME(JOBS,'N')) )  THEN
+          INFO = -1
+      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ    &
+                              .OR. LSAME(JOBZ,'N')) ) THEN
+          INFO = -2
+      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    &
+          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
+          INFO = -3
+      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
+          INFO = -4
+      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
+          INFO = -5
+      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
+                LSAME(JOBF,'N') ) )                     THEN
+          INFO = -6
+      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
+                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
+          INFO = -7
+      ELSE IF ( M < 0 ) THEN
+          INFO = -8
+      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
+          INFO = -9
+      ELSE IF ( LDF < M ) THEN
+          INFO = -11
+      ELSE IF ( LDX < MINMN ) THEN
+          INFO = -13
+      ELSE IF ( LDY < MINMN ) THEN
+          INFO = -15
+      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    &
+                       ((NRNK >= 1).AND.(NRNK <= N-1))) ) THEN
+          INFO = -16   ! X has only N-1 columns, so NRNK <= N-1
+      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
+          INFO = -17
+      ELSE IF ( LDZ < M ) THEN
+          INFO = -21
+      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
+          INFO = -24
+      ELSE IF ( LDV < N-1 ) THEN
+          INFO = -26
+      ELSE IF ( LDS < N-1 ) THEN
+          INFO = -28
+      END IF
+!
+      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
+          JOBVL = 'V'
+      ELSE
+          JOBVL = 'N'
+      END IF
+      IF ( INFO == 0 ) THEN
+          ! Compute the minimal and the optimal workspace
+          ! requirements. Simulate running the code and
+          ! determine minimal and optimal sizes of the
+          ! workspace at any moment of the run.
+         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
+             ! All output except K is void. INFO=1 signals
+             ! the void input. In case of a workspace query,
+             ! the minimal workspace lengths are returned.
+            IF ( LQUERY ) THEN
+               IWORK(1) = 1
+               ZWORK(1) = 2
+               ZWORK(2) = 2
+               WORK(1)  = 2
+               WORK(2)  = 2
+            ELSE
+               K = 0
+            END IF
+            INFO = 1
+            RETURN
+         END IF
+
+         MLRWRK = 2
+         MLWORK = 2
+         OLWORK = 2
+         IMINWR = 1
+         MLWQR  = MAX(1,N)  ! Minimal workspace length for ZGEQRF.
+         MLWORK = MAX(MLWORK,MINMN + MLWQR)
+
+         IF ( LQUERY ) THEN
+             CALL ZGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, &
+                          INFO1 )
+             OLWQR  = INT(ZWORK(1))
+             OLWORK = MAX(OLWORK,MINMN + OLWQR)
+         END IF
+         CALL ZGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,&
+                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     &
+                      EIGS, Z, LDZ, RES,  B, LDB, V, LDV,    &
+                      S, LDS, ZWORK, -1, WORK, -1, IWORK,&
+                      -1, INFO1 )
+         MLWDMD = INT(ZWORK(1))
+         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
+         MLRWRK = MAX(MLRWRK, INT(WORK(1)))
+         IMINWR = MAX(IMINWR, IWORK(1))
+         IF ( LQUERY ) THEN
+             OLWDMD = INT(ZWORK(2))
+             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
+         END IF
+         IF ( WNTVEC .OR. WNTVCF ) THEN
+            MLWMQR = MAX(1,N)
+            MLWORK = MAX(MLWORK,MINMN+MLWMQR)
+            IF ( LQUERY ) THEN
+               CALL ZUNMQR( 'L','N', M, N, MINMN, F, LDF,  &
+                            ZWORK, Z, LDZ, ZWORK, -1, INFO1 )
+               OLWMQR = INT(ZWORK(1))
+               OLWORK = MAX(OLWORK,MINMN+OLWMQR)
+            END IF
+         END IF
+         IF ( WANTQ ) THEN
+            MLWGQR = MAX(1,N)
+            MLWORK = MAX(MLWORK,MINMN+MLWGQR)
+            IF ( LQUERY ) THEN
+                CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, &
+                             ZWORK, -1, INFO1 )
+                OLWGQR = INT(ZWORK(1))
+                OLWORK = MAX(OLWORK,MINMN+OLWGQR)
+            END IF
+         END IF
+         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34
+         IF ( LWORK  < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -32
+         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -30
+      END IF
+      IF( INFO /= 0 ) THEN
+         CALL XERBLA( 'ZGEDMDQ', -INFO )
+         RETURN
+      ELSE IF ( LQUERY ) THEN
+!     Return minimal and optimal workspace sizes
+          IWORK(1) = IMINWR
+          ZWORK(1) = MLWORK
+          ZWORK(2) = OLWORK
+          WORK(1)  = MLRWRK
+          WORK(2)  = MLRWRK
+          RETURN
+      END IF
+!.....
+!     Initial QR factorization that is used to represent the
+!     snapshots as elements of lower dimensional subspace.
+!     For large scale computation with M >> N, at this place
+!     one can use an out of core QRF.
+!
+      CALL ZGEQRF( M, N, F, LDF, ZWORK,                &
+                   ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+!
+!     Define X and Y as the snapshots representations in the
+!     orthogonal basis computed in the QR factorization.
+!     X corresponds to the leading N-1 and Y to the trailing
+!     N-1 snapshots.
+      CALL ZLASET( 'L', MINMN, N-1, ZZERO,  ZZERO, X, LDX )
+      CALL ZLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
+      CALL ZLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
+      IF ( M >= 3 ) THEN
+          CALL ZLASET( 'L', MINMN-2, N-2, ZZERO,  ZZERO, &
+                       Y(3,1), LDY )
+      END IF
+!
+!     Compute the DMD of the projected snapshot pairs (X,Y)
+      CALL ZGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, &
+                  N-1,  X, LDX, Y, LDY, NRNK,   TOL, K,    &
+                  EIGS, Z, LDZ, RES, B,  LDB,   V, LDV,    &
+                  S, LDS, ZWORK(MINMN+1), LZWORK-MINMN, &
+                  WORK,   LWORK, IWORK, LIWORK, INFO1 )
+      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
+          ! Return with error code. See ZGEDMD for details.
+          INFO = INFO1
+          RETURN
+      ELSE
+          INFO = INFO1
+      END IF
+!
+!     The Ritz vectors (Koopman modes) can be explicitly
+!     formed or returned in factored form.
+      IF ( WNTVEC ) THEN
+        ! Compute the eigenvectors explicitly.
+        IF ( M > MINMN ) CALL ZLASET( 'A', M-MINMN, K, ZZERO, &
+                                     ZZERO, Z(MINMN+1,1), LDZ )
+        CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z,  &
+             LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      ELSE IF ( WNTVCF ) THEN
+        !   Return the Ritz vectors (eigenvectors) in factored
+        !   form Z*V, where Z = Q * [ X ; 0 ] is orthonormal (Q is
+        !   from the initial QR factorization, X is the SVD/POD
+        !   basis returned by ZGEDMD) and V holds the eigenvectors
+        !   of the Rayleigh quotient. X has MINMN rows, so MINMN
+        !   (not N) rows are copied; N may exceed M by one.
+        CALL ZLACPY( 'A', MINMN, K, X, LDX, Z, LDZ )
+        IF ( M > MINMN ) CALL ZLASET( 'A', M-MINMN, K, ZZERO, ZZERO, &
+                                 Z(MINMN+1,1), LDZ )
+        CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, &
+                    LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      END IF
+!
+!     Some optional output variables:
+!
+!     The upper triangular factor R in the initial QR
+!     factorization is optionally returned in the array Y.
+!     This is useful if this call to ZGEDMDQ is to be
+!     followed by a streaming DMD that is implemented in a
+!     QR compressed form.
+      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y
+         CALL ZLASET( 'A', MINMN, N, ZZERO,  ZZERO, Y, LDY )
+         CALL ZLACPY( 'U', MINMN, N, F, LDF,        Y, LDY )
+      END IF
+!
+!     The orthonormal/unitary factor Q in the initial QR
+!     factorization is optionally returned in the array F.
+!     Same as with the triangular factor above, this is
+!     useful in a streaming DMD.
+      IF ( WANTQ ) THEN                   ! Q overwrites F
+         CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK,     &
+                      ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
+      END IF
+!
+      RETURN
+!
+      END SUBROUTINE ZGEDMDQ
+    
\ No newline at end of file
diff --git a/TESTING/EIG/Makefile b/TESTING/EIG/Makefile
index e403586638..5de315b6e6 100644
--- a/TESTING/EIG/Makefile
+++ b/TESTING/EIG/Makefile
@@ -64,6 +64,8 @@ SEIGTST = schkee.o \
    sort03.o ssbt21.o ssgt01.o sslect.o sspt21.o sstt21.o \
    sstt22.o ssyl01.o ssyt21.o ssyt22.o
 
+SDMDEIGTST = schkdmd.o
+
 CEIGTST = cchkee.o \
    cbdt01.o cbdt02.o cbdt03.o cbdt05.o \
    cchkbb.o cchkbd.o cchkbk.o cchkbl.o cchkec.o \
@@ -81,6 +83,8 @@ CEIGTST = cchkee.o \
    csgt01.o cslect.o csyl01.o\
    cstt21.o cstt22.o cunt01.o cunt03.o
 
+CDMDEIGTST = cchkdmd.o
+
 DZIGTST = dlafts.o dlahd2.o dlasum.o dlatb9.o dstech.o dstect.o \
    dsvdch.o dsvdct.o dsxt1.o
 
@@ -101,6 +105,8 @@ DEIGTST = dchkee.o \
    dort03.o dsbt21.o dsgt01.o dslect.o dspt21.o dstt21.o \
    dstt22.o dsyl01.o dsyt21.o dsyt22.o
 
+DDMDEIGTST = dchkdmd.o
+
 ZEIGTST = zchkee.o \
    zbdt01.o zbdt02.o zbdt03.o zbdt05.o \
    zchkbb.o zchkbd.o zchkbk.o zchkbl.o zchkec.o \
@@ -118,14 +124,28 @@ ZEIGTST = zchkee.o \
    zsgt01.o zslect.o zsyl01.o\
    zstt21.o zstt22.o zunt01.o zunt03.o
 
+ZDMDEIGTST = zchkdmd.o
+
 .PHONY: all
 all: single complex double complex16
 
 .PHONY: single complex double complex16
-single: xeigtsts
-complex: xeigtstc
-double: xeigtstd
-complex16: xeigtstz
+single: xeigtsts xdmdeigtsts
+complex: xeigtstc xdmdeigtstc
+double: xeigtstd xdmdeigtstd
+complex16: xeigtstz xdmdeigtstz
+
+xdmdeigtsts: $(SDMDEIGTST) $(TMGLIB) $(LAPACKLIB) $(BLASLIB)
+	$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
+
+xdmdeigtstc: $(CDMDEIGTST) $(TMGLIB) $(LAPACKLIB) $(BLASLIB)
+	$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
+
+xdmdeigtstd: $(DDMDEIGTST) $(TMGLIB) $(LAPACKLIB) $(BLASLIB)
+	$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
+
+xdmdeigtstz: $(ZDMDEIGTST) $(TMGLIB) $(LAPACKLIB) $(BLASLIB)
+	$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
 
 xeigtsts: $(SEIGTST) $(SCIGTST) $(AEIGTST) $(TMGLIB) $(LAPACKLIB) $(BLASLIB)
 	$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
@@ -139,6 +159,10 @@ xeigtstd: $(DEIGTST) $(DZIGTST) $(AEIGTST) $(TMGLIB) $(LAPACKLIB) $(BLASLIB)
 xeigtstz: $(ZEIGTST) $(DZIGTST) $(AEIGTST) $(TMGLIB) $(LAPACKLIB) $(BLASLIB)
 	$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
 
+$(SDMDEIGTST): $(FRC)
+$(CDMDEIGTST): $(FRC)
+$(DDMDEIGTST): $(FRC)
+$(ZDMDEIGTST): $(FRC)
 $(AEIGTST): $(FRC)
 $(SCIGTST): $(FRC)
 $(DZIGTST): $(FRC)
@@ -155,7 +179,7 @@ clean: cleanobj cleanexe
 cleanobj:
 	rm -f *.o
 cleanexe:
-	rm -f xeigtst*
+	rm -f xeigtst* xdmdeigtst*
 
 schkee.o: schkee.F
 	$(FC) $(FFLAGS_DRV) -c -o $@ $<
@@ -165,3 +189,11 @@ cchkee.o: cchkee.F
 	$(FC) $(FFLAGS_DRV) -c -o $@ $<
 zchkee.o: zchkee.F
 	$(FC) $(FFLAGS_DRV) -c -o $@ $<
+schkdmd.o: schkdmd.f90
+	$(FC) $(FFLAGS_DRV) -c -o $@ $<
+cchkdmd.o: cchkdmd.f90
+	$(FC) $(FFLAGS_DRV) -c -o $@ $<
+dchkdmd.o: dchkdmd.f90
+	$(FC) $(FFLAGS_DRV) -c -o $@ $<
+zchkdmd.o: zchkdmd.f90
+	$(FC) $(FFLAGS_DRV) -c -o $@ $<
diff --git a/TESTING/EIG/cchkdmd.f90 b/TESTING/EIG/cchkdmd.f90
new file mode 100644
index 0000000000..a9c181da9b
--- /dev/null
+++ b/TESTING/EIG/cchkdmd.f90
@@ -0,0 +1,721 @@
+!   This is a test program for checking the implementations of
+!   the following subroutines
+!
+!   CGEDMD,  for computation of the
+!            Dynamic Mode Decomposition (DMD)
+!   CGEDMDQ, for computation of a
+!            QR factorization based compressed DMD
+!
+!   Developed and supported by:
+!   ===========================
+!   Developed and coded by Zlatko Drmac, Faculty of Science,
+!   University of Zagreb;  drmac@math.hr
+!   In cooperation with
+!   AIMdyn Inc., Santa Barbara, CA.
+!   ========================================================
+!   How to run the code (compiler, link info)
+!   ========================================================
+!   Compile as FORTRAN 90 (or later) and link with BLAS and
+!   LAPACK libraries.
+!   NOTE: The code is developed and tested on top of the
+!   Intel MKL library (versions 2022.0.3 and 2022.2.0),
+!   using the Intel Fortran compiler.
+!
+!   For developers of the C++ implementation
+!   ========================================================
+!   See the LAPACK++ and Template Numerical Toolkit (TNT)
+!
+!   Note on a development of the GPU HP implementation
+!   ========================================================
+!   Work in progress. See CUDA, MAGMA, SLATE.
+!   NOTE: The four SVD subroutines used in this code are
+!   included as a part of R&D and for the completeness.
+!   This was also an opportunity to test those SVD codes.
+!   If the scaling option is used all four are essentially
+!   equally good. For implementations on HP platforms,
+!   one can use whichever SVD is available.
+!............................................................
+
+!............................................................
+!............................................................
+!
+      PROGRAM DMD_TEST
+
+      use iso_fortran_env
+      IMPLICIT NONE
+      integer, parameter :: WP = real32
+!............................................................
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+
+      COMPLEX(KIND=WP), PARAMETER ::  CONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: CZERO = ( 0.0_WP, 0.0_WP )
+!............................................................
+      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:)   :: RES, &
+                     RES1, RESEX, SINGVX, SINGVQX, WORK
+      INTEGER      , ALLOCATABLE, DIMENSION(:)   ::   IWORK
+      REAL(KIND=WP) :: WDUMMY(2)
+      INTEGER       :: IDUMMY(4), ISEED(4)
+      REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, EPS,       &
+                       TOL, TOL2, SVDIFF, TMP, TMP_AU,       &
+                       TMP_FQR, TMP_REZ, TMP_REZQ,  TMP_XW, &
+                       TMP_EX
+!............................................................
+      COMPLEX(KIND=WP) :: CMAX
+      INTEGER :: LCWORK
+      COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:,:) ::  A, AC,  &
+                                 AU, F, F0, F1, S, W,  &
+                                 X, X0, Y, Y0, Y1, Z, Z1
+      COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:)   ::  CDA, CDR, &
+                                       CDL, CEIGS, CEIGSA, CWORK
+      COMPLEX(KIND=WP) ::  CDUMMY(22), CDUM2X2(2,2)
+!............................................................
+      INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY,  &
+                 LDZ, LIWORK, LWORK, M, N, LLOOP, NRNK
+      INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j,     &
+                 NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ,     &
+                 NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL,  &
+                 NFAIL_Z_XV,  MODE, MODEL, MODER, WHTSVD
+      INTEGER :: iNRNK, iWHTSVD,  K_traj, LWMINOPT
+      CHARACTER :: GRADE, JOBREF, JOBZ, PIVTNG, RSIGN,   &
+                   SCALE, RESIDS, WANTQ, WANTR
+      LOGICAL :: TEST_QRDMD
+
+!..... external subroutines (BLAS and LAPACK)
+      EXTERNAL CAXPY, CGEEV, CGEMM, CGEMV, CLASCL
+!.....external subroutines DMD package
+!     subroutines under test
+      EXTERNAL CGEDMD, CGEDMDQ
+!..... external functions (BLAS and LAPACK)
+      EXTERNAL         SCNRM2, SLAMCH
+      REAL(KIND=WP) :: SCNRM2, SLAMCH
+      EXTERNAL         CLANGE
+      REAL(KIND=WP) :: CLANGE
+      EXTERNAL ICAMAX
+      INTEGER  ICAMAX
+      EXTERNAL LSAME
+      LOGICAL  LSAME
+
+      INTRINSIC ABS, INT, MIN, MAX, SIGN
+!............................................................
+
+
+      WRITE(*,*) 'COMPLEX CODE TESTING'
+
+      ! The test is always in pairs : ( CGEDMD and CGEDMDQ)
+      ! because the test includes comparing the results (in pairs).
+!.....................................................................................
+      ! This code by default performs tests on CGEDMDQ
+      ! Since the QR factorizations based algorithm is designed for
+      ! single trajectory data, only single trajectory tests will
+      ! be performed with xGEDMDQ.
+
+      WANTQ = 'Q'
+      WANTR = 'R'
+!.................................................................................
+
+      EPS = SLAMCH( 'P' )  ! machine precision WP
+
+      ! Global counters of failures of some particular tests
+      NFAIL      = 0
+      NFAIL_REZ  = 0
+      NFAIL_REZQ = 0
+      NFAIL_Z_XV = 0
+      NFAIL_F_QR = 0
+      NFAIL_AU   = 0
+      NFAIL_SVDIFF = 0
+      NFAIL_TOTAL  = 0
+      NFAILQ_TOTAL = 0
+
+      DO LLOOP = 1, 4
+
+      WRITE(*,*) 'L Loop Index = ', LLOOP
+
+      ! Set the dimensions of the problem ...
+      READ(*,*) M
+      WRITE(*,*) 'M = ', M
+      ! ... and the number of snapshots.
+      READ(*,*) N
+      WRITE(*,*) 'N = ', N
+
+      ! Test the dimensions
+      IF ( ( MIN(M,N) == 0 ) .OR. ( M < N )  ) THEN
+          WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.'
+          STOP
+      END IF
+!.............
+      ! The seed inside the LLOOP so that each pass can be reproduced easily.
+      ISEED(1) = 4
+      ISEED(2) = 3
+      ISEED(3) = 2
+      ISEED(4) = 1
+
+      LDA  = M
+      LDF  = M
+      LDX  = M
+      LDY  = M
+      LDW  = N
+      LDZ  = M
+      LDAU = M
+      LDS  = N
+
+      TMP_XW  = ZERO
+      TMP_AU   = ZERO
+      TMP_REZ  = ZERO
+      TMP_REZQ = ZERO
+      SVDIFF   = ZERO
+      TMP_EX   = ZERO
+
+      ALLOCATE( A(LDA,M) )
+      ALLOCATE( AC(LDA,M) )
+      ALLOCATE( F(LDF,N+1) )
+      ALLOCATE( F0(LDF,N+1) )
+      ALLOCATE( F1(LDF,N+1) )
+      ALLOCATE( X(LDX,N) )
+      ALLOCATE( X0(LDX,N) )
+      ALLOCATE( Y(LDY,N+1) )
+      ALLOCATE( Y0(LDY,N+1) )
+      ALLOCATE( Y1(LDY,N+1) )
+      ALLOCATE( AU(LDAU,N) )
+      ALLOCATE( W(LDW,N) )
+      ALLOCATE( S(LDS,N) )
+      ALLOCATE( Z(LDZ,N) )
+      ALLOCATE( Z1(LDZ,N) )
+      ALLOCATE( RES(N) )
+      ALLOCATE( RES1(N) )
+      ALLOCATE( RESEX(N) )
+      ALLOCATE( CEIGS(N) )
+      ALLOCATE( SINGVX(N) )
+      ALLOCATE( SINGVQX(N) )
+
+      TOL  = 10*M*EPS
+      TOL2 = 10*M*N*EPS
+
+!.............
+
+      DO K_traj = 1, 2
+      !  Number of initial conditions in the simulation/trajectories (1 or 2)
+      !  Generator settings handed to CLATMR below; literals carry kind WP.
+      COND   = 1.0E4_WP
+      CMAX   = (1.0E1_WP,1.0E1_WP)
+      RSIGN  = 'F'
+      GRADE  = 'N'
+      MODEL  = 6
+      CONDL  = 1.0E1_WP
+      MODER  = 6
+      CONDR  = 1.0E1_WP
+      PIVTNG = 'N'
+      ! Loop over the CLATMR parameter MODE values 1,...,6
+
+      DO MODE = 1, 6
+
+      ALLOCATE( IWORK(2*M) )
+      ALLOCATE( CDA(M) )
+      ALLOCATE( CDL(M) )
+      ALLOCATE( CDR(M) )
+
+      CALL CLATMR( M, M, 'N', ISEED, 'N', CDA, MODE, COND, &
+                   CMAX, RSIGN, GRADE, CDL, MODEL,  CONDL, &
+                   CDR, MODER, CONDR, PIVTNG, IWORK, M, M, &
+                   ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO )
+      DEALLOCATE( CDR )
+      DEALLOCATE( CDL )
+      DEALLOCATE( CDA )
+      DEALLOCATE( IWORK )
+
+      LCWORK = MAX(1,2*M)
+      ALLOCATE( CEIGSA(M) )
+      ALLOCATE( CWORK(LCWORK) )
+      ALLOCATE( WORK(2*M) )
+      AC(1:M,1:M) = A(1:M,1:M)
+      CALL CGEEV( 'N','N', M, AC, LDA, CEIGSA, CDUM2X2, 2, &
+                  CDUM2X2, 2, CWORK, LCWORK, WORK, INFO ) ! LAPACK CALL
+      DEALLOCATE(WORK)
+      DEALLOCATE(CWORK)
+
+      TMP = ABS(CEIGSA(ICAMAX(M, CEIGSA, 1))) ! The spectral radius of A
+      ! Scale the matrix A to have unit spectral radius.
+      CALL CLASCL( 'G',0, 0, TMP, ONE, M, M, &
+                   A, LDA, INFO )
+      CALL CLASCL( 'G',0, 0, TMP, ONE, M, 1, &
+                   CEIGSA, M, INFO )
+      ANORM = CLANGE( 'F', M, M, A, LDA, WDUMMY )
+
+      IF ( K_traj == 2 ) THEN
+          ! generate data as two trajectories
+          ! with two initial conditions
+          CALL CLARNV(2, ISEED, M, F(1,1) )
+          DO i = 1, N/2
+             CALL CGEMV( 'N', M, M, CONE, A, LDA, F(1,i), 1,  &
+                  CZERO, F(1,i+1), 1 )
+          END DO
+          X0(1:M,1:N/2) = F(1:M,1:N/2)
+          Y0(1:M,1:N/2) = F(1:M,2:N/2+1)
+
+          CALL CLARNV(2, ISEED, M, F(1,1) )
+          DO i = 1, N-N/2
+             CALL CGEMV( 'N', M, M, CONE, A, LDA, F(1,i), 1,  &
+                  CZERO, F(1,i+1), 1 )
+          END DO
+          X0(1:M,N/2+1:N) = F(1:M,1:N-N/2)
+          Y0(1:M,N/2+1:N) = F(1:M,2:N-N/2+1)
+      ELSE
+          CALL CLARNV(2, ISEED, M, F(1,1) )
+          DO i = 1, N
+             CALL CGEMV( 'N', M, M, CONE, A, M, F(1,i), 1,  &
+                  CZERO, F(1,i+1), 1 )
+          END DO
+          F0(1:M,1:N+1) = F(1:M,1:N+1)
+          X0(1:M,1:N) = F0(1:M,1:N)
+          Y0(1:M,1:N) = F0(1:M,2:N+1)
+      END IF
+
+      DEALLOCATE( CEIGSA )
+!........................................................................
+
+      DO iJOBZ = 1, 4
+
+          SELECT CASE ( iJOBZ )
+          CASE(1)
+              JOBZ   = 'V'
+              RESIDS = 'R'
+          CASE(2)
+              JOBZ   = 'V'
+              RESIDS = 'N'
+          CASE(3)
+              JOBZ   = 'F'
+              RESIDS = 'N'
+          CASE(4)
+              JOBZ   = 'N'
+              RESIDS = 'N'
+          END SELECT
+
+      DO iJOBREF = 1, 3
+
+          SELECT CASE ( iJOBREF )
+          CASE(1)
+              JOBREF = 'R'
+          CASE(2)
+              JOBREF = 'E'
+          CASE(3)
+              JOBREF = 'N'
+          END SELECT
+
+      DO iSCALE = 1, 4
+
+          SELECT CASE ( iSCALE )
+          CASE(1)
+              SCALE = 'S'
+          CASE(2)
+              SCALE = 'C'
+          CASE(3)
+              SCALE = 'Y'
+          CASE(4)
+              SCALE = 'N'
+          END SELECT
+
+      DO iNRNK = -1, -2, -1
+          NRNK   = iNRNK
+
+      DO iWHTSVD = 1,  3
+         ! Test the SVD options WHTSVD = 1, 2, 3 for computing
+         ! the POD basis.
+         WHTSVD   = iWHTSVD
+
+      DO LWMINOPT = 1, 2
+         ! Workspace query for the minimal (1) and for the optimal
+         ! (2) workspace lengths determined by workspace query.
+
+      ! CGEDMD is always tested and its results are also used for
+      ! comparisons with CGEDMDQ.
+
+      X(1:M,1:N) = X0(1:M,1:N)
+      Y(1:M,1:N) = Y0(1:M,1:N)
+
+      CALL CGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,  &
+                M,  N, X, LDX, Y, LDY, NRNK, TOL,  &
+                K, CEIGS, Z, LDZ,  RES,  &
+                AU, LDAU, W,  LDW,   S, LDS,        &
+                CDUMMY, -1, WDUMMY, -1, IDUMMY, -1, INFO )
+
+      IF ( (INFO .EQ. 2) .OR. ( INFO .EQ. 3 ) &
+                       .OR. ( INFO < 0 ) ) THEN
+        WRITE(*,*) 'Call to CGEDMD workspace query failed. &
+                   &Check the calling sequence and the code.'
+        WRITE(*,*) 'The error code is ', INFO
+        WRITE(*,*) 'The input parameters were ',      &
+        SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,          &
+        M, N, LDX, LDY, NRNK, TOL, LDZ, LDAU, LDW, LDS
+        STOP
+      ELSE
+        !WRITE(*,*) '... done. Workspace length computed.'
+      END IF
+
+      LCWORK = INT(CDUMMY(LWMINOPT))
+      ALLOCATE(CWORK(LCWORK))
+      LIWORK = IDUMMY(1)
+      ALLOCATE(IWORK(LIWORK))
+      LWORK = INT(WDUMMY(1))
+      ALLOCATE(WORK(LWORK))
+
+      CALL CGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,  &
+                   M,  N, X, LDX, Y, LDY, NRNK, TOL,  &
+                   K, CEIGS, Z, LDZ,  RES,  &
+                   AU, LDAU, W,  LDW,   S, LDS,        &
+                   CWORK, LCWORK, WORK, LWORK, IWORK, LIWORK, INFO )
+      IF ( INFO /= 0 ) THEN
+           WRITE(*,*) 'Call to CGEDMD failed. &
+           &Check the calling sequence and the code.'
+           WRITE(*,*) 'The error code is ', INFO
+           WRITE(*,*) 'The input parameters were ',&
+           SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+           M, N, LDX, LDY, NRNK, TOL
+           STOP
+      END IF
+      SINGVX(1:N) = WORK(1:N)
+
+      !...... CGEDMD check point
+      IF ( LSAME(JOBZ,'V')  ) THEN
+          ! Check that Z = X*W on return from CGEDMD, i.e. that
+          ! the returned eigenvectors in Z are the product of
+          ! the SVD/POD basis returned in X and the eigenvectors
+          ! of the Rayleigh quotient returned in W. Z1 receives
+          ! X*W and then, column by column, the difference X*W - Z.
+          CALL CGEMM( 'N', 'N', M, K, K, CONE, X, LDX, W, LDW, &
+                      CZERO, Z1, LDZ )
+          TMP = ZERO
+          DO i = 1, K
+             CALL CAXPY( M, -CONE, Z(1,i), 1, Z1(1,i), 1)
+             TMP = MAX(TMP, SCNRM2( M, Z1(1,i), 1 ) )
+          END DO
+          TMP_XW = MAX(TMP_XW, TMP )  ! running maximum, reported in the summary
+          IF ( TMP <= TOL ) THEN      ! judge this run only, not the running maximum
+              !WRITE(*,*) ' :) .... OK .........CGEDMD PASSED.'
+          ELSE
+              NFAIL_Z_XV = NFAIL_Z_XV + 1
+              WRITE(*,*) ':( .................CGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+      END IF
+      !...... CGEDMD check point
+
+      IF ( LSAME(JOBREF,'R') ) THEN
+           ! The matrix A*U is returned for computing refined Ritz vectors.
+           ! Check that A*U is computed correctly using the formula
+           ! A*U = Y * V * inv(SIGMA). This depends on the
+           ! accuracy in the computed singular values and vectors of X.
+           ! See the paper for an error analysis.
+           ! Note that the left singular vectors of the input matrix X
+           ! are returned in the array X.
+           CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, X, LDX, &
+                      CZERO, Z1, LDZ )
+          TMP = ZERO
+          DO i = 1, K
+             CALL CAXPY( M, -CONE, AU(1,i), 1, Z1(1,i), 1)
+             TMP = MAX( TMP, SCNRM2( M, Z1(1,i),1 ) * &
+                     SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_AU = MAX( TMP_AU, TMP )
+          IF ( TMP <= TOL2 ) THEN
+              !WRITE(*,*) ':) .... OK .........CGEDMD PASSED.'
+          ELSE
+              NFAIL_AU = NFAIL_AU + 1
+              WRITE(*,*) ':( .................CGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL2
+          END IF
+      ELSEIF ( LSAME(JOBREF,'E') ) THEN
+          ! The unscaled vectors of the Exact DMD are computed.
+          ! This option is included for the sake of completeness,
+          ! for users who prefer the Exact DMD vectors. The
+          ! returned vectors are in the real form, in the same way
+          ! as the Ritz vectors. Here we just save the vectors
+          ! and test them separately using a Matlab script.
+          CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, AU, LDAU, CZERO, Y1, LDY )
+
+          DO i=1, K
+             CALL CAXPY( M, -CEIGS(i), AU(1,i), 1, Y1(1,i), 1 )
+             RESEX(i) = SCNRM2( M, Y1(1,i), 1) / SCNRM2(M,AU(1,i),1)
+          END DO
+      END IF
+      !...... CGEDMD check point
+
+      IF ( LSAME(RESIDS, 'R') ) THEN
+          ! Compare the residuals returned by CGEDMD with the
+          ! explicitly computed residuals using the matrix A.
+          ! Compute explicitly Y1 = A*Z
+          CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, Z, LDZ, CZERO, Y1, LDY )
+          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), column by column.
+          ! In this complex driver each eigenvalue CEIGS(i) is paired
+          ! with the single complex vector Z(:,i).
+
+          DO i=1, K
+                ! Y1(:,i) = A*Z(:,i) - CEIGS(i)*Z(:,i)
+                CALL CAXPY( M, -CEIGS(i), Z(1,i), 1, Y1(1,i), 1 )
+                RES1(i) = SCNRM2( M, Y1(1,i), 1)
+          END DO
+          TMP = ZERO
+          DO i = 1, K
+          TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                    SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_REZ = MAX( TMP_REZ, TMP )
+          IF ( TMP <= TOL2 ) THEN
+              !WRITE(*,*) ':) .... OK ..........CGEDMD PASSED.'
+          ELSE
+              NFAIL_REZ = NFAIL_REZ + 1
+              WRITE(*,*) ':( ..................CGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL2
+          END IF
+          ! With JOBREF = 'E', also track the largest relative gap
+          ! between RES1 and the residuals RESEX of the Exact DMD vectors.
+         IF ( LSAME(JOBREF,'E') ) THEN
+            TMP = ZERO
+          DO i = 1, K
+          TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) )
+          END DO
+          TMP_EX = MAX(TMP_EX,TMP)
+         END IF
+
+      END IF
+
+      DEALLOCATE(CWORK)
+      DEALLOCATE(WORK)
+      DEALLOCATE(IWORK)
+
+!.......................................................................................................
+
+      IF ( K_traj == 1 ) THEN
+
+          F(1:M,1:N+1) = F0(1:M,1:N+1)   ! reload the snapshots from the saved copy F0
+          CALL CGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, &
+                    WHTSVD, M, N+1, F, LDF,  X, LDX,  Y, LDY,  &
+                    NRNK,  TOL, KQ, CEIGS, Z, LDZ, RES,  AU,  &
+                    LDAU, W, LDW, S, LDS, CDUMMY, -1,   &
+                    WDUMMY,  -1, IDUMMY, -1, INFO )
+          ! Query call (all lengths -1). KQ is passed so K from CGEDMD stays intact.
+          LCWORK = INT(CDUMMY(LWMINOPT))
+          ALLOCATE(CWORK(LCWORK))
+          LIWORK = IDUMMY(1)
+          ALLOCATE(IWORK(LIWORK))
+          LWORK = INT(WDUMMY(1))
+          ALLOCATE(WORK(LWORK))
+
+          CALL CGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, &
+                        WHTSVD, M, N+1, F, LDF,  X, LDX,  Y, LDY,  &
+                        NRNK,  TOL, KQ, CEIGS, Z, LDZ, RES,  AU,  &
+                        LDAU, W, LDW, S, LDS, CWORK, LCWORK,   &
+                        WORK,  LWORK, IWORK, LIWORK, INFO )
+          IF ( INFO /= 0 ) THEN
+                 WRITE(*,*) 'Call to CGEDMDQ failed. &
+                 &Check the calling sequence and the code.'
+                 WRITE(*,*) 'The error code is ', INFO
+                 WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, WANTQ, WANTR, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+                 STOP
+          END IF
+          SINGVQX(1:N) =WORK(1:N)
+
+          !..... CGEDMDQ check point
+
+          TMP = ZERO
+          DO i = 1, MIN(K, KQ)
+             TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / &
+                                   SINGVX(1) )
+          END DO
+          SVDIFF = MAX( SVDIFF, TMP )
+          IF ( TMP > TOL2 ) THEN
+               WRITE(*,*) 'FAILED! Something was wrong with the run.'
+             NFAIL_SVDIFF = NFAIL_SVDIFF + 1
+          END IF
+          !..... CGEDMDQ check point
+
+          !..... CGEDMDQ check point
+          IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN
+             ! Check that the QR factors are computed and returned
+             ! as requested. The residual ||F-Q*R||_F / ||F||_F
+             ! is compared to M*N*EPS.
+             F1(1:M,1:N+1) = F0(1:M,1:N+1)
+             CALL CGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -CONE, F, &
+                         LDF, Y, LDY, CONE, F1, LDF )
+             TMP_FQR = CLANGE( 'F', M, N+1, F1, LDF, WORK ) / &
+                   CLANGE( 'F', M, N+1, F0,  LDF, WORK )
+             IF ( TMP_FQR <= TOL2 ) THEN
+                !WRITE(*,*) ':) CGEDMDQ ........ PASSED.'
+             ELSE
+                WRITE(*,*) ':( CGEDMDQ ........ FAILED.'
+                NFAIL_F_QR = NFAIL_F_QR + 1
+             END IF
+          END IF
+          !..... CGEDMDQ checkpoint
+          !..... CGEDMDQ checkpoint
+          IF ( LSAME(RESIDS, 'R') ) THEN
+              ! Compare the residuals returned by CGEDMDQ with the
+              ! explicitly computed residuals using the matrix A.
+              ! Compute explicitly Y1 = A*Z
+              CALL CGEMM( 'N', 'N', M, KQ, M, CONE, A, LDA, Z, LDZ, CZERO, Y1, LDY )
+              ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), column by column.
+              ! In this complex driver each eigenvalue CEIGS(i) is paired
+              ! with the single complex vector Z(:,i).
+              DO i = 1, KQ
+                    ! Y1(1:M,i) = Y1(1:M,i) - CEIGS(i)*Z(1:M,i)
+                    CALL CAXPY( M, -CEIGS(i), Z(1,i), 1, Y1(1,i), 1 )
+                    ! RES1(i) = 2-norm of that residual column
+                    RES1(i) = SCNRM2( M, Y1(1,i), 1)
+              END DO
+              TMP = ZERO
+              DO i = 1, KQ
+              TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                  SINGVQX(KQ)/(ANORM*SINGVQX(1)) )
+              END DO
+              TMP_REZQ = MAX( TMP_REZQ, TMP )
+              IF ( TMP <= TOL2 ) THEN
+                  !WRITE(*,*) '.... OK ........ CGEDMDQ PASSED.'
+              ELSE
+                  NFAIL_REZQ = NFAIL_REZQ + 1
+                  WRITE(*,*) '................ CGEDMDQ FAILED!', &
+                      'Check the code for implementation errors.'
+              END IF
+          END IF
+
+          DEALLOCATE(CWORK)
+          DEALLOCATE(WORK)
+          DEALLOCATE(IWORK)
+
+      END IF
+
+      END DO   ! LWMINOPT
+      !write(*,*) 'LWMINOPT loop completed'
+      END DO   ! iWHTSVD
+      !write(*,*) 'WHTSVD loop completed'
+      END DO   ! iNRNK  -2:-1
+      !write(*,*) 'NRNK loop completed'
+      END DO   ! iSCALE  1:4
+      !write(*,*) 'SCALE loop completed'
+      END DO
+      !write(*,*) 'JOBREF loop completed'
+      END DO   ! iJOBZ
+      !write(*,*) 'JOBZ loop completed'
+
+      END DO ! MODE -6:6
+      !write(*,*) 'MODE loop completed'
+      END DO ! 1 or 2 trajectories
+      !write(*,*) 'trajectories  loop completed'
+
+      DEALLOCATE( A )
+      DEALLOCATE( AC )
+      DEALLOCATE( Z )
+      DEALLOCATE( F )
+      DEALLOCATE( F0 )
+      DEALLOCATE( F1 )
+      DEALLOCATE( X )
+      DEALLOCATE( X0 )
+      DEALLOCATE( Y )
+      DEALLOCATE( Y0 )
+      DEALLOCATE( Y1 )
+      DEALLOCATE( AU )
+      DEALLOCATE( W )
+      DEALLOCATE( S )
+      DEALLOCATE( Z1 )
+      DEALLOCATE( RES )
+      DEALLOCATE( RES1 )
+      DEALLOCATE( RESEX )
+      DEALLOCATE( CEIGS )
+      DEALLOCATE( SINGVX )
+      DEALLOCATE( SINGVQX )
+
+      END DO ! LLOOP
+
+      WRITE(*,*)
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for CGEDMD :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+      IF ( NFAIL_Z_XV == 0 ) THEN
+          WRITE(*,*) '>>>> Z - U*V test PASSED.'
+      ELSE
+          WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)'
+          WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_XW
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_z_XV
+      END IF
+
+      IF ( NFAIL_AU == 0 ) THEN
+          WRITE(*,*) '>>>> A*U test PASSED. '
+      ELSE
+          WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)'
+          WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU
+      END IF
+
+
+      IF ( NFAIL_REZ == 0 ) THEN
+         WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+        WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)'
+        WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ
+        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+        NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ
+      END IF
+      IF ( NFAIL_TOTAL == 0 ) THEN
+        WRITE(*,*) '>>>> CGEDMD :: ALL TESTS PASSED.'
+      ELSE
+        WRITE(*,*) NFAIL_TOTAL, 'FAILURES!'
+        WRITE(*,*) '>>>>>>>>>>>>>> CGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      WRITE(*,*)
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for CGEDMDQ :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+
+      IF ( NFAIL_SVDIFF == 0 ) THEN   ! no run exceeded TOL2 in the singular value comparison
+        WRITE(*,*) '>>>> CGEDMD and CGEDMDQ computed singular &
+           &values test PASSED.'
+      ELSE
+        WRITE(*,*) 'CGEDMD and CGEDMDQ discrepancies in &
+            &the singular values unacceptable ', &
+            NFAIL_SVDIFF, ' times. Test FAILED.'
+        WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF
+        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+        NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF   ! fold into the CGEDMDQ failure total
+      END IF
+      IF ( NFAIL_F_QR == 0 ) THEN
+        WRITE(*,*) '>>>> F - Q*R test PASSED.'
+      ELSE
+        WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)'
+        WRITE(*,*) 'The largest relative residual was ', TMP_FQR
+        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+        NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR
+      END IF
+
+      IF ( NFAIL_REZQ == 0 ) THEN
+        WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+        WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)'
+        WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ
+        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+        NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ
+      END IF
+
+      IF ( NFAILQ_TOTAL == 0 ) THEN
+        WRITE(*,*) '>>>>>>> CGEDMDQ :: ALL TESTS PASSED.'
+      ELSE
+        WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!'
+        WRITE(*,*) '>>>>>>> CGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      WRITE(*,*)
+      WRITE(*,*) 'Test completed.'
+      STOP
+      END
diff --git a/TESTING/EIG/dchkdmd.f90 b/TESTING/EIG/dchkdmd.f90
new file mode 100644
index 0000000000..4fbf7531b3
--- /dev/null
+++ b/TESTING/EIG/dchkdmd.f90
@@ -0,0 +1,813 @@
+!     This is a test program for checking the implementations of
+!     the following subroutines
+!
+!     DGEDMD  for computation of the
+!             Dynamic Mode Decomposition (DMD)
+!     DGEDMDQ for computation of a
+!             QR factorization based compressed DMD
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     ========================================================
+!     How to run the code (compiler, link info)
+!     ========================================================
+!     Compile as FORTRAN 90 (or later) and link with BLAS and
+!     LAPACK libraries.
+!     NOTE: The code is developed and tested on top of the
+!     Intel MKL library (versions 2022.0.3 and 2022.2.0),
+!     using the Intel Fortran compiler.
+!
+!     For developers of the C++ implementation
+!     ========================================================
+!     See the LAPACK++ and Template Numerical Toolkit (TNT)
+!
+!     Note on a development of the GPU HP implementation
+!     ========================================================
+!     Work in progress. See CUDA, MAGMA, SLATE.
+!     NOTE: The four SVD subroutines used in this code are
+!     included as a part of R&D and for the completeness.
+!     This was also an opportunity to test those SVD codes.
+!     If the scaling option is used all four are essentially
+!     equally good. For implementations on HP platforms,
+!     one can use whichever SVD is available.
+!...  .........................................................
+!     NOTE:
+!     When using the Intel MKL 2022.0.3 the subroutine xGESVDQ
+!     (optionally used in xGEDMD) may cause access violation
+!     error for x = S, D, C, Z, but only if called with the
+!     work space query. (At least in our Windows 10 MSVS 2019.)
+!     The problem can be mitigated by downloading the source
+!     code of xGESVDQ from the LAPACK repository and using it
+!     locally instead of the one in the MKL. This seems to
+!     indicate that the problem is indeed in the MKL.
+!     This problem did not appear with Intel MKL 2022.2.0.
+!
+!     NOTE:
+!     xGESDD seems to have a problem with workspace. In some
+!     cases the length of the optimal workspace is returned
+!     smaller than the minimal workspace, as specified in the
+!     code. As a precaution, all optimal workspaces are
+!     set as MAX(minimal, optimal).
+!     Latest implementations of complex xGESDD have different
+!     length of the real workspace. We use max value over
+!     two versions.
+!............................................................
+!............................................................
+!
+      PROGRAM DMD_TEST
+      use iso_fortran_env, only: real64
+      IMPLICIT NONE
+      integer, parameter :: WP = real64
+
+!............................................................
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!............................................................
+      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:,:) ::          &
+                     A, AC, EIGA, LAMBDA, LAMBDAQ, F, F1, F2,&
+                     Z, Z1, S, AU, W, VA, X, X0, Y, Y0, Y1
+      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:)   ::          &
+                     DA, DL, DR, REIG, REIGA, REIGQ, IEIG,   &
+                     IEIGA, IEIGQ,  RES, RES1, RESEX, SINGVX,&
+                     SINGVQX, WORK
+      INTEGER      , ALLOCATABLE, DIMENSION(:)   ::   IWORK
+      REAL(KIND=WP) :: AB(2,2),   WDUMMY(2)
+      INTEGER       :: IDUMMY(2), ISEED(4), RJOBDATA(8)
+      REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, DMAX, EPS, &
+                       TOL, TOL2, SVDIFF, TMP, TMP_AU,       &
+                       TMP_FQR, TMP_REZ, TMP_REZQ,  TMP_ZXW, &
+                       TMP_EX, XNORM, YNORM
+!............................................................
+      INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY,  &
+                 LDZ, LIWORK, LWORK, M, N, L, LLOOP, NRNK
+      INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j, KDIFF,  &
+                 NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ,     &
+                 NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, &
+                 NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD
+      INTEGER    iNRNK, iWHTSVD, K_TRAJ, LWMINOPT
+      CHARACTER(LEN=1) GRADE, JOBREF, JOBZ, PIVTNG, RSIGN,   &
+                       SCALE, RESIDS, WANTQ, WANTR
+
+      LOGICAL  TEST_QRDMD
+!..... external subroutines (BLAS and LAPACK)
+      EXTERNAL DAXPY,  DGEEV, DGEMM, DGEMV, DLACPY, DLASCL
+      EXTERNAL DLARNV, DLATMR
+!.....external subroutines DMD package, part 1
+!     subroutines under test
+      EXTERNAL DGEDMD, DGEDMDQ
+
+!..... external functions (BLAS and LAPACK)
+      EXTERNAL         DLAMCH, DLANGE, DNRM2
+      REAL(KIND=WP) :: DLAMCH, DLANGE, DNRM2
+      EXTERNAL         LSAME
+      LOGICAL          LSAME
+
+      INTRINSIC ABS, INT, MIN, MAX
+!............................................................
+
+      ! The test is always in pairs : ( DGEDMD and DGEDMDQ )
+      ! because the test includes comparing the results (in pairs).
+!.....................................................................................
+      TEST_QRDMD = .TRUE. ! This code by default performs tests on DGEDMDQ
+                          ! Since the QR factorizations based algorithm is designed for
+                          ! single trajectory data, only single trajectory tests will
+                          ! be performed with xGEDMDQ.
+      WANTQ = 'Q'
+      WANTR = 'R'
+!.................................................................................
+
+      EPS = DLAMCH( 'P' )  ! machine precision DP
+
+      ! Global counters of failures of some particular tests
+      NFAIL      = 0
+      NFAIL_REZ  = 0
+      NFAIL_REZQ = 0
+      NFAIL_Z_XV = 0
+      NFAIL_F_QR = 0
+      NFAIL_AU   = 0
+      KDIFF      = 0
+      NFAIL_SVDIFF = 0
+      NFAIL_TOTAL  = 0
+      NFAILQ_TOTAL = 0
+
+
+      DO LLOOP = 1, 4
+
+      WRITE(*,*) 'L Loop Index = ', LLOOP
+
+      ! Set the dimensions of the problem ...
+      WRITE(*,*) 'M = '
+      READ(*,*) M
+      WRITE(*,*) M
+      ! ... and the number of snapshots.
+      WRITE(*,*) 'N = '
+      READ(*,*) N
+      WRITE(*,*) N
+
+      ! ... Test the dimensions
+      IF ( ( MIN(M,N) == 0 ) .OR. ( M < N )  ) THEN
+          WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.'
+          STOP
+      END IF
+!.............
+      ! The seed inside the LLOOP so that each pass can be reproduced easily.
+
+      ISEED(1) = 4
+      ISEED(2) = 3
+      ISEED(3) = 2
+      ISEED(4) = 1
+
+      LDA  = M
+      LDF  = M
+      LDX  = MAX(M,N+1)
+      LDY  = MAX(M,N+1)
+      LDW  = N
+      LDZ  = M
+      LDAU = MAX(M,N+1)
+      LDS  = N
+
+      TMP_ZXW  = ZERO
+      TMP_AU   = ZERO
+      TMP_REZ  = ZERO
+      TMP_REZQ = ZERO
+      SVDIFF   = ZERO
+      TMP_EX   = ZERO
+
+      !
+      ! Test the subroutines on real data snapshots. All
+      ! computation is done in real arithmetic, even when
+      ! Koopman eigenvalues and modes are complex.
+      !
+      ! Allocate memory space
+      ALLOCATE( A(LDA,M) )
+      ALLOCATE( AC(LDA,M) )
+      ALLOCATE( DA(M) )
+      ALLOCATE( DL(M) )
+      ALLOCATE( F(LDF,N+1) )
+      ALLOCATE( F1(LDF,N+1) )
+      ALLOCATE( F2(LDF,N+1) )
+      ALLOCATE( X(LDX,N) )
+      ALLOCATE( X0(LDX,N) )
+      ALLOCATE( SINGVX(N) )
+      ALLOCATE( SINGVQX(N) )
+      ALLOCATE( Y(LDY,N+1) )
+      ALLOCATE( Y0(LDY,N+1) )
+      ALLOCATE( Y1(M,N+1) )
+      ALLOCATE( Z(LDZ,N) )
+      ALLOCATE( Z1(LDZ,N) )
+      ALLOCATE( RES(N)  )
+      ALLOCATE( RES1(N) )
+      ALLOCATE( RESEX(N) )
+      ALLOCATE( REIG(N) )
+      ALLOCATE( IEIG(N) )
+      ALLOCATE( REIGQ(N) )
+      ALLOCATE( IEIGQ(N) )
+      ALLOCATE( REIGA(M) )
+      ALLOCATE( IEIGA(M) )
+      ALLOCATE( VA(LDA,M) )
+      ALLOCATE( LAMBDA(N,2) )
+      ALLOCATE( LAMBDAQ(N,2) )
+      ALLOCATE( EIGA(M,2) )
+      ALLOCATE( W(LDW,N) )
+      ALLOCATE( AU(LDAU,N) )
+      ALLOCATE( S(N,N) )
+
+      TOL  = M*EPS
+      ! This mimics O(M*N)*EPS bound for accumulated roundoff error.
+      ! The factor 10 is somewhat arbitrary.
+      TOL2 = 10*M*N*EPS
+
+!.............
+
+      DO K_TRAJ = 1, 2
+      !  Number of initial conditions in the simulation/trajectories (1 or 2)
+
+      COND = 1.0D8
+      DMAX = 1.0D2
+      RSIGN = 'F'
+      GRADE = 'N'
+      MODEL = 6
+      CONDL = 1.0D2
+      MODER = 6
+      CONDR = 1.0D2
+      PIVTNG = 'N'
+
+      ! Loop over all parameter MODE values for DLATMR (+1,..,+6)
+      DO MODE = 1, 6
+
+      ALLOCATE( IWORK(2*M) )  ! first M entries passed as the pivot array, last M as integer workspace
+      ALLOCATE(DR(M))         ! one entry per column of the M-by-M matrix generated below
+      CALL DLATMR( M, M, 'S', ISEED, 'N', DA, MODE, COND, &
+                   DMAX, RSIGN, GRADE, DL, MODEL,  CONDL, &
+                   DR, MODER, CONDR, PIVTNG, IWORK, M, M, &
+                   ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO )
+      DEALLOCATE(IWORK)
+      DEALLOCATE(DR)
+
+      LWORK = 4*M+1
+      ALLOCATE(WORK(LWORK))
+      AC  = A
+      CALL DGEEV( 'N','V', M, AC, M, REIGA, IEIGA, VA, M, &
+                  VA, M, WORK, LWORK, INFO ) ! LAPACK CALL
+      DEALLOCATE(WORK)
+      TMP = ZERO
+      DO i = 1, M
+         EIGA(i,1) = REIGA(i)
+         EIGA(i,2) = IEIGA(i)
+         TMP = MAX( TMP, SQRT(REIGA(i)**2+IEIGA(i)**2))
+      END DO
+
+      ! Scale A to have the desirable spectral radius.
+      CALL DLASCL( 'G', 0, 0, TMP, ONE, M, M, A, M, INFO )
+      CALL DLASCL( 'G', 0, 0, TMP, ONE, M, 2, EIGA, M, INFO )
+
+      ! Compute the Frobenius norm of the full M-by-M matrix A (leading dimension LDA)
+      ANORM = DLANGE( 'F', M, M, A, LDA, WDUMMY )
+
+      IF ( K_TRAJ == 2 ) THEN
+          ! generate data with two initial conditions
+      CALL DLARNV(2, ISEED, M, F1(1,1) )
+      F1(1:M,1) = 1.0E-10*F1(1:M,1)
+      DO i = 1, N/2
+         CALL DGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, &
+              F1(1,i+1), 1 )
+      END DO
+      X0(1:M,1:N/2) = F1(1:M,1:N/2)
+      Y0(1:M,1:N/2) = F1(1:M,2:N/2+1)
+
+      CALL DLARNV(2, ISEED, M, F1(1,1) )
+      DO i = 1, N-N/2
+         CALL DGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, &
+              F1(1,i+1), 1 )
+      END DO
+      X0(1:M,N/2+1:N) = F1(1:M,1:N-N/2)
+      Y0(1:M,N/2+1:N) = F1(1:M,2:N-N/2+1)
+      ELSE
+      CALL DLARNV(2, ISEED, M, F(1,1) )
+      DO i = 1, N
+         CALL DGEMV( 'N', M, M, ONE, A, M, F(1,i), 1, ZERO, &
+              F(1,i+1), 1 )
+      END DO
+      X0(1:M,1:N) = F(1:M,1:N)
+      Y0(1:M,1:N) = F(1:M,2:N+1)
+      END IF
+
+      XNORM = DLANGE( 'F', M, N, X0, LDX, WDUMMY )  ! X0 is allocated as (LDX,N)
+      YNORM = DLANGE( 'F', M, N, Y0, LDY, WDUMMY )  ! Y0 is allocated as (LDY,N+1), so pass LDY
+!............................................................
+
+      DO iJOBZ = 1, 4
+
+          SELECT CASE ( iJOBZ )
+          CASE(1)
+              JOBZ   = 'V' ! Ritz vectors will be computed
+              RESIDS = 'R' ! Residuals will be computed
+          CASE(2)
+              JOBZ   = 'V'
+              RESIDS = 'N'
+          CASE(3)
+              JOBZ   = 'F' ! Ritz vectors in factored form
+              RESIDS = 'N'
+          CASE(4)
+              JOBZ   = 'N'
+              RESIDS = 'N'
+          END SELECT
+
+      DO iJOBREF = 1, 3
+
+          SELECT CASE ( iJOBREF )
+          CASE(1)
+              JOBREF = 'R' ! Data for refined Ritz vectors
+          CASE(2)
+              JOBREF = 'E' ! Exact DMD vectors
+          CASE(3)
+              JOBREF = 'N'
+          END SELECT
+
+      DO iSCALE = 1, 4
+
+          SELECT CASE ( iSCALE )
+          CASE(1)
+              SCALE = 'S' ! X data normalized
+          CASE(2)
+              SCALE = 'C' ! X normalized, consist. check
+          CASE(3)
+              SCALE = 'Y' ! Y data normalized
+          CASE(4)
+              SCALE = 'N'
+          END SELECT
+
+      DO iNRNK = -1, -2, -1
+          ! Two truncation strategies. The "-2" case for R&D
+          ! purposes only - it uses possibly low accuracy small
+          ! singular values, in which case the formulas used in
+          ! the DMD are highly sensitive.
+          NRNK   = iNRNK
+
+      DO iWHTSVD = 1, 4
+          ! Check all four options to compute the POD basis
+          ! via the SVD.
+          WHTSVD   = iWHTSVD
+
+      DO LWMINOPT = 1, 2
+          ! Workspace query for the minimal (1) and for the optimal
+          ! (2) workspace lengths determined by workspace query.
+
+      X(1:M,1:N) = X0(1:M,1:N)
+      Y(1:M,1:N) = Y0(1:M,1:N)
+
+      ! DGEDMD: Workspace query and workspace allocation
+      CALL DGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, &
+           N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, &
+           LDZ, RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, -1, &
+           IDUMMY, -1, INFO )
+
+      LIWORK = IDUMMY(1)
+      ALLOCATE( IWORK(LIWORK) )
+      LWORK = INT(WDUMMY(LWMINOPT))
+      ALLOCATE( WORK(LWORK) )
+
+      ! DGEDMD test: CALL DGEDMD
+      CALL DGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, &
+           N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, &
+           LDZ, RES, AU, LDAU, W, LDW, S, LDS, WORK, LWORK,&
+           IWORK, LIWORK, INFO )
+
+      SINGVX(1:N) = WORK(1:N)
+
+      !...... DGEDMD check point
+      IF ( LSAME(JOBZ,'V')  ) THEN
+          ! Check that Z = X*W, on return from DGEDMD
+          ! This checks that the returned eigenvectors in Z are
+          ! the product of the SVD/POD basis returned in X
+          ! and the eigenvectors of the Rayleigh quotient
+          ! returned in W
+          CALL DGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
+                      ZERO, Z1, LDZ )
+          TMP = ZERO
+          DO i = 1, K
+             CALL DAXPY( M, -ONE, Z(1,i), 1, Z1(1,i), 1)
+             TMP = MAX(TMP, DNRM2( M, Z1(1,i), 1 ) )
+          END DO
+          TMP_ZXW = MAX(TMP_ZXW, TMP )
+          ! Judge this run by its own error TMP, not by the running maximum TMP_ZXW.
+          IF ( TMP > 10*M*EPS ) THEN
+              NFAIL_Z_XV = NFAIL_Z_XV + 1
+              WRITE(*,*) ':( .................DGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+
+      END IF
+
+      !...... DGEDMD check point
+      IF ( LSAME(JOBREF,'R') ) THEN
+          ! The matrix A*U is returned for computing refined Ritz vectors.
+          ! Check that A*U is computed correctly using the formula
+          ! A*U = Y * V * inv(SIGMA). This depends on the
+          ! accuracy in the computed singular values and vectors of X.
+          ! See the paper for an error analysis.
+          ! Note that the left singular vectors of the input matrix X
+          ! are returned in the array X.
+          CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, X, LDX, &
+                     ZERO, Z1, LDZ )
+          TMP = ZERO
+          DO i = 1, K
+              CALL DAXPY( M, -ONE, AU(1,i), 1, Z1(1,i), 1)
+              TMP = MAX( TMP, DNRM2( M, Z1(1,i),1 ) * &
+                       SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_AU = MAX( TMP_AU, TMP )
+
+          IF ( TMP > TOL2 ) THEN
+              NFAIL_AU = NFAIL_AU + 1
+              WRITE(*,*) ':( .................DGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+
+      ELSEIF ( LSAME(JOBREF,'E') ) THEN
+      ! The unscaled vectors of the Exact DMD are computed.
+      ! This option is included for the sake of completeness,
+      ! for users who prefer the Exact DMD vectors. The
+      ! returned vectors are in the real form, in the same way
+      ! as the Ritz vectors. Here we just save the vectors
+      ! and test them separately using a Matlab script.
+
+       CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, AU, LDAU, ZERO, Y1, M )
+       i=1
+       DO WHILE ( i <= K )
+           IF ( IEIG(i) == ZERO ) THEN
+               ! Real eigenvalue with real eigenvector: Y1(:,i) = A*AU(:,i) - REIG(i)*AU(:,i)
+               CALL DAXPY( M, -REIG(i), AU(1,i), 1, Y1(1,i), 1 )
+               RESEX(i) = DNRM2( M, Y1(1,i), 1) / DNRM2(M,AU(1,i),1)
+               i = i + 1
+           ELSE
+               ! Have a complex conjugate pair
+               ! REIG(i) +- sqrt(-1)*IEIG(i).
+               ! Since all computation is done in real
+               ! arithmetic, the formula for the residual
+               ! is recast for real representation of the
+               ! complex conjugate eigenpair. AU is stored
+               ! with leading dimension LDAU, not M.
+               AB(1,1) =  REIG(i)
+               AB(2,1) = -IEIG(i)
+               AB(1,2) =  IEIG(i)
+               AB(2,2) =  REIG(i)
+               CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, AU(1,i), &
+                           LDAU, AB, 2, ONE, Y1(1,i), M )
+               RESEX(i)   = DLANGE( 'F', M, 2, Y1(1,i), M, &
+                            WORK )/ DLANGE( 'F', M, 2, AU(1,i), LDAU, &
+                            WORK )
+               RESEX(i+1) = RESEX(i)
+               i = i + 2
+           END IF
+       END DO
+
+      END IF
+
+      !...... DGEDMD check point
+      IF ( LSAME(RESIDS, 'R') ) THEN
+          ! Compare the residuals returned by DGEDMD with the
+          ! explicitly computed residuals using the matrix A.
+          ! Compute explicitly Y1 = A*Z
+          CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, M )
+          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
+          ! of the invariant subspaces that correspond to complex conjugate
+          ! pairs of eigenvalues. (See the description of Z in DGEDMD.)
+          i = 1
+          DO WHILE ( i <= K )
+              IF ( IEIG(i) == ZERO ) THEN
+                  ! have a real eigenvalue with real eigenvector
+                  CALL DAXPY( M, -REIG(i), Z(1,i), 1, Y1(1,i), 1 )
+                  RES1(i) = DNRM2( M, Y1(1,i), 1)
+                  i = i + 1
+              ELSE
+                  ! Have a complex conjugate pair
+                  ! REIG(i) +- sqrt(-1)*IMEIG(i).
+                  ! Since all computation is done in real
+                  ! arithmetic, the formula for the residual
+                  ! is recast for real representation of the
+                  ! complex conjugate eigenpair. See the
+                  ! description of RES.
+                  AB(1,1) =  REIG(i)
+                  AB(2,1) = -IEIG(i)
+                  AB(1,2) =  IEIG(i)
+                  AB(2,2) =  REIG(i)
+                  CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                              M, AB, 2, ONE, Y1(1,i), M )
+                  RES1(i)   = DLANGE( 'F', M, 2, Y1(1,i), M, &
+                                     WORK )
+                  RES1(i+1) = RES1(i)
+                  i = i + 2
+              END IF
+          END DO
+          TMP = ZERO
+          DO i = 1, K
+              TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                        SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_REZ = MAX( TMP_REZ, TMP )
+
+          IF ( TMP > TOL2 ) THEN
+              NFAIL_REZ = NFAIL_REZ + 1
+              WRITE(*,*) ':( ..................DGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+
+          IF ( LSAME(JOBREF,'E') ) THEN
+              TMP = ZERO
+              DO i = 1, K
+                  TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) )
+              END DO
+              TMP_EX = MAX(TMP_EX,TMP)
+          END IF
+
+      END IF
+
+      !..... store the results for inspection
+      DO i = 1, K
+          LAMBDA(i,1) = REIG(i)
+          LAMBDA(i,2) = IEIG(i)
+      END DO
+
+      DEALLOCATE(IWORK)
+      DEALLOCATE(WORK)
+
+      !======================================================================
+      !     Now test the DGEDMDQ
+      !======================================================================
+      IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN
+          RJOBDATA(2) = 1
+          F1 = F
+
+          ! DGEDMDQ test: Workspace query and workspace allocation
+          CALL DGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, &
+               JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, &
+               LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ,   &
+               RES, AU, LDAU, W, LDW, S, LDS, WDUMMY,      &
+               -1, IDUMMY, -1, INFO )
+          LIWORK = IDUMMY(1)
+          ALLOCATE( IWORK(LIWORK) )
+          LWORK = INT(WDUMMY(LWMINOPT))
+          ALLOCATE(WORK(LWORK))
+          ! DGEDMDQ test: CALL DGEDMDQ
+          CALL DGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, &
+               JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, &
+               LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ,   &
+               RES, AU, LDAU, W, LDW, S, LDS,              &
+               WORK, LWORK, IWORK, LIWORK, INFO )
+
+          SINGVQX(1:KQ) = WORK(MIN(M,N+1)+1: MIN(M,N+1)+KQ)
+
+          !..... DGEDMDQ check point
+          IF ( KQ /= K ) THEN
+              KDIFF = KDIFF+1
+          END IF
+
+          TMP = ZERO  ! largest |SINGVX(i)-SINGVQX(i)| relative to SINGVX(1) for this run
+          DO i = 1, MIN(K, KQ)
+              TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / &
+                                    SINGVX(1) )
+          END DO
+          SVDIFF = MAX( SVDIFF, TMP )
+          IF ( TMP > M*N*EPS ) THEN
+              WRITE(*,*) 'FAILED! Something was wrong with the run.'
+              NFAIL_SVDIFF = NFAIL_SVDIFF + 1
+              DO j = 1, MIN(3, N)
+                  ! Report only: no READ here, since standard input supplies M and N for each pass.
+                  write(*,*) j, SINGVX(j), SINGVQX(j)
+              END DO
+          END IF
+
+          !..... DGEDMDQ check point
+          IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN
+              ! Check that the QR factors are computed and returned
+              ! as requested. The residual ||F-Q*R||_F / ||F||_F
+              ! is compared to M*N*EPS.
+              F2 = F
+              CALL DGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ONE, F1, &
+                          LDF, Y, LDY, ONE, F2, LDF )
+              TMP_FQR = DLANGE( 'F', M, N+1, F2, LDF, WORK ) / &
+                    DLANGE( 'F', M, N+1, F,  LDF, WORK )
+              IF ( TMP_FQR > TOL2 ) THEN
+                  WRITE(*,*) 'FAILED! Something was wrong with the run.'
+                  NFAIL_F_QR = NFAIL_F_QR + 1
+              END IF
+          END IF
+
+          !..... DGEDMDQ check point
+          IF ( LSAME(RESIDS, 'R') ) THEN
+              ! Compare the residuals returned by DGEDMDQ with the
+              ! explicitly computed residuals using the matrix A.
+              ! Compute explicitly Y1 = A*Z
+              CALL DGEMM( 'N', 'N', M, KQ, M, ONE, A, M, Z, M, ZERO, Y1, M )
+              ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
+              ! of the invariant subspaces that correspond to complex conjugate
+              ! pairs of eigenvalues. (See the description of Z in DGEDMDQ)
+              i = 1
+              DO WHILE ( i <= KQ )
+                  IF ( IEIGQ(i) == ZERO ) THEN
+                      ! have a real eigenvalue with real eigenvector
+                      CALL DAXPY( M, -REIGQ(i), Z(1,i), 1, Y1(1,i), 1 )
+                      ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i)
+                      RES1(i) = DNRM2( M, Y1(1,i), 1)
+                      i = i + 1
+                  ELSE
+                     ! Have a complex conjugate pair
+                     ! REIG(i) +- sqrt(-1)*IMEIG(i).
+                     ! Since all computation is done in real
+                     ! arithmetic, the formula for the residual
+                     ! is recast for real representation of the
+                     ! complex conjugate eigenpair. See the
+                     ! description of RES.
+                     AB(1,1) =  REIGQ(i)
+                     AB(2,1) = -IEIGQ(i)
+                     AB(1,2) =  IEIGQ(i)
+                     AB(2,2) =  REIGQ(i)
+                     CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                                 M, AB, 2, ONE, Y1(1,i), M )             ! BLAS CALL
+                     ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB   ! INTRINSIC
+                     RES1(i)   = DLANGE( 'F', M, 2, Y1(1,i), M, &
+                                        WORK )                           ! LAPACK CALL
+                     RES1(i+1) = RES1(i)
+                     i = i + 2
+                  END IF
+              END DO
+              TMP = ZERO  ! largest scaled |RES(i) - RES1(i)| over the KQ Ritz pairs of this run
+              DO i = 1, KQ
+                  TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                      SINGVQX(KQ)/(ANORM*SINGVQX(1)) )  ! only SINGVQX(1:KQ) was copied from WORK
+              END DO
+              TMP_REZQ = MAX( TMP_REZQ, TMP )
+              IF ( TMP > TOL2 ) THEN
+                  NFAIL_REZQ = NFAIL_REZQ + 1
+                  WRITE(*,*) '................ DGEDMDQ FAILED!', &
+                      'Check the code for implementation errors.'
+                  STOP
+              END IF
+
+          END IF
+
+          DO i = 1, KQ
+              LAMBDAQ(i,1) = REIGQ(i)
+              LAMBDAQ(i,2) = IEIGQ(i)
+          END DO
+
+          DEALLOCATE(WORK)
+          DEALLOCATE(IWORK)
+      END IF ! TEST_QRDMD
+!======================================================================
+
+      END DO ! LWMINOPT
+      !write(*,*) 'LWMINOPT loop completed'
+      END DO ! WHTSVD LOOP
+      !write(*,*) 'WHTSVD loop completed'
+      END DO ! NRNK LOOP
+      !write(*,*) 'NRNK loop completed'
+      END DO ! SCALE LOOP
+      !write(*,*) 'SCALE loop completed'
+      END DO ! JOBF LOOP
+      !write(*,*) 'JOBREF loop completed'
+      END DO ! JOBZ LOOP
+      !write(*,*) 'JOBZ loop completed'
+
+      END DO ! MODE -6:6
+      !write(*,*) 'MODE loop completed'
+      END DO ! 1 or 2 trajectories
+      !write(*,*) 'trajectories  loop completed'
+
+      DEALLOCATE(A)
+      DEALLOCATE(AC)
+      DEALLOCATE(DA)
+      DEALLOCATE(DL)
+      DEALLOCATE(F)
+      DEALLOCATE(F1)
+      DEALLOCATE(F2)
+      DEALLOCATE(X)
+      DEALLOCATE(X0)
+      DEALLOCATE(SINGVX)
+      DEALLOCATE(SINGVQX)
+      DEALLOCATE(Y)
+      DEALLOCATE(Y0)
+      DEALLOCATE(Y1)
+      DEALLOCATE(Z)
+      DEALLOCATE(Z1)
+      DEALLOCATE(RES)
+      DEALLOCATE(RES1)
+      DEALLOCATE(RESEX)
+      DEALLOCATE(REIG)
+      DEALLOCATE(IEIG)
+      DEALLOCATE(REIGQ)
+      DEALLOCATE(IEIGQ)
+      DEALLOCATE(REIGA)
+      DEALLOCATE(IEIGA)
+      DEALLOCATE(VA)
+      DEALLOCATE(LAMBDA)
+      DEALLOCATE(LAMBDAQ)
+      DEALLOCATE(EIGA)
+      DEALLOCATE(W)
+      DEALLOCATE(AU)
+      DEALLOCATE(S)
+
+!............................................................
+      !     Generate random M-by-M matrix A. Use DLATMR from
+      END DO ! LLOOP
+
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for DGEDMD :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+      IF ( NFAIL_Z_XV == 0 ) THEN
+          WRITE(*,*) '>>>> Z - U*V test PASSED.'
+      ELSE
+          WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)'
+          WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV
+      END IF
+      IF ( NFAIL_AU == 0 ) THEN
+          WRITE(*,*) '>>>> A*U test PASSED. '
+      ELSE
+          WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)'
+          WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU
+      END IF
+
+      IF ( NFAIL_REZ == 0 ) THEN
+          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)'
+          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ
+      END IF
+
+      IF ( NFAIL_TOTAL == 0 ) THEN
+          WRITE(*,*) '>>>> DGEDMD :: ALL TESTS PASSED.'
+      ELSE
+          WRITE(*,*) NFAIL_TOTAL, 'FAILURES!'
+          WRITE(*,*) '>>>>>>>>>>>>>> DGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      IF ( TEST_QRDMD ) THEN
+      WRITE(*,*)
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for DGEDMDQ :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+
+      IF ( NFAIL_SVDIFF == 0 ) THEN
+          WRITE(*,*) '>>>> DGEDMD and DGEDMDQ computed singular &
+              &values test PASSED.'
+      ELSE
+          WRITE(*,*) 'DGEDMD and DGEDMDQ discrepancies in &
+              &the singular values unacceptable ', &
+              NFAIL_SVDIFF, ' times. Test FAILED.'
+          WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF
+      END IF
+
+      IF ( NFAIL_F_QR == 0 ) THEN
+          WRITE(*,*) '>>>> F - Q*R test PASSED.'
+      ELSE
+          WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)'
+          WRITE(*,*) 'The largest relative residual was ', TMP_FQR
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR
+      END IF
+
+      IF ( NFAIL_REZQ == 0 ) THEN
+          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)'
+          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ
+      END IF
+
+      IF ( NFAILQ_TOTAL == 0 ) THEN
+          WRITE(*,*) '>>>>>>> DGEDMDQ :: ALL TESTS PASSED.'
+      ELSE
+         WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!'
+         WRITE(*,*) '>>>>>>> DGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      END IF
+
+      WRITE(*,*)
+      WRITE(*,*) 'Test completed.'
+      STOP
+      END
diff --git a/TESTING/EIG/schkdmd.f90 b/TESTING/EIG/schkdmd.f90
new file mode 100644
index 0000000000..77e3e46c05
--- /dev/null
+++ b/TESTING/EIG/schkdmd.f90
@@ -0,0 +1,792 @@
+!     This is a test program for checking the implementations
+!     of the following subroutines
+!
+!     SGEDMD  for computation of the
+!             Dynamic Mode Decomposition (DMD)
+!     SGEDMDQ for computation of a
+!             QR factorization based compressed DMD
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     ========================================================
+!     How to run the code (compiler, link info)
+!     ========================================================
+!     Compile as FORTRAN 90 (or later) and link with BLAS and
+!     LAPACK libraries.
+!     NOTE: The code is developed and tested on top of the
+!     Intel MKL library (versions 2022.0.3 and 2022.2.0),
+!     using the Intel Fortran compiler.
+!
+!     For developers of the C++ implementation
+!     ========================================================
+!     See the LAPACK++ and Template Numerical Toolkit (TNT)
+!
+!     Note on a development of the GPU HP implementation
+!     ========================================================
+!     Work in progress. See CUDA, MAGMA, SLATE.
+!     NOTE: The four SVD subroutines used in this code are
+!     included as a part of R&D and for the completeness.
+!     This was also an opportunity to test those SVD codes.
+!     If the scaling option is used all four are essentially
+!     equally good. For implementations on HP platforms,
+!     one can use whichever SVD is available.
+!...  .........................................................
+!     NOTE:
+!     When using the Intel MKL 2022.0.3 the subroutine xGESVDQ
+!     (optionally used in xGEDMD) may cause access violation
+!     error for x = S, D, C, Z, but only if called with the
+!     work space query. (At least in our Windows 10 MSVS 2019.)
+!     The problem can be mitigated by downloading the source
+!     code of xGESVDQ from the LAPACK repository and using it
+!     locally instead of the one in the MKL. This seems to
+!     indicate that the problem is indeed in the MKL.
+!     This problem did not appear with Intel MKL 2022.2.0.
+!
+!     NOTE:
+!     xGESDD seems to have a problem with workspace. In some
+!     cases the length of the optimal workspace is returned
+!     smaller than the minimal workspace, as specified in the
+!     code. As a precaution, all optimal workspaces are
+!     set as MAX(minimal, optimal).
+!     Latest implementations of complex xGESDD have different
+!     lengths of the real workspace. We use the max value over
+!     the two versions.
+!............................................................
+!............................................................
+!
+      PROGRAM DMD_TEST
+      use iso_fortran_env, only: real32
+      IMPLICIT NONE
+      integer, parameter :: WP = real32
+
+!............................................................
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+!............................................................
+      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:,:) ::          &
+                     A, AC, EIGA, LAMBDA, LAMBDAQ, F, F1, F2,&
+                     Z, Z1, S, AU, W, VA, X, X0, Y, Y0, Y1
+      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:)   ::          &
+                     DA, DL, DR, REIG, REIGA, REIGQ, IEIG,   &
+                     IEIGA, IEIGQ,  RES, RES1, RESEX, SINGVX,&
+                     SINGVQX, WORK
+      INTEGER      , ALLOCATABLE, DIMENSION(:)   ::   IWORK
+      REAL(KIND=WP) :: AB(2,2),   WDUMMY(2)
+      INTEGER       :: IDUMMY(2), ISEED(4), RJOBDATA(8)
+      REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, DMAX, EPS, &
+                       TOL, TOL2, SVDIFF, TMP, TMP_AU,       &
+                       TMP_FQR, TMP_REZ, TMP_REZQ,  TMP_ZXW, &
+                       TMP_EX, XNORM, YNORM
+!............................................................
+      INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY,  &
+                 LDZ, LIWORK, LWORK, M, N, L, LLOOP, NRNK
+      INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, KDIFF,     &
+                 NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ,     &
+                 NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, &
+                 NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD
+      INTEGER    iNRNK, iWHTSVD, K_TRAJ, LWMINOPT
+      CHARACTER(LEN=1) GRADE, JOBREF, JOBZ, PIVTNG, RSIGN,   &
+                       SCALE, RESIDS, WANTQ, WANTR
+
+      LOGICAL          TEST_QRDMD
+!..... external subroutines (BLAS and LAPACK)
+      EXTERNAL SAXPY,  SGEEV, SGEMM, SGEMV, SLACPY, SLASCL
+      EXTERNAL SLARNV, SLATMR
+!.....external subroutines DMD package, part 1
+!     subroutines under test
+      EXTERNAL SGEDMD, SGEDMDQ
+
+!..... external functions (BLAS and LAPACK)
+      EXTERNAL         SLAMCH, SLANGE, SNRM2
+      REAL(KIND=WP) :: SLAMCH, SLANGE, SNRM2
+      EXTERNAL         LSAME
+      LOGICAL          LSAME
+
+      INTRINSIC ABS, INT, MIN, MAX
+!............................................................
+
+      ! The test is always in pairs : ( SGEDMD and SGEDMDQ )
+      ! because the test includes comparing the results (in pairs).
+!.....................................................................................
+      TEST_QRDMD = .TRUE. ! This code by default performs tests on SGEDMDQ
+                          ! Since the QR factorizations based algorithm is designed for
+                          ! single trajectory data, only single trajectory tests will
+                          ! be performed with xGEDMDQ.
+      WANTQ = 'Q'
+      WANTR = 'R'
+!.................................................................................
+
+      EPS = SLAMCH( 'P' )  ! machine precision SP
+
+      ! Global counters of failures of some particular tests
+      NFAIL      = 0
+      NFAIL_REZ  = 0
+      NFAIL_REZQ = 0
+      NFAIL_Z_XV = 0
+      NFAIL_F_QR = 0
+      NFAIL_AU   = 0
+      KDIFF      = 0
+      NFAIL_SVDIFF = 0
+      NFAIL_TOTAL  = 0
+      NFAILQ_TOTAL = 0
+
+
+      DO LLOOP = 1, 4
+
+      WRITE(*,*) 'L Loop Index = ', LLOOP
+
+      ! Set the dimensions of the problem ...
+      WRITE(*,*) 'M = '
+      READ(*,*) M
+      WRITE(*,*) M
+      ! ... and the number of snapshots.
+      WRITE(*,*) 'N = '
+      READ(*,*) N
+      WRITE(*,*) N
+
+      ! ... Test the dimensions
+      IF ( ( MIN(M,N) == 0 ) .OR. ( M < N )  ) THEN
+          WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.'
+          STOP
+      END IF
+!.............
+      ! The seed inside the LLOOP so that each pass can be reproduced easily.
+
+      ISEED(1) = 4
+      ISEED(2) = 3
+      ISEED(3) = 2
+      ISEED(4) = 1
+
+      LDA = M
+      LDF = M
+      LDX = MAX(M,N+1)
+      LDY = MAX(M,N+1)
+      LDW = N
+      LDZ = M
+      LDAU = MAX(M,N+1)
+      LDS = N
+
+      TMP_ZXW  = ZERO
+      TMP_AU   = ZERO
+      TMP_REZ  = ZERO
+      TMP_REZQ = ZERO
+      SVDIFF   = ZERO
+      TMP_EX   = ZERO
+
+      !
+      ! Test the subroutines on real data snapshots. All
+      ! computation is done in real arithmetic, even when
+      ! Koopman eigenvalues and modes are complex.
+      !
+      ! Allocate memory space
+      ALLOCATE( A(LDA,M) )
+      ALLOCATE( AC(LDA,M) )
+      ALLOCATE( DA(M) )
+      ALLOCATE( DL(M) )
+      ALLOCATE( F(LDF,N+1) )
+      ALLOCATE( F1(LDF,N+1) )
+      ALLOCATE( F2(LDF,N+1) )
+      ALLOCATE( X(LDX,N) )
+      ALLOCATE( X0(LDX,N) )
+      ALLOCATE( SINGVX(N) )
+      ALLOCATE( SINGVQX(N) )
+      ALLOCATE( Y(LDY,N+1) )
+      ALLOCATE( Y0(LDY,N+1) )
+      ALLOCATE( Y1(M,N+1) )
+      ALLOCATE( Z(LDZ,N) )
+      ALLOCATE( Z1(LDZ,N) )
+      ALLOCATE( RES(N)  )
+      ALLOCATE( RES1(N) )
+      ALLOCATE( RESEX(N) )
+      ALLOCATE( REIG(N) )
+      ALLOCATE( IEIG(N) )
+      ALLOCATE( REIGQ(N) )
+      ALLOCATE( IEIGQ(N) )
+      ALLOCATE( REIGA(M) )
+      ALLOCATE( IEIGA(M) )
+      ALLOCATE( VA(LDA,M) )
+      ALLOCATE( LAMBDA(N,2) )
+      ALLOCATE( LAMBDAQ(N,2) )
+      ALLOCATE( EIGA(M,2) )
+      ALLOCATE( W(LDW,N) )
+      ALLOCATE( AU(LDAU,N) )
+      ALLOCATE( S(N,N) )
+
+      TOL  = M*EPS
+      ! This mimics O(M*N)*EPS bound for accumulated roundoff error.
+      ! The factor 10 is somewhat arbitrary.
+      TOL2 = 10*M*N*EPS
+
+!.............
+
+      DO K_TRAJ = 1, 2
+      !  Number of initial conditions in the simulation/trajectories (1 or 2)
+
+      COND = 1.0D8
+      DMAX = 1.0D2
+      RSIGN = 'F'
+      GRADE = 'N'
+      MODEL = 6
+      CONDL = 1.0D2
+      MODER = 6
+      CONDR = 1.0D2
+      PIVTNG = 'N'
+
+      ! Loop over all parameter MODE values for SLATMR (+1,..,+6)
+      DO MODE = 1, 6
+
+      ! Generate a random M-by-M test matrix A with SLATMR.
+      ! IWORK is split in two halves: IWORK(1:M) is passed as the
+      ! pivot array and IWORK(M+1:2*M) as the integer workspace.
+      ! DR is the right (column) scaling array of SLATMR; the matrix
+      ! has M columns, so DR must have length M (not the number of
+      ! snapshots N, which may be smaller than M).
+      ALLOCATE( IWORK(2*M) )
+      ALLOCATE( DR(M) )
+      CALL SLATMR( M, M, 'S', ISEED, 'N', DA, MODE, COND, &
+                   DMAX, RSIGN, GRADE, DL, MODEL,  CONDL, &
+                   DR, MODER, CONDR, PIVTNG, IWORK, M, M, &
+                   ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO )
+      DEALLOCATE(IWORK)
+      DEALLOCATE(DR)
+
+      LWORK = 4*M+1
+      ALLOCATE(WORK(LWORK))
+      AC  = A
+      CALL SGEEV( 'N','V', M, AC, M, REIGA, IEIGA, VA, M, &
+                  VA, M, WORK, LWORK, INFO ) ! LAPACK CALL
+      DEALLOCATE(WORK)
+      TMP = ZERO
+      DO i = 1, M
+          EIGA(i,1) = REIGA(i)
+          EIGA(i,2) = IEIGA(i)
+          TMP = MAX( TMP, SQRT(REIGA(i)**2+IEIGA(i)**2))
+      END DO
+
+      ! Scale A to have the desirable spectral radius.
+      CALL SLASCL( 'G', 0, 0, TMP, ONE, M, M, A, M, INFO )
+      CALL SLASCL( 'G', 0, 0, TMP, ONE, M, 2, EIGA, M, INFO )
+
+      ! Compute the Frobenius norm of the full M-by-M matrix A.
+      ! (A is allocated as A(LDA,M); the norm must cover all M rows
+      ! and columns, not only the leading N-by-N block.)
+      ! WDUMMY is only a placeholder for the SLANGE work argument.
+      ANORM = SLANGE( 'F', M, M, A, LDA, WDUMMY )
+
+      IF ( K_TRAJ == 2 ) THEN
+          ! generate data with two initial conditions
+      CALL SLARNV(2, ISEED, M, F1(1,1) )
+      F1(1:M,1) = 1.0E-10*F1(1:M,1)
+      DO i = 1, N/2
+         CALL SGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, &
+              F1(1,i+1), 1 )
+      END DO
+      X0(1:M,1:N/2) = F1(1:M,1:N/2)
+      Y0(1:M,1:N/2) = F1(1:M,2:N/2+1)
+
+      CALL SLARNV(2, ISEED, M, F1(1,1) )
+      DO i = 1, N-N/2
+         CALL SGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, &
+              F1(1,i+1), 1 )
+      END DO
+      X0(1:M,N/2+1:N) = F1(1:M,1:N-N/2)
+      Y0(1:M,N/2+1:N) = F1(1:M,2:N-N/2+1)
+      ELSE
+          ! single trajectory
+      CALL SLARNV(2, ISEED, M, F(1,1) )
+      DO i = 1, N
+         CALL SGEMV( 'N', M, M, ONE, A, M, F(1,i), 1, ZERO, &
+              F(1,i+1), 1 )
+      END DO
+      X0(1:M,1:N) = F(1:M,1:N)
+      Y0(1:M,1:N) = F(1:M,2:N+1)
+      END IF
+
+      ! Frobenius norms of the M-by-N snapshot matrices X0 and Y0.
+      ! Each array is passed with its own leading dimension
+      ! (X0 is allocated with LDX rows, Y0 with LDY rows).
+      XNORM = SLANGE( 'F', M, N, X0, LDX, WDUMMY )
+      YNORM = SLANGE( 'F', M, N, Y0, LDY, WDUMMY )
+!............................................................
+
+      DO iJOBZ = 1, 4
+
+          SELECT CASE ( iJOBZ )
+          CASE(1)
+              JOBZ   = 'V' ! Ritz vectors will be computed
+              RESIDS = 'R' ! Residuals will be computed
+          CASE(2)
+              JOBZ   = 'V'
+              RESIDS = 'N'
+          CASE(3)
+              JOBZ   = 'F' ! Ritz vectors in factored form
+              RESIDS = 'N'
+          CASE(4)
+              JOBZ   = 'N'
+              RESIDS = 'N'
+          END SELECT
+
+      DO iJOBREF = 1, 3
+
+          SELECT CASE ( iJOBREF )
+          CASE(1)
+              JOBREF = 'R' ! Data for refined Ritz vectors
+          CASE(2)
+              JOBREF = 'E' ! Exact DMD vectors
+          CASE(3)
+              JOBREF = 'N'
+          END SELECT
+
+      DO iSCALE = 1, 4
+
+          SELECT CASE ( iSCALE )
+          CASE(1)
+              SCALE = 'S' ! X data normalized
+          CASE(2)
+              SCALE = 'C' ! X normalized, consist. check
+          CASE(3)
+              SCALE = 'Y' ! Y data normalized
+          CASE(4)
+              SCALE = 'N'
+          END SELECT
+
+      DO iNRNK = -1, -2, -1
+          ! Two truncation strategies. The "-2" case for R&D
+          ! purposes only - it uses possibly low accuracy small
+          ! singular values, in which case the formulas used in
+          ! the DMD are highly sensitive.
+          NRNK   = iNRNK
+
+      DO iWHTSVD = 1, 4
+          ! Check all four options to compute the POD basis
+          ! via the SVD.
+          WHTSVD   = iWHTSVD
+
+      DO LWMINOPT = 1, 2
+          ! Workspace query for the minimal (1) and for the optimal
+          ! (2) workspace lengths determined by workspace query.
+
+       X(1:M,1:N) = X0(1:M,1:N)
+       Y(1:M,1:N) = Y0(1:M,1:N)
+
+       ! SGEDMD: Workspace query and workspace allocation
+       ! Calling with LWORK = -1 and LIWORK = -1 performs a query only:
+       ! the required integer workspace length is returned in IDUMMY(1)
+       ! and the real workspace lengths in WDUMMY(1:2).
+       CALL SGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, &
+            N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, &
+            LDZ, RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, -1, &
+            IDUMMY, -1, INFO )
+
+       LIWORK = IDUMMY(1)
+       ALLOCATE( IWORK(LIWORK) )
+       ! LWMINOPT selects which queried length is used:
+       ! 1 = minimal workspace, 2 = optimal workspace.
+       LWORK = INT(WDUMMY(LWMINOPT))
+       ALLOCATE( WORK(LWORK) )
+
+       ! SGEDMD test: CALL SGEDMD
+       ! On entry X and Y hold fresh copies of the snapshots X0 and Y0;
+       ! K, REIG, IEIG, Z, RES, AU, W and S receive the results that
+       ! are examined by the check points that follow.
+       CALL SGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, &
+            N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, &
+            LDZ, RES, AU, LDAU, W, LDW, S, LDS, WORK, LWORK,&
+            IWORK, LIWORK, INFO )
+
+       ! Save the leading N entries of WORK before WORK is released.
+       ! They are used below as the singular values of X (presumably
+       ! returned there by SGEDMD - confirm against its documentation).
+       SINGVX(1:N) = WORK(1:N)
+
+       !...... SGEDMD check point
+       IF ( LSAME(JOBZ,'V')  ) THEN
+          ! Check that Z = X*W on return from SGEDMD, i.e. that the
+          ! returned eigenvectors in Z are the product of the SVD/POD
+          ! basis returned in X and the eigenvectors of the Rayleigh
+          ! quotient returned in W.
+          ! Z1 = X(:,1:K) * W(1:K,1:K)
+          CALL SGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, &
+                      ZERO, Z1, LDZ )
+          ! TMP = largest column norm of Z1 - Z for this test case.
+          TMP = ZERO
+          DO i = 1, K
+             CALL SAXPY( M, -ONE, Z(1,i), 1, Z1(1,i), 1)
+             TMP = MAX(TMP, SNRM2( M, Z1(1,i), 1 ) )
+          END DO
+          ! TMP_ZXW keeps the running maximum for the final report.
+          TMP_ZXW = MAX(TMP_ZXW, TMP )
+
+          ! Count a failure only if THIS case exceeds the threshold.
+          ! (Testing the running maximum TMP_ZXW here would count every
+          ! case after the first failure as failed, unlike the other
+          ! check points, which all test the per-case value TMP.)
+          IF ( TMP > 10*M*EPS ) THEN
+              NFAIL_Z_XV = NFAIL_Z_XV + 1
+          END IF
+
+       END IF
+
+       !...... SGEDMD check point
+       IF ( LSAME(JOBREF,'R') ) THEN
+           ! The matrix A*U is returned for computing refined Ritz vectors.
+           ! Check that A*U is computed correctly using the formula
+           ! A*U = Y * V * inv(SIGMA). This depends on the
+           ! accuracy in the computed singular values and vectors of X.
+           ! See the paper for an error analysis.
+           ! Note that the left singular vectors of the input matrix X
+           ! are returned in the array X.
+           ! Z1 = A * X(:,1:K), to be compared with AU(:,1:K).
+           CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, X, LDX, &
+                      ZERO, Z1, LDZ )
+           TMP = ZERO
+           DO i = 1, K
+              CALL SAXPY( M, -ONE, AU(1,i), 1, Z1(1,i), 1)
+              ! Column error, scaled by SINGVX(K)/(ANORM*SINGVX(1)).
+              TMP = MAX( TMP, SNRM2( M, Z1(1,i),1 ) * &
+                       SINGVX(K)/(ANORM*SINGVX(1)) )
+           END DO
+           TMP_AU = MAX( TMP_AU, TMP )
+
+           IF ( TMP > TOL2 ) THEN
+               NFAIL_AU = NFAIL_AU + 1
+           END IF
+
+       ELSEIF ( LSAME(JOBREF,'E') ) THEN
+           ! The unscaled vectors of the Exact DMD are computed.
+           ! This option is included for the sake of completeness,
+           ! for users who prefer the Exact DMD vectors. The
+           ! returned vectors are in the real form, in the same way
+           ! as the Ritz vectors. Here we just save the vectors
+           ! and test them separately using a Matlab script.
+           !
+           ! AU is allocated as AU(LDAU,N) with LDAU = MAX(M,N+1), so
+           ! its leading dimension must be passed as LDAU everywhere;
+           ! passing M addresses the wrong columns whenever M == N.
+
+           ! Y1 = A * AU(:,1:K)
+           CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, AU, LDAU, ZERO, Y1, M )
+           i = 1
+           DO WHILE ( i <= K )
+              IF ( IEIG(i) == ZERO ) THEN
+                 ! have a real eigenvalue with real eigenvector
+                 CALL SAXPY( M, -REIG(i), AU(1,i), 1, Y1(1,i), 1 )
+                 RESEX(i) = SNRM2( M, Y1(1,i), 1) / SNRM2(M,AU(1,i),1)
+                 i = i + 1
+              ELSE
+                 ! Have a complex conjugate pair
+                 ! REIG(i) +- sqrt(-1)*IMEIG(i).
+                 ! Since all computation is done in real
+                 ! arithmetic, the formula for the residual
+                 ! is recast for real representation of the
+                 ! complex conjugate eigenpair. See the
+                 ! description of RES.
+                 AB(1,1) =  REIG(i)
+                 AB(2,1) = -IEIG(i)
+                 AB(1,2) =  IEIG(i)
+                 AB(2,2) =  REIG(i)
+                 ! Y1(:,i:i+1) = Y1(:,i:i+1) - AU(:,i:i+1) * AB
+                 CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, AU(1,i), &
+                             LDAU, AB, 2, ONE, Y1(1,i), M )
+                 ! WORK is not referenced by SLANGE for the 'F' norm.
+                 RESEX(i)   = SLANGE( 'F', M, 2, Y1(1,i), M,    &
+                              WORK ) /                          &
+                              SLANGE( 'F', M, 2, AU(1,i), LDAU, &
+                              WORK )
+                 RESEX(i+1) = RESEX(i)
+                 i = i + 2
+              END IF
+           END DO
+
+       END IF
+
+      !...... SGEDMD check point
+      IF ( LSAME(RESIDS, 'R') ) THEN
+          ! Compare the residuals returned by SGEDMD with the
+          ! explicitly computed residuals using the matrix A.
+          ! Compute explicitly Y1 = A*Z
+          CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, M )
+          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
+          ! of the invariant subspaces that correspond to complex conjugate
+          ! pairs of eigenvalues. (See the description of Z in SGEDMD.)
+          i = 1
+          DO WHILE ( i <= K )
+            IF ( IEIG(i) == ZERO ) THEN
+                ! have a real eigenvalue with real eigenvector
+                CALL SAXPY( M, -REIG(i), Z(1,i), 1, Y1(1,i), 1 )
+                RES1(i) = SNRM2( M, Y1(1,i), 1)
+                i = i + 1
+            ELSE
+               ! Have a complex conjugate pair
+               ! REIG(i) +- sqrt(-1)*IMEIG(i).
+               ! Since all computation is done in real
+               ! arithmetic, the formula for the residual
+               ! is recast for real representation of the
+               ! complex conjugate eigenpair. See the
+               ! description of RES.
+               AB(1,1) =  REIG(i)
+               AB(2,1) = -IEIG(i)
+               AB(1,2) =  IEIG(i)
+               AB(2,2) =  REIG(i)
+               ! Y1(:,i:i+1) = Y1(:,i:i+1) - Z(:,i:i+1) * AB
+               ! (Z is passed with its declared leading dimension LDZ.)
+               CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                           LDZ, AB, 2, ONE, Y1(1,i), M )
+               ! WORK is not referenced by SLANGE for the 'F' norm.
+               RES1(i)   = SLANGE( 'F', M, 2, Y1(1,i), M, &
+                                  WORK )
+               RES1(i+1) = RES1(i)
+               i = i + 2
+            END IF
+          END DO
+          ! Largest scaled difference between the returned residuals RES
+          ! and the recomputed residuals RES1 for this test case.
+          TMP = ZERO
+          DO i = 1, K
+             TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                       SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_REZ = MAX( TMP_REZ, TMP )
+
+          IF ( TMP > TOL2 ) THEN
+              NFAIL_REZ = NFAIL_REZ + 1
+          END IF
+
+          IF ( LSAME(JOBREF,'E') ) THEN
+             ! Relative difference between RES1 and the Exact DMD
+             ! residuals RESEX (recorded in TMP_EX only). Entries for
+             ! which both residuals vanish are skipped: they agree
+             ! exactly and would otherwise produce 0/0.
+             TMP = ZERO
+             DO i = 1, K
+                IF ( RES1(i) + RESEX(i) > ZERO ) THEN
+                   TMP = MAX( TMP, ABS(RES1(i) - RESEX(i)) / &
+                                   (RES1(i)+RESEX(i)) )
+                END IF
+             END DO
+             TMP_EX = MAX(TMP_EX,TMP)
+          END IF
+
+      END IF
+
+      ! ... store the results for inspection
+      DO i = 1, K
+          LAMBDA(i,1) = REIG(i)
+          LAMBDA(i,2) = IEIG(i)
+      END DO
+
+      DEALLOCATE(IWORK)
+      DEALLOCATE(WORK)
+
+      !======================================================================
+      !     Now test the SGEDMDQ, if requested.
+      !======================================================================
+      IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN
+          RJOBDATA(2) = 1
+          F1 = F
+
+          ! SGEDMDQ test: Workspace query and workspace allocation
+          CALL SGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, &
+               JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, &
+               LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ,   &
+               RES, AU, LDAU, W, LDW, S, LDS, WDUMMY,      &
+               -1, IDUMMY, -1, INFO )
+          LIWORK = IDUMMY(1)
+          ALLOCATE( IWORK(LIWORK) )
+          LWORK = INT(WDUMMY(LWMINOPT))
+          ALLOCATE(WORK(LWORK))
+
+          ! SGEDMDQ test: CALL SGEDMDQ
+          CALL SGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, &
+               JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, &
+               LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ,   &
+               RES, AU, LDAU, W, LDW, S, LDS,              &
+               WORK, LWORK, IWORK, LIWORK, INFO )
+
+          SINGVQX(1:KQ) = WORK(MIN(M,N+1)+1: MIN(M,N+1)+KQ)
+
+          !..... SGEDMDQ check point
+          IF ( KQ /= K ) THEN
+             KDIFF = KDIFF+1
+          END IF
+
+          TMP = ZERO
+          DO i = 1, MIN(K, KQ)
+             TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / &
+                                   SINGVX(1) )
+          END DO
+          SVDIFF = MAX( SVDIFF, TMP )
+          IF ( TMP > M*N*EPS ) THEN
+             NFAIL_SVDIFF = NFAIL_SVDIFF + 1
+          END IF
+
+          !..... SGEDMDQ check point
+          IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN
+             ! Check that the QR factors are computed and returned
+             ! as requested. The residual ||F-Q*R||_F / ||F||_F
+             ! is compared to TOL2 = 10*M*N*EPS.
+             ! On return from SGEDMDQ the array F1 is used as Q and
+             ! the array Y as R:  F2 = F - F1 * Y.
+             F2 = F
+             CALL SGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ONE, F1, &
+                         LDF, Y, LDY, ONE, F2, LDF )
+             ! Relative residual of this test case. The scratch variable
+             ! TMP is free here; every later use re-initializes it.
+             TMP = SLANGE( 'F', M, N+1, F2, LDF, WORK ) / &
+                   SLANGE( 'F', M, N+1, F,  LDF, WORK )
+             IF ( TMP > TOL2 ) THEN
+                 ! TMP_FQR is reported as "the largest relative residual"
+                 ! when the test fails, so keep the maximum over all
+                 ! failing cases instead of the last computed value.
+                 ! The first failure initializes it.
+                 IF ( NFAIL_F_QR == 0 ) THEN
+                     TMP_FQR = TMP
+                 ELSE
+                     TMP_FQR = MAX( TMP_FQR, TMP )
+                 END IF
+                 NFAIL_F_QR = NFAIL_F_QR + 1
+             END IF
+          END IF
+
+          !..... SGEDMDQ checkpoint
+          IF ( LSAME(RESIDS, 'R') ) THEN
+              ! Compare the residuals returned by SGEDMDQ with the
+              ! explicitly computed residuals using the matrix A.
+              ! Compute explicitly Y1 = A*Z
+              CALL SGEMM( 'N', 'N', M, KQ, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, M )
+              ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms
+              ! of the invariant subspaces that correspond to complex conjugate
+              ! pairs of eigenvalues. (See the description of Z in SGEDMDQ)
+              i = 1
+              DO WHILE ( i <= KQ )
+                IF ( IEIGQ(i) == ZERO ) THEN
+                    ! have a real eigenvalue with real eigenvector
+                    ! Y1(1:M,i) = Y1(1:M,i) - REIGQ(i)*Z(1:M,i)
+                    CALL SAXPY( M, -REIGQ(i), Z(1,i), 1, Y1(1,i), 1 )
+                    RES1(i) = SNRM2( M, Y1(1,i), 1)
+                    i = i + 1
+                ELSE
+                   ! Have a complex conjugate pair
+                   ! REIGQ(i) +- sqrt(-1)*IEIGQ(i).
+                   ! Since all computation is done in real
+                   ! arithmetic, the formula for the residual
+                   ! is recast for real representation of the
+                   ! complex conjugate eigenpair. See the
+                   ! description of RES.
+                   AB(1,1) =  REIGQ(i)
+                   AB(2,1) = -IEIGQ(i)
+                   AB(1,2) =  IEIGQ(i)
+                   AB(2,2) =  REIGQ(i)
+                   ! Y1(1:M,i:i+1) = Y1(1:M,i:i+1) - Z(1:M,i:i+1) * AB
+                   CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), &
+                               LDZ, AB, 2, ONE, Y1(1,i), M )           ! BLAS CALL
+                   ! WORK is not referenced by SLANGE for the 'F' norm.
+                   RES1(i)   = SLANGE( 'F', M, 2, Y1(1,i), M, &
+                                      WORK )                           ! LAPACK CALL
+                   RES1(i+1) = RES1(i)
+                   i = i + 2
+                END IF
+              END DO
+              ! Largest scaled difference between the returned and the
+              ! recomputed residuals. The scaling uses the KQ singular
+              ! values saved from SGEDMDQ; indexing SINGVQX with K (the
+              ! rank found by SGEDMD) could read an entry that was not
+              ! set by this call when K > KQ.
+              TMP = ZERO
+              DO i = 1, KQ
+                 TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                     SINGVQX(KQ)/(ANORM*SINGVQX(1)) )
+              END DO
+              TMP_REZQ = MAX( TMP_REZQ, TMP )
+              IF ( TMP > TOL2 ) THEN
+                  NFAIL_REZQ = NFAIL_REZQ + 1
+              END IF
+
+          END IF
+
+          DO i = 1, KQ
+              LAMBDAQ(i,1) = REIGQ(i)
+              LAMBDAQ(i,2) = IEIGQ(i)
+          END DO
+
+      DEALLOCATE(WORK)
+      DEALLOCATE(IWORK)
+      END IF            ! TEST_QRDMD
+!======================================================================
+
+      END DO ! LWMINOPT
+      !write(*,*) 'LWMINOPT loop completed'
+      END DO ! WHTSVD LOOP
+      !write(*,*) 'WHTSVD loop completed'
+      END DO ! NRNK LOOP
+      !write(*,*) 'NRNK loop completed'
+      END DO ! SCALE LOOP
+      !write(*,*) 'SCALE loop completed'
+      END DO ! JOBF LOOP
+      !write(*,*) 'JOBREF loop completed'
+      END DO ! JOBZ LOOP
+      !write(*,*) 'JOBZ loop completed'
+
+      END DO ! MODE -6:6
+      !write(*,*) 'MODE loop completed'
+      END DO ! 1 or 2 trajectories
+      !write(*,*) 'trajectories  loop completed'
+
+      DEALLOCATE(A)
+      DEALLOCATE(AC)
+      DEALLOCATE(DA)
+      DEALLOCATE(DL)
+      DEALLOCATE(F)
+      DEALLOCATE(F1)
+      DEALLOCATE(F2)
+      DEALLOCATE(X)
+      DEALLOCATE(X0)
+      DEALLOCATE(SINGVX)
+      DEALLOCATE(SINGVQX)
+      DEALLOCATE(Y)
+      DEALLOCATE(Y0)
+      DEALLOCATE(Y1)
+      DEALLOCATE(Z)
+      DEALLOCATE(Z1)
+      DEALLOCATE(RES)
+      DEALLOCATE(RES1)
+      DEALLOCATE(RESEX)
+      DEALLOCATE(REIG)
+      DEALLOCATE(IEIG)
+      DEALLOCATE(REIGQ)
+      DEALLOCATE(IEIGQ)
+      DEALLOCATE(REIGA)
+      DEALLOCATE(IEIGA)
+      DEALLOCATE(VA)
+      DEALLOCATE(LAMBDA)
+      DEALLOCATE(LAMBDAQ)
+      DEALLOCATE(EIGA)
+      DEALLOCATE(W)
+      DEALLOCATE(AU)
+      DEALLOCATE(S)
+
+!............................................................
+      !     Generate random M-by-M matrix A. Use DLATMR from
+      END DO ! LLOOP
+
+
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for SGEDMD :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+      IF ( NFAIL_Z_XV == 0 ) THEN
+          WRITE(*,*) '>>>> Z - U*V test PASSED.'
+      ELSE
+          WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)'
+          WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV
+      END IF
+      IF ( NFAIL_AU == 0 ) THEN
+          WRITE(*,*) '>>>> A*U test PASSED. '
+      ELSE
+          WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)'
+          WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU
+      END IF
+
+      IF ( NFAIL_REZ == 0 ) THEN
+          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)'
+          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ
+      END IF
+
+      IF ( NFAIL_TOTAL == 0 ) THEN
+          WRITE(*,*) '>>>> SGEDMD :: ALL TESTS PASSED.'
+      ELSE
+          WRITE(*,*) NFAIL_TOTAL, 'FAILURES!'
+          WRITE(*,*) '>>>>>>>>>>>>>> SGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      IF ( TEST_QRDMD ) THEN
+      WRITE(*,*)
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for SGEDMDQ :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+
+      IF ( NFAIL_SVDIFF == 0 ) THEN
+          WRITE(*,*) '>>>> SGEDMD and SGEDMDQ computed singular &
+              &values test PASSED.'
+      ELSE
+          WRITE(*,*) 'SGEDMD and SGEDMDQ discrepancies in &
+              &the singular values unacceptable ', &
+              NFAIL_SVDIFF, ' times. Test FAILED.'
+          WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF
+      END IF
+
+      IF ( NFAIL_F_QR == 0 ) THEN
+          WRITE(*,*) '>>>> F - Q*R test PASSED.'
+      ELSE
+          WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)'
+          WRITE(*,*) 'The largest relative residual was ', TMP_FQR
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR
+      END IF
+
+      IF ( NFAIL_REZQ == 0 ) THEN
+          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)'
+          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ
+      END IF
+
+      IF ( NFAILQ_TOTAL == 0 ) THEN
+          WRITE(*,*) '>>>>>>> SGEDMDQ :: ALL TESTS PASSED.'
+      ELSE
+         WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!'
+         WRITE(*,*) '>>>>>>> SGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      END IF
+
+      WRITE(*,*)
+      WRITE(*,*) 'Test completed.'
+      STOP
+      END
diff --git a/TESTING/EIG/zchkdmd.f90 b/TESTING/EIG/zchkdmd.f90
new file mode 100644
index 0000000000..873d956c40
--- /dev/null
+++ b/TESTING/EIG/zchkdmd.f90
@@ -0,0 +1,745 @@
+!     This is a test program for checking the implementations
+!     of the following subroutines
+!
+!     ZGEDMD,  for computation of the
+!              Dynamic Mode Decomposition (DMD)
+!     ZGEDMDQ, for computation of a
+!              QR factorization based compressed DMD
+!
+!     Developed and supported by:
+!     ===========================
+!     Developed and coded by Zlatko Drmac, Faculty of Science,
+!     University of Zagreb;  drmac@math.hr
+!     In cooperation with
+!     AIMdyn Inc., Santa Barbara, CA.
+!     ========================================================
+!     How to run the code (compiler, link info)
+!     ========================================================
+!     Compile as FORTRAN 90 (or later) and link with BLAS and
+!     LAPACK libraries.
+!     NOTE: The code is developed and tested on top of the
+!     Intel MKL library (versions 2022.0.3 and 2022.2.0),
+!     using the Intel Fortran compiler.
+!
+!     For developers of the C++ implementation
+!     ========================================================
+!     See the LAPACK++ and Template Numerical Toolkit (TNT)
+!
+!     Note on a development of the GPU HP implementation
+!     ========================================================
+!     Work in progress. See CUDA, MAGMA, SLATE.
+!     NOTE: The four SVD subroutines used in this code are
+!     included as a part of R&D and for the completeness.
+!     This was also an opportunity to test those SVD codes.
+!     If the scaling option is used all four are essentially
+!     equally good. For implementations on HP platforms,
+!     one can use whichever SVD is available.
+!............................................................
+
+!............................................................
+!............................................................
+!
+      PROGRAM DMD_TEST
+      use iso_fortran_env, only: real64
+      IMPLICIT NONE
+      integer, parameter :: WP = real64
+
+!............................................................
+      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
+      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
+
+      COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
+      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
+!............................................................
+      REAL(KIND=WP), ALLOCATABLE, DIMENSION(:)   :: RES, &
+                     RES1, RESEX, SINGVX, SINGVQX, WORK
+      INTEGER      , ALLOCATABLE, DIMENSION(:)   :: IWORK
+      REAL(KIND=WP) :: WDUMMY(2)
+      INTEGER       :: IDUMMY(4), ISEED(4)
+      REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, EPS,       &
+                       TOL, TOL2, SVDIFF, TMP, TMP_AU,       &
+                       TMP_FQR, TMP_REZ, TMP_REZQ,  TMP_ZXW, &
+                       TMP_EX
+
+!............................................................
+      COMPLEX(KIND=WP) :: ZMAX
+      INTEGER :: LZWORK
+      COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:,:) ::  ZA, ZAC,  &
+                                 ZAU, ZF, ZF0, ZF1, ZS, ZW,       &
+                                 ZX, ZX0, ZY, ZY0, ZY1, ZZ, ZZ1
+      COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:)   ::  ZDA, ZDR, &
+                                       ZDL, ZEIGS, ZEIGSA, ZWORK
+      COMPLEX(KIND=WP) ::  ZDUMMY(22), ZDUM2X2(2,2)
+!............................................................
+      INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY,  &
+                 LDZ, LIWORK, LWORK, M, N, LLOOP, NRNK, NRNKsp
+      INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j,     &
+                 NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ,     &
+                 NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL,  &
+                 NFAIL_Z_XV,  MODE, MODEL, MODER, WHTSVD,     &
+                 WHTSVDsp
+      INTEGER :: iNRNK, iWHTSVD,  K_TRAJ, LWMINOPT
+      CHARACTER :: GRADE, JOBREF, JOBZ, PIVTNG, RSIGN,   &
+                       SCALE, RESIDS, WANTQ, WANTR
+      LOGICAL :: TEST_QRDMD
+
+!.....external subroutines (BLAS and LAPACK)
+      EXTERNAL DAXPY,  DGEEV, DGEMM, DGEMV, DLACPY, DLASCL
+      EXTERNAL ZGEEV,  ZGEMV, ZLASCL
+      EXTERNAL ZLARNV, ZLATMR
+      EXTERNAL ZAXPY,  ZGEMM
+!.....external subroutines DMD package, part 1
+!     subroutines under test
+      EXTERNAL ZGEDMD, ZGEDMDQ
+!.....external functions (BLAS and LAPACK)
+      EXTERNAL         DLAMCH,  DZNRM2
+      REAL(KIND=WP) :: DLAMCH,  DZNRM2
+      REAL(KIND=WP) ::          ZLANGE
+      EXTERNAL IZAMAX
+      INTEGER  IZAMAX
+      EXTERNAL LSAME
+      LOGICAL  LSAME
+
+      INTRINSIC ABS, INT, MIN, MAX, SIGN
+!............................................................
+
+      ! The test is always in pairs : ( ZGEDMD and ZGEDMDQ )
+      ! because the test includes comparing the results (in pairs).
+!.....................................................................................
+      TEST_QRDMD = .TRUE. ! This code by default performs tests on ZGEDMDQ
+                          ! Since the QR factorizations based algorithm is designed for
+                          ! single trajectory data, only single trajectory tests will
+                          ! be performed with xGEDMDQ.
+      WANTQ = 'Q'
+      WANTR = 'R'
+!.................................................................................
+
+      EPS = DLAMCH( 'P' )  ! machine precision DP
+
+      ! Global counters of failures of some particular tests
+      NFAIL      = 0
+      NFAIL_REZ  = 0
+      NFAIL_REZQ = 0
+      NFAIL_Z_XV = 0
+      NFAIL_F_QR = 0
+      NFAIL_AU   = 0
+      NFAIL_SVDIFF = 0
+      NFAIL_TOTAL  = 0
+      NFAILQ_TOTAL = 0
+
+      DO LLOOP = 1, 4
+
+      WRITE(*,*) 'L Loop Index = ', LLOOP
+
+      ! Set the dimensions of the problem ...
+      WRITE(*,*) 'M = '
+      READ(*,*) M
+      WRITE(*,*) M
+      ! ... and the number of snapshots.
+      WRITE(*,*) 'N = '
+      READ(*,*) N
+      WRITE(*,*) N
+
+      ! ... Test the dimensions: everything below needs M >= N > 0.
+      IF ( ( N <= 0 ) .OR. ( M < N )  ) THEN
+          WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.'
+          STOP
+      END IF
+!.............
+      ! The seed inside the LLOOP so that each pass can be reproduced easily.
+      ISEED(1) = 4
+      ISEED(2) = 3
+      ISEED(3) = 2
+      ISEED(4) = 1
+
+      LDA  = M
+      LDF  = M
+      LDX  = M
+      LDY  = M
+      LDW  = N
+      LDZ  = M
+      LDAU = M
+      LDS  = N
+
+      TMP_ZXW  = ZERO
+      TMP_AU   = ZERO
+      TMP_REZ  = ZERO
+      TMP_REZQ = ZERO
+      SVDIFF   = ZERO
+      TMP_EX   = ZERO
+
+      ALLOCATE( ZA(LDA,M) )
+      ALLOCATE( ZAC(LDA,M) )
+      ALLOCATE( ZF(LDF,N+1) )
+      ALLOCATE( ZF0(LDF,N+1) )
+      ALLOCATE( ZF1(LDF,N+1) )
+      ALLOCATE( ZX(LDX,N) )
+      ALLOCATE( ZX0(LDX,N) )
+      ALLOCATE( ZY(LDY,N+1) )
+      ALLOCATE( ZY0(LDY,N+1) )
+      ALLOCATE( ZY1(LDY,N+1) )
+      ALLOCATE( ZAU(LDAU,N) )
+      ALLOCATE( ZW(LDW,N) )
+      ALLOCATE( ZS(LDS,N) )
+      ALLOCATE( ZZ(LDZ,N) )
+      ALLOCATE( ZZ1(LDZ,N) )
+      ALLOCATE( RES(N) )
+      ALLOCATE( RES1(N) )
+      ALLOCATE( RESEX(N) )
+      ALLOCATE( ZEIGS(N) )
+      ALLOCATE( SINGVX(N) )
+      ALLOCATE( SINGVQX(N) )
+
+      TOL  = M*EPS
+      ! This mimics O(M*N)*EPS bound for accumulated roundoff error.
+      ! The factor 10 is somewhat arbitrary.
+      TOL2 = 10*M*N*EPS
+
+!.............
+
+      DO K_TRAJ = 1, 2
+      !  Number of initial conditions in the simulation/trajectories (1 or 2)
+
+      COND = 1.0D4
+      ZMAX = (1.0D1,1.0D1)
+      RSIGN = 'F'
+      GRADE = 'N'
+      MODEL = 6
+      CONDL = 1.0D1
+      MODER = 6
+      CONDR = 1.0D1
+      PIVTNG = 'N'
+
+      ! Loop over all parameter MODE values for ZLATMR (+1,..,+6)
+      DO MODE = 1, 6
+
+      ALLOCATE( IWORK(2*M) )
+      ALLOCATE( ZDA(M) )
+      ALLOCATE( ZDL(M) )
+      ALLOCATE( ZDR(M) )
+
+      CALL ZLATMR( M, M, 'N', ISEED, 'N', ZDA, MODE, COND, &
+                   ZMAX, RSIGN, GRADE, ZDL, MODEL,  CONDL, &
+                   ZDR, MODER, CONDR, PIVTNG, IWORK, M, M, &
+                   ZERO, -ONE, 'N', ZA, LDA, IWORK(M+1), INFO )
+      DEALLOCATE( ZDR )
+      DEALLOCATE( ZDL )
+      DEALLOCATE( ZDA )
+      DEALLOCATE( IWORK )
+
+      LZWORK = MAX(1,2*M)
+      ALLOCATE( ZEIGSA(M) )
+      ALLOCATE( ZWORK(LZWORK) )
+      ALLOCATE( WORK(2*M) )
+      ZAC(1:M,1:M) = ZA(1:M,1:M)
+      CALL ZGEEV( 'N','N', M, ZAC, LDA, ZEIGSA, ZDUM2X2, 2, &
+                  ZDUM2X2, 2, ZWORK, LZWORK, WORK, INFO ) ! LAPACK CALL
+      DEALLOCATE(WORK)
+      DEALLOCATE(ZWORK)
+
+      TMP = ABS(ZEIGSA(IZAMAX(M, ZEIGSA, 1))) ! The spectral radius of ZA
+      ! Scale the matrix ZA to have unit spectral radius.
+      CALL ZLASCL( 'G',0, 0, TMP, ONE, M, M, &
+                   ZA, LDA, INFO )
+      CALL ZLASCL( 'G',0, 0, TMP, ONE, M, 1, &
+                   ZEIGSA, M, INFO )
+      ANORM = ZLANGE( 'F', M, M, ZA, LDA, WDUMMY )
+
+      IF ( K_TRAJ == 2 ) THEN
+          ! generate data as two trajectories
+          ! with two initial conditions
+          CALL ZLARNV(2, ISEED, M, ZF(1,1) )
+          DO i = 1, N/2
+             CALL ZGEMV( 'N', M, M, ZONE, ZA, LDA, ZF(1,i), 1,  &
+                  ZZERO, ZF(1,i+1), 1 )
+          END DO
+          ZX0(1:M,1:N/2) = ZF(1:M,1:N/2)
+          ZY0(1:M,1:N/2) = ZF(1:M,2:N/2+1)
+
+          CALL ZLARNV(2, ISEED, M, ZF(1,1) )
+          DO i = 1, N-N/2
+             CALL ZGEMV( 'N', M, M, ZONE, ZA, LDA, ZF(1,i), 1,  &
+                  ZZERO, ZF(1,i+1), 1 )
+          END DO
+          ZX0(1:M,N/2+1:N) = ZF(1:M,1:N-N/2)
+          ZY0(1:M,N/2+1:N) = ZF(1:M,2:N-N/2+1)
+      ELSE  ! one trajectory; ZF0 keeps all N+1 snapshots for the ZGEDMDQ test
+          CALL ZLARNV(2, ISEED, M, ZF(1,1) )
+          DO i = 1, N
+             CALL ZGEMV( 'N', M, M, ZONE, ZA, LDA, ZF(1,i), 1,  &
+                  ZZERO, ZF(1,i+1), 1 )
+          END DO
+          ZF0(1:M,1:N+1) = ZF(1:M,1:N+1)
+          ZX0(1:M,1:N) = ZF0(1:M,1:N)
+          ZY0(1:M,1:N) = ZF0(1:M,2:N+1)
+      END IF
+
+      DEALLOCATE( ZEIGSA )
+!........................................................................
+
+      DO iJOBZ = 1, 4
+
+          SELECT CASE ( iJOBZ )
+          CASE(1)
+              JOBZ   = 'V'
+              RESIDS = 'R'
+          CASE(2)
+              JOBZ   = 'V'
+              RESIDS = 'N'
+          CASE(3)
+              JOBZ   = 'F'
+              RESIDS = 'N'
+          CASE(4)
+              JOBZ   = 'N'
+              RESIDS = 'N'
+          END SELECT
+
+      DO iJOBREF = 1, 3
+
+          SELECT CASE ( iJOBREF )
+          CASE(1)
+              JOBREF = 'R'
+          CASE(2)
+              JOBREF = 'E'
+          CASE(3)
+              JOBREF = 'N'
+          END SELECT
+
+      DO iSCALE = 1, 4
+
+          SELECT CASE ( iSCALE )
+          CASE(1)
+              SCALE = 'S'
+          CASE(2)
+              SCALE = 'C'
+          CASE(3)
+              SCALE = 'Y'
+          CASE(4)
+              SCALE = 'N'
+          END SELECT
+
+      DO iNRNK = -1, -2, -1
+         NRNK   = iNRNK
+         NRNKsp = iNRNK
+
+      DO iWHTSVD = 1,  3
+         ! Check the options WHTSVD = 1, 2, 3 for computing the POD
+         ! basis via the SVD (this loop does not reach WHTSVD = 4).
+         WHTSVD   = iWHTSVD
+         WHTSVDsp = iWHTSVD
+
+      DO LWMINOPT = 1, 2
+         ! Workspace query for the minimal (1) and for the optimal
+         ! (2) workspace lengths determined by workspace query.
+
+      ! ZGEDMD is always tested and its results are also used for
+      ! comparisons with ZGEDMDQ.
+
+      ZX(1:M,1:N) = ZX0(1:M,1:N)
+      ZY(1:M,1:N) = ZY0(1:M,1:N)
+
+      CALL ZGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,   &
+                   M,  N, ZX, LDX, ZY, LDY, NRNK, TOL,    &
+                   K, ZEIGS, ZZ, LDZ,  RES, ZAU, LDAU,    &
+                   ZW,  LDW, ZS, LDS,  ZDUMMY, -1,        &
+                   WDUMMY, -1, IDUMMY, -1, INFO )
+      IF ( (INFO .EQ. 2) .OR. ( INFO .EQ. 3 ) &
+                          .OR. ( INFO < 0 ) ) THEN
+           WRITE(*,*) 'Call to ZGEDMD workspace query failed. &
+                      &Check the calling sequence and the code.'
+           WRITE(*,*) 'The error code is ', INFO
+           WRITE(*,*) 'The input parameters were ',      &
+           SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,          &
+           M, N, LDX, LDY, NRNK, TOL, LDZ, LDAU, LDW, LDS
+           STOP
+      END IF
+
+      LZWORK = INT(ZDUMMY(LWMINOPT))
+      LWORK  = INT(WDUMMY(1))
+      LIWORK = IDUMMY(1)
+
+      ALLOCATE(ZWORK(LZWORK))
+      ALLOCATE( WORK(LWORK))
+      ALLOCATE(IWORK(LIWORK))
+
+      CALL ZGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD,  &
+                   M,  N, ZX, LDX, ZY, LDY, NRNK, TOL,   &
+                   K, ZEIGS, ZZ, LDZ,  RES, ZAU, LDAU,   &
+                   ZW,  LDW,  ZS, LDS, ZWORK,  LZWORK,   &
+                   WORK, LWORK, IWORK, LIWORK, INFO )
+
+      IF ( INFO /= 0 ) THEN
+           WRITE(*,*) 'Call to ZGEDMD failed. &
+           &Check the calling sequence and the code.'
+           WRITE(*,*) 'The error code is ', INFO
+           WRITE(*,*) 'The input parameters were ',&
+           SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+           M, N, LDX, LDY, NRNK, TOL
+           STOP
+      END IF
+
+      SINGVX(1:N) = WORK(1:N)
+
+      !...... ZGEDMD check point
+      IF ( LSAME(JOBZ,'V')  ) THEN
+          ! Check that Z = X*W, on return from ZGEDMD
+          ! This checks that the returned eigenvectors in Z are
+          ! the product of the SVD/POD basis returned in X
+          ! and the eigenvectors of the Rayleigh quotient
+          ! returned in W
+          CALL ZGEMM( 'N', 'N', M, K, K, ZONE, ZX, LDX, ZW, LDW, &
+                      ZZERO, ZZ1, LDZ )
+          TMP = ZERO
+          DO i = 1, K
+             CALL ZAXPY( M, -ZONE, ZZ(1,i), 1, ZZ1(1,i), 1)
+             TMP = MAX(TMP, DZNRM2( M, ZZ1(1,i), 1 ) )
+          END DO
+          TMP_ZXW = MAX(TMP_ZXW, TMP )
+          IF ( TMP <= 10*M*EPS ) THEN  ! judge this call only, not the running max
+              !WRITE(*,*) ' :) .... OK .........ZGEDMD PASSED.'
+          ELSE
+              NFAIL_Z_XV = NFAIL_Z_XV + 1
+              WRITE(*,*) ':( .................ZGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+      END IF
+
+
+      !...... ZGEDMD check point
+      IF ( LSAME(JOBREF,'R') ) THEN
+           ! The matrix A*U is returned for computing refined Ritz vectors.
+           ! Check that A*U is computed correctly using the formula
+           ! A*U = Y * V * inv(SIGMA). This depends on the
+           ! accuracy in the computed singular values and vectors of X.
+           ! See the paper for an error analysis.
+           ! Note that the left singular vectors of the input matrix X
+           ! are returned in the array X.
+           CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZX, LDX, &
+                      ZZERO, ZZ1, LDZ )
+          TMP = ZERO
+          DO i = 1, K
+            CALL ZAXPY( M, -ZONE, ZAU(1,i), 1, ZZ1(1,i), 1)
+            TMP = MAX( TMP, DZNRM2( M, ZZ1(1,i),1 ) * &
+                     SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_AU = MAX( TMP_AU, TMP )
+          IF ( TMP <= TOL2 ) THEN
+              !WRITE(*,*) ':) .... OK .........ZGEDMD PASSED.'
+          ELSE
+              NFAIL_AU = NFAIL_AU + 1
+              WRITE(*,*) ':( .................ZGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+      ELSEIF ( LSAME(JOBREF,'E') ) THEN
+       ! The unscaled vectors of the Exact DMD are computed.
+       ! This option is included for the sake of completeness,
+       ! for users who prefer the Exact DMD vectors. The
+       ! returned vectors are in the real form, in the same way
+       ! as the Ritz vectors. Here we just save the vectors
+       ! and test them separately using a Matlab script.
+
+
+       CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZAU, LDAU, ZZERO, ZY1, LDY )
+
+               DO i=1, K
+                  ! relative residual of ( ZEIGS(i), ZAU(:,i) ) with respect to ZA
+                CALL ZAXPY( M, -ZEIGS(i), ZAU(1,i), 1, ZY1(1,i), 1 )
+                RESEX(i) = DZNRM2( M, ZY1(1,i), 1) / DZNRM2(M,ZAU(1,i),1)
+               END DO
+      END IF
+      !...... ZGEDMD check point
+
+      IF ( LSAME(RESIDS, 'R') ) THEN
+          ! Compare the residuals returned by ZGEDMD with the
+          ! explicitly computed residuals using the matrix A.
+          ! Compute explicitly Y1 = A*Z
+          CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZZ, LDZ, ZZERO, ZY1, LDY )
+          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), column by column,
+          ! with the complex eigenvalues LAMBDA(i) = ZEIGS(i).
+          ! (See the description of Z in ZGEDMD.)
+
+          DO i=1, K
+                ! ZY1(1:M,i) = ZY1(1:M,i) - ZEIGS(i)*ZZ(1:M,i)
+                CALL ZAXPY( M, -ZEIGS(i), ZZ(1,i), 1, ZY1(1,i), 1 )
+                RES1(i) = DZNRM2( M, ZY1(1,i), 1)
+          END DO
+          TMP = ZERO
+          DO i = 1, K
+          TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+                    SINGVX(K)/(ANORM*SINGVX(1)) )
+          END DO
+          TMP_REZ = MAX( TMP_REZ, TMP )
+          IF ( TMP <= TOL2 ) THEN
+              !WRITE(*,*) ':) .... OK ..........ZGEDMD PASSED.'
+          ELSE
+              NFAIL_REZ = NFAIL_REZ + 1
+              WRITE(*,*) ':( ..................ZGEDMD FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',&
+                 SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, &
+                 M, N, LDX, LDY, NRNK, TOL
+          END IF
+
+
+         IF ( LSAME(JOBREF,'E') ) THEN
+            TMP = ZERO
+          DO i = 1, K
+          TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) )
+          END DO
+          TMP_EX = MAX(TMP_EX,TMP)
+         END IF
+
+      END IF
+
+      DEALLOCATE(ZWORK)
+      DEALLOCATE(WORK)
+      DEALLOCATE(IWORK)
+
+      IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN
+
+      ZF(1:M,1:N+1) = ZF0(1:M,1:N+1)
+
+      CALL ZGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, &
+                    WHTSVD, M, N+1, ZF, LDF,  ZX, LDX,  ZY, LDY,  &
+                    NRNK,  TOL, K, ZEIGS, ZZ, LDZ, RES,  ZAU,  &
+                    LDAU, ZW, LDW, ZS, LDS, ZDUMMY, -1,   &
+                    WDUMMY,  -1, IDUMMY, -1, INFO )
+
+      LZWORK = INT(ZDUMMY(LWMINOPT))
+      ALLOCATE( ZWORK(LZWORK) )
+      LIWORK = IDUMMY(1)
+      ALLOCATE(IWORK(LIWORK))
+      LWORK = INT(WDUMMY(1))
+      ALLOCATE(WORK(LWORK))
+
+      CALL ZGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, &
+                    WHTSVD, M, N+1, ZF, LDF,  ZX, LDX,  ZY, LDY,  &
+                    NRNK,  TOL, KQ, ZEIGS, ZZ, LDZ, RES,  ZAU,  &
+                    LDAU, ZW, LDW, ZS, LDS, ZWORK, LZWORK,   &
+                    WORK,  LWORK, IWORK, LIWORK, INFO )
+
+      IF ( INFO /= 0 ) THEN
+             WRITE(*,*) 'Call to ZGEDMDQ failed. &
+             &Check the calling sequence and the code.'
+             WRITE(*,*) 'The error code is ', INFO
+             WRITE(*,*) 'The input parameters were ',&
+             SCALE, JOBZ, RESIDS, WANTQ, WANTR, WHTSVD, &
+             M, N, LDX, LDY, NRNK, TOL
+             STOP
+      END IF
+      SINGVQX(1:N) = WORK(1:N)
+
+      !..... ZGEDMDQ check point
+
+          IF ( 1 == 0 ) THEN
+              ! Comparison of ZGEDMD and ZGEDMDQ singular values disabled
+          TMP = ZERO
+          DO i = 1, MIN(K, KQ)
+             TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / &
+                                   SINGVX(1) )
+          END DO
+          SVDIFF = MAX( SVDIFF, TMP )
+          IF ( TMP > M*N*EPS ) THEN
+             WRITE(*,*) 'FAILED! Something was wrong with the run.'
+             NFAIL_SVDIFF = NFAIL_SVDIFF + 1
+             DO j =1, 3
+                 write(*,*) j, SINGVX(j), SINGVQX(j)
+                 read(*,*)
+             END DO
+
+          END IF
+          END IF
+
+      !..... ZGEDMDQ check point
+      IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN
+         ! Check that the QR factors are computed and returned
+         ! as requested. The residual ||F-Q*R||_F / ||F||_F
+         ! is compared to M*N*EPS.
+         ZF1(1:M,1:N+1) = ZF0(1:M,1:N+1)
+         CALL ZGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ZONE, ZF, &
+                     LDF, ZY, LDY, ZONE, ZF1, LDF )
+         TMP_FQR = ZLANGE( 'F', M, N+1, ZF1, LDF, WORK ) / &
+               ZLANGE( 'F', M, N+1, ZF0,  LDF, WORK )
+         IF ( TMP_FQR > TOL2 ) THEN
+              WRITE(*,*) 'FAILED! Something was wrong with the run.'
+             NFAIL_F_QR = NFAIL_F_QR + 1
+         ELSE
+             !WRITE(*,*) '........ PASSED.'
+         END IF
+      END IF
+
+      !..... ZGEDMDQ check point
+      IF ( LSAME(RESIDS, 'R') ) THEN
+          ! Compare the residuals returned by ZGEDMDQ with the
+          ! explicitly computed residuals using the matrix A.
+          ! Compute explicitly Y1 = A*Z
+          CALL ZGEMM( 'N', 'N', M, KQ, M, ZONE, ZA, LDA, ZZ, LDZ, ZZERO, ZY1, LDY )
+          ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), column by column,
+          ! with the complex eigenvalues LAMBDA(i) = ZEIGS(i).
+          ! (See the description of Z in ZGEDMDQ.)
+
+          DO i=1, KQ
+                ! ZY1(1:M,i) = ZY1(1:M,i) - ZEIGS(i)*ZZ(1:M,i)
+                CALL ZAXPY( M, -ZEIGS(i), ZZ(1,i), 1, ZY1(1,i), 1 )
+                ! RES1(i) is the 2-norm of that residual column
+                RES1(i) = DZNRM2( M, ZY1(1,i), 1)
+          END DO
+          TMP = ZERO
+          DO i = 1, KQ
+          TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * &
+              SINGVQX(KQ)/(ANORM*SINGVQX(1)) )
+          END DO
+          TMP_REZQ = MAX( TMP_REZQ, TMP )
+          IF ( TMP <= TOL2 ) THEN
+              !WRITE(*,*) '.... OK ........ ZGEDMDQ PASSED.'
+          ELSE  ! count the failure and continue; the summary reports NFAIL_REZQ
+              NFAIL_REZQ = NFAIL_REZQ + 1
+              WRITE(*,*) '................ ZGEDMDQ FAILED!', &
+                  'Check the code for implementation errors.'
+              WRITE(*,*) 'The input parameters were ',   &
+                 SCALE, JOBZ, RESIDS, WANTQ, WANTR, WHTSVD, M, N, NRNK, TOL
+          END IF
+      END IF
+
+      DEALLOCATE( ZWORK )
+      DEALLOCATE( WORK  )
+      DEALLOCATE( IWORK )
+
+      END IF ! ZGEDMDQ
+
+!.......................................................................................................
+
+      END DO   ! LWMINOPT
+      !write(*,*) 'LWMINOPT loop completed'
+      END DO   ! iWHTSVD
+      !write(*,*) 'WHTSVD loop completed'
+      END DO   ! iNRNK  -2:-1
+      !write(*,*) 'NRNK loop completed'
+      END DO   ! iSCALE  1:4
+      !write(*,*) 'SCALE loop completed'
+      END DO
+      !write(*,*) 'JOBREF loop completed'
+      END DO   ! iJOBZ
+      !write(*,*) 'JOBZ loop completed'
+
+      END DO ! MODE 1:6
+      !write(*,*) 'MODE loop completed'
+      END DO ! 1 or 2 trajectories
+      !write(*,*) 'trajectories  loop completed'
+
+      DEALLOCATE( ZA )
+      DEALLOCATE( ZAC )
+      DEALLOCATE( ZZ )
+      DEALLOCATE( ZF )
+      DEALLOCATE( ZF0 )
+      DEALLOCATE( ZF1 )
+      DEALLOCATE( ZX )
+      DEALLOCATE( ZX0 )
+      DEALLOCATE( ZY )
+      DEALLOCATE( ZY0 )
+      DEALLOCATE( ZY1 )
+      DEALLOCATE( ZAU )
+      DEALLOCATE( ZW )
+      DEALLOCATE( ZS )
+      DEALLOCATE( ZZ1 )
+      DEALLOCATE( RES )
+      DEALLOCATE( RES1 )
+      DEALLOCATE( RESEX )
+      DEALLOCATE( ZEIGS )
+      DEALLOCATE( SINGVX )
+      DEALLOCATE( SINGVQX )
+
+      END DO ! LLOOP
+
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for ZGEDMD :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+      IF ( NFAIL_Z_XV == 0 ) THEN
+         WRITE(*,*) '>>>> Z - U*V test PASSED.'
+      ELSE
+         WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)'
+         WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW
+         NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV
+      END IF
+      IF ( NFAIL_AU == 0 ) THEN
+        WRITE(*,*) '>>>> A*U test PASSED. '
+      ELSE
+        WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)'
+        WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU
+        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+        NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU
+      END IF
+
+      IF ( NFAIL_REZ == 0 ) THEN
+        WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+        WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)'
+        WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ
+        WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+        NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ
+      END IF
+
+      IF ( NFAIL_TOTAL == 0 ) THEN
+        WRITE(*,*) '>>>> ZGEDMD :: ALL TESTS PASSED.'
+      ELSE
+        WRITE(*,*) NFAIL_TOTAL, 'FAILURES!'
+        WRITE(*,*) '>>>>>>>>>>>>>> ZGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      IF ( TEST_QRDMD ) THEN
+      WRITE(*,*)
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*) ' Test summary for ZGEDMDQ :'
+      WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>'
+      WRITE(*,*)
+
+      IF ( NFAIL_SVDIFF == 0 ) THEN
+          WRITE(*,*) '>>>> ZGEDMD and ZGEDMDQ computed singular &
+              &values test PASSED.'
+      ELSE
+         WRITE(*,*) 'ZGEDMD and ZGEDMDQ discrepancies in &
+             &the singular values unacceptable ', &
+             NFAIL_SVDIFF, ' times. Test FAILED.'
+         WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF
+         WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+         NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF
+      END IF
+
+      IF ( NFAIL_F_QR == 0 ) THEN
+          WRITE(*,*) '>>>> F - Q*R test PASSED.'
+      ELSE
+          WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)'
+          WRITE(*,*) 'The largest relative residual was ', TMP_FQR
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR
+      END IF
+
+      IF ( NFAIL_REZQ == 0 ) THEN
+          WRITE(*,*) '>>>> Rezidual computation test PASSED.'
+      ELSE
+          WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)'
+          WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ
+          WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS
+          NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ
+      END IF
+
+      IF ( NFAILQ_TOTAL == 0 ) THEN
+          WRITE(*,*) '>>>>>>> ZGEDMDQ :: ALL TESTS PASSED.'
+      ELSE
+         WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!'
+         WRITE(*,*) '>>>>>>> ZGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.'
+      END IF
+
+      END IF
+
+      WRITE(*,*)
+      WRITE(*,*) 'Test completed.'
+      STOP
+      END
diff --git a/TESTING/Makefile b/TESTING/Makefile
index bdea2bfaa4..3963260ac0 100644
--- a/TESTING/Makefile
+++ b/TESTING/Makefile
@@ -61,6 +61,8 @@ SEIGTST= snep.out \
          scsd.out \
          slse.out
 
+SDMDEIGTST= sdmd.out
+
 CEIGTST= cnep.out \
          csep.out \
          cse2.out \
@@ -82,6 +84,8 @@ CEIGTST= cnep.out \
          ccsd.out \
          clse.out
 
+CDMDEIGTST= cdmd.out
+
 DEIGTST= dnep.out \
          dsep.out \
          dse2.out \
@@ -103,6 +107,8 @@ DEIGTST= dnep.out \
          dcsd.out \
          dlse.out
 
+DDMDEIGTST= ddmd.out
+
 ZEIGTST= znep.out \
          zsep.out \
          zse2.out \
@@ -124,6 +130,7 @@ ZEIGTST= znep.out \
          zcsd.out \
          zlse.out
 
+ZDMDEIGTST= zdmd.out
 
 SLINTST= stest.out
 
@@ -142,10 +149,10 @@ ZLINTST= ztest.out
 ZLINTSTPROTO= zctest.out ztest_rfp.out
 
 .PHONY: single complex double complex16
-single:         $(SLINTST) $(SEIGTST)
-complex:        $(CLINTST) $(CEIGTST)
-double:         $(DLINTST) $(DEIGTST)
-complex16:      $(ZLINTST) $(ZEIGTST)
+single:         $(SLINTST) $(SEIGTST) $(SDMDEIGTST)
+complex:        $(CLINTST) $(CEIGTST) $(CDMDEIGTST)
+double:         $(DLINTST) $(DEIGTST) $(DDMDEIGTST)
+complex16:      $(ZLINTST) $(ZEIGTST) $(ZDMDEIGTST)
 
 .PHONY: singleproto complexproto doubleproto complex16proto
 singleproto:    $(SLINTSTPROTO)
@@ -297,6 +304,10 @@ scsd.out: csd.in EIG/xeigtsts
 slse.out: lse.in EIG/xeigtsts
 	@echo LSE: Testing Constrained Linear Least Squares routines
 	./EIG/xeigtsts < lse.in > $@ 2>&1
+
+sdmd.out: sdmd.in EIG/xdmdeigtsts
+	@echo DMD: Testing Dynamic Mode Decomposition routines
+	./EIG/xdmdeigtsts < sdmd.in > $@ 2>&1
 #
 # ======== COMPLEX EIG TESTS ===========================
 
@@ -379,6 +390,10 @@ ccsd.out: csd.in EIG/xeigtstc
 clse.out: lse.in EIG/xeigtstc
 	@echo LSE: Testing Constrained Linear Least Squares routines
 	./EIG/xeigtstc < lse.in > $@ 2>&1
+
+cdmd.out: cdmd.in EIG/xdmdeigtstc
+	@echo DMD: Testing Dynamic Mode Decomposition routines
+	./EIG/xdmdeigtstc < cdmd.in > $@ 2>&1
 #
 # ======== DOUBLE EIG TESTS ===========================
 
@@ -461,6 +476,10 @@ dcsd.out: csd.in EIG/xeigtstd
 dlse.out: lse.in EIG/xeigtstd
 	@echo LSE: Testing Constrained Linear Least Squares routines
 	./EIG/xeigtstd < lse.in > $@ 2>&1
+
+ddmd.out: ddmd.in EIG/xdmdeigtstd
+	@echo DMD: Testing Dynamic Mode Decomposition routines
+	./EIG/xdmdeigtstd < ddmd.in > $@ 2>&1
 #
 # ======== COMPLEX16 EIG TESTS ===========================
 
@@ -543,6 +562,10 @@ zcsd.out: csd.in EIG/xeigtstz
 zlse.out: lse.in EIG/xeigtstz
 	@echo LSE: Testing Constrained Linear Least Squares routines
 	./EIG/xeigtstz < lse.in > $@ 2>&1
+
+zdmd.out: zdmd.in EIG/xdmdeigtstz
+	@echo DMD: Testing Dynamic Mode Decomposition routines
+	./EIG/xdmdeigtstz < zdmd.in > $@ 2>&1
 # ==============================================================================
 
 LIN/xlintsts: $(FRCLIN) $(FRC)
@@ -578,15 +601,27 @@ LIN/xlintstzc: $(FRCLIN) $(FRC)
 EIG/xeigtsts: $(FRCEIG) $(FRC)
 	$(MAKE) -C EIG xeigtsts
 
+EIG/xdmdeigtsts: $(FRCEIG) $(FRC)
+	$(MAKE) -C EIG xdmdeigtsts
+
 EIG/xeigtstc: $(FRCEIG) $(FRC)
 	$(MAKE) -C EIG xeigtstc
 
+EIG/xdmdeigtstc: $(FRCEIG) $(FRC)
+	$(MAKE) -C EIG xdmdeigtstc
+
 EIG/xeigtstd: $(FRCEIG) $(FRC)
 	$(MAKE) -C EIG xeigtstd
 
+EIG/xdmdeigtstd: $(FRCEIG) $(FRC)
+	$(MAKE) -C EIG xdmdeigtstd
+
 EIG/xeigtstz: $(FRCEIG) $(FRC)
 	$(MAKE) -C EIG xeigtstz
 
+EIG/xdmdeigtstz: $(FRCEIG) $(FRC)
+	$(MAKE) -C EIG xdmdeigtstz
+
 .PHONY: clean cleantest
 clean: cleantest
 cleantest:
diff --git a/TESTING/cdmd.in b/TESTING/cdmd.in
new file mode 100644
index 0000000000..42d046e01b
--- /dev/null
+++ b/TESTING/cdmd.in
@@ -0,0 +1,11 @@
+10
+5
+
+20
+10
+
+30
+11
+
+50
+20
diff --git a/TESTING/ddmd.in b/TESTING/ddmd.in
new file mode 100644
index 0000000000..42d046e01b
--- /dev/null
+++ b/TESTING/ddmd.in
@@ -0,0 +1,11 @@
+10
+5
+
+20
+10
+
+30
+11
+
+50
+20
diff --git a/TESTING/sdmd.in b/TESTING/sdmd.in
new file mode 100644
index 0000000000..42d046e01b
--- /dev/null
+++ b/TESTING/sdmd.in
@@ -0,0 +1,11 @@
+10
+5
+
+20
+10
+
+30
+11
+
+50
+20
diff --git a/TESTING/zdmd.in b/TESTING/zdmd.in
new file mode 100644
index 0000000000..42d046e01b
--- /dev/null
+++ b/TESTING/zdmd.in
@@ -0,0 +1,11 @@
+10
+5
+
+20
+10
+
+30
+11
+
+50
+20
diff --git a/lapack_testing.py b/lapack_testing.py
index abda368981..ae59926b88 100755
--- a/lapack_testing.py
+++ b/lapack_testing.py
@@ -199,6 +199,8 @@ def run_summary_test( f, cmdline, short_summary):
     range_prec=[1,3]
 elif test=='rfp':
     range_test=[18]
+elif test=='dmd':
+    range_test=[19]
 elif test=='eig':
     range_test=list(range(16))
 else:
@@ -227,19 +229,19 @@ def run_summary_test( f, cmdline, short_summary):
     letter+"gd",letter+"sb",letter+"sg",
     letter+"bb","glm","gqr",
     "gsv","csd","lse",
-    letter+"test", letter+dtypes[0][dtype-1]+"test",letter+"test_rfp"),
+    letter+"test", letter+dtypes[0][dtype-1]+"test",letter+"test_rfp",letter+"dmd"),
     ("Nonsymmetric-Eigenvalue-Problem", "Symmetric-Eigenvalue-Problem", "Symmetric-Eigenvalue-Problem-2-stage", "Singular-Value-Decomposition",
     "Eigen-Condition","Nonsymmetric-Eigenvalue","Nonsymmetric-Generalized-Eigenvalue-Problem",
     "Nonsymmetric-Generalized-Eigenvalue-Problem-driver", "Symmetric-Eigenvalue-Problem", "Symmetric-Eigenvalue-Generalized-Problem",
     "Banded-Singular-Value-Decomposition-routines", "Generalized-Linear-Regression-Model-routines", "Generalized-QR-and-RQ-factorization-routines",
     "Generalized-Singular-Value-Decomposition-routines", "CS-Decomposition-routines", "Constrained-Linear-Least-Squares-routines",
-    "Linear-Equation-routines", "Mixed-Precision-linear-equation-routines","RFP-linear-equation-routines"),
+    "Linear-Equation-routines", "Mixed-Precision-linear-equation-routines","RFP-linear-equation-routines","Dynamic-Mode-Decomposition"),
     (letter+"nep", letter+"sep", letter+"se2", letter+"svd",
     letter+"ec",letter+"ed",letter+"gg",
     letter+"gd",letter+"sb",letter+"sg",
     letter+"bb",letter+"glm",letter+"gqr",
     letter+"gsv",letter+"csd",letter+"lse",
-    letter+"test", letter+dtypes[0][dtype-1]+"test",letter+"test_rfp"),
+    letter+"test", letter+dtypes[0][dtype-1]+"test",letter+"test_rfp",letter+"dmd"),
     )
 
 
@@ -260,6 +262,9 @@ def run_summary_test( f, cmdline, short_summary):
             elif dtest==18:
                 # PROTO LIN TESTS
                 cmdbase="xlintstrf"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out"
+            elif dtest==19:
+                # DMD EIG TESTS (the "dmd" entry is the 20th element of each dtests tuple, i.e. index 19)
+                cmdbase="xdmdeigtst"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out"
             else:
                 # EIG TESTS
                 cmdbase="xeigtst"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out"