From b562de94686342fc76cd262c2b2ddcb1138f288e Mon Sep 17 00:00:00 2001
From: "Ryan H. Lewis" <ryan.lewis@ayasdi.com>
Date: Wed, 20 Jan 2016 15:30:21 -0800
Subject: [PATCH 01/13] initial commit.

---
 include/El/lapack_like/tsvd.hpp | 121 ++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 include/El/lapack_like/tsvd.hpp
diff --git a/include/El/lapack_like/tsvd.hpp b/include/El/lapack_like/tsvd.hpp
new file mode 100644
index 0000000000..22a2c04f52
--- /dev/null
+++ b/include/El/lapack_like/tsvd.hpp
@@ -0,0 +1,121 @@
+#pragma once
+#include <tuple>
+
+namespace El {
+
+template< typename F, 
+          typename ForwardOperator,
+          typename AdjointOperator>
+std::tuple< bool, Base<F>, Base<F> >
+BidiagLanczos(
+    Int m,
+    Int n, 
+    const ForwardOperator& A,
+    const AdjointOperator& AAdj,
+    Int steps,
+    Base<F> tau,
+    std::vector<Base<F>>& alphaList,
+    std::vector<Base<F>>& betaList,
+    DistMatrix<F>& U,
+    DistMatrix<F>& V,
+    //Base<F>& normA, //can be an estimate?
+    std::vector<Base<F>>& muList,
+    std::vector<Base<F>>& nuList,
+    bool reorthIn,
+    Base<F> tolError){
+    typedef Base<F> Real;
+    typename DistMatrix<F> DM;
+
+    bool reorthB = reorthIn;
+    Int nReorth = 0;
+    Int nReorthVec = 0;
+    muList.reserve( steps);
+    nuList.reserve( steps);
+    Int iter = alphaList.size()-1;
+    DM u = U(ALL, IR(iter+1));
+    DM v = V(ALL, IR(iter));
+    Real beta = betaList[iter];
+
+    
+    DM vOld( v);
+    for( Int j = iter+1; j <= iter+steps; ++j)
+    {
+        vOld = v;
+        v = u;
+        AAdj( v);
+        Axpy( -beta, vOld, v);
+        Real alpha = Nrm2(v);
+        //run omega recurrence
+        for(Int i = 0; i < j; ++i)
+        {
+            
+        }
+
+    }
+
+}
+
+
+/**
+* TSVD
+*/
+template< typename F, 
+          typename ForwardOperator,
+          typename AdjointOperator>
+inline Int 
+tsvd( 
+      Int m,
+      Int n, 
+      const ForwardOperator& A,
+      const AdjointOperator& AAdj,
+      Int nVals, 
+      Int maxIter, 
+      DistMatrix<F>& initialVec, 
+      Base<F> tolConv=Pow(limits::Epsilon<Base<F>>(), Base<F>(.7)),
+      Base<F> tolError=Pow(limits::Epsilon<Base<F>>(), Base<F>(.7)),
+      ) //requires InfiniteCharacteristic<F> && ForwardOperator::ScalarType is F
+      {
+        typedef DistMatrix<F> DM;
+        typedef Base<F> Real;
+        const Grid& g = initialVec.Grid();
+        auto tau = InfinityNorm(A); //TODO: Impl this?
+        DM z( g);
+        Zeros(z, 1,1);
+        Int cc = Max((Int)5, nVals);
+        Int steps = 5;
+        Real initNorm = Nrm2(initial_vec);
+        DM v( initialVec);
+        AAdj( v);
+        Scale(Real(1)/initNorm, v);       
+        Real alpha = Nrm2(v);
+        Scale(Real(1)/alpha, v);
+        DM V( n, 1, g);
+        Fill( V, Real(1));
+        //TODO: Make this the right size upfront?
+        DM alphaList( 1, 1, g); //TODO: make this a Matrix<Real>
+        
+        Real nu = 1 + tau/alpha; //think of nu = 1+\epsilon
+        DM u( v);
+        A( u);
+        DM uOld( initVec); //TODO: similar?
+        Scale(Real(1)/initNorm, initVec);       
+        Axpy(-alpha, uOld, u);
+        Real beta = FrobeniusNorm(u);
+        Scale(Real(1)/beta,u);
+
+        DM U(g);
+        HCat(uOld, u, U);
+        //TODO: Make this the right size upfront
+        DM betaList( 1, 1, g); //TODO: make this a Matrix<Real>
+        Real mu = tau/beta;
+        std::vector< Real> maxMuList; 
+        std::vector< Real> maxNuList; 
+        maxMuList.reserve(maxIter);
+        maxNuList.reserve(maxIter);
+       ///TODO: Implement the rest.
+
+}
+} //end namespace El
+
+
+

From 0139195b0f9a4f80f97c97378acda2cad18ce433 Mon Sep 17 00:00:00 2001
From: "Ryan H. Lewis" <ryan.lewis@ayasdi.com>
Date: Wed, 20 Jan 2016 17:06:46 -0800
Subject: [PATCH 02/13] tsvd.hpp

---
 include/El/lapack_like/tsvd.hpp | 106 ++++++++++++++++++++++++++++++--
 1 file changed, 100 insertions(+), 6 deletions(-)

diff --git a/include/El/lapack_like/tsvd.hpp b/include/El/lapack_like/tsvd.hpp
index 22a2c04f52..b10eaba4ba 100644
--- a/include/El/lapack_like/tsvd.hpp
+++ b/include/El/lapack_like/tsvd.hpp
@@ -3,10 +3,13 @@
 
 namespace El {
 
+template< typename Real>
+Real CopySign(const Real& x, const Real& y){ return y >= Real(0) ? Abs(x): -Abs(x); }
+
 template< typename F, 
           typename ForwardOperator,
           typename AdjointOperator>
-std::tuple< bool, Base<F>, Base<F> >
+void
 BidiagLanczos(
     Int m,
     Int n, 
@@ -22,9 +25,11 @@ BidiagLanczos(
     std::vector<Base<F>>& muList,
     std::vector<Base<F>>& nuList,
     bool reorthIn,
-    Base<F> tolError){
+    Base<F> tolError,
+    bool partialProject=false){
     typedef Base<F> Real;
     typename DistMatrix<F> DM;
+    const Real eps = limits::Epsilon<Real>();
 
     bool reorthB = reorthIn;
     Int nReorth = 0;
@@ -37,6 +42,8 @@ BidiagLanczos(
     Real beta = betaList[iter];
 
     
+    std::vector< Int> reorth_nu;
+    if( partialProject){ reorth_nu.reserve( steps); }
     DM vOld( v);
     for( Int j = iter+1; j <= iter+steps; ++j)
     {
@@ -46,13 +53,95 @@ BidiagLanczos(
         Axpy( -beta, vOld, v);
         Real alpha = Nrm2(v);
         //run omega recurrence
+        reorth_nu.clear();
         for(Int i = 0; i < j; ++i)
         {
-            
+            Real nu = betaList[i]*muList[i+1] + 
+                      alphaList[i]*muList[i] - beta*nuList[i];
+            nu = (nu + CopySign(tau, nu))/alpha;
+            if( partialProject && Abs(v) > tolError) { 
+                reorth_nu.emplace_back( i);
+            }
+            nuList[i] = nu;
         }
-
+        Real maxElt = *std::max_element( nuList.begin(), nuList.end(), Abs);
+        maxNuList.emplace_back( maxElt);
+    }
+    nuList.emplace_back( Real(1));
+    if (reorth_b || reorthNu.size()>0){
+        if( partialProject){
+            for( Int i: reorth_nu){
+                auto vi = V(ALL, IR(i));
+                Axpy( -Dot(vi, v), vi, v);
+                nuList[i] = 2*eps;
+                nReorthVecs++;
+            }
+        } else { //full reorthogalization
+             for( Int i = 0; i < j; ++i){
+                auto vi = V(ALL, IR(i));
+                Axpy( -Dot(vi, v), vi, v);
+                nuList[i] = eps;
+                nReorthVecs++;
+            }
+        }
+        alpha = Nrm2( v);
+        reorth_b = !reorth_b;
     }
+    alphaList.emplace_back(alpha);
+    Scale(Real(1)/alpha, v);
+    auto vj = V(ALL,IR(j));
+    vj = v;
+
+    //The u step
+    uOld = u;
+    u = v;
+    // apply the operator
+    A(u);
+    Axpy(-alpha, uOld, u);
+    beta = Nrm2( u);
 
+    //compute omega recurrence
+    std::vector< Int> reorth_mu;
+    if( partialProject){ reorth_mu.reserve( steps); }
+    for( Int i = 0; i <= j; ++i){
+        Real mu = alphaList[i]*nuList[i] - alpha*muList[i];
+        if (i > 0){
+            mu += betaList[i-1]*nuList[i-1];
+        }
+        mu = (mu + CopySign(tau, mu))/beta;
+        if( partialProject && Abs(mu) > tolError){
+            reorth_mu.emplace_back( i);
+        }
+        muList[ i] = mu;
+    }
+    Real maxElt = *std::max_element( muList.begin(), muList.end(), Abs);
+    maxMuList.emplace_back( maxElt);
+    muList.emplace_back( 1);
+    
+    if (reorth_b || reorthMu.size()>0){
+        if( partialProject){
+            for( Int i: reorth_mu){
+                auto ui = U(ALL, IR(i));
+                Axpy( -Dot(ui, u), ui, u);
+                muList[i] = 2*eps;
+                nReorthVecs++;
+            }
+        } else { //full reorthogalization
+             for( Int i = 0; i < j; ++i){
+                auto ui = U(ALL, IR(i));
+                Axpy( -Dot(ui, u), ui, u);
+                muList[i] = eps;
+                nReorthVecs++;
+            }
+        }
+        alpha = Nrm2( u);
+        reorth_b = !reorth_b;
+        nReorth++;
+    }
+    betaList.emplace_back(beta);
+    Scale(Real(1)/beta, u);
+    auto uj = U(ALL,IR(j));
+    uj = u;
 }
 
 
@@ -81,7 +170,6 @@ tsvd(
         auto tau = InfinityNorm(A); //TODO: Impl this?
         DM z( g);
         Zeros(z, 1,1);
-        Int cc = Max((Int)5, nVals);
         Int steps = 5;
         Real initNorm = Nrm2(initial_vec);
         DM v( initialVec);
@@ -112,7 +200,13 @@ tsvd(
         std::vector< Real> maxNuList; 
         maxMuList.reserve(maxIter);
         maxNuList.reserve(maxIter);
-       ///TODO: Implement the rest.
+        ///TODO: Implement the rest.
+        BidiagLanczos( m, n, A, AAdj, numSteps, 
+                       tau, alphaList, betaList, U, V, 
+                       muList, nuList, 
+                       reorthIn, tolError);
+        for( 
+
 
 }
 } //end namespace El

From cd17473a2f4623d876109204477282e84598047d Mon Sep 17 00:00:00 2001
From: "Ryan H. Lewis" <ryan.lewis@ayasdi.com>
Date: Thu, 21 Jan 2016 15:29:31 -0800
Subject: [PATCH 03/13] TSVD initial implementation.

---
 include/El/lapack_like/tsvd.hpp | 295 +++++++++++++++++---------------
 1 file changed, 156 insertions(+), 139 deletions(-)

diff --git a/include/El/lapack_like/tsvd.hpp b/include/El/lapack_like/tsvd.hpp
index b10eaba4ba..ea262e0a07 100644
--- a/include/El/lapack_like/tsvd.hpp
+++ b/include/El/lapack_like/tsvd.hpp
@@ -1,3 +1,7 @@
+/***
+* 
+* TSVD Based off of implementation by Andreas Noack. See: http://github.com/andreasnoack/TSVD.jl
+*/
 #pragma once
 #include <tuple>
 
@@ -6,10 +10,49 @@ namespace El {
 template< typename Real>
 Real CopySign(const Real& x, const Real& y){ return y >= Real(0) ? Abs(x): -Abs(x); }
 
+struct BidiagInfo {
+    Int numVecsReorth;
+}; //struct BidiagInfo
+
+template< typename F>
+struct BidiagCtrl {
+    bool reorthIn=false;
+    Base<F> reorthogTol=Pow(limits::Epsilon<Base<F>>(), Base<F>(.7)),
+    Base<F> convValTol=Pow(limits::Epsilon<Base<F>>(), Base<F>(.7)),
+    Base<F> convVecTol=Pow(limits::Epsilon<Base<F>>(), Base<F>(.5)),
+    bool partialProject=false;
+}; //end struct BidiagCtrl
+
+template< typename F>
+Int reorth( DistMatrix<F>& Q,
+            Int j,
+            std::vector<Base<F>>& termList,
+            const BidiagCtrl<F>& ctrl,
+            DistMatrix<F>& x,
+            Matrix<Base<F>>& diagList){
+    //TODO: Make batch with Gemv
+    Int numVecsReorth = 0;
+    termList.emplace_back( Real( 1));
+    for( Int i = 0; i < j; ++i){
+        auto vi = Q(ALL, IR(i));
+        Axpy( -Dot(qi, x), qi, x);
+        termList[i] = eps;
+        numVecsReorth++;
+    }
+    alpha = Nrm2( x);
+    diagList.Set(j,0,alpha);
+    Scale(Real(1)/alpha, v);
+    auto qj = Q(ALL,IR(j));
+    qj = x;
+    return numVecsReorth;
+}
+
+
+
 template< typename F, 
           typename ForwardOperator,
           typename AdjointOperator>
-void
+BidiagInfo
 BidiagLanczos(
     Int m,
     Int n, 
@@ -17,34 +60,29 @@ BidiagLanczos(
     const AdjointOperator& AAdj,
     Int steps,
     Base<F> tau,
-    std::vector<Base<F>>& alphaList,
-    std::vector<Base<F>>& betaList,
+    Matrix<Base<F>>& mainDiag,
+    Matrix<Base<F>>& superDiag,
     DistMatrix<F>& U,
     DistMatrix<F>& V,
     //Base<F>& normA, //can be an estimate?
     std::vector<Base<F>>& muList,
     std::vector<Base<F>>& nuList,
-    bool reorthIn,
-    Base<F> tolError,
-    bool partialProject=false){
+    const BidiagCtrl& control){
     typedef Base<F> Real;
     typename DistMatrix<F> DM;
     const Real eps = limits::Epsilon<Real>();
 
-    bool reorthB = reorthIn;
-    Int nReorth = 0;
-    Int nReorthVec = 0;
-    muList.reserve( steps);
-    nuList.reserve( steps);
-    Int iter = alphaList.size()-1;
-    DM u = U(ALL, IR(iter+1));
-    DM v = V(ALL, IR(iter));
-    Real beta = betaList[iter];
-
-    
-    std::vector< Int> reorth_nu;
-    if( partialProject){ reorth_nu.reserve( steps); }
+    bool reorthB = control.reorthIn;
+    Int numVecsReorth = 0;
+    Int iter = mainDiag.size()-1;
+    std::vector< Base<F>> maxMuList, maxNuList;
+    maxMuList.reserve(steps);
+    maxNuList.reserve(steps);
+
+    DM v = V( ALL, IR(iter));
+    DM u = U( ALL, IR(iter+1));
     DM vOld( v);
+    Real beta = superDiag[iter];
     for( Int j = iter+1; j <= iter+steps; ++j)
     {
         vOld = v;
@@ -53,95 +91,50 @@ BidiagLanczos(
         Axpy( -beta, vOld, v);
         Real alpha = Nrm2(v);
         //run omega recurrence
-        reorth_nu.clear();
+        bool foundInaccurate = false;
         for(Int i = 0; i < j; ++i)
         {
-            Real nu = betaList[i]*muList[i+1] + 
-                      alphaList[i]*muList[i] - beta*nuList[i];
+            Real nu = superDiag.Get(i,0)*muList[i+1] + 
+                      mainDiag.Get(i,0)*muList[i] - beta*nuList[i];
             nu = (nu + CopySign(tau, nu))/alpha;
-            if( partialProject && Abs(v) > tolError) { 
-                reorth_nu.emplace_back( i);
-            }
+            foundInaccurate |= (Abs(nu) > ctrl.reorthogTol); 
             nuList[i] = nu;
         }
         Real maxElt = *std::max_element( nuList.begin(), nuList.end(), Abs);
         maxNuList.emplace_back( maxElt);
-    }
-    nuList.emplace_back( Real(1));
-    if (reorth_b || reorthNu.size()>0){
-        if( partialProject){
-            for( Int i: reorth_nu){
-                auto vi = V(ALL, IR(i));
-                Axpy( -Dot(vi, v), vi, v);
-                nuList[i] = 2*eps;
-                nReorthVecs++;
-            }
-        } else { //full reorthogalization
-             for( Int i = 0; i < j; ++i){
-                auto vi = V(ALL, IR(i));
-                Axpy( -Dot(vi, v), vi, v);
-                nuList[i] = eps;
-                nReorthVecs++;
-            }
-        }
-        alpha = Nrm2( v);
-        reorth_b = !reorth_b;
-    }
-    alphaList.emplace_back(alpha);
-    Scale(Real(1)/alpha, v);
-    auto vj = V(ALL,IR(j));
-    vj = v;
-
-    //The u step
-    uOld = u;
-    u = v;
-    // apply the operator
-    A(u);
-    Axpy(-alpha, uOld, u);
-    beta = Nrm2( u);
-
-    //compute omega recurrence
-    std::vector< Int> reorth_mu;
-    if( partialProject){ reorth_mu.reserve( steps); }
-    for( Int i = 0; i <= j; ++i){
-        Real mu = alphaList[i]*nuList[i] - alpha*muList[i];
-        if (i > 0){
-            mu += betaList[i-1]*nuList[i-1];
-        }
-        mu = (mu + CopySign(tau, mu))/beta;
-        if( partialProject && Abs(mu) > tolError){
-            reorth_mu.emplace_back( i);
+        if( reorthB || foundInaccurate ){
+            numVecsReorth += reorth( V, j, nuList, ctrl, v, mainDiag);
         }
-        muList[ i] = mu;
-    }
-    Real maxElt = *std::max_element( muList.begin(), muList.end(), Abs);
-    maxMuList.emplace_back( maxElt);
-    muList.emplace_back( 1);
-    
-    if (reorth_b || reorthMu.size()>0){
-        if( partialProject){
-            for( Int i: reorth_mu){
-                auto ui = U(ALL, IR(i));
-                Axpy( -Dot(ui, u), ui, u);
-                muList[i] = 2*eps;
-                nReorthVecs++;
-            }
-        } else { //full reorthogalization
-             for( Int i = 0; i < j; ++i){
-                auto ui = U(ALL, IR(i));
-                Axpy( -Dot(ui, u), ui, u);
-                muList[i] = eps;
-                nReorthVecs++;
+
+        //The u step
+        uOld = u;
+        u = v;
+        // apply the operator
+        A(u);
+        Axpy(-alpha, uOld, u);
+        beta = Nrm2( u);
+
+        //compute omega recurrence
+        foundInaccurate=false;
+        for( Int i = 0; i <= j; ++i){
+            Real mu = mainDiag.Get(i,0)*nuList[i] - alpha*muList[i];
+            if (i > 0){
+                mu += superDiag.Get(i-1, 0)*nuList[i-1];
             }
+            mu = (mu + CopySign(tau, mu))/beta;
+            foundInaccurate |= (Abs(mu) > ctrl.reorthogTol); 
+            muList[ i] = mu;
+        }
+        Real maxElt = *std::max_element( muList.begin(), muList.end(), Abs);
+        maxMuList.emplace_back( maxElt);
+        muList.emplace_back( 1);
+        if( reorth_b || foundInaccurate){
+            numVecsReorth += reorth( U, j, muList, ctrl, u, superDiag); 
         }
-        alpha = Nrm2( u);
-        reorth_b = !reorth_b;
-        nReorth++;
     }
-    betaList.emplace_back(beta);
-    Scale(Real(1)/beta, u);
-    auto uj = U(ALL,IR(j));
-    uj = u;
+    Bidiag info;
+    info.numVecsReorth = numVecsReorth;
+    return info;
 }
 
 
@@ -158,58 +151,82 @@ tsvd(
       const ForwardOperator& A,
       const AdjointOperator& AAdj,
       Int nVals, 
-      Int maxIter, 
       DistMatrix<F>& initialVec, 
-      Base<F> tolConv=Pow(limits::Epsilon<Base<F>>(), Base<F>(.7)),
-      Base<F> tolError=Pow(limits::Epsilon<Base<F>>(), Base<F>(.7)),
-      ) //requires InfiniteCharacteristic<F> && ForwardOperator::ScalarType is F
+      Int maxIter=Min(1000,Max(m,n)),
+      const BidiagCtrl<F>& ctrl=BidiagCtrl<F>()) //requires InfiniteCharacteristic<F> && ForwardOperator::ScalarType is F
       {
         typedef DistMatrix<F> DM;
+        typedef Matrix<Base<F>> RealMatrix;
         typedef Base<F> Real;
         const Grid& g = initialVec.Grid();
+        //1) Compute nu, a tolerance for reorthoganlization
         auto tau = InfinityNorm(A); //TODO: Impl this?
-        DM z( g);
-        Zeros(z, 1,1);
-        Int steps = 5;
-        Real initNorm = Nrm2(initial_vec);
-        DM v( initialVec);
-        AAdj( v);
-        Scale(Real(1)/initNorm, v);       
-        Real alpha = Nrm2(v);
-        Scale(Real(1)/alpha, v);
-        DM V( n, 1, g);
-        Fill( V, Real(1));
-        //TODO: Make this the right size upfront?
-        DM alphaList( 1, 1, g); //TODO: make this a Matrix<Real>
         
+        Real initNorm = Nrm2(initialVec);
+        Scale(Real(1)/initNorm, initialVec);       
+        DM V( n, maxIter, g);
+        auto v = V(ALL, 0);
+        v = initialVec;
+        AAdj( v); //v = A'initialVec;
+        Real alpha = Nrm2(v); //record the norm, for reorth.
+        Scale(Real(1)/alpha, v); //make v a unit vector
         Real nu = 1 + tau/alpha; //think of nu = 1+\epsilon
-        DM u( v);
-        A( u);
-        DM uOld( initVec); //TODO: similar?
-        Scale(Real(1)/initNorm, initVec);       
-        Axpy(-alpha, uOld, u);
-        Real beta = FrobeniusNorm(u);
-        Scale(Real(1)/beta,u);
-
-        DM U(g);
-        HCat(uOld, u, U);
-        //TODO: Make this the right size upfront
-        DM betaList( 1, 1, g); //TODO: make this a Matrix<Real>
+        
+        DM U(m, maxIter, g);
+        auto u0 = U(ALL, 0);
+        auto u1 = U(ALL, 1);
+        u0 = initialVec;
+        u1 = v;
+        A( u1);
+        Axpy(-alpha, u0, u1);
+        Real beta = Nrm2(u1);
+        Scale(Real(1)/beta,u1);
         Real mu = tau/beta;
-        std::vector< Real> maxMuList; 
-        std::vector< Real> maxNuList; 
-        maxMuList.reserve(maxIter);
-        maxNuList.reserve(maxIter);
-        ///TODO: Implement the rest.
-        BidiagLanczos( m, n, A, AAdj, numSteps, 
-                       tau, alphaList, betaList, U, V, 
-                       muList, nuList, 
-                       reorthIn, tolError);
-        for( 
 
 
+        RealMatrix mainDiag( maxIter+1, 1); //TODO: make this a Matrix<Real>
+        RealMatrix superDiag( maxIter, 1, g); //TODO: make this a Matrix<Real>
+        //TODO: Implement the rest.
+        std::vector<Base<F>> muList, nuList;
+        muList.reserve( maxIter);
+        nuList.reserve( maxIter);
+        muList.push_back( mu);
+        muList.push_back( 1);
+        nuList.push_back( 1);
+       
+        MatrixReal sOld( maxIter+1, 1);
+        Int blockSize = Max(nVals, 50);
+        for(Int i = 0; i < maxIter; i+=blockSize){
+            Int numSteps = Min(blockSize,maxIter-i); 
+            BidiagLanczos
+            ( m, n, A, AAdj, numSteps, 
+              tau, mainDiag, superDiag, U, V, 
+              muList, nuList, ctrl);
+            auto s = mainDiag;
+            auto superDiagCopy = superDiag;
+            MatrixReal VT(i+numSteps,nVals);
+            MatrixReal U(nVals,i+numSteps);
+            lapack::BidiagQRAlg
+            (
+             'U', i+numSteps, nVals, nVals,
+             s.Buffer(), superDiagCopy.Buffer(), 
+             VT.Buffer(), VT.LDim(),
+             U.Buffer(), U.LDim());
+
+            Real beta = superDiag.Get(i+numSteps-1,0);
+            bool converged=true;
+            for(Int j = 0; j < nVals; ++j){
+                if( Abs(s.Get(j,0) - sOld.Get(j,0)) > ctrl.convValTol){
+                    converged = false;
+                }
+                if( beta*Abs(VT.Get(j, i+numSteps)) > ctrl.convVecTol){
+                    converged = false;
+                }
+                if( beta*Abs(U.Get(i+numSteps, j)) > ctrl.convVecTol){
+                    converged=false;
+                }
+            }
+            sOld = s;
+        }
 }
 } //end namespace El
-
-
-

From 8e8792d0b616e0255d0584e242a68d20935ab39b Mon Sep 17 00:00:00 2001
From: "Ryan H. Lewis" <ryan.lewis@ayasdi.com>
Date: Thu, 21 Jan 2016 16:00:11 -0800
Subject: [PATCH 04/13] TSVD initial implementation compiling.

---
 include/El/lapack_like/spectral.hpp           |  1 +
 .../{tsvd.hpp => spectral/TSVD.hpp}           | 38 ++++++++++---------
 2 files changed, 22 insertions(+), 17 deletions(-)
 rename include/El/lapack_like/{tsvd.hpp => spectral/TSVD.hpp} (90%)

diff --git a/include/El/lapack_like/spectral.hpp b/include/El/lapack_like/spectral.hpp
index 5c60d6fcd6..c430f50abf 100644
--- a/include/El/lapack_like/spectral.hpp
+++ b/include/El/lapack_like/spectral.hpp
@@ -1016,5 +1016,6 @@ DistMatrix<Int,VR,STAR> HessenbergSpectralCloud
 
 #include "./spectral/Lanczos.hpp"
 #include "./spectral/ProductLanczos.hpp"
+#include "./spectral/TSVD.hpp"
 
 #endif // ifndef EL_SPECTRAL_HPP
diff --git a/include/El/lapack_like/tsvd.hpp b/include/El/lapack_like/spectral/TSVD.hpp
similarity index 90%
rename from include/El/lapack_like/tsvd.hpp
rename to include/El/lapack_like/spectral/TSVD.hpp
index ea262e0a07..5647d17702 100644
--- a/include/El/lapack_like/tsvd.hpp
+++ b/include/El/lapack_like/spectral/TSVD.hpp
@@ -17,12 +17,13 @@ struct BidiagInfo {
 template< typename F>
 struct BidiagCtrl {
     bool reorthIn=false;
-    Base<F> reorthogTol=Pow(limits::Epsilon<Base<F>>(), Base<F>(.7)),
-    Base<F> convValTol=Pow(limits::Epsilon<Base<F>>(), Base<F>(.7)),
-    Base<F> convVecTol=Pow(limits::Epsilon<Base<F>>(), Base<F>(.5)),
+    Base<F> reorthogTol=Pow(limits::Epsilon<Base<F>>(), Base<F>(.7));
+    Base<F> convValTol=Pow(limits::Epsilon<Base<F>>(), Base<F>(.7));
+    Base<F> convVecTol=Pow(limits::Epsilon<Base<F>>(), Base<F>(.5));
     bool partialProject=false;
 }; //end struct BidiagCtrl
 
+//TODO: Make batch with Gemv
 template< typename F>
 Int reorth( DistMatrix<F>& Q,
             Int j,
@@ -30,18 +31,19 @@ Int reorth( DistMatrix<F>& Q,
             const BidiagCtrl<F>& ctrl,
             DistMatrix<F>& x,
             Matrix<Base<F>>& diagList){
-    //TODO: Make batch with Gemv
+    typedef Base<F> Real;
+    const Real eps = limits::Epsilon<Real>();
     Int numVecsReorth = 0;
     termList.emplace_back( Real( 1));
     for( Int i = 0; i < j; ++i){
-        auto vi = Q(ALL, IR(i));
+        auto qi = Q(ALL, IR(i));
         Axpy( -Dot(qi, x), qi, x);
         termList[i] = eps;
         numVecsReorth++;
     }
-    alpha = Nrm2( x);
+    Real alpha = Nrm2( x);
     diagList.Set(j,0,alpha);
-    Scale(Real(1)/alpha, v);
+    Scale(Real(1)/alpha, x);
     auto qj = Q(ALL,IR(j));
     qj = x;
     return numVecsReorth;
@@ -67,12 +69,12 @@ BidiagLanczos(
     //Base<F>& normA, //can be an estimate?
     std::vector<Base<F>>& muList,
     std::vector<Base<F>>& nuList,
-    const BidiagCtrl& control){
+    const BidiagCtrl<F>& ctrl){
     typedef Base<F> Real;
-    typename DistMatrix<F> DM;
+    typedef DistMatrix<F> DM;
     const Real eps = limits::Epsilon<Real>();
 
-    bool reorthB = control.reorthIn;
+    bool reorthB = ctrl.reorthIn;
     Int numVecsReorth = 0;
     Int iter = mainDiag.size()-1;
     std::vector< Base<F>> maxMuList, maxNuList;
@@ -82,6 +84,7 @@ BidiagLanczos(
     DM v = V( ALL, IR(iter));
     DM u = U( ALL, IR(iter+1));
     DM vOld( v);
+    DM uOld( u);
     Real beta = superDiag[iter];
     for( Int j = iter+1; j <= iter+steps; ++j)
     {
@@ -125,14 +128,14 @@ BidiagLanczos(
             foundInaccurate |= (Abs(mu) > ctrl.reorthogTol); 
             muList[ i] = mu;
         }
-        Real maxElt = *std::max_element( muList.begin(), muList.end(), Abs);
+        maxElt = *std::max_element( muList.begin(), muList.end(), Abs);
         maxMuList.emplace_back( maxElt);
         muList.emplace_back( 1);
-        if( reorth_b || foundInaccurate){
+        if( reorthB || foundInaccurate){
             numVecsReorth += reorth( U, j, muList, ctrl, u, superDiag); 
         }
     }
-    Bidiag info;
+    BidiagInfo info;
     info.numVecsReorth = numVecsReorth;
     return info;
 }
@@ -152,12 +155,13 @@ tsvd(
       const AdjointOperator& AAdj,
       Int nVals, 
       DistMatrix<F>& initialVec, 
-      Int maxIter=Min(1000,Max(m,n)),
+      Int maxIter=Int(1000),
       const BidiagCtrl<F>& ctrl=BidiagCtrl<F>()) //requires InfiniteCharacteristic<F> && ForwardOperator::ScalarType is F
       {
         typedef DistMatrix<F> DM;
         typedef Matrix<Base<F>> RealMatrix;
         typedef Base<F> Real;
+        maxIter = Min(maxIter,Min(m,n));
         const Grid& g = initialVec.Grid();
         //1) Compute nu, a tolerance for reorthoganlization
         auto tau = InfinityNorm(A); //TODO: Impl this?
@@ -194,7 +198,7 @@ tsvd(
         muList.push_back( 1);
         nuList.push_back( 1);
        
-        MatrixReal sOld( maxIter+1, 1);
+        RealMatrix sOld( maxIter+1, 1);
         Int blockSize = Max(nVals, 50);
         for(Int i = 0; i < maxIter; i+=blockSize){
             Int numSteps = Min(blockSize,maxIter-i); 
@@ -204,8 +208,8 @@ tsvd(
               muList, nuList, ctrl);
             auto s = mainDiag;
             auto superDiagCopy = superDiag;
-            MatrixReal VT(i+numSteps,nVals);
-            MatrixReal U(nVals,i+numSteps);
+            RealMatrix VT(i+numSteps,nVals);
+            RealMatrix U(nVals,i+numSteps);
             lapack::BidiagQRAlg
             (
              'U', i+numSteps, nVals, nVals,

From be9754f0ed9c39700de0930ee47205a1cc5b171c Mon Sep 17 00:00:00 2001
From: "Ryan H. Lewis" <ryan.lewis@ayasdi.com>
Date: Fri, 22 Jan 2016 11:26:34 -0800
Subject: [PATCH 05/13] trying to finish things off..

---
 include/El/lapack_like/spectral/TSVD.hpp |  78 +++++++++++-----
 tests/lapack_like/TSVD.cpp               | 108 +++++++++++++++++++++++
 2 files changed, 164 insertions(+), 22 deletions(-)
 create mode 100644 tests/lapack_like/TSVD.cpp

diff --git a/include/El/lapack_like/spectral/TSVD.hpp b/include/El/lapack_like/spectral/TSVD.hpp
index 5647d17702..47332123bb 100644
--- a/include/El/lapack_like/spectral/TSVD.hpp
+++ b/include/El/lapack_like/spectral/TSVD.hpp
@@ -148,13 +148,16 @@ template< typename F,
           typename ForwardOperator,
           typename AdjointOperator>
 inline Int 
-tsvd( 
+TSVD( 
       Int m,
       Int n, 
       const ForwardOperator& A,
       const AdjointOperator& AAdj,
-      Int nVals, 
-      DistMatrix<F>& initialVec, 
+      Int nVals,
+      AbstractDistMatrix<F>& U,
+      AbstractDistMatrix<F>& S,
+      AbstractDistMatrix<F>& V,
+      AbstractDistMatrix<F>& initialVec, //should be of length m 
       Int maxIter=Int(1000),
       const BidiagCtrl<F>& ctrl=BidiagCtrl<F>()) //requires InfiniteCharacteristic<F> && ForwardOperator::ScalarType is F
       {
@@ -168,17 +171,17 @@ tsvd(
         
         Real initNorm = Nrm2(initialVec);
         Scale(Real(1)/initNorm, initialVec);       
-        DM V( n, maxIter, g);
-        auto v = V(ALL, 0);
+        DM Vtilde( n, maxIter, g);
+        auto v = Vtilde(ALL, 0);
         v = initialVec;
         AAdj( v); //v = A'initialVec;
         Real alpha = Nrm2(v); //record the norm, for reorth.
         Scale(Real(1)/alpha, v); //make v a unit vector
         Real nu = 1 + tau/alpha; //think of nu = 1+\epsilon
         
-        DM U(m, maxIter, g);
-        auto u0 = U(ALL, 0);
-        auto u1 = U(ALL, 1);
+        DM Utilde(m, maxIter, g);
+        auto u0 = Utilde(ALL, 0);
+        auto u1 = Utilde(ALL, 1);
         u0 = initialVec;
         u1 = v;
         A( u1);
@@ -188,9 +191,8 @@ tsvd(
         Real mu = tau/beta;
 
 
-        RealMatrix mainDiag( maxIter+1, 1); //TODO: make this a Matrix<Real>
-        RealMatrix superDiag( maxIter, 1, g); //TODO: make this a Matrix<Real>
-        //TODO: Implement the rest.
+        RealMatrix mainDiag( maxIter+1, 1); 
+        RealMatrix superDiag( maxIter, 1, g); 
         std::vector<Base<F>> muList, nuList;
         muList.reserve( maxIter);
         nuList.reserve( maxIter);
@@ -201,21 +203,20 @@ tsvd(
         RealMatrix sOld( maxIter+1, 1);
         Int blockSize = Max(nVals, 50);
         for(Int i = 0; i < maxIter; i+=blockSize){
-            Int numSteps = Min(blockSize,maxIter-i); 
+            Int numSteps = Min( blockSize, maxIter-i); 
             BidiagLanczos
             ( m, n, A, AAdj, numSteps, 
-              tau, mainDiag, superDiag, U, V, 
+              tau, mainDiag, superDiag, Utilde, Vtilde, 
               muList, nuList, ctrl);
             auto s = mainDiag;
             auto superDiagCopy = superDiag;
-            RealMatrix VT(i+numSteps,nVals);
-            RealMatrix U(nVals,i+numSteps);
+            RealMatrix VTHat(i+numSteps,nVals);
+            RealMatrix UHat(nVals,i+numSteps);
             lapack::BidiagQRAlg
-            (
-             'U', i+numSteps, nVals, nVals,
-             s.Buffer(), superDiagCopy.Buffer(), 
-             VT.Buffer(), VT.LDim(),
-             U.Buffer(), U.LDim());
+            ( 'U', i+numSteps, nVals, nVals,
+              s.Buffer(), superDiagCopy.Buffer(), 
+              VTHat.Buffer(), VTHat.LDim(),
+              UHat.Buffer(), UHat.LDim());
 
             Real beta = superDiag.Get(i+numSteps-1,0);
             bool converged=true;
@@ -223,14 +224,47 @@ tsvd(
                 if( Abs(s.Get(j,0) - sOld.Get(j,0)) > ctrl.convValTol){
                     converged = false;
                 }
-                if( beta*Abs(VT.Get(j, i+numSteps)) > ctrl.convVecTol){
+                if( beta*Abs(VTHat.Get(j, i+numSteps)) > ctrl.convVecTol){
                     converged = false;
                 }
-                if( beta*Abs(U.Get(i+numSteps, j)) > ctrl.convVecTol){
+                if( beta*Abs(UHat.Get(i+numSteps, j)) > ctrl.convVecTol){
                     converged=false;
                 }
             }
+            if( converged){ 
+                El::Gemm(El::NORMAL, El::ADJOINT, Real(1), Vtilde, VTHat, Real(0), V);
+                El::Gemm(El::NORMAL,  El::NORMAL, Real(1), Utilde,  UHat, Real(0), U);
+                DistMatrix<F,STAR,STAR> S_STAR_STAR( S.Grid());
+                S_STAR_STAR.LockedAttach( S.Height(), S.Width(), S.Buffer(), S.LDim() );
+                Copy(S_STAR_STAR, S);
+                return i+numSteps; 
+            }
             sOld = s;
         }
 }
+
+/**
+* TSVD don't provide initial guess.
+*/
+template< typename F, 
+          typename ForwardOperator,
+          typename AdjointOperator>
+inline Int 
+TSVD( 
+      Int m,
+      Int n, 
+      const ForwardOperator& A,
+      const AdjointOperator& AAdj,
+      Int nVals,
+      AbstractDistMatrix<F>& U,
+      AbstractDistMatrix<F>& S,
+      AbstractDistMatrix<F>& V,
+      Int maxIter=Int(1000),
+      const BidiagCtrl<F>& ctrl=BidiagCtrl<F>()) {
+    DistMatrix<F> initialVec(U.Grid());
+    F center = rand();
+    Uniform( initialVec, m, 1, center);
+    return TSVD(m ,n, A, AAdj, nVals, U,S,V, initialVec, maxIter, ctrl);
+}
+
 } //end namespace El
diff --git a/tests/lapack_like/TSVD.cpp b/tests/lapack_like/TSVD.cpp
new file mode 100644
index 0000000000..81f9839784
--- /dev/null
+++ b/tests/lapack_like/TSVD.cpp
@@ -0,0 +1,108 @@
+/*
+   Copyright (c) 2009-2015, Jack Poulson
+   All rights reserved.
+
+   This file is part of Elemental and is under the BSD 2-Clause License, 
+   which can be found in the LICENSE file in the root directory, or at 
+   http://opensource.org/licenses/BSD-2-Clause
+*/
+#include "El.hpp"
+using namespace El;
+
+template< typename Matrix>
+class AOp {
+
+    public:
+    AOp( const Matrix& A_, Orientation o_ = El::NORMAL) : A( A_), o(o_) {}
+    
+    template< typename V>
+    void operator()( V& v) const {
+        V w;
+        Gemv(o, 1.0, A, v, w);
+        v = w;
+    }
+
+    Int Height() const {
+        if( o == El::NORMAL){ return A.Height(); }
+        return A.Width();
+    }
+
+    Int Width() const {
+        if( o == El::NORMAL){ return A.Width(); }
+        return A.Height();
+    }
+
+    const Matrix& A;
+    const Orientation o = El::NORMAL;
+}; //Matrix Operator
+
+
+template<typename F>
+void TestTSVD( const DistMatrix<F>& A, Int k=3){
+    typedef Base<F> Real;
+    const auto& g = A.Grid();
+    int m = A.Height();
+    int n = A.Width();
+    DistMatrix<F> Uk( g), Sk( g), Vk( g);
+    DistMatrix<F> U( g), V( g);
+    DistMatrix<Base<F>, VR, STAR> S( g);
+    if( g.Rank() == 0 )
+        Output("  Starting TSVD factorization...");
+    const double startTime = mpi::Time();
+    typedef AOp<DistMatrix<F>> Operator; 
+    Operator Aop( A);
+    Operator AAdjop( A, El::ADJOINT);
+    TSVD( m, n, Aop, AAdjop, k, Uk, Sk, Vk);
+    const double runTime = mpi::Time() - startTime;
+    Output("TSVD Time = ",runTime);
+    mpi::Barrier( g.Comm() );
+    if( g.Rank() == 0 )
+        Output("  Starting SVD factorization..."); 
+    SVD( A, U, S, V);  
+    mpi::Barrier( g.Comm() );
+    for( Int i = 0; i < k; ++i){
+        std::cout <<  i << ": ";
+        const Real& A = Sk.Get(i,0);
+        const Real& B = S.Get(i,0);
+        std::cout << S.Get(i,0) << std::endl;
+        //std::cout << Norm(Abs(Uk( ALL, IR(i))) - Abs(U( ALL, IR(i)))) << " ";
+        //std::cout << Norm(Abs(Vk( ALL, IR(i))) - Abs(V( ALL, IR(i)))) << " ";
+    }
+}
+
+int 
+main( int argc, char* argv[] )
+{
+    Environment env( argc, argv );
+    mpi::Comm comm = mpi::COMM_WORLD;
+    const Int commRank = mpi::Rank( comm );
+
+    try
+    {
+        const bool colMajor = Input("--colMajor","column-major ordering?",true);
+        const Int m = Input("--height","height of matrix",100);
+        const Int n = Input("--width","width of matrix",100);
+        const Int nb = Input("--nb","algorithmic blocksize",96);
+        const bool testCorrectness = Input
+            ("--correctness","test correctness?",true);
+        const bool print = Input("--print","print matrices?",false);
+        ProcessInput();
+        PrintInputReport();
+
+        const GridOrder order = ( colMajor ? COLUMN_MAJOR : ROW_MAJOR );
+        const Grid g( comm, order );
+        SetBlocksize( nb );
+        ComplainIfDebug();
+        if( commRank == 0 )
+            Output("Will test TSQR");
+
+        if( commRank == 0 )
+            Output("Testing with doubles:");
+        DistMatrix<double> A;
+        Laplacian(A, m, n);
+        TestTSVD( A, 3);
+    }
+    catch( exception& e ) { ReportException(e); }
+
+    return 0;
+}

From 66aab5d7dc2bd16a9f7c41da9b1f90346826b0f1 Mon Sep 17 00:00:00 2001
From: "Ryan H. Lewis" <me@ryanlewis.net>
Date: Mon, 22 Aug 2016 21:44:23 -0700
Subject: [PATCH 06/13] fix up tests and header.

---
 include/El/lapack_like/spectral/TSVD.hpp |  7 +++++--
 tests/lapack_like/TSVD.cpp               | 16 +++++++++++++---
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/include/El/lapack_like/spectral/TSVD.hpp b/include/El/lapack_like/spectral/TSVD.hpp
index 47332123bb..b1ff669393 100644
--- a/include/El/lapack_like/spectral/TSVD.hpp
+++ b/include/El/lapack_like/spectral/TSVD.hpp
@@ -2,8 +2,10 @@
 * 
 * TSVD Based off of implementation by Andreas Noack. See: http://github.com/andreasnoack/TSVD.jl
 */
-#pragma once
+#ifndef EL_LAPACKLIKE_SPECTRAL_TSVD_HPP
+#define EL_LAPACKLIKE_SPECTRAL_TSVD_HPP
 #include <tuple>
+#include <El/blas_like/level3.hpp>
 
 namespace El {
 
@@ -212,7 +214,7 @@ TSVD(
             auto superDiagCopy = superDiag;
             RealMatrix VTHat(i+numSteps,nVals);
             RealMatrix UHat(nVals,i+numSteps);
-            lapack::BidiagQRAlg
+            lapack::BidiagSVDQRAlg
             ( 'U', i+numSteps, nVals, nVals,
               s.Buffer(), superDiagCopy.Buffer(), 
               VTHat.Buffer(), VTHat.LDim(),
@@ -268,3 +270,4 @@ TSVD(
 }
 
 } //end namespace El
+#endif //ifndef EL_LAPACKLIKE_SPECTRAL_TSVD
diff --git a/tests/lapack_like/TSVD.cpp b/tests/lapack_like/TSVD.cpp
index 81f9839784..56124df901 100644
--- a/tests/lapack_like/TSVD.cpp
+++ b/tests/lapack_like/TSVD.cpp
@@ -64,9 +64,19 @@ void TestTSVD( const DistMatrix<F>& A, Int k=3){
         std::cout <<  i << ": ";
         const Real& A = Sk.Get(i,0);
         const Real& B = S.Get(i,0);
-        std::cout << S.Get(i,0) << std::endl;
-        //std::cout << Norm(Abs(Uk( ALL, IR(i))) - Abs(U( ALL, IR(i)))) << " ";
-        //std::cout << Norm(Abs(Vk( ALL, IR(i))) - Abs(V( ALL, IR(i)))) << " ";
+        Output( "Sk = ", A, "SVD: S_k", B);
+	auto eigenvalue_error = A - B;
+        auto left_svec_error = Norm(Abs(Uk( ALL, IR(i))) - Abs(U( ALL, IR(i)))); 
+        auto right_svec_error = Norm(Abs(Vk( ALL, IR(i))) - Abs(V( ALL, IR(i)))); 
+        if( eigenvalue_error > limits<Real>::Epsilon()){
+		RuntimeError("Convergence Issue in TSVD.");
+	}
+	if( left_svec_error > limits<Real>::Epsilon()){
+		RuntimeError("Convergence Issue in TSVD.");
+	}
+	if( right_svec_error > limits<Real>::Epsilon()){
+		RuntimeError("Convergence Issue in TSVD.");
+	}
     }
 }
 

From ac943c3db55c573d582405800a9c1c52d105ea5f Mon Sep 17 00:00:00 2001
From: "Ryan H. Lewis" <me@ryanlewis.net>
Date: Fri, 17 Feb 2017 11:01:06 -0800
Subject: [PATCH 07/13] added Abs and need to fix Abs for vectors still.

---
 tests/lapack_like/TSVD.cpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/lapack_like/TSVD.cpp b/tests/lapack_like/TSVD.cpp
index 56124df901..3d4a6c494d 100644
--- a/tests/lapack_like/TSVD.cpp
+++ b/tests/lapack_like/TSVD.cpp
@@ -65,18 +65,18 @@ void TestTSVD( const DistMatrix<F>& A, Int k=3){
         const Real& A = Sk.Get(i,0);
         const Real& B = S.Get(i,0);
         Output( "Sk = ", A, "SVD: S_k", B);
-	auto eigenvalue_error = A - B;
-        auto left_svec_error = Norm(Abs(Uk( ALL, IR(i))) - Abs(U( ALL, IR(i)))); 
-        auto right_svec_error = Norm(Abs(Vk( ALL, IR(i))) - Abs(V( ALL, IR(i)))); 
+	       auto eigenvalue_error = Abs(A - B);
         if( eigenvalue_error > limits<Real>::Epsilon()){
-		RuntimeError("Convergence Issue in TSVD.");
-	}
-	if( left_svec_error > limits<Real>::Epsilon()){
-		RuntimeError("Convergence Issue in TSVD.");
-	}
-	if( right_svec_error > limits<Real>::Epsilon()){
-		RuntimeError("Convergence Issue in TSVD.");
-	}
+		          RuntimeError("Convergence Issue in TSVD.");
+	       }
+        auto left_svec_error = Norm(Abs(Uk( ALL, IR(i))) - Abs(U( ALL, IR(i))));
+	       if( left_svec_error > limits<Real>::Epsilon()){
+		          RuntimeError("Convergence Issue in TSVD.");
+	       }
+        auto right_svec_error = Norm(Abs(Vk( ALL, IR(i))) - Abs(V( ALL, IR(i))));
+	       if( right_svec_error > limits<Real>::Epsilon()){
+		          RuntimeError("Convergence Issue in TSVD.");
+	       }
     }
 }
 

From 4fd1ec7dfc7310351cba3a1a4b9747130d2e2374 Mon Sep 17 00:00:00 2001
From: "Ryan H. Lewis" <me@ryanlewis.net>
Date: Fri, 17 Feb 2017 11:29:42 -0800
Subject: [PATCH 08/13] Patch CMake to not build every commit. Stolen from BFGS
 branch. Fix test more.

---
 CMakeLists.txt             | 30 +++++++++---------------------
 tests/lapack_like/TSVD.cpp | 12 ++++++++++--
 2 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8a6d8951a8..22df7a284b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -265,29 +265,17 @@ if(PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR)
   message(FATAL_ERROR "In-source build attempted; please clean the CMake cache and then switch to an out-of-source build, e.g.,\nrm CMakeCache.txt && rm -Rf CMakeFiles/\nmkdir build/ && cd build/ && cmake ..")
 endif()
 
-# Get the Git revision
-include(GetGitRevisionDescription)
-get_git_head_revision(GIT_REFSPEC GIT_SHA1)
-
-# Ensure that the build type is set to either Release or Debug
-if(CMAKE_BUILD_TYPE STREQUAL "Release")
-  # This option is okay as-is
-  set(CMAKE_BUILD_TYPE Release)
-elseif(CMAKE_BUILD_TYPE STREQUAL "Debug")
-  # This option is okay as-is
-  set(CMAKE_BUILD_TYPE Debug)
-elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
-  message(WARNING "RelWithDebInfo not supported; switching to Release")
-  set(CMAKE_BUILD_TYPE Release)
-elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
-  message(WARNING "MinSizeRel not supported; switching to Release")
-  set(CMAKE_BUILD_TYPE Release)
-else()
-  message(WARNING "Build mode not specified, defaulting to Release build.")
-  set(CMAKE_BUILD_TYPE Release)
+if (NOT CMAKE_BUILD_TYPE)
+   set( CMAKE_BUILD_TYPE Release)
+endif()
+
+set(ACCEPTED_BUILD_TYPES Release Debug RelWithDebInfo MinSizeRel)
+list(FIND ACCEPTED_BUILD_TYPES ${CMAKE_BUILD_TYPE} IS_BUILD_TYPE_ACCEPTED)
+if(${IS_BUILD_TYPE_ACCEPTED} EQUAL -1)
+  message(FATAL_ERROR "CMAKE_BUILD_TYPE of ${CMAKE_BUILD_TYPE} is not accepted.")
 endif()
 
-if(CMAKE_BUILD_TYPE STREQUAL "Release")
+if(CMAKE_BUILD_TYPE STREQUAL "Release" OR CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
   set(EL_RELEASE TRUE)
 else()
   set(EL_RELEASE FALSE)
diff --git a/tests/lapack_like/TSVD.cpp b/tests/lapack_like/TSVD.cpp
index 3d4a6c494d..3881ba8f1a 100644
--- a/tests/lapack_like/TSVD.cpp
+++ b/tests/lapack_like/TSVD.cpp
@@ -69,11 +69,19 @@ void TestTSVD( const DistMatrix<F>& A, Int k=3){
         if( eigenvalue_error > limits<Real>::Epsilon()){
 		          RuntimeError("Convergence Issue in TSVD.");
 	       }
-        auto left_svec_error = Norm(Abs(Uk( ALL, IR(i))) - Abs(U( ALL, IR(i))));
+	       F left_svec_error = F(0);
+        F right_svec_error = F(0);
+        for (int j = 0; j <= m; ++j){
+	         auto t = Abs(Uk.Get( j, i)) - Abs(U.Get( j, i));
+	         left_svec_error += t*t;
+          auto s = Abs(Vk.Get( j, i)) - Abs(V.Get( j, i));
+	         right_svec_error += s*s;
+        }
+        left_svec_error = Sqrt(left_svec_error);
+        right_svec_error = Sqrt(right_svec_error);
 	       if( left_svec_error > limits<Real>::Epsilon()){
 		          RuntimeError("Convergence Issue in TSVD.");
 	       }
-        auto right_svec_error = Norm(Abs(Vk( ALL, IR(i))) - Abs(V( ALL, IR(i))));
 	       if( right_svec_error > limits<Real>::Epsilon()){
 		          RuntimeError("Convergence Issue in TSVD.");
 	       }

From c7ab140d7f805275a311629c2e419c0c32e59a4a Mon Sep 17 00:00:00 2001
From: "Ryan H. Lewis" <me@ryanlewis.net>
Date: Fri, 17 Feb 2017 11:37:46 -0800
Subject: [PATCH 09/13] fix the Clang builds.

---
 travis/install-mpi.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/travis/install-mpi.sh b/travis/install-mpi.sh
index 0d3513c141..31532abd2a 100644
--- a/travis/install-mpi.sh
+++ b/travis/install-mpi.sh
@@ -2,7 +2,7 @@
 set -e
 case $1 in
   mpich) set -x;
-    sudo apt-get install -q mpich libmpich-dev;;
+    sudo apt-get install -q --force-yes mpich libmpich-dev;;
   openmpi) set -x;
     sudo apt-get install openmpi-bin openmpi-dev;;
   *)

From 701489a8fa6626d385e7a34d235393e8a8a01330 Mon Sep 17 00:00:00 2001
From: "Ryan H. Lewis" <me@ryanlewis.net>
Date: Fri, 17 Feb 2017 12:42:42 -0800
Subject: [PATCH 10/13] fix up some bugs.

---
 include/El/lapack_like/spectral/TSVD.hpp | 11 +++++------
 tests/lapack_like/TSVD.cpp               | 12 +++++-------
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/include/El/lapack_like/spectral/TSVD.hpp b/include/El/lapack_like/spectral/TSVD.hpp
index b1ff669393..26c470ffac 100644
--- a/include/El/lapack_like/spectral/TSVD.hpp
+++ b/include/El/lapack_like/spectral/TSVD.hpp
@@ -142,7 +142,6 @@ BidiagLanczos(
     return info;
 }
 
-
 /**
 * TSVD
 */
@@ -169,7 +168,7 @@ TSVD(
         maxIter = Min(maxIter,Min(m,n));
         const Grid& g = initialVec.Grid();
         //1) Compute nu, a tolerance for reorthoganlization
-        auto tau = InfinityNorm(A); //TODO: Impl this?
+        auto tau = InfinityNorm<F>(A, AAdj, g); 
         
         Real initNorm = Nrm2(initialVec);
         Scale(Real(1)/initNorm, initialVec);       
@@ -194,7 +193,7 @@ TSVD(
 
 
         RealMatrix mainDiag( maxIter+1, 1); 
-        RealMatrix superDiag( maxIter, 1, g); 
+        RealMatrix superDiag( maxIter, 1);
         std::vector<Base<F>> muList, nuList;
         muList.reserve( maxIter);
         nuList.reserve( maxIter);
@@ -212,8 +211,8 @@ TSVD(
               muList, nuList, ctrl);
             auto s = mainDiag;
             auto superDiagCopy = superDiag;
-            RealMatrix VTHat(i+numSteps,nVals);
-            RealMatrix UHat(nVals,i+numSteps);
+            DM VTHat(i+numSteps,nVals, g);
+            DM UHat(nVals,i+numSteps, g);
             lapack::BidiagSVDQRAlg
             ( 'U', i+numSteps, nVals, nVals,
               s.Buffer(), superDiagCopy.Buffer(), 
@@ -237,7 +236,7 @@ TSVD(
                 El::Gemm(El::NORMAL, El::ADJOINT, Real(1), Vtilde, VTHat, Real(0), V);
                 El::Gemm(El::NORMAL,  El::NORMAL, Real(1), Utilde,  UHat, Real(0), U);
                 DistMatrix<F,STAR,STAR> S_STAR_STAR( S.Grid());
-                S_STAR_STAR.LockedAttach( S.Height(), S.Width(), S.Buffer(), S.LDim() );
+                S_STAR_STAR.LockedAttach( S.Height(), S.Width(), S.Grid(), S.ColAlign(), S.RowAlign(), S.Buffer(), S.LDim(), S.Root());
                 Copy(S_STAR_STAR, S);
                 return i+numSteps; 
             }
diff --git a/tests/lapack_like/TSVD.cpp b/tests/lapack_like/TSVD.cpp
index 3881ba8f1a..29bc0d0a76 100644
--- a/tests/lapack_like/TSVD.cpp
+++ b/tests/lapack_like/TSVD.cpp
@@ -60,14 +60,15 @@ void TestTSVD( const DistMatrix<F>& A, Int k=3){
         Output("  Starting SVD factorization..."); 
     SVD( A, U, S, V);  
     mpi::Barrier( g.Comm() );
+    Real eps = limits::Epsilon<Real>();
     for( Int i = 0; i < k; ++i){
         std::cout <<  i << ": ";
         const Real& A = Sk.Get(i,0);
         const Real& B = S.Get(i,0);
         Output( "Sk = ", A, "SVD: S_k", B);
 	       auto eigenvalue_error = Abs(A - B);
-        if( eigenvalue_error > limits<Real>::Epsilon()){
-		          RuntimeError("Convergence Issue in TSVD.");
+        if( eigenvalue_error > eps){
+		          RuntimeError("Convergence Issue in TSVD. Eigenvalues are not close enough.");
 	       }
 	       F left_svec_error = F(0);
         F right_svec_error = F(0);
@@ -79,11 +80,8 @@ void TestTSVD( const DistMatrix<F>& A, Int k=3){
         }
         left_svec_error = Sqrt(left_svec_error);
         right_svec_error = Sqrt(right_svec_error);
-	       if( left_svec_error > limits<Real>::Epsilon()){
-		          RuntimeError("Convergence Issue in TSVD.");
-	       }
-	       if( right_svec_error > limits<Real>::Epsilon()){
-		          RuntimeError("Convergence Issue in TSVD.");
+        if( left_svec_error > eps || right_svec_error > eps){
+		          RuntimeError("Convergence Issue in TSVD. Eigenvector Entries do not match.");
 	       }
     }
 }

From d34f0c676c4d821697801d86d4e92f0a69c245ef Mon Sep 17 00:00:00 2001
From: "Ryan H. Lewis" <me@ryanlewis.net>
Date: Fri, 17 Feb 2017 14:45:39 -0800
Subject: [PATCH 11/13] temporarily remote TSVD from spectral header to make
 debugging faster.

---
 include/El/lapack_like/spectral.hpp      |   2 +-
 include/El/lapack_like/spectral/TSVD.hpp | 138 +++++++++++++----------
 tests/lapack_like/TSVD.cpp               |  18 +--
 3 files changed, 90 insertions(+), 68 deletions(-)

diff --git a/include/El/lapack_like/spectral.hpp b/include/El/lapack_like/spectral.hpp
index 56fbcb965b..1d1951b9a4 100644
--- a/include/El/lapack_like/spectral.hpp
+++ b/include/El/lapack_like/spectral.hpp
@@ -2012,7 +2012,7 @@ DistMatrix<Int,VR,STAR> HessenbergSpectralCloud
 #include <El/lapack_like/spectral/Schur.hpp>
 #include <El/lapack_like/spectral/HermitianEig.hpp>
 #include <El/lapack_like/spectral/SVD.hpp>
-#include <El/lapack_like/spectral/TSVD.hpp>
+//#include <El/lapack_like/spectral/TSVD.hpp>
 #include <El/lapack_like/spectral/Lanczos.hpp>
 #include <El/lapack_like/spectral/ProductLanczos.hpp>
 
diff --git a/include/El/lapack_like/spectral/TSVD.hpp b/include/El/lapack_like/spectral/TSVD.hpp
index 26c470ffac..0fca7414f2 100644
--- a/include/El/lapack_like/spectral/TSVD.hpp
+++ b/include/El/lapack_like/spectral/TSVD.hpp
@@ -58,12 +58,13 @@ template< typename F,
           typename AdjointOperator>
 BidiagInfo
 BidiagLanczos(
+    Int iter,
     Int m,
     Int n, 
     const ForwardOperator& A,
     const AdjointOperator& AAdj,
     Int steps,
-    Base<F> tau,
+    Base<F>& tau,
     Matrix<Base<F>>& mainDiag,
     Matrix<Base<F>>& superDiag,
     DistMatrix<F>& U,
@@ -78,7 +79,6 @@ BidiagLanczos(
 
     bool reorthB = ctrl.reorthIn;
     Int numVecsReorth = 0;
-    Int iter = mainDiag.size()-1;
     std::vector< Base<F>> maxMuList, maxNuList;
     maxMuList.reserve(steps);
     maxNuList.reserve(steps);
@@ -87,14 +87,18 @@ BidiagLanczos(
     DM u = U( ALL, IR(iter+1));
     DM vOld( v);
     DM uOld( u);
-    Real beta = superDiag[iter];
-    for( Int j = iter+1; j <= iter+steps; ++j)
+    Real beta = superDiag.Get(iter,0);
+    auto absComp = [](const Real& a, const Real& b){ return Abs(a) < Abs(b);  };
+    for( Int j = iter+1; j < iter+steps; ++j)
     {
         vOld = v;
         v = u;
         AAdj( v);
         Axpy( -beta, vOld, v);
         Real alpha = Nrm2(v);
+
+	       tau = Max(tau, eps*(alpha+beta));
+
         //run omega recurrence
         bool foundInaccurate = false;
         for(Int i = 0; i < j; ++i)
@@ -105,8 +109,10 @@ BidiagLanczos(
             foundInaccurate |= (Abs(nu) > ctrl.reorthogTol); 
             nuList[i] = nu;
         }
-        Real maxElt = *std::max_element( nuList.begin(), nuList.end(), Abs);
+        Real maxElt = *std::max_element( nuList.begin(), nuList.end(), absComp);
         maxNuList.emplace_back( maxElt);
+        
+        //Reorthogonalize if necessary.
         if( reorthB || foundInaccurate ){
             numVecsReorth += reorth( V, j, nuList, ctrl, v, mainDiag);
         }
@@ -118,7 +124,7 @@ BidiagLanczos(
         A(u);
         Axpy(-alpha, uOld, u);
         beta = Nrm2( u);
-
+        tau = Max(tau, eps*(alpha+beta));
         //compute omega recurrence
         foundInaccurate=false;
         for( Int i = 0; i <= j; ++i){
@@ -130,7 +136,7 @@ BidiagLanczos(
             foundInaccurate |= (Abs(mu) > ctrl.reorthogTol); 
             muList[ i] = mu;
         }
-        maxElt = *std::max_element( muList.begin(), muList.end(), Abs);
+        maxElt = *std::max_element( muList.begin(), muList.end(), absComp);
         maxMuList.emplace_back( maxElt);
         muList.emplace_back( 1);
         if( reorthB || foundInaccurate){
@@ -165,10 +171,10 @@ TSVD(
         typedef DistMatrix<F> DM;
         typedef Matrix<Base<F>> RealMatrix;
         typedef Base<F> Real;
+    	
+        const Real eps = limits::Epsilon<Real>();
         maxIter = Min(maxIter,Min(m,n));
         const Grid& g = initialVec.Grid();
-        //1) Compute nu, a tolerance for reorthoganlization
-        auto tau = InfinityNorm<F>(A, AAdj, g); 
         
         Real initNorm = Nrm2(initialVec);
         Scale(Real(1)/initNorm, initialVec);       
@@ -178,7 +184,6 @@ TSVD(
         AAdj( v); //v = A'initialVec;
         Real alpha = Nrm2(v); //record the norm, for reorth.
         Scale(Real(1)/alpha, v); //make v a unit vector
-        Real nu = 1 + tau/alpha; //think of nu = 1+\epsilon
         
         DM Utilde(m, maxIter, g);
         auto u0 = Utilde(ALL, 0);
@@ -189,59 +194,72 @@ TSVD(
         Axpy(-alpha, u0, u1);
         Real beta = Nrm2(u1);
         Scale(Real(1)/beta,u1);
-        Real mu = tau/beta;
+        
+       	auto tau = eps*(alpha+beta);
+	
+       //1) Compute nu, a tolerance for reorthoganlization
+       Real nu = 1 + tau/alpha; //think of nu = 1+\epsilon
+       Real mu = tau/beta;
 
 
-        RealMatrix mainDiag( maxIter+1, 1); 
-        RealMatrix superDiag( maxIter, 1);
-        std::vector<Base<F>> muList, nuList;
-        muList.reserve( maxIter);
-        nuList.reserve( maxIter);
-        muList.push_back( mu);
-        muList.push_back( 1);
-        nuList.push_back( 1);
+       RealMatrix mainDiag( maxIter+1, 1);
+       RealMatrix superDiag( maxIter, 1);
+       std::vector<Base<F>> muList, nuList;
+       muList.reserve( maxIter);
+       nuList.reserve( maxIter);
+       muList.push_back( mu);
+       muList.push_back( 1);
+       nuList.push_back( 1);
        
-        RealMatrix sOld( maxIter+1, 1);
-        Int blockSize = Max(nVals, 50);
-        for(Int i = 0; i < maxIter; i+=blockSize){
-            Int numSteps = Min( blockSize, maxIter-i); 
-            BidiagLanczos
-            ( m, n, A, AAdj, numSteps, 
-              tau, mainDiag, superDiag, Utilde, Vtilde, 
-              muList, nuList, ctrl);
-            auto s = mainDiag;
-            auto superDiagCopy = superDiag;
-            DM VTHat(i+numSteps,nVals, g);
-            DM UHat(nVals,i+numSteps, g);
-            lapack::BidiagSVDQRAlg
-            ( 'U', i+numSteps, nVals, nVals,
-              s.Buffer(), superDiagCopy.Buffer(), 
-              VTHat.Buffer(), VTHat.LDim(),
-              UHat.Buffer(), UHat.LDim());
-
-            Real beta = superDiag.Get(i+numSteps-1,0);
-            bool converged=true;
-            for(Int j = 0; j < nVals; ++j){
-                if( Abs(s.Get(j,0) - sOld.Get(j,0)) > ctrl.convValTol){
-                    converged = false;
-                }
-                if( beta*Abs(VTHat.Get(j, i+numSteps)) > ctrl.convVecTol){
-                    converged = false;
-                }
-                if( beta*Abs(UHat.Get(i+numSteps, j)) > ctrl.convVecTol){
-                    converged=false;
-                }
-            }
-            if( converged){ 
-                El::Gemm(El::NORMAL, El::ADJOINT, Real(1), Vtilde, VTHat, Real(0), V);
-                El::Gemm(El::NORMAL,  El::NORMAL, Real(1), Utilde,  UHat, Real(0), U);
-                DistMatrix<F,STAR,STAR> S_STAR_STAR( S.Grid());
-                S_STAR_STAR.LockedAttach( S.Height(), S.Width(), S.Grid(), S.ColAlign(), S.RowAlign(), S.Buffer(), S.LDim(), S.Root());
-                Copy(S_STAR_STAR, S);
-                return i+numSteps; 
-            }
-            sOld = s;
-        }
+       RealMatrix sOld( maxIter+1, 1);
+       Int blockSize = Max(nVals, 50);
+       for(Int i = 0; i < maxIter; i+=blockSize){
+           std::cout << "iter " << i << ": ";
+           Int numSteps = Min( blockSize, maxIter-i);
+           BidiagLanczos( i, m, n, A, AAdj, numSteps, tau, mainDiag, superDiag, Utilde, Vtilde, muList, nuList, ctrl);
+           auto s = mainDiag;
+           auto superDiagCopy = superDiag;
+           DM VTHat(i+numSteps,nVals, g);
+           DM UHat(nVals,i+numSteps, g);
+           lapack::BidiagSVDQRAlg
+           ( 'U', i+numSteps, nVals, nVals,
+             s.Buffer(), superDiagCopy.Buffer(),
+             VTHat.Buffer(), VTHat.LDim(),
+             UHat.Buffer(), UHat.LDim());
+           for(Int j = 0; j < nVals; ++j){
+             std::cout << s.Get(j,0) << " " << std::flush;
+           }
+           std::cout << std::endl << std::endl;
+           Real beta = superDiag.Get(i+numSteps-1,0);
+           bool converged=true;
+           for(Int j = 0; j < nVals; ++j){
+               if( Abs(s.Get(j,0) - sOld.Get(j,0)) > ctrl.convValTol){
+                   converged = false;
+                   break;
+               }
+               if( beta*Abs(VTHat.Get(j, i+numSteps)) > ctrl.convVecTol){
+                   converged = false;
+                   break;
+               }
+               if( beta*Abs(UHat.Get(i+numSteps, j)) > ctrl.convVecTol) {
+                 converged = false;
+                 break;
+               }
+           }
+           if( converged){
+               std::cout << "converged!" << std::endl;
+               El::Gemm(El::NORMAL, El::ADJOINT, Real(1), Vtilde, VTHat, Real(0), V);
+               El::Gemm(El::NORMAL,  El::NORMAL, Real(1), Utilde,  UHat, Real(0), U);
+               std::cout << "copying s" << std::endl;
+               DistMatrix<F,STAR,STAR> S_STAR_STAR( S.Grid());
+               S_STAR_STAR.LockedAttach( S.Height(), S.Width(), S.Grid(), S.ColAlign(), S.RowAlign(), s.Buffer(), s.LDim(), S.Root());
+               Copy(S_STAR_STAR, S);
+               return i+numSteps;
+           }
+           sOld = s;
+	          tau = eps*s.Get(0,0);
+       }
+	return Int(-1);
 }
 
 /**
diff --git a/tests/lapack_like/TSVD.cpp b/tests/lapack_like/TSVD.cpp
index 29bc0d0a76..896acb211a 100644
--- a/tests/lapack_like/TSVD.cpp
+++ b/tests/lapack_like/TSVD.cpp
@@ -7,6 +7,7 @@
    http://opensource.org/licenses/BSD-2-Clause
 */
 #include "El.hpp"
+#include <El/lapack_like/spectral/TSVD.hpp>
 using namespace El;
 
 template< typename Matrix>
@@ -46,6 +47,14 @@ void TestTSVD( const DistMatrix<F>& A, Int k=3){
     DistMatrix<F> Uk( g), Sk( g), Vk( g);
     DistMatrix<F> U( g), V( g);
     DistMatrix<Base<F>, VR, STAR> S( g);
+    mpi::Barrier( g.Comm() );
+    if( g.Rank() == 0 )
+      Output("  Starting SVD factorization...");
+    SVD( A, U, S, V);
+    mpi::Barrier( g.Comm() );
+    std::cout << "Singular Values: " << std::flush;
+    Display(S, "Singular Values");
+    std::cout << std::endl;
     if( g.Rank() == 0 )
         Output("  Starting TSVD factorization...");
     const double startTime = mpi::Time();
@@ -55,11 +64,6 @@ void TestTSVD( const DistMatrix<F>& A, Int k=3){
     TSVD( m, n, Aop, AAdjop, k, Uk, Sk, Vk);
     const double runTime = mpi::Time() - startTime;
     Output("TSVD Time = ",runTime);
-    mpi::Barrier( g.Comm() );
-    if( g.Rank() == 0 )
-        Output("  Starting SVD factorization..."); 
-    SVD( A, U, S, V);  
-    mpi::Barrier( g.Comm() );
     Real eps = limits::Epsilon<Real>();
     for( Int i = 0; i < k; ++i){
         std::cout <<  i << ": ";
@@ -96,8 +100,8 @@ main( int argc, char* argv[] )
     try
     {
         const bool colMajor = Input("--colMajor","column-major ordering?",true);
-        const Int m = Input("--height","height of matrix",100);
-        const Int n = Input("--width","width of matrix",100);
+        const Int m = Input("--height","height of matrix",10);
+        const Int n = Input("--width","width of matrix",10);
         const Int nb = Input("--nb","algorithmic blocksize",96);
         const bool testCorrectness = Input
             ("--correctness","test correctness?",true);

From dc4a539839f6940290c00c7d3bb55e83822ae44e Mon Sep 17 00:00:00 2001
From: "Ryan H. Lewis" <me@ryanlewis.net>
Date: Sat, 18 Feb 2017 13:34:48 -0800
Subject: [PATCH 12/13] Refactored code to make it more readable.

---
 include/El/lapack_like/spectral/TSVD.hpp | 510 +++++++++++++----------
 tests/lapack_like/TSVD.cpp               |   4 +-
 2 files changed, 288 insertions(+), 226 deletions(-)

diff --git a/include/El/lapack_like/spectral/TSVD.hpp b/include/El/lapack_like/spectral/TSVD.hpp
index 0fca7414f2..f739220cd8 100644
--- a/include/El/lapack_like/spectral/TSVD.hpp
+++ b/include/El/lapack_like/spectral/TSVD.hpp
@@ -9,256 +9,321 @@
 
 namespace El {
 
-template< typename Real>
-Real CopySign(const Real& x, const Real& y){ return y >= Real(0) ? Abs(x): -Abs(x); }
+template<typename Real>
+Real CopySign(const Real& x, const Real& y) { return y >= Real(0) ? Abs(x) : -Abs(x); }
 
 struct BidiagInfo {
-    Int numVecsReorth;
+ Int numVecsReorth;
 }; //struct BidiagInfo
 
-template< typename F>
+template<typename F>
 struct BidiagCtrl {
-    bool reorthIn=false;
-    Base<F> reorthogTol=Pow(limits::Epsilon<Base<F>>(), Base<F>(.7));
-    Base<F> convValTol=Pow(limits::Epsilon<Base<F>>(), Base<F>(.7));
-    Base<F> convVecTol=Pow(limits::Epsilon<Base<F>>(), Base<F>(.5));
-    bool partialProject=false;
+ bool reorthIn = false;
+ Int minBlockSize=50;
+ Base <F> reorthogTol = Pow(El::limits::Epsilon<El::Base<F>>(), El::Base<F>(.7));
+ Base <F> convValTol = Pow(El::limits::Epsilon<El::Base<F>>(), El::Base<F>(.7));
+ Base <F> convVecTol = Pow(El::limits::Epsilon<El::Base<F>>(), El::Base<F>(.5));
+ bool partialProject = false;
 }; //end struct BidiagCtrl
 
+template<typename F>
+struct ReorthogonalizationData {
+ ReorthogonalizationData(El::Int N, F mu, F nu, F tau_): tau(tau_) {
+  muList.reserve(N);
+  nuList.reserve(N);
+  muList.push_back(mu);
+  muList.push_back(1);
+  nuList.push_back(nu);
+  nuList.push_back(1);
+ }
+ std::vector<El::Base<F>> muList, nuList;
+ F tau;
+};
+
 //TODO: Make batch with Gemv
-template< typename F>
-Int reorth( DistMatrix<F>& Q,
-            Int j,
-            std::vector<Base<F>>& termList,
-            const BidiagCtrl<F>& ctrl,
-            DistMatrix<F>& x,
-            Matrix<Base<F>>& diagList){
-    typedef Base<F> Real;
-    const Real eps = limits::Epsilon<Real>();
-    Int numVecsReorth = 0;
-    termList.emplace_back( Real( 1));
-    for( Int i = 0; i < j; ++i){
-        auto qi = Q(ALL, IR(i));
-        Axpy( -Dot(qi, x), qi, x);
-        termList[i] = eps;
-        numVecsReorth++;
-    }
-    Real alpha = Nrm2( x);
-    diagList.Set(j,0,alpha);
-    Scale(Real(1)/alpha, x);
-    auto qj = Q(ALL,IR(j));
-    qj = x;
-    return numVecsReorth;
+template<typename F>
+Int Reorthogonalize(DistMatrix <F>& Q,
+                    Int j,
+                    std::vector<El::Base<F>>& termList,
+                    const BidiagCtrl<F>& ctrl,
+                    DistMatrix <F>& x,
+                    Matrix <Base<F>>& diagList) {
+ typedef Base<F> Real;
+ const Real eps = limits::Epsilon<Real>();
+ Int numVecsReorth = 0;
+ termList.emplace_back(Real(1));
+ for (Int i = 0; i < j; ++i) {
+  auto qi = Q(ALL, IR(i));
+  Axpy(-Dot(qi, x), qi, x);
+  termList[i] = eps;
+  numVecsReorth++;
+ }
+ Real alpha = Nrm2(x);
+ diagList.Set(j, 0, alpha);
+ Scale(Real(1) / alpha, x);
+ auto qj = Q(ALL, IR(j));
+ qj = x;
+ return numVecsReorth;
 }
 
+template<typename F>
+bool omegaRecurrenceV(const Int j,
+                      const El::Base<F>& tau,
+                      const BidiagonalMatrix <F>& B,
+                      const std::vector<El::Base<F>>& muList,
+                      const El::Base<F>& alpha,
+                      const El::Base<F>& beta,
+                      const BidiagCtrl<F>& ctrl,
+                      std::vector<El::Base<F>>& nuList) {
+ auto& superDiag = B.superDiag;
+ auto& mainDiag = B.mainDiag;
+ bool foundInaccurate = false; //run omega recurrence
+ for (Int i = 0; i < j; ++i) {
+  auto nu = superDiag.Get(i, 0) * muList[i + 1] +
+            mainDiag.Get(i, 0) * muList[i] - beta * nuList[i];
+  nu = ( nu + CopySign(tau, nu) ) / alpha;
+  foundInaccurate |= ( Abs(nu) > ctrl.reorthogTol );
+  nuList[i] = nu;
+ }
+ return foundInaccurate;
+}
 
 
-template< typename F, 
-          typename ForwardOperator,
-          typename AdjointOperator>
-BidiagInfo
-BidiagLanczos(
-    Int iter,
-    Int m,
-    Int n, 
-    const ForwardOperator& A,
-    const AdjointOperator& AAdj,
-    Int steps,
-    Base<F>& tau,
-    Matrix<Base<F>>& mainDiag,
-    Matrix<Base<F>>& superDiag,
-    DistMatrix<F>& U,
-    DistMatrix<F>& V,
-    //Base<F>& normA, //can be an estimate?
-    std::vector<Base<F>>& muList,
-    std::vector<Base<F>>& nuList,
-    const BidiagCtrl<F>& ctrl){
-    typedef Base<F> Real;
-    typedef DistMatrix<F> DM;
-    const Real eps = limits::Epsilon<Real>();
+template<typename F>
+bool omegaRecurrenceU(const Int j,
+                      const El::Base<F>& tau,
+                      const BidiagonalMatrix <F>& B,
+                      const std::vector<El::Base<F>>& nuList,
+                      const El::Base<F>& alpha,
+                      const El::Base<F>& beta,
+                      const BidiagCtrl<F>& ctrl,
+                      std::vector<El::Base<F>>& muList) {
+ auto& superDiag = B.superDiag;
+ auto& mainDiag = B.mainDiag;
+ bool foundInaccurate = false; //run omega recurrence
+ for (Int i = 0; i <= j; ++i) {
+  auto mu = mainDiag.Get(i, 0) * nuList[i] - alpha * muList[i];
+  if ( i > 0 ) {
+   mu += superDiag.Get(i - 1, 0) * nuList[i - 1];
+  }
+  mu = ( mu + CopySign(tau, mu) ) / beta;
+  foundInaccurate |= ( Abs(mu) > ctrl.reorthogTol );
+  muList[i] = mu;
+ };
+ return foundInaccurate;
+}
 
-    bool reorthB = ctrl.reorthIn;
-    Int numVecsReorth = 0;
-    std::vector< Base<F>> maxMuList, maxNuList;
-    maxMuList.reserve(steps);
-    maxNuList.reserve(steps);
 
-    DM v = V( ALL, IR(iter));
-    DM u = U( ALL, IR(iter+1));
-    DM vOld( v);
-    DM uOld( u);
-    Real beta = superDiag.Get(iter,0);
-    auto absComp = [](const Real& a, const Real& b){ return Abs(a) < Abs(b);  };
-    for( Int j = iter+1; j < iter+steps; ++j)
-    {
-        vOld = v;
-        v = u;
-        AAdj( v);
-        Axpy( -beta, vOld, v);
-        Real alpha = Nrm2(v);
+template<typename F,
+ typename ForwardOperator,
+ typename AdjointOperator>
+BidiagInfo
+GolubKahan(
+ El::Int iter,
+ El::Int steps,
+ const ForwardOperator& A, const AdjointOperator& AAdj,
+ BidiagonalMatrix <F>& B, El::DistMatrix<F>& U, El::DistMatrix<F>& V,
+ ReorthogonalizationData<F>& reorthData, const El::BidiagCtrl<F>& ctrl) {
+ 
+ typedef El::Base<F> Real;
+ typedef El::DistMatrix<F> DM;
+ const Real eps = El::limits::Epsilon<Real>();
+ 
+ bool reorthB = ctrl.reorthIn;
+ auto& nuList = reorthData.nuList;
+ auto& muList = reorthData.muList;
+ El::Int numVecsReorth = 0;
+ std::vector<El::Base<F>> maxMuList, maxNuList;
+ maxMuList.reserve(steps);
+ maxNuList.reserve(steps);
+ 
+ DM v = V(ALL, IR(iter));
+ DM u = U(ALL, IR(iter + 1));
+ DM vOld(v);
+ DM uOld(u);
+ Real beta = B.superDiag.Get(iter, 0);
+ auto absComp = [](const Real& a, const Real& b) { return Abs(a) < Abs(b); };
+ for (Int j = iter + 1; j < iter + steps; ++j) {
+  vOld = v;
+  v = AAdj * u;
+  Axpy(-beta, vOld, v);
+  Real alpha = Nrm2(v);
+  
+  //reorthData.tau = Max(reorthData.tau, eps * ( alpha + beta ));
+  //bool foundInaccurate;
+  //Real maxElt;
+  //auto foundInaccurateV = omegaRecurrenceV(j, reorthData.tau, B, muList, alpha, beta, ctrl, nuList);
+  //maxNuList.emplace_back(*std::max_element(nuList.begin(), nuList.end(), absComp));
+  ////Reorthogonalize if necessary.
+  //if ( reorthB || foundInaccurateV ) {
+   numVecsReorth += Reorthogonalize(V, j, nuList, ctrl, v, B.mainDiag);
+  //}
+  
+  //The u step
+  // apply the operator
+  uOld = u;
+  u = A * v;
+  Axpy(-alpha, uOld, u);
+  beta = Nrm2(u);
+  
+  //reorthData.tau = Max(reorthData.tau, eps * ( alpha + beta ));
+  ////compute omega recurrence
+  //auto foundInaccurateU = omegaRecurrenceU(j, reorthData.tau, B, nuList, alpha, beta, ctrl, muList);
+  //maxMuList.emplace_back(*std::max_element(muList.begin(), muList.end(), absComp));
+  //muList.emplace_back(1);
+  //if ( reorthB || foundInaccurateU ) {
+   numVecsReorth += Reorthogonalize(U, j, muList, ctrl, u, B.superDiag);
+  //}
+ }
+ BidiagInfo info;
+ info.numVecsReorth = numVecsReorth;
+ return info;
+}
 
-	       tau = Max(tau, eps*(alpha+beta));
+template<typename F>
+struct BidiagonalMatrix {
+ typedef Matrix <Base<F>> RealMatrix;
+ 
+ BidiagonalMatrix(El::Int N) : mainDiag(N, 1), superDiag(N - 1, 1) {}
+ 
+ RealMatrix mainDiag;
+ RealMatrix superDiag;
+}; //BidiagonalMatrix
 
-        //run omega recurrence
-        bool foundInaccurate = false;
-        for(Int i = 0; i < j; ++i)
-        {
-            Real nu = superDiag.Get(i,0)*muList[i+1] + 
-                      mainDiag.Get(i,0)*muList[i] - beta*nuList[i];
-            nu = (nu + CopySign(tau, nu))/alpha;
-            foundInaccurate |= (Abs(nu) > ctrl.reorthogTol); 
-            nuList[i] = nu;
-        }
-        Real maxElt = *std::max_element( nuList.begin(), nuList.end(), absComp);
-        maxNuList.emplace_back( maxElt);
-        
-        //Reorthogonalize if necessary.
-        if( reorthB || foundInaccurate ){
-            numVecsReorth += reorth( V, j, nuList, ctrl, v, mainDiag);
-        }
+template<typename F>
+El::Matrix<F> BidiagonalSVD(const BidiagonalMatrix<F>& B,
+                            Int n, Int numColsVT, Int numRowsU,
+                            El::DistMatrix<F>& U, El::DistMatrix<F> VT) {
+ auto s = B.mainDiag;
+ auto superDiagCopy = B.superDiag;
+ lapack::BidiagSVDQRAlg('U', n, numColsVT, numRowsU,
+                        s.Buffer(), superDiagCopy.Buffer(),
+                        VT.Buffer(), VT.LDim(),
+                        U.Buffer(), U.LDim());
+ return s;
+}
 
-        //The u step
-        uOld = u;
-        u = v;
-        // apply the operator
-        A(u);
-        Axpy(-alpha, uOld, u);
-        beta = Nrm2( u);
-        tau = Max(tau, eps*(alpha+beta));
-        //compute omega recurrence
-        foundInaccurate=false;
-        for( Int i = 0; i <= j; ++i){
-            Real mu = mainDiag.Get(i,0)*nuList[i] - alpha*muList[i];
-            if (i > 0){
-                mu += superDiag.Get(i-1, 0)*nuList[i-1];
-            }
-            mu = (mu + CopySign(tau, mu))/beta;
-            foundInaccurate |= (Abs(mu) > ctrl.reorthogTol); 
-            muList[ i] = mu;
-        }
-        maxElt = *std::max_element( muList.begin(), muList.end(), absComp);
-        maxMuList.emplace_back( maxElt);
-        muList.emplace_back( 1);
-        if( reorthB || foundInaccurate){
-            numVecsReorth += reorth( U, j, muList, ctrl, u, superDiag); 
-        }
-    }
-    BidiagInfo info;
-    info.numVecsReorth = numVecsReorth;
-    return info;
+template<typename F>
+bool hasConverged(Int nVals,
+                  Int newIndex,
+                  const Matrix <Base<F>>& s,
+                  const Matrix <Base<F>>& sOld,
+                  const DistMatrix <F>& VTHat,
+                  const DistMatrix <F>& UHat,
+                  const El::Base<F>& beta,
+                  const BidiagCtrl<F>& ctrl) {
+ for (Int j = 0; j < nVals; ++j) {
+  if ( Abs(s.Get(j, 0) - sOld.Get(j, 0)) > ctrl.convValTol ) {
+   return false;
+  }
+  if ( beta * Abs(VTHat.Get(j, newIndex)) > ctrl.convVecTol ) {
+   return false;
+  }
+  if ( beta * Abs(UHat.Get(newIndex, j)) > ctrl.convVecTol ) {
+   return false;
+  }
+ }
+ return true;
 }
 
+template< typename F,
+          typename ForwardOperator,
+          typename AdjointOperator>
+ReorthogonalizationData<F> InitialIteration(const ForwardOperator& A, const AdjointOperator& AAdj,
+                                            El::AbstractDistMatrix<F>& initialVec,
+                                            DistMatrix<F>& UTilde, DistMatrix<F>& VTilde){
+   typedef El::Base<F> Real;
+   
+   const Real eps = El::limits::Epsilon<Real>();
+   
+   Real initNorm = Nrm2(initialVec);
+   Scale(Real(1)/initNorm, initialVec);
+   VTilde(El::ALL, 0) = AAdj * initialVec; //v = A'initialVec;
+   
+   auto v = VTilde(El::ALL, 0);
+  
+   Real alpha = El::Nrm2(v); //record the norm, for reorth.
+   El::Scale(Real(1)/alpha, v); //make v a unit vector
+   
+   UTilde(El::ALL, 0) = initialVec;
+   UTilde(El::ALL, 1) = A * v;
+   
+   auto u0 = UTilde(El::ALL, 0);
+   auto u1 = UTilde(El::ALL, 1);
+   
+   El::Axpy(-alpha, u0, u1);
+   Real beta = El::Nrm2(u1);
+   El::Scale(Real(1)/beta,u1);
+   auto tau = eps*(alpha + beta);
+ 
+   //1) Compute nu, a tolerance for reorthoganlization
+   Real nu = 1 + tau/alpha; //think of nu = 1+\epsilon1
+   Real mu = tau/beta;
+   //first argument is maxIter which at the time of typing this was VTilde.Height and UTilde.Height().
+   //If this changes revisit.
+   ReorthogonalizationData<F> reorthData(VTilde.Height(), nu, mu, tau);
+   
+   return reorthData;
+}
 /**
 * TSVD
 */
 template< typename F, 
           typename ForwardOperator,
           typename AdjointOperator>
-inline Int 
+inline El::Int
 TSVD( 
-      Int m,
-      Int n, 
       const ForwardOperator& A,
       const AdjointOperator& AAdj,
-      Int nVals,
-      AbstractDistMatrix<F>& U,
-      AbstractDistMatrix<F>& S,
-      AbstractDistMatrix<F>& V,
-      AbstractDistMatrix<F>& initialVec, //should be of length m 
-      Int maxIter=Int(1000),
-      const BidiagCtrl<F>& ctrl=BidiagCtrl<F>()) //requires InfiniteCharacteristic<F> && ForwardOperator::ScalarType is F
+      El::Int nVals,
+      El::AbstractDistMatrix<F>& U,
+      El::AbstractDistMatrix<F>& S,
+      El::AbstractDistMatrix<F>& V,
+      El::AbstractDistMatrix<F>& initialVec, //should be of length m
+      El::Int maxIter=El::Int(1000),
+      const El::BidiagCtrl<F>& ctrl=El::BidiagCtrl<F>()) //requires InfiniteCharacteristic<F> && ForwardOperator::ScalarType is F
       {
-        typedef DistMatrix<F> DM;
-        typedef Matrix<Base<F>> RealMatrix;
-        typedef Base<F> Real;
-    	
-        const Real eps = limits::Epsilon<Real>();
-        maxIter = Min(maxIter,Min(m,n));
-        const Grid& g = initialVec.Grid();
+        typedef El::DistMatrix<F> DM;
+        typedef El::Matrix<El::Base<F>> RealMatrix;
+        typedef El::Base<F> Real;
         
-        Real initNorm = Nrm2(initialVec);
-        Scale(Real(1)/initNorm, initialVec);       
-        DM Vtilde( n, maxIter, g);
-        auto v = Vtilde(ALL, 0);
-        v = initialVec;
-        AAdj( v); //v = A'initialVec;
-        Real alpha = Nrm2(v); //record the norm, for reorth.
-        Scale(Real(1)/alpha, v); //make v a unit vector
+        const Real eps = El::limits::Epsilon<Real>();
+        const El::Grid& g = initialVec.Grid();
+        const El::Int m = A.Height();
+        const El::Int n = A.Width();
+       
+        maxIter = El::Min(maxIter,El::Min(m,n));
+ 
+        DM VTilde( n, maxIter, g);
+        DM UTilde( m, maxIter, g);
+       
+      	 auto reorthData = InitialIteration(A, AAdj, initialVec, UTilde, VTilde);
+        BidiagonalMatrix<F> B(maxIter+1);
         
-        DM Utilde(m, maxIter, g);
-        auto u0 = Utilde(ALL, 0);
-        auto u1 = Utilde(ALL, 1);
-        u0 = initialVec;
-        u1 = v;
-        A( u1);
-        Axpy(-alpha, u0, u1);
-        Real beta = Nrm2(u1);
-        Scale(Real(1)/beta,u1);
+        RealMatrix sOld( maxIter+1, 1);
         
-       	auto tau = eps*(alpha+beta);
-	
-       //1) Compute nu, a tolerance for reorthoganlization
-       Real nu = 1 + tau/alpha; //think of nu = 1+\epsilon
-       Real mu = tau/beta;
-
-
-       RealMatrix mainDiag( maxIter+1, 1);
-       RealMatrix superDiag( maxIter, 1);
-       std::vector<Base<F>> muList, nuList;
-       muList.reserve( maxIter);
-       nuList.reserve( maxIter);
-       muList.push_back( mu);
-       muList.push_back( 1);
-       nuList.push_back( 1);
-       
-       RealMatrix sOld( maxIter+1, 1);
-       Int blockSize = Max(nVals, 50);
-       for(Int i = 0; i < maxIter; i+=blockSize){
-           std::cout << "iter " << i << ": ";
-           Int numSteps = Min( blockSize, maxIter-i);
-           BidiagLanczos( i, m, n, A, AAdj, numSteps, tau, mainDiag, superDiag, Utilde, Vtilde, muList, nuList, ctrl);
-           auto s = mainDiag;
-           auto superDiagCopy = superDiag;
-           DM VTHat(i+numSteps,nVals, g);
-           DM UHat(nVals,i+numSteps, g);
-           lapack::BidiagSVDQRAlg
-           ( 'U', i+numSteps, nVals, nVals,
-             s.Buffer(), superDiagCopy.Buffer(),
-             VTHat.Buffer(), VTHat.LDim(),
-             UHat.Buffer(), UHat.LDim());
-           for(Int j = 0; j < nVals; ++j){
-             std::cout << s.Get(j,0) << " " << std::flush;
-           }
-           std::cout << std::endl << std::endl;
-           Real beta = superDiag.Get(i+numSteps-1,0);
-           bool converged=true;
-           for(Int j = 0; j < nVals; ++j){
-               if( Abs(s.Get(j,0) - sOld.Get(j,0)) > ctrl.convValTol){
-                   converged = false;
-                   break;
-               }
-               if( beta*Abs(VTHat.Get(j, i+numSteps)) > ctrl.convVecTol){
-                   converged = false;
-                   break;
-               }
-               if( beta*Abs(UHat.Get(i+numSteps, j)) > ctrl.convVecTol) {
-                 converged = false;
-                 break;
-               }
-           }
-           if( converged){
-               std::cout << "converged!" << std::endl;
-               El::Gemm(El::NORMAL, El::ADJOINT, Real(1), Vtilde, VTHat, Real(0), V);
-               El::Gemm(El::NORMAL,  El::NORMAL, Real(1), Utilde,  UHat, Real(0), U);
-               std::cout << "copying s" << std::endl;
-               DistMatrix<F,STAR,STAR> S_STAR_STAR( S.Grid());
-               S_STAR_STAR.LockedAttach( S.Height(), S.Width(), S.Grid(), S.ColAlign(), S.RowAlign(), s.Buffer(), s.LDim(), S.Root());
-               Copy(S_STAR_STAR, S);
-               return i+numSteps;
-           }
-           sOld = s;
-	          tau = eps*s.Get(0,0);
-       }
+        El::Int blockSize = El::Max(nVals, ctrl.minBlockSize);
+        for(Int i = 0; i < maxIter; i+=blockSize){
+            Int numSteps = Min( blockSize, maxIter-i);
+            Int k = i+blockSize;
+            //Write U_kA'V_k = B_k where k is i+numStep and B_k is diagonal.
+            GolubKahan( i, numSteps, A, AAdj, B, UTilde, VTilde, reorthData, ctrl);
+            //C++17 this should be auto [UHat, S, VThat] = BidiagonalSVD(B,n,m);
+            DM VTHat(i+numSteps,nVals, g);
+            DM UHat(nVals,i+numSteps, g);
+            auto s = BidiagonalSVD(B,k,nVals,nVals,UHat,VTHat);
+            Real beta = B.superDiag.Get(k-1,0);
+            if( hasConverged(nVals,  k, s, sOld,VTHat, UHat, beta, ctrl)){
+                El::Gemm(El::NORMAL, El::NORMAL,  Real(1), UTilde,  UHat, Real(0), U);
+                El::Gemm(El::NORMAL, El::ADJOINT, Real(1), VTilde, VTHat, Real(0), V);
+                DistMatrix<F,STAR,STAR> S_STAR_STAR( S.Grid());
+                S_STAR_STAR.LockedAttach( S.Height(), S.Width(), S.Grid(), S.ColAlign(), S.RowAlign(), s.Buffer(), s.LDim(), S.Root());
+                Copy(S_STAR_STAR, S);
+                return i+numSteps;
+            }
+            sOld = s;
+	           reorthData.tau = eps*s.Get(0,0);
+        }
 	return Int(-1);
 }
 
@@ -269,10 +334,7 @@ template< typename F,
           typename ForwardOperator,
           typename AdjointOperator>
 inline Int 
-TSVD( 
-      Int m,
-      Int n, 
-      const ForwardOperator& A,
+TSVD( const ForwardOperator& A,
       const AdjointOperator& AAdj,
       Int nVals,
       AbstractDistMatrix<F>& U,
@@ -282,8 +344,8 @@ TSVD(
       const BidiagCtrl<F>& ctrl=BidiagCtrl<F>()) {
     DistMatrix<F> initialVec(U.Grid());
     F center = rand();
-    Uniform( initialVec, m, 1, center);
-    return TSVD(m ,n, A, AAdj, nVals, U,S,V, initialVec, maxIter, ctrl);
+    Uniform( initialVec, A.Height(), 1, center);
+    return TSVD(A, AAdj, nVals, U,S,V, initialVec, maxIter, ctrl);
 }
 
 } //end namespace El
diff --git a/tests/lapack_like/TSVD.cpp b/tests/lapack_like/TSVD.cpp
index 896acb211a..18a2d6cefd 100644
--- a/tests/lapack_like/TSVD.cpp
+++ b/tests/lapack_like/TSVD.cpp
@@ -17,10 +17,10 @@ class AOp {
     AOp( const Matrix& A_, Orientation o_ = El::NORMAL) : A( A_), o(o_) {}
     
     template< typename V>
-    void operator()( V& v) const {
+    void operator*( V& v) const {
         V w;
         Gemv(o, 1.0, A, v, w);
-        v = w;
+        return w;
     }
 
     Int Height() const {

From 936a5a570f027b66d91ce819350dae33c3cff14b Mon Sep 17 00:00:00 2001
From: "Ryan H. Lewis" <me@ryanlewis.net>
Date: Sat, 18 Feb 2017 15:23:58 -0800
Subject: [PATCH 13/13] get the code to compile.

---
 include/El/lapack_like/spectral/TSVD.hpp | 130 +++++++++++++----------
 tests/lapack_like/TSVD.cpp               |  10 +-
 2 files changed, 78 insertions(+), 62 deletions(-)

diff --git a/include/El/lapack_like/spectral/TSVD.hpp b/include/El/lapack_like/spectral/TSVD.hpp
index f739220cd8..15bd5fa9cc 100644
--- a/include/El/lapack_like/spectral/TSVD.hpp
+++ b/include/El/lapack_like/spectral/TSVD.hpp
@@ -12,6 +12,17 @@ namespace El {
 template<typename Real>
 Real CopySign(const Real& x, const Real& y) { return y >= Real(0) ? Abs(x) : -Abs(x); }
 
+template<typename F>
+struct BidiagonalMatrix {
+ typedef Matrix <Base<F>> RealMatrix;
+ 
+ BidiagonalMatrix(El::Int N) : mainDiag(N, 1), superDiag(N - 1, 1) {}
+ 
+ RealMatrix mainDiag;
+ RealMatrix superDiag;
+}; //BidiagonalMatrix
+
+
 struct BidiagInfo {
  Int numVecsReorth;
 }; //struct BidiagInfo
@@ -61,8 +72,6 @@ Int Reorthogonalize(DistMatrix <F>& Q,
  Real alpha = Nrm2(x);
  diagList.Set(j, 0, alpha);
  Scale(Real(1) / alpha, x);
- auto qj = Q(ALL, IR(j));
- qj = x;
  return numVecsReorth;
 }
 
@@ -120,7 +129,7 @@ template<typename F,
 BidiagInfo
 GolubKahan(
  El::Int iter,
- El::Int steps,
+ El::Int k,
  const ForwardOperator& A, const AdjointOperator& AAdj,
  BidiagonalMatrix <F>& B, El::DistMatrix<F>& U, El::DistMatrix<F>& V,
  ReorthogonalizationData<F>& reorthData, const El::BidiagCtrl<F>& ctrl) {
@@ -133,38 +142,26 @@ GolubKahan(
  auto& nuList = reorthData.nuList;
  auto& muList = reorthData.muList;
  El::Int numVecsReorth = 0;
- std::vector<El::Base<F>> maxMuList, maxNuList;
- maxMuList.reserve(steps);
- maxNuList.reserve(steps);
+ //std::vector<El::Base<F>> maxMuList, maxNuList;
+ //maxMuList.reserve(steps);
+ //maxNuList.reserve(steps);
+ 
+ DM v_j = V(ALL, IR(iter));
+ DM u_j = U(ALL, IR(iter + 1));
+ DM v_prev(v_j);
+ DM u_prev(u_j);
+ Real beta_j = 0;
+ Real alpha = B.superDiag.Get(iter, 0);
+ //auto absComp = [](const Real& a, const Real& b) { return Abs(a) < Abs(b); };
+ for (Int j = iter + 1; j < k; ++j) {
  
- DM v = V(ALL, IR(iter));
- DM u = U(ALL, IR(iter + 1));
- DM vOld(v);
- DM uOld(u);
- Real beta = B.superDiag.Get(iter, 0);
- auto absComp = [](const Real& a, const Real& b) { return Abs(a) < Abs(b); };
- for (Int j = iter + 1; j < iter + steps; ++j) {
-  vOld = v;
-  v = AAdj * u;
-  Axpy(-beta, vOld, v);
-  Real alpha = Nrm2(v);
-  
-  //reorthData.tau = Max(reorthData.tau, eps * ( alpha + beta ));
-  //bool foundInaccurate;
-  //Real maxElt;
-  //auto foundInaccurateV = omegaRecurrenceV(j, reorthData.tau, B, muList, alpha, beta, ctrl, nuList);
-  //maxNuList.emplace_back(*std::max_element(nuList.begin(), nuList.end(), absComp));
-  ////Reorthogonalize if necessary.
-  //if ( reorthB || foundInaccurateV ) {
-   numVecsReorth += Reorthogonalize(V, j, nuList, ctrl, v, B.mainDiag);
-  //}
-  
   //The u step
   // apply the operator
-  uOld = u;
-  u = A * v;
-  Axpy(-alpha, uOld, u);
-  beta = Nrm2(u);
+  u_prev = u_j;
+  u_j = A * v_prev;
+  Axpy(-alpha, u_prev, u_j);
+  beta_j = Nrm2(u_j);
+  Scale(F(1.0)/beta_j, u_j);
   
   //reorthData.tau = Max(reorthData.tau, eps * ( alpha + beta ));
   ////compute omega recurrence
@@ -172,7 +169,25 @@ GolubKahan(
   //maxMuList.emplace_back(*std::max_element(muList.begin(), muList.end(), absComp));
   //muList.emplace_back(1);
   //if ( reorthB || foundInaccurateU ) {
-   numVecsReorth += Reorthogonalize(U, j, muList, ctrl, u, B.superDiag);
+  numVecsReorth += Reorthogonalize(U, j, muList, ctrl, u_j, B.superDiag);
+  U(ALL, IR(j)) = u_j;
+  //}
+  
+  v_prev = v_j;
+  v_j = AAdj * u_j;
+  Axpy(-beta_j, v_prev, v_j);
+  Real alpha = Nrm2(v_j);
+  Scale(F(1.0)/alpha, v_j);
+  
+  //reorthData.tau = Max(reorthData.tau, eps * ( alpha + beta ));
+  //bool foundInaccurate;
+  //Real maxElt;
+  //auto foundInaccurateV = omegaRecurrenceV(j, reorthData.tau, B, muList, alpha, beta, ctrl, nuList);
+  //maxNuList.emplace_back(*std::max_element(nuList.begin(), nuList.end(), absComp));
+  ////Reorthogonalize if necessary.
+  //if ( reorthB || foundInaccurateV ) {
+  numVecsReorth += Reorthogonalize(V, j, nuList, ctrl, v_j, B.mainDiag);
+  V(ALL, IR(j)) = v_j;
   //}
  }
  BidiagInfo info;
@@ -180,16 +195,6 @@ GolubKahan(
  return info;
 }
 
-template<typename F>
-struct BidiagonalMatrix {
- typedef Matrix <Base<F>> RealMatrix;
- 
- BidiagonalMatrix(El::Int N) : mainDiag(N, 1), superDiag(N - 1, 1) {}
- 
- RealMatrix mainDiag;
- RealMatrix superDiag;
-}; //BidiagonalMatrix
-
 template<typename F>
 El::Matrix<F> BidiagonalSVD(const BidiagonalMatrix<F>& B,
                             Int n, Int numColsVT, Int numRowsU,
@@ -231,31 +236,34 @@ template< typename F,
           typename AdjointOperator>
 ReorthogonalizationData<F> InitialIteration(const ForwardOperator& A, const AdjointOperator& AAdj,
                                             El::AbstractDistMatrix<F>& initialVec,
-                                            DistMatrix<F>& UTilde, DistMatrix<F>& VTilde){
+                                            DistMatrix<F>& UTilde, DistMatrix<F>& VTilde,
+                                            BidiagonalMatrix<F>& B){
    typedef El::Base<F> Real;
    
    const Real eps = El::limits::Epsilon<Real>();
    
    Real initNorm = Nrm2(initialVec);
    Scale(Real(1)/initNorm, initialVec);
-   VTilde(El::ALL, 0) = AAdj * initialVec; //v = A'initialVec;
+   UTilde(El::ALL, 0) = initialVec;
+   DistMatrix<F> x = UTilde(El::ALL, IR(0));
+   VTilde(El::ALL, 0) = AAdj * x;
    
    auto v = VTilde(El::ALL, 0);
   
    Real alpha = El::Nrm2(v); //record the norm, for reorth.
    El::Scale(Real(1)/alpha, v); //make v a unit vector
    
-   UTilde(El::ALL, 0) = initialVec;
    UTilde(El::ALL, 1) = A * v;
    
-   auto u0 = UTilde(El::ALL, 0);
-   auto u1 = UTilde(El::ALL, 1);
+   auto u0 = UTilde(El::ALL, IR(0));
+   auto u1 = UTilde(El::ALL, IR(1));
    
    El::Axpy(-alpha, u0, u1);
    Real beta = El::Nrm2(u1);
    El::Scale(Real(1)/beta,u1);
    auto tau = eps*(alpha + beta);
- 
+   B.mainDiag.Set(0,0, alpha);
+   B.superDiag.Set(0,0, beta);
    //1) Compute nu, a tolerance for reorthoganlization
    Real nu = 1 + tau/alpha; //think of nu = 1+\epsilon1
    Real mu = tau/beta;
@@ -296,30 +304,38 @@ TSVD(
  
         DM VTilde( n, maxIter, g);
         DM UTilde( m, maxIter, g);
-       
-      	 auto reorthData = InitialIteration(A, AAdj, initialVec, UTilde, VTilde);
+ 
         BidiagonalMatrix<F> B(maxIter+1);
+      	 auto reorthData = InitialIteration(A, AAdj, initialVec, UTilde, VTilde, B);
+        El::Display(UTilde, "UTilde");
+        El::Display(VTilde, "VTilde");
         
         RealMatrix sOld( maxIter+1, 1);
         
-        El::Int blockSize = El::Max(nVals, ctrl.minBlockSize);
+        El::Int blockSize = 1;//El::Max(nVals, ctrl.minBlockSize);
         for(Int i = 0; i < maxIter; i+=blockSize){
             Int numSteps = Min( blockSize, maxIter-i);
-            Int k = i+blockSize;
+            Int k = i+numSteps;
             //Write U_kA'V_k = B_k where k is i+numStep and B_k is diagonal.
-            GolubKahan( i, numSteps, A, AAdj, B, UTilde, VTilde, reorthData, ctrl);
+            GolubKahan( i, k, A, AAdj, B, UTilde, VTilde, reorthData, ctrl);
+            El::Display(UTilde(ALL, Range<Int>(0,k)), "UTilde");
+            El::Display(VTilde(ALL, Range<Int>(0,k)), "VTilde");
+         
             //C++17 this should be auto [UHat, S, VThat] = BidiagonalSVD(B,n,m);
-            DM VTHat(i+numSteps,nVals, g);
-            DM UHat(nVals,i+numSteps, g);
+            DM VTHat(k, nVals, g);
+            DM UHat(nVals, k, g);
             auto s = BidiagonalSVD(B,k,nVals,nVals,UHat,VTHat);
+            Display(UHat, "UHat");
+            Display(VTHat, "VTHat");
             Real beta = B.superDiag.Get(k-1,0);
             if( hasConverged(nVals,  k, s, sOld,VTHat, UHat, beta, ctrl)){
+                std::cout << "Apparently, we have converged" << std::endl;
                 El::Gemm(El::NORMAL, El::NORMAL,  Real(1), UTilde,  UHat, Real(0), U);
                 El::Gemm(El::NORMAL, El::ADJOINT, Real(1), VTilde, VTHat, Real(0), V);
                 DistMatrix<F,STAR,STAR> S_STAR_STAR( S.Grid());
                 S_STAR_STAR.LockedAttach( S.Height(), S.Width(), S.Grid(), S.ColAlign(), S.RowAlign(), s.Buffer(), s.LDim(), S.Root());
                 Copy(S_STAR_STAR, S);
-                return i+numSteps;
+                return k;
             }
             sOld = s;
 	           reorthData.tau = eps*s.Get(0,0);
diff --git a/tests/lapack_like/TSVD.cpp b/tests/lapack_like/TSVD.cpp
index 18a2d6cefd..83c29466be 100644
--- a/tests/lapack_like/TSVD.cpp
+++ b/tests/lapack_like/TSVD.cpp
@@ -17,7 +17,7 @@ class AOp {
     AOp( const Matrix& A_, Orientation o_ = El::NORMAL) : A( A_), o(o_) {}
     
     template< typename V>
-    void operator*( V& v) const {
+    V operator*( V& v) const {
         V w;
         Gemv(o, 1.0, A, v, w);
         return w;
@@ -53,7 +53,7 @@ void TestTSVD( const DistMatrix<F>& A, Int k=3){
     SVD( A, U, S, V);
     mpi::Barrier( g.Comm() );
     std::cout << "Singular Values: " << std::flush;
-    Display(S, "Singular Values");
+    Display(S(Range<Int>(0,k), 0), "Singular Values");
     std::cout << std::endl;
     if( g.Rank() == 0 )
         Output("  Starting TSVD factorization...");
@@ -61,7 +61,7 @@ void TestTSVD( const DistMatrix<F>& A, Int k=3){
     typedef AOp<DistMatrix<F>> Operator; 
     Operator Aop( A);
     Operator AAdjop( A, El::ADJOINT);
-    TSVD( m, n, Aop, AAdjop, k, Uk, Sk, Vk);
+    TSVD( Aop, AAdjop, k, Uk, Sk, Vk, 5);
     const double runTime = mpi::Time() - startTime;
     Output("TSVD Time = ",runTime);
     Real eps = limits::Epsilon<Real>();
@@ -100,8 +100,8 @@ main( int argc, char* argv[] )
     try
     {
         const bool colMajor = Input("--colMajor","column-major ordering?",true);
-        const Int m = Input("--height","height of matrix",10);
-        const Int n = Input("--width","width of matrix",10);
+        const Int m = Input("--height","height of matrix",3);
+        const Int n = Input("--width","width of matrix",3);
         const Int nb = Input("--nb","algorithmic blocksize",96);
         const bool testCorrectness = Input
             ("--correctness","test correctness?",true);