using Iterators

# Evaluate `kernel` on every pair (evalPts1[:,i], evalPts2[:,j]) and return the
# values as an n1 x n2 matrix A, where A[i,j] corresponds to the pair (i, j).
# - evalPts1, evalPts2 store one point per column
# - kernel is called exactly once, with two matrices of n1*n2 columns each (the
#   k-th columns form the k-th pair); its output is reshaped to (n1, n2)
function meshKernelFull(kernel, evalPts1::Array{Float64, 2}, evalPts2::Array{Float64, 2})
    n1 = size(evalPts1, 2)
    n2 = size(evalPts2, 2)

    # Careful with the ordering: pair number i + (j-1)*n1 is (point i, point j),
    # i.e. the index into evalPts1 runs fastest, which is what the column-major
    # reshape below expects.
    cols1 = repeat(1:n1, outer=n2)
    cols2 = repeat(1:n2, inner=n1)
    pairedX = evalPts1[:, cols1]
    pairedY = evalPts2[:, cols2]

    return reshape(kernel(pairedX, pairedY), (n1, n2))
end

# Single place that defines how the approximation error is measured.
# Returns the relative L-infinity error, as required by the algorithm:
#     max |AtApp - AtTrue| / max |AtTrue|
# Both inputs are flattened first, so only their number of entries has to match.
function relativeErrorAppTrue(AtApp, AtTrue)
    # Alternative (Frobenius-type) measure, kept for reference:
    # return vecnorm(AtTrue - AtApp)/vecnorm(AtTrue)
    reference = AtTrue[:]
    worstDeviation = maximum(abs.(AtApp[:] - reference))
    scale = maximum(abs.(reference))
    return worstDeviation / scale
end

# Build the three factors (Ub, F, Vb) of the interpolated kernel approximation,
# which callers combine as Ub * F * Vb'.
# - evalPts1, evalPts2 are points in R space (one point per column)
# - kernel takes points in X space; b1, b2 map R -> X through RtoX
# - interp1, interp2 are the interpolators; their nodes are the columns of .xk
# - wi_x_xi1, wi_x_xi2 are optional extra arguments forwarded to evalBasis.
#   The two-argument evalBasis is used only when BOTH are `nothing`; otherwise
#   both are forwarded as given (even if one of them is `nothing`).
# Returns (Ub, F, Vb) where
# - Ub, Vb are the transposed results of evalBasis for interp1 and interp2
# - F is the m x n matrix of kernel values on the node grid, F[i,j] being the
#   value for (interp1.xk[:,i], interp2.xk[:,j])
function evalKernelInterpolators(kernel, b1::BoundingBox, b2::BoundingBox, evalPts1::Array{Float64, 2}, evalPts2::Array{Float64, 2}, interp1, interp2, wi_x_xi1=nothing, wi_x_xi2=nothing)
    # `===` is the identity test for `nothing`; `==` could be overloaded by the
    # type of the weights.
    if wi_x_xi1 === nothing && wi_x_xi2 === nothing
        Ub = evalBasis(interp1, evalPts1)'
        Vb = evalBasis(interp2, evalPts2)'
    else
        Ub = evalBasis(interp1, evalPts1, wi_x_xi1)'
        Vb = evalBasis(interp2, evalPts2, wi_x_xi2)'
    end

    # Build a grid xk x yk
    m = size(interp1.xk, 2)
    n = size(interp2.xk, 2)
    # Careful with the ordering: the index into interp1.xk runs fastest, so the
    # column-major reshape below puts pair (i, j) at F[i,j].
    X = interp1.xk[:, repeat(1:m, outer=n)]
    Y = interp2.xk[:, repeat(1:n, inner=m)]

    # One vectorised kernel call on all m*n pairs, mapped to X space first.
    # (No need to preallocate F: it is produced directly by the reshape.)
    F = reshape(kernel(RtoX(b1, X), RtoX(b2, Y)), (m, n))

    return (Ub, F, Vb)
end

# Find the tensor interpolator that fits kernel over testPts[1] x testPts[2].
# Greedy search: at every iteration, each admissible 1d order (in x and in y) is
# tentatively increased by 1, the error on the test set is measured, and the
# single increase giving the smallest error is kept.
# - interpolators is some initial tensor interpolator pair (1: x, 2: y).
#   It is updated IN PLACE and also returned.
# - Is are the spaces over which we interpolate (one vector of intervals per
#   direction), passed to the TensorInterpolator constructor
# - ordersmax[xy][d] is the largest order allowed in direction xy, dimension d
# - dims[xy] is the number of dimensions in direction xy
# - testPts are the test points (as expected by evalKernelInterpolators)
# - bs are the bounding boxes (X <-> R)
# - kernel is a function (x, y) -> k(x,y)
# - AtTrue holds the reference kernel values on the test points
# - iteration is the starting value of the iteration counter (used for logging
#   and for the iterations_max cap)
# - tol is the target relative error, err the error of the initial interpolators
# - ruletype is the 1d rule, logLevel controls the amount of printing
# The loop stops when err <= tol, when iteration > iterations_max, when every
# order has reached ordersmax, or when no candidate yields a finite error.
function getTensorInterpolatorAdaptive(interpolators, Is::Array{Array{Interval{Float64},1},1}, ordersmax::Array{Array{Int64,1},1}, dims::Array{Int64,1}, testPts::Array{Array{Float64,2},1}, bs, kernel, AtTrue::Array{Float64,2}, iteration::Int64, tol::Float64, iterations_max::Int64, err::Float64, ruletype::RULETYPE, logLevel::LogLevel)

    while true

        # Monitoring
        if err <= tol
            if logLevel >= SI.debug
                @printf "%d. Converged\n" iteration
            end
            break
        elseif iteration > iterations_max || (all(interpolators[1].orders .>= ordersmax[1]) && all(interpolators[2].orders .>= ordersmax[2]))
            if logLevel >= SI.debug
                @printf "%d. Aborting\n" iteration
            end
            warn("Aborting tensor construction")
            break
        end

        iteration += 1

        # Best candidate seen in this sweep; 0 means "none selected yet"
        errMin = Inf
        xyMin  = 0
        dMin   = 0

        # Try both directions
        for xy = 1:2

            # Try increasing every order by 1
            for d = 1:dims[xy]
                # `>=` (not `==`) so that the skip rule agrees with the abort
                # test above even if an order starts beyond its maximum
                if interpolators[xy].orders[d] >= ordersmax[xy][d]
                    if logLevel >= SI.debug
                        @printf "   %d-%d Skipping dim %d (ordermax reached) \n" iteration xy d
                    end
                    continue
                end
                # Try increasing orders[d] by 1
                candOrder = copy(interpolators[xy].orders)
                candOrder[d] += 1
                # Compute new tensor interpolator
                candInterp = TensorInterpolator{Float64}(Is[xy], candOrder, ruletype)
                # Evaluate error, swapping the candidate in for direction xy only
                if xy == 1
                    (UtApp, FtApp, VtApp) = evalKernelInterpolators(kernel, bs[1], bs[2], testPts[1], testPts[2], candInterp, interpolators[2])
                else
                    (UtApp, FtApp, VtApp) = evalKernelInterpolators(kernel, bs[1], bs[2], testPts[1], testPts[2], interpolators[1], candInterp)
                end
                candErr = relativeErrorAppTrue(UtApp*FtApp*VtApp', AtTrue)
                # A NaN or Inf error never compares below errMin and is ignored
                if candErr < errMin
                    errMin = candErr
                    xyMin = xy
                    dMin = d
                end
            end

        end

        # No candidate produced a usable error (all NaN/Inf): there is nothing
        # to pick, so stop instead of indexing interpolators with an invalid
        # direction.
        if xyMin == 0
            warn("Aborting tensor construction: no candidate order gave a finite error")
            break
        end

        # Get optimal and update tensor
        bestOrder = copy(interpolators[xyMin].orders)
        bestOrder[dMin] += 1
        interpolators[xyMin] = TensorInterpolator{Float64}(Is[xyMin], bestOrder, ruletype)
        err = errMin
        if logLevel >= SI.verbose
            @printf "%d. Adding %d %s with error %e | rank is now %s\n" iteration xyMin string(bestOrder) errMin string((size(interpolators[1].xk)[2],size(interpolators[2].xk)[2]))
        end

    end

    return interpolators

end

# Main function of the paper: builds a low-rank factorization of the kernel matrix
# K(evalPts1, evalPts2).
# - kernel is a function (x, y) -> f(x, y)
# - b1, b2 are boxes in X and Y space, respectively, enclosing the data and providing mappings from X/Y to R1/R2 that are boxes where interpolation is performed.
# - evalPts1, evalPts2 are mesh points in X and Y spaces
# - tol is the relative tolerance
# - ruletype is the 1d rule to be used (default: SI.chebyshev)
# - useSI whether or not to use Skeletonized Interpolation (default: true). If false, use the usual method with SVD recompression at the end. The faster=true path asserts useSI.
# - extraDiags whether or not to compute extra diagnostics for SI (default: false). Only read on the faster=false, useSI=true path.
# - logLevel (SI.all, SI.info, SI.verbose or SI.debug) (default: SI.info)
# - faster (default: true). If true, the interpolation orders are taken directly from the 1d searches (no adaptive tensor search) and the nodes are compressed by RRQR. If false, the adaptive tensor search (getTensorInterpolatorAdaptive) is run first.
# It returns (U, S, V, Xint, Yint, Ub, F, Vb, I1, I2, diags) where
# - the factorization is either U*S*V' for useSI=false and U*(S\V') for useSI=true (S is then the result of lufact)
# - Xint and Yint are the nodes used in the final factorization (either Xbar and Ybar for useSI=false or Xhat and Yhat for useSI=true)
# - Ub, F, Vb: with faster=true all three are `nothing`. With faster=false and useSI=true, F is the kernel on Xbar x Ybar and Ub, Vb are `nothing`. With faster=false and useSI=false, they are the interpolation factors returned by evalKernelInterpolators.
# - I1 and I2 are the interpolators
# - diags is a Dict with timings (differences of time_ns() readings) and other information
function meshKernelLowRankAdaptive(kernel, b1::BoundingBox, b2::BoundingBox, evalPts1::Array{Float64, 2}, evalPts2::Array{Float64, 2}, tol::Float64 ; ruletype::RULETYPE=SI.chebyshev, useSI=true, extraDiags=false, logLevel::LogLevel=SI.info, faster=true)

    diags = Dict()
    t_init = time_ns()
    
    if logLevel >= SI.verbose
        # NOTE(review): `legendre` / `chebyshev` are unqualified here while the default
        # argument is written `SI.chebyshev` - confirm both names resolve in this scope.
        if ruletype == legendre
            @printf "Using legendre 1d rule.\n"
        elseif ruletype == chebyshev
            @printf "Using chebyshev 1d rule.\n"
        end
        @printf "Use SI is set to %s.\n" string(useSI)
    end
    # Fraction of tol granted to the interpolation stage: the 1d searches and the
    # adaptive tensor search both stop at tolInterp * tol.
    if useSI
        tolInterp = 0.1
    else
        tolInterp = 1.0
    end
    if logLevel >= SI.verbose
        @printf "[Info] Using tolInterp = %e\n" tolInterp
        @printf "[Info] Effective tol * tolInterp = %e\n" (tolInterp * tol)
        @printf "[Info] Extra Diags is set to %s.\n" string(extraDiags)
    end

    t_checks = time_ns()
    diags["time_checks_ns"]=t_checks-t_init

    # 0. Initial information
    dim1 = b1.dim_r
    dim2 = b2.dim_r

    # 1. We need to decide on the maximum order in case things go wrong
    maxOrder = 40
    # Uninitialised here; every entry is set in the 1d search loop below
    ordersmax1 = Array{Int64, 1}(dim1)
    ordersmax2 = Array{Int64, 1}(dim2)
    npts1 = ones(Int64, dim1)
    npts2 = ones(Int64, dim2)
    # Box midpoints, used to freeze all coordinates but one in the 1d searches
    mid1 = (b1.starts + b1.ends)/2.0
    mid2 = (b2.starts + b2.ends)/2.0

    # Index 1 is the x side, index 2 the y side.
    # (mids is not read again in this function; mid1/mid2 are used directly.)
    ordersmax = [ordersmax1, ordersmax2]
    mids      = [mid1, mid2]
    dims      = [dim1, dim2]
    bs        = [b1, b2]
    npts      = [npts1, npts2]
    
    # One interval [starts[d], ends[d]] per dimension and per side
    Is = Array{Array{Interval{Float64},1},1}(2)
    for xy = 1:2
        Is[xy] = Array{Interval{Float64},1}(dims[xy])
        for d = 1:dims[xy]
            Is[xy][d] = Interval{Float64}(bs[xy].starts[d], bs[xy].ends[d])
        end
    end

    t_setup = time_ns()
    diags["time_setup_ns"]=t_setup-t_checks

    #
    # Some initial rough approximation used to get ordersmax and the initial order
    #
    # For every side and every dimension, the 1d order is increased from 1 until the
    # 1d interpolant of the kernel along that coordinate reaches tolInterp * tol on
    # 10 equispaced points, or until maxOrder is hit.
    for xy = 1:2 
        for d = 1:dims[xy]
            ordersmax[xy][d] = 1
            err = Inf
            while true
                # Interpolate
                # Compute candidate points on [a b] (a 1 x 10 row of equispaced points)
                xy_array = linspace(bs[xy].starts[d], bs[xy].ends[d], 10)'
                # Interpolate on [a b]
                I = [Interval{Float64}(bs[xy].starts[d], bs[xy].ends[d])]
                interp = TensorInterpolator{Float64}(I, [ordersmax[xy][d]], ruletype)
                # Number of 1d nodes at this order (only referenced by the commented-out
                # "Method 1" for the test points further down)
                npts[xy][d] = size(interp.xk)[2]
                # Restriction of the kernel to a line: coordinate d of the x argument
                # (xy == 1) or of the y argument (otherwise) varies, every other
                # coordinate of both arguments is fixed at the box midpoint. Points are
                # mapped to X space with RtoX before calling kernel.
                function kerneld(xy_in) # xy_in is [dim=1 x #pts]
                    @assert size(xy_in)[1] == 1
                    n = size(xy_in)[2]
                    if xy == 1
                        x = RtoX(b1,vcat(mid1[1:d-1] * ones((1,n)) , xy_in , mid1[d+1:end] * ones((1,n))))
                        y = RtoX(b2,mid2 * ones((1,n)))
                    else
                        x = RtoX(b1,mid1 * ones((1,n)))
                        y = RtoX(b2,vcat(mid2[1:d-1] * ones((1,n)) , xy_in , mid2[d+1:end] * ones((1,n))))
                    end
                    return kernel(x,y)
                end
                # presumably the interpolant of kerneld evaluated at xy_array - see interpolate
                fapp = interpolate(interp, xy_array, kerneld)
                # Compute true value
                ftrue = kerneld(xy_array)
                # Compute error (relative, in the max norm)
                err = maximum(abs.(fapp - ftrue))/maximum(abs.(ftrue))
                if logLevel >= SI.debug
                    if xy == 1
                        @printf "x[%d] order %d nptsx %d error %e <?> %e\n" d ordersmax[xy][d] size(interp.xk)[2] err (tolInterp * tol)
                    else
                        @printf "y[%d] order %d nptsy %d error %e <?> %e\n" d ordersmax[xy][d] size(interp.xk)[2] err (tolInterp * tol)
                    end
                end
                # Decide what to do
                if err <= (tolInterp * tol)
                    print_log("Tolerance reached.", logLevel, SI.debug)
                    break
                end
                if ordersmax[xy][d] >= maxOrder
                    warn("Max order reached.")
                    break
                end
                ordersmax[xy][d] += 1
            end
        end
    end

    # Use that as a starting order
    # (shallow copy: ordersinit[xy] is the same vector object as ordersmax1/ordersmax2)
    ordersinit = copy(ordersmax)

    # Relax a bit the bound above
    # `+=` rebinds ordersmax[xy] to a new vector, so ordersinit keeps the unrelaxed orders
    for xy = 1:2
        ordersmax[xy] += 6
        if logLevel >= SI.debug
            @printf "Ordermax[%d] = %s\n" xy string(ordersmax[xy])
        end
    end

    t_ordermax = time_ns()
    diags["time_ordermax_ns"]=t_ordermax-t_setup

    #
    # 2. Adaptive low rank approximation 
    #

    if faster

        # Build the Xbar, Ybar interpolation nodes (implicitly)

        # There is no initial-interpolation stage on this path
        diags["time_initinterp_ns"]=0
        # We just decide on using the 1d rule, +2 (little difference)
        # NOTE(review): the code below subtracts 2 (div by 1 is a no-op), it does not add 2.
        # A 1d order of 1 or 2 therefore gives a non-positive order - confirm this is intended.
        # NOTE(review): both @show calls print regardless of logLevel.
        @show ordersinit

        # Shallow copy + in-place write: ordersinit[xy] is modified as well
        orders = copy(ordersinit)
        for xy = 1:2
            orders[xy][:] = div.(orders[xy], 1) - 2 # div(orders[xy],4)
        end
       
        @show orders

        interpolators = Array{Any, 1}(2)
        start = time_ns()
        for xy = 1:2
            interpolators[xy] = TensorInterpolator{Float64}(Is[xy], orders[xy], ruletype)
        end
        time_interp = time_ns()
        diags["time_interp_ns"] = time_interp - start
        
        # Nodes (one per column) and the per-node weights w_intk of each interpolator
        Xbar = interpolators[1].xk
        Ybar = interpolators[2].xk
        Wx   = interpolators[1].w_intk
        Wy   = interpolators[2].w_intk

        ## Keep doing

        # This path only implements Skeletonized Interpolation; calling with
        # faster=true and useSI=false fails here.
        @assert useSI

        # Disabled alternative, kept for reference: ACA-style selection of Xhat, Yhat.
        # The RRQR selection after this commented-out block is what actually runs.

        # # Build Xhat, Yhat using ACA over M = Wx F(Xbar, Ybar) Wy implicitly

        # Xhat_id = Array{Int64,1}(0)
        # Yhat_id = Array{Int64,1}(0)

        # u = Array{Array{Float64,1},1}(0)
        # v = Array{Array{Float64,1},1}(0)

        # # ACA-style decomposition
        # Zrow = xi -> Wx[xi] .* ( kernel(RtoX(b1, Xbar[:,xi]), RtoX(b2, Ybar)) .* Wy )
        # Zcol = yj -> ( Wx .* kernel(RtoX(b1, Xbar), RtoX(b2, Ybar[:,yj])) ) .* Wy[yj]

        # xi = 1
        # push!(Xhat_id, xi)
        # Rtilde_I = Zrow(xi)

        # (trash, yj) = findmax(abs.(Rtilde_I))
        # push!(Yhat_id, yj)
        # Rtilde_J = Zcol(yj)

        # v1 = Rtilde_I / Rtilde_I[yj]
        # u1 = Rtilde_J
        # push!(u, u1)
        # push!(v, v1)

        # Ztilde2 = norm(u1)^2 * norm(v1)^2

        # tolQR = 0.5

        # for r = 1:min(size(Xbar)[2], size(Ybar)[2])-1
        #     
        #     if logLevel >= SI.verbose
        #         @printf "%d. %e <?< %e\n" r norm(u[end])*norm(v[end]) tolQR * tol * sqrt(Ztilde2)
        #     end
        #     if norm(u[end]) * norm(v[end]) <= tolQR * tol * sqrt(Ztilde2)
        #         break
        #     end
       
        #     Xs = copy(Rtilde_J)
        #     Xs[Xhat_id] = 0
        #     (trash, xi) = findmax(abs.(Xs))
        #     Rtilde_I = Zrow(xi)
        #     for i = 1:r
        #         Rtilde_I -= u[i][xi] * v[i]
        #     end

        #     Ys = copy(Rtilde_I)
        #     Ys[Yhat_id] = 0
        #     (trash, yj) = findmax(abs.(Ys))
        #     Rtilde_J = Zcol(yj)
        #     for i = 1:r
        #         Rtilde_J -= u[i] * v[i][yj]
        #     end
        #     
        #     vr = Rtilde_I / Rtilde_I[yj]
        #     ur = Rtilde_J

        #     push!(Xhat_id, xi)
        #     push!(Yhat_id, yj)

        #     push!(u, ur)
        #     push!(v, vr)
        #     
        #     for i = 1:r
        #         Ztilde2 += 2 * abs(u[i]' * ur) * abs(v[i]' * vr)
        #     end
        #     
        #     Ztilde2 += norm(ur)^2 * norm(vr)^2

        # end

        # Xhat = Xbar[:, Xhat_id]
        # Yhat = Ybar[:, Yhat_id]

        # t_aca = time_ns()

        # if logLevel >= SI.info
        #     @printf "ACA compressed Xbar, Ybar from rank %d to %d\n" min(size(Xbar)[2],size(Ybar)[2]) min(size(Xhat)[2],size(Yhat)[2])
        # end
        # 
        # diags["time_aca_ns"] = t_aca - time_interp
        #     
        # evalPts1Extra = RtoX(b1, Xhat)
        # evalPts2Extra = RtoX(b2, Yhat)
       
        # Kernel on the full node grid Xbar x Ybar (in X space), then scaled by the
        # square roots of the node weights: rows by sqrt(Wx), columns by sqrt(Wy)
        F = meshKernelFull(kernel, RtoX(b1, Xbar), RtoX(b2, Ybar))
        Fw = diagm(sqrt.(Wx)) * F * diagm(sqrt.(Wy))
        tolQR = 0.1

        # X
        # Column-pivoted QR of Fw': the pivots pFx rank the rows of Fw, i.e. the nodes of Xbar.
        # nPx is the number of pivots kept (presumably where |diag(R)| falls below
        # tolQR * tol in a relative l2 sense - confirm against cut_spectrum_l2).
        (QFx, RFx, pFx) = qr(Fw', Val{true})
        nPx = cut_spectrum_l2(abs.(diag(RFx)), tolQR * tol, bypasssort=true)
        
        # Y
        # Same on the selected rows only: the pivots pFy rank the nodes of Ybar
        (QFy, RFy, pFy) = qr(Fw[pFx[1:nPx],:], Val{true})
        nPy = cut_spectrum_l2(abs.(diag(RFy)), tolQR * tol, bypasssort=true)

        # Keep the same number of nodes on both sides so that K(Xhat, Yhat) is square
        nP = max(nPx, nPy)
        nPx = nP
        nPy = nP
        
        if logLevel >= SI.info
            @printf "RRQR compressed Xbar, Ybar from rank %d to %d\n" min(size(Xbar)[2],size(Ybar)[2]) nP
        end
            
        # Xhat and Yhat: the selected nodes, mapped to X space
        evalPts1Extra = RtoX(b1,interpolators[1].xk[:,pFx[1:nPx]])
        evalPts2Extra = RtoX(b2,interpolators[2].xk[:,pFy[1:nPy]])

        # NOTE: unlike the faster=false path, no "time_qr_ns" entry is recorded here
        t_qr = time_ns()
           
        # U = K(X, Yhat), S = LU factors of K(Xhat, Yhat), V = K(Xhat, Y)'
        U = meshKernelFull(kernel, evalPts1, evalPts2Extra)
        S = lufact(meshKernelFull(kernel, evalPts1Extra, evalPts2Extra)) # Better than inv of course, but supports same ops
        V = meshKernelFull(kernel, evalPts1Extra, evalPts2)'

        t_lu = time_ns()
        
        diags["time_lu_ns"] = t_lu - t_qr

        # In this case, the factorization is (U * Sinv * V') = U * (S \ V')
        # Ub, F and Vb are not returned on this path (three `nothing`s)
        return (U, S, V, evalPts1Extra, evalPts2Extra, nothing, nothing, nothing, interpolators[1], interpolators[2], diags)

    else

        iteration = 1
        iterations_max = 250

        # 2a. Initial orders are ordersinit (the orders found by the 1d searches above)
        interpolators = Array{Any, 1}(2)
        for xy = 1:2
            if logLevel >= SI.debug
                @printf "Tensor case : ordersinit[%d] = %s\n" xy string(ordersinit[xy])
            end
            interpolators[xy] = TensorInterpolator{Float64}(Is[xy], ordersinit[xy], ruletype)
        end
        # 2b. Pick test set 
        # That's actually something important for robustness
        testPts = Array{Array{Float64, 2}, 1}(2) 
        # Number of 1d test points per dimension. This rebinds npts (previously the
        # per-side node counts). NOTE(review): only 3 entries, so npts[d] below is out
        # of bounds as soon as a side has more than 3 dimensions.
        npts = [10, 10, 5]
        if logLevel >= SI.verbose
            @printf "Using #pts %s for evaluation.\n" string(npts)
        end
        for xy = 1:2
            testPts1d = Array{Array{Float64, 1}, 1}(dims[xy])
            for d = 1:dims[xy]
                # Method 1
                # For the control points, use something proportional to the # of points of the 1D rule computed at the beginning (ordermax) - not clear it's better, really (even though it could seem like it does)
                # testPts1d[d] = Array(linspace(Is[xy][d].a, Is[xy][d].b, max(npts[xy][d]+1,3)))
                # Method 2
                testPts1d[d] = Array(linspace(Is[xy][d].a, Is[xy][d].b, npts[d]))
            end
            # presumably the tensor product of the 1d point sets - see tensor_grid
            testPts[xy] = tensor_grid(testPts1d)
        end
        # 2c. Compute initial error of the starting interpolators over the test set
        AtTrue = meshKernelFull(kernel, RtoX(b1, testPts[1]), RtoX(b2, testPts[2]))
        (UtApp, FtApp, VtApp) = evalKernelInterpolators(kernel, b1, b2, testPts[1], testPts[2], interpolators[1], interpolators[2])
        err = relativeErrorAppTrue(UtApp*FtApp*VtApp', AtTrue)
        if logLevel >= SI.verbose
            @printf "%d. Initial error %e\n" iteration err
        end

        t_initinterp = time_ns()
        diags["time_initinterp_ns"]=t_initinterp-t_ordermax

        #
        # 3. Find true tensor (i.e., find Xbar and Ybar)
        #

        interpolators = getTensorInterpolatorAdaptive(interpolators,
                                                      Is,
                                                      ordersmax,
                                                      dims,
                                                      testPts,
                                                      bs,
                                                      kernel,
                                                      AtTrue,
                                                      iteration, 
                                                      (tolInterp * tol),
                                                      iterations_max,
                                                      err,
                                                      ruletype,
                                                      logLevel)

        t_interp = time_ns()
        diags["time_interp_ns"] = t_interp - t_initinterp

        # The adaptive search has finished (converged or aborted with a warning)
        # Output the real stuff
        Ub = nothing
        Vb = nothing
        if useSI
            # Only the kernel on the node grid Xbar x Ybar is needed
            F = meshKernelFull(kernel, RtoX(b1, interpolators[1].xk), RtoX(b2, interpolators[2].xk))
        else
            # Need the basis as well
            # (evalKernelInterpolators expects R-space points, hence XtoR on the mesh points)
            (Ub, F, Vb) = evalKernelInterpolators(kernel, b1, b2, XtoR(b1, evalPts1), XtoR(b2, evalPts2), interpolators[1], interpolators[2])
        end
        t_eval = time_ns()
        diags["time_eval_ns"] = t_eval - t_interp

        if useSI

            # 4. RRQR smart idea

            # Pivot points
            tolQR = 1
            if logLevel >= SI.debug
                @printf "Using SI technique\n"
                @printf "Was Using tolInterp = %e\n" tolInterp
                @printf "Using tolQR = %e\n" tolQR
                @printf "Effective tolQR * tol = %e\n" (tolQR * tol)
            end

            #
            # Find F weighted
            #
            # Rows scaled by sqrt of the x node weights, columns by sqrt of the y node weights
            Fw = diagm(sqrt.(interpolators[1].w_intk)) * F * diagm(sqrt.(interpolators[2].w_intk))

            # X
            # Column-pivoted QR of Fw': pFx ranks the rows of Fw, i.e. the nodes of Xbar
            (QFx, RFx, pFx) = qr(Fw', Val{true})
            nPx = cut_spectrum_l2(abs.(diag(RFx)), tolQR * tol, bypasssort=true)
            
            # Y
            # Same on the selected rows only: pFy ranks the nodes of Ybar
            (QFy, RFy, pFy) = qr(Fw[pFx[1:nPx],:], Val{true})
            nPy = cut_spectrum_l2(abs.(diag(RFy)), tolQR * tol, bypasssort=true)

            # Some analysis
            # errX / errY are the relative errors of the two truncated pivoted QRs;
            # they stay NaN unless extraDiags is set
            errX = NaN
            errY = NaN
            if extraDiags   
                errX = norm(QFx[:,1:nPx] * RFx[1:nPx,:] - Fw'[:,pFx])/norm(Fw) 
                errY = norm(QFy[:,1:nPy] * RFy[1:nPy,:] - Fw[pFx[1:nPx],pFy])/norm(Fw[pFx[1:nPx],:])
                (UFw, SFx, VFw) = svd(Fw)
                diags["errX"] = errX
                diags["errY"] = errY
                diags["Fw"] = Fw
                diags["Fw_svds"] = SFx
                diags["RFx"] = RFx
                diags["RFy"] = RFy
                diags["QFx"] = QFx
                diags["QFy"] = QFy
                diags["pFx"] = pFx
                diags["pFy"] = pFy 
                diags["nPx"] = nPx
                diags["nPy"] = nPy
                diags["Fw_wx"] = interpolators[1].w_intk
                diags["Fw_wy"] = interpolators[2].w_intk
            end
                
            @assert nPx > 0
            @assert nPy > 0

            if logLevel >= SI.verbose
                @printf "Originally nPx = %d, nPy = %d (err %e, %e - don't worry if you see NaNs here) \n" nPx nPy errX errY
            end

            # Keep the same number of nodes on both sides so that K(Xhat, Yhat) is square
            nP = max(nPx, nPy)
            nPx = nP
            nPy = nP
            
            if extraDiags
                diags["wx"] = interpolators[1].w_intk[pFx[1:nPx]]
                diags["wy"] = interpolators[2].w_intk[pFy[1:nPy]]
            end

            #
            # Build Xhat and Yhat
            #
            evalPts1Extra = RtoX(b1,interpolators[1].xk[:,pFx[1:nPx]])
            evalPts2Extra = RtoX(b2,interpolators[2].xk[:,pFy[1:nPy]])
           
            if logLevel >= SI.info
                @printf "Recompressing from rank %d to %d using Skeletonized Interpolation\n" minimum(size(Fw)) nP
            end
            
            t_qr = time_ns()
            diags["time_qr_ns"] = t_qr - t_eval

            #
            # Build K(X, Yhat), K(Xhat, Y)
            # Factor K(Xhat, Yhat)
            #
            U = meshKernelFull(kernel, evalPts1, evalPts2Extra)
            S = lufact(meshKernelFull(kernel, evalPts1Extra, evalPts2Extra)) # Better than inv of course, but supports same ops
            V = meshKernelFull(kernel, evalPts1Extra, evalPts2)'

            t_lu = time_ns()
            diags["time_lu_ns"] = t_lu - t_qr

            #
            # That's it - as promised :)
            #
            # In this case, the factorization is (U * Sinv * V') = U * (S \ V')
            # (Ub and Vb are `nothing` here; F is the unweighted kernel on Xbar x Ybar)
            return (U, S, V, evalPts1Extra, evalPts2Extra, Ub, F, Vb, interpolators[1], interpolators[2], diags)

        else

            #
            # In the usual method we are given K(X, Y) ~= Ub * K(Xbar, Ybar) * Vb'
            # that we further recompress
            #
            tolSVD = 0.1
            if logLevel >= SI.debug
                @printf "[Info] Using tolSVD = %e\n" tolSVD
            end
            # 4. Recompress
            # With Ub = Qu*Ru and Vb = Qv*Rv, Ub*F*Vb' = Qu*(Ru*F*Rv')*Qv', so only the
            # small core Ru*F*Rv' needs an SVD
            (Qu,Ru) = qr(Ub)
            (Qv,Rv) = qr(Vb)
            (Us,Ss,Vs) = svd(Ru*F*Rv')
            # Keep the singular values that are at least tol*tolSVD times the largest one
            idxToKeep = find(x -> x >= Ss[1]*tol*tolSVD, Ss)
            Us = Us[:,idxToKeep]
            Ss = Ss[idxToKeep]
            Vs = Vs[:,idxToKeep]
            if logLevel >= SI.info
                @printf "Recompression from rank %d to %d using classical method and SVD\n" minimum(size(F)) minimum(size(Ss)) 
            end
            U = Qu*Us
            V = Qv*Vs
            
            t_svd = time_ns()
            diags["time_svd_ns"] = t_svd - t_eval
            # Here the factorization is U * diagm(Ss) * V' and the returned nodes are
            # the full Xbar, Ybar mapped to X space
            return (U, diagm(Ss), V, RtoX(b1,interpolators[1].xk), RtoX(b2,interpolators[2].xk), Ub, F, Vb, interpolators[1], interpolators[2], diags)

        end

    end

    # We're all done
    # (not reached: every branch above returns)

end
