"""
    fastMeshKernelLowRankApprox(kernel, Ix, Iy, X, Y, tol;
                                ruletype, tolInterp, tolQR, logLevel,
                                refx, refy, nRandom)

Build a low-rank factorization of the kernel matrix between the point sets `X` and `Y`.

The function proceeds in four stages:

1. For every coordinate of `Ix` and of `Iy`, a 1-D interpolation order is raised from 1
   until the relative max-norm interpolation error of `kernel`, measured at `nRandom`
   uniformly random points of that interval, is at most `tolInterp * tol`, or until the
   order reaches 40 (a warning is emitted in that case). While one coordinate varies,
   all other coordinates are frozen at the reference points.
2. Tensor interpolators are built from those orders; their nodes are `Xbar` and `Ybar`.
3. The kernel matrix on `Xbar`/`Ybar`, scaled by the square roots of the interpolator
   weights, is factorized with column-pivoted QR (once transposed, once not). The pivots
   pick the subsets `Xhat` and `Yhat`; the retained rank is the larger of the two ranks
   returned by `cut_spectrum_l2` with threshold `tolQR * tol`.
4. `U = K(X, Yhat)`, `S = lufact(K(Xhat, Yhat))` and `V = K(Xhat, Y)'` are recompressed
   with two QR factorizations and one SVD, truncated by `SI.cut_spectrum_fro` with
   threshold `tol`.

# Arguments
- `kernel`: called as `kernel(x, y)`, where `x` and `y` are matrices with one column per
  evaluation point.
- `Ix`, `Iy`: one interval per coordinate; the fields `a`, `b` and `mid` are read.
- `X`, `Y`: point sets handed to `meshKernelFull` (presumably one point per column;
  confirm against `meshKernelFull`).
- `tol`: target tolerance, see the stages above for how it is used.

# Keywords
- `ruletype`: interpolation rule passed to `TensorInterpolator`.
- `tolInterp`, `tolQR`: multipliers applied to `tol` in stages 1 and 3.
- `logLevel`: compared against `SI.debug` and `SI.info` to enable printing.
- `refx`, `refy`: reference points used in stage 1. Each one defaults, independently of
  the other, to the vector of interval midpoints when it is `nothing`.
- `nRandom`: number of random test points per interpolation error estimate.

# Returns
The tuple `(U, S, V, Xhat, Yhat, Xbar, Ybar, Ucomp, Scomp, Vcomp)`. The uncompressed
approximation is `U * inv(S) * V'` (apply `S` with a solve, it is an LU factorization);
the compressed one is `Ucomp * Scomp * Vcomp'`.
"""
function fastMeshKernelLowRankApprox(kernel,
                                     Ix::Array{Interval{Float64},1},
                                     Iy::Array{Interval{Float64},1},
                                     X::Array{Float64, 2},
                                     Y::Array{Float64, 2},
                                     tol::Float64;
                                     ruletype::RULETYPE=SI.chebyshev,
                                     tolInterp = 1.0,
                                     tolQR = 0.5,
                                     logLevel=SI.verbose,
                                     refx=nothing,
                                     refy=nothing,
                                     nRandom = 50)

    Is = [Ix, Iy]
    dims = [length(Ix), length(Iy)]

    # Default each reference point on its own: a caller who supplies only one of
    # refx / refy must not have it silently replaced by the midpoints.
    xref = refx === nothing ? Float64[iv.mid for iv in Ix] : refx
    yref = refy === nothing ? Float64[iv.mid for iv in Iy] : refy

    maxOrder = 40
    orders = [zeros(Int64, dims[1]), zeros(Int64, dims[2])]

    # Get a rough estimate of the order, one coordinate at a time
    for xy = 1:2
        for d = 1:dims[xy]
            interval = Is[xy][d]

            # Kernel restricted to coordinate d of x (xy == 1) or of y (xy == 2);
            # every other coordinate is pinned to the reference point.
            # pts is [dim=1 x #pts]. Built once per coordinate: it does not depend
            # on the order being tried.
            function kerneld(pts)
                @assert size(pts, 1) == 1
                n = size(pts, 2)
                rowones = ones(1, n)
                if xy == 1
                    x = vcat(xref[1:d-1] * rowones, pts, xref[d+1:end] * rowones)
                    y = yref * rowones
                else
                    x = xref * rowones
                    y = vcat(yref[1:d-1] * rowones, pts, yref[d+1:end] * rowones)
                end
                return kernel(x, y)
            end

            orders[xy][d] = 1
            while true
                # Fresh random candidate points on [a, b] for every order tried
                samples = interval.a .+ (interval.b - interval.a) .* rand(nRandom)
                sort!(samples)
                samples = reshape(samples, (1, nRandom))

                interp = TensorInterpolator{Float64}([interval], [orders[xy][d]], ruletype)
                # Interpolated versus true values at the candidate points
                fapp = interpolate(interp, samples, kerneld)
                ftrue = kerneld(samples)
                # Relative max-norm error. NOTE: if ftrue is identically zero this is
                # NaN, the tolerance test below fails and the order runs up to maxOrder.
                err = maximum(abs.(fapp - ftrue)) / maximum(abs.(ftrue))

                if logLevel >= SI.debug
                    if xy == 1
                        @printf "x[%d] order %d nptsx %d error %e <?> %e\n" d orders[xy][d] size(interp.xk)[2] err (tolInterp * tol)
                    else
                        @printf "y[%d] order %d nptsy %d error %e <?> %e\n" d orders[xy][d] size(interp.xk)[2] err (tolInterp * tol)
                    end
                end

                # Decide what to do
                if err <= (tolInterp * tol)
                    print_log("Tolerance reached.", logLevel, SI.debug)
                    break
                end
                if orders[xy][d] >= maxOrder
                    warn("Max order reached.")
                    break
                end
                orders[xy][d] += 1
            end
        end
    end

    # Get Xbar, Ybar: tensor interpolation nodes and weights for the estimated orders
    interpx = TensorInterpolator{Float64}(Ix, orders[1], ruletype)
    interpy = TensorInterpolator{Float64}(Iy, orders[2], ruletype)

    Xbar = interpx.xk
    Ybar = interpy.xk
    Wx   = interpx.w_intk
    Wy   = interpy.w_intk

    # Get Xhat, Yhat: pivoted QR of the weighted kernel matrix, in both directions
    F = meshKernelFull(kernel, Xbar, Ybar)
    Fw = diagm(sqrt.(Wx)) * F * diagm(sqrt.(Wy))

    (QFx, RFx, pFx) = qr(Fw', Val{true})
    nPx = cut_spectrum_l2(abs.(diag(RFx)), tolQR * tol, bypasssort=true)

    (QFy, RFy, pFy) = qr(Fw, Val{true})
    nPy = cut_spectrum_l2(abs.(diag(RFy)), tolQR * tol, bypasssort=true)

    # Use one common rank so that the Xhat-by-Yhat kernel block is square
    nP = max(nPx, nPy)

    if logLevel >= SI.info
        @printf "RRQR compressed Xbar, Ybar from rank %d to %d\n" min(size(Xbar,2),size(Ybar,2)) nP
    end

    Xhat = copy(Xbar[:, pFx[1:nP]])
    Yhat = copy(Ybar[:, pFy[1:nP]])

    @assert size(Xhat,2) == nP
    @assert size(Yhat,2) == nP

    U = meshKernelFull(kernel, X, Yhat)
    Khat = meshKernelFull(kernel, Xhat, Yhat)
    S = lufact(Khat) # Better than inv of course, but supports same ops
    Kxy = meshKernelFull(kernel, Xhat, Y)
    V = Kxy'

    # Final re-compression of L * R with L = U and R = S \ V'.
    # V' is Kxy itself, so solve against Kxy rather than transposing twice.
    L = U
    R = S \ Kxy

    Lqr = qrfact(L)
    Ql = Lqr[:Q]
    Rl = Lqr[:R]
    Rqr = qrfact(R')
    Qr = Rqr[:Q]
    Rr = Rqr[:R]
    # SVD of the small core Rl * Rr' yields the SVD of L * R
    USV = svdfact(Rl*Rr')
    Slr = USV[:S]
    U2 = Ql * USV[:U]
    V2 = Qr * USV[:V]
    nKeep = SI.cut_spectrum_fro(Slr, tol)
    if logLevel >= SI.info
        @printf "SVD compressed Xhat, Yhat from rank %d to %d\n" nP nKeep
    end

    Ucomp = U2[:,1:nKeep]
    Scomp = diagm(Slr[1:nKeep])
    Vcomp = V2[:,1:nKeep]

    # Factorization is U*(S\V') (uncompressed)
    # or Ucomp * Scomp * Vcomp' (compressed)
    return (U, S, V, Xhat, Yhat, Xbar, Ybar, Ucomp, Scomp, Vcomp)

end
