#include<stdlib.h>

#include<algorithm>
#include<fstream>
#include<iostream>
#include<memory>
#include<random>
//#include<sstream>
#include<string>
#include<utility>
#include<vector>

// Compile-time problem dimensions. They size every panel / PBWT array in this
// file and are checked against the auxiliary input files at run time.
// M: number of haplotypes (rows of the panel arrays; alleles read per VCF row).
#define M 860022
// N: number of sites at full resolution (columns of the panel; VCF data rows read).
#define N 9793
// thinN: number of sites in the low-resolution panel; main() requires exactly
// this many entries in the "sitesToImpute" file.
#define thinN 979
// testSize: number of haplotypes held out of the PBWT and imputed;
// imputationBenchmark() requires exactly this many entries in "hapsToImpute".
#define testSize 1000
// subsets is handed to imputationBenchmark(), which does not read it.
const int numSubsets = 1;
const int subsets[numSubsets] = {M-testSize};
// Values of h for which an h-MPSC imputation is benchmarked.
const int numHs = 6;
const int hValues[numHs] = {2, 3, 4, 5, 8, 16};


//-----------------------------------------------------------------------------
//source for time: https://stackoverflow.com/questions/17432502/how-can-i-measure-cpu-time-and-wall-clock-time-on-both-linux-windows
#include<time.h>
#include<sys/time.h>
// Returns the current wall-clock time in seconds, as reported by
// gettimeofday() (seconds plus microseconds).
// If gettimeofday() fails, an error is written to std::cerr and 0.0 is
// returned instead of reading an uninitialised timeval.
double get_wall_time(){
    struct timeval now = {0, 0};
    if (gettimeofday(&now, nullptr) != 0){
        std::cerr<< "ERROR reading real time\n";
        return 0.0;
    }
    return (double)now.tv_sec + (double)now.tv_usec * .000001;
}
// Returns the processor time consumed by the program so far, in seconds,
// i.e. clock() scaled by CLOCKS_PER_SEC.
double get_cpu_time(){
    const clock_t ticks = clock();
    const double seconds = (double)ticks / CLOCKS_PER_SEC;
    return seconds;
}
//-----------------------------------------------------------------------------

// Forward declarations.
// NOTE(review): the PBWT prototype below takes N-wide arrays, whereas the PBWT
// defined later in this file takes thinN-wide arrays. The prototype therefore
// declares a separate overload that has no definition here; confirm whether it
// should be removed or changed to match.
void ReadVCF(const char *inFile, bool panel[][N]);
//void Randomize(int[], int);
//void Randomize(bool[], int);
void PBWT(bool panel[][N], int prefix[][N+1], int divergence[][N+1],
          int u[][N], int v[][N], int order[], int num);

// Loads a phased VCF file into panel.
//
// inFile: path of the VCF file.
// panel:  output, at least M rows of N columns; panel[i][j] is set to true iff
//         the allele character of haplotype i at data row (site) j is '1'.
//
// Lines are skipped up to and including the first one starting with "#C"
// (the column header). Each of the next N rows must have 9 leading fields
// followed by genotypes laid out with a fixed stride of two characters per
// haplotype (a separator character, then the allele), for M haplotypes.
//
// On any failure (file cannot be opened, header missing, row missing or too
// short) a message is printed to std::cout and the program exits with status 1.
void ReadVCF(const char* inFile, bool panel[][N]){
    std::ifstream in(inFile);
    if (!in.is_open()){
        std::cout << "Panel VCF file failed to open!\n";
        exit(1);
    }

    // Skip meta lines; stop after the column header. Terminates at EOF instead
    // of looping forever when the header is absent.
    std::string line;
    bool headerFound = false;
    while (std::getline(in, line)){
        if (line.size() >= 2 && line[0] == '#' && line[1] == 'C'){
            headerFound = true;
            break;
        }
    }
    if (!headerFound){
        std::cout << "Panel VCF file has no column header line!\n";
        exit(1);
    }

    // The rest of a row starts with the separator that follows the 9th field,
    // so alleles sit at odd offsets 1, 3, ..., 2*M-1.
    const std::size_t minRowLength = 2 * static_cast<std::size_t>(M);
    std::string field;
    for (int j = 0; j < N; ++j){
        for (int f = 0; f < 9; ++f)
            in >> field;
        if (!in || !std::getline(in, line) || line.size() < minRowLength){
            std::cout << "Panel VCF file is truncated or malformed at site " << j << "!\n";
            exit(1);
        }
        std::size_t k = 1;
        for (int i = 0; i < M; ++i){
            panel[i][j] = (line[k] == '1');
            k += 2;
        }
    }
    in.close();
}

/*void Randomize(int order[], int num){
    std::random_device seed;
    int s = seed();
    std::cout << "seed:" << s << std::endl;
    std::default_random_engine generator(s);
    for (int i = num-1; i>0; --i){
        std::uniform_int_distribution<int> dis(0, i);
        std::swap(order[i], order[dis(generator)]);
    }
}

void Randomize(bool array[], int num){
    std::random_device seed;
    int s = seed();
    std::cout << "seed:" << s << std::endl;
    std::default_random_engine generator(s);
    for (int i = num-1; i>0; --i){
        std::uniform_int_distribution<int> dis(0, i);
        std::swap(array[i], array[dis(generator)]);
    }
}*/

// Builds the positional prefix arrays, divergence arrays and the u/v index
// arrays over the haplotypes listed in order[0..num).
//
// panel:      panel[h][k] is the allele of haplotype h at site k.
// prefix:     output; column 0 is a copy of order, column k+1 is column k
//             stably partitioned by the allele at site k (false first).
// divergence: output; column 0 is all zeros, later columns follow the
//             running-maximum update below.
// u, v:       output; u[i][k] is the number of false alleles at site k among
//             rows before i of prefix column k, v[i][k] is the total number of
//             false alleles at site k plus the number of true alleles before i.
// order:      initial haplotype ordering.
// num:        number of haplotypes to process (rows written in every output).
void PBWT(bool panel[][thinN], int prefix[][thinN+1], int divergence[][thinN+1],
        int u[][thinN], int v[][thinN], int order [], int num){
    for (int row = 0; row < num; ++row){
        prefix[row][0] = order[row];
        divergence[row][0] = 0;
    }

    for (int site = 0; site < thinN; ++site){
        std::vector<int> zeroHaps, oneHaps;   // haplotypes by allele at this site
        std::vector<int> zeroDiv, oneDiv;     // matching divergence values
        int zerosSeen = 0, onesSeen = 0;
        // Running maxima of the divergence since the last haplotype of each allele.
        int zeroRunMax = site + 1, oneRunMax = site + 1;

        for (int row = 0; row < num; ++row){
            u[row][site] = zerosSeen;
            v[row][site] = onesSeen;

            const int hap = prefix[row][site];
            const int div = divergence[row][site];
            zeroRunMax = std::max(zeroRunMax, div);
            oneRunMax = std::max(oneRunMax, div);

            if (panel[hap][site]){
                oneHaps.push_back(hap);
                oneDiv.push_back(oneRunMax);
                ++onesSeen;
                oneRunMax = 0;
            }
            else{
                zeroHaps.push_back(hap);
                zeroDiv.push_back(zeroRunMax);
                ++zerosSeen;
                zeroRunMax = 0;
            }
        }

        // Next column: all false-allele haplotypes first, then the true-allele
        // ones; v is offset so that it indexes into the second group.
        const int zeroCount = static_cast<int>(zeroHaps.size());
        for (int row = 0; row < num; ++row){
            v[row][site] += zeroCount;
            if (row < zeroCount){
                prefix[row][site+1] = zeroHaps[row];
                divergence[row][site+1] = zeroDiv[row];
            }
            else{
                prefix[row][site+1] = oneHaps[row - zeroCount];
                divergence[row][site+1] = oneDiv[row - zeroCount];
            }
        }
    }
}

// Loads the first testSize haplotypes of a phased VCF into panel.
//
// inFile: path of the VCF file.
// panel:  output, at least testSize rows of N columns; panel[i][j] is true iff
//         the corresponding allele character at data row (site) j is '1'.
//
// Lines are skipped up to and including the first one starting with "#C".
// Each of the next N rows is read as 9 leading fields followed by testSize/2
// genotype entries. For every entry three characters are read (first allele,
// separator, second allele) and then one more whitespace-delimited token,
// which is discarded.
// NOTE(review): this assumes every genotype entry carries trailing text after
// the three genotype characters (otherwise the discarded token would be the
// next genotype) - confirm against the format of the input file.
//
// On any failure a message is printed to std::cout and the program exits with
// status 1.
void readBeagleVCF(const char * inFile, bool panel [][N]){
    // Haplotypes are stored in pairs, so an odd testSize would write one row
    // past the requested range.
    static_assert(testSize % 2 == 0, "testSize must be even: haplotypes are read in pairs");

    std::ifstream in(inFile);
    if (!in.is_open()){
        std::cout << "Panel VCF file failed to open!\n";
        exit(1);
    }

    // Skip meta lines; stop after the column header. Terminates at EOF instead
    // of looping forever when the header is absent.
    std::string line;
    bool headerFound = false;
    while (std::getline(in, line)){
        if (line.size() >= 2 && line[0] == '#' && line[1] == 'C'){
            headerFound = true;
            break;
        }
    }
    if (!headerFound){
        std::cout << "Panel VCF file has no column header line!\n";
        exit(1);
    }

    std::string field;
    for (int j = 0; j < N; ++j){
        for (int f = 0; f < 9; ++f)
            in >> field;
        for (int i = 0; i < testSize; i += 2){
            char first = 0, separator = 0, second = 0;
            in >> first >> separator >> second >> line;
            if (!in){
                std::cout << "Panel VCF file is truncated or malformed at site " << j << "!\n";
                exit(1);
            }
            panel[i][j] = (first == '1');
            panel[i+1][j] = (second == '1');
        }
    }
    in.close();
}

// Compares the haplotypes read from "BGL.vcf" with the true panel.
//
// beagleImputedCorrectly: output, testSize x N; entry [i][j] is true iff
//                         lowRes[j] is false and the allele read from BGL.vcf
//                         for row i at site j equals
//                         panel[order[M-testSize+i]][j].
// lowRes:                 per-site flag; sites with lowRes[j] == true are
//                         excluded from the comparison.
// order:                  haplotype ordering; its last testSize entries are the
//                         haplotypes being compared.
// panel:                  the true panel.
//
// Prints "smooth Beagle overall: <correct>/<total>" where total counts every
// (row, site) pair with lowRes[site] == false.
void checkBeagleCorrectness(bool beagleImputedCorrectly[][N], bool lowRes[], int order[], bool panel[][N]){
    // testSize * N bytes is too large to place on the stack safely.
    std::unique_ptr<bool[][N]> beaglePanel(new bool[testSize][N]);
    readBeagleVCF("BGL.vcf", beaglePanel.get());
    unsigned long long total = 0, correct = 0;
    for (int i = 0; i<testSize; ++i){
        const int hap = order[M-testSize+i];
        for (int j = 0; j<N; ++j){
            beagleImputedCorrectly[i][j] = (!lowRes[j] && panel[hap][j] == beaglePanel[i][j]);
            correct += beagleImputedCorrectly[i][j];
            total += !lowRes[j];
        }
    }
    std::cout << "smooth Beagle overall: " << correct << '/' << total << std::endl;
}

// Runs the imputation benchmark.
//
// The haplotype indices listed in the file "hapsToImpute" (exactly testSize of
// them) are held out; a PBWT is built over the remaining M-testSize haplotypes
// of the low-resolution panel. For every held-out haplotype the sites with
// lowRes[site] == false are imputed by voting among matching haplotypes, using
//   * the length-maximal MPSC,
//   * all set-maximal matches weighted by the fraction of paths through them
//     ("solution space"), and
//   * an h-MPSC for every h in hValues,
// and each result is tallied next to the Beagle result for the same site.
// One line of nine tab-separated counters is printed per method.
//
// panel:             full-resolution panel (M x N), the source of truth.
// smoothlowResPanel: low-resolution panel (M x thinN) used for matching.
// prefix, divergence, u, v: scratch/output arrays filled by PBWT().
// lowRes:            lowRes[site] is true for sites present in the
//                    low-resolution panel (these are not imputed).
// highLowMap:        not read by this function.
// lowHighMap:        lowHighMap[j] is the full-resolution index of
//                    low-resolution site j.
// subsets:           not read by this function.
//
// Exits with status 1 if "hapsToImpute" cannot be opened, contains an index
// >= M, or does not contain exactly testSize entries.
void imputationBenchmark(bool panel[][N], bool smoothlowResPanel[][thinN], int prefix[][thinN+1],
        int divergence[][thinN+1], int u[][thinN], int v[][thinN], bool lowRes[],
        int highLowMap[], int lowHighMap[], const int subsets[]){
    (void)highLowMap;
    (void)subsets;

    // order: reference haplotypes in [0, M-testSize), test haplotypes after.
    // Kept on the heap: M ints is too large for a default-sized stack.
    std::vector<int> order(M);
    for (int y = 0; y<M; ++y){
        order[y] = y;
    }
    std::ifstream hapsIn("hapsToImpute");
    if (!hapsIn.is_open()){
        std::cout << "Haplotypes to impute file failed to open!\n";
        exit(1);
    }
    // NOTE(review): the swap-based partition below relies on order[hap] still
    // holding hap when it is swapped, which presumably requires the listed
    // indices to be distinct and not to collide with earlier swaps - confirm
    // against how hapsToImpute is generated.
    unsigned long long count = 0, hap = 0;
    while (hapsIn >> hap){
        ++count;
        // Surplus entries are only counted so the mismatch is reported below;
        // swapping them would write past the end of order.
        if (count > testSize)
            continue;
        if (hap >= M){
            std::cout << "Haplotype index " << hap << " in haplotypes to impute file is out of range (expected < " << M << ").\n";
            exit(1);
        }
        std::swap(order[M-testSize+count-1], order[hap]);
    }
    hapsIn.close();
    if (count != testSize){
        std::cout << "Found " << count << " haplotypes in haplotypes to impute file, expected " << testSize << ".\n";
        exit(1);
    }
    std::sort(order.begin(), order.begin() + (M - testSize));
    std::sort(order.begin() + (M - testSize), order.end());

    //beagleImputedCorrectly[i][j] stores whether the jth site for haplotype order[M-testSize+i] was imputed correctly (false if not imputed)
    std::unique_ptr<bool[][N]> beagleImputedCorrectly(new bool[testSize][N]);
    checkBeagleCorrectness(beagleImputedCorrectly.get(), lowRes, order.data(), panel);
    //0: imputed correctly
    //1: imputed incorrectly
    //2: failed to impute due to no MPSC
    //3: failed to impute due to non unanimous vote
    //4: leftover
    //5: Beagle imputed correctly
    //6: Beagle imputed incorrectly
    //7: Beagle not counted due to MPSC failing to impute
    //8: Beagle leftover
    int imputed[9]    = {0,0,0,0,(N-thinN)*testSize,0,0,0,(N-thinN)*testSize}; //lengthMAXMPSC
    int imputedSol[9] = {0,0,0,0,(N-thinN)*testSize,0,0,0,(N-thinN)*testSize}; //solution space
    int imputedH[numHs][9];
    for (int i = 0; i<numHs; ++i){
        imputedH[i][0] = imputedH[i][1] = imputedH[i][2] = imputedH[i][3] = 0;
        imputedH[i][4] = (N-thinN)*testSize;
        imputedH[i][5] = imputedH[i][6] = imputedH[i][7] = 0;
        imputedH[i][8] = (N-thinN)*testSize;
    }

    double oldtime, oldcputime;
    oldtime = get_wall_time();
    oldcputime = get_cpu_time();
    // Only the first M-testSize rows of prefix/divergence/u/v are populated.
    PBWT(smoothlowResPanel, prefix, divergence, u, v, order.data(), M-testSize);
    std::cout << "Building PBWT took " << get_cpu_time() - oldcputime << " CPU seconds and " << get_wall_time()-oldtime << " real seconds"<<std::endl;

    for (int q = 0; q < testSize; ++q){
        oldtime = get_wall_time();
        oldcputime = get_cpu_time();
        //get length maximal MPSC
        int Oid = order[M - testSize + q]; 
        //t[i] is the insertion position of the query in column i of the PBWT
        int t[thinN+1];
        t[0] = 0;
        for (int i = 0; i<thinN; ++i)
            if (t[i] != M-testSize)
                t[i+1] = (smoothlowResPanel[Oid][i]) ? v[t[i]][i] : u[t[i]][i];
            else
                t[i+1] = (smoothlowResPanel[Oid][i]) ? M-testSize : v[0][i];

        //zd[j] / bd[j]: start of the match ending at site j-1 with the haplotype
        //just above / at the insertion position; mind[j] is the smaller of the two
        int zd[thinN+2], bd[thinN+2], mind[thinN+2];
        zd[thinN+1] = bd[thinN+1] = mind[thinN+1] = thinN;
        std::vector<int> setMaxStart, setMaxEnd;
        for (int j = thinN; j>=0; --j){
            zd[j] = std::min(zd[j+1], j);
            bd[j] = std::min(bd[j+1], j);
            if (t[j] != 0)
                while (zd[j] && smoothlowResPanel[Oid][zd[j]-1] == smoothlowResPanel[prefix[t[j]-1][j]][zd[j]-1])
                    --zd[j];
            if (t[j] != M-testSize)
                while (bd[j] && smoothlowResPanel[Oid][bd[j]-1] == smoothlowResPanel[prefix[t[j]  ][j]][bd[j]-1])
                    --bd[j];

            mind[j] = std::min (zd[j], bd[j]);
            if (mind[j] < mind[j+1]){
                setMaxStart.push_back(mind[j]);
                setMaxEnd.push_back(j-1);
            }
        }

        //get leftmost mpsc
        std::vector<int> l; 
        int k = mind[thinN], oldk = thinN;
        while (k != oldk){
            l.push_back(k);
            oldk = k;
            k = mind[k];
        }
        if (k != 0){
            std::cout << Oid << ":No MPSC in test " << 0 << "\n";
            imputed[2] += N-thinN;
            imputed[7] += N-thinN;
            imputed[4] -= N-thinN;
            imputed[8] -= N-thinN;
            imputedSol[2] += N-thinN;
            imputedSol[7] += N-thinN;
            imputedSol[4] -= N-thinN;
            imputedSol[8] -= N-thinN;
            for (int i = 0; i<numHs; ++i){
                imputedH[i][2] += N-thinN;
                imputedH[i][7] += N-thinN;
                imputedH[i][4] -= N-thinN;
                imputedH[i][8] -= N-thinN;
            }
            //no MPSC exists of this haplotype
            //skip all other benchmarks

            //output failure in output files
            continue;
        }

        //get rightmost MPSC
        //b is the opposite of the divergence array
        //b[i] is the end of the longest match starting at position i, i-1 if none
        //mind[i] is the start of the longest match ending at position i-1, i if none
        std::vector<int> b(thinN);
        //calculation of b array, d is first site calculated so far
        //k is current site
        int d = thinN; 
        k = thinN;
        while (d != 0){
            while (mind[k] < d)
                b[--d] = k-1;
            --k;
        }

        //ending point of i th positional substring in rightmost MPSC
        std::vector<int> r;
        //k = first site not covered so far
        k = 0;
        while (k != thinN){
            r.push_back(b[k]);
            k = b[k]+1;
        }

        //length maximal MPSC output
        int mpscSize = r.size(),
            numSMMs = setMaxStart.size();
        std::vector<std::vector<int>> SMMContains(mpscSize);
        if (numSMMs == 1){
            SMMContains[0].push_back(0);
        }
        else{
            //identifying set maximal matches that fully contain each fragile region
            //SMMContains[i] is an array of the set maximal matches that contain the i-th required region
            //SMMContains[i] is sorted by first site of each set maximal match
            //the 0-th and |C|-1 th required regions are only fully contained by the first and last set maximal
            //matches because they are the only SMMs that contain sites 0 and benchmarkN-1 respectively
            SMMContains[0].push_back(0);
            SMMContains[mpscSize-1].push_back(numSMMs-1);
            //with at most two required regions there is no interior region to
            //fill in; skipping also avoids indexing l with a negative index
            if (mpscSize > 2){
                //NOTE: REMEMBER, setMaxStart[i] is the start of the numSMMs-i-1 th set maximal match
                int currentRegion = 1;
                int currRegStart = r[currentRegion-1]+1;
                int currRegEnd = l[mpscSize-currentRegion-2]-1;
                for (int j = numSMMs-2; j>0; --j){
                    //j is the index of the i-th SMM in setMaxStart for i = 1 to numSMMs-2. i = numSMMs-j-1
                    //l[mpscSize-currentRegion-2]-1 is the last site in the currentRegion required region
                    //r[currentRegion-1]+1 is the first site in the currentRegion required region
                    while(currRegStart < setMaxStart[j]){
                        ++currentRegion;
                        if (currentRegion > mpscSize-2)
                            break;
                        //if currentRegion = 0, start = 0
                        currRegStart = r[currentRegion-1]+1;
                        currRegEnd = l[mpscSize-currentRegion-2]-1;
                    }
                    if (currentRegion > mpscSize-2)
                        break;
                    if (setMaxEnd[j] >= currRegEnd)
                        SMMContains[currentRegion].push_back(numSMMs-j-1);
                }
            }
        }
        //holds length of longest path from node to sink
        std::vector<std::vector<int>> mpscLength(mpscSize);
        for (int j = 0; j<mpscSize; ++j)
            mpscLength[j].assign(SMMContains[j].size(), 0);
        //calculate lengths
        //length of longest path of last SMM is the length of the last SMM
        mpscLength[mpscSize-1][0] = setMaxEnd[0] - setMaxStart[0] + 1;
        for (int j = mpscSize-2; j>=0; --j){
            //calculate length of longest paths of j th required region
            //using lengths of longest paths of j+1 th required region
            //prevMax is max of lengths of paths of nodes 0 through prevMaxIndex (inclusive)
            int prevMax = -1, prevMaxIndex = -1;
            for (int k = 0; k<SMMContains[j].size(); ++k){
                while (prevMaxIndex+1 < SMMContains[j+1].size() && 
                        setMaxEnd[numSMMs - SMMContains[j][k] - 1] >= 
                        setMaxStart[numSMMs - SMMContains[j+1][prevMaxIndex+1] - 1]-1)
                    if (mpscLength[j+1][++prevMaxIndex] > prevMax){
                        prevMax = mpscLength[j+1][prevMaxIndex];
                    }
                mpscLength[j][k] = prevMax + setMaxEnd[numSMMs - SMMContains[j][k] - 1] - setMaxStart[numSMMs - SMMContains[j][k] - 1] + 1;
            }
        }
        //mpscLength[0][0] now contains the length of the length maximal MPSC
        std::vector<int> lengthMAXMPSC(mpscSize, 0);
        lengthMAXMPSC[0] = 0;
        int max = mpscLength[0][0] - 
            (setMaxEnd[numSMMs - 1] - setMaxStart[numSMMs - 1] + 1);
        //backtracking step to find length max mpsc
        for (int j = 1; j < mpscSize; ++j){
            for (int k = 0; k<SMMContains[j].size(); ++k)
                if (mpscLength[j][k] == max){
                    lengthMAXMPSC[j] = SMMContains[j][k];
                    max = mpscLength[j][k] - 
                        (setMaxEnd[numSMMs - SMMContains[j][k] - 1] - setMaxStart[numSMMs - SMMContains[j][k] - 1] + 1);
                    break;
                }
        }


        int votes0[N], votes1[N];   //stores vote counts, -1 if not to be imputed 
        for (int i = 0; i<N; ++i)
            votes0[i] = votes1[i] = (lowRes[i])? -1 : 0; 
        //firstAdj is first site that has current positional substring adjacent to it in full resolution (sites to be imputed don't count towards adjacency)
        //lastAdj is first site after firstAdj that has current positional substring not adjacent to it in full resolution
        //equivalently, lastAdj is first site after current positional substring in low resolution in full resolution
        for (int j = 0; j<mpscSize; ++j){
            int firstAdj = 0;
            if (j > 0)
                firstAdj = lowHighMap[setMaxStart[numSMMs - lengthMAXMPSC[j] - 1] - 1] + 1; 
            int lastAdj =  N; 
            if (j < mpscSize - 1)
                lastAdj = lowHighMap[setMaxEnd[numSMMs - lengthMAXMPSC[j] - 1] + 1];

            bool hasSiteToImpute = false;
            for (int i = firstAdj; i < lastAdj; ++i){
                if (!lowRes[i]) {
                    hasSiteToImpute = true;
                    break;
                }
            }
            if (hasSiteToImpute){
                //get block
                int pbwtCol = setMaxEnd[numSMMs - lengthMAXMPSC[j] - 1] + 1;
                int div = setMaxStart[numSMMs - lengthMAXMPSC[j] - 1]; 
                int blockStart, blockEnd; //block is [blockStart, blockEnd)
                blockStart =  blockEnd = t[pbwtCol];
                //get block
                if (zd[pbwtCol] == div){
                    --blockStart;
                    while (blockStart > 0 && divergence[blockStart][pbwtCol] <= div)
                        --blockStart;
                }
                if (bd[pbwtCol] == div){
                    ++blockEnd;
                    while (blockEnd < M-testSize && divergence[blockEnd][pbwtCol] <= div)
                        ++blockEnd;
                }

                for (int i = firstAdj; i < lastAdj; ++i){
                    if (!lowRes[i]) {
                        for (int k = blockStart; k<blockEnd; ++k)
                            (panel[prefix[k][pbwtCol]][i]) ? (++votes1[i]) : (++votes0[i]);
                    }
                }
            }
        }

        //calculate numPathToEnd
        std::vector<std::vector<long double>> numPathsToEnd(mpscSize),
            numPathsFromBegin(mpscSize);
        for (int j = 0; j<mpscSize; ++j){
            numPathsToEnd[j].assign(SMMContains[j].size(), 0.0L);
            numPathsFromBegin[j].assign(SMMContains[j].size(), 0.0L);
        }

        numPathsToEnd[mpscSize-1][0] = 1;
        for (int j = mpscSize-2; j>=0; --j){
            //calculate numPathsToEnd for j th required region
            //use numPathsToEnd of j+1 th required region
            //prevNumPaths is numPathsToEnd of previous node evaluated
            //prevMaxIndex is index of last node in j+1 th region that has an edge to it from the previous evaluated node
            long double prevNumPaths = 0;
            int prevMaxIndex = -1;
            for (int k = 0; k<SMMContains[j].size(); ++k){
                while (prevMaxIndex+1 <SMMContains[j+1].size() &&
                        setMaxEnd[numSMMs - SMMContains[j][k] - 1] >= 
                        setMaxStart[numSMMs - SMMContains[j+1][prevMaxIndex+1] - 1]-1)
                    prevNumPaths += numPathsToEnd[j+1][++prevMaxIndex];
                numPathsToEnd[j][k] = prevNumPaths;
            }
        }

        numPathsFromBegin[0][0] = 1;
        for (int j = 1; j<mpscSize; ++j){
            //calculate numPathsFromBegin for j th required region
            //use numPathsFromBegin for j-1 th required region
            //prevNumPaths is numPathsFromBegin of previous node evaluated
            //prevMaxIndex is index of first node in j-1 th region that has an edge from it to the previous evaluated node
            long double prevNumPaths = 0;
            int prevMaxIndex = SMMContains[j-1].size();
            for (int k = SMMContains[j].size()-1; k>=0; --k){
                while (prevMaxIndex-1 >= 0 &&
                        setMaxStart[numSMMs - SMMContains[j][k] - 1] <=
                        setMaxEnd[numSMMs - SMMContains[j-1][prevMaxIndex-1] -1]+1)
                    prevNumPaths += numPathsFromBegin[j-1][--prevMaxIndex];
                numPathsFromBegin[j][k] = prevNumPaths;
            }
        }

        //fraction of paths that pass through each set maximal match; matches
        //that appear in no SMMContains list keep weight 0 and contribute no votes
        std::vector<long double> ratioSetMaxMPSCsContaining(numSMMs, 0.0L);
        for (int i = 0; i<mpscSize; ++i)
            for (int j = 0; j<SMMContains[i].size(); ++j)
                ratioSetMaxMPSCsContaining[SMMContains[i][j]] = 
                    (numPathsFromBegin[i][j]/numPathsToEnd[0][0])*numPathsToEnd[i][j];

        long double votes0Sol[N], votes1Sol[N];
        for (int i = 0; i<N; ++i)
            votes0Sol[i] = votes1Sol[i] = (lowRes[i])? -1 : 0;
        //firstAdj is first site that has current set maximal match adjacent to it in full resolution (sites to be imputed don't count towards adjacency)
        //lastAdj is first site after firstAdj that has current set maximal match not adjacent to it in full resolution
        //equivalently, lastAdj is first site after current positional substring in low resolution in full resolution
        for (int j = 0 ; j<numSMMs; ++j){
            int firstAdj = 0;
            if (j > 0)
                firstAdj = lowHighMap[setMaxStart[numSMMs - j - 1] - 1] + 1; 
            int lastAdj =  N; 
            if (j < numSMMs - 1)
                lastAdj = lowHighMap[setMaxEnd[numSMMs - j - 1] + 1];

            bool hasSiteToImpute = false;
            for (int i = firstAdj; i < lastAdj; ++i){
                if (!lowRes[i]) {
                    hasSiteToImpute = true;
                    break;
                }
            }

            if (hasSiteToImpute){
                //get block
                int pbwtCol = setMaxEnd[numSMMs - j - 1] + 1;
                int div = setMaxStart[numSMMs - j - 1]; 
                int blockStart, blockEnd; //block is [blockStart, blockEnd)
                blockStart =  blockEnd = t[pbwtCol];
                //get block
                if (zd[pbwtCol] == div){
                    --blockStart;
                    while (blockStart > 0 && divergence[blockStart][pbwtCol] <= div)
                        --blockStart;
                }
                if (bd[pbwtCol] == div){
                    ++blockEnd;
                    //the PBWT only holds M-testSize rows; rows beyond that were never built
                    while (blockEnd < M-testSize && divergence[blockEnd][pbwtCol] <= div)
                        ++blockEnd;
                }

                for (int i = firstAdj; i < lastAdj; ++i){
                    if (!lowRes[i]) {
                        int v0 = 0, v1 = 0;
                        for (int k = blockStart; k<blockEnd; ++k)
                            (panel[prefix[k][pbwtCol]][i]) ? (++v1) : (++v0);
                        votes0Sol[i] += ratioSetMaxMPSCsContaining[j]*v0;
                        votes1Sol[i] += ratioSetMaxMPSCsContaining[j]*v1;
                    }
                }
            }
        }


        //impute with h-MPSC
        for (int nH = 0; nH < numHs; ++nH){
            std::vector<int> hMatchStart;
            //obtain h-MPSC
            //at the start of the loop, we have the block of haplotypes in 
            //prefix[][j] that match z on [i, j)
            //the block is stored by [f,g), i<= j, f<=g
            //all haplotypes match empty strings (i = j)
            //we obtain the block of sequences that match [i, j+1) in
            //prefix[][j+1], denoted by [fp, gp)
            //if fp gp block doesn't have h haplotypes, we add [i, j) to the h-MPSC
            //if it is not an empty string
            //if it is an empty string, no h-MPSC exists since there are less than
            //h haplotypes with the same allele as z at position i
            int f = 0, g = M-testSize;
            int fp = f, gp = g;
            int i = 0;
            bool exists = true;
            for (int j = 0; j <= thinN; ++j){
                if (j == thinN){
                    fp = gp = 0;
                }
                else{
                    if  (f != M-testSize)
                        fp = (smoothlowResPanel[Oid][j]) ? v[f][j] : u[f][j];
                    else
                        fp = (smoothlowResPanel[Oid][j]) ? M-testSize : v[0][j];
                    if (g != M-testSize)
                        gp = (smoothlowResPanel[Oid][j]) ? v[g][j] : u[g][j];
                    else
                        gp = (smoothlowResPanel[Oid][j]) ? M-testSize : v[0][j];
                }

                if (gp - fp < hValues[nH]){
                    if (j - i < 1){
                        exists = false;
                        std::cout << Oid << ":No " << hValues[nH] 
                            << "-MPSC in test " << 0 << "\n";
                        imputedH[nH][2] += N-thinN;
                        imputedH[nH][7] += N-thinN;
                        imputedH[nH][4] -= N-thinN;
                        imputedH[nH][8] -= N-thinN;
                        //no h-MPSC exists of this haplotype
                        break;
                    }
                    hMatchStart.push_back(i);
                    i = j;
                    //retry site j as the first site of the next substring
                    if (j != thinN)
                        --j;
                    f = 0;
                    g = M-testSize;
                }
                else{
                    f = fp;
                    g = gp;
                }
            }
            if (exists){
                int numHmatch = hMatchStart.size();
                int votes0h[N], votes1h[N];
                for (int i = 0; i<N; ++i)
                    votes0h[i] = votes1h[i] = (lowRes[i]) ? -1 : 0; 
                //firstAdj is first site that has current positional substring adjacent to it in full resolution (sites to be imputed don't count towards adjacency)
                //lastAdj is first site after firstAdj that has current positional substring not adjacent to it in full resolution
                //equivalently, lastAdj is first site after current positional substring in low resolution in full resolution
                for (int i = 0; i < numHmatch; ++i){
                    //i-th positional substring is 
                    //[hMatchStart[i], hMatchStart[i + 1]-1]
                    //for 0 <= i <= numHmatch - 2
                    //for i = numHmatch - 1: [hMatchStart[i], thinN-1]
                    int firstAdj = 0;
                    if (i > 0)
                        firstAdj = lowHighMap[hMatchStart[i]-1]+1;
                    int lastAdj = N;
                    if (i < numHmatch - 1)
                        lastAdj = lowHighMap[hMatchStart[i+1]];

                    bool hasSiteToImpute = false;
                    for (int j = firstAdj; j < lastAdj; ++j){
                        if (!lowRes[j]) {
                            hasSiteToImpute = true;
                            break;
                        }
                    }
                    if (hasSiteToImpute){
                        //get block
                        int pbwtCol = (i == numHmatch - 1) ? thinN : hMatchStart[i+1];
                        int div = hMatchStart[i];
                        int blockStart, blockEnd; //block is [blockStart, blockEnd)
                        blockStart =  blockEnd = t[pbwtCol];
                        //get block
                        if (zd[pbwtCol] <= div){
                            --blockStart;
                            while (blockStart > 0 && divergence[blockStart][pbwtCol] <= div)
                                --blockStart;
                        }
                        if (bd[pbwtCol] <= div){
                            ++blockEnd;
                            while (blockEnd < M-testSize && divergence[blockEnd][pbwtCol] <= div)
                                ++blockEnd;
                        }

                        for (int j = firstAdj; j < lastAdj; ++j){
                            if (!lowRes[j]) {
                                for (int k = blockStart; k<blockEnd; ++k)
                                    (panel[prefix[k][pbwtCol]][j]) ? (++votes1h[j]) : (++votes0h[j]);
                            }
                        }
                    }
                }
                for (int i = 0; i<N; ++i){
                    if (!lowRes[i]){
                        --imputedH[nH][4];
                        --imputedH[nH][8];
                        if (votes0h[i] != 0 && votes1h[i] != 0){
                            ++imputedH[nH][3];
                            ++imputedH[nH][7];
                        }
                        else{
                            imputedH[nH][0] += ((votes1h[i] > 0) == (panel[Oid][i]));
                            imputedH[nH][1] += ((votes1h[i] > 0) != (panel[Oid][i]));
                            imputedH[nH][5] += beagleImputedCorrectly[q][i];
                            imputedH[nH][6] += !beagleImputedCorrectly[q][i];
                        }
                    }
                }
            }
        }

        oldcputime = get_cpu_time() - oldcputime;
        oldtime = get_wall_time() - oldtime;

        for (int i = 0; i<N; ++i){
            if (!lowRes[i]){
                --imputed[4];
                --imputed[8];
                if ((votes0[i]!= 0) && (votes1[i] != 0))
                    ++imputed[3], ++imputed[7];
                else{ 
                    imputed[0] += ((votes1[i] > 0) == (panel[Oid][i]));
                    imputed[1] += ((votes1[i] > 0) != (panel[Oid][i]));
                    imputed[5] += beagleImputedCorrectly[q][i];
                    imputed[6] += !beagleImputedCorrectly[q][i];
                }
            }
        }
        for (int i = 0; i<N; ++i){
            if (!lowRes[i]){
                --imputedSol[4];
                --imputedSol[8];
                if (votes0Sol[i] != 0 && votes1Sol[i] != 0)
                    ++imputedSol[3], ++imputedSol[7];
                else{
                    imputedSol[0] += ((votes1Sol[i] > 0) == (panel[Oid][i]));
                    imputedSol[1] += ((votes1Sol[i] > 0) != (panel[Oid][i]));
                    imputedSol[5] += beagleImputedCorrectly[q][i];
                    imputedSol[6] += !beagleImputedCorrectly[q][i];
                }
            }
        }
    }
    std::cout << "Length MAX Impute:\t" << imputed[0] << '\t'
        << imputed[1] << '\t'
        << imputed[2] << '\t'
        << imputed[3] << '\t'
        << imputed[4] << '\t'
        << imputed[5] << '\t'
        << imputed[6] << '\t'
        << imputed[7] << '\t'
        << imputed[8] << '\n';
    std::cout << "Sol Space Impute:\t" << imputedSol[0] << '\t'
        << imputedSol[1] << '\t'
        << imputedSol[2] << '\t'
        << imputedSol[3] << '\t'
        << imputedSol[4] << '\t'
        << imputedSol[5] << '\t'
        << imputedSol[6] << '\t'
        << imputedSol[7] << '\t'
        << imputedSol[8] << '\n';
    for (int i = 0; i<numHs; ++i)
        std::cout << hValues[i] << "-MPSC Space Impute:\t" 
            << imputedH[i][0] << '\t'
            << imputedH[i][1] << '\t'
            << imputedH[i][2] << '\t'
            << imputedH[i][3] << '\t'
            << imputedH[i][4] << '\t'
            << imputedH[i][5] << '\t'
            << imputedH[i][6] << '\t'
            << imputedH[i][7] << '\t'
            << imputedH[i][8] << '\n';
}

int main(int argc, char **argv){
    if (argc!=2){
        std::cout << "Input panel file name is required as first and only parameter." << std::endl;
        return 0;
    }
    std::ofstream siteOut("sites.txt");
    bool panel[M][N];
    bool smoothPanel[M][N];
    
	bool lowRes[M]; //lowRes[i] is false iff site needs to be imputed
	bool lowResPanel[M][thinN];
	int prefix[M][thinN+1], divergence[M][thinN+1], u[M][thinN], v[M][thinN];
	//lowHigh maps sites from low to high resolution
	//highLow maps sites from high to most recent low resolution site (-1 if none)
	int lowHighMap[thinN], highLowMap[N];
	for (int i = 0; i<N; ++i)
		lowRes[i] = false;
    std::ifstream sitesIn("sitesToImpute");
    unsigned long long site = 0, count = 0;
    while (sitesIn >> site && ++count)
        lowRes[site] = true;
    sitesIn.close();
    if (count != thinN){
        std::cout << "Found " << count << " sites in imputed sites file, but expected " << thinN << ".\n";
        return 0;
    }
	


	double oldtime, oldcputime;
	oldtime = get_wall_time();
	oldcputime = get_cpu_time();
	ReadVCF(argv[1], panel);
    ReadVCF("completeBritPanel.smooth.vcf", smoothPanel);

	int j = -1;
	for (int i = 0; i<N; ++i){
        siteOut << lowRes[i] << '\n';
		if (lowRes[i]){
			++j;
			lowHighMap[j] = i;
			for (int k = 0; k<M; ++k)
				lowResPanel[k][j] = smoothPanel[k][i];
		}
		highLowMap[i] = j;
	}
	std::cout << "Reading VCF took " << get_cpu_time() - oldcputime << " CPU seconds and " << get_wall_time() - oldtime << " real seconds" << std::endl;
	oldtime = get_wall_time();
	oldcputime = get_cpu_time();
	imputationBenchmark(panel, lowResPanel, prefix, divergence, u, v, lowRes, highLowMap, lowHighMap, subsets);
	std::cout << "Imputation Benchmark took " << get_cpu_time() - oldcputime << " CPU seconds and " << get_wall_time() - oldtime << " real seconds" << std::endl;
	
	//output results

    return 0;
}
