#include<stdlib.h>

#include<algorithm>
#include<fstream>
#include<iomanip>
#include<iostream>
#include<memory>
#include<new>
#include<random>
#include<sstream>
#include<string>
#include<vector>

// Number of haplotypes (rows) in the panel: ReadVCF reads M/2 two-allele genotype
// columns per data line, and M is the row count of the arrays declared in main.
#define M 860022
// Number of sites (columns) in the panel: ReadVCF reads N data lines after the header.
#define N 9793
// Length of the per-haplotype result arrays (time, MPSCsize); equals the sum of the
// run sizes listed in main.
#define SUM 971132//10+100+1000+10000+100000+M

//-----------------------------------------------------------------------------
//source for time: https://stackoverflow.com/questions/17432502/how-can-i-measure-cpu-time-and-wall-clock-time-on-both-linux-windows
#include<time.h>
#include<sys/time.h>
// Returns the current wall-clock time in seconds, as reported by gettimeofday()
// (seconds plus microseconds converted to a fraction of a second).
// If gettimeofday() fails, an error is written to std::cerr and 0.0 is returned.
double get_wall_time(){
    // Zero-initialized so that a failed gettimeofday() never leads to reading
    // indeterminate fields below.
    struct timeval now = {};
    if (gettimeofday(&now, nullptr)){
        //  Handle error
        std::cerr<< "ERROR reading real time\n";
    }
    return static_cast<double>(now.tv_sec) + static_cast<double>(now.tv_usec) * .000001;
}
// Returns the processor time reported by clock(), converted to seconds.
double get_cpu_time(){
    const clock_t ticks = clock();
    const double seconds = static_cast<double>(ticks) / CLOCKS_PER_SEC;
    return seconds;
}
//-----------------------------------------------------------------------------

// Forward declarations of the helpers defined below.

// Parses the VCF file at the given path into the haplotype panel (rows = haplotypes, columns = sites).
void ReadVCF(const char *, bool[][N]);
// Shuffles the first `num` entries of the array in place using a freshly drawn seed.
void Randomize(int[], int);
// Writes the M x N panel as text (one row per line) to the given path.
void OutputPanel(const char *, bool[][N]);
// Fills the prefix, divergence, u and v arrays for the first `num` haplotypes listed in the order array.
void PBWT(bool[][N], int[][N+1], int [][N+1], int[][N], int [][N],int [], int);

// Reads a VCF file into `panel`.
//
// inFile: path of the VCF file.
// panel:  output array with at least M rows; panel[h][j] receives allele j of
//         haplotype h, stored as true when the parsed allele is non-zero.
//
// Every line up to and including the first line starting with "#C" (the column
// header) is skipped. Each of the following N lines is treated as one site:
// its first 9 whitespace-separated fields are discarded, then M/2 genotypes of
// the form <int><char><int> are read, giving haplotypes 2*i and 2*i+1.
//
// The process exits with status 1 if the file cannot be opened, if no header
// line is found, or if the file ends before N data lines were read. A data line
// whose genotypes cannot all be parsed only produces a warning on std::cerr.
void ReadVCF(const char* inFile, bool panel[][N]){
    std::ifstream in(inFile);
    if(!in.is_open()){
        std::cerr << "Panel VCF file failed to open!\n";
        exit(1);
    }

    // Skip everything up to the "#C..." header; stop at end of file instead of
    // looping forever when the header is missing.
    std::string line;
    bool headerFound = false;
    while (std::getline(in, line)){
        if (line.size() >= 2 && line[0] == '#' && line[1] == 'C'){
            headerFound = true;
            break;
        }
    }
    if (!headerFound){
        std::cerr << "Panel VCF file has no header line starting with \"#C\"!\n";
        exit(1);
    }

    std::stringstream linestr;
    std::string field;
    int allele = 0;
    char separator = 0;
    for(int j = 0; j<N; ++j){
        if (!std::getline(in, line)){
            std::cerr << "Panel VCF file ended after " << j << " of " << N << " sites!\n";
            exit(1);
        }
        linestr.str(line);
        linestr.clear(); // reset eof/fail state left over from the previous line
        // Discard the 9 fixed columns that precede the genotype columns.
        for (int i = 0; i<9; ++i)
            linestr >> field;
        for (int i = 0; i<M/2; ++i){
            linestr >> allele >> separator;
            panel[i*2][j] = (allele != 0);
            linestr >> allele;
            panel[i*2 + 1][j] = (allele != 0);
        }
        if (linestr.fail()){
            std::cerr << "Warning: could not parse all " << M/2
                      << " genotypes on data line " << j
                      << "; panel entries for this site are unreliable\n";
        }
    }
}

// Shuffles order[0..num-1] in place.
// A seed is drawn from std::random_device, printed to std::cout as "seed:<value>",
// and used to initialise the engine. Walking from the last index down to 1, each
// element is swapped with one at a uniformly chosen index in [0, current index].
void Randomize(int order[], int num){
    std::random_device entropy;
    int s = entropy();
    std::cout << "seed:" << s << std::endl;
    std::default_random_engine rng(s);

    int last = num - 1;
    while (last > 0){
        std::uniform_int_distribution<int> pick(0, last);
        const int other = pick(rng);
        std::swap(order[last], order[other]);
        --last;
    }
}

// Writes the M x N panel to `outFile` as text: one line per haplotype, each
// allele printed as 0 or 1 right-aligned in a field of width 3.
// If the file cannot be opened, an error is reported on std::cerr and nothing
// is written.
void OutputPanel(const char* outFile, bool panel[][N]){
    std::ofstream out(outFile);
    if (!out.is_open()){
        std::cerr << outFile << " failed to open!" << std::endl;
        return;
    }
    for (int i = 0; i<M; ++i){
        for (int j = 0; j<N; ++j)
            out << std::setw(3) << static_cast<int>(panel[i][j]);
        // '\n' rather than std::endl: same bytes, without flushing after every row.
        out << '\n';
    }
}

// Builds the PBWT arrays for the haplotypes order[0..num-1], one site at a time.
//
// panel:      panel[h][k] is the allele of haplotype h at site k.
// prefix:     output; prefix[i][k] is the haplotype at position i of the ordering
//             used at site k. Column 0 is the given order.
// divergence: output; divergence[i][k] is the divergence value of position i at
//             site k. Column 0 is all zeros.
// u, v:       output; for position i at site k, u[i][k] is the number of earlier
//             positions whose haplotype has allele 0 at k, and v[i][k] is the total
//             number of allele-0 haplotypes plus the number of earlier positions
//             with allele 1. These are the positions in the site k+1 ordering
//             that a sequence at position i would move to with allele 0 / allele 1.
// order:      the haplotypes (row indices into panel) to include.
// num:        number of haplotypes taken from order.
void PBWT(bool panel[][N], int prefix[][N+1], int divergence[][N+1],
        int u[][N], int v[][N], int order [], int num){
    for (int i = 0; i<num; ++i){
        prefix[i][0] = order[i];
        divergence[i][0] = 0;
    }

    // Scratch buffers, allocated once and reused for every site:
    // a/d hold the haplotypes and divergence values of the allele-0 group,
    // b/e those of the allele-1 group.
    std::vector<int> a, b, d, e;
    if (num > 0){
        a.reserve(num);
        b.reserve(num);
        d.reserve(num);
        e.reserve(num);
    }

    for (int k = 0; k<N; ++k){
        a.clear();
        b.clear();
        d.clear();
        e.clear();
        // u2/v2 count the allele-0/allele-1 haplotypes seen so far; p/q track the
        // largest divergence value seen since the last allele-0/allele-1 haplotype.
        int u2 = 0, v2 = 0, p = k+1, q = k+1;
        for (int i = 0; i<num; ++i){
            u[i][k] = u2;
            v[i][k] = v2;
            if (divergence[i][k] > p) { p = divergence[i][k];}
            if (divergence[i][k] > q) { q = divergence[i][k];}
            if (!panel[prefix[i][k]][k]){
                a.push_back(prefix[i][k]);
                d.push_back(p);
                ++u2;
                p = 0;
            }
            else{
                b.push_back(prefix[i][k]);
                e.push_back(q);
                ++v2;
                q = 0;
            }
        }
        // u2 was incremented once per push into a, so it equals a.size().
        const int zeros = u2;
        // The next ordering is the allele-0 group followed by the allele-1 group.
        for (int i = 0; i<num; ++i){
            v[i][k] += zeros;
            if (i < zeros){
                prefix[i][k+1] = a[i];
                divergence[i][k+1] = d[i];
            }
            else{
                prefix[i][k+1] = b[i-zeros];
                divergence[i][k+1] = e[i-zeros];
            }
        }
    }
}

// Runs the MPSC benchmark (presumably "minimum positional substring cover"; the
// code computes a positional substring cover of each haplotype by the others).
//
// For each of the numRuns entries of runs[]:
//   1. a random ordering of all M haplotypes is drawn with Randomize(), and its
//      first runs[i] entries are appended to "order.txt", one per line;
//   2. PBWT() is built over those runs[i] haplotypes and its build time printed;
//   3. for every haplotype r of the run, the cover is computed and timed.
//
// time:      output; time[0][x] receives the wall-clock seconds and time[1][x]
//            the CPU seconds spent on result x.
// MPSCsize:  output; number of substrings in the cover of result x, or -1 when
//            no cover by the other haplotypes exists.
// numRuns:   number of entries in runs.
// runs:      number of haplotypes used in each run; entries must not exceed M.
// panel, prefix, divergence, u, v: panel and PBWT work arrays, passed to PBWT().
//
// Results of successive runs are stored back to back, so time and MPSCsize must
// have room for the sum of all runs[] entries.
// Exits with status 1 if "order.txt" cannot be opened.
void MPSCBenchmark(double time[][SUM], int MPSCsize[], int numRuns, int runs[], bool panel[][N], int prefix[][N+1], 
        int divergence[][N+1], int u[][N], int v[][N]){
    int offset = 0;
    std::ofstream randOut("order.txt");
    if (!randOut.is_open()){
        std::cerr << "order.txt failed to open!" << std::endl;
        exit(1);
    }

    // Heap-allocated once: M ints is several megabytes, too large to keep
    // re-creating as an automatic array inside the loop.
    std::vector<int> order(M);

    for (int run = 0; run<numRuns; ++run){
        const int numHaps = runs[run];
        for (int y = 0; y<M; y++){
            order[y]=y;
        }
        Randomize(order.data(), M);
        for (int y = 0; y<numHaps; ++y){
            randOut << order[y] << '\n';
        }
        double oldtime, oldcputime;
        oldtime = get_wall_time();
        oldcputime = get_cpu_time();
        PBWT(panel, prefix, divergence, u, v, order.data(), numHaps);
        std::cout << "Building PBWT took " << get_cpu_time() - oldcputime << " CPU seconds and " << get_wall_time()-oldtime << " real seconds"<<std::endl;

        for (int r = 0; r < numHaps; ++r){
            //calculate MPSC of the haplotype at position r of the initial ordering
            oldtime = get_wall_time();
            oldcputime = get_cpu_time();
            //get t[]: t[k] is the position of this haplotype in the ordering at site k,
            //obtained by following u (allele 0) or v (allele 1) from site to site
            int t[N+1];
            t[0] = r;
            for (int site = 0; site<N; ++site)
                t[site+1] = (panel[order[r]][site]) ? v[t[site]][site] : u[t[site]][site];

            int k = N, oldk, dzk, dzbk; //{k,...,N} is covered
            MPSCsize[r+offset] = 0;
            while (k>0){
                oldk = k;
                //divergence value of this haplotype's own position
                dzk = divergence[t[k]][k];
                //divergence value of the next position, if there is one
                if (t[k] == numHaps-1)
                    dzbk = oldk;
                else
                    dzbk = divergence[t[k]+1][k];
                //jump to the smaller of the two divergence values
                k = (dzk <= dzbk) ? dzk : dzbk;
                if (k == oldk){
                    //no positional substring cover of x_r by X - \{x_r\} exists
                    MPSCsize[r+offset] = -1; //-1 represents does not exist
                    break;
                }
                //C=C union \{k,oldk-1,x_r\}
                MPSCsize[r+offset]++;
            }
            time[1][r+offset] = get_cpu_time() - oldcputime;
            time[0][r+offset] = get_wall_time() - oldtime;

        }
        offset += numHaps;
    }
    randOut.close();
}


int main(int argc, char **argv){
    if (argc!=2){
        std::cout << "Input panel file name is required as first and only parameter." << std::endl;
        return 0;
    }
    std::ofstream timeOut("MPSC.txt");
    //even is real, odd is cpu
    bool panel[M][N];
    int prefix[M][N+1], divergence[M][N+1], u[M][N], v[M][N];
    int numRuns = 6;
    int runs[6] = {10, 100, 1000, 10000, 100000, M};
    double time[2][SUM];
    int MPSCsize[SUM];
    double oldtime, oldcputime;
    oldtime = get_wall_time();
    oldcputime = get_cpu_time();
    ReadVCF(argv[1], panel);
    std::cout << "Reading VCF took " << get_cpu_time() - oldcputime << " CPU seconds and " << get_wall_time()-oldtime << " real seconds"<<std::endl;
    MPSCBenchmark(time, MPSCsize, numRuns, runs, panel, prefix, divergence, u, v);
    timeOut << "Real(s)\tCPU(s)\tMPSC_Size\n";
    for (int i = 0; i<SUM; ++i)
        timeOut << time[0][i] << '\t' << time[1][i] << '\t' << MPSCsize[i] << '\n';
    
    timeOut.close();
    return 0;
}


