import plink2numpy
import pandas as pd
import numpy as np
# CSV file handed to construct_input() below to build the labels.
labelfile = "breed.csv"

# "snps" is the prefix of the PLINK files.
# NOTE(review): n_SNP / n_indi are presumably the SNP and individual counts
# of this particular dataset -- confirm they match the PLINK files in use.
Rplink = plink2numpy.read_plink(
    "snps",
    n_SNP=9278629,
    n_indi=1390,
)

# The reader calls are kept in their original order in case the reader
# object carries state between them.
feature_info = Rplink.read_bim()
y, label_dict, np_indi = Rplink.construct_input(labelfile)
X = Rplink.read_bed2numpy()

# Convert the labels to a NumPy array so they can be indexed with the
# fold index arrays later on.
y = np.array(y)

# Write every element of feature_info on its own line.
with open("fixedSNP_info.txt", "w") as snp_out:
    snp_out.writelines("{}\n".format(entry) for entry in feature_info)

# Write one tab-separated "key<TAB>value" line per entry of label_dict.
with open("label_corresponding.txt", "w") as label_out:
    label_out.writelines(
        "{}\t{}\n".format(key, label_dict[key]) for key in label_dict
    )

import os

from sklearn.model_selection import StratifiedKFold

# Split the data into 5 stratified folds and persist each fold under
# ./CV<k>/ (k = 1..5):
#   X_trainCV<k>.npy / y_trainCV<k>.npy   training features / labels
#   X_testCV<k>.npy  / y_testCV<k>.npy    held-out features / labels
#   sample_train<k>.txt / sample_test<k>.txt   one entry of np_indi per line
skfolds = StratifiedKFold(n_splits=5)
count = 0
for train_index, test_index in skfolds.split(X, y):
    count += 1

    # Neither np.save nor open(..., "w") creates missing parent directories,
    # so make sure the fold directory exists before writing into it.
    fold_dir = "./CV{}".format(count)
    os.makedirs(fold_dir, exist_ok=True)

    X_train_fold = X[train_index]
    y_train_fold = y[train_index]
    sample_train_fold = np_indi[train_index]
    X_test_fold = X[test_index]
    y_test_fold = y[test_index]
    sample_test_fold = np_indi[test_index]

    # np.save appends the ".npy" extension to these paths.
    np.save("{}/X_trainCV{}".format(fold_dir, count), X_train_fold)
    np.save("{}/y_trainCV{}".format(fold_dir, count), y_train_fold)
    np.save("{}/X_testCV{}".format(fold_dir, count), X_test_fold)
    np.save("{}/y_testCV{}".format(fold_dir, count), y_test_fold)

    # Formatting each entry (instead of passing it straight to write())
    # keeps this working even if the entries are not plain str objects.
    with open("{}/sample_train{}.txt".format(fold_dir, count), "w") as f:
        f.writelines("{}\n".format(sample) for sample in sample_train_fold)
    with open("{}/sample_test{}.txt".format(fold_dir, count), "w") as f:
        f.writelines("{}\n".format(sample) for sample in sample_test_fold)
