import numpy as np
import os
from sklearn.metrics import recall_score,accuracy_score,roc_auc_score,f1_score,confusion_matrix,precision_score,roc_curve,auc
from sklearn.preprocessing import label_binarize
import joblib
def assess(ytest,y_pred,yprob):
    """Compute classification metrics from labels, predictions and class scores.

    Parameters
    ----------
    ytest : array-like
        True labels of the evaluated samples.
    y_pred : array-like
        Predicted labels for the same samples.
    yprob : array-like
        Per-class scores, indexed as ``yprob[:, i]``.
        NOTE(review): column ``i`` is assumed to correspond to the i-th value
        of ``np.unique(ytest)`` -- confirm against the classifier's class order.

    Returns
    -------
    dict
        Keys ``"recall"``, ``"precesion"``, ``"accuracy"``, ``"f1"``,
        ``"roc_auc"`` (all weighted averages except accuracy),
        ``"multilabel_confusion_matrix"`` (the plain confusion matrix) and
        ``"roc_auc_class"``, a list ``[fpr, tpr, auc]`` of three dicts keyed
        by class label.
    """
    score = {}
    classes = np.unique(ytest)

    score["recall"] = recall_score(ytest, y_pred, average='weighted')
    # The key is misspelled; it is left untouched because it is part of the
    # returned/saved data that other code may look up.
    score["precesion"] = precision_score(ytest, y_pred, average='weighted')
    score["accuracy"] = accuracy_score(ytest, y_pred)
    score["f1"] = f1_score(ytest, y_pred, average='weighted')

    y_test_all = label_binarize(ytest, classes=classes)
    # For exactly two classes label_binarize yields a single column (the
    # indicator of classes[1]). Expand it to one column per class so that it
    # lines up with the two score columns used below.
    if len(classes) == 2 and y_test_all.shape[1] == 1:
        y_test_all = np.hstack([1 - y_test_all, y_test_all])

    score["roc_auc"] = roc_auc_score(y_test_all, yprob, average='weighted', multi_class="ovr")
    score["multilabel_confusion_matrix"] = confusion_matrix(ytest, y_pred)

    # One-vs-rest ROC curve and AUC for every class.
    y_score_all = yprob
    fpr = dict()
    tpr = dict()
    roc_auc_s = dict()
    for column, label in enumerate(classes):
        fpr[label], tpr[label], _ = roc_curve(y_test_all[:, column], y_score_all[:, column])
        roc_auc_s[label] = auc(fpr[label], tpr[label])
    score["roc_auc_class"] = [fpr, tpr, roc_auc_s]
    return score

def extract_test(X_testfilename,y_testfilename,feature_index=""):
    """Load a feature matrix and its labels with ``np.load``, optionally keeping selected columns.

    Parameters
    ----------
    X_testfilename : str
        Path passed to ``np.load`` for the feature matrix.
    y_testfilename : str
        Path passed to ``np.load`` for the labels.
    feature_index : str or array-like of int, optional
        Column indices to keep. The empty string (default) or ``None`` keeps
        every column.

    Returns
    -------
    tuple
        ``(features, labels)``; ``features`` is column-subset when an index
        was given.
    """
    test_features = np.load(X_testfilename)
    test_labels = np.load(y_testfilename)

    # Test for the "no selection" sentinel before converting to an array:
    # comparing a numeric index array with "" is evaluated elementwise by
    # recent NumPy, which makes the result unusable as an `if` condition.
    if feature_index is None or (isinstance(feature_index, str) and feature_index == ""):
        return test_features, test_labels

    return test_features[:, np.asarray(feature_index)], test_labels

from sklearn.decomposition import PCA

def PCA_model_process(PC_number,classf,X_train, y_train,X_test,y_test,PCA_file,model_file):
    """Fit PCA followed by a classifier, save both with joblib, and score the test split.

    Parameters
    ----------
    PC_number : number of components passed to ``PCA(n_components=...)``.
    classf : estimator exposing ``fit``; the object returned by ``fit`` must
        expose ``predict`` and ``predict_proba``.
    X_train, y_train : training features and labels.
    X_test, y_test : test features and labels.
    PCA_file, model_file : destinations for the fitted PCA and the fitted model.

    Returns
    -------
    tuple
        ``(score, result)`` where ``score`` is the dict returned by ``assess``
        and ``result`` is ``[y_test, predicted labels, predicted probabilities]``.
    """
    reducer = PCA(n_components=PC_number)
    train_projected = reducer.fit_transform(X_train)

    # The classifier only ever sees the PCA-projected features.
    fitted = classf.fit(train_projected, y_train)

    test_projected = reducer.transform(X_test)
    predicted_labels = fitted.predict(test_projected)
    predicted_probs = fitted.predict_proba(test_projected)

    # Both fitted objects are written to disk before the metrics are computed.
    for artefact, destination in ((reducer, PCA_file), (fitted, model_file)):
        joblib.dump(artefact, destination)

    metrics = assess(y_test, predicted_labels, predicted_probs)
    return metrics, [y_test, predicted_labels, predicted_probs]

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
def LDA_model_process(classf,X_train, y_train,X_test,y_test,LDA_file,model_file):
    """Fit LDA followed by a classifier, save both with joblib, and score the test split.

    Parameters
    ----------
    classf : estimator exposing ``fit``; the object returned by ``fit`` must
        expose ``predict`` and ``predict_proba``.
    X_train, y_train : training features and labels (labels are also used to fit the LDA).
    X_test, y_test : test features and labels.
    LDA_file, model_file : destinations for the fitted LDA and the fitted model.

    Returns
    -------
    tuple
        ``(score, result)`` where ``score`` is the dict returned by ``assess``
        and ``result`` is ``[y_test, predicted labels, predicted probabilities]``.
    """
    projector = LDA()
    train_projected = projector.fit_transform(X_train, y_train)

    # The classifier is trained on the LDA-projected features only.
    fitted = classf.fit(train_projected, y_train)

    test_projected = projector.transform(X_test)
    predicted_labels = fitted.predict(test_projected)
    predicted_probs = fitted.predict_proba(test_projected)

    # Both fitted objects are written to disk before the metrics are computed.
    for artefact, destination in ((projector, LDA_file), (fitted, model_file)):
        joblib.dump(artefact, destination)

    metrics = assess(y_test, predicted_labels, predicted_probs)
    return metrics, [y_test, predicted_labels, predicted_probs]

def model_process(classf,X_train, y_train,X_test,y_test,model_file):
    """Fit a classifier on the raw features, save it with joblib, and score the test split.

    Parameters
    ----------
    classf : estimator exposing ``fit``; the object returned by ``fit`` must
        expose ``predict`` and ``predict_proba``.
    X_train, y_train : training features and labels.
    X_test, y_test : test features and labels.
    model_file : destination for the fitted model.

    Returns
    -------
    tuple
        ``(score, result)`` where ``score`` is the dict returned by ``assess``
        and ``result`` is ``[y_test, predicted labels, predicted probabilities]``.
    """
    fitted = classf.fit(X_train, y_train)
    predicted_labels = fitted.predict(X_test)
    predicted_probs = fitted.predict_proba(X_test)

    # The model is written to disk before the metrics are computed.
    joblib.dump(fitted, model_file)

    metrics = assess(y_test, predicted_labels, predicted_probs)
    return metrics, [y_test, predicted_labels, predicted_probs]

import xgboost as xgb
def XGBoost_process(X_train, y_train,X_test,y_test,model_file):
    """Train a multi-class XGBoost booster, save it with joblib, and score the test split.

    Labels are encoded to ``0..k-1`` for training and predictions are mapped
    back to the original label values, so the function no longer depends on
    the labels already being consecutive integers starting at zero. When they
    are, the results are unchanged.

    Parameters
    ----------
    X_train, y_train : training features and labels.
    X_test, y_test : test features and labels.
    model_file : destination for the trained booster.

    Returns
    -------
    tuple
        ``(score, result)`` where ``score`` is the dict returned by ``assess``
        and ``result`` is ``[y_test, predicted labels, predicted probabilities]``.
    """
    # classes[j] is the original label represented by encoded value j.
    classes, encoded_train = np.unique(y_train, return_inverse=True)
    encoded_train = encoded_train.reshape(-1)

    param = {'gamma': 0, 'alpha': 1, "eta": 0.05, "colsample_bytree": 0.1, 'objective': 'multi:softprob',
                 'num_class': len(classes)}
    num_round = 4
    dtrain = xgb.DMatrix(X_train,label=encoded_train)
    model = xgb.train(param, dtrain, num_round)

    dtest = xgb.DMatrix(X_test)
    yprob = model.predict(dtest)
    # argmax yields a column position; translate it back to the real label so
    # it can be compared with y_test.
    y_pred = classes[np.argmax(yprob, axis=1)]
    ytest = y_test

    joblib.dump(model, model_file)
    score = assess(ytest, y_pred, yprob)
    result = [ytest, y_pred, yprob]
    return score, result
import argparse
# Command-line interface. --indexFile and --outputName are resolved relative
# to --WorkSpace; the four data paths are handed to np.load unchanged.
parser = argparse.ArgumentParser(description='manual to this script')
parser.add_argument("--WorkSpace", type=str, default="./")
parser.add_argument("--trainX", type=str, default="0")
parser.add_argument("--trainy", type=str, default="0")
parser.add_argument("--testX", type=str, default="0")
parser.add_argument("--testy", type=str, default="0")
parser.add_argument("--indexFile", type=str, default="0")
parser.add_argument("--outputName", type=str, default="0")
args = parser.parse_args()

if args.indexFile=="0":
    # "0" means no index file was given: keep every feature column.
    features_key=""
else:
    # The index file holds one integer column index per line. The file is
    # closed deterministically and blank lines (e.g. a trailing newline) are
    # skipped instead of crashing int().
    with open(os.path.join(args.WorkSpace,args.indexFile),"r") as index_handle:
        important_feature=[int(entry) for entry in index_handle if entry.strip()]
    print(args.indexFile,len(important_feature))
    features_key=np.array(important_feature)

# The same column selection is applied to the test and training splits.
test_features,test_labels=extract_test(args.testX,args.testy,features_key)
test_y=test_labels
test_X=test_features

train_features,train_labels=extract_test(args.trainX,args.trainy,features_key)
train_y=train_labels
train_X=train_features

# Prefix for the result files written at the end of the run.
output=os.path.join(args.WorkSpace,args.outputName)
result_pred={}

from sklearn.neighbors import KNeighborsClassifier
from FKNNv2 import FuzzyKNN
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
import xgboost as xgb
import pandas as pd
# Scores collected per model name during the run.
model_score = {}


def _build_pca_methods():
    """Return the ``name -> [classifier, n_components]`` table for the PCA pipelines."""
    table = {}
    # k-NN and fuzzy k-NN, first with 6 neighbours, then with 3 ("_k3" suffix).
    for suffix, neighbours in (("", 6), ("_k3", 3)):
        for tag, make in (
            ("knn", lambda n: KNeighborsClassifier(n_neighbors=n)),
            ("Fknn", lambda n: FuzzyKNN(k=n)),
        ):
            for n_pc in (20, 100, 200):
                # The 20-component names use "_" as separator, the others "-".
                separator = "_" if n_pc == 20 else "-"
                name = "PCA({}PC){}{}{}".format(n_pc, separator, tag, suffix)
                table[name] = [make(neighbours), n_pc]
    # SVMs on 100 and then 20 components.
    for n_pc in (100, 20):
        table["PCA({}PC)_SVM(linear)".format(n_pc)] = [svm.SVC(C=1, kernel='linear', probability=True), n_pc]
        table["PCA({}PC)_SVM(Gaussian)".format(n_pc)] = [svm.SVC(C=1, probability=True), n_pc]
    return table


def _build_plain_methods():
    """Return the ``name -> classifier`` table for models trained on the raw features."""
    # NOTE(review): `base_estimator` may have been renamed to `estimator` in
    # newer scikit-learn releases -- confirm against the installed version.
    table = {
        "SVM(linear)": svm.SVC(C=1, kernel='linear', probability=True),
        "SVM(Gaussian)": svm.SVC(C=1, probability=True),
        "LogisticRegression_l1": LogisticRegression(solver="liblinear", penalty='l1'),
        "LogisticRegression_l2": LogisticRegression(solver="liblinear", penalty='l2'),
        "LogisticRegression_bfgs": LogisticRegression(solver="lbfgs", penalty='l2'),
        "NB": BernoulliNB(fit_prior=False),
        "DecisionTree": DecisionTreeClassifier(
            criterion="entropy", max_leaf_nodes=50, min_impurity_decrease=0.01,
            min_samples_leaf=1, class_weight='balanced'),
        "Bagging_NB": BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=100),
        "adaBoost_NB": AdaBoostClassifier(
            base_estimator=BernoulliNB(), n_estimators=100, learning_rate=0.01, random_state=1),
    }
    # Random forests: max_features 200 then 10, each with 100 then 10 trees,
    # each with entropy then gini.
    for max_feat, feat_tag in ((200, ""), (10, "_10maxFea")):
        for n_trees, tree_tag in ((100, "_100"), (10, "")):
            for criterion in ("entropy", "gini"):
                name = "RandomForest_{}{}{}".format(criterion, tree_tag, feat_tag)
                table[name] = RandomForestClassifier(
                    criterion=criterion, n_estimators=n_trees, max_leaf_nodes=70,
                    max_features=max_feat, min_impurity_decrease=0, min_samples_leaf=4,
                    class_weight='balanced')
    return table


PCA_method = _build_pca_methods()
method = _build_plain_methods()

# XGBoost takes no classifier object; only the key is used.
xgb_method = {"XGBoost": ""}
print("print start{}".format(output))

import time
# Train, persist and score every configured model. Whatever has been collected
# is saved in the `finally` clause even if one of the models raises; the
# exception still propagates afterwards.
try:
    # All fitted objects go into one directory named after the index file
    # (its last four characters are dropped). Create it up front so that
    # joblib.dump does not fail on a missing directory.
    model_dir = os.path.join(args.WorkSpace,"./model_set_{}".format(args.indexFile[:-4]))
    os.makedirs(model_dir, exist_ok=True)

    # PCA + classifier pipelines.
    for name, (cls, PC_n) in PCA_method.items():
        start_time = time.time()
        model_score[name], result_pred[name] = PCA_model_process(
            PC_n, cls, train_X, train_y, test_X, test_y,
            os.path.join(model_dir, "{}_PCA_model.m".format(name)),
            os.path.join(model_dir, "{}_train_model.m".format(name)))
        end_time = time.time()
        print("{} runtime:{}".format(name, end_time - start_time))

    # Classifiers trained directly on the (selected) features.
    for name, cls in method.items():
        start_time = time.time()
        model_score[name], result_pred[name] = model_process(
            cls, train_X, train_y, test_X, test_y,
            os.path.join(model_dir, "{}_train_model.m".format(name)))
        end_time = time.time()
        print("{} runtime:{}".format(name, end_time - start_time))

    # XGBoost has its own training routine.
    for name in xgb_method:
        start_time = time.time()
        model_score[name], result_pred[name] = XGBoost_process(
            train_X, train_y, test_X, test_y,
            os.path.join(model_dir, "{}_train_model.m".format(name)))
        end_time = time.time()
        print("{} runtime:{}".format(name, end_time - start_time))

finally:
    np.save(output + "score",model_score)
    np.save(output + "ptest_pred_prob", result_pred, )

