import warnings
warnings.filterwarnings("ignore")
from sklearn.feature_selection import RFECV
from sklearn.model_selection import cross_val_score
import joblib
import numpy as np
import os
from sklearn.metrics import accuracy_score
import argparse
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier


def extract_test(X_testfilename, y_testfilename, feature_index):
    """Load a feature matrix and its labels, keeping only the chosen columns.

    Despite the name, this script calls the helper for both the training
    and the test split.

    Args:
        X_testfilename: Path of the ``.npy`` file read with ``np.load`` as
            the feature matrix; it is indexed along its second axis, so it
            must be at least 2-D.
        y_testfilename: Path of the ``.npy`` file read with ``np.load`` as
            the labels; returned unchanged.
        feature_index: Column indices to keep (any array-like accepted by
            ``np.array``).

    Returns:
        A ``(selected_features, labels)`` tuple.
    """
    feature_matrix = np.load(X_testfilename)
    labels = np.load(y_testfilename)
    selected_columns = np.array(feature_index)
    return feature_matrix[:, selected_columns], labels

# Command-line interface of the script.
parser = argparse.ArgumentParser(description='manual to this script')

# (flag, default) for every string-valued option, in registration order.
_STR_OPTIONS = (
    ("--WorkSpace", "./"),  # directory that --indexFile is joined onto
    ("--output", "./"),     # directory receiving the saved selector and index list
    ("--trainX", "0"),      # .npy path loaded as the training feature matrix
    ("--trainy", "0"),      # .npy path loaded as the training labels
    ("--testX", "0"),       # .npy path loaded as the test feature matrix
    ("--testy", "0"),       # .npy path loaded as the test labels
    ("--indexFile", "0"),   # text file with one integer feature index per line
    ("--method", "0"),      # key into dic_m selecting the classifier
    ("--filter", "0"),      # label used in output file names and the printed summary
)
for _flag, _default in _STR_OPTIONS:
    parser.add_argument(_flag, type=str, default=_default)

# Integer options forwarded to RFECV.
parser.add_argument(
    "--min_features_to_select",
    default=500,
    type=int,
    help="Minimum number of features to select",
)
parser.add_argument(
    "--n_jobs",
    default=5,
    type=int,
    help="Number of jobs for parallel processing",
)
args = parser.parse_args()

# Load important feature indices: one integer column index per line.
# Blank lines (e.g. a trailing empty line at the end of the file) are skipped
# rather than crashing on int(""); int() itself tolerates the surrounding
# whitespace and newline, so no explicit strip of the value is needed.
with open(os.path.join(args.WorkSpace, args.indexFile), "r") as f:
    important_feature = [int(line) for line in f if line.strip()]

print(args.indexFile, len(important_feature))
features_key = np.array(important_feature)

# Prepare train and test data, restricted to the columns listed in the index
# file (extract_test is used for both splits despite its name).
train_X, train_y = extract_test(args.trainX, args.trainy, features_key)
test_X, test_y = extract_test(args.testX, args.testy, features_key)

# Candidate classifiers, keyed by the value passed as --method.
# Every estimator is instantiated eagerly; only the selected one is used.
dic_m = {
    # L2-penalised logistic regression.
    "LR2": LogisticRegression(penalty='l2', solver="liblinear"),
    # Linear-kernel SVM; probability=True is set although this script only
    # calls predict() on the fitted model.
    "svmL": svm.SVC(kernel='linear', C=1, probability=True),
    # Random forest with explicit size limits and balanced class weights.
    "RF": RandomForestClassifier(
        n_estimators=100,
        criterion="gini",
        max_features=10,
        max_leaf_nodes=70,
        min_samples_leaf=4,
        min_impurity_decrease=0,
        class_weight='balanced',
    ),
}

# Feature selection and model training
def fun1(filter, method, clf, min_features_to_select, n_jobs):
    """Select features with RFECV, evaluate the reduced model, and save results.

    Fits an ``RFECV`` selector (step 1, 5-fold CV, accuracy scoring) around
    ``clf`` on the module-level ``train_X``/``train_y``, refits ``clf`` on the
    selected columns, and scores it on the module-level ``test_X``/``test_y``.

    Two files are written into ``args.output``:

    * ``REF_<filter>_<method>_rfe1.m`` -- the fitted selector (joblib dump).
    * ``REF_<filter>_<method>index<min_features_to_select>.txt`` -- the
      entries of ``features_key`` kept by the selector, one per line.

    Args:
        filter: Label used only in the output file names and the printed
            summary. It shadows the builtin ``filter``; the name is kept so
            existing keyword callers keep working.
        method: Label used the same way as ``filter``.
        clf: scikit-learn classifier wrapped by RFECV; it is also fitted in
            place on the selected features.
        min_features_to_select: Forwarded to ``RFECV`` and embedded in the
            index file name.
        n_jobs: Forwarded to ``RFECV``.

    Returns:
        None. Results are printed and written to disk.
    """
    # Create the output directory *before* the (potentially very long) RFECV
    # fit, so a missing directory cannot discard the whole run at dump time.
    # An empty path means "current directory" for os.path.join and must not
    # be passed to os.makedirs, which rejects it.
    out_dir = args.output
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    selector_path = os.path.join(out_dir, "REF_{}_{}_rfe1.m".format(filter, method))
    index_path = os.path.join(
        out_dir,
        "REF_{}_{}index{}.txt".format(filter, method, min_features_to_select),
    )

    RFE_model1 = RFECV(
        clf,
        step=1,
        min_features_to_select=min_features_to_select,
        verbose=2,
        scoring='accuracy',
        cv=5,
        n_jobs=n_jobs
    )
    RFE_model1.fit(train_X, train_y)
    joblib.dump(RFE_model1, selector_path)

    # Transform data to selected features
    X_wrapper = RFE_model1.transform(train_X)
    # NOTE(review): the selector was fitted on all of train_X, so this CV
    # score is computed on features chosen with knowledge of every fold and
    # is likely optimistic -- confirm whether that is intended.
    score = cross_val_score(clf, X_wrapper, train_y, cv=5).mean()
    model = clf.fit(X_wrapper, train_y)
    test_X_wrapper = RFE_model1.transform(test_X)
    test_score = accuracy_score(test_y, model.predict(test_X_wrapper))

    # Print results
    print("RFE_model_{}_{}:".format(filter, method), score, test_score, X_wrapper.shape)

    # Save selected feature indices to file: support_ masks the columns of
    # train_X, which correspond one-to-one to the entries of features_key.
    filterli = features_key[RFE_model1.support_]
    with open(index_path, "w") as f1:
        f1.writelines("{}\n".format(i) for i in filterli)

# Run feature selection and evaluation.
# An unknown --method (including the parser default "0", which is not a key
# of dic_m) is reported through argparse with the list of valid choices
# instead of surfacing as a bare KeyError.
if args.method not in dic_m:
    parser.error(
        "unknown --method {!r}; choose one of: {}".format(
            args.method, ", ".join(sorted(dic_m))
        )
    )
fun1(args.filter, args.method, dic_m[args.method], args.min_features_to_select, args.n_jobs)
