import math
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from util import *

def neighbors(seq):
    '''
    Return every sequence that differs from `seq` by one substitution.

    Positions are visited left to right; at each position the original
    character is replaced by each of 'A', 'U', 'C', 'G' (in that order),
    skipping the character already present. Positions holding 'N' are
    left untouched and contribute no neighbors.

    i.e. neighbors('AAAAAAAAA') --> 27 neighbors (9 positions x 3 bases)
    '''
    alphabet = ('A', 'U', 'C', 'G')
    mutants = []
    for pos, original in enumerate(seq):
        if original == 'N':
            continue
        head, tail = seq[:pos], seq[pos + 1:]
        for replacement in alphabet:
            if replacement != original:
                mutants.append(head + replacement + tail)
    return mutants

# library = 'fepB'
def NeighborCalculator(library, shuffle=False):
    '''
    Tabulate each sequence's fitness next to the fitness of its
    1-mutation neighbors and save both arrays with np.save.

    The table for `library` is loaded with readDF; its 'SeqID' column is
    used as the sequence string and its 'LogMean' column as the fitness.

    Parameters
    ----------
    library : str
        Name passed to readDF and used as the prefix of the output files.
    shuffle : bool
        If True, the LogMean values are randomly permuted (with the
        `random` module) before being paired with the sequences, and the
        output files get a '_shuffle' infix.

    Output
    ------
    <library>[_shuffle]_ReferenceFitness.npy : shape (n,)
    <library>[_shuffle]_NeighborFitness.npy  : shape (n, 27)
    Row i is filled only when the i-th sequence and all 27 of its
    neighbors have a non-NaN fitness; every other row stays NaN. This
    includes sequences whose neighbors are missing from the table and
    sequences that yield fewer than 27 neighbors (e.g. containing 'N').
    The two printed numbers are the counts of rows left as NaN.
    '''
    # 3 alternative bases at each of 9 positions (see neighbors()).
    n_neighbors = 27

    df = readDF(library)
    # df = pd.read_csv(dirname+'/../ReadCountTable_20190130/'+library+'_SDR_rep1_count25.csv') # for yfp
    # df = pd.read_csv(dirname+'/../ReadCountTable_20190130/'+library+'_RNAsubopt_dG.csv') # for Vienna
    n = len(df['LogMean'])
    indexTemp = np.arange(n)

    if shuffle:
        # Kept as ten passes so a seeded `random` state produces the same
        # permutation as before.
        for i_shuffling in tqdm(range(10)):
            random.shuffle(indexTemp)

    # Positional selection (.iloc): indexTemp holds row positions, so the
    # pairing with 'SeqID' must not depend on the DataFrame's index labels.
    dic = dict(zip(df['SeqID'], df['LogMean'].iloc[indexTemp]))
    # dic = {i: j for i, j in zip(df["SeqID"], -df["dG"])}  # for Vienna

    ReferenceFitness = np.full(n, np.nan)
    NeighborFitness = np.full((n, n_neighbors), np.nan)
    missing = float('nan')
    for i, seq in tqdm(zip(range(n), dic), total=n):
        if math.isnan(dic[seq]):
            continue
        # A neighbor absent from the table counts as unmeasured (NaN)
        # instead of aborting the whole run with a KeyError.
        NF = [dic.get(seqNeighbor, missing) for seqNeighbor in neighbors(seq)]
        # A short neighbor list cannot fill the fixed-width row.
        if len(NF) != n_neighbors:
            continue
        if np.isnan(NF).any():
            continue
        ReferenceFitness[i] = dic[seq]
        NeighborFitness[i, :] = NF

    # Number of rows left unfilled (the two counts should agree).
    print(int(np.isnan(ReferenceFitness).sum()))
    print(int(np.isnan(NeighborFitness[:, 0]).sum()))

    prefix = library + '_shuffle' if shuffle else library
    np.save(prefix + '_ReferenceFitness', ReferenceFitness)
    np.save(prefix + '_NeighborFitness', NeighborFitness)

# Module-level driver: executing (or importing) this file runs the
# calculation for the 'dmsc' library with shuffle=True and writes the
# resulting .npy files. The commented-out calls are alternative runs;
# uncomment the one that is needed.
NeighborCalculator('dmsc', shuffle=True)
# NeighborCalculator('arti', shuffle=True)
# NeighborCalculator('fepb', shuffle=True)
# NeighborCalculator('yfp')
# NeighborCalculator('Vienna')

