import re, os
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

### =======================================
def createDF(n, columns=['xMean', 'yMean', 'yQ1', 'yQ2', 'yQ3']):
    """
    create a DataFrame of float zeros with n rows and one column per name in columns

    n:       number of rows (non-negative integer; 0 gives an empty frame that
             still carries every column)
    columns: column labels; the default list is only read, never mutated
    return:  pd.DataFrame of shape (n, len(columns)) filled with 0.0
    """
    # np.zeros keeps the 2-D shape even when n == 0, whereas a nested-list
    # literal collapses to a 1-D empty array that no longer matches `columns`.
    return pd.DataFrame(np.zeros((n, len(columns))), columns=columns)

def readDF(library, rep=0):
    """
    load a read-count table for a library from ../Table_ReadCount/ (relative to this module's directory)

    library: file-name prefix of the table
    rep:     when truthy, read '<library>_SDR_rep<rep>.csv';
             otherwise read '<library>_SDR_union_count25.csv'
    return:  pd.DataFrame parsed by pd.read_csv
    """
    suffix = '_SDR_rep'+str(rep)+'.csv' if rep else '_SDR_union_count25.csv'
    # anchor the lookup on this file's location rather than the working directory
    here = os.path.dirname(os.path.abspath(__file__))
    return pd.read_csv(here+'/../Table_ReadCount/'+library+suffix)

def readPredictDF(library, order):
    """
    load the prediction table '<library>_SDR_union_count25_o<order>.csv' from a Table_Prediction directory

    library: file-name prefix of the table
    order:   value appended after '_o' in the file name (converted with str)
    return:  pd.DataFrame parsed by pd.read_csv

    The file is first looked up in '../Table_Prediction/' relative to the
    current working directory. If it is not there, the same relative location
    is resolved against this module's directory, so the function also works
    when the caller runs from a different working directory.
    """
    file = library+'_SDR_union_count25_o'+str(order)+'.csv'
    address = '../Table_Prediction/'
    if not os.path.exists(address+file):
        # fall back to a location anchored on this file instead of the cwd
        dirname = os.path.dirname(os.path.abspath(__file__))
        address = dirname+'/../Table_Prediction/'
    return pd.read_csv(address+file)

### =======================================
def readTXT(file):
    """
    read a text file and return its lines as a list, each with surrounding whitespace stripped
    """
    with open(file) as handle:
        # iterating the handle yields the same lines readlines() would return
        return [line.strip() for line in handle]

### =======================================
def grepNumber(string):
    """
    extract every number found in a string and return them as a list of floats

    a number is an optional '-', one or more digits, an optional '.', then
    optional further digits; an empty list is returned when nothing matches
    """
    matches = re.findall(r'-?\d+\.?\d*', string)
    return list(map(float, matches))

### =======================================
def Coefficient_of_Variation(df):
    """
    row-wise standard deviation of the columns at positions 1..3 divided by the 'LogMean' column

    df:     DataFrame with at least four columns and a 'LogMean' column
            (positions 1..3 are presumably replicate measurements -- confirm with caller)
    return: pd.Series with one value per row
    """
    row_sd = df.iloc[:, 1:4].std(axis=1)
    return row_sd.div(df['LogMean'])

### =======================================
def GroupPlot(x, y, scale=(0, 3.2), n=20):
    """
    bin y by the value of x and draw the per-bin median of y with quartile error bars

    x, y:   arrays (or Series) of equal length supporting boolean-mask indexing
    scale:  (lower, upper) range of x that is divided into bins
    n:      number of equal-width bins; each bin is [edge_k, edge_k+1)
    return: None; the points are drawn on the current matplotlib axes
    """
    lower, upper = scale[0], scale[1]
    width = (upper-lower)/n
    # the stop value is pushed a tenth of a step past `upper` so the last edge is generated
    edges = np.arange(lower, upper+width/10, width)
    stats = createDF(n)

    for k in range(n):
        in_bin = (x>=edges[k]) & (x<edges[k+1])
        xs, ys = x[in_bin], y[in_bin]
        stats.loc[k, 'xMean'] = np.nanmean(xs)
        stats.loc[k, 'yMean'] = np.nanmean(ys)
        stats.loc[k, 'yQ1'] = np.nanpercentile(ys, 25)
        stats.loc[k, 'yQ2'] = np.nanpercentile(ys, 50)
        stats.loc[k, 'yQ3'] = np.nanpercentile(ys, 75)

    # asymmetric error bars: distance from the median down to Q1 and up to Q3
    below = stats.yQ2-stats.yQ1
    above = stats.yQ3-stats.yQ2
    plt.errorbar(x=stats.xMean, y=stats.yQ2, yerr=[below, above], fmt='o')

### =======================================
def GFP_to_mRNA(x):
    """
    apply the fixed linear map y = 0.0028344*x + 0.62957

    x:      scalar, numpy array or pandas Series (anything supporting
            multiplication by and addition of a float)
    return: object of the same kind as x holding the transformed values

    NOTE(review): the name suggests a GFP -> mRNA calibration; the origin and
    units of the two coefficients are not shown here -- confirm with the author.
    """
    # the former NaN pre-allocation was overwritten immediately and only
    # forced x to have a len(); dropping it lets scalars through as well
    return 0.0028344*x+0.62957

### =======================================
def nanCor(a1, a2):
    """
    correlation between a1 and a2 computed with pandas DataFrame.corr (default settings)

    the two inputs become columns 0 and 1 of a temporary DataFrame and the
    off-diagonal entry of its correlation matrix is returned; pandas leaves
    missing values out of the calculation
    """
    paired = pd.DataFrame({0: a1, 1: a2})
    corr_matrix = paired.corr()
    return corr_matrix.iat[0, 1]

### =======================================
def struct_toBinary(structure):
    """
    convert a structure string into a float array: 0 where the character is '.', 1 everywhere else

    structure: sequence of characters (e.g. '((..))')
    return:    numpy float array with one entry per character
    """
    flags = [1.0 if symbol != '.' else 0.0 for symbol in structure]
    return np.array(flags, dtype=float)

### =======================================
def SeqGenerator(seq):
    """
    expand every 'N' in a nucleotide sequence into A, C, G and U and return all resulting sequences
    ex: 'ANAN' -> ['AAAA','AAAC','AAAG','AAAU','ACAA','ACAC','ACAG','ACAU','AGAA','AGAC','AGAG','AGAU','AUAA','AUAC','AUAG','AUAU']

    seq:    string in which 'N' marks a position to be enumerated
    return: numpy array of 4**(number of 'N') strings, ordered as produced by
            itertools.product over 'ACGU' (last 'N' varies fastest)
    a tqdm progress bar is shown while the sequences are generated
    """
    chars = np.array(list(seq))
    is_wild = np.array([c=='N' for c in chars])
    n_wild = sum(is_wild)
    total = 4**n_wild
    # pre-allocate with a placeholder as long as the sequence so every result fits
    expanded = np.full(total, '0'*len(chars))

    fills = itertools.product('ACGU', repeat=n_wild)
    for slot, fill in enumerate(tqdm(fills, total=total)):
        chars[is_wild] = fill
        expanded[slot] = ''.join(chars)

    return expanded


