import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from util import *

### =======================================
def findMotif(motif, seqs):
    """Return a boolean mask marking which entries of *seqs* contain *motif*.

    Parameters
    ----------
    motif : str
        Substring to look for.
    seqs : iterable of str
        Sequences to test; each element is checked with ``motif in element``.

    Returns
    -------
    numpy.ndarray
        1-D array of dtype ``bool`` with one entry per element of *seqs*,
        in iteration order.
    """
    # An explicit dtype keeps the result a boolean mask even when `seqs` is
    # empty; np.array([]) would silently produce a float64 array instead.
    return np.fromiter((motif in seq for seq in seqs), dtype=bool)

### =======================================
def motifs_LogGFP(motif, df):
    """Return the ``LogMean`` values of the rows whose ``SeqID`` contains *motif*.

    *df* must expose ``SeqID`` and ``LogMean`` columns of equal length.
    The selected values are returned as a plain Python list.
    """
    # NOTE(review): presumably SeqID holds the sequence string itself, since
    # it is searched for the motif as a substring -- confirm against readDF.
    has_motif = findMotif(motif, df.SeqID)
    selected = df.LogMean[has_motif]
    return selected.tolist()

### =======================================
# Load the data set for one library and collect, for every motif of interest,
# the LogMean values of all rows containing that motif.
library = 'fepb'
df = readDF(library)  # only the SeqID and LogMean columns are used below

# Motifs to look up, listed shortest to longest.
motifs = (
    ['GGGGG', 'UAAGG', 'AAGGA', 'AGGAG', 'GGAGG', 'GAGGU']   # length 5
    + ['GGGGGG', 'UAAGGA', 'AAGGAG', 'AGGAGG', 'GGAGGU']      # length 6
    + ['GGGGGGG', 'UAAGGAG', 'AAGGAGG', 'AGGAGGU']            # length 7
    + ['GGGGGGGG', 'UAAGGAGG', 'AAGGAGGU']                    # length 8
)

# Parallel columns of the long-format plotting table: one entry per
# (motif, matching row) pair.
LogGFP = []
Motifs = []
Length = []

for motif in motifs:
    values = motifs_LogGFP(motif, df)
    n_hits = len(values)

    # Report the NaN-ignoring mean of the matched values for this motif.
    mean_value = np.nanmean(values)
    print(motif, str(mean_value))

    LogGFP += values
    Motifs += [motif] * n_hits
    Length += [len(motif)] * n_hits

dfPlot = pd.DataFrame({'Motif': Motifs, 'Log(GFP)': LogGFP, 'Length': Length})

### =======================================
import seaborn as sns

# Draw one horizontal violin of Log(GFP) per motif, coloured by motif length,
# then write the figure to '<library>.svg' and display it.
plt.figure(figsize=(6, 12))
sns.set(style="whitegrid")  # kept after figure creation, as originally ordered
ax = sns.violinplot(
    data=dfPlot,
    x="Log(GFP)",
    y="Motif",
    hue='Length',
    width=2,
    scale="width",
    palette="Set3",
)

# Fixed axis ranges. The y-limits are passed bottom-first (18.5) then top
# (-1.5), so the first motif is drawn at the top of the axis.
ax.set_xlim(0, 3.5)
ax.set_ylim(18.5, -1.5)

out_file = library + '.svg'
plt.savefig(out_file)
plt.show()

