import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from tqdm import tqdm
from util import *

import os
# Absolute path of the directory that contains this script file.
dirname = os.path.dirname(os.path.abspath(__file__))

def Plot(library, x, y, s=2, xlim=(0, 1000), ylim=(0, 1.5)):
	"""Save a small gray scatter plot of ``y`` against ``x`` as a TIFF file.

	Points where either coordinate is NaN are dropped before plotting.
	The image is written to ``<library>_s=<s>_ErrorRate.tiff``; the name is
	relative, so the file lands in the current working directory.

	Args:
		library: Label used as the prefix of the output file name.
		x: Sequence of x values; must have the same length as ``y``.
		y: Sequence of y values.
		s: Marker size handed to ``plt.scatter``; also embedded in the
			output file name.
		xlim: ``(low, high)`` limits of the x axis.
		ylim: ``(low, high)`` limits of the y axis.

	The figure is closed before returning, so nothing is left for a later
	``plt.show()`` to display.
	"""
	## remove nan
	dfTemp = pd.DataFrame(np.transpose([x, y])).dropna(how='any')
	x = dfTemp[0]
	y = dfTemp[1]

	## plotting
	fig = plt.figure(figsize=(1.8, 1.2), dpi=300)
	try:
		plt.xlim(xlim)
		plt.ylim(ylim)
		plt.scatter(x, y, s=s, c='gray', marker='.', edgecolors='none')
		plt.savefig(library+'_s='+str(s)+'_ErrorRate.tiff')
	finally:
		# pyplot keeps every figure alive until it is closed explicitly;
		# without this each call would leak one 300-dpi figure.
		plt.close(fig)

# library = 'arti'
def Main(library, s=2):
	"""Summarise the per-row spread of 'LogMean' across three tables of a library.

	Loads three tables with ``readDF(library, 1)``, ``readDF(library, 2)`` and
	``readDF(library, 3)`` and places their 'LogMean' and 'Counts' columns side
	by side (one row per table row, one column per table).  The tables are
	assumed to be row-aligned with each other -- TODO confirm against readDF.

	Rows are discarded when any 'LogMean' is NaN or any 'Counts' is zero.  For
	the remaining rows the standard deviation of the three 'LogMean' values
	(``np.std`` defaults) and the mean of the three 'Counts' values are
	computed.  A text summary is appended to ``FigS7A - Document.txt`` and the
	(mean count, SD) pairs are written to ``FigS7A - <library>_plot.csv``, both
	in the directory of this script.

	Args:
		library: Library name passed to ``readDF`` and used in output names.
		s: Marker size for ``Plot``; currently unused because the ``Plot``
			call at the end of this function is commented out.
	"""
	print('>> '+library)
	tables = [readDF(library, i) for i in (1, 2, 3)]

	y = np.transpose(np.array([df['LogMean'].values for df in tables]))
	# dtype=bool keeps the mask invertible even when there are no rows.
	indexNA = np.array([sum(np.isnan(a))>0 for a in tqdm(y)], dtype=bool)
	print(sum(indexNA))

	x = np.transpose(np.array([df['Counts'] for df in tables]))
	index0 = np.array([sum(a==0)>0 for a in tqdm(x)], dtype=bool)
	print(sum(index0))

	# The two masks are expected to be identical.  Filtering x and y with
	# different masks would silently pair values from different rows (or make
	# the lengths differ), so one shared mask is applied to both.
	if not np.array_equal(indexNA, index0):
		nDiff = int(np.sum(indexNA != index0))
		print(f'warning: NaN mask and zero-count mask differ in {nDiff} rows; dropping their union')
	keep = ~(indexNA | index0)
	y = y[keep]
	x = x[keep]

	ySD = np.array([np.std(a) for a in tqdm(y)])
	xMean = np.array([np.mean(a) for a in tqdm(x)])

	# Build the whole record before touching the append-only log so that a
	# failure in any statistic cannot leave a truncated entry behind.
	report = [
		'> '+library+'\n',
		f'n = {int(keep.sum())}\n',
		f'min: {min(ySD)}\n',
		f'max: {max(ySD)}\n',
		f'mean: {np.mean(ySD)}\n',
		f'median: {np.median(ySD)}\n',
	]
	# Central intervals: label, lower percentile, upper percentile.
	for label, lo, hi in (('95%', 5, 95), ('99%', 1, 99),
						  ('99.9%', 0.1, 99.9), ('99.99%', 0.01, 99.99)):
		report.append(f'{label}_min: {np.percentile(ySD, lo)}\n')
		report.append(f'{label}_max: {np.percentile(ySD, hi)}\n')
	# NOTE(review): the labels below say "neighbor total fitness", but the
	# correlation is between the mean 'Counts' and the SD of 'LogMean'.
	# The text is left unchanged in case something parses the log.
	Pearson_r, p_value = pearsonr(xMean, ySD)
	report.append(f'Pearson_r of neighbor total fitness: {Pearson_r}\n')
	report.append(f'p_value of neighbor total fitness: {p_value}\n\n')

	with open(dirname+'/FigS7A - Document.txt', 'a') as f:
		f.writelines(report)

	dfPlot = pd.DataFrame(np.transpose([xMean, ySD]), columns=['x', 'y'])
	dfPlot.to_csv(dirname+'/FigS7A - '+library+'_plot.csv', index=False)
	# Plot(library, xMean, ySD, s=s)

# Run the analysis for every library with the default marker size.
# Marker sizes 1 and 0.5 were also used in earlier runs (second argument of
# Main, e.g. Main('dmsc', 0.5)).
for _library in ('dmsc', 'arti', 'fepb'):
	Main(_library)



