import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from util import *

def distribution(library, title='', normed=False, c='g', log=False, save=False, mini=0, maxi=3.2, number_bins=20):
    """Plot a histogram of the non-NaN ``LogMean`` values of a library.

    The data frame is obtained from ``readDF(library)`` (a helper that is not
    defined in this function; it must return an object whose ``"LogMean"``
    column can be indexed and passed to ``np.isnan``).  The histogram is
    shown with ``plt.show()``, optionally saved first, and a few summary
    statistics are printed afterwards.

    Parameters
    ----------
    library :
        Passed unchanged to ``readDF``.
    title : str
        Plot title; also the prefix of the saved file name.
    normed : bool
        If true, every value is weighted by ``1 / len(values)`` so the bar
        heights sum to at most 1 (values outside the bin range are not
        counted), and the y ticks are fixed at 0.0 ... 0.6.
    c :
        Bar colour, forwarded to ``plt.hist``.
    log : bool
        Forwarded to ``plt.hist``.  When true and ``normed`` is false, the
        y axis is additionally capped at 10**6 with decade ticks.
    save : bool
        If true, save the figure as ``<title>_lib_distribution.svg``
        before showing it.
    mini, maxi : float
        Lower and upper edge of the histogram range.
    number_bins : int
        Number of equal-width bins between ``mini`` and ``maxi``.

    Returns
    -------
    tuple
        ``(n, bins, patches)`` exactly as returned by ``plt.hist``.
    """
    df = readDF(library)
    a = df["LogMean"]
    a = a[~np.isnan(a)]
    if normed:
        weights = np.ones_like(a) / float(len(a))
    else:
        weights = np.ones_like(a)

    # number_bins equal-width bins need number_bins + 1 edges.  linspace
    # always ends exactly on maxi; the previous arange() call mixed
    # maxi/number_bins (stop) with (maxi-mini)/number_bins (step), which
    # overshot maxi for mini != 0 and was sensitive to float rounding.
    bins = np.linspace(mini, maxi, number_bins + 1)
    print('bins =', bins)
    n, bins, patches = plt.hist(a, weights=weights, bins=bins, color=c, log=log)
    plt.title(title)

    if normed:
        plt.yticks(np.arange(0, 0.7, 0.1))
    elif log:
        plt.yscale('log')
        # Only cap the top: a lower limit of 0 is invalid on a log axis
        # (matplotlib warns and ignores it), so the bottom is left as is.
        plt.ylim(top=10**6)
        plt.yticks([1, 10, 100, 1000, 10000, 100000])
    if save:
        # Save before show(): the figure may be gone once the window closes.
        plt.savefig(title + "_lib_distribution" + ".svg")
    plt.show()

    print("The min value =", np.nanmin(a))
    print("The max value =", np.nanmax(a))
    print("The mean value =", np.nanmean(a))

    # NOTE(review): count_nonzero skips entries whose LogMean is exactly 0,
    # so this is not simply len(a) -- confirm that this is intended.
    print("Total variants =", np.count_nonzero(a))
    # NOTE(review): 4**9 is presumably the size of the full variant space
    # (9 positions x 4 symbols) -- confirm against the library design.
    print("Coverage = ", "{:.2%}".format(np.count_nonzero(a) / 4**9))
    return n, bins, patches

# Plot each library in turn with the default settings, in the same order
# as before; each name is handed straight to distribution().
for _library_name in ('fepb', 'arti', 'dmsc'):
    distribution(_library_name)
