import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

def cell_size_distribution(DATA_DIR, samples, all_volume=None):
    """Plot a histogram of cell volumes and print summary statistics.

    For every name in ``samples`` the file
    ``<DATA_DIR>/<sample>_cell_metadata.csv`` is read (first column used as
    the index) and the values of its ``volume`` column are collected.

    Parameters
    ----------
    DATA_DIR : str
        Directory holding the ``*_cell_metadata.csv`` files.  When empty,
        no files are read.
    samples : sequence of str
        Sample names used to build the metadata file names.  When empty,
        no files are read.
    all_volume : list, optional
        Volumes to include in addition to those read from disk.  A list
        passed here is extended in place with the loaded volumes.  When
        omitted, a new empty list is used for each call.

    Returns
    -------
    None
        If at least one volume is available, a 100-bin histogram is drawn
        with ``plt.hist`` and the min, max, mean, median and the
        5th/95th/20th/80th percentiles are printed; otherwise nothing is
        plotted or printed.
    """
    # A literal ``[]`` default would be shared between calls, so volumes
    # loaded by one call would leak into the next; build a fresh list instead.
    if all_volume is None:
        all_volume = []

    # load volumes from the per-sample metadata files
    if len(DATA_DIR) > 0 and len(samples) > 0:
        for sample in samples:
            fmeta = os.path.join(
                DATA_DIR, '{}_cell_metadata.csv'.format(sample))

            # load metadata and read volume
            meta = pd.read_csv(fmeta, index_col=0)
            all_volume.extend(list(meta['volume']))

    # nothing to visualize or summarize
    if len(all_volume) == 0:
        return

    # visualize
    plt.hist(all_volume, bins=100)

    # summary statistics
    print('min size:' + str(np.min(all_volume)))
    print('max size:' + str(np.max(all_volume)))
    print('mean size:' + str(np.mean(all_volume)))
    print('median size:' + str(np.median(all_volume)))
    print('lower 5 percent size:' + str(np.percentile(all_volume, 5)))
    print('higher 5 percent (95 percent) size:'
          + str(np.percentile(all_volume, 95)))
    print('lower 20 percent size:' + str(np.percentile(all_volume, 20)))
    print('higher 20 percent (80 percent) size:'
          + str(np.percentile(all_volume, 80)))