import json
import h5py
import tqdm
import datetime
import time
import pandas as pd
import numpy as np
import collections
from scipy import stats
import datashader as ds
import colorcet
import os
import sys
sys.path.insert(0, 'W:/Lab/MERFISH_Data_060421/SingleCellRoutines')
sys.path.insert(0, 'W:/Lab/MERFISH_Data_060421/merfish_vizgen/analysis_local_round1')
import utils
import powerplot
from __init__plots import *
from _powerplot_vizgen_merfish import *

import shapely

def analysis_data_prepare(DATA_DIR, samples, cluster_dat):
    """Load clustering results plus per-sample metadata and expression tables.

    Parameters
    ----------
    DATA_DIR
        Top-level folder. The HDF5 input is read from
        ``<DATA_DIR>/data/processed_merfish_ad_mouse_rotated.hdf5`` and the
        cluster palette is passed ``<DATA_DIR>/data/palette_clustering.json``
        as its output path.
    samples
        Sample names. For each name the HDF5 keys ``mat_<name>`` and
        ``meta_<name>`` are read.
    cluster_dat
        Tab-separated clustering table. It must contain the columns
        ``cell`` (used as index), ``cluster`` and ``sample``.

    Returns
    -------
    tuple
        ``(thedata, thedatagmat, genes, clusters, clstcolors_obj,
        samples_annot, figures, results)`` where

        * ``thedata`` is the clustering table (without its ``sample``
          column) joined with the concatenated metadata,
        * ``thedatagmat`` is ``thedata`` joined with the expression matrix,
        * ``genes`` are the expression-matrix columns,
        * ``clusters`` are the sorted cluster categories,
        * ``clstcolors_obj`` is the ``powerplot.CategoricalColors`` object,
        * ``samples_annot`` maps each sample name to itself,
        * ``figures`` / ``results`` are output folder paths (not created
          here).

    Raises
    ------
    ValueError
        If ``samples`` yields no entries.
    """
    # part1: output locations and input file
    analysis_name = 'Analysis'
    figures = f'{DATA_DIR}/figures/{analysis_name}'
    results = f'{DATA_DIR}/data/{analysis_name}'
    hdf_path = f'{DATA_DIR}/data/processed_merfish_ad_mouse_rotated.hdf5'

    # Identity mapping: every sample is annotated with its own name.
    samples_annot = {smp: smp for smp in samples}

    # part2: load clustering analysis
    clsts = pd.read_csv(cluster_dat, sep='\t').set_index('cell')
    clsts['cluster_cat'] = pd.Categorical(clsts['cluster'])

    # part3: load meta data and gene expression data, one pair per sample
    gmat_parts = []
    meta_parts = []
    print(samples)
    for sample in samples:
        gmat_parts.append(pd.read_hdf(hdf_path, 'mat_' + sample))
        sample_meta = pd.read_hdf(hdf_path, 'meta_' + sample)
        sample_meta['sample'] = sample
        meta_parts.append(sample_meta)

    if not gmat_parts:
        # pd.concat([]) would fail with an opaque "No objects to
        # concatenate"; raise the same exception type with a clear cause.
        raise ValueError(
            "analysis_data_prepare: 'samples' is empty; "
            "no sample tables to load"
        )

    gmat = pd.concat(gmat_parts)
    meta = pd.concat(meta_parts)

    # part4: combine tables
    # The clustering table's own 'sample' column is dropped so it does not
    # collide with the 'sample' column added to the metadata above.
    # DataFrame.join defaults to a left join on the index, so cells present
    # in the clustering table but missing from the metadata are kept with
    # NaN metadata. Pass how='inner' here if misaligned indices between the
    # two tables should be discarded instead.
    thedata = clsts.drop('sample', axis=1).join(meta)
    genes = gmat.columns
    thedatagmat = thedata.join(gmat)
    print(len(genes), gmat.shape, thedata.shape)

    # part5: cluster colour palette
    clusters = np.sort(thedata['cluster_cat'].cat.categories.values)
    # NOTE(review): `sns` is not imported explicitly in this file; presumably
    # it arrives through one of the star imports - confirm.
    clstcolors_obj = powerplot.CategoricalColors(
        clusters, colors=sns.color_palette('hls', len(clusters))
    )
    # The returned dict is not needed here; the call is kept because it is
    # given an output path (presumably it writes the palette as JSON).
    clstcolors_obj.to_dict(
        to_hex=True, output=f'{DATA_DIR}/data/palette_clustering.json'
    )

    return (
        thedata,
        thedatagmat,
        genes,
        clusters,
        clstcolors_obj,
        samples_annot,
        figures,
        results,
    )
     


