import sys
import os.path
import pandas as pd
import numpy as np
import scipy.sparse as sps
import pyximport
pyximport.install()

import netrand as cr

def read_networks(filename):
    """Read a tab-separated edge list and split it into three edge tables.

    The file is read without a header row; its four columns are named
    ``inta``, ``intb``, ``weight`` and ``flag``.

    Rows are classified as follows:

    * splicing   -- either endpoint name contains ``'_'``;
    * regulatory -- ``weight`` is within ``10e-4`` of 0.6, ``flag == 1`` and
      the row is not a splicing row;
    * ppi        -- everything else.

    Splicing and regulatory rows are de-duplicated on the ordered
    ``(inta, intb)`` pair.  PPI rows are de-duplicated on the *unordered*
    pair, so ``A-B`` and ``B-A`` count as the same edge; the first occurrence
    is kept.

    Parameters
    ----------
    filename : str
        Path of the edge list to read.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(ppi, splicing, regulatory)``, each with the four input columns.
    """
    data = pd.read_table(filename, sep='\t', names=('inta', 'intb', 'weight', 'flag'), header=None)

    # Rows whose first endpoint contains '_' are discarded before anything
    # else happens.
    # NOTE(review): after this filter only `intb` can still contain '_', so
    # the `inta` half of splice_mask below can never match -- confirm that
    # dropping these rows is intended.
    data = data[~data.inta.str.contains('_')]

    splice_mask = data.inta.str.contains('_') | data.intb.str.contains('_')
    regulatory_mask = ((data.weight - 0.6).abs() < 10e-4) & (data.flag == 1) & ~splice_mask

    # Build new frames instead of mutating slices of `data` in place.
    pair = ['inta', 'intb']
    splicing = data[splice_mask].drop_duplicates(subset=pair)
    regulatory = data[regulatory_mask].drop_duplicates(subset=pair)
    ppi = data[~(regulatory_mask | splice_mask)]

    # Remove duplicates from ppi irrespective of endpoint order.  The two
    # endpoints are compared as a (smaller, larger) pair of separate columns;
    # gluing the names into a single string would make distinct edges such as
    # ('AB', 'C') and ('A', 'BC') look identical.
    a_first = ppi.inta < ppi.intb
    endpoints = pd.DataFrame({
        'lo': ppi.inta.where(a_first, ppi.intb),
        'hi': ppi.intb.where(a_first, ppi.inta),
    })
    ppi = ppi[~endpoints.duplicated()]

    return ppi, splicing, regulatory

def dataframe_to_graph(data, directed=None):
    """Convert an edge table into a sparse adjacency matrix plus name maps.

    Parameters
    ----------
    data : pandas.DataFrame
        Edge table with columns ``inta``, ``intb``, ``weight`` and ``flag``.
    directed : bool or None, optional
        Whether to build a directed graph.  When ``None`` (the default) the
        graph is directed iff the ``flag`` of the first row equals 1.

    Returns
    -------
    dict
        ``'g'``         -- square ``scipy.sparse`` CSR matrix of edge weights;
        ``'names'``     -- Series mapping node name -> matrix index;
        ``'rev_names'`` -- Series mapping matrix index -> node name.
        An empty table yields a 0x0 matrix and two empty Series.
    """
    if len(data) == 0:
        return {'g': sps.csr_matrix((0,0)), 'names': pd.Series(), 'rev_names': pd.Series()}
    if directed is None:
        directed = data.flag.iloc[0] == 1

    # Node indices follow the sorted order of the unique endpoint names.
    all_names = np.unique(np.hstack((data.inta, data.intb)))
    n_nodes = len(all_names)
    names = pd.Series(index=all_names, data=range(n_nodes))
    rev_names = pd.Series(index=range(n_nodes), data=all_names)

    if directed:
        sources, targets = data.inta, data.intb
        weight = data.weight.values
    else:
        # Undirected: store every edge in both directions.
        # NOTE(review): a self-loop is mirrored onto the same cell, and the
        # COO -> CSR conversion sums duplicate entries, so its stored weight
        # ends up doubled -- confirm whether that matters to callers.
        sources = np.hstack((data.inta, data.intb))
        targets = np.hstack((data.intb, data.inta))
        weight = np.hstack((data.weight, data.weight))

    # Hand scipy plain integer arrays rather than label-indexed Series.
    i = names.loc[sources].values
    j = names.loc[targets].values

    # Every name occurs as an endpoint, so the largest index is n_nodes - 1.
    graph = sps.coo_matrix((weight, (i, j)), shape=(n_nodes, n_nodes)).tocsr()
    return {'g': graph, 'names': names, 'rev_names': rev_names}

def sparse_to_file(net, file_or_stm, directed=False):
    """Write the edges of a network as tab-separated text.

    Each edge becomes one line ``name_u<TAB>name_v<TAB>weight<TAB>flag`` where
    ``flag`` is 1 for a directed network and 0 otherwise.  For an undirected
    network only entries with ``u < v`` are written, so each symmetric pair
    appears once and diagonal entries are skipped.

    Parameters
    ----------
    net : dict
        Mapping with ``'g'`` (CSR matrix) and ``'rev_names'`` (index -> name).
    file_or_stm : str or writable stream
        Anything exposing ``write`` is written to directly; any other value
        is treated as a path and opened in append mode.
    directed : bool, optional
        Whether the matrix holds directed edges.
    """
    if not hasattr(file_or_stm, 'write'):
        # Given a path: open it for appending and delegate to the stream
        # branch with the same network.
        with open(file_or_stm, 'a') as f:
            sparse_to_file(net, f, directed)
        return

    names = net['rev_names']
    g = net['g']
    flag = 1 if directed else 0
    for u in range(g.shape[0]):
        # Column indices of the entries stored in row u of the CSR matrix.
        for v in g.indices[g.indptr[u]:g.indptr[u + 1]]:
            if directed or u < v:
                file_or_stm.write('%s\t%s\t%f\t%d\n' % (names[u], names[v], g[u, v], flag))

def count_nodes(g):
    """Return the number of nodes, i.e. the first dimension of matrix `g`."""
    n_rows = g.shape[0]
    return n_rows

def count_edges(g, directed=False):
    """Return the number of edges stored in sparse adjacency matrix `g`.

    Parameters
    ----------
    g : scipy.sparse matrix
        Adjacency matrix; must expose ``nnz`` and ``diagonal()``.
    directed : bool, optional
        When true every stored entry is one edge.  Otherwise the matrix is
        taken to hold each edge in both directions.

    Returns
    -------
    int
        The edge count.
    """
    if directed:
        return g.nnz
    # Off-diagonal edges are stored twice and diagonal entries once; adding
    # the diagonal count again makes every edge count twice before halving.
    # Floor division keeps the result an integer on Python 2 and 3 alike.
    return (g.nnz + np.count_nonzero(g.diagonal())) // 2

def main():
    """Command-line entry point: write shuffled copies of a network file.

    Reads the network given on the command line, splits it into the PPI,
    splicing and regulatory tables, prints node and edge counts for each,
    and then, ``--repeats`` times, shuffles the three graphs with
    ``netrand.shuffle_edges`` and writes them to a file named by
    ``--template`` (formatted with the repeat index and the input basename).

    The graphs are passed to ``shuffle_edges`` repeatedly without being
    rebuilt.
    NOTE(review): presumably it shuffles in place, so each repeat starts
    from the previous repeat's result -- confirm against netrand.
    """
    import argparse

    parser = argparse.ArgumentParser(description='generate random networks')

    # positional
    parser.add_argument('network_filename', type=str)
    # optional -- short and long spellings must be separate option strings;
    # a single '-r,--repeats' string registers one literal flag of that name.
    parser.add_argument('-r', '--repeats', dest='repeats', type=int, default=10)
    parser.add_argument('-s', '--seed', dest='seed', type=int, default=None)
    parser.add_argument('-t', '--template', dest='template', type=str, default='random_%d_%s')

    opts = parser.parse_args()

    ppi, splicing, regulatory = read_networks(opts.network_filename)
    ppinet, splicenet, regnet = [dataframe_to_graph(d) for d in (ppi, splicing, regulatory)]

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('PPI: V= %s  E= %s' % (count_nodes(ppinet['g']), count_edges(ppinet['g'])))
    # NOTE(review): the splicing graph is shuffled and written as directed
    # below but counted as undirected here; whether it is *built* directed
    # depends on the flag of its first row -- confirm which count is wanted.
    print('RNA: V= %s  E= %s' % (count_nodes(splicenet['g']), count_edges(splicenet['g'])))
    # Regulatory rows all have flag == 1, so that graph is always built
    # directed and every stored entry is one edge.
    print('DNA: V= %s  E= %s' % (count_nodes(regnet['g']), count_edges(regnet['g'], True)))

    np.random.seed(opts.seed)

    for r in range(opts.repeats):
        # Each call receives a swap budget of 10x the stored entries and a
        # fresh seed drawn from the (optionally seeded) numpy generator.
        cr.shuffle_edges(ppinet['g'], False, ppinet['g'].nnz * 10, np.random.randint(0,32000))
        cr.shuffle_edges(splicenet['g'], True, splicenet['g'].nnz * 10, np.random.randint(0,32000))
        cr.shuffle_edges(regnet['g'], True, regnet['g'].nnz * 10, np.random.randint(0,32000))
        # Alternative shuffler, currently disabled:
        #cr.degree_preserving_shuffle(ppinet['g'])
        #cr.degree_preserving_shuffle(splicenet['g'], True)
        #cr.degree_preserving_shuffle(regnet['g'], True)

        with open(opts.template % (r, os.path.basename(opts.network_filename)), 'w') as f:
            cr.sparse_to_file(ppinet['g'], f, ppinet['rev_names'], False)
            cr.sparse_to_file(splicenet['g'], f, splicenet['rev_names'], True)
            cr.sparse_to_file(regnet['g'], f, regnet['rev_names'], True)
            # Pure-Python writer from this module, currently disabled:
            #sparse_to_file(ppinet, f)
            #sparse_to_file(splicenet, f, True)
            #sparse_to_file(regnet, f, True)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
