#!/usr/bin/Rscript
# for shuffling network for a null distribution for distances between nodes in 2nd net
# for hgt dep net comparison to metabolic net 
# max 12/5/13
# updated 5/21/14 to use a non-crazy dataset
# ENDED UP ONLY USING THE DIRECT CONNECTION (I.E. OVERLAP) RESULTS CAUSE 
# DON'T NEED OTHERS IF THAT ONE LOOKS GOOD, WHICH IT DOES. 

# igraph supplies every graph routine used below. library() stops right away
# when the package is missing, whereas require() only warns and returns FALSE,
# which would let the script fail later with a less obvious error.
library(igraph)
# number of rewired (null-model) networks to generate in the permutation loop
shuf_num = 1000
# multiplier applied to num_edges to get the rewiring iterations per shuffle
perm_intensity = 5

# Collect reference distances for every linked pair of nodes in test_net.
#
# ref_net:  matrix of pairwise shortest-path lengths (e.g. from shortest.paths()).
# test_net: igraph graph whose links define which node pairs are looked up.
#
# Returns the ref_net entries for each (node, out-neighbour) pair of test_net,
# concatenated in vertex order; NULL when test_net has no vertices.
#
# When test_net carries vertex names and all of them appear in both dimnames of
# ref_net, pairs are looked up by name, so the two objects do not need to share
# a vertex ordering. Otherwise the lookup falls back to positional vertex ids,
# which is only meaningful if ref_net is ordered like the vertices of test_net.
compare_dists_2graphs = function(ref_net,test_net) {
	kos = V(test_net)
	ko_names = kos$name
	by_name = !is.null(ko_names) &&
		!is.null(rownames(ref_net)) && !is.null(colnames(ref_net)) &&
		all(ko_names %in% rownames(ref_net)) &&
		all(ko_names %in% colnames(ref_net))

	# one lookup per vertex, gathered in a list rather than growing a vector
	per_node = lapply(seq_along(kos), function(ko) {
		# plain integer ids of the out-neighbours of vertex ko
		assocs = as.integer(neighbors(test_net,ko,mode='out'))
		if (by_name) {
			ref_net[ko_names[ko],ko_names[assocs]]
		} else {
			ref_net[ko,assocs]
		}
		})
	return(unlist(per_node))
	}

#pos_assoc = as.matrix(read.table('processed_data/raw_hgt_net_adj_071614.txt'))
# The .Rdat file is presumably what defines `adjmat` (the HGT network adjacency
# matrix); it is not created anywhere else in this script.
load('processed_data/hgt_net_dag_transreduced_repro.Rdat')
pos_assoc = as.matrix(adjmat)

#pos_assoc = as.matrix(read.table(gzfile('processed_data/hgt_motree_transreduce_adjmat_052114.txt.gz'),header=T))

kos = rownames(pos_assoc)

##### THIS IS THE RIGHT WAY TO HANDLE METANET
# this net comes from Roie- leaving the filename the same for historical reconstruction purposes.
#metait = as.matrix(read.table('processed_data/kegg_im_bac_kos.kos.nr.TH_0.ignore_P01100.list'))
# two-column edge list of the metabolic network, turned into an undirected graph
metait = as.matrix(read.table('processed_data/ko_metabolic_net.txt'))
metanet = as.undirected(graph.edgelist(metait))


print('reading done')

# every node that appears at either end of a metabolic-network edge
metanodes = unique(c(metait[,1],metait[,2]))
# keep only the HGT-network nodes that also occur in the metabolic network;
# the row names are used for both dimensions (assumes a square matrix with
# matching row/column order -- TODO confirm). drop=FALSE keeps the result a
# matrix even when a single node survives the filter.
in_meta = kos %in% metanodes
pos_in_meta = pos_assoc[in_meta,in_meta,drop=FALSE]

####
# CONVERT FROM DIGRAPH-->MONOGRAPH!!!
####
pos_in_meta = as.undirected(graph.adjacency(pos_in_meta))
#alldists = shortest.paths(metanet)
# adjacency matrix of the undirected HGT graph, computed once and reused
pos_adj = get.adjacency(pos_in_meta)
# NOTE(review): the adjacency matrix of an undirected graph is symmetric, so
# this sum is presumably twice the edge count -- left unchanged because it only
# scales the number of rewiring iterations.
num_edges = sum(pos_adj)

hgt_kos = rownames(pos_adj)
# metabolic-network adjacency restricted to, and ordered like, the HGT nodes
meta_in_pos = get.adjacency(metanet)[hgt_kos,hgt_kos]

# intersection: the igraph graph.intersection() function was behaving strangely,
# so i did something rough-and-ready but understandable
# Count the adjacency-matrix cells in which both `net` and the metabolic
# network (global `meta_in_pos`) have a link.
#
# net: igraph graph whose adjacency matrix lines up with meta_in_pos.
#
# Returns the number of cells whose summed adjacency equals 2.
# NOTE(review): for undirected graphs the adjacency matrix is symmetric, so
# each shared edge is presumably counted twice -- confirm before reading the
# value as an edge count.
calc_intersect = function(net) {
	# a sum of 2 marks a cell set in both matrices (assuming 0/1 entries)
	shared = as.matrix(get.adjacency(net) + meta_in_pos) == 2
	return(sum(shared, na.rm=TRUE))
	}

#true_intersect = sum(get.adjacency(graph.intersection(metanet,pos_in_meta))) 
# observed overlap between the HGT network and the metabolic network
true_intersect = calc_intersect(pos_in_meta)

#print('true dists')
#print(summary(true_dists))
# is it actually half this????
# NOTE(review): calc_intersect() counts adjacency-matrix cells, so for an
# undirected graph the value is presumably twice the number of shared edges.
# The p-value below is unaffected because the null values are counted the
# same way.
print('true intersect')
print(true_intersect)


# means/medians/sds are only filled by the commented-out distance analysis
means = rep(NA,shuf_num)
medians = rep(NA,shuf_num)
sds = rep(NA,shuf_num)
intersect_size = rep(NA,shuf_num)

# rewiring iterations applied per shuffle
rewire_iters = num_edges*perm_intensity
# igraph >= 1.0 replaced rewire(graph, mode, niter) with
# rewire(graph, with = keeping_degseq(...)); use whichever form is available
use_new_rewire = exists('keeping_degseq',mode='function')

# each shuffle starts from the previous one, so the rewiring accumulates
shuf = pos_in_meta
for (i in seq_len(shuf_num)) {
	print(i)
	# shuffle graph (degree-preserving, no self-loops)
#	print('perming')
	if (use_new_rewire) {
		shuf = rewire(shuf,with=keeping_degseq(loops=FALSE,niter=rewire_iters))
	} else {
		shuf = rewire(shuf,mode='simple',niter=rewire_iters)
	}
#	print('perm done')
	# repeat distance calc
#	dists = compare_dists_2graphs(alldists,shuf)
#	finites = dists[!is.infinite(dists)]
#	means[i] = mean(finites)
#	medians[i] = median(finites)
#	sds[i] = sd(finites)
	intersect_size[i] = calc_intersect(shuf)
	print(intersect_size[i])
#	print(wilcox.test(finites,true_dists[!is.infinite(true_dists)]))
	}

# fraction of shuffled networks whose overlap is at least the observed one
p = length(which(intersect_size>=true_intersect)) / shuf_num

pdf('permhist.pdf')
hist(intersect_size, 30, xlab='Number of PGCEs',xlim=c(0,85),main=paste0('Pvalue = ',p))
abline(v=true_intersect,lty=2,lwd=2)
dev.off()
write.table(intersect_size,'permdat_082415.txt',quote=FALSE,col.names=FALSE,row.names=FALSE)
