#!/usr/bin/Rscript
# for shuffling network for a null distribution for distances between nodes in 2nd net
# for hgt dep net comparison to metabolic net 
# max 12/5/13
# updated 5/21/14 to use a non-crazy dataset
# further updated 6/6/14 to look for associated modules/paths
#print('whatev')

# modified 6/11/2014 to allow parallelization on sage.

#replicate = 'fake'
# Label for this job's output file, taken from the second trailing
# command-line argument (NA when fewer than two arguments are supplied).
replicate = commandArgs(trailingOnly=TRUE)[2]

# library() instead of require(): stop right here with a clear error when
# igraph is not installed, rather than failing later at the first igraph call.
library(igraph)
shuf_num = 10		# number of shuffled networks generated by this job
perm_intensity = 5	# rewiring iterations per edge for each shuffle


#pos_assoc = as.matrix(read.table('hgt_net_treduce_adjmat_071614.txt',header=T))
# NOTE(review): the .Rdat file is expected to define `adjmat` - confirm.
load('processed_data/hgt_net_dag_transreduced_repro.Rdat')
pos_assoc = adjmat
rm(adjmat)

# sum of all adjacency entries; used to scale the number of rewiring steps
num_edges = sum(pos_assoc)
pos_net = graph.adjacency(pos_assoc)

# Two-column table: column 1 holds the ids matched against the network's row
# names (KOs), column 2 the pathway each of them is assigned to.
pathway = as.matrix(read.table('processed_data/annots_helperfiles/pathway.parsed120113.cut'))	# maps to kos
# Keep only KOs that occur in the network. drop=FALSE keeps `pathway` a matrix
# even when a single row survives, so the column indexing below stays valid.
pathway = pathway[pathway[,1] %in% rownames(pos_assoc),,drop=FALSE]
pathwayS = unique(pathway[,2])

# pathway x pathway counter: how many shuffles reach at least the real count
path_to_path_ps = matrix(0,length(pathwayS),length(pathwayS))
dimnames(path_to_path_ps) = list(pathwayS,pathwayS)

# all-zero template with the same shape, filled in by computeOverlaps()
path_to_path = path_to_path_ps

computeOverlaps = function(graph,path_to_path,pathwayS,ko_pathway=pathway) {
	# Count pathway-to-pathway links in an adjacency matrix.
	# (Not actually computing overlaps, but the name is kept for callers.)
	#
	# graph        adjacency matrix whose row and column names are KO ids
	# path_to_path pathway x pathway matrix with dimnames; used as the template
	#              for the result, so cells that are not visited keep their value
	# pathwayS     pathway ids to tabulate
	# ko_pathway   two-column matrix (KO id, pathway id); defaults to the
	#              script-level `pathway` table, so existing calls are unchanged
	#
	# Returns path_to_path where cell [p1,p2] is the sum of the adjacency entries
	# going from the KOs of p1 (rows) to the KOs of p2 (columns).
	net = graph
	row_ids = rownames(net)
	col_ids = colnames(net)

	# The KO list of a pathway does not depend on the other pathway of the pair,
	# so look each one up once instead of once per (path1,path2) combination.
	kos_by_path = lapply(pathwayS, function(p) ko_pathway[ko_pathway[,2] == p,1])
	# Only KOs that actually label a row / column can be used as an index;
	# anything else would abort with "subscript out of bounds".
	row_kos = lapply(kos_by_path, function(k) k[k %in% row_ids])
	col_kos = lapply(kos_by_path, function(k) k[k %in% col_ids])

	for (i in seq_along(pathwayS)) {
		kos1 = row_kos[[i]]
		# Source pathways with fewer than two KOs in the network are skipped.
		# This check has to come before the subsetting it protects.
		if (length(kos1) < 2) {
			next
		}
		path_in = net[kos1,,drop=FALSE]
		for (j in seq_along(pathwayS)) {
			path_out = path_in[,col_kos[[j]],drop=FALSE]
			path_to_path[pathwayS[i],pathwayS[j]] = sum(path_out)
		}
	}
	return(path_to_path)
}


print('reading done')

# compute sums for real network: observed pathway-to-pathway link counts

realSums = computeOverlaps(pos_assoc,path_to_path,pathwayS)

# for efficiency, omit a bunch of stuff that doesn't show up:
outSums = rowSums(realSums)
inSums = colSums(realSums)

# Pathways with no links in either direction. The comparison must be the
# element-wise `&`: the scalar `&&` is an error for vectors on R >= 4.3 and
# only looked at the first element on older versions.
not_enough = names(which(outSums == 0 & inSums == 0))

# pathways that take part in at least one link, as source or as target
pathwayS = names(which(outSums > 0 | inSums > 0))

# drop=FALSE keeps these as matrices even if a single pathway is left, which
# computeOverlaps() and the element-wise comparison in the shuffle loop rely on
realSums = realSums[pathwayS,pathwayS,drop=FALSE]
path_to_path_ps = path_to_path_ps[pathwayS,pathwayS,drop=FALSE]
path_to_path = path_to_path[pathwayS,pathwayS,drop=FALSE]


# Null distribution: shuffle the real network shuf_num times and, for every
# pathway pair, count how often the shuffled link count reaches the real one.
for (i in seq_len(shuf_num)) {
	print(i)

	# shuffle graph; the number of rewiring steps scales with the edge count
	# the 'loop' mode didn't work for whatever reason.
	#	shuf = rewire(pos_net,mode='loop',niter=(num_edges*perm_intensity))
	# NOTE(review): mode=/niter= is the legacy rewire() signature - confirm it
	# matches the igraph version installed where this runs.
	shuf = rewire(pos_net,mode='simple',niter=(num_edges*perm_intensity))

	# repeat counting on the shuffled adjacency matrix
	shufCounts = computeOverlaps(get.adjacency(shuf),path_to_path,pathwayS)

	# TRUE counts as 1 in the addition, so each pair that matched or beat the
	# real count gets its tally increased by one
	higher = shufCounts >= realSums
	path_to_path_ps = path_to_path_ps + higher
}

# one output file per replicate, so parallel jobs do not overwrite each other
save(path_to_path_ps,file=paste0('findOverPath_090915_10rep_',replicate,'.Rdat'))
