#! /python22/Lib

import string,sys,os,math,re,subprocess,random,cmd,time,statistics
from scipy.stats import fisher_exact

## Generates TSS-count and average-mC (methylation) profiles for L1 copies analysed with methylartist.

def _read_methylation(mC):
	"""Parse the methylation table.

	The first line of the file is skipped; every following non-blank line is
	split on whitespace and columns 0, 1, 2, 3, 5 and 6 are read as
	chromosome, start, stop, strand, position and methylation.

	Returns a tuple (positions, L1s):
	  positions -- dict mapping each integer position (column 5) to the list
	               of methylation values (column 6) seen at that position
	  L1s       -- dict mapping chromosome to a list of [start, stop, strand],
	               one entry per distinct (chromosome, start, stop, strand)

	Raises IndexError / ValueError on a non-blank line that has too few
	columns or non-numeric values in the numeric columns.
	"""
	positions = {}
	L1s = {}
	seen = set()
	with open(mC) as handle:
		# Discard the first line (treated as a header).
		next(handle, None)
		for line in handle:
			fields = line.split()
			if not fields:
				# Blank lines (e.g. a trailing newline) carry no record.
				continue
			chromosome = fields[0]
			start = int(fields[1])
			stop = int(fields[2])
			strand = fields[3]
			position = int(fields[5])
			methylation = float(fields[6])

			positions.setdefault(position, []).append(methylation)

			# The same element appears on many rows (one per position);
			# register each interval only once.
			copies = L1s.setdefault(chromosome, [])
			key = (chromosome, start, stop, strand)
			if key not in seen:
				seen.add(key)
				copies.append([start, stop, strand])
	return positions, L1s


def _count_tss_distances(TSSs, L1s):
	"""Count TSSs by their distance into each overlapping same-strand interval.

	Each non-blank line of the TSS file is "chromosome|position|strand".
	A TSS is counted once for every interval in L1s[chromosome] that contains
	the position (inclusive bounds) and has the same strand. The distance is
	position - start on the "+" strand and stop - position otherwise.

	Returns a dict mapping distance to the number of TSSs at that distance.
	"""
	distances = {}
	with open(TSSs) as handle:
		for line in handle:
			record = line.strip()
			if not record:
				continue
			fields = record.split("|")
			chromosome = fields[0]
			position = int(fields[1])
			strand = fields[2]
			for start, stop, l1_strand in L1s.get(chromosome, []):
				if start <= position <= stop and strand == l1_strand:
					if strand == "+":
						distance = position - start
					else:
						distance = stop - position
					distances[distance] = distances.get(distance, 0) + 1
	return distances


def _write_profiles(output, distances, positions, profile_length):
	"""Write the TSS-count and average-mC tables for 0 <= i < profile_length."""
	with open(output, "w") as out:
		out.write("TSSs_count\tposition\n")
		# Every position gets a row; positions without a TSS are written as 0.
		for i in range(profile_length):
			out.write(str(distances.get(i, 0)) + "\t" + str(i) + "\n")

		out.write("average_mC\tposition\n")
		# Only positions that have at least one methylation value get a row.
		for i in range(profile_length):
			values = positions.get(i)
			if values:
				out.write(str(sum(values) / float(len(values))) + "\t" + str(i) + "\n")

		# This header is emitted with no rows beneath it; it is kept unchanged
		# so the output stays byte-identical for existing consumers.
		# NOTE(review): presumably a leftover of a binned profile - confirm
		# whether anything downstream relies on it.
		out.write("TSSs_count\tposition_10\n")


def initiate(TSSs, mC, output, profile_length=2000):
	"""Build TSS-count and average-methylation profiles and write them to a file.

	Parameters:
	  TSSs           -- path to a text file with one "chromosome|position|strand"
	                    record per line
	  mC             -- path to a whitespace-delimited table whose first line is
	                    skipped; columns 0-3 give chromosome, start, stop and
	                    strand of an element, column 5 a position and column 6
	                    a methylation value (presumably the methylartist
	                    per-element output - confirm against the producer)
	  output         -- path of the file to write (overwritten if it exists)
	  profile_length -- number of positions (0 .. profile_length-1) reported in
	                    each table; defaults to 2000

	The output holds a "TSSs_count\\tposition" table with one row per position,
	then an "average_mC\\tposition" table with rows only for positions that
	have methylation data, then a final "TSSs_count\\tposition_10" header line.

	Blank lines in either input are ignored. Malformed non-blank lines raise
	IndexError or ValueError; a missing input file raises OSError.
	"""
	positions, L1s = _read_methylation(mC)
	distances = _count_tss_distances(TSSs, L1s)
	_write_profiles(output, distances, positions, profile_length)

if __name__ == "__main__":
	# Run only when executed as a script, so that importing this module does
	# not start processing sys.argv or terminate the importing interpreter.
	if len(sys.argv) < 4:
		# Passing a string to sys.exit prints it to stderr and exits with status 1.
		sys.exit("usage: " + sys.argv[0] + " <TSSs file> <mC file> <output file>")
	# Argument order follows initiate(TSSs, mC, output).
	initiate(sys.argv[1], sys.argv[2], sys.argv[3])
	sys.exit()