import re,os,sys

def reader(path):
	'''
	Read a tab separated file of mutations into memory.

	The file is expected to hold the chromosome in the first column and
	the position of the mutation in the second column; any further
	columns are kept as they are.

	input: file name (string)
	output: list of lists, one inner list per line of the file, holding
	        that line's tab separated fields as strings. Leading and
	        trailing whitespace of each line is stripped before splitting,
	        so a blank line yields [''].
	'''
	# "with" guarantees the file handle is closed even if reading raises
	with open(path,"r") as datafile:
		return [line.strip().split('\t') for line in datafile]


def counter_muts(path,times_mut):
	'''
	Identify positions in the genome mutated X times in a mutation file.

	inputs: 1. mutation file name (string), as accepted by reader()
	        2. number of recurrent times searched for (integer)
	output: list of the coordinates that occur exactly X times in the
	        file, each in the form e.g. 'chr7_6883702' (first column,
	        underscore, second column). Each coordinate appears once.

	Rows with fewer than two columns (e.g. blank lines) are skipped,
	since no coordinate can be built from them.
	'''
	from collections import Counter

	# Count how many times every chromosome_position coordinate is seen
	Times_Found=Counter(
		row[0]+"_"+row[1] for row in reader(path) if len(row)>=2
	)

	# Counter keys are already unique, so no extra de-duplication is needed;
	# iterating items() directly (rather than indexing it) works on both
	# Python 2 and Python 3.
	return [coord for coord,count in Times_Found.items() if count==times_mut]

if __name__=='__main__':
	#Example, extracting substitutions found three times across breast cancer patients
	Muts_rec=counter_muts("Breast_cancer_subs.txt",3)
	for coord in Muts_rec:
		# single-argument print(...) produces the same output on Python 2 and 3
		print(coord)



