import sys



def region_filter(info_list):
    """Flag a record whose region annotation mentions HLA.

    Args:
        info_list: Region annotation. It is tested with ``in``, so a string
            is searched for the substring "HLA" while a list/tuple is
            searched for an element equal to "HLA".

    Returns:
        1 when "HLA" is found (the caller drops the record), otherwise 0.
    """
    return int("HLA" in info_list)

def dna_depth_filter(info_list, std):
    """Flag a record whose DNA read depth is below a minimum.

    Args:
        info_list: Semicolon-separated string; the first two fields are
            parsed as numbers and summed to obtain the depth (presumably
            reference and variant read counts -- confirm against the
            producer of this column). The literal "NA" means no data.
        std: Minimum acceptable depth; a depth equal to ``std`` passes.

    Returns:
        1 when the record should be dropped (depth < ``std`` or the value
        is "NA"), otherwise 0.

    Raises:
        ValueError: If one of the first two fields is not numeric.
        IndexError: If fewer than two fields are present.
    """
    # Every other filter in this file treats "NA" as a failure; without this
    # guard float("NA") would raise and abort the whole run.
    if info_list == "NA":
        return 1
    fields = info_list.split(";")
    depth = float(fields[0]) + float(fields[1])
    return 1 if depth < std else 0

def dna_var_readc_filter(info_list):
    """Flag a record that has any variant-supporting count in DNA.

    Args:
        info_list: Either the literal "NA" or a semicolon-separated string
            whose second field (index 1) is parsed as a number.

    Returns:
        0 only when the second field parses to exactly zero; 1 for "NA" or
        for any non-zero value.
    """
    if info_list != "NA":
        fields = info_list.split(";")
        if float(fields[1]) == 0:
            return 0
    return 1

def rna_depth_filter(info_list, std):
    """Flag a record whose RNA depth is missing or below a minimum.

    Args:
        info_list: Numeric string, or the literal "NA" when unavailable.
        std: Minimum acceptable value; a value equal to ``std`` passes.

    Returns:
        1 when the value is "NA" or numerically below ``std``, otherwise 0.
    """
    too_shallow = info_list == "NA" or float(info_list) < std
    return 1 if too_shallow else 0

def rna_var_readc_filter(info_list, std):
    """Flag a record whose RNA variant read count is missing or too low.

    Args:
        info_list: Numeric string, or the literal "NA" when unavailable.
        std: Minimum acceptable count; a count equal to ``std`` passes.

    Returns:
        1 when the value is "NA" or numerically below ``std``, otherwise 0.
    """
    if info_list == "NA":
        return 1
    return int(float(info_list) < std)

def nm_tag_filter(nm_list, clip_list, del_list, ins_list, db_list1, db_list2, std, std2):
    """Flag a record based on its NM value and clip/deletion statistics.

    Decision table (``nm`` is ``float(nm_list)``):
      * any of nm/clip/del/ins is "NA"      -> 1
      * nm >= std                            -> 0 if either db flag is "O",
                                                otherwise 1
      * nm == std - 1                        -> 1 if the fourth ';'-field of
                                                ``clip_list`` or ``del_list``
                                                reaches ``std2`` (see below),
                                                otherwise 0
      * anything else                        -> 0

    Args:
        nm_list: Numeric string (or "NA").
        clip_list: ';'-separated string; field index 3 is divided by 100
            before being compared with ``std2``.
        del_list: ';'-separated string; field index 3 is compared with
            ``std2`` both raw and divided by 100.
        ins_list: Only checked for "NA"; its content is never parsed.
        db_list1: Flag string; "O" rescues a high-NM record.
        db_list2: Flag string; "O" rescues a high-NM record.
        std: NM threshold.
        std2: Threshold for the clip/deletion values.

    Returns:
        1 when the record should be dropped, otherwise 0.
    """
    if "NA" in (nm_list, clip_list, del_list, ins_list):
        return 1

    nm_value = float(nm_list)
    if nm_value >= std:
        in_database = db_list1 == "O" or db_list2 == "O"
        return 0 if in_database else 1

    # Only the value exactly one below the threshold gets the extra check.
    if nm_value != std - 1:
        return 0

    def fourth_field(text):
        return float(text.split(";")[3])

    # NOTE(review): del_list is tested twice (raw and /100) while ins_list is
    # never examined; this looks like a copy-paste slip where one term was
    # meant to use ins_list. Behaviour is kept as-is pending confirmation.
    noisy = (
        fourth_field(clip_list) / 100 >= std2
        or fourth_field(del_list) >= std2
        or fourth_field(del_list) / 100 >= std2
    )
    return 1 if noisy else 0
def dist_filter(info_list1, info_list2, std):
    """Flag a record whose distance value on either side is below a minimum.

    Args:
        info_list1: ';'-separated string (or "NA"); field index 1 is parsed
            as a number.
        info_list2: Same layout as ``info_list1`` for the other side.
        std: Minimum acceptable value; a value equal to ``std`` passes.

    Returns:
        1 when either input is "NA" or either parsed value is below ``std``,
        otherwise 0.
    """
    if info_list1 == "NA" or info_list2 == "NA":
        return 1
    # Checked in order so the second value is only parsed if the first passes.
    for text in (info_list1, info_list2):
        if float(text.split(";")[1]) < std:
            return 1
    return 0

def strand_bias_filter(info_list1, info_list2):
    """Flag a record supported by only one of two read groups.

    Args:
        info_list1: Numeric string (or "NA") for the first group.
        info_list2: Numeric string (or "NA") for the second group.

    Returns:
        1 when either value is "NA" or parses to zero, otherwise 0.
    """
    if "NA" in (info_list1, info_list2):
        return 1
    one_side_empty = float(info_list1) == 0 or float(info_list2) == 0
    return int(one_side_empty)

def bq_filter(info_list, std):
    """Flag a record whose base-quality value is missing or below a minimum.

    Args:
        info_list: Numeric string, or the literal "NA" when unavailable.
        std: Minimum acceptable value; a value equal to ``std`` passes.

    Returns:
        1 when the value is "NA" or numerically below ``std``, otherwise 0.
    """
    if info_list != "NA" and not float(info_list) < std:
        return 0
    return 1

def clip_filter(info_list, std):
    """Flag a record whose clipping value is missing or too high.

    Args:
        info_list: ';'-separated string whose field index 3 holds the value
            of interest (it is divided by 100 before the comparison, so it
            is presumably a percentage -- confirm against the producer).
            Either the whole string or that field may be the literal "NA".
        std: Threshold on the scaled value; a value equal to ``std`` is
            already flagged.

    Returns:
        1 when the value is "NA" or ``value / 100 >= std``, otherwise 0.

    Raises:
        IndexError: If the string is not "NA" and has fewer than four fields.
        ValueError: If field index 3 is neither "NA" nor numeric.
    """
    # A bare "NA" has no fourth field; treat it like the other filters do
    # instead of failing with IndexError.
    if info_list == "NA":
        return 1
    clip_field = info_list.split(";")[3]
    if clip_field == "NA":
        return 1
    return 1 if float(clip_field) / 100 >= std else 0
def mapq_filter(info, info_list1, info_list2, std1, std2):
    """Flag a record whose second mapping-quality value is below a minimum.

    Only ``info_list2`` and ``std1`` take part in the decision; the other
    parameters are accepted for call compatibility but are not read.

    Args:
        info: Unused.
        info_list1: Unused.
        info_list2: Numeric string compared against ``std1``. There is no
            "NA" handling here, so a non-numeric value raises ValueError.
        std1: Minimum acceptable value; a value equal to ``std1`` passes.
        std2: Unused.

    Returns:
        1 when ``float(info_list2) < std1``, otherwise 0.
    """
    return 1 if float(info_list2) < std1 else 0

def multi_base_filter(info_list1, info_list2, info_list3, info_list4, ref, alt):
    """Flag a record where a third base (neither ref nor alt) is observed.

    Args:
        info_list1: Integer string with the count for base "A".
        info_list2: Integer string with the count for base "C".
        info_list3: Integer string with the count for base "G".
        info_list4: Integer string with the count for base "T".
        ref: Reference base; must be one of "A", "C", "G", "T".
        alt: Alternate base; must be one of "A", "C", "G", "T" and differ
            from ``ref``.

    Returns:
        1 when any base other than ``ref``/``alt`` has a count greater
        than 1, otherwise 0.

    Raises:
        ValueError: If a count is not an integer string.
        KeyError: If ``ref`` or ``alt`` is not a single A/C/G/T base, or if
            they are equal.
    """
    raw_counts = (info_list1, info_list2, info_list3, info_list4)
    counts = dict(zip("ACGT", map(int, raw_counts)))

    # Remove the two expected alleles; whatever is left is "other" bases.
    counts.pop(ref)
    counts.pop(alt)

    return 1 if any(n > 1 for n in counts.values()) else 0
	
# ---------------------------------------------------------------------------
# Driver: read the tab-separated variant table given as sys.argv[1] (one
# header line followed by data rows), apply the filters defined above and
# write two annotated copies next to the input:
#   <input>.tot.incl_nodir.readinfo_filtered.vcf  - every row that passes the
#       chromosome/region/DNA filters, with two extra columns (F1R2_TF,
#       F2R1_TF) set to "O" (passed the read-level filters) or "X" (failed);
#   <input>.incl_nodir.readinfo_filtered.vcf      - the subset of those rows
#       where at least one of the two flags is "O".
# The number of rows that passed the DNA-level filters is printed at the end.
# ---------------------------------------------------------------------------

# 0-based columns read directly from each row.
CHROM_COL = 0          # matched against "GL00", "MT" and "Y" (presumably the contig name)
REF_COL = 3
ALT_COL = 4
GENE_REGION_COL = 11
TUMOR_DNA_COL = 27     # header prefix "t_" (presumably the tumor sample -- confirm)
NORMAL_DNA_COL = 28    # header prefix "n_" (presumably the normal sample -- confirm)
DB1_COL = 59
DB2_COL = 60

# Positions inside a per-orientation column list (F1R2_col_list/F2R1_col_list).
REF_MAPQ_IDX = 0
VAR_MAPQ_IDX = 1
END_L_IDX = 2
END_R_IDX = 3
CLIP_IDX = 4
DEL_IDX = 5
INS_IDX = 6
NM_TAG_IDX = 8
VAR_READ_IDX = 9
DEPTH_IDX = 11
READ1_IDX = 12
READ2_IDX = 13
BQ_IDX = 14
BASE_A_IDX = 15
BASE_C_IDX = 16
BASE_G_IDX = 17
BASE_T_IDX = 18

# Columns 29..58 alternate between the two orientations (odd -> F1R2,
# even -> F2R1); columns 61..64 and 65..68 are appended to each respectively.
F1R2_col_list = list(range(29, 59, 2)) + list(range(61, 65))
F2R1_col_list = list(range(30, 59, 2)) + list(range(65, 69))
col_info_list = [F1R2_col_list, F2R1_col_list]


def _fails_dna_filters(vcf_info):
    """Return True when the row is rejected by the region or DNA filters."""
    return (
        region_filter(vcf_info[GENE_REGION_COL]) == 1
        # Minimum DNA depth of 5 in both DNA columns.
        or dna_depth_filter(vcf_info[TUMOR_DNA_COL], 5) == 1
        or dna_depth_filter(vcf_info[NORMAL_DNA_COL], 5) == 1
        # No variant-supporting count allowed in either DNA column.
        or dna_var_readc_filter(vcf_info[TUMOR_DNA_COL]) == 1
        or dna_var_readc_filter(vcf_info[NORMAL_DNA_COL]) == 1
    )


def _fails_read_filters(vcf_info, col_list):
    """Return True when one orientation's columns fail any read-level filter.

    Filters run in a fixed order and stop at the first failure, so later
    columns are not parsed once an earlier filter has rejected the row.
    """
    def field(idx):
        return vcf_info[col_list[idx]]

    return (
        rna_depth_filter(field(DEPTH_IDX), 4) == 1
        or rna_var_readc_filter(field(VAR_READ_IDX), 3) == 1
        or nm_tag_filter(
            field(NM_TAG_IDX), field(CLIP_IDX), field(DEL_IDX), field(INS_IDX),
            vcf_info[DB1_COL], vcf_info[DB2_COL], 4, 0.5) == 1
        or dist_filter(field(END_L_IDX), field(END_R_IDX), 0) == 1
        or strand_bias_filter(field(READ1_IDX), field(READ2_IDX)) == 1
        or bq_filter(field(BQ_IDX), 20) == 1
        or clip_filter(field(CLIP_IDX), 0.5) == 1
        or mapq_filter(
            vcf_info, field(REF_MAPQ_IDX), field(VAR_MAPQ_IDX), 130, 10) == 1
        or multi_base_filter(
            field(BASE_A_IDX), field(BASE_C_IDX), field(BASE_G_IDX),
            field(BASE_T_IDX), vcf_info[REF_COL], vcf_info[ALT_COL]) == 1
    )


k = 0  # rows that survived the chromosome/region/DNA filters
# Context managers guarantee all three handles are flushed and closed, even
# if a malformed row raises part-way through.
with open(sys.argv[1]) as vcf_file, \
        open(sys.argv[1] + ".tot.incl_nodir.readinfo_filtered.vcf", "w") as out_file, \
        open(sys.argv[1] + ".incl_nodir.readinfo_filtered.vcf", "w") as out_file2:
    vcf_line = vcf_file.readline().rstrip()
    head_info = vcf_line.split("\t")  # header names, handy when inspecting columns
    header_out = vcf_line + "\tF1R2_TF\tF2R1_TF\n"
    out_file.write(header_out)
    out_file2.write(header_out)

    # Rows are right-stripped of all whitespace; reading stops at end of file
    # or at the first line that is empty after stripping.
    for vcf_line in iter(lambda: vcf_file.readline().rstrip(), ""):
        vcf_info = vcf_line.split("\t")

        # Substring match: any name containing one of these tags is skipped.
        chrom = vcf_info[CHROM_COL]
        if "GL00" in chrom or "MT" in chrom or "Y" in chrom:
            continue
        if _fails_dna_filters(vcf_info):
            continue
        k += 1

        # One flag per orientation (F1R2 first, then F2R1): 0 = pass, 1 = fail.
        idx_list = [
            1 if _fails_read_filters(vcf_info, col_list) else 0
            for col_list in col_info_list
        ]
        TF_list = ["O" if flag == 0 else "X" for flag in idx_list]

        record = vcf_line + "\t" + "\t".join(TF_list) + "\n"
        out_file.write(record)
        if 0 in idx_list:
            out_file2.write(record)

print(k)
