import pysam
import pandas as pd
import sys
import os
# Count A/C/G/T calls at every position of the APOBEC target regions.
#
# Usage: <script> <bam_path> <A3A|A3B>
#
# The region file is whitespace-delimited; its first three columns are
# chromosome, start and end. The start column is shifted down by one before
# being used as a 0-based pysam coordinate, and positions are reported back
# as POS = 0-based position + 1, i.e. in the region file's own numbering.
#
# Output: "<bam_id>.<APOBEC>.base.txt" in the current directory,
# tab-delimited, with one row per position.

# Region file for each supported APOBEC label (second command-line argument).
BED_PATHS = {
    "A3A": "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/37_bam_depth/gene_sequence_analysis/A3A.txt",
    "A3B": "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/37_bam_depth/gene_sequence_analysis/A3B.txt",
}

# Reads below this mapping quality are ignored.
MIN_MAPQ = 20
# Base calls below this base quality are ignored.
MIN_BASEQ = 20

BASES = ("A", "C", "G", "T")
COLUMNS = ["id", "APOBEC", "#CHROM", "POS", "A", "C", "G", "T"]


def resolve_bed_path(apobec):
    """Return the region file for *apobec*, or None if the label is unknown."""
    return BED_PATHS.get(apobec)


def parse_region(line):
    """Parse one region line into (chrom, start, end) with a 0-based start.

    Returns None for blank lines and lines starting with '#'. Raises
    ValueError when a data line has fewer than three columns or
    non-integer coordinates.
    """
    fields = line.split()
    if not fields or fields[0].startswith("#"):
        return None
    if len(fields) < 3:
        raise ValueError("region line needs chrom, start and end: %r" % line)
    # The file's start column is converted to a 0-based coordinate.
    return fields[0], int(fields[1]) - 1, int(fields[2])


def new_counts():
    """Return a fresh zeroed A/C/G/T counter."""
    return dict.fromkeys(BASES, 0)


def tally_read(counts, ref_positions, bases, quals, start, end):
    """Add one read's base calls inside [start, end) to *counts*.

    counts        -- list of per-position dicts; index i is position start + i
    ref_positions -- reference position per query index, None where the
                     query base is not aligned
    bases, quals  -- the read's sequence and per-base qualities; a read
                     missing either one is skipped entirely
    """
    if bases is None or quals is None:
        return
    n_bases = len(bases)
    for query_idx, ref_pos in enumerate(ref_positions):
        if ref_pos is None or ref_pos < start or ref_pos >= end:
            continue
        if query_idx >= n_bases or quals[query_idx] < MIN_BASEQ:
            continue
        slot = counts[ref_pos - start]
        base = bases[query_idx]
        # Anything other than A/C/G/T (e.g. N) is not counted.
        if base in slot:
            slot[base] += 1


def count_region(bamfile, chrom, start, end):
    """Return per-position base counts for [start, end) on *chrom*.

    Reads are fetched once for the whole region rather than once per
    position; each read only contributes at positions where it has an
    aligned base, so the counts match a position-by-position scan.
    """
    counts = [new_counts() for _ in range(start, end)]
    if not counts:
        return counts
    for read in bamfile.fetch(chrom, start, end):
        if read.is_unmapped or read.is_duplicate or read.mapping_quality < MIN_MAPQ:
            continue
        tally_read(
            counts,
            read.get_reference_positions(full_length=True),
            read.query_sequence,
            read.query_qualities,
            start,
            end,
        )
    return counts


def main(argv):
    """Run the analysis for the BAM path and APOBEC label given in *argv*."""
    if len(argv) < 3:
        sys.exit("usage: <script> <bam_path> <A3A|A3B>")
    bam_path = argv[1]
    apobec = argv[2]

    bed_path = resolve_bed_path(apobec)
    if bed_path is None:
        sys.exit("unknown APOBEC %r; expected one of: %s"
                 % (apobec, ", ".join(sorted(BED_PATHS))))

    # BAM filename up to the first dot (used for the "id" column).
    bam_id = os.path.basename(bam_path).split(".")[0]
    output_txt = bam_id + "." + apobec + ".base.txt"

    rows = []
    bamfile = pysam.AlignmentFile(bam_path, "rb")
    try:
        with open(bed_path) as bed:
            for line in bed:
                region = parse_region(line)
                if region is None:
                    continue
                chrom, start, end = region
                region_counts = count_region(bamfile, chrom, start, end)
                for offset, counts in enumerate(region_counts):
                    rows.append(
                        [bam_id, apobec, chrom, start + offset + 1]
                        + [counts[base] for base in BASES]
                    )
    finally:
        bamfile.close()

    # Passing the columns explicitly keeps the header even with no rows.
    df = pd.DataFrame(rows, columns=COLUMNS)

    # Save to tab-delimited .txt file
    df.to_csv(output_txt, sep='\t', index=False)


if __name__ == "__main__":
    main(sys.argv)
