'''
Arranges distance and SHARPR sum data for all evolutionary paths.
Also adds motif annotation and motif change info.
Usage: python3 arrange_path_distance_sharprSums_forR.py <input paths file> <distance and SHARPR sum file> <motif annotation file> <output folder>
'''

import sys, os
import re

# Counts of DBP ("<n>D") and JUN ("<n>J") motifs inside a motif annotation
# string; compiled once because they are applied to every path element.
DBP_COUNT_RE = re.compile(r'([0-9]+)D')
JUN_COUNT_RE = re.compile(r'([0-9]+)J')


def load_records(path):
    '''
    Read the tab-separated distance / SHARPR sum file.

    Returns a dict mapping the first column (element name) to the full list
    of fields of that line. Blank lines are skipped. If a name occurs more
    than once, the last occurrence wins.
    '''
    records = {}
    with open(path, 'r') as handle:
        for line in handle:
            row = line.rstrip('\n')
            if not row:
                continue
            fields = row.split('\t')
            records[fields[0]] = fields
    return records


def add_motifs(records, path):
    '''
    Append the motif annotation (second column of the file at path) to the
    record of every element named in the first column.

    Names that are not present in records are ignored. records is modified
    in place.
    '''
    with open(path, 'r') as handle:
        for line in handle:
            fields = line.rstrip('\n').split('\t')
            record = records.get(fields[0])
            if record is not None:
                record.append(fields[1])


def motif_counts(motif):
    '''
    Return the (DBP, JUN) motif counts encoded in motif as "<n>D" and "<n>J".

    Raises ValueError when either count cannot be found, instead of failing
    with an AttributeError on a missing regex match.
    '''
    dbp = DBP_COUNT_RE.search(motif)
    jun = JUN_COUNT_RE.search(motif)
    if dbp is None or jun is None:
        raise ValueError('Cannot read DBP/JUN counts from motif annotation ' + repr(motif))
    return int(dbp.group(1)), int(jun.group(1))


def signed_change(diff, label):
    '''Format a count difference as e.g. "+2D" or "-1J"; "" when diff is 0.'''
    if diff == 0:
        return ''
    return '{:+d}'.format(diff) + label


def motif_change(prev_motif, curr_motif):
    '''
    Describe how the motif content changed between two consecutive elements.

    Returns 'NA' when both annotations are identical; otherwise the DBP
    change followed by the JUN change (e.g. '-1D+2J'). A motif type whose
    count did not change is left out, so the result is '' when the strings
    differ but both counts are equal.
    '''
    if prev_motif == curr_motif:
        return 'NA'
    prev_dbp, prev_jun = motif_counts(prev_motif)
    curr_dbp, curr_jun = motif_counts(curr_motif)
    return signed_change(curr_dbp - prev_dbp, 'D') + signed_change(curr_jun - prev_jun, 'J')


def annotate_path(names, records):
    '''
    Build the output lines for one evolutionary path.

    names is the ordered list of element names of the path (ancestral element
    first) and records maps each name to its list of fields. Every returned
    line holds the element's fields plus a final motif-change column, joined
    by tabs and terminated by a newline. The first element always gets 'NA'.

    Raises ValueError when a name has no record.
    '''
    out_lines = []
    prev_motif = None  # None marks the ancestral element: nothing to compare to
    for name in names:
        try:
            record = records[name]
        except KeyError:
            raise ValueError('Element ' + repr(name) + ' is not in the distance and SHARPR sum file') from None
        # NOTE: the motif is taken from the last field. For an element that
        # was absent from the motif annotation file this is its last data
        # column instead.
        curr_motif = record[-1]
        change = 'NA' if prev_motif is None else motif_change(prev_motif, curr_motif)
        out_lines.append('\t'.join(record) + '\t' + change + '\n')
        prev_motif = curr_motif
    return out_lines


def main(argv):
    '''
    Run the script; argv is laid out like sys.argv (see the usage text in the
    module docstring).

    Writes one file named path_<leaf> per line of the paths file into a newly
    created output folder, where <leaf> is the last element name of the path
    up to its first '.'. Exits with the usage text when the argument count is
    wrong or the output folder already exists.
    '''
    if len(argv) != 5:
        sys.exit(__doc__)

    records = load_records(argv[2])
    add_motifs(records, argv[3])

    out_dir = argv[-1].rstrip('/')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        print(out_dir + ' already exists. Please indicate a new output folder name.')
        sys.exit(__doc__)

    with open(argv[1], 'r') as paths:
        for line in paths:
            row = line.rstrip()
            if not row:
                continue  # a blank line holds no path
            names = row.split('\t')
            # Build the lines first so a failure leaves no empty output file.
            out_lines = annotate_path(names, records)
            leaf = names[-1].split('.')[0]
            with open(os.path.join(out_dir, 'path_' + leaf), 'w') as out:
                out.writelines(out_lines)


if __name__ == '__main__':
    main(sys.argv)

