'''
Randomly assign Basal BC basalNorms to a "Basal tile." Calculates the mean log2FC of each "Basal tile."
Biological replicates are kept together as a "set" for each BC.
Uses 10 BCs per tile.
Usage: python3 randomize_basals_SHARPR_normalize.py <input file> <number of Basal tiles> <number of randomizations> <output file>
'''

import sys
import random
import statistics as stats
import math

#Validate the command line: the script name plus exactly four arguments are required.
#On any problem the module docstring (the usage text) is passed to sys.exit.
if len(sys.argv) != 5:
	sys.exit(__doc__)
try:
	num_tiles = int(sys.argv[2])
	num_randoms = int(sys.argv[3])
except ValueError:
	#int() raises ValueError (not TypeError) when given a non-numeric string
	print('<number of Basal tiles> and <number of randomizations> must be integers')
	sys.exit(__doc__)

#Maps the first column of each input line (the BC) to the list of its values from
#the third column onward, kept as strings. The second column is not used here.
di = {}
#Get list of BCs
list_bcs = []
with open(sys.argv[1], 'r') as f:
	#The first line sets the number of replicates (columns after the first two).
	#It is stored in di but never added to list_bcs, so it is never drawn into a
	#tile -- presumably it is a header row; confirm against the input format.
	first_line = f.readline()
	fields = first_line.rstrip('\n').split('\t')
	num_reps = len(fields) - 2
	bc = fields[0]
	di[bc] = fields[2:]
	for line in f:
		#Skip blank lines (e.g. an extra trailing newline); otherwise they would be
		#registered as a BC named '' with no values and break the output step.
		if not line.strip():
			continue
		fields = line.rstrip('\n').split('\t')
		bc = fields[0]
		di[bc] = fields[2:]
		list_bcs.append(bc)

#Randomly assign BCs to tiles
num_bcs = 10 #Use 10 BCs per tile
#Within one randomization every tile takes its own run of num_bcs BCs from the
#shuffled list, so num_tiles * num_bcs BCs must be available. Fail early with a
#clear message instead of an IndexError part-way through the assignment.
if num_randoms > 0 and num_tiles * num_bcs > len(list_bcs):
	sys.exit('Not enough BCs: ' + str(num_tiles) + ' tiles of ' + str(num_bcs)
		+ ' BCs need ' + str(num_tiles * num_bcs) + ' BCs, but the input has only '
		+ str(len(list_bcs)))
#Tile names in the order they were created; this is also the output order
list_seq_ID = []
for randomization in range(num_randoms):
	#Reshuffle once per randomization; tiles then take consecutive slices of the list
	random.shuffle(list_bcs)
	counter = 0
	for tile in range(num_tiles):
		#Randomizations are numbered from 1, tiles from 0
		tile_name = 'Basal' + str(randomization + 1) + '|tile_' + str(tile)
		#Each tile holds one list of replicate values per BC, so the replicates
		#of a BC stay together
		di[tile_name] = [di[name] for name in list_bcs[counter:counter + num_bcs]]
		counter += num_bcs
		list_seq_ID.append(tile_name)

#Write one output file per replicate, named <output base>_normalize<replicate number>
#(replicates numbered from 1). Each line is: tile name, log2 of the tile mean, BCs per tile.
output_base = sys.argv[-1]
list_outputs = [output_base + '_normalize' + str(rep + 1) for rep in range(num_reps)]

for rep_index, out_path in enumerate(list_outputs):
	with open(out_path, 'w') as out_file:
		for tile_id in list_seq_ID:
			#Average this replicate's value over the tile's BCs, then take log2 of that average
			rep_values = [float(bc_values[rep_index]) for bc_values in di[tile_id]]
			log2_mean = math.log2(stats.mean(rep_values))
			out_file.write(tile_id + '\t' + str(log2_mean) + '\t' + str(num_bcs) + '\n')

