# Date: 2019/01/04
# Dataset: RBSdmsc Rep1
# Last Editor: Antony

from collections import Counter
import itertools
import pandas as pd

def reverse_complement(dna):
    """Return the reverse complement of a DNA sequence, spelled as RNA.

    The input is walked from its last base to its first and every base is
    swapped for its pairing partner. A pairs to U (not T), so the output
    uses the A/C/G/U alphabet.

    Only the uppercase characters A, C, G and T are accepted; any other
    character raises KeyError.
    """
    pairing = {'A': 'U', 'C': 'G', 'G': 'C', 'T': 'A'}
    return ''.join(pairing[nt] for nt in reversed(dna))

# Step 1: for each input file R1_match.txt .. R8_match.txt, cut the
# 9-character window at 0-based columns 28-36 out of every line,
# reverse-complement it, and write one result per line to R<i>_SD.txt.
for i in range(1,9):
    # Context managers close both files on every exit path, including a
    # KeyError raised by reverse_complement on an unexpected base. The input
    # is opened first, so a missing input never creates an empty output file.
    with open('R'+str(i)+'_match.txt') as myfile, open('R'+str(i)+'_SD.txt', 'w') as w:
        mytxt = myfile.read().splitlines()
        for j in mytxt:
            # Lines shorter than 37 characters yield a shorter (possibly
            # empty) window; slicing does not raise.
            w.write(reverse_complement(j[28:37]) + '\n') # RBSdmsc backbone

# Step 2: tabulate how often every possible 9-mer occurs in each R<i>_SD.txt.

# All 4**9 = 262144 sequences of length 9 over A/C/G/U, in itertools.product
# order; this fixes the row order of the output table.
SeqList = [''.join(p) for p in itertools.product("ACGU", repeat=9)]
ExpDict = {}
for i in range(1,9):
    # Close the handle as soon as the lines are read.
    with open('R'+str(i)+'_SD.txt') as myfile:
        mytxt = myfile.read().splitlines()
    tmp = Counter(mytxt)
    # Iterate the sequences directly rather than by an index named `i`:
    # reusing the outer loop variable inside the comprehension is only safe
    # because Python 3 gives comprehensions their own scope.
    # Counter returns 0 for sequences that never occur. Lines that are not
    # one of the enumerated 9-mers (e.g. truncated windows) are ignored.
    ExpDict['R'+str(i)] = [tmp[seq] for seq in SeqList] # ExpList
ExpDict['SeqID'] = SeqList
df = pd.DataFrame.from_dict(ExpDict)

RoundCols = ['R'+str(k) for k in range(1,9)]
# Total occurrences of each sequence across the eight files.
df['Counts'] = df[RoundCols].sum(axis=1)
# Count-weighted mean of the file index (1..8) for each sequence.
# Sequences with Counts == 0 evaluate 0/0, which pandas stores as NaN and
# to_csv writes as an empty field.
df['RankMean'] = sum(rank * df[col] for rank, col in enumerate(RoundCols, start=1))/df['Counts']

df.to_csv('dmsc_SDR_rep1.csv', index = False)
