#!/usr/bin/env python
# Supplemental Methods
# Custom scripts
"""Custom read-processing scripts.

Two independent steps, both operating on files in the current working
directory when this file is executed as a script:

1. Remove reads mapping to E. coli: for every ``<sample>.sam`` file, write
   the read pairs from ``<sample>_R1_001_val_1.fq`` /
   ``<sample>_R2_001_val_2.fq`` that have no alignment with MAPQ >= 30 to
   ``<sample>_Filtered_R1.fq`` / ``<sample>_Filtered_R2.fq``.
2. Split reads into subsets for assembly: cut every ``*.fastq`` file into
   consecutive pieces of at most 20,000,000 lines.
"""

import os

# Alignments with a mapping quality at or above this value mark a read
# (pair) for removal.
MIN_MAPQ = 30

# Maximum number of lines per subset file (four lines per FASTQ record).
LINES_PER_SUBSET = 20000000


def strip_suffix(name, suffix):
    """Return ``name`` without a trailing ``suffix``.

    ``str.strip('.sam')`` removes any of the characters '.', 's', 'a', 'm'
    from BOTH ends of the string (``'sample_a.sam'`` -> ``'ple_'``), so it
    must not be used to drop a file extension.
    """
    if suffix and name.endswith(suffix):
        return name[:-len(suffix)]
    return name


# Remove reads mapping to E. coli

def read_fastq(path):
    """Load a FASTQ file into memory.

    Returns a tuple ``(names, records, description)``:

    * ``names`` - first whitespace-delimited token of every header line,
      in file order (including the leading '@').
    * ``records`` - dict mapping each name to the three lines that follow
      its header (sequence, separator, qualities), newlines included.
    * ``description`` - second token of the FIRST header line, or ``''``
      when the file is empty or that header has no second token.

    Blank lines in header position are skipped instead of raising
    ``IndexError``.
    """
    names = []
    records = {}
    description = None
    with open(path) as handle:
        while True:
            header = handle.readline()
            if not header:
                break
            fields = header.split()
            if not fields:
                continue
            if description is None:
                description = fields[1] if len(fields) > 1 else ''
            name = fields[0]
            names.append(name)
            records[name] = (handle.readline() + handle.readline()
                             + handle.readline())
    return names, records, description or ''


def mapped_read_names(sam_path, min_mapq=MIN_MAPQ):
    """Return the set of '@'-prefixed read names aligned with MAPQ >= min_mapq.

    Header lines (starting with '@') and lines with fewer than five
    whitespace-delimited fields are ignored. The names carry a leading '@'
    so that they compare directly against FASTQ header names.
    """
    mapped = set()
    with open(sam_path) as sam:
        for line in sam:
            if line.startswith('@'):
                continue
            fields = line.split()
            # Column 5 (index 4) is the mapping quality.
            if len(fields) > 4 and int(fields[4]) >= min_mapq:
                mapped.add('@' + fields[0])
    return mapped


def remove_ecoli_reads(sam_path, min_mapq=MIN_MAPQ):
    """Write the read pairs of one sample that did not map confidently.

    ``sam_path`` is ``<prefix>.sam``; the paired FASTQ inputs are
    ``<prefix>_R1_001_val_1.fq`` and ``<prefix>_R2_001_val_2.fq`` and the
    outputs are ``<prefix>_Filtered_R1.fq`` and ``<prefix>_Filtered_R2.fq``.
    A pair is dropped when any alignment of that read name has
    MAPQ >= ``min_mapq``. Surviving pairs are written in R1 file order,
    with the header rebuilt as ``<name> <description of the first record>``.

    Both FASTQ files are held in memory while filtering.

    Returns the number of read pairs written.
    Raises ``KeyError`` if a read kept from R1 has no mate in R2.
    """
    prefix = strip_suffix(sam_path, '.sam')
    names, r1_records, r1_description = read_fastq(
        prefix + '_R1_001_val_1.fq')
    _, r2_records, r2_description = read_fastq(prefix + '_R2_001_val_2.fq')
    # A set makes each membership test O(1); removing names from a list
    # once per SAM line was quadratic in the number of reads.
    mapped = mapped_read_names(sam_path, min_mapq)

    r1_tail = ' ' + r1_description if r1_description else ''
    r2_tail = ' ' + r2_description if r2_description else ''

    kept = 0
    # NOTE(review): outputs are opened in append mode, as in the original
    # script, so re-running on the same directory appends duplicate
    # records - confirm whether truncation ('w') is wanted instead.
    with open(prefix + '_Filtered_R1.fq', 'a') as out_r1:
        with open(prefix + '_Filtered_R2.fq', 'a') as out_r2:
            for name in names:
                if name in mapped:
                    continue
                # Check the mate first so a missing R2 record cannot leave
                # the two output files out of step.
                if name not in r2_records:
                    raise KeyError(
                        'read %s from %s_R1_001_val_1.fq has no mate in '
                        '%s_R2_001_val_2.fq' % (name, prefix, prefix))
                out_r1.write(name + r1_tail + '\n' + r1_records[name])
                out_r2.write(name + r2_tail + '\n' + r2_records[name])
                kept += 1
    return kept


# Split reads into subsets for assembly

def split_fastq(path, lines_per_file=LINES_PER_SUBSET):
    """Split one FASTQ file into numbered subsets of ``lines_per_file`` lines.

    Subsets are written next to the input as ``<stem>_1.fastq``,
    ``<stem>_2.fastq``, ... where ``<stem>`` is the file name up to its
    first '.'. The first subset is always created, even for an empty
    input; later subsets are only created once there is a line to put in
    them.

    ``lines_per_file`` must be a positive multiple of four so that no
    four-line FASTQ record is cut in half.

    Returns the list of subset paths, in order.
    Raises ``ValueError`` for an invalid ``lines_per_file``.
    """
    if lines_per_file < 1 or lines_per_file % 4:
        raise ValueError(
            'lines_per_file must be a positive multiple of 4, got %r'
            % (lines_per_file,))

    directory, filename = os.path.split(path)
    # NOTE(review): as in the original script the stem ends at the FIRST
    # '.', so 'a.R1.fastq' and 'a.R2.fastq' share the outputs 'a_<k>.fastq'.
    stem = os.path.join(directory, filename.split('.')[0])

    subsets = []
    with open(path) as reads:
        part = 1
        subset_path = '%s_%d.fastq' % (stem, part)
        subsets.append(subset_path)
        # NOTE(review): append mode kept from the original script; a
        # re-run adds to existing subsets rather than replacing them.
        out = open(subset_path, 'a')
        try:
            written = 0
            for line in reads:
                if written == lines_per_file:
                    out.close()
                    part += 1
                    subset_path = '%s_%d.fastq' % (stem, part)
                    subsets.append(subset_path)
                    out = open(subset_path, 'a')
                    written = 0
                out.write(line)
                written += 1
        finally:
            out.close()
    return subsets


def main():
    """Run both steps on the current working directory, in order."""
    for filename in sorted(os.listdir(os.getcwd())):
        if filename.endswith('.sam'):
            remove_ecoli_reads(filename)
    # Listed again after filtering; the listing is taken before any subset
    # is written, so subsets created by this run are not split again.
    for filename in sorted(os.listdir(os.getcwd())):
        if filename.endswith('.fastq'):
            split_fastq(filename)


if __name__ == '__main__':
    main()