#!/usr/bin/python

####################################################
#                                                  #
#               AFFY ELP TRANSLATOR                #
#                 MONSTER VERSION                  #
#            FIVE INPUT FILES REQUIRED             #
#                                                  #
#        COPYRIGHT, ALEXANDER KOZIK, 2005          #
#                                                  #
#  http://elp.ucdavis.edu/data/analysis/elp_map/   #
#                                                  #
####################################################

import math
import re
import sys
import string
import time
import os

# Ratio (upper range minimum / lower range maximum) the range adjustment
# converges to before expression values are translated.
_TARGET_RATIO = 1.1

# Suffix keys of the fourteen output tables: <out_name>.exp_<key>.tab
_OUTPUT_KEYS = ("all", "set1", "set2", "set1N", "set2N",
                "diff1", "diff2", "diff0", "master", "xcheck", "xdiff",
                "xsetAB", "xset01", "fixed")

# Zero-based columns of the chip conversion file holding the two chip IDs.
_EXP1_COLUMN = 2
_EXP2_COLUMN = 3


def _open_text(path, mode):
    """Open *path* for str I/O without newline translation.

    The script was written for Python 2 and used "rb"/"wb".  Under Python 3
    the same untranslated behaviour is obtained with text mode and
    newline="", which keeps all string handling below version independent.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, newline="")
    return open(path, mode + "b")


def _read_rows(handle):
    """Yield the tab-separated fields of every non-blank line of *handle*."""
    for line in handle:
        line = line.rstrip("\r\n")
        if line:
            yield line.split("\t")


def _separate_ranges(probe_id, grade, low_min, low_max, high_min, high_max):
    """Move two value ranges until high_min / low_max reaches the target ratio.

    The lower range is (low_min, low_max), the upper one (high_min, high_max).
    Only low_max and high_min are moved, in steps of 1/1000 of the width of
    their own range: first towards each other while the ratio is above the
    target, then apart while it is below.

    Returns (low_max, high_min, initial_ratio, final_ratio).  final_ratio is
    None when neither adjustment branch ran (only possible for NaN input).
    Raises ZeroDivisionError when low_max is zero, as the original code did.
    """
    initial_ratio = high_min / low_max
    final_ratio = None

    if high_min / low_max >= _TARGET_RATIO:
        print(" MIN and MAX too far ... FIXING ... ")
        print(probe_id + " " + grade + " " + repr(low_max) + " " + repr(high_min))
        unit_high = (high_max - high_min) / 1000.0
        unit_low = (low_max - low_min) / 1000.0
        j = 1
        # With two zero-width ranges no step is possible; skip instead of
        # looping forever.
        while high_min / low_max > _TARGET_RATIO and (unit_high or unit_low):
            high_min = high_min - unit_high
            low_max = low_max + unit_low
            sys.stdout.write("." + repr(j) + ".")
            j = j + 1
        print(" ... FIXED! ... ")
        final_ratio = high_min / low_max
        print(probe_id + " " + grade + " " + repr(low_max) + " " + repr(high_min))

    if high_min / low_max < _TARGET_RATIO:
        print(" MIN and MAX too close ... FIXING ... ")
        print(probe_id + " " + grade + " " + repr(low_max) + " " + repr(high_min))
        # Step sizes are recomputed from the values left by the branch above.
        unit_high = (high_max - high_min) / 1000.0
        unit_low = (low_max - low_min) / 1000.0
        j = 1
        while high_min / low_max < _TARGET_RATIO and (unit_high or unit_low):
            high_min = high_min + unit_high
            low_max = low_max - unit_low
            sys.stdout.write("." + repr(j) + ".")
            j = j + 1
        print(" ... FIXED! ... ")
        final_ratio = high_min / low_max
        print(probe_id + " " + grade + " " + repr(low_max) + " " + repr(high_min))

    return low_max, high_min, initial_ratio, final_ratio


def _load_ranges(in_file, out_fixed):
    """Read the range file, adjust "ELP" rows and log them to *out_fixed*.

    Columns used: 0 = probe ID, 1 = grade, 6/7 = min/max of B, 8/9 = min/max
    of S.  Returns {probe_id: (min_B, max_B, min_S, max_S)} for adjusted
    probes and {probe_id: None} for rows that were not adjusted (grade other
    than "ELP", or identical B and S minima).
    """
    final_ranges = {}
    for fields in _read_rows(in_file):
        probe_id = fields[0]
        grade = fields[1]
        min_b = float(fields[6])
        max_b = float(fields[7])
        min_s = float(fields[8])
        max_s = float(fields[9])

        out_fixed.write(probe_id + '\t' + grade + '\t' +
                        str(round(min_b, 2)) + '\t' + str(round(max_b, 2)) + '\t' +
                        str(round(min_s, 2)) + '\t' + str(round(max_s, 2)) + '\t' +
                        "***" + '\t')

        final_ranges[probe_id] = None
        fixed_status = "__OK__"
        case_status = "B=S"
        initial_ratio = None
        final_ratio = None

        if grade == "ELP":
            # CASE 1  B less than S
            if min_b < min_s:
                case_status = "B<S"
                max_b, min_s, initial_ratio, ratio = _separate_ranges(
                    probe_id, grade, min_b, max_b, min_s, max_s)
                if ratio is not None:
                    final_ratio = ratio
                    fixed_status = "FIXED!"
                    final_ranges[probe_id] = (min_b, max_b, min_s, max_s)
            # CASE 2  S less than B (tested on the values left by CASE 1,
            # exactly as the original sequential "if" did)
            if min_s < min_b:
                case_status = "B>S"
                max_s, min_b, initial_ratio, ratio = _separate_ranges(
                    probe_id, grade, min_s, max_s, min_b, max_b)
                if ratio is not None:
                    final_ratio = ratio
                    fixed_status = "FIXED!"
                    final_ranges[probe_id] = (min_b, max_b, min_s, max_s)

        limits = final_ranges[probe_id]
        if limits is None:
            # NOTE(review): the original called round() on the "MIN_B"...
            # placeholders here (or left the row unterminated, depending on
            # the lost indentation).  The row is now always completed, with
            # the placeholders as text and "NA" for the two ratios.
            tail = ["MIN_B", "MAX_B", "MIN_S", "MAX_S",
                    fixed_status, "NA", "NA", case_status]
        else:
            tail = [str(round(limit, 2)) for limit in limits]
            tail.extend([fixed_status, str(round(initial_ratio, 2)),
                         str(round(final_ratio, 2)), case_status])
        out_fixed.write('\t'.join(tail) + '\n')
    return final_ranges


def _load_ids(in_file, pause):
    """Read the ID file (probe ID, AT identifier, class).

    Returns (id_array, class_array), both keyed by probe ID.
    """
    id_array = {}
    class_array = {}
    at_list = []
    for fields in _read_rows(in_file):
        probe_id = fields[0]
        at = fields[1]
        at_class = fields[2]
        id_array[probe_id] = at
        class_array[probe_id] = at_class
        at_list.append(at)
        print(probe_id + " " + at + " " + at_class)
    at_list.sort()
    print(at_list)
    time.sleep(2 * pause)
    print("LIST SIZE: " + repr(len(at_list)))
    return id_array, class_array


def _call_allele(value, limits, condit, probe_id, pause):
    """Translate one expression value into (upper-case call, case-coded call).

    *limits* is (min_B, max_B, min_S, max_S) or None.  Probes without
    adjusted limits are reported as "-"; the original compared the float with
    string placeholders, which only "worked" through Python 2's arbitrary
    cross-type ordering.  The lower-case letter in the second call marks the
    allele whose range is the lower one.
    """
    false_value = "-"
    case_value = "-"
    if limits is None:
        return false_value, case_value
    min_b, max_b, min_s, max_s = limits

    if min_b < min_s:
        # CASE 1  B less than S
        if min_s <= max_b:
            print(" TOO BAD ... WRONG RANGE ... CASE 1 " + probe_id)
            time.sleep(10 * pause)
        else:
            if condit == "STRONG":
                if min_b <= value <= max_b:
                    false_value, case_value = "B", "b"
                if min_s <= value <= max_s:
                    false_value, case_value = "A", "A"
            if condit == "WEAK":
                if value <= max_b:
                    false_value, case_value = "B", "b"
                if value >= min_s:
                    false_value, case_value = "A", "A"
    elif min_s < min_b:
        # CASE 2  S less than B
        if min_b <= max_s:
            print(" TOO BAD ... WRONG RANGE ... CASE 2 " + probe_id)
            time.sleep(10 * pause)
        else:
            if condit == "STRONG":
                if min_b <= value <= max_b:
                    false_value, case_value = "B", "B"
                if min_s <= value <= max_s:
                    false_value, case_value = "A", "a"
            if condit == "WEAK":
                if value >= min_b:
                    false_value, case_value = "B", "B"
                if value <= max_s:
                    false_value, case_value = "A", "a"
    return false_value, case_value


def _load_expression(in_file, id_array, final_ranges, condit, pause):
    """Read the expression table and call every value.

    Row types are selected by the first field: ";" fixes the expected number
    of columns, ";;" lists the chip IDs, any other row holds the values of
    one probe.  Probes absent from *id_array* are ignored.  Exits when chip
    IDs are duplicated or a data row has the wrong number of columns; raises
    KeyError for a probe that is missing from the range file.

    Returns (id_list, chip_list, calls) with
    calls[probe_id, chip_id] = (upper-case call, case-coded call, value).
    """
    id_list = []
    chip_list = []
    chip_by_column = {}
    seen_chips = set()
    calls = {}
    init_len = 0
    for fields in _read_rows(in_file):
        probe_id = fields[0]
        if probe_id == ";":
            init_len = len(fields)
        elif probe_id == ";;":
            duplicated = []
            for column in range(1, init_len):
                chip_id = fields[column]
                chip_by_column[column] = chip_id
                if chip_id in seen_chips:
                    # All duplicates are collected before exiting below.  The
                    # original also called sys.exit() here, but its bare
                    # "except:" swallowed the SystemExit.
                    print("CHIP ID DUPLICATION: " + chip_id)
                    duplicated.append(chip_id)
                seen_chips.add(chip_id)
                print(repr(column) + " " + chip_id)
                chip_list.append(chip_id)
            print("")
            print(chip_list)
            print("NUMBER OF CHIPS: " + repr(len(chip_list)))
            print("")
            time.sleep(3 * pause)
            if duplicated:
                print(" DUPLICATED CHIP IDs ")
                print(duplicated)
                sys.exit()
        else:
            str_len = len(fields)
            if str_len != init_len:
                print("XPEH BAM!")
                sys.exit()
            if probe_id not in id_array:
                continue
            print(probe_id + " " + id_array[probe_id])
            id_list.append(probe_id)
            limits = final_ranges[probe_id]
            for column in range(1, str_len):
                chip_id = chip_by_column[column]
                value = float(fields[column])
                false_value, case_value = _call_allele(
                    value, limits, condit, probe_id, pause)
                calls[probe_id, chip_id] = (false_value, case_value, value)
    return id_list, chip_list, calls


def _write_transposed(out_ab, out_01, id_list, id_array, chip_list, calls):
    """Write the chip-by-probe tables (.exp_xsetAB and .exp_xset01)."""
    two_digit = {"A": "11", "a": "00", "B": "10", "b": "01", "-": "--"}
    targets = (out_ab, out_01)

    for out in targets:
        out.write(";" + '\t')
        for probe_id in id_list:
            out.write(probe_id + '\t')
        out.write("-----" + '\n')
        out.write(";;" + '\t')
        for probe_id in id_list:
            out.write(id_array[probe_id] + '\t')
        out.write("-----" + '\n')

    for chip in chip_list:
        for out in targets:
            out.write(chip + '\t')
        for probe_id in id_list:
            value_ab = calls[probe_id, chip][1]
            out_ab.write(value_ab + '\t')
            out_01.write(two_digit.get(value_ab, "XX") + '\t')
        for out in targets:
            out.write("-----" + '\n')


def _load_markers(in_file, pause):
    """Read the marker table: header row of marker IDs, then one row per RIL.

    Exits when a score row does not have as many columns as the header.
    Returns (marker_list, scores) with scores[marker_id, ril_id] = score.
    """
    marker_list = []
    scores = {}
    width = 0
    for row_number, fields in enumerate(_read_rows(in_file)):
        if row_number == 0:
            # PROCESSING FIRST LINE
            width = len(fields)
            for column in range(1, width):
                print(repr(column) + " " + fields[column] + " step 1")
                marker_list.append(fields[column])
            time.sleep(2 * pause)
        else:
            # PROCESSING SCORES
            ril_id = fields[0]
            if len(fields) != width:
                print(" ... TOO BAD ... ")
                sys.exit()
            for column in range(1, width):
                scores[marker_list[column - 1], ril_id] = fields[column]
            print(repr(row_number) + " " + ril_id + " step 2")
    return marker_list, scores


def _load_chip_map(in_file):
    """Read the chip conversion file (order, RIL ID, chip ID 1, chip ID 2).

    Returns (ril_list, ril_info): the sorted integer orders and
    ril_info[order] = (ril_id, chip_id1, chip_id2).
    """
    ril_list = []
    ril_info = {}
    for fields in _read_rows(in_file):
        ril_order = int(fields[0])
        ril_id = fields[1]
        exp1_id = fields[_EXP1_COLUMN]
        exp2_id = fields[_EXP2_COLUMN]
        ril_list.append(ril_order)
        ril_info[ril_order] = (ril_id, exp1_id, exp2_id)
        print(ril_id + " " + exp1_id + " " + exp2_id)
    ril_list.sort()
    print(ril_list)
    print("RIL LIST SIZE: " + repr(len(ril_list)))
    return ril_list, ril_info


def _pair_codes(call1, call2):
    """Combine the calls of the two experiments of one RIL.

    Returns (diff1, diff2, consensus, xcheck):
      diff1      "1" agreement, "-" both missing, "0" one missing, "-1" conflict
      diff2      the two calls concatenated
      consensus  the shared call, otherwise "-"
      xcheck     the shared call, "D" (not B), "C" (not A), "H" (A and B)
    """
    pair = call1 + call2
    if call1 == call2:
        if call1 == "A" or call1 == "B":
            return "1", pair, call1, call1
        if call1 == "-":
            return "-", pair, "-", "-"
        # Unknown call letters keep the original "Y" defaults.
        return "Y", "Y", "Y", "Y"
    if call1 == "-" or call2 == "-":
        diff = "0"
    else:
        diff = "-1"
    xcheck = {
        ("A", "-"): "D", ("-", "A"): "D",    # NOT B
        ("B", "-"): "C", ("-", "B"): "C",    # NOT A
        ("A", "B"): "H", ("B", "A"): "H",    # A and B
    }.get((call1, call2), "-")
    return diff, pair, "-", xcheck


def _write_tables(outputs, id_list, id_array, class_array, ril_list, ril_info,
                  calls, marker_list, scores):
    """Write the eleven RIL-ordered tables (everything but xset*/fixed)."""
    out_all = outputs["all"]
    out_set1 = outputs["set1"]
    out_set2 = outputs["set2"]
    out_set1n = outputs["set1N"]
    out_set2n = outputs["set2N"]
    out_diff1 = outputs["diff1"]
    out_diff2 = outputs["diff2"]
    out_diff0 = outputs["diff0"]
    out_master = outputs["master"]
    out_xcheck = outputs["xcheck"]
    out_xdiff = outputs["xdiff"]

    diff_files = [out_diff1, out_diff2, out_diff0,
                  out_master, out_xcheck, out_xdiff]
    single_files = [out_set1, out_set2, out_set1n, out_set2n] + diff_files
    every_file = [out_all] + single_files
    marker_files = [out_master, out_xcheck, out_xdiff]
    rule = "-----" + '\t' + "-----" + '\n'

    ### FIRST LINE: running column numbers (two columns per RIL in "all")
    for out in every_file:
        out.write(";" + '\t')
    for position in range(1, len(ril_list) + 1):
        out_all.write(repr(2 * position - 1) + '\t' + repr(2 * position) + '\t')
        for out in single_files:
            out.write(repr(position) + '\t')
    for out in every_file:
        out.write(rule)

    ### SECOND LINE: RIL IDs
    for out in every_file:
        out.write(";;" + '\t')
    for ril_order in ril_list:
        ril_id = ril_info[ril_order][0]
        out_all.write(ril_id + '\t' + ril_id + '\t')
        for out in single_files:
            out.write(ril_id + '\t')
    for out in every_file:
        out.write(rule)

    ### THIRD LINE: chip IDs
    for out in every_file:
        out.write(";;;" + '\t')
    for ril_order in ril_list:
        chip_id1 = ril_info[ril_order][1]
        chip_id2 = ril_info[ril_order][2]
        out_all.write(chip_id1 + '\t' + chip_id2 + '\t')
        out_set1.write(chip_id1 + '\t')
        out_set2.write(chip_id2 + '\t')
        out_set1n.write(chip_id1 + '\t')
        out_set2n.write(chip_id2 + '\t')
        for out in diff_files:
            out.write(chip_id1 + '\t')
    for out in every_file:
        out.write(rule)

    ### FOURTH LINE IN DIFF FILES: second chip ID
    for out in diff_files:
        out.write(";;;;" + '\t')
    for ril_order in ril_list:
        chip_id2 = ril_info[ril_order][2]
        for out in diff_files:
            out.write(chip_id2 + '\t')
    for out in diff_files:
        out.write(rule)

    ### DATA
    for probe_id in id_list:
        at = id_array[probe_id]
        label = at + "_" + class_array[probe_id] + '\t'
        for out in every_file:
            out.write(label)
        for ril_order in ril_list:
            chip_id1 = ril_info[ril_order][1]
            chip_id2 = ril_info[ril_order][2]
            try:
                call1, case1, value1 = calls[probe_id, chip_id1]
                call2, case2, value2 = calls[probe_id, chip_id2]
            except KeyError:
                # A chip of the conversion file has no expression column.
                sys.stdout.write(".")
                call1 = call2 = diff1 = diff2 = "X"
                consensus = xcheck = cases = "-"
                number1 = number2 = "-1"
            else:
                print(at + " " + probe_id + " " + chip_id1 + " " + call1 +
                      " " + case1 + " " + repr(value1))
                print(at + " " + probe_id + " " + chip_id2 + " " + call2 +
                      " " + case2 + " " + repr(value2))
                cases = case1 + case2
                diff1, diff2, consensus, xcheck = _pair_codes(call1, call2)
                number1 = str(value1)
                number2 = str(value2)
            out_all.write(call1 + '\t' + call2 + '\t')
            out_set1.write(call1 + '\t')
            out_set2.write(call2 + '\t')
            out_set1n.write(number1 + '\t')
            out_set2n.write(number2 + '\t')
            out_diff1.write(diff1 + '\t')
            out_diff2.write(diff2 + '\t')
            out_diff0.write(consensus + '\t')
            out_master.write(consensus + '\t')
            out_xcheck.write(xcheck + '\t')
            out_xdiff.write(cases + '\t')
        for out in every_file:
            out.write("-----" + '\t' + probe_id + '\n')

    ### MOLECULAR MARKERS
    for marker in marker_list:
        for out in marker_files:
            out.write(marker + '\t')
        for ril_order in ril_list:
            ril_id = ril_info[ril_order][0]
            print(marker + " " + repr(ril_order) + " " + ril_id)
            score = scores.get((marker, ril_id), "X")
            if score == "S":
                score = "A"
            ## Bay Sha EXCEPTION
            # NOTE(review): the source indentation was lost; the parental
            # override is applied after the lookup here.  Confirm it was not
            # meant to apply only when the score is missing.
            if ril_id == "Sha":
                score = "A"
            if ril_id == "Bay-0":
                score = "B"
            for out in marker_files:
                out.write(score + '\t')
        for out in marker_files:
            out.write(rule)


def Seqs_Extractor(in_name1, in_name2, in_name3, in_name4, in_name5,
                   out_name, condit, pause=1.0):
    """Translate Affymetrix expression values into A/B calls per RIL.

    All inputs are tab-separated text files:

    in_name1 -- range file: probe ID, grade, ..., min B, max B, min S, max S
                (columns 0, 1, 6, 7, 8, 9).  Rows graded "ELP" get their two
                ranges adjusted until they are separated by a factor of 1.1.
    in_name2 -- ID file: probe ID, AT identifier, class.
    in_name3 -- expression table: a ";" row, a ";;" row with the chip IDs,
                then one row of values per probe.
    in_name4 -- marker table: header row of marker IDs, then one row of
                scores per RIL.
    in_name5 -- chip conversion: order number, RIL ID, chip ID of
                experiment 1, chip ID of experiment 2.
    out_name -- prefix of the fourteen "<out_name>.exp_*.tab" output files.
    condit   -- "STRONG": a value must lie inside a range to be called;
                "WEAK": a value only has to be on the outer side of a range.
                Any other string leaves every value uncalled ("-").
    pause    -- multiplier for the console pauses between steps; 1.0 keeps
                the original delays, 0 disables them.

    Progress is printed to stdout.  Exits through sys.exit() on duplicated
    chip IDs or rows with an unexpected number of columns.  Every opened
    file is closed on the way out, including on early exit.
    """
    print("======================================")
    print("INPUT FILE 1 (RANGE) : " + in_name1)
    print("INPUT FILE 2 ( ID ) : " + in_name2)
    print("INPUT FILE 3 (VALUES) : " + in_name3)
    print("INPUT FILE 4 (MARKERS): " + in_name4)
    print("INPUT FILE 5 (CHIP_ID): " + in_name5)
    print("OUTPUT FILE : " + out_name)
    print("CONDITION : " + condit)
    print("======================================")

    time.sleep(2 * pause)

    opened = []
    try:
        inputs = []
        for path in (in_name1, in_name2, in_name3, in_name4, in_name5):
            handle = _open_text(path, "r")
            opened.append(handle)
            inputs.append(handle)
        outputs = {}
        for key in _OUTPUT_KEYS:
            handle = _open_text(out_name + '.exp_' + key + '.tab', "w")
            opened.append(handle)
            outputs[key] = handle

        # AFFY RANGES
        final_ranges = _load_ranges(inputs[0], outputs["fixed"])
        print("FILE 1 PROCESSED (AFFY RANGE)")
        time.sleep(2 * pause)

        # AFFY ID
        id_array, class_array = _load_ids(inputs[1], pause)
        print("FILE 2 PROCESSED (AFFY - AGI IDs)")
        time.sleep(2 * pause)

        # READ EXPRESSION DATA
        id_list, chip_list, calls = _load_expression(
            inputs[2], id_array, final_ranges, condit, pause)
        print("FILE 3 PROCESSED (EXPRESSION DATA)")
        time.sleep(2 * pause)

        ## TRANSPOSED TABLE
        _write_transposed(outputs["xsetAB"], outputs["xset01"],
                          id_list, id_array, chip_list, calls)

        # MOLECULAR MARKERS
        marker_list, scores = _load_markers(inputs[3], pause)
        print("FILE 4 PROCESSED (MOLECULAR MARKERS)")
        time.sleep(2 * pause)

        # CHIP CONVERSION
        ril_list, ril_info = _load_chip_map(inputs[4])
        print("FILE 5 PROCESSED (CHIP CONVERSION)")
        time.sleep(3 * pause)

        # FINAL STEP
        _write_tables(outputs, id_list, id_array, class_array, ril_list,
                      ril_info, calls, marker_list, scores)

        print("-----------------")
        print(" FINAL STEP DONE ")
        print("-----------------")
    finally:
        for handle in opened:
            handle.close()


##################
#                #
#   MAIN BODY    #
#                #
##################

if __name__ == "__main__":
    if len(sys.argv) != 8:
        print("Program usage: ")
        print("input_file1(AFFY_RANGE) input_file2(AFFY_ID) input_file3(AFFY_VALUES) input_file4(MARKERS) input_file5(CHIP_ID) output_file condition")
        sys.exit()
    in_name1 = sys.argv[1]
    in_name2 = sys.argv[2]
    in_name3 = sys.argv[3]
    in_name4 = sys.argv[4]
    in_name5 = sys.argv[5]
    out_name = sys.argv[6]
    condit = sys.argv[7]
    Seqs_Extractor(in_name1, in_name2, in_name3, in_name4, in_name5, out_name, condit)

### THE END ###