##################################################
#this is a script to identify improperly paired fusions given one end of the fusion
#Aaron L. Sarver NOV 21, 2013
#Sarver@umn.edu
#Version 3.1
###Requirements################################### 
#input.txt
#loc_SB
#samtools/0.1.16
#Starts with an indexed BAM file; makes a list of RNA-based insertions for each file and a BED file of all insertions
##################################################
use strict;
use warnings;

# Main driver.
#
# For every sample listed in input.txt (tab-separated: BAM path, sample name,
# locations-file stem) and every region listed in "<stem>.txt" (tab-separated:
# location name, samtools region string):
#   1. extract the reads overlapping the region with `samtools view`;
#   2. take SAM columns 7 and 8 of those reads (mate reference name and mate
#      position), keep only mates on a different, known reference, and round
#      each mate position to the nearest multiple of 300;
#   3. count identical (reference, bin) pairs with `uniq -c`;
#   4. for bins supported by more than 2 reads, count all reads in the
#      +/-150 window around the bin and report the bin when the supporting
#      reads exceed 1% of that total.
# Per-sample rows go to Results/RNA_insertion_<name>.<stem>.txt and all of
# those files are finally concatenated into Results/fus.txt.
#
# The Results/ and working/ directories must already exist.

open my $samples_fh, '<', 'input.txt'
    or die "Cannot open input.txt: $!";

SAMPLE:
while (defined(my $data = <$samples_fh>)) {
    print $data;
    chomp $data;
    my @binfo = split /\t/, $data;

    # Blank or truncated lines would otherwise yield junk file names.
    if (@binfo < 3) {
        warn "Skipping malformed input.txt line: '$data'\n" if $data =~ /\S/;
        next SAMPLE;
    }
    my ($dir, $name, $sam) = @binfo[0 .. 2];

    my $result_path = "Results/RNA_insertion_$name.$sam.txt";
    open my $result_fh, '>', $result_path
        or die "Cannot write $result_path: $!";

    my $locations_path = "$sam.txt";
    my $locations_fh;
    if (!open $locations_fh, '<', $locations_path) {
        # Keep going with the remaining samples; the (empty) result file is
        # still created, as it was before.
        warn "Cannot open $locations_path: $! (sample $name skipped)\n";
        close $result_fh or die "Cannot close $result_path: $!";
        next SAMPLE;
    }

    LOCATION:
    while (defined(my $data2 = <$locations_fh>)) {
        print $data2;
        chomp $data2;
        my @loc = split /\t/, $data2;
        if (@loc < 2) {
            warn "Skipping malformed line in $locations_path: '$data2'\n"
                if $data2 =~ /\S/;
            next LOCATION;
        }
        my ($locname, $loc1) = @loc[0, 1];

        # The command goes through the shell exactly as before, so values in
        # the input files are word-split by the shell.
        # NOTE(review): input.txt / <stem>.txt are assumed to be trusted.
        if (system("samtools view $dir $loc1 -o working/location_raw") != 0) {
            # Without this check a failed extraction would silently reuse the
            # previous location's working/location_raw.
            warn "samtools view failed for $dir $loc1 (status $?); skipping\n";
            next LOCATION;
        }

        # SAM columns 7 and 8: mate reference name and mate position.
        if (system("cut -f7,8 working/location_raw|sort -k1,1 -k2n,2 > working/loc.txt") != 0) {
            warn "cut/sort failed for $dir $loc1 (status $?); skipping\n";
            next LOCATION;
        }

        open my $mates_fh, '<', 'working/loc.txt'
            or die "Cannot open working/loc.txt: $!";
        open my $bins_fh, '>', 'working/nr.txt'
            or die "Cannot write working/nr.txt: $!";
        my $count = 0;
        while (defined(my $line = <$mates_fh>)) {
            chomp $line;
            my @field = split /\t|\s+/, $line;
            $count++;
            print "$count\n";
            next if @field < 2;

            # "=" means the mate is on the same reference and "*" means no
            # mate information; neither is a candidate fusion partner.
            next if $field[0] eq "=" || $field[0] eq "*";

            # Bin the mate position to the nearest multiple of 300.
            my $round = 300 * int(0.5 + $field[1] / 300);
            print {$bins_fh} "$field[0]\t$round\n";
        }
        close $mates_fh;
        close $bins_fh or die "Cannot close working/nr.txt: $!";

        if (system("uniq -c working/nr.txt > working/cis1.txt") != 0) {
            warn "uniq failed for $dir $loc1 (status $?); skipping\n";
            next LOCATION;
        }

        open my $counts_fh, '<', 'working/cis1.txt'
            or die "Cannot open working/cis1.txt: $!";
        BIN:
        while (defined(my $line = <$counts_fh>)) {
            chomp $line;

            # `uniq -c` right-aligns the count in a padded column. split ' '
            # ignores that padding, so wide counts that leave no leading
            # blank are parsed the same way as narrow ones.
            my ($support, $chrom, $pos) = split ' ', $line;
            next BIN unless defined $pos;
            next BIN unless $support > 2;

            my $end   = $pos + 150;
            my $start = $pos - 150;
            # Bin 0 would give a negative coordinate, which is not a valid
            # region start; clamp to the first base.
            $start = 1 if $start < 1;

            my $val = `samtools view -c $dir ${chrom}:${start}-${end}`;
            $val = '' unless defined $val;
            chomp $val;

            # An empty or zero total (samtools failure, empty window) would
            # make the ratio below die with a division by zero.
            unless ($val =~ /\A\d+\z/ && $val > 0) {
                warn "No usable read count for ${chrom}:${start}-${end} in $dir; skipping\n";
                next BIN;
            }

            if ($support / $val > 0.01) {
                # The support count is written twice on purpose: the column
                # layout of the original output is preserved.
                print {$result_fh} "$chrom\t$start\t$end\t$name\t$support\t$support\t$val\t$locname\n";
            }
        }
        close $counts_fh;
    }
    close $locations_fh;
    close $result_fh or die "Cannot close $result_path: $!";
}
close $samples_fh;

# Merge every per-sample result file into one table.
system("cat Results/RNA* > Results/fus.txt") == 0
    or warn "Could not build Results/fus.txt (status $?)\n";
