#!/usr/local/bin/perl
#######################################################################
# getAnnotation.pl
#
# Instructions: This generates the location classification of variants
# relative to genes, according to the FlyBase annotation described in the
# Methods section. input_file_1 is the variant list file and input_file_2
# is the gene annotation file.
#
# Usage:
#   ./getAnnotation.pl variant_list.txt annotation.txt variant_annotation.txt
#
# Columns read (0-based, tab-separated):
#   variant list : 3 / 4 = chromosome / position of the first endpoint,
#                  5 / 6 = chromosome / position of the second endpoint.
#                  "chr" is prepended to the chromosome before comparing.
#   annotation   : 2 = chromosome, 4 / 5 = lower / upper bound of the
#                  annotated region, 12 = the value reported per hit
#                  (presumably the gene name -- confirm against the file).
#
# Output: every variant line followed by a tab and the "//"-joined list of
# distinct hits, in annotation-file order. The list is left empty when it
# would hold more than $max_genes entries.
#
# NOTE(review): this file holds two scripts back to back (this one and
# getDistance.pl below). Run as one file, both execute with the same three
# arguments and the second overwrites the output of the first; they are
# presumably meant to live in separate files.
#######################################################################
use strict;
use warnings;

{
    if (@ARGV != 3) {
        print STDERR "Usage: ./getAnnotation.pl variant_list.txt annotation.txt variant_annotation.txt\n";
        exit -1;
    }
    my ($variant_path, $annotation_path, $output_path) = @ARGV;

    my $flank     = 1000;    # bases added on each side of an annotated region
    my $max_genes = 50;      # more distinct hits than this => report none

    # Load the annotation once instead of re-reading the file for every
    # variant; rows keep their file order so the output order is unchanged.
    open(my $annotation_fh, '<', $annotation_path)
        or die "Cannot open annotation file '$annotation_path': $!\n";
    my $discarded_first_line = <$annotation_fh>;    # presumably a header row
    my @regions;
    while (my $row = <$annotation_fh>) {
        # Strip LF or CRLF. The original chomp+chop pair ate the last data
        # character whenever the file had plain LF line endings.
        $row =~ s/[\r\n]+\z//;
        my @col = split /\t/, $row;
        next if @col < 6;    # no chromosome/bounds to compare against
        push @regions, {
            chr  => $col[2],
            low  => $col[4],
            high => $col[5],
            name => defined $col[12] ? $col[12] : '',
        };
    }
    close($annotation_fh);

    open(my $variant_fh, '<', $variant_path)
        or die "Cannot open variant file '$variant_path': $!\n";
    open(my $out_fh, '>', $output_path)
        or die "Cannot open output file '$output_path': $!\n";

    while (my $variant = <$variant_fh>) {
        $variant =~ s/[\r\n]+\z//;
        my @col = split /\t/, $variant;

        # Collect the endpoints this row actually carries. A defined
        # position column implies the chromosome column before it is
        # defined too, because split returns a contiguous list.
        my @endpoints;
        for my $pair ([3, 4], [5, 6]) {
            my ($chr_col, $pos_col) = @{$pair};
            next unless defined $col[$pos_col];
            push @endpoints, { chr => "chr$col[$chr_col]", pos => $col[$pos_col] };
        }

        my @hits;
        my %seen;
        REGION:
        for my $region (@regions) {
            # Alternative criterion kept from the original (disabled):
            # within an annotated gene region, i.e. the variant interval
            # overlaps the region itself with no flank. Here $part1 is the
            # variant row, $part2 the annotation row and
            # $chr = "chr" . $part1[3].
            # if($chr eq $part2[2] && (($part2[4]>=$part1[4] && $part2[4]<=$part1[6])||($part2[5]>=$part1[4] && $part2[5]<=$part1[6])||($part2[4]<=$part1[4] && $part2[5]>=$part1[6]))){
            #     push(@myGene, $part2[12]);

            # Active criterion: within a certain range (1kb as example),
            # upstream or downstream of the annotated region, for either
            # endpoint. The chromosome test comes first so the numeric
            # comparisons only run on rows of the same chromosome.
            my $is_near = grep {
                       $_->{chr} eq $region->{chr}
                    && $_->{pos} >= $region->{low} - $flank
                    && $_->{pos} <= $region->{high} + $flank
            } @endpoints;
            next REGION unless $is_near;

            # Keep first occurrences only, in file order.
            push @hits, $region->{name} unless $seen{ $region->{name} }++;

            if (@hits > $max_genes) {
                @hits = ();
                last REGION;
            }
        }

        print {$out_fh} $variant, "\t", join('//', @hits), "\n";
    }

    close($variant_fh);
    close($out_fh)
        or die "Cannot finish writing '$output_path': $!\n";
}

#!/usr/local/bin/perl
#######################################################################
# getDistance.pl
#
# Instructions: This gets the genomic distance between a variant position
# and the TSS or TES of its nearby genes, within a given range (TES 10kb
# as example).
#
# Usage:
#   ./getDistance.pl variant_position_file.txt annotation.txt variant_distance.txt
#
# Columns read (0-based, tab-separated):
#   variant positions : 0 = chromosome, 1 = position.
#   annotation        : 0 = chromosome, 2 = coordinate used for "+" rows,
#                       3 = coordinate used for "-" rows, 4 = strand,
#                       5 = the value reported per hit (presumably the
#                       gene name -- confirm against the file).
#
# Output: one line per (variant, annotation row) pair whose distance is in
# (0, $max_dist]: the variant line, the reported value and the distance,
# tab-separated. Variants with no such row produce no output.
#######################################################################
use strict;
use warnings;

{
    if (@ARGV != 3) {
        print STDERR "Usage: ./getDistance.pl variant_position_file.txt annotation.txt variant_distance.txt\n";
        exit -1;
    }
    my ($variant_path, $annotation_path, $output_path) = @ARGV;

    my $max_dist = 10000;    # largest distance that is still reported

    # Load the annotation once, grouped by chromosome. Rows keep their file
    # order within a chromosome, so the output order matches a full rescan
    # of the file per variant. No header line is skipped for this file.
    open(my $annotation_fh, '<', $annotation_path)
        or die "Cannot open annotation file '$annotation_path': $!\n";
    my %rows_on;
    while (my $row = <$annotation_fh>) {
        # Strip LF or CRLF without touching the data (see getAnnotation.pl).
        $row =~ s/[\r\n]+\z//;
        my @col = split /\t/, $row;
        next if @col < 5;    # no strand column: the row can never match
        my $strand = $col[4];
        next unless $strand eq '+' || $strand eq '-';
        push @{ $rows_on{ $col[0] } }, {
            strand => $strand,
            # NOTE(review): which of these is TSS and which is TES depends
            # on the annotation file layout -- confirm.
            anchor => $strand eq '+' ? $col[2] : $col[3],
            name   => defined $col[5] ? $col[5] : '',
        };
    }
    close($annotation_fh);

    open(my $variant_fh, '<', $variant_path)
        or die "Cannot open variant file '$variant_path': $!\n";
    open(my $out_fh, '>', $output_path)
        or die "Cannot open output file '$output_path': $!\n";

    while (my $variant = <$variant_fh>) {
        $variant =~ s/[\r\n]+\z//;
        my @col = split /\t/, $variant;
        next if @col < 2;    # blank or truncated row: no position to measure

        my ($chr, $pos) = @col[0, 1];
        my $candidates = $rows_on{$chr} or next;

        for my $row (@{$candidates}) {
            # "+" rows: how far the variant lies before the anchor.
            # "-" rows: how far the variant lies after the anchor.
            my $distance = $row->{strand} eq '+'
                ? $row->{anchor} - $pos
                : $pos - $row->{anchor};
            next unless $distance > 0 && $distance <= $max_dist;

            print {$out_fh} "$variant\t$row->{name}\t$distance\n";
        }
    }

    close($variant_fh);
    close($out_fh)
        or die "Cannot finish writing '$output_path': $!\n";
}