#! /usr/bin/perl -w
#
# Author  : Rotem Sorek rotem@kimura.tau.ac.il
# Created : Jul 21 2003
#
# This script was used to create the rules set for the article:
# "A non EST-based method for exon-skipping prediction"
# By Rotem Sorek, Ronen Shemesh, Yuval Cohen, Ortal Basechess, Gil Ast and Ron Shamir
#
# Usage: CompleteParamsIteration.pl <alt_features_file> <con_features_file>
#
# Both arguments should be text files, each containing a Tab-delimited
# features table of the following format (one exon per line):
#
#   #EXON D81985_20 #LENGTH 147 #DIV3 1 #EX_IDN 95 #NEXT_LEN 100 #NEXT_IDN 85 #PREV_LEN 69 #PREV_IDN 83
#
# For every combination of thresholds the script counts the exons of each
# file that satisfy the combination, and prints a "Rule:" line to STDOUT for
# each combination matched by at least MIN_ALT_EXONS exons of the first file.

#### Packages to use ######################################################

use strict;
use warnings;
# NOTE(review): nothing in this file appears to call into Getopt::PrmArgv;
# kept because its import-time behaviour is not visible here - confirm
# before removing.
use Getopt::PrmArgv;
use FileHandle;

#### Global variables #####################################################

my $Prog = "CompleteParamsIteration.pl";

# Threshold grids.  The same exon-identity and exon-length grids are used
# both when pre-binning the input (ReadData) and when iterating (main).
use constant {
    EXON_ID_MAX    => 100,     # exon identity thresholds: 100, 99, ... 80
    EXON_ID_MIN    => 80,
    EXON_LEN_MIN   => 18,      # exon length thresholds: 18, 23, ... <= 1000
    EXON_LEN_MAX   => 1000,
    EXON_LEN_STEP  => 5,
    FLANK_LEN_MAX  => 100,     # flanking conserved length: 100, 95, ... 0
    FLANK_LEN_STEP => 5,
    FLANK_ID_MAX   => 100,     # flanking identity: 100, 97, ... >= 65
    FLANK_ID_MIN   => 65,
    FLANK_ID_STEP  => 3,
    FLANK_LEN_NO_ID => 10,     # at or below this flank length, identity threshold is 0
    MIN_ALT_EXONS  => 30,      # minimum alternative exons for a rule to be printed
};

STDOUT->autoflush(1);

#### The main section #####################################################

# Run only when executed as a script, so that the subroutines below can be
# loaded (require/do) without starting the full iteration.
main(@ARGV) unless caller;

# main($alt_file, $con_file)
#
# Reads the alternative-exon and constitutive-exon feature files and
# iterates over every threshold combination, in this nesting order
# (outermost first): exon identity, upstream conserved length, downstream
# conserved length, downstream identity, upstream identity, divisibility
# by 3, exon length.  Prints one "Rule:" line per combination that at
# least MIN_ALT_EXONS alternative exons satisfy, together with the number
# of constitutive exons satisfying the same combination.
#
# Dies if either file name is missing (or false), or if a file cannot be
# opened.
sub main {
    my ($alt, $con) = @_;

    die "two features input files are needed" unless ($alt && $con);

    # reading the data of alternative and constitutive exons; only the
    # pre-binned structure (second return value) is needed here
    my (undef, $alt_pre) = ReadData($alt);
    my (undef, $con_pre) = ReadData($con);

    # iterating over exon identity level
    for (my $exon_id = EXON_ID_MAX; $exon_id >= EXON_ID_MIN; $exon_id--) {

        # iterating over length of conserved upstream region
        for (my $prev_len = FLANK_LEN_MAX; $prev_len >= 0; $prev_len -= FLANK_LEN_STEP) {
            my @prev_ids = _flank_identity_thresholds($prev_len);

            # iterating over length of conserved downstream region
            for (my $next_len = FLANK_LEN_MAX; $next_len >= 0; $next_len -= FLANK_LEN_STEP) {
                my @next_ids = _flank_identity_thresholds($next_len);

                # iterating over identity level of conserved downstream region
                for my $next_id (@next_ids) {

                    # iterating over identity level of conserved upstream region
                    for my $prev_id (@prev_ids) {

                        # division by 3?
                        for my $div_3 (1, 0) {

                            # iterating over exon length
                            for (my $exon_len = EXON_LEN_MIN; $exon_len <= EXON_LEN_MAX; $exon_len += EXON_LEN_STEP) {

                                # A missing bucket means zero matching exons.
                                my $alt_list = $alt_pre->{$exon_len}{$div_3}{$exon_id} || [];

                                # The count can never exceed the bucket size,
                                # so small buckets cannot produce a rule.
                                next if @{$alt_list} < MIN_ALT_EXONS;

                                my $count_alt = _count_passing($alt_list, $prev_len, $next_len, $prev_id, $next_id);
                                next if $count_alt < MIN_ALT_EXONS;

                                # The constitutive count is only reported, so it
                                # is computed only when a rule is printed.
                                my $con_list  = $con_pre->{$exon_len}{$div_3}{$exon_id} || [];
                                my $count_con = _count_passing($con_list, $prev_len, $next_len, $prev_id, $next_id);

                                print "Rule: #EX_LN $exon_len #EX_ID $exon_id #PREV_LN $prev_len #PREV_ID $prev_id #NEXT_LN $next_len #NEXT_ID $next_id #DIV3 $div_3 #ALT $count_alt #CON $count_con\n";
                            }
                        }
                    }
                }
            }
        }
    }

    return;
}
# End of main of "CompleteParamsIteration.pl"

#### Functions section ####################################################

# _flank_identity_thresholds($flank_len)
#
# Returns the list of identity thresholds to try for a flanking conserved
# region whose length threshold is $flank_len.  When $flank_len is at most
# FLANK_LEN_NO_ID the only threshold is 0 (identity is not constrained);
# otherwise the thresholds run from FLANK_ID_MAX down to FLANK_ID_MIN in
# steps of FLANK_ID_STEP.
sub _flank_identity_thresholds {
    my ($flank_len) = @_;

    return (0) if $flank_len <= FLANK_LEN_NO_ID;

    my @thresholds;
    for (my $id = FLANK_ID_MAX; $id >= FLANK_ID_MIN; $id -= FLANK_ID_STEP) {
        push @thresholds, $id;
    }
    return @thresholds;
}

# _count_passing($exons, $prev_len, $next_len, $prev_id, $next_id)
#
# Returns how many exon records in the array reference $exons have
# prev_len, next_len, prev_id and next_id all greater than or equal to the
# given thresholds.
sub _count_passing {
    my ($exons, $prev_len, $next_len, $prev_id, $next_id) = @_;

    my $count = 0;
    foreach my $ex (@{$exons}) {
        if (   $ex->{prev_len} >= $prev_len
            && $ex->{next_len} >= $next_len
            && $ex->{prev_id}  >= $prev_id
            && $ex->{next_id}  >= $next_id)
        {
            $count++;
        }
    }
    return $count;
}

# ReadData($file)
#
# Reads a features table (format described at the top of this file) and
# returns a two-element list:
#
#   1. a reference to an array of exon records, one hash per non-blank
#      line, with keys len, div_3, id, next_len, next_id, prev_len, prev_id
#      taken from whitespace-separated fields 3, 5, 7, 9, 11, 13 and 15;
#   2. a reference to a nested hash
#          {exon length threshold}{div_3}{exon identity threshold}
#      whose values are array references holding every exon record with
#      len <= the length threshold, the given div_3 value, and
#      id >= the identity threshold.
#
# Dies if $file cannot be opened for reading.
sub ReadData {
    my $file = shift;

    open(my $in, '<', $file) || die "cannot open file $file to read: $!";

    my @exons;
    my %pre_process;

    while (my $line = <$in>) {
        chomp $line;

        # Blank lines carry no record.
        next unless $line =~ /\S/;

        my @fields = split ' ', $line;
        my %ex = (
            len      => $fields[3],
            div_3    => $fields[5],
            id       => $fields[7],
            next_len => $fields[9],
            next_id  => $fields[11],
            prev_len => $fields[13],
            prev_id  => $fields[15],
        );
        push @exons, \%ex;

        # pre processing of data: file the record under every
        # (length threshold, div_3, identity threshold) bucket it satisfies

        # exon id
        for (my $exon_id = EXON_ID_MAX; $exon_id >= EXON_ID_MIN; $exon_id--) {
            next unless $ex{id} >= $exon_id;

            # exon length
            for (my $exon_len = EXON_LEN_MIN; $exon_len <= EXON_LEN_MAX; $exon_len += EXON_LEN_STEP) {
                next unless $ex{len} <= $exon_len;

                # division by 3?
                for my $div_3 (1, 0) {
                    if ($ex{div_3} == $div_3) {
                        push @{ $pre_process{$exon_len}{$div_3}{$exon_id} }, \%ex;
                    }
                }
            }
        }
    }

    close($in);

    return (\@exons, \%pre_process);
}

1;

#############################################################################
# End of script "CompleteParamsIteration.pl"