#!/usr/bin/perl -w
use strict;
use File::Basename;
use Bio::SeqIO;
use Cwd;

# Command line:
#   ARGV[0]  directory holding the *_input4pepquery.txt peptide files and
#            use_samples.txt
#   ARGV[1]  location the per-sample <sample>.mgf files are copied from with
#            `aws s3 cp` inside each job
#   ARGV[2]  location each job's result directory is copied to with
#            `aws s3 cp --recursive`
#
# Without all three arguments the glob below would scan "/" and every job
# would be generated with empty S3 paths, so refuse to run instead.
die "Usage: $0 <input_pep_dir> <s3_mgf_dir> <s3_res_dir>\n" unless @ARGV >= 3;

my $input_pep_dir = "$ARGV[0]";

my $pwd = getcwd;

my $s3_mgf_dir = "$ARGV[1]";
# The reference FASTA is looked up in the directory the script is started from.
my $db = "$pwd/refseq_pro_nature2014.fasta";

# File-scope on purpose: submit_job() reads $s3_res_dir directly.
my $s3_res_dir = "$ARGV[2]";

my @inputs = glob("$input_pep_dir/*_input4pepquery.txt");

my $use_samples_file = "$input_pep_dir/use_samples.txt";

my @use_samples = read_samples($use_samples_file);

# One job per (peptide input file, sample) pair.
foreach my $pep (@inputs){
	my $odir = $pep;

	# Strip the fixed suffix; the dot is escaped so it only matches a literal ".".
	$odir =~ s/_input4pepquery\.txt$//;
	my $random_n = basename($odir);
	my $n_class = basename(dirname($odir));
	# Job prefix = <name of the file's parent directory>_<file name without suffix>.
	my $prefix = "$n_class"."_"."$random_n";

	foreach my $sample (@use_samples) {
		my $mgf = "$s3_mgf_dir/$sample".".mgf";
		submit_job($pep,$mgf,$db,$prefix,$sample);
	}

}


# submit_job($pep, $mgf, $db, $prefix, $name)
#
# Write a Slurm batch script named "<prefix>_<name>.sh" in the current
# directory and submit it with `sbatch`.
#
#   $pep     peptide input file passed to generate_ref_db.pl and to PepQuery (-pep)
#   $mgf     MGF location; the job copies it into its scratch dir with `aws s3 cp`
#   $db      reference FASTA passed to generate_ref_db.pl
#   $prefix  job name prefix; "_<name>" is appended to it
#   $name    sample name
#
# Also reads the file-level $s3_res_dir as the destination for the results.
# Dies if the script file cannot be written; a failing sbatch only produces a
# warning so the remaining jobs are still submitted.
sub submit_job{
	my ($pep, $mgf, $db, $prefix, $name) = @_;

	$prefix = "$prefix"."_".$name;

	my $pepquery_bin = "pepquery.jar";
	my $generate_ref_db = "generate_ref_db.pl";
	my $pbs = "$prefix" . ".sh";

	# Per-job scratch directory on the compute node.
	my $dir = "/tmp/$prefix";
	my $new_db = "$dir/new_ref.fasta";
	my $mgf_file = "$dir/".basename($mgf);

	# Two PepQuery runs that differ only in the -m value and the output directory.
	my @searches;
	foreach my $mode (1, 2) {
		push @searches, "java -Xmx50G -jar $pepquery_bin -db $new_db -fixMod 6 -varMod 107 -tol 20 -itol 0.5 -ms $mgf_file -pep $pep -m $mode -o $dir/m$mode -n 10000 -cpu 8 -minScore 12 -um ";
	}

	my @script = (
		"#!/bin/bash",
		"#SBATCH --job-name=$pbs",
		"#SBATCH --output=$prefix".".out",
		"#SBATCH --error=$prefix".".err",
		"#SBATCH --time=100:00:00",
		"#SBATCH --cpus-per-task=8",
		"#SBATCH --mem=50000",
		"#SBATCH --nodes=1",
		"#SBATCH --ntasks=1",
		# Emitted commented-out, exactly as before.
		"#set -e -x",
		# Start from an empty scratch directory, then fetch the spectra.
		"rm -rf $dir",
		"mkdir $dir",
		"aws s3 cp $mgf $dir/",
		"perl $generate_ref_db $pep $db $new_db",
		@searches,
		# Drop the MGF copy before the scratch directory is uploaded.
		"rm $mgf_file",
		"aws s3 cp $dir $s3_res_dir/$prefix --recursive",
		"rm -rf $dir",
	);

	# Lexical handle + three-arg open: no clash with other bareword handles and
	# no mode injection through the file name.
	open(my $out, '>', $pbs) or die "$pbs:$!\n";
	print {$out} "$_\n" for @script;
	# Buffered write errors only surface at close.
	close($out) or die "$pbs:$!\n";

	# List form bypasses the shell; report (but do not abort on) a failed submit.
	system("sbatch", $pbs) == 0
		or warn "sbatch $pbs failed (status $?)\n";

}

# read_samples($file)
#
# Read sample names from a tab-delimited text file and return them as a list,
# in file order. The first tab-separated field of each line is the sample name.
# Lines whose first field is empty (including blank lines) are skipped, and a
# trailing carriage return from CRLF line endings is removed.
#
# Dies if the file cannot be opened.
sub read_samples{
	my $file = shift;
	open(my $in, '<', $file) or die "$file: $!\n";
	my @sam;
	while (my $line = <$in>) {
		chomp $line;
		# Tolerate files saved with Windows line endings.
		$line =~ s/\r$//;
		my ($sample) = split(/\t/, $line);
		# A blank line yields no fields at all; never emit an undef/empty sample.
		next unless defined $sample && length $sample;
		push @sam, $sample;
	}
	close $in;

	return(@sam);
}



