#!/usr/bin/perl

############################################################
##                                                        ##
##                      ALN_DNA_AA.PL                     ##
##                                                        ##
##                       Programmed by MASAFUMI NOZAWA    ##
##                       Last Modified on 2018.12.10      ##
##                                                        ##
############################################################

use strict;
use warnings;

# Run as a command-line tool when executed directly.  When this file is
# loaded from another Perl file (require/do), only the subs are defined.
main(@ARGV) unless caller;

# main(NUCLEOTIDE_FASTA, AA_ALIGNMENT_FASTA, OUTPUT_FASTA)
#
# For every record of the nucleotide FASTA file, looks up the amino-acid
# alignment record with the identical header line and writes the nucleotide
# sequence, gapped codon by codon to follow that alignment, to the output
# file.  Output sequences are written on a single line.
# Prints the usage text and exits when fewer than three arguments are given.
# Dies with the file name and OS error when a file cannot be opened or the
# output cannot be flushed.
sub main {
	my @args = @_;

	if (@args < 3) {
		print "\n";
		print "This program makes a nucleotide alignment file based on an amino-acid alignment which is obtained by other programs such as ClustalW, MAFFT, and Muscle.\n";
		print "aln_dna_aa.pl input nucleotide file name (fasta format)\ninput amino-acid alignment file (fasta format)\noutput file name (fasta format)\n";
		exit;
	}
	my ($dna_file, $aa_file, $out_file) = @args;

	# Read the alignment once instead of rescanning the file per record.
	my $aa_of = _read_aa_alignment($aa_file);

	open(my $dna_fh, '<', $dna_file)
		or die "Cannot open nucleotide file '$dna_file': $!\n";
	# The output is opened only after both inputs opened successfully, so a
	# bad input path does not truncate an existing output file.
	open(my $out_fh, '>', $out_file)
		or die "Cannot open output file '$out_file': $!\n";

	# The first line is always taken as the first record's header.
	# An empty nucleotide file produces an empty output file.
	my $name = <$dna_fh>;
	if (defined $name) {
		$name = _strip_eol($name);
		my $n_seq = '';
		while (my $line = <$dna_fh>) {
			$line = _strip_eol($line);
			if ($line =~ /^>/) {
				# A new header closes the record collected so far.
				_write_record($out_fh, $name, $n_seq, $aa_of);
				$name  = $line;
				$n_seq = '';
			}else{
				$n_seq .= $line;
			}
		}
		# The last record has no following header to trigger its output.
		_write_record($out_fh, $name, $n_seq, $aa_of);
	}

	close($out_fh) or die "Cannot close output file '$out_file': $!\n";
	close($dna_fh);
	return;
}

# Returns the line without its trailing line terminator (LF, CRLF or CR), so
# that a stray "\r" can neither end up in a header nor shift codon offsets.
sub _strip_eol {
	my ($line) = @_;
	$line =~ s/[\r\n]+\z//;
	return $line;
}

# Reads the amino-acid alignment FASTA file and returns a hash reference that
# maps each full header line (including the leading '>') to its aligned
# sequence.  Sequences of records sharing the same header are concatenated;
# lines before the first header are ignored.
sub _read_aa_alignment {
	my ($aa_file) = @_;

	open(my $aa_fh, '<', $aa_file)
		or die "Cannot open amino-acid alignment file '$aa_file': $!\n";

	my %aa_of;
	my $current;
	while (my $line = <$aa_fh>) {
		$line = _strip_eol($line);
		if ($line =~ /^>/) {
			$current = $line;
			$aa_of{$current} = '' unless exists $aa_of{$current};
		}elsif (defined $current) {
			$aa_of{$current} .= $line;
		}
	}
	close($aa_fh);
	return \%aa_of;
}

# Threads the nucleotide sequence $n_seq onto the aligned amino-acid sequence
# $a_seq and returns the gapped nucleotide sequence.
#   - a residue consumes the next three nucleotides;
#   - a gap ('-') emits '---' without consuming nucleotides, unless the next
#     three nucleotides are themselves '---', in which case they are consumed.
# Once the nucleotide sequence is exhausted, residues contribute nothing and
# gaps still contribute '---'.
sub _thread_codons {
	my ($a_seq, $n_seq) = @_;

	my $n_len     = length $n_seq;
	my $aligned   = '';
	my $codon_idx = 0;
	for my $aa (split //, $a_seq) {
		my $offset = $codon_idx * 3;
		# Guard the offset: substr() past the end returns undef and warns.
		my $codon = $offset < $n_len ? substr($n_seq, $offset, 3) : '';
		if ($aa eq '-' && $codon ne '---') {
			$aligned .= '---';
			next;
		}
		$aligned .= $codon;
		$codon_idx++;
	}
	return $aligned;
}

# Writes one output record: the header line followed by the nucleotide
# sequence gapped according to the alignment entry with the same header.
# When the alignment has no such entry, a warning goes to STDERR and the
# sequence line is left empty.
sub _write_record {
	my ($out_fh, $name, $n_seq, $aa_of) = @_;

	my $a_seq = '';
	if (exists $aa_of->{$name}) {
		$a_seq = $aa_of->{$name};
	}else{
		warn "Warning: no amino-acid alignment entry found for '$name'; writing an empty sequence\n";
	}

	print {$out_fh} "$name\n";
	print {$out_fh} _thread_codons($a_seq, $n_seq), "\n";
	return;
}
