#!/usr/bin/perl -w
use Cwd;
use strict;

### Remove duplicate reads: reads that originated from the same strand and mapped to the same strand and the same position.

# Driver: de-duplicate every *.sam file found in ./res (relative to the
# directory the script is started from) and write, for each input file, a
# two-column table of "duplication level -> number of distinct reads seen that
# many times" under res/duprm/dupsummary.
my $dir = getcwd;
my $outpath = "$dir/res/duprm";

# Create the output tree with the mkdir builtin rather than shelling out:
# paths containing spaces or shell metacharacters work, re-running the script
# on an existing tree is silent, and a real failure stops the run instead of
# being ignored.
foreach my $path ($outpath, "$outpath/dupsummary", "$outpath/uniquetest") {
    next if -d $path;
    mkdir $path or die "Cannot create directory $path: $!\n";
}

# Input files are addressed by bare file name below, so we must really be
# inside res/ before globbing; otherwise *.sam of the wrong directory would
# be processed.
chdir "$dir/res" or die "Cannot chdir to $dir/res: $!\n";
my @sams = glob("*.sam");

foreach my $sam (@sams) {
    ## duplicated reads calculation
    my %readdup = %{deDuplication($sam,$outpath)};

    # NOTE: the ".dulication" suffix (sic) is kept unchanged so that existing
    # downstream consumers of these summary files keep working.
    my $summary = "$outpath/dupsummary/${sam}.dulication";
    open(my $dup_fh, '>', $summary) or die "Cannot write $summary: $!\n";
    print {$dup_fh} join("\t","duplication","times"),"\n";
    # Keys are duplication counts, hence the numeric sort.
    foreach my $level (sort {$a <=> $b} keys %readdup) {
        print {$dup_fh} join("\t",$level,$readdup{$level}),"\n";
    }
    # Buffered write errors only surface at close time.
    close($dup_fh) or die "Cannot close $summary: $!\n";
}


# deDuplication($sam, $outpath)
#
# Reads the SAM file $sam (opened relative to the current directory) and
# writes a de-duplicated copy to "$outpath/<stem>.duprm.sam", where <stem> is
# $sam without its trailing ".sam".  Header lines (starting with "@") are
# copied through unchanged.
#
# Two alignment records are considered duplicates when they agree on: the read
# name up to its first ".", and columns 1, 2, 3, 9 and 12 (0-based) of the
# record.  Only the first record of each such group is written.
# NOTE(review): per the original author these columns stand for mapped strand,
# chromosome, start, sequence and source strand; column 12 being the source
# strand depends on the aligner's tag order - confirm against the input.
#
# As a sanity check, (read name, column 1) pairs that occur more than once
# among the *kept* records are listed with their count in
# "$outpath/uniquetest/$sam.ununique".
#
# Returns a reference to a hash mapping a duplication count N to the number of
# distinct reads that were seen exactly N times.
#
# Dies if the input cannot be opened or any output file cannot be
# opened or closed.
sub deDuplication {
    my $sam = shift;
    my $outpath = shift;

    # Strip only a literal trailing ".sam".  Splitting on the unescaped regex
    # /.sam/ would cut at the first "<any char>sam", e.g. "mysample.sam"
    # would become "m".
    (my $spname = $sam) =~ s/\.sam\z//;
    my $output = "${spname}.duprm.sam";
    my $out_file = "$outpath/$output";
    my $unique_file = "$outpath/uniquetest/${sam}.ununique";

    # Three-argument open with "or die": with "||" the check binds to the
    # file name and a failed open goes unnoticed.
    open(my $sam_fh, '<', $sam) or die "Cannot read $sam: $!\n";
    open(my $out_fh, '>', $out_file) or die "Cannot write $out_file: $!\n";
    open(my $unique_fh, '>', $unique_file)
        or die "Cannot write $unique_file: $!\n";

    my %unique; # uniqueness test because of the circular mtDNA
    my %reads;  # duplicate key -> number of records seen with that key

    LINE: while (my $line = <$sam_fh>) {
        if ($line =~ /^\@/) {
            print {$out_fh} $line;
            next LINE;
        }
        chomp $line;
        # A blank line is not an alignment record; do not count it as a read.
        next LINE unless $line =~ /\S/;

        my @fields = split /\t/, $line;
        # Records shorter than 13 columns yield undef here; treat missing
        # columns as empty strings so the keys are built without warnings.
        my ($qname, $col1, $col2, $col3, $col9, $col12) =
            map { defined $_ ? $_ : '' } @fields[0, 1, 2, 3, 9, 12];

        my ($rep) = split /\./, $qname;
        $rep = '' unless defined $rep;

        my $id = join("\t", $rep, $col1, $col2, $col3, $col9, $col12);
        # Post-increment returns the previous count: non-zero means this key
        # was already written once, so the record is a duplicate.
        next LINE if $reads{$id}++;

        print {$out_fh} $line, "\n";
        $unique{ join("\t", $qname, $col1) }++;
    }

    ## un-unique reads
    foreach my $pair (sort keys %unique) {
        next unless $unique{$pair} > 1;
        print {$unique_fh} join("\t", $pair, $unique{$pair}), "\n";
    }

    close($sam_fh);
    # Buffered write errors only surface at close time, and closing here
    # guarantees the files are complete when the sub returns.
    close($out_fh) or die "Cannot close $out_file: $!\n";
    close($unique_fh) or die "Cannot close $unique_file: $!\n";

    ## duplicated reads
    my %readdup;
    $readdup{$_}++ for values %reads;
    return \%readdup;
}
