#! /usr/bin/perl

use warnings;
use strict;
use IO::CaptureOutput qw(capture_exec);

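# Randomly downsample each BED file to approximately <target number of reads>
# lines; the result for <name>.bed is written to <name>_DS_<target>.bed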
# Validate the command line: a target read count followed by at least one
# BED file.
unless (scalar @ARGV >= 2) {
	die "\nNot enough command line arguments.\n".
	"Usage : Down_sample_bed_file.pl <target number of reads> <bed file list>.\n";

}

# number of reads to aim for in each downsampled file
my $target = shift @ARGV;

# The target is used in arithmetic to derive the keep-probability. Reject
# anything that is not a non-negative number up front instead of letting it
# silently turn into a probability of zero (and empty output files).
require Scalar::Util;
unless (Scalar::Util::looks_like_number($target) && $target >= 0) {
	die "\nTarget number of reads must be a non-negative number, got '$target'.\n".
	"Usage : Down_sample_bed_file.pl <target number of reads> <bed file list>.\n";
}

# every remaining argument is a BED file to downsample
my @bed_files = @ARGV;


# Seed the random number generator once for the whole run. Seeding with
# time|$$ again for every file would restart the very same random sequence
# for all files handled within the same second.
srand(time|$$);

# Downsample each BED file independently: every line is kept with
# probability target/(number of lines), so the output holds approximately
# (not exactly) $target lines.
foreach my $elem (@bed_files) {

	# The mark is the file name up to its last ".bed"; it prefixes the
	# output name. Check the match explicitly so a non-matching name can
	# never reuse a capture left over from an earlier match.
	my ($mark) = $elem =~ m/(.+)\.bed/;
	unless (defined $mark) {
		warn "Skipping $elem: file name does not contain .bed\n";
		next;
	}
	print "Now processing $mark...\n";

	# name of the downsampled output file
	my $new_filename = "${mark}_DS_${target}.bed";

	open (my $bed_fh, '<', $elem) or die "Could not open $elem: $!\n";

	# First pass: count the reads. Counting here rather than through a
	# shell "wc -l" keeps file names with spaces or shell metacharacters
	# safe, and counts exactly the records the sampling loop will see
	# (including a last line without a trailing newline).
	my $start = 0;
	while (defined(my $read = <$bed_fh>)) {
		$start++;
	}
	print "Mapped reads in $elem: $start\n";

	# an empty file has nothing to sample and would divide by zero below
	if ($start == 0) {
		warn "Skipping $elem: file contains no reads\n";
		close $bed_fh;
		next;
	}

	# percentage of reads to keep; 100 or more keeps every read
	my $ds_proba = 100 * $target/$start;

	print "Downsizing randomly $elem by $ds_proba\n";

	# Second pass: rewind and sample.
	seek($bed_fh, 0, 0) or die "Could not rewind $elem: $!\n";
	open (my $out_fh, '>', $new_filename) or die "Could not create $new_filename: $!\n";

	while (my $line = <$bed_fh>) {

		# rand(100) is uniform in [0,100), so this keeps the read with
		# probability $ds_proba percent
		if (rand(100) < $ds_proba) {
			print {$out_fh} $line or die "Could not write to $new_filename: $!\n";
		}

	}

	close $bed_fh;

	# buffered write errors (e.g. disk full) only surface at close
	close $out_fh or die "Could not finish writing $new_filename: $!\n";

}




exit;

###########################################################
# SUBROUTINES
###########################################################

###########################################################
# Split one tab-delimited data line into its fields.
#
# Argument : the line, with or without its trailing newline
# Returns  : the fields as a list (their count in scalar
#            context); because split is called without a
#            limit, trailing empty fields are dropped

sub get_line_data {

    # work on a copy of the argument with the record separator removed
    chomp(my $record = shift);

    my @fields = split(/\t/, $record);

    return @fields;
}
