#!/usr/bin/perl

=head1 NAME

qc_readdist.pl - generate the read distribution QC report

=head1 SYNOPSIS

    USAGE: qc_readdist.pl -r=run_info.txt -o=output_dir

=head1 OPTIONS

B<--run_info, -r>
	Run info file

B<--output_dir, -o>
	Output directory

B<--help,-h>


=head1  DESCRIPTION

	Generate read distribution report

=head1  INPUT

=head1  OUTPUT


=head1  CONTACT

  bjaysheel@gmail.com


=head1 EXAMPLE

	./qc_readdist.pl -r=run_info.txt -o=output_dir

=cut

use strict;
use warnings;
use Data::Dumper;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use ParseConfig;
use Workflow::Logger;

## parsed command-line options, keyed by the long option name
my %options = ();

## fill %options from the command line; when GetOptions reports a failure
## fall through to pod2usage()
my $results = GetOptions(
	\%options,
	'run_info|r=s',
	'output_dir|o=s',
	'log|l=s',
	'debug=s',
	'help|h',
) || pod2usage();

## display documentation
pod2usage( { -exitval => 0, -verbose => 2, -output => \*STDERR } )
	if $options{'help'};

#############################################################################
## validate the required options (also fills in the default debug level)
check_parameters(\%options);

## parse X_info files
my $config = ParseConfig->new($options{run_info});

## construct the logger with its log file and level, then keep whatever
## handle Workflow::Logger::get_logger() hands back
my $logger = Workflow::Logger->new(
	'LOG_FILE'  => "$config->{RunInfo}->{logs}/ReadDistributionQC.log",
	'LOG_LEVEL' => $options{'debug'},
);
$logger = Workflow::Logger::get_logger();

$logger->info("Generate reports started");

## prepend the configured R location to PATH
## (presumably so the Rscript call further down resolves to it - confirm)
$ENV{PATH} = join(":", $config->{ToolInfo}->{r}->{value}, $ENV{PATH});

## make sure <output_dir>/QC exists
create_dir_struct(\%options);

## $this->{output} is the QC directory every file below is written to
my $this = { output => "$options{output_dir}/QC" };

## per-sample numbers, filled by samplestatistics():
##   $sample_numbers->{<sample>}{<statistic name>} = <value>
my $sample_numbers;

samplestatistics();

## The script changes into the QC directory before calling Rscript, so every
## path built from a relative output directory would stop resolving after the
## chdir. Resolve the directory to an absolute path once and use it throughout.
require File::Spec;
my $qc_dir = File::Spec->rel2abs($this->{output});

my $plot_tmp = "$qc_dir/plot.tmp";	# raw table, may contain empty rows
my $plot_txt = "$qc_dir/plot.txt";	# table handed to the R script
my $name_txt = "$qc_dir/name.txt";	# sample names, one per line
my $plot_png = "$qc_dir/ReadsDistribution.png";	# image checked for after the R script ran

open(my $num_fh, ">", $plot_tmp)
	or $logger->logdie("Could not open file $plot_tmp");
open(my $smp_fh, ">", $name_txt)
	or $logger->logdie("Could not open file $name_txt");

print {$num_fh} "UsedReads\tMappedReads\tGenomeMapped\tJunctionMapped\n";

## statistics written per sample, in header column order
my @plot_keys = ('Used reads', 'Mapped reads', 'Mapped reads (Genome)', 'Mapped reads (Junction)');

foreach my $smpl (sort keys %$sample_numbers) {
	print {$smp_fh} $smpl."\n";
	print {$num_fh} join("\t", map { $sample_numbers->{$smpl}{$_} } @plot_keys), "\n";
}

## buffered write errors only surface at close, so check it
close($num_fh) or $logger->logdie("Could not close file $plot_tmp: $!");
close($smp_fh) or $logger->logdie("Could not close file $name_txt: $!");

## removing empty lines
## NOTE(review): a sample whose four values are all missing yields a row of
## bare tabs, which "awk NF" drops here while its name stays in name.txt.
my $cmd = "awk NF $plot_tmp > $plot_txt";
execute_cmd($cmd);

$cmd = "rm $plot_tmp";
execute_cmd($cmd);

chdir($qc_dir)
	or $logger->logdie("Could not change directory to $qc_dir: $!");

## plotting distribution
$cmd = "Rscript $config->{ToolInfo}->{workflow_path}->{value}/rscript/QC_mRNASeq_Rplot.r";
$cmd .= " $plot_txt $name_txt";
execute_cmd($cmd);

## plot.txt and name.txt are left in place (their removal is disabled).
## The message now names the same file that is tested, and -s is false for a
## missing file as well as for an empty one.
unless (-s $plot_png) {
	$logger->logdie("$plot_png does not exist or is empty");
}

$logger->info("Generate reports complete");
exit();

#############################################################################
## Validate the parsed command-line options and fill in defaults.
##
## Args:
##   $options - hash reference holding the parsed options; it is modified
##              in place
## Behaviour:
##   - when run_info or output_dir is missing (or false), prints which
##     argument is required followed by the usage text to STDERR and exits
##     with status 2
##   - sets $options->{debug} to 3 when no true debug value was supplied
sub check_parameters {
	my $options = shift;

	my @required = qw(run_info output_dir);

	foreach my $key (@required) {
		next if $options->{$key};

		# pod2usage() exits with -exitval itself, nothing after it runs
		pod2usage({
			-exitval => 2,
			-message => "ARG: $key is required",
			-verbose => 1,
			-output  => \*STDERR,
		});
	}

	$options->{'debug'} = 3 unless ($options->{'debug'});

	return;
}

#############################################################################
## Log a shell command, run it, and abort the workflow when it fails.
##
## Args:
##   $cmd - command line handed to system() as a single string
##
## Any non-zero wait status counts as a failure: a non-zero exit code, a
## command terminated by a signal (exit-code bits are 0 in that case), or a
## command that could not be started at all (status -1).
sub execute_cmd {
	my $cmd = shift;

	$logger->info($cmd);
	my $status = system($cmd);

	if ($status != 0) {
		$logger->logdie("ERROR: Following command failed to execute. Exiting execution of workflow\n$cmd");

		# only reached if logdie() returns instead of terminating
		exit(-1);
	}

	return;
}

#############################################################################
## Make sure the QC sub-directory of the output directory exists.
##
## Args:
##   $options - hash reference of parsed options; only output_dir is read
##
## Logs a message when <output_dir>/QC is already a directory, otherwise
## creates it (with parents) through execute_cmd().
sub create_dir_struct {
	my $options = shift;

	# use the options that were passed in rather than the file-level hash
	my $dir = "$options->{output_dir}/QC";
	if ( -d $dir) {
		$logger->info("Directory $dir exist");
	} else {
		execute_cmd("mkdir -p $dir");
	}

	return;
}

#############################################################################
## Collect the per-sample numbers and write <output_dir>/SampleStatistics.tsv.
##
## Reads <output_dir>/numbers/<sample>.out for every sample listed in
## $config->{SampleInfo}; each "name = value" line found there is stored as
## $sample_numbers->{<sample>}{<name>} = <value> (file-level hash, also used
## by the plotting code). Names and values are stored without surrounding
## whitespace, so a trailing "\r" or padding around '=' does not end up in
## the hash.
##
## The TSV has a header row ("samples" plus one column per sample) and one
## row per statistic. All rows except "Total reads" and "Used reads" append
## the value's percentage of "Total reads" in parentheses.
##
## Takes no arguments; dies through the logger when a file cannot be opened.
sub samplestatistics  {
	my $dest = $options{output_dir}."/SampleStatistics.tsv";

	open(my $out_fh, ">", $dest) or $logger->logdie ("Can not open $dest : $!\n");

	# storing all the numbers in a Hash per sample (one hash)
	print {$out_fh} "samples";

	foreach my $sample (sort keys %{$config->{SampleInfo}}) {
		print {$out_fh} "\t$sample";

		my $numbers_file = "$options{output_dir}/numbers/$sample.out";
		open(my $sample_fh, "<", $numbers_file)
			or $logger->logdie("Could not open $numbers_file\n$!");

		my $hash;
		while (my $line = <$sample_fh>) {
			chomp $line;
			next unless $line =~ /([^=]+)\s*=\s*([^=]+)/;

			my ($name, $value) = ($1, $2);
			# [^=]+ also swallows blanks next to '=' and a trailing "\r"
			s/\A\s+|\s+\z//g for $name, $value;

			$hash->{$name} = $value;
		}
		close($sample_fh);

		$sample_numbers->{$sample} = $hash;
	}

	print {$out_fh} "\n";

	# [ row label / statistic name, append "(percent of Total reads)"? ]
	my @rows = (
		[ 'Total reads',             0 ],
		[ 'Used reads',              0 ],
		[ 'Mapped reads',            1 ],
		[ 'Mapped reads (Genome)',   1 ],
		[ 'Mapped reads (Junction)', 1 ],
		[ 'Gene count',              1 ],
		[ 'Exon count',              1 ],
	);

	foreach my $row (@rows) {
		my ($label, $with_percent) = @$row;

		my $line = $label;
		foreach my $key (sort keys %$sample_numbers) {
			my $count = $sample_numbers->{$key}{$label};

			$line .= "\t" . CommaFormatted($count);
			$line .= " (" . _percent_of_total($count, $sample_numbers->{$key}{'Total reads'}) . ")"
				if $with_percent;
		}

		print {$out_fh} $line."\n";
	}

	# buffered write errors only surface at close
	close($out_fh) or $logger->logdie("Can not close $dest : $!\n");

	return;
}

## Format $count as a percentage of $total with one decimal.
## Returns 'NA' when $total is missing or zero, instead of dying with a
## division by zero.
sub _percent_of_total {
	my ($count, $total) = @_;

	return 'NA' unless defined $total && $total != 0;

	return sprintf("%.1f", ($count / $total) * 100);
}

#############################################################################
## Insert thousands separators into the integer part of a number.
##
## Args:
##   $number - number or numeric string; surrounding whitespace is ignored
## Returns:
##   the text before the first '.' with a ',' in front of every complete
##   group of three digits at its end, followed by '.' and the remaining text
##   when that text contains a digit. undef and '' both yield ''.
sub CommaFormatted
{
	my $number = shift;

	return '' unless defined $number;

	# values read from files can carry padding or a trailing "\r"
	$number =~ s/\A\s+|\s+\z//g;

	my $delimiter = ','; # replace comma if desired
	my($n,$d) = split(/\./, $number, 2);
	$n = '' unless defined $n;	# split returns an empty list for ''
	my @a = ();

	# Peel three digits off the end of $n per pass. The condition is anchored
	# to the end exactly like the substitution, so the substitution always
	# succeeds when the condition holds and the loop is guaranteed to finish
	# (an unanchored condition spins forever on input such as "1234 ").
	while($n =~ /\d{4}\z/)
	{
		$n =~ s/(\d{3})\z//;
		unshift @a,$1;
	}

	unshift @a,$n;
	$n = join $delimiter,@a;
	$n = "$n\.$d" if ((defined( $d )) && (length($d)) && ($d =~ /\d/));

	return $n;
}
