#!/usr/bin/perl

=head1 NAME
   numbers_report.pl

=head1 SYNOPSIS
    USAGE: numbers_report.pl -r=run_info.txt -o=output_dir -s=sample_name

=head1 OPTIONS

B<--run_info, -r>
	Run info file

B<--output_dir, -o>
	Output directory

B<--sample, -s>
	Sample name

B<--help, -h>
	Display this documentation and exit


=head1  DESCRIPTION
	Generate numbers report

=head1  INPUT

=head1  OUTPUT


=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE
	./numbers_report.pl -r=run_info.txt -o=output_dir -s=sample_name

=cut

use strict;
use warnings;
use Data::Dumper;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use ParseConfig;
use Workflow::Logger;

## Parsed command-line options, keyed by the long option name.
my %options = ();

## Option specifications accepted by this script ("=s" means the option
## takes a string value; "help" is a plain flag).
my @option_specs = (
    'run_info|r=s',
    'output_dir|o=s',
    'sample|s=s',
    'log|l=s',
    'debug=s',
    'help|h',
);

## Fall back to the usage message when GetOptions reports a failure.
my $results = GetOptions(\%options, @option_specs) || pod2usage();

## display documentation
pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} ) if $options{'help'};

#############################################################################
## make sure everything passed was peachy
check_parameters(\%options);

#### parse X_info files
## Constructors are called as Class->new(...) rather than with the indirect
## object form (new Class ...), which perl can parse ambiguously.
my $config = ParseConfig->new($options{run_info});

## Configure the logger to write NumbersReport.log under the run's log
## directory at the requested debug level, then pick up the logger handle
## returned by Workflow::Logger::get_logger().
my $logger = Workflow::Logger->new(
    'LOG_FILE'  => "$config->{RunInfo}->{logs}/NumbersReport.log",
    'LOG_LEVEL' => $options{'debug'},
);
$logger = Workflow::Logger::get_logger();

$logger->info("Generate numbers report started");

## make sure <output_dir>/numbers exists before the report is written there
create_dir_struct(\%options);

## Paths used by the rest of the script:
##   output    - report file written for this sample
##   alignment - per-sample alignment directory (prep_reads.info, flagstat, junction sam)
##   counts    - directory holding the gene/exon count tables
my $this = {
    output    => "$options{output_dir}/numbers/$options{sample}.out",
    alignment => "$options{output_dir}/alignment/tophat_$options{sample}",
    counts    => "$options{output_dir}/counts",
};

## Running totals collected from prep_reads.info:
##   $num_reads  - sum of the *_reads_in values
##   $used_reads - sum of the *_reads_out values
my $num_reads=0;
my $used_reads=0;

## The report handle stays open until the end of the script; later sections
## keep printing to OUT.
open(OUT, ">", $this->{output})
    or $logger->logdie("Could not open file to write $this->{output}: $!");

my $prep_info = "$this->{alignment}/prep_reads.info";
open(my $prep_fh, "<", $prep_info)
    or $logger->logdie("Could not open file $prep_info: $!");

## right_* entries are only counted for paired runs; evaluate the flag once
## and tolerate a missing "paired" entry in the run info.
my $paired_flag = $config->{RunInfo}->{paired};
my $is_paired = (defined $paired_flag && $paired_flag == 1);

while (my $line = <$prep_fh>) {
    chomp $line;
    my ($key, $value) = split(/=/, $line);

    ## skip blank lines and lines that are not of the form key=value
    next unless (defined $key && defined $value);

    if ($key =~ /left_reads_out/i) {
        $used_reads += $value;
    } elsif ($key =~ /left_reads_in/i) {
        $num_reads += $value;
    }

    if ($is_paired) {
        if ($key =~ /right_reads_out/i) {
            $used_reads += $value;
        } elsif ($key =~ /right_reads_in/i) {
            $num_reads += $value;
        }
    }
}
close($prep_fh);

print OUT "Total reads=$num_reads\n";
print OUT "Used reads=$used_reads\n";

## print mapped reads
## The pipeline takes the second tab-separated field of every line of the
## sample's .flagstat file, joins the lines with spaces and keeps the last
## whitespace-separated token.
my $cmd = join(' | ',
    "cat $this->{alignment}/$options{sample}.flagstat",
    'cut -f2',
    'tr "\n" " "',
    q{awk '{print $NF}'},
);
chomp(my $mapped_reads = execute_cmd($cmd));
print OUT "Mapped reads=$mapped_reads\n";

#### print mapped genome and junction reads.
## The junction count is taken from the first line of the sorted junction
## file; the genome count is whatever remains of the mapped reads.
$cmd = "cat $this->{alignment}/$options{sample}-sorted.junction.sam | head -1";
chomp(my $junction = execute_cmd($cmd));
my $genome_reads = $mapped_reads - $junction;
print OUT "Mapped reads (Genome)=$genome_reads\n";
print OUT "Mapped reads (Junction)=$junction\n";

#### print gene count
#### The raw htSeq count is the last (second) column of gene.count.tsv:
#### 1/2-SBSRNA4.chr4        52
#### A1BG-AS1.chr19  12
#### A1BG.chr19      0
#### A1CF.chr10      2
#### A2LD1.chr13     90
#### A2M.chr12       14
#### A2ML1.chr12     36
#### Lines matching any of the summary labels below are dropped before the
#### last column is summed; the second awk prints a running total per line
#### and tail keeps only the final one.
my @skipped_labels = qw(no_feature ambiguous too_low_aQual not_aligned alignment_not_unique);
my $label_filter = join(' && ', map { sprintf('$0 !~ /%s/', $_) } @skipped_labels);
$cmd = join(' | ',
    "cat $this->{counts}/$options{sample}.gene.count.tsv",
    "awk '$label_filter'",
    q{awk '{print sum+=$NF;sum}'},
    'tail -1',
);
chomp(my $gene_count = execute_cmd($cmd));
print OUT "Gene count=$gene_count\n";

#### print exon count
#### sum on the second last column because input exon.count.tsv file is of format
#### chr1    879288  879961  SAMD11  23      0.554924099219828
#### chr1    879583  880180  NOC2L   31      0.842997246553663
#### chr1    880437  880526  NOC2L   7       1.26479801938123
#### chr1    880898  881033  NOC2L   7       0.836998689296402
#### chr1    881553  881666  NOC2L   4       0.570585572653186
#### chr1    881782  881925  NOC2L   5       0.564641972938049
#### last column is rpkm value and second last column is the raw count of exons.
#### awk prints a running total per line; tail keeps only the final total.
$cmd = "cat $this->{counts}/$options{sample}.exon.count.tsv | awk '{print sum+=\$(NF-1);sum}' | tail -1";
my $exon_count = execute_cmd($cmd);
chomp $exon_count;
## End the last record with a newline like every other line of the report, so
## the file is newline-terminated and reports can be concatenated safely.
print OUT "Exon count=$exon_count\n";

## Buffered write errors only surface at close, so check it on the report.
close(OUT)
    or $logger->logdie("Could not close file $this->{output}: $!");

$logger->info("Generate numbers report complete");
exit();

#############################################################################
## Validate the parsed command-line options.
##
## Args:
##   $options - hash reference holding the parsed options
## Behaviour:
##   - prints a message naming the first missing required option
##     (run_info, output_dir, sample) followed by the usage text on STDERR,
##     then exits with status 2;
##   - otherwise sets $options->{debug} to 3 when no (true) debug level
##     was supplied.
sub check_parameters {
    my $options = shift;

    my @required = qw(run_info output_dir sample);

    foreach my $key (@required) {
        unless ($options->{$key}) {
            ## pod2usage exits with -exitval itself, so no explicit exit is needed
            pod2usage({-exitval => 2, -message => "ARG: $key is required", -verbose => 1, -output => \*STDERR});
        }
    }

    ## work on the hash that was passed in, not on a file-level variable
    $options->{'debug'} = 3 unless ($options->{'debug'});

    return;
}

#############################################################################
## Log a command line at info level and run it through the shell.
##
## Args:
##   the command line to execute
## Returns:
##   whatever the backtick operator yields for the command in the caller's
##   context (its standard output).
sub execute_cmd {
    my ($command) = @_;

    $logger->info($command);

    return qx{$command};
}

#############################################################################
## Make sure the "numbers" sub-directory of the output directory exists.
##
## Args:
##   $options - hash reference holding the parsed options; only
##              output_dir is read
## Behaviour:
##   logs a message when the directory is already present, otherwise
##   creates it (with parents) via "mkdir -p" through execute_cmd.
sub create_dir_struct {
    my $options = shift;

    ## read the path from the hash that was passed in, not from a
    ## file-level variable
    my $dir = "$options->{output_dir}/numbers";

    if ( -d $dir) {
        $logger->info("Directory $dir exist");
    } else {
        execute_cmd("mkdir -p $dir");
    }

    return;
}
