#!/usr/bin/perl

=head1 NAME

exon_count.pl - merge intersectBed exon counts with a reference exon list and compute RPKM

=head1 SYNOPSIS

    USAGE: exon_count.pl -r=reference_exons.bed -i=sample.exon.bed.i.next -o=output_file

=head1 OPTIONS

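B<--reference, -r>
	Reference exon BED file (tab-delimited: chr, start, stop, feature)

B<--input, -i>
	intersectBed count file for the sample (chr, start, stop, feature, count)

B<--output, -o>
	Output file

B<--log, -l>
	Log file (accepted but not used by this script)

B<--debug>
	Debug level (accepted but not used by this script)

B<--help,-h>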


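=head1  DESCRIPTION

	Take the per-exon read counts produced by bedtools intersect (intersectBed)
	for a sample, compute RPKM for every exon, add the reference exons that have
	no count with a value of zero, and write the result sorted by chromosome,
	start, stop and gene.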

=head1  INPUT

=head1  OUTPUT


=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE

	./exon_count.pl -r=reference_exons.bed -i=sample.exon.bed.i.next -o=output_file

=cut

use strict;
use warnings;
use Data::Dumper;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use File::Basename;
use ParseConfig;
use Workflow::Logger;

#### Parse the command line into %options. Unknown arguments are left in @ARGV
#### (Getopt::Long is configured with pass_through above).
my %options = ();
my $results = GetOptions (\%options,
                          'reference|r=s',
                          'output|o=s',
                          'input|i=s',
                          'log|l=s',
                          'debug=s',
                          'help|h') || pod2usage();

## display documentation
if( $options{'help'} ){
    pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} );
}

## make sure the required arguments were supplied before any file is opened,
## so a missing option is reported as a usage error instead of a failed open
check_parameters(\%options);

#############################################################################

#### input file is of format
####
#### chr5    138857855       138858093       "TMEM173_P25797_E7" 64
####

#### reference file is of format
####
#### chr1    14362   14829   WASH7P_NR_024540_E1
####


#### Exon records keyed by "chr_start_stop_gene". Each value is a hash ref with
#### chr, start, stop, gene, raw (read count), rpkm and sort_order.
my %exons = ();

#### create a hash of all exon counts from intersectBed output
open(my $in_fh, "<", $options{input}) or die "Can not open input file $options{input} :$!\n";

#### The flagstat file is located relative to the input file: the sample name is
#### the input file name without its ".exon.bed.i.next" suffix, and a trailing
#### "counts/" directory is replaced by "alignment/tophat_<sample>".
my($filename, $path) = fileparse($options{input}); #get filename from full path.
my $m_path = $path;
my $sample = $filename;
$sample =~ s/\.exon\.bed\.i\.next$//;
$m_path =~ s/counts\/$/alignment\/tophat_$sample/;

$m_path .= "/" .$sample. ".flagstat";

#### Read the file in Perl rather than through a shell pipeline, so paths with
#### spaces or shell metacharacters work and a missing file is reported.
my $mapped_reads = read_mapped_reads($m_path);
my $reads_ok = (defined $mapped_reads && $mapped_reads =~ /^\d/ && $mapped_reads > 0);

while(my $line = <$in_fh>) {
	chomp $line;
	next if ($line =~ /^\s*$/);

	#### split on \s+ because the count is separated by space not tab.
	my ($chr,$start,$stop,$feature,$count) = split(/\s+/, $line);

	unless (defined $count) {
		warn "Skipping malformed line $. of $options{input}: $line\n";
		next;
	}

	#### The mapped read count is only needed once there is something to
	#### normalise, so an input without counts still yields the all-zero
	#### reference listing.
	die "Can not compute RPKM: no usable mapped read count found in $m_path\n" unless ($reads_ok);

	#### the feature may be wrapped in double quotes in the intersectBed output;
	#### strip them so the key matches the (unquoted) reference entry.
	$feature =~ s/^"(.*)"$/$1/;

	my @gene = split(/\./, $feature);
	my $key = $chr."_".$start."_".$stop."_".$gene[0];

	my $rpkm = ( ( (10**9) * $count) / ($mapped_reads * ( ($stop-$start) + 1 ) ) );

	$exons{$key} = {chr=>$chr, start=>$start, stop=>$stop, gene=>$gene[0], raw=>$count, rpkm=>$rpkm, sort_order=>chr_sort_order($chr)};
}

close($in_fh);

#### loop through reference Exon bed and add any exons that do not exist in the
#### intersectBed output with a value of zero.

open(my $ref_fh, "<", $options{reference}) or die "Can not open reference file $options{reference} : $! \n";

while(my $line = <$ref_fh>) {
	chomp $line;
	next if ($line =~ /^\s*$/);

	#### chr could be of the form
	#### chr#_string
	my ($chr,$start,$stop,$feature) = split(/\t/, $line);

	my @gene = split(/\./, $feature);
	my $key = $chr."_".$start."_".$stop."_".$gene[0];

	if(! exists $exons{$key} ) {
		$exons{$key} = {chr=>$chr, start=>$start, stop=>$stop, gene=>$gene[0], raw=>0, rpkm=>0, sort_order=>chr_sort_order($chr)};
	}
}

close($ref_fh);


#### print out hash with sort order of chr, start, stop and gene_id
open(my $out_fh, ">", $options{output}) or die "Can not write to a file $options{output}: $!\n";

foreach my $key (sort { ($exons{$a}->{sort_order} <=> $exons{$b}->{sort_order}) ||
						($exons{$a}->{start} <=> $exons{$b}->{start}) ||
						($exons{$a}->{stop} <=> $exons{$b}->{stop}) ||
						($exons{$a}->{gene} cmp $exons{$b}->{gene})
					} keys %exons) {

	my $exon = $exons{$key};
	print $out_fh join("\t", $exon->{chr}, $exon->{start}, $exon->{stop}, $exon->{gene}, $exon->{raw}, $exon->{rpkm});
	print $out_fh "\n";
}

#### buffered write errors only show up at close time
close($out_fh) or die "Can not close output file $options{output}: $!\n";

exit();

#############################################################################
#### Numeric sort rank for a chromosome name.
####
#### A leading "chr" is removed first. X, Y and M rank 23, 24 and 25. A name
#### starting with digits (e.g. "chr7" or "chr1_gl000191_random") ranks by that
#### leading number. Everything else ranks last (27). The result is always a
#### plain number, so it can be compared with <=> without warnings.
sub chr_sort_order {
	my ($chr) = @_;

	my $name = $chr;
	$name =~ s/^chr//;

	return 23 if ($name eq "X");
	return 24 if ($name eq "Y");
	return 25 if ($name eq "M");
	return $1 + 0 if ($name =~ /^(\d+)/);
	return 27;
}

#############################################################################
#### Extract the mapped read count from a flagstat file.
####
#### Mirrors the former shell pipeline
####   cat FILE | cut -f2 | tr "\n" " " | awk '{print $NF}'
#### For every line take the second tab-delimited column (or the whole line if
#### it has no tab), then return the last whitespace-separated token seen
#### across all lines.
####
#### Returns undef (after a warning) when the file can not be opened, and undef
#### when the file holds no token at all.
sub read_mapped_reads {
	my ($flagstat) = @_;

	my $fh;
	unless (open($fh, "<", $flagstat)) {
		warn "Can not open flagstat file $flagstat: $!\n";
		return;
	}

	my $last_token;
	while (my $line = <$fh>) {
		chomp $line;

		my $field = $line;
		if ($line =~ /\t/) {
			#### limit -1 keeps trailing empty columns, so index 1 always exists
			$field = (split(/\t/, $line, -1))[1];
		}

		my @tokens = split(' ', $field);
		$last_token = $tokens[-1] if (@tokens);
	}

	close($fh);

	return $last_token;
}

#############################################################################
#### Validate the parsed command-line options.
####
#### $options - hash ref of options as filled in by GetOptions.
####
#### Prints the usage text to STDERR and exits with status 2 when one of the
#### required options (reference, output, input) is missing or empty.
#### Sets $options->{debug} to 3 when no debug level was given.
sub check_parameters {
	my $options = shift;

	my @required = qw(reference output input);

	foreach my $key (@required) {
		#### test for defined/non-empty rather than truth, so a value of "0"
		#### is accepted
		next if (defined $options->{$key} && length $options->{$key});

		#### pod2usage exits with -exitval, nothing after it would run
		pod2usage({-exitval => 2,  -message => "ARG: $key is required", -verbose => 1, -output => \*STDERR});
	}

	$options->{'debug'} = 3 unless ($options->{'debug'});
}
