#!/usr/local/biotools/perl/5.14.2/bin/perl

=head1 NAME
   merge_exon.pl

=head1 SYNOPSIS
    USAGE: merge_exon.pl -i=input.list -o=output.file

=head1 OPTIONS

B<--input, -i>
	Required. List of all files of exon counts

B<--output, -o>
	Required. Full path to output file

B<--help,-h>


=head1  DESCRIPTION
	Merge all per-sample exon count files into one table, with one ExonCount
	column and one RPKM column per sample.


=head1  INPUT


=head1  OUTPUT


=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE
	./merge_exon.pl -i=input.list -o=output.file

=cut

use strict;
use warnings;
use Data::Dumper;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use File::Basename;

#### parsed command line options, keyed by their long name
my %options = ();

#### a command line that cannot be parsed prints the short usage and exits
GetOptions(
    \%options,
    'input|i=s',
    'output|o=s',
    'log|l=s',
    'debug=s',
    'help|h',
) or pod2usage();

#### display documentation
pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} )
    if $options{'help'};

#############################################################################
#### make sure everything passed was peachy
check_parameters(\%options);

my %samples = ();

#### ASSUMPTION:
#### each exon.count.tsv file has been zero filled (backfill)
#### that is for a given exon.bed file
#### each exon.count.tsv file has equal number of rows (one per GENE_ID)
#### and that any gene_ids not found by intersectBed has a count of zero.

#### all input exon.count.tsv files have 6 fields and an equal number of rows in
#### the same order, so all we need to do is paste the relevant columns together
#### to create a consensus file.

#### eg: input
#### chr1    12613   12721   DDX11L1 0       0
#### chr1    13221   14408   DDX11L1 4       0.0152778103548438
#### chr1    14362   14829   WASH7P  1022    9.90883523129733
#### chr1    14970   15038   WASH7P  117     7.69403814522415
#### chr1    15796   15947   WASH7P  58      1.73141816560881

#### quote a string so the shell sees it as one literal word; every path used
#### below comes from user input and may contain spaces or metacharacters.
my $sh_quote = sub {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
};

#### create a sample hash from input file list
open(my $list_fh, "<", $options{input})
    or die "Could not open input list file $options{input}: $!\n";
while (my $file = <$list_fh>) {
    chomp $file;
    $file =~ s/\r\z//;             #### tolerate lists saved with DOS line endings
    next if $file =~ /\A\s*\z/;    #### a blank line does not name a file

    #### "cut | paste" only reports the exit status of paste, so a missing
    #### input would go unnoticed later on; check for it up front instead.
    die "Could not read exon count file $file\n" unless -f $file && -r _;

    #### the sample name is the file name without directory and suffix; the
    #### suffix is a pattern for fileparse, so the dots have to be escaped.
    my $sample_name = fileparse($file, qr/\.exon\.count\.tsv/);
    warn "Sample $sample_name is listed more than once, using $file\n"
        if exists $samples{$sample_name};
    $samples{$sample_name} = {name=>$sample_name, path=>$file};
}
close $list_fh;

die "No exon count files listed in $options{input}\n" unless %samples;

#### temp files are created next to the output file; fileparse returns the
#### directory part with its trailing separator already attached.
my (undef, $out_dir) = fileparse($options{output});
my $merged  = $sh_quote->($out_dir . "tmp_exon_count.tsv");
my $scratch = $sh_quote->($out_dir . "tmp_exon.tsv");
my $output  = $sh_quote->($options{output});

#### open output file now so an unwritable location is reported before any
#### merging work is done; only the header is written through this handle.
open(my $out_fh, ">", $options{output})
    or die "Could not open file to write $options{output}: $!\n";

my @columns  = qw(Chr Start Stop Gene);
my $is_first = 1;

foreach my $s (sort keys %samples) {
    push @columns, $s . "_ExonCount", $s . "_RPKM";
    my $counts = $sh_quote->($samples{$s}->{path});

    #### the first file is copied whole, this way we get the coordinate and
    #### gene columns as well (see example input above)
    if ($is_first) {
        execute_cmd("cp $counts $merged");
        $is_first = 0;
        next;
    }

    #### for all subsequent files the first 4 columns are the same, so only
    #### the last two columns (raw exon count and rpkm) are appended.
    execute_cmd("cut -f5,6 $counts | paste $merged - > $scratch");

    #### move the widened table back to the merged temp file
    execute_cmd("mv $scratch $merged");
}

print {$out_fh} join("\t", @columns), "\n";

#### buffered write errors only surface when the handle is closed
close($out_fh) or die "Could not write header to $options{output}: $!\n";

#### concatenate header and rest of merged columns data together.
execute_cmd("cat $output $merged > $scratch");

#### rename file.
execute_cmd("mv $scratch $output");

#### remove tmp file
execute_cmd("rm $merged");

exit();

#############################################################################
#### Verify that every required command line option was supplied.
####
#### Args:    $options - reference to the hash of parsed options
#### Returns: nothing when all required options are present
#### Exits:   on the first missing option, prints which one is missing plus
####          the usage text to STDERR and exits with status 2
sub check_parameters {
    my $options = shift;

    my @required = qw(input output);

    foreach my $key (@required) {
        #### test for presence rather than truth so a value of "0" is accepted
        next if defined $options->{$key} && length $options->{$key};

        #### pod2usage exits by itself, nothing after it would ever run
        pod2usage({
            -exitval => 2,
            -message => "ARG: $key is required",
            -verbose => 1,
            -output  => \*STDERR,
        });
    }

    return;
}

#############################################################################
#### Run a command through system() and stop the whole script if it fails.
####
#### Args:    $cmd - command line, handed to system() as a single string
#### Returns: nothing when the command succeeds
#### Exits:   prints the failed command and the reason to STDERR and calls
####          exit(-1) when the command could not be started, was killed by a
####          signal, or returned a non-zero exit status
sub execute_cmd {
    my $cmd = shift;

    #$logger->info("$cmd");
    system($cmd);

    #### $? is 0 only after a clean exit. Looking at ($? >> 8) alone would
    #### miss a command killed by a signal, which is reported in the low bits.
    return if $? == 0;

    my $reason = $? == -1 ? "could not be started: $!"
               : $? & 127 ? "died with signal " . ($? & 127)
               :            "exited with status " . ($? >> 8);

    #$logger->logdie("ERROR: Following command failed to execute. Exiting execution of workflow\n$cmd");
    print STDERR "ERROR: Following command failed to execute. Exiting execution of workflow\n$cmd\n($reason)\n";
    exit(-1);
}
