#!/usr/bin/perl

=head1 NAME
   tophat_fusion_report.pl

=head1 SYNOPSIS

   USAGE: tophat_fusion_report.pl --result=result.txt --potential=potential_fusion.txt --output_dir=output_dir

=head1 OPTIONS

B<--result,-i>
   TopHat result.txt file

B<--potential,-p>
   TopHat potential_fusion.txt file

B<--run_info,-r>
   run info file handed to ParseConfig

B<--output_dir,-o>
   output directory

B<--help,-h>
   This help message

=head1  DESCRIPTION

=head1  INPUT


=head1  OUTPUT

=head1  CONTACT
  Jaysheel D. Bhavsar @ bjaysheel[at]gmail[dot]com


=head1 EXAMPLE


=cut

use strict;
use warnings;
use Data::Dumper;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use ParseConfig;
use MyUtility;

#### gather command-line switches into %options; when GetOptions reports a
#### failure, pod2usage() prints the usage text and ends the program
my %options = ();
my @option_specs = (
	'result|i=s',
	'potential|p=s',
	'run_info|r=s',
	'output_dir|o=s',
	'threshold|t=s',
	'help|h',
);
my $results = GetOptions(\%options, @option_specs) || pod2usage();

## --help: print the full documentation on STDERR and exit with status 0
pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} ) if $options{'help'};

#############################################################################
#### set global vars
#############################################################################
# number of seconds check_input() sleeps between looks at a missing input file
my $LONG_WAIT = 300;

#### make sure everything passed was peachy
check_parameters(\%options);

#### parse X_info files
# Class->new(...) is used instead of "new Class(...)": the indirect-object
# form is parsed heuristically by perl and is easy to misread.
my $config = ParseConfig->new($options{run_info});
my $util = MyUtility->new();

#### wait until the result file exists with a non-zero size
check_input($options{result});

#### Build the report: read the result file line by line and write one
#### tab-separated row per fusion to <output_dir>/tophat_fusion_report.txt.
open(my $result_fh, "<", $options{result})
	or die "Could not open file $options{result} to read $!\n";

my $report_path = $options{output_dir}."/tophat_fusion_report.txt";
open(my $report_fh, ">", $report_path)
	or die "Could not open file $report_path to write $!\n";

my $header = join("\t","#Sample_name", "Fusion_pair_alphabetical", "Fusion_gene_direction",
					   "Type", "Potential_fusion_mech", "Fusion_strand", "Total", "Total_encompassing_reads",
					   "Total_split_reads", "Exon_boundary_fusion", "Exon1", "Exon2", "Primer");

# NOTE(review): the header announces a BLAT_hits column, but no row below
# writes a value for it (the BLAT step is commented out), so every data row
# is one field shorter than the header. Left as-is to keep the file format.
print {$report_fh} $header ."\tBLAT_hits\n";

while (my $line = <$result_fh>) {
	chomp $line;

	# blank lines carry no record
	next unless $line =~ /\S/;

	my @data = split(/\t/, $line);

	# fields 0..9 are all read below; a shorter row cannot be reported
	if (@data < 10) {
		warn "Skipping line $. of $options{result}: expected at least 10 tab-separated fields\n";
		next;
	}

	# columns 2 and 5 decide intra/inter below -- presumably chromosome names,
	# with columns 3 and 6 the matching coordinates (confirm against the
	# TopHat-Fusion result.txt layout)
	my ($sample, $gene1, $chrom1, $pos1, $gene2, $chrom2, $pos2,
		$split_reads, $pair_reads, $pair_split_reads) = @data;

	my ($direction, $primer) = potentialFusion($sample, $gene1, $gene2, $pos1, $pos2);

	# fusion pair alphabetical
	my $pair = ($gene1 lt $gene2) ? $gene1."_".$gene2 : $gene2."_".$gene1;

	# type and potential fusion mechanism
	my $type;
	my $mechanism = "T";

	# Exact string comparison: the previous regex match ($data[2] =~ $data[5])
	# treated e.g. "chr11" and "chr1" as the same value.
	if ($chrom1 eq $chrom2) {
		$type = "intra";

		if ($direction =~ /fr|rf/i) {
			$mechanism .= " and I";
		} elsif (($pos1 < $pos2) && ($direction =~ /ff/i)) {
			$mechanism .= " and D";
		} elsif (($pos1 > $pos2) && ($direction =~ /rr/i)) {
			$mechanism .= " and D";
		}
	} else {
		$type = "inter";
	}

	#### strand: "+" when the direction code starts with f, otherwise "-"
	#### (an empty direction, i.e. no match in the potential file, gives "-")
	my $strand = (substr($direction,0,1) =~ /f/i) ? "+" : "-";

	my $encompassing = $pair_reads + $pair_split_reads;
	my $total = $split_reads + $encompassing;

	print {$report_fh} join("\t",
		$sample,               #sample
		$pair,                 #fusion pair alphabetical
		$gene1."->".$gene2,    #fusion gene directional
		$type,
		$mechanism,
		$strand,
		$total,                #total
		$encompassing,         #total_encompassing
		$split_reads,          #split_reads
		"NA",                  #exon boundary fusion
		"",                    #exon 1
		"",                    #exon 2
		$primer                #primer
	) . "\n";

	#$myblat->execute($primer, $options{output_dir});
	#print TH_OUT $myblat->numOfHits(90, $options{output_dir})."\n"; #blat verificaiton.
}

close($result_fh);

# buffered write errors only surface when the handle is closed
close($report_fh) or die "Could not close file $report_path $!\n";

#############################################################################
# Verify that every mandatory command-line option was supplied.
#
# Takes a reference to the parsed options hash. For the first required key
# (result, potential, output_dir) whose value is missing or false, the usage
# text is printed to STDERR together with the name of the missing argument
# and the program exits with status 2. Returns nothing when all are present.
sub check_parameters {
	my $options = shift;

	my @required = qw(result potential output_dir);

	foreach my $key (@required) {
		# read through the hashref that was passed in, not the file-level hash
		next if $options->{$key};

		# pod2usage() exits with -exitval, so nothing after it would run
		pod2usage({-exitval => 2,  -message => "ARG: $key is required", -verbose => 1, -output => \*STDERR});
	}

	return;
}

#############################################################################
# Get primer and direction from the potential fusion file ($options{potential})
# given the two names and coordinates taken from a row of the results file.
#
# Arguments: ($sample, $exon1, $exon2, $coord1, $coord2)
#   $sample is accepted for the caller's convenience but is not used in the
#   lookup.
# Returns: ($direction, $primer)
#   $direction - the two-character code following "coord1 coord2" in the
#                matching record
#   $primer    - first sequence of the first sequence line joined to the
#                second sequence of the second sequence line
#   Both are empty strings when no matching record is found.
sub potentialFusion {
	my ($sample, $exon1, $exon2, $coord1, $coord2) = @_;
	my $direction = "";
	my $primer = "";

	#get primer from potential fusion file.
	# grep is started with an argument list, so no shell sees the pattern or
	# the file name: spaces and shell metacharacters in either are harmless.
	my @grep_cmd = ('grep', '-A', '1', '-B', '4',
					'-e', "^$exon1.*$exon2", '--', $options{potential});

	my $fusion_detail = "";
	if (open(my $grep_fh, '-|', @grep_cmd)) {
		local $/;    # slurp all of grep's output in one read
		my $output = <$grep_fh>;
		$fusion_detail = $output if defined $output;

		# grep exits non-zero when nothing matched; that is not an error here
		close($grep_fh);
	} else {
		warn "Could not run grep on $options{potential}: $!\n";
	}

	#primer is 1 and 4 of the sequence set in potentail fusion
	# ----------1----------- ------------2-----------
	# ----------3----------- ------------4-----------
	#
	# The coordinates are matched literally (\Q..\E), and the lookbehind keeps
	# $coord1 from matching the tail of a longer number.
	my ($orientation, $seq1, $seq4) = $fusion_detail =~
		/.*(?<!\d)\Q$coord1\E\s\Q$coord2\E\s(\w\w).*\n(\w+)\s\w+\n\w+\s(\w+)\n/;

	if (defined $orientation) {
		$direction = $orientation;
		$primer = $seq1 . $seq4;
	}

	return ($direction, $primer);
}

#############################################################################
# Block until $file exists with a non-zero size.
#
# On the first pass where the file is missing or empty, a single
# missing-input notice is sent through $util->missingInput using the e-mail
# address and tool name held in $config->{RunInfo}. The file is then checked
# again every $LONG_WAIT seconds, with no time limit.
sub check_input {
	my ($file) = @_;

	my $notified = 0;

	until (-s $file) {
		unless ($notified) {
			$notified = 1;

			my $run_info = $config->{RunInfo};
			$util->missingInput($run_info->{email},
								"TophatFusionReport",
								"TophatFusionPost",
								"$file",
								$run_info->{tool});
		}

		sleep $LONG_WAIT;
	}
}
