#!/usr/bin/perl

=head1 NAME
   postProcess.pl

=head1 SYNOPSIS
    USAGE: postProcess.pl -r=run_info.txt -o=output_dir -f=fusion_args -t=tophat_args -s=sample -e=left/right

=head1 OPTIONS

B<--run_info, -r>
	Run info file

B<--output_dir, -o>
	Output directory

B<--tophat_args, -t>
	Tophat arguments (not declared in this script's GetOptions list, so the value is currently not used here)

B<--fusion, -f>
	Fusion flag; a true value adds the fusion search arguments to the tophat command

B<--sample, -s>
	Sample name

B<--side, -e>
	Side left/right

B<--help,-h>


=head1  DESCRIPTION
	Tophat post process step

=head1  INPUT

=head1  OUTPUT


=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE
	./postProcess.pl -r=run_info.txt -o=output_dir -f=fusion_args -t=tophat_args -s=sample -e=left/right

=cut

use strict;
use warnings;
use Data::Dumper;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use ParseConfig;
use MyUtility;
use Workflow::Logger;

## command line options: "name|short=s" entries take a string value,
## 'help|h' is a plain flag
my %options = ();
my @option_specs = (
	'run_info|r=s',
	'output_dir|o=s',
	'fusion|f=s',
	'sample|s=s',
	'side|e=s',
	'log|l=s',
	'debug=s',
	'help|h',
);

## fall back to the usage message when GetOptions reports a parse failure
my $results = GetOptions(\%options, @option_specs) || pod2usage();

## display documentation
pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} ) if ( $options{'help'} );

#############################################################################
## make sure everything passed was peachy
check_parameters(\%options);

## parse X_info files
## Constructors are called as Class->new(...) rather than the indirect-object
## form "new Class(...)", which perl can mis-parse when a sub of the same name
## is in scope.
my $config = ParseConfig->new($options{run_info});
my $util = MyUtility->new;

## per-sample log file under the run's log directory; level comes from --debug
my $logger = Workflow::Logger->new('LOG_FILE'=>"$config->{RunInfo}->{logs}/Tophat_PostProcess.$options{sample}.log",
								  'LOG_LEVEL'=>$options{'debug'});
## the handle used from here on is whatever get_logger() returns
$logger = Workflow::Logger::get_logger();

$logger->info("Tophat post process started");

## export env var.
## Tool directories are pushed onto the front one after another, so the last
## one listed (tophat) ends up first; an already defined PATH stays at the end.
my @path_entries = defined $ENV{'PATH'} ? ($ENV{'PATH'}) : ();
foreach my $tool (qw(r samtools bowtie bowtie1 tophat)) {
	unshift @path_entries, "$config->{ToolInfo}->{$tool}->{value}";
}
$ENV{'PATH'} = join(':', @path_entries);

## PYTHONPATH: configured python path, then the rseqc path, then any existing value
my @python_entries = ("$config->{ToolInfo}->{pythonpath}->{value}",
					  "$config->{ToolInfo}->{rseqcpath}->{value}");
push @python_entries, $ENV{'PYTHONPATH'} if ( defined $ENV{'PYTHONPATH'} );
$ENV{'PYTHONPATH'} = join(':', @python_entries);

#### set up local vars and file names
my $dir = "$options{output_dir}/tmp";

## read length divided by segment size; the 1..N range below uses its integer part
my $num_seg = ($config->{RunInfo}->{readlength}/$config->{ToolInfo}->{segment_size}->{value});

## common prefix shared by every intermediate file of this side
my $kept = "$dir/$options{side}_kept_reads";

## one file prefix per read segment: <prefix>.m2g_um_seg1 .. <prefix>.m2g_um_segN
my @seg_prefixes = map { $kept.".m2g_um_seg".$_ } (1..$num_seg);

## per-segment entries are comma separated lists with one file per segment
my $obj = {
	whole      => "${kept}.bam",
	unspliced  => "${kept}.m2g_um.mapped.bam",
	m2g        => "${kept}.m2g_converted.bam",
	candidates => "${kept}.m2g_um.candidates_and_unspl.bam",
	segs       => join(",", map { $_.".fq" } @seg_prefixes),
	segAligned => join(",", map { $_.".bam" } @seg_prefixes),
	missing    => join(",", map { $_."_unmapped.bam" } @seg_prefixes),
	segSpliced => join(",", map { $_.".to_spliced.bam" } @seg_prefixes),
};

## switch to the non-m2g unspliced/candidate names when <prefix>.mapped.bam exists
if (-e "${kept}.mapped.bam") {
	$obj->{unspliced} = "${kept}.mapped.bam";
	$obj->{candidates} = "${kept}.candidates_and_unspl.bam";
}

## when the m2g converted file exists, the whole-read file is the m2g_um one
if (-e "${kept}.m2g_converted.bam") {
	$obj->{whole} = "${kept}.m2g_um.bam";
	$obj->{m2g} = "${kept}.m2g_converted.bam";
}

#### set fusion and tophat arguments.
## read1 fastq of the sample's first entry, handed to the quality score checker
my $read1 = "$config->{SampleInfo}{$options{sample}}[0]->{filepath}/$config->{SampleInfo}{$options{sample}}[0]->{read1}";
## NOTE(review): presumably the helper prints a quality score value for the
## first 1000 reads -- confirm against checkFastqQualityScores.pl
my $ILL2SANGER=`perl $config->{ToolInfo}->{workflow_path}->{value}/checkFastqQualityScores.pl $read1 1000`;

## fixed tophat switches followed by the run/sample specific ones
my $tophat_args = join(" ",
	"--keep-fasta-order --keep-tmp -z0 --no-coverage-search --bowtie1",
	"--library-type fr-unstranded --max-multihits $config->{ToolInfo}->{max_hits}->{value}",
	"--rg-id $options{sample} --rg-sample $options{sample} --rg-platform $config->{RunInfo}->{platform}",
	"--rg-center $config->{RunInfo}->{center} --rg-library $config->{RunInfo}->{genomebuild}",
	"-o $options{output_dir} -G $config->{ToolInfo}->{features}->{value}",
	"--segment-length $config->{ToolInfo}->{segment_size}->{value}",
	"--transcriptome-index $config->{ToolInfo}->{transcriptome_hg19_index}->{value}",
);

## a checker result above 63 switches tophat to solexa1.3 quality scores
$tophat_args .= " --solexa1.3-quals" if ($ILL2SANGER > 63);

## skip fusion search if needed
my $fusion_args = $options{fusion}
	? "--fusion-search --fusion-ignore-chromosomes chrM,M --fusion-min-dist $config->{ToolInfo}->{fusion_min_dist}->{value}"
	: "";

#### execute command
## tophat is started from the output directory. A failed chdir is logged and
## not treated as fatal, so the execution check that follows the run still
## takes place.
unless (chdir($options{output_dir})) {
	$logger->info("Could not change directory to $options{output_dir}: $!");
}

## python interpreter and tophat script locations come from the tool config
my $cmd = "$config->{ToolInfo}->{python}->{value}/python $config->{ToolInfo}->{tophat}->{value}/tophat";
$cmd .= " -p $config->{MemoryInfo}->{align_threads} $tophat_args --step PostProcess";
$cmd .= " --PostWhole $obj->{whole} --PostUnspliced $obj->{unspliced}";
$cmd .= " --SplicedSegMaps $obj->{segSpliced} --PostSegments $obj->{segs}";
$cmd .= " --PostMissing $obj->{missing} --PostSegAligned $obj->{segAligned}";

## the m2g file is only passed along when it exists on disk
if ( -e $obj->{m2g}) {
	$cmd .= " --PostM2G $obj->{m2g}";
}
$cmd .= " $fusion_args";
## the bowtie reference is the last argument on the command line
$cmd .= " $config->{ToolInfo}->{ref_bowtie}->{value}";

execute_cmd($cmd);

#### check execution
## When SGE_STDERR_PATH is set, that file is scanned for the markers "[FAILED]"
## and "Error:". While either marker is present the command is re-run, up to 3
## times, clearing the file before each attempt. If markers remain, an error
## file is created, the error is reported once, and the script keeps polling
## until the error file no longer exists.
my $send_mail = 1;
my $failed = 0;
my $error = 0;
if (exists $ENV{SGE_STDERR_PATH}) {
	my $stderr_path = $ENV{SGE_STDERR_PATH};
	my $failed_re = qr/\[FAILED\]/;
	my $error_re = qr/Error:/;

	## Count the lines of the stderr file matching a pattern. Done in perl instead
	## of a "grep | wc -l" shell pipeline, so the result is a plain integer and
	## the path is never interpreted by a shell. A file that cannot be opened
	## counts as 0 matches.
	my $count_lines = sub {
		my ($pattern) = @_;
		my $matches = 0;

		if (open(my $fh, '<', $stderr_path)) {
			while (my $line = <$fh>) {
				$matches++ if ($line =~ $pattern);
			}
			close($fh);
		}

		return $matches;
	};

	#### check for failure or error messages.
	$failed = $count_lines->($failed_re);
	$error = $count_lines->($error_re);

	my $count = 0;

	while (($failed || $error) && ($count < 3)) {
		$count++;

		#### clear out sge error file.
		if (open(my $fh, '>', $stderr_path)) {
			close($fh);
		} else {
			$logger->info("Could not clear $stderr_path: $!");
		}

		execute_cmd($cmd);

		$failed = $count_lines->($failed_re);
		$error = $count_lines->($error_re);
	}

	if ($failed || $error) {
		my $error_name = "$config->{RunInfo}->{base_output_dir}/$config->{RunInfo}->{pi}/$config->{RunInfo}->{type}/$config->{RunInfo}->{output_folder}/error/Tophat_PostProcess.$options{side}.$options{sample}.err";
		$util->createErrorFile($error_name, $cmd);

		## poll every 300 seconds for as long as the error file exists
		## NOTE(review): presumably the file is removed by hand to release the job -- confirm
		while (-e $error_name) {

			#### pass Filename, cmd executed, email to, step running, sample name.
			if ($send_mail) {
				$util->reportError($error_name,
							   $cmd,
							   $config->{RunInfo}->{email},
							   "Tophat PostProcess.$options{side}",
							   $options{sample},
							   $ENV{SGE_STDERR_PATH},
							   $ENV{SGE_STDOUT_PATH});
				## report only on the first pass through the loop
				$send_mail = 0;
			}
			sleep 300;
		}
	}
}

$logger->info("Tophat post process complete");
exit();

#############################################################################
## Validate the parsed command line options and fill in defaults.
##
## Parameters:
##   $options - reference to the option hash filled in by GetOptions; it is
##              read and updated through this reference
##
## Calls pod2usage (exit status 2, message naming the option) as soon as one of
## the required options is undefined. Otherwise sets 'debug' to 3 and 'side' to
## 'left' when their current value is false.
sub check_parameters {
    my $options = shift;

	my @required = qw(run_info output_dir fusion sample side);

	foreach my $key (@required) {
		unless (defined $options->{$key}) {
			## pod2usage prints the message and exits, nothing after it would run
			pod2usage({-exitval => 2,  -message => "ARG: $key is required", -verbose => 1, -output => \*STDERR});
		}
	}

	$options->{'debug'} = 3 unless ($options->{'debug'});
	$options->{'side'} = 'left' unless ($options->{'side'});
	## NOTE(review): 'fusion' is in @required, so it is always defined here and
	## this default never applies -- confirm whether fusion was meant to be optional
	$options->{'fusion'} = 1 unless (defined $options->{'fusion'});
}

#############################################################################
## Log a command line, run it through system(), and return system()'s result.
##
## Parameters:
##   $cmd - complete command line, passed to system() as a single string
##
## Returns the value returned by system(): 0 on success, -1 when the command
## could not be started, otherwise the wait status. A non-zero result is
## written to the log instead of being silently dropped.
sub execute_cmd {
	my $cmd = shift;

	$logger->info("$cmd");

	my $status = system($cmd);
	if ($status == -1) {
		$logger->info("Command could not be started: $!");
	} elsif ($status != 0) {
		## the exit code is in the high byte of the wait status
		$logger->info("Command returned wait status $status (exit code ".($status >> 8).")");
	}

	return $status;
}
