#!/usr/bin/perl

=head1 NAME
   alignGather.pl

=head1 SYNOPSIS
    USAGE: alignGather.pl -r=run_info.txt -o=output_dir -f=fusion -s=sample

=head1 OPTIONS

B<--run_info, -r>
	Run info file

B<--output_dir, -o>
	Output directory

B<--fusion, -f>
	Run the fusion search: 1 to enable, 0 to skip

B<--sample, -s>
	Sample name

B<--help,-h>
	Display this documentation and exit


=head1  DESCRIPTION
	Tophat post process step

=head1  INPUT

=head1  OUTPUT


=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE
	./alignGather.pl -r=run_info.txt -o=output_dir -f=fusion -s=sample

=cut

use strict;
use warnings;
use Data::Dumper;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use ParseConfig;
use MyUtility;
use Workflow::Logger;

## Command-line switches are collected into %options, keyed by the long
## option name. Every switch except --help takes a string value.
my %options = ();
my @option_specs = (
	'run_info|r=s',
	'output_dir|o=s',
	'fusion|f=s',
	'sample|s=s',
	'log|l=s',
	'debug=s',
	'help|h',
);

## A false return from GetOptions prints the usage text and exits.
my $results = GetOptions(\%options, @option_specs) || pod2usage();

## display documentation
pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} ) if $options{'help'};

#############################################################################
## make sure everything passed was peachy
## (check_parameters prints the usage text and exits when a required argument
## is missing, and fills in defaults for the optional ones)
check_parameters(\%options);

## parse X_info files
## Constructors are called as Class->new rather than with the indirect object
## form ("new Class ..."), which perl can misparse.
my $config = ParseConfig->new($options{run_info});
my $util = MyUtility->new;

## The log file is named after the sample and lives in the run's log directory.
## NOTE(review): the constructor's return value is replaced by get_logger() on
## the next statement; presumably the constructor call is needed only for its
## configuration side effect -- confirm against Workflow::Logger.
my $logger = Workflow::Logger->new('LOG_FILE'=>"$config->{RunInfo}->{logs}/Tophat_AlignGather.$options{sample}.log",
								   'LOG_LEVEL'=>$options{'debug'});
$logger = Workflow::Logger::get_logger();

$logger->info("Tophat align gather started");

## export env var.
## The tool directories are placed in front of any existing PATH, in this
## search order: tophat, bowtie1, bowtie, samtools, r.
my @tool_dirs = map { "$config->{ToolInfo}->{$_}->{value}" } qw(tophat bowtie1 bowtie samtools r);
my @inherited_path = defined $ENV{'PATH'} ? ($ENV{'PATH'}) : ();
$ENV{'PATH'} = join(":", @tool_dirs, @inherited_path);

## Same idea for PYTHONPATH: pythonpath first, then rseqcpath, then whatever
## the environment already provided.
my @python_dirs = map { "$config->{ToolInfo}->{$_}->{value}" } qw(pythonpath rseqcpath);
my @inherited_pythonpath = defined $ENV{'PYTHONPATH'} ? ($ENV{'PYTHONPATH'}) : ();
$ENV{'PYTHONPATH'} = join(":", @python_dirs, @inherited_pythonpath);


#### setup local vars and filenames
my $dir = "$options{output_dir}/tmp";

## Number of read segments: read length divided by the configured segment
## size. The range operator below uses the integer part of this value.
my $num_seg = ($config->{RunInfo}->{readlength}/$config->{ToolInfo}->{segment_size}->{value});

## File-name suffix of each per-segment file kind. For every kind the names
## of all segments are stored as one comma-separated string.
my %suffix_for = (
	segs       => ".fq",
	segAligned => ".bam",
	missing    => "_unmapped.bam",
	segSpliced => ".to_spliced.bam",
);

## $left and $right hold the same set of keys, one hash per mate.
my $left = {};
my $right = {};

foreach my $mate ([left => $left], [right => $right]) {
	my ($side, $files) = @{$mate};
	my $prefix = "$dir/${side}_kept_reads";

	$files->{whole} = $prefix . ".bam";
	$files->{unspliced} = $prefix . ".m2g_um.mapped.bam";
	$files->{m2g} = $prefix . ".m2g_converted.bam";
	$files->{candidates} = $prefix . ".m2g_um.candidates_and_unspl.bam";

	## set file names for later use
	foreach my $kind (keys %suffix_for) {
		my @names = map { $prefix . ".m2g_um_seg" . $_ . $suffix_for{$kind} } (1..$num_seg);
		$files->{$kind} = join(",", @names);
	}

	## When the plain "mapped" BAM exists, it and the matching candidates
	## file replace the m2g_um variants chosen above.
	if (-e $prefix . ".mapped.bam") {
		$files->{unspliced} = $prefix . ".mapped.bam";
		$files->{candidates} = $prefix . ".candidates_and_unspl.bam";
	}
}

#### set fusion and tophat arguments.
## Path of the first read file of the sample's first SampleInfo entry.
my $read1 = join("/",
				 "$config->{SampleInfo}{$options{sample}}[0]->{filepath}",
				 "$config->{SampleInfo}{$options{sample}}[0]->{read1}");

## Output of the quality-score helper script, compared numerically below.
## NOTE(review): the meaning of the trailing 1000 argument is not visible
## here -- presumably the number of reads sampled; confirm in the helper.
my $ILL2SANGER=`perl $config->{ToolInfo}->{workflow_path}->{value}/checkFastqQualityScores.pl $read1 1000`;

## Options are gathered as a list and joined with single blanks.
my @tophat_opts = (
	"--keep-fasta-order --keep-tmp -z0 --no-coverage-search --bowtie1",
	"--library-type fr-unstranded",
	"--max-multihits $config->{ToolInfo}->{max_hits}->{value}",
	"--rg-id $options{sample}",
	"--rg-sample $options{sample}",
	"--rg-platform $config->{RunInfo}->{platform}",
	"--rg-center $config->{RunInfo}->{center}",
	"--rg-library $config->{RunInfo}->{genomebuild}",
	"-o $options{output_dir}",
	"-G $config->{ToolInfo}->{features}->{value}",
	"--segment-length $config->{ToolInfo}->{segment_size}->{value}",
	"--transcriptome-index $config->{ToolInfo}->{transcriptome_hg19_index}->{value}",
);

## A helper result above 63 switches tophat to --solexa1.3-quals.
push(@tophat_opts, "--solexa1.3-quals") if ($ILL2SANGER > 63);

my $tophat_args = join(" ", @tophat_opts);

## skip fusion search if needed
my $fusion_args = $options{fusion}
	? "--fusion-search --fusion-ignore-chromosomes chrM,M --fusion-min-dist $config->{ToolInfo}->{fusion_min_dist}->{value}"
	: "";

#### execute command
## NOTE(review): the result of chdir is not checked; the command still runs
## from the current directory if the change fails.
chdir($options{output_dir});

## The command line is assembled from fragments joined with single blanks:
## interpreter and tophat script, thread count and step, the shared tophat
## arguments, then the left-mate inputs.
my @cmd_parts = (
	"$config->{ToolInfo}->{python}->{value}/python",
	"$config->{ToolInfo}->{tophat}->{value}/tophat",
	"-p $config->{MemoryInfo}->{align_threads}",
	"--step AlignGather",
	$tophat_args,
	"--MapLeftWhole $left->{whole}",
	"--MapLeftUnspliced $left->{unspliced}",
	"--MapLeftSegments $left->{segs}",
	"--MapLeftMissing $left->{missing}",
	"--MapLeftSegAligned $left->{segAligned}",
);

## Right-mate inputs are added only when the right-mate reads file exists.
if (-e $right->{whole}) {
	push(@cmd_parts,
		 "--MapRightWhole $right->{whole}",
		 "--MapRightUnspliced $right->{unspliced}",
		 "--MapRightSegments $right->{segs}",
		 "--MapRightMissing $right->{missing}",
		 ## the trailing blank keeps the logged command text unchanged
		 "--MapRightSegAligned $right->{segAligned} ");
}

## Fusion options (possibly empty) and the bowtie reference come last.
push(@cmd_parts, $fusion_args, "$config->{ToolInfo}->{ref_bowtie}->{value}");

my $cmd = join(" ", @cmd_parts);

execute_cmd($cmd);

#### check execution
## When the job runs under SGE (SGE_STDERR_PATH is set), the job's stderr file
## is scanned for "[FAILED]" and "Error:" lines. While either is present the
## file is emptied and the command is run again, at most three more times.
## If problems remain after that, an error file is created and reported.
##
## The file is scanned in-process instead of through "grep ... | wc -l", so
## the result depends neither on how wc formats its output nor on how the
## shell would parse the path taken from the environment.
my $send_mail = 1;
my $failed = 0;
my $error = 0;
if (exists $ENV{SGE_STDERR_PATH}) {
	my $stderr_path = $ENV{SGE_STDERR_PATH};

	#### check for failure or error messages.
	$failed = count_matching_lines($stderr_path, qr/\[FAILED\]/);
	$error = count_matching_lines($stderr_path, qr/Error:/);

	my $count = 0;

	while (($failed || $error) && ($count < 3)) {
		$count++;

		#### clear out sge error file.
		## Opening for write truncates the file; a failed open leaves it
		## untouched, as a failed shell redirect would.
		if (open(my $truncate_fh, '>', $stderr_path)) {
			close($truncate_fh);
		}

		execute_cmd($cmd);

		$failed = count_matching_lines($stderr_path, qr/\[FAILED\]/);
		$error = count_matching_lines($stderr_path, qr/Error:/);
	}

	if ($failed || $error) {
		my $error_name = "$config->{RunInfo}->{base_output_dir}/$config->{RunInfo}->{pi}/$config->{RunInfo}->{type}/$config->{RunInfo}->{output_folder}/error/Tophat_AlignGather.$options{sample}.err";
		$util->createErrorFile($error_name, $cmd);

		## The script stays here for as long as the error file exists,
		## checking every 300 seconds; the report is sent only once.
		## NOTE(review): presumably the file is removed by hand once the
		## problem has been dealt with -- confirm against MyUtility.
		while (-e $error_name) {

			#### pass Filename, cmd executed, email to, step running, sample name.
			if ($send_mail) {
				$util->reportError($error_name,
							   $cmd,
							   $config->{RunInfo}->{email},
							   "Tophat AlignGather",
							   $options{sample},
							   $ENV{SGE_STDERR_PATH},
							   $ENV{SGE_STDOUT_PATH});
				$send_mail = 0;
			}
			sleep 300;
		}
	}
}

$logger->info("Tophat align gather complete");
exit();

#############################################################################
## Count the lines of a file that match a pattern.
##   $path    - file to scan
##   $pattern - compiled regex (qr//) tested against each line
## Returns the number of matching lines, or 0 when the file cannot be opened
## (the same count "grep PATTERN file | wc -l" reports for an unreadable file).
sub count_matching_lines {
	my ($path, $pattern) = @_;

	open(my $fh, '<', $path) or return 0;

	my $matches = 0;
	while (my $line = <$fh>) {
		$matches++ if ($line =~ $pattern);
	}
	close($fh);

	return $matches;
}

#############################################################################
## Validate the parsed command-line options and fill in defaults.
##   $options - reference to the option hash; it is modified in place
## When a required option (run_info, output_dir, fusion, sample) is undefined,
## the missing key is named on STDERR together with the usage text and the
## script exits with status 2.
## The sub works on the hash it is given rather than on the file-level
## %options, so it validates whatever the caller passes in.
sub check_parameters {
	my $options = shift;

	my @required = qw(run_info output_dir fusion sample);

	foreach my $key (@required) {
		unless (defined $options->{$key}) {
			## pod2usage exits by itself, so nothing after it would run.
			pod2usage({-exitval => 2, -message => "ARG: $key is required", -verbose => 1, -output => \*STDERR});
		}
	}

	## Defaults apply to unset and to false values (e.g. --debug=0 becomes 3).
	$options->{'debug'} ||= 3;
	$options->{'side'} ||= 'left';
	$options->{'type'} ||= 'Segments';

	## fusion is required above, so this default only matters if it is ever
	## removed from the required list.
	$options->{'fusion'} = 1 unless (defined $options->{'fusion'});
}

#############################################################################
## Log a shell command, then run it with system().
##   $cmd - complete command line, handed to system() as a single string
## Returns the raw status from system() (0 on success); callers may ignore it.
## A non-zero status is also written to the log, so a failed run is recorded
## even when the caller does not inspect the return value.
sub execute_cmd {
	my $cmd = shift;

	$logger->info("$cmd");
	my $status = system($cmd);

	if ($status == -1) {
		## system() returns -1 when the command could not be started at all.
		my $reason = "$!";
		$logger->info("Command could not be started: $reason");
	} elsif ($status != 0) {
		## The exit code is held in the high byte of the wait status.
		$logger->info("Command finished with wait status $status (exit code " . ($status >> 8) . ")");
	}

	return $status;
}
