#!/usr/bin/perl

=head1 NAME
   sortbam.pl

=head1 SYNOPSIS
    USAGE: sortbam.pl -r=run_info.txt -o=output_dir -s=samplename

=head1 OPTIONS

B<--run_info, -r>
	Run info file

B<--output_dir, -o>
	output directory

B<--sample, -s>
	sample name

B<--help, -h>
	print this documentation and exit


=head1  DESCRIPTION
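	Index the TopHat alignment BAM (accepted_hits.bam) of a sample, count
	junction-mapped reads, create a read-id sorted BAM, extract uniquely
	mapped and non-fusion alignments, and collect alignment statistics.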

=head1  INPUT

=head1  OUTPUT


=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE
	./sortbam.pl -r=run_info.txt -o=output_dir -s=samplename

=cut

use strict;
use warnings;
use Data::Dumper;
use Cwd;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use ParseConfig;
use MyUtility;
use Workflow::Logger;

#### command line handling
#### every spec is "long_name|short_alias"; a trailing "=s" means the option takes a string value
my %options = ();
my @option_specs = qw(run_info|r=s output_dir|o=s sample|s=s log|l=s debug=s help|h);

#### a failed parse falls through to pod2usage()
my $results = GetOptions(\%options, @option_specs) || pod2usage();

#### display documentation
pod2usage({ -output => \*STDERR, -verbose => 2, -exitval => 0 }) if $options{'help'};

#############################################################################
#### set global vars
############################################
#### seconds check_input() sleeps between polls for a missing input file
my $LONG_WAIT = 300;

#### make sure the required options were supplied (also defaults the debug level)
check_parameters(\%options);

#### parse X_info files
my $config = ParseConfig->new($options{run_info});
my $util = MyUtility->new;

#### per sample settings shared by the rest of the script
my $this = {
	output_dir   => "$options{output_dir}/alignment/tophat_$options{sample}",
	max_read_mem => 1000000,
};
$this->{input_file} = "$this->{output_dir}/accepted_hits.bam";

#### the constructor is given the log file and level, but the handle that is
#### actually used is the one returned by get_logger()
#### NOTE(review): presumably new() configures a shared logger - confirm in Workflow::Logger
my $logger = Workflow::Logger->new(
	'LOG_FILE'  => "$config->{RunInfo}->{logs}/SortBAM.$options{sample}.log",
	'LOG_LEVEL' => $options{'debug'},
);
$logger = Workflow::Logger::get_logger();

$logger->info("Sort BAM for $options{sample} started");

#### check directory structure
create_dir_struct(\%options);

#### holds the shell command currently being assembled
my $cmd = "";

#### check for input (blocks until the alignment BAM exists and is non-empty)
check_input($this->{input_file});

#### index accepted_hits.bam; samtools writes accepted_hits.bam.bai next to it
$cmd = join(' ',
	"$config->{ToolInfo}->{samtools}->{value}/samtools",
	"index",
	$this->{input_file});
execute_cmd($cmd);

#### count reads MAPPED to a junction
#### rows whose 6th column (CIGAR in SAM) contains an N are kept; the distinct
#### read keys are then counted with "sort -u | wc -l".
####   paired run : key is the read id plus "-" or "+" taken from the sign of column 9
####   single run : key is the read id only
my $junction_count_file = "$this->{output_dir}/$options{sample}-sorted.junction.sam";
my $read_key_filter = ($config->{RunInfo}->{paired} == 1)
	? " | awk '{ if (\$9 ~ /^-/) {print \$1\"\\t-\"} else print \$1\"\\t+\" }'"
	: " | cut -f1";

$cmd = "$config->{ToolInfo}->{samtools}->{value}/samtools view $this->{input_file}"
	. " |  awk '\$6 ~/N/'"
	. $read_key_filter
	. " | sort -T $this->{output_dir} -u"
	. " | wc -l > $junction_count_file";
execute_cmd($cmd);

#### NOTE(review): the file holds the output of "wc -l", so it is non-empty even
#### when the count is 0; this check only catches a missing/truncated file - confirm intent
$logger->logdie("ERROR: Junction mapped file is empty for $options{sample}")
	unless -s $junction_count_file;

#### create id sorted bam file
#### novosort is used when it is configured in ToolInfo, otherwise Picard SortSam.
#if ($config->{RunInfo}->{paired} == 1) {
if (defined ($config->{ToolInfo}->{novosort}->{value})) {
	$cmd = "$config->{ToolInfo}->{novosort}->{value}/novosort $config->{ToolInfo}->{novosort_opt}->{value}";
	$cmd .= " --index --tmpdir=$this->{output_dir}/tmp";
	$cmd .= " --namesort $this->{input_file}";
	$cmd .= " -o $this->{output_dir}/$options{sample}-sorted.id.bam";
	execute_cmd($cmd);
} else {
	$cmd = "$config->{ToolInfo}->{java}->{value}/java";
	$cmd .= " -Xmx$config->{MemoryInfo}->{sortbam_jvmx} -Xms512m";
	#### use the per sample tmp directory made by create_dir_struct(), the
	#### same one handed to Picard through TMP_DIR below
	$cmd .= " -Djava.io.tmpdir=$this->{output_dir}/tmp";
	$cmd .= " -jar $config->{ToolInfo}->{picard}->{value}/SortSam.jar";
	$cmd .= " INPUT=$this->{input_file}";
	$cmd .= " OUTPUT=$this->{output_dir}/$options{sample}-sorted.id.bam";
	$cmd .= " SO=queryname MAX_RECORDS_IN_RAM=$this->{max_read_mem}";
	$cmd .= " TMP_DIR=$this->{output_dir}/tmp VALIDATION_STRINGENCY=SILENT";
	execute_cmd($cmd);
}

#### verify the result no matter which sorter produced it
if (! -s "$this->{output_dir}/$options{sample}-sorted.id.bam") {
	$logger->logdie("ERROR : Read id sorted BAM is not generated for $options{sample}");
}
#}

#### keep only uniquely mapped reads; this BAM is generated for exon counting.
#### In the SAM record NH:i:1 marks a uniquely mapped read, a larger number
#### marks a multiply mapped read.
####
#### - samtools view -h keeps the header, and awk passes header lines ("^@") through
#### - the awk loop starts at field 12 because the first 11 fields are mandatory and
####   NH:i:### is an optional field that can only appear after them
#### - the pattern is anchored at the end (NH:i:1$) so that values such as NH:i:15,
####   which merely start with 1, are not kept
my $unique_bam = "$this->{output_dir}/$options{sample}-sorted.unique.bam";

$cmd = join(' | ',
	"$config->{ToolInfo}->{samtools}->{value}/samtools view -h $this->{input_file}",
	"awk -F '\\t' '{ if(\$0 ~ \"^@\") {print} else { for(i=12;i<=NF;i++){ if (\$i ~ \"NH:i:1\$\"){print}} } }'",
	"$config->{ToolInfo}->{samtools}->{value}/samtools view -bS - > $unique_bam");
execute_cmd($cmd);

if (! -s $unique_bam) {
	$logger->logdie("ERROR: unique BAM genreation failed for $options{sample}");
} else {
	$logger->info("Unique BAM is generated for $options{sample}");
}

#### drop fusion alignments: header lines are kept, every other record is kept
#### only when it does not contain an XF:Z tag. The result is indexed afterwards.
my $nonfusion_source_bam = "$this->{output_dir}/$options{sample}-sorted.unique.bam";
my $nonfusion_bam = "$this->{output_dir}/$options{sample}-sorted.unique.nonF.bam";

$cmd = join(' | ',
	"$config->{ToolInfo}->{samtools}->{value}/samtools view -h $nonfusion_source_bam",
	"awk '{ if(\$0 ~ \"^@\") {print} else { if (\$0 !~ \"XF:Z\") {print} }  }'",
	"$config->{ToolInfo}->{samtools}->{value}/samtools view -bS - > $nonfusion_bam");
execute_cmd($cmd);

if (! -s $nonfusion_bam) {
	$logger->logdie("ERROR: unique non fusion alignment BAM genreation failed for $options{sample}");
} else {
	$logger->info("Unique non fusion alignment BAM is generated for $options{sample}");
	$cmd = "$config->{ToolInfo}->{samtools}->{value}/samtools index $nonfusion_bam";
	execute_cmd($cmd);
}

#### Picard metrics on the alignment BAM.
#### Both tools share the same JVM invocation and trailing Picard arguments.
#### java.io.tmpdir points at the per sample tmp directory made by
#### create_dir_struct(), the same directory that is passed as TMP_DIR.
my $picard_java = join(' ',
	"$config->{ToolInfo}->{java}->{value}/java",
	"-Xmx$config->{MemoryInfo}->{sortbam_jvmx} -Xms512m",
	"-Djava.io.tmpdir=$this->{output_dir}/tmp");
my $picard_common = "MAX_RECORDS_IN_RAM=$this->{max_read_mem} TMP_DIR=$this->{output_dir}/tmp VALIDATION_STRINGENCY=SILENT";

#### picard statistics for a BAM
$cmd = join(' ',
	$picard_java,
	"-jar $config->{ToolInfo}->{picard}->{value}/CollectAlignmentSummaryMetrics.jar",
	"INPUT=$this->{input_file}",
	"OUTPUT=$this->{output_dir}/$options{sample}.flagstat",
	$picard_common);
execute_cmd($cmd);

#### picard RNA statistics for a BAM
$cmd = join(' ',
	$picard_java,
	"-jar $config->{ToolInfo}->{picard}->{value}/CollectRnaSeqMetrics.jar",
	"STRAND_SPECIFICITY=NONE",
	"REF_FLAT=$config->{ToolInfo}->{ref_flat}->{value}",
	"INPUT=$this->{input_file}",
	"OUTPUT=$this->{output_dir}/$options{sample}.RNAMetrics",
	$picard_common);
execute_cmd($cmd);

#### samtools flagstat on the alignment BAM, redirected into <sample>.samtools.flagstat
$cmd = join(' ',
	"$config->{ToolInfo}->{samtools}->{value}/samtools flagstat",
	$this->{input_file},
	"> $this->{output_dir}/$options{sample}.samtools.flagstat");
execute_cmd($cmd);

#### all steps finished
$logger->info("Sort BAM for $options{sample} complete");
exit(0);

#############################################################################
#### Validate the parsed command line options and apply defaults.
####   $options : hash reference with the parsed options; modified in place
#### A missing required option (run_info, output_dir, sample) prints the
#### reason plus the usage text to STDERR and exits with status 2.
#### The debug level is set to 3 when it was not supplied.
sub check_parameters {
	my $options = shift;

	foreach my $key (qw(run_info output_dir sample)) {
		next if $options->{$key};

		#### pod2usage() terminates the script itself because -exitval is given
		pod2usage({-exitval => 2, -message => "ARG: $key is required", -verbose => 1, -output => \*STDERR});
	}

	#### work on the hash that was passed in, not on a file level variable
	$options->{'debug'} = 3 unless ($options->{'debug'});

	return;
}

#############################################################################
#### Make sure the per sample tmp directory ($this->{output_dir}/tmp) exists.
####   $options : hash reference with the parsed options (accepted but not used)
#### An existing directory is only logged; otherwise it is created with "mkdir -p".
sub create_dir_struct {
	my $options = shift;

	my $tmp_dir = "$this->{output_dir}/tmp";

	return $logger->info("Directory $tmp_dir exist") if -d $tmp_dir;

	return execute_cmd("mkdir -p $tmp_dir");
}

#############################################################################
#### Log a shell command, run it, and abort the workflow when it fails.
####   $cmd : complete command line, executed through system()
#### Any non-zero wait status counts as failure: a non-zero exit code, death by
#### signal (low 7 bits of $?), or a command that could not be started ($? == -1).
#### For a shell pipeline only the status reported by the shell is visible here.
sub execute_cmd {
	my $cmd = shift;

	$logger->info($cmd);
	system($cmd);

	#### capture both values before any other call can overwrite them
	my ($status, $os_error) = ($?, $!);

	return if $status == 0;

	my $reason;
	if ($status == -1) {
		$reason = "could not be started: $os_error";
	} elsif ($status & 127) {
		$reason = "terminated by signal " . ($status & 127);
	} else {
		$reason = "exit status " . ($status >> 8);
	}

	$logger->logdie("ERROR: Following command failed to execute. Exiting execution of workflow\n$cmd\n($reason)");

	#### safety net in case logdie() returns
	exit(-1);
}


#############################################################################
#### Block until the given input file exists and is non-empty.
####   $file : path of the file to wait for
#### On the first failed check a single missing-input notification is sent
#### through $util->missingInput(); after that the file is polled again every
#### $LONG_WAIT seconds. There is no timeout.
sub check_input {
	my $file = shift;

	my $notified = 0;

	until (-s $file) {
		unless ($notified) {
			$notified = 1;

			$util->missingInput(
				$config->{RunInfo}->{email},
				"SortBam",
				"Alignment",
				"$file",
				$config->{RunInfo}->{tool},
			);
		}

		sleep $LONG_WAIT;
	}
}
