#!/usr/local/biotools/perl/5.14.2/bin/perl

=head1 NAME
   extract_n_sort.pl

=head1 SYNOPSIS
    USAGE: extract_n_sort.pl -r=run_info.txt -o=output_dir -i=input_dir -s=samplename

=head1 OPTIONS

B<--run_info, -r>
	Run info file

B<--output_dir, -o>
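	output directory

B<--input_dir, -i>
	input directory; the BAM is read from INPUT_DIR/tophat_SAMPLE/
	(accepted_hits.bam, or SAMPLE_sorted.bam when that file is absent)

B<--sample, -s>
	sample name

B<--log, -l>
	accepted on the command line but not used by this script; the log is
	written to LOGS/SortBAM.SAMPLE.log, where LOGS comes from the run info file

B<--debug>
	log level handed to the logger (default 3)

B<--help,-h>
	print this documentation and exit
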

=head1  DESCRIPTION
	sort and re-arrange align bam file

=head1  INPUT

=head1  OUTPUT


=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE
	./extract_n_sort.pl -r=run_info.txt -o=output_dir -i=input_dir -s=samplename

=cut

use lib "/data2/bsi/reference/perl_workflow_ref/lib";
use lib "/data2/bsi/reference/perl_workflow_ref/lib/perl5/x86_64-linux/auto";
use strict;
use warnings;
use Data::Dumper;
use Cwd;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use ParseConfig;
use MyUtility;
use Workflow::Logger;

#### Parse the command line into %options (keyed by each option's primary
#### name). "sample_name" is registered as an alias of "sample" because the
#### POD documents the option as --sample_name; with pass_through enabled an
#### unregistered spelling would otherwise be ignored without any error.
my %options = ();
my $results = GetOptions (\%options,
                          'run_info|r=s',
                          'output_dir|o=s',
                          'input_dir|i=s',
                          'sample|sample_name|s=s',
                          'log|l=s',
                          'debug=s',
                          'help|h') || pod2usage();

#### display documentation
if( $options{'help'} ){
    pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} );
}

#############################################################################
#### file-scope state shared by the subs below
############################################

#### seconds check_input() sleeps between polls for a missing file
my $LONG_WAIT = 300;

#### validate the command line before anything else is touched
check_parameters(\%options);

#### load the run info configuration and the utility helper
my $config = ParseConfig->new($options{run_info});
my $util = MyUtility->new;

#### per-sample settings: working directory, target BAM, java heap size and
#### the MAX_RECORDS_IN_RAM value handed to picard
my $this = {
	output_dir   => "$options{output_dir}/alignment/tophat_$options{sample}",
	xmx          => "6g",
	max_read_mem => 1000000,
};
$this->{file} = "$this->{output_dir}/accepted_hits.bam";

#### configure the logger, then fetch the handle used for the rest of the run
my $logger = Workflow::Logger->new('LOG_FILE'=>"$config->{RunInfo}->{logs}/SortBAM.$options{sample}.log",
								   'LOG_LEVEL'=>$options{'debug'});
$logger = Workflow::Logger::get_logger();

$logger->info("Sort BAM for $options{sample} started");

#### make sure the tmp directory under the sample output directory exists
create_dir_struct(\%options);

my $cmd = "";

#### Stage the aligned reads as $this->{file} (accepted_hits.bam inside the
#### sample's output directory).
if (-s "$options{input_dir}/tophat_$options{sample}/accepted_hits.bam") {
	#### The input directory already holds accepted_hits.bam: copy it unless
	#### the input location is the output location. The output path is wrapped
	#### in \Q...\E so it is matched literally; without it, characters such as
	#### '.', '+' or '(' in the path are treated as regex syntax and can change
	#### the result of the test or abort the script with a regex error.
	if ("$options{input_dir}/tophat_$options{sample}" !~ /\Q$this->{output_dir}\E/i){
		$cmd = "cp $options{input_dir}/tophat_$options{sample}/accepted_hits.bam $this->{file}";
		execute_cmd($cmd);
	}
} else {
	my $bam = "$options{input_dir}/tophat_$options{sample}/$options{sample}_sorted.bam";

	#### Build accepted_hits.bam from <sample>_sorted.bam: stream it as SAM
	#### (with header), drop every line whose 3rd field contains '*' or whose
	#### 4th field is 0, and convert what is left back to BAM.
	$cmd = "$config->{ToolInfo}->{samtools}->{value}/samtools view -h $bam";
	$cmd .= " | awk -F '\\t' '{ if ((\$3 !~ \"\\*\") && (\$4 != \"0\")) {print} }'";
	$cmd .= " | $config->{ToolInfo}->{samtools}->{value}/samtools view -bS - > $this->{file}";
	execute_cmd($cmd);
}


#### When the BAM header carries the sort-order tag "SO:sorted", rewrite it to
#### "SO:coordinate": dump the header, edit it with sed, and reheader the BAM.
$cmd = join(" ",
	"$config->{ToolInfo}->{samtools}->{value}/samtools view -H $this->{file}",
	"| grep \"SO:sorted\"");
my $sorted = `$cmd`;

if (length($sorted) > 0) {
	#### the three steps run in order; execute_cmd aborts the run on failure
	my @reheader_steps = (
		"$config->{ToolInfo}->{samtools}->{value}/samtools view -H $this->{file} > $this->{file}.header",
		"sed -i 's/SO:sorted/SO:coordinate/' $this->{file}.header",
		"$config->{ToolInfo}->{samtools}->{value}/samtools reheader $this->{file}.header  $this->{file} > $this->{file}.reheader && mv $this->{file}.reheader $this->{file}",
	);

	foreach my $step (@reheader_steps) {
		$cmd = $step;
		execute_cmd($cmd);
	}
}


#### Coordinate-sort accepted_hits.bam when its header says "SO:unsorted".
$cmd = join(" ",
	"$config->{ToolInfo}->{samtools}->{value}/samtools view -H $this->{file}",
	"| grep \"SO:unsorted\"");
my $not_sorted = `$cmd`;

if (length($not_sorted) > 0) {
	#### picard SortSam writes to a side file which then replaces the original
	my $sorted_bam = "$this->{output_dir}/$options{sample}-sorted.bam";

	#### NOTE(review): java.io.tmpdir lives under $options{output_dir} while
	#### TMP_DIR lives under $this->{output_dir}; only the latter is created by
	#### create_dir_struct - confirm the former exists before this runs.
	$cmd = join(" ",
		"$config->{ToolInfo}->{java}->{value}/java -Xmx$this->{xmx} -Xms512m",
		"-Djava.io.tmpdir=$options{output_dir}/tmp",
		"-jar $config->{ToolInfo}->{picard}->{value}/SortSam.jar",
		"INPUT=$this->{file}",
		"OUTPUT=$sorted_bam",
		"SO=coordinate MAX_RECORDS_IN_RAM=$this->{max_read_mem}",
		"TMP_DIR=$this->{output_dir}/tmp VALIDATION_STRINGENCY=SILENT");
	execute_cmd($cmd);

	$cmd = "mv $sorted_bam $this->{file}";
	execute_cmd($cmd);
}


#### index the (coordinate sorted) accepted_hits.bam
$cmd = "$config->{ToolInfo}->{samtools}->{value}/samtools index $this->{file}";
execute_cmd($cmd);

#### paired runs additionally get a copy sorted by read id (queryname)
if ($config->{RunInfo}->{paired} == 1) {
	my $id_sorted_bam = "$this->{output_dir}/$options{sample}-sorted.id.bam";

	$cmd = join(" ",
		"$config->{ToolInfo}->{java}->{value}/java -Xmx$this->{xmx} -Xms512m",
		"-Djava.io.tmpdir=$options{output_dir}/tmp",
		"-jar $config->{ToolInfo}->{picard}->{value}/SortSam.jar",
		"INPUT=$this->{file}",
		"OUTPUT=$id_sorted_bam",
		"SO=queryname MAX_RECORDS_IN_RAM=$this->{max_read_mem}",
		"TMP_DIR=$this->{output_dir}/tmp VALIDATION_STRINGENCY=SILENT");
	execute_cmd($cmd);

	#### an empty or missing result is fatal
	$logger->logdie("ERROR : Read id sorted BAM is not generated for $options{sample}")
		unless (-s $id_sorted_bam);
}

#### Build the uniquely-mapped BAM used for exon counting.
#### In SAM, the optional field NH:i:1 marks a uniquely mapped read; a value
#### greater than 1 marks a multiply mapped read.
####
#### - samtools view -h keeps the header; awk passes header lines (starting
####   with "@") through untouched.
#### - the awk loop starts at field 12 because the first 11 fields are
####   mandatory and NH:i:<n> can only appear after them.
#### - the field must match ^NH:i:1$ exactly: the trailing anchor rejects
####   NH:i:15 and friends, the leading anchor rejects any other optional
####   field whose value merely ends in "NH:i:1".
#### - the loop stops at the first hit so a record is printed at most once.
$cmd = "$config->{ToolInfo}->{samtools}->{value}/samtools view -h $this->{file}";
$cmd .= " | awk -F '\\t' '{ if(\$0 ~ \"^@\") {print} else { for(i=12;i<=NF;i++){ if (\$i ~ \"^NH:i:1\$\"){print; break} } } }'";
$cmd .= " | $config->{ToolInfo}->{samtools}->{value}/samtools view -bS - > $this->{output_dir}/$options{sample}-sorted.unique.bam";
execute_cmd($cmd);

#### an empty or missing unique BAM is fatal
if (-s "$this->{output_dir}/$options{sample}-sorted.unique.bam") {
	$logger->info("Unique BAM is generated for $options{sample}");
} else {
	$logger->logdie("ERROR: unique BAM genreation failed for $options{sample}");
}

#### picard CollectAlignmentSummaryMetrics on accepted_hits.bam; the report is
#### written to <sample>.flagstat in the sample output directory
$cmd = join(" ",
	"$config->{ToolInfo}->{java}->{value}/java -Xmx$this->{xmx} -Xms512m",
	"-Djava.io.tmpdir=$options{output_dir}/tmp",
	"-jar $config->{ToolInfo}->{picard}->{value}/CollectAlignmentSummaryMetrics.jar",
	"INPUT=$this->{file}",
	"OUTPUT=$this->{output_dir}/$options{sample}.flagstat",
	"MAX_RECORDS_IN_RAM=$this->{max_read_mem} TMP_DIR=$this->{output_dir}/tmp VALIDATION_STRINGENCY=SILENT");
execute_cmd($cmd);

#### samtools flagstat report goes to <sample>.samtools.flagstat
$cmd = join(" ",
	"$config->{ToolInfo}->{samtools}->{value}/samtools flagstat",
	"$this->{file}",
	"> $this->{output_dir}/$options{sample}.samtools.flagstat");
execute_cmd($cmd);

$logger->info("Sort BAM for $options{sample} complete");
exit();

#############################################################################
#### Validate the parsed command-line options and apply defaults.
####
#### Arguments:
####   $options - reference to the hash filled in by GetOptions; it is read
####              and updated through this reference.
####
#### Every option the script interpolates into paths unconditionally must be
#### present and non-empty: run_info, output_dir, input_dir and sample. When
#### one is missing, usage is printed to STDERR together with the name of the
#### missing option and the script exits with status 2 (pod2usage does the
#### exit, so nothing after that call runs).
####
#### The debug level defaults to 3 when none (or a false value) was given.
sub check_parameters {
	my $options = shift;

	my @required = qw(run_info output_dir input_dir sample);

	foreach my $key (@required) {
		#### defined + length rather than truthiness, so "0" is a valid value
		next if (defined $options->{$key} && length $options->{$key});

		pod2usage({-exitval => 2,  -message => "ARG: $key is required", -verbose => 1, -output => \*STDERR});
	}

	$options->{'debug'} = 3 unless ($options->{'debug'});
}

#############################################################################
#### Make sure the tmp directory under the sample output directory exists.
####
#### Arguments:
####   $opts - options hash reference; accepted but not consulted, the path
####           comes from the file-level $this->{output_dir}.
####
#### An existing directory is only logged; a missing one is created with
#### "mkdir -p" through execute_cmd.
sub create_dir_struct {
	my ($opts) = @_;

	my $tmp_dir = "$this->{output_dir}/tmp";

	return execute_cmd("mkdir -p $tmp_dir") unless (-d $tmp_dir);

	$logger->info("Directory $tmp_dir exist");
}

#############################################################################
#### Log a shell command, run it, and abort the workflow if it did not
#### finish cleanly.
####
#### Arguments:
####   $cmd - command line handed to system() as a single string.
####
#### The whole wait status ($?) is tested rather than only its high byte:
#### a command that perl executes directly (no shell metacharacters, e.g. the
#### java invocations) and that is killed by a signal leaves the exit-code
#### byte at 0, so "$? >> 8" alone would report it as a success.
####
#### NOTE: for a shell pipeline only the status of the last command is
#### visible here.
sub execute_cmd {
	my $cmd = shift;

	$logger->info($cmd);
	system($cmd);

	#### capture both before any further call can overwrite them
	my $status = $?;
	my $os_error = $!;

	if ($status != 0) {
		my $reason;
		if ($status == -1) {
			$reason = "could not be started: $os_error";
		} elsif ($status & 127) {
			$reason = sprintf("was terminated by signal %d", $status & 127);
		} else {
			$reason = sprintf("exited with status %d", $status >> 8);
		}

		$logger->info("Command $reason");
		$logger->logdie("ERROR: Following command failed to execute. Exiting execution of workflow\n$cmd");

		#### fallback in case logdie returns
		exit(100);
	}
}


#############################################################################
#### Block until $file exists with a non-zero size.
####
#### Arguments:
####   $file - path of the file to wait for.
####
#### On the first failed check a single missing-input notification is sent
#### through $util->missingInput; after that the sub sleeps $LONG_WAIT seconds
#### between checks. There is no upper bound on the wait.
sub check_input {
	my ($file) = @_;

	my $notified = 0;

	until (-s $file) {
		unless ($notified) {
			$notified = 1;

			$util->missingInput($config->{RunInfo}->{email},
								"SortBam",
								"Alignment",
								"$file",
								$config->{RunInfo}->{tool});
		}

		sleep $LONG_WAIT;
	}
}
