#!/usr/bin/perl

=head1 NAME
   vqsr.pl

=head1 SYNOPSIS
    USAGE: vqsr.pl -r=run_info.txt -o=output_dir -s=samplename

=head1 OPTIONS

B<--run_info, -r>
	Run info file

B<--output_dir, -o>
	output directory

B<--sample, -s>
	sample name

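B<--log, -l>
	log option; accepted on the command line but not referenced elsewhere in this script

B<--debug>
	log level handed to the logger (defaults to 3)

B<--help, -h>
	print this documentation and exit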


=head1  DESCRIPTION
	run vqsr filtering

=head1  INPUT

=head1  OUTPUT


=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE
	./vqsr.pl -r=run_info.txt -o=output_dir -s=samplename

=cut

use strict;
use warnings;
use Data::Dumper;
use Cwd;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use ParseConfig;
use MyUtility;
use Workflow::Logger;

## Collect the command-line switches into %options. A parse failure prints
## the usage text and exits.
my %options = ();
my $results = GetOptions(
	\%options,
	'run_info|r=s',
	'output_dir|o=s',
	'sample|s=s',
	'log|l=s',
	'debug=s',
	'help|h',
);
pod2usage() unless $results;

## display documentation
pod2usage(-exitval => 0, -verbose => 2, -output => \*STDERR) if $options{'help'};

#############################################################################
#### set global vars
############################################
## seconds check_input() sleeps between checks for a missing input file
my $LONG_WAIT = 300;

## make sure everything passed was peachy (exits with the usage text when a
## required option is missing, and fills in the default debug level)
check_parameters(\%options);

## parse X_info files
my $config = ParseConfig->new($options{run_info});
my $util = MyUtility->new;

#### export env var: put the ToolInfo "r" path in front of any existing PATH
$ENV{'PATH'} = defined $ENV{'PATH'}
	? "$config->{ToolInfo}->{r}->{value}:$ENV{'PATH'}"
	: "$config->{ToolInfo}->{r}->{value}";

#### local var
my $this = { output_dir => $options{output_dir} };

#### check input: returns only once the sample's .gatk.vcf exists and is non-empty
check_input("$this->{output_dir}/$options{sample}.gatk.vcf");

#### init logger
## NOTE(review): the object returned by the constructor is replaced right away
## by get_logger(); presumably the constructor configures a shared logger that
## get_logger() then hands back -- confirm against Workflow::Logger.
my $logger = Workflow::Logger->new(
	'LOG_FILE'  => "$config->{RunInfo}->{logs}/VQSR.$options{sample}.log",
	'LOG_LEVEL' => $options{'debug'},
);
$logger = Workflow::Logger::get_logger();

$logger->info("VQSR for $options{sample} started");

#### run vqsr training (GATK VariantRecalibrator in SNP mode)
## The command line is assembled inside a do-block so the helper variables
## stay private; only $cmd is visible to the rest of the script.
my $cmd = do {
	my $tools    = $config->{ToolInfo};
	my $vcf_in   = "$this->{output_dir}/$options{sample}.gatk.vcf";
	my $temp_pfx = "$this->{output_dir}/temp/$options{sample}";

	## one " -an <name>" per colon-separated entry of gatk_vqsr_features
	my @annotations = map { " -an $_" } split(/:/, $tools->{gatk_vqsr_features}->{value});

	join('',
		"$tools->{java}->{value}/java",
		" -Xmx$config->{MemoryInfo}->{vqsr_jvmx} -Xms512m",
		" -jar $tools->{gatk}->{value}/GenomeAnalysisTK.jar",
		" -R $tools->{ref_genome}->{value} -T VariantRecalibrator",
		" -mode SNP",
		" -input $vcf_in",
		" -resource:hapmap,known=false,training=true,truth=true,prior=15.0 $tools->{hapmap_vcf}->{value}",
		" -resource:omni,known=false,training=true,truth=false,prior=12.0 $tools->{omni_vcf}->{value}",
		" ", @annotations,
		" -recalFile $temp_pfx.recal",
		" -tranchesFile $temp_pfx.tranches",
		" --maxGaussians ", $tools->{gatk_vqsr_gaussians}->{value},
		" --percentBadVariants ", $tools->{gatk_vqsr_pct_bad_variants}->{value},
	);
};

## switches that are currently disabled, kept for reference:
#	" -nt $config->{MemoryInfo}->{threads}"
#	" -rscriptFile $this->{output_dir}/plot/$options{sample}.plots.R"
#	" -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 $config->{ToolInfo}->{dbsnp_ref}->{value}"
#	" --maxGaussians 4 --percentBadVariants 0.05"
#	" -an ReadPosRankSum -an FS -an ED"

execute_cmd($cmd);


#### Choose the filtering step. A non-empty tranches file means the
#### VariantRecalibrator run left a model behind; otherwise fall back to hard
#### filters. Both branches write <output_dir>/<sample>.filter.vcf.
if (-s "$this->{output_dir}/temp/$options{sample}.tranches") {
	#### Apply Recalibrator
	$cmd = "$config->{ToolInfo}->{java}->{value}/java";
	$cmd .= " -Xmx$config->{MemoryInfo}->{vqsr_jvmx} -Xms512m";
	$cmd .= " -jar $config->{ToolInfo}->{gatk}->{value}/GenomeAnalysisTK.jar";

	$cmd .= " -R $config->{ToolInfo}->{ref_genome}->{value}";
	$cmd .= " -mode SNP -T ApplyRecalibration";
	$cmd .= " -input $this->{output_dir}/$options{sample}.gatk.vcf";
	$cmd .= " -recalFile $this->{output_dir}/temp/$options{sample}.recal";
	$cmd .= " -tranchesFile $this->{output_dir}/temp/$options{sample}.tranches";
	$cmd .= " -o $this->{output_dir}/$options{sample}.filter.vcf";
	$cmd .= " --ts_filter " . $config->{ToolInfo}->{gatk_vqsr_trench}->{value};

	execute_cmd($cmd);
} else {
	#### modeling failed apply hard filters

	$cmd = "$config->{ToolInfo}->{java}->{value}/java";
	$cmd .= " -Xmx$config->{MemoryInfo}->{vqsr_jvmx} -Xms512m";
	$cmd .= " -jar $config->{ToolInfo}->{gatk}->{value}/GenomeAnalysisTK.jar";

	$cmd .= " -R $config->{ToolInfo}->{ref_genome}->{value}";
	$cmd .= " -l INFO -T VariantFiltration";
	$cmd .= " -V $this->{output_dir}/$options{sample}.gatk.vcf";
	$cmd .= " -o $this->{output_dir}/$options{sample}.filter.vcf";

	## Filter names and expressions are two colon-separated lists in ToolInfo,
	## paired by position. Formerly hard-coded example of one pair:
	##   --filterExpression "FS > 20.0" --filterName FSFilter
	my @filter_names = split(":", $config->{ToolInfo}->{gatk_hard_filters_names}->{value});
	my @filter_expressions = split(":", $config->{ToolInfo}->{gatk_hard_filters_exp}->{value});

	## the element counts are numbers, so compare them numerically
	if (@filter_names != @filter_expressions) {
		$logger->info("ToolInfo: count mismatch gatk_hard_filters_names to gatk_hard_filters_exp");
		exit 1;
	}

	## NOTE(review): expressions are appended verbatim and the command is run
	## through a shell (execute_cmd hands one string to system), so a value
	## containing spaces or '>' has to carry its own quoting in the config --
	## confirm the ToolInfo entries are quoted.
	for my $i (0 .. $#filter_names) {
		$cmd .= " --filterExpression " . $filter_expressions[$i] . " --filterName " . $filter_names[$i];
	}

	execute_cmd($cmd);
}

$logger->info("VQSR for $options{sample} complete");
exit();

#############################################################################
## check_parameters(\%options)
##   Validate the parsed command-line options held in the given hash ref.
##   - run_info, output_dir and sample must each have a true value; for the
##     first one that does not, the usage text is printed to STDERR together
##     with the name of the missing option and the script exits with status 2.
##   - 'debug' is set to 3 when it is missing or false.
##   The hash is modified in place through the reference; nothing is returned.
sub check_parameters {
	my ($opts) = @_;

	foreach my $key (qw(run_info output_dir sample)) {
		next if $opts->{$key};

		## pod2usage exits with -exitval, so no further exit call is needed
		pod2usage({-exitval => 2, -message => "ARG: $key is required", -verbose => 1, -output => \*STDERR});
	}

	$opts->{'debug'} = 3 unless $opts->{'debug'};

	return;
}

#############################################################################
## execute_cmd($cmd)
##   Log the command line, run it with system() and return system()'s raw
##   status value. A failing command is logged but is not fatal: the caller
##   decides how to react (the script checks for the tranches file after the
##   VariantRecalibrator run rather than relying on the exit status).
sub execute_cmd {
	my ($cmd) = @_;

	$logger->info($cmd);

	my $status = system($cmd);

	## decode the wait status the way perlfunc documents for system()
	if ($status == -1) {
		$logger->info("failed to execute command: $!");
	}
	elsif ($status & 127) {
		$logger->info(sprintf("command died with signal %d", $status & 127));
	}
	elsif ($status != 0) {
		$logger->info(sprintf("command exited with status %d", $status >> 8));
	}

	return $status;
}

#############################################################################
## check_input($file)
##   Wait until $file exists with a non-zero size. The first time the file is
##   found missing, $util->missingInput(...) is called once with the RunInfo
##   email and tool entries; after that the sub sleeps $LONG_WAIT seconds
##   between checks. There is no timeout.
sub check_input {
	my ($path) = @_;

	my $notify_pending = 1;

	until (-s $path) {
		if ($notify_pending) {
			$notify_pending = 0;

			$util->missingInput(
				$config->{RunInfo}->{email},
				"VQSR",
				"UnifiedGenotyper",
				"$path",
				$config->{RunInfo}->{tool},
			);
		}

		sleep $LONG_WAIT;
	}
}
