#!/usr/local/biotools/perl/5.14.2/bin/perl

=head1 NAME
   cufflinks.pl

=head1 SYNOPSIS

    USAGE: cufflinks.pl -r=run_info.txt -o=output_dir -i=input_dir -s=sample_name [-c=chridx]

=head1 OPTIONS


B<--run_info,-r>
	Required. Complete path to run info file

B<--output_dir, -o>
	Required.  Root output dir.  eg: /data2/bsi/secondary/PI/mrnaseq/RUN_ID

B<--input_dir, -i>
	Required. Input dir where sorted bam file is located.

B<--sample, -s>
	Required. Sample name for which cufflinks should be executed.

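B<--chr, -c>
	Optional. 1-based index into the run info chrindex list of the chromosome to run cufflinks against. Defaults to 1. Overridden by the SGE_TASK_ID environment variable when that is set.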

B<--help,-h>
	Optional. Print this documentation and exit.


=head1 DESCRIPTION
	Run cufflinks module for a given sample

=head1 INPUT
	Run info file and sample name

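=head1 OUTPUT
	transcripts.gtf (filtered; the unfiltered cufflinks result is kept as transcripts.gtf.orig) under <output_dir>/lincRNA/cufflinks/<sample>, or under <output_dir>/lincRNA/cufflinks/<sample>/chr<N> when splitbam is enabled.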

=head1 VERSION
	1.0

=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE
	./cufflinks.pl -r=run_info.txt -o=output_dir -i=input_dir -s=sample_name

=cut

use lib "/data2/bsi/reference/perl_workflow_ref/lib";
use lib "/data2/bsi/reference/perl_workflow_ref/lib/perl5/x86_64-linux/auto";
use strict;
use warnings;
use Data::Dumper;
use Cwd;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use POSIX;
use ParseConfig;
use MyUtility;
use Workflow::Logger;

#### option specifications accepted on the command line (Getopt::Long syntax)
my %options = ();
my @option_specs = qw(
	run_info|r=s
	output_dir|o=s
	input_dir|i=s
	sample|s=s
	chr|c=s
	log|l=s
	debug=s
	help|h
);

#### parse the command line into %options; show brief usage when parsing fails
my $results = GetOptions(\%options, @option_specs) || pod2usage();

#### display documentation
pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} ) if ($options{'help'});

#############################################################################
#### set global vars
my $MAX_JOB_LIMIT = 3000;
my $LONG_WAIT = 300;
my $SHORT_WAIT = 30;
my $WAIT = 5;

#### make sure everything passed was peachy. This runs before the run info
#### file is parsed so that a missing -r produces a usage message instead of
#### handing an undefined path to ParseConfig.
check_parameters(\%options);

#### create hash of all config info.
my $config = ParseConfig->new($options{run_info});
my $util = MyUtility->new;

#### Work out the SGE array task id once and use it everywhere below.
#### Grid Engine exports SGE_TASK_ID as the literal string "undefined" for
#### non-array jobs, so only a purely numeric value is accepted; anything
#### else falls back to the -c option (which check_parameters defaults to 1).
my $sge_task_id;
if ((defined $ENV{'SGE_TASK_ID'}) && ($ENV{'SGE_TASK_ID'} =~ /\A\d+\z/)) {
	$sge_task_id = $ENV{'SGE_TASK_ID'};
}

#### 1-based chromosome number this invocation is responsible for.
my $chr_number = (defined $sge_task_id) ? $sge_task_id : $options{chr};

#### setup log object
my $logger = Workflow::Logger->new('LOG_FILE'=>"$config->{RunInfo}->{logs}/Cufflinks.$options{sample}.$chr_number.log",
								   'LOG_LEVEL'=>$options{'debug'});

#### from here on use the handle returned by Workflow::Logger::get_logger()
$logger = Workflow::Logger::get_logger();
$logger->info("Cufflinks for $options{sample} started");

#### set local variables.
my $cmd = "";
my $this;

#### chrindex is a colon separated list of chromosome names; convert the
#### 1-based chromosome number into a 0-based index into that list.
my @chr = split(/:/, $config->{RunInfo}->{chrindex});
my $chr_idx = $chr_number - 1;

#### when splitbam is on, each chromosome has its own sorted bam and its own
#### output directory; otherwise the whole sample is processed in one go.
my $split_bam = ($config->{ToolInfo}->{splitbam}->{value} == 1);

if ($split_bam) {
	$this->{output_dir} = "$options{output_dir}/lincRNA/cufflinks/$options{sample}/chr$chr[$chr_idx]";
	$this->{input} = "$options{input_dir}/$options{sample}_sorted.chr$chr[$chr_idx].bam";
} else {
	$this->{output_dir} = "$options{output_dir}/lincRNA/cufflinks/$options{sample}";
	$this->{input} = "$options{input_dir}/accepted_hits.bam";
}

create_dir_struct(\%options);
check_input($this->{input});

#### run cufflinks itself.
$cmd = "$config->{ToolInfo}->{cufflinks}->{value}/cufflinks -o $this->{output_dir}";
$cmd .= " -p $config->{MemoryInfo}->{cufflinks_threads}";
$cmd .= " -g $config->{ToolInfo}->{features}->{value} $this->{input}";
execute_cmd($cmd);

#### filter all non coverage transcripts. The unfiltered file is kept as
#### transcripts.gtf.orig and the filtered one takes its place.
#### NOTE(review): awk's !~ "0.000000" is an unanchored regex match (and "."
#### matches any character), so a value such as 10.000000 also matches and
#### that line is dropped as well -- confirm whether this is intended.
$cmd = "cat $this->{output_dir}/transcripts.gtf | awk '{if (\$22 !~ \"0.000000\" && \$24 !~ \"0.000000\") print}'";
$cmd .= " > $this->{output_dir}/transcripts.WithCoverage.gtf";
$cmd .= " && mv $this->{output_dir}/transcripts.gtf $this->{output_dir}/transcripts.gtf.orig";
$cmd .= " && mv $this->{output_dir}/transcripts.WithCoverage.gtf $this->{output_dir}/transcripts.gtf";
execute_cmd($cmd);

#### unique-fy each transcript id
#### delay this till after the merger if we
#### are splitting the cufflinks job per chr.
if (!$split_bam) {
	$cmd = "$config->{ToolInfo}->{workflow_path}->{value}/uniquefy_cufflinks_transcripts.pl";
	$cmd .= " -i=$this->{output_dir}/transcripts.gtf -o=$this->{output_dir}/transcripts.mod.gtf";
	execute_cmd($cmd);

	$cmd = "mv $this->{output_dir}/transcripts.mod.gtf $this->{output_dir}/transcripts.gtf";
	execute_cmd($cmd);
}

$logger->info("Cufflinks complete");
exit();

#############################################################################
#### Validate the parsed command line options and fill in defaults.
####   $options - reference to the options hash populated by GetOptions;
####              it is modified in place.
#### Prints usage via pod2usage and exits with status 2 when any of
#### run_info, output_dir, input_dir or sample is missing.
#### Defaults: chr => 1, debug => 3.
sub check_parameters {
	my $options = shift;

	my @required = qw(run_info output_dir input_dir sample);

	foreach my $key (@required) {
		unless (defined $options->{$key}) {
			#### pod2usage exits the script, so nothing after it runs
			pod2usage({-exitval => 2,  -message => "ARG: $key is required", -verbose => 1, -output => \*STDERR});
		}
	}

	#### work on the hash that was passed in rather than a file-level variable
	$options->{'chr'} = 1 unless (defined $options->{'chr'});
	$options->{'debug'} = 3 unless (defined $options->{'debug'});
}

#############################################################################
#### Make sure the cufflinks output directory ($this->{output_dir}) exists.
####   first argument - options hash reference (accepted but not consulted)
#### Creates the directory with "mkdir -p" through execute_cmd when it is
#### absent; otherwise just logs that it is already there.
sub create_dir_struct {
	my $opts = shift;

	my $target_dir = "$this->{output_dir}";

	if ( ! -d $target_dir ) {
		execute_cmd("mkdir -p $target_dir");
	} else {
		$logger->info("Directory $target_dir exist");
	}
}

#############################################################################
#### Log a shell command, run it through system(), and stop the workflow
#### when it does not finish cleanly.
####   $cmd - complete shell command line
#### On failure the error is reported through $logger->logdie.
sub execute_cmd {
	my $cmd = shift;

	$logger->info("$cmd");
	system($cmd);

	#### $? is zero only after a clean exit. Testing the whole status word
	#### (not just the exit code in $? >> 8) also catches a command that was
	#### killed by a signal, where the exit code byte is 0.
	if ( $? != 0 ){
		$logger->logdie("ERROR: Following command failed to execute. Exiting execution of workflow\n$cmd");
		#### NOTE(review): presumably logdie() does not return, in which
		#### case this exit status is never used -- confirm.
		exit(100);
	}
}

#############################################################################
#### Verify that an expected input file is present and non-empty.
####   $file - path of the file cufflinks is about to read
#### When the file is missing or has zero size, an error file is written via
#### $util->createErrorFile, the failure is reported via
#### $util->reportErrorSGE (with the SGE job id and stdout/stderr paths taken
#### from the environment when available), and the script exits with 100.
sub check_input {
	my ($path) = @_;

	#### SGE job context; fall back to 0 / empty string outside of SGE
	my $job_id = (defined $ENV{JOB_ID})          ? $ENV{JOB_ID}          : 0;
	my $sgeerr = (defined $ENV{SGE_STDERR_PATH}) ? $ENV{SGE_STDERR_PATH} : "";
	my $sgeout = (defined $ENV{SGE_STDOUT_PATH}) ? $ENV{SGE_STDOUT_PATH} : "";

	if (! -s $path){
		#### error marker lives in the run's "error" folder
		my $error_dir = "$config->{RunInfo}->{base_output_dir}/$config->{RunInfo}->{pi}/$config->{RunInfo}->{type}/$config->{RunInfo}->{output_folder}/error";
		my $error_name = "$error_dir/CUFFLINKS.$options{sample}.$chr_idx.err";

		$util->createErrorFile($error_name, "EXPECTED FILE WHILE RUNNING CUFFLINKS STEP IS MISSING\n\n$path");

		my @report_args = (
			$config->{RunInfo}->{email},
			$path,
			"Cufflinks",
			$error_name,
			$job_id,
			$sgeerr,
			$sgeout,
		);
		$util->reportErrorSGE(@report_args);

		exit(100);
	}
}
