#!/usr/bin/perl

=head1 NAME
   cleanup.pl

=head1 SYNOPSIS
    USAGE: cleanup.pl -r=run_info.txt -o=output_dir [-b=1]

=head1 OPTIONS

B<--run_info, -r>
	Required. Run info file

B<--output_dir, -o>
	Required. Output directory

B<--beauty, -b>
	Optional. Whether the run is a BEAUTY run. Defaults to 0. It is important
	to set this to 1 for a BEAUTY run, because BEAUTY needs additional files
	that would otherwise be deleted.

B<--help,-h>
	Optional. Print this documentation and exit.


=head1  DESCRIPTION
	Cleanup execution space

=head1  INPUT

=head1  OUTPUT


=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE
	./cleanup.pl -r=run_info.txt -o=output_dir [-b=1]

=cut

use strict;
use warnings;
use Data::Dumper;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use ParseConfig;

## Parsed command-line options, keyed by long option name.
my %options = ();
my $results = GetOptions(
	\%options,
	'run_info|r=s',
	'output_dir|o=s',
	'beauty|b=s',
	'help|h',
) || pod2usage();

## display documentation
if ($options{'help'}) {
	pod2usage({-exitval => 0, -verbose => 2, -output => \*STDERR});
}

#############################################################################
## make sure everything passed was peachy
## (plain call syntax; the legacy `&name(...)` form is not needed here)
check_parameters(\%options);

## parse X_info files
## Direct method call instead of indirect object syntax (`new ParseConfig ...`),
## which perl can mis-parse depending on which names are in scope.
my $config = ParseConfig->new($options{run_info});

## Main cleanup sequence. Every step builds one shell command in $cmd and hands
## it to execute_cmd(), which prints and runs it. All paths are rooted at the
## --output_dir given on the command line.
my $cmd = "";

#### remove following dirs
my @dirs = qw(error sampling fastq job_ids alignment/tophatfusion_out);
foreach my $d (@dirs) {
	$cmd = "rm -rf $options{output_dir}/$d";
	execute_cmd($cmd);
}


#### Remove temp RSeQC files.
## Disabled variant that would additionally match *.txt, *.xls and *.r files:
#$cmd = "find $options{output_dir}/RSeQC -name '*.txt' -o -name '*.xls' -o -name '*.r'";
#$cmd = " -o -name '*.bw' -o -name '*.wig' | xargs rm -rf";

$cmd = "find $options{output_dir}/RSeQC -name '*.bw' -o -name '*.wig' | xargs rm -rf";
execute_cmd($cmd);


#### Remove Tophat fusion post tmp files
## A single rm takes all the paths; the pipe previously used between separate
## rm invocations is not a command separator.
$cmd = "rm -rf $options{output_dir}/alignment/blast";
$cmd .= " $options{output_dir}/alignment/mcl";
$cmd .= " $options{output_dir}/alignment/ensGene.txt";
$cmd .= " $options{output_dir}/alignment/refGene.txt";
execute_cmd($cmd);

## Start a fresh command here: appending would glue this onto the alignment
## command above without even a separating space.
$cmd = "rm -rf $options{output_dir}/fusion/blast_*";
$cmd .= " $options{output_dir}/fusion/check";
$cmd .= " $options{output_dir}/fusion/fusion_seq*";
$cmd .= " $options{output_dir}/fusion/logs";
$cmd .= " $options{output_dir}/fusion/tmp";
execute_cmd($cmd);


#### Remove circos temp files.
$cmd = "rm -rf $options{output_dir}/fusion/fuse.dat";
$cmd .= " $options{output_dir}/fusion/gene.dat";
$cmd .= " $options{output_dir}/fusion/circos-fusion-all.conf";
execute_cmd($cmd);


#### Remove alignment tmp and logs dir
$cmd = "find $options{output_dir}/alignment -name 'logs' -o -name 'tmp' | xargs rm -rf";
execute_cmd($cmd);

#### Remove unique bam files, id sorted bam files and junction.sam files
foreach my $pattern ('*.unique.bam', '*sorted.id.bam', '*junction.sam') {
	$cmd = "find $options{output_dir}/alignment -name '$pattern' | xargs rm -rf";
	execute_cmd($cmd);
}

#### only remove non fusion bam files for non beauty related projects
if (! $options{beauty}) {
	#### Remove unique non Fusion bam file
	$cmd = "find $options{output_dir}/alignment -name '*unique.nonF.bam' -o -name '*unique.nonF.bam.bai' | xargs rm -rf";
	execute_cmd($cmd);
}

#### remove accepted_hits and unmapped bam file
#### if merge_bam module was run, then there will be a sorted.bam file
#### delete files only if sorted.bam file exists.
foreach my $sample (keys %{$config->{SampleInfo}}) {
	my $tophat_dir = "$options{output_dir}/alignment/tophat_$sample";

	## -s: the merged bam must exist and be non-empty before its inputs go away
	next unless -s "$tophat_dir/${sample}_sorted.bam";

	$cmd = "rm -rf $tophat_dir/accepted_hits.bam*";
	$cmd .= " $tophat_dir/unmapped.bam";
	execute_cmd($cmd);
}


#### Remove variant tmp files
$cmd = "find $options{output_dir}/variant -name 'logs' -o -name 'tmp' | xargs rm -rf";
execute_cmd($cmd);


#### tar logs dir, then remove it
## The removal is chained with && so the logs are only deleted once tar has
## exited successfully; a failed archive leaves the logs directory in place.
$cmd = "tar -pczf $options{output_dir}/logs.tar.gz $options{output_dir}/logs";
$cmd .= " && rm -rf $options{output_dir}/logs";
execute_cmd($cmd);

exit();

#############################################################################
## Validate the parsed command-line options and fill in defaults.
##
## Arguments:
##   $options - reference to the options hash filled in by GetOptions
##
## Prints usage and exits with status 2 (via pod2usage) if a required option
## is missing or empty. Sets 'beauty' to 0 when it was not supplied or is false.
sub check_parameters {
	my $options = shift;

	my @required = qw(run_info output_dir);

	foreach my $key (@required) {
		## Test for presence rather than truthiness so a value of "0" is accepted.
		my $value = $options->{$key};
		next if defined $value && length $value;

		## pod2usage prints the message and usage text, then exits with -exitval.
		pod2usage({-exitval => 2, -message => "ARG: $key is required", -verbose => 1, -output => \*STDERR});
	}

	$options->{'beauty'} ||= 0;

	return;
}

#############################################################################
## Print a shell command to STDOUT, run it with system(), and report any
## failure on STDERR.
##
## Arguments:
##   $cmd - command line to run, passed to system() as a single string
##
## Returns 1 if the command exited with status 0, otherwise 0. Failures are
## only reported; the caller decides whether to continue.
sub execute_cmd {
	my $cmd = shift;

	print $cmd."\n\n";
	system($cmd);

	## Copy $? immediately so later calls cannot overwrite it.
	my $status = $?;
	return 1 if $status == 0;

	if ($status == -1) {
		## The command could not be started at all; only here is $! meaningful.
		print STDERR "Above command failed to execute: $!\n";
	}
	elsif ($status & 127) {
		## Low 7 bits hold the signal number that terminated the child.
		printf STDERR "Above command died with signal %d\n", $status & 127;
	}
	else {
		## High byte holds the child's exit code.
		printf STDERR "Above command exited with status %d\n", $status >> 8;
	}

	return 0;
}
