#!/usr/local/biotools/perl/5.14.2/bin/perl

=head1 NAME
   chromatin_map.pl

=head1 SYNOPSIS

    USAGE: chromatin_map.pl -r=run_info.txt -o=output_dir

=head1 OPTIONS


B<--run_info,-r>
	Required. Complete path to run info file

B<--output_dir, -o>
	Required.  Root output dir.  eg: /data2/bsi/secondary/PI/mrnaseq/RUN_ID

B<--help,-h>
	Optional. Print this documentation and exit.


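=head1 DESCRIPTION
	Builds a chromatin status table for the combined lincRNA transcripts.
	OUTPUT_DIR/lincRNA/all.combined.gtf is converted to BED and compared,
	using bedtools, against every *k36me3StdPk.broadPeak and
	*k4me3StdPk.broadPeak file found under the chromatin_peak directory
	named in the run configuration. The per-file annotations are pasted
	together, locus IDs are added, and the result is written to
	OUTPUT_DIR/lincRNA/chromatin/ChromatinStaus.txt.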

=head1 INPUT


=head1 OUTPUT

=head1 VERSION
	1.0

=head1  CONTACT
  bjaysheel@gmail.com


=head1 EXAMPLE
	./chromatin_map.pl -r=run_info.txt -o=output_dir

=cut

use lib "/data2/bsi/reference/perl_workflow_ref/lib";
use lib "/data2/bsi/reference/perl_workflow_ref/lib/perl5/x86_64-linux/auto";
use strict;
use warnings;
use Data::Dumper;
use File::Basename;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use ParseConfig;
use MyUtility;
use Workflow::Logger;

#### collect command-line switches into %options; any parse failure
#### falls through to the usage message
my %options;
my @option_specs = (
	'run_info|r=s',
	'output_dir|o=s',
	'sample|s=s',
	'log|l=s',
	'debug=s',
	'help|h',
);
my $results = GetOptions(\%options, @option_specs) || pod2usage();

#### display the full documentation on STDERR and exit cleanly when asked
pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} ) if $options{'help'};

#############################################################################
#### set global vars

#### make sure everything passed was peachy. This runs before the run info
#### file is parsed so that a missing --run_info ends in a usage error
#### instead of being handed to ParseConfig as undef. It also fills in the
#### default debug level that the logger is created with below.
check_parameters(\%options);

#### create hash of all config info.
my $config = ParseConfig->new($options{run_info});
my $util = MyUtility->new;

#### setup log object
#### NOTE(review): the object returned by the constructor is replaced right
#### away by get_logger(); presumably the constructor configures logging and
#### get_logger() returns the handle to log through -- confirm against
#### Workflow::Logger.
my $logger;

$logger = Workflow::Logger->new('LOG_FILE'=>"$config->{RunInfo}->{logs}/ChromatinMap.log",
								'LOG_LEVEL'=>$options{'debug'});

$logger = Workflow::Logger::get_logger();

$logger->info("Chromatin Map started");

#### set local variables.
#### $cmd is reused for every shell command built below; $this holds the
#### input GTF and the directory that receives all intermediate and final
#### files.
my $cmd = "";
my $this;

$this->{output_dir} = "$options{output_dir}/lincRNA/chromatin";
$this->{input} = "$options{output_dir}/lincRNA/all.combined.gtf";

#### stop (with an error report) if the combined GTF is missing or empty,
#### then make sure the output directory exists
check_input($this->{input});
create_dir_struct(\%options);

#### locate the K36 and K4 broadPeak files under the configured
#### chromatin_peak directory
my $peak_dir = $config->{ToolInfo}->{chromatin_peak}->{value};
my @broadPeak_k36 = split(/\n/, `find $peak_dir -name "*k36me3StdPk.broadPeak" -print`);
my @broadPeak_k4 = split(/\n/, `find $peak_dir -name "*k4me3StdPk.broadPeak" -print`);

#### without any peak file there is nothing to annotate, and the later
#### "paste" command would be built with no file operands at all, so stop
#### here with a clear message instead.
unless (@broadPeak_k36 || @broadPeak_k4) {
	$logger->logdie("ERROR: No broadPeak files found under $peak_dir. Exiting execution of workflow");
	exit(100);
}

#### Convert GTF to BED
$cmd = "$config->{ToolInfo}->{workflow_path}->{value}/gtf2bed.pl";
$cmd .= " -g $this->{input}";
$cmd .= " -o $this->{output_dir}/all.combined.bed";
execute_cmd($cmd);

#### keep BED columns 1-4 and 6 as the transcript list every annotation
#### file is built against
$cmd = "cat $this->{output_dir}/all.combined.bed | cut -f1,2,3,4,6 > $this->{output_dir}/all.transcripts";
execute_cmd($cmd);

#### accumulators consumed by the steps that follow the loops:
####   $paste_files        - annotation files, in processing order
####   $chromatin          - matching "\t<name>" header fragments
####   $tmp_file_to_remove - everything to delete once the final file exists
my $paste_files = "";
my $chromatin = "";
my $tmp_file_to_remove = "$this->{output_dir}/all.combined.bed $this->{output_dir}/all.transcripts";

#### The K36 and K4 files go through the same three steps and differ only in
#### the bedtools program and its trailing flags. With -v, bedtools reports
#### the entries of -a that have no hit in -b; windowBed -w 2000 widens the
#### search by 2000 bases on either side.
my @peak_sets = (
	{ peaks => \@broadPeak_k36, tool => "intersectBed", flags => "-v" },
	{ peaks => \@broadPeak_k4,  tool => "windowBed",    flags => "-w 2000 -v" },
);

foreach my $set (@peak_sets) {
	foreach my $bp (@{ $set->{peaks} }) {
		#### file name without its directory and final extension
		my $name = fileparse($bp, qr/\.[^.]*/);

		#### Run this command for every cell line file of the set
		$cmd = "$config->{ToolInfo}->{bedtools}->{value}/$set->{tool}";
		$cmd .= " -a $this->{output_dir}/all.combined.bed -b $bp $set->{flags}";
		$cmd .= " > $this->{output_dir}/$name.bed";
		execute_cmd($cmd);

		#### extract transcript id
		$cmd = "cat $this->{output_dir}/$name.bed | cut -f4 > $this->{output_dir}/$name";
		execute_cmd($cmd);

		#### create annotation file indicating whether a chromatin map exists
		#### for each transcript in bed file. 0=No, 1=Yes
		#### NOTE(review): the 0/1 meaning is decided inside FillAnnotation.pl,
		#### which is not visible here -- confirm.
		$cmd = "$config->{ToolInfo}->{workflow_path}->{value}/FillAnnotation.pl";
		$cmd .= " $this->{output_dir}/all.transcripts $this->{output_dir}/$name";
		$cmd .= " $this->{output_dir}/$name.Annotation.txt";
		execute_cmd($cmd);

		#### use later to paste all annotations in one file
		$paste_files .= " $this->{output_dir}/$name.Annotation.txt";
		$chromatin .= "\\t$name";

		#### the shell glob covers $name, $name.bed and $name.Annotation.txt
		$tmp_file_to_remove .= " $this->{output_dir}/$name*";
	}
}

#### Paste all annotation files side by side, then keep the transcript
#### columns of the first file plus one annotation column per file.
#### NOTE(review): the column list steps by 6 but has no 36 and runs on to
#### 114, while the header built from $chromatin carries one name per peak
#### file. If every annotation file has 6 columns, the sixth file's
#### annotation is dropped and the header no longer lines up with the data
#### -- confirm against FillAnnotation.pl output before changing.
$cmd = "paste $paste_files | cut -f1,2,3,4,5,6,12,18,24,30,42,48,54,60,66,72,78,84,90,96,102,108,114 > $this->{output_dir}/ChromatinStatus.tmp";
execute_cmd($cmd);

$tmp_file_to_remove .= " $this->{output_dir}/ChromatinStatus.tmp";

#### Fill in with Locus info for respective Transcript IDs
#### (space-separated fields 2 and 4 of the GTF, with quotes and semicolons
#### stripped)
$cmd = "cat $this->{input} | cut -f2,4 -d \" \" | sed -e 's/\"//g' -e 's/;//g' > $this->{output_dir}/Loci.Transcripts";
execute_cmd($cmd);

$tmp_file_to_remove .= " $this->{output_dir}/Loci.Transcripts";

$cmd = "$config->{ToolInfo}->{workflow_path}->{value}/FillLocusInfo.pl $this->{output_dir}/Loci.Transcripts $this->{output_dir}/ChromatinStatus.tmp $this->{output_dir}/LocusAdded";
execute_cmd($cmd);

$tmp_file_to_remove .= " $this->{output_dir}/LocusAdded";

#### Final output for Chromatin Status
#### The header is written with ">" so that a re-run starts from an empty
#### file; appending here would stack a second header and data set onto the
#### previous run's output. The awk step moves column 23 of LocusAdded into
#### the LocusID position.
#### NOTE(review): the file name "ChromatinStaus.txt" looks like a typo for
#### "ChromatinStatus.txt"; it is left as is because other workflow steps
#### may read it under this name -- confirm before renaming.
$cmd = "echo -e \"Chr\\tStart\\tStop\\tTranscriptID\\tLocusID\\tStrand$chromatin\" > $this->{output_dir}/ChromatinStaus.txt";
$cmd .= " && cat $this->{output_dir}/LocusAdded | awk 'BEGIN{OFS=\"\\t\"}{print \$1,\$2,\$3,\$4,\$23,\$5,\$6,\$7,\$8,\$9,\$10,\$11,\$12,\$13,\$14,\$15,\$16,\$17,\$18,\$19,\$20,\$21,\$22}' >> $this->{output_dir}/ChromatinStaus.txt";
execute_cmd($cmd);

#### remove temp files
$cmd = "rm $tmp_file_to_remove";
execute_cmd($cmd);

$logger->info("Chromatin Map completed");
exit();

#############################################################################
sub check_parameters {
	#### Validate the parsed command-line options.
	####
	#### Argument: reference to the options hash filled by GetOptions.
	#### Effects:  prints the usage message naming the first missing required
	####           option and exits with status 2; otherwise sets the 'debug'
	####           entry to 3 when the caller did not supply one.
	####
	#### Everything is read and written through the reference that was
	#### passed in, not through a hash of the same name in the enclosing
	#### file, so the sub works on whichever hash the caller hands over.
	my $options = shift;

	my @required = qw(run_info output_dir);

	foreach my $key (@required) {
		next if defined $options->{$key};

		#### pod2usage() exits by itself (-exitval), nothing runs after it
		pod2usage({-exitval => 2,  -message => "ARG: $key is required", -verbose => 1, -output => \*STDERR});
	}

	$options->{'debug'} = 3 unless (defined $options->{'debug'});
}


#############################################################################
sub create_dir_struct {
	#### Ensure <output_dir>/lincRNA/chromatin exists.
	####
	#### Argument: reference to the options hash (only output_dir is read).
	#### The directory is created through execute_cmd() when it is absent;
	#### when it is already there the fact is only logged.
	my ($opts) = @_;

	my $target = $opts->{output_dir} . "/lincRNA/chromatin";

	if ( ! -d $target ) {
		execute_cmd("mkdir -p $target");
	} else {
		$logger->info("Directory $target exist");
	}
}

#############################################################################
sub execute_cmd {
	#### Log a shell command, run it, and abort the workflow if it fails.
	####
	#### Argument: the command line as a single string (run through the
	####           shell by system()).
	#### Effects:  on failure logs the command through logdie and leaves
	####           with exit status 100.
	my $cmd = shift;

	$logger->info("$cmd");
	system($cmd);

	#### $? is the full wait status: the exit code sits in the high byte,
	#### a terminating signal in the low bits, and it is -1 when the command
	#### could not be started. Testing the whole value catches all three;
	#### looking only at ($? >> 8) lets a command killed by a signal pass as
	#### a success.
	if ( $? != 0 ){
		$logger->logdie("ERROR: Following command failed to execute. Exiting execution of workflow\n$cmd");
		#### fallback in case logdie returns
		exit(100);
	}
}

#############################################################################
sub check_input {
	#### Verify that an expected input file exists and is not empty.
	####
	#### Argument: path of the file to check.
	#### Effects:  returns quietly when the file has content. Otherwise an
	####           error file is created, the failure is reported through
	####           MyUtility (with the SGE job details found in the
	####           environment) and the script exits with status 100.
	my ($file) = @_;

	return if -s $file;

	#### SGE job details, with neutral defaults when not running under SGE
	my $job_id = defined $ENV{JOB_ID}          ? $ENV{JOB_ID}          : 0;
	my $sgeerr = defined $ENV{SGE_STDERR_PATH} ? $ENV{SGE_STDERR_PATH} : "";
	my $sgeout = defined $ENV{SGE_STDOUT_PATH} ? $ENV{SGE_STDOUT_PATH} : "";

	#### NOTE(review): --sample is not a required option, so
	#### $options{sample} can be undef here and the name then becomes
	#### "Chromatinmap..err" -- confirm what the error file should be called
	#### in that case.
	my $run = $config->{RunInfo};
	my $error_name = "$run->{base_output_dir}/$run->{pi}/$run->{type}/$run->{output_folder}/error/Chromatinmap.$options{sample}.err";

	$util->createErrorFile($error_name, "EXPECTED FILE WHILE RUNNING Chromatin Map STEP IS MISSING\n\n$file");

	my @report = (
		$config->{RunInfo}->{email},
		$file,
		"Chromatin Map",
		$error_name,
		$job_id,
		$sgeerr,
		$sgeout,
	);
	$util->reportErrorSGE(@report);

	exit(100);
}
