#! /usr/bin/perl

## Exactly one command-line argument is accepted: the run info file to check.
die ( "USAGE: checkCONFIG.pl <run_info.txt>\n" ) unless (@ARGV == 1);

## Parse the run info file (KEY=VALUE per line) into %variables.
## Problems found along the way are collected in @errorStr and reported
## together at the end of the script.
my %variables;
my @errorStr;
open(my $runInfoFh, "<", $ARGV[0])
	or die "UNABLE TO OPEN RUN_INFO.txt ($ARGV[0]): $!\n";
while (my $line = <$runInfoFh>) {
	chomp($line);
	next if ($line =~ /^#/);     ## skip comments
	next if ($line =~ /^\s*$/);  ## skip empty and whitespace-only lines
	## Limit of 2: split only on the first '=' so that values which
	## themselves contain '=' are kept whole instead of being cut short.
	my ($var, $value) = split(/=/, $line, 2);
	if (exists $variables{$var}) {
		push(@errorStr, "RUN_INFO ERROR: Duplicate Variable \'$var\' Line $.");
	}
	## On a duplicate the last definition wins.
	$variables{$var} = $value;
}
close($runInfoFh);
### Required run info variables, grouped by the type each one is checked as.
my %runInfo = (
	## Plain strings.
	## TODO END1_SEQ should eventually become a colon-separated value, but
	##      this would require several changes to the existing code.
	( map { ($_ => 'string') } qw(
		PI USEREMAIL GENOMEBUILD PROJECT_NAME SAMPLENAME RUNID
		SEQ_TYPE SEQ_SUFFIX FILTER_TYPE END1_SEQ
		PEAK_CALLER IP_FILE INPUT_FILE
	) ),

	## Directories.
	SEQ_DIR => 'dir',

	## Integers.
	PKGENE_NEIGHDIST => 'int',

	## Booleans.
	( map { ($_ => 'boolean') } qw(RUN_CEAS RUN_MEME RUN_GOM) ),

	## Files.
	( map { ($_ => 'file') } qw(TOOL_INFO MEMORY_INFO) ),

	## Currently not checked:
	##   PORTAL_PROJECT_NAME => 'string'
	##   WORK_DIR            => 'dir'
	##   INPUT_LABLES        => 'string'
	##   END2_SEQ            => 'string'  TODO This is optional for SE, required for PE. Will require more advanced checking.
	##   CHRINDEX            => 'csep'    TODO This is not currently implemented, but it does make sense to add.
);

## Validate the parsed run info values and add any problems to the report.
push(@errorStr, do_variable_check(\%runInfo, \%variables));

######## Expand to tool info #########
### Required tool info variables, mapped to the type each one is checked as.
my %toolInfo = (
	BWA_REF => 'file',
	CEAS_REF => 'file',
	UCSC_REF_FLAT => 'file',
	GENOME_TABLE => 'file',
	IGV_REFERENCE_GENOME => 'file',
	ANNOTATION_DIR => 'dir',
	GENE_TSS => 'string',
	ANTI_GAP => 'string',
	TEST_TERM => 'string',
	ANNO_TYPE => 'string',
	CHIPSEQ_DIR => 'dir',
	SICER => 'dir',
	BWA_PATH => 'dir',
	MACS_PATH => 'dir',
	MACS_PYTHON => 'dir',
	CEAS_PATH => 'dir',
	CEAS_PYTHON => 'dir',
	JAVA => 'file',
	SAMTOOLS => 'dir',
	BEDTOOLS => 'dir',
	PICARD => 'dir',
	FASTQC => 'file',
	R_PATH => 'string', #### Very strange issue with R as dir!!
	#NGS_PORTAL_PATH => 'dir',
	MEME_PATH => 'file',
	WIG2BIGWIG => 'file',
	IGVTOOLS => 'file',
	IGV_LINK => 'string',
	IGV_SETUP_PDF => 'file',
	WORKFLOW_SUMMARY_DOC => 'file',
	TOOL_VERSION => 'string',
	ORGANISM => 'string',
	LOCATION => 'string',
	PLATFORM => 'string',
	TCLR_LIST => 'file',
	FRAGMENT_SIZE => 'string',
	STEP_SIZE => 'string',
	MAP_SE_ARGS => 'string',
	MAP_PE_ARGS => 'string',
	MAP_BOTH_ARGS => 'string',
	MAP_QUALITY => 'boolean',
	REMOVE_DUP => 'string',
	SICER_ARGS => 'string',
	MACS2_ARGS => 'string',
	IDR_ARGS => 'string',
	IDR_CUTOFF => 'string',
	QUEUE => 'string',
	OPEN_SOURCE => 'string'
);

### Locate and open the tool info file named by TOOL_INFO in the run info.
my %tools;
my $toolInfoFile = $variables{TOOL_INFO};
my $toolInfoFh;
my $toolInfoProblem;
if (!defined($toolInfoFile) || $toolInfoFile eq "") {
	$toolInfoProblem = "TOOL_INFO IS NOT SET IN RUN_INFO.txt ($ARGV[0])";
}
elsif (!open($toolInfoFh, "<", $toolInfoFile)) {
	$toolInfoProblem = "UNABLE TO OPEN TOOL_INFO ($toolInfoFile): $!";
}
if (defined $toolInfoProblem) {
	## This is still fatal, but the errors collected so far are printed
	## first (same layout as the final report) so they are not lost.
	if (@errorStr) {
		print "\n".join("\n", @errorStr)."\n\n";
	}
	die "$toolInfoProblem\n";
}

### Parse the tool info file (KEY=VALUE per line) into %tools.
while (my $line = <$toolInfoFh>) {
	chomp($line);
	next if ($line =~ /^#/);     ## skip comments
	next if ($line =~ /^\s*$/);  ## skip empty and whitespace-only lines
	## Limit of 2: split only on the first '=' so that values which
	## themselves contain '=' (e.g. tool argument strings) are kept whole.
	my ($var, $value) = split(/=/, $line, 2);
	if (exists $tools{$var}) {
		push(@errorStr, "TOOL_INFO ERROR: Duplicate Variable \'$var\' Line $.");
	}
	## On a duplicate the last definition wins.
	$tools{$var} = $value;
}
close($toolInfoFh);

### Validate the parsed tool info values and add any problems to the report.
push(@errorStr, do_variable_check(\%toolInfo, \%tools));

##############################
### Final Report of Errors ###
##############################
## Print every collected error, one per line, framed by blank lines.
## Nothing at all is printed when no errors were collected.
print("\n", join("\n", @errorStr), "\n\n") if (scalar(@errorStr) > 0);




## do_variable_check(\%required, \%provided)
##
## Checks configuration values against the type expected for each variable.
##
##   $_[0]  hash ref: required variable name => expected type, one of
##          'file', 'dir', 'boolean', 'csep', 'real', 'int' or 'string'.
##          Any other type name is accepted without a check.
##   $_[1]  hash ref: variable name => value as read from the config file.
##
## Returns a list of error strings, ordered by variable name; the list is
## empty when every required variable is present and well formed. A missing
## variable yields only a "Missing Variable" error (no type error on top).
sub do_variable_check{
	my ($required, $provided) = @_;
	my @errors;

	## Sorted so the report order is the same on every run.
	for my $name (sort keys %{$required}) {
		my $type = $required->{$name};

		if ( !exists($provided->{$name}) ) {
			push(@errors, "ERROR: Missing Variable \'$name\'");
			next;   ## nothing to type-check
		}

		## A line without '=' leaves the value undefined; treat it as empty.
		my $value = $provided->{$name};
		$value = "" unless defined($value);

		if( ($type eq 'file') && (!-e $value) ){
			push(@errors, "ERROR: File Does Not Exist \'$name\'");
		}
		elsif( ($type eq 'dir') && (!-d $value) ){
			push(@errors, "ERROR: Directory Does Not Exist \'$name\'");
		}
		## The value must be exactly one of the keywords (case-insensitive),
		## not merely contain one somewhere.
		elsif( ($type eq 'boolean') && ($value !~ /\A(?:YES|NO|0|1)\z/i) ){
			push(@errors, "ERROR: Boolean poorly defined \'$name\' [Yes|No|0|1]");
		}
		elsif( ($type eq 'csep') && ($value =~ m/(\t|\s|\,)/) ){
			push(@errors, "ERROR: Colon Separated Values poorly defined \'$name\' [No whitespace, tabs, or commas]");
		}
		## Numeric checks cover the whole value, so trailing junk is rejected.
		elsif( ($type eq 'real') && ($value !~ /\A\d+\.\d+\z/) ){
			push(@errors, "ERROR: Not a Real Number! \'$name\'");
		}
		elsif( ($type eq 'int') && ($value !~ /\A\d+\z/) ){
			push(@errors, "ERROR: Not an Integer! \'$name\'");
		}
		elsif( ($type eq 'string') && ($value eq "") ){
			push(@errors, "ERROR: Missing Value! \'$name\'");
		}
	}
	return(@errors);
}
