#!/bin/bash

############################################################################
## Description:
## The wrapper script to run the CAP-miRSeq workflow
##
## Author: Jared Evans (evans.jared@mayo.edu)
## Date: 5/22/14
##
## Parameters:
## <run_info file> - The CAP-miRSeq run_info.txt config file
##
############################################################################



set -e
set -u


# Entry point: exactly one argument (the run_info config file) is required.
# The whole pipeline lives in the else-branch below, which is closed by the
# final `fi` at the end of the script.
if [ "$#" -ne 1 ]; then
	# A wrong invocation is an error: report it on stderr and exit non-zero
	# so that a calling script or scheduler can detect the failure.
	echo "usage: ./CAP-miRseq.sh <run_info file>" >&2
	exit 1
else
	# trace every command from here on
	set -x
	echo $(date)
	run_info=$1

	# -s is false for a missing file and for an empty one
	if [ ! -s "$run_info" ]
	then
		echo -e "ERROR : run_info=$run_info does not exist or is empty\n" >&2
		exit 1
	fi
	# convert DOS line endings; note that this rewrites the caller's file
	dos2unix "$run_info"

	
	# get parameters

	# get_param <config file> <KEY>
	# Prints field 2 (the text between the first and second '=') of every
	# line of <config file> that starts with the whole word <KEY>; -w keeps
	# e.g. TOOL from matching TOOL_INFO. Prints nothing when the key is
	# absent. The pipeline's exit status is that of cut, so a missing key
	# does not trip `set -e`.
	get_param() {
		grep -w "^$2" "$1" | cut -d '=' -f2
	}

	tool_info=$(get_param "$run_info" TOOL_INFO)
	# Stop here rather than let later reads of an empty/missing tool_info
	# fail one by one with confusing messages.
	if [ ! -s "$tool_info" ]
	then
		echo -e "ERROR : TOOL_INFO=$tool_info does not exist or is empty\n" >&2
		exit 1
	fi
	tool=$(get_param "$run_info" TOOL)
	version=$(get_param "$run_info" VERSION)
	flowcell=$(get_param "$run_info" FLOWCELL)
	sample_info=$(get_param "$run_info" SAMPLE_INFO)
	# dos2unix without a file operand acts as a stdin filter, so an empty
	# SAMPLE_INFO would leave the script waiting for input.
	if [ ! -s "$sample_info" ]
	then
		echo -e "ERROR : SAMPLE_INFO=$sample_info does not exist or is empty\n" >&2
		exit 1
	fi
	dos2unix "$sample_info"
	email=$(get_param "$run_info" EMAIL)
	samples=$(get_param "$run_info" SAMPLENAMES)
	queue=$(get_param "$tool_info" QUEUE)
	genomebuild=$(get_param "$run_info" GENOMEBUILD)
	center=$(get_param "$run_info" CENTER)
	platform=$(get_param "$run_info" PLATFORM)
	input_dir=$(get_param "$run_info" INPUT_DIR)
	output_dir=$(get_param "$run_info" OUTPUT_DIR)
	# the two switches are upper-cased so they can be compared against "YES"
	trim_adapter=$(get_param "$run_info" TRIM_ADAPTER | tr "[a-z]" "[A-Z]")
	call_snvs=$(get_param "$run_info" CALL_SNVS | tr "[a-z]" "[A-Z]")
	script_path=$(get_param "$tool_info" SCRIPT_PATH)
	bowtie_ref=$(get_param "$tool_info" BOWTIE_REF)

	# optional switches; normalised further down
	use_sge=$(get_param "$run_info" USE_SGE)
	use_portal=$(get_param "$run_info" USE_PORTAL)

	# Every later step builds paths and commands from these three values;
	# fail now instead of running with paths rooted at "/".
	if [ -z "$samples" ] || [ -z "$output_dir" ] || [ -z "$script_path" ]
	then
		echo -e "ERROR : SAMPLENAMES and OUTPUT_DIR (run_info) and SCRIPT_PATH (tool_info) must all be set\n" >&2
		exit 1
	fi

	# check the environment
	"$script_path/env_check/env_check" -t "$tool_info"

	# confirm whether to skip the portal update / use Sun Grid Engine
	# Both switches are opt-out: only the exact value "0" disables a feature;
	# an unset, empty or any other value enables it ("1"). The ${var:-}
	# form keeps this safe under `set -u` without toggling the option.
	if [ "${use_portal:-}" != "0" ]; then
		use_portal="1"
	fi

	if [ "${use_sge:-}" != "0" ]; then
		use_sge="1"
	fi

	# these are used in subsequent shell scripts
	export tool_info run_info sample_info use_sge use_portal

	# create dirs
	# Refuse to continue with an empty OUTPUT_DIR: every path below would
	# otherwise be rooted at "/" (e.g. /logs, /bams).
	: "${output_dir:?OUTPUT_DIR is empty - check run_info}"
	# -p creates the parents too, so $output_dir and $output_dir/qc are
	# covered by the entries beneath them.
	for subdir in logs bams igv mirdeep2 expression variants \
		qc/fastqc_pretrim qc/fastqc_posttrim qc/other_rna \
		differential_expression fastqs config
	do
		mkdir -p "$output_dir/$subdir"
	done

	# keep a copy of the three config files with the results
	cp "$run_info" "$output_dir/config"
	cp "$tool_info" "$output_dir/config"
	cp "$sample_info" "$output_dir/config"
	# from here on the pipeline reads the copies instead of the originals
	if [ -f "$output_dir/config/$(basename "$run_info")" ]
	then
		run_info=$output_dir/config/$(basename "$run_info")
		tool_info=$output_dir/config/$(basename "$tool_info")
		sample_info=$output_dir/config/$(basename "$sample_info")
	fi

	# Update NGS Portal
	# Only the normalised value "1" triggers the portal call; anything else
	# just logs that the update was skipped.
	case "$use_portal" in
		1)
			$script_path/ngs_portal.sh $run_info Start
			;;
		*)
			echo "Skipping Portal Update"
			;;
	esac


	# Number of ':'-separated entries in SAMPLENAMES; later sections use it
	# as the upper bound for SGE array tasks and per-sample loops.
	sample_count=$(echo $samples | tr ":" "\n" | wc -l)

	# lookup_sample_fastq <sample_info file> <sample name>
	# Prints field 2 (split on '=') of the first line whose key - the text
	# before the first '=', with surrounding blanks removed - is exactly
	# <sample name>. The comparison is a plain string comparison, so looking
	# up "S1" can no longer pick up the line for "S1-rep" or "S1.2".
	# Prints nothing when the sample is not listed.
	lookup_sample_fastq() {
		awk -F'=' -v key="$2" '
			{ k = $1; gsub(/^[ \t]+|[ \t]+$/, "", k) }
			(k "") == (key "") { print $2; exit }
		' "$1"
	}

	# join_colon <word>...
	# Prints its arguments joined by ':'.
	join_colon() {
		local IFS=':'
		printf '%s\n' "$*"
	}

	# fastqs
	# One entry per sample, in SAMPLENAMES order, so that position N in every
	# list below refers to the same sample.
	fastqArray=()
	trimmedFastqArray=()
	readsArray=()
	alignmentArray=()
	for sample in $(echo $samples | tr ":" " ")
	do
		sample_fastq=$(lookup_sample_fastq "$sample_info" "$sample")
		# An empty entry would silently shift every later sample onto the
		# wrong fastq, so treat a missing sample as a configuration error.
		if [ -z "$sample_fastq" ]
		then
			echo -e "ERROR : no fastq listed for sample $sample in sample_info=$sample_info\n" >&2
			exit 1
		fi
		fastqArray+=("$sample_fastq")
		trimmedFastqArray+=("$sample.cutadapt.fastq")
		readsArray+=("$sample.reads.fa")
		alignmentArray+=("$sample.reads_vs_genome.arf")
	done

	# ':'-joined lists handed to the per-step scripts
	fastq_list=$(join_colon "${fastqArray[@]}")
	trimmed_fastq_list=$(join_colon "${trimmedFastqArray[@]}")
	reads_list=$(join_colon "${readsArray[@]}")
	alignment_list=$(join_colon "${alignmentArray[@]}")


	## Shared SGE submission state (only defined when jobs go through qsub)
	#   args        - qsub options common to every job
	#   job_name    - prefix for each job's -N name
	#   all_job_ids - comma-terminated list of submitted job ids, extended by
	#                 the sections below
	case "$use_sge" in
		1)
			args="-V -wd ${output_dir}/logs -q ${queue} -m a -M ${email}"
			job_name="${tool}.${version}.${flowcell}"
			all_job_ids=""
			;;
	esac

	# Stays empty unless an index-building job is submitted below; later
	# sections test it to decide whether they need a -hold_jid on that job.
	jobid_ref_idx=""
	## Generate reference index files if needed
	# "Needed" means at least one of the three files derived from BOWTIE_REF
	# is missing or empty. Separate quoted tests replace the obsolescent
	# `-o` operator inside a single [ ].
	if [ ! -s "$bowtie_ref.fa.fai" ] || [ ! -s "$bowtie_ref.1.ebwt" ] || [ ! -s "$bowtie_ref.dict" ]
	then
		REF_IDX_CMD="$script_path/reference_indexes.sh $tool_info"
		if [ "$use_sge" = "1" ]; then
			# extra qsub options for this step, taken verbatim from tool_info
			mem=$(grep -w '^REFERENCE_INDEXES_MEM=' "$tool_info" | sed 's/REFERENCE_INDEXES_MEM=//g')
			REF_IDX=$(qsub $args $mem -N $job_name.REFERENCE_INDEXES $REF_IDX_CMD )
			# the job id is taken from the third word of qsub's reply
			# (assumes the usual "Your job <id> ..." message - TODO confirm)
			jobid_ref_idx=$(echo $REF_IDX | cut -d ' ' -f3)
			all_job_ids="${all_job_ids}${jobid_ref_idx},"
		else
			echo $REF_IDX_CMD
			REF_IDX=$($REF_IDX_CMD)
		fi
	fi


	## Trim Adapter
	# TRIM_ADAPTER=YES: run cutadapt.sh per sample (as an SGE array job or in
	# a local loop). Anything else: use the raw fastqs as the "trimmed" set
	# and link them into $output_dir/fastqs.
	if [ "$trim_adapter" == "YES" ]
	then
		CUTADAPT_CMD="$script_path/cutadapt.sh $input_dir $output_dir/fastqs/ $samples $fastq_list $tool_info"
		if [ "$use_sge" = "1" ]; then
			trim_args=$args
			# wait for the reference-index job when one was submitted
			if [ -n "$jobid_ref_idx" ]
			then
				trim_args="${trim_args} -hold_jid ${jobid_ref_idx}"
			fi
			mem=$(grep -w '^CUTADAPT_MEM=' "$tool_info" | sed 's/CUTADAPT_MEM=//g')
			CUTADAPT=$(qsub $trim_args -t 1-$sample_count -N $job_name.CUTADAPT $mem $CUTADAPT_CMD)
			# array jobs are reported as "<id>.<range>"; keep only the id
			jobid_cutadapt=$(echo $CUTADAPT | cut -d ' ' -f3 | cut -d '.' -f1)
			all_job_ids="${all_job_ids}${jobid_cutadapt},"
		else
			for count in $(seq 1 $sample_count)
			do
				echo "Running cutadapt on sample $count"
				echo "With command $CUTADAPT_CMD $count "
				CUTADAPT=$($CUTADAPT_CMD $count )
			done
		fi
	else
		trimmed_fastq_list=$fastq_list
		for fastq in $(echo $fastq_list | tr ":" " ")
		do
			# -f replaces a link left by a previous run; plain `ln -s` would
			# fail on the existing name and abort the script under `set -e`
			ln -sf "$input_dir/$fastq" "$output_dir/fastqs/"
		done
	fi

	## Run Fastqc
	# Pre-trim QC reads from $input_dir and is only run when adapters are
	# trimmed; post-trim QC reads from $output_dir/fastqs and always runs.
	PRE_FASTQC_CMD="${script_path}/fastqc.sh ${input_dir} ${output_dir}/qc/fastqc_pretrim ${samples} ${fastq_list} ${tool_info}"
	POST_FASTQC_CMD="${script_path}/fastqc.sh ${output_dir}/fastqs ${output_dir}/qc/fastqc_posttrim ${samples} ${trimmed_fastq_list} ${tool_info}"
	if [ "$use_sge" != "1" ]; then
		# local mode: one sample after another in this shell
		for count in $(seq 1 $sample_count)
		do
			echo "Running fastqc on sample $count"
			if [[ $trim_adapter == "YES" ]]; then
				echo "With command $PRE_FASTQC_CMD $count "
				PRE_FASTQC=$($PRE_FASTQC_CMD $count )
			fi
			echo "With command $POST_FASTQC_CMD $count "
			POST_FASTQC=$($POST_FASTQC_CMD $count )
		done
	else
		# SGE mode: array jobs, one task per sample
		fastqc_mem=$(grep -w '^FASTQC_MEM=' "$tool_info" | sed 's/FASTQC_MEM=//g')
		fastqc_args=$args
		if [[ $trim_adapter == "YES" ]]; then
			# the pre-trim job has no dependency; the post-trim job submitted
			# below must wait for cutadapt
			FASTQC=$(qsub $args -t 1-$sample_count -N $job_name.FASTQC $fastqc_mem $PRE_FASTQC_CMD)
			jobid_fastqc=$(echo $FASTQC | cut -d ' ' -f3 | cut -d '.' -f1)
			all_job_ids+="${jobid_fastqc},"
			fastqc_args+=" -hold_jid $jobid_cutadapt"
		fi
		FASTQC=$(qsub $fastqc_args -t 1-$sample_count -N $job_name.FASTQC $fastqc_mem $POST_FASTQC_CMD)
		jobid_fastqc=$(echo $FASTQC | cut -d ' ' -f3 | cut -d '.' -f1)
		all_job_ids+="${jobid_fastqc},"
	fi



	## Run mirdeep2 mapper
	# Runs mapper.sh once per sample on the (trimmed) fastqs, writing into
	# $output_dir/mirdeep2/.
	MAPPER_CMD="$script_path/mapper.sh $output_dir/fastqs/ $output_dir/mirdeep2/ $samples $trimmed_fastq_list $tool_info"
	if [ "$use_sge" = "1" ]; then
		mem=$(grep -w '^MIRDEEP2_MAPPER_MEM=' "$tool_info" | sed 's/MIRDEEP2_MAPPER_MEM=//g')
		# Pick the upstream job to wait for. With trimming, the cutadapt job
		# is enough: it already holds on the reference-index job. Without
		# trimming, hold on the reference-index job directly (when one was
		# submitted) so the mapper cannot start before the indexes exist.
		# NOTE(review): presumes mapper.sh reads the BOWTIE_REF indexes - confirm.
		mapper_args=$args
		if [ "$trim_adapter" == "YES" ]; then
			mapper_args="${args} -hold_jid ${jobid_cutadapt}"
		elif [ -n "${jobid_ref_idx:-}" ]; then
			mapper_args="${args} -hold_jid ${jobid_ref_idx}"
		fi
		MAPPER=$(qsub $mapper_args -N $job_name.MAPPER -t 1-$sample_count -pe threaded 4 $mem $MAPPER_CMD)
		# array jobs are reported as "<id>.<range>"; keep only the id
		jobid_mapper=$(echo $MAPPER | cut -d ' ' -f3 | cut -d '.' -f1)
		all_job_ids="${all_job_ids}${jobid_mapper},"
	else
		for count in $(seq 1 $sample_count)
		do
			echo "$MAPPER_CMD $count"
			MAPPER=$($MAPPER_CMD $count)
		done
	fi


	## Create BAMs
	# Runs bams.sh once per sample on the (trimmed) fastqs, writing into
	# $output_dir/bams/.
	BAMS_CMD="$script_path/bams.sh $output_dir/fastqs/ $output_dir/bams/ $samples $trimmed_fastq_list $tool_info"
	if [ "$use_sge" = "1" ]; then
		mem=$(grep -w '^BAMS_MEM=' "$tool_info" | sed 's/BAMS_MEM=//g')
		# Pick the upstream job to wait for. With trimming, the cutadapt job
		# is enough: it already holds on the reference-index job. Without
		# trimming, hold on the reference-index job directly (when one was
		# submitted) so alignment cannot start before the indexes exist.
		# NOTE(review): presumes bams.sh reads the BOWTIE_REF indexes - confirm.
		bams_args=$args
		if [ "$trim_adapter" == "YES" ]; then
			bams_args="${args} -hold_jid ${jobid_cutadapt}"
		elif [ -n "${jobid_ref_idx:-}" ]; then
			bams_args="${args} -hold_jid ${jobid_ref_idx}"
		fi
		BAMS=$(qsub $bams_args -N $job_name.BAMS -t 1-$sample_count -pe threaded 4 $mem $BAMS_CMD)
		# array jobs are reported as "<id>.<range>"; keep only the id
		jobid_bams=$(echo $BAMS | cut -d ' ' -f3 | cut -d '.' -f1)
		all_job_ids="${all_job_ids}${jobid_bams},"
	else
		for count in $(seq 1 $sample_count)
		do
			echo $BAMS_CMD $count
			BAMS=$($BAMS_CMD $count)
		done
	fi


	## Run miRDeep2
	# Per-sample mirdeep2.sh run; input and output both live in
	# $output_dir/mirdeep2/. In SGE mode it waits for the mapper job.
	MIRDEEP2_CMD="${script_path}/mirdeep2.sh ${output_dir}/mirdeep2/ ${output_dir}/mirdeep2/ ${samples} ${reads_list} ${alignment_list} ${tool_info}"
	case "$use_sge" in
		1)
			mirdeep2_mem=$(grep -w '^MIRDEEP2_MEM=' "$tool_info" | sed 's/MIRDEEP2_MEM=//g')
			MIRDEEP2=$(qsub $args -N $job_name.MIRDEEP2 -t 1-$sample_count $mirdeep2_mem -hold_jid $jobid_mapper $MIRDEEP2_CMD)
			# array jobs are reported as "<id>.<range>"; keep only the id
			jobid_mirdeep2=$(echo $MIRDEEP2 | cut -d ' ' -f3 | cut -d '.' -f1)
			all_job_ids+="${jobid_mirdeep2},"
			;;
		*)
			for count in $(seq 1 $sample_count)
			do
				echo $MIRDEEP2_CMD $count
				MIRDEEP2=$($MIRDEEP2_CMD $count)
			done
			;;
	esac

	## Call SNVs
	# Optional step (CALL_SNVS=YES): a single variants.sh run over the bams
	# directory. In SGE mode it waits for the BAM job.
	if [[ $call_snvs == "YES" ]]; then
		VARIANTS_CMD="${script_path}/variants.sh ${output_dir}/bams ${output_dir}/variants ${run_info}"
		if [ "$use_sge" != "1" ]; then
			# local mode
			echo $VARIANTS_CMD
			VARIANTS=$($VARIANTS_CMD)
		else
			variants_mem=$(grep -w '^VARIANTS_MEM=' "$tool_info" | sed 's/VARIANTS_MEM=//g')
			VARIANTS=$(qsub $args -hold_jid $jobid_bams $variants_mem -N $job_name.VARIANTS $VARIANTS_CMD )
			jobid_variants=$(echo $VARIANTS | cut -d ' ' -f3)
			all_job_ids+="${jobid_variants},"
		fi
	fi

	## Gencode Classification
	# Per-sample gencode_classification.sh run over the bams directory,
	# writing into $output_dir/qc/other_rna. In SGE mode it waits for the
	# BAM job.
	GENCODE_CMD="$script_path/gencode_classification.sh $output_dir/bams $output_dir/qc/other_rna $samples $tool_info"
	if [ "$use_sge" = "1" ]; then
		mem=$(grep -w '^GENCODE_CLASSIFICATION_MEM=' "$tool_info" | sed 's/GENCODE_CLASSIFICATION_MEM=//g')
		GENCODE=$(qsub $args -hold_jid $jobid_bams $mem -t 1-$sample_count -N $job_name.GENCODE $GENCODE_CMD)
		# array jobs are reported as "<id>.<range>"; keep only the id
		jobid_gencode=$(echo $GENCODE | cut -d ' ' -f3 | cut -d '.' -f1)
		all_job_ids="${all_job_ids}${jobid_gencode},"
	else
		for count in $(seq 1 $sample_count)
		do
			# log the command exactly as it is run, sample index included,
			# like the other per-sample loops do
			echo $GENCODE_CMD $count
			GENCODE=$($GENCODE_CMD $count)
		done
	fi

	## Generate expression reports
	# Single expression_reports.sh run reading $output_dir/mirdeep2/ and
	# writing $output_dir/expression/. In SGE mode it waits for the miRDeep2
	# job.
	EXPRESSION_REPORTS_CMD="${script_path}/expression_reports.sh ${output_dir}/mirdeep2/ ${output_dir}/expression/ ${samples} ${script_path}"
	if [ "$use_sge" != "1" ]; then
		# local mode
		echo $EXPRESSION_REPORTS_CMD
		EXPRESSION_REPORTS=$($EXPRESSION_REPORTS_CMD)
	else
		expression_mem=$(grep -w '^EXPRESSION_REPORTS_MEM=' "$tool_info" | sed 's/EXPRESSION_REPORTS_MEM=//g')
		EXPRESSION_REPORTS=$(qsub $args -N $job_name.EXPRESSION_REPORTS -hold_jid $jobid_mirdeep2 $expression_mem $EXPRESSION_REPORTS_CMD)
		jobid_expression_reports=$(echo $EXPRESSION_REPORTS | cut -d ' ' -f3)
		all_job_ids+="${jobid_expression_reports},"
	fi

	## Differential Expression
	# Single differential_expression.sh run on the mature miRNA expression
	# table. In SGE mode it waits for the expression-reports job.
	DIFF_EXPRS_CMD="${script_path}/differential_expression.sh ${output_dir}/expression/mature_miRNA_expression.xls ${output_dir}/differential_expression ${run_info}"
	case "$use_sge" in
		1)
			diff_exprs_mem=$(grep -w '^DIFF_EXPRESSION_MEM=' "$tool_info" | sed 's/DIFF_EXPRESSION_MEM=//g')
			DIFF_EXPRS=$(qsub $args -N $job_name.DIFF_EXPRESSION -hold_jid $jobid_expression_reports $diff_exprs_mem $DIFF_EXPRS_CMD)
			jobid_diff_exprs=$(echo $DIFF_EXPRS | cut -d ' ' -f3)
			all_job_ids+="${jobid_diff_exprs},"
			;;
		*)
			echo $DIFF_EXPRS_CMD
			DIFF_EXPRS=$($DIFF_EXPRS_CMD)
			;;
	esac

	## Parse and summarize per-sample stats
	# Single sample_summary.sh run over the whole output directory. In SGE
	# mode it waits for every job id collected so far.
	SAMPLE_SUMMARY_CMD="${script_path}/sample_summary.sh ${output_dir} ${samples} ${trim_adapter}"
	if [ "$use_sge" != "1" ]; then
		# local mode
		echo $SAMPLE_SUMMARY_CMD
		SAMPLE_SUMMARY=$($SAMPLE_SUMMARY_CMD)
	else
		summary_mem=$(grep -w '^SAMPLE_SUMMARY_MEM=' "$tool_info" | sed 's/SAMPLE_SUMMARY_MEM=//g')
		SAMPLE_SUMMARY=$(qsub $args -N $job_name.SAMPLE_SUMMARY -hold_jid $all_job_ids $summary_mem $SAMPLE_SUMMARY_CMD)
		jobid_sample_summary=$(echo $SAMPLE_SUMMARY | cut -d ' ' -f3)
		all_job_ids+="${jobid_sample_summary},"
	fi

	## Generate Main Document
	# Last step: a single main_document.sh run. In SGE mode it waits for
	# every job id collected so far, the sample summary included.
	MAIN_DOC_CMD="${script_path}/main_document.sh ${output_dir} ${script_path} ${run_info}"
	case "$use_sge" in
		1)
			main_doc_mem=$(grep -w '^MAIN_DOC_MEM=' "$tool_info" | sed 's/MAIN_DOC_MEM=//g')
			MAIN_DOC=$(qsub $args -N $job_name.MAIN_DOC -hold_jid $all_job_ids $main_doc_mem $MAIN_DOC_CMD)
			;;
		*)
			echo $MAIN_DOC_CMD
			MAIN_DOC=$($MAIN_DOC_CMD)
			;;
	esac

	# timestamp marking the end of submission (SGE) or of the run (local)
	echo $(date)

fi
