#!/bin/bash

############################################################################
## Description:
## run Cutadapt to trim adapter sequences from miRNA reads
##
## Author: Jared Evans
## Date: 5/22/14
##
## Parameters:
## <input dir> - Directory where input fastqs are located
## <output_dir> - Directory where trimmed output fastqs should be written
## <sample names(s1:s2)> - Colon separated list of sample names
## <sample fastqs(fq1:fq2)> - Colon separated list of fastq files for each sample
## <tool_info> - CAP-miRSeq tool_info.txt config file
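## <sample_number> - 1-based position of the sample to process in the lists above;
##                   required when run standalone, omitted when run as an SGE
##                   array job (SGE_TASK_ID is used instead)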
##
############################################################################


# Determine whether this script is being called through Sun Grid Engine.
# SGE exports SGE_TASK_ID as the task index for array jobs and as the literal
# string "undefined" for non-array jobs; "undefined" cannot select a sample, so
# it is handled like a standalone run (sample number given as the 6th argument).
if [ -z "${SGE_TASK_ID:-}" ] || [ "${SGE_TASK_ID}" = "undefined" ]; then
	use_sge=0
else
	use_sge=1
fi

# NOTE: "pipefail" is deliberately NOT enabled. The "zcat | head" pipeline used
# below closes the pipe early, so zcat is expected to exit non-zero on SIGPIPE.
set -e
set -u

# Print an error message on stderr and abort with a non-zero exit status.
die() {
	echo "ERROR : $*" >&2
	exit 1
}

# Print the value stored for a key in a KEY=VALUE config file.
# Arguments: $1 - config file, $2 - key name
# Everything after the FIRST '=' is returned (so values may contain '='), with
# surrounding whitespace removed. Prints nothing when the key is absent.
get_tool_info_value() {
	grep -w -- "^$2" "$1" \
		| cut -d '=' -f2- \
		| sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'
}

# Print the Nth (1-based) item of a colon separated list.
# Arguments: $1 - list (a:b:c), $2 - position
# Prints nothing when the position is past the end of the list.
nth_list_item() {
	printf '%s\n' "$1" | tr ':' '\n' | sed -n "${2}p"
}

# Trim adapters from one sample with cutadapt.
# Arguments: <input dir> <output dir> <sample names> <fastq names> <tool_info>
#            [<sample_number>]  (required unless running as an SGE array task)
# Writes <sample>.cutadapt.fastq, <sample>.tooshort.fastq and
# <sample>.cutadapt.log into the output directory.
main() {
	# Wrong argument count is an error: report on stderr and exit non-zero so
	# that callers and the scheduler can tell the job did not run.
	if [ "$use_sge" = "1" ] && [ $# -ne 5 ]; then
		echo "usage: <input dir> <output dir> <sample names(sn1:sn2)> <input fastq names (s1:s2)> <tool_info>" >&2
		exit 1
	fi
	if [ "$use_sge" = "0" ] && [ $# -ne 6 ]; then
		echo "usage: <input dir> <output dir> <sample names(sn1:sn2)> <input fastq names (s1:s2)> <tool_info> <sample_number>" >&2
		exit 1
	fi

	set -x
	date

	local input_dir=$1
	local output_dir=$2
	local samples=$3
	local fastqs=$4
	local tool_info=$5
	local sample_number

	# SGE passes this as part of an array job, but when run standalone the
	# value needs to be passed on the command line
	if [ "$use_sge" = "1" ]; then
		sample_number=$SGE_TASK_ID
	else
		sample_number=$6
	fi

	case "$sample_number" in
		'' | *[!0-9]*) die "sample number must be a positive integer, got '${sample_number}'" ;;
	esac
	[ "$sample_number" -ge 1 ] || die "sample number must be >= 1, got '${sample_number}'"

	[ -r "$tool_info" ] || die "cannot read tool_info file: ${tool_info}"

	local cutadapt_path script_path cutadapt_params
	cutadapt_path=$(get_tool_info_value "$tool_info" CUTADAPT_PATH)
	script_path=$(get_tool_info_value "$tool_info" SCRIPT_PATH)
	cutadapt_params=$(get_tool_info_value "$tool_info" CUTADAPT_PARAMS)
	[ -n "$cutadapt_path" ] || die "CUTADAPT_PATH is not defined in ${tool_info}"
	[ -n "$script_path" ] || die "SCRIPT_PATH is not defined in ${tool_info}"

	# Pick this task's sample and fastq. An out-of-range position yields an
	# empty string and is rejected instead of silently reusing the last entry.
	local sample fastq
	sample=$(nth_list_item "$samples" "$sample_number")
	fastq=$(nth_list_item "$fastqs" "$sample_number")
	[ -n "$sample" ] || die "no sample name at position ${sample_number} in '${samples}'"
	[ -n "$fastq" ] || die "no fastq at position ${sample_number} in '${fastqs}'"

	# check if input fastqs are gzipped and what encoding they are (SANGER/ILLUMINA)
	local ill2sanger
	if [[ "$fastq" == *.gz ]]; then
		# The checker is given a plain-text file, so decompress the first
		# 4000 lines into a scratch file.
		local tmp_fastq="$output_dir/$fastq.tmp"
		zcat "$input_dir/$fastq" | head -4000 > "$tmp_fastq"
		if ! ill2sanger=$(perl "$script_path/checkFastqQualityScores.pl" "$tmp_fastq" 1000); then
			# do not leave the scratch file behind on failure
			rm -f -- "$tmp_fastq"
			die "checkFastqQualityScores.pl failed on ${tmp_fastq}"
		fi
		rm -f -- "$tmp_fastq"
	else
		ill2sanger=$(perl "$script_path/checkFastqQualityScores.pl" "$input_dir/$fastq" 1000)
	fi

	# A checker result above 65 is taken to mean Illumina-scaled qualities.
	local qual_trim
	if [ "$ill2sanger" -gt 65 ]; then
		qual_trim=51 # Illumina Qual scores
	else
		qual_trim=20 # Sanger Qual scores
	fi

	#run cutadapt
	# cutadapt_params is intentionally left unquoted so that it word-splits
	# into the individual options configured in tool_info.
	# shellcheck disable=SC2086
	"$cutadapt_path/cutadapt" $cutadapt_params -q "$qual_trim" "$input_dir/$fastq" \
		-o "$output_dir/$sample.cutadapt.fastq" \
		--too-short-output="$output_dir/${sample}.tooshort.fastq" \
		> "$output_dir/$sample.cutadapt.log"

	cat "$output_dir/$sample.cutadapt.log"

	# check to see if expected output file exist
	# (reported only; an empty result does not change the exit status)
	if [ ! -s "$output_dir/$sample.cutadapt.fastq" ]; then
		echo "ERROR : ${output_dir}/${sample}.cutadapt.fastq is empty!"
	fi

	if [ ! -s "$output_dir/$sample.tooshort.fastq" ]; then
		echo "WARNING : ${output_dir}/${sample}.tooshort.fastq is empty!"
	fi

	date
}

main "$@"

