#!/bin/bash

# ---------------------------------------------------------------------------
# Start-up: resolve the install directory, load the run.info given as $1 and
# the tool.info it points at, verify the external tools, and (re)create the
# SOFTLINKS staging directory inside the work directory.
#
# Arguments: $1 - path to run.info, or -h/--help to print the README
#            $2 - stored in run_mode
# Every failure path prints its diagnostic on stderr and exits with status 1.
# ---------------------------------------------------------------------------
R1PAIRED_COMMAND=$0
R1PAIRED_SOURCE_DIR=$(dirname "${R1PAIRED_COMMAND}")
R1PAIRED_SOURCE_DIR=$(readlink -f "${R1PAIRED_SOURCE_DIR}")
export R1PAIRED_SOURCE_DIR
RUN_INFO=$1
# Not referenced below; kept in case the sourced info files read it.
run_mode=$2

if [ -z "$RUN_INFO" ];then
	echo -e "\n\tNo input! For more, please type '${R1PAIRED_COMMAND} -h'\n" 1>&2
	exit 1
fi
if [ "$RUN_INFO" == "-h" ] || [ "$RUN_INFO" == "--help" ];then
	cat "${R1PAIRED_SOURCE_DIR}/README"
	exit
fi

# run.info is sourced, so it is expected to define R1PAIRED_WORK_DIR and the
# other R1PAIRED_* settings tested further down.
if [ ! -f "$RUN_INFO" ];then
	echo -e "\n\tRUN_INFO '${RUN_INFO}' does not exist. Please check the path. For more, please type '${R1PAIRED_COMMAND} -h'\n" 1>&2
	exit 1
fi
source "$RUN_INFO"
if [ ! -d "${R1PAIRED_WORK_DIR}" ];then
	echo -e "\n\tR1PAIRED_WORK_DIR '${R1PAIRED_WORK_DIR}' does not exist, please check your run.info.\n" 1>&2
	exit 1
fi
# Keep a copy of the configuration next to the results.
cp "$RUN_INFO" "${R1PAIRED_WORK_DIR}"

if [ ! -f "${R1PAIRED_TOOL_INFO}" ];then
	echo -e "\n\tR1PAIRED_TOOL_INFO '${R1PAIRED_TOOL_INFO}' does not exist, please check your run.info.\n" 1>&2
	exit 1
fi
source "${R1PAIRED_TOOL_INFO}"
cp "${R1PAIRED_TOOL_INFO}" "${R1PAIRED_WORK_DIR}"

# The tool paths tested here come from the sourced info files.
if [ ! -f "$R1PAIRED_PYTHON" ];then
	echo -e "\n\tin your tool.info, Python $R1PAIRED_PYTHON does not exist." 1>&2
	exit 1
fi
if [ ! -f "$R1PAIRED_JAVA" ];then
	echo -e "\n\tin your tool.info, Java $R1PAIRED_JAVA does not exist." 1>&2
	exit 1
fi

if [ ! -f "$R1PAIRED_USEARCH" ];then
	echo -e "\n\tin your tool.info, Usearch $R1PAIRED_USEARCH does not exist." 1>&2
	echo -e "\tplease download Linux version from http://www.drive5.com/usearch/download.html and set correct path in tool.info." 1>&2
	echo -e "\twe currently use usearch8.1.1861_i86linux32\n" 1>&2
	exit 1
fi

# R1PAIRED_WORK_DIR was already verified to be an existing directory above.
# Start from an empty SOFTLINKS directory; -L also catches a dangling symlink,
# which -e alone would miss and which would make mkdir fail.
if [ -e "${R1PAIRED_WORK_DIR}/SOFTLINKS" ] || [ -L "${R1PAIRED_WORK_DIR}/SOFTLINKS" ];then
	rm -r -- "${R1PAIRED_WORK_DIR:?}/SOFTLINKS"
fi
if ! mkdir "${R1PAIRED_WORK_DIR}/SOFTLINKS";then
	echo -e "\n\tcannot create '${R1PAIRED_WORK_DIR}/SOFTLINKS'.\n" 1>&2
	exit 1
fi
#######################################
# Stage a plain list of FASTQ files: link each one into SOFTLINKS and write
# a fresh mapping.txt with one row per *_R1 file.
# Globals:
#   R1PAIRED_WORK_DIR  (read) directory holding SOFTLINKS/ and mapping.txt
#   R1PAIRED_READ_TYPE (read) 0 takes the R1-only branch; any other value
#                      looks for the matching *_R2 file next to each R1
# Arguments:
#   $1 - whitespace/newline separated list of FASTQ paths
# Outputs:
#   ${R1PAIRED_WORK_DIR}/mapping.txt (overwritten), symlinks and empty R2
#   placeholders in SOFTLINKS; warnings and errors on stderr
# Exits:
#   terminates the script with status 1 on a badly named file
#######################################
sub_check_config_without_map(){
	local fastqs=$1
	local -r format="[A-Za-z][A-Za-z0-9]+_R[12].fastq(.gz)"
	# Dots are escaped so only a literal ".fastq" / ".fastq.gz" suffix passes.
	local -r name_re='^[A-Za-z][A-Za-z0-9]+_R[12]\.fastq(\.gz)?$'
	local -r mapping="${R1PAIRED_WORK_DIR}/mapping.txt"
	local -r links="${R1PAIRED_WORK_DIR}/SOFTLINKS"
	local -a fastq_list=()
	local fastq base1 base3 sampleid R2fastq

	printf '#SampleID\tFilePath\n' > "${mapping}"

	# Split the list on whitespace without letting entries glob-expand.
	# read returns non-zero at end of input, but the array is still filled.
	read -r -d '' -a fastq_list <<< "${fastqs}"

	for fastq in "${fastq_list[@]}"
	do
		base1=$(basename "${fastq}")
		if [[ ! "${base1}" =~ ${name_re} ]];then
			echo "'${base1}' is not a correct file name format, it must follow ${format}, you may use symbolic link to create a new file name. Here is an example: Sample001_R1.fastq" 1>&2
			exit 1
		fi

		ln -s "${fastq}" "${links}/${base1}"
		# Name of the R2 mate; identical to base1 when the file is not an R1.
		base3=${base1/_R1.fastq/_R2.fastq}
		if [ "${R1PAIRED_READ_TYPE}" -eq 0 ];then
			touch "${links}/${base3}"
		fi

		# Only R1 files produce a mapping row; R2 files are just linked.
		if [[ "${base1}" != *_R1.fastq* ]];then
			continue
		fi
		# The validated name has no "_" before "_R1", so this is the sample ID.
		sampleid=${base1%%_*}

		if [ "${R1PAIRED_READ_TYPE}" -eq 0 ];then
			printf '%s\t%s\n' "${sampleid}" "${fastq}" >> "${mapping}"
			continue
		fi

		# Swap the file name only, so a directory component that happens to
		# contain "_R1.fastq" is left untouched.
		R2fastq="${fastq%"${base1}"}${base3}"
		if [ -e "${R2fastq}" ];then
			printf '%s\t%s;%s\n' "${sampleid}" "${fastq}" "${R2fastq}" >> "${mapping}"
		else
			echo "WARNING: ${R2fastq} does not exist." 1>&2
			printf '%s\t%s\n' "${sampleid}" "${fastq}" >> "${mapping}"
			# Empty stand-in for the missing mate.
			touch "${links}/${base3}"
		fi
	done
}

#######################################
# Stage the FASTQ files listed in a tab-separated mapping file: validate each
# row and link the files into SOFTLINKS as <SampleID>_R1/_R2<extension>.
# Globals:
#   R1PAIRED_WORK_DIR  (read) directory holding SOFTLINKS/
#   R1PAIRED_READ_TYPE (read) 0 creates an empty R2 placeholder for every
#                      sample; any other value links the R2 path when given
#   sampleids, samples (written) arrays of column 1 and the FilePath column
# Arguments:
#   $1 - mapping file; the header must contain a 'FilePath' column whose
#        values are "R1path" or "R1path;R2path"
# Outputs:
#   symlinks / empty placeholders in SOFTLINKS; errors on stderr
# Exits:
#   terminates the script with status 1 on the first invalid row
#######################################
sub_check_config_with_map(){
	local R1PAIRED_MAPPING_INFO=$1
	local -r format="[A-Za-z][A-Za-z0-9]+"
	local -r id_re='^[A-Za-z][A-Za-z0-9]+$'
	local -r ext_re='(\.fastq(\.gz)?)$'
	local -r links="${R1PAIRED_WORK_DIR}/SOFTLINKS"
	local pathcolumn row i R1fastq R2fastq extension1
	unset sampleids samples
	sampleids=()
	samples=()

	pathcolumn=$(head -1 "${R1PAIRED_MAPPING_INFO}" | awk '{for(i=1;i<=NF;i++){if($i=="FilePath"){print i; exit}}}')
	if [ -z "$pathcolumn" ];then
		echo "Column name 'FilePath' must present in ${R1PAIRED_MAPPING_INFO} header" 1>&2
		exit 1
	fi

	# Read the ID and path of every non-empty data row as one pair, so the two
	# arrays stay aligned even when a value is empty or contains blanks.
	while IFS= read -r row || [ -n "${row}" ]
	do
		sampleids+=("${row%%$'\t'*}")
		samples+=("${row#*$'\t'}")
	done < <(awk -F'\t' -v col="${pathcolumn}" 'NR>1 && $0!="" {print $1 "\t" $col}' "${R1PAIRED_MAPPING_INFO}")

	for ((i=0;i<${#sampleids[@]};i++))
	do
		if [[ ! "${sampleids[$i]}" =~ ${id_re} ]];then
			# Report the offending ID itself.
			echo "'${sampleids[$i]}' is not a correct sample ID format, it must follow ${format}. Here is an example: Sample001" 1>&2
			exit 1
		fi

		R1fastq=${samples[$i]%%;*}
		if [[ "${R1fastq}" =~ ${ext_re} ]];then
			extension1=${BASH_REMATCH[1]}
		else
			echo "'${R1fastq}' is not a correct file extension format, it must follow '.fastq|.fastq.gz'. Here is an example: Sample001_R1.fastq" 1>&2
			exit 1
		fi
		if [ ! -f "${R1fastq}" ];then
			echo "'${R1fastq}' does not exist. please check your $R1PAIRED_MAPPING_INFO" 1>&2
			exit 1
		fi
		ln -s "${R1fastq}" "${links}/${sampleids[$i]}_R1${extension1}"

		# R2 is the second ';'-separated field, and only exists when a ';' is
		# present (a value without ';' must not be mistaken for an R2 path).
		R2fastq=""
		if [[ "${samples[$i]}" == *";"* ]];then
			R2fastq=${samples[$i]#*;}
			R2fastq=${R2fastq%%;*}
		fi

		# R1-only branch, or no R2 given for this sample: empty stand-in.
		if [ "${R1PAIRED_READ_TYPE}" -eq 0 ] || [ -z "${R2fastq}" ];then
			touch "${links}/${sampleids[$i]}_R2${extension1}"
			continue
		fi

		if [[ ! "${R2fastq}" =~ ${ext_re} ]];then
			echo "'${R2fastq}' is not a correct file extension format, it must follow '.fastq|.fastq.gz'. Here is an example: Sample001_R2.fastq" 1>&2
			exit 1
		fi
		if [ ! -f "${R2fastq}" ];then
			echo "'${R2fastq}' does not exist. please check your $R1PAIRED_MAPPING_INFO" 1>&2
			exit 1
		fi
		# NOTE(review): the R2 link reuses R1's extension, as before; presumably
		# later steps derive the R2 name from the R1 name - confirm.
		ln -s "${R2fastq}" "${links}/${sampleids[$i]}_R2${extension1}"
	done
}


# ---------------------------------------------------------------------------
# Dispatch on R1PAIRED_INPUT_FILES: a directory is scanned for FASTQ files,
# a regular file is treated as a mapping file. Anything else is an error
# (stderr, exit status 1). The R1Paired pipeline is started afterwards when
# R1PAIRED_READ_TYPE is non-zero.
# ---------------------------------------------------------------------------
if [ -d "$R1PAIRED_INPUT_FILES" ];then
	echo "--- your input is a directory with fastq files. ---"
	# List every "name.ext" entry of the directory via the shell glob (no ls
	# parsing) and keep the ones selected by R1PAIRED_FILTER.
	# R1PAIRED_FILTER stays unquoted on purpose so a value carrying grep
	# options keeps working; an empty value makes grep fail and is reported
	# below as "no fastq file found".
	# shellcheck disable=SC2086
	fastqlist=$(
		for candidate in "${R1PAIRED_INPUT_FILES}"/*.*
		do
			# Skips the literal pattern left behind when nothing matches.
			if [ -e "${candidate}" ];then
				printf '%s\n' "${candidate}"
			fi
		done | grep ${R1PAIRED_FILTER}
	)
	if [ -z "$fastqlist" ];then
		echo -e "\n\tno fastq file found in ${R1PAIRED_INPUT_FILES}.\n" 1>&2
		exit 1
	fi
	sub_check_config_without_map "$fastqlist"
elif [ -f "$R1PAIRED_INPUT_FILES" ];then
	echo "-- your input is a mapping file. ---"
	sub_check_config_with_map "${R1PAIRED_INPUT_FILES}"
	cp "${R1PAIRED_INPUT_FILES}" "${R1PAIRED_WORK_DIR}/mapping.txt"
else
	echo "R1PAIRED_INPUT_FILES '${R1PAIRED_INPUT_FILES}' does not exist, please check $RUN_INFO" 1>&2
	exit 1
fi

#${R1PAIRED_SOURCE_DIR}/scripts/IMtornado.sh ${R1PAIRED_WORK_DIR} $R1PAIRED_READ_LENGTH
if [ "${R1PAIRED_READ_TYPE}" -ne 0 ];then
	# The read length is passed only when set, so an empty value does not
	# turn into an empty positional argument.
	"${R1PAIRED_SOURCE_DIR}/scripts/R1Paired.sh" "${R1PAIRED_WORK_DIR}" ${R1PAIRED_READ_LENGTH:+"${R1PAIRED_READ_LENGTH}"}
fi
