#!/bin/bash

# Absolute path of the directory that contains this script.
# Resolved in a subshell so the caller's working directory (and OLDPWD) are
# left untouched and nothing is printed to stdout; quoting keeps install
# paths that contain spaces intact.
SOURCEPATH=$(cd -- "$(dirname -- "$0")" > /dev/null && pwd)
if [ -z "$SOURCEPATH" ];then
	echo "cannot determine the directory that contains $0." 1>&2
	exit 1
fi

# Help text printed by args(). It is stored with literal newlines and tabs and
# must always be expanded inside double quotes: several words in it
# ("[required]", "[optional]", "(*.png)") are valid glob patterns and would be
# replaced by matching file names from the current directory otherwise.
# $SOURCEPATH is expanded once, here, when the text is assigned.
instruction="
Wandy is a program that predicts Aneuploidy and CNV from WGS data, and output a CNV plot for genome (*.png),
CNV plots for each chromosome (*.pdf), and CNV segments (*.txt) which recording the start, end, etc.

Input requirement:
1)	Illumina WGS single or paired end.
2)	Sorted and indexed BAM file.

Usage:
1)	Go to the folder that you want to save your result (your work directory).
2)	Type Command $SOURCEPATH/Wandy -i  <a bam file or a directory containing bam files>.
3)	The output will be in your current directory

Options:
	-i [required] a bam file or directory containing bam files
	-r [optional] a bin reference file (default hg19 germline)
	-l [optional] read length (default 50bp)
	-q [optional] mapping quality of reads that will be taken into account (default 30)
	-e [optional] read type (pair-ended:2, single-ended 1, default: 1)
	-B [optional] bin size (base pair) of input BED file (use 10000bp or 500bp, default 10000)
	-F [optional] overwrite a previous run
	-S [optional] include BAM files in subdirectory
	-M [optional] contact email (i.e., your email)
	-h [optional] help

Notes:
1)	Your current directory will be your working directory, all intermediate files and final results will be generated under it.
2)	If your input is a bam file folder, the program will take every bam file as input, make sure all of them are sorted and indexed.
3)	Just some general ideas: normally, a 4G(BAM file size) low coverage WGS bam will take 30 minutes and a 300G(BAM file size) high coverage WGS bam will take 10 hours to run.
4)	If any issue, please contact to Chen.xianfeng@mayo.edu or Wang.chen@mayo.edu
"

# args: print the usage text to stdout and terminate the script.
# Takes no arguments; exits with the status of the print (0 on success).
args(){
	printf '%s\n' "$instruction"
	exit
}
# ---- Default settings; the option parser below may override some of them ----
bam=""                               # -i: input BAM file or directory (required)
readlen=50                           # -l: read length passed to the jar as -l
readtype=1                           # -e: read type passed to the jar as -e
qscore=30                            # -q: mapping quality passed to the jar as -q
binsize=10000                        # -B: bin size, used in BinInfo file names
overwrite=0                          # -F: 1 = redo samples whose BinInfo output exists
workdir=$(pwd)                       # results are written under the current directory
sampleinfo="${workdir}/sample.info"
usesubdir=0
email="none"
runid="run.id"                       # run.ID column written to sample.info
# The two lookups go through ls so that a missing reference is reported on
# stderr; $SOURCEPATH is quoted so an install path with spaces is not split.
binref=$(ls "${SOURCEPATH}/reference/summary_10000bin_info_initial.txt")
staticbaselinefile=$(ls "${SOURCEPATH}"/reference/dev_run_summary_ChrPerctg_*.txt)
JARFILE="${SOURCEPATH}/script/jar/HumanGenomeReadInfoWithBedNipt.jar"
#WANDY_BIN_INFO  BEDFILE=`ls $SOURCEPATH\/reference/summary_$binsize\bin_info.txt`
RSCRIPTPATH="${SOURCEPATH}/script/r/"

#######################################
# Parse the command-line options into the global settings.
# The option string now lists every option the usage text documents: -r was
# handled in the case statement but missing from the option string, so
# "-r FILE" was rejected and getopts then stopped at FILE, silently dropping
# every option that followed it. -S and -M were documented but not accepted.
# Globals written: bam, binref, readlen, qscore, readtype, binsize, overwrite,
#                  usesubdir, email
# Arguments: the script's command-line arguments
# Note: -h calls args (defined elsewhere in this file), which prints the usage
#       text and exits. usesubdir and email are only recorded here; nothing
#       visible in this file consumes them yet.
#######################################
parse_options(){
	# A local OPTIND lets the function be called more than once.
	local opt OPTIND=1
	while getopts i:r:l:q:e:B:FSM:h opt "$@"
	do
		case $opt in
		i) bam="$OPTARG";;
		r) binref="$OPTARG";;
		l) readlen="$OPTARG";;
		q) qscore="$OPTARG";;
		e) readtype="$OPTARG";;
		B) binsize="$OPTARG";;
		F) overwrite=1;;
		S) usesubdir=1;;
		M) email="$OPTARG";;
		h) args;;
		# getopts has already printed its own diagnostic for the bad option.
		*) echo "for additional information, please use -h option." 1>&2;;
		esac
	done
}
parse_options "$@"
#######################################
# Validate one BAM file and write its driver script ./<sampleid>.sh into the
# current directory (<sampleid> is the BAM base name without ".bam").
# The generated script runs the bin-counting jar, gzips the result in BinInfo/
# and calls Rcall.R. A row for the sample is appended to ./sample.info.
# Nothing is generated when BinInfo/<sampleid>_q<qscore>_b<binsize>.txt.gz
# already exists and overwrite is 0.
# Globals read: qscore, binsize, overwrite, runid, readlen, readtype,
#               WANDY_MEM, JAVA, JARFILE, WANDY_BIN_INFO, SAMTOOLS, RPATH,
#               RSCRIPTPATH, runinfo, sampleinfo, workdir
# Arguments: $1 - path to the BAM file
# Outputs:   progress messages on stdout, validation errors on stderr
# Returns:   100 if $1 does not end in ".bam" or has no index next to it,
#            0 otherwise (including the "already processed" case)
# Note: paths are written into the generated script verbatim, so paths that
#       contain whitespace are not supported by the generated commands.
#######################################
generate_single_sample_command(){
	local bamfile=$1
	local fsize bambase sampleid binfile mem

	if [[ "$bamfile" != *.bam ]];then
		echo "$bamfile is not a bam file." 1>&2
		echo "for additional information, please use -h option." 1>&2
		return 100
	fi
	# The original test ([ -z <non-empty string> ]) could never fire. Accept
	# either index naming: sample.bam.bai or sample.bai.
	if [ ! -e "${bamfile}.bai" ] && [ ! -e "${bamfile%.bam}.bai" ];then
		echo "${bamfile}.bai file not found, make sure your bam file is sorted." 1>&2
		echo "for additional information, please use -h option." 1>&2
		return 100
	fi

	echo "$bamfile"
	# Read the size via stdin so no file name has to be cut off the output;
	# tr strips the padding some wc implementations add.
	fsize=$(wc -c < "$bamfile" | tr -d '[:space:]')
	if [ "${fsize:-0}" -lt 100000 ];then
		# Warning only: processing continues, as before.
		echo "$bamfile is failed due to samll number of reads, please check."
	fi

	bambase=$(basename "$bamfile")
	sampleid=${bambase%.bam}
	echo "sampleid=$sampleid"

	if [ -e "BinInfo/${sampleid}_q${qscore}_b${binsize}.txt.gz" ] && [ "$overwrite" -eq 0 ];then
		echo "It seems $bamfile has been processed, please use option -F to overwrite previous result."
		return 0
	fi

	binfile="$(pwd)/BinInfo/${sampleid}_q${qscore}_b${binsize}.txt"
	# printf keeps backslashes in names/paths literal (echo -e would not).
	printf '%s\t%s\tsample.type\tchen_xianfeng_m112568\tsequnome_test\t%s.gz\n' \
		"$sampleid" "$runid" "$binfile" >> sample.info

	# Java heap = one third of WANDY_MEM (its "G"/"g" suffix removed),
	# truncated to whole gigabytes by bc.
	mem=$(echo "${WANDY_MEM}" | sed 's/G//' | sed 's/g//')
	mem=$(echo "${mem}/3" | bc)
	mem="-Xmx${mem}G"

	{
		echo "#!/bin/bash"
		echo "$JAVA ${mem} -jar $JARFILE -b $WANDY_BIN_INFO -q $qscore -l $readlen -e $readtype -y 1000000 -s $SAMTOOLS -i $bamfile | cut -f 1,2,3,4,8,16 > BinInfo/${sampleid}_q${qscore}_b${binsize}.txt"
		echo "gzip --force BinInfo/${sampleid}_q${qscore}_b${binsize}.txt"
		# The Rlib_* glob is left for the generated script to expand.
		echo "$RPATH/Rscript $RSCRIPTPATH/Rcall.R $sampleid $runinfo $sampleinfo $workdir ${RSCRIPTPATH}/Rlib_*/wandyCNV.loadPackage.R"
	} > "${sampleid}.sh"
	chmod a+x "${sampleid}.sh"
}


echo "SOURCEPATH=$SOURCEPATH"
        #       binannotation=`ls $SOURCEPATH\/reference/summary_$binsize\bin_info_*.txt`

# Make sure the working directory has a tool.info and load it. A missing file
# is seeded from the copy shipped next to this script, with the selected bin
# reference appended as WANDY_BIN_INFO.
if [ -e "${workdir}/tool.info" ];then
	echo "${workdir}/tool.info exists."
else
	echo "${workdir}/tool.info not exists, default used."
	# Stop on a failed copy: the append below would otherwise create a stub
	# tool.info holding only WANDY_BIN_INFO, and every later run would accept
	# that stub as a valid configuration.
	if ! cp "${SOURCEPATH}/tool.info" "${workdir}/tool.info";then
		echo "cannot copy ${SOURCEPATH}/tool.info to ${workdir}/tool.info." 1>&2
		exit 1
	fi
#	while read line
#	do
#		eval v=$line #convert ${} to value
#		echo $v
#	done < $SOURCEPATH/tool.info > $workdir\/tool.info
	echo "WANDY_BIN_INFO=$binref" >> "${workdir}/tool.info"
fi
if ! source "${workdir}/tool.info";then
	echo "cannot load ${workdir}/tool.info." 1>&2
	exit 1
fi

# Validate WANDY_STATIC_BIN_REFERENCE (loaded from tool.info): it must be a
# non-empty file or the literal "NA". Anything else is a configuration error.
if [ -s "$WANDY_STATIC_BIN_REFERENCE" ];then
	singlereference=$WANDY_STATIC_BIN_REFERENCE
elif [ "$WANDY_STATIC_BIN_REFERENCE" == "NA" ];then
	echo "WARNING:WANDY_STATIC_BIN_REFERENCE is not used." 1>&2
else
	echo "WARNING:WANDY_STATIC_BIN_REFERENCE '$WANDY_STATIC_BIN_REFERENCE' does not exist, please check the path or set to 'NA' in tool.info." 1>&2
	# A bare "exit" returned the status of the echo above (0), so callers
	# could not tell that the run had been aborted.
	exit 1
fi

# first_existing_path PATH...: print the first argument that exists.
# Returns 1 (printing nothing) when none does, e.g. for an unmatched glob.
first_existing_path(){
	local candidate
	for candidate in "$@"
	do
		if [ -e "$candidate" ];then
			printf '%s\n' "$candidate"
			return 0
		fi
	done
	return 1
}

# Select the run_info_*.ini for this run. An existing file in the working
# directory is reused. Otherwise the template shipped under reference/ is
# copied there with its bin.annotation.file and single.ref.file lines replaced
# by the values from tool.info.
singlereference=$WANDY_STATIC_BIN_REFERENCE
# Taking the first match avoids the "[: too many arguments" failure the
# unquoted glob test produced when several run_info files were present.
runinfo=$(first_existing_path "$workdir"/run_info_*.ini)
if [ -z "$runinfo" ];then # runinfo file not exist
	bin_anno="bin.annotation.file = \"$WANDY_BIN_INFO\""
	single_ref="single.ref.file=\"$singlereference\""
	default_runinfo=$(first_existing_path "$SOURCEPATH"/reference/run_info_*.ini)
	if [ -z "$default_runinfo" ];then
		# Without this check the filter below had no input file and
		# waited on stdin forever.
		echo "no run_info_*.ini template found in ${SOURCEPATH}/reference." 1>&2
		exit 1
	fi
	runinfo="${workdir}/$(basename "$default_runinfo")"
	awk -v bin="$bin_anno" -v ref="$single_ref" '{
					if(index($0, "#")==1){#comment line
						print
					}else if(index($0, "bin.annotation.file")>0){
						print bin
					}else if(index($0, "single.ref.file")>0){
						print ref
					}else{
						print
					}
				}' "$default_runinfo" > "$runinfo"
fi

# Start a fresh sample table in the current directory (any previous
# sample.info is truncated) and make sure the two output folders exist.
sample_header="sample.ID\trun.ID\tsample.type\tcontact.person.ID\tproject.name\tbin.record.file\tsequencing.finish.time\talignment.finish.time\treported.gender\tcase.fraction"
# %b expands the \t escapes stored in the header string.
printf '%b\n' "$sample_header" > sample.info
for outdir in BinInfo log
do
	if [ ! -d "$outdir" ];then
		mkdir "$outdir"
	fi
done


#######################################
# Prepare one BAM file and then submit or run its driver script.
# Globals read:    WANDY_PARALLELED_MODE, WANDY_QSUB, WANDY_QSUB_PARA, workdir,
#                  qscore, binsize, overwrite
# Globals written: jobids (the new job id is prepended, comma separated)
# Arguments: $1 - path to the BAM file
# Returns:   the non-zero status of generate_single_sample_command when the
#            BAM is rejected; 0 when the sample is skipped because its output
#            already exists; otherwise the status of the submission or of the
#            local run
#######################################
dispatch_sample(){
	local bamfile=$1
	local bambase sampleid jobid rc

	bambase=$(basename "$bamfile")
	sampleid=${bambase%.bam}

	generate_single_sample_command "$bamfile"
	rc=$?
	if [ "$rc" -ne 0 ];then
		# Rejected input: no script was generated, so there is nothing to run.
		return "$rc"
	fi
	# Same condition generate_single_sample_command uses to report
	# "already processed": it wrote no new script in that case, and a
	# leftover ./<sampleid>.sh from an earlier run must not be executed again.
	if [ -e "BinInfo/${sampleid}_q${qscore}_b${binsize}.txt.gz" ] && [ "$overwrite" -eq 0 ];then
		return 0
	fi

	if [ "${WANDY_PARALLELED_MODE:-0}" -eq 1 ];then
		echo "multiple threads for ${sampleid}"
		echo "$WANDY_QSUB ${WANDY_QSUB_PARA} -o log/${sampleid}.o -e log/${sampleid}.e -N Wandy${sampleid} -wd $workdir ./${sampleid}.sh"
		# WANDY_QSUB / WANDY_QSUB_PARA stay unquoted on purpose: they may
		# carry several words that have to become separate arguments.
		jobid=$($WANDY_QSUB ${WANDY_QSUB_PARA} -o "log/${sampleid}.o" -e "log/${sampleid}.e" -N "Wandy${sampleid}" -wd "$workdir" "./${sampleid}.sh") || return
		# The job id is taken from the third word of the submit output.
		jobids=$(echo $jobid | cut -d' ' -f3)","$jobids
	else
		"./${sampleid}.sh"
	fi
}

# ---- Main dispatch: $bam is empty, a single file, or a directory ----
# ret: 0 = ok, 100 = input/validation error, 101 = directory without BAM
# files, anything else = status of a failed submission or local run.
ret=0
jobids=""
echo "bam=$bam"
if [ -z "$bam" ];then # if no input arguement
	echo "You must specify a BAM file." 1>&2
	echo "for additional information, please use -h option." 1>&2
	ret=100
elif [ -f "$bam" ];then
	echo "input is a single file." 1>&2
	dispatch_sample "$bam"
	ret=$?
elif [ -d "$bam" ];then
	echo "input is a directory." 1>&2
	nbam=0
	# Glob instead of parsing ls: names with spaces survive, and an
	# unmatched pattern is skipped by the -e test.
	for b in "$bam"/*bam
	do
		[ -e "$b" ] || continue
		nbam=$((nbam + 1))
		# Keep the failure of any sample, not only that of the last one.
		dispatch_sample "$b" || ret=$?
	done
	if [ "$nbam" -eq 0 ];then
		ret=101
	else
		jobids=${jobids%,}
		echo "hold_jid="$jobids
		RPACKAGE=$SOURCEPATH\/script/r/Rlib_*/wandyCNV.loadPackage.R
		SAMPLEINFO="${workdir}/sample.info"
		#RSCRIPT=`which Rscript` # must use absolute path!
		RSCRIPT="$RPATH/Rscript"
		runinfo=$(ls "$workdir"/run_info_*.ini)
		echo "#!/bin/bash" > summary.sh
		# $RPACKAGE keeps its glob; it is expanded when summary.sh runs.
		echo "$RSCRIPT ${SOURCEPATH}/script/r/Rcallrun.R $RPACKAGE $runinfo $SAMPLEINFO $workdir $staticbaselinefile" >> summary.sh
		chmod a+x summary.sh
		if [ "${WANDY_PARALLELED_MODE:-0}" -eq 1 ];then
			if [ -n "$jobids" ];then
				$WANDY_QSUB ${WANDY_QSUB_PARA} -o "log/summary.o" -e "log/summary.e" -N "summary" -hold_jid "$jobids" -wd "$workdir" ./summary.sh || ret=$?
			else
				# Nothing to wait for; an empty -hold_jid would swallow
				# the next option as its argument.
				$WANDY_QSUB ${WANDY_QSUB_PARA} -o "log/summary.o" -e "log/summary.e" -N "summary" -wd "$workdir" ./summary.sh || ret=$?
			fi
		else
			# Serial mode: every sample has already finished, so the
			# summary runs directly instead of going through the queue.
			./summary.sh || ret=$?
		fi
	fi
else
	echo "something wrong...please check your input..." 1>&2
	exit 1
fi

#######################################
# Print the closing status line for the run on stderr.
# Globals read: bam, WANDY_PARALLELED_MODE
# Arguments: $1 - run status: 0 success, 101 no BAM file found in the input
#                 directory, anything else an error (default 0)
# Returns:   the status it was given
#######################################
report_result(){
	local status=${1:-0}
	case "$status" in
	0)
		if [ "${WANDY_PARALLELED_MODE:-0}" -eq 1 ];then
			echo "Wandy jobs are submitted." 1>&2
		else
			echo "Wandy is done." 1>&2
		fi
		;;
	101)
		echo "bam file not found in $bam." >&2
		;;
	*)
		# Any other non-zero status (100 or a failed sample run) is an
		# error; it used to be reported as "Wandy is done." unless it
		# happened to be exactly 100.
		echo "Wandy is done with error(s)." 1>&2
		;;
	esac
	return "$status"
}

# Propagate a failure to the caller; on success execution simply continues.
report_result "${ret:-0}" || exit $?
