#!/bin/bash
### baheti.saurabh@mayo.edu
### Saurabh Baheti
### last updated: NOV 07 2012

# Recalibrate the base quality scores of a BAM file with GATK
# (CountCovariates followed by TableRecalibration).
#
# Arguments:
#   $1  input BAM file
#   $2  sample name; used as the prefix of every output and log file
#   $3  output directory; receives <sample>.bam, <sample>.bam.bai, temp/ and logs/
#   $4  configuration file made of KEY=value lines (JAVA, GATK, JVM_MEM,
#       REF_GENOME, dbSNP_REF, ALIGNER)
#   $5  1-based index into the colon-separated ALIGNER list of the config;
#       the selected name is only used in the progress messages
# Exit status: 0 on success, 1 on a usage error or as soon as any step fails.

if [ "$#" -ne 5 ]
then
	echo -e "script to recal BAM file\nUsage: ./ProcessBAM.sh <input BAM file> <sample name> <output directory> <configuration file> <aligner index>" >&2
	exit 1
fi

set -x
START=$(date +%s)
bam=$1
sample=$2
recal=$3
config=$4
flag=$5

# Print an error message on stderr and abort the script.
fail() {
	echo "ERROR: $*" >&2
	exit 1
}

# Print the value of key $1 from the config file: the text following "KEY="
# on the line(s) starting with "KEY=", with trailing whitespace removed so
# that the quoted expansions below do not carry stray blanks.
config_value() {
	grep "^$1=" "$config" | sed -e "s/^$1=//" -e 's/[[:space:]]*$//'
}

[ -r "$config" ] || fail "cannot read configuration file: $config"
[ -e "$bam" ] || fail "input BAM file not found: $bam"

temp=$recal/temp
logs=$recal/logs
mkdir -p "$temp" "$logs" || fail "cannot create $temp and $logs"

java=$(config_value JAVA)
gatk=$(config_value GATK)
mem=$(config_value JVM_MEM)
ref=$(config_value REF_GENOME)
dbSNP=$(config_value dbSNP_REF)
# ALIGNER is a colon-separated list; pick the entry number $flag, lower-cased.
aligner=$(config_value ALIGNER | tr 'A-Z' 'a-z' | tr ':' '\n' | head -n "$flag" | tail -n 1)

recal_table=$recal/$sample.recal_data.csv

echo -e "\n******* Recalibration $aligner BAM script started *******\n"

# Step 1: collect the covariate table.
# $mem is intentionally left unquoted: it may hold several JVM options.
"$java/java" $mem -Djava.io.tmpdir="$temp" \
	-jar "$gatk/GenomeAnalysisTK.jar" \
	-R "$ref" \
	-I "$bam" \
	--knownSites "$dbSNP" \
	-T CountCovariates \
	-cov ReadGroupCovariate \
	-cov QualityScoreCovariate \
	-cov CycleCovariate \
	-cov DinucCovariate \
	-recalFile "$recal_table" > "$logs/$sample.CountCovariates.log" 2>&1 \
	|| fail "CountCovariates failed for $sample; see $logs/$sample.CountCovariates.log"

# Step 2: write the recalibrated BAM using the table from step 1.
"$java/java" $mem -Djava.io.tmpdir="$temp" \
	-jar "$gatk/GenomeAnalysisTK.jar" \
	-R "$ref" \
	-I "$bam" \
	-T TableRecalibration \
	--out "$recal/$sample.bam" \
	-recalFile "$recal_table" > "$logs/$sample.TableRecalibration.log" 2>&1 \
	|| fail "TableRecalibration failed for $sample; see $logs/$sample.TableRecalibration.log"

# Rename the index from <sample>.bai to <sample>.bam.bai. An index that is
# already in place under the final name is accepted; none at all is an error.
if [ -f "$recal/$sample.bai" ]
then
	mv -- "$recal/$sample.bai" "$recal/$sample.bam.bai" || fail "cannot rename BAM index for $sample"
elif [ ! -f "$recal/$sample.bam.bai" ]
then
	fail "no BAM index was produced for $recal/$sample.bam"
fi

# The covariate table is only removed once both GATK steps have succeeded,
# so it stays available for inspection after a failure.
rm -- "$recal_table"

END=$(date +%s)
DIFF=$(( END - START ))
echo "Recalibration for $sample took $DIFF seconds"
echo -e "\n******* Recalibration $aligner BAM script completed *******\n"
