#!/bin/bash
# Make sure the most recent version of Python (2.7.3), with scipy enabled,
# is turned on for SICER.sh and for noidr.sh
# Trace every command as it is executed (bash xtrace).
set -x

# Exactly one argument is expected: the run configuration file.
# Anything else prints the usage line and stops with status 1.
(( $# == 1 )) || {
    echo "usage: noidr.sh <config file>"
    exit 1
}

#source ~/.bash_mayobiotools

# ---- Configuration ---------------------------------------------------------
# Settings come from two KEY=VALUE text files: the run config given as $1 and
# the tool-info file named by its TOOL_INFO entry. Two readers are used:
#   config_value - single settings (whole-word key match)
#   config_list  - keys that may repeat, collected into bash arrays

# config_value KEY FILE
# Print the second '='-separated field of every line in FILE that starts with
# KEY as a whole word (grep -w, so SICER does not also match SICER_ARGS).
# FILE is opened through a quoted redirection: paths containing spaces work,
# and an empty or missing path is reported by the shell instead of leaving
# grep waiting on standard input.
config_value() {
    local key=$1 file=$2
    grep -w -- "^${key}" < "$file" | cut -d '=' -f2
}

# config_list KEY FILE
# Print the second '='-separated field of every line in FILE whose first
# field starts with KEY (prefix match), one value per line.
config_list() {
    local key=$1 file=$2
    awk -F '=' -v key="$key" '$1 ~ ("^" key) { print $2 }' < "$file"
}

config_file=$1

# Run configuration. The array assignments rely on word splitting of the
# reader output on purpose: every whitespace-separated value is one element.
PROJECT_NAME=$(config_value PROJECT_NAME "$config_file")
SEQ_DIR=$(config_value SEQ_DIR "$config_file")
SEQ_TYPE=$(config_value SEQ_TYPE "$config_file")
END1_SEQ=( $(config_list END1_SEQ "$config_file") )
END2_SEQ=( $(config_list END2_SEQ "$config_file") )
SEQ_SUFFIX=$(config_value SEQ_SUFFIX "$config_file")
FILTER_TYPE=$(config_value FILTER_TYPE "$config_file")
PEAK_CALLER=$(config_value PEAK_CALLER "$config_file")
WORK_DIR=$(config_value WORK_DIR "$config_file")
IP_FILE=( $(config_list IP_FILE "$config_file") )
INPUT_FILE=( $(config_list INPUT_FILE "$config_file") )
PKGENE_NEIGHDIST=$(config_value PKGENE_NEIGHDIST "$config_file")
tool_info=$(config_value TOOL_INFO "$config_file")

# Tool locations and options, read from the tool-info file.
FASTQC=$(config_value FASTQC "$tool_info")
BWA_REF=$(config_value BWA_REF "$tool_info")
BWA_PATH=$(config_value BWA_PATH "$tool_info")
MACS_PATH=$(config_value MACS_PATH "$tool_info")
SICER=$(config_value SICER "$tool_info")
SAMTOOLS=$(config_value SAMTOOLS "$tool_info")
BEDTOOLS=$(config_value BEDTOOLS "$tool_info")
PICARD=$(config_value PICARD "$tool_info")
GENOME_TABLE=$(config_value GENOME_TABLE "$tool_info")
TCLR_LIST=$(config_value TCLR_LIST "$tool_info")
UCSC_REF_FLAT=$(config_value UCSC_REF_FLAT "$tool_info")
# NOTE: SOURCE_DIR is filled from the CHIPSEQ_DIR key.
SOURCE_DIR=$(config_value CHIPSEQ_DIR "$tool_info")
SICER_ARGS=$(config_value SICER_ARGS "$tool_info")
REMOVE_DUP=$(config_value REMOVE_DUP "$tool_info")
PYTHON=$(config_value PYTHON "$tool_info")

# Per-sample labels (key is spelled INPUT_LABLES in the config file).
LABLE=( $(config_list INPUT_LABLES "$config_file") )

# Constants used in this file
# MAP_OUTDIR is where the input BAM files are read from; SICER_OUTDIR is
# where the final SICER results are collected.
MAP_OUTDIR="${WORK_DIR}/mapout"
SICER_OUTDIR="${WORK_DIR}/sicerout"

# Quoted so that a WORK_DIR containing whitespace yields one directory
# instead of several unrelated ones.
mkdir -p "$SICER_OUTDIR"

echo "SICER is used to call peaks"

################################### call peaks using SICER
# One array-job task handles one IP/input pair: i is the 1-based task id
# taken from SGE_TASK_ID, j the matching 0-based index into the IP_FILE,
# INPUT_FILE and LABLE arrays.
i=$SGE_TASK_ID

# Refuse to continue without a usable task id. An unset or non-numeric value
# would otherwise give j=-1, which newer bash versions resolve to the LAST
# array element, i.e. the wrong sample would be processed without any error.
if ! [[ $i =~ ^[1-9][0-9]*$ ]] || (( i > ${#IP_FILE[@]} )); then
    echo "ERROR: SGE_TASK_ID ('${i}') must be an integer between 1 and ${#IP_FILE[@]} (number of IP_FILE entries)" >&2
    exit 1
fi

j=$(( i - 1 ))

#### Decide whether labels are needed: they are when the same IP file name
#### occurs more than once in IP_FILE.
NUM_IP=${#IP_FILE[@]}
# One name per line, sorted and de-duplicated; the array assignment splits
# the result back into words.
UNIQ_IP=( $(echo "${IP_FILE[@]}" | tr ' ' '\n' | sort | uniq) )
NUM_UNIQ_IP=${#UNIQ_IP[@]}

if (( NUM_UNIQ_IP < NUM_IP )); then
	# Duplicate IP names: add this task's label to both suffixes.
	WITH_LABEL="${SEQ_TYPE}.${LABLE[j]}"
	BASE_SUFFIX="${SEQ_TYPE}.${FILTER_TYPE}.${REMOVE_DUP}.${LABLE[j]}"
else
	# All IP names are distinct: no label component.
	WITH_LABEL="${SEQ_TYPE}"
	BASE_SUFFIX="${SEQ_TYPE}.${FILTER_TYPE}.${REMOVE_DUP}"
fi



# Sample names for this task: the IP and input file names with the directory
# and the ".<SEQ_SUFFIX>" extension removed.
SEQ1NAME=$(basename "${SEQ_DIR}/${IP_FILE[j]}" ".${SEQ_SUFFIX}")
SEQ2NAME=$(basename "${SEQ_DIR}/${INPUT_FILE[j]}" ".${SEQ_SUFFIX}")

# Per-task scratch directory. -p makes a re-run of the same task succeed when
# the directory is left over from an earlier attempt; any other failure stops
# the script, because every later step reads or writes inside it.
SICER_TMPDIR="${SICER_OUTDIR}/${SEQ1NAME}_TMP${i}"
mkdir -p "$SICER_TMPDIR" || {
    echo "ERROR: cannot create ${SICER_TMPDIR}" >&2
    exit 1
}


# File names used by this step, built once:
#   *_bam - alignments read from MAP_OUTDIR
#   *_bed - BED conversions written to SICER_TMPDIR and handed to SICER
ip_bam="${SEQ1NAME}.${SEQ_TYPE}.${FILTER_TYPE}.${REMOVE_DUP}.s1.bam"
input_bam="${SEQ2NAME}.${SEQ_TYPE}.${FILTER_TYPE}.${REMOVE_DUP}.s1.bam"
ip_bed="${SEQ1NAME}.${BASE_SUFFIX}.s1.bed"
input_bed="${SEQ2NAME}.${BASE_SUFFIX}.s1.bed"

echo " "
echo "SICER is used to call peaks from ${ip_bam}"
echo "versus ${input_bam}, $(date)"
echo " "

# Convert both BAM files to BED.
"${BEDTOOLS}/bamToBed" -i "${MAP_OUTDIR}/${ip_bam}" > "${SICER_TMPDIR}/${ip_bed}"

"${BEDTOOLS}/bamToBed" -i "${MAP_OUTDIR}/${input_bam}" > "${SICER_TMPDIR}/${input_bed}"

### NOTE (original author): SICER generates intermediate chr* files in the
### directory it is called from, so it is run from inside the scratch dir.
### Stop if the directory cannot be entered; continuing would run SICER, and
### the relative "mv" globs later in this script, in the wrong place.
cd "$SICER_TMPDIR" || {
    echo "ERROR: cannot cd to ${SICER_TMPDIR}" >&2
    exit 1
}

# When PYTHON is configured it is put first on PATH for the SICER call only.
# SICER_ARGS is deliberately left unquoted so that it splits into the
# separate trailing arguments of SICER.sh.
# shellcheck disable=SC2086
if [[ -n "$PYTHON" ]]; then
	PATH="${PYTHON}:${PATH}" "${SICER}/SICER.sh" "$SICER_TMPDIR" "$ip_bed" "$input_bed" "$SICER_TMPDIR" $SICER_ARGS
else
	"${SICER}/SICER.sh" "$SICER_TMPDIR" "$ip_bed" "$input_bed" "$SICER_TMPDIR" $SICER_ARGS
fi


#### new code
#### 10/14/2013

# filter_islands SUMMARY_FDR ISLAND_BED
# 1. Rewrite SUMMARY_FDR in place, keeping only rows whose 7th tab-separated
#    field is >= 1.95 (presumably the fold-change column of SICER's
#    islands-summary output - confirm against the SICER documentation).
# 2. Overwrite ISLAND_BED with the first four columns of the surviving rows.
# Returns non-zero if either step fails.
filter_islands() {
    local summary=$1 island_bed=$2
    perl -i -F"\t" -lane 'if($F[6] >=1.95){print join("\t",@F)}' "$summary" || return 1
    # ">" truncates the target, so no separate rm is needed beforehand.
    cut -f 1-4 "$summary" > "$island_bed"
}

# Resolve the two SICER output files with globs instead of parsing "ls". An
# unmatched glob stays as its literal pattern, which the -f tests reject.
summary_fdr=( "${SICER_TMPDIR}/${SEQ1NAME}.${BASE_SUFFIX}.s1-W"*-G*-islands-summary-FDR*[0-9] )
island_bed=( "${SICER_TMPDIR}/${SEQ1NAME}.${BASE_SUFFIX}.s1-W"*-G*-FDR*[0-9]-island.bed )
TEMP_FILE=${island_bed[0]}

if [[ ${#summary_fdr[@]} -eq 1 && -f ${summary_fdr[0]} && ${#island_bed[@]} -eq 1 && -f ${island_bed[0]} ]]; then
    filter_islands "${summary_fdr[0]}" "$TEMP_FILE"
else
    # Reported but not fatal, in line with the rest of this script, which
    # runs to the end. Nothing is deleted or rewritten in this case.
    echo "ERROR: expected exactly one islands-summary-FDR file and one island.bed file for ${SEQ1NAME}.${BASE_SUFFIX} in ${SICER_TMPDIR}; skipping island filtering" >&2
fi
#### end of new code



# add_track_header TRACK_LINE FILE...
# Edit the given file(s) in place: if input line 1 starts with "chr",
# TRACK_LINE is inserted as a new line in front of it.
# The text reaches Perl through the environment rather than being pasted into
# the Perl program, so characters such as '#', '@' or '$' in a sample name
# cannot alter the substitution.
add_track_header() {
    local track_line=$1
    shift
    TRACK_LINE="$track_line" perl -p -i -e 's#^chr#$ENV{TRACK_LINE}\nchr# if $. == 1' "$@"
}

# Track line for the island BED file; name and description are both
# "<IP sample>.<WITH_LABEL>_sicer_peaks.bed".
Peak_Label="${SEQ1NAME}.${WITH_LABEL}_sicer_"
TRACK_NAME="track name=\"${Peak_Label}peaks.bed\" description=\"${Peak_Label}peaks.bed\""
add_track_header "$TRACK_NAME" "${SICER_TMPDIR}/${SEQ1NAME}.${BASE_SUFFIX}.s1-W"*-G*-FDR*[0-9]-island.bed

# The BED conversions of the two BAM files are no longer needed.
rm -f "${SICER_TMPDIR}/${SEQ1NAME}.${BASE_SUFFIX}.s1.bed"
rm -f "${SICER_TMPDIR}/${SEQ2NAME}.${BASE_SUFFIX}.s1.bed"

# Build a temporary 4-column table (whitespace-separated fields 1, 2, 3 and 8
# of the islands-summary-FDR file) and pass it to find_nearby_genes.pl with
# the refFlat file, the output path and the neighbourhood distance.
awk '{print $1"\t"$2"\t"$3"\t"$8}' "${SICER_TMPDIR}/${SEQ1NAME}.${BASE_SUFFIX}.s1-W"*-G*-islands-summary-FDR*[0-9] > "${SICER_TMPDIR}/peaksBED.tmp"
# PKGENE_NEIGHDIST is passed only when set, so an empty value still results
# in no fourth argument, as before.
"${SOURCE_DIR}/find_nearby_genes.pl" "${SICER_TMPDIR}/peaksBED.tmp" "$UCSC_REF_FLAT" "${SICER_TMPDIR}/${SEQ1NAME}.${WITH_LABEL}_peak_vs_gene.xls" ${PKGENE_NEIGHDIST:+"$PKGENE_NEIGHDIST"}

rm -f "${SICER_TMPDIR}/peaksBED.tmp"

# Collect the results in SICER_OUTDIR. The globs are anchored to
# SICER_TMPDIR so they no longer depend on the current directory.
mv "${SICER_TMPDIR}"/*-island.bed "$SICER_OUTDIR"
mv "${SICER_TMPDIR}"/*islands-summary "$SICER_OUTDIR"
mv "${SICER_TMPDIR}"/*islands-summary-FDR*[0-9] "$SICER_OUTDIR"
mv "${SICER_TMPDIR}"/*_peak_vs_gene.xls "$SICER_OUTDIR"

# Closing banner: names the IP and input BAM files of this task and prints
# the time at which the step ended.
done_ip_bam="$( echo $SEQ1NAME ).${SEQ_TYPE}.${FILTER_TYPE}.${REMOVE_DUP}.s1.bam"
done_input_bam="$( echo $SEQ2NAME ).${SEQ_TYPE}.${FILTER_TYPE}.${REMOVE_DUP}.s1.bam"
printf '%s\n' \
    " " \
    "Finish SICER peak calling for ${done_ip_bam}" \
    "versus ${done_input_bam}, $(date)" \
    " "




