#!/bin/bash
# This script generates visualization tracks (BED, bedGraph, wig and TDF files) from the mapped reads of paired-end (PE) and single-end (SE) ChIP-Seq data

# Trace every command so the job log shows exactly what was executed.
set -x

# Exactly one argument is required: the path to the run configuration file.
if [[ $# -ne 1 ]]; then
  echo "usage: mapvis.sh <config file>" >&2
  # A usage error must be reported as a failure. A bare `exit` would return the
  # status of the preceding echo (0) and make the job look successful.
  exit 1
fi

# Parse the run configuration file (the script's only argument) for variables.
# Every setting is read from a KEY=VALUE line; the value is the text between
# the first and the second '='.
run_info_file=$1

# Print the value of the line(s) in the run configuration whose key is exactly
# $1 (whole-word match anchored at the start of the line). Prints nothing when
# the key is absent. The file name is quoted so paths with spaces work.
run_info_value() {
  grep -w "^$1" "$run_info_file" | cut -d '=' -f2
}

# Print the value of every line in the run configuration whose key starts with
# the prefix $1, one value per line. Used for settings that occur several times.
run_info_list() {
  awk -v prefix="$1" 'BEGIN {FS="="} index($1, prefix) == 1 {print $2}' "$run_info_file"
}

PROJECT_NAME=$(run_info_value PROJECT_NAME)
SEQ_DIR=$(run_info_value SEQ_DIR)
SEQ_TYPE=$(run_info_value SEQ_TYPE)
SEQ_SUFFIX=$(run_info_value SEQ_SUFFIX)
WORK_DIR=$(run_info_value WORK_DIR)
FILTER_TYPE=$(run_info_value FILTER_TYPE)
PEAK_CALLER=$(run_info_value PEAK_CALLER)
tool_info=$(run_info_value TOOL_INFO)

# Word splitting of the command substitution is intentional here: every
# whitespace-separated value becomes one array element.
END1_SEQ=( $(run_info_list END1_SEQ) )
END2_SEQ=( $(run_info_list END2_SEQ) )
IP_FILE=( $(run_info_list IP_FILE) )
INPUT_FILE=( $(run_info_list INPUT_FILE) )

# Parse the tool configuration file for variables. Its path is held in
# $tool_info; every setting is read from a KEY=VALUE line.

# Print the value of the line in "$tool_info" whose key is exactly $1
# (whole-word match anchored at the start of the line); prints nothing when the
# key is absent. "$tool_info" is quoted on purpose: unquoted, an empty value
# would leave grep without a file operand and it would silently read standard
# input instead of failing.
tool_info_value() {
  grep -w "^$1" "$tool_info" | cut -d '=' -f2
}

FASTQC=$(tool_info_value FASTQC)
BWA_REF=$(tool_info_value BWA_REF)
BWA_PATH=$(tool_info_value BWA_PATH)
MACS_PATH=$(tool_info_value MACS_PATH)
SICER=$(tool_info_value SICER)
SAMTOOLS=$(tool_info_value SAMTOOLS)
BEDTOOLS=$(tool_info_value BEDTOOLS)
PICARD=$(tool_info_value PICARD)
GENOME_TABLE=$(tool_info_value GENOME_TABLE)
TCLR_LIST=$(tool_info_value TCLR_LIST)
FRAGMENT_SIZE=$(tool_info_value FRAGMENT_SIZE)
STEP_SIZE=$(tool_info_value STEP_SIZE)
REMOVE_DUP=$(tool_info_value REMOVE_DUP)
# The pipeline's own source directory is stored under the key CHIPSEQ_DIR.
SOURCE_DIR=$(tool_info_value CHIPSEQ_DIR)
IGV_REFERENCE_GENOME=$(tool_info_value IGV_REFERENCE_GENOME)
IGVTOOLS=$(tool_info_value IGVTOOLS)

# Constants used in this file
MAP_OUTDIR=$WORK_DIR/mapout

# Print the zero-based array index that belongs to the 1-based task id $1.
# A value that is not a positive integer (empty, or a word such as the
# "undefined" that Grid Engine documents for non-array jobs) used to fall
# through silently and yield index -1. It now produces a warning on stderr and
# is treated as task 1, i.e. index 0.
sge_task_index() {
  local task_id=$1
  # 10# forces base 10 so an id with leading zeros is not parsed as octal.
  if [[ $task_id =~ ^[0-9]+$ ]] && (( 10#$task_id >= 1 )); then
    echo $(( 10#$task_id - 1 ))
  else
    echo "WARNING : SGE_TASK_ID is not a positive integer (got '${task_id}'); assuming task 1" >&2
    echo 0
  fi
}

ARRAY_JOB_INDEX=$(sge_task_index "$SGE_TASK_ID")

#################### start generate visualization files and fragment size file #############
#################### generate insert size files (only for paired-end data)
#################### generate bedgraph and wig files, normalization

# Print one line picked at random from file $1.
# Returns 1 and prints nothing when the file is not a readable regular file or
# has no lines, instead of hitting an ambiguous redirect or a division by zero
# in the modulo below.
pick_random_line() {
  local list_file=$1 line_count
  [[ -f $list_file && -r $list_file ]] || return 1
  line_count=$(wc -l < "$list_file")
  (( line_count > 0 )) || return 1
  sed -n "$(( RANDOM % line_count + 1 ))p" "$list_file"
}

# Sample name: the END1 sequence file of this array task without its suffix.
SEQ1NAME=$( basename "${END1_SEQ[ARRAY_JOB_INDEX]}" ".${SEQ_SUFFIX}" )
# BAM file of this sample inside the mapping output directory.
files=${MAP_OUTDIR}/${SEQ1NAME}.${SEQ_TYPE}.${FILTER_TYPE}.${REMOVE_DUP}.s1.bam
# Track colour: a random line of the colour list.
RCOLOR=$( pick_random_line "$TCLR_LIST" )
# Number of records in the BAM file, as counted by `samtools view -c`.
libsize=$( "${SAMTOOLS}/samtools" view -c "$files" )

# Build $files.bed with one interval per usable record of the BAM file.
# The awk programs address the text columns printed by `samtools view` by
# number: $2, $3, $4, $9 and $10.
# NOTE(review): in the SAM format these are FLAG, reference name, position,
# template length and read sequence — confirm against the aligner output.
# If SEQ_TYPE is neither "PE" nor "SE", no BED file is written here.
if [[ $SEQ_TYPE = "PE" ]]
then
echo -e "\n$files has $libsize non-redundant pairs of mapped reads\n"

# Paired-end BED: only records whose $2 is 83, 147, 99 or 163 and whose $9 is
# non-zero are emitted. Start is $4-1 and end is start + |$9| (the ">0" and
# "<0" branches differ only in the sign applied to $9). Records with $2 of
# 83/147 get strand "-", 99/163 get strand "+"; name is "." and score is "1".
${SAMTOOLS}/samtools view $files |awk 'BEGIN {FS="\t"; OFS="\t"} {if ((($2 ==83) || ($2 ==147)) && ($9 >0)) print $3,($4-1),($4-1+$9),".","1","-"; else if ((($2 ==83) || ($2 ==147)) && ($9 <0)) print $3,($4-1),($4-1-$9),".","1","-"; else if ((($2 ==99) || ($2 ==163)) && ($9 >0)) print $3,($4-1),($4-1+$9),".","1","+"; else if ((($2 ==99) || ($2 ==163)) && ($9 <0)) print $3,($4-1),($4-1-$9),".","1","+"}' >$files.bed

# Size histogram: take $9 of every record, strip the minus sign, count how often
# each value occurs and write "value<TAB>count" sorted numerically by value
# to $files.size.txt.
${SAMTOOLS}/samtools view $files |awk 'BEGIN {FS="\t"; OFS="\t"} {print $9}' |sed 's/-//g' |awk 'BEGIN {FS="\t";OFS="\t"} {a[$1]++}END{for(i in a){print i,a[i]}}' |sort -k1,1n >$files.size.txt

elif [[ $SEQ_TYPE = "SE" ]]
then
echo " "
echo "$files has $libsize non-redundant mapped reads "

# Single-end BED: every record is turned into an interval sized by
# FRAGMENT_SIZE (passed to awk as step2).
#   $2 == 16 -> ($4 + length($10) - step2) .. ($4 - 1 + length($10)), strand "-"
#   $2 == 0  -> $4 .. ($4 - 1 + step2), strand "+"
# Records with any other $2 are skipped. The second awk replaces a start
# coordinate <= 0 with "1" and passes all other lines through unchanged.
${SAMTOOLS}/samtools view $files |awk -v step2=$FRAGMENT_SIZE 'BEGIN {FS="\t"; OFS="\t"} {if ($2 ==16) print $3,($4+ length($10) - step2),($4-1+ length($10)),".","1","-"; else if ($2 ==0) print $3,$4,($4-1+ step2),".","1","+"}' |awk 'BEGIN {FS="\t"; OFS="\t"} {if ($2 <=0) print $1,"1",$3,$4,$5,$6; else print $0}' >$files.bed

fi

echo " "
echo "##### finish generating file $files.bed"

# Track label: the BAM file name without its
# .${FILTER_TYPE}.${REMOVE_DUP}.s1.bam suffix.
fileName=$( basename $files .${FILTER_TYPE}.${REMOVE_DUP}.s1.bam )

# Coverage pipeline, stage by stage:
#  1. genomeCoverageBed reads $files.bed with the genome table and writes
#     bedGraph (-bg) preceded by a track line named "<fileName>_raw_bdg" that
#     uses the colour in $RCOLOR.
#  2. tee stores that output, track line included, in
#     $files.w${FRAGMENT_SIZE}.raw.bdg and passes it on.
#  3. grep -v drops every line containing "track".
#  4. awk walks each interval from $2 to $3 (inclusive) in STEP_SIZE steps and
#     prints "chrom, i+1, value" for every step.
#  5. awk consumes the first record in BEGIN (getline) to print the wiggle track
#     line "<fileName>_raw_wig" and the first "variableStep chrom=... span=1"
#     header; afterwards it prints a new variableStep header whenever column 1
#     changes, and "position<TAB>value" otherwise.
#  6. awk prints a line only when its first tab-separated field differs from
#     that of the previously printed line, so runs of lines with the same first
#     field collapse to the first one. Presumably this removes the duplicated
#     position that stage 4 emits when one interval ends where the next one
#     starts — TODO confirm.
# The result is written to $files.s${STEP_SIZE}.raw.wig.
$BEDTOOLS"/"genomeCoverageBed -i ${files}.bed -g $GENOME_TABLE -bg -trackline -trackopts "name=\""$fileName"_raw_bdg\" description=\""$fileName"_raw_bdg\" visibility=full color="$RCOLOR | \
tee $files.w${FRAGMENT_SIZE}.raw.bdg |grep -v "track" | \
awk -v step=${STEP_SIZE} 'BEGIN {FS="\t"; OFS="\t"} {for (i=$2;i<=$3;i=i+step) print $1,(i+1),$4}' | \
awk -v trackName=$fileName -v Tcolor=$RCOLOR 'BEGIN {FS="\t"; OFS="\t"; getline; print "track type=wiggle_0 name=\""trackName"_raw_wig\" description=\""trackName"_raw_wig\" visibility=full color="Tcolor;print "variableStep chrom="$1" span=1\n"$2,$3;id=$1; line=$0} {if ($1 != id) {print "variableStep chrom="$1" span=1\n"$2,$3; line = $0} else {print $2,$3} id=$1}' | \
awk 'BEGIN {FS="\t"; OFS="\t"; getline;print $0;id=$1; line=$0} {if ($1 != id) {print $0; line = $0; id=$1}}' >$files.s${STEP_SIZE}.raw.wig

echo " "
echo "##### finish generating file $files.w${FRAGMENT_SIZE}.raw.bdg"
echo "##### finish generating file $files.s${STEP_SIZE}.raw.wig"

# Cleanup of the intermediate BED file is disabled; it stays on disk.
# rm -f ${files}.bed

echo " "
echo "##### start normalization of raw wig and raw bedgraph files"

# File names (without directory) of the two raw tracks, logged for reference.
raw_bdg_name=$( basename $files.w${FRAGMENT_SIZE}.raw.bdg )
raw_wig_name=$( basename $files.s${STEP_SIZE}.raw.wig )
echo $raw_bdg_name
echo $raw_wig_name

# bedGraph normalization is disabled; only the wig file is normalized.
#perl $SOURCE_DIR"/"bedgraph_norm.pl -d $MAP_OUTDIR -i $( echo $(basename $files.w${FRAGMENT_SIZE}.raw.bdg ) ) -c $libsize
# wig_norm.pl receives the mapping output directory, the wig file name inside
# it and the library size.
perl ${SOURCE_DIR}/wig_norm.pl -d $MAP_OUTDIR -i $raw_wig_name -c $libsize

# Create binary TDF file from .wig
# Input is the normalized wig, output is the TDF next to it.
norm_wig=$files.s${STEP_SIZE}_total_based_norm.wig
norm_tdf=$files.s${STEP_SIZE}_total_based_norm.tdf
$IGVTOOLS toTDF $norm_wig $norm_tdf $IGV_REFERENCE_GENOME

# Stop with a non-zero status when the TDF file is missing or has zero size.
if test ! -s $norm_tdf
then
  echo -e "ERROR : $files.s${STEP_SIZE}_total_based_norm.tdf does not exist\n"
  exit 1
fi

# Compress file $1 in place. -f replaces a "$1.gz" left behind by an earlier
# run; without it gzip refuses to overwrite and the stale archive would stay
# next to the fresh uncompressed file. "--" and the quotes keep unusual paths
# from being split or parsed as options.
compress_track() {
  gzip -f -- "$1"
}

compress_track "$files.w${FRAGMENT_SIZE}.raw.bdg"
compress_track "$files.s${STEP_SIZE}_total_based_norm.wig"

echo " "
echo "##### finish normalization"

