#!/bin/bash

set -x

# Usage: pass the path of the run-info file (KEY=VALUE lines) as the first
# argument of this script.

# Print the value stored under KEY ($1) in the KEY=VALUE file FILE ($2).
# Everything after the first '=' is printed, so a value that itself contains
# '=' is returned intact. Returns 1 (printing nothing) when FILE is not a
# readable regular file, instead of letting grep wait on standard input.
run_info_value() {
	local key=$1 file=$2
	[[ -f "$file" && -r "$file" ]] || return 1
	grep -w "^${key}" "$file" | cut -d '=' -f2-
}

# Print, one per line, the value of every line in FILE ($2) whose key (the
# text before the first '=') starts with PREFIX ($1).
# Returns 1 (printing nothing) when FILE is not a readable regular file.
run_info_list() {
	local prefix=$1 file=$2
	[[ -f "$file" && -r "$file" ]] || return 1
	awk -v prefix="$prefix" '
		{
			eq = index($0, "=")
			if (eq > 0 && index(substr($0, 1, eq - 1), prefix) == 1)
				print substr($0, eq + 1)
		}
	' "$file"
}

if [[ ! -f "$1" || ! -r "$1" ]]; then
	echo "run info file '$1' is missing or unreadable" >&2
fi

# Parse run_info.txt file
PROJECT_NAME=$(run_info_value PROJECT_NAME "$1")
SEQ_DIR=$(run_info_value SEQ_DIR "$1")
SEQ_TYPE=$(run_info_value SEQ_TYPE "$1")
# The list values are deliberately left unquoted: each whitespace-separated
# word becomes one array element, as before.
END1_SEQ=( $(run_info_list END1_SEQ "$1") )
END2_SEQ=( $(run_info_list END2_SEQ "$1") )
SEQ_SUFFIX=$(run_info_value SEQ_SUFFIX "$1")
WORK_DIR=$(run_info_value WORK_DIR "$1")
PEAK_CALLER=$(run_info_value PEAK_CALLER "$1")
IP_FILE=( $(run_info_list IP_FILE "$1") )
INPUT_FILE=( $(run_info_list INPUT_FILE "$1") )
PKGENE_NEIGHDIST=$(run_info_value PKGENE_NEIGHDIST "$1")
tool_info=$(run_info_value TOOL_INFO "$1")

# Parse tool_info.txt file

# Print the value stored under KEY ($1) in a KEY=VALUE tool-info file
# ($2, defaulting to $tool_info). Everything after the first '=' is printed,
# so values containing '=' are not cut short. Returns 1 (printing nothing)
# when the file is not a readable regular file.
tool_info_value() {
	local key=$1 file=${2:-$tool_info}
	[[ -f "$file" && -r "$file" ]] || return 1
	grep -w "^${key}" "$file" | cut -d '=' -f2-
}

if [[ ! -f "$tool_info" || ! -r "$tool_info" ]]; then
	echo "tool info file '$tool_info' is missing or unreadable" >&2
fi

FASTQC=$(tool_info_value FASTQC)
BWA_REF=$(tool_info_value BWA_REF)
BWA_PATH=$(tool_info_value BWA_PATH)
MACS_PATH=$(tool_info_value MACS_PATH)
SICER=$(tool_info_value SICER)
SAMTOOLS=$(tool_info_value SAMTOOLS)
BEDTOOLS=$(tool_info_value BEDTOOLS)
PICARD=$(tool_info_value PICARD)
GENOME_TABLE=$(tool_info_value GENOME_TABLE)
TCLR_LIST=$(tool_info_value TCLR_LIST)
UCSC_REF_FLAT=$(tool_info_value UCSC_REF_FLAT)
# SOURCE_DIR is stored under the CHIPSEQ_DIR key.
SOURCE_DIR=$(tool_info_value CHIPSEQ_DIR)
IDR_ARGS=$(tool_info_value IDR_ARGS)
IDR_CUTOFF=$(tool_info_value IDR_CUTOFF)

# Output directories, both located directly under WORK_DIR.
MACS2_OUTDIR="${WORK_DIR}/macs2out"
IDR_OUTDIR="${WORK_DIR}/idrout"

# Sort every "*_macs2_peaks.encodePeak" file found in MACS2_OUTDIR by column 8
# (numeric, descending) after dropping lines containing "track", and place the
# result as "<name>.sorted" in IDR_OUTDIR.
# Globals read: MACS2_OUTDIR, IDR_OUTDIR.
sort_macs2_peaks() {
	local peak_file
	# mv would otherwise rename the first sorted file *to* IDR_OUTDIR if the
	# directory did not exist yet.
	mkdir -p -- "$IDR_OUTDIR"
	for peak_file in "$MACS2_OUTDIR"/*_macs2_peaks.encodePeak; do
		# An unmatched glob stays literal; skip it instead of creating a
		# junk "*_macs2_peaks.encodePeak.sorted" file.
		[[ -e "$peak_file" ]] || continue
		grep -v "track" "$peak_file" | sort -k8,8nr > "$peak_file.sorted"
		mv -- "$peak_file.sorted" "$IDR_OUTDIR/"
	done
}

if [[ $PEAK_CALLER = "macs2idr" && ${#IP_FILE[@]} -ge 2 ]]; then
	echo " "
	echo "start to sort macs2 output peak files, $(date)"

	sort_macs2_peaks
fi

### IDR consistency analysis between biological replicates
### use sorted MACS2 peak files as input

# Run batch-consistency-analysis.r on the sorted r1pr0 peak files of two
# replicates.
#   $1, $2  zero-based indexes into IP_FILE of the two replicates
#   $3      tag appended to "<PROJECT_NAME>_<SEQ_TYPE>_idr_" to form the
#           output prefix (r12, r13 or r23)
# Globals read: SOURCE_DIR, SEQ_DIR, SEQ_SUFFIX, SEQ_TYPE, IDR_OUTDIR,
#               PROJECT_NAME, IP_FILE, IDR_ARGS, GENOME_TABLE.
run_replicate_idr() {
	local first second
	first=$(basename "$SEQ_DIR/${IP_FILE[$1]}" ".${SEQ_SUFFIX}")
	second=$(basename "$SEQ_DIR/${IP_FILE[$2]}" ".${SEQ_SUFFIX}")
	# IDR_ARGS stays unquoted so that it can expand to several arguments,
	# exactly as on the original command line.
	Rscript "$SOURCE_DIR/batch-consistency-analysis.r" \
		"$IDR_OUTDIR/${first}.${SEQ_TYPE}_r1pr0_macs2_peaks.encodePeak.sorted" \
		"$IDR_OUTDIR/${second}.${SEQ_TYPE}_r1pr0_macs2_peaks.encodePeak.sorted" \
		-1 "$IDR_OUTDIR/${PROJECT_NAME}_${SEQ_TYPE}_idr_$3" $IDR_ARGS "--genometable=$GENOME_TABLE"
}

echo " "
echo "start IDR consistency analysis for biological replicates, $(date)"

if [[ ${#IP_FILE[@]} -ge 2 ]]; then
	run_replicate_idr 0 1 r12

	# With exactly three replicates the two remaining pairs are compared too.
	if [[ ${#IP_FILE[@]} -eq 3 ]]; then
		run_replicate_idr 0 2 r13
		run_replicate_idr 1 2 r23
	fi
else
	echo "replicate IDR analysis needs at least two IP files, found ${#IP_FILE[@]}; skipping" >&2
fi

### IDR consistency analysis between pseudo replicates for merged IP and for each IP
### use sorted MACS2 peaks files as input

# Run batch-consistency-analysis.r on the two pseudo-replicate peak files
# "<sample>.<SEQ_TYPE>_<rep>pr1..." and "<sample>.<SEQ_TYPE>_<rep>pr2...".
#   $1  sample name (file name prefix)
#   $2  replicate label used in the file names (r0 or r1)
#   $3  tag appended to "<PROJECT_NAME>_<SEQ_TYPE>_idr_" for the output prefix
# Globals read: SOURCE_DIR, IDR_OUTDIR, SEQ_TYPE, PROJECT_NAME, IDR_ARGS,
#               GENOME_TABLE.
run_pseudorep_idr() {
	local sample=$1 rep=$2 tag=$3
	# IDR_ARGS stays unquoted so that it can expand to several arguments.
	Rscript "$SOURCE_DIR/batch-consistency-analysis.r" \
		"$IDR_OUTDIR/${sample}.${SEQ_TYPE}_${rep}pr1_macs2_peaks.encodePeak.sorted" \
		"$IDR_OUTDIR/${sample}.${SEQ_TYPE}_${rep}pr2_macs2_peaks.encodePeak.sorted" \
		-1 "$IDR_OUTDIR/${PROJECT_NAME}_${SEQ_TYPE}_idr_${tag}" $IDR_ARGS "--genometable=$GENOME_TABLE"
}

echo " "
echo "start IDR consistency analysis for pseudo replicates of each IP and merged IP, $(date)"

# Pseudo replicates named after the project (the r0pr1/r0pr2 files).
run_pseudorep_idr "$PROJECT_NAME" r0 r00

# TODO Potential for rewrite or parallelization
# Pseudo replicates of each individual IP; output tags are r10, r20, ...
for ((j = 0; j < ${#IP_FILE[@]}; j++)); do
	FileNum=$((j + 1))
	SEQ1NAME=$(basename "$SEQ_DIR/${IP_FILE[j]}" ".${SEQ_SUFFIX}")

	run_pseudorep_idr "$SEQ1NAME" r1 "r${FileNum}0"
done

### IDR plot for all *-uri.sav files
### use output files from batch-consistency-analysis.r

# Fill the global array IDR_URI_FILES with the existing "*-uri.sav" files of
# this project in IDR_OUTDIR, in this order: replicate pairs (r12, r13, r23),
# per-replicate pseudo replicates (r10, r20, r30), then r00.
# Patterns that match nothing are dropped instead of being kept as literal
# (non-existent) file names.
# Globals read: IDR_OUTDIR, PROJECT_NAME, SEQ_TYPE.
collect_uri_files() {
	local candidate
	IDR_URI_FILES=()
	for candidate in \
		"$IDR_OUTDIR"/*"${PROJECT_NAME}_${SEQ_TYPE}"_idr_r[1-2][2-3]-uri.sav \
		"$IDR_OUTDIR"/*"${PROJECT_NAME}_${SEQ_TYPE}"_idr_r[1-3]0-uri.sav \
		"$IDR_OUTDIR"/*"${PROJECT_NAME}_${SEQ_TYPE}"_idr_r00-uri.sav
	do
		if [[ -e "$candidate" ]]; then
			IDR_URI_FILES+=( "$candidate" )
		fi
	done
}

echo " "
echo "start idr plot for project ${PROJECT_NAME}, $(date)"

collect_uri_files
FileNum=${#IDR_URI_FILES[@]}

# The plot script receives the file names without their "-uri.sav" suffix.
uri_prefixes=( "${IDR_URI_FILES[@]%-uri.sav}" )
FileList="${uri_prefixes[*]}"

# Same names without the directory, for the log message only.
uri_names=( "${uri_prefixes[@]#"$IDR_OUTDIR"/}" )
FileList2="${uri_names[*]}"

echo "$FileNum files are used for the consistency plot, which are: $FileList2"

if (( FileNum > 0 )); then
	Rscript "$SOURCE_DIR/batch-consistency-plot.r" "$FileNum" \
		"$IDR_OUTDIR/${PROJECT_NAME}_${#IP_FILE[@]}replicates_" "${uri_prefixes[@]}"
else
	echo "no *-uri.sav files found in '$IDR_OUTDIR'; skipping IDR plot" >&2
fi

### start extract peaks based on idr cutoff
### extract both conservative peaks and optimal peaks from merged IP

# Print the largest peak count found at IDR_CUTOFF in the given
# "*-npeaks-aboveIDR.txt" files; print nothing when no row matches.
# The files are pasted side by side, every 4th column is read as an integer
# peak count, and the N-th row with at least four fields is treated as IDR
# threshold N/100. Only the counts take part in the maximum (the IDR value
# itself is excluded, so all-zero counts give 0 rather than the cutoff).
# Globals read: IDR_CUTOFF.
max_peaks_at_cutoff() {
	paste "$@" | awk -v idr="$IDR_CUTOFF" '
		NF >= 4 {
			row++
			if (sprintf("%.2f", row / 100) + 0 != idr + 0)
				next
			best = int($4)
			for (i = 8; i <= NF; i += 4)
				if (int($i) > best)
					best = int($i)
			printf("%d\n", best)
		}
	'
}

# Write the first COUNT ($1) lines of the project-level r0pr0 sorted peak
# file, re-sorted by columns 1, 2 (numeric) and 3 (numeric), to
# "<PROJECT_NAME>_<SEQ_TYPE>_idr<IDR_CUTOFF>_top<COUNT>_macs2_peaks.encodePeak".
# Globals read: IDR_OUTDIR, PROJECT_NAME, SEQ_TYPE, IDR_CUTOFF.
extract_top_peaks() {
	local count=$1
	head -n "$count" "$IDR_OUTDIR/${PROJECT_NAME}.${SEQ_TYPE}_r0pr0_macs2_peaks.encodePeak.sorted" \
		| sort -k1,1 -k2,2n -k3,3n \
		> "$IDR_OUTDIR/${PROJECT_NAME}_${SEQ_TYPE}_idr${IDR_CUTOFF}_top${count}_macs2_peaks.encodePeak"
}

echo " "
echo "start to extract a subset of peaks based on idr cutoff, $(date)"

# TopN1: maximum over the replicate-pair comparisons (r12, r13, r23).
TopN1=$(max_peaks_at_cutoff \
	"$IDR_OUTDIR"/*"${PROJECT_NAME}_${SEQ_TYPE}"_idr_r[1-2][2-3]-npeaks-aboveIDR.txt)

# TopN2: maximum over the replicate-pair comparisons and the r00 comparison.
TopN2=$(max_peaks_at_cutoff \
	"$IDR_OUTDIR"/*"${PROJECT_NAME}_${SEQ_TYPE}"_idr_r[1-2][2-3]-npeaks-aboveIDR.txt \
	"$IDR_OUTDIR"/*"${PROJECT_NAME}_${SEQ_TYPE}"_idr_r00-npeaks-aboveIDR.txt)

if [[ ! $TopN1 =~ ^[0-9]+$ || ! $TopN2 =~ ^[0-9]+$ ]]; then
	# Without a numeric count "head -n" would be handed an empty argument.
	echo "could not determine peak counts at IDR=$IDR_CUTOFF (TopN1='$TopN1', TopN2='$TopN2'); skipping peak extraction" >&2
elif [[ $TopN1 -eq $TopN2 ]]; then
	echo " "
	echo "start extract $TopN1 conservative peaks from merged IP vs. merged input at IDR=$IDR_CUTOFF"

	extract_top_peaks "$TopN1"
else
	echo " "
	echo "start to extract $TopN1 conservative peaks and $TopN2 optimal peaks from merged IP vs. merged input at IDR=$IDR_CUTOFF"

	extract_top_peaks "$TopN1"
	extract_top_peaks "$TopN2"
fi

### start to add idr value to the peaks from biological replicates

# Print the tab-separated peak file PEAKS ($2) with column 4 replaced by the
# value that LOOKUP ($1) lists for the same first three columns; peaks that
# have no (numeric-looking) entry in LOOKUP get "1". Only the first ten
# columns are printed.
# LOOKUP rows are read as: col1, col2, col3, value.
add_idr_to_peaks() {
	local lookup=$1 peaks=$2
	# "(getline < file) > 0" ends the loop on EOF and on read errors alike;
	# a bare "getline < file" yields -1 for an unreadable file, which is
	# true in awk and would loop forever.
	awk -v file="$lookup" '
		BEGIN {
			OFS = "\t"
			while ((getline < file) > 0)
				f[$1 "\t" $2 "\t" $3] = $4
		}
		{
			key = $1 "\t" $2 "\t" $3
			if (f[key] ~ /[0-9]/)
				print $0, f[key]
			else
				print $0, "1"
		}
	' "$peaks" \
		| awk 'BEGIN {FS="\t"; OFS="\t"} {$4=$11; print}' \
		| cut -f 1-10
}

# Annotate the r1pr0 MACS2 peaks of replicate REP ($1: 1 or 2) with the value
# in column 11 of the r12 overlapped-peaks table and write
# "<sample>.<SEQ_TYPE>_macs2_idr_peaks.encodePeak" into IDR_OUTDIR.
# Globals read: SEQ_DIR, SEQ_SUFFIX, SEQ_TYPE, IP_FILE, PROJECT_NAME,
#               MACS2_OUTDIR, IDR_OUTDIR, BEDTOOLS.
add_idr_for_replicate() {
	local rep=$1
	local sample first_col peaks coords lookup
	sample=$(basename "$SEQ_DIR/${IP_FILE[rep - 1]}" ".${SEQ_SUFFIX}")
	# Replicate 1 uses columns 2-5 of the overlap table, replicate 2 columns 6-9.
	first_col=$(( 4 * rep - 2 ))
	peaks="$MACS2_OUTDIR/${sample}.${SEQ_TYPE}_r1pr0_macs2_peaks.encodePeak"
	coords="$IDR_OUTDIR/${PROJECT_NAME}.temp.idr${rep}.txt"
	lookup="$MACS2_OUTDIR/${PROJECT_NAME}.temp.idr${rep}.encodePeak"

	# Four replicate-specific columns + column 11 + a "+" column; lines
	# containing "start" (the header) are dropped.
	awk -v c="$first_col" 'BEGIN {FS="\t"; OFS="\t"} {print $c, $(c+1), $(c+2), $(c+3), $11, "+"}' \
		"$IDR_OUTDIR/${PROJECT_NAME}_${SEQ_TYPE}_idr_r12-overlapped-peaks.txt" \
		| grep -v "start" > "$coords"

	# Keep columns 1-3 of the MACS2 peak and column 15 of the joined record.
	"$BEDTOOLS/intersectBed" -a "$peaks" -b "$coords" -bed -wa -wb -f 0.5 \
		| cut -f 1-3,15 > "$lookup"

	add_idr_to_peaks "$lookup" "$peaks" \
		> "$IDR_OUTDIR/${sample}.${SEQ_TYPE}_macs2_idr_peaks.encodePeak"

	rm -f -- "$coords" "$lookup"
}

echo " "
echo "start to add idr to the peaks from biological replicates, $(date)"

if [[ ${#IP_FILE[@]} -ge 2 ]]; then
	add_idr_for_replicate 1
	add_idr_for_replicate 2
fi

### IDR summary for comparison between true biological replicates
### set IDR_cutoff in run-info file
# TODO MTK: is the IDR comment above correct? The value is actually 0.05, as far as I can tell.

# Print the tab-separated row "<idr> <count> <count> ..." for IDR_CUTOFF from
# the given "*-npeaks-aboveIDR.txt" files.
# The files are pasted side by side, spaces are turned into tabs, every 4th
# column is kept as an integer count, the remaining rows are numbered NR/100
# and only the row equal to IDR_CUTOFF is printed.
# The space-to-tab conversion is applied to paste's *output* (as in the other
# npeaks pipelines of this script); applying it to the file list would leave
# space-delimited rows with fewer than four tab-separated fields.
# Globals read: IDR_CUTOFF.
npeaks_row_at_cutoff() {
	paste "$@" \
		| tr -s " " "\t" \
		| awk 'BEGIN {FS="\t"; OFS="\t"} {for (i=4; i<=NF; i=i+4) printf("%d\t",$i);print "\n"}' \
		| awk '/./' \
		| awk 'BEGIN {FS="\t"; OFS="\t"} {printf("%.2f\t",NR/100); print $0}' \
		| awk -v idr="${IDR_CUTOFF}" 'BEGIN {FS="\t"; OFS="\t"} {if ($1 == idr) print $0}'
}

echo " "
echo "start to generate two IDR analysis summary files, $(date)"

# First section of the summary file (the file is created/truncated here).
# A row with 3 fields (cutoff + 2 counts) yields one comparison line, a row
# with 4 fields (cutoff + 3 counts) yields the three pairwise lines.
# NOTE(review): the ratios divide by a replicate's count; a count of 0 would
# presumably abort awk with a division-by-zero error - confirm.
npeaks_row_at_cutoff "$IDR_OUTDIR"/*"${PROJECT_NAME}_${SEQ_TYPE}"_idr_r[1-3]0-npeaks-aboveIDR.txt \
	| awk 'BEGIN {OFS="\t"; print "\n\n###### summary of IDR analysis between biological replicates\n# For comparison between pairs of replicates, as listed in columns 2 and 4, respectively\n# Columns 3 is the number of shared peaks between pseudo-replicates from the biological replicate shown in column 2\n# Columns 5 is the number of shared peaks between pseudo-replicates from biological replicate shown in column 4\n# The last column is their ratio (column 3/column 5), which should be between 0.5 and 2\n"; print "#IDR_cutoff","Replicate_A","Replicate_A_peak","Replicate_B","Replicate_B_peak","Peak_ratio"} {if (NF == 3) printf("%.2f\t%s\t%d\t%s\t%d\t%.2f\n",$1,"Replicate1",$2,"Replicate2",$3,$2/$3); else if (NF == 4) printf("%.2f\t%s\t%d\t%s\t%d\t%.2f\n%.2f\t%s\t%d\t%s\t%d\t%.2f\n%.2f\t%s\t%d\t%s\t%d\t%.2f\n",$1,"Replicate1",$2,"Replicate2",$3,$2/$3,$1,"Replicate1",$2,"Replicate3",$4,$2/$4,$1,"Replicate2",$3,"Replicate3",$4,$3/$4)}' \
	> "$IDR_OUTDIR/${PROJECT_NAME}_${SEQ_TYPE}_idr_summary.txt"

# Append the second section to <PROJECT_NAME>_<SEQ_TYPE>_idr_summary.txt.
# Pipeline stages, in order:
#   1. ls lists the r00 npeaks file and the replicate-pair npeaks files
#      (r12/r13/r23); paste joins them side by side and spaces become tabs.
#   2. every 4th column is kept as an integer count; empty lines are dropped.
#   3. each remaining row is prefixed with NR/100 and only the row whose
#      first field equals IDR_CUTOFF is kept.
#   4. the last awk prints the section header, then one line when the row has
#      3 fields (cutoff, r00 count, one pair count) or three lines when it
#      has 5 fields (cutoff, r00 count, three pair counts).
# NOTE(review): the ratios divide by the pair counts; a count of 0 would
# presumably abort awk with a division-by-zero error - confirm.
paste $( ls $IDR_OUTDIR"/"*${PROJECT_NAME}_${SEQ_TYPE}_idr_r00-npeaks-aboveIDR.txt ${IDR_OUTDIR}"/"*${PROJECT_NAME}_${SEQ_TYPE}_idr_r[1-2][2-3]-npeaks-aboveIDR.txt |awk 'OFS=" "{print $NF}' |tr -s "\n" " ") |tr -s " " "\t" |awk 'BEGIN {FS="\t"; OFS="\t"} {for (i=4; i<=NF; i=i+4) printf("%d\t",$i);print "\n"}' |awk '/./' |awk 'BEGIN {FS="\t"; OFS="\t"} {printf("%.2f\t",NR/100); print $0}' | awk -v idr=${IDR_CUTOFF} 'BEGIN {FS="\t"; OFS="\t"} {if ($1 == idr) print $0}' |awk 'BEGIN {OFS="\t"; print "\n\n###### summary of IDR analysis for pooled IP and individual IPs\n# Column 2 is the number of shared peaks called from pseudo-replicates of pooled IP\n# Column 4 is the number of shared peaks called between pairs of biological replicates as shown in column 3\n# The last column is their ratio at the given IDR cutoff, which should not exceed 2\n"; print "#IDR_cutoff","PooledIP_peak","Comparison","Replicate_shared_peak","Peak_ratio"} {if (NF == 3) printf("%.2f\t%d\t%s\t%d\t%.2f\n",$1,$2,"Replicate1_vs_2",$3,$2/$3); else if (NF == 5) printf("%.2f\t%d\t%s\t%d\t%.2f\n%.2f\t%d\t%s\t%d\t%.2f\n%.2f\t%d\t%s\t%d\t%.2f\n",$1,$2,"Replicate1_vs_2",$3,$2/$3,$1,$2,"Replicate1_vs_3",$4,$2/$4,$1,$2,"Replicate2_vs_3",$5,$2/$5)}'  >> ${IDR_OUTDIR}"/"${PROJECT_NAME}_${SEQ_TYPE}_idr_summary.txt

# Append the closing "Number of reliable peaks" section, reporting the TopN1
# and TopN2 values computed earlier in this script.
echo " " >>$IDR_OUTDIR"/"${PROJECT_NAME}_${SEQ_TYPE}_idr_summary.txt
echo "###### Number of reliable peaks" >>${IDR_OUTDIR}"/"${PROJECT_NAME}_${SEQ_TYPE}_idr_summary.txt
echo "there are $TopN1 conservative peaks and $TopN2 optimal peaks from merged IP vs. merged input at IDR=$IDR_CUTOFF" >>${IDR_OUTDIR}"/"${PROJECT_NAME}_${SEQ_TYPE}_idr_summary.txt
echo " " >>$IDR_OUTDIR"/"${PROJECT_NAME}_${SEQ_TYPE}_idr_summary.txt

# Create <PROJECT_NAME>_<SEQ_TYPE>_idr_combined.txt (truncating any previous
# file): an explanatory comment header followed by one column-title line.
# The titles are "idr" plus the names of all npeaks files listed by ls
# (replicate pairs, per-replicate r10/r20/r30 and r00) with the directory and
# the "-npeaks-aboveIDR.txt" suffix stripped, joined by tabs.
ls $IDR_OUTDIR"/"*${PROJECT_NAME}_${SEQ_TYPE}_idr_r[1-2][2-3]-npeaks-aboveIDR.txt $IDR_OUTDIR"/"*${PROJECT_NAME}_${SEQ_TYPE}_idr_r[1-3]0-npeaks-aboveIDR.txt $IDR_OUTDIR"/"*${PROJECT_NAME}_${SEQ_TYPE}_idr_r00-npeaks-aboveIDR.txt |awk 'OFS=" "{print $NF}' |sed 's/-npeaks-aboveIDR.txt//g' |sed "s#${IDR_OUTDIR}/##g" |tr -s "\n" "\t" |awk 'BEGIN {FS="\t"; OFS="\t"; print "\n\n# Columns with idr_r12, idr_r13, or idr_r23 are number of shared peaks between biological replicates\n# idr_r12 indicates comparison between replicates 1 and 2, idr_r13 indicates comparison between replicates 1 and 3, etc\n# Columns with idr_r10, idr_r20, or idr_r30 are number of shared peaks between pseudo replicates from each biological replicate\n# Column with idr_r00 is number of shared peaks between pseudo replicates from pooled IP\n# If two replicates, only idr_r12, idr_r10, idr_r20 and idr_r00 exist\n"} {print "idr",$0}' > ${IDR_OUTDIR}"/"${PROJECT_NAME}_${SEQ_TYPE}_idr_combined.txt

# Append the table body: the same files, in the same ls order, pasted side by
# side; every 4th column is kept as an integer count and each row is prefixed
# with NR/100. Unlike the summary file, no IDR_CUTOFF filter is applied, so
# every row is written.
paste $( ls ${IDR_OUTDIR}"/"*${PROJECT_NAME}_${SEQ_TYPE}_idr_r[1-2][2-3]-npeaks-aboveIDR.txt ${IDR_OUTDIR}"/"*${PROJECT_NAME}_${SEQ_TYPE}_idr_r[1-3]0-npeaks-aboveIDR.txt ${IDR_OUTDIR}"/"*${PROJECT_NAME}_${SEQ_TYPE}_idr_r00-npeaks-aboveIDR.txt |awk 'OFS=" " {print $NF}' |tr -s "\n" " ") |tr -s " " "\t" |awk '{for (i=4; i<=NF; i=i+4) printf("%d\t",$i);print "\n"}' | awk '/./' |awk 'BEGIN {FS="\t"; OFS="\t"} {printf("%.2f\t",NR/100); print $0}' >> ${IDR_OUTDIR}"/"${PROJECT_NAME}_${SEQ_TYPE}_idr_combined.txt

# Run find_nearby_genes.pl for one sample.
#   $1  sample name, i.e. the prefix of
#       "<sample>.<SEQ_TYPE>_macs2_idr_peaks.encodePeak" in IDR_OUTDIR
# The first four columns of the peak file are written to a temporary
# "<sample>.<SEQ_TYPE>_peaksBED.tmp" file, which is passed to the Perl script
# together with UCSC_REF_FLAT, the output path
# "<sample>.<SEQ_TYPE>_peak_vs_gene.xls" and PKGENE_NEIGHDIST; the temporary
# file is removed afterwards.
# Globals read: IDR_OUTDIR, SEQ_TYPE, SOURCE_DIR, UCSC_REF_FLAT,
#               PKGENE_NEIGHDIST.
annotate_nearby_genes() {
	local sample=$1
	local bed_tmp="$IDR_OUTDIR/${sample}.${SEQ_TYPE}_peaksBED.tmp"

	awk '{print $1"\t"$2"\t"$3"\t"$4}' \
		"$IDR_OUTDIR/${sample}.${SEQ_TYPE}_macs2_idr_peaks.encodePeak" > "$bed_tmp"

	# PKGENE_NEIGHDIST is only passed when set, as with the original
	# unquoted expansion.
	"$SOURCE_DIR/find_nearby_genes.pl" "$bed_tmp" "$UCSC_REF_FLAT" \
		"$IDR_OUTDIR/${sample}.${SEQ_TYPE}_peak_vs_gene.xls" \
		${PKGENE_NEIGHDIST:+"$PKGENE_NEIGHDIST"}

	# Remove the temporary file by its exact name; a quoted "*" pattern is
	# taken literally by rm and never matches the temporary files.
	rm -f -- "$bed_tmp"
}

# The *_macs2_idr_peaks.encodePeak inputs are only produced when at least two
# IP files exist, so the same condition applies here.
if [[ ${#IP_FILE[@]} -ge 2 ]]; then
	annotate_nearby_genes "$(basename "${IP_FILE[0]}" ".${SEQ_SUFFIX}")"
	annotate_nearby_genes "$(basename "${IP_FILE[1]}" ".${SEQ_SUFFIX}")"
fi

# Closing banner: name the two summary files and report the completion time,
# framed by a line holding a single space before and after.
printf '%s\n' \
	" " \
	"Finish generate two IDR analysis summary files: ${PROJECT_NAME}_${SEQ_TYPE}_idr_summary.txt and ${PROJECT_NAME}_${SEQ_TYPE}_idr_combined.txt" \
	"Complete IDR analysis for project $PROJECT_NAME, $(date)" \
	" "

