#!/bin/bash

########################################################
######  ALIGNMENT CALLER TO IDENTIFY CIRCULAR RNA

######          Program:                        circRNA_detection.sh
######          Date:                           01/11/2016
######          Summary:                        Extract unmapped reads, identify 20 mer anchors, run bowtie2 and find_circ.py
########################################################

# Pipeline overview
#   Arguments: $1 - full path to the config file (KEY=value lines)
#              $2 - sample name
#   Config keys read: BAM_DIR, CIRC_DIR, THREADS, REF_BOWTIE, REF_GENOME_DIR,
#                     PYTHON, BOWTIE, WORKFLOW_PATH, ANCHOR_SIZE
#   Input:   $BAM_DIR/<sample>.unmapped.bam
#   Outputs: (all under $CIRC_DIR/<sample>/, prefixed with <sample>.)
#              unmapped.anchors.gz, sites.log, sites.bed, sites.reads,
#              circRNA.reads, circRNA.expressed.reads,
#              circRNA.expressed.sorted.reads, circRNA.junctions.bed
#   Exit status: 0 on success, 1 on a usage error or when any step fails.

# usage: print the invocation synopsis on stderr.
usage() {
	printf 'Usage: Identify circRNAs\n circRNA_detection.sh \n\n\t1. full path to config file\n\n\t2. sample name\n\n' >&2
}

# fail MESSAGE...: print an error message on stderr.
fail() {
	printf 'circRNA_detection.sh: %s\n' "$*" >&2
}

# config_value KEY FILE: print the value part of the line(s) in FILE that
# start with the word KEY. Everything after the first '=' is kept, so values
# that themselves contain '=' are not truncated.
config_value() {
	grep -w -- "^$1" "$2" | cut -d '=' -f2-
}

# require_config KEY FILE: like config_value, but returns 1 (with a message)
# when the key is absent or its value is empty.
require_config() {
	local value
	value=$(config_value "$1" "$2")
	if [[ -z "$value" ]]; then
		fail "missing or empty $1 in config file $2"
		return 1
	fi
	printf '%s\n' "$value"
}

# main CONFIG SAMPLE: run the three pipeline steps for one sample.
main() {
	if [[ $# -ne 2 ]]; then
		usage
		return 1
	fi

	local config=$1
	local sample=$2

	if [[ ! -r "$config" ]]; then
		fail "cannot read config file: $config"
		return 1
	fi

	set -x
	# A pipeline must fail when any stage fails, not only the last one.
	set -o pipefail
	date

	local bam_dir circ_dir threads bowtie_index_base reference_fa_dir
	local python bowtie2 script_path anchor_size
	bam_dir=$(require_config BAM_DIR "$config") || return 1
	circ_dir=$(require_config CIRC_DIR "$config") || return 1
	threads=$(require_config THREADS "$config") || return 1
	bowtie_index_base=$(require_config REF_BOWTIE "$config") || return 1
	reference_fa_dir=$(require_config REF_GENOME_DIR "$config") || return 1
	# PYTHON is invoked directly as the interpreter; BOWTIE is the directory
	# that contains the bowtie2 executable.
	python=$(require_config PYTHON "$config") || return 1
	bowtie2=$(require_config BOWTIE "$config") || return 1
	script_path=$(require_config WORKFLOW_PATH "$config") || return 1
	anchor_size=$(require_config ANCHOR_SIZE "$config") || return 1

	local sample_dir=$circ_dir/$sample
	local out_prefix=$sample_dir/$sample
	local unmapped_bam=$bam_dir/$sample.unmapped.bam

	if [[ ! -r "$unmapped_bam" ]]; then
		fail "cannot read unmapped BAM: $unmapped_bam"
		return 1
	fi

	# -p: a re-run for the same sample must not fail because the directory exists.
	if ! mkdir -p -- "$sample_dir"; then
		fail "cannot create output directory: $sample_dir"
		return 1
	fi

	# STEP1: extract anchors of length ANCHOR_SIZE from both ends of the unmapped reads
	if ! "$python" "$script_path/unmapped2anchors.py" -a "$anchor_size" "$unmapped_bam" \
		| gzip > "${out_prefix}.unmapped.anchors.gz"; then
		fail "anchor extraction failed for sample $sample"
		return 1
	fi

	# STEP2: align the anchors back to the genome, preserving the paired ordering
	# (--reorder), and feed the alignments to find_circ.py.
	#
	# The redirection used to be written `> sites.bed 2 > sites.reads`. With the
	# space, the shell passes a literal "2" to find_circ.py and redirects stdout
	# twice, so sites.bed was always empty, the stdout records ended up in
	# sites.reads, and stderr was not captured at all. `2>` sends stdout to
	# sites.bed and stderr to sites.reads, as the file names indicate.
	#
	# NOTE(review): -D is given the anchor size, as in the original command;
	# confirm that this is the intended bowtie2 setting when ANCHOR_SIZE != 20.
	# NOTE(review): find_circ.py is not told the anchor length here; if it has
	# its own anchor-size option it probably needs ANCHOR_SIZE too - confirm.
	if ! "$bowtie2/bowtie2" --reorder --mm "-D$anchor_size" -p "$threads" \
			--score-min=C,-15,0 -q -x "$bowtie_index_base" \
			-U "${out_prefix}.unmapped.anchors.gz" \
		| "$python" "$script_path/find_circ.py" -G "$reference_fa_dir" \
			-p "${sample}_" -s "${out_prefix}.sites.log" \
			> "${out_prefix}.sites.bed" 2> "${out_prefix}.sites.reads"; then
		fail "bowtie2 / find_circ.py failed for sample $sample"
		return 1
	fi

	# STEP3: filter down the identified circRNA based on read depth.
	# The records are read from sites.bed, which now holds the find_circ.py
	# stdout that previously landed in sites.reads, so the files written below
	# keep the same content as before.
	# Keep every record containing "circ". grep exits 1 when nothing matches,
	# which is a valid (empty) result; only a status above 1 is an error.
	grep circ "${out_prefix}.sites.bed" > "${out_prefix}.circRNA.reads"
	if [[ $? -gt 1 ]]; then
		fail "cannot filter ${out_prefix}.sites.bed"
		return 1
	fi

	# Keep records whose 5th column is at least 5.
	if ! awk '$5 >= 5' "${out_prefix}.circRNA.reads" \
		> "${out_prefix}.circRNA.expressed.reads"; then
		fail "read-depth filter failed for sample $sample"
		return 1
	fi

	# Sort by column 1, then numerically by columns 2 and 3.
	if ! sort -k 1,1 -k 2,2n -k 3,3n "${out_prefix}.circRNA.expressed.reads" \
		> "${out_prefix}.circRNA.expressed.sorted.reads"; then
		fail "sorting failed for sample $sample"
		return 1
	fi

	# Convert to junctions, BED format: drop lines containing '#' and keep the
	# first six columns. The file is overwritten (not appended to) so that a
	# re-run does not duplicate records.
	if ! awk '$0 !~ /#/' "${out_prefix}.circRNA.expressed.sorted.reads" \
		| cut -f1-6 > "${out_prefix}.circRNA.junctions.bed"; then
		fail "BED conversion failed for sample $sample"
		return 1
	fi

	date
	return 0
}

# Kept as the last command so that the script's exit status is main's status.
main "$@"
