#!/bin/bash

# Banner: timestamp, purpose of the script, and where the BioR commands under test come from
date
echo "Run smoke tests on RCF and compare expected to actual output"
echo "For more details, see: http://cocomo/wiki/index.php/BioR/operations#Smoke_Test_on_RCF"

echo "Running BioR version: $BIOR_LITE_HOME"
# Location of the first bior_drill found on the PATH (empty when none is found)
whichBiorDrill=$(command -v bior_drill)
echo "bior_drill coming from:  $whichBiorDrill"
echo ""

# RELEASE is supplied through the environment and names the BioR version under test
if [[ -z "$RELEASE" ]] ; then
  echo "ERROR: RELEASE variable not set"
  echo "Set the RELEASE variable to the version of BioR to test"
  echo "Ex: export RELEASE=4.3.0"
  exit 1
fi
echo ""
echo "Checking that the bior version matches the release we are testing..."
# The bior_drill path must contain RELEASE as a *literal* substring.
# (A regex match would let the dots in a version such as 4.3.0 match any character.)
if [[ "$whichBiorDrill" != *"$RELEASE"* ]] ; then
  echo "  ERROR: You are not running the same BioR version that you are testing"
  echo "    BioR version you are running: $whichBiorDrill"
  echo "    BioR version to test:         $RELEASE"
  exit 1
fi
# NOTE: DIR must be a directory that is accessible by the grid engine!  (NOT one of the /local2/tmp dirs)
DIR=/data5/bsi/BIOR/Deployment/v${RELEASE}
# -p: stay quiet when the directory is left over from an earlier run of the same release
mkdir -p "$DIR"
echo "Verifying Java version being used (should be 1.6)... to ensure same order of JSON objects and precision of values"
export JAVA_HOME=/usr/local/biotools/java/jdk1.6.0_05
# Need to use the groovy from Mike's directory for now since we are using Java 6 and would otherwise receive errors
# from groovy about NIO incompatibilities (requires Java 7)
# SEE:  http://a5gocom.blogspot.com/2014/09/groovy-how-to-disable-warning-for.html
export GROOVY_HOME=/home/m054457/groovy-2.4.3/
# export GROOVY_HOME=/data5/bsi/bictools/src/groovy/2.4.7/
# Put the pinned Java and Groovy ahead of anything else already on the PATH
export PATH=$JAVA_HOME/bin:$GROOVY_HOME/bin:$PATH

# The java that now resolves first on the PATH must live under the expected JDK 1.6 location
if [[ "$(command -v java)" == *"/usr/local/biotools/java/jdk1.6"* ]] ; then
  # "java -version" reports on stderr, so stderr is folded into the captured text
  echo "ok - using correct java version: $(java -version 2>&1)"
else
  echo "Java version should be 1.6 to ensure compatibility"
  echo "Verify that this Java directory exists and/or is the correct version: $JAVA_HOME"
  exit 1
fi


# Make the "Temp" directory (if it doesn't already exist) and make it world-writeable so anyone can write into it.
# A chmod failure is not treated as fatal here (e.g. when the directory already exists and belongs to someone else).
mkdir -p "$DIR/Temp"
chmod 777 "$DIR/Temp"


# Optional first argument: the number of the single test to run (every test runs when it is omitted)
testToRun=$1

# Create the temp directories for "Diff" and "Actual"
# The timestamp gives every run its own set of directories
TIMESTAMP=$(date +"%Y_%m_%d.%H_%M_%S")
ACTUAL_DIR=$DIR/Temp/Temp.${TIMESTAMP}.Actual
DIFF_DIR=$DIR/Temp/Temp.${TIMESTAMP}.Diff
TEMP_DIR=$DIR/Temp/Temp.${TIMESTAMP}.Blaster

# Without these directories every test below would fail with misleading errors, so stop right away
if ! mkdir -p "$ACTUAL_DIR" "$DIFF_DIR" "$TEMP_DIR" ; then
  echo "ERROR: Could not create the temp output directories under $DIR/Temp"
  exit 1
fi

echo "Writing actual output to:  $(readlink -f "$ACTUAL_DIR")"
echo "Writing diff   output to:  $(readlink -f "$DIFF_DIR")"
echo "Writing temp   output to:  $(readlink -f "$TEMP_DIR")"

# Set the permissions to group read,write,execute, so people in biostats group can delete the temp dirs
chmod g+rwx "$ACTUAL_DIR" "$DIFF_DIR" "$TEMP_DIR"


#######################################
# Compare the actual output of one smoke test against its expected output
# and print the verdict.
# Globals:
#   DIFF_DIR        (read) directory receiving <testNum>.diff.txt and <testNum>.err.txt
#   BIOR_LITE_HOME  (read) BioR install path; occurrences in the actual file become "PATH"
# Arguments:
#   $1 - test label used in messages and in the diff/err file names (e.g. 1, 17a)
#   $2 - file holding the actual output; NOTE: it is rewritten in place by the
#        path normalization below
#   $3 - file holding the expected output
# Outputs:
#   "<testNum>) OK" on stdout, or an error report that includes whatever the
#   diff script wrote to stderr
# Requires:
#   groovy on the PATH and diff.groovy in the current working directory
#######################################
function verifyOutput
{
  local testNum=$1
  local fileActual=$2
  local fileExpected=$3
  local diffFile="$DIFF_DIR/$testNum.diff.txt"
  local errFile="$DIFF_DIR/$testNum.err.txt"
  local numLinesDiff numLinesError

  # Replace "/home" with "/usr/local" since readlink retrieves the true path
  sed -i "s|/home|/usr/local|g" "$fileActual"
  # Replace the BioR path with "PATH".  NOTE: We must use pipes instead of slashes because the path contains slashes!
  # Skipped when BIOR_LITE_HOME is empty: "s||PATH|g" is not a valid sed command
  if [[ -n "$BIOR_LITE_HOME" ]] ; then
    sed -i "s|$BIOR_LITE_HOME|PATH|g" "$fileActual"
  fi

  # NOTE(review): the meaning of the trailing "true" argument is defined by diff.groovy - confirm there
  groovy  diff.groovy "$fileExpected"  "$fileActual"  true  > "$diffFile"  2> "$errFile"
  ### NOTE: Can't really rely on exit code since grep'ing out all lines will return a 1

  # NOTE(review): integer division - fewer than 5 lines of diff output count as zero differences;
  # presumably diff.groovy emits 5 lines per difference - confirm
  numLinesDiff=$(wc -l < "$diffFile")
  numLinesDiff=$(( ${numLinesDiff:-0} / 5 ))
  numLinesError=$(wc -l < "$errFile")
  numLinesError=$(( ${numLinesError:-0} ))

  if (( numLinesDiff > 0 || numLinesError > 0 )) ; then
    printf '%s) Error comparing test:\n    %s \n    %s\n' "$testNum" "$fileActual" "$fileExpected"
    echo "# of lines Diff:  $numLinesDiff"
    echo "Errors: ============================================="
    cat "$errFile"
    echo "====================================================="
  else
    echo "$testNum) OK"
  fi
}


# Catalogs used as inputs by the tests in this script
dbSnpCtg=/data5/bsi/catalogs/bior/v1/dbSNP/137/00-All_GRCh37.tsv.bgz
genes=$BIOR_CATALOG/NCBIGene/GRCh37_p13/genes.tsv.bgz


## 1) Drill INFO.GENEINFO out of the first 100 lines of the dbSNP catalog
if [[ -z "$testToRun" || "$testToRun" == 1 ]] ; then
  zcat "$dbSnpCtg" | head -100 | bior_drill -p INFO.GENEINFO  > "$ACTUAL_DIR/1.actual"
  verifyOutput  1  "$ACTUAL_DIR/1.actual"  Expected/1.expected
fi

## 2) Overlap the first 100 lines of the dbSNP catalog with the NCBIGene catalog
# NOTE(review): this uses GRCh37_p10 while $genes points at GRCh37_p13 - presumably
# Expected/2.expected was generated against p10; confirm before switching to $genes
if [[ -z "$testToRun" || "$testToRun" == 2 ]] ; then
  zcat "$dbSnpCtg" | head -100 | bior_overlap -d "$BIOR_CATALOG/NCBIGene/GRCh37_p10/genes.tsv.bgz" > "$ACTUAL_DIR/2.actual"
  verifyOutput  2  "$ACTUAL_DIR/2.actual"  Expected/2.expected
fi

## 3) VCF -> tjson -> same_variant against dbSNP -> drill _id -> lookup by ID in dbSNP
if [[ -z "$testToRun" || "$testToRun" == 3 ]] ; then
  bior_vcf_to_tjson < /data5/bsi/BIOR/Test/sampleVariants50.vcf \
    | bior_same_variant -d "$dbSnpCtg" \
    | bior_drill -p _id \
    | bior_lookup -d "$dbSnpCtg" -p ID > "$ACTUAL_DIR/3.actual"
  verifyOutput  3  "$ACTUAL_DIR/3.actual"  Expected/3.expected
fi

##4) bior_same_variant where there should be a hit with one alt allele that overlaps: [A,C] -- [G,C]
##   (in this test the allele shared by the input and the catalog line is CAAAA)
## From catalog:
## 1       15116256        15116257        CA      ["C","CAA","CAAA","CAAAA","CAAAAAAAAAAAAA"]
## What we'll use as input:
if [[ -z "$testToRun" || "$testToRun" == 4 ]] ; then
  variant="#variant\n{'_landmark':'1','_minBP':15116256,'_maxBP':15116257,'_refAllele':'CA','_altAlleles':['CAAAA','AAA']}"
  # Quoted: the [...] inside the JSON must never be treated as a glob pattern or be word-split.
  # echo -e turns the \n into the line break between the header line and the JSON line.
  echo -e "$variant" | bior_same_variant  -d "$dbSnpCtg" > "$ACTUAL_DIR/4.actual"
  verifyOutput  4  "$ACTUAL_DIR/4.actual"  Expected/4.expected
fi

## 5) Drill JSON Objects (not individual key-value pairs)
##    The last path (INFO.bad) does not exist in the input JSON
if [[ -z "$testToRun" || "$testToRun" == 5 ]] ; then
  echo "{'INFO':{'InternalObj':{'key':'val'}}}" \
    | bior_drill -p INFO -p INFO.InternalObj -p INFO.InternalObj.key -p INFO.bad  > "$ACTUAL_DIR/5.actual"
  verifyOutput  5  "$ACTUAL_DIR/5.actual"  Expected/5.expected
fi

## 6) VCF -> tjson -> drill INFO with the -k flag
if [[ -z "$testToRun" || "$testToRun" == 6 ]] ; then
  bior_vcf_to_tjson < /data5/bsi/BIOR/Test/sampleVariants50.vcf \
    | bior_drill -p INFO -k > "$ACTUAL_DIR/6.actual"
  verifyOutput  6  "$ACTUAL_DIR/6.actual"  Expected/6.expected
fi

## 7) VCF -> tjson -> pretty print
if [[ -z "$testToRun" || "$testToRun" == 7 ]] ; then
  bior_vcf_to_tjson < /data5/bsi/BIOR/Test/sampleVariants50.vcf \
    | bior_pretty_print > "$ACTUAL_DIR/7.actual"
  verifyOutput  7  "$ACTUAL_DIR/7.actual"  Expected/7.expected
fi

## 8) VCF -> tjson -> overlap with dbSNP -> back to VCF
##    Lines containing "fileDate" are dropped before the comparison
if [[ -z "$testToRun" || "$testToRun" == 8 ]] ; then
  bior_vcf_to_tjson < /data5/bsi/BIOR/Test/sampleVariants50.vcf \
    | bior_overlap -d "$dbSnpCtg" \
    | bior_tjson_to_vcf \
    | grep -v "fileDate" > "$ACTUAL_DIR/8.actual"
  verifyOutput  8  "$ACTUAL_DIR/8.actual"  Expected/8.expected
fi

## 9) Disabled: only announces that the test is skipped.
##    The former bior_vep --all invocation is kept below, commented out, for reference.
case "$testToRun" in
  ""|9)
    echo "9) SKIPPED (old VEP command)"
    # The sed calls rewrite the install-specific prefix of two header values, turning
    #  DataSourceProperties="/home/biotools/bior_scripts/3.0.0-beta/bior_pipeline-3.0.0-SNAPSHOT/conf/tools
    # into
    #  DataSourceProperties="PATH/conf/tools
    # and likewise
    #  ColumnProperties="/home/biotools/bior_scripts/3.0.0-beta/bior_pipeline-3.0.0-SNAPSHOT/conf/tools
    # into
    #  ColumnProperties="PATH/conf/tools
    #echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n22\t29138293\t.\tT\tC\t.\t.\t." | bior_vep --all \
    #  | sed "s/DataSourceProperties=\"[^\"]*\/conf\/tools/DataSourceProperties=\"PATH\/conf\/tools/g"  \
    #  | sed "s/ColumnProperties=\".*\/conf\/tools/ColumnProperties=\"PATH\/conf\/tools/g" \
    #  > $ACTUAL_DIR/9.actual
    #verifyOutput  9  $ACTUAL_DIR/9.actual  Expected/9.expected
    ;;
esac

##10) Disabled: only announces that the test is skipped.
##    The former bior_vep invocation (without --all) is kept below, commented out, for reference.
case "$testToRun" in
  ""|10)
    echo "10) SKIPPED (old VEP command)"
    # The sed calls rewrite the install-specific prefix of two header values, turning
    #  DataSourceProperties="/home/biotools/bior_scripts/3.0.0-beta/bior_pipeline-3.0.0-SNAPSHOT/conf/tools
    # into
    #  DataSourceProperties="PATH/conf/tools
    # and likewise
    #  ColumnProperties="/home/biotools/bior_scripts/3.0.0-beta/bior_pipeline-3.0.0-SNAPSHOT/conf/tools
    # into
    #  ColumnProperties="PATH/conf/tools
    #echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n22\t29138293\t.\tT\tC\t.\t.\t." | bior_vep \
    #  | sed "s/DataSourceProperties=\"[^\"]*\/conf\/tools/DataSourceProperties=\"PATH\/conf\/tools/g"  \
    #  | sed "s/ColumnProperties=\".*\/conf\/tools/ColumnProperties=\"PATH\/conf\/tools/g" \
    #  > $ACTUAL_DIR/10.actual
    #verifyOutput 10  $ACTUAL_DIR/10.actual Expected/10.expected
    ;;
esac

##11) bior_concat of the two sample VCFs, written uncompressed (--nozip) straight to the actual file
if [[ -z "$testToRun" || "$testToRun" == 11 ]] ; then
  bior_concat --nozip -o "$ACTUAL_DIR/11.actual" \
    /data5/bsi/BIOR/Vcfs/sample_rawvariants.20a.vcf \
    /data5/bsi/BIOR/Vcfs/sample_rawvariants.20b.vcf
  verifyOutput 11  "$ACTUAL_DIR/11.actual" Expected/11.expected
fi


##12) bior_merge of the same two sample VCFs, written uncompressed (--nozip) straight to the actual file
if [[ -z "$testToRun" || "$testToRun" == 12 ]] ; then
  bior_merge --nozip  -o "$ACTUAL_DIR/12.actual" \
    /data5/bsi/BIOR/Vcfs/sample_rawvariants.20a.vcf \
    /data5/bsi/BIOR/Vcfs/sample_rawvariants.20b.vcf
  verifyOutput 12  "$ACTUAL_DIR/12.actual" Expected/12.expected
fi

##13)  Compare the two previous files to each other (concat vs merge)
if [[ -z "$testToRun" || "$testToRun" == 13 ]] ; then
  # ACTUAL_DIR is new for every run, so when only test 13 is requested the outputs of
  # tests 11 and 12 are not there yet.  Produce them here in that case; otherwise two
  # empty files would be compared and the test could report a false "OK".
  if [[ ! -f "$ACTUAL_DIR/11.actual" ]] ; then
    bior_concat --nozip -o "$ACTUAL_DIR/11.actual" \
      /data5/bsi/BIOR/Vcfs/sample_rawvariants.20a.vcf \
      /data5/bsi/BIOR/Vcfs/sample_rawvariants.20b.vcf
  fi
  if [[ ! -f "$ACTUAL_DIR/12.actual" ]] ; then
    bior_merge --nozip -o "$ACTUAL_DIR/12.actual" \
      /data5/bsi/BIOR/Vcfs/sample_rawvariants.20a.vcf \
      /data5/bsi/BIOR/Vcfs/sample_rawvariants.20b.vcf
  fi
  ### The headers are apparently different, so disregard
  grep -v "^##" "$ACTUAL_DIR/11.actual" > "$ACTUAL_DIR/13a.actual"
  grep -v "^##" "$ACTUAL_DIR/12.actual" > "$ACTUAL_DIR/13b.actual"
  # The merge output plays the role of the "expected" file here
  verifyOutput 13  "$ACTUAL_DIR/13a.actual" "$ACTUAL_DIR/13b.actual"
fi

##14) bior_variant_to_tjson - from rsId and chr+pos+alleles
## From dbSNP_142_GRCh37:
## 1       10177   10177   {"ID":"rs201752861","REF":"A","ALT":"C"}
## 1       10177   10177   {"ID":"rs367896724","REF":"A","ALT":"AC"}
## Input: a header line, one line holding only an rsId, and one line holding chrom/pos/alleles with "." as the ID
if [[ -z "$testToRun" || "$testToRun" == 14 ]] ; then
  echo -e "ID\tCHROM\tPOS\tAllele1\tAllele2\nrs201752861\n.\t1\t10177\tA\tAC" \
    | bior_variant_to_tjson > "$ACTUAL_DIR/14.actual"
  verifyOutput 14 "$ACTUAL_DIR/14.actual" Expected/14.expected
fi

##15) bior_tjson_to_vcf - where one column is json, and all "bior." columns are added.  Already VCF, but cols added to existing INFO
if [[ -z "$testToRun" || "$testToRun" == 15 ]] ; then
  bior_vcf_to_tjson < In/15.in \
    | bior_overlap -d "$genes" \
    | bior_drill -p gene -p gene_synonym -p _strand -p GeneID -p HGNC \
    | bior_tjson_to_vcf -k \
    | bior_pretty_print  > "$ACTUAL_DIR/15.actual"
  verifyOutput 15 "$ACTUAL_DIR/15.actual" Expected/15.expected
fi

##16) bior_tjson_to_vcf - where column range is specified, and VCF is built from JSON
##    Lines containing "fileDate" are dropped before the comparison
if [[ -z "$testToRun" || "$testToRun" == 16 ]] ; then
  bior_tjson_to_vcf -c 2 -r 2,3..4,-2..-1 --logfile "$TEMP_DIR/16.log" < In/16.in \
    | grep -v fileDate > "$ACTUAL_DIR/16.actual"
  verifyOutput 16 "$ACTUAL_DIR/16.actual" Expected/16.expected
fi

##17) bior_tjson_to_vcf - examples from help text  (same output for both)
##    Both range spellings are checked against the single Expected/17.expected file
if [[ -z "$testToRun" || "$testToRun" == 17 ]] ; then
  bior_tjson_to_vcf -r 9..10,12,13 < In/17.in \
    | grep -v fileDate  > "$ACTUAL_DIR/17a.actual"
  verifyOutput 17a "$ACTUAL_DIR/17a.actual" Expected/17.expected
  bior_tjson_to_vcf -r -5..-4,-2..-1,-1,12 < In/17.in \
    | grep -v fileDate  > "$ACTUAL_DIR/17b.actual"
  verifyOutput 17b "$ACTUAL_DIR/17b.actual" Expected/17.expected
fi




##==========================================================
# Medium-length tests
##==========================================================


##==========================================================
# Long running tests
##==========================================================
