#!/bin/bash

# Both catalog paths are mandatory; the chromosome list is optional.
# The whole usage text goes to stderr so it never mixes with report output.
if [ "$#" -lt 2 ]; then
   {
      echo "Usage: $0 catalog1_path catalog2_path [chr_list]"
      # Keep this list in sync with the default assigned to chrs further down.
      echo "   chr_list defaults to '1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y'"
      echo "   e.g. $0 catalog1.tsv.bgz catalog2.tsv.bgz \"1 10 12\" will compare counts catalog1.tsv.bgz and catalog2.tsv.bgz for chrs 1, 10 and 12 and report diffs in number of catalog entries"
   } >&2
   exit 1
fi

# tabix is the only external tool this script queries catalogs with, so fail
# early with a clear message when it is not on PATH.
command -v tabix >/dev/null 2>&1 || { echo >&2 "Can't find tabix"; exit 1; }

# Exit with status 1 and a message on stderr unless the catalog at path $1
# exists.
require_catalog() {
   if [ ! -e "$1" ]; then
      echo "catalog '$1' doesn't exist" >&2
      exit 1
   fi
}

# First positional argument: path of the first catalog.
catalog1_path=$1
shift
require_catalog "$catalog1_path"

# Second positional argument: path of the second catalog.
catalog2_path=$1
shift
require_catalog "$catalog2_path"

# Chromosomes compared when no chr_list argument is supplied.
default_chrs="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y"

# ${1-...} falls back to the default only when the argument is absent, so an
# explicitly empty chr_list is still honoured (nothing gets compared).
chrs=${1-$default_chrs}
if [ "$#" -ne 0 ]; then
   shift
fi

# Anything left over is unused; say so on stderr to keep stdout for the report.
if [ "$#" -ne 0 ]; then
   echo "Extra args after chr list ($chrs) ignored" >&2
fi

# Print the number of lines tabix emits for chromosome $2 of catalog $1.
# If the tabix stage fails, a warning is written to stderr and the count that
# was obtained is still printed, so the report on stdout keeps its shape.
count_catalog_entries() {
   local catalog=$1 chr=$2 entries
   # pipefail is enabled only inside the substitution's subshell, which makes
   # a tabix failure visible here without changing options for the script.
   entries=$(set -o pipefail; tabix "$catalog" "$chr" | wc -l) ||
      echo "Warning: tabix failed for chr $chr in \"$catalog\"; its count may be wrong" >&2
   # Arithmetic expansion drops the padding some wc implementations emit.
   echo "$(( entries ))"
}

# For every chromosome in the whitespace-separated list $3, print its name and
# then, when the two catalogs ($1 and $2) hold a different number of entries
# for it, a line describing the mismatch.
compare_chr_counts() {
   local catalog1=$1 catalog2=$2 c count1 count2
   local -a chr_list
   # Split on whitespace (newlines included) without glob-expanding the names.
   read -r -d '' -a chr_list <<< "$3"
   for c in "${chr_list[@]}"; do
      printf '%s\n' "$c"
      count1=$(count_catalog_entries "$catalog1" "$c")
      count2=$(count_catalog_entries "$catalog2" "$c")
      if [ "$count1" -ne "$count2" ]; then
         echo "Count is different for chr $c in \"$catalog1\" ($count1) vs. \"$catalog2\" ($count2)"
      fi
   done
}

compare_chr_counts "$catalog1_path" "$catalog2_path" "$chrs"
