## Utility function to generate a custom bin-level reference statistics file from a set of N reference samples
##
## Usage:
## 
## filtering.autosome.median.range = c(0.5, 1.5)
## filtering.autosome.min.SNR = 2
## filtering.min.SNR = 2
## RefStat(bin.annotation.file="/data2/bsi/RandD/s115463.Aneuploidy/SinlgeSampleWandy/Wandy/reference/summary_10000bin_info.txt", negC, medc.U = filtering.autosome.median.range[2], medc.L = filtering.autosome.median.range[1], SNR.cut = filtering.min.SNR, ref.file="/data5/bsi/bioinf_ext1/s200798.sequenome_analysis/Ascore/results/paper/reference/summary_10kbin_info_3_31_2017.txt")

RefStat <- function(bin.annotation.file, negC, medc.U = 1.5, medc.L = 0.5, SNR.cut = 2, ref.file) {
  ## Build a per-bin reference statistics table from normalized coverage and
  ## write it to ref.file as a tab-separated file.
  ##
  ## bin.annotation.file: tab-separated bin annotation file with a header; must
  ##                      contain the columns chr, start, GC_Content, usable_bin
  ## negC: a bin by sample matrix of normalized coverage from N samples used as
  ##       reference; must have one row per row of the annotation file
  ## medc.U: upper limit of Median of normalized coverage across N samples
  ## medc.L: lower limit of Median of normalized coverage across N samples
  ## SNR.cut: lower limit of Signal to Noise Ratio of normalized coverage across N samples
  ## ref.file : path and filename that the new reference file will be stored
  ##
  ## Returns (invisibly) the data frame that was written to ref.file.

  statf <- read.table(bin.annotation.file, sep = "\t", header = TRUE)

  required.cols <- c("chr", "start", "GC_Content", "usable_bin")
  missing.cols <- setdiff(required.cols, names(statf))
  if (length(missing.cols) > 0) {
    stop("bin annotation file is missing column(s): ",
         paste(missing.cols, collapse = ", "), call. = FALSE)
  }

  ## Without this check a negC whose row count divides the number of bins
  ## would be silently recycled by the column assignments below.
  negC <- as.matrix(negC)
  if (nrow(negC) != nrow(statf)) {
    stop("negC has ", nrow(negC), " rows but the bin annotation file has ",
         nrow(statf), " bins", call. = FALSE)
  }

  ## `[[` is used instead of `$` to avoid partial matching of column names.
  statf$Chr <- statf[["chr"]]
  statf$Start.pos <- statf[["start"]]
  statf$GC.content <- statf[["GC_Content"]]
  statf$median.vec <- apply(negC, 1, FUN = median, na.rm = TRUE)
  statf$MAD.vec <- apply(negC, 1, FUN = mad, na.rm = TRUE)

  ## The 1e-3 offset keeps the ratio finite when MAD is 0.
  ## NOTE(review): stats::mad() already scales by 1.4826 by default, so the
  ## constant is effectively applied twice here. Kept as-is so that SNR values
  ## stay comparable with previously generated reference files -- confirm.
  statf$SNR.vec <- statf$median.vec / (1.4826 * (statf$MAD.vec + 1e-3))

  ## is.reliable.bin is 1 when all of the following hold, 0 otherwise:
  ##   1. GC content is not NA
  ##   2. usable_bin == 1
  ##   3. medc.L < median < medc.U
  ##   4. SNR >= SNR.cut
  reliable <- !is.na(statf$GC.content) &
    statf[["usable_bin"]] == 1 &
    statf$median.vec < medc.U &
    statf$median.vec > medc.L &
    statf$SNR.vec >= SNR.cut
  ## A bin whose criteria cannot be evaluated (e.g. every sample is NA) is
  ## flagged as not reliable rather than left as NA.
  statf$is.reliable.bin <- as.numeric(!is.na(reliable) & reliable)

  ## output statf
  out.cols <- c("Chr", "Start.pos", "GC.content", "median.vec", "MAD.vec",
                "SNR.vec", "is.reliable.bin")
  statf <- statf[, out.cols]
  write.table(statf, ref.file, sep = "\t", row.names = FALSE)
  invisible(statf)
}

