## Raymond Moore
## 11/7/2013
## Purpose: generate all the peak count statistics
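## for a single input file (macs: tabbed columns with a header row; sicer: tabbed columns, no header row)
## Args: <input type: "macs", anything else is read as sicer> <input file> <output dir>
## Histogram PNGs are written to <output dir>/delivery/hist/ ($ mkdir -p <output dir>/delivery/hist)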
## ---- Command-line arguments ------------------------------------------------
## 1: input type ("macs"; any other value is handled as sicer)
## 2: path to the peak file
## 3: output directory
stdin <- commandArgs(TRUE)
if (length(stdin) < 3) {
	stop("Usage: Rscript <script> <macs|sicer> <input file> <output dir>",
		call. = FALSE)
}
input.type <- stdin[1]
input.file <- stdin[2]
output.dir <- stdin[3]

## File name without its directory part; used as the row label in
## peak_counts.out and to build the histogram file name and title.
infile.base <- basename(input.file)


## Append one summary row for a peak file to <out.dir>/peak_counts.out.
##
## Row layout:
##   name, peak caller, total lines, median peak width, mean peak width,
##   sd peak width, mean fold change, sd fold change
##
## out.dir  directory that holds peak_counts.out
## label    name written in the first field
## caller   peak caller name written in the second field
## n.peaks  number of rows in the peak table
## widths   numeric vector of peak widths
## fold     numeric vector of fold-enrichment values
##
## NOTE: cat() joins its arguments with its default separator (a single
## space), so every "\t" in the row is surrounded by spaces. This is kept
## unchanged so that new rows match rows already appended to the file.
append_peak_stats <- function(out.dir, label, caller, n.peaks, widths, fold) {
	## Writing through cat(file=, append=) instead of sink() means an error
	## while computing a statistic cannot leave stdout diverted.
	cat(label, "\t", caller, "\t", n.peaks, "\t",
		median(widths, na.rm = TRUE), "\t",
		round(mean(widths, na.rm = TRUE), 1), "\t",
		round(sd(widths, na.rm = TRUE), 1), "\t",
		round(mean(fold, na.rm = TRUE), 1), "\t",
		round(sd(fold, na.rm = TRUE), 1), "\n",
		file = file.path(out.dir, "peak_counts.out"), append = TRUE)
	invisible(NULL)
}


## Print the maximum pileup and save a histogram of the pileup values to
## <out.dir>/delivery/hist/<label minus its last four characters>.png.
##
## out.dir  base output directory
## label    input file name (drives the PNG name and the plot title)
## pileup   numeric vector of per-peak read counts
plot_pileup_hist <- function(out.dir, label, pileup) {
	## NA / non-finite values are dropped up front; hist() would ignore them
	## anyway, but max() would turn them into an NA break and make hist() fail.
	pileup <- pileup[is.finite(pileup)]
	if (length(pileup) == 0) {
		warning("No pileup values found in ", label, "; histogram skipped",
			call. = FALSE)
		return(invisible(NULL))
	}

	max.pileup <- max(pileup)
	print( paste("Max Pileup: ", max.pileup, sep="") )

	## Bins of width 5 from 0 to 390, plus one extra break at the maximum so
	## that every value is covered. unique() drops the extra break when the
	## maximum already coincides with a bin edge.
	## NOTE(review): when the resulting breaks are not equidistant, hist()
	## defaults to plotting densities rather than counts, although the y-axis
	## is meant to be the number of peaks -- confirm which one is wanted.
	myBreaks <- unique( c( seq(0, 390, by = 5), max.pileup ) )

	hist.dir <- file.path(out.dir, "delivery", "hist")
	## Create the target directory if it is missing instead of failing in png().
	dir.create(hist.dir, recursive = TRUE, showWarnings = FALSE)

	## The last four characters of the file name are dropped
	## (presumably a three-letter extension such as ".xls" -- TODO confirm).
	png.name <- paste(substr(label, 1, nchar(label) - 4), ".png", sep = "")
	png( file.path(hist.dir, png.name) )
	## Close the device even if hist() raises an error.
	on.exit(dev.off(), add = TRUE)

	hist( pileup, breaks = myBreaks, col = "gray",
		main = paste("Pileup Counts w/in ", substr(label, 1, 15), "...", sep=""),
		xlab = "# of Reads Used to Call a Peak", xlim = c(0, 400))
	invisible(NULL)
}


## ---- Read the peak table and pick the columns for this caller --------------
if (input.type == "macs") {
	print("It's Macs")
	inTable <- read.table(input.file, header = TRUE)
	widths <- as.numeric(inTable[, 4])	# length
	fold   <- as.numeric(inTable[, 8])	# fold_enrichment
	pileup <- as.numeric(inTable[, 6])	# reads used to call the peak
} else {
	print("It's Sicer")
	inTable <- read.table(input.file, header = FALSE)
	## No width column here: derive it from columns 3 and 2
	## (presumably end and start -- TODO confirm).
	widths <- as.numeric(inTable[, 3]) - as.numeric(inTable[, 2])
	fold   <- as.numeric(inTable[, 7])	# fold_enrichment
	pileup <- as.numeric(inTable[, 4])	# reads used to call the peak
}

## ---- Outputs ---------------------------------------------------------------
append_peak_stats(output.dir, infile.base, input.type, nrow(inTable), widths, fold)
plot_pileup_hist(output.dir, infile.base, pileup)

