# Jeff Nie June 2013
# run EdgeR in unix 

# ---- Command-line arguments ----
# Positional arguments, in the order given by the usage text below:
#   1. path to the gene count table
#   2. name of the control group
#   3. number of control samples
#   4. name of the case group
#   5. number of case samples
args <- commandArgs(TRUE)
fileName <- args[1]
control <- args[2]
case <- args[4]

# Command-line values arrive as strings. Convert the sample counts up front so
# that a missing or non-numeric value is caught here (as NA) rather than
# surfacing later as an obscure error when the group vector is built.
num_of_ctrl <- suppressWarnings(as.integer(args[3]))
num_of_case <- suppressWarnings(as.integer(args[5]))

# All five arguments are required, the count file must exist, and both sample
# counts must be positive integers.
args_ok <- length(args) >= 5 &&
  file.exists(fileName) &&
  !is.na(num_of_ctrl) && num_of_ctrl > 0 &&
  !is.na(num_of_case) && num_of_case > 0

if (!args_ok) {
  writeLines("Usage:\nRscript edgeR_pipe.R GeneCountFilePath controlName numSamplesInControl caseName numSamplesIncase\nExample:\nRscript edgeR_pipe.R example.txt control 9 disease 3\nNote:\n1. The input file format should be geneID  control-1   control-2  .... case-1   case-2  ...\n2. GeneReport2EdgeR.pl could be used to generate the input file from GeneCount.tsv\n example:\n cat GeneCount.tsv|GeneReport2EdgeR.pl >edgeR.input.txt\n")
  # Exit with a non-zero status so calling shells/pipelines can detect the failure.
  quit(save = "no", status = 1)
}
library(edgeR)
# ---- Load counts and build the DGEList ----
# The first column of the input table holds the gene IDs; every remaining
# column is one sample. drop = FALSE keeps `counts` a data frame even if only
# a single sample column is present.
data <- read.table(file = fileName, header = TRUE)
counts <- data[, -1, drop = FALSE]
rownames(counts) <- data[, 1]

# Group labels follow the column order documented in the usage text: all
# control samples first, then all case samples. The replicate counts come from
# the command line, so coerce them to integers explicitly.
group <- c(
  rep(control, as.integer(num_of_ctrl)),
  rep(case, as.integer(num_of_case))
)
if (length(group) != ncol(counts)) {
  stop(
    "Expected ", length(group), " sample columns (controls + cases) but found ",
    ncol(counts), " in ", fileName,
    call. = FALSE
  )
}
cds <- DGEList(counts, group = group)

# Keep only genes whose counts-per-million exceed 1 in at least 3 samples.
# CPM is computed from the raw library sizes stored in the DGEList.
lib_size_matrix <- expandAsMatrix(cds$samples$lib.size, dim(cds))
expressed <- 1e+06 * cds$counts / lib_size_matrix > 1
cds <- cds[rowSums(expressed) >= 3, ]

# Compute normalization factors and report the per-sample table.
cds <- calcNormFactors(cds)
print(cds$samples)
# ---- Sample-level QC plot and dispersion estimation ----
# MDS plot of the samples, labelled with the count-matrix column names.
png(file = "MDS plot.png")
plotMDS(
  cds,
  top = 10,
  labels = colnames(cds$counts),
  main = "MDS Plot for Count Data",
  ndim = 2,
  dim.plot = c(1, 2),
  cex = 0.7,
  col = NULL
)
dev.off()

# Estimate the common dispersion first, then the tagwise dispersions; both
# results are stored back into `cds`.
cds <- estimateCommonDisp(cds)
cds <- estimateTagwiseDisp(cds)

# Mean-variance plot, written to its own PNG. The value returned by
# plotMeanVar() is kept in `meanVarPlot`.
png(file = "Mean Variance plot.png")
meanVarPlot <- plotMeanVar(
  cds,
  show.raw.vars = TRUE,
  show.tagwise.vars = TRUE,
  show.binned.common.disp.vars = FALSE,
  show.ave.raw.vars = FALSE,
  dispersion.method = "qcml",
  NBline = TRUE,
  nbins = 100,
  pch = 16,
  xlab = "Mean Expression (Log10 Scale)",
  ylab = "Variance (Log10 Scale)",
  main = "Mean-Variance Plot"
)
dev.off()
# ---- Exact tests under three dispersion models ----
# Each test compares `case` against `control`. The dispersion model is named
# explicitly: previously the "common" and "tagwise" calls were identical, so
# both silently used the same dispersions and produced identical results.
de.cmn <- exactTest(cds, pair = c(control, case), dispersion = "common")
de.tgw <- exactTest(cds, pair = c(control, case), dispersion = "tagwise")
# A near-zero dispersion approximates a Poisson model.
de.poi <- exactTest(cds, dispersion = 1e-06, pair = c(control, case))

# Full result tables (every gene). topTags() orders by p-value by default;
# resultsByFC.tgw is ordered by log fold-change instead.
resultsByFC.tgw <- topTags(de.tgw, n = nrow(de.tgw$table), sort.by = "logFC")$table
resultsTbl.cmn <- topTags(de.cmn, n = nrow(de.cmn$table))$table
resultsTbl.tgw <- topTags(de.tgw, n = nrow(de.tgw$table))$table
resultsTbl.poi <- topTags(de.poi, n = nrow(de.poi$table))$table

# Names of genes with an unadjusted p-value <= 0.05 under each model, in the
# order of the corresponding results table.
de.genes.cmn <- rownames(resultsTbl.cmn)[resultsTbl.cmn$PValue <= 0.05]
de.genes.tgw <- rownames(resultsTbl.tgw)[resultsTbl.tgw$PValue <= 0.05]
de.genes.poi <- rownames(resultsTbl.poi)[resultsTbl.poi$PValue <= 0.05]

# Number of genes shared by the top `n` entries of two gene lists.
# head() is used instead of x[1:n]: indexing past the end pads with NA, and
# NA %in% NA is TRUE, which would inflate the overlap for short lists.
top_overlap <- function(a, b, n) {
  sum(head(a, n) %in% head(b, n))
}

# Overlap between models among the top-ranked DE genes. Percentages are taken
# relative to the nominal list size (1000 or 10); the top-100 figure is
# reported as a raw count.
print(top_overlap(de.genes.tgw, de.genes.cmn, 1000) / 1000 * 100)
print(top_overlap(de.genes.tgw, de.genes.poi, 10) / 10 * 100)
print(top_overlap(de.genes.tgw, de.genes.poi, 100))
print(top_overlap(de.genes.tgw, de.genes.poi, 1000) / 1000 * 100)
# ---- Plots for differentially expressed (DE) genes ----
# head(x, n) is used throughout instead of x[1:n]: when fewer than n DE genes
# exist, x[1:n] pads the selection with NA, which would then be passed on as
# row names / gene tags to the plotting calls.

# Histograms of logCPM for the top 100 DE genes under each dispersion model,
# stacked in a single three-panel figure.
png(file = "Differential expression_all genes plot.png")
par(mfrow = c(3, 1))
hist(
  resultsTbl.poi[head(de.genes.poi, 100), "logCPM"],
  breaks = 100, xlab = "Log Concentration", col = "red",
  freq = FALSE, main = "Poisson: Top 100"
)
hist(
  resultsTbl.cmn[head(de.genes.cmn, 100), "logCPM"],
  breaks = 100, xlab = "Log Concentration", col = "green",
  freq = FALSE, main = "Common: Top 100"
)
hist(
  resultsTbl.tgw[head(de.genes.tgw, 100), "logCPM"],
  breaks = 100, xlab = "Log Concentration", col = "blue",
  freq = FALSE, main = "Tagwise: Top 100"
)
dev.off()

# MA plots showing the relationship between concentration and fold-change
# across all genes; the genes passed as de.tags are highlighted. Horizontal
# reference lines are drawn at log fold-changes of -2 and 2.
png(file = "MA plot_all genes.png")
par(mfrow = c(2, 1))
plotSmear(
  cds, de.tags = de.genes.poi, main = "Poisson",
  pair = c(control, case), cex = .35,
  xlab = "Log Concentration", ylab = "Log Fold-Change"
)
abline(h = c(-2, 2), col = "dodgerblue")
plotSmear(
  cds, de.tags = de.genes.tgw, main = "Tagwise",
  pair = c(control, case), cex = .35,
  xlab = "Log Concentration", ylab = "Log Fold-Change"
)
abline(h = c(-2, 2), col = "dodgerblue")
par(mfrow = c(1, 1))
dev.off()

# Same MA plots, but highlighting only the top 500 DE genes of each model.
png(file = "MA plot_top 500 genes.png")
par(mfrow = c(2, 1))
plotSmear(
  cds, de.tags = head(de.genes.poi, 500), main = "Poisson",
  pair = c(control, case), cex = .5,
  xlab = "Log Concentration", ylab = "Log Fold-Change"
)
abline(h = c(-2, 2), col = "dodgerblue")
plotSmear(
  cds, de.tags = head(de.genes.tgw, 500), main = "Tagwise",
  pair = c(control, case), cex = .5,
  xlab = "Log Concentration", ylab = "Log Fold-Change"
)
abline(h = c(-2, 2), col = "dodgerblue")
par(mfrow = c(1, 1))
dev.off()
## ---- Output results ----
# Row positions in the count matrix, listed in the order of each results
# table, so counts and dispersions can be lined up with the test results.
wh.rows.tgw <- match(rownames(resultsTbl.tgw), rownames(cds$counts))
wh.rows.cmn <- match(rownames(resultsTbl.cmn), rownames(cds$counts))
head(wh.rows.tgw)

# Tagwise results: test table + per-gene dispersion + up/down call + counts.
combResults.tgw <- cbind(
  resultsTbl.tgw,
  "Tgw.Disp" = cds$tagwise.dispersion[wh.rows.tgw],
  "UpDown.Tgw" = decideTestsDGE(de.tgw, p.value = 0.05)[wh.rows.tgw],
  cds$counts[wh.rows.tgw, ]
)

# Common-dispersion results: the single common dispersion value is repeated
# for every gene.
combResults.cmn <- cbind(
  resultsTbl.cmn,
  "Cmn.Disp" = cds$common.dispersion,
  "UpDown.Cmn" = decideTestsDGE(de.cmn, p.value = 0.05)[wh.rows.cmn],
  cds$counts[wh.rows.cmn, ]
)

# Rows of the tagwise table, re-ordered to follow the common table.
wh.rows <- match(rownames(combResults.cmn), rownames(combResults.tgw))

## Normalized values intended for the final table.
# NOTE(review): `normalized` is assigned here but never added to the output.
normalized <- cds$pseudo.counts

# Combined table, assembled by column position:
#   common  cols 1:4  - the topTags columns (presumably logFC, logCPM, PValue,
#                       FDR; confirm against the installed edgeR version)
#   tagwise cols 3:4  - the last two topTags columns of the tagwise test
#   col 5 / col 6     - dispersion and up/down call of each model
#   common  cols 7+   - the per-sample counts
combResults.all <- cbind(
  combResults.cmn[, 1:4],
  combResults.tgw[wh.rows, 3:4],
  "Cmn.Disp" = combResults.cmn[, 5],
  "Tgw.Disp" = combResults.tgw[wh.rows, 5],
  "UpDown.Cmn" = combResults.cmn[, 6],
  "UpDown.Tgw" = combResults.tgw[wh.rows, 6],
  combResults.cmn[, 7:ncol(combResults.cmn)]
)
head(combResults.all)

# Output CSV table of results. File names have the form
# <prefix>.<control>.vs.<case>.csv
# NOTE(review): tgw_file and common_file are built but only all_file is written.
tgw_file <- paste(c("tgw", control, "vs", case, "csv"), collapse = ".")
common_file <- paste(c("common", control, "vs", case, "csv"), collapse = ".")
all_file <- paste(c("all", control, "vs", case, "csv"), collapse = ".")

# NOTE(review): with row.names = TRUE and the default col.names, the header
# line has one fewer field than the data rows (no name for the gene-ID column).
write.table(combResults.all, file = all_file, sep = ",", row.names = TRUE)

 
