# Project setup. Relative paths used below ('./Data/...', './Result/...')
# are resolved against this working directory.
setwd('~/Dropbox/Workspace/MayoClinic/2015_11_30_Jeff_R1Paired/')
# Shared helper script; presumably defines load_package() -- TODO confirm.
source('~/Dropbox/Workspace/MayoClinic/Stats.R')

# NOTE(review): presumably attaches the packages this script needs
# (melt() and ggplot() are called below without any library() call) -- confirm.
load_package()

# Restore the saved workspace. Presumably provides `data.list` and `dist.list`,
# which are used below but never created in this script -- TODO confirm.
load('./Data/Data.RData')

# Sample metadata taken from the first entry of data.list.
# NOTE(review): Treatment7 is added to this local copy `df` only; the
# meta.dat stored inside data.list is not modified here.
df <- data.list[[1]]$meta.dat

# Clean the PREPARATION labels: 'UNKNOWN' becomes the empty string and the
# '-Treatment' suffix is stripped, so the label can be appended to TREATMENT.
prep.label <- as.character(df$PREPARATION)
prep.label[prep.label == 'UNKNOWN'] <- ''
prep.label <- gsub('-Treatment', '', prep.label)
df$PREPARATION <- prep.label

# Combined treatment/preparation factor.
df$Treatment7 <- factor(paste0(df$TREATMENT, df$PREPARATION))
# Major variables of interest: "Treatment7", "SAMPLE_VISIT", "HOST_SUBJECT_ID"
# Gold standard
# Reference ('Paired100') data, distances and the summaries derived from them.
data.obj.G <- data.list[['Paired100']]
dist.obj.G <- dist.list[['Paired100']]

# Alpha diversity per column of the OTU table: number of non-zero OTUs
# (Richness) and vegan's diversity index with its default settings (Shannon).
alpha.obj.G <- list(
	Richness = colSums(data.obj.G$otu.tab != 0),
	Shannon = vegan::diversity(data.obj.G$otu.tab, MARGIN=2)
)

# Phylum-level table rescaled so that every column sums to 1.
phy.abund.G <- local({
	counts <- data.obj.G$abund.list[['Phylum']]
	t(t(counts) / colSums(counts))
})

# Genus-level table rescaled the same way.
gen.abund.G <- local({
	counts <- data.obj.G$abund.list[['Genus']]
	t(t(counts) / colSums(counts))
})

# 'R1' data, distances and the summaries derived from them.
data.obj.R1 <- data.list[['R1']]
dist.obj.R1 <- dist.list[['R1']]

# Alpha diversity per column of the OTU table: number of non-zero OTUs
# (Richness) and vegan's diversity index with its default settings (Shannon).
alpha.obj.R1 <- list(
	Richness = colSums(data.obj.R1$otu.tab != 0),
	Shannon = vegan::diversity(data.obj.R1$otu.tab, MARGIN=2)
)

# Phylum-level table rescaled so that every column sums to 1.
phy.abund.R1 <- local({
	counts <- data.obj.R1$abund.list[['Phylum']]
	t(t(counts) / colSums(counts))
})

# Genus-level table rescaled the same way.
gen.abund.R1 <- local({
	counts <- data.obj.R1$abund.list[['Genus']]
	t(t(counts) / colSums(counts))
})

##########################################################################
# Correlation analysis
# Measures compared against the gold standard.
dist.names <- c('UniFrac', 'WUniFrac', 'BC')
alpha.names <- c('Richness', 'Shannon')
phy.names <- c("Actinobacteria", "Bacteroidetes", "Firmicutes", "Proteobacteria")

# Genera that are non-zero in more than 10% of the gold-standard columns.
gen.names <- rownames(gen.abund.G)[rowMeans(gen.abund.G != 0) > 0.1]

# Methods and remaining-fraction labels that index the result arrays.
method.names <- c('R1', 'Paired', 'PairedR1')
remain.levels <- c('025', '050', '075')

# Keep only genera that are also rows of every 'Paired<remain>' and
# 'PairedR1<remain>' genus table. intersect() keeps the order of its first
# argument, so the order in which the tables are visited does not matter.
# NOTE(review): the 'R1' genus table is not part of this intersection, so a
# genus kept here may be missing from gen.abund.R1 -- confirm this is intended.
subset.keys <- c(paste0('Paired', remain.levels), paste0('PairedR1', remain.levels))
gen.names <- Reduce(
	function(kept, key) intersect(kept, rownames(data.list[[key]]$abund.list[['Genus']])),
	subset.keys,
	gen.names
)

# NA-filled Method x Measure x Remain array. The extents are derived from the
# name vectors so the dimensions cannot drift out of sync with the dimnames.
new.cor.array <- function(measure.names) {
	array(
		NA,
		c(length(method.names), length(measure.names), length(remain.levels)),
		dimnames=list(Method=method.names, Measure=measure.names, Remain=remain.levels)
	)
}

alpha.cor.list <- new.cor.array(alpha.names)
dist.cor.list <- new.cor.array(dist.names)
phy.cor.list <- new.cor.array(phy.names)
gen.cor.list <- new.cor.array(gen.names)
		
# Fill the correlation arrays: every method ('Paired', 'PairedR1', 'R1') is
# compared with the gold standard (*.G objects) for each remaining fraction.
# Alpha diversity and distances use cor()'s default (Pearson) method; phylum
# and genus proportions use Spearman.

# Lower-triangle entries of a distance object, as a plain vector.
lower.tri.values <- function(dist.obj) {
	dist.mat <- as.matrix(dist.obj)
	dist.mat[lower.tri(dist.mat)]
}

# Correlation with the gold standard for every measure in alpha.names.
alpha.cor.vs.gold <- function(alpha.obj) {
	vapply(alpha.names, function(alpha.name) {
		cor(alpha.obj[[alpha.name]], alpha.obj.G[[alpha.name]])
	}, numeric(1))
}

# Correlation of pairwise distances with the gold standard for every
# measure in dist.names.
dist.cor.vs.gold <- function(dist.obj) {
	vapply(dist.names, function(dist.name) {
		cor(lower.tri.values(dist.obj.G[[dist.name]]), lower.tri.values(dist.obj[[dist.name]]))
	}, numeric(1))
}

# Spearman correlation between the rows `taxa.names` of `abund.G` and `abund`.
#   method.label: only used to identify the method in the warning text.
#   skip.errors:  when TRUE, a taxon whose correlation cannot be computed
#                 (e.g. the row is missing from `abund`) yields NA plus a
#                 warning naming the taxon, instead of stopping the script.
taxa.cor.vs.gold <- function(abund, abund.G, taxa.names, method.label, skip.errors=FALSE) {
	vapply(taxa.names, function(taxon) {
		if (!skip.errors) {
			return(cor(abund.G[taxon, ], abund[taxon, ], method='spearman'))
		}
		tryCatch(
			cor(abund.G[taxon, ], abund[taxon, ], method='spearman'),
			error=function(e) {
				warning(sprintf("Correlation skipped for '%s' (%s): %s",
								taxon, method.label, conditionMessage(e)), call.=FALSE)
				NA_real_
			}
		)
	}, numeric(1))
}

# The R1 comparison does not depend on `remain`: compute it once and copy the
# same values into every Remain slice inside the loop.
# (data.obj.R1, alpha.obj.R1, phy.abund.R1, ... are built earlier in the script.)
r1.alpha.cor <- alpha.cor.vs.gold(alpha.obj.R1)
r1.dist.cor <- dist.cor.vs.gold(dist.obj.R1)
r1.phy.cor <- taxa.cor.vs.gold(phy.abund.R1, phy.abund.G, phy.names, 'R1')
r1.gen.cor <- taxa.cor.vs.gold(gen.abund.R1, gen.abund.G, gen.names, 'R1', skip.errors=TRUE)

for (remain in c('025', '050', '075')) {
	cat(remain, '\n')

	# Paired
	data.obj.R12 <- data.list[[paste0('Paired', remain)]]
	dist.obj.R12 <- dist.list[[paste0('Paired', remain)]]
	alpha.obj.R12 <- list()
	alpha.obj.R12[['Richness']] <- colSums(data.obj.R12$otu.tab != 0)
	alpha.obj.R12[['Shannon']] <- vegan::diversity(data.obj.R12$otu.tab, MARGIN=2)
	phy.abund.R12 <- data.obj.R12$abund.list[['Phylum']]
	phy.abund.R12 <- t(t(phy.abund.R12) / colSums(phy.abund.R12))
	gen.abund.R12 <- data.obj.R12$abund.list[['Genus']]
	gen.abund.R12 <- t(t(gen.abund.R12) / colSums(gen.abund.R12))

	alpha.cor.list['Paired', alpha.names, remain] <- alpha.cor.vs.gold(alpha.obj.R12)
	dist.cor.list['Paired', dist.names, remain] <- dist.cor.vs.gold(dist.obj.R12)
	phy.cor.list['Paired', phy.names, remain] <- taxa.cor.vs.gold(phy.abund.R12, phy.abund.G, phy.names, 'Paired')
	gen.cor.list['Paired', gen.names, remain] <- taxa.cor.vs.gold(gen.abund.R12, gen.abund.G, gen.names, 'Paired', skip.errors=TRUE)

	# PairedR1
	data.obj.PR1 <- data.list[[paste0('PairedR1', remain)]]
	dist.obj.PR1 <- dist.list[[paste0('PairedR1', remain)]]
	alpha.obj.PR1 <- list()
	alpha.obj.PR1[['Richness']] <- colSums(data.obj.PR1$otu.tab != 0)
	alpha.obj.PR1[['Shannon']] <- vegan::diversity(data.obj.PR1$otu.tab, MARGIN=2)
	phy.abund.PR1 <- data.obj.PR1$abund.list[['Phylum']]
	phy.abund.PR1 <- t(t(phy.abund.PR1) / colSums(phy.abund.PR1))
	gen.abund.PR1 <- data.obj.PR1$abund.list[['Genus']]
	gen.abund.PR1 <- t(t(gen.abund.PR1) / colSums(gen.abund.PR1))

	alpha.cor.list['PairedR1', alpha.names, remain] <- alpha.cor.vs.gold(alpha.obj.PR1)
	dist.cor.list['PairedR1', dist.names, remain] <- dist.cor.vs.gold(dist.obj.PR1)
	phy.cor.list['PairedR1', phy.names, remain] <- taxa.cor.vs.gold(phy.abund.PR1, phy.abund.G, phy.names, 'PairedR1')
	gen.cor.list['PairedR1', gen.names, remain] <- taxa.cor.vs.gold(gen.abund.PR1, gen.abund.G, gen.names, 'PairedR1', skip.errors=TRUE)

	# R1 (same values for every `remain`, computed once above)
	alpha.cor.list['R1', alpha.names, remain] <- r1.alpha.cor
	dist.cor.list['R1', dist.names, remain] <- r1.dist.cor
	phy.cor.list['R1', phy.names, remain] <- r1.phy.cor
	gen.cor.list['R1', gen.names, remain] <- r1.gen.cor
}
# Visualization

# Reshape the correlation arrays to long format. The plots below rely on the
# columns Method, Measure, Remain and value of the melted genus table.
alpha.cor <- melt(alpha.cor.list)
dist.cor <- melt(dist.cor.list)
phy.cor <- melt(phy.cor.list)
gen.cor <- melt(gen.cor.list)

# One panel per genus: correlation with the gold standard at each remaining
# fraction, with the methods shown side by side.
pdf('./Result/Correlation_gen_cor.pdf', width=14, height=10)
obj <- ggplot(gen.cor, aes(x=Remain, y=value, fill=Method))
obj <- obj + geom_bar(stat='identity', position="dodge")
# A fixed y range (ylim(c(0.9, 1.0))) was tried here and left disabled.
obj <- obj + ylab('Correlation')
obj <- obj + facet_wrap(~ Measure)
obj <- obj + theme_bw()
print(obj)
dev.off()

# Boxplot of the per-genus correlations, grouped by remaining fraction and
# coloured by method.
# NOTE(review): Remain is converted to a factor so the x axis is discrete;
# presumably melt() did not return it as one -- confirm.
gen.cor$Remain <- factor(gen.cor$Remain)

pdf('./Result/Correlation_gen_cor_boxplot.pdf', width=6, height=5)
obj <- ggplot(gen.cor, aes(x=Remain, y=value, fill=Method))
obj <- obj + geom_boxplot()
# A fixed y range (ylim(c(0.9, 1.0))) was tried here and left disabled.
obj <- obj + ylab('Correlation')
obj <- obj + theme_bw()
print(obj)
dev.off()

##########################################################################
# Clustering analysis

