Gene Expression Prediction

Published:

Pre-WGCNA

Treatment Effects on Gene Expression in Upper 20% of Predictable Gene Transcripts

# Cluster tress (check if any outliers existed among
# samples)
datExpr_tree <- hclust(dist(datExpr), method = "average")
par(mar = c(0, 5, 2, 0))
plot(datExpr_tree, main = "Sample clustering", sub = "", xlab = "",
    cex.lab = 2, cex.axis = 1, cex.main = 1, cex.lab = 1, cex = 0.5)

KEGG Analysis of Upper 20% of Predictable Gene Transcripts

length(dat)
enrichment_analysis(df = dat, plot_name = "Full Upper 20%")

WGCNA

Soft thresholding

# Set powers to sample
powers = c(c(1:10), seq(from = 12, to = 50, by = 4))

# Call the network topology analysis function
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 0)
k <- softConnectivity(datE = datExpr, power = sft$powerEstimate)
# Plot the results:
par(mfrow = c(1, 2))
cex1 = 0.9

# Scale-free topology fit index as a function of the
# soft-thresholding power
plot(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[,
    2], xlab = "Soft Threshold (power)", ylab = "Scale Free Topology Model Fit,signed R^2",
    type = "n", main = paste("Scale independence"))
text(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[,
    2], labels = powers, cex = cex1, col = "red")

# this line corresponds to using an R^2 cut-off of h
abline(h = 0.8, col = "red")

# Mean connectivity as a function of the soft-thresholding
# power
fig <- plot(sft$fitIndices[, 1], sft$fitIndices[, 5], xlab = "Soft Threshold (power)",
    ylab = "Mean Connectivity", type = "n", main = paste("Mean connectivity"))

text(sft$fitIndices[, 1], sft$fitIndices[, 5], labels = powers,
    cex = cex1, col = "red")

par(mfrow = c(1, 2))
hist(k)
scaleFreePlot(k, main = "Check Scale free topology\n")

Block-wise Network Construction & Module Detection

net = blockwiseModules(datExpr, 
                       power = 30, 
                       deepSplit=2, 
                       TOMType = "unsigned",
                       reassignThreshold = 0,
                       maxBlockSize=440000,
                       # mergeCutHeight=0.10,
                       mergeCutHeight = 0.05,
                       numericLabels = TRUE, 
                       pamRespectsDendro = FALSE, 
                       verbose = 0, 
                       pamStage=TRUE)
                       # corType="bicor")

#Plot the dendogram with color assignment
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];

bwnet = blockwiseModules(datExpr, 
                         power = 30, 
                         deepSplit=2, 
                         TOMType = "unsigned",
                         reassignThreshold = 0,
                         maxBlockSize=440000,
                         # mergeCutHeight=0.10,
                         mergeCutHeight = 0.05,
                         numericLabels = TRUE, 
                         pamRespectsDendro = TRUE, 
                         verbose = 0, 
                         pamStage=TRUE)
                         # TOMDenom="mean",
                         # corType="bicor")

#Relabel blockwise modules
bwLabels = matchLabels(bwnet$colors, moduleLabels);
#Labels to colors for plotting
bwModuleColors = labels2colors(bwLabels)

#Compare the single block to the block-wise network analysis
plotDendroAndColors(geneTree,cbind(moduleColors, bwModuleColors), c("Single", "2 blocks"), main = "Single block gene dendrogram and module colors", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE,guideHang = 0.05)

singleBlockMEs = moduleEigengenes(datExpr, moduleColors)$eigengenes
blockwiseMEs = moduleEigengenes(datExpr, bwModuleColors)$eigengenes

single2blockwise = match(names(singleBlockMEs), names(blockwiseMEs))
signif(diag(cor(blockwiseMEs[, single2blockwise], singleBlockMEs)),
    3)
##      MEblue     MEbrown     MEgreen      MEgrey       MEred MEturquoise 
##       0.991       1.000       0.998       0.993       1.000       0.987 
##    MEyellow 
##       1.000

Enrichment Analyses

out <- data.frame(gene = colnames(datExpr), module = bwModuleColors)

mods = unique(bwModuleColors)
go_list = c()
kegg_list = c()
mkegg_list = c()


for (mod in mods) {
    temp = subset(out, module == mod)
    result = filter(dat, dat$Gene %in% temp$gene)
    result = enrichment_analysis(df = result, plot_name = mod)

    try({
        # print(result$go_plot)
        print(result$kegg_plot)
        # print(result$mkegg_plot)

        go_res = result$go_data
        go_list[[mod]] = go_res@result$Description

        kk_res = result$kegg_data
        kegg_list[[mod]] = kk_res@result$Description

        # mkk_res = result$mkegg_data mkegg_list[[mod]] =
        # mkk_res@result$Description

    }, silent = TRUE)

}