Task 6: Multi-Omics Network Construction and Annotation

This task constructs and annotates an integrative multi-omics network connecting differentially expressed genes (DEGs) and differentially methylated regions (DMRs). The network includes:

Gene–Gene partial correlation network
CpG–CpG partial correlation network
CpG–Gene connections from prior regression analysis

All edges are filtered by significance (p < 0.05) and annotated using reference gene and CpG annotation databases. The final network is saved in a unified CSV file for visualization (e.g. Cytoscape).

All required input files are loaded independently to ensure reproducibility and modularity.

Load All Required Libraries for Multi-Omics Analysis

# ---------------------------------------------------------------
# Load Input Data from Previous Parts
# -------------------------------

# 1. Load DEG matrix and subset
rna_deg <- read.csv("RNA_TMM_log2_zscore.csv", row.names = 1, check.names = FALSE)
deg_filtered <- read.csv("DEG_significant_filtered.csv", row.names = 1)
rna_deg <- rna_deg[rownames(rna_deg) %in% rownames(deg_filtered), ]

# 2. Load methylation matrix and subset
meth <- read.csv("Methylation_20000_Top_expressed.csv", row.names = 1, check.names = FALSE)
dmrs_lm <- read.csv("dmrs_methylation_lm_based.csv", row.names = 1)
meth_dmr <- meth[rownames(meth) %in% rownames(dmrs_lm), ]

# 3. Load DEG–DMR regression results
reg_results_df <- read.csv("DEG_DMR_linear_models_withAgeGender.csv")



# ---------------------------------------------------------------
# Ready for Network Construction and Annotation
# ---------------------------------------------------------------

# Load GeneNet for partial correlation analysis
library(GeneNet)

# --------------------------
# 1. Gene–Gene Network
# --------------------------

# Correlation matrix for genes across samples (transpose needed)
gene_corr <- cor(t(rna_deg), method = "pearson", use = "pairwise.complete.obs")

# Estimate partial correlations between genes
gene_pcor <- ggm.estimate.pcor(t(rna_deg))

## Estimating optimal shrinkage intensity lambda (correlation matrix): 0.0938

# Test partial correlations to get edges (undirected network)
gene_edges <- network.test.edges(gene_pcor, direct = FALSE)

## Estimate (local) false discovery rates (partial correlations):
## Step 1... determine cutoff point
## Step 2... estimate parameters of null distribution and eta0
## Step 3... compute p-values and estimate empirical PDF/CDF
## Step 4... compute q-values and local fdr
## Step 5... prepare for plotting

# Keep only significant gene–gene interactions (p < 0.05)
gene_edges_sig <- subset(gene_edges, pval < 0.05)

# Retrieve gene names to map from index to names
gene_names <- rownames(rna_deg)
gene_edges_sig$node1 <- gene_names[gene_edges_sig$node1]
gene_edges_sig$node2 <- gene_names[gene_edges_sig$node2]

# --------------------------
# 2. CpG–CpG Network
# --------------------------

# To save memory, subset to 1000 CpGs
test_meth <- meth_dmr[1:1000, ] 

# Estimate CpG–CpG partial correlations
cpg_pcor <- ggm.estimate.pcor(t(test_meth))

## Estimating optimal shrinkage intensity lambda (correlation matrix): 0.0946

cpg_edges <- network.test.edges(cpg_pcor, direct = FALSE)

## Estimate (local) false discovery rates (partial correlations):
## Step 1... determine cutoff point
## Step 2... estimate parameters of null distribution and eta0
## Step 3... compute p-values and estimate empirical PDF/CDF
## Step 4... compute q-values and local fdr
## Step 5... prepare for plotting

cpg_edges_sig <- subset(cpg_edges, pval < 0.05)

# Map node indices to CpG IDs
cpg_names <- rownames(test_meth)
cpg_edges_sig$node1 <- cpg_names[cpg_edges_sig$node1]
cpg_edges_sig$node2 <- cpg_names[cpg_edges_sig$node2]

# --------------------------
# 3. DEG–DMR Correlation Edges (from regression)
# --------------------------

reg_edges <- data.frame(
  Source = reg_results_df$CpG,
  Target = reg_results_df$Gene,
  Type = "CpG-Gene",
  Weight = -log10(reg_results_df$p_CpG)
)

# --------------------------
# 4. Format Gene–Gene Edges
# --------------------------

gene_edges_fmt <- data.frame(
  Source = gene_edges_sig$node1,
  Target = gene_edges_sig$node2,
  Type = "Gene-Gene",
  Weight = -log10(gene_edges_sig$pval)
)

# --------------------------
# 5. Format CpG–CpG Edges
# --------------------------

cpg_edges_fmt <- data.frame(
  Source = cpg_edges_sig$node1,
  Target = cpg_edges_sig$node2,
  Type = "CpG-CpG",
  Weight = -log10(cpg_edges_sig$pval)
)

# --------------------------
# 6. Combine all edges into full network
# --------------------------

full_network <- rbind(reg_edges, gene_edges_fmt, cpg_edges_fmt)
write.csv(full_network, "full_multiomics_network.csv", row.names = FALSE)

# ---------------------------------------------------------------
# ANNOTATION OF NETWORK EDGES
# ---------------------------------------------------------------

# --------------------------
# Annotate Gene–Gene Edges
# --------------------------

# Ensure correct data types
gene_edges_fmt <- gene_edges_fmt %>%
  mutate(Source = as.character(Source), Target = as.character(Target))

# Get EntrezID–Symbol mapping
gene_ids <- unique(c(gene_edges_fmt$Source, gene_edges_fmt$Target))
gene_anno <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = gene_ids,
  columns = c("ENTREZID", "SYMBOL"),
  keytype = "ENTREZID"
)

## 'select()' returned 1:1 mapping between keys and columns

# Map symbols for source and target
gene_edges_annot <- gene_edges_fmt %>%
  left_join(gene_anno, by = c("Source" = "ENTREZID")) %>%
  rename(Source_Symbol = SYMBOL) %>%
  left_join(gene_anno, by = c("Target" = "ENTREZID")) %>%
  rename(Target_Symbol = SYMBOL) %>%
  filter(!is.na(Source_Symbol), !is.na(Target_Symbol))  # remove missing

# Save
write.csv(gene_edges_annot, "gene_edges_annotated.csv", row.names = FALSE)

# --------------------------
# Annotate CpG–CpG Edges
# --------------------------

# Load 450k annotation
ann450k <- getAnnotation(IlluminaHumanMethylation450kanno.ilmn12.hg19)

# Ensure character columns
cpg_edges_fmt <- cpg_edges_fmt %>%
  mutate(Source = as.character(Source), Target = as.character(Target))

# Get CpG IDs and subset annotation
cpg_ids <- unique(c(cpg_edges_fmt$Source, cpg_edges_fmt$Target))
ann_sub <- ann450k[rownames(ann450k) %in% cpg_ids, ]

# Create CpG annotation dataframe
cpg_anno_df <- data.frame(
  CpG_ID = rownames(ann_sub),
  Gene_Annot = ann_sub$UCSC_RefGene_Name
)

# Keep only first gene name per CpG
cpg_anno_df$Gene_Annot <- sapply(strsplit(as.character(cpg_anno_df$Gene_Annot), ";"), `[`, 1)

# Annotate CpG edges
cpg_edges_annot <- cpg_edges_fmt %>%
  left_join(cpg_anno_df, by = c("Source" = "CpG_ID")) %>%
  rename(Source_CpG_Gene = Gene_Annot) %>%
  left_join(cpg_anno_df, by = c("Target" = "CpG_ID")) %>%
  rename(Target_CpG_Gene = Gene_Annot) %>%
  filter(!is.na(Source_CpG_Gene), !is.na(Target_CpG_Gene))

# Save
write.csv(cpg_edges_annot, "cpg_edges_annotated.csv", row.names = FALSE)

# --------------------------
# Annotate CpG–Gene Edges (from regression)
# --------------------------

reg_edges <- reg_edges %>%
  mutate(Source = as.character(Source), Target = as.character(Target))

# Gene annotation
gene_anno_reg <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = unique(reg_edges$Target),
  columns = c("ENTREZID", "SYMBOL"),
  keytype = "ENTREZID"
)

## 'select()' returned 1:1 mapping between keys and columns

# Annotate gene symbols
reg_edges_annot <- reg_edges %>%
  left_join(gene_anno_reg, by = c("Target" = "ENTREZID")) %>%
  rename(Target_Symbol = SYMBOL)

# Annotate CpG genes
ann_sub_cpg <- ann450k[rownames(ann450k) %in% reg_edges_annot$Source, ]
cpg_anno_df <- data.frame(
  Source = rownames(ann_sub_cpg),
  CpG_Gene_Annotation = ann_sub_cpg$UCSC_RefGene_Name
)
cpg_anno_df$CpG_Gene_Annotation <- sapply(strsplit(as.character(cpg_anno_df$CpG_Gene_Annotation), ";"), `[`, 1)

# Join CpG annotations
reg_edges_annot <- reg_edges_annot %>%
  left_join(cpg_anno_df, by = "Source") %>%
  filter(!is.na(Target_Symbol), !is.na(CpG_Gene_Annotation))

# Save
write.csv(reg_edges_annot, "gene-cpg_edges_annotated.csv", row.names = FALSE)

Multi-Omics Analysis of Multiple Sclerosis

Rofida Gamal (Original Instructor)

Mohamed Hussein (Code Structuring,Analysis Execution,GitHub Publication)

2025-08-01

Task 6: Multi-Omics Network Construction and Annotation

Load All Required Libraries for Multi-Omics Analysis