Notebook for RCM data with metabolic pathwaysΒΆ

RCM Results_Metabolomics
#load the needed libraries:
library(tidyverse) # used for data manipulation
library(rmarkdown) # used for paged_table function

Samples

Here we use the results of the SiRCle output (RCM-Results).
Moreover, the metabolic signatures used to perform the GSEA based on Gaude et al are used to select specific metabolic pathways of interest.

#RCM Data
RCM_Data<- read.csv("Input_RCM-Results/RCM_all_patients_ccRCC_P0.5-R1.0-M0.1-GENES.csv")

RemoveDublons <-function(MyData){
  MyData <- MyData[complete.cases(MyData),] 
  doublons <- as.character(MyData[duplicated(MyData$external_gene_name),"external_gene_name"])
  print("Number of duplicated genes:")
  print(length(doublons))
  # Keep the entry with the greatest Log2FC:
  MyData$absLogFC <- abs(MyData$logFC_protein)
  MyData <- MyData[ order(MyData$absLogFC), ]
  MyData_Select <- MyData[!duplicated(MyData$external_gene_name),]
  #Safe:
  OutputFileName <-  MyData_Select
}#Remove duplicated genes: Function 
RCM_Data_ND <- RemoveDublons(MyData= RCM_Data)
## [1] "Number of duplicated genes:"
## [1] 273
#----------------------------------------------------------
#Metabolic Pathway
Metabolic_Signature <-read.csv("Input_MetabolicPathways_Gaude/41467_2016_BFncomms13041_MOESM340_ESM.csv")

Correction_Metabolic_Signature <- read.csv("Input_MetabolicPathways_Gaude/41467_2016_BFncomms13041_MOESM341_ESM.csv")%>% 
    mutate(Unique = case_when(associated_Pathways =="1" ~ 'Unique',
                                  TRUE ~ 'In multiple Pathways'))
Metabolic_Signature <-merge(x=Metabolic_Signature, y=Correction_Metabolic_Signature, by.x ="gene", by.y="external_gene_name", all.x=TRUE)

Plots

Protein expression

This refers to the proteomics results used as input data. here we use the results of for the SiRCle cluster (RCM data) to take the protein Log2FC and p.adj as well as the information to which SiRCle cluster a protein was assigned to.

Volcano Plot

Metabolic enzyme can be part of multiple metabolic pathways. Here, we will highlight enzymes that are unique for the pathway and enzymes that are part of multiple pathways.

#Establish function:
library(ggrepel)
library(EnhancedVolcano)
VolcanoPlot_Protein <- function(Input, Signature, Signature_term){
  Pathway <- subset(Signature, term == paste(Signature_term))
  Volcano1  <- merge(x=Pathway,y=Input, by.x="gene", by.y="external_gene_name", all.x=TRUE)%>%
  na.omit()
  #Prepare the colour scheme:
  keyvals <- ifelse(
    Volcano1$Regulation_Grouping_2 == "MDS", "#CF4692",
    ifelse(Volcano1$Regulation_Grouping_2 == "MDS_TMDE", "#A16BAA",
    ifelse(Volcano1$Regulation_Grouping_2 == "MDE", "#6AAF43",
    ifelse(Volcano1$Regulation_Grouping_2 == "MDE_TMDS", "#0E8D6D",
    ifelse(Volcano1$Regulation_Grouping_2 == "TPDS", "#452D76",
    ifelse(Volcano1$Regulation_Grouping_2 == "TPDS_TMDE", "#7E4090",
    ifelse(Volcano1$Regulation_Grouping_2 == "TPDE", "#E58D26",
    ifelse(Volcano1$Regulation_Grouping_2 == "TPDE_TMDS", "#844D13",
    ifelse(Volcano1$Regulation_Grouping_2 == "TMDS", "#3A58A3",
    ifelse(Volcano1$Regulation_Grouping_2 == "TMDE", "#E5322F",
    ifelse(Volcano1$Regulation_Grouping_2 == "None", "grey",       
           "black")))))))))))
  names(keyvals)[is.na(keyvals)] <- "black"
  names(keyvals)[keyvals == 'black'] <- "NA"
  names(keyvals)[keyvals == '#CF4692'] <- "MDS"
  names(keyvals)[keyvals == '#A16BAA'] <- "MDS_TMDE"
  names(keyvals)[keyvals == '#6AAF43'] <- "MDE"
  names(keyvals)[keyvals == '#0E8D6D'] <- "MDE_TMDS"
  names(keyvals)[keyvals == '#452D76'] <- "TPDS"
  names(keyvals)[keyvals == '#7E4090'] <- "TPDS_TMDE"
  names(keyvals)[keyvals == '#E58D26'] <- "TPDE"
  names(keyvals)[keyvals == '#844D13'] <- "TPDE_TMDS"
  names(keyvals)[keyvals == '#3A58A3'] <- "TMDS"
  names(keyvals)[keyvals == '#E5322F'] <- "TMDE"
  names(keyvals)[keyvals == 'grey'] <- "None"
  #Prepare the symbols:
  keyvals.shape <- ifelse(
    Volcano1$Unique == "Unique", 19,
      ifelse(Volcano1$Unique == "In multiple Pathways", 18,
        3))
  keyvals.shape[is.na(keyvals.shape)] <- 3
  names(keyvals.shape)[keyvals.shape == 3] <- 'NA'
  names(keyvals.shape)[keyvals.shape == 19] <- 'Unique'
  names(keyvals.shape)[keyvals.shape == 18] <- 'In multiple Pathways'
  #plot
  VolcanoPlot <- EnhancedVolcano (Volcano1,
                lab = Volcano1$gene,#Metabolite name
                x = "logFC_protein",#Log2FC
                y = "padj_protein",#p-value or q-value
                xlab = "Log2FC (Protein)",
                ylab = bquote(~-Log[10]~`p.adj`),#(~-Log[10]~adjusted~italic(P))
                pCutoff = 0.05,
                FCcutoff = 0.5,#Cut off Log2FC, automatically 2
                pointSize = 3,
                labSize = 1,
                shapeCustom = keyvals.shape,
                colCustom = keyvals,
                titleLabSize = 16,
                col=c("black", "grey", "grey", "purple"),#if you want to change colors
                colAlpha = 0.5,
                title=paste(Signature_term),
                subtitle = bquote(italic("Metabolic Pathway")),
                caption = paste0("total = ", nrow(Volcano1), " genes of ", nrow(Pathway), " genes in pathway"),
                xlim = c((((Reduce(min,Volcano1$logFC_protein))))-0.5,(((Reduce(max,Volcano1$logFC_protein))))+0.5),
                ylim = c(0,((-log10(Reduce(min,Volcano1$padj_protein))))+0.1),
                #drawConnectors = TRUE,
                #widthConnectors = 0.5,
                #colConnectors = "black",
                cutoffLineType = "dashed",
                cutoffLineCol = "black",
                cutoffLineWidth = 0.5,
                legendPosition = 'right',
                legendLabSize = 12,
                legendIconSize = 5.0
                )
  ggsave(file=paste("Output_Figures/Output_VolcanoProtein/VolcanoPlot_GAUDE", Signature_term, "RCM.pdf", sep="_"), plot=VolcanoPlot, width=10, height=8)
  plot(VolcanoPlot)
}

#Run the function:
Pathway_Names <- Metabolic_Signature[!duplicated(Metabolic_Signature$term),]
Pathway_Names <- Pathway_Names$term

for (i in Pathway_Names){
  VolcanoPlot_Protein(Input=RCM_Data_ND,
            Signature= Metabolic_Signature,
            Signature_term=i)
}

Pie Chart

Pie Chart of the distribution in percentage of SiRCle clusters in a Pathway.

# Calculate the Percentage of each cluster being represented in the pathway:
# Establish the function:
PieChart_Protein <- function(Input, Signature, Signature_term){
  Pathway <- subset(Signature, term == paste(Signature_term))
  Signature_Merge <- merge(x=Pathway,y=Input, by.x="gene", by.y="external_gene_name", all.x=TRUE)%>%
    na.omit()
  Signature_Merge_Summary <- Signature_Merge%>%
    group_by(Regulation_Grouping_2) %>% 
    summarise(percent = round(100 * n() / nrow(Signature_Merge)))
  Signature_Merge_Summary$Label <- paste(Signature_Merge_Summary$Regulation_Grouping_2, " (", Signature_Merge_Summary$percent, "%)")

  Signature_Merge_Summary$count <- Signature_Merge%>% 
    group_by(Regulation_Grouping_2) %>%
    summarise(count = sum(Regulation_Grouping_2== 'MDS' ,
                          Regulation_Grouping_2== 'MDE',
                          Regulation_Grouping_2== 'TPDS',
                          Regulation_Grouping_2== 'TPDE',
                          Regulation_Grouping_2== 'TMDS',
                          Regulation_Grouping_2== 'TMDE' ,
                          Regulation_Grouping_2== 'MDE_TMDS',
                          Regulation_Grouping_2== 'MDS_TMDE',
                          Regulation_Grouping_2== 'TPDE_TMDS',
                          Regulation_Grouping_2== 'TPDS_TMDE',
                          Regulation_Grouping_2== 'None'))

  Signature_Merge_Summary <- Signature_Merge_Summary%>% 
      mutate(colour = case_when(Regulation_Grouping_2 =="MDS" ~ '#CF4692',
                              Regulation_Grouping_2 =="MDS_TMDE" ~ '#A16BAA',
                              Regulation_Grouping_2 =="MDE" ~ '#6AAF43',
                              Regulation_Grouping_2 =="MDE_TMDS" ~ '#0E8D6D',
                              Regulation_Grouping_2 =="TPDS" ~ '#452D76',
                              Regulation_Grouping_2 =="TPDS_TMDE" ~ '#7E4090',
                              Regulation_Grouping_2 =="TPDE" ~ '#E58D26',
                              Regulation_Grouping_2 =="TPDE_TMDS" ~ '#844D13',
                              Regulation_Grouping_2 =="TMDS" ~ '#3A58A3',
                              Regulation_Grouping_2 =="TMDE" ~ '#E5322F',
                              Regulation_Grouping_2 =="None" ~ 'grey',
                                  TRUE ~ 'Not_Detected'))

#Plot
  PieChart <- ggplot(Signature_Merge_Summary, aes(x="", y=percent, fill=Label))+
    geom_bar(width = 1, stat = "identity")+
    coord_polar("y", start=0)+
    #scale_fill_grey() + 
    scale_fill_manual(values=Signature_Merge_Summary$colour)+
    theme_minimal()+
    theme(axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          panel.border = element_blank(),
          panel.grid=element_blank(),
          axis.ticks = element_blank(),
          plot.title=element_text(size=14, face="bold"))+
  ggtitle(paste("", Signature_term))+
  labs(fill = "SiRCle cluster")
  ggsave(file=paste("Output_Figures/Output_PieChartProtein/PieChart", Signature_term, ".pdf", sep="_"), plot= PieChart, width=8, height=6)
  plot( PieChart)
}

# Run the function:
Pathway_Names <- Metabolic_Signature[!duplicated(Metabolic_Signature$term),]
Pathway_Names <- Pathway_Names$term

for (i in Pathway_Names){
 PieChart_Protein(Input=RCM_Data_ND,
            Signature= Metabolic_Signature,
            Signature_term=i)
}


For Figure 3a, we need some specific pie charts that are based on the enzymes shown in the specific pathways.

Manual <- read.csv("Input_RCM-Results/PieCharts_ForFigure_Manuall.csv", check.names=FALSE)

ManualPathway <- c("Glycolysis", "TCA cycle", "Serine-Cysteine Biosynthesis")

PieChart_Manual <- function(Input, Signature_term){
  DF <- subset(Input, Pathway == paste(Signature_term))
  
#Plot
  PieChart <- ggplot(DF, aes(x="", y=`Genes %`, fill=`SiRCle Cluster`))+
    geom_bar(width = 1, stat = "identity")+
    coord_polar("y", start=0)+
    scale_fill_grey() + 
    theme_minimal()+
    theme(axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          panel.border = element_blank(),
          panel.grid=element_blank(),
          axis.ticks = element_blank(),
          plot.title=element_text(size=14, face="bold"))+
  ggtitle(paste("", Signature_term))+
  labs(fill = "SiRCle cluster")
  ggsave(file=paste("Output_Figures/Output_Fig3a/Fig3a_ManualPieChart", Signature_term, ".pdf", sep="_"), plot= PieChart, width=8, height=6)
  plot( PieChart)
}


for (i in ManualPathway){
 PieChart_Manual(Input=Manual,
            Signature_term=i)
}

Information about packages and versions used

sessionInfo()
## R version 4.2.0 (2022-04-22 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17134)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252 
## [2] LC_CTYPE=English_United Kingdom.1252   
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C                           
## [5] LC_TIME=English_United Kingdom.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] EnhancedVolcano_1.14.0 ggrepel_0.9.1          rmarkdown_2.14        
##  [4] forcats_0.5.1          stringr_1.4.0          dplyr_1.0.9           
##  [7] purrr_0.3.4            readr_2.1.2            tidyr_1.2.0           
## [10] tibble_3.1.7           ggplot2_3.3.6          tidyverse_1.3.1       
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.1.2 xfun_0.31        bslib_0.3.1      haven_2.5.0     
##  [5] colorspace_2.0-3 vctrs_0.4.1      generics_0.1.2   htmltools_0.5.2 
##  [9] yaml_2.3.5       utf8_1.2.2       rlang_1.0.2      jquerylib_0.1.4 
## [13] pillar_1.7.0     withr_2.5.0      glue_1.6.2       DBI_1.1.3       
## [17] dbplyr_2.2.0     modelr_0.1.8     readxl_1.4.0     lifecycle_1.0.1 
## [21] munsell_0.5.0    gtable_0.3.0     cellranger_1.1.0 rvest_1.0.2     
## [25] evaluate_0.15    labeling_0.4.2   knitr_1.39       tzdb_0.3.0      
## [29] fastmap_1.1.0    fansi_1.0.3      highr_0.9        Rcpp_1.0.8.3    
## [33] broom_0.8.0      backports_1.4.1  scales_1.2.0     jsonlite_1.8.0  
## [37] farver_2.1.0     fs_1.5.2         hms_1.1.1        digest_0.6.29   
## [41] stringi_1.7.6    grid_4.2.0       cli_3.3.0        tools_4.2.0     
## [45] magrittr_2.0.3   sass_0.4.1       crayon_1.5.1     pkgconfig_2.0.3 
## [49] ellipsis_0.3.2   xml2_1.3.3       reprex_2.0.1     lubridate_1.8.0 
## [53] assertthat_0.2.1 httr_1.4.3       rstudioapi_0.13  R6_2.5.1        
## [57] compiler_4.2.0