Notebook VAE Part 2 PanCan Figure 4
Consolidate pathways so that we can do the metabolomics vis¶
In [3]:
# Need to add in entrez gene ID and also make labels for genes for ORA
# Imports
import pandas as pd
import os
from sciutil import SciUtil
# Setup file locations and label of the cancer
u = SciUtil()
cancer = 'PanCan'
input_dir = 'Input_RCM'
output_dir = 'Output_Data'
supp_dir = 'Required_Refs'
fig_dir = 'Output_Figures'
regLabel = 'RG2_Changes_filtered'
files = [f for f in os.listdir(fig_dir) if 'GSEA' in f]
# Save consolidated pathways for each of the analyses
conditions = ['S4_vs_S1', 'Late_vs_Early']
for cond in conditions:
c_df = pd.DataFrame()
for f in files:
if cond in f and '_Pathways' in f:
df = pd.read_csv(os.path.join(fig_dir, f))
c_df = pd.concat([c_df, df])
u.dp([f])
print(df[df['padj'] < 0.25])
c_df.to_csv(os.path.join(output_dir, f'{cond}_Pathways.tsv'), sep='\t', index=False)
-------------------------------------------------------------------------------- Late_vs_Early_TPDE_TMDS_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TMDE_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TPDS_TMDE_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_MDE_TMDS_GSEA_Pathways.csv -------------------------------------------------------------------------------- pathway pval padj \ 0 KEGG_PENTOSE_AND_GLUCURONATE_INTERCONVERSIONS 0.000234 0.001262 1 KEGG_ASCORBATE_AND_ALDARATE_METABOLISM 0.000236 0.001262 2 KEGG_STEROID_HORMONE_BIOSYNTHESIS 0.000476 0.002203 4 KEGG_STARCH_AND_SUCROSE_METABOLISM 0.000234 0.001262 5 KEGG_RETINOL_METABOLISM 0.000234 0.001262 6 KEGG_PORPHYRIN_AND_CHLOROPHYLL_METABOLISM 0.000236 0.001262 7 KEGG_METABOLISM_OF_XENOBIOTICS_BY_CYTOCHROME_P450 0.000239 0.001262 8 KEGG_DRUG_METABOLISM_CYTOCHROME_P450 0.000236 0.001262 9 KEGG_DRUG_METABOLISM_OTHER_ENZYMES 0.000954 0.003922 30 KEGG_REGULATION_OF_ACTIN_CYTOSKELETON 0.044621 0.165097 ES NES nMoreExtreme size leadingEdge 0 -0.936691 -2.185990 0 7 NaN 1 -0.905553 -2.202955 0 8 NaN 2 -0.778405 -2.155729 1 12 NaN 4 -0.936691 -2.185990 0 7 NaN 5 -0.936691 -2.185990 0 7 NaN 6 -0.938040 -2.281989 0 8 NaN 7 -0.824223 -2.219805 0 11 NaN 8 -0.903212 -2.197261 0 8 NaN 9 -0.822884 -2.072998 3 9 NaN 30 0.752342 1.481521 252 5 NaN -------------------------------------------------------------------------------- Late_vs_Early_All_GSEA_Pathways.csv -------------------------------------------------------------------------------- pathway pval padj \ 2 KEGG_PENTOSE_AND_GLUCURONATE_INTERCONVERSIONS 0.000356 0.009637 4 KEGG_ASCORBATE_AND_ALDARATE_METABOLISM 0.000390 0.009637 6 KEGG_STEROID_HORMONE_BIOSYNTHESIS 0.000510 0.009637 12 KEGG_VALINE_LEUCINE_AND_ISOLEUCINE_DEGRADATION 0.002042 0.025059 20 KEGG_GLUTATHIONE_METABOLISM 0.038812 0.238163 21 KEGG_STARCH_AND_SUCROSE_METABOLISM 0.000479 0.009637 22 KEGG_O_GLYCAN_BIOSYNTHESIS 0.000571 0.009637 29 KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_LACTO_AND_... 0.014849 0.125286 31 KEGG_PROPANOATE_METABOLISM 0.013635 0.122711 32 KEGG_BUTANOATE_METABOLISM 0.024190 0.178641 36 KEGG_RETINOL_METABOLISM 0.000510 0.009637 37 KEGG_PORPHYRIN_AND_CHLOROPHYLL_METABOLISM 0.000410 0.009637 39 KEGG_METABOLISM_OF_XENOBIOTICS_BY_CYTOCHROME_P450 0.000719 0.009705 40 KEGG_DRUG_METABOLISM_CYTOCHROME_P450 0.000667 0.009705 41 KEGG_DRUG_METABOLISM_OTHER_ENZYMES 0.003833 0.039807 43 KEGG_DNA_REPLICATION 0.000430 0.009637 48 KEGG_MAPK_SIGNALING_PATHWAY 0.041329 0.242583 63 KEGG_VASCULAR_SMOOTH_MUSCLE_CONTRACTION 0.002765 0.031103 68 KEGG_TGF_BETA_SIGNALING_PATHWAY 0.010533 0.101567 75 KEGG_GAP_JUNCTION 0.025142 0.178641 76 KEGG_COMPLEMENT_AND_COAGULATION_CASCADES 0.018640 0.148027 91 KEGG_LONG_TERM_DEPRESSION 0.029724 0.191085 92 KEGG_OLFACTORY_TRANSDUCTION 0.026492 0.178820 ES NES nMoreExtreme size leadingEdge 2 -0.942764 -2.444498 0 8 NaN 4 -0.884508 -2.469377 0 10 NaN 6 -0.752112 -2.490970 0 17 NaN 12 -0.585282 -1.938436 3 17 NaN 20 -0.482920 -1.542887 80 15 NaN 21 -0.758256 -2.422559 0 15 NaN 22 -0.940797 -1.876084 1 4 NaN 29 -0.854682 -1.704360 51 4 NaN 31 -0.639334 -1.784897 34 10 NaN 32 -0.584562 -1.683046 58 11 NaN 36 -0.664802 -2.201801 0 17 NaN 37 -0.873801 -2.515809 0 11 NaN 39 -0.615137 -2.367093 0 29 NaN 40 -0.595699 -2.266101 0 27 NaN 41 -0.612114 -1.955648 7 15 NaN 43 -0.761750 -2.252539 0 12 NaN 48 0.491306 1.432069 361 32 NaN 63 0.614874 1.680516 22 23 NaN 68 0.631382 1.606041 83 16 NaN 75 0.609436 1.527186 198 15 NaN 76 0.489494 1.485862 166 42 NaN 91 0.659344 1.501829 220 10 NaN 92 0.684786 1.517401 193 9 NaN -------------------------------------------------------------------------------- Late_vs_Early_MDS_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_MDS_TMDE_GSEA_Pathways.csv -------------------------------------------------------------------------------- pathway pval padj ES \ 0 KEGG_CALCIUM_SIGNALING_PATHWAY 0.008573 0.029775 0.754625 1 KEGG_NEUROACTIVE_LIGAND_RECEPTOR_INTERACTION 0.014888 0.029775 0.658241 3 KEGG_PATHWAYS_IN_CANCER 0.112269 0.149692 0.644616 NES nMoreExtreme size leadingEdge 0 1.639496 66 6 NaN 1 1.595228 123 9 NaN 3 1.330025 850 5 NaN -------------------------------------------------------------------------------- Late_vs_Early_TPDE_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_MDE_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TPDS_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TMDS_GSEA_Pathways.csv -------------------------------------------------------------------------------- pathway pval padj \ 3 KEGG_VALINE_LEUCINE_AND_ISOLEUCINE_DEGRADATION 0.033410 0.211598 8 KEGG_PROPANOATE_METABOLISM 0.008338 0.158413 18 KEGG_PEROXISOME 0.047454 0.225405 19 KEGG_VASCULAR_SMOOTH_MUSCLE_CONTRACTION 0.028779 0.211598 20 KEGG_FOCAL_ADHESION 0.006534 0.158413 23 KEGG_TIGHT_JUNCTION 0.017152 0.162940 25 KEGG_COMPLEMENT_AND_COAGULATION_CASCADES 0.014672 0.162940 27 KEGG_REGULATION_OF_ACTIN_CYTOSKELETON 0.046582 0.225405 ES NES nMoreExtreme size leadingEdge 3 -0.572985 -1.602055 115 13 NaN 8 -0.807692 -1.748648 32 6 NaN 18 -0.798469 -1.512936 204 4 NaN 19 0.756585 1.518011 173 6 NaN 20 0.707873 1.690223 41 11 NaN 23 0.648743 1.618783 111 13 NaN 25 0.544226 1.599616 102 26 NaN 27 0.680449 1.488905 290 8 NaN
In [4]:
# Save consolidated pathways for each of the analyses
conditions = ['S4_vs_S1', 'Late_vs_Early']
for cond in conditions:
c_df = pd.DataFrame()
for f in files:
if cond in f and 'MetabolicPathways' in f:
df = pd.read_csv(os.path.join(fig_dir, f))
c_df = pd.concat([c_df, df])
u.dp([f])
print(df[df['padj'] < 0.25])
c_df.to_csv(os.path.join(output_dir, f'{cond}_MetabolicPathways.tsv'), sep='\t', index=False)
-------------------------------------------------------------------------------- Late_vs_Early_TMDE_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TMDS_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- pathway pval padj ES \ 4 Valine, Leucine and Isoleucine Metabolism 0.025925 0.129625 -0.669388 NES nMoreExtreme size leadingEdge 4 -1.673824 95 9 NaN -------------------------------------------------------------------------------- Late_vs_Early_All_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- pathway pval padj ES NES \ 25 O-Glycan Biosynthesis 0.001418 0.060993 -0.940797 -1.851958 nMoreExtreme size leadingEdge 25 4 4 NaN -------------------------------------------------------------------------------- Late_vs_Early_TPDS_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_MDE_TMDS_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TPDE_TMDS_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_MDS_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TPDS_TMDE_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_MDS_TMDE_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_MDE_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TPDE_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: []
In [ ]: