Notebook VAE Part 2 PanCan Figure 5
Print out and look at significant pathways
In [1]:
# Need to add in entrez gene ID and also make labels for genes for ORA
# Imports
import pandas as pd
import os
from sciutil import SciUtil
# Setup file locations and label of the cancer
u = SciUtil()
cancer = 'PanCan'
input_dir = 'Input_RCM'
output_dir = 'Output_Data'
supp_dir = 'Required_Refs'
fig_dir = 'Output_Figures'
regLabel = 'RG2_Changes_filtered'
# Adjusted p-value cutoff used to decide which pathways are printed below.
padj_cutoff = 0.25

# Collect the GSEA result tables written to fig_dir.
#  - sorted(): os.listdir returns entries in arbitrary order, so sort to make
#    the printed report reproducible between runs and machines.
#  - .endswith('.csv'): fig_dir may also hold non-CSV artefacts whose names
#    contain 'GSEA'; those must not be handed to pd.read_csv.
files = sorted(
    f for f in os.listdir(fig_dir)
    if 'GSEA' in f and f.endswith('.csv')
)

# For every GSEA table, print a header with the file name followed by the rows
# whose adjusted p-value is below the cutoff (an empty frame is printed when
# no pathway passes).
for f in files:
    df = pd.read_csv(os.path.join(fig_dir, f))
    u.dp([f])
    print(df[df['padj'] < padj_cutoff])
-------------------------------------------------------------------------------- Late_vs_Early_TPDE_TMDS_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TMDE_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TMDE_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TPDS_TMDE_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_MDE_TMDS_GSEA_Pathways.csv -------------------------------------------------------------------------------- pathway pval padj \ 0 KEGG_PENTOSE_AND_GLUCURONATE_INTERCONVERSIONS 0.000234 0.001262 1 KEGG_ASCORBATE_AND_ALDARATE_METABOLISM 0.000236 0.001262 2 KEGG_STEROID_HORMONE_BIOSYNTHESIS 0.000476 0.002203 4 KEGG_STARCH_AND_SUCROSE_METABOLISM 0.000234 0.001262 5 KEGG_RETINOL_METABOLISM 0.000234 0.001262 6 KEGG_PORPHYRIN_AND_CHLOROPHYLL_METABOLISM 0.000236 0.001262 7 KEGG_METABOLISM_OF_XENOBIOTICS_BY_CYTOCHROME_P450 0.000239 0.001262 8 KEGG_DRUG_METABOLISM_CYTOCHROME_P450 0.000236 0.001262 9 KEGG_DRUG_METABOLISM_OTHER_ENZYMES 0.000954 0.003922 30 KEGG_REGULATION_OF_ACTIN_CYTOSKELETON 
0.044621 0.165097 ES NES nMoreExtreme size leadingEdge 0 -0.936691 -2.185990 0 7 NaN 1 -0.905553 -2.202955 0 8 NaN 2 -0.778405 -2.155729 1 12 NaN 4 -0.936691 -2.185990 0 7 NaN 5 -0.936691 -2.185990 0 7 NaN 6 -0.938040 -2.281989 0 8 NaN 7 -0.824223 -2.219805 0 11 NaN 8 -0.903212 -2.197261 0 8 NaN 9 -0.822884 -2.072998 3 9 NaN 30 0.752342 1.481521 252 5 NaN -------------------------------------------------------------------------------- Late_vs_Early_All_GSEA_Pathways.csv -------------------------------------------------------------------------------- pathway pval padj \ 2 KEGG_PENTOSE_AND_GLUCURONATE_INTERCONVERSIONS 0.000356 0.009637 4 KEGG_ASCORBATE_AND_ALDARATE_METABOLISM 0.000390 0.009637 6 KEGG_STEROID_HORMONE_BIOSYNTHESIS 0.000510 0.009637 12 KEGG_VALINE_LEUCINE_AND_ISOLEUCINE_DEGRADATION 0.002042 0.025059 20 KEGG_GLUTATHIONE_METABOLISM 0.038812 0.238163 21 KEGG_STARCH_AND_SUCROSE_METABOLISM 0.000479 0.009637 22 KEGG_O_GLYCAN_BIOSYNTHESIS 0.000571 0.009637 29 KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_LACTO_AND_... 
0.014849 0.125286 31 KEGG_PROPANOATE_METABOLISM 0.013635 0.122711 32 KEGG_BUTANOATE_METABOLISM 0.024190 0.178641 36 KEGG_RETINOL_METABOLISM 0.000510 0.009637 37 KEGG_PORPHYRIN_AND_CHLOROPHYLL_METABOLISM 0.000410 0.009637 39 KEGG_METABOLISM_OF_XENOBIOTICS_BY_CYTOCHROME_P450 0.000719 0.009705 40 KEGG_DRUG_METABOLISM_CYTOCHROME_P450 0.000667 0.009705 41 KEGG_DRUG_METABOLISM_OTHER_ENZYMES 0.003833 0.039807 43 KEGG_DNA_REPLICATION 0.000430 0.009637 48 KEGG_MAPK_SIGNALING_PATHWAY 0.041329 0.242583 63 KEGG_VASCULAR_SMOOTH_MUSCLE_CONTRACTION 0.002765 0.031103 68 KEGG_TGF_BETA_SIGNALING_PATHWAY 0.010533 0.101567 75 KEGG_GAP_JUNCTION 0.025142 0.178641 76 KEGG_COMPLEMENT_AND_COAGULATION_CASCADES 0.018640 0.148027 91 KEGG_LONG_TERM_DEPRESSION 0.029724 0.191085 92 KEGG_OLFACTORY_TRANSDUCTION 0.026492 0.178820 ES NES nMoreExtreme size leadingEdge 2 -0.942764 -2.444498 0 8 NaN 4 -0.884508 -2.469377 0 10 NaN 6 -0.752112 -2.490970 0 17 NaN 12 -0.585282 -1.938436 3 17 NaN 20 -0.482920 -1.542887 80 15 NaN 21 -0.758256 -2.422559 0 15 NaN 22 -0.940797 -1.876084 1 4 NaN 29 -0.854682 -1.704360 51 4 NaN 31 -0.639334 -1.784897 34 10 NaN 32 -0.584562 -1.683046 58 11 NaN 36 -0.664802 -2.201801 0 17 NaN 37 -0.873801 -2.515809 0 11 NaN 39 -0.615137 -2.367093 0 29 NaN 40 -0.595699 -2.266101 0 27 NaN 41 -0.612114 -1.955648 7 15 NaN 43 -0.761750 -2.252539 0 12 NaN 48 0.491306 1.432069 361 32 NaN 63 0.614874 1.680516 22 23 NaN 68 0.631382 1.606041 83 16 NaN 75 0.609436 1.527186 198 15 NaN 76 0.489494 1.485862 166 42 NaN 91 0.659344 1.501829 220 10 NaN 92 0.684786 1.517401 193 9 NaN -------------------------------------------------------------------------------- Late_vs_Early_MDS_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_MDS_TMDE_GSEA_Pathways.csv 
-------------------------------------------------------------------------------- pathway pval padj ES \ 0 KEGG_CALCIUM_SIGNALING_PATHWAY 0.008573 0.029775 0.754625 1 KEGG_NEUROACTIVE_LIGAND_RECEPTOR_INTERACTION 0.014888 0.029775 0.658241 3 KEGG_PATHWAYS_IN_CANCER 0.112269 0.149692 0.644616 NES nMoreExtreme size leadingEdge 0 1.639496 66 6 NaN 1 1.595228 123 9 NaN 3 1.330025 850 5 NaN -------------------------------------------------------------------------------- Late_vs_Early_TPDE_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TMDS_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- pathway pval padj ES \ 4 Valine, Leucine and Isoleucine Metabolism 0.025925 0.129625 -0.669388 NES nMoreExtreme size leadingEdge 4 -1.673824 95 9 NaN -------------------------------------------------------------------------------- Late_vs_Early_All_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- pathway pval padj ES NES \ 25 O-Glycan Biosynthesis 0.001418 0.060993 -0.940797 -1.851958 nMoreExtreme size leadingEdge 25 4 4 NaN -------------------------------------------------------------------------------- Late_vs_Early_MDE_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TPDS_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] 
-------------------------------------------------------------------------------- Stage_IV_vs_Stage_I_All_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- pathway pval padj ES NES \ 10 Fatty Acid Metabolism 0.007258 0.156043 -0.845232 -1.840108 25 O-Glycan Biosynthesis 0.007107 0.156043 -0.875318 -1.751518 nMoreExtreme size leadingEdge 10 22 5 NaN 25 23 4 NaN -------------------------------------------------------------------------------- Late_vs_Early_MDE_TMDS_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TPDE_TMDS_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_MDS_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TPDS_GSEA_Pathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_TMDS_GSEA_Pathways.csv -------------------------------------------------------------------------------- pathway pval padj \ 3 KEGG_VALINE_LEUCINE_AND_ISOLEUCINE_DEGRADATION 0.033410 0.211598 8 KEGG_PROPANOATE_METABOLISM 0.008338 0.158413 18 KEGG_PEROXISOME 
0.047454 0.225405 19 KEGG_VASCULAR_SMOOTH_MUSCLE_CONTRACTION 0.028779 0.211598 20 KEGG_FOCAL_ADHESION 0.006534 0.158413 23 KEGG_TIGHT_JUNCTION 0.017152 0.162940 25 KEGG_COMPLEMENT_AND_COAGULATION_CASCADES 0.014672 0.162940 27 KEGG_REGULATION_OF_ACTIN_CYTOSKELETON 0.046582 0.225405 ES NES nMoreExtreme size leadingEdge 3 -0.572985 -1.602055 115 13 NaN 8 -0.807692 -1.748648 32 6 NaN 18 -0.798469 -1.512936 204 4 NaN 19 0.756585 1.518011 173 6 NaN 20 0.707873 1.690223 41 11 NaN 23 0.648743 1.618783 111 13 NaN 25 0.544226 1.599616 102 26 NaN 27 0.680449 1.488905 290 8 NaN -------------------------------------------------------------------------------- Late_vs_Early_TPDS_TMDE_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_MDS_TMDE_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Late_vs_Early_MDE_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: [] -------------------------------------------------------------------------------- Stage_IV_vs_Stage_I_All_GSEA_Pathways.csv -------------------------------------------------------------------------------- pathway pval padj \ 2 KEGG_PENTOSE_AND_GLUCURONATE_INTERCONVERSIONS 0.000378 0.014308 4 KEGG_ASCORBATE_AND_ALDARATE_METABOLISM 0.000415 0.014308 6 KEGG_STEROID_HORMONE_BIOSYNTHESIS 0.000530 0.014308 12 KEGG_VALINE_LEUCINE_AND_ISOLEUCINE_DEGRADATION 0.003710 0.055644 20 
KEGG_GLUTATHIONE_METABOLISM 0.035910 0.217432 21 KEGG_STARCH_AND_SUCROSE_METABOLISM 0.000998 0.019238 22 KEGG_O_GLYCAN_BIOSYNTHESIS 0.008688 0.081081 29 KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_LACTO_AND_... 0.007490 0.077776 36 KEGG_RETINOL_METABOLISM 0.009009 0.081081 37 KEGG_PORPHYRIN_AND_CHLOROPHYLL_METABOLISM 0.000428 0.014308 39 KEGG_METABOLISM_OF_XENOBIOTICS_BY_CYTOCHROME_P450 0.000763 0.017162 40 KEGG_DRUG_METABOLISM_CYTOCHROME_P450 0.004342 0.058611 41 KEGG_DRUG_METABOLISM_OTHER_ENZYMES 0.007481 0.077776 43 KEGG_DNA_REPLICATION 0.000448 0.014308 46 KEGG_MISMATCH_REPAIR 0.015007 0.119172 48 KEGG_MAPK_SIGNALING_PATHWAY 0.039364 0.217432 51 KEGG_CYTOKINE_CYTOKINE_RECEPTOR_INTERACTION 0.006874 0.077776 56 KEGG_OOCYTE_MEIOSIS 0.031453 0.204915 60 KEGG_ENDOCYTOSIS 0.012401 0.104631 63 KEGG_VASCULAR_SMOOTH_MUSCLE_CONTRACTION 0.030141 0.204915 68 KEGG_TGF_BETA_SIGNALING_PATHWAY 0.017494 0.131203 75 KEGG_GAP_JUNCTION 0.040265 0.217432 81 KEGG_JAK_STAT_SIGNALING_PATHWAY 0.043424 0.225472 82 KEGG_HEMATOPOIETIC_CELL_LINEAGE 0.039454 0.217432 91 KEGG_LONG_TERM_DEPRESSION 0.031876 0.204915 92 KEGG_OLFACTORY_TRANSDUCTION 0.002540 0.042859 ES NES nMoreExtreme size leadingEdge 2 -0.898580 -2.329636 0 8 NaN 4 -0.809543 -2.262086 0 10 NaN 6 -0.731958 -2.432592 0 17 NaN 12 -0.593754 -1.973285 6 17 NaN 20 -0.498527 -1.594581 71 15 NaN 21 -0.670604 -2.144984 1 15 NaN 22 -0.875318 -1.739470 28 4 NaN 29 -0.885899 -1.760496 24 4 NaN 36 -0.537028 -1.784761 16 17 NaN 37 -0.774664 -2.234936 0 11 NaN 39 -0.575897 -2.219468 0 29 NaN 40 -0.506121 -1.913013 5 27 NaN 41 -0.580631 -1.857198 14 15 NaN 43 -0.847143 -2.505850 0 12 NaN 46 -0.763810 -1.768182 43 6 NaN 48 0.489639 1.434607 343 32 NaN 51 0.490159 1.545325 62 51 NaN 56 -0.463758 -1.578369 57 18 NaN 60 0.591450 1.582008 102 20 NaN 63 0.537426 1.481752 253 23 NaN 68 0.613004 1.559851 140 16 NaN 75 0.586990 1.470216 321 15 NaN 81 0.573911 1.460375 349 16 NaN 82 0.578377 1.471739 317 16 NaN 91 0.654310 1.481621 241 10 NaN 92 0.763859 
1.687205 18 9 NaN -------------------------------------------------------------------------------- Late_vs_Early_TPDE_GSEA_MetabolicPathways.csv -------------------------------------------------------------------------------- Empty DataFrame Columns: [pathway, pval, padj, ES, NES, nMoreExtreme, size, leadingEdge] Index: []
In [ ]: