Notebook for Methylation processing¶
Build the DNA methylation data frame¶
Here we build the dataframe and also add in any patient information.
We filter out poor quality samples and also create the dataset for differential analysis.
import os
import pandas as pd
from collections import defaultdict
from multiprocessing.dummy import Pool as ThreadPool
# --- Project locations (all paths are relative to the notebook's folder) ---
base_dir = '../data/'
fig_dir = '../figures/'

# Inputs
data_dir = base_dir + 'raw_downloads/CPTAC/Methylation_hg38_v1.0_20180809/'  # Data downloaded from CPTAC
supp_dir = base_dir + 'raw_downloads/supps/'

# Outputs written by this notebook
output_dir = base_dir + 'sircle/F1_DE_input_TvN/'

# --- Run settings ---
gene_name = 'hgnc_symbol'
save_fig = False  # set to True to write the figures into fig_dir
Had issues with the downloaded merged methylation file, so downloaded the unmerged files¶
# SciUtil comes from the `sciutil` package; only its `dp` printer is used in
# the cells shown here.
from sciutil import SciUtil
u = SciUtil()
# Prints the message between dashed rules (see the recorded cell output).
# NOTE(review): the file loading in the visible cells runs sequentially,
# despite the wording of this message.
u.dp(['Running multiprocessing...'])
-------------------------------------------------------------------------------- Running multiprocessing... --------------------------------------------------------------------------------
# Display the directory listing held in `files`.
# NOTE(review): in this export `files` is only assigned further down
# (os.listdir(data_dir)), so this cell was presumably executed after the
# loading cell; running the file top-to-bottom would raise NameError here.
files
['C3N_00314.T.csv.gz', 'C3N_00315.T.csv.gz', 'C3N_01220.T.csv.gz', 'C3N_01361.T.csv.gz', 'C3L_01557.T.csv.gz', 'C3L_01281.T.csv.gz', 'C3L_00817.T.csv.gz', 'C3N_00852.T.csv.gz', 'C3L_01607.T.csv.gz', 'C3L_01352.T.csv.gz', 'C3L_00561.T.csv.gz', 'C3L_00103.T.csv.gz', 'C3N_00320.T.csv.gz', 'C3N_01214.T.csv.gz', 'C3N_01648.T.csv.gz', 'C3N_01649.T.csv.gz', 'C3N_00242.T.csv.gz', 'C3N_00832.T.csv.gz', 'C3N_00390.T.csv.gz', 'C3L_00800.T.csv.gz', 'C3L_00908.T.csv.gz', 'C3L_00902.T.csv.gz', 'C3L_01836.T.csv.gz', 'Manifest.txt', 'C3N_00150.T.csv.gz', 'C3L_00766.T.csv.gz', 'C3N_00577.T.csv.gz', 'C3N_00244.T.csv.gz', 'C3N_00834.T.csv.gz', 'C3N_00305.T.csv.gz', 'C3N_01808.T.csv.gz', 'C3N_00437.T.csv.gz', 'C3L_00011.T.csv.gz', 'C3L_00010.T.csv.gz', 'C3N_00168.T.csv.gz', 'C3N_00491.T.csv.gz', 'C3L_00088.T.csv.gz', 'C3L_00447.T.csv.gz', 'C3N_00313.T.csv.gz', 'C3N_00312.T.csv.gz', 'C3N_00646.T.csv.gz', 'C3N_00380.T.csv.gz', 'C3L_00088.N.csv.gz', 'C3L_00799.T.csv.gz', 'C3L_01287.T.csv.gz', 'C3L_01286.T.csv.gz', 'C3N_01176.T.csv.gz', 'C3L_00369.T.csv.gz', 'C3L_00792.T.csv.gz', 'C3L_00011.N.csv.gz', 'C3L_00010.N.csv.gz', 'C3N_00733.T.csv.gz', 'C3L_00583.T.csv.gz', 'C3L_00418.T.csv.gz', 'C3N_01213.T.csv.gz', 'C3N_00494.T.csv.gz', 'C3N_00495.T.csv.gz', 'C3L_00765.T.csv.gz', 'C3N_01200.T.csv.gz', 'C3N_01180.T.csv.gz', 'C3N_00573.T.csv.gz', 'C3L_00610.T.csv.gz', 'C3N_00831.T.csv.gz', 'C3L_00448.T.csv.gz', 'C3N_01261.T.csv.gz', 'C3L_00359.T.csv.gz', 'C3L_00796.T.csv.gz', 'C3N_00953.T.csv.gz', 'C3L_01560.T.csv.gz', 'C3L_01313.T.csv.gz', 'C3L_01288.T.csv.gz', 'C3N_01524.T.csv.gz', 'C3N_01179.T.csv.gz', 'C3N_01178.T.csv.gz', 'C3L_00917.T.csv.gz', 'Processing.txt', 'C3L_01885.T.csv.gz', 'C3N_00317.T.csv.gz', 'C3L_00606.T.csv.gz', 'C3L_00607.T.csv.gz', 'C3L_00416.T.csv.gz', 'C3L_01283.T.csv.gz', 'C3L_00814.T.csv.gz', 'C3L_01861.T.csv.gz', 'C3N_00194.T.csv.gz', 'C3L_00791.T.csv.gz', 'C3L_00790.T.csv.gz', 'C3L_00079.T.csv.gz', 'C3L_00360.T.csv.gz', 'C3L_00910.T.csv.gz', 'C3L_00581.T.csv.gz', 
'C3N_01522.T.csv.gz', 'C3L_01882.T.csv.gz', 'C3N_00310.T.csv.gz', 'C3N_00177.T.csv.gz', 'C3L_01553.T.csv.gz', 'C3N_00148.T.csv.gz', 'C3N_00149.T.csv.gz', 'C3L_00812.T.csv.gz', 'C3L_00813.T.csv.gz', 'C3N_01646.T.csv.gz', 'C3N_01175.T.csv.gz', 'C3L_01603.T.csv.gz', 'C3L_00004.T.csv.gz', 'C3L_00097.T.csv.gz', 'C3L_00026.N.csv.gz', 'C3L_00096.T.csv.gz', 'C3L_00026.T.csv.gz', 'C3L_00097.N.csv.gz', 'C3L_00096.N.csv.gz', 'C3L_00004.N.csv.gz', 'C3L_01302.T.csv.gz', 'C3N_00154.T.csv.gz', 'C3L_00907.T.csv.gz', 'C3N_00492.T.csv.gz', 'C3L_00183.T.csv.gz', 'C3N_00246.T.csv.gz', 'C3N_00435.T.csv.gz', 'C3N_01651.T.csv.gz', 'C3L_00079.N.csv.gz']
#"Locus","Beta","chr","pos","strand","Name","Probe_rs","Probe_maf","CpG_rs","CpG_maf","SBE_rs","SBE_maf","Islands_Name","Relation_to_Island","UCSC_RefGene_Group","Phantom4_Enhancers","Phantom5_Enhancers","DMR","X450k_Enhancer","HMM_Island","Regulatory_Feature_Name","Regulatory_Feature_Group","GencodeBasicV12_NAME","GencodeBasicV12_Accession","GencodeBasicV12_Group","GencodeCompV12_NAME","GencodeCompV12_Accession","GencodeCompV12_Group","DNase_Hypersensitivity_NAME","DNase_Hypersensitivity_Evidence_Count","OpenChromatin_NAME","OpenChromatin_Evidence_Count","TFBS_NAME","TFBS_Evidence_Count","Methyl27_Loci","Methyl450_Loci","Random_Loci"
#"cg00000029",0.698308967012916,"chr16",53434200,"+","cg00000029",NA,NA,NA,NA,NA,NA,"chr16:53434372-53435297","N_Shore","TSS1500",NA,NA,"",NA,NA,"16:53433926-53435773","Promoter_Associated","RBL2","ENST00000262133.6","TSS1500","RBL2;RBL2;RBL2;RBL2","ENST00000262133.6;ENST00000567964.1;ENST00000544405.2;ENST00000567964.1","TSS1500;1stExon;TSS1500;5'UTR","chr16:53434093-53434558",3,NA,NA,NA,NA,NA,TRUE,NA
# Build one wide table (`cpg_df`): the probe annotation columns of the first
# readable sample file, followed by one beta-value column per sample.
# The bookkeeping lists below get one entry per sample that was actually
# loaded, so they always line up with the beta columns of `cpg_df`.
sample_df = pd.DataFrame()
case_ids = []
condition_labels = []
condition_ids = []
sample_id = []
cpg_df = pd.DataFrame()
files = list(os.listdir(f'{data_dir}'))

annotation_df = None  # every column except 'Beta', taken from the first readable file
beta_columns = {}     # sample label -> beta values

# We want to make a sample DF and also merge the files together
for f in files:
    # Manifest.txt / Processing.txt sit in the same folder but are not sample tables
    if 'Manifest' in f or 'Processing' in f:
        continue
    try:
        tmp_f = pd.read_csv(f'{data_dir}{f}')
        # File names look like <case>.<T|N>.csv.gz
        tumour_state = "Tumor" if f.split('.')[1] == "T" else "Normal"
        case_id = f.split('.')[0].replace("_", '-')
        cond_id = 1 if tumour_state == "Tumor" else 0
        new_name = f'{case_id}_{tumour_state}'
        beta_values = tmp_f['Beta'].values
        # Rows are matched by position, exactly as before.
        # NOTE(review): this assumes every file lists the probes in the same
        # order -- TODO confirm against the CPTAC processing notes.
        if annotation_df is not None and len(beta_values) != len(annotation_df):
            raise ValueError(
                f'{len(beta_values)} rows, expected {len(annotation_df)}')
    except (OSError, ValueError, KeyError, IndexError) as err:
        # Best effort: report the unreadable/malformed file (and why) and carry
        # on. pandas parser errors are ValueError subclasses.
        print(f, err)
        continue

    # Only record the sample once its data has been accepted
    if annotation_df is None:
        annotation_df = tmp_f.drop(columns=['Beta'])
    beta_columns[new_name] = beta_values
    case_ids.append(case_id)
    condition_labels.append(tumour_state)
    condition_ids.append(cond_id)
    sample_id.append(new_name)
    u.dp([f])

# Assemble the table in one go instead of inserting 100+ columns one by one
if annotation_df is not None:
    beta_df = pd.DataFrame(beta_columns, index=annotation_df.index,
                           columns=list(beta_columns))
    cpg_df = pd.concat([annotation_df, beta_df], axis=1)
/Users/ariane/opt/miniconda3/envs/clean_ml/lib/python3.6/site-packages/IPython/core/interactiveshell.py:3072: DtypeWarning: Columns (8,10,15,17,19,34,35,36) have mixed types.Specify dtype option on import or set low_memory=False. interactivity=interactivity, compiler=compiler, result=result)
-------------------------------------------------------------------------------- C3N_00314.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00315.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01220.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01361.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01557.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01281.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00817.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00852.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01607.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01352.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00561.T.csv.gz -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- C3L_00103.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00320.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01214.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01648.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01649.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00242.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00832.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00390.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00800.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00908.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00902.T.csv.gz -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- C3L_01836.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00150.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00766.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00577.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00244.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00834.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00305.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01808.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00437.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00011.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00010.T.csv.gz -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- C3N_00168.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00491.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00088.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00447.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00313.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00312.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00646.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00380.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00088.N.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00799.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01287.T.csv.gz -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- C3L_01286.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01176.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00369.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00792.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00011.N.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00010.N.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00733.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00583.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00418.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01213.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00494.T.csv.gz -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- C3N_00495.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00765.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01200.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01180.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00573.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00610.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00831.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00448.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01261.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00359.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00796.T.csv.gz -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- C3N_00953.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01560.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01313.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01288.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01524.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01179.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01178.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00917.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01885.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00317.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00606.T.csv.gz -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- C3L_00607.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00416.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01283.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00814.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01861.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00194.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00791.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00790.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00079.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00360.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00910.T.csv.gz -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- C3L_00581.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01522.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01882.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00310.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00177.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01553.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00148.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00149.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00812.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00813.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01646.T.csv.gz -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- C3N_01175.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01603.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00004.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00097.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00026.N.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00096.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00026.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00097.N.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00096.N.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00004.N.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_01302.T.csv.gz -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- C3N_00154.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00907.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00492.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00183.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00246.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_00435.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3N_01651.T.csv.gz -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- C3L_00079.N.csv.gz --------------------------------------------------------------------------------
Methylation summary from the supplementary files of CPTAC:¶
Supplemental table 1 indicates that there are no normal (NAT) methylation samples, but the downloaded files do contain some (see the counts below). Table: Sample size of different data sets after excluding samples with low quality.
| | CCRCC-tumor | CCRCC-NAT* | Non-CCRCC tumor | Non-CCRCC NAT |
|-----------------|------------|-----------------|---------------|----|
| Protein/phospho | 103 | 80 | 7 | 3 |
| CNV/Mutation | 103 | 80 | 7 | 3 |
| RNA | 103 | 72 | 7 | 2 |
| Methylation | 100** | NA | 7 | NA |
import os
import pandas as pd
from collections import defaultdict
files = os.listdir(f'{data_dir}')
#"Locus","Beta","chr","pos","strand","Name","Probe_rs","Probe_maf","CpG_rs","CpG_maf","SBE_rs","SBE_maf","Islands_Name","Relation_to_Island","UCSC_RefGene_Group","Phantom4_Enhancers","Phantom5_Enhancers","DMR","X450k_Enhancer","HMM_Island","Regulatory_Feature_Name","Regulatory_Feature_Group","GencodeBasicV12_NAME","GencodeBasicV12_Accession","GencodeBasicV12_Group","GencodeCompV12_NAME","GencodeCompV12_Accession","GencodeCompV12_Group","DNase_Hypersensitivity_NAME","DNase_Hypersensitivity_Evidence_Count","OpenChromatin_NAME","OpenChromatin_Evidence_Count","TFBS_NAME","TFBS_Evidence_Count","Methyl27_Loci","Methyl450_Loci","Random_Loci"
#"cg00000029",0.698308967012916,"chr16",53434200,"+","cg00000029",NA,NA,NA,NA,NA,NA,"chr16:53434372-53435297","N_Shore","TSS1500",NA,NA,"",NA,NA,"16:53433926-53435773","Promoter_Associated","RBL2","ENST00000262133.6","TSS1500","RBL2;RBL2;RBL2;RBL2","ENST00000262133.6;ENST00000567964.1;ENST00000544405.2;ENST00000567964.1","TSS1500;1stExon;TSS1500;5'UTR","chr16:53434093-53434558",3,NA,NA,NA,NA,NA,TRUE,NA
# Count the normal (N) and tumour (T) files using the tissue code that follows
# the first '.' in the file name (<case>.<code>.csv.gz).
n_c = 0
t_c = 0
for f in files:
    parts = f.split('.')
    if len(parts) < 2:
        # No '.' in the name, so there is no tissue code to read. This is the
        # only failure the previous bare `except:` could meaningfully catch;
        # report the name as before.
        print(f)
        continue
    t = parts[1]
    if t == 'N':
        n_c += 1
    elif t == 'T':
        t_c += 1
    # Any other code (e.g. 'txt' for Manifest.txt) is counted as neither
print(n_c, t_c)
8 110
Add in sample information¶
Here we get the cases from the methylation and find out the different attributes of the patients
# Build the methylation sample sheet and attach the clinical annotations.
#
# Only per-sample tables carry a case id and a tissue code. The folder also
# holds Manifest.txt and Processing.txt, which must be skipped here or they
# end up in the sample sheet as two bogus 'Not_ccRCC' samples.
sample_files = [f for f in files if 'Manifest' not in f and 'Processing' not in f]
cases = [c.split('.')[0].replace('_', '-') for c in sample_files]
conds = ['Tumor' if c.split('.')[1] == 'T' else 'Not_ccRCC' for c in sample_files]
# Read in the sample clinical information
clin_df = pd.read_csv(f'{output_dir}clinical_sircle.csv')
# Create a methylation sample data frame for each of the samples
sample_df = pd.DataFrame()
sample_df['SampleId'] = [f'{case}_{cond}' for case, cond in zip(cases, conds)]
sample_df['CondId'] = [1 if c == 'Tumor' else 0 for c in conds]
sample_df['CondName'] = conds
sample_df['CaseId'] = cases
sample_df['SafeCases'] = [c.replace('-', '.') for c in cases]
sample_df['FullLabel'] = sample_df['SampleId'].values
## -------- DNA methylation
# Relabel every sample as CpG_<condition>_<case with dots>_1 and remember the
# old -> new mapping.
# NOTE(review): for 'Not_ccRCC' samples split("_")[1] yields just 'Not', so
# their label becomes 'CpG_Not_<case>_1' -- confirm this is what downstream
# code expects before changing it.
new_full_label_map = {}
new_full_label = []
for full_label in sample_df['FullLabel'].values:
    new_label = f'CpG_{full_label.split("_")[1]}_{full_label.split("_")[0].replace("-", ".")}_1'
    new_full_label.append(new_label)
    new_full_label_map[full_label] = new_label
sample_df['FullLabel'] = new_full_label
# Attach the clinical information to each methylation sample via its case ID
# (left join: samples without a clinical record are kept).
sample_df = sample_df.set_index("CaseId").join(clin_df.set_index("case_id"), how="left", rsuffix='_')
sample_df.to_csv(f'{output_dir}CCRCC_Clark_Cell2019_meth_sample_df.csv')
sample_df['CondName'].value_counts()
Tumor 110 Not_ccRCC 10 Name: CondName, dtype: int64
Remove duplicate CpGs¶
# Tally the probes per genomic context (CpG island, shore, shelf, open sea).
# This runs before de-duplication, so repeated loci are included in the counts.
print(cpg_df['Relation_to_Island'].value_counts())
# Keep only the first row seen for every locus
cpg_df = cpg_df.drop_duplicates(subset=['Locus'], keep='first')
OpenSea 478827 Island 195866 N_Shore 94323 S_Shore 86237 N_Shelf 32905 S_Shelf 29502 Name: Relation_to_Island, dtype: int64
Get paired samples and check correlations/PCA between patients¶
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Sample (beta-value) columns are the ones whose name contains the 'C3' case prefix
all_cases = [c for c in cpg_df.columns if 'C3' in c]
# Pairwise correlation between samples
corr = cpg_df[all_cases].corr()
sns.clustermap(corr,
               xticklabels=corr.columns.values,
               yticklabels=corr.columns.values, cmap='RdBu_r', row_cluster=True, col_cluster=True)
if save_fig:
    plt.savefig(f'{fig_dir}Heatmap_CpGs.svg')
plt.show()
# Mean correlation of every sample with all samples (its self-correlation is
# part of the mean); stored as an extra column for the filtering step.
mean_cor = corr.mean(axis=1)
corr['mean_corr'] = mean_cor
# Plot out the mean correlation values so we can choose a good filter.
plt.hist(mean_cor, bins=20)
# Smallest value in the table. `corr.min().min()` is used instead of
# `min(np.min(corr))` because np.min on a DataFrame returns a scalar in
# pandas >= 2.0, which makes the outer min() fail.
plt.title(f'min corr: {corr.min().min()}')
/Users/ariane/opt/miniconda3/envs/clean_ml/lib/python3.6/site-packages/seaborn/matrix.py:649: UserWarning: Clustering large matrix with scipy. Installing `fastcluster` may give better performance. warnings.warn(msg)
Text(0.5, 1.0, 'min corr: -0.08916083103700168')
from sciutil import SciUtil
u = SciUtil()

# Samples whose mean correlation is below 0.75, lowest first.
# (Make sure the ID column is kept when the sample columns are removed.)
corr_sorted = corr.sort_values(by=['mean_corr'])
below_threshold = corr_sorted['mean_corr'] < 0.75
corr_sorted = corr_sorted[below_threshold]
u.dp([len(corr_sorted), 'patients with avg. correlations less than 75. Filtering out these samples, and printing cases.'])

# Column names to drop, and the case ID (text before the first '_') of each.
cols_to_omit = [name for name in corr_sorted.index if 'C3' in name]
case_ids = [name.split('_')[0] for name in cols_to_omit]
print('\n'.join(case_ids))
print('\n'.join(cols_to_omit))

# Everything that is not a low-correlation sample column is retained,
# with missing values replaced by 0.
# NOTE(review): a later cell rebuilds cpg_filtered from cpg_df without this
# zero fill, so this frame appears to feed only the heatmap below - confirm.
cols_to_keep = [name for name in cpg_df.columns if name not in cols_to_omit]
cpg_filtered = cpg_df[cols_to_keep].fillna(0)
value_cols = [name for name in cols_to_keep if 'C3' in name]

# Re-draw the correlation heatmap on the remaining samples.
corr = cpg_filtered[value_cols].corr()
sns.clustermap(corr,
               xticklabels=corr.columns.values,
               yticklabels=corr.columns.values,
               cmap='RdBu_r', row_cluster=True, col_cluster=True)
if save_fig:
    plt.savefig(f'{fig_dir}Heatmap_CpG_removed_corr-leq-0.75.svg')

# Case ids to remove
print(case_ids)
-------------------------------------------------------------------------------- 3 patients with avg. correlations less than 75. Filtering out these samples, and printing cases. -------------------------------------------------------------------------------- C3L-01885 C3L-01882 C3L-01281 C3L-01885_Tumor C3L-01882_Tumor C3L-01281_Tumor
/Users/ariane/opt/miniconda3/envs/clean_ml/lib/python3.6/site-packages/seaborn/matrix.py:649: UserWarning: Clustering large matrix with scipy. Installing `fastcluster` may give better performance. warnings.warn(msg)
['C3L-01885', 'C3L-01882', 'C3L-01281']
# Drop the three low-correlation tumour sample columns from the CpG table.
low_correlation_samples = ('C3L-01885_Tumor', 'C3L-01882_Tumor', 'C3L-01281_Tumor')
retained_columns = [c for c in cpg_df if c not in low_correlation_samples]
cpg_filtered = cpg_df[retained_columns]

# Only a small part of the provided annotation is needed: the probe ID plus
# the GencodeCompV12 transcript accession, gene name and region group.
annotation_columns = ['Locus', "GencodeCompV12_Accession", "GencodeCompV12_NAME", "GencodeCompV12_Group"]
cpg_filtered_min = cpg_filtered[annotation_columns]

# Discard probes that carry no gene name.
has_gene_name = ~cpg_filtered_min['GencodeCompV12_NAME'].isnull()
cpg_filtered_min = cpg_filtered_min[has_gene_name]

print(cpg_filtered_min["GencodeCompV12_Group"].value_counts())
cpg_filtered_min
5'UTR 48049 TSS1500 47093 3'UTR 40288 TSS200 24880 5'UTR;5'UTR 22031 ... 5'UTR;1stExon;1stExon;1stExon;5'UTR;TSS200;TSS200;5'UTR;5'UTR;5'UTR;5'UTR;5'UTR;5'UTR;5'UTR 1 1stExon;1stExon;1stExon;1stExon;1stExon;5'UTR;5'UTR;5'UTR;5'UTR;TSS200;TSS200;TSS200;TSS200;TSS200;TSS200;5'UTR 1 TSS1500;TSS1500;TSS1500;TSS200;3'UTR;TSS1500;TSS1500;5'UTR 1 3'UTR;TSS1500;TSS1500;TSS1500;TSS1500;1stExon;1stExon;1stExon;1stExon;5'UTR 1 5'UTR;5'UTR;1stExon;5'UTR;1stExon;1stExon;1stExon;TSS200;TSS200;TSS200;5'UTR;5'UTR;5'UTR;5'UTR 1 Name: GencodeCompV12_Group, Length: 44695, dtype: int64
Locus | GencodeCompV12_Accession | GencodeCompV12_NAME | GencodeCompV12_Group | |
---|---|---|---|---|
0 | cg00000029 | ENST00000262133.6;ENST00000567964.1;ENST000005... | RBL2;RBL2;RBL2;RBL2 | TSS1500;1stExon;TSS1500;5'UTR |
1 | cg00000109 | ENST00000478016.1;ENST00000469491.1 | FNDC3B;FNDC3B | 5'UTR;5'UTR |
2 | cg00000155 | ENST00000459068.1;ENST00000467558.1;ENST000004... | snoU13;BRAT1;BRAT1;BRAT1 | 1stExon;3'UTR;3'UTR;5'UTR |
3 | cg00000158 | ENST00000375629.3 | IARS | 5'UTR |
5 | cg00000221 | ENST00000572321.1 | ANKFN1 | TSS200 |
... | ... | ... | ... | ... |
917631 | ch.X.665616F | ENST00000453805.2 | USP9X | 3'UTR |
917637 | ch.X.772253F | ENST00000490869.1;ENST00000456273.1 | UBA1;INE1 | 5'UTR;TSS1500 |
917638 | ch.X.772254F | ENST00000490869.1;ENST00000456273.1 | UBA1;INE1 | 5'UTR;TSS1500 |
917639 | ch.X.798879F | ENST00000496365.1;ENST00000467954.1;ENST000004... | FTSJ1;FTSJ1;FTSJ1;FTSJ1;FTSJ1;FTSJ1;FTSJ1 | 5'UTR;5'UTR;5'UTR;3'UTR;3'UTR;3'UTR;3'UTR |
917642 | ch.X.881546R | ENST00000342160.3;ENST00000262854.6 | HUWE1;HUWE1 | 5'UTR;5'UTR |
586055 rows × 4 columns
Add entrez gene ID and ensembl gene ID for the gene names¶
def _to_ensembl_gene_lookup(table, key_column):
    """Return a dict from each value in key_column to the ensembl_gene_id on the same row.

    When a key occurs on several rows, the last row wins.
    """
    return dict(zip(table[key_column], table['ensembl_gene_id']))


# GRCh38 annotation table.
annotation_file = f'{supp_dir}hsapiens_gene_ensembl-GRCh38.p13.csv'
annot = pd.read_csv(annotation_file)

# The probe annotation appears to be on hg37 while everything else is on hg38,
# so read the GRCh37 table too and keep only genes present in both releases.
hg37_annot = pd.read_csv(f'{supp_dir}hsapiens_gene_ensembl-GRCh37.p13.csv')
hg37_annot = hg37_annot.merge(annot, on="ensembl_gene_id", how="inner", suffixes=('_hg37', '_hg38'))

# Transcript ID -> Ensembl gene ID.
enst_to_ensembl_gene_hg38 = _to_ensembl_gene_lookup(hg37_annot, 'ensembl_transcript_id')

# Any possible gene name -> Ensembl gene ID, from the GRCh38 table ...
hgnc_to_ensembl_gene_hg38 = _to_ensembl_gene_lookup(annot, 'hgnc_symbol')
gene_name_to_ensembl_gene_hg38 = _to_ensembl_gene_lookup(annot, 'external_gene_name')
syn_to_ensembl_gene_hg38 = _to_ensembl_gene_lookup(annot, 'external_synonym')

# ... and from the hg37 side of the merged table.
hgnc37_to_ensembl_gene_hg38 = _to_ensembl_gene_lookup(hg37_annot, 'hgnc_symbol_hg37')
gene_name37_to_ensembl_gene_hg38 = _to_ensembl_gene_lookup(hg37_annot, 'external_gene_name_hg37')
syn37_to_ensembl_gene_hg38 = _to_ensembl_gene_lookup(hg37_annot, 'external_synonym_hg37')

distinct_hg37_symbols = set(hg37_annot['hgnc_symbol_hg37'].values)
print('Number of hg37 gene names:', len(distinct_hg37_symbols))
/Users/ariane/opt/miniconda3/envs/clean_ml/lib/python3.6/site-packages/IPython/core/interactiveshell.py:3072: DtypeWarning: Columns (2) have mixed types.Specify dtype option on import or set low_memory=False. interactivity=interactivity, compiler=compiler, result=result)
Number of hg37 gene names: 32361
Annotations can be collapsed (several ';'-separated entries per CpG), so to map between hg37 and hg38 we uncollapse them¶
# Display the minimal annotation table to inspect the collapsed, ';'-separated fields.
cpg_filtered_min
Locus | GencodeCompV12_Accession | GencodeCompV12_NAME | GencodeCompV12_Group | |
---|---|---|---|---|
0 | cg00000029 | ENST00000262133.6;ENST00000567964.1;ENST000005... | RBL2;RBL2;RBL2;RBL2 | TSS1500;1stExon;TSS1500;5'UTR |
1 | cg00000109 | ENST00000478016.1;ENST00000469491.1 | FNDC3B;FNDC3B | 5'UTR;5'UTR |
2 | cg00000155 | ENST00000459068.1;ENST00000467558.1;ENST000004... | snoU13;BRAT1;BRAT1;BRAT1 | 1stExon;3'UTR;3'UTR;5'UTR |
3 | cg00000158 | ENST00000375629.3 | IARS | 5'UTR |
5 | cg00000221 | ENST00000572321.1 | ANKFN1 | TSS200 |
... | ... | ... | ... | ... |
917631 | ch.X.665616F | ENST00000453805.2 | USP9X | 3'UTR |
917637 | ch.X.772253F | ENST00000490869.1;ENST00000456273.1 | UBA1;INE1 | 5'UTR;TSS1500 |
917638 | ch.X.772254F | ENST00000490869.1;ENST00000456273.1 | UBA1;INE1 | 5'UTR;TSS1500 |
917639 | ch.X.798879F | ENST00000496365.1;ENST00000467954.1;ENST000004... | FTSJ1;FTSJ1;FTSJ1;FTSJ1;FTSJ1;FTSJ1;FTSJ1 | 5'UTR;5'UTR;5'UTR;3'UTR;3'UTR;3'UTR;3'UTR |
917642 | ch.X.881546R | ENST00000342160.3;ENST00000262854.6 | HUWE1;HUWE1 | 5'UTR;5'UTR |
586055 rows × 4 columns
# Uncollapse the ';'-separated annotations into one row per (CpG, distinct gene),
# then keep a single row per CpG, preferring regions with the lowest priority number.

# Region priority (1 = most preferred). 'ExonBnd' is the label that occurs in
# the data; it shares the lowest real priority with 'ExonJunction' so those
# rows no longer end up with a missing Order.
order = {"TSS200": 1, "1stExon": 2, "5'UTR": 3, "TSS1500": 4, "3'UTR": 5,
         "ExonJunction": 6, "ExonBnd": 6, "0": 7}

# Locus -> [Locus, accession(s), gene name(s), region group(s)] for every CpG.
values = cpg_filtered_min.values
gene_map = dict(zip(cpg_filtered_min['Locus'].values, values))

rows = []
for locus, vals in gene_map.items():
    # A missing gene name is a float NaN; skip it. The previous
    # `~isinstance(...)` test was always truthy, so it never skipped anything.
    if isinstance(vals[2], float):
        continue
    genes = vals[2].split(';')
    transcripts = vals[1].split(';')
    regions = vals[3].split(';')
    seen_genes = set()
    # NOTE(review): only the FIRST annotation of each gene is kept, so the
    # priority map decides between genes, not between regions of the same
    # gene - confirm this is intended.
    for idx, gene in enumerate(genes):
        if gene != '0' and gene not in seen_genes:
            region = regions[idx]
            rows.append([vals[0], gene, transcripts[idx], region, order.get(region)])
            seen_genes.add(gene)

# One row per CpG: sort by priority and keep the best-ranked row per Locus.
# A stable sort keeps annotation order among equal priorities, so the choice
# is reproducible; regions missing from `order` sort last.
new_df = pd.DataFrame(data=rows, columns=['Locus', 'GencodeCompV12_NAME_FILT', 'GencodeCompV12_Accession_FILT', 'GencodeCompV12_Group_FILT', 'Order'])
new_df = new_df.sort_values('Order', ascending=True, kind='mergesort').drop_duplicates('Locus')
new_df
Locus | GencodeCompV12_NAME_FILT | GencodeCompV12_Accession_FILT | GencodeCompV12_Group_FILT | Order | |
---|---|---|---|---|---|
632178 | cg23840643 | CSN1S2AP | ENST00000512167.1 | TSS200 | 1.0 |
493346 | cg17980423 | FLG2 | ENST00000388718.4 | TSS200 | 1.0 |
321048 | cg11478018 | CTD-2335O3.3 | ENST00000509423.1 | TSS200 | 1.0 |
606370 | cg22770352 | PEX12 | ENST00000225873.3 | TSS200 | 1.0 |
228986 | cg08064156 | RAB4A | ENST00000489342.1 | TSS200 | 1.0 |
... | ... | ... | ... | ... | ... |
725556 | cg27651452 | PLCB3 | ENST00000540288.1 | ExonBnd | NaN |
725843 | cg27663279 | CYP2W1 | ENST00000308919.7 | ExonBnd | NaN |
726092 | ch.10.2686225R | OAT | ENST00000539214.1 | ExonBnd | NaN |
726361 | ch.14.569218R | SAV1 | ENST00000555720.1 | ExonBnd | NaN |
726379 | ch.15.1310449F | MORF4L1 | ENST00000379535.4 | ExonBnd | NaN |
586055 rows × 5 columns
# Check that we have fixed up those gencode values: after uncollapsing, each
# CpG should carry a single region label rather than a ';'-separated list.
new_df["GencodeCompV12_Group_FILT"].value_counts()
5'UTR 180925 TSS1500 170449 3'UTR 103660 TSS200 85071 1stExon 40696 ExonBnd 5254 Name: GencodeCompV12_Group_FILT, dtype: int64
Add in the annotated info and join with CPTAC data¶
# Index both tables by probe ID so they can be joined on it.
new_df = new_df.set_index("Locus")
filtered_c = cpg_filtered.set_index('Locus')
# Keep only probes present in both the per-CpG annotation and the CPTAC table.
filtered_cptac = new_df.join(filtered_c, how='inner')
filtered_cptac
GencodeCompV12_NAME_FILT | GencodeCompV12_Accession_FILT | GencodeCompV12_Group_FILT | Order | chr | pos | strand | Name | Probe_rs | Probe_maf | ... | C3L-00004_Normal | C3L-01302_Tumor | C3N-00154_Tumor | C3L-00907_Tumor | C3N-00492_Tumor | C3L-00183_Tumor | C3N-00246_Tumor | C3N-00435_Tumor | C3N-01651_Tumor | C3L-00079_Normal | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Locus | |||||||||||||||||||||
cg23840643 | CSN1S2AP | ENST00000512167.1 | TSS200 | 1.0 | chr4 | 70073863 | - | cg23840643 | NaN | NaN | ... | 0.803866 | 0.820861 | 0.692819 | 0.460177 | 0.822501 | 0.496993 | 0.828998 | 0.720203 | 0.623552 | 0.857658 |
cg17980423 | FLG2 | ENST00000388718.4 | TSS200 | 1.0 | chr1 | 152360009 | + | cg17980423 | NaN | NaN | ... | 0.902274 | 0.923112 | 0.921182 | 0.835816 | 0.879014 | 0.856317 | 0.886480 | 0.899418 | 0.880485 | 0.923162 |
cg11478018 | CTD-2335O3.3 | ENST00000509423.1 | TSS200 | 1.0 | chr5 | 51451039 | + | cg11478018 | NaN | NaN | ... | 0.849223 | 0.844487 | 0.647779 | 0.499653 | 0.901740 | 0.740788 | 0.832933 | 0.830111 | 0.748816 | 0.879334 |
cg22770352 | PEX12 | ENST00000225873.3 | TSS200 | 1.0 | chr17 | 35578706 | + | cg22770352 | rs321599 | 0.446905 | ... | 0.050360 | 0.058908 | 0.047284 | 0.045892 | 0.038892 | 0.048999 | 0.104711 | 0.057124 | 0.063113 | 0.049991 |
cg08064156 | RAB4A | ENST00000489342.1 | TSS200 | 1.0 | chr1 | 229286419 | - | cg08064156 | rs61825327 | 0.010512 | ... | 0.725614 | 0.771253 | 0.637772 | 0.397874 | 0.659367 | 0.564448 | 0.658909 | 0.713612 | 0.565489 | 0.752409 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
cg27651452 | PLCB3 | ENST00000540288.1 | ExonBnd | NaN | chr11 | 64267589 | - | cg27651452 | NaN | NaN | ... | 0.849726 | 0.877327 | 0.901601 | 0.852546 | 0.329027 | 0.622806 | 0.836825 | 0.869634 | 0.762166 | 0.919437 |
cg27663279 | CYP2W1 | ENST00000308919.7 | ExonBnd | NaN | chr7 | 984588 | - | cg27663279 | NaN | NaN | ... | 0.969205 | 0.978932 | 0.977448 | 0.870755 | 0.977345 | 0.953699 | 0.976406 | 0.973642 | 0.971852 | 0.975655 |
ch.10.2686225R | OAT | ENST00000539214.1 | ExonBnd | NaN | chr10 | 124401712 | + | ch.10.2686225R | NaN | NaN | ... | 0.038583 | 0.050667 | 0.074185 | 0.161708 | 0.078753 | 0.058136 | 0.054807 | 0.071956 | 0.085594 | 0.055943 |
ch.14.569218R | SAV1 | ENST00000555720.1 | ExonBnd | NaN | chr14 | 50640736 | + | ch.14.569218R | NaN | NaN | ... | 0.160425 | 0.080493 | 0.082056 | 0.254433 | 0.045871 | 0.058197 | 0.200949 | 0.057020 | 0.193732 | 0.171011 |
ch.15.1310449F | MORF4L1 | ENST00000379535.4 | ExonBnd | NaN | chr15 | 78891481 | + | ch.15.1310449F | NaN | NaN | ... | 0.029367 | 0.036926 | 0.040849 | 0.119281 | 0.032884 | 0.029524 | 0.032956 | 0.039342 | 0.074056 | 0.037042 |
586055 rows × 154 columns
# Rebuild the joined annotation + CPTAC table.
filtered_cptac = new_df.join(filtered_c, how='inner')
# Drop probes that have no value in any column whose name contains 'Tumor'.
cptac_tumour_cols = [col for col in filtered_cptac if 'Tumor' in col]
filtered_cptac = filtered_cptac.dropna(subset=cptac_tumour_cols, how='all')
filtered_cptac
GencodeCompV12_NAME_FILT | GencodeCompV12_Accession_FILT | GencodeCompV12_Group_FILT | Order | chr | pos | strand | Name | Probe_rs | Probe_maf | ... | C3L-00004_Normal | C3L-01302_Tumor | C3N-00154_Tumor | C3L-00907_Tumor | C3N-00492_Tumor | C3L-00183_Tumor | C3N-00246_Tumor | C3N-00435_Tumor | C3N-01651_Tumor | C3L-00079_Normal | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Locus | |||||||||||||||||||||
cg23840643 | CSN1S2AP | ENST00000512167.1 | TSS200 | 1.0 | chr4 | 70073863 | - | cg23840643 | NaN | NaN | ... | 0.803866 | 0.820861 | 0.692819 | 0.460177 | 0.822501 | 0.496993 | 0.828998 | 0.720203 | 0.623552 | 0.857658 |
cg17980423 | FLG2 | ENST00000388718.4 | TSS200 | 1.0 | chr1 | 152360009 | + | cg17980423 | NaN | NaN | ... | 0.902274 | 0.923112 | 0.921182 | 0.835816 | 0.879014 | 0.856317 | 0.886480 | 0.899418 | 0.880485 | 0.923162 |
cg11478018 | CTD-2335O3.3 | ENST00000509423.1 | TSS200 | 1.0 | chr5 | 51451039 | + | cg11478018 | NaN | NaN | ... | 0.849223 | 0.844487 | 0.647779 | 0.499653 | 0.901740 | 0.740788 | 0.832933 | 0.830111 | 0.748816 | 0.879334 |
cg22770352 | PEX12 | ENST00000225873.3 | TSS200 | 1.0 | chr17 | 35578706 | + | cg22770352 | rs321599 | 0.446905 | ... | 0.050360 | 0.058908 | 0.047284 | 0.045892 | 0.038892 | 0.048999 | 0.104711 | 0.057124 | 0.063113 | 0.049991 |
cg08064156 | RAB4A | ENST00000489342.1 | TSS200 | 1.0 | chr1 | 229286419 | - | cg08064156 | rs61825327 | 0.010512 | ... | 0.725614 | 0.771253 | 0.637772 | 0.397874 | 0.659367 | 0.564448 | 0.658909 | 0.713612 | 0.565489 | 0.752409 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
cg27651452 | PLCB3 | ENST00000540288.1 | ExonBnd | NaN | chr11 | 64267589 | - | cg27651452 | NaN | NaN | ... | 0.849726 | 0.877327 | 0.901601 | 0.852546 | 0.329027 | 0.622806 | 0.836825 | 0.869634 | 0.762166 | 0.919437 |
cg27663279 | CYP2W1 | ENST00000308919.7 | ExonBnd | NaN | chr7 | 984588 | - | cg27663279 | NaN | NaN | ... | 0.969205 | 0.978932 | 0.977448 | 0.870755 | 0.977345 | 0.953699 | 0.976406 | 0.973642 | 0.971852 | 0.975655 |
ch.10.2686225R | OAT | ENST00000539214.1 | ExonBnd | NaN | chr10 | 124401712 | + | ch.10.2686225R | NaN | NaN | ... | 0.038583 | 0.050667 | 0.074185 | 0.161708 | 0.078753 | 0.058136 | 0.054807 | 0.071956 | 0.085594 | 0.055943 |
ch.14.569218R | SAV1 | ENST00000555720.1 | ExonBnd | NaN | chr14 | 50640736 | + | ch.14.569218R | NaN | NaN | ... | 0.160425 | 0.080493 | 0.082056 | 0.254433 | 0.045871 | 0.058197 | 0.200949 | 0.057020 | 0.193732 | 0.171011 |
ch.15.1310449F | MORF4L1 | ENST00000379535.4 | ExonBnd | NaN | chr15 | 78891481 | + | ch.15.1310449F | NaN | NaN | ... | 0.029367 | 0.036926 | 0.040849 | 0.119281 | 0.032884 | 0.029524 | 0.032956 | 0.039342 | 0.074056 | 0.037042 |
586055 rows × 154 columns
def _lookup_gene_symbol(symbol, lookups):
    """Return the first truthy Ensembl gene ID found for a gene symbol, else None.

    The full symbol is tried in every lookup first, so clone-style names that
    contain a '.' (e.g. 'CTD-2335O3.3') can match as written; the text before
    the first '.' is then tried as a fallback (the only form tried previously).
    """
    stem = symbol.split('.')[0]
    candidates = (symbol,) if stem == symbol else (symbol, stem)
    for candidate in candidates:
        for lookup in lookups:
            gene_id = lookup.get(candidate)
            if gene_id:
                return gene_id
    return None


print(len(filtered_cptac))
# Remove unassigned CpGs since we're just interested in those that annotate to genes in this study.
# The unassigned marker is the string '0'; the int 0 is excluded as well in
# case the names were zero-filled upstream.
cpg_assigned = filtered_cptac[~filtered_cptac['GencodeCompV12_NAME_FILT'].isin([0, '0'])]

# Fallback lookups, in priority order: GRCh38 HGNC symbol, external gene name,
# synonym; then the same three from the hg37 side.
gene_symbol_lookups = [
    hgnc_to_ensembl_gene_hg38,
    gene_name_to_ensembl_gene_hg38,
    syn_to_ensembl_gene_hg38,
    hgnc37_to_ensembl_gene_hg38,
    gene_name37_to_ensembl_gene_hg38,
    syn37_to_ensembl_gene_hg38,
]

ensembl_gene_ids = []  # one entry per row of cpg_assigned (None when unmapped)
unassigned = []        # probe IDs for which no gene ID was found
cpg_ids = cpg_assigned.index.values
gene_names = cpg_assigned['GencodeCompV12_NAME_FILT'].values
accessions = cpg_assigned['GencodeCompV12_Accession_FILT'].values
for cpg_id, accession, symbol in zip(cpg_ids, accessions, gene_names):
    # Transcript accessions carry a '.<version>' suffix; look up the bare ID first.
    matched_gene_id = enst_to_ensembl_gene_hg38.get(accession.split('.')[0])
    if not matched_gene_id:
        matched_gene_id = _lookup_gene_symbol(symbol, gene_symbol_lookups)
    if matched_gene_id:
        ensembl_gene_ids.append(matched_gene_id)
    else:
        unassigned.append(cpg_id)
        ensembl_gene_ids.append(None)
586055
Merge TCGA data with the CPTAC info¶
# Load the TCGA-KIRC normal-tissue methylation table.
tcga_normal = pd.read_csv(f'{base_dir}raw_downloads/TCGA/TCGA-KIRC-normal.csv', index_col=0)
print(len(tcga_normal))
# Drop probes that have no value in any column whose name contains 'Normal'.
tcga_value_cols = [col for col in tcga_normal if 'Normal' in col]
tcga_normal = tcga_normal.dropna(subset=tcga_value_cols, how='all')
tcga_normal
480437
comp_elem_ref | cpg_id | TCGA-KIRC_SolidTissueNormal_male_white_2_HumanMethylation450_None_58_TCGA-KIRC_TCGA-BP-5199_db01e359-10a7-48c0-a742-656183e60ba8 | TCGA-KIRC_SolidTissueNormal_male_blackorafricanamerican_3_HumanMethylation450_None_57_TCGA-KIRC_TCGA-CJ-4882_03c62ae5-662b-493b-b7b9-1bdd1cfafb5d | TCGA-KIRC_SolidTissueNormal_female_white_2_HumanMethylation450_1121_78_TCGA-KIRC_TCGA-B0-4852_6942780c-b6c7-4534-b9b7-feda9cd04ecc | TCGA-KIRC_SolidTissueNormal_female_white_1_HumanMethylation450_None_59_TCGA-KIRC_TCGA-CZ-4859_ed7f6372-d820-43b9-bafa-7d83be8f66ec | TCGA-KIRC_SolidTissueNormal_male_white_1_HumanMethylation450_None_55_TCGA-KIRC_TCGA-BP-5170_c474cee0-1117-4576-9e44-17938be10ca5 | TCGA-KIRC_SolidTissueNormal_female_white_1_HumanMethylation450_None_62_TCGA-KIRC_TCGA-CZ-4856_990080e4-9a47-4271-af1e-b34d7a973841 | TCGA-KIRC_SolidTissueNormal_female_white_2_HumanMethylation450_510_68_TCGA-KIRC_TCGA-B0-4818_c0357545-fb85-4d3c-b653-54fe2d1f7d18 | TCGA-KIRC_SolidTissueNormal_female_white_3_HumanMethylation450_None_69_TCGA-KIRC_TCGA-CJ-4916_9e662d2e-69ba-47f4-8954-3647ce22dc18 | ... 
| TCGA-KIRC_SolidTissueNormal_female_white_1_HumanMethylation450_None_50_TCGA-KIRC_TCGA-BP-5186_27b38cec-40fe-4598-959c-f3a1408ba17d | TCGA-KIRC_SolidTissueNormal_male_white_3_HumanMethylation450_None_49_TCGA-KIRC_TCGA-CJ-4869_d781be9c-7b3b-4acf-b202-d4d4b847db05 | TCGA-KIRC_SolidTissueNormal_male_white_4_HumanMethylation450_168_58_TCGA-KIRC_TCGA-B0-4814_62bcce15-fdaf-49d4-9bed-24a493f5776b | TCGA-KIRC_SolidTissueNormal_male_white_4_HumanMethylation450_42_75_TCGA-KIRC_TCGA-B0-4698_e3ed1380-d6f1-420e-b6a6-d24a6e81a364 | TCGA-KIRC_SolidTissueNormal_male_white_3_HumanMethylation450_106_72_TCGA-KIRC_TCGA-B0-4694_c92995d5-68ce-47d0-8149-0323e624c032 | TCGA-KIRC_SolidTissueNormal_male_white_3_HumanMethylation450_None_43_TCGA-KIRC_TCGA-CZ-5458_f29b6c8c-d713-42ad-9b90-e556df9b05cd | TCGA-KIRC_SolidTissueNormal_male_white_4_HumanMethylation450_None_69_TCGA-KIRC_TCGA-CZ-5464_cb371398-ee48-4665-8089-26229c5b2cf0 | TCGA-KIRC_SolidTissueNormal_male_blackorafricanamerican_1_HumanMethylation450_1696_51_TCGA-KIRC_TCGA-A3-3376_ac092a8e-80af-4589-8bb8-d86427b398ca | TCGA-KIRC_SolidTissueNormal_male_white_3_HumanMethylation450_478_47_TCGA-KIRC_TCGA-B0-4810_318f6ffb-1fdf-4f74-90db-21e337503aae | TCGA-KIRC_SolidTissueNormal_male_white_4_HumanMethylation450_1337_76_TCGA-KIRC_TCGA-B0-4712_ef9ae1dd-83aa-404d-83ef-ced707ae738b | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | cg00000029 | chr16:53434200 | 0.585096 | 0.511813 | 0.489982 | 0.433942 | 0.362978 | 0.216424 | 0.444647 | 0.477372 | ... | 0.492131 | 0.482295 | 0.331813 | 0.483519 | 0.352021 | 0.407191 | 0.417096 | 0.387277 | 0.479396 | 0.270373 |
3 | cg00000165 | chr1:90729117 | 0.112046 | 0.124627 | 0.125703 | 0.131980 | 0.136427 | 0.121054 | 0.078726 | 0.159395 | ... | 0.110112 | 0.117445 | 0.165679 | 0.113446 | 0.106500 | 0.128271 | 0.123130 | 0.145776 | 0.127239 | 0.088902 |
4 | cg00000236 | chr8:42405776 | 0.890243 | 0.921860 | 0.893998 | 0.892156 | 0.900247 | 0.912149 | 0.914099 | 0.904354 | ... | 0.827524 | 0.926865 | 0.907697 | 0.868838 | 0.876332 | 0.908702 | 0.882604 | 0.926601 | 0.903058 | 0.913879 |
5 | cg00000289 | chr14:68874422 | 0.787893 | 0.764004 | 0.747008 | 0.772496 | 0.819497 | 0.807987 | 0.833389 | 0.750756 | ... | 0.757610 | 0.819237 | 0.576580 | 0.811579 | 0.539969 | 0.728173 | 0.811078 | 0.800740 | 0.721580 | 0.707835 |
6 | cg00000292 | chr16:28878779 | 0.486220 | 0.450472 | 0.550261 | 0.468594 | 0.417706 | 0.534426 | 0.434728 | 0.516065 | ... | 0.484210 | 0.508354 | 0.507747 | 0.512481 | 0.491918 | 0.518954 | 0.509752 | 0.563412 | 0.440027 | 0.556221 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
480432 | rs9363764 | chr6:67522149 | 0.958777 | 0.029669 | 0.022984 | 0.969557 | 0.042266 | 0.970431 | 0.025363 | 0.964112 | ... | 0.957271 | 0.964707 | 0.545864 | 0.524157 | 0.952614 | 0.530244 | 0.021642 | 0.029753 | 0.973232 | 0.549709 |
480433 | rs939290 | chr3:14617359 | 0.499332 | 0.979551 | 0.532267 | 0.022846 | 0.973063 | 0.969120 | 0.021137 | 0.502045 | ... | 0.017175 | 0.524521 | 0.531782 | 0.979047 | 0.532914 | 0.503287 | 0.979060 | 0.530323 | 0.981460 | 0.525923 |
480434 | rs951295 | chr15:45707625 | 0.979259 | 0.978631 | 0.517537 | 0.035266 | 0.543317 | 0.962350 | 0.527981 | 0.542110 | ... | 0.534639 | 0.028460 | 0.054535 | 0.519416 | 0.535817 | 0.025882 | 0.508680 | 0.534920 | 0.536647 | 0.023618 |
480435 | rs966367 | chr2:12008094 | 0.032289 | 0.032615 | 0.034503 | 0.031796 | 0.492723 | 0.516675 | 0.518407 | 0.475700 | ... | 0.956839 | 0.034964 | 0.053458 | 0.026630 | 0.040761 | 0.037919 | 0.961308 | 0.462472 | 0.542867 | 0.524512 |
480436 | rs9839873 | chr3:86613005 | 0.643824 | 0.043111 | 0.955502 | 0.947049 | 0.949996 | 0.963768 | 0.044777 | 0.955269 | ... | 0.957763 | 0.632807 | 0.942134 | 0.972022 | 0.064434 | 0.961850 | 0.657795 | 0.619891 | 0.607759 | 0.968885 |
393499 rows × 153 columns
# Attach the mapped Ensembl gene ID (and the probe ID as a normal column) to
# each CpG. assign() returns a new frame, which avoids the
# SettingWithCopyWarning raised when columns are set on a filtered frame.
cpg_assigned = cpg_assigned.assign(ensembl_gene_id=ensembl_gene_ids, Locus=cpg_assigned.index)
cpg_annot = cpg_assigned.merge(annot, on='ensembl_gene_id', how="left")

# CpGs without a gene ID are dropped; the counts are printed below.
lacks_gene_id = cpg_annot['ensembl_gene_id'].isnull()
unassigned = cpg_annot[lacks_gene_id]
cpg_annot = cpg_annot[~lacks_gene_id]
# NOTE(review): the unassigned-gene count uses 'GencodeBasicV12_NAME' whereas
# the mapping used 'GencodeCompV12_NAME_FILT' - confirm which is intended.
u.dp(['All:', len(cpg_annot), 'Unassigned:', len(unassigned), 'Unassigned genes:', len(set(unassigned['GencodeBasicV12_NAME'].values))])

# We also want to remove any of the cases that were in the Not ccRCC cases from the tumour samples and merge the sample info
non_ccrcc = ['C3L-00359', 'C3N-00313', 'C3N-00435', 'C3N-00492', 'C3N-00832', 'C3N-01175', 'C3N-01180']
sample_df = pd.read_csv(f'{output_dir}CCRCC_Clark_Cell2019_meth_sample_df.csv', index_col=0)
sample_df = sample_df[~sample_df.index.isin(non_ccrcc)]
# Since they don't say explicitly that they collect normal samples, keep only
# the samples whose condition is 'Tumor', just in case.
sample_df = sample_df[sample_df['CondName'] == 'Tumor']
# Exclude the three samples listed here by their full label.
removed_samples = ['CpG_Tumor_C3L.01281_1', 'CpG_Tumor_C3L.01882_1', 'CpG_Tumor_C3L.01885_1']
sample_df = sample_df[~sample_df['FullLabel'].isin(removed_samples)]
-------------------------------------------------------------------------------- All: 573041 Unassigned: 13014 Unassigned genes: 3771 --------------------------------------------------------------------------------
print(len(cpg_annot))
# Annotation columns carried through to the final table.
meta_cols = ['Locus', 'chr', 'pos', 'ensembl_gene_id', 'external_gene_name', 'hgnc_symbol', 'entrezgene_id', 'Relation_to_Island', 'UCSC_RefGene_Group', 'GencodeCompV12_Accession_FILT', 'GencodeCompV12_Group_FILT']
cols = list(cpg_annot.columns)

# SampleId -> FullLabel for the retained samples, limited to the sample IDs
# that exist as columns of cpg_annot.
new_full_label_map = dict(zip(sample_df['SampleId'], sample_df['FullLabel']))
filtered_map = {
    sample_name: full_label
    for sample_name, full_label in new_full_label_map.items()
    if sample_name in cols
}
cols_to_keep = list(filtered_map.values())

# Rename the sample columns to their full labels and keep annotation + samples only.
annot_m = cpg_annot.rename(columns=filtered_map)
annot_m = annot_m[meta_cols + cols_to_keep]
573041
Now rename columns from TCGA dataset¶
def _tcga_age_group(age_token):
    """Bucket the age field of a TCGA column name into 'young', 'middle' or 'old'.

    '>=90' and any numeric age of 58 or more are 'old'; below 42 is 'young';
    42-57 is 'middle'. A token that is not a number (e.g. 'None') yields None
    instead of raising.
    """
    if age_token == '>=90':
        return 'old'
    try:
        age = int(age_token)
    except (TypeError, ValueError):
        return None
    if age < 42:
        return 'young'
    if age < 58:
        return 'middle'
    return 'old'


# Per-sample metadata parsed from the TCGA column names (one entry per column).
cond_names = []
cond_ids = []
SafeCases = []
FullLabel = []
gender = []
TumourStage = []
AgeGrouped = []
RaceGrouped = []
sample_id = []
label_map = {}  # original TCGA column name -> new 'CpG_Normal_<id>_1' label
cases = []
tumour_stage_dict = {'1': 'Stage I', '2': 'Stage II', '3': 'Stage III', '4': 'Stage IV'}
for case in tcga_normal.columns:
    if 'KIRC' not in case:
        continue
    # Example column name:
    # TCGA-KIRC_SolidTissueNormal_female_white_3_HumanMethylation450_None_69_TCGA-KIRC_TCGA-CJ-4916_9e662d2e-69ba-47f4-8954-3647ce22dc18
    # Fields used: [2] gender, [3] race, [4] stage number, [7] age, [-1] trailing identifier.
    c = case.split('_')
    identifier = c[-1]
    safe_identifier = identifier.replace('-', '.')
    new_label = f'CpG_Normal_{safe_identifier}_1'

    cond_names.append('Normal')
    cond_ids.append(0)
    gender.append(c[2].capitalize())
    RaceGrouped.append(c[3].capitalize())
    TumourStage.append(tumour_stage_dict.get(c[4]))
    AgeGrouped.append(_tcga_age_group(c[7]))
    SafeCases.append(safe_identifier)
    label_map[case] = new_label
    FullLabel.append(new_label)
    sample_id.append(f'{identifier}_Normal')
    cases.append(identifier)

# Rename tcga columns using full label
tcga_renamed = tcga_normal.rename(columns=label_map)
# Set the index
tcga_renamed = tcga_renamed.set_index('comp_elem_ref')

# Combine the two sample sheets: CPTAC rows first, then the TCGA normals.
merged_sample_df = pd.DataFrame()
merged_sample_df['case'] = list(sample_df.index) + cases
for column, tcga_values in [
    ('SampleId', sample_id),
    ('CondId', cond_ids),
    ('CondName', cond_names),
    ('SafeCases', SafeCases),
    ('FullLabel', FullLabel),
    ('TumorStage', TumourStage),
    ('gender', gender),
    ('RaceGrouped', RaceGrouped),
    ('AgeGrouped', AgeGrouped),
]:
    merged_sample_df[column] = list(sample_df[column].values) + tcga_values
merged_sample_df.set_index('case', inplace=True)
Merge CPTAC and TCGA¶
# Finally merge the two together: index the CPTAC table by probe ID and keep
# only the probes that are present in both the TCGA and CPTAC tables.
annot_m = annot_m.set_index('Locus')
tcga_all = tcga_renamed.join(annot_m, how='inner')
tcga_all
cpg_id | CpG_Normal_db01e359.10a7.48c0.a742.656183e60ba8_1 | CpG_Normal_03c62ae5.662b.493b.b7b9.1bdd1cfafb5d_1 | CpG_Normal_6942780c.b6c7.4534.b9b7.feda9cd04ecc_1 | CpG_Normal_ed7f6372.d820.43b9.bafa.7d83be8f66ec_1 | CpG_Normal_c474cee0.1117.4576.9e44.17938be10ca5_1 | CpG_Normal_990080e4.9a47.4271.af1e.b34d7a973841_1 | CpG_Normal_c0357545.fb85.4d3c.b653.54fe2d1f7d18_1 | CpG_Normal_9e662d2e.69ba.47f4.8954.3647ce22dc18_1 | CpG_Normal_ce33af25.6279.4e4a.8cfc.61d03c7eaf2f_1 | ... | CpG_Tumor_C3N.01220_1 | CpG_Tumor_C3N.01261_1 | CpG_Tumor_C3N.01361_1 | CpG_Tumor_C3N.01522_1 | CpG_Tumor_C3N.01524_1 | CpG_Tumor_C3N.01646_1 | CpG_Tumor_C3N.01648_1 | CpG_Tumor_C3N.01649_1 | CpG_Tumor_C3N.01651_1 | CpG_Tumor_C3N.01808_1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
cg00000029 | chr16:53434200 | 0.585096 | 0.511813 | 0.489982 | 0.433942 | 0.362978 | 0.216424 | 0.444647 | 0.477372 | 0.456852 | ... | 0.736535 | 0.749333 | 0.419163 | 0.753077 | 0.770999 | 0.721516 | 0.544133 | 0.327466 | 0.503078 | 0.694581 |
cg00000236 | chr8:42405776 | 0.890243 | 0.921860 | 0.893998 | 0.892156 | 0.900247 | 0.912149 | 0.914099 | 0.904354 | 0.923435 | ... | 0.869694 | 0.873052 | 0.893087 | 0.861024 | 0.906754 | 0.891277 | 0.847947 | 0.782197 | 0.874034 | 0.875275 |
cg00000289 | chr14:68874422 | 0.787893 | 0.764004 | 0.747008 | 0.772496 | 0.819497 | 0.807987 | 0.833389 | 0.750756 | 0.504889 | ... | 0.467959 | 0.545539 | 0.671885 | 0.702027 | 0.689886 | 0.609681 | 0.610733 | 0.632943 | 0.623050 | 0.592563 |
cg00000292 | chr16:28878779 | 0.486220 | 0.450472 | 0.550261 | 0.468594 | 0.417706 | 0.534426 | 0.434728 | 0.516065 | 0.500513 | ... | 0.566099 | 0.736726 | 0.403426 | 0.329949 | 0.526423 | 0.680651 | 0.690699 | 0.356444 | 0.678508 | 0.475394 |
cg00000321 | chr8:41310283 | 0.383709 | 0.396987 | 0.409871 | 0.514956 | 0.495738 | 0.466870 | 0.470612 | 0.392804 | 0.449994 | ... | 0.441940 | 0.676859 | 0.419441 | 0.521325 | 0.583523 | 0.427041 | 0.294012 | 0.541033 | 0.445536 | 0.552439 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
ch.X.42405548R | chrX:42661352 | 0.049304 | 0.074080 | 0.054073 | 0.044249 | 0.063061 | 0.028285 | 0.036191 | 0.045584 | 0.104399 | ... | 0.092153 | 0.102222 | 0.095231 | 0.082132 | 0.074636 | 0.069788 | 0.107173 | 0.147709 | 0.103869 | 0.136737 |
ch.X.557489R | chrX:34358320 | 0.107342 | 0.137135 | 0.067283 | 0.061627 | 0.102144 | 0.061248 | 0.052358 | 0.081089 | 0.232619 | ... | 0.138909 | 0.156391 | 0.132571 | 0.076778 | 0.111397 | 0.097706 | 0.116886 | 0.171847 | 0.142111 | 0.153098 |
ch.X.665616F | chrX:41219099 | 0.166893 | 0.181600 | 0.081570 | 0.074302 | 0.143562 | 0.060974 | 0.063811 | 0.102876 | 0.246511 | ... | 0.243470 | 0.178430 | 0.176420 | 0.100230 | 0.132132 | 0.195308 | 0.174261 | 0.244952 | 0.219125 | 0.247429 |
ch.X.772253F | chrX:47204286 | 0.165270 | 0.190517 | 0.119149 | 0.110620 | 0.072773 | 0.076397 | 0.113159 | 0.103859 | 0.324864 | ... | 0.247740 | 0.089806 | 0.121446 | 0.090446 | 0.084857 | 0.117017 | 0.095262 | 0.270480 | 0.075017 | 0.289129 |
ch.X.881546R | chrX:53664051 | 0.135172 | 0.143195 | 0.088927 | 0.064423 | 0.110187 | 0.063170 | 0.051933 | 0.082401 | 0.263820 | ... | 0.197644 | 0.184757 | 0.147397 | 0.082857 | 0.100079 | 0.135061 | 0.164813 | 0.232246 | 0.163514 | 0.173626 |
268355 rows × 262 columns
Check the data distribution of hyper and hypomethylated samples¶
import numpy as np
import matplotlib.pyplot as plt
# Pick the sample columns of each condition by the 'Tumor' / 'Normal' tag in the column name.
tumor_cols = [c for c in tcga_all.columns if 'Tumor' in c]
normal_cols = [c for c in tcga_all.columns if 'Normal' in c]
# Row-wise mean of the tumour columns minus row-wise mean of the normal columns.
tumor_mean = np.mean(tcga_all[tumor_cols].values, axis=1)
normal_mean = np.mean(tcga_all[normal_cols].values, axis=1)
mean_change = tumor_mean - normal_mean
# Count the rows whose mean change exceeds +/- 0.1.
n_hyper = len(mean_change[mean_change > 0.1])
n_hypo = len(mean_change[mean_change < -0.1])
# NOTE(review): the second label says 'Mean hypo' but the value shown is a count of rows,
# like the 'Number hyper' entry. Label kept as-is so it matches previously recorded output.
u.dp(['Number hyper (>0.1):', n_hyper, 'Mean hypo: (<-0.1)', n_hypo])
plt.hist(mean_change, bins=20)
-------------------------------------------------------------------------------- Number hyper (>0.1): 16196 Mean hypo: (<-0.1) 26172 --------------------------------------------------------------------------------
(array([4.00000e+00, 8.00000e+00, 4.00000e+01, 1.89000e+02, 6.42000e+02, 1.74200e+03, 5.12000e+03, 1.42540e+04, 4.39190e+04, 1.52085e+05, 2.94060e+04, 9.78500e+03, 2.99300e+03, 8.70000e+02, 1.93000e+02, 3.30000e+01, 1.70000e+01, 7.00000e+00, 1.00000e+00, 3.00000e+00]), array([-0.70565387, -0.63154229, -0.5574307 , -0.48331912, -0.40920754, -0.33509595, -0.26098437, -0.18687278, -0.1127612 , -0.03864961, 0.03546197, 0.10957356, 0.18368514, 0.25779673, 0.33190831, 0.40601989, 0.48013148, 0.55424306, 0.62835465, 0.70246623, 0.77657782]), <BarContainer object of 20 artists>)
# Repeat the tumour-vs-normal comparison on the rows annotated as 'Island' only.
is_island = tcga_all['Relation_to_Island'] == 'Island'
island = tcga_all[is_island]
island_tumor_cols = [c for c in island.columns if 'Tumor' in c]
island_normal_cols = [c for c in island.columns if 'Normal' in c]
# Row-wise mean of the tumour columns minus row-wise mean of the normal columns.
island_tumor_mean = np.mean(island[island_tumor_cols].values, axis=1)
island_normal_mean = np.mean(island[island_normal_cols].values, axis=1)
mean_change = island_tumor_mean - island_normal_mean
n_hyper_island = len(mean_change[mean_change > 0.1])
n_hypo_island = len(mean_change[mean_change < -0.1])
# NOTE(review): the second label says 'Mean hypo' but the value shown is a count of rows.
# Label kept as-is so it matches previously recorded output.
u.dp(['Number hyper (>0.1):', n_hyper_island, 'Mean hypo: (<-0.1)', n_hypo_island])
plt.hist(mean_change, bins=20)
-------------------------------------------------------------------------------- Number hyper (>0.1): 6171 Mean hypo: (<-0.1) 1744 --------------------------------------------------------------------------------
(array([3.0000e+00, 5.0000e+00, 1.7000e+01, 5.7000e+01, 1.6800e+02, 5.6600e+02, 1.9460e+03, 1.5834e+04, 7.3388e+04, 8.1530e+03, 2.9100e+03, 9.8200e+02, 3.5100e+02, 9.2000e+01, 2.9000e+01, 6.0000e+00, 5.0000e+00, 4.0000e+00, 0.0000e+00, 3.0000e+00]), array([-0.53426145, -0.46871949, -0.40317752, -0.33763556, -0.2720936 , -0.20655163, -0.14100967, -0.07546771, -0.00992574, 0.05561622, 0.12115818, 0.18670015, 0.25224211, 0.31778407, 0.38332604, 0.448868 , 0.51440996, 0.57995193, 0.64549389, 0.71103585, 0.77657782]), <BarContainer object of 20 artists>)
# Export the combined methylation table: missing values are replaced with 0 and the
# index is copied into an explicit 'Locus' column before the frame is written to CSV.
tcga_all = tcga_all.fillna(0).assign(Locus=lambda frame: frame.index)
tcga_all.to_csv(path_or_buf=f'{output_dir}meth_df_CPTAC-TCGA.csv', index=True)
# Leave the frame as the last expression so the notebook displays it.
tcga_all
cpg_id | CpG_Normal_db01e359.10a7.48c0.a742.656183e60ba8_1 | CpG_Normal_03c62ae5.662b.493b.b7b9.1bdd1cfafb5d_1 | CpG_Normal_6942780c.b6c7.4534.b9b7.feda9cd04ecc_1 | CpG_Normal_ed7f6372.d820.43b9.bafa.7d83be8f66ec_1 | CpG_Normal_c474cee0.1117.4576.9e44.17938be10ca5_1 | CpG_Normal_990080e4.9a47.4271.af1e.b34d7a973841_1 | CpG_Normal_c0357545.fb85.4d3c.b653.54fe2d1f7d18_1 | CpG_Normal_9e662d2e.69ba.47f4.8954.3647ce22dc18_1 | CpG_Normal_ce33af25.6279.4e4a.8cfc.61d03c7eaf2f_1 | ... | CpG_Tumor_C3N.01261_1 | CpG_Tumor_C3N.01361_1 | CpG_Tumor_C3N.01522_1 | CpG_Tumor_C3N.01524_1 | CpG_Tumor_C3N.01646_1 | CpG_Tumor_C3N.01648_1 | CpG_Tumor_C3N.01649_1 | CpG_Tumor_C3N.01651_1 | CpG_Tumor_C3N.01808_1 | Locus | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
cg00000029 | chr16:53434200 | 0.585096 | 0.511813 | 0.489982 | 0.433942 | 0.362978 | 0.216424 | 0.444647 | 0.477372 | 0.456852 | ... | 0.749333 | 0.419163 | 0.753077 | 0.770999 | 0.721516 | 0.544133 | 0.327466 | 0.503078 | 0.694581 | cg00000029 |
cg00000236 | chr8:42405776 | 0.890243 | 0.921860 | 0.893998 | 0.892156 | 0.900247 | 0.912149 | 0.914099 | 0.904354 | 0.923435 | ... | 0.873052 | 0.893087 | 0.861024 | 0.906754 | 0.891277 | 0.847947 | 0.782197 | 0.874034 | 0.875275 | cg00000236 |
cg00000289 | chr14:68874422 | 0.787893 | 0.764004 | 0.747008 | 0.772496 | 0.819497 | 0.807987 | 0.833389 | 0.750756 | 0.504889 | ... | 0.545539 | 0.671885 | 0.702027 | 0.689886 | 0.609681 | 0.610733 | 0.632943 | 0.623050 | 0.592563 | cg00000289 |
cg00000292 | chr16:28878779 | 0.486220 | 0.450472 | 0.550261 | 0.468594 | 0.417706 | 0.534426 | 0.434728 | 0.516065 | 0.500513 | ... | 0.736726 | 0.403426 | 0.329949 | 0.526423 | 0.680651 | 0.690699 | 0.356444 | 0.678508 | 0.475394 | cg00000292 |
cg00000321 | chr8:41310283 | 0.383709 | 0.396987 | 0.409871 | 0.514956 | 0.495738 | 0.466870 | 0.470612 | 0.392804 | 0.449994 | ... | 0.676859 | 0.419441 | 0.521325 | 0.583523 | 0.427041 | 0.294012 | 0.541033 | 0.445536 | 0.552439 | cg00000321 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
ch.X.42405548R | chrX:42661352 | 0.049304 | 0.074080 | 0.054073 | 0.044249 | 0.063061 | 0.028285 | 0.036191 | 0.045584 | 0.104399 | ... | 0.102222 | 0.095231 | 0.082132 | 0.074636 | 0.069788 | 0.107173 | 0.147709 | 0.103869 | 0.136737 | ch.X.42405548R |
ch.X.557489R | chrX:34358320 | 0.107342 | 0.137135 | 0.067283 | 0.061627 | 0.102144 | 0.061248 | 0.052358 | 0.081089 | 0.232619 | ... | 0.156391 | 0.132571 | 0.076778 | 0.111397 | 0.097706 | 0.116886 | 0.171847 | 0.142111 | 0.153098 | ch.X.557489R |
ch.X.665616F | chrX:41219099 | 0.166893 | 0.181600 | 0.081570 | 0.074302 | 0.143562 | 0.060974 | 0.063811 | 0.102876 | 0.246511 | ... | 0.178430 | 0.176420 | 0.100230 | 0.132132 | 0.195308 | 0.174261 | 0.244952 | 0.219125 | 0.247429 | ch.X.665616F |
ch.X.772253F | chrX:47204286 | 0.165270 | 0.190517 | 0.119149 | 0.110620 | 0.072773 | 0.076397 | 0.113159 | 0.103859 | 0.324864 | ... | 0.089806 | 0.121446 | 0.090446 | 0.084857 | 0.117017 | 0.095262 | 0.270480 | 0.075017 | 0.289129 | ch.X.772253F |
ch.X.881546R | chrX:53664051 | 0.135172 | 0.143195 | 0.088927 | 0.064423 | 0.110187 | 0.063170 | 0.051933 | 0.082401 | 0.263820 | ... | 0.184757 | 0.147397 | 0.082857 | 0.100079 | 0.135061 | 0.164813 | 0.232246 | 0.163514 | 0.173626 | ch.X.881546R |
268355 rows × 263 columns
# Write the sample annotation frame (index included) into output_dir as well
merged_sample_df.to_csv(path_or_buf=f'{output_dir}meth_sample_df_CPTAC-TCGA.csv', index=True)
Get the mutation profile for the TCGA patients¶
We're interested in looking at PBRM1 mutations, so we want to make sure we also use normal data from patients with the same mutations.
# The sample frame needs the case ID so that mutation information can be looked up.
# Split every KIRC column name on '_' once: the second-to-last field is used as the
# case ID and the last field as the file ID.
kirc_name_parts = [col.split('_') for col in tcga_normal.columns if 'KIRC' in col]
cases = [parts[-2] for parts in kirc_name_parts]
fileid_to_case = {parts[-1]: parts[-2] for parts in kirc_name_parts}
Make a TCGA only sample frame¶
# Look up the case ID of every sample by its index value; values that are not a key of
# fileid_to_case get None.
merged_sample_df['tcga_case_id'] = list(map(fileid_to_case.get, merged_sample_df.index.values))
# Keep only the rows with CondId == 0 (presumably the TCGA samples, going by the
# variable name; confirm against where CondId is assigned).
is_cond_zero = merged_sample_df['CondId'] == 0
tcga_sample_df = merged_sample_df[is_cond_zero]
"""
We only want to do this once, so I keep it off by default :)
"""
# Download mutation data
# The mutation files are fetched with scidat's Download helper, but only when
# `download_mutation` is switched on; otherwise a reminder is printed.
from scidat.download import Download
from sciutil import SciUtil

tcga_dir = '../data/raw_downloads/TCGA/'
mutation_dir = f'{tcga_dir}mutation/'
u = SciUtil()
download_mutation = False
if not download_mutation:
    u.dp(["You said you already have your mutations downloaded, if this isn't the case you'll need to do this!"])
else:
    # mutation_dir is passed for both directory arguments, as in the original call.
    download = Download(f'{tcga_dir}kidney_clinical.tsv',
                        mutation_dir,
                        mutation_dir,
                        f'{tcga_dir}gdc-client',
                        max_cnt=1)
    download.download_data_using_api(cases, 'mutation')
-------------------------------------------------------------------------------- You said you already have your mutations downloaded, if this isn't the case you'll need to do this! --------------------------------------------------------------------------------
# Count, for a handful of genes of interest, how many mutation files contain at least one
# mutation in that gene and how many distinct genomic changes those hits represent.
mutation_files = os.listdir(mutation_dir)
gene_id = 'ssm.consequence.0.transcript.gene.symbol'  # column holding the gene symbol
change = 'ssm.genomic_dna_change'  # column holding the genomic DNA change
genes = ['MTOR', 'PTEN', 'VHL', 'PBRM1', 'KDM5C', 'CUBN', 'USH2A', 'SETD2', 'BAP1', 'MUC16', 'TTN']
# Parse every file exactly once up front. Previously each file was re-read from disk for
# every gene in `genes`, although its contents do not depend on the gene.
mutation_tables = [pd.read_csv(f'{mutation_dir}{f}', sep='\t') for f in mutation_files]
for gene in genes:
    mutations = []
    for table in mutation_tables:
        df = table[table[gene_id] == gene]
        if len(df) > 0:
            # Only the first matching change of a file is recorded, so each file
            # (presumably one per case; confirm) contributes at most one entry.
            mutations.append(df[change].values[0])
    # gene, number of files with a hit, number of distinct changes among those hits
    print(gene, len(mutations), len(set(mutations)))
MTOR 8 8 PTEN 3 3 VHL 57 53 PBRM1 57 57 KDM5C 8 8 CUBN 5 5 USH2A 4 4 SETD2 15 15 BAP1 15 15 MUC16 12 12 TTN 28 28
from scidat.api import API, APIException
import matplotlib.pyplot as plt

# Paths handed to the scidat API; everything except the annotation file lives under tcga_dir.
gdc_client = f'{tcga_dir}./gdc-client'
sample_file = f'{tcga_dir}gdc_sample_sheet.2021-05-03.tsv'
manifest_file = f'{tcga_dir}gdc_manifest_20210503_065756.txt'
annotation_file = f'{supp_dir}hsapiens_gene_ensembl-GRCh38.p13.csv'
clinical_file = f'{tcga_dir}clinical.cart.2021-05-03/clinical.tsv'
api = API(manifest_file, gdc_client, clinical_file, sample_file, data_dir, data_dir, annotation_file,
          max_cnt=10, requires_lst=['counts'])
api.build_mutation_df(mutation_dir)

# Now we want to build a dictionary with mutations and cases.
filter_col = 'ssm.consequence.0.transcript.gene.symbol'
# Filtering on [''] presumably matches every gene symbol - confirm against scidat.
genes_with_mutations = api.get_mutation_values_on_filter(filter_col, [''], filter_col)

# Now for each gene we want to collect the cases with mutations
cases_w_mutations = {}
for g in genes_with_mutations:
    # NOTE(review): this rebinds the notebook-level `cases` list built from the TCGA column
    # names earlier; kept as-is because later cells may rely on the current binding.
    cases = api.get_mutation_values_on_filter('case_id', [g], filter_col, exact=True)
    cases_w_mutations[g] = cases

# Sort the genes by the number of cases carrying a mutation and print them out
sorted_genes_w_mutations = sorted(cases_w_mutations, key=lambda k: len(cases_w_mutations[k]), reverse=True)
for g in sorted_genes_w_mutations:
    print(g, len(cases_w_mutations[g]))

# Invert the mapping (case -> mutated genes), keeping only genes that are mutated in more
# than `min_cases_per_gene` cases. The plot title is derived from the same threshold: it
# previously claimed "5 or more cases" while the filter has always been "> 10".
min_cases_per_gene = 10
case_to_mutation = {}
for gene, gene_cases in cases_w_mutations.items():
    if len(gene_cases) > min_cases_per_gene:
        for c in gene_cases:
            case_to_mutation.setdefault(c, []).append(gene)

# Check how many of the retained genes are mutated in each case
number_mutations = [len(gene_list) for gene_list in case_to_mutation.values()]
plt.hist(number_mutations, bins=20)
plt.title(f"Number of mutations per case (if gene has more than {min_cases_per_gene} cases)")
VHL 57 PBRM1 57 TTN 28 SETD2 15 BAP1 15 MUC16 12 DST 12 KMT2C 9 MTOR 8 KDM5C 8 KMT2A 7 KIAA1549L 7 SPEN 7 ANK3 7 CSMD3 7 ZFPM2 6 BIRC6 6 DNAH9 6 PEG3 6 PDZD2 6 HMCN1 6 NEB 6 KAT6A 6 TP53 6 LRP2 6 STAG2 6 NF1 5 RANBP2 5 CNOT1 5 LYST 5 AKAP9 5 RPAP1 5 THBS1 5 VWF 5 AP000769.3 5 APOB 5 ABCA13 5 TRIOBP 5 CUBN 5 HSPA8 5 FCGBP 5 MTUS2 5 AHNAK2 5 COL6A3 5 TNR 5 UNC79 5 ZFHX4 5 LRP1 5 SYNE2 5 SCAF4 5 ATM 5 LRP1B 5 LAMA1 5 RALGAPA1 5 NRXN1 5 COL6A6 5 FBN2 5 PKHD1L1 5 CACNA1S 5 FAT4 5 SYNE1 5 XIRP2 4 SIPA1L2 4 HIVEP1 4 AKAP6 4 MAGEC1 4 HERC1 4 TENM1 4 USH2A 4 CCAR1 4 PTPRT 4 IGSF10 4 ERBB2 4 SAMD9 4 ZMYM2 4 PAN3 4 NPNT 4 SPHKAP 4 MACF1 4 ATP1B4 4 SEPTIN4 4 ARID1A 4 ANKHD1-EIF4EBP3 4 RYR3 4 KIF14 4 PXDN 4 DSCAM 4 N4BP2L2 4 DENND4A 4 GCC2 4 ZEB2 4 RFC1 4 MYH7 4 DOCK2 4 SULF1 4 CENPF 4 FBN1 4 PCNX1 4 TARBP1 4 DNAH2 4 GIGYF2 4 ARMC4 4 GON4L 4 FAT1 4 ROCK1 4 SDK2 4 RGS3 4 G3BP1 4 ZNF609 4 ABCC4 4 TEX13A 4 ADAMTS14 4 DNAH10 4 FZD10 4 UBN1 4 CPAMD8 4 KCNH7 4 TPR 4 LAMA2 4 NOTCH2 4 ALMS1 4 CSMD1 4 ALPK2 4 MARCHF10 4 UTRN 4 ZRANB1 3 PCNX2 3 NOL6 3 ZNF687 3 ARID2 3 ARHGAP45 3 TUBG1 3 EPHB6 3 MTUS1 3 PLXNA4 3 ZFX 3 GPR171 3 ARFGEF1 3 PHLDB1 3 TTI1 3 ATP8B1 3 APC 3 RP1L1 3 ALS2 3 ARHGAP20 3 ICE1 3 CSPP1 3 CUL9 3 CTSB 3 USP40 3 TLR4 3 AHNAK 3 ZNF184 3 COL24A1 3 TUBGCP3 3 AKAP17A 3 TMEM245 3 HEATR5B 3 FBH1 3 SMARCA4 3 USP24 3 ZNF236 3 REV3L 3 BRD9 3 TRMT13 3 BRD1 3 PAPOLG 3 MYO5C 3 ZNFX1 3 ABCA10 3 WNK1 3 TRAPPC8 3 EXPH5 3 LAMB1 3 BMS1 3 ARHGAP35 3 ZZEF1 3 IWS1 3 ADCY5 3 DAXX 3 MXRA5 3 GALNT7 3 SLC23A1 3 NPAS4 3 GSDME 3 CD180 3 TMPRSS11D 3 ALDOB 3 ZNF112 3 ADAM10 3 PKN3 3 UHRF1BP1L 3 RNF217-AS1 3 LRPPRC 3 RNF10 3 DAPK1 3 GPR180 3 NUAK2 3 DOCK8 3 PI4KA 3 AL353804.7 3 MUC17 3 LRRK2 3 NCKAP5 3 NUP205 3 POLR2A 3 CLTC 3 MYRF 3 MUC2 3 KIF1A 3 MAML1 3 PYGO1 3 CHL1 3 TACR3 3 MAP3K4 3 ABCA12 3 RBFOX1 3 SLC3A2 3 LARP4 3 FAT2 3 ILDR2 3 NLRC5 3 TGM5 3 NLRP9 3 ZBTB44 3 RAPGEF6 3 RASA1 3 ERCC6 3 CACHD1 3 CELSR1 3 PPL 3 ATP1A2 3 MYCBP2 3 LIPI 3 EP300 3 ADGRG4 3 ATXN2L 3 MYH6 3 RAPGEF2 3 STRN3 3 MTSS1 3 
ACO1 3 CDK12 3 FBXO43 3 BEST1 3 NEXMIF 3 WDFY3 3 DISP1 3 THAP9 3 SPTB 3 KLK6 3 NOS1 3 ATRX 3 ADAMTS18 3 AGO2 3 MYOM1 3 IRX1 3 PRRC2B 3 TINAGL1 3 VPS13D 3 PCDHA12 3 GBF1 3 MKI67 3 AMER2 3 DIAPH1 3 MYH4 3 SRCAP 3 SPTA1 3 LAMC3 3 WDR72 3 DYNC1H1 3 UBR1 3 CABIN1 3 PASD1 3 STK31 3 PLEKHH2 3 KLHL17 3 BLM 3 IGF2R 3 NCOA4 3 AL592490.1 3 KDR 3 IGHG4 3 KBTBD6 3 PRTG 3 ARFGEF2 3 ANKRD26 3 GUCY2F 3 EPHA5 3 LIPE 3 EIF2AK4 3 ADAMTS20 3 MAX 3 SLC22A8 3 CBLN1 3 SLF2 3 PLEC 3 VPS13C 3 RICTOR 3 VPS13B 3 GABRQ 3 NUP210L 3 CFAP43 3 DOCK10 3 SUPT6H 3 ZBBX 3 EMILIN1 3 LVRN 3 MARS1 3 PTPRH 3 HDAC6 3 ADAMTS12 3 ATF7IP 3 ZNF91 3 SUZ12 3 RANBP6 3 IRAK1 3 SCAF11 3 PRR12 3 MAP3K11 3 RNF145 3 DENND4B 3 MCHR2 3 ABCA7 3 AHCTF1 3 COL1A2 3 RRS1 3 NFE2L2 3 SRGAP1 3 ABCD3 3 CDH8 3 ANKRD11 3 SMC5 3 UBR4 3 PTEN 3 NPHP3 3 PSG3 3 SLC5A10 3 SLCO4C1 3 RFX6 3 THSD7B 3 NCOA3 3 RYR1 3 POC1B 3 MYH2 3 SVEP1 3 C3 3 FAT3 3 CHD7 3 ITSN2 3 CTNNA1 3 OR2AG1 3 TNC 3 NALCN 3 CCDC173 3 KIF1B 3 SLC47A2 3 MYH11 3 NCOR1 3 GRID2 3 DUS2 3 CELSR2 3 LRP12 3 SLC5A12 3 TMEM132D 3 RELN 3 AP4E1 3 PCDHGB7 3 ZNF318 3 CPS1 3 CDKN2AIP 3 KLRC2 3 ZC3H13 3 ADA2 3 CDCA2 3 CACNA1C 3 TSC2 3 FAM83B 3 MCCC2 3 TNS2 3 PCLO 3 GUCY2C 3 KRT2 3 SPOCD1 2 CSGALNACT1 2 LETMD1 2 PLD1 2 SNRNP48 2 OR4K1 2 INPP5F 2 RBPJL 2 ATRNL1 2 PGAM2 2 ITGB1 2 SYT3 2 POT1 2 ANKRD34B 2 TSNARE1 2 SH3BP4 2 DOP1A 2 SLC44A4 2 MVP 2 KRT4 2 CEP250 2 SH3TC1 2 NRBP1 2 GPATCH1 2 ZFP82 2 RAD51AP2 2 LAMB4 2 CRYBG2 2 SEC16A 2 CSF2RA 2 PPP2R1B 2 URB2 2 ELP1 2 MROH2B 2 PAH 2 CPED1 2 NMT2 2 LRIG3 2 PDXDC1 2 AFF3 2 NKX2-2 2 GOSR1 2 SLC34A1 2 RAB6D 2 KIDINS220 2 ZNF804B 2 SLC7A3 2 ZFAT 2 ZNF761 2 GRAMD1A 2 GPM6A 2 PKD1 2 LARP1 2 PCDH7 2 PABPC3 2 AKNA 2 SDK1 2 CEP131 2 GRIN2B 2 MAGT1 2 CHRM2 2 CWC27 2 MEFV 2 DIP2C 2 USP25 2 PREX1 2 NACA 2 SLC16A4 2 FLRT2 2 DRD3 2 PYCARD 2 OR2J2 2 ALPK3 2 TRMT9B 2 BOC 2 PPP1R15B 2 PTPRZ1 2 NOL11 2 RNPEPL1 2 ARHGAP30 2 EHBP1 2 CADPS 2 ACAD10 2 VPS41 2 LAMC2 2 NCAPD3 2 KNDC1 2 SLC9A4 2 FANCE 2 ADGRV1 2 GJA10 2 DOCK7 2 OR7A10 2 SMCR8 2 SERPINB8 2 CDH7 2 
SCN10A 2 ITGAL 2 SNX25 2 WDR33 2 TNFRSF1A 2 RAPGEF4 2 ANK2 2 EAF2 2 ZNF543 2 KSR2 2 PHF14 2 NSD1 2 TBXAS1 2 SAMD15 2 TNIK 2 ANKRD17 2 DYRK1A 2 BAZ1B 2 TSHZ2 2 APC2 2 TCF20 2 SLC10A5 2 ADGRD1 2 PCDHGB4 2 TTC37 2 CCPG1 2 TEKT4 2 BRPF1 2 COL14A1 2 RRAGC 2 ANKRD13C 2 MYL10 2 ADGRL3 2 TNRC6A 2 CD4 2 CATSPERG 2 PENK 2 ADH1B 2 EIF4A3 2 NNT 2 SCNN1A 2 SLC19A3 2 NELL1 2 ZSCAN21 2 FGFR3 2 LRRC4 2 TRPM6 2 EIF2AK2 2 DNAJC13 2 ATP1A4 2 ODF2 2 MED18 2 MYH13 2 QSER1 2 ANKK1 2 CSK 2 SIL1 2 ECEL1 2 PLCE1 2 PIK3R6 2 CD44 2 FHOD3 2 DNAH7 2 DNAH3 2 TPST1 2 ZMYND8 2 CPSF7 2 VPS35L 2 ELFN2 2 STAT2 2 C1orf112 2 RBM28 2 LAMA4 2 SART3 2 EP400 2 NBPF11 2 COBLL1 2 KIAA1755 2 NCAN 2 AVIL 2 DPP10 2 TOGARAM1 2 ADAR 2 PCDHB8 2 PSG8 2 ACSM3 2 KALRN 2 BRIP1 2 CLDN10 2 DOCK4 2 SLC3A1 2 PCF11 2 MCTP2 2 RIPK2 2 DIP2B 2 TAF1A 2 CYP26B1 2 LTN1 2 ENPP3 2 SEL1L2 2 CAMTA2 2 MYO16 2 KIAA0319 2 THSD7A 2 IRS1 2 SCD5 2 COPS5 2 PDE1B 2 KLHL7 2 SLC22A9 2 MCTP1 2 HDAC9 2 MIA3 2 MAB21L1 2 LMTK2 2 WTAP 2 ARL14EP 2 ZGRF1 2 ACTBL2 2 SERPINI2 2 RUSC1 2 GRTP1 2 CLIC6 2 CCNF 2 RGL1 2 MN1 2 HCN1 2 DACT1 2 RIN3 2 ASTN2 2 MKKS 2 SOS2 2 PRSS55 2 TIMM44 2 CORO2B 2 RPL22 2 SMOX 2 DNMT3A 2 B3GNT8 2 CLDN4 2 TP53I3 2 PLEKHG2 2 RGS6 2 SNRNP200 2 SPNS2 2 VANGL1 2 CRYBG1 2 CUX1 2 PHACTR1 2 MICAL2 2 SNED1 2 UCK2 2 VPS54 2 SCN9A 2 NUP160 2 TMEM132E 2 BCL6 2 GSAP 2 CHAT 2 MLYCD 2 THOC1 2 MCM3AP 2 TMOD4 2 RELCH 2 AHDC1 2 UNK 2 SLC6A19 2 PCDHGA3 2 SPSB1 2 SLC7A2 2 MDN1 2 WNK3 2 EPS8L2 2 MTMR10 2 DNAJC10 2 FBXO11 2 ELF2 2 KAT7 2 PNLIP 2 MSN 2 RGS18 2 JAK1 2 TBX21 2 GBA2 2 CCDC157 2 CELF4 2 KDM5A 2 ZFP90 2 DAAM1 2 FEM1C 2 PCSK2 2 CHAF1A 2 HNRNPH1 2 CATSPERD 2 AGPAT5 2 ASB1 2 SELL 2 PLAT 2 SLC25A14 2 USP9X 2 OR4Q3 2 ITPKB 2 ZDHHC9 2 TSPOAP1 2 FBN3 2 OR4C16 2 ACOT2 2 MYO10 2 DNAJB1 2 PRKACB 2 SMG8 2 CFAP65 2 CFAP69 2 XKR4 2 ATR 2 MYO3A 2 RYR2 2 IFT88 2 DSC3 2 TTBK1 2 TPTE2 2 TRAF5 2 TNRC6B 2 RTTN 2 MAPT 2 SACS 2 SLC34A3 2 SYNJ2 2 PROS1 2 GNAS 2 ADGRA1 2 ERBB4 2 PTPRJ 2 SH3TC2 2 IDE 2 CLSPN 2 CYP11B2 2 CFH 2 PKHD1 2 GOT2 2 WDR64 2 AIDA 2 
CCT8 2 PDS5B 2 ARHGEF7 2 DCHS2 2 TEX11 2 SLC22A4 2 ARMCX5 2 RUNDC3B 2 MAP3K1 2 UBN2 2 SIPA1L3 2 FREM2 2 CCDC181 2 UNC13D 2 MYOM2 2 SESTD1 2 BRD8 2 SLITRK2 2 KRT73 2 ZNF423 2 HRNR 2 NRAP 2 TSHZ3 2 TOM1L1 2 ANKRD50 2 SCN1A 2 ASH1L 2 CHI3L1 2 ZNF276 2 SHANK2 2 MROH5 2 SLFN11 2 SLC22A6 2 RAPH1 2 EIF4A2 2 SPIB 2 MIA2 2 GPR174 2 DHX32 2 ZNF3 2 UBQLN3 2 ZSCAN29 2 SHLD2 2 BTAF1 2 BAZ2B 2 SIPA1L1 2 A2M 2 PTK7 2 CSMD2 2 TNKS1BP1 2 RP1 2 HECTD2 2 NR1I3 2 GLB1 2 SPG11 2 LIMK2 2 PDE6B 2 KLHL32 2 PHF3 2 DDX46 2 PLEKHO2 2 CUL4B 2 ZNF415 2 PITPNM1 2 RASGRF2 2 ITPR2 2 XPO1 2 NOL10 2 IRF9 2 ARHGAP25 2 KRT12 2 PKN2 2 FN1 2 GTF3C1 2 AMPH 2 BRINP2 2 CEP290 2 ASXL3 2 YEATS4 2 SIGLEC8 2 MRPS18A 2 GGNBP2 2 SELE 2 RNF213 2 ULK3 2 SCARB2 2 BRPF3 2 MLLT1 2 TRIM36 2 ADCY7 2 UBE2O 2 SPTBN2 2 KIAA0930 2 KCNA1 2 P2RX2 2 NATD1 2 MORC1 2 ZNF407 2 MAPK8 2 PCDHGB3 2 DMD 2 TAX1BP1 2 NVL 2 COL4A5 2 SF3A3 2 ETV1 2 NAF1 2 ABCC9 2 RB1 2 SLC36A1 2 MMP12 2 NLRP7 2 GPR89B 2 AOC1 2 PCED1B 2 SLTM 2 KMT2B 2 STC2 2 DHX36 2 ZC3HC1 2 MYO5B 2 NAV2 2 SOX9 2 ACTG1 2 RIMS1 2 CACNA1A 2 CDKL5 2 CCSER2 2 LNPEP 2 SMS 2 DSCAML1 2 ZNF77 2 GUSB 2 FCGR3B 2 CCBE1 2 ATP8A2 2 FBLN1 2 MED12 2 RAD54L 2 MYO7A 2 PCDHB16 2 CFAP57 2 RBM27 2 DYNC2H1 2 KNTC1 2 ZNF862 2 DGKH 2 DNAH11 2 CAMK1D 2 UACA 2 XRCC5 2 YTHDC1 2 MFSD2A 2 DISC1 2 HTR2A 2 CAPZA2 2 NCOA2 2 ARL6IP6 2 TASOR2 2 ZBTB40 2 ABLIM1 2 DDB1 2 FSTL4 2 NTRK1 2 TENM4 2 ADGRB3 2 MYO1F 2 EPHX1 2 LMBRD2 2 PCDHGB2 2 CFHR3 2 NT5DC3 2 FLNB 2 DCAF17 2 IPP 2 FOXA2 2 ATP8B4 2 RPRD2 2 ZNF304 2 SAMD14 2 RABGGTB 2 AMBRA1 2 SLC1A5 2 PIK3C3 2 CD300LG 2 PLEKHG4 2 FBXL14 2 SEC13 2 DNMT3B 2 PTPRB 2 KLF8 2 EHMT2 2 TTC32 2 UIMC1 2 FTO 2 ZNF124 2 ZHX2 2 MYH7B 2 NF2 2 TFDP3 2 RNF14 2 ZNF594 2 HECW1 2 CYP19A1 2 AGBL1 2 SLC22A10 2 RABGEF1 2 OR51Q1 2 KPRP 2 NDC80 2 ZNF804A 2 NES 2 WASHC2A 2 PROSER1 2 PI4KB 2 DNAJB11 2 SOCS3 2 SERAC1 2 PIK3CG 2 EIF4G3 2 DCTN1 2 SYK 2 POLD1 2 NDUFS1 2 MRC1 2 ROS1 2 SHROOM4 2 DCT 2 CCDC180 2 NABP1 2 TLN1 2 ZNF141 2 ORC1 2 SPTAN1 2 EPHA8 2 CRYGB 2 AMN 2 ASB16 2 COL4A6 2 
ACSS2 2 GPR26 2 SNAP91 2 WNK2 2 PRAMEF10 2 CCDC88B 2 CHRNB1 2 NOP14 2 TGM6 2 PRRC2A 2 OTOP1 2 SSTR3 2 POTEF 2 ATP10D 2 CLVS1 2 TRIM6 2 PCDH1 2 PHIP 2 TAF1 2 CLCN6 2 PLG 2 STMN4 2 CLASP1 2 ACP7 2 KITLG 2 ADAMTS13 2 TAFA4 2 TFDP2 2 FANCG 2 CDC42BPA 2 VPS13A 2 ASCC3 2 DCDC1 2 TMPRSS6 2 COQ8A 2 PDIA2 2 AQR 2 FAM83C 2 ST14 2 ACSL1 2 ACACB 2 CCDC150 2 RENBP 2 CSF3R 2 CNPY4 2 UGT1A8 2 SPAG17 2 C14orf39 2 ZC3H7A 2 PIAS1 2 CAMKK1 2 KDM4D 2 KNL1 2 CDC14A 2 CERS3 2 REPS2 2 KCNH5 2 UBE3C 2 RAI1 2 TMTC2 2 ZDHHC16 2 OR1D2 2 TMEM130 2 EIF4G1 2 MET 2 DPYSL3 2 DIS3 2 CHD4 2 ZNF561 2 XPO5 2 RAB24 2 PTPRN2 2 GDF10 2 ARFGEF3 2 RBM39 2 MED13 2 KHK 2 OLFM4 2 SASS6 2 CPA1 2 ANKLE2 2 STAB2 2 RBM19 2 ORC4 2 SNX27 2 TCF3 2 SRSF4 2 RAB33A 2 R3HDML 2 MUC4 2 SMAD2 2 ATP12A 2 NUP88 2 MYOT 2 MKRN3 2 OPA1 2 SERPINA12 2 ABCC11 2 RREB1 2 OCA2 2 IL20RA 2 PM20D2 2 ZNF655 2 TNK1 2 PAXBP1 2 SMARCC2 2 JMJD1C 2 ZNF496 2 HECTD4 2 TBC1D9B 2 NOX4 2 SMC3 2 SALL4 2 KCNB2 2 PLBD1 2 EFCC1 2 PTPN4 2 DENND5A 2 ZAN 2 SFXN4 2 EVPL 2 TACC2 2 ASAH2 2 ZNF100 2 TTK 2 ABCA1 2 TRAPPC13 2 RPH3AL 2 BARD1 2 CXCR6 2 DNAH8 2 PLEKHA6 2 KLF3 2 PGPEP1L 2 PRPF8 2 ZNF395 2 MYH14 2 CAD 2 GLG1 2 TBC1D24 2 TRPM4 2 KPNA1 2 EPB41L4B 2 EEA1 2 GPR89A 2 CACNA1F 2 DRD2 2 GGT7 2 GZF1 2 UBLCP1 2 ATXN1 2 PICALM 2 PIWIL1 2 TLR8 2 MPP5 2 HACE1 2 DLC1 2 PPIL4 2 RDH16 2 C11orf16 2 CSNK1A1L 2 OPHN1 2 CD101 2 CUL3 2 ZMYM4 2 TG 2 PRKD3 2 TBC1D8B 2 IRF7 2 INPP4B 2 DNM1L 2 ZNF257 2 C8orf37 2 NCAPG2 2 ZMYM6 2 CDH4 2 SCN11A 2 HLA-E 2 BCAR3 2 EPB41L2 2 SBF1 2 FLVCR2 2 POLG2 2 ZNF215 2 LXN 2 UBQLN4 2 KIF21B 2 NECTIN3 2 WDR25 2 SYT7 2 AOC2 2 ENPP7 2 PIGT 2 CFAP36 2 TNXB 2 ALG13 2 ACP2 2 MTREX 2 SCG2 2 AP5Z1 2 CACNA1G 2 TASOR 2 ACSBG1 2 SLC4A7 2 SLC6A12 2 COL6A2 2 IGFLR1 2 MEP1B 2 USP5 2 UBA6 2 RAB8A 2 SLU7 2 MLLT10 2 RIC8B 2 COQ5 2 SCAPER 2 SLC39A10 2 LRRIQ1 2 BRINP3 2 AGTPBP1 2 CHRNA1 2 FKBPL 2 MAP3K15 2 PCK1 2 LRRCC1 2 MOV10L1 2 SIGLEC1 2 KCNA5 2 CHERP 2 LRP4 2 HCFC2 2 TMPRSS15 2 DTNBP1 2 RPS6KA1 2 DOP1B 2 LAMP3 2 TRAT1 2 MINK1 2 GTSF1 2 NPR1 2 NSG1 2 
COL11A2 2 EGFR 2 KCND2 2 MED12L 2 MUC5B 2 NDST4 2 KIF13A 2 CCNB2 2 RSPRY1 2 IREB2 2 CACNA1D 2 SPTBN1 2 SLC39A6 2 SFRP4 2 BCL9 2 INPP4A 2 TULP1 2 SLC38A8 2 ACAP2 2 ARHGAP31 2 SF3B3 2 OPRM1 2 STAG3L2 2 DCST2 2 KCNT2 2 DSP 2 PLAU 2 VPS18 2 KCNN2 2 OR52E4 2 DCAF13 2 RABGAP1 2 PTCD3 2 BNC1 2 ZNF800 2 LSP1 2 CNTN5 2 HSPG2 2 CDX1 2 GNAO1 2 KRT16 2 OTX1 2 CCDC61 2 TAS2R50 2 RIOK3 2 HIVEP3 2 GBA3 2 SZT2 2 FCHSD1 2 ITPRID1 2 FREM1 2 TMEM184C 2 TNKS2 2 UBAP2L 2 OR4S2 2 PLXND1 2 GAPDHS 2 RTL1 2 IPO9 2 SSPOP 2 LRIF1 2 PDGFRA 2 PCDH15 2 CR2 2 COL15A1 2 MKNK2 2 ABCA8 2 ABCC8 2 LINS1 2 DHX37 2 NEGR1 2 CPNE1 2 GNA13 2 CSGALNACT2 2 N4BP1 2 SCN2A 2 SLC17A9 2 DNAI3 2 PIGN 2 AOX1 2 TP53BP1 2 NBR1 2 HJURP 2 LOXL1 2 TRIM23 2 RNF25 2 EPB41L3 2 RAD51 2 LPCAT3 2 AKAP11 2 HADHA 2 KDM6A 2 GPR15 2 NAV1 2 LPIN3 2 MACROD2 2 ACSL4 2 MAP4K4 2 CWC22 2 ATG13 2 ANKRD30A 2 PRPF31 2 SEZ6 2 CEP164 2 ANXA6 2 ZC3HAV1 2 AKAP13 2 KCNN3 2 PPP4R3A 2 CNR2 2 HERPUD2 2 DHRS4 2 C5 2 KIF11 2 PRKD1 2 PRG4 2 GGA2 2 FYB1 2 STIL 2 FLG 2 POLE 2 AHSG 2 PDGFD 2 MCU 2 SLC7A6 2 ROBO2 2 RC3H1 2 CHD6 2 BRWD1 2 CTAGE1 2 CADM2 2 UBE3B 2 CORO1B 2 DISP3 2 RAB11FIP3 2 SRL 2 RSF1 2 CR1 2 PLPPR4 2 LRRC8B 2 HEPHL1 2 ZUP1 2 KCNH6 2 PRUNE2 2 FNTB 2 MYO15A 2 NSD3 2 PNPLA6 2 TENM2 2 ARHGEF5 2 CHD9 2 KMT2D 2 ETNK1 2 DMXL1 2 ZNF560 2 TGM1 2 POLM 2 MON2 2 ASXL2 2 DHX9 2 CORO7 2 MUC6 2 CLIP1 2 SGPL1 2 TIAM1 2 ELAC2 2 KPNB1 2 CNOT9 2 KDM5B 2 DCUN1D3 2 IGSF9B 2 GRIN2A 2 PRX 2 SNW1 2 A4GNT 2 FNIP2 2 CLCN3 2 COLEC10 2 OR51T1 2 OTUD4 2 IST1 2 PLOD2 2 PPP6R1 2 F8 2 FMOD 2 NUDT12 2 NCOR2 2 RNASEL 2 CEP152 2 FBLN2 2 PTPN18 2 PRSS21 2 MMP11 2 PCDH10 2 RBM12B 2 OR2L8 2 OR10G7 2 HPN 2 TARS1 2 COPA 2 YAP1 2 ACACA 2 PPP1R9A 2 TYK2 2 POTEE 2 FES 2 CATSPERB 2 AKAP12 2 ZNF677 2 NGF 2 ACSF2 2 OR12D3 2 TMC5 2 PRKDC 2 ZNF692 2 SERTAD4 2 NR1H4 2 CLK1 2 GPC5 2 ZBTB11 2 AC008397.2 2 GRIA1 2 CYP1B1 1 ABTB2 1 SYNCRIP 1 PACSIN3 1 SLC25A4 1 OPRD1 1 C3AR1 1 SNX33 1 CCN3 1 OMD 1 OGT 1 TRDN 1 CLIP2 1 UPF1 1 CHMP2A 1 AMOTL2 1 KRAS 1 CASP1 1 USP35 1 NOS1AP 1 GRAMD1C 1 
PDLIM2 1 SPDL1 1 SCLT1 1 CHMP4B 1 MRTFB 1 MSRA 1 RTN2 1 ZFP3 1 ZBTB7A 1 ARHGAP18 1 IL2RA 1 RANBP10 1 OR7G3 1 ZMYND11 1 THAP5 1 UNC5D 1 GRIK5 1 STS 1 CHRNA4 1 SIGLEC11 1 PYDC2 1 ADRA1A 1 S1PR3 1 RAB9B 1 MSL3 1 UBAP2 1 ARHGAP5 1 NCOA6 1 NDUFB9 1 ESCO1 1 ANKRD20A2P 1 GSPT1 1 SNX14 1 NTRK3 1 IFIT2 1 RBM15 1 LIN7A 1 CYP2A13 1 ZSCAN5A 1 RSU1 1 PRRC2C 1 ARHGEF3 1 SCRN2 1 PCNT 1 ADTRP 1 PCSK1 1 DDX39A 1 SHCBP1L 1 MEIOB 1 ADPRHL1 1 CPZ 1 BICD2 1 PRAMEF14 1 RMC1 1 OPRPN 1 SYT2 1 NMRAL1 1 SRSF11 1 RPN2 1 DUSP19 1 USP22 1 PDE8A 1 OR5L2 1 C17orf49 1 AC090517.4 1 IGLC2 1 IP6K1 1 CBR4 1 MAOA 1 MARCHF7 1 IQCK 1 DAB1 1 STMN2 1 PFKFB1 1 FBXW10 1 EXOC6B 1 PPM1E 1 ABRAXAS2 1 SLC5A11 1 NOXRED1 1 RPL26L1 1 LRRC43 1 TOP3A 1 TP53I13 1 SPG7 1 POTEM 1 EPAS1 1 ABCE1 1 RWDD2B 1 CNGB3 1 CELF3 1 PRRX1 1 CDK2 1 CLSTN3 1 ERCC6L2 1 IQGAP1 1 ALAS2 1 MYBL1 1 KRTAP5-1 1 E2F3 1 TMEM94 1 AP2B1 1 CLK2 1 REG1B 1 AUH 1 OXA1L 1 GPR142 1 HLA-DPA1 1 TAS2R46 1 RAB36 1 PHTF2 1 AKIP1 1 ADAM20 1 BMP6 1 PUM2 1 ZNF507 1 PPP6R3 1 LHFPL1 1 STKLD1 1 FAM120AOS 1 ANKRD1 1 HBB 1 PIWIL3 1 KLK8 1 IFIH1 1 OR2C1 1 ADNP2 1 COL4A3 1 RECQL5 1 CLCA2 1 IPO13 1 ATAD2B 1 CRIP3 1 PLCG1 1 GRM3 1 SEMA6C 1 SLC12A9 1 KIF15 1 GATAD2A 1 NCAM1 1 CBX2 1 C11orf24 1 SLC35B3 1 ST6GALNAC2 1 FOSB 1 TAF15 1 VPS4B 1 EXOSC9 1 H3-5 1 COPS7B 1 PHF23 1 ZNF185 1 ARNT2 1 GP1BA 1 DDX10 1 APBB3 1 OR1S1 1 GK 1 DDX5 1 SCAMP4 1 DUOX1 1 VEZF1 1 CDK6 1 TCOF1 1 MFSD1 1 RASL10B 1 SUCLG1 1 IL21 1 C20orf204 1 RUFY3 1 RAB9A 1 POMT2 1 RGL2 1 CAB39 1 FAM120A 1 TAAR6 1 GAS2L1 1 CENPS 1 MLXIPL 1 WDR1 1 CLOCK 1 TUFM 1 GPX6 1 ATP13A3 1 WNT7B 1 ACSM6 1 MTMR4 1 ECHDC3 1 POLR2E 1 CREB5 1 CELA3A 1 SEPSECS 1 JUN 1 ZBTB16 1 RHBDF2 1 PRDX5 1 ALPK1 1 SIAH2 1 ZBTB4 1 ZMYM3 1 PORCN 1 PEA15 1 TAF1C 1 DDC 1 SYCN 1 OR4D1 1 OR14K1 1 RABL6 1 TXNDC11 1 GAP43 1 IGFBP7 1 PLEKHM2 1 ACAP1 1 SYDE1 1 JAML 1 OAS1 1 ABCC6 1 HYDIN 1 STRIP1 1 OR10H2 1 YY1AP1 1 MYH3 1 CHRM3 1 SREK1 1 CYBRD1 1 DHX58 1 PTGER4 1 STAM 1 MSLNL 1 MRPS18C 1 GALNT5 1 PPP1R2 1 COL9A2 1 DOCK5 1 SLC35G6 1 CDK8 1 CTSG 1 ANO8 
1 SLC11A2 1 BLMH 1 GPR84 1 SEPTIN3 1 ATP13A5 1 PIWIL2 1 MST1 1 TSEN54 1 TAS2R42 1 TMEM54 1 DHX15 1 INTS11 1 KIF2B 1 GDPD5 1 WASHC4 1 SOX6 1 SYCP2 1 ARNTL2 1 CDR1 1 MFSD14A 1 SNX19 1 BMP1 1 IQUB 1 ZMAT1 1 TP63 1 OTUD3 1 ROCK2 1 CYP11A1 1 SOX2 1 SOX8 1 ATG2A 1 BAZ2A 1 SAT1 1 OR2G2 1 CEP57L1 1 ELL2 1 TBC1D3B 1 HCFC1 1 EIF1 1 TKTL2 1 AKR1B15 1 ETFDH 1 GPT 1 CNDP1 1 VWA8 1 FERMT2 1 TEP1 1 SLC35A4 1 THOP1 1 ASPM 1 WASL 1 ZNF546 1 ZNF653 1 SLC18A1 1 LRFN3 1 OR51B4 1 GOT1 1 AGMO 1 LSM14B 1 IRAG1 1 ZNF610 1 CLYBL 1 YIF1B 1 FAM222A 1 MRGPRD 1 SLC44A3 1 KLK11 1 ALDH1A1 1 LINC00518 1 TRRAP 1 DHX33 1 TRIM55 1 IL3 1 SERPINB6 1 CCDC120 1 MRPS26 1 MAFB 1 SALL1 1 KIR3DL3 1 KDM2B 1 MS4A10 1 FXR2 1 CLSTN1 1 SEMA3E 1 OSCP1 1 ADRB2 1 ESR1 1 DDX20 1 MRO 1 MPO 1 CIAO2B 1 POM121 1 ZNF117 1 TNNT1 1 NYNRIN 1 SERPINB5 1 HIBADH 1 SLX4 1 THEMIS 1 NKAP 1 PRDM12 1 HLA-DRA 1 ZNF510 1 C9orf131 1 CLPTM1 1 HIRIP3 1 CRHR2 1 CYFIP1 1 BCL11B 1 UGT2A3 1 USP48 1 PFN4 1 SYF2 1 GLIS1 1 CARD9 1 PCDHA13 1 ARRB1 1 NOSTRIN 1 PLP2 1 PTOV1 1 NUDT21 1 CHIA 1 IDI2 1 LCA5 1 RPLP0 1 PITPNB 1 MRPS22 1 BTG2 1 LMLN 1 GALNT17 1 SRRM2 1 GPBAR1 1 MZB1 1 GRIK2 1 NAA20 1 LILRB5 1 MMS19 1 FNDC3B 1 SF3B2 1 OR4C11 1 NPR2 1 PIK3R1 1 JAK2 1 NOTCH3 1 BRD4 1 SCUBE2 1 PM20D1 1 ATP5MPL 1 GOLGA4 1 LOXL4 1 SLC45A3 1 RBPJ 1 RBM23 1 RDM1 1 THOC6 1 TRIM73 1 CD1B 1 GPAT3 1 STAP2 1 MROH6 1 COG8 1 PDIA3 1 LIPG 1 FAM71C 1 PTAR1 1 NR2F2 1 ZNF221 1 TEX44 1 KDM4C 1 TTC14 1 RPS2 1 COL28A1 1 NARS2 1 RIPOR2 1 GRIA4 1 GBGT1 1 SLC7A14 1 PSMC5 1 SPTBN4 1 DUSP16 1 HR 1 SOWAHD 1 ZNF234 1 TCF12 1 GDPD1 1 ECHDC1 1 ZGPAT 1 ZPBP 1 ADD2 1 ERN1 1 RASGRF1 1 CD163 1 WFIKKN2 1 BAG6 1 MAN1B1 1 C2CD6 1 EDEM2 1 HSD17B1 1 IPMK 1 TMEM169 1 FCRLA 1 SLC35B2 1 ASH2L 1 METTL18 1 KCNJ15 1 TMEM59 1 EIF3L 1 PON3 1 FASTKD3 1 SUV39H2 1 EFTUD2 1 LRRC18 1 GINM1 1 ASB4 1 NCKAP1L 1 IGHV3-20 1 LTBP3 1 SERPINA9 1 NCL 1 SHROOM1 1 CMC1 1 CARD17 1 CD19 1 SHOC2 1 DYRK4 1 ZC3H3 1 ABCD4 1 BSND 1 HECW2 1 GHDC 1 ATP1A1 1 PRICKLE1 1 UGGT1 1 DPP3 1 ANKRA2 1 KEAP1 1 LCP2 1 MAML2 1 MFAP3L 1 
TRPC1 1 PROC 1 CCDC102A 1 TLL1 1 CRY1 1 ZBED3 1 CAV3 1 DBN1 1 ATG7 1 TMOD3 1 NAGLU 1 MEGF10 1 MSS51 1 TCEAL4 1 LRP11 1 NOCT 1 GFRA3 1 TRAPPC10 1 ZNF782 1 MACROH2A2 1 RFC4 1 AC098582.1 1 MAP2 1 GALR1 1 RARG 1 MORN5 1 TMPRSS9 1 RSPO1 1 AMER3 1 SIK2 1 SCNN1G 1 JMJD6 1 GRN 1 AGRN 1 OR4A16 1 ZNF20 1 GPX8 1 DPP6 1 TSPAN6 1 DNAAF4 1 PLAC8L1 1 DMAC2L 1 SECISBP2 1 RBAK 1 GRM8 1 DLG2 1 TMTC1 1 TEX10 1 WDR93 1 CAMSAP1 1 DUOX2 1 TRAPPC11 1 AP3S2 1 C16orf89 1 SPINK5 1 SNX18 1 FLNC 1 STPG2 1 CARMIL2 1 NEXN 1 ADAMTS9 1 SPPL2C 1 WBP1 1 ZWINT 1 CRYBG3 1 DMGDH 1 LRRC49 1 DMRTC1 1 ZNF287 1 C11orf45 1 TRHDE 1 SGO2 1 TBL1XR1 1 CHPT1 1 CDKN1A 1 THRAP3 1 GRM5 1 ABLIM2 1 GAB1 1 TMEM79 1 MYCL 1 USP19 1 STX8 1 TMEM80 1 NHLRC2 1 FTL 1 CAMTA1 1 WFDC8 1 CDV3 1 POR 1 RLIM 1 PRDM4 1 PDE3A 1 UBE2D1 1 BRCC3 1 RHOU 1 RNF144A 1 ZC3H6 1 MAP1B 1 SLC16A10 1 JADE1 1 MTA3 1 GCNT1 1 A2ML1 1 PYCR2 1 SOCS6 1 EXT1 1 YME1L1 1 ATP6V1A 1 LTBP4 1 KLHL24 1 OR8B8 1 CXXC4 1 KIF17 1 PHF6 1 ARHGAP17 1 SRGAP3 1 HOXA9 1 HSPA6 1 GRXCR1 1 PSMC6 1 STK10 1 ATP9B 1 SLC2A14 1 EBNA1BP2 1 BCL2A1 1 EGR2 1 CXorf58 1 COL22A1 1 TCTEX1D2 1 SHCBP1 1 FBXO38 1 PTPRR 1 INTS8 1 RGMA 1 SORCS2 1 TET2 1 MYL9 1 FGF3 1 ZEB1 1 PSG6 1 NUDT7 1 TPD52L2 1 PTPRF 1 SLC7A13 1 TCHH 1 ZNF652 1 CFAP61 1 ATP5F1C 1 MSR1 1 LOXL3 1 BEST3 1 RUVBL2 1 OTOF 1 WDR11 1 PHLPP1 1 CHEK2 1 PRKAG1 1 MTCL1 1 WT1 1 TAF1L 1 CYP39A1 1 TPCN1 1 PDLIM1 1 PPIC 1 ST3GAL4 1 MAP1LC3B2 1 PSMD4 1 PLXNA3 1 CCNB1IP1 1 ASIC5 1 MYO1C 1 TM9SF3 1 HROB 1 TRIM49 1 ECM1 1 FEZ1 1 AEBP1 1 LRRC8A 1 ABCB6 1 SOHLH2 1 DMRTB1 1 PLA2G4D 1 CCNK 1 DMWD 1 SMARCA5 1 TSBP1 1 B4GALNT3 1 MYF6 1 NEK3 1 GPR162 1 BPIFB1 1 TH 1 SBF2 1 CEP135 1 ZNF584 1 ZNF12 1 MRGPRX3 1 WDR47 1 RNF169 1 GNB1 1 OR8G2P 1 LRBA 1 TPPP 1 BBS10 1 HMGCS1 1 RNF19A 1 CFL1 1 KANSL3 1 AGA 1 TMEM97 1 SYT15 1 DLX6 1 TOM1 1 COG1 1 DOT1L 1 NRG2 1 EFCAB5 1 KIF26B 1 PALLD 1 SEC23IP 1 DSE 1 OR2W1 1 FBXO24 1 SYNGAP1 1 TIMMDC1 1 MMEL1 1 RHPN2 1 FBXO7 1 ANKRD23 1 OR4D6 1 RPL3L 1 KLC1 1 CACNB1 1 KCNV2 1 RBM42 1 IFT140 1 CDC42BPB 1 AP002748.5 1 
SLC14A2 1 PSAPL1 1 ZFP42 1 RPS3A 1 FAN1 1 COG6 1 CKAP2 1 C5orf22 1 RPS6KB1 1 MFSD6 1 CCDC88A 1 UGT1A3 1 CD6 1 MRPL22 1 CCND3 1 WASF2 1 SNAI3 1 SLC52A2 1 SPATA20 1 PBDC1 1 RNF139 1 PRTFDC1 1 CATSPERE 1 RARA 1 PRKCQ 1 ADGRA2 1 PRIM1 1 CH25H 1 GJB6 1 TMEM67 1 KIF5C 1 MAPK10 1 CNDP2 1 MYO5A 1 MICU1 1 SCYL2 1 KLHL20 1 VRK3 1 PIK3R4 1 ALDH1B1 1 ZSCAN26 1 RNASE11 1 RPS6KC1 1 CCDC8 1 B4GALT6 1 FAM162A 1 OSGIN2 1 DDX19B 1 MROH9 1 KANSL1 1 SLC35A3 1 NUP42 1 LRRC4B 1 TSPAN19 1 ASGR2 1 RPL30 1 ENG 1 HADH 1 MON1B 1 PDSS2 1 LRRC3 1 UQCR10 1 SLC9A1 1 CERT1 1 PDYN 1 MFSD2B 1 PIK3CD 1 SLC32A1 1 STK3 1 FARSA 1 MYOCD 1 LACC1 1 GTF2IRD2B 1 MME 1 GPR141 1 NAA40 1 PCDHA5 1 PDE11A 1 LHX8 1 ENPP5 1 TPRX1 1 ZNF513 1 COQ10A 1 C19orf44 1 CCDC80 1 TRAF2 1 ARHGAP15 1 LINC02843 1 FOLH1B 1 SUCLG2 1 LGSN 1 MORC2 1 DEDD 1 RPL7P3 1 XRN1 1 MSL1 1 UCHL3 1 ZSWIM5 1 PRR27 1 FOXM1 1 EFEMP2 1 KCNK5 1 BICRA 1 SLC15A4 1 ARMT1 1 CMYA5 1 SP100 1 SMARCC1 1 GLI2 1 LBP 1 NTN4 1 SPEM1 1 IPO4 1 CERKL 1 LINGO1 1 DMXL2 1 BCHE 1 CTDNEP1 1 GSE1 1 KIF5A 1 PPP1R14B 1 OR13C2 1 SLC52A1 1 GBA 1 ERMARD 1 ITM2C 1 PVR 1 SEL1L3 1 APLP2 1 EFNB1 1 STXBP5L 1 GNB5 1 CCDC160 1 C15orf39 1 AC109583.1 1 TRAPPC1 1 CDH18 1 HERC5 1 SLC35A5 1 BMP4 1 HMOX1 1 AGAP1 1 ACSL3 1 PRC1 1 FAM110C 1 G6PC 1 ZNF816 1 CAGE1 1 KLK14 1 ORC3 1 TENM3 1 GNPTAB 1 PCNX3 1 SFMBT2 1 ST20-MTHFS 1 DMBX1 1 KLF10 1 MCAM 1 MB21D2 1 C12orf77 1 MFHAS1 1 CATSPER4 1 BRWD3 1 FLT3 1 TMED2 1 ZMPSTE24 1 MMADHC 1 PARP14 1 GDF9 1 ZNF790 1 RIPK4 1 EPPK1 1 RRP15 1 SPATA5L1 1 YTHDC2 1 ACOT1 1 CCDC78 1 DDA1 1 COX6B2 1 GPATCH8 1 COL4A2 1 PDCD5 1 NIPSNAP3B 1 PRODH2 1 IFT172 1 TGM7 1 TWNK 1 GEMIN7 1 TMEM185A 1 FOXP4 1 COL1A1 1 GCFC2 1 AMDHD2 1 MIOX 1 SPCS1 1 CFP 1 DDX59 1 SOAT2 1 MIR519C 1 RHBDF1 1 CENPM 1 RTN1 1 ZNF433 1 NOX1 1 ERG28 1 TECTA 1 ZFR2 1 COMMD9 1 AL162726.3 1 CENPI 1 CDK13 1 CDH23 1 GPNMB 1 SEC31A 1 RBBP6 1 SDHD 1 AMBP 1 ATP11B 1 SLC51A 1 PTPRC 1 PLEKHJ1 1 GPR45 1 PPFIBP2 1 HSPA4 1 ZAP70 1 EHMT1 1 OR10H5 1 CENPE 1 TSPYL6 1 SLC6A5 1 SEC14L5 1 GABRE 1 CPEB1 1 KIF5B 1 
FAM107B 1 UNC5A 1 DNAAF2 1 IGHD6-19 1 MBD1 1 AC005863.1 1 TRIM39 1 CHD5 1 KCTD18 1 GUCY1A1 1 RABEP2 1 NAALADL2 1 TSHR 1 ARHGAP11B 1 CCDC112 1 COL4A1 1 RSPH9 1 DPCD 1 SSH2 1 CLIP4 1 TMEM106A 1 FRMD4A 1 G6PD 1 ZNF649 1 RHOXF1 1 C16orf78 1 TOE1 1 STYX 1 SEMA6D 1 ADH4 1 PLAGL2 1 ATP6AP1 1 ZNF518B 1 HTT 1 AMBN 1 IFNL3 1 SIRPD 1 ITGA9 1 CLCN1 1 KDM7A 1 KCNH2 1 NAV3 1 ZKSCAN1 1 WEE1 1 PURG 1 UBC 1 PAQR7 1 PAX3 1 SYNPO2 1 ADIG 1 PAAF1 1 CD109 1 PSMD11 1 SRP68 1 DIS3L 1 PEMT 1 ALDH3A1 1 CFAP298-TCP10L 1 ACTR3B 1 C1orf56 1 FKBP15 1 FBXO2 1 VSIG1 1 DSEL 1 DYRK1B 1 KIF20B 1 PTGFRN 1 SV2C 1 ATG4D 1 DYRK3 1 FAM155A 1 TOPBP1 1 POLR3GL 1 KRI1 1 PHLDA1 1 SYMPK 1 CNOT10 1 ITCH 1 SNORD116-7 1 BDH2 1 KIF4B 1 MARCHF8 1 BABAM2 1 ASB15 1 PGAM5 1 DRD1 1 MCRS1 1 TXNIP 1 KLHL13 1 FGA 1 HCN4 1 FAM83A 1 AVPR1A 1 FAAH 1 BMP2K 1 PHYHIPL 1 CASP10 1 QTRT1 1 FGF13 1 AGXT 1 DHX8 1 OIT3 1 TTC8 1 TUB 1 NOVA2 1 ILF3 1 MRM1 1 MICAL3 1 N6AMT1 1 MASP1 1 JAKMIP2 1 ERP27 1 AP001425.1 1 ASPG 1 CDKN2AIPNL 1 SLC47A1 1 ZSCAN2 1 UHRF1BP1 1 MINDY3 1 CNKSR1 1 SNORD108 1 IQCE 1 DAGLA 1 PCDHGA1 1 CHFR 1 LMBR1L 1 MLX 1 EDC4 1 TRBV4-2 1 SFI1 1 SPRY2 1 SYCP2L 1 GPR78 1 GPR75 1 PSMA2 1 SULT1A1 1 FGD6 1 GLIPR1L1 1 KRT25 1 GANC 1 WBP11 1 ZNF160 1 BICC1 1 SETBP1 1 LRGUK 1 DDX6 1 GLYCTK 1 TMTC3 1 OR5K4 1 HCK 1 RHOJ 1 ATF6B 1 PRDM15 1 CAPN7 1 OR4B1 1 KMT2E 1 ACTC1 1 ELOC 1 PDE6A 1 RABEP1 1 KDM2A 1 SPAG5 1 DPYS 1 OTUB1 1 SENP7 1 EIF4G2 1 LEFTY2 1 CDS2 1 TMEM168 1 JCAD 1 NUP133 1 AP3B1 1 ABCC1 1 ZIC4 1 ELK3 1 OBSCN 1 FRMD7 1 PSMD1 1 PLAC1 1 ZNF780A 1 TICRR 1 PPIP5K1 1 PIRT 1 PDE12 1 ANO10 1 EDN3 1 OR5K2 1 EFCAB12 1 SLC12A3 1 FOXRED1 1 SYTL3 1 ARHGAP32 1 PRKCE 1 TRIM32 1 PHF20 1 SCAF8 1 EIF5AL1 1 PPP1R10 1 FBXO40 1 SDHAF3 1 DCUN1D1 1 TYRP1 1 ASB6 1 RMND1 1 LATS2 1 ST8SIA6 1 PRR22 1 PTCHD4 1 IFRD1 1 PTPN3 1 ZFAND1 1 KLHL38 1 C3orf70 1 HERPUD1 1 MAT2A 1 CKAP2L 1 SLC9A9 1 ALYREF 1 DMRT2 1 ZBED8 1 GOLGA6A 1 CDC123 1 CRB1 1 CLEC11A 1 BACE1 1 DLX3 1 ACSM2A 1 SLC8B1 1 PITRM1 1 CEP85 1 LATS1 1 SCN5A 1 GALNT11 1 DMPK 1 ARFGAP2 1 OR7G2 
1 CENPL 1 NBPF10 1 NUTM2G 1 TXNDC5 1 MAGEE1 1 TMIGD3 1 CCDC158 1 PSD3 1 MAGEE2 1 ADAM29 1 GNG3 1 CCDC25 1 PPP1R8 1 DEPDC5 1 CAPN12 1 LRRC36 1 ERICH1 1 GPR63 1 CABLES2 1 SLC2A10 1 CALHM1 1 SLC5A1 1 SMARCA2 1 ABCA3 1 CYP27B1 1 TFG 1 AP1G1 1 OR1B1 1 SP140 1 FANCF 1 RRM2 1 CDC25A 1 DCAF1 1 NFU1 1 RBM14 1 NKAIN3 1 CNN2 1 ARIH1 1 RUNX3 1 SH3RF1 1 TMEM242 1 PRDM2 1 TSPYL1 1 PER3 1 LMAN1L 1 FCHO2 1 TMEM179B 1 LACTB 1 NELFE 1 WDR92 1 FOXK2 1 ABI2 1 RUBCNL 1 ZFHX3 1 CHMP2B 1 TUFT1 1 DECR1 1 TLE6 1 MTMR9 1 DNAH1 1 NLRP4 1 ITGB4 1 MFN1 1 GPRC5A 1 FOXO1 1 EPHA7 1 APLP1 1 SLC6A16 1 KDM3A 1 ZDBF2 1 ZNF266 1 PSKH2 1 COL8A2 1 TUBB 1 PRNP 1 SLC4A1AP 1 UNC45B 1 SNTG1 1 AC093827.5 1 LRRC27 1 FADS6 1 PSD4 1 DTNA 1 SETD4 1 GAA 1 MMP10 1 CD163L1 1 ESRP1 1 MYL1 1 C1QTNF9 1 EPX 1 PACS2 1 USP42 1 CNOT4 1 KIF3C 1 DDX50 1 ZNF34 1 WDR27 1 TUBA1C 1 DNAJC30 1 SNAPC5 1 L3MBTL4 1 GTF2E2 1 PDCD2 1 MTF1 1 ARHGAP10 1 DDX28 1 GTPBP8 1 VPS8 1 TOP2A 1 TGFBRAP1 1 SYTL2 1 CNP 1 PCDHGA10 1 NAT8 1 TOM1L2 1 GPR152 1 PLCD1 1 ZNF597 1 AMY2A 1 RAB37 1 TRA2B 1 WRN 1 CTBP1 1 PPARGC1B 1 PGD 1 CLCC1 1 DYSF 1 S100A12 1 SGPP1 1 USP53 1 TIMM17A 1 CEP120 1 OLFM2 1 HIC1 1 MARS2 1 ZNF432 1 DNAJC3 1 TMOD2 1 SMYD2 1 CCDC54 1 PPFIA4 1 EIF5A 1 ADCY2 1 FIGN 1 CCDC116 1 BSDC1 1 LIMD1 1 GAL3ST4 1 MROH1 1 FOXJ3 1 DCAF5 1 DHRS4L2 1 BCL9L 1 ATP13A1 1 AIPL1 1 TMCC3 1 IGF1R 1 STOML3 1 NECTIN2 1 SLC25A28 1 SLC13A4 1 CXXC1 1 RNF31 1 AJUBA 1 PHF20L1 1 SLC4A1 1 TBC1D25 1 PIGB 1 TIE1 1 SRC 1 ZNF267 1 AP4M1 1 ARMCX5-GPRASP2 1 SEC31B 1 ATP11C 1 PITPNM2 1 SLC4A4 1 MRPS5 1 CLK4 1 GCNT2 1 ACSS1 1 SEC61B 1 IGHD1-26 1 CEP41 1 RRP9 1 MRPL32 1 URGCP 1 SLC28A3 1 KIAA0513 1 TMEM71 1 PDE6C 1 FOXP2 1 NRXN2 1 LAMTOR5 1 MAP1LC3A 1 FMN2 1 MYO18B 1 ARID4A 1 CYP27C1 1 PABPC1 1 TBX22 1 EXOC4 1 GALT 1 TBL3 1 SLITRK4 1 STK38L 1 OGG1 1 TRAC 1 HOXB3 1 MYPN 1 KIR3DX1 1 KCTD7 1 PDZD8 1 GPHN 1 OLAH 1 MFSD8 1 MICU2 1 MGAT4A 1 REC114 1 ITIH5 1 FAM71B 1 MAP7D2 1 LAMC1 1 C3orf38 1 RBM8A 1 MGAM 1 ABL2 1 OR5M8 1 PRAME 1 ADAP1 1 ZSCAN16 1 RABGGTA 1 ZNF831 1 PCDHB1 1 
IL21R 1 SLC35A1 1 SAMD9L 1 ARMH4 1 BANK1 1 VCAN 1 SCML2 1 CHST8 1 HAUS7 1 ZNF408 1 MUC5AC 1 NLRP6 1 HNRNPA1 1 SLC45A4 1 TOB1 1 EDC3 1 MTMR2 1 USP43 1 KATNIP 1 VPS25 1 PLPPR1 1 PLCB1 1 LRRTM4 1 TEK 1 DPT 1 PXDC1 1 ATP7A 1 FKBP11 1 RTF1 1 POC5 1 CNTN2 1 JAG1 1 NBPF3 1 CHSY3 1 FRY 1 PSMC2 1 TRIP13 1 VPS28 1 ULK1 1 ZNF616 1 TIMM17B 1 MRPL15 1 GPLD1 1 PXDNL 1 HGSNAT 1 TRMT1L 1 FHL5 1 YBX3 1 WDCP 1 SUN1 1 TMEM81 1 ACVR1C 1 ETF1 1 SPG21 1 RAB3GAP2 1 STXBP1 1 CHCHD3 1 HHIPL1 1 SCAP 1 CD1D 1 EXTL1 1 KAT5 1 ZNF528 1 AFDN 1 DAB2 1 MAPKBP1 1 LRRC42 1 ACSM5 1 DHRS13 1 RAI14 1 SOAT1 1 ADCY10 1 IKZF4 1 WWP2 1 DMRT3 1 NR4A1 1 CXCL10 1 LSM1 1 LY75-CD302 1 GPBP1 1 GABRG1 1 BCL7A 1 NIFK 1 CISD2 1 ADGRF2 1 ATP11A 1 CACNA2D1 1 DARS1 1 CLDN18 1 GNA14 1 CDHR5 1 HELQ 1 ZCCHC17 1 TAF2 1 MAK16 1 CACNG2 1 AGO4 1 ZNF331 1 NLRP14 1 SERPINE1 1 CXorf21 1 BACH2 1 NUP214 1 INTS4 1 TMX3 1 SORCS1 1 CACNA1B 1 CTDSP2 1 HEATR4 1 NEURL4 1 UBR2 1 TXNDC12 1 BNIP5 1 OR13C5 1 MIS18BP1 1 GLB1L 1 TKFC 1 BCL2 1 BSX 1 ACAN 1 TMCO3 1 AASDH 1 GABRA1 1 KRR1 1 PLEKHH1 1 OR5A2 1 QSOX2 1 TMEM98 1 WDR24 1 PRPF3 1 DENND1B 1 NLGN4X 1 MSMO1 1 SHANK3 1 JAGN1 1 MYO1H 1 ARHGAP6 1 INVS 1 MYT1 1 MAPK8IP3 1 L3HYPDH 1 CAMSAP2 1 TSPAN5 1 CPXCR1 1 ONECUT1 1 ATP6V0A2 1 TUBGCP5 1 IGKV2D-29 1 APPL2 1 UBXN10 1 BCL11A 1 PEPD 1 SLC11A1 1 BMP5 1 TRAFD1 1 MAGEB6B 1 AK2 1 RALGPS1 1 CXCL11 1 MFSD6L 1 TIMM50 1 SNORD115-23 1 ALS2CL 1 CCDC146 1 DIAPH2 1 RBM26 1 OLFM1 1 PLEKHG3 1 ZMYM1 1 CSPG4 1 OR7G1 1 OSBPL10 1 NANOS3 1 RGPD1 1 OR4K13 1 MBD5 1 RNF43 1 DDX39B 1 PBLD 1 LZTR1 1 SASH1 1 RALY 1 SPESP1 1 FOXR1 1 PPP1R9B 1 SLC38A11 1 POSTN 1 ACADS 1 VWA3B 1 SERINC1 1 IL25 1 ZMYM5 1 RORC 1 SYNRG 1 CCHCR1 1 PTPRA 1 PCARE 1 VAC14 1 UNC50 1 KBTBD2 1 PDIA6 1 DNHD1 1 GPR148 1 GPKOW 1 VAT1 1 CD2AP 1 HDX 1 SCARB1 1 ASXL1 1 ZSCAN1 1 TRIM48 1 GLT8D2 1 ZNF426 1 RMI1 1 SYNGR3 1 ME2 1 HIRA 1 CBL 1 DNASE2B 1 PREX2 1 CD63 1 OAS3 1 SLC9C1 1 SLC41A1 1 MARCKS 1 SLC17A5 1 DBH 1 TUBB4B 1 C10orf71 1 CTC1 1 PRPF6 1 PML 1 ATG3 1 APBB1 1 FER 1 GEMIN5 1 PYDC1 1 SLC13A5 1 
VARS1 1 MID1 1 KBTBD8 1 ZNF354C 1 POLR3C 1 DGKZ 1 AP3B2 1 LIMA1 1 DCAF8 1 NOD1 1 ELANE 1 PRPSAP1 1 LRRFIP2 1 EML5 1 TERT 1 EEF1D 1 RAD52 1 ZDHHC7 1 TNFAIP3 1 GPR150 1 RGS11 1 OR51A2 1 THADA 1 FOXP1 1 ZNF154 1 EGFL7 1 ZNF260 1 F11R 1 KANK1 1 ANXA10 1 PPP1R13B 1 MRPL30 1 NYAP2 1 NXF5 1 KATNA1 1 VNN2 1 IFTAP 1 ZNF425 1 DDX3X 1 PRAMEF19 1 WDHD1 1 PCDHA2 1 AIFM1 1 ZFR 1 OR10H3 1 ARHGAP11A 1 PPFIA2 1 KIAA0895 1 CD74 1 BEND6 1 GMPPB 1 SAMD4A 1 MOXD1 1 PRPF39 1 CALM2 1 PTPDC1 1 BAG1 1 CYP4F2 1 COL17A1 1 ZBTB43 1 ADGRE3 1 ZNF786 1 NUP98 1 FRMPD1 1 CORO2A 1 MGAT4C 1 AL121899.2 1 APOBEC3F 1 MSH3 1 CACNA1I 1 TRMT1 1 FAM13A 1 AFF4 1 TTC17 1 IDH3A 1 TAGLN3 1 AGFG2 1 CD200 1 ZNF532 1 ZNF75D 1 HSP90AB1 1 SAG 1 FBXW7 1 PITHD1 1 SLC35G3 1 SEPTIN11 1 SRP9 1 GPR12 1 CD22 1 SLC29A2 1 RGS9 1 BEND2 1 VTN 1 MFAP4 1 RER1 1 CGN 1 KHDC3L 1 PWP1 1 TMEM222 1 EIF4A1 1 METTL16 1 TANK 1 GPR149 1 NPHP1 1 CABLES1 1 UROC1 1 SEPTIN10 1 PAX1 1 STXBP2 1 KCNAB1 1 PARP4 1 CLINT1 1 SLAMF7 1 MYOC 1 SPEG 1 USP31 1 IDH2 1 FASTKD2 1 F2R 1 LBX2 1 DSG4 1 NFXL1 1 SUN5 1 IGKV3-15 1 FOXR2 1 H2AC6 1 CAMSAP3 1 ADAMTSL4 1 DUOXA1 1 PPRC1 1 OR1A1 1 ATP2C2 1 APOA5 1 PRDX1 1 ASTL 1 ATP2A1 1 NLRC3 1 MTFR2 1 RNF38 1 CD5L 1 RIN2 1 TTC12 1 NOC3L 1 PASK 1 ZFP91 1 RFX1 1 OR6N1 1 PAPPA 1 SRCIN1 1 CES1 1 OGA 1 TNNI3K 1 DYNC2I1 1 NIPAL3 1 GPR179 1 ADAM9 1 MAP4K3 1 CARS1 1 RTN3 1 MELTF 1 CACNG6 1 USP29 1 KCNT1 1 ZFYVE16 1 ZNF284 1 ANKFN1 1 ZNF438 1 LRG1 1 DDX21 1 TELO2 1 BBS1 1 NDUFS4 1 OSBPL5 1 BIRC3 1 HMGXB4 1 ATP8B3 1 ABCG4 1 ZNF24 1 PRPF19 1 RPA3 1 EIF2AK1 1 ACBD4 1 KIF2A 1 NSUN6 1 CCDC114 1 NCLN 1 ZNF619 1 EXD3 1 WAPL 1 SELENOP 1 ARCN1 1 OR2AE1 1 IDO2 1 DIS3L2 1 AFAP1L2 1 ZCCHC9 1 COL25A1 1 PSMC1 1 LAYN 1 YTHDF3 1 MERTK 1 C17orf75 1 PRSS33 1 POU5F1 1 RPA1 1 RFX7 1 SLC28A1 1 ZNF530 1 SH2D3A 1 OR2W3 1 LONRF1 1 SLC16A13 1 DTX3 1 IDH3G 1 SENP1 1 EXO5 1 MFSD5 1 IL18RAP 1 RNMT 1 KLHL1 1 CLTCL1 1 CNOT8 1 PRLHR 1 SELENBP1 1 HGD 1 ISY1-RAB43 1 ZNF704 1 ADAMTSL5 1 SCGB2A1 1 FER1L6 1 ADD3 1 MBD4 1 MIIP 1 NR1H3 1 TRIO 1 NKRF 1 DPEP3 1 
MAP3K3 1 ZNF226 1 INCENP 1 TMEM17 1 C1orf162 1 CDK5RAP1 1 PPM1B 1 KCNJ3 1 PELI2 1 PFKM 1 SNRK 1 PCDHGA5 1 SON 1 NEMF 1 PAF1 1 NBAS 1 CSRP2 1 COPE 1 COL3A1 1 BSCL2 1 MBOAT1 1 DNAJA4 1 GPATCH4 1 BOD1L1 1 ZBTB38 1 APCS 1 RAB32 1 PLOD3 1 MYOM3 1 PEX26 1 RTL6 1 MCOLN3 1 SSH3 1 YTHDF1 1 EIF2S2 1 RUNX1T1 1 UGT2B4 1 SHF 1 KRT31 1 MLH1 1 NONO 1 KCTD8 1 IGFN1 1 SLC22A12 1 NRG1 1 HTR7 1 SCYL3 1 KCNK18 1 ABCC2 1 PCDHB11 1 ADGRG7 1 GET1-SH3BGR 1 SMARCD2 1 SFMBT1 1 TBC1D23 1 FRAS1 1 S100A16 1 VPS45 1 SLC24A3 1 CLCN2 1 MIEF1 1 KRBA2 1 API5 1 JADE3 1 PCDHA9 1 CSAG1 1 FAM160A2 1 HMBS 1 FHL2 1 GOLGA1 1 CRYZ 1 CASQ2 1 PNPLA4 1 PPP2R5B 1 OAZ3 1 C21orf58 1 TBC1D31 1 NEBL 1 MTTP 1 XRN2 1 DBF4B 1 ANXA3 1 CS 1 CDYL 1 SLC2A4RG 1 KIAA0319L 1 EBF2 1 UROD 1 PAG1 1 KRTAP4-2 1 UBOX5 1 FAM114A1 1 STX11 1 SAR1B 1 CIT 1 ANKS1B 1 PISD 1 RAD17 1 NDRG2 1 CPD 1 KBTBD7 1 PCDHA10 1 TMEM171 1 KCTD14 1 PPA2 1 FAM135B 1 AFF2 1 HOOK3 1 AP001781.2 1 SCRN3 1 RNF4 1 GAR1 1 KAT14 1 COPS2 1 ALPI 1 ETNPPL 1 ELOVL2 1 MTMR14 1 KIF16B 1 ZCCHC14 1 UNC5C 1 FNBP1L 1 SEMG2 1 SPAG7 1 FCRL3 1 NIT1 1 SRP72 1 RINL 1 LMAN1 1 ATXN7L2 1 OSBPL8 1 ZNF713 1 CAMK2G 1 IHO1 1 SPATA22 1 WRAP73 1 SSBP4 1 SECISBP2L 1 ZNF57 1 PIK3AP1 1 C6orf58 1 MYLK4 1 GAS2 1 DNAJC11 1 COL5A2 1 ALX4 1 LRRN4 1 ADAMTS2 1 PFKFB3 1 F11 1 APOBEC3D 1 KNG1 1 NT5M 1 CCN2 1 DNTTIP2 1 IGF2BP3 1 ZNF680 1 SPIN2A 1 SORBS2 1 RPL31 1 OR5I1 1 NBEAL2 1 ZIK1 1 BTBD9 1 FUBP3 1 LAMP5 1 KRT19 1 PHB 1 BCO2 1 PHKA2 1 MYBPC1 1 SLCO3A1 1 DCTD 1 OGDHL 1 CIITA 1 CEP68 1 KPNA4 1 PLXDC1 1 ACKR3 1 TBCK 1 FSCN1 1 CTR9 1 CREB3 1 AP5M1 1 USP51 1 ZNF492 1 ZNF212 1 CDRT1 1 KCNQ1 1 RNF20 1 BTNL3 1 PCP4L1 1 RTL5 1 IARS1 1 IQCN 1 MSGN1 1 MCCC1 1 PCYT1A 1 SLC24A4 1 FHOD1 1 SLC26A7 1 KL 1 LMNB2 1 RMI2 1 CUZD1 1 TMEM87A 1 OR4D9 1 CFAP58 1 DPEP1 1 TTC30B 1 PGAP4 1 GABPA 1 PRELP 1 LRRC56 1 RBBP5 1 SLC25A13 1 ZNF705A 1 KCND3 1 CTSH 1 OTX2 1 GPR62 1 PAQR8 1 PRB4 1 KLHL4 1 ESM1 1 CCDC97 1 IL4R 1 IQGAP3 1 TBC1D22B 1 NEUROD1 1 COL2A1 1 UBAC2 1 ACOX3 1 PRPF40B 1 MYH10 1 TRARG1 1 CDC16 1 SERPINB13 1 
SH3GL1 1 TM9SF2 1 FTSJ3 1 ZNF480 1 GAN 1 SUDS3 1 PDCD2L 1 ATCAY 1 CXorf66 1 ALG10 1 DIPK1C 1 CCKAR 1 ZNF527 1 SULT4A1 1 ACTR6 1 ADAD2 1 KRTAP10-9 1 ANGPTL7 1 INTS10 1 ENOSF1 1 FLI1 1 ZBTB48 1 ZNF354A 1 LIN54 1 TRERF1 1 AMMECR1 1 EIF2B3 1 IFIT1B 1 IPO7 1 SEMA4B 1 NUP210 1 SLC9C2 1 TLR1 1 UHRF2 1 SEMA4D 1 RPTOR 1 DENND2D 1 GTF3C2 1 PANK2 1 IFT81 1 AGAP6 1 S1PR4 1 PCDHA1 1 FMNL1 1 ITGAV 1 TXN 1 PTPRS 1 SPATA17 1 TCF21 1 GPR107 1 APAF1 1 AIFM3 1 SLC25A20 1 RHCG 1 RND1 1 UMODL1 1 ORAI1 1 PPP2R2B 1 SPATA4 1 THRA 1 TUT7 1 CHID1 1 ATP4A 1 MAN2B2 1 METAP2 1 OR5AU1 1 CLCN4 1 OR52B4 1 USP26 1 DGCR2 1 AMHR2 1 CCND2 1 OR5D14 1 OR2T34 1 FLYWCH1 1 CCNJL 1 PRELID1 1 DCAF4L1 1 TMUB2 1 KIF18A 1 FGFBP1 1 TMEM198 1 FGF6 1 BSPRY 1 RNF40 1 PDPR 1 UNC93A 1 LGALS3BP 1 C6orf62 1 LYPD4 1 OCIAD2 1 RAB18 1 COL5A1 1 ZNF292 1 SRPK2 1 RAPGEF5 1 SDR16C5 1 WDR81 1 SLC12A7 1 MTMR8 1 KIF21A 1 JUNB 1 OR2W5 1 TSEN2 1 ZNF600 1 OCLN 1 OR8H1 1 CLIP3 1 ADAM2 1 SLC12A2 1 GTF2I 1 CDK17 1 RTN4IP1 1 IVL 1 CNTN3 1 SGSM1 1 OTUD7B 1 IGHA2 1 PEX2 1 KIF4A 1 P4HA2 1 SP4 1 OR11L1 1 KIAA0232 1 ICE2 1 UPRT 1 DAND5 1 TADA2A 1 PKD1L1 1 ARMCX6 1 THOC3 1 CSRNP2 1 BIN2 1 FCRL5 1 ATP6AP1L 1 PUM1 1 IZUMO1R 1 VWCE 1 ZNF358 1 ABCC5 1 MYL6B 1 RIMKLB 1 COL11A1 1 INSR 1 ERMP1 1 PHRF1 1 MARK4 1 NME8 1 EDIL3 1 CMTM1 1 ADGRE1 1 CMTR1 1 CDKN2C 1 LRRTM3 1 STIMATE 1 IFNAR1 1 ITIH2 1 RCOR3 1 DAPK3 1 EPHB4 1 STOML1 1 FAM131A 1 STRADA 1 NPPB 1 PGC 1 DNALI1 1 EPHA3 1 TMX4 1 TRAF3IP1 1 NID1 1 METTL7B 1 KCNU1 1 JMY 1 ITGB5 1 SPAG9 1 ABCG2 1 MADD 1 WFDC1 1 DCN 1 CD93 1 TMF1 1 EGLN2 1 RNASE7 1 KEL 1 ALPP 1 CDH12 1 ACVR1 1 CCDC151 1 GIMAP8 1 CYP8B1 1 TMEM38A 1 FAM20B 1 FAM162B 1 PTGIR 1 CYLD 1 GSS 1 NEURL1 1 SLC35G5 1 TMCO6 1 UPF2 1 ZNF586 1 SLC25A12 1 VPS50 1 GFRA2 1 FLG2 1 KRIT1 1 FAM27E5 1 CCDC51 1 PRUNE1 1 ZSWIM8 1 SLC16A1 1 CSNK1G2 1 CLCA1 1 POLR1F 1 ZNF572 1 F5 1 GPR155 1 FKTN 1 MARK1 1 ARHGAP21 1 ZP4 1 RASGEF1C 1 SGCG 1 ARHGAP28 1 ARG1 1 TSPAN32 1 LENG8 1 SLC26A11 1 DIPK2B 1 SLC4A5 1 LPCAT2 1 OSTC 1 H2AC12 1 TTYH3 1 ST6GAL2 1 TBX19 1 
GLO1 1 ADAM19 1 AC011448.1 1 RFXAP 1 TMEM107 1 OR4F17 1 APBA2 1 MEGF6 1 DPPA2 1 TMEM208 1 SLC4A10 1 UVSSA 1 PLCD4 1 TBC1D14 1 PLD3 1 C2orf78 1 LY6E 1 CDH19 1 ASB13 1 NEO1 1 LONP1 1 FRMD3 1 RMDN1 1 CEBPG 1 CTSV 1 PRICKLE4 1 CCDC81 1 ZNF625 1 MISP 1 ITPRID2 1 RASAL1 1 PHF12 1 DSC1 1 MRPS17 1 LPP 1 MED7 1 NBEAP2 1 TTI2 1 CTNND2 1 ATP10A 1 ANOS1 1 SEC14L1 1 TMPO 1 FAM153B 1 LRRIQ3 1 DPYSL4 1 BMPR1B 1 SAFB2 1 NFASC 1 CDH6 1 NFAT5 1 UBE4B 1 HK3 1 EXOC6 1 DNA2 1 CDT1 1 SLC46A3 1 HHEX 1 OSBP2 1 SORBS1 1 MXRA7 1 ERCC6L 1 PXN 1 THAP11 1 NRCAM 1 EMILIN2 1 CPA4 1 RFLNA 1 SPHK2 1 DDX11 1 TUBD1 1 FAM9A 1 CNST 1 GLCE 1 CEP295 1 PLVAP 1 VAV2 1 CACYBP 1 C1orf189 1 NLRP8 1 OR10A4 1 LIFR 1 KIR2DL3 1 ECT2 1 TRAPPC2 1 SMOC1 1 EML4 1 ZDHHC5 1 OTUB2 1 TLR6 1 SMC1A 1 SLC12A5 1 ALKBH1 1 DDX24 1 MATR3 1 BICD1 1 TACR2 1 EPB41L1 1 CCDC87 1 CCDC148 1 NUFIP2 1 LUM 1 LCOR 1 CRYGS 1 PCGF1 1 IL15 1 PLXNB2 1 CAND2 1 B3GNT6 1 OTC 1 DNMT1 1 RAVER2 1 XIRP1 1 PRIM2 1 CCDC91 1 TRUB2 1 PROX1 1 CDK10 1 CTNND1 1 PIGO 1 RPS4Y1 1 C11orf58 1 ZNF544 1 OPTC 1 BTG3 1 HAT1 1 GCNT4 1 SNRPB 1 PRMT8 1 GZMK 1 NOMO3 1 CBR1 1 CNKSR3 1 CRACD 1 SRBD1 1 DNAJC7 1 MED1 1 PYGM 1 STYXL2 1 RB1CC1 1 KRTAP10-4 1 CCDC159 1 R3HDM1 1 PKP2 1 FIP1L1 1 GRK6 1 INO80 1 TAS2R8 1 KIAA1210 1 PRICKLE2 1 C6 1 GCC1 1 OR1G1 1 AKT2 1 OR51F1 1 IFNA7 1 KIAA1522 1 PIMREG 1 PARN 1 EMSY 1 NFIX 1 AL645922.1 1 COIL 1 CES5A 1 STK36 1 LTB4R 1 G2E3 1 WNT2 1 SLC29A1 1 LIG4 1 SCRIB 1 EVI2A 1 GJA8 1 ZNF638 1 ZNF132 1 KRTAP12-1 1 GGN 1 CBR3 1 ZNF180 1 DEPDC4 1 CARNMT1 1 WAC 1 OTOGL 1 ONECUT2 1 CD300LB 1 ZNF627 1 SMC1B 1 SERTAD2 1 FOCAD 1 ABHD3 1 AK9 1 NAT9 1 NUP85 1 PSG1 1 COLGALT2 1 PTDSS1 1 TICAM1 1 SETX 1 ANKRD29 1 SREBF2 1 ACSM1 1 ZNF319 1 MCM7 1 CIDEB 1 PPFIA3 1 POLR1A 1 CCNJ 1 REPS1 1 MYO6 1 HIPK2 1 TJP1 1 C19orf47 1 RBM22 1 TRIM11 1 CCNA1 1 MPZL2 1 PLEKHA1 1 ABL1 1 OR2T33 1 TMCC2 1 PTPN11 1 OR51I2 1 VIM 1 BNC2 1 ALDH16A1 1 CFAP161 1 GGT5 1 PDE2A 1 SULT2A1 1 CLRN3 1 RBM12 1 PMVK 1 HVCN1 1 SEPHS2 1 LYSMD4 1 B4GALT3 1 RIMS4 1 GPRASP1 1 BTC 1 MAP2K2 1 
CALU 1 MMP28 1 COG5 1 DNAJC15 1 SLC17A4 1 ST18 1 CDC25C 1 TRIM8 1 LRCH2 1 MRTFA 1 NFATC1 1 DPAGT1 1 BORA 1 UBIAD1 1 TFRC 1 OR6S1 1 RPGRIP1 1 TP53INP1 1 CGNL1 1 APBA1 1 RAB6B 1 GALK2 1 DCDC2B 1 NHSL2 1 P3H2 1 RPL7 1 DNAAF1 1 CRK 1 POLR3G 1 IL17RC 1 MAGEB10 1 GLRA1 1 HOXC11 1 GAB2 1 MTIF2 1 GANAB 1 MTHFR 1 KDM6B 1 ACD 1 OR7A17 1 IFT20 1 EOGT 1 NOS3 1 GASK1B 1 STXBP5 1 KCNA4 1 PIF1 1 CDCP1 1 GINS3 1 ERCC8 1 IL23A 1 PRR13 1 AGO1 1 GPR19 1 BRCA2 1 CHST12 1 VPS51 1 POLN 1 RPUSD2 1 PHYHIP 1 KIN 1 EPB41 1 NFATC3 1 TFEC 1 PSD2 1 GPR158 1 TLCD2 1 TPRN 1 TDRD5 1 CARD6 1 ALDH1A2 1 REN 1 AXL 1 DCAF11 1 FAM184A 1 DTD2 1 PLA2R1 1 ARFIP1 1 TPX2 1 TBC1D2 1 TDRD6 1 B3GNTL1 1 GP2 1 SLCO4A1 1 CCNC 1 MAS1 1 MRPS21 1 CES2 1 ATE1 1 TAF5 1 FZD5 1 CNKSR2 1 REXO5 1 CUL7 1 KRT74 1 CEP76 1 SSRP1 1 CRTAC1 1 PC 1 RNF217 1 OR10G9 1 BET1L 1 MAP6 1 OAF 1 RANBP3 1 CHAF1B 1 NDUFS2 1 OR52L1 1 LIPF 1 MARCHF11 1 SPATA45 1 CDC27 1 CTSL3P 1 KIF27 1 PNN 1 THTPA 1 NFAM1 1 KIF13B 1 EXT2 1 ANKH 1 ZNF333 1 TNPO1 1 HIPK3 1 SLC16A14 1 CPEB4 1 PPP1R16B 1 PCDHB13 1 COL27A1 1 SLC9B2 1 SDF4 1 SLC25A23 1 PLEKHA7 1 SGIP1 1 TDRKH 1 TPM2 1 SNCA 1 OR6C3 1 SFXN1 1 LRCH1 1 SMARCAD1 1 CCDC136 1 PHYH 1 MAK 1 MOSPD3 1 IL27RA 1 TOP1MT 1 TRAF3IP2 1 RPAP3 1 LINC02870 1 OR2A7 1 POLR2C 1 SURF6 1 EVI2B 1 TREML4 1 PARP8 1 GAS8 1 PTPRM 1 NBPF25P 1 GALE 1 NPL 1 USP16 1 SLITRK6 1 KHDRBS1 1 M6PR 1 KIAA1109 1 FAM209A 1 SLC22A16 1 DCHS1 1 ZNF646 1 CCDC183 1 PRAMEF20 1 USF1 1 ANPEP 1 SLC9A3 1 SEMA4A 1 PARP6 1 COX19 1 SBNO1 1 RELB 1 ZNF93 1 ADAM32 1 SNX8 1 BAIAP3 1 ABCG5 1 STT3B 1 MOCS2 1 MYBBP1A 1 TNFRSF9 1 UCN3 1 TMOD1 1 NINL 1 ARF1 1 RARRES1 1 SH2B1 1 HOXD4 1 GALNT15 1 SLC16A3 1 HSPH1 1 DNAH5 1 ANXA5 1 CLCN7 1 TUBB1 1 RACGAP1 1 CYP4A11 1 DHRS7B 1 PRDM10 1 KIF20A 1 SIRT6 1 POLR3A 1 EPC1 1 MAP7 1 HGF 1 KIRREL1 1 OTUD6B 1 PEX1 1 FLT4 1 KLHDC10 1 AHR 1 METTL14 1 PRDM7 1 TMEFF2 1 NAP1L2 1 CWC15 1 ADAMTS3 1 IL33 1 FBXW11 1 RNF26 1 ADAMTS16 1 BCKDK 1 NRP1 1 G3BP2 1 PRPS1L1 1 IL19 1 ACHE 1 NCF1 1 AOC3 1 TIAF1 1 OGFOD1 1 C17orf98 1 INPP5B 1 
ACOX1 1 VSTM2L 1 ORMDL3 1 C1QTNF4 1 FSCB 1 DNAJC18 1 ACTRT2 1 OR4E2 1 TTC3 1 RASGEF1B 1 DPY19L3 1 WDR36 1 CTNNA3 1 MAML3 1 POLR2H 1 EVX2 1 DDR1 1 AL035460.1 1 HNRNPA2B1 1 TDGF1 1 BRCA1 1 NLRP10 1 ELP2 1 FCRL4 1 VEGFD 1 ZNF467 1 TOGARAM2 1 IPO11 1 PPEF1 1 FAM171B 1 MAGI3 1 ANAPC1 1 PFDN5 1 GOLGA5 1 CAVIN1 1 CUL2 1 GDF11 1 TRPC3 1 EFR3A 1 ERP44 1 SLC24A2 1 HDAC7 1 LYPD2 1 GAS6 1 SLC23A3 1 AKR1C1 1 LAMB3 1 TLR10 1 EIF5 1 UBTF 1 ITGA5 1 IL20RB 1 NKG7 1 CD9 1 SLIT2 1 SEMA6A 1 MRPL39 1 IL12RB2 1 SUV39H1 1 TRIM50 1 INSIG1 1 CYP2A6 1 DLG4 1 SPOCK1 1 SERPINA10 1 TDRD1 1 ABCC3 1 ATP2B2 1 INTU 1 CPXM1 1 MOCOS 1 PEBP4 1 DCAF7 1 TMEM38B 1 TAS1R3 1 ST6GALNAC3 1 SHOC1 1 ARHGEF1 1 ZNF2 1 B3GNT2 1 HOXB1 1 BDKRB1 1 BTG1 1 COX11 1 HSP90AA1 1 RAB40B 1 CIC 1 ITGA11 1 UQCC1 1 NDUFAF2 1 HSH2D 1 VCL 1 SERPINF2 1 MAGI2 1 TMEM92 1 KCNK10 1 CACNA1H 1 STAB1 1 KLHDC8B 1 TTC1 1 EXOC2 1 RMND5B 1 SDCCAG8 1 MRPL54 1 PLA2G4A 1 ABHD12 1 FSTL1 1 PPFIA1 1 NCAPH2 1 CLEC12A 1 GNAI3 1 IGKV1-6 1 PBXIP1 1 PRR16 1 SMC2 1 PCDHA11 1 TATDN3 1 ZPLD1 1 KLHL26 1 PBX2 1 ZNF777 1 KCTD2 1 ITPR3 1 CPXM2 1 PTPA 1 OSGEP 1 SYCP1 1 MGA 1 SGCZ 1 EPN2 1 MYO1B 1 ZNF592 1 YLPM1 1 MICAL1 1 PCMTD1 1 ZNF568 1 STON1 1 TRMT2B 1 SLC35F3 1 TBC1D17 1 OR2B11 1 OR2B3 1 MIDEAS 1 KIR2DL1 1 MAP1A 1 WDPCP 1 TTC7A 1 B4GALNT1 1 LTBP1 1 FSIP2 1 MARF1 1 NAB1 1 CCDC15 1 DDTL 1 FOXD4L1 1 RECK 1 MTCP1 1 ZSWIM4 1 ZNF347 1 AAGAB 1 DHX35 1 C4BPA 1 SLC19A2 1 MIR544A 1 CDC42BPG 1 RRP1B 1 GPATCH2 1 CNTNAP2 1 ZNF701 1 CD38 1 NIPBL 1 IGHV3-16 1 MED25 1 DDR2 1 ZSCAN20 1 SLC35F5 1 CEP83 1 JAK3 1 ANKRD13D 1 SH2B2 1 OR8D2 1 NID2 1 FAM91A1 1 HUS1 1 IGDCC3 1 TMEM26 1 NOMO1 1 ELOVL5 1 APH1B 1 TRPM2 1 EME2 1 TMEM255B 1 PDE5A 1 NME9 1 ARHGAP42 1 RTP5 1 PACC1 1 LSM11 1 MUTYH 1 SLC16A8 1 CA5B 1 PEX7 1 QRICH2 1 STON1-GTF2A1L 1 PKDREJ 1 RSPH6A 1 POMGNT1 1 CRYM 1 WNK4 1 NR3C1 1 BCLAF1 1 LCAT 1 ALPL 1 FRMPD2B 1 SLC15A3 1 NFYC 1 ELMOD3 1 ZNF200 1 RILP 1 SEC24A 1 MTCH1 1 ZNF670 1 NDN 1 DIP2A 1 NT5C3A 1 ENTPD5 1 PPHLN1 1 CDC20 1 OR2F1 1 TMEM183B 1 TOMM40 1 DLAT 1 MAP7D3 
1 PHF10 1 TAAR5 1 UBE2J1 1 SVOPL 1 RNF19B 1 TBL1Y 1 CLEC16A 1 DEFB116 1 LGALS8 1 TLDC2 1 PDE4A 1 TTBK2 1 MEF2D 1 MARCHF5 1 ACSL6 1 RAB26 1 DENND2C 1 GRM1 1 HS3ST2 1 PSMA4 1 SHROOM3 1 KIF24 1 VIPAS39 1 MEIS1 1 SLC4A8 1 SLC5A4 1 UBA5 1 CPNE7 1 CDK5RAP2 1 GRHL3 1 SLC27A1 1 KLHL5 1 TDRD9 1 TRPV4 1 TRAV26-1 1 ATG4A 1 NXPH3 1 MPPED2 1 ARID5A 1 OR2T4 1 TOP3B 1 EIF2B1 1 SCRN1 1 CENPW 1 ZNF280D 1 BBS2 1 SMARCAL1 1 AGO3 1 MRC2 1 RPL3 1 NAALAD2 1 ZBTB12 1 PCDHGA8 1 FAM222B 1 P2RX1 1 INSL6 1 ACOT9 1 TRIM28 1 ZNF211 1 MYT1L 1 IFNA21 1 IRX5 1 ZNF765 1 GTDC1 1 PSENEN 1 XYLT1 1 CDH16 1 NR5A2 1 SPSB3 1 IGHG3 1 LUZP1 1 PIGR 1 EPHA2 1 SMIM8 1 KLK13 1 NUP37 1 METTL3 1 PRKAG2 1 EMC2 1 PPP1R13L 1 ACVR1B 1 TMPRSS11F 1 CEMIP2 1 SDSL 1 NGEF 1 BCORL1 1 PHLPP2 1 GIPC3 1 LRRC66 1 ZKSCAN3 1 PDP2 1 RIN1 1 PGR 1 MRPL4 1 HTR5A 1 PARP16 1 UNC13C 1 ARID5B 1 MTMR1 1 MMS22L 1 CEP112 1 ZNF749 1 MS4A14 1 SHC1 1 ASIC3 1 PIGK 1 OR6C68 1 PCDHAC2 1 CASKIN2 1 ADAM11 1 DNAJC21 1 SORBS3 1 MAPK8IP1 1 LY75 1 RAB1A 1 SLC6A17 1 WDR46 1 USP50 1 PLCL1 1 VWA2 1 SLC22A25 1 RIPOR1 1 ABCA4 1 SDHB 1 ALOX15B 1 EPS15L1 1 SERPINI1 1 MGAT4B 1 B4GAT1 1 TSR1 1 ZBED4 1 NPFFR2 1 IQSEC3 1 THEG 1 ZFP14 1 LPA 1 REXO2 1 EVX1 1 SLC17A1 1 NFS1 1 DYM 1 MAP3K20 1 STEAP1 1 SSX5 1 TXNRD2 1 WNT16 1 MYEF2 1 UBA2 1 TBC1D19 1 MAEL 1 TRPC4 1 NR6A1 1 FBXO39 1 MKLN1 1 GCM1 1 OR6V1 1 GRM2 1 RSBN1L 1 OSBPL9 1 OCEL1 1 CYP46A1 1 ZNF99 1 KIR3DL2 1 PRMT5 1 NRF1 1 ITGA7 1 MECOM 1 LIMK1 1 BAHCC1 1 DHX30 1 ITGAX 1 CLDN7 1 HSPA5 1 CAPN3 1 OR4A5 1 GCN1 1 PDXK 1 HSD17B11 1 KLHL31 1 ANLN 1 ENKD1 1 STAT6 1 BTNL9 1 KRT82 1 RBM25 1 ZCCHC12 1 ANKRD39 1 USH1C 1 ANKRD22 1 OGDH 1 DENND11 1 PTPN23 1 VARS2 1 ABCA2 1 PYGL 1 FBXO34 1 ILVBL 1 TRAV17 1 CCNB3 1 SELENOI 1 KRTAP19-2 1 ZW10 1 NUTM2D 1 ZNF280C 1 THOC2 1 OR5T2 1 ABCA6 1 DUS3L 1 GALNT10 1 CTNNA2 1 ARRDC3 1 CAMK1 1 TSPAN1 1 ANAPC5 1 PTPRU 1 MCF2 1 COLEC12 1 CAT 1 SYT16 1 OR8B12 1 PDZRN4 1 MIDN 1 UNC13B 1 RCOR2 1 GCKR 1 STK11 1 HIPK1 1 TTC4 1 SKA3 1 ADSL 1 BVES 1 DOK4 1 STRN 1 MFAP2 1 DLX4 1 PKP4 1 MYO3B 1 PGS1 
1 XRCC6 1 TTYH2 1 PHLDB2 1 KRTAP5-9 1 ZBTB49 1 VWC2 1 NCAPD2 1 SYT10 1 ALDH8A1 1 EIF4ENIF1 1 WDR35 1 RPIA 1 VCPIP1 1 UGT2A1 1 HCN3 1 FAM221A 1 UBTFL2 1 ESPL1 1 PAK5 1 GAL3ST1 1 NPRL3 1 KLHDC7B 1 JOSD2 1 ADAM33 1 FAM78B 1 CD96 1 HEXD 1 NTSR1 1 USPL1 1 FAM135A 1 PDE4B 1 TET3 1 YIPF7 1 C2orf49 1 CSE1L 1 MIR450A1 1 PIP4K2B 1 DUSP12 1 KIAA1217 1 MAPK7 1 CACNG3 1 PTPRN 1 PEX11B 1 PCBD2 1 FASTKD5 1 SLC22A7 1 NRXN3 1 CAND1 1 FBXO28 1 FMO2 1 SMURF1 1 GTF2A1 1 SLC44A5 1 TOR3A 1 TENT5D 1 RABGAP1L 1 AASDHPPT 1 DPYD 1 IGKV1-12 1 CYTH3 1 LRTM2 1 RC3H2 1 PEAK1 1 OR4K2 1 CCDC68 1 OR5B12 1 TLN2 1 MTERF1 1 NIPAL2 1 AK8 1 PTH1R 1 DRD5 1 STAR 1 PLD5 1 BPIFC 1 SLC49A3 1 EWSR1 1 SNX7 1 RDH5 1 SARDH 1 VKORC1 1 OR10C1 1 NBPF20 1 NTF4 1 ALCAM 1 GSPT2 1 HNRNPU 1 GDAP1L1 1 CALD1 1 SAMD3 1 PTPN1 1 ITIH4 1 CA10 1 CARD11 1 SOX7 1 TNKS 1 AKR1B1 1 GFRAL 1 UTP14A 1 USP34 1 MCMDC2 1 YWHAB 1 PAGE5 1 KBTBD3 1 GPRIN1 1 PLAAT3 1 TMEM154 1 VEZT 1 WRNIP1 1 IARS2 1 PI15 1 OR5P2 1 NAALADL1 1 SMURF2 1 ZFPL1 1 SPTBN5 1 SLCO1C1 1 KRTAP15-1 1 DDX25 1 GLS 1 CTNS 1 DALRD3 1 MPC1 1 OR2H2 1 MSH2 1 SLC38A10 1 LRRIQ4 1 ITGA2 1 KPTN 1 HBE1 1 VPS37C 1 LGALS14 1 OR6K6 1 EPRS1 1 ARID1B 1 OR10AG1 1 RAB43 1 IL16 1 RUBCN 1 KLHL36 1 FRYL 1 DNAJC9 1 TRMT44 1 FN3K 1 CNGA4 1 SNX29 1 KRT27 1 DIO1 1 FAM151A 1 TENT4B 1 NOL8 1 C1orf122 1 AMZ2 1 RTF2 1 DOCK3 1 TLR2 1 DDX54 1 HSP90B1 1 SPATA32 1 CEP95 1 PRDM1 1 HIF1A 1 SEMA3C 1 ALAS1 1 ANO1 1 ZNF703 1 PROM2 1 IGHV3-49 1 FERMT1 1 NDUFV3 1 COASY 1 ZNF223 1 CPSF3 1 CCNT1 1 NRROS 1 PTPN7 1 PACS1 1 LARS1 1 GAB4 1 PLEKHH3 1 COL9A1 1 ANKS4B 1 LAMP1 1 RASGRP3 1 KLHL21 1 CDADC1 1 ALG5 1 DGKQ 1 PPP1R3D 1 FAM180A 1 QRSL1 1 TRIM37 1 TFPI 1 ABHD1 1 IMPG1 1 NR1I2 1 TTC7B 1 SLCO1B3-SLCO1B7 1 ABCB4 1 NUP50 1 ITGA3 1 ADO 1 ERMN 1 LONRF3 1 PTPRK 1 SRRT 1 TRAV21 1 CIAPIN1 1 ZNF155 1 CCDC93 1 PCDHB6 1 BPTF 1 ELAPOR2 1 KAT2B 1 RPH3A 1 USHBP1 1 PABPC5 1 STAT4 1 AC013489.1 1 PLD4 1 ATP6V0B 1 LRRTM1 1 F7 1 TRMO 1 DRP2 1 MAB21L3 1 TNFSF13 1 DHDH 1 TMPRSS11A 1 CCDC88C 1 CPLX2 1 KRT32 1 GGT1 1 UBXN6 1 IL18BP 
1 MIR222 1 CLDN6 1 MIS12 1 MYLK3 1 CCL4L2 1 PGF 1 USP6NL 1 TIMELESS 1 KRTCAP2 1 TSEN15 1 UGT1A1 1 DNAJC16 1 ADAMTS17 1 JMJD7-PLA2G4B 1 CASP8AP2 1 AADACL4 1 SP1 1 GPRIN3 1 ZDHHC8 1 CENPC 1 OSBPL7 1 ADGRL4 1 FNIP1 1 PCDHB9 1 PGAP3 1 FYB2 1 RPS19BP1 1 TRANK1 1 TM2D2 1 YIPF4 1 THBS2 1 MDGA2 1 H1-0 1 PDE4DIP 1 NAA35 1 PHTF1 1 KLHL6 1 SLC5A7 1 CUL1 1 KCNG4 1 FPGS 1 AQP9 1 ABCA9 1 FOXRED2 1 SCAMP2 1 HAP1 1 CCT8L2 1 GAGE10 1 ANK1 1 GCLM 1 CLSTN2 1 USF3 1 MYBPHL 1 FOXB1 1 KLHL25 1 PCSK6 1 CYP2C19 1 NUSAP1 1 MYH8 1 APOBEC3B 1 ZNF708 1 ZHX1 1 ARRDC1 1 PPP1R3A 1 HPS4 1 SQOR 1 IQCF2 1 HOXD11 1 IMPA2 1 OR52J3 1 CNTNAP5 1 RFTN1 1 PLA2G4F 1 GPRC5B 1 CADM3 1 SLC28A2 1 ANKRD55 1 GALNTL5 1 DTX4 1 MCOLN1 1 CDKAL1 1 COL5A3 1 HTATIP2 1 ANKIB1 1 CRTC1 1 CAPN1 1 CLMN 1 ZNF396 1 BMPR2 1 ANKRD6 1 BDP1 1 NIP7 1 ELOVL1 1 COX7A2 1 DISP2 1 SERPINB10 1 HDAC10 1 CDC6 1 B3GALT2 1 USP7 1 CTCFL 1 UEVLD 1 EIF2B2 1 PRKG2 1 SCARA3 1 MAF 1 HYOU1 1 AXIN1 1 TUT1 1 TMC3 1 BAIAP2 1 CCDC40 1 FAM122C 1 PIGG 1 TAF4B 1 ESPN 1 OR6C4 1 TAFA2 1 PSMD3 1 INSRR 1 NCAPH 1 SVIL 1 ALKBH2 1 ODF3 1 CSF2RB 1 WDR5 1 UBR7 1 TAAR1 1 LRIG2 1 CCDC73 1 EPHA1 1 OR4A15 1 COP1 1 ZNF165 1 MRPS33 1 TNFAIP8L3 1 TEKT5 1 ETV3 1 MTBP 1 TANC2 1 DNAJC17 1 CDH10 1 ACTR8 1 ACTL7B 1 SIRT1 1 SLC6A13 1 TCEAL2 1 RALGAPB 1 RIMBP2 1 SELPLG 1 ELN 1 ARMCX3 1 ALG12 1 DGAT1 1 SH3GL2 1 GPR4 1 KTN1 1 RALYL 1 FAM160B1 1 FAM234B 1 ZNF207 1 HRC 1 CCDC3 1 BCAS3 1 LTA4H 1 ROR1 1 TRABD2A 1 PARPBP 1 PLCB3 1 PHB2 1 REEP2 1 TMED6 1 ZNF750 1 RAD23A 1 SELP 1 CNTRL 1 PCED1A 1 TCP11L1 1 MICB 1 SPATA33 1 P2RY2 1 MAP10 1 CASKIN1 1 ABCB5 1 ITSN1 1 SLC15A2 1 DKK1 1 MOB2 1 MAP3K6 1 NOXA1 1 PDILT 1 SAMHD1 1 PAK4 1 TSSK6 1 LRRC24 1 HNRNPK 1 SLC66A3 1 PCIF1 1 RAD50 1 ZNF17 1 COPG1 1 SAMD8 1 SLC7A4 1 PATJ 1 ARAP2 1 ATAD5 1 SHLD1 1 GPR183 1 OR2M4 1 NISCH 1 PTGS1 1 ACTN2 1 POLH 1 KRTAP10-11 1 DICER1 1 NOM1 1 PAPOLA 1 HLF 1 FYN 1 TFAP2C 1 ZNF320 1 MCM2 1 MRPS12 1 NANOG 1 DPY19L1 1 SNRPD2 1 CCDC9B 1 DNAH17 1 CCNY 1 AL353804.6 1 SORT1 1 SUOX 1 SUN2 1 PKNOX1 1 GREB1 1 PDLIM5 1 
TUBA4A 1 RABL3 1 CCNI 1 INKA2 1 RASSF9 1 OTUD7A 1 SIX6 1 MAN2C1 1 NFKBIZ 1 SLC5A2 1 TSC1 1 TMEM104 1 PMFBP1 1 H2BC17 1 SLC2A1 1 WDR70 1 REST 1 RNF103 1 SYVN1 1 EYS 1 PRMT7 1 GIMAP6 1 CDC23 1 ALB 1 RPGRIP1L 1 PI4K2A 1 APOBEC1 1 PLEKHS1 1 HOXB4 1 SLC45A2 1 SKIV2L 1 LUC7L2 1 NPHS1 1 SLC9A5 1 MYEOV 1 HGS 1 PCDHGC5 1 ZNF675 1 ERAP2 1 ATP6V0A1 1 FIG4 1 ACIN1 1 CDK14 1 UGT2B11 1 GYPC 1 NSD2 1 ZRANB3 1 TECTB 1 PPP1R3C 1 TRAPPC9 1 CR1L 1 VPS35 1 SNRNP40 1 PLCB4 1 VSIG10 1 ADAM23 1 SLC2A8 1 EIF3M 1 XPO6 1 DNAI4 1 ARSG 1 UFL1 1 C10orf62 1 UBAP1 1 WDR48 1 DSG3 1 SPARCL1 1 SLCO1A2 1 ZFP37 1 SLC30A9 1 NAGPA 1 ECM2 1 MAST4 1 MRPS18B 1 NFATC2 1 CASS4 1 P4HA1 1 SPRR3 1 HIP1R 1 TLL2 1 RFC3 1 PRKD2 1 SMARCE1 1 LUZP2 1 JADE2 1 MINAR1 1 STX7 1 GRID1 1 SDHAF4 1 ENTHD1 1 UBQLN1 1 FGFR1 1 PRMT6 1 TMTC4 1 MMP8 1 PZP 1 ETFB 1 TARS3 1 WDR19 1 PCSK5 1 TMCC1 1 VANGL2 1 AC024940.1 1 B3GALT4 1 RBM34 1 DNAJB2 1 SPNS1 1 VASP 1 NKD1 1 BBX 1 TULP4 1 ALDH1L1 1 M1AP 1 NT5C2 1 ABCB1 1 MOB3C 1 ABCC12 1 KLHL15 1 POM121C 1 C1QTNF5 1 CNTNAP4 1 NBN 1 IYD 1 SERPIND1 1 OR10W1 1 VSIR 1 GALNT1 1 KRTAP4-12 1 GNB1L 1 CD28 1 MAST2 1 TALDO1 1 ZNF429 1 DNAJB4 1 HS6ST3 1 CCNDBP1 1 RAB3GAP1 1 KLF15 1 JPT2 1 ADH5 1 SLCO1B3 1 ATP2A2 1 HNRNPM 1 MRPL14 1 GRK2 1 ZZZ3 1 SLMAP 1 ABHD18 1 OR51L1 1 HNRNPUL2 1 AAMDC 1 TECRL 1 PRRG3 1 NDUFAF6 1 FKBP6 1 MYO19 1 GEMIN8 1 AP000769.5 1 CHST9 1 INTS13 1 TAF5L 1 CLEC18B 1 BIVM-ERCC5 1 ANAPC2 1 OR4K14 1 UNG 1 F2 1 CD274 1 SLC13A1 1 AREL1 1 ZNF302 1 CHST4 1 DDX42 1 NRG3 1 MTMR11 1 ZNF22 1 SFTPD 1 AKAP7 1 MAGEA5 1 AOPEP 1 ZNF217 1 NBEAL1 1 FUT10 1 SPRED1 1 MIS18A 1 HFM1 1 DUSP10 1 CDKL4 1 TTLL5 1 PTGER2 1 IRAK1BP1 1 CEP170 1 NHS 1 CNOT2 1 ANGPTL1 1 CCL3 1 PODXL 1 SEC23A 1 ZNF462 1 TBC1D12 1 ZNF385A 1 TAP1 1 MCPH1 1 SRSF7 1 FUT11 1 LZTS1 1 MSH4 1 NXPH4 1 C20orf194 1 RIPPLY3 1 NT5C1B 1 HPS6 1 GOLGA3 1 PI16 1 GPRASP2 1 TRIM2 1 CBLL1 1 COL4A4 1 PARP1 1 MPP3 1 ZNF556 1 IFT57 1 ATP8B2 1 AGTR1 1 TACR1 1 TECPR2 1 CCDC125 1 CCDC89 1 WDFY2 1 NR3C2 1 RBM45 1 TMEM145 1 GTF3C5 1 CD300E 1 SHH 1 PAFAH2 
1 CYB5R3 1 IFIT3 1 UPP2 1 ANO4 1 NDC1 1 PLXNA1 1 ACVRL1 1 PMP22 1 RGP1 1 GIPR 1 MGAT5 1 LAMB2 1 C12orf40 1 FBXL13 1 WDR5B 1 ATP6V1B1 1 ZBTB20 1 KCNN4 1 ANKRD2 1 SLC10A3 1 SLC38A9 1 CACNA2D4 1 KRT75 1 HTR2C 1 RMND5A 1 ATF7IP2 1 FOXQ1 1 NLRP3 1 OR4C15 1 MXD4 1 TBL2 1 CCDC105 1 TAOK3 1 RPGR 1 WIPF2 1 ITM2B 1 SCNN1B 1 ANKRD12 1 STK17B 1 FNBP4 1 HAS3 1 CREBZF 1 PIK3CA 1 OR9G4 1 NEK11 1 KANSL2 1 CDON 1 CYC1 1 TRPC7 1 PIGL 1 LCLAT1 1 ZNF385B 1 WDR37 1 GDF15 1 RGS22 1 SLC25A53 1 COG2 1 H3C12 1
Text(0.5, 1.0, 'Number of mutations per case (if gene has 5 or more cases)')
# Group the cases by which of the genes of interest appear in their mutations.
# `case_to_mutation` maps a case ID to that case's mutated genes (presumably a
# collection of gene symbols -- confirm against the cell that builds it).


def _cases_with(present=(), absent=()):
    """Return the case IDs mutated in every gene of `present` and none of `absent`.

    Cases are returned in the iteration order of `case_to_mutation`.
    """
    return [
        case
        for case, mutated in case_to_mutation.items()
        if all(gene in mutated for gene in present)
        and not any(gene in mutated for gene in absent)
    ]


vhl_and_ttn = _cases_with(present=('VHL', 'TTN'))
vhl_no_ttn = _cases_with(present=('VHL',), absent=('TTN',))
vhl_and_pbrm1 = _cases_with(present=('VHL', 'PBRM1'))
vhl_no_pbrm1 = _cases_with(present=('VHL',), absent=('PBRM1',))
pbrm1_no_vhl = _cases_with(present=('PBRM1',), absent=('VHL',))
# VHL only: carries VHL but neither PBRM1 nor TTN.
vhl = _cases_with(present=('VHL',), absent=('PBRM1', 'TTN'))
ttn_no_vhl = _cases_with(present=('TTN',), absent=('VHL',))
# Fixed: this group previously required 'VHL' to be *absent* (copy-paste of the
# `ttn_no_vhl` condition), contradicting its name. It now holds the cases that
# carry all three of VHL, TTN and PBRM1.
vhl_ttn_pbrm1 = _cases_with(present=('VHL', 'TTN', 'PBRM1'))
pbrm1 = _cases_with(present=('PBRM1',))
bap1 = _cases_with(present=('BAP1',))
# Cases mutated in both BAP1 and PBRM1 (set intersection, so order is arbitrary).
overlap = list(set(bap1) & set(pbrm1))  # Interesting!
print(overlap)
['TCGA-B0-5097', 'TCGA-B0-5096', 'TCGA-B0-5107', 'TCGA-B0-4842', 'TCGA-CZ-5470', 'TCGA-CJ-4923']
## Finally let's add a column to our clinical DF for each of the mutations
genes_of_interest = ['MTOR', 'VHL', 'SETD2', 'PBRM1', 'KDM5C', 'TTN', 'BAP1']

# Look up, per gene, the case IDs that carry a mutation in that gene.
mutations_to_cases = {}
all_cases_w_muts = []
for g in genes_of_interest:
    mutations_to_cases[g] = api.get_mutation_values_on_filter('case_id', [g], filter_col, exact=True)
    all_cases_w_muts += mutations_to_cases[g]

# `tcga_sample_df` is a slice of another frame, so adding columns to it raised
# SettingWithCopyWarning. Take an explicit copy so the new columns are written
# to an independent DataFrame.
tcga_sample_df = tcga_sample_df.copy()

for g in genes_of_interest:
    # 1 = the sample's case has a mutation in this gene, 0 = it does not.
    # `isin` does a hashed lookup instead of scanning the case list once per row;
    # `.values` drops the index so the assignment is purely positional.
    mutation_col = tcga_sample_df['tcga_case_id'].isin(mutations_to_cases[g]).astype(int).values
    tcga_sample_df[f'{g}_mutation'] = mutation_col

# NOTE(review): index=False means the frame's index is not written to the CSV --
# confirm that the index is not needed downstream.
tcga_sample_df.to_csv(f'{output_dir}clinical_TCGA_13052022.csv', index=False)
/Users/ariane/opt/miniconda3/envs/clean_ml/lib/python3.6/site-packages/ipykernel_launcher.py:16: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy app.launch_new_instance()
tcga_sample_df
SampleId | CondId | CondName | SafeCases | FullLabel | TumorStage | gender | RaceGrouped | AgeGrouped | tcga_case_id | MTOR_mutation | VHL_mutation | SETD2_mutation | PBRM1_mutation | KDM5C_mutation | TTN_mutation | BAP1_mutation | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
case | |||||||||||||||||
db01e359-10a7-48c0-a742-656183e60ba8 | db01e359-10a7-48c0-a742-656183e60ba8_Normal | 0 | Normal | db01e359.10a7.48c0.a742.656183e60ba8 | CpG_Normal_db01e359.10a7.48c0.a742.656183e60ba8_1 | Stage II | Male | White | old | TCGA-BP-5199 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
03c62ae5-662b-493b-b7b9-1bdd1cfafb5d | 03c62ae5-662b-493b-b7b9-1bdd1cfafb5d_Normal | 0 | Normal | 03c62ae5.662b.493b.b7b9.1bdd1cfafb5d | CpG_Normal_03c62ae5.662b.493b.b7b9.1bdd1cfafb5d_1 | Stage III | Male | Blackorafricanamerican | middle | TCGA-CJ-4882 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
6942780c-b6c7-4534-b9b7-feda9cd04ecc | 6942780c-b6c7-4534-b9b7-feda9cd04ecc_Normal | 0 | Normal | 6942780c.b6c7.4534.b9b7.feda9cd04ecc | CpG_Normal_6942780c.b6c7.4534.b9b7.feda9cd04ecc_1 | Stage II | Female | White | old | TCGA-B0-4852 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ed7f6372-d820-43b9-bafa-7d83be8f66ec | ed7f6372-d820-43b9-bafa-7d83be8f66ec_Normal | 0 | Normal | ed7f6372.d820.43b9.bafa.7d83be8f66ec | CpG_Normal_ed7f6372.d820.43b9.bafa.7d83be8f66ec_1 | Stage I | Female | White | old | TCGA-CZ-4859 | 0 | 1 | 0 | 0 | 0 | 1 | 0 |
c474cee0-1117-4576-9e44-17938be10ca5 | c474cee0-1117-4576-9e44-17938be10ca5_Normal | 0 | Normal | c474cee0.1117.4576.9e44.17938be10ca5 | CpG_Normal_c474cee0.1117.4576.9e44.17938be10ca5_1 | Stage I | Male | White | middle | TCGA-BP-5170 | 0 | 1 | 0 | 1 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
f29b6c8c-d713-42ad-9b90-e556df9b05cd | f29b6c8c-d713-42ad-9b90-e556df9b05cd_Normal | 0 | Normal | f29b6c8c.d713.42ad.9b90.e556df9b05cd | CpG_Normal_f29b6c8c.d713.42ad.9b90.e556df9b05cd_1 | Stage III | Male | White | middle | TCGA-CZ-5458 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
cb371398-ee48-4665-8089-26229c5b2cf0 | cb371398-ee48-4665-8089-26229c5b2cf0_Normal | 0 | Normal | cb371398.ee48.4665.8089.26229c5b2cf0 | CpG_Normal_cb371398.ee48.4665.8089.26229c5b2cf0_1 | Stage IV | Male | White | old | TCGA-CZ-5464 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ac092a8e-80af-4589-8bb8-d86427b398ca | ac092a8e-80af-4589-8bb8-d86427b398ca_Normal | 0 | Normal | ac092a8e.80af.4589.8bb8.d86427b398ca | CpG_Normal_ac092a8e.80af.4589.8bb8.d86427b398ca_1 | Stage I | Male | Blackorafricanamerican | middle | TCGA-A3-3376 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
318f6ffb-1fdf-4f74-90db-21e337503aae | 318f6ffb-1fdf-4f74-90db-21e337503aae_Normal | 0 | Normal | 318f6ffb.1fdf.4f74.90db.21e337503aae | CpG_Normal_318f6ffb.1fdf.4f74.90db.21e337503aae_1 | Stage III | Male | White | middle | TCGA-B0-4810 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ef9ae1dd-83aa-404d-83ef-ced707ae738b | ef9ae1dd-83aa-404d-83ef-ced707ae738b_Normal | 0 | Normal | ef9ae1dd.83aa.404d.83ef.ced707ae738b | CpG_Normal_ef9ae1dd.83aa.404d.83ef.ced707ae738b_1 | Stage IV | Male | White | old | TCGA-B0-4712 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
151 rows × 17 columns