DE analysis between conditions (time)

Time condition comparison AP-axis

Notebook for performing the DESeq2 analysis

Here we perform DEseq2 analysis for each condition comparison using time as a factor (i.e. testing DE at each timpoint between conditions).

This is performed on each condition separately.

library(DESeq2)
Loading required package: S4Vectors
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply,
    parSapply, parSapplyLB

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl,
    intersect, is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
    rownames, sapply, setdiff, sort, table, tapply, union, unique, unsplit, which, which.max, which.min


Attaching package: ‘S4Vectors’

The following object is masked from ‘package:base’:

    expand.grid

Loading required package: IRanges
Loading required package: GenomicRanges
Loading required package: GenomeInfoDb
Loading required package: SummarizedExperiment
Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and for packages
    'citation("pkgname")'.

Loading required package: DelayedArray
Loading required package: matrixStats

Attaching package: ‘matrixStats’

The following objects are masked from ‘package:Biobase’:

    anyMissing, rowMedians


Attaching package: ‘DelayedArray’

The following objects are masked from ‘package:matrixStats’:

    colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges

The following objects are masked from ‘package:base’:

    aperm, apply, rowsum
project_dir <- '../data/results/deseq2/'

date <- '20210124'


runDeseq2BetweenCondTime <- function (filename, output_filename) {

    file_path <- paste(project_dir, filename, sep='')
  
    print(paste("========================== RUNNING ", filename, " ============================", sep=""))
      # https://bioconductor.org/packages/release/workflows/vignettes/rnaseqGene/inst/doc/rnaseqGene.html
    counts <- read.csv(file_path, header = TRUE, sep = ",")
    rownames(counts) <- counts$u_id

    # Let's make sure our count data is in matrix format and is only the numeric columns i.e. everything but the genes
    counts <- counts[,2:ncol(counts)]

    sample_names <- colnames(counts) # Sample names
    
    # Make sure we don't include the ID in our columns
    count_matrix <- as.matrix(counts)
    
    # We now set the row names to be the gene IDs
    rownames(count_matrix) <- rownames(counts) 
    
    # Separate out each of the sample names to be the different experiment conditions
    condition <- factor(sapply(sample_names, function(x){substr(x, start = 1, stop = 2)}))
    time <- factor(sapply(sample_names, function(x){substr(x, start = 3, stop = 4)}))
    tissue <- factor(sapply(sample_names, function(x){substr(x, start = 5, stop = 6)}))
    condition_id <- factor(sapply(sample_names, function(x){if (grepl("ko", x, fixed = TRUE)) {1} else {0}}))

    # For DEseq2 we need to turn this into a dataframe 
    sample_df = data.frame(sample_names = sample_names, condition = condition, time = time, tissue = tissue, condition_id=condition_id)
    dds_mat <- DESeqDataSetFromMatrix(countData = count_matrix,
                                     colData = sample_df,
                                     design = ~tissue+condition_id) # Have tissue as a factor
    
    dds <- estimateSizeFactors(dds_mat)
    
    dds_mat$design # Print the design of the experiment
    dds <- estimateSizeFactors(dds_mat)
    
    num_samples_meeting_criteria <- 4  # be strict and enforce that at least half the samples need to meet the criteria (i.e. one full condition)
    num_counts_in_gene <- 10  # They need at least 10 counts
    keep <- rowSums(counts(dds_mat) >= num_counts_in_gene) >= num_samples_meeting_criteria
    dds <- dds_mat[keep,] # Only keep the rows with this criteria
    
     # Let's print the number of rows
    print(paste("Dataset dimensions: ", nrow(dds), ncol(dds)))
    if (ncol(dds) != 8) {
      print(paste("================== WARNING WARNING WARNING YOUR COLUMNS MAY HAVE THE WRONG DIMS  ===========================", sep=""))
    }
    
    # Run DEseq2
    dds <- DESeq(dds)
    
    # Build results table
    res <- results(dds)
    print(paste("Deseq2 design: ", design(dds)))

    # Sumarise the results
    summary(res)
    
    # Lastly, we may want to see the results of the high logfoldchange e.g. > 1 with a padj value < 0.05
    res_padj05_lfc1 <- results(dds, lfcThreshold=2)
    table(res_padj05_lfc1$padj < 0.05)
    
    # Save the results
    res_ordered <- res[order(res$pvalue),]
    output_filename <- paste(project_dir, output_filename, sep='')
    write.csv(res_ordered, file = output_filename)
    return(res_ordered)
}

Run each of the supplimentary DEseq 2 experiments

runDeseq2BetweenCondTime(paste('merged_df_posterior_11_FEATURE_COUNTS_', date, '.csv', sep=''), paste('DEseq2_CNS_posterior_11_', date, '.csv', sep=''))
[1] "========================== RUNNING merged_df_posterior_11_FEATURE_COUNTS_20210124.csv ============================"
[1] "Dataset dimensions:  14884 8"
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
[1] "Deseq2 design:  ~"                     "Deseq2 design:  tissue + condition_id"

out of 14884 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 62, 0.42%
LFC < 0 (down)     : 8, 0.054%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 7)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

log2 fold change (MLE): condition id 1 vs 0 
Wald test p-value: condition id 1 vs 0 
DataFrame with 14884 rows and 6 columns
              baseMean log2FoldChange     lfcSE         stat      pvalue        padj
             <numeric>      <numeric> <numeric>    <numeric>   <numeric>   <numeric>
12578-Cdkn2a  151.5313       7.221034  0.747206      9.66405 4.28578e-22 6.37895e-18
12151-Bmi1   6077.3530       0.752335  0.100942      7.45316 9.11332e-14 6.78213e-10
24113-Vax2    129.7000       4.444877  0.627088      7.08812 1.35943e-12 6.74457e-09
12032-Bcan     97.9164       1.873976  0.287800      6.51137 7.44661e-11 2.77088e-07
78593-Nrip3    78.3483       1.311626  0.236568      5.54439 2.94974e-08 8.78078e-05
...                ...            ...       ...          ...         ...         ...
22427-Wrn     1088.967   -4.81832e-05 0.0788431 -6.11127e-04    0.999512    0.999781
22152-Tubb3  44012.946   -1.12213e-04 0.2387785 -4.69947e-04    0.999625    0.999827
50793-Orc3    3436.838   -1.16617e-05 0.0560489 -2.08063e-04    0.999834    0.999935
66795-Atg10    216.396    3.51760e-05 0.2118081  1.66075e-04    0.999867    0.999935
54394-Crlf3   1325.691   -1.33851e-05 0.1631289 -8.20524e-05    0.999935    0.999935
runDeseq2BetweenCondTime(paste('merged_df_posterior_13_FEATURE_COUNTS_', date, '.csv', sep=''), paste('DEseq2_CNS_posterior_13_', date, '.csv', sep=''))
[1] "========================== RUNNING merged_df_posterior_13_FEATURE_COUNTS_20210124.csv ============================"
[1] "Dataset dimensions:  15043 8"
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
[1] "Deseq2 design:  ~"                     "Deseq2 design:  tissue + condition_id"

out of 15043 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 4156, 28%
LFC < 0 (down)     : 3573, 24%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 7)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

log2 fold change (MLE): condition id 1 vs 0 
Wald test p-value: condition id 1 vs 0 
DataFrame with 15043 rows and 6 columns
               baseMean log2FoldChange     lfcSE        stat       pvalue         padj
              <numeric>      <numeric> <numeric>   <numeric>    <numeric>    <numeric>
245527-Eda2r    391.948       3.392528 0.1317647     25.7469 3.49473e-146 5.25712e-142
320429-Trank1  4195.667       2.038323 0.0946236     21.5414 6.37894e-103  4.79792e-99
20689-Sall3    5032.493      -0.902797 0.0424928    -21.2459 3.59635e-100  1.80333e-96
21349-Tal1     4476.607       2.585642 0.1391089     18.5872  4.08083e-77  1.53470e-73
13626-Eed      1077.450      -1.180654 0.0643553    -18.3459  3.56212e-75  1.07170e-71
...                 ...            ...       ...         ...          ...          ...
53970-Rfx5     745.6879   -1.72120e-04 0.0654757 -0.00262876     0.997903     0.998168
243277-Adgrd1   19.7514    1.12995e-03 0.4932991  0.00229060     0.998172     0.998371
30050-Fbxw2   2049.3965    9.23083e-05 0.0494430  0.00186696     0.998510     0.998643
26425-Nubp1   1187.6584    8.07616e-05 0.0507485  0.00159141     0.998730     0.998797
71952-Riox1    934.3996    7.84622e-05 0.0686373  0.00114314     0.999088     0.999088
runDeseq2BetweenCondTime(paste('merged_df_posterior_15_FEATURE_COUNTS_', date, '.csv', sep=''), paste('DEseq2_CNS_posterior_15_', date, '.csv', sep=''))
[1] "========================== RUNNING merged_df_posterior_15_FEATURE_COUNTS_20210124.csv ============================"
[1] "Dataset dimensions:  15643 8"
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
[1] "Deseq2 design:  ~"                     "Deseq2 design:  tissue + condition_id"

out of 15643 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 833, 5.3%
LFC < 0 (down)     : 362, 2.3%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 6)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

log2 fold change (MLE): condition id 1 vs 0 
Wald test p-value: condition id 1 vs 0 
DataFrame with 15643 rows and 6 columns
               baseMean log2FoldChange     lfcSE         stat      pvalue        padj
              <numeric>      <numeric> <numeric>    <numeric>   <numeric>   <numeric>
12151-Bmi1     9272.095        1.14814 0.0614868      18.6730 8.20976e-78 1.28425e-73
20476-Six6      928.288        9.78025 0.5359886      18.2471 2.18109e-74 1.70594e-70
21349-Tal1     6417.605        3.44848 0.1938262      17.7916 8.21077e-71 4.28137e-67
24113-Vax2      567.385        8.73584 0.5009278      17.4393 4.15027e-68 1.62307e-64
12450-Ccng1    5613.820        1.22610 0.0735476      16.6709 2.13410e-62 6.67676e-59
...                 ...            ...       ...          ...         ...         ...
51799-Rundc3a  11073.31   -1.37210e-04 0.1776573 -0.000772331    0.999384    0.999639
67398-Srpr      4379.06    8.45541e-05 0.1567136  0.000539545    0.999570    0.999761
434130-Ccdc8    1149.38   -1.12418e-04 0.3515756 -0.000319754    0.999745    0.999873
20333-Sec22b    3436.15    1.84246e-05 0.0855527  0.000215360    0.999828    0.999892
66241-Tmem9     4085.38   -1.26315e-05 0.1002142 -0.000126045    0.999899    0.999899
runDeseq2BetweenCondTime(paste('merged_df_posterior_18_FEATURE_COUNTS_', date, '.csv', sep=''), paste('DEseq2_CNS_posterior_18_', date, '.csv', sep=''))
[1] "========================== RUNNING merged_df_posterior_18_FEATURE_COUNTS_20210124.csv ============================"
[1] "Dataset dimensions:  15498 8"
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
[1] "Deseq2 design:  ~"                     "Deseq2 design:  tissue + condition_id"

out of 15498 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 2264, 15%
LFC < 0 (down)     : 1678, 11%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 7)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

log2 fold change (MLE): condition id 1 vs 0 
Wald test p-value: condition id 1 vs 0 
DataFrame with 15498 rows and 6 columns
               baseMean log2FoldChange     lfcSE         stat       pvalue         padj
              <numeric>      <numeric> <numeric>    <numeric>    <numeric>    <numeric>
21349-Tal1     3243.017        3.13064 0.1010915      30.9684 1.43745e-210 2.22776e-206
21375-Tbr1      425.520        5.09250 0.2149681      23.6896 4.62003e-124 3.58006e-120
15228-Foxg1     397.421        5.90634 0.2518970      23.4474 1.40376e-121 7.25184e-118
21386-Tbx3      743.201        1.48186 0.0731583      20.2555  3.17929e-91  1.23182e-87
13798-En1       720.597        2.16826 0.1094159      19.8167  2.13834e-87  6.62800e-84
...                 ...            ...       ...          ...          ...          ...
74362-Spag17    44.1697    1.69115e-04 0.2880849  5.87033e-04     0.999532      0.99979
240614-Ranbp6 3808.2142   -1.04189e-05 0.0818811 -1.27244e-04     0.999898      0.99999
381045-Ccdc58  464.5467   -7.41797e-06 0.0877941 -8.44928e-05     0.999933      0.99999
408062-Zfp873  311.6448   -9.81615e-06 0.1367160 -7.17996e-05     0.999943      0.99999
107272-Psat1  3831.9073   -7.48832e-07 0.0596016 -1.25640e-05     0.999990      0.99999
runDeseq2BetweenCondTime(paste('merged_df_anterior_11_FEATURE_COUNTS_', date, '.csv', sep=''), paste('DEseq2_CNS_anterior_11_', date, '.csv', sep=''))
[1] "========================== RUNNING merged_df_anterior_11_FEATURE_COUNTS_20210124.csv ============================"
[1] "Dataset dimensions:  14784 8"
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
[1] "Deseq2 design:  ~"                     "Deseq2 design:  tissue + condition_id"

out of 14784 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 114, 0.77%
LFC < 0 (down)     : 20, 0.14%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 7)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

log2 fold change (MLE): condition id 1 vs 0 
Wald test p-value: condition id 1 vs 0 
DataFrame with 14784 rows and 6 columns
               baseMean log2FoldChange     lfcSE         stat      pvalue        padj
              <numeric>      <numeric> <numeric>    <numeric>   <numeric>   <numeric>
12578-Cdkn2a   165.2090        7.62576  0.740688     10.29550 7.38331e-25 1.09155e-20
24113-Vax2     144.5959        4.33845  0.517341      8.38605 5.02762e-17 3.71642e-13
15416-Hoxb8    706.0535        7.22790  0.887593      8.14326 3.84765e-16 1.89612e-12
15415-Hoxb7    159.9976        8.40976  1.058035      7.94847 1.88830e-15 6.97916e-12
216166-Plk5     43.4126        2.62745  0.357956      7.34015 2.13353e-13 6.30842e-10
...                 ...            ...       ...          ...         ...         ...
69938-Scrn1    2555.668   -1.05523e-04 0.1885943 -5.59521e-04    0.999554    0.999824
14760-Gpr19     681.998   -5.59130e-05 0.1671797 -3.34449e-04    0.999733    0.999913
56399-Akap8    7818.582   -2.02222e-05 0.0724646 -2.79063e-04    0.999777    0.999913
66234-Msmo1    5336.486   -1.20719e-05 0.1118192 -1.07960e-04    0.999914    0.999982
213499-Fbxo42  2736.754    7.37334e-07 0.0625216  1.17933e-05    0.999991    0.999991
runDeseq2BetweenCondTime(paste('merged_df_anterior_13_FEATURE_COUNTS_', date, '.csv', sep=''), paste('DEseq2_CNS_anterior_13_', date, '.csv', sep=''))
[1] "========================== RUNNING merged_df_anterior_13_FEATURE_COUNTS_20210124.csv ============================"
[1] "Dataset dimensions:  14986 8"
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
[1] "Deseq2 design:  ~"                     "Deseq2 design:  tissue + condition_id"

out of 14986 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 4377, 29%
LFC < 0 (down)     : 4218, 28%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 6)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

log2 fold change (MLE): condition id 1 vs 0 
Wald test p-value: condition id 1 vs 0 
DataFrame with 14986 rows and 6 columns
                 baseMean log2FoldChange     lfcSE        stat       pvalue         padj
                <numeric>      <numeric> <numeric>   <numeric>    <numeric>    <numeric>
15410-Hoxb3       1231.78        7.85710 0.2392313     32.8431 1.42771e-236 2.13957e-232
76161-Lamp5       3010.13        4.87498 0.1524921     31.9687 2.96872e-224 2.22446e-220
12151-Bmi1        7011.44        1.32665 0.0432139     30.6997 5.74892e-207 2.87178e-203
20675-Sox3        2633.26       -2.16810 0.0713168    -30.4010 5.33114e-203 1.99731e-199
27280-Phlda3      1033.35        2.10388 0.0731979     28.7424 1.12668e-181 3.37689e-178
...                   ...            ...       ...         ...          ...          ...
56378-Arpc3    4334.18203   -7.40490e-05 0.0431619 -0.00171561     0.998631     0.998898
13419-Dnase1    308.12970   -1.72880e-04 0.1078681 -0.00160270     0.998721     0.998921
435811-Ldlrad2    8.81715   -9.24037e-04 0.7643027 -0.00120899     0.999035     0.999152
114606-Tle6      76.59359   -3.46786e-04 0.3091133 -0.00112187     0.999105     0.999152
77286-Nkrf     1131.81702   -7.28793e-05 0.0685811 -0.00106267     0.999152     0.999152
runDeseq2BetweenCondTime(paste('merged_df_anterior_15_FEATURE_COUNTS_', date, '.csv', sep=''), paste('DEseq2_CNS_anterior_15_', date, '.csv', sep=''))
[1] "========================== RUNNING merged_df_anterior_15_FEATURE_COUNTS_20210124.csv ============================"
[1] "Dataset dimensions:  15508 8"
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
[1] "Deseq2 design:  ~"                     "Deseq2 design:  tissue + condition_id"

out of 15508 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 2610, 17%
LFC < 0 (down)     : 1886, 12%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 6)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

log2 fold change (MLE): condition id 1 vs 0 
Wald test p-value: condition id 1 vs 0 
DataFrame with 15508 rows and 6 columns
                 baseMean log2FoldChange     lfcSE         stat       pvalue         padj
                <numeric>      <numeric> <numeric>    <numeric>    <numeric>    <numeric>
15422-Hoxc13     2270.819        9.44095  0.387312      24.3756 3.10466e-131 4.81471e-127
15402-Hoxa5       555.272        6.93023  0.299430      23.1447 1.64275e-118 1.27379e-114
15438-Hoxd9       751.546        7.83114  0.347136      22.5593 1.08902e-112 5.62953e-109
15413-Hoxb5       499.452        7.06447  0.320937      22.0120 2.20927e-107 8.56535e-104
15430-Hoxd10     1265.550        8.51812  0.390317      21.8236 1.38586e-105 4.29839e-102
...                   ...            ...       ...          ...          ...          ...
243910-Nfkbid     107.084   -1.84327e-04 0.1771694 -0.001040401     0.999170     0.999428
67544-Fam120b    4057.323    6.37699e-05 0.0862637  0.000739244     0.999410     0.999604
211401-Mtss1    12460.250   -5.98453e-05 0.1018736 -0.000587447     0.999531     0.999660
17354-Mllt10     3226.316   -4.84064e-05 0.1046439 -0.000462582     0.999631     0.999694
216177-AU041133   360.541   -4.89877e-05 0.1275424 -0.000384090     0.999694     0.999694
runDeseq2BetweenCondTime(paste('merged_df_anterior_18_FEATURE_COUNTS_', date, '.csv', sep=''), paste('DEseq2_CNS_anterior_18_', date, '.csv', sep=''))
[1] "========================== RUNNING merged_df_anterior_18_FEATURE_COUNTS_20210124.csv ============================"
[1] "Dataset dimensions:  15442 8"
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
[1] "Deseq2 design:  ~"                     "Deseq2 design:  tissue + condition_id"

out of 15442 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 4191, 27%
LFC < 0 (down)     : 4123, 27%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 7)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

log2 fold change (MLE): condition id 1 vs 0 
Wald test p-value: condition id 1 vs 0 
DataFrame with 15442 rows and 6 columns
                       baseMean log2FoldChange     lfcSE         stat       pvalue         padj
                      <numeric>      <numeric> <numeric>    <numeric>    <numeric>    <numeric>
209448-Hoxc10          1311.682        8.75973  0.311277      28.1412 3.06769e-174 4.73712e-170
22160-Twist1            761.408        3.46965  0.135393      25.6265 7.73754e-145 5.97415e-141
15422-Hoxc13           1462.927        9.65259  0.399628      24.1539 6.79110e-129 3.49561e-125
15430-Hoxd10            746.998        8.50237  0.380516      22.3443 1.37223e-110 5.29748e-107
245527-Eda2r            484.995        2.99488  0.134576      22.2541 1.02906e-109 3.17813e-106
...                         ...            ...       ...          ...          ...          ...
240174-Thada          1018.2475   -1.52725e-04 0.1368979 -0.001115611     0.999110     0.999331
55949-Eef1b2         13542.3669   -9.08883e-05 0.0840355 -0.001081547     0.999137     0.999331
442803-A830005F24Rik    34.6784   -2.30270e-04 0.3431027 -0.000671140     0.999465     0.999561
26373-Clcn7           2264.3380   -4.07776e-05 0.0648996 -0.000628318     0.999499     0.999561
11606-Agt              254.0168    5.14070e-04 0.9340988  0.000550338     0.999561     0.999561

References:

Love MI, Huber W, Anders S (2014). “Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2.” Genome Biology, 15, 550. doi: 10.1186/s13059-014-0550-8.

LS0tCnRpdGxlOiAiVGltZSBjb25kaXRpb24gY29tcGFyaXNvbiBBUC1heGlzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQpgYGAKCiMjIE5vdGVib29rIGZvciBwZXJmb3JtaW5nIHRoZSBERVNlcTIgYW5hbHlzaXMKCkhlcmUgd2UgcGVyZm9ybSBERXNlcTIgYW5hbHlzaXMgZm9yIGVhY2ggY29uZGl0aW9uIGNvbXBhcmlzb24gdXNpbmcgdGltZSBhcyBhIGZhY3RvciAoaS5lLiB0ZXN0aW5nIERFIGF0IGVhY2ggdGltcG9pbnQgYmV0d2VlbiBjb25kaXRpb25zKS4KClRoaXMgaXMgcGVyZm9ybWVkIG9uIGVhY2ggY29uZGl0aW9uIHNlcGFyYXRlbHkuCgoKCmBgYHtyfQpsaWJyYXJ5KERFU2VxMikKCnByb2plY3RfZGlyIDwtICcuLi9kYXRhL3Jlc3VsdHMvZGVzZXEyLycKCmRhdGUgPC0gJzIwMjEwMTI0JwoKCnJ1bkRlc2VxMkJldHdlZW5Db25kVGltZSA8LSBmdW5jdGlvbiAoZmlsZW5hbWUsIG91dHB1dF9maWxlbmFtZSkgewoKICAgIGZpbGVfcGF0aCA8LSBwYXN0ZShwcm9qZWN0X2RpciwgZmlsZW5hbWUsIHNlcD0nJykKICAKICAgIHByaW50KHBhc3RlKCI9PT09PT09PT09PT09PT09PT09PT09PT09PSBSVU5OSU5HICIsIGZpbGVuYW1lLCAiID09PT09PT09PT09PT09PT09PT09PT09PT09PT0iLCBzZXA9IiIpKQogICAgICAjIGh0dHBzOi8vYmlvY29uZHVjdG9yLm9yZy9wYWNrYWdlcy9yZWxlYXNlL3dvcmtmbG93cy92aWduZXR0ZXMvcm5hc2VxR2VuZS9pbnN0L2RvYy9ybmFzZXFHZW5lLmh0bWwKICAgIGNvdW50cyA8LSByZWFkLmNzdihmaWxlX3BhdGgsIGhlYWRlciA9IFRSVUUsIHNlcCA9ICIsIikKICAgIHJvd25hbWVzKGNvdW50cykgPC0gY291bnRzJHVfaWQKCiAgICAjIExldCdzIG1ha2Ugc3VyZSBvdXIgY291bnQgZGF0YSBpcyBpbiBtYXRyaXggZm9ybWF0IGFuZCBpcyBvbmx5IHRoZSBudW1lcmljIGNvbHVtbnMgaS5lLiBldmVyeXRoaW5nIGJ1dCB0aGUgZ2VuZXMKICAgIGNvdW50cyA8LSBjb3VudHNbLDI6bmNvbChjb3VudHMpXQoKICAgIHNhbXBsZV9uYW1lcyA8LSBjb2xuYW1lcyhjb3VudHMpICMgU2FtcGxlIG5hbWVzCiAgICAKICAgICMgTWFrZSBzdXJlIHdlIGRvbid0IGluY2x1ZGUgdGhlIElEIGluIG91ciBjb2x1bW5zCiAgICBjb3VudF9tYXRyaXggPC0gYXMubWF0cml4KGNvdW50cykKICAgIAogICAgIyBXZSBub3cgc2V0IHRoZSByb3cgbmFtZXMgdG8gYmUgdGhlIGdlbmUgSURzCiAgICByb3duYW1lcyhjb3VudF9tYXRyaXgpIDwtIHJvd25hbWVzKGNvdW50cykgCiAgICAKICAgICMgU2VwYXJhdGUgb3V0IGVhY2ggb2YgdGhlIHNhbXBsZSBuYW1lcyB0byBiZSB0aGUgZGlmZmVyZW50IGV4cGVyaW1lbnQgY29uZGl0aW9ucwogICAgY29uZGl0aW9uIDwtIGZhY3RvcihzYXBwbHkoc2FtcGxlX25hbWVzLCBmdW5jdGlvbih4KXtzdWJzdHIoeCwgc3RhcnQgPSAxLCBzdG9wID0gMil9KSkKICAgIHRpbWUgPC0gZmFjdG9yKHNhcHBseShzYW1wbGVfbmFtZXMsIGZ1bmN0aW9uKHgpe3N1YnN0cih4LCBzdGFydCA9IDMsIHN0b3AgPSA0KX0pKQogICAgdGlzc3VlIDwtIGZhY3RvcihzYXBwbHkoc2FtcGxlX25hbWVzLCBmdW5jdGlvbih4KXtzdWJzdHIoeCwgc3RhcnQgPSA1LCBzdG9wID0gNil9KSkKICAgIGNvbmRpdGlvbl9pZCA8LSBmYWN0b3Ioc2FwcGx5KHNhbXBsZV9uYW1lcywgZnVuY3Rpb24oeCl7aWYgKGdyZXBsKCJrbyIsIHgsIGZpeGVkID0gVFJVRSkpIHsxfSBlbHNlIHswfX0pKQoKICAgICMgRm9yIERFc2VxMiB3ZSBuZWVkIHRvIHR1cm4gdGhpcyBpbnRvIGEgZGF0YWZyYW1lIAogICAgc2FtcGxlX2RmID0gZGF0YS5mcmFtZShzYW1wbGVfbmFtZXMgPSBzYW1wbGVfbmFtZXMsIGNvbmRpdGlvbiA9IGNvbmRpdGlvbiwgdGltZSA9IHRpbWUsIHRpc3N1ZSA9IHRpc3N1ZSwgY29uZGl0aW9uX2lkPWNvbmRpdGlvbl9pZCkKICAgIGRkc19tYXQgPC0gREVTZXFEYXRhU2V0RnJvbU1hdHJpeChjb3VudERhdGEgPSBjb3VudF9tYXRyaXgsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb2xEYXRhID0gc2FtcGxlX2RmLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZGVzaWduID0gfnRpc3N1ZStjb25kaXRpb25faWQpICMgSGF2ZSB0aXNzdWUgYXMgYSBmYWN0b3IKICAgIAogICAgZGRzIDwtIGVzdGltYXRlU2l6ZUZhY3RvcnMoZGRzX21hdCkKICAgIAogICAgZGRzX21hdCRkZXNpZ24gIyBQcmludCB0aGUgZGVzaWduIG9mIHRoZSBleHBlcmltZW50CiAgICBkZHMgPC0gZXN0aW1hdGVTaXplRmFjdG9ycyhkZHNfbWF0KQogICAgCiAgICBudW1fc2FtcGxlc19tZWV0aW5nX2NyaXRlcmlhIDwtIDQgICMgYmUgc3RyaWN0IGFuZCBlbmZvcmNlIHRoYXQgYXQgbGVhc3QgaGFsZiB0aGUgc2FtcGxlcyBuZWVkIHRvIG1lZXQgdGhlIGNyaXRlcmlhIChpLmUuIG9uZSBmdWxsIGNvbmRpdGlvbikKICAgIG51bV9jb3VudHNfaW5fZ2VuZSA8LSAxMCAgIyBUaGV5IG5lZWQgYXQgbGVhc3QgMTAgY291bnRzCiAgICBrZWVwIDwtIHJvd1N1bXMoY291bnRzKGRkc19tYXQpID49IG51bV9jb3VudHNfaW5fZ2VuZSkgPj0gbnVtX3NhbXBsZXNfbWVldGluZ19jcml0ZXJpYQogICAgZGRzIDwtIGRkc19tYXRba2VlcCxdICMgT25seSBrZWVwIHRoZSByb3dzIHdpdGggdGhpcyBjcml0ZXJpYQogICAgCiAgICAgIyBMZXQncyBwcmludCB0aGUgbnVtYmVyIG9mIHJvd3MKICAgIHByaW50KHBhc3RlKCJEYXRhc2V0IGRpbWVuc2lvbnM6ICIsIG5yb3coZGRzKSwgbmNvbChkZHMpKSkKICAgIGlmIChuY29sKGRkcykgIT0gOCkgewogICAgICBwcmludChwYXN0ZSgiPT09PT09PT09PT09PT09PT09IFdBUk5JTkcgV0FSTklORyBXQVJOSU5HIFlPVVIgQ09MVU1OUyBNQVkgSEFWRSBUSEUgV1JPTkcgRElNUyAgPT09PT09PT09PT09PT09PT09PT09PT09PT09Iiwgc2VwPSIiKSkKICAgIH0KICAgIAogICAgIyBSdW4gREVzZXEyCiAgICBkZHMgPC0gREVTZXEoZGRzKQogICAgCiAgICAjIEJ1aWxkIHJlc3VsdHMgdGFibGUKICAgIHJlcyA8LSByZXN1bHRzKGRkcykKICAgIHByaW50KHBhc3RlKCJEZXNlcTIgZGVzaWduOiAiLCBkZXNpZ24oZGRzKSkpCgogICAgIyBTdW1hcmlzZSB0aGUgcmVzdWx0cwogICAgc3VtbWFyeShyZXMpCiAgICAKICAgICMgTGFzdGx5LCB3ZSBtYXkgd2FudCB0byBzZWUgdGhlIHJlc3VsdHMgb2YgdGhlIGhpZ2ggbG9nZm9sZGNoYW5nZSBlLmcuID4gMSB3aXRoIGEgcGFkaiB2YWx1ZSA8IDAuMDUKICAgIHJlc19wYWRqMDVfbGZjMSA8LSByZXN1bHRzKGRkcywgbGZjVGhyZXNob2xkPTIpCiAgICB0YWJsZShyZXNfcGFkajA1X2xmYzEkcGFkaiA8IDAuMDUpCiAgICAKICAgICMgU2F2ZSB0aGUgcmVzdWx0cwogICAgcmVzX29yZGVyZWQgPC0gcmVzW29yZGVyKHJlcyRwdmFsdWUpLF0KICAgIG91dHB1dF9maWxlbmFtZSA8LSBwYXN0ZShwcm9qZWN0X2Rpciwgb3V0cHV0X2ZpbGVuYW1lLCBzZXA9JycpCiAgICB3cml0ZS5jc3YocmVzX29yZGVyZWQsIGZpbGUgPSBvdXRwdXRfZmlsZW5hbWUpCiAgICByZXR1cm4ocmVzX29yZGVyZWQpCn0KCmBgYAoKCiMjIFJ1biBlYWNoIG9mIHRoZSBzdXBwbGltZW50YXJ5IERFc2VxIDIgZXhwZXJpbWVudHMKCmBgYHtyfQpydW5EZXNlcTJCZXR3ZWVuQ29uZFRpbWUocGFzdGUoJ21lcmdlZF9kZl9wb3N0ZXJpb3JfMTFfRkVBVFVSRV9DT1VOVFNfJywgZGF0ZSwgJy5jc3YnLCBzZXA9JycpLCBwYXN0ZSgnREVzZXEyX0NOU19wb3N0ZXJpb3JfMTFfJywgZGF0ZSwgJy5jc3YnLCBzZXA9JycpKQpydW5EZXNlcTJCZXR3ZWVuQ29uZFRpbWUocGFzdGUoJ21lcmdlZF9kZl9wb3N0ZXJpb3JfMTNfRkVBVFVSRV9DT1VOVFNfJywgZGF0ZSwgJy5jc3YnLCBzZXA9JycpLCBwYXN0ZSgnREVzZXEyX0NOU19wb3N0ZXJpb3JfMTNfJywgZGF0ZSwgJy5jc3YnLCBzZXA9JycpKQpydW5EZXNlcTJCZXR3ZWVuQ29uZFRpbWUocGFzdGUoJ21lcmdlZF9kZl9wb3N0ZXJpb3JfMTVfRkVBVFVSRV9DT1VOVFNfJywgZGF0ZSwgJy5jc3YnLCBzZXA9JycpLCBwYXN0ZSgnREVzZXEyX0NOU19wb3N0ZXJpb3JfMTVfJywgZGF0ZSwgJy5jc3YnLCBzZXA9JycpKQpydW5EZXNlcTJCZXR3ZWVuQ29uZFRpbWUocGFzdGUoJ21lcmdlZF9kZl9wb3N0ZXJpb3JfMThfRkVBVFVSRV9DT1VOVFNfJywgZGF0ZSwgJy5jc3YnLCBzZXA9JycpLCBwYXN0ZSgnREVzZXEyX0NOU19wb3N0ZXJpb3JfMThfJywgZGF0ZSwgJy5jc3YnLCBzZXA9JycpKQoKcnVuRGVzZXEyQmV0d2VlbkNvbmRUaW1lKHBhc3RlKCdtZXJnZWRfZGZfYW50ZXJpb3JfMTFfRkVBVFVSRV9DT1VOVFNfJywgZGF0ZSwgJy5jc3YnLCBzZXA9JycpLCBwYXN0ZSgnREVzZXEyX0NOU19hbnRlcmlvcl8xMV8nLCBkYXRlLCAnLmNzdicsIHNlcD0nJykpCnJ1bkRlc2VxMkJldHdlZW5Db25kVGltZShwYXN0ZSgnbWVyZ2VkX2RmX2FudGVyaW9yXzEzX0ZFQVRVUkVfQ09VTlRTXycsIGRhdGUsICcuY3N2Jywgc2VwPScnKSwgcGFzdGUoJ0RFc2VxMl9DTlNfYW50ZXJpb3JfMTNfJywgZGF0ZSwgJy5jc3YnLCBzZXA9JycpKQpydW5EZXNlcTJCZXR3ZWVuQ29uZFRpbWUocGFzdGUoJ21lcmdlZF9kZl9hbnRlcmlvcl8xNV9GRUFUVVJFX0NPVU5UU18nLCBkYXRlLCAnLmNzdicsIHNlcD0nJyksIHBhc3RlKCdERXNlcTJfQ05TX2FudGVyaW9yXzE1XycsIGRhdGUsICcuY3N2Jywgc2VwPScnKSkKcnVuRGVzZXEyQmV0d2VlbkNvbmRUaW1lKHBhc3RlKCdtZXJnZWRfZGZfYW50ZXJpb3JfMThfRkVBVFVSRV9DT1VOVFNfJywgZGF0ZSwgJy5jc3YnLCBzZXA9JycpLCBwYXN0ZSgnREVzZXEyX0NOU19hbnRlcmlvcl8xOF8nLCBkYXRlLCAnLmNzdicsIHNlcD0nJykpCgpgYGAKCgojIyMgUHJpbnQgc2Vzc2lvbiBpbmZvCmBgYHtyfQpzZXNzaW9uSW5mbygpCmBgYAoKIyMjIFJlZmVyZW5jZXM6CgpMb3ZlIE1JLCBIdWJlciBXLCBBbmRlcnMgUyAoMjAxNCkuIOKAnE1vZGVyYXRlZCBlc3RpbWF0aW9uIG9mIGZvbGQgY2hhbmdlIGFuZCBkaXNwZXJzaW9uIGZvciBSTkEtc2VxIGRhdGEgd2l0aCBERVNlcTIu4oCdIEdlbm9tZSBCaW9sb2d5LCAxNSwgNTUwLiBkb2k6IDEwLjExODYvczEzMDU5LTAxNC0wNTUwLTgu