The analysis starts with a matrix of read counts, filtered based on gene and cell requirements. In this case, let us use the raw numbers of reads mapped to each endogenous feature, recast as integer type:
table(isSpike(sce.norm, "ERCC"))
##
## FALSE TRUE
## 8226 44
sce.endo <- sce.norm[!isSpike(sce.norm, "ERCC"),]
## [1] 8226 342
Considering the demonstrated impact on the Time factor on gene expression profiles, and the fact that differential expression will not be assessed between time points, individual time points will be processed separately for the remainder of this scde analysis.
sce.2h <- sce.endo[,sce.endo$Time == '2h']
dim(sce.2h)
## [1] 8226 119
sce.4h <- sce.endo[,sce.endo$Time == '4h']
dim(sce.4h)
## [1] 8226 110
sce.6h <- sce.endo[,sce.endo$Time == '6h']
dim(sce.6h)
## [1] 8226 113
The analysis starts with a matrix of read counts, filtered based on gene and cell requirements.
In this case, for each time point, let us retain only features detected with at least 10 counts in at least 10 cells of any of the five experimental groups, use the raw numbers of reads mapped to each endogenous feature, recast as integer type.
Let us first define a function to apply the detection cutoff:
filterCounts <- function(m, counts = 10, cells = 10){
apply(m, 1, function(e){
return(sum(e >= counts) >= cells)
})
}
sg.2h <- droplevels(sce.2h$Group)
keep.2h <- filterCounts(counts(sce.2h))
cd.2h <- counts(sce.2h)[keep.2h,]; storage.mode(cd.2h) <- 'integer'
## [1] 7755 119
sg.4h <- droplevels(sce.4h$Group)
keep.4h <- filterCounts(counts(sce.4h))
cd.4h <- counts(sce.4h)[keep.4h,]; storage.mode(cd.4h) <- 'integer'
## [1] 7411 110
sg.6h <- droplevels(sce.6h$Group)
keep.6h <- filterCounts(counts(sce.6h))
cd.6h <- counts(sce.6h)[keep.6h,]; storage.mode(cd.6h) <- 'integer'
## [1] 7604 113
Here, we fit the error models on which all subsequent calculations will rely. The fitting process relies on a subset of robust genes that are detected in multiple cross-cell comparisons. Here we supply the groups argument, so that the error models for each experimental group of cells are fit independently. If the groups argument is omitted, the models would be fit using a common set.
o.ifm.2h <- scde.error.models(
counts = cd.2h, groups = sg.2h, n.cores = 4, verbose = 1
)
Particularly poor cells may result in abnormal fits, most commonly showing negative corr.a, and should be removed.
valid.cells <- o.ifm.2h$corr.a > 0; table(valid.cells)
## valid.cells
## TRUE
## 119
o.ifm.2h <- o.ifm.2h[valid.cells, ]
o.ifm.4h <- scde.error.models(
counts = cd.4h, groups = sg.4h, n.cores = 4, verbose = 1
)
Particularly poor cells may result in abnormal fits, most commonly showing negative corr.a, and should be removed.
valid.cells <- o.ifm.4h$corr.a > 0; table(valid.cells)
## valid.cells
## TRUE
## 110
o.ifm.4h <- o.ifm.4h[valid.cells, ]
o.ifm.6h <- scde.error.models(
counts = cd.6h, groups = sg.6h, n.cores = 4, verbose = 1
)
Particularly poor cells may result in abnormal fits, most commonly showing negative corr.a, and should be removed.
valid.cells <- o.ifm.6h$corr.a > 0; table(valid.cells)
## valid.cells
## TRUE
## 113
o.ifm.6h <- o.ifm.6h[valid.cells, ]
The scde.error.models produces error models with cells reordered by experimental group. For clarity, let us prepare a SCESet and count matrix that match this order:
sce.ifm.2h <- sce.2h[,rownames(o.ifm.2h)]
sg.ifm.2h <- droplevels(sce.ifm.2h$Group)
cd.ifm.2h <- counts(sce.ifm.2h)[rownames(cd.2h),]; storage.mode(cd.ifm.2h) <- 'integer'
sce.ifm.4h <- sce.4h[,rownames(o.ifm.4h)]
sg.ifm.4h <- droplevels(sce.ifm.4h$Group)
cd.ifm.4h <- counts(sce.ifm.4h)[rownames(cd.4h),]; storage.mode(cd.ifm.4h) <- 'integer'
sce.ifm.6h <- sce.6h[,rownames(o.ifm.6h)]
sg.ifm.6h <- droplevels(sce.ifm.6h$Group)
cd.ifm.6h <- counts(sce.ifm.6h)[rownames(cd.6h),]; storage.mode(cd.ifm.6h) <- 'integer'
Finally, we need to define an expression magnitude prior for the genes. Its main function, however, is to define a grid of expression magnitude values on which the numerical calculations will be carried out.
o.prior.2h <- scde.expression.prior(
models = o.ifm.2h, counts = cd.ifm.2h, show.plot = TRUE
)

o.prior.4h <- scde.expression.prior(
models = o.ifm.4h, counts = cd.ifm.4h, show.plot = TRUE
)

o.prior.6h <- scde.expression.prior(
models = o.ifm.6h, counts = cd.ifm.6h, show.plot = TRUE
)

Let us first define:
scde.res <- list()
convert.z.score <- function(x, one.sided = NULL) {
z <- x$Z
if(is.null(one.sided)) {
pval = pnorm(-abs(z));
pval = 2 * pval
} else if(one.sided=="-") {
pval = pnorm(z);
} else {
pval = pnorm(-z);
}
x <- cbind(
x,
p.value = pval
)
return(x);
}
addGeneName <- function(x){
x <- cbind(
gene_name = with(rowData(sce.endo), gene_name[match(rownames(x), gene_id)]),
x
)
return(x)
}
orderResults <- function(x){
x <- x[with(x, order(abs(Z), decreasing = TRUE)),]
return(x)
}
normExprsById <- function(geneId){
geneName <- subset(rowData(sce.endo),gene_id==geneId,"gene_name",drop=TRUE)
gdata <- data.frame(
logcounts = assay(sce.endo, "logcounts")[geneId,],
colData(sce.endo)[,c("Infection","Status","Time")],
row.names = colnames(sce.endo)
)
ggplot(gdata, aes(gsub("-", "\n", Infection), logcounts)) +
geom_violin(
aes(fill = Infection, alpha = Status),
draw_quantiles = c(0.25, 0.5, 0.75)
) +
geom_jitter(width = 0.1, alpha = 0.5, size = 0.5) +
facet_grid(Time ~ Status) +
scale_fill_manual(values = col.infection) +
scale_alpha_discrete("Status", range = c(0.3, 0.5)) +
ggtitle(sprintf("%s - %s", geneId, geneName)) +
theme_minimal() +
labs(y = "Normalised expression", x = "Infection") +
guides(alpha = "none")
}
single.scde.2h <- function(gene, groupTarget, groupRef){
gT <- sprintf("2h_%s", groupTarget); gR <- sprintf("2h_%s", groupRef)
sg.test <- factor(colData(sce.ifm.2h)[,"Group"], levels = c(gT, gR))
scde.test.gene.expression.difference(
gene, o.ifm.2h, cd.ifm.2h, o.prior.2h, sg.test,
n.cores = 4, verbose = 1
)
}
sig.levels <- c(0.05, 0.01)
volcano.sig <- data.frame(
P = sig.levels,
level = as.character(sig.levels)
)
volcano.mle <- function(x, sub = NULL){
varName <- deparse(substitute(x))
x <- convert.z.score(x)
gg <- ggplot(x, aes(mle, -log10(p.value))) +
geom_point(aes(colour = (cZ != 0))) +
geom_hline(aes(yintercept=-log10(p.value),linetype=level),volcano.sig) +
ggtitle(varName, sub)
print(gg)
return(x)
}
contrasts.2h <- list(
c("2h_STM-D23580_Infected", "2h_Mock_Uninfected"), # vs. Mock
c("2h_STM-LT2_Infected", "2h_Mock_Uninfected"),
c("2h_STM-D23580_Exposed", "2h_Mock_Uninfected"),
c("2h_STM-LT2_Exposed", "2h_Mock_Uninfected"),
c("2h_STM-D23580_Infected", "2h_STM-LT2_Infected"), # direct
c("2h_STM-D23580_Infected", "2h_STM-D23580_Exposed"),
c("2h_STM-LT2_Infected", "2h_STM-LT2_Exposed"),
c("2h_STM-D23580_Exposed", "2h_STM-LT2_Exposed")
)
contrasts.4h <- list(
c("4h_STM-D23580_Infected", "4h_Mock_Uninfected"), # 4h
c("4h_STM-LT2_Infected", "4h_Mock_Uninfected"),
c("4h_STM-D23580_Exposed", "4h_Mock_Uninfected"),
c("4h_STM-LT2_Exposed", "4h_Mock_Uninfected"),
c("4h_STM-D23580_Infected", "4h_STM-LT2_Infected"), # 4h
c("4h_STM-D23580_Infected", "4h_STM-D23580_Exposed"),
c("4h_STM-LT2_Infected", "4h_STM-LT2_Exposed"),
c("4h_STM-D23580_Exposed", "4h_STM-LT2_Exposed")
)
contrasts.6h <- list(
c("6h_STM-D23580_Infected", "6h_Mock_Uninfected"), # 6h
c("6h_STM-LT2_Infected", "6h_Mock_Uninfected"),
c("6h_STM-D23580_Exposed", "6h_Mock_Uninfected"),
c("6h_STM-LT2_Exposed", "6h_Mock_Uninfected"),
c("6h_STM-D23580_Infected", "6h_STM-LT2_Infected"), # 6h
c("6h_STM-D23580_Infected", "6h_STM-D23580_Exposed"),
c("6h_STM-LT2_Infected", "6h_STM-LT2_Exposed"),
c("6h_STM-D23580_Exposed", "6h_STM-LT2_Exposed")
)
stopifnot(all(rownames(o.ifm.2h) == colnames(cd.ifm.2h)))
for (contrastNames in contrasts.2h){
groupTarget <- contrastNames[1]; groupRef <- contrastNames[2]
sg.test <- factor(sce.ifm.2h$Group, levels = c(groupTarget, groupRef))
names(sg.test) <- colnames(sce.ifm.2h); summary(sg.test)
contrastName <- sprintf("%s-%s", groupTarget, groupRef); message(contrastName)
scde.res[[contrastName]] <- scde.expression.difference(
o.ifm.2h, cd.ifm.2h, o.prior.2h, sg.test, n.cores = 4, verbose = 1
)
}
stopifnot(all(rownames(o.ifm.4h) == colnames(cd.ifm.4h)))
for (contrastNames in contrasts.4h){
groupTarget <- contrastNames[1]; groupRef <- contrastNames[2]
sg.test <- factor(sce.ifm.4h$Group, levels = c(groupTarget, groupRef))
names(sg.test) <- colnames(sce.ifm.4h); summary(sg.test)
contrastName <- sprintf("%s-%s", groupTarget, groupRef); message(contrastName)
scde.res[[contrastName]] <- scde.expression.difference(
o.ifm.4h, cd.ifm.4h, o.prior.4h, sg.test, n.cores = 4, verbose = 1
)
}
stopifnot(all(rownames(o.ifm.6h) == colnames(cd.ifm.6h)))
for (contrastNames in contrasts.6h){
groupTarget <- contrastNames[1]; groupRef <- contrastNames[2]
sg.test <- factor(sce.ifm.6h$Group, levels = c(groupTarget, groupRef))
names(sg.test) <- colnames(sce.ifm.6h); summary(sg.test)
contrastName <- sprintf("%s-%s", groupTarget, groupRef); message(contrastName)
scde.res[[contrastName]] <- scde.expression.difference(
o.ifm.6h, cd.ifm.6h, o.prior.6h, sg.test, n.cores = 4, verbose = 1
)
}
Let us first identify the extreme values of maximum likelihood estimate of fold-change to scale subsequent plot for comparability and symmetry of axes:
mleRange <-
max(abs(do.call("c", lapply(scde.res, function(x){return(x$mle)}))))*c(-1,1)


| P.01 | P.05 | cZ | |
|---|---|---|---|
| 2h_STM-D23580_Infected-2h_Mock_Uninfected | 84 | 259 | 92 |
| 2h_STM-LT2_Infected-2h_Mock_Uninfected | 136 | 354 | 301 |
| 2h_STM-D23580_Exposed-2h_Mock_Uninfected | 125 | 317 | 229 |
| 2h_STM-LT2_Exposed-2h_Mock_Uninfected | 134 | 328 | 209 |
| 4h_STM-D23580_Infected-4h_Mock_Uninfected | 479 | 927 | 5758 |
| 4h_STM-LT2_Infected-4h_Mock_Uninfected | 618 | 1135 | 7025 |
| 4h_STM-D23580_Exposed-4h_Mock_Uninfected | 443 | 868 | 5547 |
| 4h_STM-LT2_Exposed-4h_Mock_Uninfected | 558 | 1006 | 6696 |
| 6h_STM-D23580_Infected-6h_Mock_Uninfected | 1155 | 1954 | 7338 |
| 6h_STM-LT2_Infected-6h_Mock_Uninfected | 1154 | 1978 | 7348 |
| 6h_STM-D23580_Exposed-6h_Mock_Uninfected | 978 | 1740 | 7274 |
| 6h_STM-LT2_Exposed-6h_Mock_Uninfected | 1090 | 1862 | 7329 |
| P.01 | P.05 | cZ | |
|---|---|---|---|
| 2h_STM-D23580_Infected-2h_STM-LT2_Infected | 60 | 224 | 42 |
| 2h_STM-D23580_Infected-2h_STM-D23580_Exposed | 55 | 241 | 30 |
| 2h_STM-LT2_Infected-2h_STM-LT2_Exposed | 63 | 210 | 39 |
| 2h_STM-D23580_Exposed-2h_STM-LT2_Exposed | 56 | 220 | 38 |
| 4h_STM-D23580_Infected-4h_STM-LT2_Infected | 64 | 257 | 56 |
| 4h_STM-D23580_Infected-4h_STM-D23580_Exposed | 121 | 401 | 569 |
| 4h_STM-LT2_Infected-4h_STM-LT2_Exposed | 96 | 286 | 178 |
| 4h_STM-D23580_Exposed-4h_STM-LT2_Exposed | 92 | 315 | 190 |
| 6h_STM-D23580_Infected-6h_STM-LT2_Infected | 74 | 279 | 65 |
| 6h_STM-D23580_Infected-6h_STM-D23580_Exposed | 151 | 425 | 641 |
| 6h_STM-LT2_Infected-6h_STM-LT2_Exposed | 85 | 306 | 116 |
| 6h_STM-D23580_Exposed-6h_STM-LT2_Exposed | 109 | 394 | 441 |
Let us define:
resCols <- c("mle","Z")
add.sig.XY <- function(x, cutoff = 0.01){
sig.levels <- c(NA_character_, "X", "Y", "Both")
alpha.levels <- c(0.3, 1, 1, 1)
sig.x <- (x$p.value.x < cutoff); sig.y <- (x$p.value.y < cutoff)
x$p.01 <- sig.levels[1 + sig.x + 2 * sig.y]
x$alpha <- alpha.levels[1 + sig.x + 2 * sig.y]
return(x)
}
addOpposite <- function(x, cutoff = 0.01){
x$opposite <- NA_character_
x.sig.idx <- (x$p.value.x < cutoff & x$p.value.y < cutoff)
x.opposite <- (x$mle.x * x$mle.y) < 0
x[x.sig.idx, "opposite"] <- x.opposite[x.sig.idx]
return(x)
}
contrasts.contrasts.2h <- list(
c("2h_STM-D23580_Infected-2h_Mock_Uninfected","2h_STM-LT2_Infected-2h_Mock_Uninfected"),
c("2h_STM-D23580_Infected-2h_Mock_Uninfected","2h_STM-D23580_Exposed-2h_Mock_Uninfected"),
c("2h_STM-LT2_Infected-2h_Mock_Uninfected","2h_STM-LT2_Exposed-2h_Mock_Uninfected"),
c("2h_STM-D23580_Exposed-2h_Mock_Uninfected","2h_STM-LT2_Exposed-2h_Mock_Uninfected")
)
contrasts.contrasts.4h <- list(
c("4h_STM-D23580_Infected-4h_Mock_Uninfected","4h_STM-LT2_Infected-4h_Mock_Uninfected"),
c("4h_STM-D23580_Infected-4h_Mock_Uninfected","4h_STM-D23580_Exposed-4h_Mock_Uninfected"),
c("4h_STM-LT2_Infected-4h_Mock_Uninfected","4h_STM-LT2_Exposed-4h_Mock_Uninfected"),
c("4h_STM-D23580_Exposed-4h_Mock_Uninfected","4h_STM-LT2_Exposed-4h_Mock_Uninfected")
)
contrasts.contrasts.6h <- list(
c("6h_STM-D23580_Infected-6h_Mock_Uninfected","6h_STM-LT2_Infected-6h_Mock_Uninfected"),
c("6h_STM-D23580_Infected-6h_Mock_Uninfected","6h_STM-D23580_Exposed-6h_Mock_Uninfected"),
c("6h_STM-LT2_Infected-6h_Mock_Uninfected","6h_STM-LT2_Exposed-6h_Mock_Uninfected"),
c("6h_STM-D23580_Exposed-6h_Mock_Uninfected","6h_STM-LT2_Exposed-6h_Mock_Uninfected")
)


