Title: | A Multi-Modal Simulator for Spearheading Single-Cell Omics Analyses |
---|---|
Description: | A novel, multi-modal simulation engine for studying dynamic cellular processes at single-cell resolution. 'dyngen' is more flexible than current single-cell simulation engines. It allows better method development and benchmarking, thereby stimulating development and testing of novel computational methods. Cannoodt et al. (2021) <doi:10.1038/s41467-021-24152-2>. |
Authors: | Robrecht Cannoodt [aut, cre, cph] , Wouter Saelens [aut] |
Maintainer: | Robrecht Cannoodt <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.0.5 |
Built: | 2024-11-16 05:24:02 UTC |
Source: | https://github.com/dynverse/dyngen |
For use with other packages compatible with dyno, anndata, SingleCellExperiment, or Seurat.
as_dyno( model, store_dimred = !is.null(model$simulations$dimred), store_cellwise_grn = !is.null(model$experiment$cellwise_grn), store_rna_velocity = !is.null(model$experiment$rna_velocity) ) as_anndata( model, store_dimred = !is.null(model$simulations$dimred), store_cellwise_grn = !is.null(model$experiment$cellwise_grn), store_rna_velocity = !is.null(model$experiment$rna_velocity) ) as_sce( model, store_dimred = !is.null(model$simulations$dimred), store_cellwise_grn = !is.null(model$experiment$cellwise_grn), store_rna_velocity = !is.null(model$experiment$rna_velocity) ) as_seurat( model, store_dimred = !is.null(model$simulations$dimred), store_cellwise_grn = !is.null(model$experiment$cellwise_grn), store_rna_velocity = !is.null(model$experiment$rna_velocity) ) as_list( model, store_dimred = !is.null(model$simulations$dimred), store_cellwise_grn = !is.null(model$experiment$cellwise_grn), store_rna_velocity = !is.null(model$experiment$rna_velocity) ) wrap_dataset( model, format = c("list", "dyno", "sce", "seurat", "anndata", "none"), store_dimred = !is.null(model$simulations$dimred), store_cellwise_grn = !is.null(model$experiment$cellwise_grn), store_rna_velocity = !is.null(model$experiment$rna_velocity) )
as_dyno( model, store_dimred = !is.null(model$simulations$dimred), store_cellwise_grn = !is.null(model$experiment$cellwise_grn), store_rna_velocity = !is.null(model$experiment$rna_velocity) ) as_anndata( model, store_dimred = !is.null(model$simulations$dimred), store_cellwise_grn = !is.null(model$experiment$cellwise_grn), store_rna_velocity = !is.null(model$experiment$rna_velocity) ) as_sce( model, store_dimred = !is.null(model$simulations$dimred), store_cellwise_grn = !is.null(model$experiment$cellwise_grn), store_rna_velocity = !is.null(model$experiment$rna_velocity) ) as_seurat( model, store_dimred = !is.null(model$simulations$dimred), store_cellwise_grn = !is.null(model$experiment$cellwise_grn), store_rna_velocity = !is.null(model$experiment$rna_velocity) ) as_list( model, store_dimred = !is.null(model$simulations$dimred), store_cellwise_grn = !is.null(model$experiment$cellwise_grn), store_rna_velocity = !is.null(model$experiment$rna_velocity) ) wrap_dataset( model, format = c("list", "dyno", "sce", "seurat", "anndata", "none"), store_dimred = !is.null(model$simulations$dimred), store_cellwise_grn = !is.null(model$experiment$cellwise_grn), store_rna_velocity = !is.null(model$experiment$rna_velocity) )
model |
A dyngen output model for which the experiment has been emulated with |
store_dimred |
Whether or not to store the dimensionality reduction constructed on the true counts. |
store_cellwise_grn |
Whether or not to also store cellwise GRN information. |
store_rna_velocity |
Whether or not to store the log propensity ratios. |
format |
Which output format to use, must be one of 'dyno' (requires |
A dataset object.
data("example_model") dataset <- wrap_dataset(example_model, format = "list") dataset <- wrap_dataset(example_model, format = "dyno") dataset <- wrap_dataset(example_model, format = "sce") dataset <- wrap_dataset(example_model, format = "seurat") dataset <- wrap_dataset(example_model, format = "anndata") dataset <- wrap_dataset(example_model, format = "none")
data("example_model") dataset <- wrap_dataset(example_model, format = "list") dataset <- wrap_dataset(example_model, format = "dyno") dataset <- wrap_dataset(example_model, format = "sce") dataset <- wrap_dataset(example_model, format = "seurat") dataset <- wrap_dataset(example_model, format = "anndata") dataset <- wrap_dataset(example_model, format = "none")
A module is a group of genes which, to some extent, shows the same expression behaviour. Several modules are connected together such that one or more genes from one module will regulate the expression of another module. By creating chains of modules, a dynamic behaviour in gene regulation can be created.
backbone(module_info, module_network, expression_patterns)
backbone(module_info, module_network, expression_patterns)
module_info |
A tibble containing meta information on the modules themselves.
|
module_network |
A tibble describing which modules regulate which other modules.
|
expression_patterns |
A tibble describing the expected expression pattern changes when a cell is simulated by dyngen. Each row represents one transition between two cell states.
|
A dyngen backbone.
dyngen on how to run a dyngen simulation
library(tibble) backbone <- backbone( module_info = tribble( ~module_id, ~basal, ~burn, ~independence, "M1", 1, TRUE, 1, "M2", 0, FALSE, 1, "M3", 0, FALSE, 1 ), module_network = tribble( ~from, ~to, ~effect, ~strength, ~hill, "M1", "M2", 1L, 1, 2, "M2", "M3", 1L, 1, 2 ), expression_patterns = tribble( ~from, ~to, ~module_progression, ~start, ~burn, ~time, "s0", "s1", "+M1", TRUE, TRUE, 30, "s1", "s2", "+M2,+M3", FALSE, FALSE, 80 ) )
library(tibble) backbone <- backbone( module_info = tribble( ~module_id, ~basal, ~burn, ~independence, "M1", 1, TRUE, 1, "M2", 0, FALSE, 1, "M3", 0, FALSE, 1 ), module_network = tribble( ~from, ~to, ~effect, ~strength, ~hill, "M1", "M2", 1L, 1, 2, "M2", "M3", 1L, 1, 2 ), expression_patterns = tribble( ~from, ~to, ~module_progression, ~start, ~burn, ~time, "s0", "s1", "+M1", TRUE, TRUE, 30, "s1", "s2", "+M2,+M3", FALSE, FALSE, 80 ) )
You can use the bblego
functions in order to create
custom backbones using various components. Please note that the bblego
functions currently only allow you to create tree-like backbones.
bblego(..., .list = NULL) bblego_linear( from, to, type = sample(c("simple", "doublerep1", "doublerep2"), 1), num_modules = sample(4:6, 1), burn = FALSE ) bblego_branching( from, to, type = "simple", num_steps = 3, num_modules = 2 + length(to) * (3 + num_steps), burn = FALSE ) bblego_start( to, type = sample(c("simple", "doublerep1", "doublerep2"), 1), num_modules = sample(4:6, 1) ) bblego_end( from, type = sample(c("simple", "doublerep1", "doublerep2"), 1), num_modules = sample(4:6, 1) )
bblego(..., .list = NULL) bblego_linear( from, to, type = sample(c("simple", "doublerep1", "doublerep2"), 1), num_modules = sample(4:6, 1), burn = FALSE ) bblego_branching( from, to, type = "simple", num_steps = 3, num_modules = 2 + length(to) * (3 + num_steps), burn = FALSE ) bblego_start( to, type = sample(c("simple", "doublerep1", "doublerep2"), 1), num_modules = sample(4:6, 1) ) bblego_end( from, type = sample(c("simple", "doublerep1", "doublerep2"), 1), num_modules = sample(4:6, 1) )
... , .list
|
|
from |
The begin state of this component. |
to |
The end state of this component. |
type |
Some components have alternative module regulatory networks.
|
num_modules |
The number of modules this component is allowed to use. Various components might require a minimum number of modules in order to work properly. |
burn |
Whether or not these components are part of the warm-up simulation. |
num_steps |
The number of branching steps to reduce the odds of double positive cells occurring. |
A backbone always needs to start with a single bblego_start()
state and
needs to end with one or more bblego_end()
states.
The order of the mentioned states needs to be such that a state is never
specified in the first argument (except for bblego_start()
) before
having been specified as the second argument.
A dyngen backbone.
backbone <- bblego( bblego_start("A", type = "simple", num_modules = 2), bblego_linear("A", "B", type = "simple", num_modules = 3), bblego_branching("B", c("C", "D"), type = "simple", num_steps = 3), bblego_end("C", type = "flipflop", num_modules = 4), bblego_end("D", type = "doublerep1", num_modules = 7) )
backbone <- bblego( bblego_start("A", type = "simple", num_modules = 2), bblego_linear("A", "B", type = "simple", num_modules = 3), bblego_branching("B", c("C", "D"), type = "simple", num_steps = 3), bblego_end("C", type = "flipflop", num_modules = 4), bblego_end("D", type = "doublerep1", num_modules = 7) )
Assumes the given models have the exact same feature ids and were run up until the generate_cells()
step.
In addition, the user is expected to run generate_experiment()
on the combined models.
combine_models(models, duplicate_gold_standard = TRUE)
combine_models(models, duplicate_gold_standard = TRUE)
models |
A named list of models. The names of the list will be used to prefix the different cellular states in the combined model. |
duplicate_gold_standard |
Whether or not the gold standards of the models are different and should be duplicated and prefixed. |
See the vignette on simulating batch effects on how to use this function.
A combined dyngen model.
data("example_model") model_ab <- combine_models(list("left" = example_model, "right" = example_model)) # show a dimensionality reduction plot_simulations(model_ab) plot_gold_mappings(model_ab, do_facet = FALSE)
data("example_model") model_ab <- combine_models(list("left" = example_model, "right" = example_model)) # show a dimensionality reduction plot_simulations(model_ab) plot_gold_mappings(model_ab, do_facet = FALSE)
A toolkit for generating synthetic single cell data.
initialise_model()
: Define and store settings for all following steps. See each of the sections below for more information.
Use a predefined backbone:
Create a custom backbone:
Visualise the backbone:
generate_tf_network()
: Generate a transcription factor network from the backbone
tf_network_default()
: Parameters for configuring this step
generate_feature_network()
: Generate a target network
feature_network_default()
: Parameters for configuring this step
plot_feature_network()
: Visualise the gene network
generate_kinetics()
: Generate the gene kinetics
kinetics_default()
, kinetics_random_distributions()
: Parameters for configuring this step
generate_gold_standard()
: Simulate the gold standard backbone, used for mapping to cell states afterwards
gold_standard_default()
: Parameters for configuring this step
plot_gold_mappings()
: Visualise the mapping of the simulations to the gold standard
plot_gold_simulations()
: Visualise the gold standard simulations using the dimred
plot_gold_expression()
: Visualise the expression of the gold standard over simulation time
generate_cells()
: Simulate the cells based on its GRN
simulation_default()
: Parameters for configuring this step
simulation_type_wild_type()
, simulation_type_knockdown()
: Used for configuring the type of simulation
kinetics_noise_none()
, kinetics_noise_simple()
: Different kinetics randomisers to apply to each simulation
plot_simulations()
: Visualise the simulations using the dimred
plot_simulation_expression()
: Visualise the expression of the simulations over simulation time
generate_experiment()
: Sample cells and transcripts from experiment
list_experiment_samplers()
, experiment_snapshot()
, experiment_synchronised()
: Parameters for configuring this step
simtime_from_backbone()
: Determine the simulation time from the backbone
plot_experiment_dimred()
: Plot a dimensionality reduction of the final dataset
as_dyno()
, wrap_dataset()
: Convert a dyngen model to a dyno dataset
as_anndata()
: Convert a dyngen model to an anndata dataset
as_sce()
: Convert a dyngen model to a SingleCellExperiment dataset
as_seurat()
: Convert a dyngen model to a Seurat dataset
generate_dataset()
: Run through steps 2 to 8 with a single function
plot_summary()
: Plot a summary of all dyngen simulation steps
example_model: A (very) small toy dyngen model, used for documentation and testing purposes
realcounts: A set of real single-cell expression datasets, to be used as reference datasets
realnets: A set of real gene regulatory networks, to be sampled in step 3
dyngen: This help page
get_timings()
: Extract execution timings for each of the dyngen steps
combine_models()
: Combine multiple dyngen models
rnorm_bounded()
: A bounded version of rnorm()
runif_subrange()
: A subrange version of runif()
model <- initialise_model( backbone = backbone_bifurcating() ) model <- model %>% generate_tf_network() %>% generate_feature_network() %>% generate_kinetics() %>% generate_gold_standard() %>% generate_cells() %>% generate_experiment() dataset <- wrap_dataset(model, format = "dyno") # format can also be set to "sce", "seurat", "anndata" or "list" # library(dynplot) # plot_dimred(dataset)
model <- initialise_model( backbone = backbone_bifurcating() ) model <- model %>% generate_tf_network() %>% generate_feature_network() %>% generate_kinetics() %>% generate_gold_standard() %>% generate_cells() %>% generate_experiment() dataset <- wrap_dataset(model, format = "dyno") # format can also be set to "sce", "seurat", "anndata" or "list" # library(dynplot) # plot_dimred(dataset)
Used for showcasing examples of functions.
example_model
example_model
An object of class list
(inherits from dyngen::init
) of length 19.
generate_cells()
runs the simulations of the cells, based on the gene regulatory network (GRN)
of the model.
simulation_default()
is used to configure parameters pertaining to this process.
generate_cells(model) simulation_default( burn_time = NULL, total_time = NULL, ssa_algorithm = ssa_etl(tau = 30/3600), census_interval = 4, experiment_params = bind_rows(simulation_type_wild_type(num_simulations = 32), simulation_type_knockdown(num_simulations = 0)), store_reaction_firings = FALSE, store_reaction_propensities = FALSE, compute_cellwise_grn = FALSE, compute_dimred = TRUE, compute_rna_velocity = FALSE, kinetics_noise_function = kinetics_noise_simple(mean = 1, sd = 0.005) ) simulation_type_wild_type( num_simulations, seed = sample.int(10 * num_simulations, num_simulations) ) simulation_type_knockdown( num_simulations, timepoint = runif(num_simulations), genes = "*", num_genes = sample(1:5, num_simulations, replace = TRUE, prob = 0.25^(1:5)), multiplier = runif(num_simulations, 0, 1), seed = sample.int(10 * num_simulations, num_simulations) )
generate_cells(model) simulation_default( burn_time = NULL, total_time = NULL, ssa_algorithm = ssa_etl(tau = 30/3600), census_interval = 4, experiment_params = bind_rows(simulation_type_wild_type(num_simulations = 32), simulation_type_knockdown(num_simulations = 0)), store_reaction_firings = FALSE, store_reaction_propensities = FALSE, compute_cellwise_grn = FALSE, compute_dimred = TRUE, compute_rna_velocity = FALSE, kinetics_noise_function = kinetics_noise_simple(mean = 1, sd = 0.005) ) simulation_type_wild_type( num_simulations, seed = sample.int(10 * num_simulations, num_simulations) ) simulation_type_knockdown( num_simulations, timepoint = runif(num_simulations), genes = "*", num_genes = sample(1:5, num_simulations, replace = TRUE, prob = 0.25^(1:5)), multiplier = runif(num_simulations, 0, 1), seed = sample.int(10 * num_simulations, num_simulations) )
model |
A dyngen intermediary model for which the gold standard has been generated with |
burn_time |
The burn in time of the system, used to determine an initial state vector. If |
total_time |
The total simulation time of the system. If |
ssa_algorithm |
Which SSA algorithm to use for simulating the cells with |
census_interval |
A granularity parameter for the outputted simulation. |
experiment_params |
A tibble generated by rbinding multiple calls of |
store_reaction_firings |
Whether or not to store the number of reaction firings. |
store_reaction_propensities |
Whether or not to store the propensity values of the reactions. |
compute_cellwise_grn |
Whether or not to compute the cellwise GRN activation values. |
compute_dimred |
Whether to perform a dimensionality reduction after simulation. |
compute_rna_velocity |
Whether or not to compute the propensity ratios after simulation. |
kinetics_noise_function |
A function that will add noise to the kinetics of each simulation.
It takes the |
num_simulations |
The number of simulations to run. |
seed |
A set of seeds for each of the simulations. |
timepoint |
The relative time point of the knockdown |
genes |
Which genes to sample from. |
num_genes |
The number of genes to knock down. |
multiplier |
The strength of the knockdown. Use 0 for a full knockout, 0<x<1 for a knockdown, and >1 for an overexpression. |
A dyngen model.
dyngen on how to run a complete dyngen simulation
library(dplyr) model <- initialise_model( backbone = backbone_bifurcating(), simulation = simulation_default( ssa_algorithm = ssa_etl(tau = .1), experiment_params = bind_rows( simulation_type_wild_type(num_simulations = 4), simulation_type_knockdown(num_simulations = 4) ) ) ) data("example_model") model <- example_model %>% generate_cells() plot_simulations(model) plot_gold_mappings(model) plot_simulation_expression(model)
library(dplyr) model <- initialise_model( backbone = backbone_bifurcating(), simulation = simulation_default( ssa_algorithm = ssa_etl(tau = .1), experiment_params = bind_rows( simulation_type_wild_type(num_simulations = 4), simulation_type_knockdown(num_simulations = 4) ) ) ) data("example_model") model <- example_model %>% generate_cells() plot_simulations(model) plot_gold_mappings(model) plot_simulation_expression(model)
This function contains the complete pipeline for generating a dataset with dyngen. In order to have more control over how the dataset is generated, run each of the steps in this function separately.
generate_dataset( model, format = c("list", "dyno", "sce", "seurat", "anndata", "none"), output_dir = NULL, make_plots = FALSE, store_dimred = model$simulation_params$compute_dimred, store_cellwise_grn = model$simulation_params$compute_cellwise_grn, store_rna_velocity = model$simulation_params$compute_rna_velocity )
generate_dataset( model, format = c("list", "dyno", "sce", "seurat", "anndata", "none"), output_dir = NULL, make_plots = FALSE, store_dimred = model$simulation_params$compute_dimred, store_cellwise_grn = model$simulation_params$compute_cellwise_grn, store_rna_velocity = model$simulation_params$compute_rna_velocity )
model |
A dyngen initial model created with |
format |
Which output format to use, must be one of 'dyno' (requires |
output_dir |
If not |
make_plots |
Whether or not to generate an overview of the dataset. |
store_dimred |
Whether or not to store the dimensionality reduction constructed on the true counts. |
store_cellwise_grn |
Whether or not to also store cellwise GRN information. |
store_rna_velocity |
Whether or not to store the log propensity ratios. |
A list containing a dyngen model (li$model
) and a dynwrap dataset (li$dataset
).
model <- initialise_model( backbone = backbone_bifurcating() ) # generate dataset and output as a list format # please note other output formats exist: "dyno", "sce", "seurat", "anndata" out <- generate_dataset(model, format = "list") model <- out$model dataset <- out$dataset
model <- initialise_model( backbone = backbone_bifurcating() ) # generate dataset and output as a list format # please note other output formats exist: "dyno", "sce", "seurat", "anndata" out <- generate_dataset(model, format = "list") model <- out$model dataset <- out$dataset
generate_experiment()
samples cells along the different simulations.
Two approaches are implemented: sampling from an unsynchronised population of single cells (snapshot) or
sampling at multiple time points in a synchronised population (time series).
generate_experiment(model) list_experiment_samplers() experiment_snapshot( realcount = NULL, map_reference_cpm = TRUE, map_reference_ls = TRUE, weight_bw = 0.1 ) experiment_synchronised( realcount = NULL, map_reference_cpm = TRUE, map_reference_ls = TRUE, num_timepoints = 8, pct_between = 0.75 )
generate_experiment(model) list_experiment_samplers() experiment_snapshot( realcount = NULL, map_reference_cpm = TRUE, map_reference_ls = TRUE, weight_bw = 0.1 ) experiment_synchronised( realcount = NULL, map_reference_cpm = TRUE, map_reference_ls = TRUE, num_timepoints = 8, pct_between = 0.75 )
model |
A dyngen intermediary model for which the simulations have been run with |
realcount |
The name of a dataset in realcounts. If |
map_reference_cpm |
Whether or not to try to match the CPM distribution to that of a reference dataset. |
map_reference_ls |
Whether or not to try to match the distribution of the library sizes to that of the reference dataset. |
weight_bw |
[snapshot] A bandwidth parameter for determining the distribution of cells along each edge in order to perform weighted sampling. |
num_timepoints |
[synchronised] The number of time points used in the experiment. |
pct_between |
[synchronised] The percentage of 'unused' simulation time. |
experiment_snapshot()
samples the cells using the length of each edge in the milestone network as weights.
See Supplementary Figure 7A from the dyngen paper for an illustration of how these weights are computed.
experiment_synchronised()
samples the cells along the simulation timeline by binning it into num_timepoints
groups separated by num_timepoints-1
gaps.
See Supplementary Figure 7B from the dyngen paper for an illustration of how the timepoint groups are computed.
A dyngen model.
names(list_experiment_samplers()) model <- initialise_model( backbone = backbone_bifurcating(), experiment = experiment_synchronised() ) data("example_model") model <- example_model %>% generate_experiment() plot_experiment_dimred(model)
names(list_experiment_samplers()) model <- initialise_model( backbone = backbone_bifurcating(), experiment = experiment_synchronised() ) data("example_model") model <- example_model %>% generate_experiment() plot_experiment_dimred(model)
generate_feature_network()
generates a network of target genes that are regulated
by the previously generated TFs, and also a separate network of housekeeping genes (HKs).
feature_network_default()
is used to configure parameters pertaining to this process.
generate_feature_network(model) feature_network_default( realnet = NULL, damping = 0.01, target_resampling = Inf, max_in_degree = 5 )
generate_feature_network(model) feature_network_default( realnet = NULL, damping = 0.01, target_resampling = Inf, max_in_degree = 5 )
model |
A dyngen intermediary model for which the transcription network has been generated with |
realnet |
The name of a gene regulatory network (GRN) in realnets.
If |
damping |
A damping factor used for the PageRank algorithm used to subsample the realnet. |
target_resampling |
How many targets / HKs to sample from the realnet per iteration. |
max_in_degree |
The maximum in-degree of a target / HK. |
A dyngen model.
dyngen on how to run a complete dyngen simulation
model <- initialise_model( backbone = backbone_bifurcating(), feature_network = feature_network_default(damping = 0.1) ) data("example_model") model <- example_model %>% generate_tf_network() %>% generate_feature_network() plot_feature_network(model)
model <- initialise_model( backbone = backbone_bifurcating(), feature_network = feature_network_default(damping = 0.1) ) data("example_model") model <- example_model %>% generate_tf_network() %>% generate_feature_network() plot_feature_network(model)
generate_gold_standard()
runs simulations in order to determine the gold standard
of the simulations.
gold_standard_default()
is used to configure parameters pertaining to this process.
generate_gold_standard(model) gold_standard_default( tau = 30/3600, census_interval = 10/60, simulate_targets = FALSE )
generate_gold_standard(model) gold_standard_default( tau = 30/3600, census_interval = 10/60, simulate_targets = FALSE )
model |
A dyngen intermediary model for which the kinetics of the feature network has been generated with |
tau |
The time step of the ODE algorithm used to generate the gold standard. |
census_interval |
A granularity parameter of the gold standard time steps. Should be larger than or equal to |
simulate_targets |
Also simulate the targets during the gold standard simulation |
A dyngen model.
dyngen on how to run a complete dyngen simulation
model <- initialise_model( backbone = backbone_bifurcating(), gold_standard = gold_standard_default(tau = .01, census_interval = 1) ) data("example_model") model <- example_model %>% generate_gold_standard() plot_gold_simulations(model) plot_gold_mappings(model) plot_gold_expression(model)
model <- initialise_model( backbone = backbone_bifurcating(), gold_standard = gold_standard_default(tau = .01, census_interval = 1) ) data("example_model") model <- example_model %>% generate_gold_standard() plot_gold_simulations(model) plot_gold_mappings(model) plot_gold_expression(model)
generate_kinetics()
samples the kinetics of genes in the feature network for which
the kinetics have not yet been defined.
kinetics_default()
is used to configure parameters pertaining to this process.
kinetics_random_distributions()
will do the same, but the distributions are also
randomised.
generate_kinetics(model) kinetics_default() kinetics_random_distributions()
generate_kinetics(model) kinetics_default() kinetics_random_distributions()
model |
A dyngen intermediary model for which the feature network has been generated with |
To write different kinetics settings, you need to write three functions
with interface function(feature_info, feature_network, cache_dir, verbose)
.
Described below are the default kinetics samplers.
sampler_tfs()
mutates the feature_info
data frame by adding the following columns:
transcription_rate
: the rate at which pre-mRNAs are transcribed,
in pre-mRNA / hour. Default distribution: U(1, 2).
translation_rate
: the rate at which mRNAs are translated into proteins,
in protein / mRNA / hour. Default distribution: U(100, 150).
mrna_halflife
: the half-life of (pre-)mRNA molecules, in hours.
Default distribution: U(2.5, 5).
protein_halflife
: the half-life of proteins, in hours.
Default distribution: U(5, 10).
splicing_rate
: the rate at which pre-mRNAs are spliced into mRNAs,
in reactions / hour. Default value: log(2) / (10/60), which corresponds to a half-life of 10 minutes.
independence
: the degree to which all regulators need to be bound for transcription to occur (0), or
whether transcription can occur if only one of the regulators is bound (1).
sampler_nontfs()
samples the transcription_rate
, translation_rate
,
mrna_halflife
and protein_halflife
from a supplementary file of Schwanhäusser et al.,
2011, doi.org/10.1038/nature10098. splicing_rate
is by default the same as in sampler_tfs()
.
independence
is sampled from U(0, 1).
sampler_interactions()
mutates the feature_network
data frame by adding the following columns.
effect
: the effect of the interaction; upregulating = +1, downregulating = -1.
By default, sampled from -1, 1 with probabilities .25, .75.
strength
: the strength of the interaction. Default distribution: 10^U(0, 2).
hill
: the hill coefficient. Default distribution: N(2, 2) with a minimum of 1 and a maximum of 10.
A dyngen model.
dyngen on how to run a complete dyngen simulation
model <- initialise_model( backbone = backbone_bifurcating(), kinetics_params = kinetics_default() ) data("example_model") model <- example_model %>% generate_kinetics()
model <- initialise_model( backbone = backbone_bifurcating(), kinetics_params = kinetics_default() ) data("example_model") model <- example_model %>% generate_kinetics()
generate_tf_network()
generates the transcription factors (TFs) that
drive the dynamic process a cell undergoes.
tf_network_default()
is used to configure parameters pertaining to this process.
generate_tf_network(model) tf_network_default( min_tfs_per_module = 1L, sample_num_regulators = function() 2, weighted_sampling = FALSE )
generate_tf_network(model) tf_network_default( min_tfs_per_module = 1L, sample_num_regulators = function() 2, weighted_sampling = FALSE )
model |
A dyngen initial model created with |
min_tfs_per_module |
The minimum number of TFs to generate per module in the backbone. |
sample_num_regulators |
A function to generate the number of TFs per module each TF will be regulated by. |
weighted_sampling |
When determining what TFs another TF is regulated by, whether to perform weighted sampling (by rank) or not. |
A dyngen model.
dyngen on how to run a complete dyngen simulation
model <- initialise_model( backbone = backbone_bifurcating() ) model <- model %>% generate_tf_network() plot_feature_network(model)
model <- initialise_model( backbone = backbone_bifurcating() ) model <- model %>% generate_tf_network() plot_feature_network(model)
Return the timings of each of the dyngen steps
get_timings(model)
get_timings(model)
model |
A dyngen object |
A data frame with columns "group"
, "task"
, "time_elapsed"
.
data("example_model") timings <- get_timings(example_model)
data("example_model") timings <- get_timings(example_model)
Initial settings for simulating a dyngen dataset
initialise_model( backbone, num_cells = 1000, num_tfs = nrow(backbone$module_info), num_targets = 100, num_hks = 50, distance_metric = c("pearson", "spearman", "cosine", "euclidean", "manhattan"), tf_network_params = tf_network_default(), feature_network_params = feature_network_default(), kinetics_params = kinetics_default(), gold_standard_params = gold_standard_default(), simulation_params = simulation_default(), experiment_params = experiment_snapshot(), verbose = TRUE, download_cache_dir = getOption("dyngen_download_cache_dir"), num_cores = getOption("Ncpus") %||% 1L, id = NULL )
initialise_model( backbone, num_cells = 1000, num_tfs = nrow(backbone$module_info), num_targets = 100, num_hks = 50, distance_metric = c("pearson", "spearman", "cosine", "euclidean", "manhattan"), tf_network_params = tf_network_default(), feature_network_params = feature_network_default(), kinetics_params = kinetics_default(), gold_standard_params = gold_standard_default(), simulation_params = simulation_default(), experiment_params = experiment_snapshot(), verbose = TRUE, download_cache_dir = getOption("dyngen_download_cache_dir"), num_cores = getOption("Ncpus") %||% 1L, id = NULL )
backbone |
The gene module configuration that determines the type of dynamic
process being simulated. See |
num_cells |
The number of cells to sample. |
num_tfs |
The number of transcription factors (TFs) to generate. TFs are the main drivers of the changes that occur in a cell. TFs are regulated only by other TFs. |
num_targets |
The number of target genes to generate. Target genes are regulated by TFs and sometimes by other target genes. |
num_hks |
The number of housekeeping genes (HKs) to generate. HKs are typically highly expressed, and are not regulated by the TFs or targets. |
distance_metric |
The distance metric to be used to calculate the distance
between cells. See |
tf_network_params |
Settings for generating the TF network with
|
feature_network_params |
Settings for generating the feature network with
|
kinetics_params |
Settings for determining the kinetics of the feature network
with |
gold_standard_params |
Settings pertaining to simulating the gold standard with
|
simulation_params |
Settings pertaining to the simulation itself with |
experiment_params |
Settings related to how the experiment is simulated with
|
verbose |
Whether or not to print messages during the simulation. |
download_cache_dir |
If not |
num_cores |
Parallelisation parameter for various steps in the pipeline. |
id |
An identifier for the model. |
A dyngen model.
dyngen on how to run a complete dyngen simulation
model <- initialise_model( backbone = backbone_bifurcating(), num_cells = 555, verbose = FALSE, download_cache_dir = "~/.cache/dyngen" )
model <- initialise_model( backbone = backbone_bifurcating(), num_cells = 555, verbose = FALSE, download_cache_dir = "~/.cache/dyngen" )
Add small noise to the kinetics of each simulation
kinetics_noise_none() kinetics_noise_simple(mean = 1, sd = 0.005)
kinetics_noise_none() kinetics_noise_simple(mean = 1, sd = 0.005)
mean |
The mean level of noise (should be 1) |
sd |
The sd of the noise (should be a relatively small value) |
A list of noise generators for the kinetics.
A module is a group of genes which, to some extent, shows the same expression behaviour. Several modules are connected together such that one or more genes from one module will regulate the expression of another module. By creating chains of modules, a dynamic behaviour in gene regulation can be created.
list_backbones() backbone_bifurcating() backbone_bifurcating_converging() backbone_bifurcating_cycle() backbone_bifurcating_loop() backbone_branching( num_modifications = rbinom(1, size = 6, 0.25) + 1, min_degree = 3, max_degree = sample(min_degree:5, 1) ) backbone_binary_tree(num_modifications = rbinom(1, size = 6, 0.25) + 1) backbone_consecutive_bifurcating() backbone_trifurcating() backbone_converging() backbone_cycle() backbone_cycle_simple() backbone_linear() backbone_linear_simple() backbone_disconnected( left_backbone = list_backbones() %>% keep(., names(.) != "disconnected") %>% sample(1) %>% first(), right_backbone = list_backbones() %>% keep(., names(.) != "disconnected") %>% sample(1) %>% first(), num_common_modules = 10 )
list_backbones() backbone_bifurcating() backbone_bifurcating_converging() backbone_bifurcating_cycle() backbone_bifurcating_loop() backbone_branching( num_modifications = rbinom(1, size = 6, 0.25) + 1, min_degree = 3, max_degree = sample(min_degree:5, 1) ) backbone_binary_tree(num_modifications = rbinom(1, size = 6, 0.25) + 1) backbone_consecutive_bifurcating() backbone_trifurcating() backbone_converging() backbone_cycle() backbone_cycle_simple() backbone_linear() backbone_linear_simple() backbone_disconnected( left_backbone = list_backbones() %>% keep(., names(.) != "disconnected") %>% sample(1) %>% first(), right_backbone = list_backbones() %>% keep(., names(.) != "disconnected") %>% sample(1) %>% first(), num_common_modules = 10 )
num_modifications |
The number of branch points in the generated backbone. |
min_degree |
The minimum degree of each node in the backbone. |
max_degree |
The maximum degree of each node in the backbone. |
left_backbone |
A backbone (other than a disconnected backbone), see |
right_backbone |
A backbone (other than a disconnected backbone), see |
num_common_modules |
The number of modules which are regulated by either backbone. |
A list of all the available backbone generators.
dyngen on how to run a dyngen simulation
names(list_backbones()) bb <- backbone_bifurcating() bb <- backbone_bifurcating_converging() bb <- backbone_bifurcating_cycle() bb <- backbone_bifurcating_loop() bb <- backbone_binary_tree() bb <- backbone_branching() bb <- backbone_consecutive_bifurcating() bb <- backbone_converging() bb <- backbone_cycle() bb <- backbone_cycle_simple() bb <- backbone_disconnected() bb <- backbone_linear() bb <- backbone_linear_simple() bb <- backbone_trifurcating() model <- initialise_model( backbone = bb )
names(list_backbones()) bb <- backbone_bifurcating() bb <- backbone_bifurcating_converging() bb <- backbone_bifurcating_cycle() bb <- backbone_bifurcating_loop() bb <- backbone_binary_tree() bb <- backbone_branching() bb <- backbone_consecutive_bifurcating() bb <- backbone_converging() bb <- backbone_cycle() bb <- backbone_cycle_simple() bb <- backbone_disconnected() bb <- backbone_linear() bb <- backbone_linear_simple() bb <- backbone_trifurcating() model <- initialise_model( backbone = bb )
Visualise the backbone of a model
plot_backbone_modulenet(model)
plot_backbone_modulenet(model)
model |
A dyngen initial model created with |
A ggplot2 object.
data("example_model") plot_backbone_modulenet(example_model)
data("example_model") plot_backbone_modulenet(example_model)
Visualise the backbone state network of a model
plot_backbone_statenet(model, detailed = FALSE)
plot_backbone_statenet(model, detailed = FALSE)
model |
A dyngen initial model created with |
detailed |
Whether or not to also plot the substates of transitions. |
A ggplot2 object.
data("example_model") plot_backbone_statenet(example_model)
data("example_model") plot_backbone_statenet(example_model)
Plot a dimensionality reduction of the final dataset
plot_experiment_dimred(model, mapping = aes_string("comp_1", "comp_2"))
plot_experiment_dimred(model, mapping = aes_string("comp_1", "comp_2"))
model |
A dyngen intermediary model for which the simulations have been run with |
mapping |
Which components to plot. |
A ggplot2 object.
data("example_model") plot_experiment_dimred(example_model)
data("example_model") plot_experiment_dimred(example_model)
Visualise the feature network of a model
plot_feature_network( model, show_tfs = TRUE, show_targets = TRUE, show_hks = FALSE )
plot_feature_network( model, show_tfs = TRUE, show_targets = TRUE, show_hks = FALSE )
model |
A dyngen intermediary model for which the feature network has been generated with |
show_tfs |
Whether or not to show the transcription factors. |
show_targets |
Whether or not to show the targets. |
show_hks |
Whether or not to show the housekeeping genes. |
A ggplot2 object.
data("example_model") plot_feature_network(example_model)
data("example_model") plot_feature_network(example_model)
Visualise the expression of the gold standard over simulation time
plot_gold_expression( model, what = c("mol_premrna", "mol_mrna", "mol_protein"), label_changing = TRUE )
plot_gold_expression( model, what = c("mol_premrna", "mol_mrna", "mol_protein"), label_changing = TRUE )
model |
A dyngen intermediary model for which the simulations have been run with |
what |
Which molecule types to visualise. |
label_changing |
Whether or not to add a label next to changing molecules. |
A ggplot2 object.
data("example_model") plot_gold_expression(example_model, what = "mol_mrna", label_changing = FALSE)
data("example_model") plot_gold_expression(example_model, what = "mol_mrna", label_changing = FALSE)
Visualise the mapping of the simulations to the gold standard
plot_gold_mappings( model, selected_simulations = NULL, do_facet = TRUE, mapping = aes_string("comp_1", "comp_2") )
plot_gold_mappings( model, selected_simulations = NULL, do_facet = TRUE, mapping = aes_string("comp_1", "comp_2") )
model |
A dyngen intermediary model for which the simulations have been run with |
selected_simulations |
Which simulation indices to visualise. |
do_facet |
Whether or not to facet according to simulation index. |
mapping |
Which components to plot. |
A ggplot2 object.
data("example_model") plot_gold_mappings(example_model)
data("example_model") plot_gold_mappings(example_model)
Visualise the simulations using the dimred
plot_gold_simulations( model, detailed = FALSE, mapping = aes_string("comp_1", "comp_2"), highlight = 0 )
plot_gold_simulations( model, detailed = FALSE, mapping = aes_string("comp_1", "comp_2"), highlight = 0 )
model |
A dyngen intermediary model for which the simulations have been run with |
detailed |
Whether or not to colour according to each separate sub-edge in the gold standard. |
mapping |
Which components to plot. |
highlight |
Which simulation to highlight. If highlight == 0 then the gold simulation will be highlighted. |
A ggplot2 object.
data("example_model") plot_gold_simulations(example_model)
data("example_model") plot_gold_simulations(example_model)
Visualise the expression of the simulations over simulation time
plot_simulation_expression( model, simulation_i = 1:4, what = c("mol_premrna", "mol_mrna", "mol_protein"), facet = c("simulation", "module_group", "module_id", "none"), label_nonzero = FALSE )
plot_simulation_expression( model, simulation_i = 1:4, what = c("mol_premrna", "mol_mrna", "mol_protein"), facet = c("simulation", "module_group", "module_id", "none"), label_nonzero = FALSE )
model |
A dyngen intermediary model for which the simulations have been run with |
simulation_i |
Which simulation to visualise. |
what |
Which molecule types to visualise. |
facet |
What to facet on. |
label_nonzero |
Plot labels for non-zero molecules. |
A ggplot2 object.
data("example_model") plot_simulation_expression(example_model)
data("example_model") plot_simulation_expression(example_model)
Visualise the simulations using the dimred
plot_simulations(model, mapping = aes_string("comp_1", "comp_2"))
plot_simulations(model, mapping = aes_string("comp_1", "comp_2"))
model |
A dyngen intermediary model for which the simulations have been run with |
mapping |
Which components to plot. |
A ggplot2 object.
data("example_model") plot_simulations(example_model)
data("example_model") plot_simulations(example_model)
Plot a summary of all dyngen simulation steps.
plot_summary(model)
plot_summary(model)
model |
A dyngen intermediary model for which the simulations have been run with |
A ggplot2 object.
data("example_model") plot_summary(example_model)
data("example_model") plot_summary(example_model)
Statistics are derived from these datasets in order to simulate single cell experiments.
realcounts
realcounts
An object of class tbl_df
(inherits from tbl
, data.frame
) with 111 rows and 9 columns.
These networks are subsampled in order to generate realistic feature and housekeeping networks.
realnets
realnets
An object of class tbl_df
(inherits from tbl
, data.frame
) with 32 rows and 2 columns.
A bounded version of rnorm
rnorm_bounded(n, mean = 0, sd = 1, min = -Inf, max = Inf)
rnorm_bounded(n, mean = 0, sd = 1, min = -Inf, max = Inf)
n |
number of observations. If |
mean |
vector of means. |
sd |
vector of standard deviations. |
min |
lower limits of the distribution. |
max |
upper limits of the distribution. |
Generates values with rnorm, bounded by [min, max]
rnorm_bounded(10)
rnorm_bounded(10)
Will generate numbers from a random subrange within the given range.
For example, if [min, max] is set to [0, 10], this function could decide to generate
n numbers between 2 and 6.
runif_subrange(n, min, max)
runif_subrange(n, min, max)
n |
Number of observations |
min |
Lower limits of the distribution. |
max |
Upper limits of the distribution. |
Generates values with runif, bounded by a range drawn from sort(runif(2, min, max))
.
runif_subrange(20, 0, 10)
runif_subrange(20, 0, 10)
Determine simulation time from backbone
simtime_from_backbone(backbone, burn = FALSE)
simtime_from_backbone(backbone, burn = FALSE)
backbone |
A valid dyngen backbone object |
burn |
Whether or not to compute the simtime for only the burn phase |
An estimation of the required simulation time
backbone <- backbone_linear() simtime_from_backbone(backbone) model <- initialise_model( backbone = backbone, simulation_params = simulation_default( burn_time = simtime_from_backbone(backbone, burn = TRUE), total_time = simtime_from_backbone(backbone, burn = FALSE) ) )
backbone <- backbone_linear() simtime_from_backbone(backbone) model <- initialise_model( backbone = backbone, simulation_params = simulation_default( burn_time = simtime_from_backbone(backbone, burn = TRUE), total_time = simtime_from_backbone(backbone, burn = FALSE) ) )