--- title: "Overview of the functionality provided by the dynutils package" author: "Robrecht Cannoodt & Wouter Saelens" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Overview of the functionality provided by the dynutils package} %\VignetteEncoding{UTF-8} %\VignetteEngine{knitr::rmarkdown} editor_options: chunk_output_type: console --- ```{r setup1, echo=FALSE, message=FALSE} knitr::opts_chunk$set(comment = "#>", collapse = TRUE) library(dplyr) library(purrr) library(stringr) library(ggplot2) library(dynutils) set.seed(1) ``` ## Table of contents ```{r echo=FALSE, results='asis'} lines <- readLines("functionality.Rmd") headings <- c(which(grepl("^## ", lines)), length(lines)) subheadings <- which(grepl("^### .*`", lines)) strs <- map_chr(seq_len(length(headings) - 1), function(i) { head <- lines[[headings[[i]]]] subhead <- subheadings %>% keep(~ headings[[i]] < . & . < headings[[i+1]]) %>% lines[.] if (length(subhead) > 0) { fun_texts <- gsub("[^`]*(`[^`]*`).*", "\\1", subhead) fun_links <- subhead %>% tolower() %>% str_replace_all(" ", "-") %>% str_replace_all("[^a-z\\-_]", "") %>% str_replace("^-*", "#") paste0( gsub("## ", "* ", head), ": \n", paste0(" [", fun_texts, "](", fun_links, ")", collapse = ",\n", sep = ""), "\n" ) } else { "" } }) cat(strs, sep = "") ``` ## Manipulation of lists ### `add_class`: Add a class to an object ```{r add_class} l <- list(important_number = 42) %>% add_class("my_list") l ``` ### `extend_with`: Extend list with more data ```{r extend_with} l %>% extend_with( .class_name = "improved_list", url = "https://github.com/dynverse/dynverse" ) ``` ## Calculations ### `calculate_distance`: Compute pairwise distances between two matrices See `?calculate_distance` for the list of currently supported distances. ```{r calculate_distance} x <- matrix(runif(30), ncol = 10) y <- matrix(runif(50), ncol = 10) calculate_distance(x, y, method = "euclidean") ``` For euclidean distances, this is similar to calculating: ```{r dist} as.matrix(dist(rbind(x, y)))[1:3, -1:-3] ``` ### `project_to_segments`: Project a set of points to to set of segments ```{r project_to_segments} x <- matrix(rnorm(50, 0, .5), ncol = 2) segfrom <- matrix(c(0, 1, 0, -1, 1, 0, -1, 0), ncol = 2, byrow = TRUE) segto <- segfrom / 10 fit <- project_to_segments(x, segfrom, segto) ggplot() + geom_segment(aes(x = x[,1], xend = fit$x_proj[,1], y = x[,2], yend = fit$x_proj[,2], colour = "projection"), linetype = "dashed") + geom_point(aes(x[,1], x[,2], colour = "point")) + geom_segment(aes(x = segfrom[,1], xend = segto[,1], y = segfrom[,2], yend = segto[,2], colour = "segment")) + scale_colour_brewer(palette = "Dark2") + scale_x_continuous(name = NULL, breaks = NULL) + scale_y_continuous(name = NULL, breaks = NULL) + labs(colour = "Object type") + theme_classic() str(fit) ``` ### `calculate_mean`: Calculate a (weighted) mean between vectors or a list of vectors; supports the arithmetic, geometric and harmonic mean ```{r calculate_mean} calculate_arithmetic_mean(0.1, 0.5, 0.9) calculate_geometric_mean(0.1, 0.5, 0.9) calculate_harmonic_mean(0.1, 0.5, 0.9) calculate_mean(.1, .5, .9, method = "harmonic") # example with multiple vectors calculate_arithmetic_mean(c(0.1, 0.9), c(0.2, 1)) # example with a list of vectors vectors <- list(c(0.1, 0.2), c(0.4, 0.5)) calculate_geometric_mean(vectors) # example of weighted means calculate_geometric_mean(c(0.1, 10), c(0.9, 20), c(0.5, 2), weights = c(1, 2, 5)) ``` ## Manipulation of matrices ### `expand_matrix`: Add rows and columns to a matrix ```{r expand_matrix} x <- matrix(runif(12), ncol = 4, dimnames = list(c("a", "c", "d"), c("D", "F", "H", "I"))) expand_matrix(x, letters[1:5], LETTERS[1:10], fill = 0) ``` ## Scaling of matrices and vectors ### `scale_uniform`: Rescale data to have a certain center and max range Generate a matrix from a normal distribution with a large standard deviation, centered at c(5, 5). ```{r scale_uniform_gen} x <- matrix(rnorm(200*2, sd = 10, mean = 5), ncol = 2) ``` Center the dataset at c(0, 0) with a minimum of c(-.5, -.5) and a maximum of c(.5, .5). ```{r scale_uniform} x_scaled <- scale_uniform(x, center = 0, max_range = 1) ``` Check the ranges and verify that the scaling is correct. ```{r scale_uniform_verify} ranges <- apply(x_scaled, 2, range) ranges # should all lie between -.5 and .5 colMeans(ranges) # should all be equal to 0 apply(ranges, 2, diff) # max should be 1 ``` ### `scale_minmax`: Rescale data to a [0, 1] range ```{r scale_minmax} x_scaled2 <- scale_minmax(x) ``` Check the ranges and verify that the scaling is correct. ```{r scale_minmax_verify} apply(x_scaled2, 2, range) # each column should be [0, 1] ``` ### `scale_quantile`: Cut off outer quantiles and rescale to a [0, 1] range ```{r scale_quantile} x_scaled3 <- scale_quantile(x, .05) ``` Check the ranges and verify that the scaling is correct. ```{r scale_quantile_verify} apply(x_scaled3, 2, range) # each column should be [0, 1] qplot(x_scaled2[,1], x_scaled3[,1]) + theme_bw() ``` ## Manipulation of functions ### `inherit_default_params`: Have one function inherit the default parameters from other functions ```{r inherit_default_params} fun1 <- function(a = 10, b = 7) runif(a, -b, b) fun2 <- function(c = 9) 2^c fun3 <- inherit_default_params( super = list(fun1, fun2), fun = function(a, b, c) { list(x = fun1(a, b), y = fun2(c)) } ) fun3 ``` ## Manipulation of packages ### `check_packages`: Easily checking whether certain packages are installed ```{r check_packages} check_packages("SCORPIUS", "dynutils", "wubbalubbadubdub") check_packages(c("princurve", "mlr", "tidyverse")) ``` ### `install_packages`: Install packages taking into account the remotes of another This is useful for installing suggested packages with GitHub remotes. ```{r install_packages, eval = FALSE} install_packages("SCORPIUS", package = "dynmethods", prompt = TRUE) ``` ``` > install_packages("SCORPIUS", package = "dynmethods", prompt = TRUE) Following packages have to be installed: SCORPIUS Do you want to install these packages? (y/yes/1 or n/no/2): 1 Installing SCORPIUS ... ** testing if installed package can be loaded * DONE (SCORPIUS) Installed SCORPIUS [1] "SCORPIUS" ``` ## Manipulation of vectors ### `random_time_string`: Generates a string very likely to be unique ```{r random_time_string} random_time_string("test") random_time_string("test") random_time_string("test") ``` ## Tibble helpers ### `list_as_tibble`: Convert a list of lists to a tibble whilst retaining class information ```{r list_as_tibble} li <- list( list(a = 1, b = log10, c = "parrot") %>% add_class("myobject"), list(a = 2, b = sqrt, c = "quest") %>% add_class("yourobject") ) tib <- list_as_tibble(li) tib ``` ### `tibble_as_list`: Convert a tibble back to a list of lists whilst retaining class information ```{r tibble_as_list} li <- tibble_as_list(tib) li ``` ### `extract_row_to_list`: Extracts one row from a tibble and converts it to a list ```{r extract_row_to_list} extract_row_to_list(tib, 2) ``` ### `mapdf`: Apply a function to each row of a data frame The `mapdf` functions apply a function on each row of a data frame. They are based heavily on purrr's `map` functions. ```{r mapdf_function} tib %>% mapdf(class) ``` Or use an anonymous function. ```{r mapdf_anonfun} tib %>% mapdf(function(row) paste0(row$b(row$a), "_", row$c)) ``` Or even a formula. ```{r mapdf_formula} tib %>% mapdf(~ .$b) ``` There are many more variations available. See `?mapdf` for more info. ```{r mapdf_more} tib %>% mapdf_lgl(~ .$a > 1) tib %>% mapdf_chr(~ paste0("~", .$c, "~")) tib %>% mapdf_int(~ nchar(.$c)) tib %>% mapdf_dbl(~ .$a * 1.234) ``` ## File helpers ### `safe_tempdir`: Create an empty temporary directory and return its path ```{R safe_tempdir} safe_tempdir("samson") ``` ## Assertion helpers ### `%all_in%`: Check whether a vector are all elements of another vector ```{r all_in, error=TRUE} library(assertthat) assert_that(c(1, 2) %all_in% c(0, 1, 2, 3, 4)) assert_that("a" %all_in% letters) assert_that("A" %all_in% letters) assert_that(1:10 %all_in% letters) ``` ### `%has_names%`: Check whether an object has certain names ```{r hasnames, error=TRUE} assert_that(li %has_names% "a") assert_that(li %has_names% "c") assert_that(li %has_names% letters) ``` ### `is_single_numeric`: Check whether a value is a single numeric ```{r is_single_numeric, error=TRUE} assert_that(is_single_numeric(1)) assert_that(is_single_numeric(Inf)) assert_that(is_single_numeric(1.6)) assert_that(is_single_numeric(NA)) assert_that(is_single_numeric(1:6)) assert_that(is_single_numeric("pie")) ``` ### `is_bounded`: Check whether a value within a certain interval ```{r is_bounded, error=TRUE} assert_that(is_bounded(10)) assert_that(is_bounded(10:30)) assert_that(is_bounded(Inf)) assert_that(is_bounded(10, lower_bound = 20)) assert_that(is_bounded( 10, lower_bound = 20, lower_closed = TRUE, upper_bound = 30, upper_closed = FALSE )) ```