# Compare an explicit accumulation loop against the vectorized sum() on the
# same input (the integers 1 to 1000), running each expression 50 times.
microbenchmark::microbenchmark(
  loop = {
    s <- 0
    for (i in 1:1000) s <- s + i
    s
  },
  vec = sum(1:1000),
  times = 50
)
Exercise 2: Vectorize a loop
Difficulty: Intermediate.
Show solution
RInteractive R
# Input shared by both implementations below.
x <- 1:10000

# slow: squares each element of `x` in an explicit loop, writing into a
# preallocated result vector.
slow <- function() {
  y <- numeric(length(x))
  for (i in seq_along(x)) y[i] <- x[i]^2
  y
}

# fast: the same computation as a single vectorized expression.
fast <- function() x^2
Exercise 3: Preallocate vs grow
Difficulty: Intermediate.
Show solution
RInteractive R
# grow: builds the vector 1..n by appending one element per iteration with
# c(), which creates a new, longer vector each time.
# seq_len(n) (rather than 1:n) keeps the result empty when n is 0.
grow <- function(n) {
  x <- numeric()
  for (i in seq_len(n)) x <- c(x, i)
  x
}

# prealloc: allocates the full-length vector once and fills it in place.
prealloc <- function(n) {
  x <- numeric(n)
  for (i in seq_len(n)) x[i] <- i
  x
}
# prealloc is 100x+ faster for large n
Exercise 4: apply vs vectorized
Difficulty: Intermediate.
Show solution
RInteractive R
# 100 x 1000 matrix of standard-normal draws.
m <- matrix(rnorm(1000 * 100), nrow = 100)

# Row sums two ways: apply() over rows vs the dedicated rowSums().
microbenchmark::microbenchmark(
  apply = apply(m, 1, sum),
  vec = rowSums(m),
  times = 50
)
Exercise 5: Profile a function
Difficulty: Advanced.
Show solution
RInteractive R
# Profile a loop that grows a vector with c() on every iteration, so the
# profiler output shows where the time goes.
profvis::profvis({
  x <- numeric()
  for (i in 1:1000) x <- c(x, i^2)
})
Exercise 6: byte-compile
Difficulty: Advanced.
Show solution
RInteractive R
# slow: sums the integers 1..n with an explicit loop.
# seq_len(n) (rather than 1:n) makes slow(0) return 0.
slow <- function(n) {
  s <- 0
  for (i in seq_len(n)) s <- s + i
  s
}

# fast: the same function passed through the byte-code compiler.
fast <- compiler::cmpfun(slow)

# NOTE(review): recent R versions JIT-compile closures by default, so the two
# timings may come out close — confirm on the R version in use.
microbenchmark::microbenchmark(slow(1e4), fast(1e4), times = 50)
Exercise 7: data.table vs dplyr aggregation
Difficulty: Advanced.
Show solution
RInteractive R
# data.table copy of mtcars for the data.table variant.
dt <- data.table::as.data.table(mtcars)

# Mean mpg per cyl group, computed with dplyr and with data.table.
microbenchmark::microbenchmark(
  dplyr_way = dplyr::summarise(dplyr::group_by(mtcars, cyl), m = mean(mpg)),
  dt_way = dt[, .(m = mean(mpg)), by = cyl],
  times = 50
)
Exercise 8: Memory usage
Difficulty: Advanced.
Show solution
RInteractive R
# 1000 x 1000 numeric matrix (one million doubles).
m <- matrix(rnorm(1e6), 1000, 1000)

# Report its in-memory size, formatted in megabytes.
object.size(m) |>
  format(units = "MB")
# Watch for copies of `x`: tracemem() marks the object so that R reports
# whenever it is duplicated.
x <- 1:5
tracemem(x)
x[1] <- 99 # prints memory copy address
# Stop tracing once the demonstration is done.
untracemem(x)
Exercise 13: bench package timing
Difficulty: Intermediate.
Show solution
RInteractive R
# Time the vectorized sum against an explicit loop over 1..1e4.
# check = TRUE asks bench::mark() to verify that both expressions produce
# equal results before reporting timings.
bench::mark(
  v1 = sum(1:1e4),
  v2 = {
    s <- 0
    for (i in 1:1e4) s <- s + i
    s
  },
  check = TRUE
)
Exercise 14: Use which() instead of a logical (all-row) filter
Difficulty: Intermediate.
Show solution
RInteractive R
# One million uniform draws on [0, 1].
v <- runif(1e6)

# Select the elements above 0.5 two ways: logical-mask subsetting vs
# integer-index subsetting through which().
microbenchmark::microbenchmark(
  v1 = v[v > 0.5],
  v2 = v[which(v > 0.5)],
  times = 20
)
Exercise 15: Avoid factors when not needed
Difficulty: Advanced.
Show solution
RInteractive R
# When reading strings that won't be used in models, pass stringsAsFactors = FALSE or use read_csv().
# In tibbles, strings stay as character by default. In data.frame before R 4.0, they became factors.
Exercise 16: rowsum() for fast group sums
Difficulty: Advanced.
Show solution
RInteractive R
# Total mpg within each cyl group of mtcars, via rowsum().
with(mtcars, rowsum(mpg, cyl))
Exercise 17: tabulate for fast counts
Difficulty: Advanced.
Show solution
RInteractive R
# 100,000 draws (with replacement) from the integers 1..5.
x <- sample(1:5, 1e5, replace = TRUE)

# Count occurrences of each value two ways: table() vs tabulate().
microbenchmark::microbenchmark(
  table_fn = table(x),
  tab = tabulate(x),
  times = 20
)