Use of this document

This is a study note on functional programming with the \(plyr\), \(purrr\), and \(furrr\) packages.

Prerequisites

library(plyr)
library(purrr)
library(furrr) 
library(dplyr) # data wrangling
# Define my function
# Adds up whichever of x, y, z and k are supplied. Arguments left at their
# NULL default contribute nothing, so calling mysum() with no inputs gives 0.
mysum <- function(x = NULL, y = NULL, z = NULL, k = NULL) {
  sum(x, y, z, k)
}
# Define the dataframe
# Builds a reproducible demo data frame.
#   seed: value handed to set.seed() before sampling (default 0).
#   n:    number of rows to generate (default 5, the size used in this note).
# Returns a data.frame with columns x, y, z (uniform draws on [-10, 10],
# rounded to whole numbers) and k (constant 100).
createDF <- function(seed = 0, n = 5) {
  set.seed(seed)
  # Base R only, so the helper works even when no pipe operator is attached.
  draw_rounded <- function() round(runif(n = n, min = -10, max = 10))
  # Draw order x -> y -> z is kept so a given seed still yields the same table.
  x <- draw_rounded()
  y <- draw_rounded()
  z <- draw_rounded()
  data.frame(x = x, y = y, z = z, k = rep(100, n))
}
# Build the demo data frame with the default seed, then print it
df <- createDF()
df
##    x  y  z   k
## 1  8 -6 -9 100
## 2 -5  8 -6 100
## 3 -3  9 -6 100
## 4  1  3  4 100
## 5  8  3 -2 100

1. Characteristics of functional programming

\(purrr\) enhances R’s functional programming (FP) toolkit by providing a complete and consistent set of tools for working with functions and vectors. If you’ve never heard of FP before, the best place to start is the family of map() functions which allow you to replace many for loops with code that is both more succinct and easier to read. The best place to learn about the map() functions is the iteration chapter in R for data science. (source: purrr Overview)

1.1 Test Module & Execution Module

# Test Module
# Template for developing the per-group function interactively: nest the
# input, pull out the first nested data frame, and write `myfun` against it.
# NOTE(review): `data` is not defined in this snippet; presumably it is a data
# frame with an `index` column -- confirm against the actual input.
# NOTE(review): an unnamed column passed to nest() selects the columns to nest
# (and tidyr >= 1.0.0 warns about the unnamed form). If the intent is one
# nested data frame per `index`, this may need `nest(data = -index)` -- confirm.
nested_data <- data %>% tidyr::nest(index)
# Overwrites `data` with the first nested element and prints it for inspection.
data <- nested_data$data[[1]]; data
# Placeholder: the body is left empty, so as written the function returns NULL.
myfun <- function(data){
  
}

# Execution Module
# Nests the input and maps `myfun` over the nested list-column, storing the
# per-element results in a new list-column named `result`.
# NOTE(review): inside mutate(), `data` presumably resolves to the list-column
# created by nest() rather than the outer `data` object -- confirm.
# NOTE(review): the Test Module reassigns `data` to a single nested element,
# so this pipeline expects `data` to be the original full table again -- verify
# before running both snippets in the same session.
data %>% tidyr::nest(index) %>% 
  mutate(result = data %>% purrr::map(myfun))

2. Generic Method

2.1 a listized array with constant inputs

# listization
# Apply mysum() to each element of df$x while y and z are held at 100 and k is
# left out. The same wrapper is run through plyr, purrr and furrr in turn, and
# each call returns a list with one sum per element of df$x.
mysum_plyr <- plyr::llply(
  df$x,
  function(value) mysum(x = value, y = 100, z = 100, k = NULL)
)

mysum_purrr <- purrr::map(
  df$x,
  function(value) mysum(x = value, y = 100, z = 100, k = NULL)
)

mysum_furrr <- furrr::future_map(
  df$x,
  function(value) mysum(x = value, y = 100, z = 100, k = NULL)
)

# Flatten each list and show the three results side by side
tibble(
  mysum_plyr = unlist(mysum_plyr),
  mysum_purrr = unlist(mysum_purrr),
  mysum_furrr = unlist(mysum_furrr)
)
## # A tibble: 5 x 3
##   mysum_plyr mysum_purrr mysum_furrr
##        <dbl>       <dbl>       <dbl>
## 1        208         208         208
## 2        195         195         195
## 3        197         197         197
## 4        201         201         201
## 5        208         208         208

2.2 Two listized arrays with constant inputs

# Walk df$x and df$y in parallel: each pair is summed together with a constant
# z of 100, and k is left out.
mysum_purrr <- purrr::map2(
  df$x, df$y,
  function(a, b) mysum(x = a, y = b, z = 100, k = NULL)
)

mysum_furrr <- furrr::future_map2(
  df$x, df$y,
  function(a, b) mysum(x = a, y = b, z = 100, k = NULL)
)

# Flatten both lists and compare the two implementations side by side
tibble(
  mysum_purrr = unlist(mysum_purrr),
  mysum_furrr = unlist(mysum_furrr)
)
## # A tibble: 5 x 2
##   mysum_purrr mysum_furrr
##         <dbl>       <dbl>
## 1         102         102
## 2         103         103
## 3         106         106
## 4         104         104
## 5         111         111

2.3 a listized dataframe with constant inputs

# listization
# Feed the whole data frame to mysum() row by row: the columns x, y, z and k
# line up with mysum()'s arguments, so each row produces one total.
mysum_plyr <- plyr::mlply(df, mysum)
mysum_purrr <- purrr::pmap(df, mysum)
mysum_furrr <- furrr::future_pmap(df, mysum)

# Flatten each list and show the three results side by side
tibble(
  mysum_plyr = unlist(mysum_plyr),
  mysum_purrr = unlist(mysum_purrr),
  mysum_furrr = unlist(mysum_furrr)
)
## # A tibble: 5 x 3
##   mysum_plyr mysum_purrr mysum_furrr
##        <dbl>       <dbl>       <dbl>
## 1         93          93          93
## 2         97          97          97
## 3        100         100         100
## 4        108         108         108
## 5        109         109         109
# Larger 20-row table for the advanced examples, with a leading `index` column
# holding the row number. tibble() replaces the deprecated data_frame().
# NOTE(review): no seed is set here, so the values differ from run to run.
df <- tibble::tibble(x = runif(n=20, min=-10, max=10) %>% round(),
                     y = runif(n=20, min=-10, max=10) %>% round(),
                     z = runif(n=20, min=-10, max=10) %>% round(),
                     k = rep(100,20)) %>% 
  tibble::rowid_to_column("index")

3. Advanced Method

3.1 Split-Apply-Combine Method

  1. nest creates list of data frames (split)
  2. map applies function to list object (apply)
  3. unnest moves list results back into a data frame (combine)
library(gapminder)
# Fit a simple linear trend of life expectancy over time.
#   dframe: a data frame with numeric columns `lifeExp` and `year`.
# Returns the fitted lm object for lifeExp ~ year.
myfit <- function(dframe) {
  lm(formula = lifeExp ~ year, data = dframe)
}

# Split: shift `year` so that 0 corresponds to 1950, then keep one row per
# country/continent with all remaining columns collected into the list-column
# `data`.
countryList <- gapminder %>%
  mutate(year = year - 1950) %>%
  # The list-column is named explicitly because the unnamed form
  # nest(-country, -continent) is deprecated since tidyr 1.0.0.
  tidyr::nest(data = -c(country, continent))

# Apply: fit one model per nested data frame, then pull the first and second
# coefficient of each fit into plain numeric columns.
countryList <- mutate(
  countryList,
  model = purrr::map(data, myfit),
  intercept = purrr::map_dbl(model, function(fit) coef(fit)[1]),
  slope = purrr::map_dbl(model, function(fit) coef(fit)[2])
)

3.2 Nested Functional Programming Method