The `reticulate` package provides a comprehensive set of tools for interoperability between Python and R, including facilities for translating objects between the two languages. `reticulate` embeds a Python session within your R session, enabling seamless, high-performance interoperability. If you are an R developer who uses Python for some of your work, or a member of a data science team that uses both languages, `reticulate` can dramatically streamline your workflow!
When calling into Python, R data types are automatically converted to their equivalent Python types. When values are returned from Python to R they are converted back to R types. Types are converted as follows:
R | Python | Examples |
---|---|---|
Single-element vector | Scalar | 1, 1L, TRUE, "foo" |
Multi-element vector | List | c(1.0, 2.0, 3.0), c(1L, 2L, 3L) |
List of multiple types | Tuple | list(1L, TRUE, "foo") |
Named list | Dict | list(a = 1L, b = 2.0), dict(x = x_data) |
Matrix/Array | NumPy ndarray | matrix(c(1,2,3,4), nrow = 2, ncol = 2) |
Data Frame | Pandas DataFrame | data.frame(x = c(1,2,3), y = c("a", "b", "c")) |
Function | Python function | function(x) x + 1 |
NULL, TRUE, FALSE | None, True, False | NULL, TRUE, FALSE |
(source: https://rstudio.github.io/reticulate/)
# Point reticulate at the Python 3 interpreter used by the Python chunks
library(reticulate)
use_python("/usr/local/bin/python3")
library(dplyr)
library(purrr)
# Preview the first rows of iris as a markdown table
knitr::kable(head(iris), format = "markdown")
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
---|---|---|---|---|
5.1 | 3.5 | 1.4 | 0.2 | setosa |
4.9 | 3.0 | 1.4 | 0.2 | setosa |
4.7 | 3.2 | 1.3 | 0.2 | setosa |
4.6 | 3.1 | 1.5 | 0.2 | setosa |
5.0 | 3.6 | 1.4 | 0.2 | setosa |
5.4 | 3.9 | 1.7 | 0.4 | setosa |
# Fetch the R data frame `iris` through reticulate's `r` object
iris = r.iris
# Split the table: the first four columns become x, the fifth column becomes y
iris2 = iris
x, y = iris2.iloc[:, :4], iris2.iloc[:, 4]
# This chunk is python code !!!
import pandas as pd # data structure
import seaborn as sns # visualization
from sklearn.ensemble import RandomForestClassifier # algorithm
from sklearn.model_selection import cross_val_score # target function for Bayesian optimization
from bayes_opt import BayesianOptimization # Bayesian optimization
# Bayesian optimization
def rf_cv(n_estimators, min_samples_split, max_features, max_depth):
    """Return the mean 5-fold cross-validation score of a random forest.

    This is the target function handed to the Bayesian optimizer. The
    hyperparameters may arrive as floats, so the integer-valued ones are
    truncated with ``int`` before being passed to the classifier.

    Reads the module-level objects ``x`` (features) and ``y`` (labels).

    Parameters
    ----------
    n_estimators : number of trees (truncated to int).
    min_samples_split : minimum samples needed to split a node (truncated to int).
    max_features : fraction of features considered per split, capped at 0.999.
    max_depth : maximum tree depth (truncated to int).

    Returns
    -------
    The mean of the per-fold scores returned by ``cross_val_score``.
    """
    model = RandomForestClassifier(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=min(max_features, 0.999),  # keep the fraction strictly below 1
        max_depth=int(max_depth),
        random_state=2,  # fixed seed for the forest
    )
    val = cross_val_score(model, x, y, cv=5).mean()
    return val
# Optimizer object: target function plus (lower, upper) bounds per hyperparameter
rf_bo = BayesianOptimization(
    f=rf_cv,
    pbounds={
        'n_estimators': (10, 250),
        'min_samples_split': (2, 25),
        'max_features': (0.1, 0.999),
        'max_depth': (5, 15),
    },
)
# Run the search with the library's default settings
rf_bo.maximize()
## | iter | target | max_depth | max_fe... | min_sa... | n_esti... |
## -------------------------------------------------------------------------
## | 1 | 0.96 | 11.78 | 0.9302 | 23.51 | 75.09 |
## | 2 | 0.96 | 6.404 | 0.5346 | 3.992 | 242.2 |
## | 3 | 0.9533 | 8.627 | 0.3307 | 20.8 | 163.3 |
## | 4 | 0.9533 | 12.63 | 0.3901 | 14.77 | 149.8 |
## | 5 | 0.9533 | 12.3 | 0.8749 | 16.16 | 158.7 |
## | 6 | 0.9667 | 12.21 | 0.7069 | 22.37 | 74.29 |
## | 7 | 0.9667 | 11.62 | 0.6678 | 21.36 | 74.47 |
## | 8 | 0.96 | 11.89 | 0.1083 | 21.52 | 72.43 |
## | 9 | 0.9667 | 12.99 | 0.5134 | 20.55 | 74.97 |
## | 10 | 0.96 | 13.3 | 0.9152 | 19.52 | 76.82 |
## | 11 | 0.96 | 14.53 | 0.2492 | 20.64 | 72.7 |
## | 12 | 0.9667 | 14.43 | 0.6484 | 23.27 | 74.33 |
## | 13 | 0.96 | 13.17 | 0.1849 | 21.68 | 75.26 |
## | 14 | 0.9667 | 13.76 | 0.6379 | 22.45 | 73.29 |
## | 15 | 0.9533 | 14.91 | 0.1762 | 24.03 | 72.7 |
## | 16 | 0.96 | 13.48 | 0.9034 | 22.97 | 73.78 |
## | 17 | 0.96 | 10.1 | 0.1503 | 17.13 | 67.45 |
## | 18 | 0.96 | 11.22 | 0.9914 | 22.36 | 74.87 |
## | 19 | 0.9667 | 6.3 | 0.6236 | 7.066 | 49.52 |
## | 20 | 0.9533 | 12.86 | 0.4823 | 7.341 | 103.9 |
## | 21 | 0.96 | 8.98 | 0.8033 | 6.907 | 203.3 |
## | 22 | 0.9467 | 13.34 | 0.3529 | 23.99 | 128.1 |
## | 23 | 0.96 | 12.16 | 0.1344 | 20.59 | 74.49 |
## | 24 | 0.9667 | 12.67 | 0.5545 | 21.62 | 73.52 |
## | 25 | 0.9667 | 12.59 | 0.7377 | 21.6 | 74.47 |
## | 26 | 0.96 | 13.77 | 0.8722 | 21.26 | 73.84 |
## | 27 | 0.96 | 11.96 | 0.2607 | 21.84 | 73.94 |
## | 28 | 0.96 | 14.71 | 0.9825 | 23.55 | 74.87 |
## | 29 | 0.9667 | 12.92 | 0.6757 | 20.42 | 80.11 |
## | 30 | 0.9533 | 9.965 | 0.1326 | 9.472 | 159.8 |
## =========================================================================
# Collect the optimizer's recorded results into a pandas DataFrame
res = pd.DataFrame(data=rf_bo.res)
# Bring the Python object `res` into the R session through reticulate's `py` object
rf_bo.res <- py$res
# Back in R: plot the `target` column of the imported results
plot(rf_bo.res$target)