# About rpy2
- pip install rpy2
- import from the **rpy2** tool R's high-level inference *robjects*

In [22]:
import rpy2.robjects as robjects

---

## Importing Packages
- Import the R package in the embedded R
- Transform all R objects in that package as Python objects

In [7]:
# import R packages with rpy2's funcion : rpy2.robjects.packages.importr()
from rpy2.robjects.packages import importr

In [4]:
base = importr('base')

In [5]:
utils = importr('utils')

---

## Installing Packages
- Usually performed by fetching R packages from a package repository and installing them locally
- Provided by R libraries, and when in Python we can simply use them using rpy2

In [10]:
'''
Install packages from the first mirror (ind=1) known to R is done with
'''
import rpy2.robjects.packages as rpackages
# import R's utility package
utils = rpackages.importr('utils')

# select a mirror for R packages
utils.chooseCRANmirror(ind=1)

<rpy2.rinterface_lib.sexp.NULLType object at 0x10e629bc0> [RTYPES.NILSXP]

- Can now install packages with R's own function install.package

In [11]:
# R package names
packnames = ('ggplot2', 'hexbin')

# R vector of strings
from rpy2.robjects.vectors import StrVector

# Selectively install what needs to be install.
names_to_install = [x for x in packnames if not rpackages.isinstalled(x)]
if len(names_to_install) > 0:
    utils.install_packages(StrVector(names_to_install))

R[write to console]: trying URL 'https://cloud.r-project.org/bin/macosx/contrib/4.2/hexbin_1.28.2.tgz'

R[write to console]: Content type 'application/x-gzip'
R[write to console]:  length 1471267 bytes (1.4 MB)

R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[writ


The downloaded binary packages are in
	/var/folders/fz/zn5r8vq12nv5p23dtlr15sk40000gn/T//RtmpXu0Ns8/downloaded_packages


In [16]:
pi = robjects.r('pi')
pi

0
3.141593


---

## Getting help 
- Help on a topic within a given package, or currently loaded packages¶

In [19]:
help_doc = utils.help("utils")
help_doc[0]

'/Library/Frameworks/R.framework/Versions/4.2/Resources/library/utils/help/utils-package'

---

## R in a IPython Kernel
- [Documentation by rpy2](https://rpy2.github.io/doc/latest/html/interactive.html#overview)

In [24]:
# Load the rpy2.ipython extension to use %%R and magic commands
# %load_ext rpy2.ipython

# reload with
%reload_ext rpy2.ipython

In [31]:
%R X=c(1, 4, 5, 7); print(mean(X)); print(sd(X))

[1] 4.25
[1] 2.5


array([2.5])

In [33]:
import numpy as np

In [35]:
X = np.array([4.5, 6.3, 7.9])
X

array([4.5, 6.3, 7.9])

In [36]:
X.mean()

6.233333333333334

In [37]:
np.mean(X)

6.233333333333334

- Push variable from py to rpy2 with %Rpush

In [38]:
%Rpush X
X

array([4.5, 6.3, 7.9])

In [39]:
%R mean(X)

array([6.23333333])

In [41]:
%%R
mean(X)

[1] 6.233333


In [43]:
Y = np.array([1, 2, 3])
Y.mean(), np.mean(Y)

(2.0, 2.0)

In [44]:
%R -i Y

In [45]:
Y

array([1, 2, 3])

In [47]:
%%R
mean(Y)

[1] 2


- Pull variable from py to rpy2 with %Rpull

In [70]:
%R X = c(4, 5, 6)
X

array([4.5, 6.3, 7.9])

In [72]:
%Rpull X
X

<rpy2.rinterface.FloatSexpVector object at 0x115df0c00> [RTYPES.REALSXP]

- R directly in Py Kernel

In [84]:
%%R
X = c(1, 4, 5, 7)
Y = c(2, 4, 3, 9)
summary(lm(Y~X))

ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
group <- gl(2, 10, 20, labels = c("Ctl","Trt"))
weight <- c(ctl, trt)
summary(lm(weight~group))


Call:
lm(formula = weight ~ group)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.0710 -0.4938  0.0685  0.2462  1.3690 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   5.0320     0.2202  22.850 9.55e-15 ***
groupTrt     -0.3710     0.3114  -1.191    0.249    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.6964 on 18 degrees of freedom
Multiple R-squared:  0.07308,	Adjusted R-squared:  0.02158 
F-statistic: 1.419 on 1 and 18 DF,  p-value: 0.249



---

## Linear Models

In [89]:
%%R
ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
group <- gl(2, 10, 20, labels = c("Ctl","Trt"))
weight <- c(ctl, trt)
# summary(lm(weight~group))

anova(lm.D9 <- lm(weight~group))

Analysis of Variance Table

Response: weight
          Df Sum Sq Mean Sq F value Pr(>F)
group      1 0.6882 0.68820  1.4191  0.249
Residuals 18 8.7292 0.48496               


In [92]:
%%R
summary(lm.D90 <- lm(weight ~ group - 1))# omitting intercept


Call:
lm(formula = weight ~ group - 1)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.0710 -0.4938  0.0685  0.2462  1.3690 

Coefficients:
         Estimate Std. Error t value Pr(>|t|)    
groupCtl   5.0320     0.2202   22.85 9.55e-15 ***
groupTrt   4.6610     0.2202   21.16 3.62e-14 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.6964 on 18 degrees of freedom
Multiple R-squared:  0.9818,	Adjusted R-squared:  0.9798 
F-statistic: 485.1 on 2 and 18 DF,  p-value: < 2.2e-16



In [96]:
from rpy2.robjects import FloatVector
from rpy2.robjects.packages import importr
stats = importr('stats')
base = importr('base')

ctl = FloatVector([4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14])
trt = FloatVector([4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69])
group = base.gl(2, 10, 20, labels = ['Ctl','Trt'])
weight = ctl + trt

robjects.globalenv['weight'] = weight
robjects.globalenv['group'] = group
lm_D9 = stats.lm('weight ~ group')
print(stats.anova(lm_D9))

# omitting the intercept
lm_D90 = stats.lm('weight ~ group - 1')
print(base.summary(lm_D90))

Analysis of Variance Table

Response: weight
          Df Sum Sq Mean Sq F value Pr(>F)
group      1 0.6882 0.68820  1.4191  0.249
Residuals 18 8.7292 0.48496               


Call:
(function (formula, data, subset, weights, na.action, method = "qr", 
    model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE, 
    contrasts = NULL, offset, ...) 
{
    ret.x <- x
    ret.y <- y
    cl <- match.call()
    mf <- match.call(expand.dots = FALSE)
    m <- match(c("formula", "data", "subset", "weights", "na.action", 
        "offset"), names(mf), 0L)
    mf <- mf[c(1L, m)]
    mf$drop.unused.levels <- TRUE
    mf[[1L]] <- quote(stats::model.frame)
    mf <- eval(mf, parent.frame())
    if (method == "model.frame") 
        return(mf)
    else if (method != "qr") 
            method), domain = NA)
    mt <- attr(mf, "terms")
    y <- model.response(mf, "numeric")
    w <- as.vector(model.weights(mf))
    if (!is.null(w) && !is.numeric(w)) 
        stop("'weights' must be a numeric 

<rpy2.rinterface_lib.sexp.StrSexpVector object at 0x115ca6f80> [RTYPES.STRSXP]
