# rpy2

https://rpy2.github.io/doc/latest/html/introduction.html

rpy2 provides 2 levels of interface with R: - low-level (rpy2.rinterface, and rpy2:rinterface_lib) - high-level (rpy2.robjects)

In [3]:
import pandas as pd
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri# Defining the R script and loading the instance in Python
import rpy2
print(rpy2.__version__)

3.4.4


In [4]:
import rpy2.situation
for row in rpy2.situation.iter_info():
    print(row)

rpy2 version:
3.4.4
Python version:
3.8.5 (default, Sep  3 2020, 21:29:08) [MSC v.1916 64 bit (AMD64)]
Looking for R's HOME:
    Environment variable R_HOME: None
    InstallPath in the registry: C:\Program Files\R\R-4.0.5
    Environment variable R_USER: None
    Environment variable R_LIBS_USER: None
R version:
    In the PATH: 
    Loading R library from rpy2: OK
Additional directories to load R packages from:
None
C extension compilation:


In [5]:
### importing the R package in the embedded R, and is exposing all R objects in that package as Python objects.

from rpy2.robjects.packages import importr
# import R's "base" package
base = importr('base')

# import R's "utils" package
utils = importr('utils')

Install additional R packages. The code can be part of Python code you distribute if you are relying on CRAN packages not distributed with R by default.

In [19]:
### Install a R package

# import rpy2's package module
import rpy2.robjects.packages as rpackages

# import R's utility package
utils = rpackages.importr('utils')

# select a mirror for R packages
utils.chooseCRANmirror(ind=1) 

# R package names
packnames = ('ggplot2', 'ECoL')

# R vector of strings
from rpy2.robjects.vectors import StrVector

# Selectively install what needs to be install.
# We are fancy, just because we can.
names_to_install = [x for x in packnames if not rpackages.isinstalled(x)]
if len(names_to_install) > 0:
    utils.install_packages(StrVector(names_to_install))

Use robjects.r to interact with R

In [3]:
pi = robjects.r['pi']
pi

0
3.141593


In [4]:
pi[0]

3.141592653589793

In [13]:
robjects.r('''
        # create a function `f`
        f <- function(r, verbose=FALSE) {
            if (verbose) {
                cat("I am calling f().\n")
            }
            2 * pi * r
        }
        # call the function `f` with argument value 3
        f(3)
        ''')

0
18.849556


In [17]:
r_f = robjects.r['f']
r_f(4)

0
25.132741


Try ECoL

In R Studio:

    > library("ECoL")
    > complexity(iris[,1:3], iris[,5])
     overlapping.F1.mean    overlapping.F1.sd overlapping.F1v.mean   overlapping.F1v.sd  overlapping.F2.mean 
             0.346379901          0.271980407          0.038708394          0.052046615          0.023931624 
       overlapping.F2.sd  overlapping.F3.mean    overlapping.F3.sd  overlapping.F4.mean    overlapping.F4.sd 
             0.041450789          0.123333333          0.213619600          0.110000000          0.190525589 
         neighborhood.N1 neighborhood.N2.mean   neighborhood.N2.sd neighborhood.N3.mean   neighborhood.N3.sd 
             0.173333333          0.238447518          0.192953453          0.086666667          0.282288506 
    neighborhood.N4.mean   neighborhood.N4.sd neighborhood.T1.mean   neighborhood.T1.sd     neighborhood.LSC 
             0.046666667          0.211630508          0.031250000          0.061064335          0.856622222 
       linearity.L1.mean      linearity.L1.sd    linearity.L2.mean      linearity.L2.sd    linearity.L3.mean 
             0.007050287          0.012211455          0.020000000          0.034641016          0.006666667 
         linearity.L3.sd    dimensionality.T2    dimensionality.T3    dimensionality.T4           balance.C1 
             0.011547005          0.020000000          0.013333333          0.666666667          1.000000000 
              balance.C2      network.Density      network.ClsCoef    network.Hubs.mean      network.Hubs.sd 
             0.000000000          0.848769575          0.271609303          0.874332820          0.240090885 

In [9]:
metrics = robjects.r('''
        library("ECoL", lib.loc = "ECoL/R/")
        complexity(iris[,1:4], iris[,5])
        ''')

In [10]:
idx = 0
for v in metrics:
    print(metrics.names[idx], ": " ,v)
    idx += 1

overlapping.F1.mean :  0.2775641932566494
overlapping.F1.sd :  0.2612622587707821
overlapping.F1v.mean :  0.026799629786085757
overlapping.F1v.sd :  0.0337704173653305
overlapping.F2.mean :  0.006381766381766379
overlapping.F2.sd :  0.011053543615254367
overlapping.F3.mean :  0.12333333333333334
overlapping.F3.sd :  0.21361959960016152
overlapping.F4.mean :  0.043333333333333335
overlapping.F4.sd :  0.07505553499465135
neighborhood.N1 :  0.10666666666666667
neighborhood.N2.mean :  0.19814444234467493
neighborhood.N2.sd :  0.1466933405777466
neighborhood.N3.mean :  0.06
neighborhood.N3.sd :  0.23828244477915883
neighborhood.N4.mean :  0.013333333333333334
neighborhood.N4.sd :  0.11508191810497581
neighborhood.T1.mean :  0.05555555555555556
neighborhood.T1.sd :  0.09094996110409614
neighborhood.LSC :  0.8164
linearity.L1.mean :  0.0043356927104076055
linearity.L1.sd :  0.007509640060431987
linearity.L2.mean :  0.013333333333333345
linearity.L2.sd :  0.023094010767585053
linearity.L3.mean

Run this R code:

    ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
    trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
    group <- gl(2, 10, 20, labels = c("Ctl","Trt"))
    weight <- c(ctl, trt)

    anova(lm.D9 <- lm(weight ~ group))

    summary(lm.D90 <- lm(weight ~ group - 1))# omitting intercept

In [2]:
import rpy2.robjects as robjects
from rpy2.robjects import FloatVector
from rpy2.robjects.packages import importr
stats = importr('stats')
base = importr('base')

ctl = FloatVector([4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14])
trt = FloatVector([4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69])
group = base.gl(2, 10, 20, labels = ['Ctl','Trt'])
weight = ctl + trt

robjects.globalenv['weight'] = weight
robjects.globalenv['group'] = group
lm_D9 = stats.lm('weight ~ group')
print(stats.anova(lm_D9))

# omitting the intercept
lm_D90 = stats.lm('weight ~ group - 1')
print(base.summary(lm_D90))

Analysis of Variance Table



Response: weight

          Df Sum Sq Mean Sq F value Pr(>F)

group      1 0.6882 0.68821  1.4191  0.249

Residuals 18 8.7292 0.48496               



Call:

(function (formula, data, subset, weights, na.action, method = "qr", 

    model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE, 

    contrasts = NULL, offset, ...) 

{

    ret.x <- x

    ret.y <- y

    cl <- match.call()

    mf <- match.call(expand.dots = FALSE)

    m <- match(c("formula", "data", "subset", "weights", "na.action", 

        "offset"), names(mf), 0L)

    mf <- mf[c(1L, m)]

    mf$drop.unused.levels <- TRUE

    mf[[1L]] <- quote(stats::model.frame)

    mf <- eval(mf, parent.frame())

    if (method == "model.frame") 

        return(mf)

    else if (method != "qr") 


            method), domain = NA)

    mt <- attr(mf, "terms")

    y <- model.response(mf, "numeric")

    w <- as.vector(model.weights(mf))

    if (!is.null(w) && !is.numeric(w)) 

        stop

In [5]:
lm_D9.rclass

<rpy2.rinterface.StrSexpVector - Python:0x000002641FF88090 / R:0x00000264219BF930>

In [7]:
print(lm_D9.names)

 [1] "coefficients"  "residuals"     "effects"       "rank"         

 [5] "fitted.values" "assign"        "qr"            "df.residual"  

 [9] "contrasts"     "xlevels"       "call"          "terms"        

[13] "model"        



In [13]:
from rpy2.robjects import NA_Real
from rpy2.rlike.container import TaggedList
from rpy2.robjects.packages import importr

base = importr('base')

# create a numerical matrix of size 100x10 filled with NAs
m = base.matrix(NA_Real, nrow=100, ncol=10)

# fill the matrix
for row_i in range(1, 100+1):
    for col_i in range(1, 10+1):
        m.rx[TaggedList((row_i, ), (col_i, ))] = row_i + col_i * 100

TypeError: 'int' object is not iterable

Setup notebook

In [21]:
from functools import partial
from rpy2.ipython import html
html.html_rdataframe=partial(html.html_rdataframe, table_class="docutils")

In [23]:
from rpy2.robjects.packages import importr
utils = importr('utils')

dataf = utils.read_csv('https://raw.githubusercontent.com/jakevdp/PythonDataScienceHandbook/'
                       'master/notebooks/data/california_cities.csv')

In [24]:
import rpy2.ipython.html
rpy2.ipython.html.init_printing()
dataf

Unnamed: 0,Unnamed: 1,X,city,latd,longd,...,area_water_km2,area_water_percent
0,1,0,Adelanto,34.57611111111112,-117.43277777777779,...,0.046,0.03
1,2,1,AgouraHills,34.15333333333333,-118.76166666666667,...,0.076,0.37
2,3,2,Alameda,37.75611111111111,-122.27444444444444,...,31.983,53.79
3,4,3,Albany,37.886944444444445,-122.29777777777778,...,9.524,67.28
4,5,4,Alhambra,34.081944444444446,-118.135,...,0.003,0.01
5,6,5,AlisoViejo,33.575,-117.72555555555556,...,0.0,0.0
6,7,6,Alturas,41.48722222222222,-120.5425,...,0.036000000000000004,0.57
7,8,7,AmadorCity,38.419444444444444,-120.82416666666666,...,0.0,0.0
...,...,...,...,...,...,...,...,...
480,481,480,Yucaipa,34.030277777777776,-117.04861111111111,...,0.013000000000000001,0.02


In [33]:
stats = importr('stats')
base = importr('base')
clf = stats.lm('elevation_m ~ latd + longd', data=dataf)
clf[0]

0,1,2
16206.30871116903,172.50868495534922,185.73263719825687


In [38]:
import rpy2.robjects.lib.ggplot2 as gp
from rpy2.ipython.ggplot import image_png
from rpy2.robjects.vectors import IntVector
from rpy2.robjects import rl

p = (gp.ggplot(dataf) +
     gp.aes(x=r1('longd'),
            y=r1('latd'),
            color=r1('population_total'),
            size=r1('area_total_km2')) +
     gp.geom_point(alpha=0.5) +
     # Axis definitions.
     gp.scale_x_continuous('Longitude') +
     gp.scale_y_continuous('Latitude') +
     # Custom size range.
     gp.scale_size(range=IntVector([1, 18])) +
     # Transform for pop -> color mapping
     gp.scale_color_continuous(trans='log10') +
     # Title.
     gp.ggtitle('California Cities: Area and Population') +
     # Plot theme and text size.
     gp.theme_light(base_size=16))
image_png(p)

ImportError: cannot import name 'rl' from 'rpy2.robjects' (C:\ProgramData\Anaconda3\lib\site-packages\rpy2\robjects\__init__.py)

Run script

In [None]:
r = robjects.r

In [None]:
r['source']('preprocess.R')# Loading the function we have defined in R.
filter_country_function_r = robjects.globalenv['filter_country']# Reading and processing data
df = pd.read_csv("Country-Sales.csv")#converting it into r object for passing into r function
df_r = pandas2ri.ri2py(df)
#Invoking the R function and getting the result
df_result_r = filter_country_function_r(df_r, 'USA')
#Converting it back to a pandas dataframe.
df_result = pandas2ri.py2ri(df_result_r)