{ "metadata": { "name": "rpy2_demo" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Rpy2 demonstration" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "by Dan McGlinn (danmcglinn@gmail.com), 11-07-12" ] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Part 1: Import necessary modules" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import rpy2.robjects as robj\n", "import rpy2.rinterface as ri\n", "import rpy2.robjects.vectors as rv\n", "from rpy2.robjects.packages import importr\n", "\n", "import numpy as np" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Part 2: Interact with the R interpreter by creating and calling R objects" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# Interact with R using strings\n", "print robj.r('3 + 4')\n", "\n", "robj.r('x = 3')\n", "print robj.r('x')" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[1] 7\n", "\n", "[1] 3\n", "\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "# Interact with R using attributes\n", "print robj.r.x\n", "print \"why are the two subsequent calls to ls producing different results?\"\n", "print robj.r.ls\n", "print robj.r.ls()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[1] 3\n", "\n", "why are the two subsequent calls to ls producing different results?\n", "function (name, pos = -1, envir = as.environment(pos), all.names = FALSE, \n", " pattern) \n", "{\n", " if (!missing(name)) {\n", " nameValue <- try(name, silent = TRUE)\n", " if (identical(class(nameValue), \"try-error\")) {\n", " name <- substitute(name)\n", " if (!is.character(name)) \n", " name <- deparse(name)\n", " 
warning(sQuote(name), \" converted to character string\")\n", " pos <- name\n", " }\n", " else pos <- nameValue\n", " }\n", " all.names <- .Internal(ls(envir, all.names))\n", " if (!missing(pattern)) {\n", " if ((ll <- length(grep(\"[\", pattern, fixed = TRUE))) && \n", " ll != length(grep(\"]\", pattern, fixed = TRUE))) {\n", " if (pattern == \"[\") {\n", " pattern <- \"\\\\[\"\n", " warning(\"replaced regular expression pattern '[' by '\\\\\\\\['\")\n", " }\n", " else if (length(grep(\"[^\\\\\\\\]\\\\[<-\", pattern))) {\n", " pattern <- sub(\"\\\\[<-\", \"\\\\\\\\\\\\[<-\", pattern)\n", " warning(\"replaced '[<-' by '\\\\\\\\[<-' in regular expression pattern\")\n", " }\n", " }\n", " grep(pattern, all.names, value = TRUE)\n", " }\n", " else all.names\n", "}\n", "\n", "\n", "\n", "[1] \"x\"\n", "\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "# create a python vector and compute R stats on it\n", "x = rv.IntVector(range(0,11))\n", "print \"why are the two subsequent calls to mean producing different results?\"\n", "print robj.r('mean(x)')\n", "print robj.r.mean(x)\n", "\n", "print \"Here are some other stats\"\n", "print \"Sum\"\n", "print robj.r.sum(x)\n", "print \"Variance\"\n", "print robj.r.var(x)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "why are the two subsequent calls to mean producing different results?\n", "[1] 3\n", "\n", "[1] 5\n", "\n", "Here are some other stats\n", "Sum\n", "[1] 55\n", "\n", "Variance\n", "[1] 11\n", "\n" ] } ], "prompt_number": 4 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Part 3: Create and interact with multi-dimensional R objects" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# create R matrices\n", "v = robj.FloatVector(robj.r.rnorm(20))\n", "m = robj.r.matrix(v, ncol = 2)\n", "print(m)\n", "print \"According to R the column sums are\"\n", "print robj.r.apply(m, 2, 'sum')\n", 
"\n", "# convert matrix into a numpy array\n", "m_np = np.array(m)\n", "print(m_np)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " [,1] [,2]\n", " [1,] -0.6156952 0.1543477\n", " [2,] -2.5999678 -0.3216966\n", " [3,] 0.9813665 -0.4068260\n", " [4,] -1.4399858 0.4291471\n", " [5,] 0.8828780 -0.2680895\n", " [6,] -0.7911376 1.2824338\n", " [7,] -1.1457275 1.0733889\n", " [8,] -0.3995717 0.1192389\n", " [9,] -1.2495037 1.5853373\n", "[10,] 1.6610916 0.3145800\n", "\n", "According to R the column sums are\n", "[1] -4.716253 3.961862\n", "\n", "[[-0.61569516 0.15434772]\n", " [-2.59996779 -0.32169662]\n", " [ 0.9813665 -0.40682599]\n", " [-1.43998583 0.42914708]\n", " [ 0.882878 -0.26808947]\n", " [-0.79113756 1.28243381]\n", " [-1.1457275 1.0733889 ]\n", " [-0.39957175 0.11923892]\n", " [-1.24950375 1.58533734]\n", " [ 1.66109159 0.31458002]]\n" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "# read in data as an R data.frame\n", "faithful = robj.DataFrame.from_csvfile('faithful.dat', sep=' ')\n", "print type(faithful)\n", "print faithful.names" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\n", "[1] \"eruptions\" \"waiting\" \n", "\n" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "# subset columns\n", "# extract as a DataFrame\n", "col_one = faithful.rx(1)\n", "print robj.r.head(col_one)\n", "col_one = faithful.rx('eruptions')\n", "print robj.r.head(col_one)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " eruptions\n", "1 3.600\n", "2 1.800\n", "3 3.333\n", "4 2.283\n", "5 4.533\n", "6 2.883\n", "\n", " eruptions\n", "1 3.600\n", "2 1.800\n", "3 3.333\n", "4 2.283\n", "5 4.533\n", "6 2.883\n", "\n" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "# extract as a 
Vector\n", "print faithful.rx2(1)[0:11]\n", "print faithful.rx2('eruptions')[0:11]" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " [1] 3.600 1.800 3.333 2.283 4.533 2.883 4.700 3.600 1.950 4.350 1.833\n", "\n", " [1] 3.600 1.800 3.333 2.283 4.533 2.883 4.700 3.600 1.950 4.350 1.833\n", "\n" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "# subset rows\n", "print faithful.rx(robj.IntVector(range(1,6)), True)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " eruptions waiting\n", "1 3.600 79\n", "2 1.800 54\n", "3 3.333 74\n", "4 2.283 62\n", "5 4.533 85\n", "\n" ] } ], "prompt_number": 9 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Part 4: Work with R and python functions" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# create an R function and call it in the Python interpreter\n", "robj.r('''\n", " # Area of a circle with the given radius (pi * r^2);\n", " # when verbose is TRUE a message is printed first.\n", " calc_area_r = function(radius, verbose=FALSE)\n", " {\n", " if (verbose) {\n", " cat(\"I am calling calc_area_r().\\n\")\n", " }\n", " return( pi * radius^2 )\n", " }\n", " calc_area_r(3)\n", " ''')\n", "\n", "# pass a Python bool; rpy2 converts it to an R logical\n", "print robj.r.calc_area_r(5, verbose=True)\n", "\n", "# Make the calc_area_r a callable python function\n", "calc_area_py = robj.r.calc_area_r\n", "print \"Here is the result of calc_area_py()\"\n", "print calc_area_py(5)\n", "print type(calc_area_py)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "I am calling calc_area_r().\n", "[1] 78.53982\n", "\n", "Here is the result of calc_area_py()\n", "[1] 78.53982\n", "\n", "\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "# Rpy supports keyword arguments if there is no pythonic conflict\n", "robj.r.rnorm(10, mean=2, sd=3)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", 
"prompt_number": 11, "text": [ "\n", "[0.056675, -2.211890, 1.191051, ..., 0.768670, -1.918076, -2.066839]" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "# this will draw an error\n", "robj.r.rpois(10, lambda=3) " ], "language": "python", "metadata": {}, "outputs": [ { "ename": "SyntaxError", "evalue": "invalid syntax (, line 2)", "output_type": "pyerr", "traceback": [ "\u001b[1;36m File \u001b[1;32m\"\"\u001b[1;36m, line \u001b[1;32m2\u001b[0m\n\u001b[1;33m robj.r.rpois(10, lambda=3)\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "# creating a keyword argument dictionary solves the problem\n", "robj.r.rpois(10, **{'lambda' : 3})" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 14, "text": [ "\n", "[1.000000, 2.000000, 2.000000, ..., 2.000000, 5.000000, 4.000000]" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "# Make a python function and call it within R\n", "\n", "ri.initr()\n", "\n", "stats = importr('stats')\n", "\n", "def quad_f(x):\n", " x = x[0]\n", " return x ** -2 + 0.5 * x\n", " \n", "# wrap the function f so it can be exposed to R\n", "quad_fr = ri.rternalize(quad_f)\n", " \n", "# define the interval to find the minimum over\n", "interval = rv.IntVector((0, 10))\n", " \n", "# call R's optimize()\n", "res = stats.optimize(quad_fr, interval)\n", "print res" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "$minimum\n", "[1] 1.587388\n", "\n", "$objective\n", "[1] 1.190551\n", "\n", "\n" ] } ], "prompt_number": 15 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Part 5: Creating and exporting R graphics" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# plotting\n", "x = robj.r.runif(10)\n", "y = 
robj.r.rnorm(10)\n", "\n", "robj.r.plot(x, y, xlab=\"runif rv\", ylab=\"rnorm rv\", col=\"red\")" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 16, "text": [ "rpy2.rinterface.NULL" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "# close that graphic\n", "robj.r('dev.off()')" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 17, "text": [ "\n", "[ 1]" ] } ], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "# export the graphic as a pdf\n", "robj.r.pdf('example_r_plot.pdf')\n", "robj.r.plot(x, y, xlab=\"runif rv\", ylab=\"rnorm rv\", col=\"red\")\n", "robj.r('dev.off()')" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 18, "text": [ "\n", "[ 1]" ] } ], "prompt_number": 18 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Part 6: Importing R libraries and pythonizing R" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# import R libraries\n", "vegan = importr('vegan')\n", "\n", "comm_mat = robj.r.matrix(robj.r.rpois(100, 1), 10, 10)\n", "ca = vegan.cca(comm_mat)\n", "print ca\n", "\n", "robj.r.plot(ca)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Call: cca(X = structure(c(1, 1, 1, 2, 0, 1, 0, 0, 2, 1, 0, 1, 0, 1, 2,\n", "0, 1, 1, 1, 0, 1, 1, 0, 4, 1, 1, 0, 1, 0, 1, 2, 2, 1, 1, 0, 0, 1, 2, 0,\n", "1, 2, 1, 0, 1, 0, 0, 0, 0, 2, 1, 1, 1, 1, 1, 0, 0, 0, 3, 2, 2, 2, 2, 0,\n", "1, 1, 1, 2, 3, 0, 1, 1, 1, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 0, 2,\n", "1, 3, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 2), .Dim = c(10L, 10L)))\n", "\n", " Inertia Rank\n", "Total 0.7991 \n", "Unconstrained 0.7991 9\n", "Inertia is mean squared contingency coefficient \n", "\n", "Eigenvalues for unconstrained axes:\n", " CA1 CA2 CA3 CA4 CA5 CA6 CA7 CA8 \n", "0.258703 0.161061 0.139109 0.121747 
0.050726 0.043811 0.017398 0.005068 \n", " CA9 \n", "0.001450 \n", "\n", "\n" ] }, { "output_type": "pyout", "prompt_number": 19, "text": [ "\n", "[Matrix, Matrix]\n", " species: \n", " \n", "[0.214855, -0.793607, 0.211679, ..., -0.656879, 0.178845, 0.737441]\n", " sites: \n", " \n", "[0.985231, 0.310632, -0.389409, ..., 1.004003, -0.708114, 0.545716]" ] } ], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "# close the graphical device\n", "robj.r('dev.off()')" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 20, "text": [ "\n", "[ 1]" ] } ], "prompt_number": 20 }, { "cell_type": "code", "collapsed": false, "input": [ "# make R pythonic by making it clear which functions are attributed to which packages\n", "base = importr('base')\n", "stats = importr('stats')\n", "graphics = importr('graphics')\n", "\n", "m = base.matrix(stats.rnorm(100), ncol = 5)\n", "pca = stats.princomp(m)\n", "graphics.plot(pca, main = \"Eigen values\")\n", "stats.biplot(pca, main = \"biplot\")" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 21, "text": [ "rpy2.rinterface.NULL" ] } ], "prompt_number": 21 } ], "metadata": {} } ] }