{ "metadata": { "name": "multi_way_anova" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Multi-way ANOVA\n", "===============\n", "\n", "Some notes on who to run a multi-way ANOVA in Python.\n", "Primary sources include [Ben Bolker's book](http://www.math.mcmaster.ca/~bolker/emdbook/chap9A.pdf)\n", "and the [Statsmodels docs](http://statsmodels.sourceforge.net/devel/examples/generated/example_interactions.html#two-way-anova)." ] }, { "cell_type": "code", "collapsed": false, "input": [ "from urllib2 import urlopen\n", "\n", "import numpy as np\n", "\n", "import statsmodels.api as sm\n", "\n", "import pandas\n", "\n", "import matplotlib.pyplot as plt\n", "\n", "from statsmodels.formula.api import ols\n", "\n", "from statsmodels.graphics.api import interaction_plot, abline_plot\n", "\n", "from statsmodels.stats.anova import anova_lm\n", "\n", "url = 'http://stats191.stanford.edu/data/kidney.table'\n", "kidney_table = pandas.read_table(url, delimiter=\" *\")\n", "\n", "Days = kidney_table['Days']\n", "Duration = kidney_table" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "kidney_lm = ols('np.log(Days+1) ~ C(Duration) * C(Weight)', kidney_table).fit()\n", "anova_lm(kidney_lm)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dfsum_sqmean_sqFPR(>F)
C(Duration) 1 2.339693 2.339693 4.358293 0.041562
C(Weight) 2 16.971291 8.485645 15.806745 0.000004
C(Duration):C(Weight) 2 0.635658 0.317829 0.592040 0.556748
Residual 54 28.989198 0.536837 NaN NaN
\n", "
" ], "output_type": "pyout", "prompt_number": 2, "text": [ " df sum_sq mean_sq F PR(>F)\n", "C(Duration) 1 2.339693 2.339693 4.358293 0.041562\n", "C(Weight) 2 16.971291 8.485645 15.806745 0.000004\n", "C(Duration):C(Weight) 2 0.635658 0.317829 0.592040 0.556748\n", "Residual 54 28.989198 0.536837 NaN NaN" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "#This is the style of the examples in the docs, but data=rehab_table doesn't work\n", "#just rehab_table as the second argument does\n", "url = 'http://stats191.stanford.edu/data/rehab.csv'\n", "rehab_table = pandas.read_table(url, delimiter=\",\")\n", "rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "from_formula() takes at least 3 arguments (2 given)", "output_type": "pyerr", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0murl\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'http://stats191.stanford.edu/data/rehab.csv'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mrehab_table\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpandas\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_table\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdelimiter\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\",\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[0mrehab_lm\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mols\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Time ~ C(Fitness)'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mrehab_table\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;31mTypeError\u001b[0m: from_formula() takes at least 3 arguments (2 given)" ] } ], "prompt_number": 3 } ], "metadata": {} } ] }