{
 "metadata": {
  "name": "Corpus Analysis (new)"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Corpus Analysis\n",
      "\n",
      "## Overview\n",
      "\n",
      "In this section we'll look at the characteristics of the collected corpus."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import book_classification as bc\n",
      "import pandas\n",
      "import shelve\n",
      "\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Read 'aBookCollection' from the shelf and build a DataFrame from it\n",
      "# via its as_dataframe() method.\n",
      "myShelf = shelve.open(\"storage_new.db\")\n",
      "try:\n",
      "    aBookCollection = myShelf['aBookCollection']\n",
      "    aDataFrame = aBookCollection.as_dataframe()\n",
      "finally:\n",
      "    # Close explicitly, even on error: `del` alone only unbinds the name\n",
      "    # and leaves the shelf open until it happens to be garbage-collected.\n",
      "    myShelf.close()\n",
      "del myShelf"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Summary statistics for the first two columns (positions 0 and 1).\n",
      "# `.iloc` replaces `DataFrame.icol`, which is deprecated and has been\n",
      "# removed from later pandas releases.\n",
      "aDataFrame.iloc[:, [0, 1]].describe()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "/home/david/.local/lib/python3.3/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.\n",
        "\n",
        " warnings.warn(d.msg, DeprecationWarning)\n",
        "/home/david/.local/lib/python3.3/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.\n",
        "\n",
        " warnings.warn(d.msg, DeprecationWarning)\n"
       ]
      },
      { "html": [ "
\n", " | Title | \n", "Author | \n", "
---|---|---|
count | \n", "597 | \n", "597 | \n", "
unique | \n", "586 | \n", "47 | \n", "
top | \n", "A Christmas Carol | \n", "Nathaniel Hawthorne | \n", "
freq | \n", "5 | \n", "94 | \n", "