{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Strings can do operations on themselves #\n", "`.lower()`, `.upper()`,`.capitalize()`" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "\"funKY tOwn\".capitalize()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "\"funky tOwn\".lower()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "\"fUNKY tOWN\".swapcase()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "How you call this:\n", "> `.split([sep [,maxsplit]])`" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "\"funKY tOwn\".split()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "\"funKY tOwn\".capitalize().split()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "[x.capitalize() for x in \"funKY tOwn\".split()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "\"I want to take you to, funKY tOwn\".split(\"u\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "\"I want to take you to, funKY tOwn\".split(\"you\")" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "  " 
] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## `.strip()`, `.join()`, `.replace()` ##" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "csv_string = 'Dog,Cat,Spam,Defenestrate,1, 3.1415 \\n\\t'\n", "csv_string.strip()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "clean_list = [x.strip() for x in csv_string.split(\",\")]\n", "print(clean_list)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "`.join()` allows you to glue a list of strings together with a certain string" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "print(\",\".join(clean_list))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "print(\"\\t\".join(clean_list))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "`.replace()` strings in strings" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "csv_string = 'Dog,Cat,Spam,Defenestrate,1, 3.1415 \\n\\t'\n", "alt_csv = csv_string.strip().replace(' ','')\n", "print(alt_csv)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "print(csv_string.strip().replace(' ','').replace(',','\\t'))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "  " ] }, { "cell_type": "markdown", "metadata": { 
"slideshow": { "slide_type": "slide" } }, "source": [ "## `.find()` ##\n", "incredibly useful searching,\n", "returning the index of the search" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "s = 'My Funny Valentine'\n", "s.find(\"y\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "s.find(\"y\",2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "s[s.find(\"Funny\"):]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "s.find(\"z\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "ss = [s,\"Argentine\",\"American\",\"Quarentine\"]\n", "for thestring in ss:\n", " if thestring.find(\"tine\") != -1:\n", " print(\"'\" + str(thestring) + \"' contains 'tine'.\")" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "  " ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## `string` module ##\n", "exposes useful variables and functions " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "import string" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "string.ascii_letters" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, 
"outputs": [], "source": [ "string.digits" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "  " ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## String Formatting ##\n", "casting using str() is very limited\n", "Python gives access to C-like string formatting" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ " usage: “%(format)” % (variable)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "import math\n", "print(\"My favorite integer is %i and my favorite float is %f,\\n\" \n", " \" which to three decimal places is %.3f and in exponential form is %e\" \n", " % (3,math.pi,math.pi,math.pi))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "common formats:\n", "\n", " f (float), i (integer), s (string), g (nicely formatting floats)\n", " \n", "http://docs.python.org/release/2.7.2/library/stdtypes.html#string-formatting-operations" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "  " ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## String Formatting ##\n", "\n", "> % escapes “%”" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "print(\"I promise to give 100%% effort whenever asked of %s.\" % (\"me\"))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "> \\+ and zero-padding" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "print(\"%f\\n%+f\\n%f\\n%010f\\n%10s\" % 
(math.pi,math.pi,-1.0*math.pi,math.pi,\"pi\"))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "  " ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## String Formatting ##\n", "\n", "the (somewhat) preferred way\n", "\n", "is `string.format(value0,value1,....)`" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "'on {0}, I feel {1}'.format(\"saturday\",\"groovy\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "'on {}, I feel {}'.format(\"saturday\",\"groovy\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "'on {0}, I feel {1}'.format([\"saturday\",\"groovy\"])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "'on {0}, I feel {0}'.format([\"saturday\",\"groovy\"])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "'on {0}, I feel {0}'.format(\"saturday\",\"groovy\")" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "you can assign by argument position or by name" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "'{desire} to {place}'.format(desire='Fly me',\\\n", " place='The Moon')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "'{desire} 
to {place} or else I wont visit {place}.'.format( \\\n", " desire='Fly me',place='The Moon')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "f = {\"desire\": \"I want to take you\", \"place\": \"funky town\"}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "'{desire} to {place}'.format(**f)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "  " ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Formatting comes after a colon (:) ###" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "(\"%03.2f\" % 3.14159) == \"{:03.2f}\".format(3.14159)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "\"{0:03.2f}\".format(3.14159,42)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "\"{1:03.2f}\".format(3.14159,42)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "# format also supports binary numbers\n", "\"int: {0:d}; hex: {0:x}; oct: {0:o}; bin: {0:b}\".format(42)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "  " ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# File I/O (read/write) #\n", "\n", "`open()` is a builtin function; `.close()` is a method of the file object it returns" ] }, { "cell_type": "code", "execution_count": null, "metadata": 
{ "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "%%file mydata.dat\n", "This is my zeroth file I/O. Zing!" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "file_stream = open('mydata.dat','r') ; print(type(file_stream))\n", "file_stream.close()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "open modes: `r` (read), `w` (write), `r+` (read + update), `rb` (read as a binary stream, ...), `rt` (read as text file)\n", "\n", "Writing data: `.write()` or `.writelines()`" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "f= open(\"test.dat\",\"w\")\n", "f.write(\"This is my first file I/O. Zing!\")\n", "f.close()\n", "!cat test.dat" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "f= open(\"test.dat\",\"w\")\n", "f.writelines([\"a=['This is my second file I/O.']\\n\",\"Take that Dr. 
Zing!\\n\"])\n", "f.close()\n", "!cat test.dat" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "Likewise, there is `.readlines()` and `.read()`" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "f= open(\"test.dat\",\"r\")\n", "data = f.readlines()\n", "f.close() ; print(data)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "  " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "%%file tabbify_my_csv.py\n", "\"\"\"\n", "small copy program that turns a csv file into a tabbed file\n", "\n", " PYTHON BOOT CAMP EXAMPLE; \n", " created by Josh Bloom at UC Berkeley, 2010,2012,2013,2015 (ucbpythonclass+bootcamp@gmail.com)\n", "\n", "\"\"\"\n", "import os\n", "\n", "def tabbify(infilename,outfilename,ignore_comments=True,comment_chars=\"#;/\"):\n", "    \"\"\"\n", "    Copy infilename to outfilename, replacing every comma with a tab.\n", "\n", "    INPUT: infilename (path of the csv file to read)\n", "    OUTPUT: creates a file called outfilename\n", "\n", "    If ignore_comments is True, lines whose first character is in\n", "    comment_chars are copied unchanged. If infilename does not exist,\n", "    nothing is written and None is returned.\n", "    \"\"\"\n", "    if not os.path.exists(infilename):\n", "        return # do nothing if the file isn't there\n", "    # read the input completely before opening the output: opening the\n", "    # output first would truncate the data when both names are the same file\n", "    with open(infilename,\"r\") as infile:\n", "        inlines = infile.readlines()\n", "    outlines = []\n", "    for line in inlines:\n", "        if ignore_comments and (line[0] in comment_chars):\n", "            outlines.append(line) # leave comment lines untouched\n", "        else:\n", "            outlines.append(line.replace(\",\",\"\\t\"))\n", "    with open(outfilename,\"w\") as outfile:\n", "        outfile.writelines(outlines)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "%run tabbify_my_csv.py\n", "tabbify(\"google_share_price.csv\",\"google_share_price.tsv\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], 
"source": [ "!cat google_share_price.csv |head" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "!cat google_share_price.tsv |head" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "  " ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# File I/O (read/write) #\n", "\n", "`shutil` module is preferred for copying, archiving & removing files/directories\n", "\n", "http://docs.python.org/library/shutil.html#module-shutil\n", "\n", "\n", "`tempfile` module is used for the creation of temporary directories and files\n", "\n", "http://www.doughellmann.com/PyMOTW/tempfile/" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "import tempfile\n", "tmp = tempfile.TemporaryFile() ; type(tmp)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "tmp = tempfile.NamedTemporaryFile(suffix=\".csv\",\\\n", " prefix=\"boot\",dir=\"/tmp\",delete=False)\n", "print(tmp.name)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "tmp.write(bytes(\"# stock phrases of today's youth\\nWassup?!,OMG,LOL,BRB,Python\\n\",\"utf-8\"))\n", "tmp.close()\n", "!cat $tmp.name" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tmp = tempfile.NamedTemporaryFile(suffix=\".csv\",\\\n", " prefix=\"boot\",dir=\"/tmp\",delete=False)\n", "print(tmp.name)\n", "tmp.write(b\"# stock phrases of today's youth\\nWassup?!,OMG,LOL,BRB,Python\\n\")\n", "tmp.close()\n", "!cat $tmp.name" ] }, { "cell_type": "markdown", 
"metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "  " ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# `io` module `StringIO`/`BytesIO` #\n", "\n", "handy for making file-like objects out of strings" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "import io\n", "myfile = io.StringIO( \\\n", " \"# stock phrases of today's youth\\nWassup?!,OMG,LOL,BRB,Python\\n\")\n", "myfile.getvalue() ## get what we just wrote" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "myfile.seek(0) ## go back to the beginning\n", "myfile.readlines()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "myfile.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "myfile.write('not gonna happen')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "myfile = io.BytesIO(b\"# stock phrases of today's youth\\nWassup?!,OMG,LOL,BRB,Python\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "myfile.seek(2) ; myfile.write(b\"silly wah wah\") ; myfile.seek(0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "myfile.readlines()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# `subprocess` module #" ] }, { 
"cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "`subprocess` is the preferred way to interact with other programs, as you might do on the command line" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "from subprocess import *\n", "p = Popen(\"ls\", shell=True, stdout=PIPE) # list the directory\n", "p.pid # get the process ID of the new subprocess" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "print(p.stdout.readlines())" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "p = Popen(\"vanRossum-Trump-2016\", shell=True, stdout=PIPE,stderr=PIPE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "print(p.stderr.readlines())" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "it's often advisable to wait until the subprocess has finished" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "p = Popen(\"find .. 
-name '*.py'\", shell=True, stdout=PIPE,stderr=PIPE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "found, errors = p.communicate() ## this will block until the search is done; it also drains both pipes, so a long listing cannot stall the subprocess\n", "p.returncode" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "(c) J Bloom 2013-2015 All Rights Reserved" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 0 }