%%% This is a knitr document, not regular Sweave %%% http://yihui.name/knitr/ \documentclass[]{beamer} \usetheme{Singapore} \usepackage{hyperref} \usepackage{helvet} \usepackage{graphicx} \usepackage{tipa} \usepackage{booktabs} \usepackage{color} \usepackage[normalem]{ulem} \AtBeginSection[] { \begin{frame}{Outline} \tableofcontents[currentsection] \end{frame} } \ifdefined\knitrout \renewenvironment{knitrout}{\begin{footnotesize}}{\end{footnotesize}} \else \fi \mode \title{Murders in Philadelphia} \author{Josef Fruehwald} %\date{} \SweaveOpts{cache=F, fig.path='figures/murder-', error=F, warning=F,message=F} \begin{document} % very important to use option [fragile] for frames containing code output! \begin{frame} \titlepage \end{frame} \section{The Data} \begin{frame} \frametitle{The Data} The Philadelphia Inquirer has a Google Fusion table (\href{https://www.google.com/fusiontables/DataSource?snapid=S4035208e94}{\color{red} link}) where they have compiled publicly available data from the Philadelphia Police Department on every murder in Philadelphia County between 1988 and 2011. \end{frame} <>= philly <- read.csv("Murders in Philadelphia County 1988 - 2011.csv", na.strings = "") library(ggplot2) library(reshape2) library(plyr) library(mgcv) library(scales) options(width = 59) @ \begin{frame}[fragile] \frametitle{The Data} <>= nrow(philly) colnames(philly) @ \end{frame} \begin{frame}[fragile] \frametitle{The Data} <>= table(is.na(philly$cause)) table(is.na(philly$motive)) table(is.na(philly$time)) @ \end{frame} \begin{frame}[fragile] \frametitle{The Data} <>= library(reshape2) dcast(philly, race ~ weapon) @ \end{frame} <>= ## Process the Hour of the day philly$half <- NA philly$half[grepl("[aA]", philly$time, )] <- "AM" philly$half[grepl("[pP]", philly$time, )] <- "PM" philly$hour <- gsub("[aApP][mM]?", "", philly$time) philly$hour <- as.numeric(philly$hour) philly$hour[philly$hour > 12] <- NA philly$hour[is.na(philly$half)] <- NA philly$hour[philly$hour == 12 & !is.na(philly$hour)] <- 0 philly$hour[philly$half == "PM" & !is.na(philly$half)] <- philly$hour[philly$half == "PM" & !is.na(philly$half)] + 12 philly$hour3 <- philly$hour philly$hour3[is.na(philly$hour)] <- 0 philly$date <- paste(philly$date, philly$hour3) @ <>= dates <- as.POSIXlt(philly$date, format = "%m/%d/%y %H") philly$date <- as.Date(dates) philly$month <- as.factor(month.abb[dates$mon+1]) philly$month <- reorder(philly$month, dates$mon, min) philly$year <- dates$year + 1900 philly$monthn <- dates$mon + 1 ## Month date indicates just the month in Date format philly$month.date <- as.POSIXct(paste(dates$mon + 1,1, dates$year + 1900, sep = "/"), format = "%m/%d/%Y") philly$month.date <- as.Date(philly$month.date) @ <>= philly$WDay <- weekdays(dates) @ <>= ndays <- data.frame(month.date = seq(as.Date("1988-01-01"), as.Date("2011-12-01"), by = "month"), ndays = as.numeric(diff(seq(as.Date("1988-01-01"), as.Date("2012-01-01"), by = "month")))) philly <- join(philly, ndays, type="left") @ \begin{frame}[fragile] \frametitle{The Data} I've manipulated the data in my own way to include \begin{itemize} \item Month of year, \item month in Date format, \item year, \item a few different representations of the hour of day \item and the weekday. \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{By Weekday} <>= ggplot(philly, aes(WDay)) + stat_bin() + xlim("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday")+ theme_bw() @ <>= @ \end{frame} \begin{frame}[fragile] \frametitle{By Month} <>= ggplot(philly, aes(month)) + stat_bin() + theme_bw()+ scale_x_discrete(limits = month.abb) @ <>= @ \end{frame} <>= philly.hour <- subset(philly, !is.na(hour)) philly.hour$hour2 <- philly.hour$hour philly.hour$hour2[philly.hour$half == "PM"] <- philly.hour$hour[philly.hour$half == "PM"] - 24 philly.hour.count <- count(philly.hour, "hour2") @ \begin{frame}[fragile] \frametitle{By Hour} <>= ggplot(philly.hour.count, aes(hour2, freq)) + annotate("rect", ymin = 0, ymax = 230, xmin = -13, xmax = 0, fill = "grey50", alpha = 0.2)+ geom_vline(x = 0)+ geom_bar(stat = "identity") + annotate("text", x = 8, y = 175, label = "AM")+ annotate("text", x = -8, y = 175, label = "PM")+ ylab("count")+ xlab("hour")+ theme_bw() @ <>= @ \end{frame} \section{Two Philadelphias} \begin{frame}[fragile] \frametitle{Two Philadelphias} <<>>= philly.bw <- subset(philly, race %in% c("B","W") & !is.na(month)) philly.bw.count <- count(philly.bw, c("month.date","month", "race","ndays")) head(philly.bw.count) @ \end{frame} <>= revlog_trans <- function (base = exp(1)) { trans <- function(x) -(log(x, base)) inv <- function(x) (base^(-x)) trans_new(paste("revlog-", base, sep = ""), trans, inv, log_breaks(base = base), domain = c(1e-100, Inf)) } @ \begin{frame}[fragile] \frametitle{Two Philadelphias} <>= ggplot(philly.bw.count, aes(month.date, 1/(freq/ndays), color = race)) + geom_point()+ scale_color_hue(limits = c("B","W"))+ stat_smooth(method = gam, formula = y ~ s(x, bs = "cs"))+ #coord_trans(ytrans="log2")+ scale_y_continuous(breaks = c(0.5,1,2,7,14,21,31), trans = revlog_trans(base = 2))+ expand_limits(y = 0.5)+ ylab("1 murder every X days")+ xlab("Date (by month)")+ theme_bw()+ scale_color_brewer(palette = "Set1") @ <>= @ \end{frame} \begin{frame}[fragile] \frametitle{Two Philadelphias} <<>>= model1 <- glm(freq ~ month * race, offset = ndays, family = poisson, data = philly.bw.count) anova(model1, test = "Chisq") @ \end{frame} \begin{frame}[fragile] \frametitle{Two Philadelphias} <>= philly.bw.count$resid <- resid(model1) @ <>= ggplot(philly.bw.count, aes(month.date, resid, color = race)) + geom_hline(y = 0, color = "grey")+ geom_point()+ stat_smooth(method = gam, formula = y ~ s(x, bs = "cs"))+ ylab("residuals")+ xlab("Date (by month)")+ theme_bw()+ scale_color_brewer(palette = "Set1") @ <>= @ \end{frame} \begin{frame}[fragile] \frametitle{Two Philadelphias} <>= philly.bw.count.w <- count(philly.bw, c("month.date","month", "race","weapon","ndays")) philly.bw.count.w <- transform(philly.bw.count.w, weapon = reorder(weapon, -freq, sum)) philly.bw.count.w <- subset(philly.bw.count.w, weapon %in% c("FIREARM","KNIFE","HANDS")) head(philly.bw.count.w) @ \end{frame} \begin{frame}[fragile] <>= ## To capture 0 counts months.grid <- expand.grid(month.date = unique(philly.bw$month.date), race = c("B","W"), weapon = c("FIREARM","KNIFE","HANDS")) months.grid <- join(months.grid, ndays) months.grid$month <- month.abb[as.POSIXlt(months.grid$month.date)$mon+1] philly.bw.count.w <- merge(months.grid, philly.bw.count.w, all.x = T) philly.bw.count.w$freq[is.na(philly.bw.count.w$freq)] <- 0 @ \end{frame} \begin{frame} \frametitle{Two Philadelphias} <>= ggplot(subset(philly.bw.count.w, freq > 0), aes(month.date, ndays/freq, color = race)) + geom_point()+ stat_smooth(method = gam, formula = y ~ s(x, bs = "cs"))+ scale_y_continuous(breaks = c(0.5,1,2,7,14,21,31), trans = revlog_trans(base = 2))+ expand_limits(y = 0.5)+ ylab("1 murder every X days")+ xlab("Date (by month)")+ theme_bw()+ facet_wrap(~weapon)+ scale_color_brewer(palette = "Set1") @ <>= @ \end{frame} \begin{frame}[fragile] <<>>= model2 <- glm(freq ~ month + weapon*race, offset = ndays, data = philly.bw.count.w, family = poisson) anova(model2, test = "Chisq") @ \end{frame} \begin{frame} \frametitle{Two Philadelphias} \begin{itemize} \item White murder victims are $\Sexpr{round(exp(-(coef(model2)["weaponKNIFE"] + coef(model2)["weaponKNIFE:raceW"])), digits =2)}\times$ more likely to be shot than stabbed. \item African American murder victims are $\Sexpr{round(exp(-coef(model2)["weaponKNIFE"]), digits =2)}\times$ more likely to be shot than stabbed. \item African American murder victims are $\Sexpr{round(exp(coef(model2)["weaponKNIFE:raceW"]), digits = 2)}\times$ more likely to have been shot than White murder victims. \end{itemize} \end{frame} <>= philly.bw <- subset(philly.bw, age <= 200) philly.bw$age.10 <- as.factor(floor(philly.bw$age/10)*10) philly.bw.w <- subset(philly.bw, weapon %in% c("FIREARM","KNIFE","HANDS") ) philly.bw.w$weapon <- relevel(philly.bw.w$weapon, "KNIFE") philly.bw.w$weapon <- relevel(philly.bw.w$weapon, "FIREARM") philly.bw.w$race <- as.factor(as.character(philly.bw.w$race)) levels(philly.bw.w$race) <- c("African American","White") philly.bw.w$sex <- as.factor(as.character(philly.bw.w$sex)) levels(philly.bw.w$sex)<- c("Female","Male") @ \begin{frame} \frametitle{Two Philadelphias} <>= ggplot(philly.bw.w, aes(age.10, fill = weapon)) + stat_bin(position = "dodge", color = "black")+ theme_bw()+ scale_fill_brewer(palette = "Dark2") + xlab("Age")+ ylab("Murders (1988-2001)")+ facet_grid(sex ~ race) @ <>= @ \end{frame} \begin{frame}{} \begin{center} {\Huge Thanks} \end{center} \end{frame} \end{document}