Exploring Descriptive Statistics

Today, Steve Brooks is talking about running descriptive analyses.

The code below is reprinted from our GitHub repository.

The dataset that accompanies this code can also be downloaded here, directly from the GitHub repository. (Right-click the link and select “Save As…” to save the dataset.)

<pre>#INSTALL/LOAD NECESSARY PACKAGES#

# Packages this script depends on.
required_packages <- c(
  "data.table", "RCurl", "curl", "dplyr", "readr", "tidyr", "ggplot2"
)

# requireNamespace() reports whether a package can be loaded without attaching
# it, so this check has no side effect on the search path.
is_installed <- vapply(
  required_packages, requireNamespace, logical(1), quietly = TRUE
)
missing_packages <- required_packages[!is_installed]

# Install only what is actually missing instead of reinstalling every package
# whenever a single one is absent.
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

library(data.table)
library(RCurl)
library(curl)
library(dplyr)
library(readr)
library(ggplot2)
library(tidyr)

##IMPORT##

# Read the 10% sample of Philadelphia crime records. The path below points at
# the author's machine; change it to wherever you saved the downloaded CSV.
phila_crimes_10percent <- read_csv("C:/Users/BrookSte/Desktop/Learning R/r-learning-group--phila-crimes/phila_crimes_10percent.csv")

# View() opens a data viewer, which only makes sense in an interactive
# session; skip it when the script is run non-interactively.
if (interactive()) {
  View(phila_crimes_10percent)
}

##CLEAN + ADD VARs##

# Drop the unnamed row-index column picked up from the CSV. Depending on the
# readr version, read_csv() names such a column "X1" (older releases) or
# "...1" (readr >= 2.0), so both spellings are removed if present.
index_columns <- c("X1", "...1")
phila_crimes_10percent <- phila_crimes_10percent[
  setdiff(names(phila_crimes_10percent), index_columns)
]

summary(phila_crimes_10percent)

# Store District as a number so it can be compared against numeric district
# codes; values that cannot be parsed as numbers become NA.
phila_crimes_10percent$District <- as.numeric(phila_crimes_10percent$District)

summary(phila_crimes_10percent)

  #Create a variable to collapse Police Districts into Court Divisions#

# Police Districts belonging to each Court Division.
court_division_districts <- list(
  "Southwest" = c(12, 77, 18, 19, 16),
  "Central & South" = c(22, 9, 6, 17, 3, 1),
  "Northeast" = c(2, 15, 7, 8),
  "Northwest" = c(5, 14, 35, 39),
  "East" = c(25, 24, 26)
)

# Flatten the list into a two-column lookup: `values` holds the district
# number and `ind` holds the division it belongs to.
district_lookup <- stack(court_division_districts)

# Look each row's District up in the table. match() returns NA for districts
# that are not listed (and for missing District values), so those rows get an
# NA CourtDivision. This avoids attach(), which copies the data frame onto the
# search path and can silently mask other objects.
phila_crimes_10percent$CourtDivision <- as.character(
  district_lookup$ind[
    match(phila_crimes_10percent$District, district_lookup$values)
  ]
)

  #check to make sure variable is working properly#
  #useNA = "ifany" adds an NA row/column so unmapped records are counted
  #instead of being silently dropped from the table#
table(
  phila_crimes_10percent$District,
  phila_crimes_10percent$CourtDivision,
  useNA = "ifany"
)
  #Not all Districts get mapped onto a Court Division: any District that is
  #absent from the District lists used to build CourtDivision keeps the
  #initial NA value and is tallied in the NA column#

##DESCRIBE##
# Per-column overview of the whole data set.
summary(phila_crimes_10percent)

# Count the distinct non-missing values in each crime-classification column.
n_distinct(phila_crimes_10percent[["UCR Code"]], na.rm = TRUE)

n_distinct(phila_crimes_10percent[["General Crime Category"]], na.rm = TRUE)

  #UCR Code and Gen. Crime Category don't match exactly#
  #For which UCR Codes do we have multiple Crime Categories?#
# Cross-tabulate UCR Code against General Crime Category and reshape the
# counts into long form: one row per (code, category) pair with its Freq.
crimetypes <- as.data.frame(
  table(
    phila_crimes_10percent[["UCR Code"]],
    phila_crimes_10percent[["General Crime Category"]]
  )
)
# Mark pairs that never occur as missing so that drop_na() discards them,
# leaving only the combinations actually present in the data.
crimetypes$Freq <- replace(crimetypes$Freq, crimetypes$Freq == 0, NA)
crimetypes <- drop_na(crimetypes)
  #Or, Alternatively, we Could Graph it to find out...#
# Plot each UCR Code against its General Crime Category. Columns are named
# directly inside aes() so ggplot2 looks them up in the supplied data frame;
# writing `df$col` there is discouraged by ggplot2 and yields axis titles
# that show the whole `df$col` expression instead of the column name.
crimetype <- ggplot(
  phila_crimes_10percent,
  aes(x = `UCR Code`, y = `General Crime Category`)
)

# One point per record, drawn with the minimal classic theme. Printing the
# object at top level renders the plot.
crimetypescatter <- crimetype + geom_point() + theme_classic()
crimetypescatter

  #When do Crimes Happen?#

</pre>
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s