IST687: PROJECT: DENVER CRIME
Step-1: Data Preparation
Step-2: Visualizations of all crimes
Step-3: Top Five Crime Categories in 2018
3A:Top Five Crime Categories in 2018
Step-4: Top Five Offenses
Step-5: Precinct wise and Neighborhood wise Incidents
5A:Best Five Precints with least Crime
5B:Worst Five Precints with max Crime
5C: Best Five Neighborhoods with least Crime
5D: Worst Five Neighborhoods with max Crime
Step-6: Change in Crime: Visualizations
6A: Crime figures over the years
Step-7: Category wise Change in Crimes
7A: Five Categories where crime has improved
7B: Five Categories where crime has worsened
Step-8: Precinct wise Change in Crime
8A: Five Precints where crime has improved maximum
8B: Five Precints where crime has increased the most
Step-9: Change in Crimes Month Wise and Year Wise
9A: Change in Crimes Month Wise and Year Wise
9B: Change in Crimes Month Wise in 2017
9C: Change in Crimes MonthWise in 2018
Step-10: Change in Crimes Month Wise and Year Wise
10A: Total no of Crime Occurrances Since 2014
Step-11: Regression-Prepared Separately

IST687: PROJECT: DENVER CRIME

knitr::opts_chunk$set(echo = TRUE)
library(pander)
pandoc.header('Step-1: Data Preparation', 1, 'setext')

Step-1: Data Preparation

# Loading the data
# Alternative used when the files are on the local machine (kept disabled):
#CWD <- getwd()
#crime.data <- read.csv(paste(CWD,'/DENVER CRIME/crime.csv',sep=''))
#code.data <- read.csv(paste(CWD,'/DENVER CRIME/offense_codes.csv',sep=''))

# Load the data from the url
# crime.data is the table every later step works on (presumably one row per
# reported incident - TODO confirm against the data dictionary).
crime.data <- read.csv("https://www.denvergov.org/media/gis/DataCatalog/crime/csv/crime.csv")
# code.data is loaded alongside it; the steps visible in this part of the
# report do not read it.
code.data <- read.csv("https://www.denvergov.org/media/gis/DataCatalog/crime/csv/offense_codes.csv")

#Loading all packages
library(stringr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:base':
## 
##     date

library(pander)

library(tmap)    # for static and interactive maps
library(leaflet) # for interactive maps
library(mapview) # for interactive maps
library(ggplot2) # tidyverse data visualization package
library(ggmap)

## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.

## Please cite ggmap if you use it! See citation("ggmap") for details.

library(rgdal)

## Loading required package: sp

## rgdal: version: 1.4-4, (SVN revision 833)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/rsman/OneDrive/Documents/R/win-library/3.5/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/rsman/OneDrive/Documents/R/win-library/3.5/rgdal/proj
##  Linking to sp version: 1.3-1

library(sf)

## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3

library(raster)

## 
## Attaching package: 'raster'

## The following object is masked from 'package:dplyr':
## 
##     select

library(spData)

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

library(ggthemes)


### Formatting the data - all new columns added have the prefix "N_"
# Calendar date of the first occurrence. as.Date() with an explicit format
# reads the leading month/day/year and ignores whatever follows it, so the
# timestamp does not have to be cut down to its date part first.
crime.data$N_OCCURRENCE_DATE <- as.Date(as.character(crime.data$FIRST_OCCURRENCE_DATE),
                                        format = "%m/%d/%Y")

# Additional columns for month name, month number and year
crime.data$N_MONTH <- months(crime.data$N_OCCURRENCE_DATE)
crime.data$N_MONTH_FIG <- month(crime.data$N_OCCURRENCE_DATE)
crime.data$N_YEAR <- year(crime.data$N_OCCURRENCE_DATE)

# "Yes" only for rows flagged as traffic and not flagged as crime
crime.data$N_IS_TRAFFIC <- ifelse(crime.data$IS_TRAFFIC == 1 &
                                    crime.data$IS_CRIME == 0, "Yes", "No")

crime.data$state <- "colorado"#needed for ggmap

# Separating traffic incidents from all other incidents
crime.data.traffic <- subset(crime.data, crime.data$N_IS_TRAFFIC == "Yes")
crime.data.traffic <- data.frame(crime.data.traffic)
crime.data.crimes <- subset(crime.data, crime.data$N_IS_TRAFFIC == "No")
crime.data.crimes <- data.frame(crime.data.crimes)

Step-2: Visualizations of all crimes

###############::NO-1::##################
knitr::opts_chunk$set(echo = TRUE)
pandoc.header('Step-2: Visualizations of all crimes', 1, 'setext')

Step-2: Visualizations of all crimes

### No-2A: Bar chart of all incidents year wise
# Incident totals per year and month.
# NOTE(review): a row with both IS_CRIME and IS_TRAFFIC set to 1 would be
# counted twice here - confirm the two flags are mutually exclusive.
df <- crime.data %>%
  group_by(N_YEAR, N_MONTH_FIG, N_MONTH) %>%
  summarise(incidents = sum(IS_CRIME) + sum(IS_TRAFFIC)) %>%
  arrange(N_YEAR, N_MONTH_FIG, N_MONTH)

df.stp10 <- df # copy used by the month-wise plots in Step-9
p1A <- ggplot(df)
p1A <- p1A + geom_bar(aes(x = factor(N_YEAR), weight = incidents)) +
  ggtitle('Incidents Reported by Year: Total') + xlab('Year') +
  ylab('Incidents') + theme(plot.title = element_text(hjust = 0.5)) + coord_flip()

### No-2B: Bar chart of all incidents year wise and month wise
df.aa <- df[order(df$N_YEAR, df$N_MONTH_FIG),]

# The fill is built from the month NUMBER with labels attached to the factor
# itself, so every legend entry is tied to its own month. (A factor of month
# names sorts alphabetically, which does not line up with a Jan..Dec label
# vector.)
# theme_light() comes before theme(): a complete theme replaces earlier theme
# settings, which would undo the centred title.
p1B <- ggplot(df.aa, aes(x = factor(N_YEAR), y = incidents,
                         fill = factor(N_MONTH_FIG, levels = 1:12, labels = month.abb)))
p1B <- p1B + geom_bar(stat = 'identity') + ggtitle('Incidents Reported by Year: Month wise') +
  xlab('Year') + ylab('Incidents') +
  theme_light() + theme(plot.title = element_text(hjust = 0.5)) +
  scale_fill_discrete(name = 'Month') +
  guides(fill = guide_legend(title = 'Month')) + coord_flip()

### No-2C: Bar chart of all incidents year wise, Traffic vs Crime
#library(dplyr)
tmp <- crime.data
tmp$N_CRIME_TYPE <- ifelse(tmp$IS_CRIME == 1, "Crime", "Traffic")
tmp$N_CRIME_TYPE <- factor(tmp$N_CRIME_TYPE)


df <- tmp %>%
  group_by(N_YEAR, N_CRIME_TYPE) %>%
  summarise(crimeIncidents = sum(IS_CRIME) + sum(IS_TRAFFIC)) %>%
  arrange(N_YEAR)

p1C <- ggplot(df, aes(x = factor(N_YEAR), y = crimeIncidents, fill = N_CRIME_TYPE))
p1C <- p1C + geom_bar(stat = 'identity') + ggtitle('Incidents Reported by Year: Traffic Vs Rest') +
  xlab('Year') + ylab('Incidents') +
  theme_light() + theme(plot.title = element_text(hjust = 0.5)) +
  guides(fill = guide_legend(title = 'Incident Type')) + coord_flip()
cat("\\newpage")

p1A #All Incidents grouped by Year

cat("Looking at the chart above we can see that incidents reported have increased over the years especially when we compare the number of reports from 2014 to 2018. Also, if you see the data of 2019, 35000 incidents have already occured which is more than what had occured in 2014 during this perios. The crime is increasing and increasing at a faster pace")

Looking at the chart above, we can see that reported incidents have increased over the years, especially when we compare the number of reports from 2014 to 2018. Also, in the 2019 data, 35,000 incidents have already occurred, which is more than had occurred in 2014 during the same period. Crime is increasing, and at a faster pace.

cat("\\newpage")

p1B #All Incidents grouped by Year and Month

cat("Number of incidents reported for each month looks to be the same throughout the years. It looks like there were less incidents reported during the winter season (Dec., Jan., Feb.) and more reports during Autumn (Aug., Sep., Oct.)  ")

The number of incidents reported in each month looks similar from year to year. Fewer incidents appear to be reported during the winter season (Dec., Jan., Feb.) and more during autumn (Aug., Sep., Oct.).

cat("\\newpage")

p1C #All Incidents grouped by Year and Traffic vs Crime

cat("Looking at this chart of traffic incidents and crime reported, we see that each year has about the same number of reports for both traffic and crime. Although there were more crime reported in 2018 than in 2014, traffic incidents were about the same for all years.
")

Looking at this chart of traffic incidents and crimes reported, we see that each year has about the same number of reports for both traffic and crime. Although more crimes were reported in 2018 than in 2014, traffic incidents were about the same for all years.

cat("\\newpage")

Step-3:Top Five Crime categories in 2018

###############::NO-4::##################
pandoc.header('Step-3: Top Five Crime Categories in 2018', 1, 'setext')

Step-3: Top Five Crime Categories in 2018

# Number of non-traffic incidents per offense category in 2018, most frequent
# first. Category names and counts are read straight from the tapply() result,
# so nothing depends on an auto-generated column name or on the count column
# keeping a dim attribute.
crime.tmp <- crime.data.crimes[crime.data.crimes$N_YEAR==2018,]
#crime.tmp <- crime.tmp[crime.tmp$GEO_LON !=-0.0000024,]
category.counts <- tapply(crime.tmp$INCIDENT_ID, crime.tmp$OFFENSE_CATEGORY_ID, length)
tmp.df <- data.frame(OFFENSE_CATEGORY_ID = names(category.counts),
                     NO_OF_CRIMES = as.vector(category.counts),
                     stringsAsFactors = FALSE)
# order() places categories without any 2018 incident (NA count) last
tmp.df <- tmp.df[order(tmp.df$NO_OF_CRIMES, decreasing = TRUE),]

# Category names only, in the same order; reused by the later steps
crime.categories <- data.frame(OFFENSE_CATEGORY_ID = as.character(tmp.df$OFFENSE_CATEGORY_ID),
                               stringsAsFactors = FALSE)

#rm(crime.tmp)

rownames(tmp.df) <- NULL
pandoc.header('3A:Top Five Crime Categories in 2018', 1, 'setext')

3A:Top Five Crime Categories in 2018

pander(tmp.df[1:5,], justify = c("left") )#Top Five Crime Categories

OFFENSE_CATEGORY_ID	NO_OF_CRIMES
all-other-crimes	17607
larceny	9212
public-disorder	8569
theft-from-motor-vehicle	8116
drug-alcohol	6132

#rm(tmp.df)

# Incidents used for mapping. Rows whose longitude equals -0.0000024 are
# excluded (presumably a placeholder for a missing location - TODO confirm).
crime.tmp.plots <- crime.data[crime.data$GEO_LON !=-0.0000024,]

# Clustered leaflet map of the incidents of one offense category in one year.
#   Rx.Var: value of OFFENSE_CATEGORY_ID to plot
#   Yr.Var: value of N_YEAR to plot
# Reads the global crime.tmp.plots and returns the leaflet widget.
crime.category.plot <- function(Rx.Var, Yr.Var){
  is.match <- crime.tmp.plots$OFFENSE_CATEGORY_ID == Rx.Var &
    crime.tmp.plots$N_YEAR == Yr.Var
  # which() skips rows where the comparison itself is NA
  xy <- crime.tmp.plots[which(is.match), c("GEO_LON", "GEO_LAT")]
  # Keep only rows with both coordinates present. Negative indexing with
  # which() would remove every row whenever it matches nothing.
  xy <- xy[complete.cases(xy), ]
  leaflet(data = xy) %>%
    addTiles() %>%
    addMarkers(~GEO_LON, ~GEO_LAT, clusterOptions = markerClusterOptions())
}

# One clustered map per category, for the first five entries of
# crime.categories (ordered by 2018 count, highest first). Each map is built,
# announced with cat(), and then printed by evaluating output.plot at top level.
output.plot <- crime.category.plot(crime.categories$OFFENSE_CATEGORY_ID[1], 2018)
cat("Plot of Category: ", crime.categories$OFFENSE_CATEGORY_ID[1])

Plot of Category: all-other-crimes

output.plot

# Second most frequent category
output.plot <- crime.category.plot(crime.categories$OFFENSE_CATEGORY_ID[2], 2018)
cat("Plot of Category: ", crime.categories$OFFENSE_CATEGORY_ID[2])

Plot of Category: larceny

output.plot

# Third most frequent category
output.plot <- crime.category.plot(crime.categories$OFFENSE_CATEGORY_ID[3], 2018)
cat("Plot of Category: ", crime.categories$OFFENSE_CATEGORY_ID[3])

Plot of Category: public-disorder

output.plot

# Fourth most frequent category
output.plot <- crime.category.plot(crime.categories$OFFENSE_CATEGORY_ID[4], 2018)
cat("Plot of Category: ", crime.categories$OFFENSE_CATEGORY_ID[4])

Plot of Category: theft-from-motor-vehicle

output.plot

# Fifth most frequent category
output.plot <- crime.category.plot(crime.categories$OFFENSE_CATEGORY_ID[5], 2018)
cat("Plot of Category: ", crime.categories$OFFENSE_CATEGORY_ID[5])

Plot of Category: drug-alcohol

output.plot

cat("\\newpage")

Step-4:Top Five Offenses-All Categories

###############::NO-2::##################
pandoc.header('Step-4: Top Five Offenses', 1, 'setext')

Step-4: Top Five Offenses

### NO-2A: Top five offense types
# Number of non-traffic incidents per offense type
offense.counts <- tapply(crime.data.crimes$OFFENSE_CATEGORY_ID, crime.data.crimes$OFFENSE_TYPE_ID, length)
crime.types <- data.frame(OFFENSE_TYPE_ID = names(offense.counts),
                          NO_OF_CRIMES = as.vector(offense.counts),
                          stringsAsFactors = FALSE)
# Drop offense types without any incident. A logical mask is used because
# x[-which(cond), ] returns zero rows when cond is never TRUE.
crime.types <- crime.types[!is.na(crime.types$NO_OF_CRIMES),]
crimes.by.number <- crime.types[order(crime.types$NO_OF_CRIMES, decreasing = TRUE),]#Crimes by Occurance



# Tag the rows of tmp that belong to one of the five most frequent offense types.
#   N_TOP_FIVE    : the offense type for tagged rows, NA for all other rows
#   N_TOP_FIVE_<i>: 1 when the row is the i-th most frequent type, else 0
top.five.offenses <- crimes.by.number[1:5, 1]
offense.id <- as.character(tmp$OFFENSE_TYPE_ID)
tmp$N_TOP_FIVE <- ifelse(offense.id %in% top.five.offenses, offense.id, NA_character_)
for (i in seq_along(top.five.offenses)) {
  tmp[[paste0("N_TOP_FIVE_", i)]] <- as.numeric(offense.id == top.five.offenses[i])
}
# Keep only the tagged rows. A logical mask is used because
# x[-which(cond), ] returns zero rows when cond is never TRUE.
tmp.top.five <- tmp[!is.na(tmp$N_TOP_FIVE),]
tmp.top.five$N_TOP_FIVE <- factor(tmp.top.five$N_TOP_FIVE)

# Every remaining row carries exactly one indicator equal to 1, so the number
# of rows per group is the incident count.
df <- tmp.top.five %>%
  group_by(N_YEAR, N_TOP_FIVE) %>%
  summarise(crime.top.five = n()) %>%
  arrange(N_YEAR)

# theme_light() precedes theme() so the centred title is not reset.
p2A <- ggplot(df, aes(x = factor(N_YEAR), y = crime.top.five, fill = N_TOP_FIVE))
p2A + geom_bar(stat = 'identity') + ggtitle('Incidents Reported by Year') +
  xlab('Year') + ylab('Incidents') +
  theme_light() + theme(plot.title = element_text(hjust = 0.5)) +
  guides(fill = guide_legend(title = 'Incident Type')) + coord_flip()

# Caption for the top-five chart above. The earlier text here was the caption
# of the month-wise chart from Step-2 and described a different plot.
cat("The chart above shows, for each year, how many incidents were reported for each of the five most frequent offense types.")

cat("\\newpage")

Step-5:Precinct wise and Neighborhood wise Incidents

###############::NO-5::##################
pandoc.header('Step-5: Precinct wise and Neighborhood wise Incidents', 1, 'setext')

Step-5: Precinct wise and Neighborhood wise Incidents

### NO-5A: Precinct wise analysis
# Number of non-traffic incidents per precinct
precinct.counts <- tapply(crime.data.crimes$DISTRICT_ID, crime.data.crimes$PRECINCT_ID, length)
crime.precints <- data.frame(PRECINT_ID = names(precinct.counts),
                             NO_OF_CRIMES = as.vector(precinct.counts),
                             stringsAsFactors = FALSE)

# Average incidents per precinct. The row count is taken from the data frame:
# nrow() of a column is NULL unless the column happens to keep a dim attribute.
avg.crime.precint <- sum(crime.precints$NO_OF_CRIMES) / nrow(crime.precints)
crime.precints$STATUS <- ifelse(crime.precints$NO_OF_CRIMES < avg.crime.precint, "Below Average", "Above Average")
crime.precints$GAP <- round(crime.precints$NO_OF_CRIMES - avg.crime.precint, 2)

# Ascending by count; the factor levels freeze that order for plotting
crime.precints <- crime.precints[order(crime.precints$NO_OF_CRIMES),]
crime.precints$PRECINT_ID <- factor(crime.precints$PRECINT_ID, levels = crime.precints$PRECINT_ID)

pandoc.header('5A:Best Five Precints with least Crime', 1, 'setext')

5A:Best Five Precints with least Crime

pander(crime.precints[1:5,])#Pricints with min crime

	PRECINT_ID	NO_OF_CRIMES	STATUS	GAP
36	759	3920	Below Average	-6148
29	522	4403	Below Average	-5665
30	523	4810	Below Average	-5258
16	314	5788	Below Average	-4280
20	324	5816	Below Average	-4252

crime.precints <- crime.precints[order(crime.precints$NO_OF_CRIMES),]# Precinsts with leaste crime
pandoc.header('5B:Worst Five Precints with max Crime', 1, 'setext')

5B:Worst Five Precints with max Crime

# crime.precints is sorted ascending, so the last five rows are the precincts
# with the most incidents. tail() returns exactly five rows, whereas
# (nrow - 5):nrow spans six.
pander(tail(crime.precints, 5))#Precincts with max crime

	PRECINT_ID	NO_OF_CRIMES	STATUS	GAP
33	621	13501	Above Average	3433
5	122	13853	Above Average	3785
13	311	14387	Above Average	4319
32	612	15191	Above Average	5123
22	412	15250	Above Average	5182
31	611	28087	Above Average	18019

# Gap between each precinct's incident count and the precinct average.
# labs() names axis titles by aesthetic (x, y); "xlab"/"ylab" are not
# aesthetics, so titles passed under those names are never shown.
ggplot(crime.precints, aes(x = PRECINT_ID, y = GAP)) +
  geom_bar(stat = 'identity', aes(fill = STATUS), width = .5) +
  labs(title = "Precint Wise Crime Occurence Status",
       subtitle = "Ordered by Occurence Above and Below Average",
       x = "Precinct", y = "Gap from Average") +
  coord_flip()

### NO-5C: Neighborhood wise analysis
# Number of non-traffic incidents per neighborhood
neighborhood.counts <- tapply(crime.data.crimes$DISTRICT_ID, crime.data.crimes$NEIGHBORHOOD_ID, length)
crime.locations <- data.frame(NEIGHBORHOOD_ID = names(neighborhood.counts),
                              NO_OF_CRIMES = as.vector(neighborhood.counts),
                              stringsAsFactors = FALSE)

# Average incidents per neighborhood. The row count is taken from the data
# frame: nrow() of a column is NULL unless the column keeps a dim attribute.
avg.crime.locations <- sum(crime.locations$NO_OF_CRIMES) / nrow(crime.locations)
crime.locations$STATUS <- ifelse(crime.locations$NO_OF_CRIMES < avg.crime.locations, "Below Average", "Above Average")
crime.locations$GAP <- round(crime.locations$NO_OF_CRIMES - avg.crime.locations, 2)

# Ascending by count; the factor levels freeze that order for plotting
crime.locations <- crime.locations[order(crime.locations$NO_OF_CRIMES),]
crime.locations$NEIGHBORHOOD_ID <- factor(crime.locations$NEIGHBORHOOD_ID, levels = crime.locations$NEIGHBORHOOD_ID)

pandoc.header('5C: Best Five Neighborhoods with least Crime', 1, 'setext')

5C: Best Five Neighborhoods with least Crime

pander(crime.locations[1:5,]) # Top Five Crime Locations

	NEIGHBORHOOD_ID	NO_OF_CRIMES	STATUS	GAP
73	wellshire	429	Below Average	-4218
38	indian-creek	435	Below Average	-4212
22	country-club	756	Below Average	-3891
53	rosedale	1132	Below Average	-3515
7	belcaro	1307	Below Average	-3340

pandoc.header('5D: Worst Five Neighborhoods with max Crime', 1, 'setext')

5D: Worst Five Neighborhoods with max Crime

# crime.locations is sorted ascending, so the last five rows are the
# neighborhoods with the most incidents. tail() returns exactly five rows,
# whereas (nrow - 5):nrow spans six.
pander(tail(crime.locations, 5))#Five neighborhoods with max crime

	NEIGHBORHOOD_ID	NO_OF_CRIMES	STATUS	GAP
24	east-colfax	11463	Above Average	6816
9	capitol-hill	13122	Above Average	8475
45	montbello	13298	Above Average	8651
60	stapleton	13934	Above Average	9287
10	cbd	15475	Above Average	10828
26	five-points	21396	Above Average	16749

# Gap between each neighborhood's incident count and the neighborhood average.
# labs() names axis titles by aesthetic (x, y); titles passed as
# "xlab"/"ylab" are never shown.
ggplot(crime.locations, aes(x = NEIGHBORHOOD_ID, y = GAP)) +
  geom_bar(stat = 'identity', aes(fill = STATUS), width = .5) +
  labs(title = "Neighborhood Wise Crime Occurence Status",
       subtitle = "Ordered by Occurence Above and Below Average",
       x = "Neighborhood", y = "Gap from Average") +
  coord_flip()

cat('Removeing the locations where change is not significant')

Removeing the locations where change is not significant

# Same chart, restricted to neighborhoods more than 1000 incidents above or
# more than 2300 incidents below the average.
crime.loc.tmp <- crime.locations[crime.locations$GAP > 1000 |  crime.locations$GAP < -2300,]
# Axis titles are set through the x/y arguments of labs(); titles passed as
# "xlab"/"ylab" are never shown.
ggplot(crime.loc.tmp, aes(x = NEIGHBORHOOD_ID, y = GAP)) +
  geom_bar(stat = 'identity', aes(fill = STATUS), width = .5) +
  labs(title = "Neighborhood Wise Crime Occurence Status",
       subtitle = "Ordered by Occurence Above and Below Average",
       x = "Neighborhood", y = "Gap from Average") +
  coord_flip()

rm(crime.loc.tmp)

###NO-3B: location wise analysis
#Location of last 10000 crimes
cat("Location of the last 1000 crimes")

Location of the last 1000 crimes

# Clustered map of 1000 incidents that have coordinates.
data <- crime.data[crime.data$GEO_LON !=-0.0000024,]
xy <- data[c("GEO_LON", "GEO_LAT")]
# Logical mask rather than xy[-which(...), ]: the negative-index form returns
# zero rows when no latitude is missing.
xy <- xy[!is.na(xy$GEO_LAT),]
# NOTE(review): head() takes the FIRST 1000 rows in file order; whether these
# are the most recent incidents depends on how the source file is sorted.
xy <- head(xy, 1000)
m <- leaflet(data=xy) %>% addTiles() %>% addMarkers(~GEO_LON,~GEO_LAT, clusterOptions = markerClusterOptions())
m

cat("\\newpage")

Step-6:Change in Crime-Code

###############::NO-6::##################
# Incident counts per offense category and year, built in three shapes:
#   crime.categories.yearly : one row per category, one count column per year
#   crime.cat.yr            : long table (OFFENSE_CATEGORY_ID, NO_OF_CRIMES, N_YEAR)
#   df.line.plot            : one row per year, one column per category
# All years run through the same loop. Counts are always attached to their
# category by name, and the number of categories is taken from the data.
analysis.years <- as.character(2014:2019)

crime.categories.yearly <- data.frame(crime.categories[order(crime.categories$OFFENSE_CATEGORY_ID),])
colnames(crime.categories.yearly) <- c("OFFENSE_CATEGORY_ID")
crime.cat.plot <- as.character(unlist(crime.categories.yearly[,1]))
n.categories <- length(crime.cat.plot)

df.line.plot <- data.frame(matrix(ncol = n.categories + 1, nrow = 0))
colnames(df.line.plot) <- c("N_YEAR", crime.cat.plot)
# One-row scratch frame that is filled and appended once per year
df.tmp <- data.frame(matrix(ncol = n.categories + 1, nrow = 1))
colnames(df.tmp) <- c("N_YEAR", crime.cat.plot)

yearly.long <- vector("list", length(analysis.years))
names(yearly.long) <- analysis.years

for (yr in analysis.years) {
  # Category wise incident counts for this year
  crime.data.tmp <- crime.data[crime.data$N_YEAR == yr,]
  yr.counts <- tapply(crime.data.tmp$N_YEAR, crime.data.tmp$OFFENSE_CATEGORY_ID, length)
  crime.categories.tmp <- data.frame(OFFENSE_CATEGORY_ID = names(yr.counts),
                                     NO_OF_CRIMES = as.vector(yr.counts),
                                     N_YEAR = yr,
                                     stringsAsFactors = FALSE)
  yearly.long[[yr]] <- crime.categories.tmp

  # Wide table: add a count column named after the year, matched on category
  yr.wide <- crime.categories.tmp[, c("OFFENSE_CATEGORY_ID", "NO_OF_CRIMES")]
  colnames(yr.wide) <- c("OFFENSE_CATEGORY_ID", yr)
  crime.categories.yearly <- merge(crime.categories.yearly, yr.wide, by = "OFFENSE_CATEGORY_ID")

  # Year-per-row table: fill the scratch row, append it, then empty it again
  df.tmp[1, 1] <- yr
  df.tmp[1, 2:(n.categories + 1)] <- crime.categories.yearly[[yr]]
  df.line.plot <- rbind(df.line.plot, df.tmp)
  df.tmp <- df.tmp[-1,]
}

crime.cat.yr <- do.call(rbind, yearly.long)
rownames(crime.cat.yr) <- NULL

cat("Change in Total Crime category wise from 2014 to 2018")

Change in Total Crime category wise from 2014 to 2018

Step-6:Change in Crime: Visualizations

###############::NO-6::##################
pandoc.header('Step-6: Change in Crime: Visualizations', 1, 'setext')

Step-6: Change in Crime: Visualizations

# Sum the count columns in positions 2 to 6 into TOTAL, then list the
# categories from highest to lowest total with fresh row names.
yearly.totals <- rowSums(crime.categories.yearly[, 2:6])
crime.categories.yearly$TOTAL <- yearly.totals
by.total.desc <- order(crime.categories.yearly$TOTAL, decreasing = TRUE)
crime.categories.yearly <- crime.categories.yearly[by.total.desc, ]
rownames(crime.categories.yearly) <- NULL

pandoc.header('6A: Crime figures over the years', 1, 'setext')

6A: Crime figures over the years

pander(crime.categories.yearly[,c(1,2,3,4,5,6,8)], justify = c("left"))#Category and year wise crime figures

OFFENSE_CATEGORY_ID	2014	2015	2016	2017	2018	TOTAL
traffic-accident	21395	23307	23739	24255	24282	116978
all-other-crimes	15270	15729	16465	17233	17607	82304
public-disorder	9832	9793	9915	9355	8569	47464
larceny	9190	8895	9099	8940	9212	45336
theft-from-motor-vehicle	5013	6262	6571	7341	8116	33303
drug-alcohol	5968	6186	6085	5673	6132	30044
auto-theft	3448	4480	4835	5549	5362	23674
burglary	4463	4877	4681	4444	4098	22563
other-crimes-against-persons	4053	4408	4633	4765	4091	21950
aggravated-assault	1743	2007	2151	2099	2438	10438
white-collar-crime	1036	1312	1189	1267	1275	6079
robbery	1057	1213	1155	1204	1207	5836
sexual-assault	584	717	724	862	828	3715
arson	127	107	95	126	114	569
murder	32	57	54	58	57	258

# Yearly incident count per offense category, 2019 excluded.
# Columns are referenced by bare name inside aes() (not df2$...), so the
# mappings stay tied to the plot data. The plot maps colour, not fill, so no
# fill scale is added.
df2 <- crime.cat.yr[crime.cat.yr$N_YEAR != 2019,]
ggplot(df2, aes(x = N_YEAR, y = NO_OF_CRIMES,
                colour = OFFENSE_CATEGORY_ID, group = OFFENSE_CATEGORY_ID)) +
  geom_line() +
  geom_point(size = 3) +
  theme_classic() +
  labs(title = "Incidents per Offense Category by Year",
       x = "Year", y = "Incidents", colour = "Offense Category")

cat("\\newpage")

Step-7:Change in Crimes: Category Wise

###############::NO-6::##################
pandoc.header('Step-7: Category wise Change in Crimes', 1, 'setext')

Step-7: Category wise Change in Crimes

# Percent change per offense category between the 2017 and 2018 counts,
# listed from the largest decrease to the largest increase.
off.cat.change <- crime.categories.yearly[, c("OFFENSE_CATEGORY_ID", "2017", "2018")]
off.cat.change$OFFENSE_CATEGORY_ID <- as.character(off.cat.change$OFFENSE_CATEGORY_ID)

count.2017 <- off.cat.change[["2017"]]
count.2018 <- off.cat.change[["2018"]]
off.cat.change$N_PERCENT_CHANGE <- round(((count.2018 - count.2017) / count.2017) * 100, 2)

# A negative change is reported as an improvement, anything else as worsened
off.cat.change$STATUS <- ifelse(off.cat.change$N_PERCENT_CHANGE < 0, "Crime Improved", "Crime Worsened")

ascending.change <- order(off.cat.change$N_PERCENT_CHANGE)
off.cat.change <- off.cat.change[ascending.change, ]
rownames(off.cat.change) <- NULL
pandoc.header('7A: Five Categories where crime has improved', 1, 'setext')

7A: Five Categories where crime has improved

pander(off.cat.change[1:5,], justify=c("left"))#Top Five categoris where crim has risen least or reduced.

OFFENSE_CATEGORY_ID	2017	2018	N_PERCENT_CHANGE	STATUS
other-crimes-against-persons	4765	4091	-14.14	Crime Improved
arson	126	114	-9.52	Crime Improved
public-disorder	9355	8569	-8.4	Crime Improved
burglary	4444	4098	-7.79	Crime Improved
sexual-assault	862	828	-3.94	Crime Improved

off.cat.change.tmp <- off.cat.change[order(off.cat.change$N_PERCENT_CHANGE, decreasing = TRUE),]
#off.cat.change.tmp <- off.cat.change[order(off.cat.change$N_PERCENT_CHANGE),]
rownames(off.cat.change.tmp) <- NULL
pandoc.header('7B: Five Categories where crime has worsened', 1, 'setext')

7B: Five Categories where crime has worsened

pander(off.cat.change.tmp[1:5,], justify=c("left"), include.rownames = FALSE)#Top Five categoris where crim has risen maximum.

OFFENSE_CATEGORY_ID	2017	2018	N_PERCENT_CHANGE	STATUS
aggravated-assault	2099	2438	16.15	Crime Worsened
theft-from-motor-vehicle	7341	8116	10.56	Crime Worsened
drug-alcohol	5673	6132	8.09	Crime Worsened
larceny	8940	9212	3.04	Crime Worsened
all-other-crimes	17233	17607	2.17	Crime Worsened

rm(off.cat.change.tmp)

# Bar per category, ordered by percent change; the factor levels freeze that
# order on the axis.
off.cat.change <- off.cat.change[order(off.cat.change$N_PERCENT_CHANGE),]
off.cat.change$OFFENSE_CATEGORY_ID <- factor(off.cat.change$OFFENSE_CATEGORY_ID, levels = off.cat.change$OFFENSE_CATEGORY_ID)

# Axis titles are set through the x/y arguments of labs(); titles passed as
# "xlab"/"ylab" are never shown. The y aesthetic is the 2017-to-2018 percent
# change, and it is labelled as such.
ggplot(off.cat.change, aes(x = OFFENSE_CATEGORY_ID, y = N_PERCENT_CHANGE)) +
  geom_bar(stat = 'identity', aes(fill = STATUS), width = .5) +
  labs(title = "Category wise change in Crime",
       subtitle = "Ordered by percenct change Above and Below Average",
       x = "Offense Category", y = "Percent Change, 2017 to 2018") +
  coord_flip()

cat("\\newpage")

Step-8:Change in Crimes: Precinct Wise

###############::NO-8::##################
pandoc.header('Step-8: Precinct wise Change in Crime', 1, 'setext')

Step-8: Precinct wise Change in Crime

# Incidents per precinct in 2017 and in 2018, and the percent change between
# the two. The yearly counts are joined on PRECINCT_ID, so a precinct present
# in only one year cannot shift the other year's counts onto the wrong row.
crime.data.precint <- crime.data[crime.data$N_YEAR=="2017",]
counts.2017 <- tapply(crime.data.precint$DISTRICT_ID, crime.data.precint$PRECINCT_ID, length)
precints.2017 <- data.frame(PRECINCT_ID = names(counts.2017),
                            Y2017 = as.vector(counts.2017),
                            stringsAsFactors = FALSE)

crime.data.precint <- crime.data[crime.data$N_YEAR=="2018",]
counts.2018 <- tapply(crime.data.precint$DISTRICT_ID, crime.data.precint$PRECINCT_ID, length)
crime.precints.tmp <- data.frame(PRECINCT_ID = names(counts.2018),
                                 Y2018 = as.vector(counts.2018),
                                 stringsAsFactors = FALSE)

# all = TRUE keeps precincts that appear in only one of the two years; their
# missing count, and therefore their percent change, is NA.
crime.precints.change <- merge(precints.2017, crime.precints.tmp, by = "PRECINCT_ID", all = TRUE)
colnames(crime.precints.change) <- c("PRECINCT_ID", "2017", "2018")

crime.precints.change$N_PERCENT_CHANGE <- round(((crime.precints.change$`2018`- crime.precints.change$`2017`)/crime.precints.change$`2017`)*100,2)
crime.precints.change$STATUS <- ifelse(crime.precints.change$N_PERCENT_CHANGE<0, "Crime Reduced", "Crime Increased")
# Largest decrease first; the factor levels freeze that order for plotting
crime.precints.change <- crime.precints.change[order(crime.precints.change$N_PERCENT_CHANGE),]
crime.precints.change$PRECINCT_ID <- factor(crime.precints.change$PRECINCT_ID, levels = crime.precints.change$PRECINCT_ID)

rownames(crime.precints.change) <- NULL
pandoc.header('8A: Five Precints where crime has improved maximum', 1, 'setext')

8A: Five Precints where crime has improved maximum

pander(crime.precints.change[1:5,])#Pricints with max increase in crime

PRECINCT_ID	2017	2018	N_PERCENT_CHANGE	STATUS
211	2860	2446	-14.48	Crime Reduced
411	2447	2126	-13.12	Crime Reduced
223	3333	2963	-11.1	Crime Reduced
222	1888	1748	-7.42	Crime Reduced
212	2086	1945	-6.76	Crime Reduced

# Precincts ordered from the largest increase to the largest decrease. The
# ordering uses this table's own N_PERCENT_CHANGE column, not the incident
# totals of the separate crime.precints table.
crime.precints.tmp <- crime.precints.change[order(crime.precints.change$N_PERCENT_CHANGE, decreasing = TRUE),]
rownames(crime.precints.tmp) <- NULL
pandoc.header('8B: Five Precints where crime has increased the most', 1, 'setext')

8B: Five Precints where crime has increased the most

pander(crime.precints.tmp[1:5,])#Pricints with max crime

PRECINCT_ID	2017	2018	N_PERCENT_CHANGE	STATUS
522	1119	1565	39.86	Crime Increased
523	1000	1157	15.7	Crime Increased
314	1509	1730	14.65	Crime Increased
512	2484	2815	13.33	Crime Increased
511	1932	2184	13.04	Crime Increased

rm(crime.precints.tmp)

# Percent change in incidents per precinct between 2017 and 2018.
# Axis titles are set through the x/y arguments of labs(); titles passed as
# "xlab"/"ylab" are never shown.
ggplot(crime.precints.change, aes(x = PRECINCT_ID, y = N_PERCENT_CHANGE)) +
  geom_bar(stat = 'identity', aes(fill = STATUS), width = .5) +
  labs(title = "Precint Wise change in Crime Status",
       subtitle = "Ordered by Max increase to decrease",
       x = "Precinct", y = "Percent Change, 2017 to 2018") +
  coord_flip()

cat("\\newpage")

Step-9:Change in Crimes: Month and Year Wise

###############::NO-9::##################
pandoc.header('Step-9: Change in Crimes Month Wise and Year Wise', 1, 'setext')

Step-9: Change in Crimes Month Wise and Year Wise

cat('The Change in Crime month wise is being shown for last five years')

The Change in Crime month wise is being shown for last five years

# Number of incidents per occurrence date. Dates and counts are read straight
# from the tapply() result, so nothing depends on an auto-generated column name.
daily.counts <- tapply(crime.data$OFFENSE_CATEGORY_ID, crime.data$N_OCCURRENCE_DATE, length)
df.tmp.2 <- data.frame(N_OCCURENCE_DATE = as.Date(names(daily.counts)),
                       N_NO_OF_INCIDENTS = as.vector(daily.counts),
                       row.names = names(daily.counts))
df.tmp.2$N_MONTH <- month(df.tmp.2$N_OCCURENCE_DATE)

# Daily incident counts for two years, one facet per year, with points
# coloured by month and a GAM smoother drawn on top.
#   yr.1, yr.2: the two years to show
# Reads the global df.tmp.2, prints a one-line description with cat() and
# returns the ggplot object.
crime.month.plot <- function(yr.1, yr.2){
  cat('The Change in Crime month wise is being shown for ', yr.1, 'and ', yr.2)

  occurrence.year <- year(df.tmp.2$N_OCCURENCE_DATE)
  selected.days <- df.tmp.2[occurrence.year == yr.1 | occurrence.year == yr.2, ]

  daily.points <- ggplot(selected.days,
                         aes(x = N_OCCURENCE_DATE, y = N_NO_OF_INCIDENTS,
                             colour = factor(N_MONTH))) +
    geom_point()

  # One panel per year, each with its own axis ranges
  by.year <- daily.points +
    facet_wrap(~year(N_OCCURENCE_DATE), nrow = 1, scales = "free") +
    guides(colour = guide_legend(override.aes = list(size = 4)))

  styled <- by.year +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 90, hjust = 0))

  styled +
    stat_smooth(method = "gam", formula = y~s(x,k=100), colour = "dodgerblue4",
                se = FALSE, size = 1.2) +
    labs(subtitle = "Classic Theme")
}
pandoc.header('9A: Change in Crimes Month Wise and Year Wise', 1, 'setext')

9A: Change in Crimes Month Wise and Year Wise

crime.month.plot(2014, 2015)

The Change in Crime month wise is being shown for 2014 and 2015

crime.month.plot(2016, 2017)

The Change in Crime month wise is being shown for 2016 and 2017

crime.month.plot(2018, 2014)

The Change in Crime month wise is being shown for 2018 and 2014

#df.tmp <- df[df$N_YEAR==2018 | df$N_YEAR==2017 ,]

# Bar chart of incident counts per month for a single year.
#
# Args:
#   df.rx: data frame with the columns N_YEAR, N_MONTH_FIG and incidents.
#          N_MONTH_FIG is assumed to hold month numbers 1-12 -- TODO confirm
#          against the code that builds the table. Values that are not valid
#          month numbers are shown in the legend unchanged.
#   yr.vr: the year to plot.
#
# Returns:
#   A ggplot object with one bar per month, filled by month, and a legend
#   titled "Month- <yr.vr>" that labels each month by its abbreviation.
crime.monthly.plot <- function(df.rx, yr.vr){

  # Rows of the requested year only (rows with a missing year are dropped),
  # ordered by month so bars and legend entries run in calendar order.
  in.year <- !is.na(df.rx$N_YEAR) & df.rx$N_YEAR == yr.vr
  df.tmp <- df.rx[in.year, ]
  df.tmp <- df.tmp[order(df.tmp$N_MONTH_FIG), ]
  lbl <- paste("Month-", yr.vr)

  # Months actually present, in order. Building the legend labels from these
  # keeps every label attached to its own month and also works for a year
  # that has fewer than 12 months of data.
  months.present <- unique(df.tmp$N_MONTH_FIG[!is.na(df.tmp$N_MONTH_FIG)])
  month.idx <- suppressWarnings(as.integer(as.character(months.present)))
  is.month.no <- !is.na(month.idx) & month.idx >= 1L & month.idx <= 12L
  month.lbl <- as.character(months.present)
  month.lbl[is.month.no] <- month.abb[month.idx[is.month.no]]

  df.tmp$N_MONTH_FIG <- factor(df.tmp$N_MONTH_FIG, levels = months.present)

  # Fill is mapped to the month itself, so the fill levels and the labels
  # above share the same order.
  p9 <- ggplot(df.tmp, aes(x=factor(N_MONTH_FIG), y=incidents, fill=N_MONTH_FIG))+
    geom_bar(stat='identity') +
    scale_fill_discrete(name = lbl, labels = month.lbl)
  return(p9)

}

# Emit the 9B sub-section title as a level-1, setext-style header.
pandoc.header('9B: Change in Crimes Month Wise in 2017', 1, 'setext')

9B: Change in Crimes Month Wise in 2017

# Build the monthly bar chart for 2017 from df.stp10, then print it.
crime.plot.yr1 <- crime.monthly.plot(df.stp10, 2017)
crime.plot.yr1

# Emit the 9C sub-section title as a level-1, setext-style header.
pandoc.header('9C: Change in Crimes MonthWise in 2018', 1, 'setext')

9C: Change in Crimes MonthWise in 2018

# Build the monthly bar chart for 2018 from df.stp10, then print it.
crime.plot.yr2 <- crime.monthly.plot(df.stp10, 2018)
crime.plot.yr2

Step-10: Offenses and their Total Occurrences since 2014

###############::NO-10::##################
# Emit the Step-10 section title as a level-1, setext-style header.
# NOTE(review): apart from the step number, this title repeats the Step-9
# header wording ('Change in Crimes Month Wise and Year Wise') -- confirm
# whether Step-10 was meant to carry its own title.
pandoc.header('Step-10: Change in Crimes Month Wise and Year Wise', 1, 'setext')

Step-10: Change in Crimes Month Wise and Year Wise

# Working subset of crime.data (four columns picked by position), extended
# with a timestamp parsed from FIRST_OCCURRENCE_DATE (month-day-year h:m:s)
# and the weekday name of that timestamp.
rf.data <- crime.data[, c(5, 7, 16, 17)]
rf.data$N_OCCURRENCE_TIME <- mdy_hms(rf.data$FIRST_OCCURRENCE_DATE)
rf.data$N_DAY_OF_WEEK <- weekdays(rf.data$N_OCCURRENCE_TIME)

# rf.data.2 starts as a single column holding every distinct offense type in
# sorted order; the per-year / per-month / per-weekday count columns are
# appended to it further down.
rf.data.2 <- unique(crime.data$OFFENSE_TYPE_ID)
rf.data.2 <- data.frame(OFFENSE_TYPE_ID = rf.data.2[order(rf.data.2)])

# Running list of rf.data.2's column names, and the distinct weekday names
# found in the data (in order of first appearance).
col.names.rf.data2 <- names(rf.data.2)
day.of.week <- unique(weekdays(crime.data$N_OCCURRENCE_DATE))

# Count incidents per offense type for one year.
#
# Args:
#   Rx.Yr: year to select; rows of the global `crime.data` whose N_YEAR
#          equals it are counted.
#
# Returns:
#   A data frame with the columns OFFENSE_TYPE_ID (character) and
#   NO_OF_INCIDENTS, sorted by OFFENSE_TYPE_ID. Offense types that tapply()
#   reports with no matching rows (NA, e.g. unused factor levels) get 0.
crime.number.generator <- function(Rx.Yr){
  crime.tmp <- crime.data[crime.data$N_YEAR==Rx.Yr,]
  counts <- tapply(crime.tmp$N_YEAR, crime.tmp$OFFENSE_TYPE_ID, length)

  # Build the table straight from the tapply() result. Row numbering then
  # comes from data.frame() itself instead of from dim() of a column, which
  # is NULL (and makes 1:dim(...) fail) unless the column is still a 1-d array.
  rf.data.tmp <- data.frame(
    OFFENSE_TYPE_ID = as.character(names(counts)),
    NO_OF_INCIDENTS = as.vector(counts),
    stringsAsFactors = FALSE
  )
  rf.data.tmp$NO_OF_INCIDENTS[is.na(rf.data.tmp$NO_OF_INCIDENTS)] <- 0
  rf.data.tmp <- rf.data.tmp[order(rf.data.tmp$OFFENSE_TYPE_ID),]
  return(rf.data.tmp)
}

# Count incidents per offense type for one month, across all years.
#
# Args:
#   Rx.vr: month value to select; rows of the global `crime.data` whose
#          N_MONTH equals it are counted.
#
# Returns:
#   A data frame with the columns OFFENSE_TYPE_ID (character) and
#   NO_OF_INCIDENTS, sorted by OFFENSE_TYPE_ID. Offense types that tapply()
#   reports with no matching rows (NA, e.g. unused factor levels) get 0.
crime.month.generator <- function(Rx.vr){
  crime.tmp <- crime.data[crime.data$N_MONTH==Rx.vr,]
  counts <- tapply(crime.tmp$N_YEAR, crime.tmp$OFFENSE_TYPE_ID, length)

  # Build the table straight from the tapply() result. Row numbering then
  # comes from data.frame() itself instead of from dim() of a column, which
  # is NULL (and makes 1:dim(...) fail) unless the column is still a 1-d array.
  rf.data.tmp <- data.frame(
    OFFENSE_TYPE_ID = as.character(names(counts)),
    NO_OF_INCIDENTS = as.vector(counts),
    stringsAsFactors = FALSE
  )
  rf.data.tmp$NO_OF_INCIDENTS[is.na(rf.data.tmp$NO_OF_INCIDENTS)] <- 0
  rf.data.tmp <- rf.data.tmp[order(rf.data.tmp$OFFENSE_TYPE_ID),]
  return(rf.data.tmp)
}

# Count incidents per offense type for one day of the week, across all years.
#
# Args:
#   Rx.vr: weekday name as returned by weekdays(); rows of the global
#          `crime.data` whose N_OCCURRENCE_DATE falls on it are counted.
#
# Returns:
#   A data frame with the columns OFFENSE_TYPE_ID (character) and
#   NO_OF_INCIDENTS, sorted by OFFENSE_TYPE_ID. Offense types that tapply()
#   reports with no matching rows (NA, e.g. unused factor levels) get 0.
crime.wkday.generator <- function(Rx.vr){
  crime.tmp <- crime.data[weekdays(crime.data$N_OCCURRENCE_DATE)==Rx.vr,]
  counts <- tapply(crime.tmp$N_YEAR, crime.tmp$OFFENSE_TYPE_ID, length)

  # Build the table straight from the tapply() result. Row numbering then
  # comes from data.frame() itself instead of from dim() of a column, which
  # is NULL (and makes 1:dim(...) fail) unless the column is still a 1-d array.
  rf.data.tmp <- data.frame(
    OFFENSE_TYPE_ID = as.character(names(counts)),
    NO_OF_INCIDENTS = as.vector(counts),
    stringsAsFactors = FALSE
  )
  rf.data.tmp$NO_OF_INCIDENTS[is.na(rf.data.tmp$NO_OF_INCIDENTS)] <- 0
  rf.data.tmp <- rf.data.tmp[order(rf.data.tmp$OFFENSE_TYPE_ID),]
  return(rf.data.tmp)
}

# Widen rf.data.2 with one incident-count column per year (2014-2019), per
# month name and per weekday, then total the yearly columns.
#
# Every count table is aligned to rf.data.2 by OFFENSE_TYPE_ID rather than
# bound by row position: a table that lacks some offense types, or lists them
# in a different order, can no longer shift counts onto the wrong offense or
# fail on a row-count mismatch. Offense types absent from a table get 0.
rf.align.counts <- function(offense.ids, rf.rx) {
  pos <- match(as.character(offense.ids), as.character(rf.rx$OFFENSE_TYPE_ID))
  counts <- as.vector(rf.rx$NO_OF_INCIDENTS)[pos]
  counts[is.na(counts)] <- 0
  counts
}

# One column per year, named by the year.
for (yr.ctr in 2014:2019) {
  rf.rx <- crime.number.generator(yr.ctr)
  rf.data.2[[as.character(yr.ctr)]] <- rf.align.counts(rf.data.2$OFFENSE_TYPE_ID, rf.rx)
}

# One column per month, named by the English month name.
for (mth.ctr in seq_along(month.name)) {
  rf.rx <- crime.month.generator(month.name[mth.ctr])
  rf.data.2[[month.name[mth.ctr]]] <- rf.align.counts(rf.data.2$OFFENSE_TYPE_ID, rf.rx)
}

# One column per weekday name found in the data; a missing weekday name
# (from a missing date) is skipped instead of being used as a column.
for (wk.day.ctr in which(!is.na(day.of.week))) {
  rf.rx <- crime.wkday.generator(day.of.week[wk.day.ctr])
  rf.data.2[[day.of.week[wk.day.ctr]]] <- rf.align.counts(rf.data.2$OFFENSE_TYPE_ID, rf.rx)
}

# Keep the running column-name list in step with the table.
col.names.rf.data2<- colnames(rf.data.2)

# Total incidents per offense type: the yearly columns, selected by name.
rf.data.2$NO_OF_INCIDENTS <- rowSums(rf.data.2[, as.character(2014:2019), drop = FALSE])

# Bar chart of the offense types whose total incident count lies in a range.
#
# Args:
#   rf.rx:  data frame with the columns OFFENSE_TYPE_ID and NO_OF_INCIDENTS.
#   low.l:  lower bound of the range (inclusive).
#   high.l: upper bound of the range (inclusive).
#
# Returns:
#   A ggplot object with one bar per offense type, ordered by ascending count.
crime.plotter <- function(rf.rx, low.l, high.l){

  # Both bounds are inclusive so that consecutive ranges such as 10-100 and
  # 101-500 leave no gap: with a strict lower bound an offense type with
  # exactly 101 incidents would appear in neither plot. Rows with a missing
  # count are dropped rather than turned into all-NA rows.
  n.incidents <- rf.rx$NO_OF_INCIDENTS
  in.range <- !is.na(n.incidents) & n.incidents >= low.l & n.incidents <= high.l
  rf.data.plot <- rf.rx[in.range,]
  rf.data.plot <- rf.data.plot[order(rf.data.plot$NO_OF_INCIDENTS),]

  # Fix the factor levels in sorted-row order so the bars are drawn by
  # ascending count instead of alphabetically.
  rf.data.plot$OFFENSE_TYPE_ID <- factor(rf.data.plot$OFFENSE_TYPE_ID,
                                         levels = unique(as.character(rf.data.plot$OFFENSE_TYPE_ID)))

  # Columns are referenced by bare name inside aes(); `$` there bypasses the
  # data argument and triggers a ggplot2 warning.
  ggplot(rf.data.plot, aes(x=OFFENSE_TYPE_ID, y=NO_OF_INCIDENTS))+geom_bar(stat = 'identity')+
      theme(axis.text.x = element_text(angle = 90, hjust=0))+
    ggtitle(paste('Offense Types where Incidents Occured  between', low.l, 'and', high.l))

}
# Emit the 10A sub-section title as a level-1, setext-style header.
pandoc.header('10A: Total no of Crime Occurrances Since 2014', 1, 'setext')

10A: Total no of Crime Occurrances Since 2014

# One bar chart per incident-count band, from the rarest offense types
# (10-100 total incidents) up to the most frequent (10001-100000).
crime.plotter(rf.data.2,10,100)

crime.plotter(rf.data.2,101,500)

crime.plotter(rf.data.2,501,1000)

crime.plotter(rf.data.2,1001,5000)

crime.plotter(rf.data.2,5001,10000)

crime.plotter(rf.data.2,10001,100000)

Step-11: Regression (Separate File)

# Emit the Step-11 section title as a level-1, setext-style header.
pandoc.header('Step-11: Regression-Prepared Separately', 1, 'setext')

Step-11: Regression-Prepared Separately

Project-Denver Crime

Team 4-The Crime Busters

June 12, 2019

IST687: PROJECT: DENVER CRIME

Step-1: Data Preparation

Step-2: Visualizations of all crimes

Step-3: Top Five Crime Categories in 2018

3A:Top Five Crime Categories in 2018

Step-4: Top Five Offenses

Step-5: Precinct wise and Neighborhood wise Incidents

5A:Best Five Precints with least Crime

5B:Worst Five Precints with max Crime

5C: Best Five Neighborhoods with least Crime

5D: Worst Five Neighborhoods with max Crime

Step-6: Change in Crime: Visualizations

6A: Crime figures over the years

Step-7: Category wise Change in Crimes

7A: Five Categories where crime has improved

7B: Five Categories where crime has worsened

Step-8: Precinct wise Change in Crime

8A: Five Precints where crime has improved maximum

8B: Five Precints where crime has increased the most

Step-9: Change in Crimes Month Wise and Year Wise

9A: Change in Crimes Month Wise and Year Wise

9B: Change in Crimes Month Wise in 2017

9C: Change in Crimes MonthWise in 2018

Step-10: Change in Crimes Month Wise and Year Wise

10A: Total no of Crime Occurrances Since 2014

Step-11: Regression-Prepared Separately