library(tidyverse)
library(janitor)
library(lubridate)
# Load the raw CSV, then let janitor normalise the column names.
tx <- clean_names(read_csv("data-raw/gun_accidents_tx.csv"))
## Parsed with column specification:
## cols(
## `Incident Date` = col_character(),
## State = col_character(),
## `City Or County` = col_character(),
## Address = col_character(),
## `# Killed` = col_double(),
## `# Injured` = col_double(),
## Operations = col_character()
## )
Adds an id so we can export parts of the data and merge them back to the correct records. Sets city to the value of city_or_county, which is cleaned in the next step. Adds a county column, which is filled in later.
# Add three helper columns:
#   id     - the row position, usable as a key for each record
#   county - empty-string placeholder; reassigned further down the script
#   city   - a working copy of city_or_county
tx <- mutate(
  tx,
  id = row_number(),
  county = "",
  city = city_or_county
)
# Where the city text contains lowercase "county", the pattern matches the
# surrounding text as well and it is all replaced with an empty string.
tx <- mutate(
  tx,
  city = str_replace(city, pattern = ".*county.*", replacement = "")
)
There are six records that have a county listed in city_or_county. We populate the county column only when the phrase " (county)" is in the city_or_county field. Then we remove the phrase " (county)". Note the double backslash needed to escape each parenthesis in the pattern.
tx <- mutate(
  tx,
  # Copy city_or_county into county only for records that mention "county".
  # There is no fallback branch, so every other record gets NA.
  county = case_when(
    grepl("county", city_or_county) ~ city_or_county
  ),
  # Remove the literal text " (county)". Parentheses are regex
  # metacharacters, hence the doubled backslashes.
  county = str_remove(county, " \\(county\\)")
)
# Convert incident_date from text to a date with lubridate's
# month-day-year parser.
tx <- mutate(tx, incident_date = mdy(incident_date))
Peek at the names
# Print the current column names.
names(tx)
## [1] "incident_date" "state" "city_or_county" "address"
## [5] "number_killed" "number_injured" "operations" "id"
## [9] "county" "city"
Reselect to change the order
# Put the columns in their final order: id first, then the cleaned fields,
# with the original city_or_county kept last.
tx <- select(
  tx,
  id, incident_date,
  address, city, county, state,
  number_killed, number_injured,
  operations,
  city_or_county
)
# Save the cleaned table in both RDS and CSV form.
write_rds(tx, "data-out/01_tx.rds")
write_csv(tx, "data-out/01_tx.csv")