-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01.tidy_grocery.R
56 lines (36 loc) · 1.44 KB
/
01.tidy_grocery.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
library(tidyverse)
library(dplyr)
# Preparation ----
# Load the raw grocery-store extracts for 2020, 2013 and 2011 from the project
# directory and lower-case their column names so later steps can match
# columns without worrying about capitalisation.
path <- "C:/Users/witim/final-project-jieyi_hanzhe_jaeho"

df_2020 <- file.path(path, "0.raw_data_grocery_store_2020.csv") %>%
  read_csv() %>%
  setNames(tolower(names(.)))

df_2013 <- file.path(path, "0.raw_data_grocery_store_2013.csv") %>%
  read_csv() %>%
  setNames(tolower(names(.)))

df_2011 <- file.path(path, "0.raw_data_grocery_store_2011.csv") %>%
  read_csv() %>%
  setNames(tolower(names(.)))
# Data Wrangling
# Count rows (stores) per 5-character ZIP code prefix.
#
# Args:
#   df: A data frame containing exactly one column whose name contains "zip"
#       (matched case-insensitively, because names are lower-cased first).
#
# Returns:
#   A data frame with one row per distinct ZIP prefix and two columns:
#   `zip_code` (character, first five characters of the original value) and
#   `n` (number of rows of `df` sharing that prefix).
#
# Raises:
#   An error if zero or more than one column name contains "zip", since the
#   ZIP column would then be missing or ambiguous.
fn.zip_store <- function(df) {
  names(df) <- tolower(names(df))

  # Resolve the ZIP column explicitly so a missing or ambiguous match fails
  # with a clear message instead of an obscure renaming/selection error.
  zip_cols <- grep("zip", names(df), fixed = TRUE, value = TRUE)
  if (length(zip_cols) != 1L) {
    stop(
      "Expected exactly one column with \"zip\" in its name, found ",
      length(zip_cols), ".",
      call. = FALSE
    )
  }

  df %>%
    # Keep only the first five characters (drops any ZIP+4 suffix); numeric
    # ZIP columns are converted to character first.
    mutate(zip_code = substr(as.character(.data[[zip_cols]]), 1L, 5L)) %>%
    count(zip_code)
}
# Per-year store counts by ZIP code.
df_grocery_2020 <- fn.zip_store(df_2020)
df_grocery_2013 <- fn.zip_store(df_2013)
df_grocery_2011 <- fn.zip_store(df_2011)

# Name each element by its year so the year label travels with the data
# instead of being assigned later by column position.
df_grocery_list <- list(
  "2020" = df_grocery_2020,
  "2013" = df_grocery_2013,
  "2011" = df_grocery_2011
)

df_grocery <- df_grocery_list %>%
  # Rename each table's count column `n` to its year before joining, so the
  # joined columns are already correctly labelled.
  imap(function(counts, year) rename(counts, !!year := n)) %>%
  # Full join keeps every ZIP code that appears in any year.
  reduce(full_join, by = "zip_code") %>%
  pivot_longer(
    cols = all_of(names(df_grocery_list)),
    names_to = "year",
    values_to = "tot_grocery"
  ) %>%
  # A ZIP code absent from a given year has no stores that year. Use an
  # integer zero so the fill value matches the integer counts.
  mutate(tot_grocery = coalesce(tot_grocery, 0L))

write.csv(df_grocery, "02.zip_grocery.csv", row.names = FALSE)
# '[note]' is for group members. Remove those notes before submission.
# [reference]: https://www.statology.org/merge-multiple-data-frames-in-r/