forked from half-adder/Cod-Squad
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodeling_pt_1.R
107 lines (73 loc) · 2.92 KB
/
modeling_pt_1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
## Libraries
#install.packages("pscl")
library(pscl)
## Read in data
# Load the interactively chosen CSV and keep only complete rows.
dat <- na.omit(read.csv(file.choose()))

# Attach sea-surface-temperature rows to each bounding box in `dat`.
# The previous equality keys mapped both `left` and `right` to `lon` (and both
# `top` and `bottom` to `lat`), which cannot describe "point inside box"; a
# range join is used instead (requires dplyr >= 1.1.0 for join_by()).
# NOTE(review): `seasurfacetemp` is not created anywhere in the visible script;
# it must already exist in the session with `lon` / `lat` columns.
# NOTE(review): assumes left <= right and bottom <= top for every box, and a
# box containing several temperature points yields one output row per point --
# confirm that is acceptable downstream.
newdat <- dplyr::left_join(
  dat,
  seasurfacetemp,
  by = dplyr::join_by(
    between(y$lon, x$left, x$right),
    between(y$lat, x$bottom, x$top)
  )
)
# Row total over columns 2 to 9 of `sediment`; rowSums() returns NA for a row
# as soon as any of those columns is NA.
sediment[["count"]] <- rowSums(sediment[, 2:9])

# 1 when the row total is NA, 0 otherwise. The column name keeps its original
# spelling because the model formulas refer to it.
sediment[["sediement_NA_flag"]] <- ifelse(is.na(sediment[["count"]]), 1, 0)

# Save the flagged sediment table.
write.csv(sediment, file = "sediment_grid_with_flag.csv")

# Store grid_id as a factor in `master`.
master[["grid_id"]] <- as.factor(master[["grid_id"]])
## Build the final modelling table
# Both inputs are chosen interactively.
final <- read.csv(file.choose())
buoy <- read.csv(file.choose())

# Buoy readings are matched on grid cell and month.
final2 <- dplyr::left_join(final, buoy, by = c("grid_id", "month"))

# Add the sediment NA flag per grid cell. The key is stated explicitly so the
# join cannot silently pick up additional shared column names.
final3 <- dplyr::left_join(
  final2,
  dplyr::select(sediment, grid_id, sediement_NA_flag),
  by = "grid_id"
)

# Add `temp` per grid cell.
# NOTE(review): `newMaster` is not created in the visible script. If `final3`
# already carries a `temp` column, this join now produces temp.x / temp.y
# instead of also matching on temp -- confirm which is intended.
final4 <- dplyr::left_join(
  final3,
  dplyr::select(newMaster, grid_id, temp),
  by = "grid_id"
)
write.csv(final4, "MASTER.csv")

# NOTE(review): `mydf` is not created in the visible script, so the join keys
# cannot be determined here; the join is left on all shared column names.
master2 <- dplyr::left_join(master, mydf)
## Per-year partitions of `master` for testing/training
# which() discards rows whose year is NA, matching subset()'s handling.
dat_17 <- master[which(master[["year"]] == 2017), , drop = FALSE]
dat_18 <- master[which(master[["year"]] == 2018), , drop = FALSE]
## Model formulas
# All three model number_of_fish. `~` already yields a formula object, so no
# as.formula() wrapper is needed, and each predictor is listed exactly once
# (`temp` and `wind_direction_degrees` were previously repeated).

# Full specification: month, temperature, sediment composition, grid cell and
# buoy weather variables. grid_id is wrapped in as.factor() to agree with the
# other two formulas, so it is treated as categorical whatever its stored type.
formula <- number_of_fish ~ as.factor(month) +
  temp + GRAVEL + SAND + CLAY + MUD + SILT + ROCK +
  BEDROCK + sediement_NA_flag + as.factor(grid_id) +
  wind_direction_degrees + atmospheric_pressure_mb +
  wave_period_s + wave_height_m + wind_gust_m.s +
  visibility_m + air_temperatures_deg_f

# Same as above without the individual sediment composition columns.
formula2 <- number_of_fish ~ as.factor(month) +
  temp + sediement_NA_flag + as.factor(grid_id) +
  wind_direction_degrees + atmospheric_pressure_mb +
  wave_period_s + wave_height_m + wind_gust_m.s +
  visibility_m + air_temperatures_deg_f

# Month, sediment and grid cell only (no temperature or buoy variables).
formula_single_year <- number_of_fish ~ as.factor(month) +
  GRAVEL + SAND + CLAY + MUD + SILT + ROCK + SEDIMENT +
  BEDROCK + sediement_NA_flag + as.factor(grid_id)
## Poisson
# Poisson regression of the full specification (`formula`) fitted on master3.
pois1 <- glm(
  formula = formula,
  family = "poisson",
  data = master3
)
# Terms noted as significant for this fit:
#   - atmospheric pressure
#   - sediment
#   - sand
#   - air temperature
#   - wind gust
# Observed vs. predicted counts for pois1.
# The model needs every predictor named in its formula, so predictions are
# made on the whole master3 frame rather than a three-column slice.
# type = "response" returns expected counts; the default is the link scale,
# which is not comparable with observed counts.
df <- data.frame(
  count = master3$number_of_fish,
  pred = predict(pois1, newdata = master3, type = "response")
)
plot(
  df$count, df$pred,
  xlab = "Observed number of fish",
  ylab = "Predicted number of fish"
)
# Reference line: points on it are predicted exactly.
abline(a = 0, b = 1, col = "blue")
# Second Poisson regression: same data (master3), predictors from `formula2`.
pois2 <- glm(formula = formula2, family = "poisson", data = master3)
## Hurdle
# Negative-binomial hurdle model of number_of_fish against all remaining
# columns of `master` (the `.` on the right-hand side).
h1 <- hurdle(number_of_fish ~ ., data = master, dist = "negbin")
## Zero-inflated poisson
# zeroinfl() has no default for its formula, so the earlier call with an empty
# first argument could not run. The all-columns specification used for the
# hurdle model (h1) is supplied here.
# NOTE(review): confirm this is the intended predictor set for the
# zero-inflated fit.
zip1 <- zeroinfl(number_of_fish ~ ., data = master, dist = "poisson")
## Zero-inflated negative binomial
# zeroinfl() has no default for its formula, so the earlier call with an empty
# first argument could not run. The all-columns specification used for the
# hurdle model (h1) is supplied here.
# NOTE(review): confirm this is the intended predictor set for the
# zero-inflated fit.
zinb1 <- zeroinfl(number_of_fish ~ ., data = master, dist = "negbin")