-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathIndiana_Data_LONG_2023.R
48 lines (36 loc) · 2.38 KB
/
Indiana_Data_LONG_2023.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
###########################################################################################
###
### Script for creating Indiana LONG data set for 2023
###
###########################################################################################
### Load data.table (supplies fread, setnames, rbindlist, setkey and := used below).
### library() is used instead of require(): require() only warns and returns
### FALSE when the package is missing, so the script would fail later at the
### first fread() call with a less helpful "could not find function" error.
library(data.table)
### Load base data files
### Every column is read as character: colClasses supplies one "character"
### entry per expected column (7 in the assessment export, 6 in the
### demographics file).
Indiana_Data_LONG_2023 <- fread("Data/Base_Files/ILEARN_2023_Damian_Export_062923.csv", colClasses=rep("character", 7))
Indiana_Demographics_2023 <- fread("Data/Base_Files/ILEARN_2023_demographics.csv", colClasses=rep("character", 6))
### Prepare Data
### Name the seven raw columns, discard STN, then stack the ELA scores on top of
### the MATH scores so each student contributes one row per content area.
setnames(
  Indiana_Data_LONG_2023,
  c(
    "IDOE_CORPORATION_ID", "IDOE_SCHOOL_ID", "STN", "STUDENT_ID",
    "GRADE_ID", "ELA_SCALE_SCORE", "MATH_SCALE_SCORE"
  )
)
Indiana_Data_LONG_2023[, STN := NULL]
### Both halves share the four identifier columns; use.names=FALSE stacks them by
### position, so the MATH scores land in the column still named ELA_SCALE_SCORE.
Indiana_Data_LONG_2023 <- rbindlist(
  list(
    Indiana_Data_LONG_2023[, .(IDOE_CORPORATION_ID, IDOE_SCHOOL_ID, STUDENT_ID, GRADE_ID, ELA_SCALE_SCORE)],
    Indiana_Data_LONG_2023[, .(IDOE_CORPORATION_ID, IDOE_SCHOOL_ID, STUDENT_ID, GRADE_ID, MATH_SCALE_SCORE)]
  ),
  use.names = FALSE
)
setnames(Indiana_Data_LONG_2023, "ELA_SCALE_SCORE", "SCALE_SCORE")
### First half of the stacked rows is ELA, second half is MATHEMATICS.
Indiana_Data_LONG_2023[, CONTENT_AREA := rep(c("ELA", "MATHEMATICS"), each = nrow(Indiana_Data_LONG_2023) / 2)]
### Every row starts out valid and belongs to the 2023 school year.
Indiana_Data_LONG_2023[, c("VALID_CASE", "SCHOOL_YEAR") := list("VALID_CASE", "2023")]
### Scores were read as character; convert them to numeric.
Indiana_Data_LONG_2023[, SCALE_SCORE := as.numeric(SCALE_SCORE)]
### Prepare Indiana_Demographics_2023
### Name the six demographic columns, stamp every row with the school year and
### the default validity flag, then give both tables the same key so they can be
### joined on (VALID_CASE, SCHOOL_YEAR, STUDENT_ID).
setnames(
  Indiana_Demographics_2023,
  c(
    "STUDENT_ID", "ETHNICITY", "SPECIAL_EDUCATION_STATUS",
    "SOCIO_ECONOMIC_STATUS", "ENGLISH_LANGUAGE_LEARNER_STATUS", "GENDER"
  )
)
Indiana_Demographics_2023[, c("SCHOOL_YEAR", "VALID_CASE") := list("2023", "VALID_CASE")]
setkeyv(Indiana_Demographics_2023, c("VALID_CASE", "SCHOOL_YEAR", "STUDENT_ID"))
setkeyv(Indiana_Data_LONG_2023, c("VALID_CASE", "SCHOOL_YEAR", "STUDENT_ID"))
### Merge in demographics
### Keyed join X[Y]: every row of Indiana_Data_LONG_2023 is retained and matched
### to Indiana_Demographics_2023 on the key the two tables share; score rows with
### no demographics match get NA in the demographic columns.
Indiana_Data_LONG_2023 <- Indiana_Demographics_2023[Indiana_Data_LONG_2023]
### Tidy up column order
### Columns are listed by name rather than by position so the final layout does
### not depend on the column order the join happens to produce.
setcolorder(
  Indiana_Data_LONG_2023,
  c(
    "VALID_CASE", "CONTENT_AREA", "SCHOOL_YEAR", "GRADE_ID", "STUDENT_ID",
    "SCALE_SCORE", "ETHNICITY", "SPECIAL_EDUCATION_STATUS",
    "SOCIO_ECONOMIC_STATUS", "ENGLISH_LANGUAGE_LEARNER_STATUS", "GENDER",
    "IDOE_CORPORATION_ID", "IDOE_SCHOOL_ID"
  )
)
### Take highest score for duplicates
### Step 1: sort with SCALE_SCORE as the last key column, so that within each
### (VALID_CASE, SCHOOL_YEAR, CONTENT_AREA, GRADE_ID, STUDENT_ID) group the rows
### run from lowest to highest score.
setkeyv(
  Indiana_Data_LONG_2023,
  c("VALID_CASE", "SCHOOL_YEAR", "CONTENT_AREA", "GRADE_ID", "STUDENT_ID", "SCALE_SCORE")
)
### Step 2: re-key without SCALE_SCORE so key() names exactly the columns that
### define a duplicate record.
setkeyv(
  Indiana_Data_LONG_2023,
  c("VALID_CASE", "SCHOOL_YEAR", "CONTENT_AREA", "GRADE_ID", "STUDENT_ID")
)
### Step 3: fromLast=TRUE flags every row of a group except its last one, i.e.
### every row except the highest-scoring one; those rows are marked invalid.
Indiana_Data_LONG_2023[
  duplicated(Indiana_Data_LONG_2023, by = key(Indiana_Data_LONG_2023), fromLast = TRUE),
  VALID_CASE := "INVALID_CASE"
]
### Setkey final time
### VALID_CASE is a key column and was just modified, so the key is set again.
setkeyv(
  Indiana_Data_LONG_2023,
  c("VALID_CASE", "SCHOOL_YEAR", "CONTENT_AREA", "GRADE_ID", "STUDENT_ID")
)
### Save results
### Write the finished table, under its own object name, to an .Rdata file.
save(list = "Indiana_Data_LONG_2023", file = "Data/Indiana_Data_LONG_2023.Rdata")