# Parse the JS data files. The data had to be split into two files (hs.json and c.json).
# Keep character columns as plain strings instead of factors. This is already
# the default since R 4.0; the call is kept for older R versions.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
options(stringsAsFactors = FALSE)
library(jsonlite)
library(stringr)
library(data.table)
##################### shared parsing helpers

# Output column name -> key used for that field inside each JS record.
# The order here is the column order of the resulting table.
school_fields <- c(
  CEEB = "Cb", Name = "N", City = "Ci", State = "S",
  Country = "C", Lat = "La", Long = "Lo", Type = "T"
)

# Rewrite one record fragment (a JS object literal with unquoted keys, e.g.
# `id:"1",Cb:"2",N:"x"`) into JSON text that fromJSON() can parse.
#
# fragment: a single string, one piece of a line split on `},{`.
# Returns:  a single string wrapped in `{` ... `}`.
#
# NOTE(review): the rewrite is purely textual, so a value that itself contains
# `id:`, `:"` or `",` would be corrupted -- confirm the data never does.
quote_js_record <- function(fragment) {
  # Open the quote in front of the first key. This assumes every record
  # starts with the key `id` -- TODO confirm against the data files.
  json <- str_replace_all(fragment, 'id:', '\\"id:')
  # Close the quote after every key:  key:"value  ->  key":"value
  json <- str_replace_all(json, ':\\"', '\\":\"')
  # Open the quote in front of every following key:  ",key  ->  ","key
  json <- str_replace_all(json, '\\",', '\\",\\"')
  # Splitting on `},{` strips the braces between records; put them back.
  if (!startsWith(json, "{")) {
    json <- paste0("{", json)
  }
  if (!endsWith(json, "}")) {
    json <- paste0(json, "}")
  }
  json
}

# Pull the fields listed in `school_fields` out of one parsed record.
#
# record:  the list returned by fromJSON() for a single record.
# Returns: a named list with one element per output column; a key that is
#          absent from the record yields NA.
extract_school_row <- function(record) {
  lapply(school_fields, function(key) {
    # `[[` matches names exactly; `$` would partially match, so a missing
    # `C` could silently pick up `Ci`.
    value <- record[[key]]
    if (is.null(value)) NA else value
  })
}

# Read a file of JS object literals and return one row per record.
#
# path:    file to read; records are expected to be separated by `},{`.
# Returns: a data.table with columns CEEB, Name, City, State, Country, Lat,
#          Long, Type. An empty file gives a zero-row table.
parse_school_js <- function(path) {
  raw_lines <- readLines(path)
  # Flatten the fragments of *every* line. Indexing only the first line's
  # fragments would drop all records on later lines.
  fragments <- unlist(strsplit(raw_lines, '\\},\\{'), use.names = FALSE)

  rows <- lapply(fragments, function(fragment) {
    extract_school_row(fromJSON(quote_js_record(fragment)))
  })

  # Empty template that fixes the column names and order.
  # NOTE(review): Lat/Long are declared integer() as before; rbindlist() is
  # expected to promote them to the type of the parsed values -- confirm
  # against data.table's coercion rules if a specific type is required.
  template <- data.table(
    CEEB = character(),
    Name = character(),
    City = character(),
    State = character(),
    Country = character(),
    Lat = integer(),
    Long = integer(),
    Type = character()
  )

  # Bind once, by position, instead of growing the table inside a loop.
  rbindlist(c(list(template), rows), use.names = FALSE)
}

##################### high schools
dfClean <- parse_school_js("hs.json")
## save to a different dataframe
hs_data <- as.data.frame(dfClean)

##################### colleges
dfClean <- parse_school_js("c.json")
## save to a different dataframe
college_data <- as.data.frame(dfClean)