# Btibert3
# 4/24/2016 - 9:12 PM
#
# Parse js files. Had to split into two.

# Keep strings as character vectors rather than factors. FALSE is spelled out
# because `F` is an ordinary variable that can be reassigned.
options(stringsAsFactors = FALSE)
library(jsonlite)
library(stringr)
library(data.table)


##################### high schools

# Read the raw high-school export; readLines() yields one string per line.
dfRaw <- readLines("hs.json")

# Split every line into per-school fragments on the "},{" record boundary.
# The result is a list holding one character vector per input line.
dfSplit <- strsplit(dfRaw, '\\},\\{')

# Empty template that fixes the column names and order of the output.
# NOTE(review): Lat/Long are declared integer(), but binding promotes a column
# to the type of the incoming values -- confirm the intended type.
dfClean <- data.table(
  CEEB = character(),
  Name = character(),
  City = character(),
  State = character(),
  Country = character(),
  Lat = integer(),
  Long = integer(),
  Type = character()
)

# Parse every fragment into one row (a list of eight field values).
# dfSplit is flattened first so that fragments from every input line are
# processed and an empty file simply produces zero rows; the previous
# `1:sapply(dfSplit, length)` loop only walked the first line and failed on
# empty input.
dfRows <- lapply(unlist(dfSplit, use.names = FALSE), function(cs) {
  # Insert the double quotes fromJSON() needs around the keys: before `id:`,
  # before each `:"`, and after each `",`.
  cs <- str_replace_all(cs, 'id:', '\\"id:')
  cs <- str_replace_all(cs, ':\\"', '\\":\"')
  cs <- str_replace_all(cs, '\\",', '\\",\\"')
  # Add the curly bracket(s) removed by the split.
  if (substr(cs, 1, 1) != '{') {
    cs <- paste0('{', cs)
  }
  if (substr(cs, nchar(cs), nchar(cs)) != '}') {
    cs <- paste0(cs, '}')
  }
  cs <- fromJSON(cs)
  list(cs$Cb, cs$N, cs$Ci, cs$S, cs$C, cs$La, cs$Lo, cs$T)
})

# Bind all rows in a single call instead of growing the table with rbind() on
# every iteration. The template goes first so its column names are kept, and
# the values are matched to columns by position.
dfClean <- rbindlist(c(list(dfClean), dfRows), use.names = FALSE)

## save to a different dataframe
hs_data <- as.data.frame(dfClean)


##################### colleges

# Read the raw college export; readLines() yields one string per line.
dfRaw <- readLines("c.json")

# Split every line into per-school fragments on the "},{" record boundary.
# The result is a list holding one character vector per input line.
dfSplit <- strsplit(dfRaw, '\\},\\{')

# Empty template that fixes the column names and order of the output.
# NOTE(review): Lat/Long are declared integer(), but binding promotes a column
# to the type of the incoming values -- confirm the intended type.
dfClean <- data.table(
  CEEB = character(),
  Name = character(),
  City = character(),
  State = character(),
  Country = character(),
  Lat = integer(),
  Long = integer(),
  Type = character()
)

# Parse every fragment into one row (a list of eight field values).
# dfSplit is flattened first so that fragments from every input line are
# processed and an empty file simply produces zero rows; the previous
# `1:sapply(dfSplit, length)` loop only walked the first line and failed on
# empty input.
dfRows <- lapply(unlist(dfSplit, use.names = FALSE), function(cs) {
  # Insert the double quotes fromJSON() needs around the keys: before `id:`,
  # before each `:"`, and after each `",`.
  cs <- str_replace_all(cs, 'id:', '\\"id:')
  cs <- str_replace_all(cs, ':\\"', '\\":\"')
  cs <- str_replace_all(cs, '\\",', '\\",\\"')
  # Add the curly bracket(s) removed by the split.
  if (substr(cs, 1, 1) != '{') {
    cs <- paste0('{', cs)
  }
  if (substr(cs, nchar(cs), nchar(cs)) != '}') {
    cs <- paste0(cs, '}')
  }
  cs <- fromJSON(cs)
  list(cs$Cb, cs$N, cs$Ci, cs$S, cs$C, cs$La, cs$Lo, cs$T)
})

# Bind all rows in a single call instead of growing the table with rbind() on
# every iteration. The template goes first so its column names are kept, and
# the values are matched to columns by position.
dfClean <- rbindlist(c(list(dfClean), dfRows), use.names = FALSE)

## save to a different dataframe
college_data <- as.data.frame(dfClean)