rongmu
12/2/2015 - 3:34 PM

LC-2015.11.27-ttr_gi.R

library(dplyr)

# Load the token table; the pipeline below uses its `native`, `id` and
# `token` columns.
tokens <- read.csv('data/tokens.csv')

# Per-group measures, one row per (native, id) combination:
#   n_token - number of token rows in the group
#   n_type  - number of distinct token values in the group
#   ttr     - type/token ratio, n_type / n_token
#   gi      - n_type / sqrt(n_token) (presumably Guiraud's index -- confirm)
result <- tokens %>%
    group_by(native, id) %>%
    summarise(
        n_token = n(),
        n_type  = n_distinct(token),
        ttr     = n_type / n_token,
        gi      = n_type / sqrt(n_token)
    ) %>%
    # summarise() only peels off the last grouping level, so the table would
    # otherwise stay grouped by `native`; drop that leftover grouping so later
    # steps operate on a plain, ungrouped table.
    ungroup()


# save the result to csv
# write.csv() does not create missing directories, so make sure `out/` exists
# first (this is a silent no-op when the directory is already there).
dir.create('out', showWarnings = FALSE, recursive = TRUE)
write.csv(result, file = 'out/2015.11.27-ttr_gi.csv', row.names = FALSE)

# print out the result
# Rounding to 3 decimals is for display only: the pipeline's value is not
# assigned, so `result` itself is left untouched. Plain mutate() replaces the
# deprecated mutate_each()/funs() pair and rounds the same two columns.
# as.data.frame() is kept, presumably so print() uses the data.frame method
# rather than the abbreviated tbl display.
result %>%
    mutate(
        ttr = round(ttr, 3),
        gi  = round(gi, 3)
    ) %>%
    as.data.frame() %>%
    print()