ettorerizza
5/21/2017 - 2:55 PM

Prend les matrices issues de plusieurs modélisations thématiques (topic modeling) et les reformate.

Prend les matrices issues de plusieurs modélisations thématiques (topic modeling) et les reformate.

library(dplyr)
library(data.table)
library(stringr)

# Folder containing the topic-model output files.
# NOTE(review): the working directory is still switched here because the
# final fwrite() call of this script writes its CSV relative to it.
setwd("C:/Users/ettor/Desktop/Eurovoc Topicmodeling/presidencies")

# List the input files. `pattern` is a regular expression, so the dot is
# escaped and the match is anchored at the end of the name; the previous
# ".txt" also matched names such as "notes.txt.bak" or "xtxt.csv".
files <- list.files(path = getwd(), pattern = "\\.txt$")

# Fail early with a clear message instead of passing an empty table on to
# the reshaping step.
if (length(files) == 0L) {
  stop("No .txt files found in ", getwd(), call. = FALSE)
}

# Read every tab-separated file and stack the resulting tables into one.
temp <- lapply(files, fread, sep = "\t")
data <- rbindlist(temp)

# Reshape everything to long format: one row per (V1, V2, topic) triple.
#
# V1 and V2 are the id columns. Among the remaining columns, those whose
# name ends in an odd digit hold the key (topic) and those ending in an even
# digit hold the value (proportion); the two groups are paired by position.
#
# The measure columns are built explicitly so that V1 and V2 are NOT melted
# as a (topic, proportion) pair themselves. The previous
# patterns("[13579]$", "[02468]$") also matched V1/V2, which coerced
# `proportion` to character and required filtering out the rows whose
# "proportion" contained "file" (presumably the V2 path values -- confirm
# against the input files). With the id columns excluded, `proportion`
# keeps its original type and that filter is no longer needed.
id_cols <- c("V1", "V2")
measure_cols <- setdiff(names(data), id_cols)
topic_cols <- grep("[13579]$", measure_cols, value = TRUE)
proportion_cols <- grep("[02468]$", measure_cols, value = TRUE)

DT.m2 <- data.table::melt(data,
             id.vars = id_cols,
             measure.vars = list(topic_cols, proportion_cols),
             value.name = c("topic", "proportion"),
             na.rm = TRUE) %>%
  # Derive the presidency tag and the file name from V2, then build an
  # identifier combining the presidency tag with V1.
  mutate(presidency = str_extract(V2, "EU_en_pres\\d"),
         fichier = str_extract(V2, "\\d{4}.+\\.txt"),
         topic_id = paste0(presidency, "-100_", V1)) %>%
  # Select by name rather than by position so the output layout does not
  # depend on how many columns melt() happens to produce.
  select(topic_id, doc = V1, presidency, fichier, topic, proportion) %>%
  arrange(topic_id, topic)

# Save the reshaped table as a CSV file in the current working directory.
data.table::fwrite(x = DT.m2, file = "topics_matrice.csv")