library(rvest)
library(data.table)
library(dplyr)
# Download the Spanish Wikipedia article on the 2014/15 Primeira Liga season.
# The encoding is declared as UTF-8 because the text extracted from this page
# is treated as UTF-8 further down (see the iconv() calls).
lpt <- read_html(
  "https://es.wikipedia.org/wiki/Primeira_Liga_2014/15",
  encoding = "UTF-8"
)

# Parse every <table> element of the page into a data frame and keep the
# 11th one. Per the original author's note, any index from 1 to 11 can be
# selected here instead.
data <- lpt %>%
  html_nodes("table") %>%
  html_table(fill = TRUE) %>%
  .[[11]]

# Re-encode the text columns from UTF-8 to latin1 (workaround for the
# Windows console encoding). iconv() is called without `sub`, so any element
# containing a character that latin1 cannot represent becomes NA.
data$Jugador <- iconv(x = data$Jugador, from = "UTF-8", to = "latin1")
data$Equipo <- iconv(x = data$Equipo, from = "UTF-8", to = "latin1")

# Show the raw table before normalisation.
data
# Turn a character vector of names into upper-case, accent-free factor labels.
#
# Steps, in order:
#   1. lower-case the text, so accented capitals are covered by the
#      lower-case translation table below;
#   2. replace each accented letter with its unaccented counterpart;
#   3. upper-case the result;
#   4. replace every space with an underscore;
#   5. delete every double underscore (i.e. what used to be two spaces);
#   6. convert to a factor.
#
# NOTE(review): step 5 removes "__" outright rather than collapsing it to a
# single "_", so words separated by two spaces get joined together. This
# mirrors the original behaviour -- confirm whether it is intended.
clean_label <- function(x) {
  x <- tolower(x)
  x <- chartr(old = "áéíóúàèìòùãõçñôê", new = "aeiouaeiouaocnoe", x = x)
  x <- toupper(x)
  x <- gsub(pattern = " ", replacement = "_", x = x)
  x <- gsub(pattern = "__", replacement = "", x = x)
  factor(x)
}

# Convert to a data.table so the columns can be updated by reference.
data <- as.data.table(data)

# Strip the non-English characters from the player (Jugador) and team
# (Equipo) columns, applying the same normalisation to both.
label_cols <- c("Jugador", "Equipo")
data[, (label_cols) := lapply(.SD, clean_label), .SDcols = label_cols]

# Show the normalised table.
data