In the second R Video Tutorial I show you how you can scrape tables from websites, change them into whatever format you need and then automatically upload the new table to a WordPress Website.
All of this takes place within an R Markdown file. If you want to see me make a video about R Markdown, please let me know.
The video:
The code part 1: The RMarkdown file
```{r generate table, include = FALSE}
library(dplyr)
library(httr)
library(rvest)
library(data.table)
library(mgsub)
library(knitr)
library(kableExtra)
# Scrape the Regionalliga Ost schedule, keep only the Crazy Geese games and
# turn them into a styled HTML table (`schedule_format`).

# Download the page and fail fast on an HTTP error (4xx/5xx) instead of
# silently parsing an error page.
rlo <- httr::GET("http://baseballaustria.com/regionalliga-ost/")
httr::stop_for_status(rlo)
rlo <- xml2::read_html(rlo)

# The raw schedule is taken from the second <table> node of the page.
page_tables <- rvest::html_nodes(rlo, css = "table")
if (length(page_tables) < 2L) {
  stop(
    "Expected at least two tables on the page, found ", length(page_tables),
    call. = FALSE
  )
}
schedule <- rvest::html_table(page_tables[2])
schedule <- data.table::rbindlist(schedule)

# Keep only the games in which the Crazy Geese play.
schedule <- schedule[
  Home == "Rohrbach Crazy Geese" | Away == "Rohrbach Crazy Geese"
]

# Split the timestamp into a date (dd.mm.yyyy) and a start time (HH:MM).
# as.POSIXct() is used instead of strptime(): strptime() returns POSIXlt,
# which data.table does not support as a column type. The parsed value is
# kept in a local vector, so no helper column has to be dropped afterwards.
# Parsing and formatting in the same fixed time zone reproduces the
# wall-clock values exactly as they appear on the page.
game_time <- as.POSIXct(
  schedule$Date,
  format = "%Y-%m-%d %H:%M:%S",
  tz = "UTC"
)
schedule[, Datum := format(game_time, "%d.%m.%Y")]
schedule[, Beginn := format(game_time, "%H:%M")]

# Shorten the field descriptions to the names shown on the website.
schedule[Field %like% "Rohrbach", Field := "Rohrbach"]
schedule[Field %like% "Schwechat", Field := "Rannersdorf"]
schedule[Field %like% "Spenadlwiese", Field := "Spenadlwiese - Wien"]
schedule[Field %like% "Schrems", Field := "Schrems"]

# Shorten the team names; only the Home and Away columns contain team names.
team_cols <- c("Home", "Away")
schedule[
  ,
  (team_cols) := lapply(.SD, function(x) {
    mgsub::mgsub(
      x,
      c("Rohrbach Crazy Geese", "Schwechat Blue Bats"),
      c("Crazy Geese", "Blue Bats")
    )
  }),
  .SDcols = team_cols
]

# `Time/Results` holds a score ("x - y") for rows that have a result; every
# other row gets " - " as a placeholder.
has_result <- grepl(" - ", schedule$`Time/Results`, fixed = TRUE)
schedule[, Ergebnis := " - "]
schedule[has_result, Ergebnis := `Time/Results`]

# Select the columns to publish and give them their German headings.
schedule <- schedule[
  ,
  .(Datum, Beginn, Heim = Home, Gast = Away, Spielort = Field, Ergebnis)
]

# Rows played in Rohrbach (home games) are printed in blue.
blue_rows <- which(schedule$Spielort == "Rohrbach")
schedule_format <- kable(schedule, align = rep("c", ncol(schedule))) %>%
  kable_styling(bootstrap_options = c("responsive")) %>%
  row_spec(blue_rows, color = "#01023C")
""`
**Regular Season**
```{r show the table , echo = FALSE}
# Auto-print the styled schedule table so it is rendered into the output.
schedule_format
```
**Heimspiele sind blau markiert**
The code part 2: Posting to WordPress
# Publish the knitted schedule to WordPress via XML-RPC.

# Login (named vector: user name = password) and XML-RPC endpoint, set as
# options. Replace the placeholders with your own values and keep real
# passwords out of shared or version-controlled files.
options(
  WordPressLogin = c(YOURWORDPRESSUSERNAME = "YOURWORDPRESSPASSWORD"),
  WordPressURL = "http://www.YOURWEBSITE.COM/xmlrpc.php"
)

library(knitr)

# install.packages("RWordPress") does not work on recent R versions.
# Workaround: install RWordPress and its dependency XMLRPC from GitHub.
# The requireNamespace() guards skip the installation when the packages are
# already present, so the script does not contact GitHub on every run.
library(remotes)
if (!requireNamespace("XMLRPC", quietly = TRUE)) {
  install_github("duncantl/XMLRPC")
}
if (!requireNamespace("RWordPress", quietly = TRUE)) {
  install_github("duncantl/RWordPress")
}
library(RWordPress)

# Post the RLO schedule: knit the Rmd file and update the existing post
# (action = "editPost") with id 1032, publishing it immediately.
knit2wp(
  "C:/Users/skofield/Nextcloud/Firma/Projects/Tutorials/02_webscraping_wordpress/schedule_rlo.Rmd",
  title = "Spielplan RLO 2018",
  publish = TRUE,
  action = "editPost",
  postid = 1032
)