# Package setup ----
# pacman::p_load() installs any listed package that is missing and then
# attaches it, so only pacman itself needs to be bootstrapped by hand.
# Installing rvest unconditionally would re-download it on every run.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(tidyverse, pacman, rvest, openxlsx) # load all packages
# Rowing at the Summer Olympics (Wikipedia) ----
url <- "https://en.wikipedia.org/wiki/Rowing_at_the_Summer_Olympics"
wdata <- read_html(url) # read the website data
class(wdata)
str(wdata)
wdata
## Other URLs to try. Kept commented out so that `url` keeps pointing at the
## page currently held in wdata; to switch pages, assign the new url and
## re-run read_html(url).
# url <- "https://www.worldometers.info/coronavirus/"
# Tables ----
# Parse every <table> in wdata into a list of data frames and print it
wdata %>% html_table()
# Print only the first parsed table
wdata %>% html_table() %>% .[[1]]
# Keep the third table, taking its column names from the header row
data <- wdata %>%
  html_table(header = TRUE, fill = TRUE) %>%
  .[[3]]
View(data)

# Table nodes ----
# Print the raw <table> nodes (the unparsed HTML elements, not data frames)
wdata %>% html_nodes("table")
# Store the same node set so it can be inspected further
wnodes <- wdata %>% html_nodes("table")
str(wnodes)
class(wnodes)
wnodes

# Some extra code - to understand rvest ----
# Attributes of wdata itself (presumably those of the top-level <html> node)
wdata %>% html_attrs()
# All text contained in the document
wdata %>% html_text()
# Export ----
write.xlsx(data, "rowing.xlsx") # write in an excel file
# file.show() is a text pager and would display the binary .xlsx as garbage;
# openXL() hands the workbook to the system's spreadsheet application.
openXL("rowing.xlsx") # view the excel file
# Example from the BITS website ----
url <- "https://www.bits-pilani.ac.in/goa/listOfRecruiters" # data from bits website
# Re-read the page: without this, wdata would still hold the previously
# fetched document and the table below would come from the wrong site.
wdata <- read_html(url)
bits_tables <- html_table(wdata, header = TRUE, fill = TRUE)
# Fail with a clear message instead of "subscript out of bounds" when the
# page has no <table> elements.
if (length(bits_tables) == 0) {
  stop("No <table> elements found at ", url, call. = FALSE)
}
data <- bits_tables[[1]]
View(data)