# Setup ####
# Bootstrap pacman only when it is missing. p_load() then installs any package
# that is not yet available and attaches all of them, so nothing is
# re-downloaded on every run of the script.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(tm, wordcloud, SnowballC, tidyverse, pacman, openxlsx)

# Read the reviews workbook into a data frame.
data <- read.xlsx("linkedin.xlsx")

# The rest of the script builds its corpus from the `Review` column; fail here
# with a clear message instead of silently producing an empty corpus later.
if (!"Review" %in% names(data)) {
  stop("linkedin.xlsx must contain a 'Review' column", call. = FALSE)
}
View(data)
#Creating corpus from dataset####
# Build a volatile (in-memory) corpus from the Review column: every review
# becomes one text document in the structure the tm package works with.
data <- VCorpus(VectorSource(data$Review))
class(data)
# inspect() displays detailed information about a corpus or a single document.
inspect(data)
# Look at the 88th review as a spot check. Guard the index so a dataset with
# fewer reviews does not stop the script with "subscript out of bounds".
if (length(data) >= 88L) {
  inspect(data[[88L]])
} else {
  message("Corpus has only ", length(data), " documents; skipping document 88.")
}
#Cleaning the data####
# Lowercase everything first: the stopword list and `extra` are lowercase, so
# this lets them match words regardless of how they were typed.
data <- tm_map(data, content_transformer(tolower))

# Print the stopword list that is about to be applied ("en" is tm's default).
stopwords("en")

# Remove stopwords BEFORE stripping punctuation. The list contains contractions
# such as "don't" and "i'm"; if punctuation were removed first, the text would
# hold "dont" / "im", which no longer match and would leak into the results.
data <- tm_map(data, removeWords, stopwords("en"))

# Domain-specific words (titles and a person's name) that carry no information
# about the review itself.
extra <- c("dr", "swapan", "purkait", "sir")
data <- tm_map(data, removeWords, extra)

# Now drop punctuation, then collapse the runs of spaces left behind by the
# removals above.
data <- tm_map(data, removePunctuation)
data <- tm_map(data, stripWhitespace)
#Matrix creation####
# Build the document-term matrix: documents as rows, terms as columns, and each
# cell holding how often that term occurs in that document.
feedback <- DocumentTermMatrix(x = data)

# Display detailed information about the matrix.
inspect(feedback)

# Show which classes the matrix object carries.
class(feedback)

# Open the object in the data viewer.
View(feedback)
#Frequency dataframe####
# Sum every term's column over all documents to get its total count, then
# order the terms from most to least frequent.
word_frequency <- feedback %>%
  as.matrix() %>%
  colSums() %>%
  sort(decreasing = TRUE)
word_frequency
class(word_frequency)
View(word_frequency)

# Two-column table (term, total count) in the same most-frequent-first order.
df_frequency <- data.frame(
  word = names(word_frequency),
  frequency = word_frequency
)
View(df_frequency)
#Plotting words####
# Draw one word cloud into a PNG file and then open that file.
#
#   path    Output PNG file name.
#   ...     Further arguments forwarded to wordcloud() (max.words, min.freq,
#           colors, vfont, ...).
#   freq_df Data frame with columns `word` and `frequency`; defaults to the
#           frequency table built earlier in this script.
#   seed    RNG seed. wordcloud() positions words randomly, so seeding makes
#           the saved image identical from run to run.
#
# Returns `path` invisibly.
save_wordcloud <- function(path, ..., freq_df = df_frequency, seed = 123L) {
  png(path)
  # Always close the graphics device, even when wordcloud() fails; otherwise
  # the PNG device stays open and later plots end up in the wrong file.
  tryCatch(
    {
      set.seed(seed)
      wordcloud(freq_df$word, freq_df$frequency, ...)
    },
    finally = dev.off()
  )
  file.show(path)
  invisible(path)
}

# All words, sized by frequency (wordcloud() defaults).
save_wordcloud("cloud.png")

# At most 20 words, and only words that occur at least 10 times.
save_wordcloud("cloud1.png", max.words = 20, min.freq = 10)

#With colors####
# Eight colours from the ColorBrewer "Dark2" palette.
color_pal <- brewer.pal(8, "Dark2")
save_wordcloud("cloud2.png", max.words = 20, min.freq = 10, colors = color_pal)

#Font####
# vfont = c(typeface, style) selects a Hershey vector font: first the script
# typeface, then the serif typeface, both in plain (non-bold, non-italic) style.
save_wordcloud(
  "cloud3.png",
  max.words = 20, min.freq = 10, colors = color_pal,
  vfont = c("script", "plain")
)
save_wordcloud(
  "cloud4.png",
  max.words = 20, min.freq = 10, colors = color_pal,
  vfont = c("serif", "plain")
)