# Day #12

#Setup: packages and input data####

# Bootstrap pacman only when it is absent. An unconditional install.packages()
# call would re-download and reinstall packages on every run of the script.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

# p_load() attaches each listed package and, by default, installs any that is
# missing, so no separate install step is needed for them.
pacman::p_load(tm, wordcloud, SnowballC, tidyverse, pacman, openxlsx)

# Read the workbook into a data frame; the path is relative to the working
# directory.
data <- read.xlsx("linkedin.xlsx")

View(data)

#Creating corpus from dataset####

# Build a volatile (in-memory) tm corpus in which every element of the Review
# column becomes one document. Note that `data` is rebound here: it was the
# data frame and is the corpus from this point on.
data <- VCorpus(VectorSource(data$Review))

class(data)

# For a corpus, inspect() displays detailed information about its documents.
inspect(data)

# Show document number 88, but only when the corpus is that long: indexing past
# the end with [[ ]] would stop the script with a subscript error.
if (length(data) >= 88) {
  inspect(data[[88]])
} else {
  message("Corpus has fewer than 88 documents; skipping inspect(data[[88]]).")
}

#Cleaning the data####

# Convert every document to lowercase so the word matching below does not
# depend on capitalisation.
data <- tm_map(data, content_transformer(tolower))

# Display the default stopword list (auto-printed when run interactively).
stopwords()

# Remove stopwords BEFORE stripping punctuation: the stopword list contains
# contractions such as "don't", which would no longer match once the
# apostrophe has been removed from the text.
data <- tm_map(data, removeWords, stopwords())

# Remove all punctuation marks.
data <- tm_map(data, removePunctuation)

# Additional words to drop on top of the standard stopwords.
extra <- c("dr", "swapan", "purkait", "sir")

data <- tm_map(data, removeWords, extra)

# Collapse the runs of whitespace left behind by the removals.
data <- tm_map(data, stripWhitespace)

#Matrix creation####

# Document-term matrix: documents as rows, terms as columns, and each cell
# holding how often that term occurs in that document.
feedback <- DocumentTermMatrix(data)

# Print a summary of the matrix together with a sample of its entries.
inspect(feedback)

class(feedback)

View(feedback)

#Frequency dataframe####

# Total number of occurrences of each term across all documents, ordered from
# most to least frequent. local() keeps the intermediate out of the workspace.
word_frequency <- local({
  term_totals <- colSums(as.matrix(feedback))
  sort(term_totals, decreasing = TRUE)
})

word_frequency

class(word_frequency)

View(word_frequency)

# Table with one row per term: the term itself and its total frequency.
df_frequency <- data.frame(
  word = names(word_frequency),
  frequency = word_frequency
)

View(df_frequency)

#Plotting words####

# Draw one word cloud of `df_frequency` into a PNG file and then open that file.
#   file : path of the PNG to create.
#   ...  : further arguments, forwarded unchanged to wordcloud().
# The device is closed in `finally`, so an error raised inside wordcloud()
# cannot leave the PNG device open and swallow every later plot.
# Returns `file` invisibly.
save_wordcloud <- function(file, ...) {
  png(file)
  tryCatch(
    wordcloud(df_frequency$word, df_frequency$frequency, ...),
    finally = dev.off()
  )
  file.show(file)
  invisible(file)
}

# Word sizes reflect frequency; all other settings are wordcloud() defaults.
save_wordcloud("cloud.png")

# At most 20 words, each of which occurs at least 10 times.
save_wordcloud("cloud1.png", max.words = 20, min.freq = 10)

#With colors####

# Eight colours from the ColorBrewer "Dark2" palette.
color_pal <- brewer.pal(8, "Dark2")

# Same selection of words as cloud1.png, drawn with the palette above.
save_wordcloud(
  "cloud2.png",
  max.words = 20, min.freq = 10, colors = color_pal
)

#Font####

# `vfont` selects a Hershey vector font as c(typeface, fontindex).
save_wordcloud(
  "cloud3.png",
  max.words = 20, min.freq = 10, colors = color_pal,
  vfont = c("script", "plain")
)

save_wordcloud(
  "cloud4.png",
  max.words = 20, min.freq = 10, colors = color_pal,
  vfont = c("serif", "plain")
)

# Page updated

# Google Sites

# Report abuse