Chapter 56 Top Ten most Common Words
#' Plot the ten most frequent words as a horizontal bar chart
#'
#' Splits the `text` column of `train` into word tokens, removes every token
#' found in `stop_words$word` or in `movie_stopwords`, counts the remaining
#' words and draws the ten most frequent ones as bars, with the count printed
#' in parentheses on each bar.
#'
#' Relies on `stop_words`, `movie_stopwords` and `fillColor` being defined
#' outside this function.
#'
#' @param train A data frame with a `text` column.
#' @param title Title shown above the plot.
#' @return A ggplot object.
createBarPlotCommonWords <- function(train, title) {
  train %>%
    unnest_tokens(word, text) %>%
    filter(!word %in% stop_words$word) %>%
    filter(!word %in% movie_stopwords) %>%
    dplyr::count(word, sort = TRUE) %>%
    ungroup() %>%
    # Truncate before building the factor so it only carries the ten levels
    # that are actually plotted.
    head(10) %>%
    # Reversed levels put the most frequent word at the top after coord_flip().
    mutate(word = factor(word, levels = rev(unique(word)))) %>%
    ggplot(aes(x = word, y = n)) +
    geom_bar(stat = "identity", colour = "white", fill = fillColor) +
    # paste0() takes no `sep` argument; the label is simply "(<count>)".
    geom_text(
      aes(x = word, y = 1, label = paste0("(", n, ")")),
      hjust = 0, vjust = .5, size = 4, colour = "black",
      fontface = "bold"
    ) +
    labs(x = "Word", y = "Word Count", title = title) +
    coord_flip() +
    theme_bw()
}
# Bar chart of the most common words in the unfiltered `train` data
createBarPlotCommonWords(train, title = "Top 10 most Common Words")
56.1 Word Cloud of the Common Words
A word cloud is a graphical representation of the words used most frequently in a text. The size of each word in the picture indicates how often that word occurs in the entire text.
# Draw a word cloud of the 30 most frequent words in the `text` column of
# `train`.
#
# The text is split into word tokens; tokens listed in `stop_words$word` or in
# `movie_stopwords` (both defined outside this function) are discarded before
# counting. Colours come from the 8-colour "Dark2" brewer palette.
createWordCloud <- function(train) {
  tokens <- unnest_tokens(train, word, text)
  kept <- filter(
    tokens,
    !word %in% stop_words$word,
    !word %in% movie_stopwords
  )

  # Counts sorted in descending order, so head() keeps the most frequent words.
  word_counts <- ungroup(dplyr::count(kept, word, sort = TRUE))
  top_words <- head(word_counts, 30)

  wordcloud(
    words = top_words$word,
    freq = top_words$n,
    max.words = 30,
    colors = brewer.pal(8, "Dark2")
  )
}
# Word cloud for the unfiltered `train` data
createWordCloud(train = train)
56.2 Word Cloud of Negative Sentiments
# Sentiment 0 = negative
train %>%
  filter(Sentiment == 0) %>%
  createWordCloud()
56.3 Word Cloud of somewhat negative Sentiments
# Sentiment 1 = somewhat negative
train %>%
  filter(Sentiment == 1) %>%
  createWordCloud()
56.4 Word Cloud of neutral Sentiments
# Sentiment 2 = neutral
train %>%
  filter(Sentiment == 2) %>%
  createWordCloud()
56.5 Word Cloud of somewhat positive Sentiments
# Sentiment 3 = somewhat positive
train %>%
  filter(Sentiment == 3) %>%
  createWordCloud()
56.6 Word Cloud of positive Sentiments
# Sentiment 4 = positive
train %>%
  filter(Sentiment == 4) %>%
  createWordCloud()