Chapter 56 Top Ten most Common Words

#' Plot the most frequent words of a text column as a horizontal bar chart.
#'
#' Splits the `text` column of `train` into single-word tokens, removes every
#' word found in `stop_words$word` or in `movie_stopwords`, counts the
#' remaining words and plots the `n_words` most frequent ones.
#'
#' The function reads three objects it does not receive as arguments and that
#' must therefore exist outside of it: `stop_words` (presumably the tidytext
#' data set -- confirm), `movie_stopwords` and `fillColor` (bar fill colour).
#'
#' @param train Data frame with a `text` column.
#' @param title Plot title.
#' @param n_words Number of top words to show. Defaults to 10, the value that
#'   used to be hard-coded.
#' @return A ggplot object.
createBarPlotCommonWords <- function(train, title, n_words = 10) {
  stopifnot(is.numeric(n_words), length(n_words) == 1)

  top_words <- train %>%
    unnest_tokens(word, text) %>%
    filter(!word %in% stop_words$word) %>%
    filter(!word %in% movie_stopwords) %>%
    dplyr::count(word, sort = TRUE) %>%
    ungroup() %>%
    # Trim first so the factor only carries the levels that are plotted,
    # instead of one level per distinct word in the corpus.
    head(n_words) %>%
    # Reversed levels put the most frequent word at the top after coord_flip().
    mutate(word = factor(word, levels = rev(unique(word))))

  ggplot(top_words, aes(x = word, y = n)) +
    geom_bar(stat = "identity", colour = "white", fill = fillColor) +
    # paste0() has no `sep` argument; the count is simply wrapped in brackets.
    geom_text(aes(x = word, y = 1, label = paste0("(", n, ")")),
              hjust = 0, vjust = .5, size = 4, colour = "black",
              fontface = "bold") +
    labs(x = "Word", y = "Word Count",
         title = title) +
    coord_flip() +
    theme_bw()
}

# Bar chart of the most frequent words across the whole training set.
createBarPlotCommonWords(train, title = "Top 10 most Common Words")

56.1 WordCloud of the Common Words

A word cloud is a graphical representation of the words used most frequently in a text. The size of each word in the cloud indicates how often that word occurs in the entire text.

#' Draw a word cloud of the most frequent words of a text column.
#'
#' Splits the `text` column of `train` into single-word tokens, removes every
#' word found in `stop_words$word` or in `movie_stopwords`, counts the
#' remaining words and hands the `max_words` most frequent ones to
#' `wordcloud()`, coloured with the 8-colour "Dark2" Brewer palette.
#'
#' `stop_words` (presumably the tidytext data set -- confirm) and
#' `movie_stopwords` are not arguments; they must exist outside the function.
#'
#' @param train Data frame with a `text` column.
#' @param max_words Maximum number of words in the cloud. Defaults to 30, the
#'   value that used to be hard-coded in two places.
#' @return Whatever `wordcloud()` returns (it is called for its plotting side
#'   effect), or `invisible(NULL)` when no words are left to draw.
createWordCloud <- function(train, max_words = 30) {
  stopifnot(is.numeric(max_words), length(max_words) == 1)

  top_words <- train %>%
    unnest_tokens(word, text) %>%
    filter(!word %in% stop_words$word) %>%
    filter(!word %in% movie_stopwords) %>%
    dplyr::count(word, sort = TRUE) %>%
    ungroup() %>%
    head(max_words)

  # A filtered subset can lose every word to the stop-word lists; skip the
  # plot with a warning rather than passing empty vectors to wordcloud().
  if (nrow(top_words) == 0) {
    warning("No words left after stop-word removal; word cloud not drawn.",
            call. = FALSE)
    return(invisible(NULL))
  }

  wordcloud(top_words$word, top_words$n,
            max.words = max_words,
            colors = brewer.pal(8, "Dark2"))
}

# Word cloud over the complete training set (all sentiment labels).
createWordCloud(train = train)

56.2 Word Cloud of Negative Sentiments

# Sentiment label 0 = negative
createWordCloud(
  filter(train, Sentiment == 0)
)

56.3 Word Cloud of somewhat negative Sentiments

# Sentiment label 1 = somewhat negative
createWordCloud(
  filter(train, Sentiment == 1)
)

56.4 Word Cloud of neutral Sentiments

# Sentiment label 2 = neutral
createWordCloud(
  filter(train, Sentiment == 2)
)

56.5 Word Cloud of somewhat positive Sentiments

# Sentiment label 3 = somewhat positive
createWordCloud(
  filter(train, Sentiment == 3)
)

56.6 Word Cloud of positive Sentiments

# Sentiment label 4 = positive
createWordCloud(
  filter(train, Sentiment == 4)
)