Chapter 11 Most Common Trigrams

A trigram is a sequence of three consecutive words. We find the most common trigrams in the training text, excluding any trigram that contains a stop word, and display the top ten in a bar plot.

# Plot the ten most frequent trigrams in `train$text`, skipping any trigram
# that contains a stop word. `train` and `fillColor2` are defined elsewhere.
train %>%
  unnest_tokens(trigram, text, token = "ngrams", n = 3) %>%
  # unnest_tokens() can emit an NA trigram for texts with fewer than three
  # words. Left in place, the NA words pass the stop-word filter below
  # (NA %in% x is FALSE) and unite() turns them into the string "NA NA NA",
  # which would then be counted like a real trigram.
  filter(!is.na(trigram)) %>%
  separate(trigram, c("word1", "word2", "word3"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word,
    !word3 %in% stop_words$word
  ) %>%
  unite(trigramWord, word1, word2, word3, sep = " ") %>%
  # count(sort = TRUE) replaces group_by() + tally() + ungroup() + arrange().
  count(trigramWord, sort = TRUE) %>%
  head(10) %>%
  # Reorder after truncating so the factor only carries the plotted levels;
  # ordering by n makes the largest bar appear on top after coord_flip().
  mutate(trigramWord = reorder(trigramWord, n)) %>%
  ggplot(aes(x = trigramWord, y = n)) +
  geom_col(colour = "white", fill = fillColor2) +
  # Print the count at the base of each bar (y = 1), left-aligned.
  geom_text(
    aes(y = 1, label = paste0("(", n, ")")),
    hjust = 0, vjust = 0.5, size = 4, colour = "black",
    fontface = "bold"
  ) +
  labs(
    x = "Trigram",
    y = "Count",
    title = "Trigram and Count"
  ) +
  coord_flip() +
  theme_bw()