Chapter 11 Most Common Trigrams
A trigram is a sequence of three consecutive words. We find the most common trigrams, ignoring any trigram that contains a stop word, and show their counts in a bar plot.
# Plot the ten most frequent trigrams (three-word sequences) found in the
# `text` column of `train`, after discarding every trigram in which any of
# the three words is a stop word. `fillColor2` is expected to be defined
# earlier in the file.
train %>%
  unnest_tokens(trigram, text, token = "ngrams", n = 3) %>%
  # Texts with fewer than three words produce an NA trigram. Drop those rows
  # here: otherwise they pass the stop-word filter below (NA is not a stop
  # word) and end up counted as the bogus trigram "NA NA NA".
  filter(!is.na(trigram)) %>%
  separate(trigram, c("word1", "word2", "word3"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word,
    !word3 %in% stop_words$word
  ) %>%
  unite(trigramWord, word1, word2, word3, sep = " ") %>%
  # One row per distinct trigram with its frequency `n`, most frequent first.
  count(trigramWord, sort = TRUE) %>%
  head(10) %>%
  # Order the factor levels by count so the bars are sorted in the plot.
  mutate(trigramWord = reorder(trigramWord, n)) %>%
  ggplot(aes(x = trigramWord, y = n)) +
  geom_col(colour = "white", fill = fillColor2) +
  # Print the count in parentheses at the base of each bar.
  geom_text(
    aes(x = trigramWord, y = 1, label = paste0("(", n, ")")),
    hjust = 0, vjust = 0.5, size = 4, colour = "black",
    fontface = "bold"
  ) +
  labs(
    x = "Trigram",
    y = "Count",
    title = "Trigram and Count"
  ) +
  # Flip so the trigram labels read horizontally along the vertical axis.
  coord_flip() +
  theme_bw()