Chapter 46 Relationship among words
Until now, we have explored the most important words for each character. Now we will explore the relationships between words.
#' Count non-stop-word bigrams
#'
#' Tokenizes the `normalized_text` column of `dataset` into bigrams, splits
#' each bigram into its two words, removes every pair in which either word
#' appears in `stop_words$word`, and counts the remaining pairs.
#'
#' @param dataset A data frame with a `normalized_text` column.
#' @return A data frame with columns `word1`, `word2` and `n`, sorted by `n`
#'   in decreasing order.
count_bigrams <- function(dataset) {
  dataset %>%
    unnest_tokens(bigram, normalized_text, token = "ngrams", n = 2) %>%
    # Text with fewer than two words tokenizes to an NA bigram; drop those
    # rows so they are not counted as an "NA NA" pair downstream.
    filter(!is.na(bigram)) %>%
    separate(bigram, c("word1", "word2"), sep = " ") %>%
    filter(
      !word1 %in% stop_words$word,
      !word2 %in% stop_words$word
    ) %>%
    dplyr::count(word1, word2, sort = TRUE)
}
#' Plot bigram counts as a directed word network
#'
#' Converts `bigrams` to a graph with `graph_from_data_frame()` and draws it
#' with ggraph using the "fr" layout. Edge transparency is mapped to the `n`
#' column and each edge carries a closed arrow head.
#'
#' @param bigrams A data frame of bigram counts (e.g. the output of
#'   `count_bigrams()`); it must contain an `n` column.
#' @return The ggraph plot object.
visualize_bigrams <- function(bigrams) {
  # Seed the RNG before the layout is computed (presumably so that the "fr"
  # layout comes out the same on every run).
  set.seed(2016)
  edge_arrow <- grid::arrow(type = "closed", length = unit(.15, "inches"))
  bigram_graph <- graph_from_data_frame(bigrams)

  ggraph(bigram_graph, layout = "fr") +
    geom_edge_link(
      aes(edge_alpha = n),
      show.legend = FALSE,
      arrow = edge_arrow
    ) +
    geom_node_point(color = "lightblue", size = 5) +
    geom_node_text(aes(label = name), vjust = 1, hjust = 1) +
    theme_void()
}
#' Plot a single word's bigram network
#'
#' Same drawing as `visualize_bigrams()`, except that every edge is given an
#' `end_cap` of `circle(.07, "inches")`, so the arrow stops short of the
#' node it points to.
#'
#' @param bigrams A data frame of bigram counts containing an `n` column.
#' @return The ggraph plot object.
visualize_bigrams_individual <- function(bigrams) {
  # Seed the RNG before the layout is computed (presumably for a
  # reproducible "fr" layout).
  set.seed(2016)
  arrow_head <- grid::arrow(type = "closed", length = unit(.15, "inches"))

  network_plot <- ggraph(graph_from_data_frame(bigrams), layout = "fr")

  edge_layer <- geom_edge_link(
    aes(edge_alpha = n),
    show.legend = FALSE,
    arrow = arrow_head,
    end_cap = circle(.07, "inches")
  )

  network_plot +
    edge_layer +
    geom_node_point(color = "lightblue", size = 5) +
    geom_node_text(aes(label = name), vjust = 1, hjust = 1) +
    theme_void()
}
# Bigram counts for every line in `SC`.
SCWords <- count_bigrams(SC)

# Draw the network using only bigrams that occur more than 50 times.
visualize_bigrams(filter(SCWords, n > 50))
The graph above shows which words follow which other words. For example, the word dont is followed by many words, such as:
- care
- forget
- understand
- worry
- mind
- wanna
Certain words follow themselves, such as bye and hee, because the script contains lines such as "bye bye" and "hee hee".
46.1 Dont word network graph
#' Bigrams that start or end with a given word
#'
#' Counts the bigrams in `SC` with `count_bigrams()` and keeps those whose
#' first word equals `word1Value` or whose second word equals `word2Value`.
#'
#' @param SC A data frame accepted by `count_bigrams()`.
#' @param word1Value Word to match against the first word of each bigram.
#' @param word2Value Word to match against the second word of each bigram.
#' @return A data frame with columns `word1`, `word2` and `n`: the rows that
#'   match on `word1` first, followed by the remaining rows that match on
#'   `word2`.
individual_words_bigrams <- function(SC, word1Value, word2Value) {
  # Count once and reuse the result for both filters.
  bigram_counts <- count_bigrams(SC)

  first_word_matches <- filter(bigram_counts, word1 == word1Value)
  second_word_matches <- filter(bigram_counts, word2 == word2Value)

  # A bigram that satisfies both filters (e.g. "dont dont") would otherwise
  # appear twice and become a duplicate edge; distinct() keeps the first
  # occurrence and leaves the row order unchanged.
  dplyr::distinct(rbind(first_word_matches, second_word_matches))
}
# Network of bigrams that start or end with "dont", restricted to pairs that
# occur more than 20 times.
SC %>%
  individual_words_bigrams("dont", "dont") %>%
  filter(n > 20) %>%
  visualize_bigrams_individual()