Chapter 51 Best and the Worst Episodes
# Load the episode metadata and coerce the IMDb rating to numeric before
# sorting on it.
Episodes <- read_csv("input/simpsons_episodes.csv")
Episodes[["imdb_rating"]] <- as.numeric(Episodes[["imdb_rating"]])

# Highest-rated episode: keep id and rating, sort descending, take row one.
BestEpisode <- Episodes %>%
  select(id, imdb_rating) %>%
  arrange(desc(imdb_rating)) %>%
  head(1)

# Lowest-rated episode: same columns, sorted ascending, take row one.
WorstEpisode <- Episodes %>%
  select(id, imdb_rating) %>%
  arrange(imdb_rating) %>%
  head(1)
#' Average AFINN sentiment score of one episode's dialogue.
#'
#' Keeps the script lines whose `episode_id` equals `ID`, splits
#' `normalized_text` into words, counts every (speaker, word) pair, joins the
#' counts to the AFINN lexicon and returns the count-weighted mean of the
#' matched word scores.
#'
#' @param ScriptsCharacters Data frame of script lines with at least the
#'   columns `episode_id`, `id`, `name` and `normalized_text`.
#' @param ID Episode id to score; compared to `episode_id` with `==`.
#' @return Numeric count-weighted mean AFINN score of the matched words
#'   (`NaN` when no word of the episode is found in the lexicon, because the
#'   weighted mean is then 0 / 0).
getEpisodeSentimentScore <- function(ScriptsCharacters, ID) {
  # Older tidytext releases call the AFINN score column `score`; newer
  # tidytext/textdata releases call it `value`. Normalise to `score` so the
  # summary below works with either.
  afinn <- get_sentiments("afinn")
  names(afinn)[names(afinn) == "value"] <- "score"

  episode_lines <- ScriptsCharacters %>%
    filter(episode_id == ID) %>%
    select(id, name, normalized_text)

  word_counts <- episode_lines %>%
    unnest_tokens(word, normalized_text) %>%
    # Drop lines without a speaker: a missing name or the literal string "NA".
    filter(!is.na(name), name != "NA") %>%
    dplyr::count(name, word, sort = TRUE) %>%
    ungroup()

  # Weight each word's score by how often it was spoken.
  episode_sentiment <- word_counts %>%
    inner_join(afinn, by = "word") %>%
    summarize(score = sum(score * n) / sum(n))

  episode_sentiment$score
}
51.1 Best Episode
## [1] 0.8309179
is the sentiment score for the Best Episode
51.2 Worst Episode
## [1] 0.4871795
is the sentiment score for the Worst Episode
51.3 Positive and Not So Positive Characters of the Best Episode
# Per-character word counts for the best-rated episode, passed on to
# visualize_sentiments() (defined outside this section).
SC <- filter(ScriptsCharacters, episode_id == BestEpisode[["id"]]) %>%
  unnest_tokens(word, normalized_text) %>%
  dplyr::count(name, word, sort = TRUE) %>%
  ungroup()

visualize_sentiments(SC)
51.4 Positive and Not So Positive Characters of the Worst Episode
# Per-character word counts for the worst-rated episode, passed on to
# visualize_sentiments() (defined outside this section).
SC <- filter(ScriptsCharacters, episode_id == WorstEpisode[["id"]]) %>%
  unnest_tokens(word, normalized_text) %>%
  dplyr::count(name, word, sort = TRUE) %>%
  ungroup()

visualize_sentiments(SC)
51.5 Positive and Not So Positive Words of Best Episode
The following graph shows the twenty words that contribute most strongly, positively or negatively, to the sentiment of the Best Episode.
#' Bar chart of the words contributing most to an episode's sentiment.
#'
#' Splits `normalized_text` into words, counts every (speaker, word) pair,
#' joins the counts to the AFINN lexicon and sums, per word, the number of
#' occurrences and the total contribution (occurrences times AFINN score).
#' The 20 words with the largest absolute contribution are drawn as
#' horizontal bars, filled by the sign of the contribution.
#'
#' @param SC Data frame of script lines with at least the columns `name` and
#'   `normalized_text`.
#' @return A ggplot object.
positiveWordsBarGraph <- function(SC) {
  # Older tidytext releases call the AFINN score column `score`; newer
  # tidytext/textdata releases call it `value`. Normalise to `score`.
  afinn <- get_sentiments("afinn")
  names(afinn)[names(afinn) == "value"] <- "score"

  contributions <- SC %>%
    unnest_tokens(word, normalized_text) %>%
    dplyr::count(name, word, sort = TRUE) %>%
    ungroup() %>%
    inner_join(afinn, by = "word") %>%
    group_by(word) %>%
    # Each row here is one (speaker, word) pair carrying its count `n`, so
    # occurrences and contribution must be weighted by `n`; counting rows
    # would count speakers instead of spoken words.
    summarize(occurences = sum(n),
              contribution = sum(n * score))

  contributions %>%
    top_n(20, abs(contribution)) %>%
    mutate(word = reorder(word, contribution)) %>%
    # top_n() keeps ties, so cap the result at 20 rows.
    head(20) %>%
    ggplot(aes(word, contribution, fill = contribution > 0)) +
    geom_col(show.legend = FALSE) +
    coord_flip() +
    theme_bw()
}
# Script lines of the best-rated episode, reduced to the columns the word
# chart needs.
SC <- filter(ScriptsCharacters, episode_id == BestEpisode[["id"]]) %>%
  select(id, name, normalized_text)

positiveWordsBarGraph(SC)
51.6 Positive and Not So Positive Words of Worst Episode
# Script lines of the worst-rated episode, reduced to the columns the word
# chart needs.
SC <- filter(ScriptsCharacters, episode_id == WorstEpisode[["id"]]) %>%
  select(id, name, normalized_text)

positiveWordsBarGraph(SC)