Chapter 51 The Best and the Worst Episodes

# Load the per-episode metadata and force the IMDb rating column to numeric.
Episodes <- read_csv("input/simpsons_episodes.csv")
Episodes[["imdb_rating"]] <- as.numeric(Episodes[["imdb_rating"]])

# Highest-rated episode: keep only the id and rating, sort descending,
# and take the first row.
BestEpisode <- Episodes %>%
  select(id, imdb_rating) %>%
  arrange(desc(imdb_rating)) %>%
  head(1)

# Lowest-rated episode: same columns, sorted ascending, first row.
WorstEpisode <- Episodes %>%
  select(id, imdb_rating) %>%
  arrange(imdb_rating) %>%
  head(1)


  # Compute the average AFINN sentiment score of the words in one episode.
  #
  # Args:
  #   ScriptsCharacters: data frame of script lines; this function reads the
  #     columns episode_id, id, name and normalized_text.
  #   ID: episode id, compared against episode_id.
  #
  # Returns:
  #   The count-weighted mean AFINN score over the episode's words that are
  #   present in the lexicon (0 / 0 = NaN when no word matches).
  getEpisodeSentimentScore <- function(ScriptsCharacters, ID) {
    episode_lines <- ScriptsCharacters %>%
      filter(episode_id == ID) %>%
      select(id, name, normalized_text)

    # One row per (character, word) with its frequency n. The name filter
    # removes rows labelled with the string "NA"; rows whose name is a real
    # missing value are dropped too, because filter() discards rows where the
    # condition evaluates to NA.
    word_counts <- episode_lines %>%
      unnest_tokens(word, normalized_text) %>%
      filter(name != "NA") %>%
      dplyr::count(name, word, sort = TRUE) %>%
      ungroup()

    # Older tidytext releases name the AFINN column `score`; newer releases
    # (lexicon served through textdata) name it `value`. Normalise to `score`
    # so the summary below works with either version.
    afinn <- get_sentiments("afinn")
    if (!("score" %in% names(afinn)) && "value" %in% names(afinn)) {
      names(afinn)[names(afinn) == "value"] <- "score"
    }

    sentiment <- word_counts %>%
      inner_join(afinn, by = "word") %>%
      summarize(score = sum(score * n) / sum(n))

    sentiment$score
  }

51.1 Best Episode

## [1] 0.8309179

is the sentiment score for the Best Episode

51.2 Worst Episode

## [1] 0.4871795

is the sentiment score for the Worst Episode

51.3 Positive and Not So Positive Characters of the Best Episode

# Word counts per character for the Best Episode: keep that episode's lines,
# split normalized_text into one word per row, then count (name, word) pairs.
SC <- ScriptsCharacters %>%
  filter(episode_id == BestEpisode$id) %>%
  unnest_tokens(output = word, input = normalized_text) %>%
  dplyr::count(name, word, sort = TRUE) %>%
  ungroup()

visualize_sentiments(SC)

51.4 Positive and Not So Positive Characters of the Worst Episode

# Word counts per character for the Worst Episode: keep that episode's lines,
# split normalized_text into one word per row, then count (name, word) pairs.
SC <- ScriptsCharacters %>%
  filter(episode_id == WorstEpisode$id) %>%
  unnest_tokens(output = word, input = normalized_text) %>%
  dplyr::count(name, word, sort = TRUE) %>%
  ungroup()

visualize_sentiments(SC)

51.5 Positive and Not So Positive Words of the Best Episode

The following graph shows the twenty words with the largest positive or negative sentiment contribution in the Best Episode.

# Plot the words that contribute most, positively or negatively, to the
# AFINN sentiment of a set of script lines.
#
# Args:
#   SC: data frame of script lines; this function reads the columns name and
#     normalized_text.
#
# Returns:
#   A ggplot object: a horizontal bar chart of up to 20 words, ordered by
#   contribution and filled by the sign of the contribution.
positiveWordsBarGraph <- function(SC) {
  # Older tidytext releases name the AFINN column `score`; newer releases
  # (lexicon served through textdata) name it `value`. Normalise to `score`
  # so the summary below works with either version.
  afinn <- get_sentiments("afinn")
  if (!("score" %in% names(afinn)) && "value" %in% names(afinn)) {
    names(afinn)[names(afinn) == "value"] <- "score"
  }

  contributions <- SC %>%
    unnest_tokens(word, normalized_text) %>%
    dplyr::count(name, word, sort = TRUE) %>%
    ungroup() %>%
    inner_join(afinn, by = "word") %>%
    group_by(word) %>%
    # count() has already collapsed repeats into the frequency column n, so
    # weight by n: occurences is the total number of times the word was said
    # and contribution is its score times that total. (Using n() / sum(score)
    # here would count speakers instead of occurrences.)
    summarize(occurences = sum(n),
              contribution = sum(score * n))

  contributions %>%
    top_n(20, abs(contribution)) %>%
    mutate(word = reorder(word, contribution)) %>%
    # top_n() keeps ties, so cap the result at 20 rows.
    head(20) %>%
    ggplot(aes(word, contribution, fill = contribution > 0)) +
    geom_col(show.legend = FALSE) +
    coord_flip() + theme_bw()
}

# Script lines of the Best Episode, reduced to the columns needed for the
# word-contribution bar graph.
SC <- ScriptsCharacters %>%
  filter(episode_id == BestEpisode$id) %>%
  select(id, name, normalized_text)

positiveWordsBarGraph(SC)

51.6 Positive and Not So Positive Words of the Worst Episode

# Script lines of the Worst Episode, reduced to the columns needed for the
# word-contribution bar graph.
SC <- ScriptsCharacters %>%
  filter(episode_id == WorstEpisode$id) %>%
  select(id, name, normalized_text)

positiveWordsBarGraph(SC)