Chapter 33 “Mon Ami Gabi”

The location and category of the most liked business, Mon Ami Gabi, are shown below.

# Keep only the descriptive columns of the business with this id.
mon_ami_gabi <- select(
  filter(business, business_id == "4JNXUYY8wbaaDmk3BPzlWw"),
  name, neighborhood, city, state, postal_code, categories
)

# Show the first rows as a condensed table that scrolls horizontally.
datatable(
  head(mon_ami_gabi),
  style = "bootstrap",
  class = "table-condensed",
  options = list(dom = "tp", scrollX = TRUE)
)

33.1 Useful,funny,cool reviews

The following plots describe the number of Useful, Funny and Cool reviews. Most of the reviews are NOT useful, funny or cool.

# Subset of `reviews` belonging to the business with this id.
mon_ami_gabi_reviews <- filter(reviews, business_id == "4JNXUYY8wbaaDmk3BPzlWw")

# Horizontal bar chart: number of reviews for each value of `useful`,
# restricted to the ten most frequent values.
# The label is built with paste0("(", Count, ")"); paste0() has no `sep`
# argument, so none is passed.
mon_ami_gabi_reviews %>%
  group_by(useful) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  ungroup() %>%
  mutate(useful = reorder(useful, Count)) %>%
  head(10) %>%
  ggplot(aes(x = useful, y = Count)) +
  geom_bar(stat = "identity", colour = "white", fill = fillColor) +
  geom_text(aes(x = useful, y = 1, label = paste0("(", Count, ")")),
            hjust = 0, vjust = .5, size = 4, colour = "black",
            fontface = "bold") +
  labs(x = "Useful Reviews",
       y = "Count",
       title = "Useful Reviews and Count") +
  coord_flip() +
  theme_bw()


# Horizontal bar chart: number of reviews for each value of `funny`,
# restricted to the ten most frequent values.
# The label is built with paste0("(", Count, ")"); paste0() has no `sep`
# argument, so none is passed.
mon_ami_gabi_reviews %>%
  group_by(funny) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  ungroup() %>%
  mutate(funny = reorder(funny, Count)) %>%
  head(10) %>%
  ggplot(aes(x = funny, y = Count)) +
  geom_bar(stat = "identity", colour = "white", fill = fillColor2) +
  geom_text(aes(x = funny, y = 1, label = paste0("(", Count, ")")),
            hjust = 0, vjust = .5, size = 4, colour = "black",
            fontface = "bold") +
  labs(x = "Funny Reviews",
       y = "Count",
       title = "Funny Reviews and Count") +
  coord_flip() +
  theme_bw()


# Horizontal bar chart: number of reviews for each value of `cool`,
# restricted to the ten most frequent values.
# The label is built with paste0("(", Count, ")"); paste0() has no `sep`
# argument, so none is passed.
mon_ami_gabi_reviews %>%
  group_by(cool) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  ungroup() %>%
  mutate(cool = reorder(cool, Count)) %>%
  head(10) %>%
  ggplot(aes(x = cool, y = Count)) +
  geom_bar(stat = "identity", colour = "white", fill = fillColor) +
  geom_text(aes(x = cool, y = 1, label = paste0("(", Count, ")")),
            hjust = 0, vjust = .5, size = 4, colour = "black",
            fontface = "bold") +
  labs(x = "Cool Reviews",
       y = "Count",
       title = "Cool Reviews and Count") +
  coord_flip() +
  theme_bw()

33.2 Word Cloud of Mon Ami Gabi

A word cloud is a graphical representation of frequently used words in the text. The size of each word in this picture is an indication of the frequency of occurrence of the word in the entire text. The words steak, service, vegas, french, patio, bellagio, delicious and nice have been used very frequently in the reviews. Note that if we choose a word which is not food related, it is service, and we will see in the subsequent sections on sentiment analysis and topic modelling why this keyword is important.

# Draw a word cloud of the 30 most frequent non-stop-words.
#
# train: data frame with a `text` column holding the review text.
createWordCloud <- function(train) {
  # One row per word, stop words dropped, most frequent first.
  word_freq <- train %>%
    unnest_tokens(word, text) %>%
    filter(!(word %in% stop_words$word)) %>%
    count(word, sort = TRUE) %>%
    ungroup()

  top_words <- head(word_freq, 30)

  with(
    top_words,
    wordcloud(word, n, max.words = 30, colors = brewer.pal(8, "Dark2"))
  )
}

# Word cloud for the reviews of the business with this id.
reviews %>%
  filter(business_id == "4JNXUYY8wbaaDmk3BPzlWw") %>%
  createWordCloud()

33.3 Top Ten most common Words of the business “Mon Ami Gabi”

We examine the Top Ten Most Common words and show them in a bar graph. The words steak, service, vegas, french, patio, bellagio, delicious and nice have been used very frequently in the reviews.

# Horizontal bar chart of the ten most frequent words in the reviews of this
# business, after dropping stop words and the generic words 'food' and
# 'restaurant'.
# The label is built with paste0("(", n, ")"); paste0() has no `sep`
# argument, so none is passed.
reviews %>%
  filter(business_id == "4JNXUYY8wbaaDmk3BPzlWw") %>%
  unnest_tokens(word, text) %>%
  filter(!word %in% stop_words$word) %>%
  filter(!word %in% c("food", "restaurant")) %>%
  count(word, sort = TRUE) %>%
  ungroup() %>%
  # Reverse the frequency order in the factor levels so that, after
  # coord_flip(), the most frequent word is drawn at the top.
  mutate(word = factor(word, levels = rev(unique(word)))) %>%
  head(10) %>%
  ggplot(aes(x = word, y = n)) +
  geom_bar(stat = "identity", colour = "white", fill = fillColor) +
  geom_text(aes(x = word, y = 1, label = paste0("(", n, ")")),
            hjust = 0, vjust = .5, size = 4, colour = "black",
            fontface = "bold") +
  labs(x = "Word", y = "Word Count",
       title = "Word Count") +
  coord_flip() +
  theme_bw()

33.4 Sentiment Analysis - Positive and Not So Positive Words of “Mon Ami Gabi”

We display the Positive and Not So Positive words used by reviewers for the business Mon Ami Gabi. We have used the AFINN sentiment lexicon, which provides numeric positivity scores for each word, and visualized it with a bar plot.

Breathtaking, funnier, fun, fantastic, fabulous, ecstatic, brilliant, awesome and amazing are some of the positive words that we have seen in the reviews of the business.

# Bar chart of the 20 words contributing most (positively or negatively) to
# the AFINN sentiment of the supplied reviews.
#
# SC: data frame with a `text` column holding the review text.
# Returns a ggplot object; bars are coloured by the sign of the contribution.
positiveWordsBarGraph <- function(SC) {
  contributions <- SC %>%
    unnest_tokens(word, text) %>%
    count(word, sort = TRUE) %>%
    ungroup() %>%
    # NOTE(review): this relies on the AFINN lexicon exposing a `score`
    # column; confirm the column name against the installed tidytext version.
    inner_join(get_sentiments("afinn"), by = "word") %>%
    group_by(word) %>%
    # After count() every word is a single row carrying its frequency in `n`.
    # Weight by `n` so that occurrences and contribution reflect how often the
    # word is actually used, instead of always being 1 and the bare score.
    summarize(occurences = sum(n),
              contribution = sum(n * score))

  contributions %>%
    top_n(20, abs(contribution)) %>%
    mutate(word = reorder(word, contribution)) %>%
    # top_n() keeps ties, so cap the result at 20 rows.
    head(20) %>%
    ggplot(aes(word, contribution, fill = contribution > 0)) +
    geom_col(show.legend = FALSE) +
    coord_flip() + theme_bw()
}

# Sentiment-contribution chart for the reviews of the business with this id.
reviews %>%
  filter(business_id == "4JNXUYY8wbaaDmk3BPzlWw") %>%
  positiveWordsBarGraph()

33.5 Calculate Sentiment for the reviews

We calculate the sentiment scores for all the reviews using the AFINN sentiment lexicon. We display the first six rows of the result here.

# Mean AFINN score per review.
#
# review_text: data frame with `review_id` and `text` columns.
# Returns one row per review with `sentiment` (mean score of the matched
# words) and `words` (number of matched words); reviews with fewer than five
# matched words are dropped.
calculate_sentiment <- function(review_text) {
  # Only reviews that textcat() classifies as English are scored.
  english_reviews <- filter(review_text, textcat(text) == "english")

  scored_words <- english_reviews %>%
    unnest_tokens(word, text) %>%
    inner_join(get_sentiments("afinn"), by = "word")

  per_review <- scored_words %>%
    group_by(review_id) %>%
    summarize(sentiment = mean(score), words = n()) %>%
    ungroup()

  filter(per_review, words >= 5)
}


# Per-review sentiment for the Mon Ami Gabi reviews.
sentiment_lines <- mon_ami_gabi_reviews %>%
  calculate_sentiment()

# Preview the first rows.
head(sentiment_lines)

33.6 Negative Reviews

We examine the Top Ten most negative reviews. The complaints were about Service. An excerpt of the Service Complaints is provided below

  • Worst service ever. Didn't pay attention to our orders at all so we had to send most of the food back

  • The server ignored us twice when we are talking to him. Threw the dishes instead of placing them on the table

  • The service was mediocre and the food was terrible

  • Food was OK, but service was terrible. Our server never came back to our table to check if we need another drink, water, bread, etc. We had to get somebody else's attention for our need. At the end, they included 18% tipping which is their policy for 5 or more people

# Table of the ten lowest-sentiment reviews.
#
# sentiment_lines: data frame with `review_id` and `sentiment` columns.
# review_text:     data frame with `review_id`, `date` and `text` columns.
display_neg_sentiments <- function(sentiment_lines, review_text) {
  # Sort by descending sentiment, then keep the ten smallest values
  # (a negative count in top_n() selects from the bottom).
  ranked <- arrange(sentiment_lines, desc(sentiment))
  lowest <- top_n(ranked, -10, sentiment)

  # Attach the review date and text.
  neg_sentiment_lines <- lowest %>%
    inner_join(review_text, by = "review_id") %>%
    select(date, sentiment, text)

  datatable(
    neg_sentiment_lines,
    style = "bootstrap",
    class = "table-condensed",
    options = list(dom = "tp", scrollX = TRUE)
  )
}

# Show the most negative Mon Ami Gabi reviews.
display_neg_sentiments(
  sentiment_lines = sentiment_lines,
  review_text = mon_ami_gabi_reviews
)

33.7 Positive Reviews

We examine the Top Ten most positive reviews.

# Table of the ten highest-sentiment reviews.
#
# sentiment_lines: data frame with `review_id` and `sentiment` columns.
# review_text:     data frame with `review_id`, `date` and `text` columns.
display_pos_sentiments <- function(sentiment_lines, review_text) {
  # Sort by descending sentiment and keep the ten largest values.
  ranked <- arrange(sentiment_lines, desc(sentiment))
  highest <- top_n(ranked, 10, sentiment)

  # Attach the review date and text.
  pos_sentiment_lines <- highest %>%
    inner_join(review_text, by = "review_id") %>%
    select(date, sentiment, text)

  datatable(
    pos_sentiment_lines,
    style = "bootstrap",
    class = "table-condensed",
    options = list(dom = "tp", scrollX = TRUE)
  )
}

# Show the most positive Mon Ami Gabi reviews.
display_pos_sentiments(
  sentiment_lines = sentiment_lines,
  review_text = mon_ami_gabi_reviews
)

33.8 Most Common Bigrams of “Mon Ami Gabi”

A bigram is a pair of two consecutive words. We examine the most common bigrams and plot them in a bar plot.

# Count bigrams in which neither word is a stop word.
#
# dataset: data frame with a `text` column.
# Returns one row per (word1, word2) pair with its count `n`, most frequent
# first.
count_bigrams <- function(dataset) {
  # Tokenise into bigrams and split each into its two words.
  word_pairs <- dataset %>%
    unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
    separate(bigram, c("word1", "word2"), sep = " ")

  informative_pairs <- filter(
    word_pairs,
    !(word1 %in% stop_words$word),
    !(word2 %in% stop_words$word)
  )

  count(informative_pairs, word1, word2, sort = TRUE)
}


# Draw a directed network of bigrams.
#
# bigrams: data frame whose first two columns are the linked words and which
#          has a count column `n` (mapped to edge transparency).
visualize_bigrams <- function(bigrams) {
  # Fixed seed so the "fr" layout is reproducible.
  set.seed(2016)
  edge_arrow <- grid::arrow(type = "closed", length = unit(.15, "inches"))

  bigram_graph <- graph_from_data_frame(bigrams)

  ggraph(bigram_graph, layout = "fr") +
    geom_edge_link(aes(edge_alpha = n), show.legend = FALSE,
                   arrow = edge_arrow) +
    geom_node_point(color = "lightblue", size = 5) +
    geom_node_text(aes(label = name), vjust = 1, hjust = 1) +
    theme_void()
}

# Draw a directed network of bigrams, with each edge ending in a small cap
# (circle(.07, 'inches')) at its target end.
#
# bigrams: data frame whose first two columns are the linked words and which
#          has a count column `n` (mapped to edge transparency).
visualize_bigrams_individual <- function(bigrams) {
  # Fixed seed so the "fr" layout is reproducible.
  set.seed(2016)
  edge_arrow <- grid::arrow(type = "closed", length = unit(.15, "inches"))

  bigram_graph <- graph_from_data_frame(bigrams)

  ggraph(bigram_graph, layout = "fr") +
    geom_edge_link(aes(edge_alpha = n), show.legend = FALSE,
                   arrow = edge_arrow,
                   end_cap = circle(.07, "inches")) +
    geom_node_point(color = "lightblue", size = 5) +
    geom_node_text(aes(label = name), vjust = 1, hjust = 1) +
    theme_void()
}


# Preview: the first ten bigrams together with the review each came from.
head(
  select(
    unnest_tokens(
      filter(reviews, business_id == "4JNXUYY8wbaaDmk3BPzlWw"),
      bigram, text, token = "ngrams", n = 2
    ),
    bigram, review_id
  ),
  10
)


# Horizontal bar chart of the ten most frequent bigrams in the reviews of this
# business. Bigrams containing a stop word are dropped, as are bigrams whose
# first word is "mon"/"ami" or whose second word is "gabi".
# The label is built with paste0("(", n, ")"); paste0() has no `sep`
# argument, so none is passed.
reviews %>%
  filter(business_id == "4JNXUYY8wbaaDmk3BPzlWw") %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word,
         !word2 %in% stop_words$word) %>%
  filter(!word1 %in% c("mon", "ami")) %>%
  filter(!word2 %in% c("gabi")) %>%
  unite(bigramWord, word1, word2, sep = " ") %>%
  group_by(bigramWord) %>%
  tally() %>%
  ungroup() %>%
  arrange(desc(n)) %>%
  mutate(bigramWord = reorder(bigramWord, n)) %>%
  head(10) %>%
  ggplot(aes(x = bigramWord, y = n)) +
  geom_bar(stat = "identity", colour = "white", fill = fillColor2) +
  geom_text(aes(x = bigramWord, y = 1, label = paste0("(", n, ")")),
            hjust = 0, vjust = .5, size = 4, colour = "black",
            fontface = "bold") +
  labs(x = "Bigram",
       y = "Count",
       title = "Bigram and Count") +
  coord_flip() +
  theme_bw()

33.9 Relationship among words

We explore the relationships among the various words in the Mon Ami Gabi reviews here through a network graph.

# Bigram counts for the reviews of the business with this id.
bigramsMonAmiGabi <- count_bigrams(
  filter(reviews, business_id == "4JNXUYY8wbaaDmk3BPzlWw")
)

# Network of the bigrams that occur more than 50 times.
visualize_bigrams(filter(bigramsMonAmiGabi, n > 50))

33.9.1 Relationship of words with steak

The following network diagram shows the words associated with the word steak

# Network of bigrams containing "steak" that occur more than 30 times.
steak_pairs <- filter(bigramsMonAmiGabi, word1 == "steak" | word2 == "steak")
visualize_bigrams(filter(steak_pairs, n > 30))

33.9.2 Relationship of words with french

The following network diagram shows the words associated with the word french

# Network of bigrams containing "french" that occur more than 30 times.
french_pairs <- filter(bigramsMonAmiGabi, word1 == "french" | word2 == "french")
visualize_bigrams(filter(french_pairs, n > 30))