Chapter 21 Feature Engineering: First, Second, and Third Set Winners and Losers

We extract three features that record, for each match, whether the eventual match winner won or lost the first, second, and third set.

# Determine who took a given set of a match.
#
# `scores` is one score string in which sets are separated by spaces and each
# set is written as "<games>-<games>", optionally followed by a tie-break
# annotation such as "(7)" (for example "4-6 7-6(7) 6-2"). The first number of
# a set is treated as the match winner's games, the second as the loser's.
#
# Arguments:
#   scores    - a single score string; NA is accepted.
#   setnumber - 1-based position of the set to inspect.
#
# Returns "winner" when the first number is strictly greater than the second,
# "loser" otherwise, and "" when the requested set does not exist or is not a
# numeric "<games>-<games>" token (e.g. "RET", "W/O").
whowon = function(scores,setnumber)
{
  scores = as.character(scores)
  if (length(scores) == 0)
  {
    return("")
  }

  # One token per set; collapse repeated blanks so they do not shift the
  # set positions.
  sets = strsplit(trimws(scores), " +")[[1]]
  set = sets[setnumber]

  # The match had fewer sets than requested, or the score is empty / NA.
  if (length(set) != 1 || is.na(set))
  {
    return("")
  }

  # Capture the complete leading integers on both sides of the dash so that
  # two-digit game counts ("10-12") are read in full, while a trailing
  # tie-break annotation ("7-6(7)") is ignored. Non-numeric tokens simply do
  # not match, which avoids the coercion warnings as.numeric() would raise.
  parts = regmatches(set, regexec("^([0-9]+)-([0-9]+)", set))[[1]]
  if (length(parts) != 3)
  {
    return("")
  }

  winner_score = as.numeric(parts[2])
  loser_score = as.numeric(parts[3])

  if (winner_score > loser_score) "winner" else "loser"
}

# Label every match by who took its first, second and third set.
# vapply() pins the result to exactly one string per match, so the new columns
# stay plain character vectors even when `matches` has no rows (sapply() would
# return an empty list there), and USE.NAMES = FALSE keeps the score strings
# from being attached as element names.
matches$first_set = vapply(matches$score, whowon, character(1),
                           setnumber = 1, USE.NAMES = FALSE)
matches$second_set = vapply(matches$score, whowon, character(1),
                            setnumber = 2, USE.NAMES = FALSE)
matches$third_set = vapply(matches$score, whowon, character(1),
                           setnumber = 3, USE.NAMES = FALSE)

21.1 Percentage of Winners after losing 1st set

We compute the percentage of all matches in which the eventual winner lost the 1st set.

# Matches whose first set is labelled "loser", i.e. the first set did not go
# to the match winner.
first_set_loser = filter(matches, first_set == "loser")

# Those matches as a percentage of all matches.
nrow(first_set_loser) / nrow(matches) * 100
## [1] 16.46598

21.2 Most wins after losing the first set

The bar plot shows the ten players with the most match wins after losing the first set.

# Count wins per player among the matches whose first set is labelled "loser",
# keep the ten highest counts and draw them as a horizontal bar chart.
first_set_loser %>%
  group_by(winner_name) %>%
  summarise(Count = n()) %>%
  ungroup() %>%
  arrange(desc(Count)) %>%
  head(10) %>%
  # Order the factor by Count so the bars appear sorted; doing this after
  # head() keeps the factor limited to the ten plotted players.
  mutate(winner_name = reorder(winner_name, Count)) %>%

  ggplot(aes(x = winner_name, y = Count)) +
  geom_bar(stat = 'identity', colour = "white", fill = fillColor2) +
  # paste0() has no `sep` argument (a stray sep = "" is just pasted as one
  # more string), so only the label pieces are passed.
  geom_text(aes(x = winner_name, y = 1, label = paste0("(", Count, ")")),
            hjust = 0, vjust = .5, size = 4, colour = 'black',
            fontface = 'bold') +
  labs(x = 'Winner',
       y = 'Count',
       title = 'Winner') +
  coord_flip() +
  theme_bw()

21.3 Percentage of Winners after losing 2nd set

We compute the percentage of all matches in which the eventual winner lost the 2nd set.

# Matches whose second set is labelled "loser", i.e. the second set did not go
# to the match winner.
second_set_loser = filter(matches, second_set == "loser")

# Those matches as a percentage of all matches.
nrow(second_set_loser) / nrow(matches) * 100
## [1] 14.64302

21.4 Most wins after losing the second set

The bar plot shows the ten players with the most match wins after losing the second set.

# Uses `second_set_loser` as computed in section 21.3 (matches whose second
# set is labelled "loser"); it is not filtered a second time here.
# Count wins per player, keep the ten highest counts and draw them as a
# horizontal bar chart.
second_set_loser %>%
  group_by(winner_name) %>%
  summarise(Count = n()) %>%
  ungroup() %>%
  arrange(desc(Count)) %>%
  head(10) %>%
  # Order the factor by Count so the bars appear sorted; doing this after
  # head() keeps the factor limited to the ten plotted players.
  mutate(winner_name = reorder(winner_name, Count)) %>%

  ggplot(aes(x = winner_name, y = Count)) +
  geom_bar(stat = 'identity', colour = "white", fill = fillColor) +
  # paste0() has no `sep` argument (a stray sep = "" is just pasted as one
  # more string), so only the label pieces are passed.
  geom_text(aes(x = winner_name, y = 1, label = paste0("(", Count, ")")),
            hjust = 0, vjust = .5, size = 4, colour = 'black',
            fontface = 'bold') +
  labs(x = 'Winner',
       y = 'Count',
       title = 'Winner') +
  coord_flip() +
  theme_bw()

21.5 Percentage of Winners after losing 1st set in Grand Slams

We calculate the percentage of Grand Slam finals in which the eventual champion lost the 1st set.

# All Grand Slam finals: tourney_level "G" and round "F".
gs_final = matches %>%
  filter(tourney_level == "G", round == "F")

# The same finals, restricted to those whose first / second / third set is
# labelled "loser".
gs_final_firstset_loser = gs_final %>% filter(first_set == "loser")
gs_final_secondset_loser = gs_final %>% filter(second_set == "loser")
gs_final_thirdset_loser = gs_final %>% filter(third_set == "loser")

# Percentage of Grand Slam finals whose first set is labelled "loser".
nrow(gs_final_firstset_loser) / nrow(gs_final) * 100
## [1] 12.5

21.6 Percentage of Winners after losing 2nd set in Grand Slams

We calculate the percentage of Grand Slam finals in which the eventual champion lost the 2nd set.

# Percentage of Grand Slam finals whose second set is labelled "loser".
nrow(gs_final_secondset_loser) / nrow(gs_final) * 100
## [1] 15.27778

21.7 Percentage of Winners after losing 1st set in Australian Open

We calculate the percentage of Australian Open finals in which the eventual champion lost the 1st set, and list those finals.

# Percentage of a tournament's finals in which a given set is labelled "loser".
#
# Arguments:
#   matches        - data frame with the columns tourney_name and round plus
#                    the set label column being examined (first_set,
#                    second_set or third_set).
#   tournamentName - value of tourney_name to select, e.g. "Australian Open".
#   loser          - which set to examine: 1 (default), 2 or 3.
#
# Returns a single number between 0 and 100. The result is NaN when `matches`
# holds no final (round == "F") for `tournamentName`, because the denominator
# is then zero.
percentWinnersTourney = function(matches,tournamentName,loser = 1)
{
  set_columns = c("first_set", "second_set", "third_set")

  # Any value other than 1 used to fall through to the second-set figure
  # without notice; unsupported values are now rejected explicitly.
  if (length(loser) != 1 || is.na(loser) ||
      !(loser %in% seq_along(set_columns)))
  {
    stop("`loser` must be 1, 2 or 3", call. = FALSE)
  }
  set_column = set_columns[as.integer(loser)]

  missing_columns = setdiff(c("tourney_name", "round", set_column),
                            names(matches))
  if (length(missing_columns) > 0)
  {
    stop("`matches` is missing column(s): ",
         paste(missing_columns, collapse = ", "), call. = FALSE)
  }

  # Row masks instead of intermediate data frames; rows where a comparison is
  # NA are left out of both counts by na.rm = TRUE.
  is_final = matches$tourney_name == tournamentName & matches$round == "F"
  lost_set = is_final & matches[[set_column]] == "loser"

  sum(lost_set, na.rm = TRUE) / sum(is_final, na.rm = TRUE) * 100
}



# Render a styled HTML table of the finals (round == "F") of `tournamentName`
# whose first set is labelled "loser".
#
# Arguments:
#   matches        - data frame with the columns tourney_name, round,
#                    first_set, winner_name, loser_name, year and score.
#   tournamentName - value of tourney_name to select.
#
# Returns the kable object wrapped in an 800px-wide scroll box.
displayGrandSlamWinnersAfterLosingFirstSet = function(matches,tournamentName)
{
  comeback_finals = matches %>%
    filter(tourney_name == tournamentName,
           round == "F",
           first_set == "loser") %>%
    select(winner_name, loser_name, year, score)

  styled_table = kable_styling(
    kable(comeback_finals, "html"),
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  )

  scroll_box(styled_table, width = "800px")
}

# Percentage of Australian Open finals whose first set is labelled "loser"
# (`loser` is left at its default of 1).
percentWinnersTourney(matches, tournamentName = "Australian Open")
## [1] 22.22222
# The finals behind that figure.
displayGrandSlamWinnersAfterLosingFirstSet(matches, tournamentName = "Australian Open")
winner_name loser_name year score
Jennifer Capriati Martina Hingis 2002 4-6 7-6(7) 6-2
Serena Williams Lindsay Davenport 2005 2-6 6-3 6-0
Kim Clijsters Na Li 2011 3-6 6-3 6-3
Victoria Azarenka Na Li 2013 4-6 6-4 6-3

21.8 Percentage of Winners after losing 2nd set in Australian Open

# Percentage of Australian Open finals whose second set is labelled "loser".
percentWinnersTourney(matches, tournamentName = "Australian Open", loser = 2)
## [1] 22.22222

21.9 Percentage of Winners after losing 1st set in Wimbledon

We calculate the percentage of Wimbledon finals in which the eventual champion lost the 1st set, and list those finals.

# Percentage of Wimbledon finals whose first set is labelled "loser"
# (`loser` is left at its default of 1).
percentWinnersTourney(matches, tournamentName = "Wimbledon")
## [1] 16.66667
# The finals behind that figure.
displayGrandSlamWinnersAfterLosingFirstSet(matches, tournamentName = "Wimbledon")
winner_name loser_name year score
Serena Williams Venus Williams 2003 4-6 6-4 6-2
Venus Williams Lindsay Davenport 2005 4-6 7-6(4) 9-7
Amelie Mauresmo Justine Henin 2006 2-6 6-3 6-4

21.10 Percentage of Winners after losing 1st set in French Open

We calculate the percentage of French Open finals in which the eventual champion lost the 1st set, and list those finals.

# Percentage of French Open finals whose first set is labelled "loser"
# (`loser` is left at its default of 1).
percentWinnersTourney(matches, tournamentName = "French Open")
## [1] 5.882353
# The finals behind that figure.
displayGrandSlamWinnersAfterLosingFirstSet(matches, tournamentName = "French Open")
winner_name loser_name year score
Jennifer Capriati Kim Clijsters 2001 1-6 6-4 12-10

21.11 Percentage of Winners after losing 1st set in US Open

We calculate the percentage of US Open finals in which the eventual champion lost the 1st set, and list those finals.

# Percentage of US Open finals whose first set is labelled "loser"
# (`loser` is left at its default of 1).
percentWinnersTourney(matches, tournamentName = "US Open")
## [1] 0
# The finals behind that figure.
displayGrandSlamWinnersAfterLosingFirstSet(matches, tournamentName = "US Open")
winner_name loser_name year score

It is evident that losing the 1st set of a French Open or US Open final practically means losing the final. In this data, no player has won the US Open final after losing the 1st set, and only one player has done so at the French Open (Jennifer Capriati in 2001).