Chapter 6 Hidden Gems and Competition Notebooks

About 85% of the Hidden Gems are non-competition notebooks. The 95% confidence interval for the proportion of Hidden Gems that are not competition notebooks runs from roughly 81% to 89% (0.805 to 0.886).

# Hidden Gems that come from a competition, i.e. the rows of `kvcs` with a
# non-missing SourceCompetitionId.
comp_notebook <- filter(kvcs, !is.na(SourceCompetitionId))

TotalNoOfRows <- nrow(kvcs)

# Labels and percentage shares are listed in the same order:
# non-competition first, competition second.
notebook_type <- c("NotCompetitionNotebook", "CompetitionNotebook")

comp_notebooks <- c(
  TotalNoOfRows - nrow(comp_notebook),
  nrow(comp_notebook)
) / TotalNoOfRows * 100

# One row per notebook type with its percentage of all Hidden Gems.
df_comp_notebooks <- data.frame(
  notebook_type = notebook_type,
  comp_notebooks = comp_notebooks
)


# Fill colour for each notebook type, keyed by the labels in `notebook_type`.
comp_notebooks_colors <- c(
  NotCompetitionNotebook = fillColor,
  CompetitionNotebook = fillColor2
)


 # Bar chart of the percentage share of each notebook type.
 df_comp_notebooks %>%
  ggplot(aes(
    # Order the bars by descending percentage. Sorting the data frame rows
    # does not change the order of a discrete axis, so the ordering is
    # applied to the x variable itself.
    x = reorder(notebook_type, -comp_notebooks),
    y = comp_notebooks,
    fill = notebook_type
  )) +
  geom_bar(stat = "identity", colour = "white") +
  geom_label(aes(label = round(comp_notebooks, digits = 2))) +
  # Apply the named palette so each notebook type gets its intended colour.
  scale_fill_manual(values = comp_notebooks_colors) +
  labs(
    x = "Notebook Type",
    y = "Percentage",
    title = "Notebook Type and Percentage"
  ) +
  theme_fivethirtyeight() +
  theme(legend.position = "none")

6.1 95% Confidence Interval for the Proportion of Hidden Gems That Are Not Competition Notebooks

# One-sample proportion test (no continuity correction) on the count of
# Hidden Gems with a missing SourceCompetitionId out of all Hidden Gems.
# The null probability is left at its default, so the 95% confidence
# interval is the result of interest here.
prop.test(
  x = nrow(kvcs %>% filter(is.na(SourceCompetitionId))),
  n = nrow(kvcs),
  correct = FALSE
)
## 
##  1-sample proportions test without continuity correction
## 
## data:  nrow(kvcs %>% filter(is.na(SourceCompetitionId))) out of nrow(kvcs), null probability 0.5
## X-squared = 147, df = 1, p-value < 0.00000000000000022
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.8051824 0.8859675
## sample estimates:
##    p 
## 0.85