Chapter 6 Hidden Gem and Competition Notebook
About 85% of the Hidden Gems are non-competition notebooks. The 95% confidence interval for the proportion of Hidden Gems that are NOT competition notebooks runs from about 80.5% to 88.6%.
# Share of Hidden Gems (`kvcs`) that are competition notebooks vs. not.
# A row is treated as a competition notebook when its `SourceCompetitionId`
# is not NA.
comp_notebook <- kvcs %>%
  filter(!is.na(SourceCompetitionId))

TotalNoOfRows <- nrow(kvcs)

# Percentages, listed in the same order as `notebook_type` below:
# first the non-competition share, then the competition share.
comp_notebooks <- c(
  (TotalNoOfRows - nrow(comp_notebook)) / TotalNoOfRows * 100,
  nrow(comp_notebook) / TotalNoOfRows * 100
)
notebook_type <- c("NotCompetitionNotebook", "CompetitionNotebook")
df_comp_notebooks <- data.frame(notebook_type, comp_notebooks)

# Named palette keyed by notebook type; applied via scale_fill_manual() below.
comp_notebooks_colors <- c(
  "NotCompetitionNotebook" = fillColor,
  "CompetitionNotebook" = fillColor2
)

# Bar chart of the two percentages.
# ggplot orders a character x-axis alphabetically regardless of row order, so
# the descending order is expressed with reorder() rather than arrange().
df_comp_notebooks %>%
  ggplot(aes(
    x = reorder(notebook_type, -comp_notebooks),
    y = comp_notebooks,
    fill = notebook_type
  )) +
  geom_bar(stat = "identity", colour = "white") +
  geom_label(aes(label = round(comp_notebooks, digits = 2))) +
  # Use the palette defined above instead of ggplot's default fill colours.
  scale_fill_manual(values = comp_notebooks_colors) +
  labs(
    x = "Notebook Type",
    y = "Percentage",
    title = "Notebook Type and Percentage"
  ) +
  theme_fivethirtyeight() +
  theme(legend.position = "none")
6.1 95% Confidence Interval for a Hidden Gem NOT being a Competition notebook
##
## 1-sample proportions test without continuity correction
##
## data: nrow(kvcs %>% filter(is.na(SourceCompetitionId))) out of nrow(kvcs), null probability 0.5
## X-squared = 147, df = 1, p-value < 0.00000000000000022
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.8051824 0.8859675
## sample estimates:
## p
## 0.85