Chapter 19 Similar Authors
We find similar authors using the Hidden Gems reviews. We apply the TF-IDF technique and then use cosine similarity to compare authors. Below is the list of similar authors along with a cosine similarity score. The higher the score, the more similar the authors are.
19.1 Jonathan Bouchet - Similar Author
# Pairwise cosine similarity between the rows of a numeric matrix.
#
# Args:
#   matrix: numeric matrix with one row per item and one column per feature.
#
# Returns:
#   A square matrix whose [i, j] entry is the cosine similarity of rows i and
#   j of the input. Row names of the input, if present, label both dimensions.
#   A row consisting only of zeros has norm 0, so every similarity involving
#   it is NaN (0 / 0).
cos_sim <- function(matrix) {
  # Dot product of every row with every other row, without building t(matrix).
  dot_products <- tcrossprod(matrix)
  # Euclidean norm of each row.
  row_norms <- sqrt(rowSums(matrix^2))
  # Entry [i, j] of the outer product is norm_i * norm_j.
  dot_products / outer(row_norms, row_norms)
}
# Row-by-row cosine similarity of tfidf_trainWords_dtm (presumably the TF-IDF
# document-term matrix with one row per author), kept as a data frame whose
# row labels are also copied into a DisplayName column so rows can be looked
# up by name.
Sim <- cos_sim(as.matrix(tfidf_trainWords_dtm))
df_sim <- as.data.frame(Sim)
df_sim[["DisplayName"]] <- rownames(df_sim)
# Rank every other author by cosine similarity to one author.
#
# Reads the global `df_sim`: one row per author, one score column per author
# (named after that author), plus a DisplayName column holding the author
# name of each row.
#
# Args:
#   x: display name of the author to look up (a single string).
#
# Returns:
#   A data frame with columns SimScore (numeric) and Author (character),
#   sorted by descending SimScore, excluding the queried author's own entry.
#
# Raises:
#   An error if `x` is not a single string or does not appear in
#   df_sim$DisplayName.
get_sim_authors <- function(x) {
  stopifnot(is.character(x), length(x) == 1L, !is.na(x))

  hit <- which(df_sim[["DisplayName"]] == x)
  if (length(hit) == 0L) {
    stop("No author named '", x, "' found in df_sim$DisplayName", call. = FALSE)
  }

  # Leave out the label column and the author's own column by name. Transposing
  # the whole row (label included) would turn every score into text, and text
  # sorts "2e-05" above "0.9", so scores are extracted as numbers instead.
  other_authors <- setdiff(names(df_sim), c("DisplayName", x))
  scores <- as.numeric(
    unlist(df_sim[hit[1L], other_authors, drop = FALSE], use.names = FALSE)
  )

  # Highest similarity first; NA/NaN scores are placed last by order().
  ranking <- order(scores, decreasing = TRUE)
  data.frame(
    SimScore = scores[ranking],
    Author = other_authors[ranking],
    stringsAsFactors = FALSE
  )
}
# Show the first six rows returned for Jonathan Bouchet as a titled gt table.
get_sim_authors("Jonathan Bouchet") %>%
  head() %>%
  gt() %>%
  tab_header(title = "Authors similar to Jonathan Bouchet")

| Authors similar to Jonathan Bouchet | |
|---|---|
| SimScore | Author |
| 0.3877258 | Ramshankar Yadhunath |
| 0.3285369 | Laura Fink |
| 0.3129579 | Aayush Jain |
| 0.3056249 | mitra mirshafiee |
| 0.2845213 | Zulkhairee Sulaiman |
| 0.2841765 | Janio Martinez Bachmann |
19.2 Vopani - Similar Author
| Authors similar to Vopani | |
|---|---|
| SimScore | Author |
| 0.3974131 | xhlulu |
| 0.3868042 | Katie |
| 0.3745317 | Nicole Yong ZH |
| 0.3742988 | arieljumba |
| 0.3660715 | Loulou |
| 0.3632678 | Guan Haibin |
19.3 Parul Pandey - Similar Author
# Show the first six rows returned for Parul Pandey as a titled gt table.
get_sim_authors("Parul Pandey") %>%
  head() %>%
  gt() %>%
  tab_header(title = "Authors similar to Parul Pandey")

| Authors similar to Parul Pandey | |
|---|---|
| SimScore | Author |
| 0.3268602 | Andre Sionek |
| 0.2614882 | Philippe Julien |
| 0.2594373 | Jaime Becerra Guerrero |
| 0.2288022 | Itokiana RAFIDINARIVO |
| 0.2157352 | Jonathan Bouchet |
| 0.2135042 | Poonam Ligade |
19.4 Bojan Tunguz - Similar Author
# Show the first six rows returned for Bojan Tunguz as a titled gt table.
get_sim_authors("Bojan Tunguz") %>%
  head() %>%
  gt() %>%
  tab_header(title = "Authors similar to Bojan Tunguz")

| Authors similar to Bojan Tunguz | |
|---|---|
| SimScore | Author |
| 0.2932779 | Giba |
| 0.2886751 | Jiwei Liu |
| 0.1678122 | Jakub Jaszczyk |
| 0.1677051 | SRK |
| 0.1538968 | kxx |
| 0.1434438 | Mhamed Jabri |
19.5 Laura Fink - Similar Author
# Show the first six rows returned for Laura Fink as a titled gt table.
get_sim_authors("Laura Fink") %>%
  head() %>%
  gt() %>%
  tab_header(title = "Authors similar to Laura Fink")

| Authors similar to Laura Fink | |
|---|---|
| SimScore | Author |
| 0.4531466 | Ramshankar Yadhunath |
| 0.3285369 | Jonathan Bouchet |
| 0.3079201 | Zulkhairee Sulaiman |
| 0.2771739 | kohei-mu |
| 0.2738613 | Will Isley |
| 0.2735942 | Saba Tavoosi |