# Split a single sentence into one token (word) per row.
# NOTE(review): assumes tibble() is already attached earlier on the page
# (e.g. via library(tidyverse)) — confirm.
df <- tibble(id = 1, text = "Hello world from R")

# `word` is the name of the new output column; `text` is the input column.
tidytext::unnest_tokens(df, word, text)
Exercise 6: Sentiment with bing lexicon
Difficulty: Advanced.
Show solution
RInteractive R
# Count sentiment-bearing words per document using the "bing" lexicon.
# NOTE(review): assumes tibble and dplyr are attached earlier on the page
# — confirm.
df <- tibble(
  id = 1:2,
  text = c("I love R", "This is terrible")
)

df |>
  tidytext::unnest_tokens(word, text) |>
  # The inner join drops every token that has no entry in the lexicon.
  inner_join(tidytext::get_sentiments("bing"), by = "word") |>
  count(id, sentiment)
Exercise 7: TF-IDF
Difficulty: Advanced.
Show solution
RInteractive R
# Compute tf, idf and tf-idf for every (document, word) pair.
# NOTE(review): assumes tibble and dplyr are attached earlier on the page
# — confirm.
df <- tibble(
  doc = c("d1", "d2"),
  text = c("R is great R is powerful", "Python is great")
)

df |>
  tidytext::unnest_tokens(word, text) |>
  # bind_tf_idf() needs one row per document-term pair with its count `n`.
  count(doc, word) |>
  tidytext::bind_tf_idf(word, doc, n)
Exercise 8: Bigrams
Difficulty: Advanced.
Show solution
RInteractive R
# Tokenise a sentence into overlapping two-word sequences (bigrams).
# NOTE(review): assumes tibble() is already attached earlier on the page
# — confirm.
df <- tibble(id = 1, text = "the cat sat on the mat")

tidytext::unnest_tokens(
  df,
  bigram,
  text,
  token = "ngrams",
  n = 2
)
Exercise 9: Word cloud (concept)
Difficulty: Intermediate.
Show solution
RInteractive R
# Concept only: the call below is left commented out, so nothing is executed.
# NOTE(review): `words` and `freq` are not defined in this snippet; presumably
# a character vector of terms and their matching counts — confirm.
# wordcloud::wordcloud(words, freq, min.freq = 1)
Exercise 10: Document-term matrix
Difficulty: Advanced.
Show solution
RInteractive R
# Build a wide document-term matrix: one row per document, one column per word.
# NOTE(review): assumes tibble and dplyr are attached earlier on the page
# — confirm.
df <- tibble(
  doc = c("d1", "d2"),
  text = c("hello world", "world R")
)

df |>
  tidytext::unnest_tokens(word, text) |>
  count(doc, word) |>
  tidyr::pivot_wider(
    names_from = word,
    values_from = n,
    # Words absent from a document get a count of 0 instead of NA.
    values_fill = 0
  )
# List the three most frequent words within each group.
# NOTE(review): assumes tibble and dplyr are attached earlier on the page
# — confirm.
df <- tibble(
  group = c("A", "A", "B"),
  text = c("R is great", "R is powerful", "Python is also great")
)

df |>
  tidytext::unnest_tokens(word, text) |>
  # sort = TRUE orders rows by descending count, so the first rows taken
  # from each group below are that group's most frequent words.
  count(group, word, sort = TRUE) |>
  group_by(group) |>
  slice_head(n = 3)
Exercise 16: Document similarity (cosine)
Difficulty: Advanced.
Show solution
RInteractive R
# Cosine similarity between two documents, computed from their word counts.
# NOTE(review): assumes tibble and dplyr are attached earlier on the page
# — confirm.
df <- tibble(
  doc = c("d1", "d2"),
  text = c("R is great", "R is great")
)

# Wide document-term matrix: one row per document, one column per word.
dtm <- df |>
  tidytext::unnest_tokens(word, text) |>
  count(doc, word) |>
  tidyr::pivot_wider(names_from = word, values_from = n, values_fill = 0)

# Drop the first column (`doc`) so only the word counts remain, then flatten
# each one-row slice into a plain numeric vector.
v1 <- as.numeric(dtm[1, -1])
v2 <- as.numeric(dtm[2, -1])

# Dot product divided by the product of the two Euclidean norms.
sum(v1 * v2) / (sqrt(sum(v1^2)) * sqrt(sum(v2^2)))
Exercise 17: Top tf-idf per doc
Difficulty: Advanced.
Show solution
RInteractive R
# From pre-counted (doc, word, n) rows, keep the highest tf-idf words per doc.
# NOTE(review): assumes tibble and dplyr are attached earlier on the page
# — confirm.
df <- tibble(
  doc = c("d1", "d1", "d2", "d2"),
  word = c("r", "stats", "python", "stats"),
  n = c(2, 1, 3, 1)
)

df |>
  tidytext::bind_tf_idf(word, doc, n) |>
  group_by(doc) |>
  # Here `n = 2` is slice_max()'s row limit, not the count column `n`.
  slice_max(tf_idf, n = 2)
Exercise 18: Filter very rare/common words
Difficulty: Advanced.
Show solution
RInteractive R
# Keep only the words that occur in at least two rows of the table.
# NOTE(review): assumes tibble and dplyr are attached earlier on the page
# — confirm.
df <- tibble(
  doc = c("d1", "d1", "d2"),
  word = c("r", "stats", "r"),
  n = c(2, 1, 3)
)

df |>
  group_by(word) |>
  # n() is the number of rows in the current group; it is unrelated to the
  # count column `n`, which this filter does not consult.
  filter(n() >= 2)
The quiz is concept-based and respects your time: pass it once and your verifiable certificate is yours to share on LinkedIn, your resume, or your portfolio. Take it when you feel comfortable with the material.