20BDS0008_VL2022230504168_AST04

pdf

School

Vellore Institute of Technology *

*We aren’t endorsed by this school

Course

Subject

Information Systems

Date

Nov 24, 2024

Type

pdf

Pages

Uploaded by GrandBeaverPerson179

# Digital Assessment 4
# Name: Rachit Biswas
# Reg no: 20BDS0008
#
# 1. Create a Text Analytics with Word Cloud using Shakespeare dataset.
#
# Code:

# TEXT ANALYTICS ON SHAKESPEARE
#
# Purpose: tokenise the lines of the `shakespeare` data frame into words,
# score them with the "bing" sentiment lexicon, and visualise the most
# frequent positive/negative words as a bar chart and as word clouds.

# Packages ----
# Everything is attached up front because step 2 already needs `%>%` and
# count() from dplyr.
# install.packages("wordcloud2")
# install.packages("wordcloud")
library(tidytext)
library(dplyr)
library(tidyverse)
library(wordcloud2)
library(ggplot2)
library(wordcloud)

# 1. Load shakespeare.rda into the R environment ----
# NOTE(review): the path is machine-specific; the file is expected to
# define an object named `shakespeare` -- confirm before running elsewhere.
load("E:\\SEM 6\\DATA VIZ\\LAB\\DATASET\\shakespeare.rda")
if (interactive()) {
  # View() opens a data viewer, which only makes sense interactively.
  View(shakespeare)
}

# 2. Count how many titles/types there are ----
shakespeare %>%
  count(title, type)

# 3. (tidytext / tidyverse are loaded in the Packages section above.)

# 4. Create tidy_shakespeare ----
# Group by the titles of the plays, number the lines within each play,
# then transform the non-tidy text into one-word-per-row tidy text.
tidy_shakespeare <- shakespeare %>%
  group_by(title) %>%
  mutate(linenumber = row_number()) %>%
  unnest_tokens(word, text) %>%
  ungroup()
if (interactive()) {
  View(tidy_shakespeare)
}

# 5. Count how many times each word is used ----
tidy_shakespeare %>%
  count(word, sort = TRUE)

# 6. Sentiment analysis with the "bing" lexicon ----
# inner_join() keeps only the words that appear in the lexicon.
shakespeare_sentiment <- tidy_shakespeare %>%
  inner_join(get_sentiments("bing"), by = "word")

# 7. Find how many positive/negative words each play has ----
shakespeare_sentiment %>%
  count(title, sentiment)

# 8. Tragedy or comedy ----
# Count the number of sentiment words by title, type, and sentiment.
# (Counting by `word` here would make the percentages in step 9
# per-word shares instead of per-play shares.)
sentiment_counts <- tidy_shakespeare %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(title, type, sentiment)

# 9. Share of negative words per play ----
# Within each play: total sentiment words, and each sentiment's share of
# that total. Keep only the negative share and sort it in ascending order.
sentiment_counts %>%
  group_by(title) %>%
  mutate(
    total = sum(n),
    percent = n / total
  ) %>%
  ungroup() %>%
  filter(sentiment == "negative") %>%
  arrange(percent)

# 10. Most common positive and negative words (word_count) ----
# Count by word and sentiment, most frequent first.
word_count <- tidy_shakespeare %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE)

# wordcloud2() is given a two-column table: the word and its frequency.
# Columns are selected by name so a change in column order cannot
# silently pick the wrong ones.
word_count1 <- word_count %>%
  select(word, freq = n)
word_count1
wordcloud2(data = word_count1, size = 1.4, color = "random-dark")

# 11. Extract the top 10 words per sentiment (top_words) ----
# Group by sentiment, take the top 10 for each sentiment and ungroup.
# `word` becomes a factor whose levels are in reverse order of appearance
# (word_count is sorted by n) so the most frequent word is drawn at the
# top of the bar chart.
top_words <- word_count %>%
  group_by(sentiment) %>%
  top_n(10, n) %>%
  ungroup() %>%
  mutate(word = factor(word, levels = rev(unique(word))))
if (interactive()) {
  View(top_words)
}

# 12. Bar chart of the top words ----
# Counts on the x-axis and words on the y-axis (axes swapped so the bars
# are horizontal), one facet per sentiment with free scales so each panel
# only shows its own words.
ggplot(top_words, aes(x = n, y = word, fill = sentiment)) +
  geom_col() +
  ggtitle("20BDS0008") +
  facet_wrap(vars(sentiment), scales = "free")

# Word clouds of the top 50 words per sentiment ----
top_words

# Fix the RNG seed so the word-cloud layout is reproducible.
set.seed(100)
top_words <- word_count %>%
  group_by(sentiment) %>%
  top_n(50, n) %>%
  ungroup() %>%
  mutate(word = factor(word, levels = rev(unique(word))))

# Randomly ordered placement, "Dark2" palette.
wordcloud(
  words = top_words$word,
  freq = top_words$n,
  random.order = TRUE,
  colors = brewer.pal(8, "Dark2")
)

# Most frequent words placed first (centre), "Paired" palette.
wordcloud(
  words = top_words$word,
  freq = top_words$n,
  random.order = FALSE,
  colors = brewer.pal(12, "Paired")
)

Your preview ends here