As part of Hacking Health Covid-19, the SKEMA Global Lab in AI provided SKEMA’s students with a fully developed data science environment to carry out their projects. See [here].
For this specific module, the team used the following courses:
Using Twitter’s Open API for sentiment analysis to measure how Donald Trump’s speeches impact the audiences’ emotions regarding the pandemic at hand.
Millions of viewers sit down to watch the American president’s speeches about the challenging times we are facing because of the coronavirus. What does the polarity of emotions look like in relation to the pandemic? Will the data collected be sufficient and valuable enough for the government to take into consideration when preparing Trump’s upcoming speeches, in order to make them more effective in generating positive emotions regarding the crisis?
library("twitteR")
library("ROAuth")
library("NLP")
library("syuzhet")
library("tm")
library("SnowballC")
library("stringi")
library("topicmodels")
library("wordcloud")
library("ggplot2")
library("dplyr")
library("tidyr")
library("tidytext")
library("tidyverse")
library("tokenizers")
# Load the tweet archive. The file is comma-separated, so read.csv() is used
# instead of read.csv2(): read.csv2() additionally assumes a decimal comma,
# which would misparse any column holding numbers written with a decimal point.
tweets <- read.csv(
  file = "./data/trumptweets.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Keep only the tweet text. head() caps the sample at 1000 tweets without
# padding the vector with NA when the file contains fewer rows.
tweets_text <- head(tweets$content, 1000)
# Preview the first few tweets
head(tweets_text)
[1] "Be sure to tune in and watch Donald Trump on Late Night with David Letterman as he presents the Top Ten List tonight!"
[2] "Donald Trump will be appearing on The View tomorrow morning to discuss Celebrity Apprentice and his new book Think Like A Champion!"
[3] "Donald Trump reads Top Ten Financial Tips on Late Show with David Letterman: http://tinyurl.com/ooafwn - Very funny!"
[4] "New Blog Post: Celebrity Apprentice Finale and Lessons Learned Along the Way: http://tinyurl.com/qlux5e"
[5] "\"My persona will never be that of a wallflower - I’d rather build walls than cling to them\" --Donald J. Trump"
[6] "Miss USA Tara Conner will not be fired - \"I've always been a believer in second chances.\" says Donald Trump"
# Normalise the raw tweet text before tokenising it.
# Convert all text to lower case
tweets_text <- tolower(tweets_text)
# Remove links first, while "://", "." and "/" are still present to delimit them
tweets_text <- gsub("http\\S+", "", tweets_text)
# Remove @UserName mentions
tweets_text <- gsub("@\\w+", "", tweets_text)
# Remove punctuation
tweets_text <- gsub("[[:punct:]]", "", tweets_text)
# Collapse every run of whitespace (spaces, tabs, newlines) into ONE space.
# Replacing such runs with "" would glue the neighbouring words together,
# e.g. "wallflower  id" (left over from "wallflower - i'd") -> "wallflowerid".
tweets_text <- gsub("[[:space:]]+", " ", tweets_text)
# Remove blank spaces at the beginning and at the end
tweets_text <- trimws(tweets_text)
# Tokenise: one row per word, tagged with the index of the tweet it came from.
# seq_along() stays correct for an empty input, where 1:length(x) gives c(1, 0).
text_df <- tibble(line = seq_along(tweets_text), text = tweets_text)
tidy_tweets <- text_df %>%
  unnest_tokens(word, text)
# Remove stop words and other useless words: the tidytext stop_words table
# extended with the subject's own name, which would otherwise dominate.
custom_words_removing <- bind_rows(
  tibble(word = c("trump", "donald"), lexicon = "custom"),
  stop_words
)
# Name the join key explicitly so the join does not depend on (and report)
# whichever columns the two tables happen to share.
tweets_text <- tidy_tweets %>%
  anti_join(custom_words_removing, by = "word")
# From here on tweets_text is a character vector with one remaining word each
tweets_text <- tweets_text$word
# Word cloud of the remaining words: only words occurring at least 10 times,
# at most 100 of them, coloured at random from the 8-colour "Dark2" palette.
wordcloud(
  words = tweets_text,
  min.freq = 10,
  max.words = 100,
  random.color = TRUE,
  colors = brewer.pal(8, "Dark2")
)
# Score every word with syuzhet's NRC lexicon (one row per word, one column
# per sentiment returned by get_nrc_sentiment()).
mysentiment_tweets <- get_nrc_sentiment(tweets_text)
# Total score of each sentiment over all words, as a two-column table
# (sentiment name, Score) with default row names.
sentiment_totals <- colSums(mysentiment_tweets)
Sentimentscores_tweets <- data.frame(
  sentiment = names(sentiment_totals),
  Score = unname(sentiment_totals),
  row.names = NULL
)
# Bar chart of the total score per sentiment; bars are coloured by sentiment
# and the legend is hidden because the x axis already names each bar.
ggplot(Sentimentscores_tweets, aes(x = sentiment, y = Score)) +
  geom_bar(aes(fill = sentiment), stat = "identity") +
  labs(
    x = "Sentiments",
    y = "scores",
    title = "Sentiments of people behind the Trump Tweets"
  ) +
  theme(legend.position = "none")
library("twitteR")
library("ROAuth")
library("NLP")
library("syuzhet")
library("tm")
library("SnowballC")
library("stringi")
library("topicmodels")
library("wordcloud")
library("ggplot2")
library("dplyr")
library("tidyr")
library(tidytext)
library(tidyverse)
library(tokenizers)
# Load the tweet archive. The file is comma-separated, so read.csv() is used
# instead of read.csv2(): read.csv2() additionally assumes a decimal comma,
# which would misparse any column holding numbers written with a decimal point.
tweets <- read.csv(
  file = "trumptweets.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Keep only the tweet text. head() caps the sample at 1000 tweets without
# padding the vector with NA when the file contains fewer rows.
tweets_text <- head(tweets$content, 1000)
# Preview the first few tweets
head(tweets_text)
# Normalise the raw tweet text before tokenising it.
# Convert all text to lower case
tweets_text <- tolower(tweets_text)
# Remove links first, while "://", "." and "/" are still present to delimit them
tweets_text <- gsub("http\\S+", "", tweets_text)
# Remove @UserName mentions
tweets_text <- gsub("@\\w+", "", tweets_text)
# Remove punctuation
tweets_text <- gsub("[[:punct:]]", "", tweets_text)
# Collapse every run of whitespace (spaces, tabs, newlines) into ONE space.
# Replacing such runs with "" would glue the neighbouring words together,
# e.g. "wallflower  id" (left over from "wallflower - i'd") -> "wallflowerid".
tweets_text <- gsub("[[:space:]]+", " ", tweets_text)
# Remove blank spaces at the beginning and at the end
tweets_text <- trimws(tweets_text)
# Tokenise: one row per word, tagged with the index of the tweet it came from.
# seq_along() stays correct for an empty input, where 1:length(x) gives c(1, 0).
text_df <- tibble(line = seq_along(tweets_text), text = tweets_text)
tidy_tweets <- text_df %>%
  unnest_tokens(word, text)
# Remove stop words and other useless words: the tidytext stop_words table
# extended with the subject's own name, which would otherwise dominate.
custom_words_removing <- bind_rows(
  tibble(word = c("trump", "donald"), lexicon = "custom"),
  stop_words
)
# Name the join key explicitly so the join does not depend on (and report)
# whichever columns the two tables happen to share.
tweets_text <- tidy_tweets %>%
  anti_join(custom_words_removing, by = "word")
# From here on tweets_text is a character vector with one remaining word each
tweets_text <- tweets_text$word
# Word cloud of the remaining words: only words occurring at least 10 times,
# at most 100 of them, coloured at random from the 8-colour "Dark2" palette.
wordcloud(
  words = tweets_text,
  min.freq = 10,
  max.words = 100,
  random.color = TRUE,
  colors = brewer.pal(8, "Dark2")
)
# Score every word with syuzhet's NRC lexicon (one row per word, one column
# per sentiment returned by get_nrc_sentiment()).
mysentiment_tweets <- get_nrc_sentiment(tweets_text)
# Total score of each sentiment over all words, as a two-column table
# (sentiment name, Score) with default row names.
sentiment_totals <- colSums(mysentiment_tweets)
Sentimentscores_tweets <- data.frame(
  sentiment = names(sentiment_totals),
  Score = unname(sentiment_totals),
  row.names = NULL
)
# Bar chart of the total score per sentiment; bars are coloured by sentiment
# and the legend is hidden because the x axis already names each bar.
ggplot(Sentimentscores_tweets, aes(x = sentiment, y = Score)) +
  geom_bar(aes(fill = sentiment), stat = "identity") +
  labs(
    x = "Sentiments",
    y = "scores",
    title = "Sentiments of people behind the Trump Tweets"
  ) +
  theme(legend.position = "none")
Text and figures are licensed under Creative Commons Attribution CC BY 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".