This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(SocialMediaLab)
## Warning: package 'SocialMediaLab' was built under R version 3.4.3
library(magrittr)
library(tm)
## Loading required package: NLP
library(tidytext)
## Warning: package 'tidytext' was built under R version 3.4.3
library(stringr)
#extract
#NSF campaign
#https://www.youtube.com/watch?v=xK1Qf0MTIRU #2009
#1,645 views #1 comment
#Very helpful. I feel much more aware of how to recognise a stroke. Thanks!
#https://www.youtube.com/watch?v=RBaMgsSKzCc #2010
#7,946 views #0 Comments
#https://www.youtube.com/watch?v=MmoGeCXovJ8 #2011
#4,755 views # 0 comments
#Everyone needs to know this.
#https://www.youtube.com/watch?v=27pbdKLOHNU #2013
#479 views #0 Comments
#https://www.youtube.com/watch?v=xA-P5voEik8 #2015
#60 views #0 Comments
#https://www.youtube.com/watch?v=YHzz2cXBlGk #2006 stroke heroes long version
# Stroke Heroes long version (2006): 123 comments, 406,253 views as of 2/2/18.
videoIDs <- c("YHzz2cXBlGk")
#extract
#g_youtube_actor <- Authenticate("youtube", apiKey= apiKey) %>%
# Collect(videoIDs = videoIDs, writeToFile=TRUE) %>%
# Create("Actor")
#output of socialmedialab
# Load the SocialMediaLab YouTube comment export; drop rows with empty comments.
df <- read.csv("Feb_01_1_49_59 PM_2018_AEDT_YoutubeData.csv", stringsAsFactors = FALSE)
toRemove <- which(df$Comment == "")
# which() always returns a (possibly empty) integer vector, so a plain
# length check suffices; the original isTRUE(length(...) != 0) was redundant.
if (length(toRemove) > 0) {
  df <- df[-toRemove, ]
}

# Normalise the comment text and build a tm corpus:
# lowercase, strip numbers/punctuation/stopwords, collapse whitespace.
keywords <- df$Comment
keywords <- iconv(keywords, to = 'utf-8')
myCorpus <- VCorpus(VectorSource(keywords))
myCorpus <- tm_map(myCorpus, content_transformer(tolower))
myCorpus <- tm_map(myCorpus, removeNumbers)
myCorpus <- tm_map(myCorpus, removePunctuation)
myCorpus <- tm_map(myCorpus, removeWords, stopwords("english"), lazy = TRUE)
myCorpus <- tm_map(myCorpus, stripWhitespace, lazy = TRUE)

# Document-term matrix keeping words of 3-20 characters, then drop terms
# absent from more than 95% of documents.
dtm <- DocumentTermMatrix(myCorpus, control = list(wordLengths = c(3, 20)))
dtm <- removeSparseTerms(dtm, 0.95)
# FIX: minWordLength/maxWordLength are pre-tm-0.6 option names that current
# tm's termFreq() ignores; wordLengths is the supported control, matching
# the DocumentTermMatrix call above.
tdm <- TermDocumentMatrix(myCorpus, control = list(wordLengths = c(3, 20)))
inspect(dtm[1:5, 5:10])
## <<DocumentTermMatrix (documents: 5, terms: 6)>>
## Non-/sparse entries: 3/27
## Sparsity : 90%
## Maximal term length: 5
## Weighting : term frequency (tf)
## Sample :
## Terms
## Docs just know like love song still
## 1 0 0 0 1 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 1 0 0
## 4 0 0 0 0 0 0
## 5 2 0 0 0 0 0
# Flatten the term-document matrix and rank terms by overall frequency.
freq_matrix <- as.matrix(tdm)
term_freqs <- sort(rowSums(freq_matrix), decreasing = TRUE)
d <- data.frame(word = names(term_freqs), freq = term_freqs)
# Persist the raw term-document matrix (lands in the working directory).
write.csv(freq_matrix, file = "youtube_strokeheroes.csv")
# Ten most frequent terms.
head(d, 10)
## word freq
## stroke stroke 22
## like like 14
## video video 13
## song song 10
## just just 8
## fast fast 7
## first first 7
## good good 7
## know know 7
## love love 7
# Bar chart of the 20 most frequent terms, labels rotated vertically.
top_terms <- d[1:20, ]
barplot(top_terms$freq, names.arg = top_terms$word, las = 2,
        col = "lightblue", main = "Most frequent words",
        ylab = "Word frequencies")
# Word cloud of term frequencies; seeded so the layout is reproducible.
library(wordcloud)
## Loading required package: RColorBrewer
set.seed(1234)
wordcloud(
  words = d$word, freq = d$freq, min.freq = 1,
  max.words = 100, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)
library(syuzhet)
## Warning: package 'syuzhet' was built under R version 3.4.3
# Sentence-level sentiment analysis based on unigram lexicons (nrc etc.).
# Split the comment text into individual sentences.
my_example_text <- keywords
s_v <- get_sentences(my_example_text)
class(s_v)
## [1] "character"
# Structure: a plain character vector, one element per sentence (183 here).
str(s_v)
## chr [1:183] "I love this." ...
# Preview the first few extracted sentences.
head(s_v)
## [1] "I love this."
## [2] "I remember always seeing a shortened version of it as a TV commercial 7-8 years ago at my grandparentsâ house."
## [3] "God what a fucking jam"
## [4] "This shit is soooo catchy <f0><U+009F><U+0098><U+00A9><f0><U+009F><U+0091><U+008C><f0><U+009F><U+0094><U+00A5><f0><U+009F><U+0094><U+00A5><f0><U+009F><U+0094><U+00A5> god I love my Health Class"
## [5] "What about asking them to stick their tongue out?"
## [6] "(if it's crooked, they have a stroke)"
# Score each sentence with the Bing polarity lexicon.
sentiment_vector <- get_sentiment(s_v, method = "bing")
sentiment_vector
## [1] 1 0 -2 1 0 -1 0 0 0 1 1 0 -1 -2 0 1 0 0 -1 1 0 0 0
## [24] 0 0 1 0 2 0 1 0 2 2 0 1 0 0 0 0 0 -1 0 1 1 0 1
## [47] 0 0 2 0 0 -1 1 0 0 0 0 0 0 1 0 0 0 0 0 -1 0 0 -1
## [70] 0 0 1 0 -1 1 2 0 0 2 1 0 -1 0 1 1 2 0 0 0 0 0 1
## [93] -1 -1 -1 1 1 -2 0 -1 2 1 -1 0 0 -1 -1 0 0 0 0 1 0 -1 1
## [116] 0 -1 -2 -1 0 0 -1 0 1 -1 1 1 -1 1 0 1 0 1 2 1 0 1 0
## [139] 0 0 0 -1 0 1 -1 -1 0 1 0 0 1 -1 0 1 1 0 0 -1 0 1 0
## [162] 0 2 -1 0 0 2 0 -1 1 -1 0 0 0 0 0 0 0 0 0 0 1 1
# Rescore the same sentences with the AFINN lexicon for comparison.
afinn_vector <- get_sentiment(s_v, method = "afinn")
afinn_vector
## [1] 3 0 -3 0 0 0 2 0 0 2 3 0 -2 0 0 2 0 0 0 3 0 0 0
## [24] 0 0 2 0 3 0 3 3 4 2 -1 0 0 0 0 0 -1 -2 0 2 5 2 3
## [47] 0 -1 7 0 0 -4 3 0 0 0 0 0 0 4 0 2 0 0 0 -3 3 3 -2
## [70] 0 0 2 0 -2 1 5 0 -3 5 6 0 0 0 2 3 0 -1 -2 0 3 0 3
## [93] 0 -2 -2 0 0 -4 0 -3 6 3 -3 0 -2 1 1 0 -4 0 0 2 0 -2 2
## [116] 0 0 -2 -1 0 0 -3 0 5 -1 2 4 -1 5 0 4 0 4 6 0 0 2 0
## [139] 6 -2 0 4 0 -3 7 1 0 2 -3 3 2 -2 0 3 -1 8 1 4 3 3 0
## [162] 0 6 -2 1 0 0 0 -2 2 -2 0 -1 0 0 0 0 -1 0 0 1 0 2
# Rescore with the NRC lexicon for comparison.
nrc_vector <- get_sentiment(s_v, method = "nrc")
nrc_vector
## [1] 1 0 2 1 0 -1 0 0 0 1 1 0 -2 0 -1 0 0 0 1 1 1 0 0
## [24] 0 1 3 1 1 0 2 0 0 1 0 0 0 0 1 0 0 -1 1 2 1 0 1
## [47] 0 1 0 1 1 -1 2 0 0 0 0 0 0 0 -2 0 0 0 0 0 0 0 -2
## [70] 2 0 0 1 -1 0 0 0 0 1 1 0 -1 0 1 1 -1 1 1 0 0 0 1
## [93] -1 -2 0 0 1 -1 -1 -1 1 0 -1 0 0 -2 0 0 0 0 1 0 0 0 2
## [116] -1 -1 -2 -1 -1 -1 0 0 0 -1 0 0 0 1 1 0 0 0 1 0 0 -1 1
## [139] 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 3 0 0 0 1 0
## [162] 0 2 0 0 0 0 0 -1 1 -1 0 0 0 0 0 0 -1 0 0 0 1 1
# Aggregate Bing polarity over all sentences.
sum(sentiment_vector)
## [1] 26
# Average polarity per sentence.
mean(sentiment_vector)
## [1] 0.1420765
summary(sentiment_vector)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.0000 0.0000 0.0000 0.1421 1.0000 2.0000
# Per-sentence NRC scores; columns 1-8 are emotions (plotted below) and
# columns 9-10 are used later as the negative/positive valence pair.
nrc_data <- get_nrc_sentiment(s_v)
# Indices of sentences with any anger / joy signal.
angry_items <- which(nrc_data$anger > 0)
joy_items <- which(nrc_data$joy > 0)
# Display the joy-flagged sentences.
s_v[joy_items]
## [1] "I love this."
## [2] "God what a fucking jam"
## [3] "This shit is soooo catchy <f0><U+009F><U+0098><U+00A9><f0><U+009F><U+0091><U+008C><f0><U+009F><U+0094><U+00A5><f0><U+009F><U+0094><U+00A5><f0><U+009F><U+0094><U+00A5> god I love my Health Class"
## [4] "I still love this song!!"
## [5] "ok when face and arm is not ok than i take money ok."
## [6] "She has a deaf student this semester and would love to get it captioned."
## [7] "They need to bring this back again to teach kids about strokes like how I learned when I was younger..."
## [8] "This was an outstanding video that even shows a child being empowered to act when her grandmother had signs of a stroke."
## [9] "Love it!"
## [10] "Hi, that is really nice, is part of the idea of the song, my father had a stroke and thanks to a video like this (the spanish version) I was able to take him to the hospital in less than 5 min since the first sing came."
## [11] "I was sleeping till the music waked me up LooooL"
## [12] "Good information though, I guess..."
## [13] "I'm a Paramedic and I sing this to my partners in the Ambulance."
## [14] "thank you >>>very good video"
## [15] "haha I love bio classes at my school!"
## [16] "This is good to make something for kid's to get."
## [17] "My moms stroke wasn't caught fast enough, my mother passed away this morning around 3 am."
## [18] "what's dick Clarke's favorite band?"
## [19] "I love this video, and the short commercial on T.V., it rocks!"
## [20] "WHATCHU TRYIN T'A SAY, WHITE BOI. I DOESN'T AFRAID."
## [21] "@DisneylandWalt \n\nthe song is used to explain how to recognize a stroke not to entertain the audience"
## [22] "Very good song and great tips."
## [23] "This is why FAST was created, to create awareness of the fact that these isolated symptoms can be signs of a stroke and to emphasize on the importance of fast reaction."
## [24] "Hilarious and educational!"
## [25] "pls if you see anyone or know anyone that has a stroke pls don't make fun of them."
## [26] "If you would like to know or learn mire about stroke tell me I'll be more then happy to explain it to you."
## [27] "We saw this in a supervisor training- It's really good- great impact:)"
## [28] "Funny AND helpful"
## [29] "This is pretty funny, but if it helps just one person to recognize the symptoms of a stroke, I'm all for it!"
## [30] "Very good video"
## [31] "Strokes are kind of funny, and a cartoon music video commercial about strokes is absolutely hilarious."
## [32] "only if you want consistently good medical care"
## [33] "+Melanie âscrew youâ ely Yes, that's important, because what if the victim doesn't have anyone around but their grandchildren/nieces/nephews?"
# Mean per-sentence score for selected NRC emotions (column total / #sentences).
sum(nrc_data$trust)/dim(nrc_data)[1]
## [1] 0.2568306
sum(nrc_data$anticipation)/dim(nrc_data)[1]
## [1] 0.2131148
sum(nrc_data$disgust)/dim(nrc_data)[1]
## [1] 0.04918033
#pander::pandoc.table(nrc_data[, 1:8])
#pander::pandoc.table(nrc_data[, 9:10])
# Net valence per sentence: column 10 minus column 9.
# NOTE(review): assumes get_nrc_sentiment() returns negative/positive as the
# last two columns — confirm against the syuzhet documentation.
valence <- (nrc_data[, 9]*-1) + nrc_data[, 10]
valence
## [1] 1 0 2 1 0 -1 0 0 0 1 1 0 -2 0 -1 0 0 0 1 1 1 0 0
## [24] 0 1 3 1 1 0 2 0 0 1 0 0 0 0 1 0 0 -1 1 2 1 0 1
## [47] 0 1 0 1 1 -1 2 0 0 0 0 0 0 0 -2 0 0 0 0 0 0 0 -2
## [70] 2 0 0 1 -1 0 0 0 0 1 1 0 -1 0 1 1 -1 1 1 0 0 0 1
## [93] -1 -2 0 0 1 -1 -1 -1 1 0 -1 0 0 -2 0 0 0 0 1 0 0 0 2
## [116] -1 -1 -2 -1 -1 -1 0 0 0 -1 0 0 0 1 1 0 0 0 1 0 0 -1 1
## [139] 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 3 0 0 0 1 0
## [162] 0 2 0 0 0 0 0 -1 1 -1 0 0 0 0 0 0 -1 0 0 0 1 1
# Horizontal bar chart of the eight NRC emotions as overall proportions.
emotion_props <- sort(colSums(prop.table(nrc_data[, 1:8])))
barplot(emotion_props,
        horiz = TRUE,
        cex.names = 0.7,
        las = 1,
        main = "Emotions in Sample text", xlab = "Percentage")
library(DT)
## Warning: package 'DT' was built under R version 3.4.3
# Pair each sentence with its NRC emotion scores, one row per sentence.
# BUG FIX: merge(s_v, nrc_data) shares no column names, so merge() fell back
# to a Cartesian (cross) join — 183 x 183 rows — which is what triggered DT's
# "data is too big for client-side DataTables" warning below. cbind() gives
# the intended row-wise alignment.
df <- cbind(sentence = s_v, nrc_data)
datatable(df)
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## http://rstudio.github.io/DT/server.html
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.