R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(SocialMediaLab)
## Warning: package 'SocialMediaLab' was built under R version 3.4.3
library(magrittr)

library(tm)
## Loading required package: NLP
library(tidytext)
## Warning: package 'tidytext' was built under R version 3.4.3
library(stringr)
#extract
#NSF campaign
#https://www.youtube.com/watch?v=xK1Qf0MTIRU #2009 
#1,645 views #1 comment
#Very helpful.  I feel much more aware of how to recognise a stroke.  Thanks!
#https://www.youtube.com/watch?v=RBaMgsSKzCc #2010
#7,946 views #0 Comments
#https://www.youtube.com/watch?v=MmoGeCXovJ8 #2011
#4,755 views # 0 comments
#Everyone needs to know this. 
#https://www.youtube.com/watch?v=27pbdKLOHNU #2013
#479 views #0 Comments
#https://www.youtube.com/watch?v=xA-P5voEik8 #2015
#60 views #0 Comments
#https://www.youtube.com/watch?v=YHzz2cXBlGk #2006 stroke heroes long version
videoIDs <- "YHzz2cXBlGk"  # 123 comments, 406,253 views (noted 2/2/18)

#extract
#g_youtube_actor <- Authenticate("youtube", apiKey= apiKey) %>%
#  Collect(videoIDs = videoIDs, writeToFile=TRUE) %>%
#  Create("Actor")

#output of socialmedialab
# Load the comment export previously written by SocialMediaLab
# (see the commented-out Collect(writeToFile = TRUE) call above).
df <- read.csv(
  "Feb_01_1_49_59 PM_2018_AEDT_YoutubeData.csv",
  stringsAsFactors = FALSE
)

# Drop rows whose comment text is empty. The length guard matters:
# df[-integer(0), ] would select zero rows instead of all of them.
toRemove <- which(df$Comment == "")
if (length(toRemove) > 0) {
  df <- df[-toRemove, , drop = FALSE]
}

# Raw comment text, converted from the native encoding to UTF-8.
keywords <- df$Comment
keywords <- iconv(keywords, to = "utf-8")

# Build the corpus and normalise it: lower-case, strip digits and
# punctuation, remove English stop words, collapse whitespace.
# All mappings are applied eagerly; the original mixed eager and
# lazy = TRUE steps, which tm's documentation warns can cause problems.
myCorpus <- VCorpus(VectorSource(keywords))
myCorpus <- tm_map(myCorpus, content_transformer(tolower))
myCorpus <- tm_map(myCorpus, removeNumbers)
myCorpus <- tm_map(myCorpus, removePunctuation)
myCorpus <- tm_map(myCorpus, removeWords, stopwords("english"))
myCorpus <- tm_map(myCorpus, stripWhitespace)

# Document-term matrix restricted to terms of 3-20 characters, then pruned
# of terms whose sparsity exceeds 0.95.
dtm <- DocumentTermMatrix(myCorpus, control = list(wordLengths = c(3, 20)))
dtm <- removeSparseTerms(dtm, 0.95)

# Term-document matrix with the same 3-20 character limits as the DTM.
# The former minWordLength / maxWordLength options are not recognised by
# current tm and were silently ignored, so no upper limit was applied;
# wordLengths is the supported control option.
tdm <- TermDocumentMatrix(myCorpus, control = list(wordLengths = c(3, 20)))

# Peek at a small window of the pruned document-term matrix.
inspect(dtm[1:5, 5:10])
## <<DocumentTermMatrix (documents: 5, terms: 6)>>
## Non-/sparse entries: 3/27
## Sparsity           : 90%
## Maximal term length: 5
## Weighting          : term frequency (tf)
## Sample             :
##     Terms
## Docs just know like love song still
##    1    0    0    0    1    0     0
##    2    0    0    0    0    0     0
##    3    0    0    0    1    0     0
##    4    0    0    0    0    0     0
##    5    2    0    0    0    0     0

matrix conversion

# Dense term-by-document matrix (rows are terms, columns are documents)
m <- as.matrix(tdm)

# Total count of each term across all documents, most frequent first
v <- m %>%
  rowSums() %>%
  sort(decreasing = TRUE)

# Two-column frequency table used by the plots below
d <- data.frame(word = names(v), freq = v)

# Export the full matrix; a relative path resolves against the current
# working directory
write.csv(m, file = "youtube_strokeheroes.csv")

# Ten most frequent terms
head(d, 10)
##          word freq
## stroke stroke   22
## like     like   14
## video   video   13
## song     song   10
## just     just    8
## fast     fast    7
## first   first    7
## good     good    7
## know     know    7
## love     love    7
# Bar chart of the most frequent terms (at most 20). head() replaces the
# fixed d[1:20, ] slice, which pads the chart with NA rows whenever the
# frequency table holds fewer than 20 terms.
with(
  head(d, 20),
  barplot(freq, las = 2, names.arg = word,
          col = "lightblue", main = "Most frequent words",
          ylab = "Word frequencies")
)

#word cloud
library(wordcloud)
## Loading required package: RColorBrewer
# Fix the RNG seed so the cloud layout is reproducible between runs
set.seed(1234)

# Word cloud of up to 100 terms, most frequent placed first, 35% rotated
with(
  d,
  wordcloud(
    words = word,
    freq = freq,
    min.freq = 1,
    max.words = 100,
    random.order = FALSE,
    rot.per = 0.35,
    colors = brewer.pal(8, "Dark2")
  )
)

sentiment analysis

library(syuzhet)
## Warning: package 'syuzhet' was built under R version 3.4.3
# based on unigrams
#nrc
# Work on the re-encoded comment text (not the stop-word-stripped corpus)
my_example_text <- keywords

# Break the comments into individual sentences for sentence-level scoring
s_v <- get_sentences(my_example_text)

# Confirm the result type
class(s_v)
## [1] "character"
str(s_v)
##  chr [1:183] "I love this." ...
head(s_v) 
## [1] "I love this."                                                                                                                                                                                     
## [2] "I remember always seeing a shortened version of it as a TV commercial 7-8 years ago at my grandparents’ house."                                                                                 
## [3] "God what a fucking jam"                                                                                                                                                                           
## [4] "This shit is soooo catchy <f0><U+009F><U+0098><U+00A9><f0><U+009F><U+0091><U+008C><f0><U+009F><U+0094><U+00A5><f0><U+009F><U+0094><U+00A5><f0><U+009F><U+0094><U+00A5> god I love my Health Class"
## [5] "What about asking them to stick their tongue out?"                                                                                                                                                
## [6] "(if it's crooked, they have a stroke)"
# Sentence-level sentiment scores from the bing lexicon
sentiment_vector <- get_sentiment(s_v, method = "bing")
print(sentiment_vector)
##   [1]  1  0 -2  1  0 -1  0  0  0  1  1  0 -1 -2  0  1  0  0 -1  1  0  0  0
##  [24]  0  0  1  0  2  0  1  0  2  2  0  1  0  0  0  0  0 -1  0  1  1  0  1
##  [47]  0  0  2  0  0 -1  1  0  0  0  0  0  0  1  0  0  0  0  0 -1  0  0 -1
##  [70]  0  0  1  0 -1  1  2  0  0  2  1  0 -1  0  1  1  2  0  0  0  0  0  1
##  [93] -1 -1 -1  1  1 -2  0 -1  2  1 -1  0  0 -1 -1  0  0  0  0  1  0 -1  1
## [116]  0 -1 -2 -1  0  0 -1  0  1 -1  1  1 -1  1  0  1  0  1  2  1  0  1  0
## [139]  0  0  0 -1  0  1 -1 -1  0  1  0  0  1 -1  0  1  1  0  0 -1  0  1  0
## [162]  0  2 -1  0  0  2  0 -1  1 -1  0  0  0  0  0  0  0  0  0  0  1  1
# Sentence-level sentiment scores from the afinn lexicon
afinn_vector <- get_sentiment(s_v, method = "afinn")
print(afinn_vector)
##   [1]  3  0 -3  0  0  0  2  0  0  2  3  0 -2  0  0  2  0  0  0  3  0  0  0
##  [24]  0  0  2  0  3  0  3  3  4  2 -1  0  0  0  0  0 -1 -2  0  2  5  2  3
##  [47]  0 -1  7  0  0 -4  3  0  0  0  0  0  0  4  0  2  0  0  0 -3  3  3 -2
##  [70]  0  0  2  0 -2  1  5  0 -3  5  6  0  0  0  2  3  0 -1 -2  0  3  0  3
##  [93]  0 -2 -2  0  0 -4  0 -3  6  3 -3  0 -2  1  1  0 -4  0  0  2  0 -2  2
## [116]  0  0 -2 -1  0  0 -3  0  5 -1  2  4 -1  5  0  4  0  4  6  0  0  2  0
## [139]  6 -2  0  4  0 -3  7  1  0  2 -3  3  2 -2  0  3 -1  8  1  4  3  3  0
## [162]  0  6 -2  1  0  0  0 -2  2 -2  0 -1  0  0  0  0 -1  0  0  1  0  2
# Sentence-level sentiment scores from the nrc lexicon
nrc_vector <- get_sentiment(s_v, method = "nrc")
print(nrc_vector)
##   [1]  1  0  2  1  0 -1  0  0  0  1  1  0 -2  0 -1  0  0  0  1  1  1  0  0
##  [24]  0  1  3  1  1  0  2  0  0  1  0  0  0  0  1  0  0 -1  1  2  1  0  1
##  [47]  0  1  0  1  1 -1  2  0  0  0  0  0  0  0 -2  0  0  0  0  0  0  0 -2
##  [70]  2  0  0  1 -1  0  0  0  0  1  1  0 -1  0  1  1 -1  1  1  0  0  0  1
##  [93] -1 -2  0  0  1 -1 -1 -1  1  0 -1  0  0 -2  0  0  0  0  1  0  0  0  2
## [116] -1 -1 -2 -1 -1 -1  0  0  0 -1  0  0  0  1  1  0  0  0  1  0  0 -1  1
## [139]  1  0  0  1  0  0  0  0  0  0  0  0  1  0  0  1  0  3  0  0  0  1  0
## [162]  0  2  0  0  0  0  0 -1  1 -1  0  0  0  0  0  0 -1  0  0  0  1  1
# Net bing score over all sentences
sum(sentiment_vector)
## [1] 26
# Average bing score per sentence
mean(sentiment_vector)
## [1] 0.1420765
# Quartiles, extremes and mean of the bing scores
summary(sentiment_vector)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.0000  0.0000  0.0000  0.1421  1.0000  2.0000
# NRC emotion / polarity counts for every sentence in s_v
nrc_data <- get_nrc_sentiment(s_v)

# Positions of sentences with a non-zero anger or joy count
angry_items <- which(nrc_data[["anger"]] > 0)
joy_items <- which(nrc_data[["joy"]] > 0)

# Show the sentences that registered joy
print(s_v[joy_items])
##  [1] "I love this."                                                                                                                                                                                                               
##  [2] "God what a fucking jam"                                                                                                                                                                                                     
##  [3] "This shit is soooo catchy <f0><U+009F><U+0098><U+00A9><f0><U+009F><U+0091><U+008C><f0><U+009F><U+0094><U+00A5><f0><U+009F><U+0094><U+00A5><f0><U+009F><U+0094><U+00A5> god I love my Health Class"                          
##  [4] "I still love this song!!"                                                                                                                                                                                                   
##  [5] "ok when face and arm is not ok than i take money ok."                                                                                                                                                                       
##  [6] "She has a deaf student this semester and would love to get it captioned."                                                                                                                                                   
##  [7] "They need to bring this back again to teach kids about strokes like how I learned when I was younger..."                                                                                                                    
##  [8] "This was an outstanding video that even shows a child being empowered to act when her grandmother had signs of a stroke."                                                                                                   
##  [9] "Love it!"                                                                                                                                                                                                                   
## [10] "Hi, that is really nice, is part of the idea of the song, my father had a stroke and thanks to a video like this (the spanish version) I was able to take him to the hospital in less than 5 min since the first sing came."
## [11] "I was sleeping till the music waked me up LooooL"                                                                                                                                                                           
## [12] "Good information though, I guess..."                                                                                                                                                                                        
## [13] "I'm a Paramedic and I sing this to my partners in the Ambulance."                                                                                                                                                           
## [14] "thank you >>>very good video"                                                                                                                                                                                               
## [15] "haha I love bio classes at my school!"                                                                                                                                                                                      
## [16] "This is good to make something for kid's to get."                                                                                                                                                                           
## [17] "My moms stroke wasn't caught fast enough, my mother passed away this morning around 3 am."                                                                                                                                  
## [18] "what's dick Clarke's favorite band?"                                                                                                                                                                                        
## [19] "I love this video, and the short commercial on T.V., it rocks!"                                                                                                                                                             
## [20] "WHATCHU TRYIN T'A SAY, WHITE BOI. I DOESN'T AFRAID."                                                                                                                                                                        
## [21] "@DisneylandWalt \n\nthe song is used to explain how to recognize a stroke not to entertain the audience"                                                                                                                    
## [22] "Very good song and great tips."                                                                                                                                                                                             
## [23] "This is why FAST was created, to create awareness of the fact that these isolated symptoms can be signs of a stroke and to emphasize on the importance of fast reaction."                                                   
## [24] "Hilarious and educational!"                                                                                                                                                                                                 
## [25] "pls if you see anyone or know anyone that has a stroke pls don't make fun of them."                                                                                                                                         
## [26] "If you would like to know or learn mire about stroke tell me I'll be more then happy to explain it to you."                                                                                                                 
## [27] "We saw this in a supervisor training-  It's really good- great impact:)"                                                                                                                                                    
## [28] "Funny AND helpful"                                                                                                                                                                                                          
## [29] "This is pretty funny, but if it helps just one person to recognize the symptoms of a stroke, I'm all for it!"                                                                                                               
## [30] "Very good video"                                                                                                                                                                                                            
## [31] "Strokes are kind of funny, and a cartoon music video commercial about strokes is absolutely hilarious."                                                                                                                     
## [32] "only if you want consistently good medical care"                                                                                                                                                                            
## [33] "+Melanie “screw you” ely Yes, that's important, because what if the victim doesn't have anyone around but their grandchildren/nieces/nephews?"
# Average number of emotion-word hits per sentence (total hits / sentences)
sum(nrc_data$trust) / nrow(nrc_data)
## [1] 0.2568306
sum(nrc_data$anticipation) / nrow(nrc_data)
## [1] 0.2131148
sum(nrc_data$disgust) / nrow(nrc_data)
## [1] 0.04918033
#pander::pandoc.table(nrc_data[, 1:8])
#pander::pandoc.table(nrc_data[, 9:10])

# Net polarity per sentence: positive count minus negative count
# (columns 10 and 9 of the get_nrc_sentiment() result)
valence <- nrc_data[["positive"]] - nrc_data[["negative"]]
print(valence)
##   [1]  1  0  2  1  0 -1  0  0  0  1  1  0 -2  0 -1  0  0  0  1  1  1  0  0
##  [24]  0  1  3  1  1  0  2  0  0  1  0  0  0  0  1  0  0 -1  1  2  1  0  1
##  [47]  0  1  0  1  1 -1  2  0  0  0  0  0  0  0 -2  0  0  0  0  0  0  0 -2
##  [70]  2  0  0  1 -1  0  0  0  0  1  1  0 -1  0  1  1 -1  1  1  0  0  0  1
##  [93] -1 -2  0  0  1 -1 -1 -1  1  0 -1  0  0 -2  0  0  0  0  1  0  0  0  2
## [116] -1 -1 -2 -1 -1 -1  0  0  0 -1  0  0  0  1  1  0  0  0  1  0  0 -1  1
## [139]  1  0  0  1  0  0  0  0  0  0  0  0  1  0  0  1  0  3  0  0  0  1  0
## [162]  0  2  0  0  0  0  0 -1  1 -1  0  0  0  0  0  0 -1  0  0  0  1  1
# Share of all emotion-word hits attributed to each of the eight emotions
# (first eight columns of nrc_data), drawn as a sorted horizontal bar chart
nrc_data[, 1:8] %>%
  prop.table() %>%
  colSums() %>%
  sort() %>%
  barplot(
    horiz = TRUE,
    cex.names = 0.7,
    las = 1,
    main = "Emotions in Sample text",
    xlab = "Percentage"
  )

data table

library(DT)
## Warning: package 'DT' was built under R version 3.4.3
# Put each sentence next to its own NRC scores. nrc_data was computed from
# s_v, so the two are bound column-wise, row for row. The previous
# merge(s_v, nrc_data) had no common columns and therefore returned the
# Cartesian product (every sentence paired with every score row), which is
# both wrong and the cause of the "too big for client-side DataTables" warning.
# NOTE: this reuses the name `df`, replacing the raw comment data loaded earlier.
df <- data.frame(sentence = s_v, nrc_data, stringsAsFactors = FALSE)

# Interactive table of sentences and their emotion counts
datatable(df)
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## http://rstudio.github.io/DT/server.html

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.