-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwordcloud.r
More file actions
29 lines (24 loc) · 923 Bytes
/
wordcloud.r
File metadata and controls
29 lines (24 loc) · 923 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/usr/bin/env Rscript
# Build a word cloud from a directory of text documents.
#
# Usage: wordcloud.r [corpus_dir]
#   corpus_dir  Optional directory of documents to load. When omitted, the
#               script falls back to ~/Desktop/programming/R/texts.

# Required packages ----
library(tm)
library(wordcloud)

# Locate and load the corpus ----
# An optional first command-line argument overrides the default directory so
# the script is not tied to one machine's layout.
args <- commandArgs(trailingOnly = TRUE)
cname <- if (length(args) >= 1L && nzchar(args[[1L]])) {
  args[[1L]]
} else {
  file.path("~", "Desktop", "programming", "R", "texts")
}
docs <- Corpus(DirSource(cname))

# Transforms ----
# Replace every match of `pattern` with a single space.
toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))
# Turn "/", "@" and "|" into spaces before punctuation is stripped, so the
# words they separate are not glued together.
docs <- tm_map(docs, toSpace, "/|@|\\|")
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- tm_map(docs, stripWhitespace)
# NOTE(review): stemDocument is expected to need the SnowballC package
# installed -- confirm on the target machine.
docs <- tm_map(docs, stemDocument)

# Document term matrix ----
dtm <- DocumentTermMatrix(docs)

# Exploratory output. print() is explicit so the results are also shown when
# the file is run through source(), not only as top-level Rscript output.
print(findFreqTerms(dtm, lowfreq = 450))
print(findAssocs(dtm, "data", corlimit = 0.8))

# Total count of each term across all documents, most frequent first.
freq <- sort(colSums(as.matrix(dtm)), decreasing = TRUE)
# Same counts as a data frame; not used further down in this script.
wf <- data.frame(word = names(freq), freq = freq)

# Word cloud ----
# Uncomment to fix the RNG state before plotting.
#set.seed(142)
wordcloud(
  names(freq),
  freq,
  min.freq = 500,
  scale = c(5, .1),
  colors = brewer.pal(6, "Dark2")
)