# Install any required package that is not already available.
# requireNamespace() checks for a package without attaching it, so this
# step only installs; the packages are attached later with library().
required_pkgs <- c("data.table", "jsonlite", "ggwordcloud", "curl", "png")
for (pkg in required_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
# 加载包
library(data.table)
library(jsonlite)
library(ggwordcloud)
library(curl)
library(png)
ggplot2 词云
词云是通过形成“关键字云层”或“关键字渲染”来可视化Web文本中经常出现的“关键字”。
环境配置
系统: Cross-platform (Linux/MacOS/Windows)
编程语言: R
依赖包: data.table; jsonlite; ggwordcloud; curl; png
数据准备
加载示例数据：名词及其出现频率。
# Load the example data: the demo JSON stores the table as text in
# exampleData$textarea[[1]], which fread() parses into a table.
data <- data.table::fread(
  jsonlite::read_json(
    "https://hiplot.cn/ui/basic/ggwordcloud/data.json"
  )$exampleData$textarea[[1]]
)
# Convert to a plain data.frame so data[, 2] below returns a vector
data <- as.data.frame(data)
# URL of the PNG image used later as the word-cloud mask
inmask <- "https://download.hiplot.cn/api/file/fetch/?path=public/demo/ggwordcloud/hearth.png"
# Prepare the data: copy the second column (the frequency) into a new
# column `col`, which the plot maps to colour
col <- data[, 2]
data <- cbind(data, col)
# Preview the data
head(data)
word freq col
1 oil 85 85
2 said 73 73
3 prices 48 48
4 opec 42 42
5 mln 31 31
6 the 26 26
可视化
# ggplot2 word cloud: each word is a text label whose size is mapped to
# `freq` and whose colour is mapped to `col`.
p <- ggplot(data, aes(label = word, size = freq, color = col)) +
  scale_size_area(max_size = 40) +
  theme_minimal() +
  geom_text_wordcloud_area(
    # Download the mask image into memory and decode the raw PNG bytes
    mask = png::readPNG(curl::curl_fetch_memory(inmask)$content),
    rm_outside = TRUE
  ) +
  # Colour gradient from dark red (low values) to bright red (high values)
  scale_color_gradient(low = "#8B0000", high = "#FF0000")
# Print the plot
p

词云图中每个名词的显示大小由其出现频率决定：频率越高，名词显示得越大。