# 安装包
if (!requireNamespace("data.table", quietly = TRUE)) {
install.packages("data.table")
}
if (!requireNamespace("jsonlite", quietly = TRUE)) {
install.packages("jsonlite")
}
if (!requireNamespace("ggalluvial", quietly = TRUE)) {
install.packages("ggalluvial")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
install.packages("ggplot2")
}
# 加载包
library(data.table)
library(jsonlite)
library(ggalluvial)
library(ggplot2)桑基图
注记
Hiplot 网站
本页面为 Hiplot Sankey 插件的源码版本教程,您也可以使用 Hiplot 网站实现无代码绘图,更多信息请查看以下链接:
桑基图是一种流量图,其中箭头的宽度与流量成比例。
环境配置
系统: Cross-platform (Linux/MacOS/Windows)
编程语言: R
依赖包:
data.table;jsonlite;ggalluvial;ggplot2
sessioninfo::session_info("attached")─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.5.2 (2025-10-31)
os Ubuntu 24.04.3 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate C.UTF-8
ctype C.UTF-8
tz UTC
date 2026-01-27
pandoc 3.1.3 @ /usr/bin/ (via rmarkdown)
quarto 1.8.27 @ /usr/local/bin/quarto
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
data.table * 1.18.0 2025-12-24 [1] RSPM
ggalluvial * 0.12.5 2023-02-22 [1] RSPM
ggplot2 * 4.0.1 2025-11-14 [1] RSPM
jsonlite * 2.0.0 2025-03-27 [1] RSPM
[1] /home/runner/work/_temp/Library
[2] /opt/R/4.5.2/lib/R/site-library
[3] /opt/R/4.5.2/lib/R/library
* ── Packages attached to the search path.
──────────────────────────────────────────────────────────────────────────────
数据准备
载入数据为 4 个变量及每4种变量组合下的频数。
# 加载数据
data <- data.table::fread(jsonlite::read_json("https://hiplot.cn/ui/basic/sankey/data.json")$exampleData$textarea[[1]])
data <- as.data.frame(data)
# 整理数据格式
value <- "Freq"
axis <- c("Class", "Sex")
usr_axis <- c()
for (i in seq_len(length(axis))) {
usr_axis <- c(usr_axis, axis[i])
assign(paste0("axis", i), axis[i])
}
index_axis <- match(usr_axis, colnames(data))
index_value <- match(value, colnames(data))
data1 <- data[, c(index_value, index_axis)]
## 定义带颜色
nlevels <- as.numeric(apply(data1[, -1], 2, function(data) {
return(length(unique(data)))
}))
band_color <- c("#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#8DD3C7", "#FFFFB3")
## 重命名数据
data_rename <- data1
colnames(data_rename) <- c(
"value",
paste("axis", seq_len(length(usr_axis)), sep = "")
)
# 查看数据
head(data) Class Sex Age Survived Freq
1 1st Male Child No 0
2 2nd Male Child No 0
3 3rd Male Child No 35
4 Crew Male Child No 0
5 1st Female Child No 0
6 2nd Female Child No 0
可视化
# 桑基图
p <- ggplot(data_rename, aes(y = value, axis1 = axis1, axis2 = axis2)) +
geom_alluvium(alpha = 1, aes(fill = data1[, colnames(data1) == "Sex"]),
width = 0, reverse = FALSE) +
scale_x_discrete(limits = usr_axis, expand = c(0.02, 0.1)) +
ylab("") +
scale_fill_discrete(name = "Sex") +
coord_flip() +
geom_stratum(alpha = 1, width = 1 / 8, reverse = FALSE, fill = band_color,
color = "white") +
geom_text(stat = "stratum", infer.label = TRUE, reverse = FALSE) +
ggtitle("Sankey plot") +
guides(fill = guide_legend(title = "Sex")) +
scale_fill_manual(values = c("#00468BFF", "#ED0000FF")) +
theme_bw() +
theme(text = element_text(family = "Arial"),
plot.title = element_text(size = 12,hjust = 0.5),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
axis.text.x = element_text(angle = 0, hjust = 0.5,vjust = 1),
legend.position = "right",
legend.direction = "vertical",
legend.title = element_text(size = 10),
legend.text = element_text(size = 10))
p
female 分流的颜色为蓝色,male 分流的颜色为红色,蓝色分流出去的宽度和等于 female 的总宽度。
