using CairoMakie
using DataFrames
using Statistics
using Random

散点图(Julia)
散点图以点的集合展示两个连续变量的值。Julia 的 CairoMakie 包(Makie.jl 生态系统的一部分)提供了高性能的绘图功能,非常适合大型生物医学数据集。Makie 通过可组合的声明式 API 提供出版级别的渲染质量。
示例

环境配置
- 系统要求:跨平台(Linux/macOS/Windows)
- 编程语言:Julia
- 依赖包:
CairoMakie、DataFrames、Statistics、Random
数据准备
# Simulate an iris-like data set: three species with equally sized groups.
Random.seed!(42)  # fixed seed so the generated values are reproducible
n = 150
species_names = ["setosa", "versicolor", "virginica"]
# Group size is derived from `n` instead of repeating a literal 50 everywhere.
n_per_species = n ÷ length(species_names)
species = repeat(species_names, inner=n_per_species)
# Each measurement is drawn per species as `sd * N(0, 1) + mean`. All sepal
# lengths are drawn before any sepal width so the random stream is consumed
# in a fixed order.
sepal_length = [randn(n_per_species) .* 0.35 .+ 5.0;
                randn(n_per_species) .* 0.52 .+ 5.9;
                randn(n_per_species) .* 0.64 .+ 6.6]
sepal_width = [randn(n_per_species) .* 0.38 .+ 3.4;
               randn(n_per_species) .* 0.31 .+ 2.8;
               randn(n_per_species) .* 0.32 .+ 3.0]
iris_df = DataFrame(species=species, sepal_length=sepal_length, sepal_width=sepal_width)
150×3 DataFrame
125 rows omitted
| Row | species | sepal_length | sepal_width |
|---|---|---|---|
| | String | Float64 | Float64 |
| 1 | setosa | 4.87282 | 3.44153 |
| 2 | setosa | 5.08811 | 3.19805 |
| 3 | setosa | 4.88975 | 3.43855 |
| 4 | setosa | 4.89106 | 4.06589 |
| 5 | setosa | 5.28571 | 3.21915 |
| 6 | setosa | 5.16686 | 3.35007 |
| 7 | setosa | 4.69916 | 3.59429 |
| 8 | setosa | 4.48575 | 3.45141 |
| 9 | setosa | 4.25998 | 3.00572 |
| 10 | setosa | 5.01532 | 3.23638 |
| 11 | setosa | 4.71113 | 3.18741 |
| 12 | setosa | 5.2941 | 3.395 |
| 13 | setosa | 5.15186 | 3.6258 |
| ⋮ | ⋮ | ⋮ | ⋮ |
| 139 | virginica | 6.93975 | 2.62209 |
| 140 | virginica | 6.97755 | 3.70858 |
| 141 | virginica | 6.94162 | 3.20095 |
| 142 | virginica | 6.95693 | 2.73005 |
| 143 | virginica | 7.86145 | 3.01312 |
| 144 | virginica | 5.12471 | 2.88178 |
| 145 | virginica | 6.22286 | 3.06058 |
| 146 | virginica | 8.263 | 3.38505 |
| 147 | virginica | 6.10909 | 2.68394 |
| 148 | virginica | 5.49659 | 3.09755 |
| 149 | virginica | 7.40266 | 3.07188 |
| 150 | virginica | 6.43618 | 2.74329 |
可视化
基础散点图
# Basic scatter plot: sepal width against sepal length, one colour per species.
fig = Figure(size=(700, 500))
ax = Axis(
    fig[1, 1];
    title="Iris Scatter Plot",
    xlabel="Sepal Length (cm)",
    ylabel="Sepal Width (cm)",
)
colors_map = Dict("setosa" => :steelblue, "versicolor" => :coral, "virginica" => :green)
# Iterate in first-appearance order so the legend entries follow the data order.
for sp in unique(iris_df.species)
    rows = findall(==(sp), iris_df.species)  # row indices belonging to this species
    scatter!(
        ax,
        iris_df.sepal_length[rows],
        iris_df.sepal_width[rows];
        label=sp,
        color=colors_map[sp],
        alpha=0.7,
        markersize=10,
    )
end
axislegend(ax; position=:rt)
fig

带趋势线的散点图
# Scatter plot with per-group linear trend lines.
# Simulate the expression of two linearly related genes in tumor and normal samples.
Random.seed!(42)  # fixed seed so the figure is reproducible
n2 = 100
gene_a_tumor = randn(n2) .* 2 .+ 7
gene_b_tumor = 0.6 .* gene_a_tumor .+ randn(n2) .* 1.0 .+ 1.5
gene_a_normal = randn(n2) .* 2 .+ 5
gene_b_normal = 0.4 .* gene_a_normal .+ randn(n2) .* 1.2 .+ 2.0

fig2 = Figure(size=(700, 500))
ax2 = Axis(fig2[1,1], xlabel="Gene A Expression", ylabel="Gene B Expression",
           title="Gene Expression Correlation by Group")
scatter!(ax2, gene_a_tumor, gene_b_tumor, color=(:red, 0.5), markersize=8, label="Tumor")
scatter!(ax2, gene_a_normal, gene_b_normal, color=(:steelblue, 0.5), markersize=8, label="Normal")

# Overlay one ordinary-least-squares trend line per group.
for (x, y, col) in [(gene_a_tumor, gene_b_tumor, :red),
                    (gene_a_normal, gene_b_normal, :steelblue)]
    # OLS slope is cov(x, y) / var(x). The Statistics routines centre the data
    # first, which avoids the cancellation of the n*Σxy - Σx*Σy formula.
    slope = cov(x, y) / var(x)
    intercept = mean(y) - slope * mean(x)
    # A straight line is fully determined by its two end points, so there is
    # no need to sort and evaluate every x value.
    x_line = [minimum(x), maximum(x)]
    lines!(ax2, x_line, slope .* x_line .+ intercept, color=col, linewidth=2, linestyle=:dash)
end
axislegend(ax2, position=:lt)
fig2

密度散点图
# Density scatter plot: hexagonal binning of a large simulated point cloud.
Random.seed!(123)  # fixed seed so the figure is reproducible
n3 = 2000
x_dense = 3 .* randn(n3)
y_dense = 0.5 .* x_dense .+ 2 .* randn(n3)

fig3 = Figure(size=(700, 550))
ax3 = Axis(
    fig3[1, 1];
    title="Density Scatter",
    xlabel="Log2 Fold Change",
    ylabel="-Log10(P-value)",
)
# `hm` is passed to the colorbar, which is placed in the cell to the right of the axis.
hm = hexbin!(ax3, x_dense, y_dense; colormap=:viridis, cellsize=0.4)
Colorbar(fig3[1, 2], hm; label="Count")
fig3

参考文献
- Danisch, S., & Krumbiegel, J. (2021). Makie.jl: Flexible high-performance data visualization for Julia. JOSS, 6(65), 3349.
