import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from scipy.cluster.hierarchy import linkage
热力图(Python)
热力图是一种使用颜色来表示矩阵中数值的数据可视化技术。在生物医学研究中,热力图对于可视化基因表达谱、相关矩阵、甲基化数据和药物反应面板至关重要。Python 的 seaborn 和 matplotlib 库提供了强大的热力图功能,并内置聚类支持。
示例

环境配置
- 系统要求:跨平台(Linux/MacOS/Windows)
- 编程语言:Python
- 依赖包:matplotlib、seaborn、pandas、numpy、scipy
数据准备
# Synthetic expression data: rows are genes, columns are samples.
# Seeding the global NumPy RNG makes the random draws repeatable between runs.
np.random.seed(42)
n_genes, n_samples = 30, 12
gene_names = [f'Gene_{idx}' for idx in range(1, n_genes + 1)]
sample_names = [f'Sample_{idx}' for idx in range(1, n_samples + 1)]
# Group labels by sample position: the first six are Tumor, the last six Normal.
groups = [label for label in ('Tumor', 'Normal') for _ in range(6)]
expr_matrix = np.random.randn(n_genes, n_samples)
# Shift two gene sets upward so each sample group carries its own signature:
# the first ten genes in the first six samples, the next ten in the last six.
expr_matrix[:10, :6] = expr_matrix[:10, :6] + 2.5
expr_matrix[10:20, 6:] = expr_matrix[10:20, 6:] + 2.0
expr_df = pd.DataFrame(expr_matrix, index=gene_names, columns=sample_names)
可视化
基础热力图
# Plain (unclustered) heatmap of expr_df using a diverging colormap centred
# on zero; tick labels are explicitly enabled on both axes.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    expr_df,
    ax=ax,
    cmap='RdBu_r',
    center=0,
    linewidths=0.5,
    xticklabels=True,
    yticklabels=True,
)
ax.set(title='Gene Expression Heatmap', xlabel='Samples', ylabel='Genes')
plt.tight_layout()
plt.show()
聚类热力图
# Clustered heatmap of expr_df (Ward linkage) with one colour per sample,
# chosen from its group label: red for Tumor, blue for anything else.
col_colors = ['#457b9d' if label != 'Tumor' else '#e63946' for label in groups]
g = sns.clustermap(
    expr_df,
    method='ward',
    cmap='RdBu_r',
    center=0,
    col_colors=col_colors,
    figsize=(10, 10),
    dendrogram_ratio=0.15,
    linewidths=0.3,
)
g.ax_heatmap.set(xlabel='Samples', ylabel='Genes')
# y slightly above 1 positions the title just past the top of the figure area.
plt.suptitle('Clustered Gene Expression Heatmap', y=1.02)
plt.show()
相关矩阵热力图
# Gene-gene correlation: transpose so genes become columns, then correlate
# every pair of columns.
corr = expr_df.transpose().corr()
# Boolean mask that hides the strict upper triangle (k=1 keeps the diagonal
# visible), so each gene pair is drawn only once.
mask = np.triu(np.ones_like(corr, dtype=bool), k=1)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    corr,
    ax=ax,
    mask=mask,
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=0.5,
    annot=False,
    fmt='.2f',
    cbar_kws={'shrink': 0.8, 'label': 'Pearson r'},
)
ax.set(title='Gene-Gene Correlation Matrix')
plt.tight_layout()
plt.show()
参考文献
- Wilkinson, L., & Friendly, M. (2009). The history of the cluster heat map. The American Statistician, 63(2), 179-184.
- Waskom, M. L. (2021). seaborn: statistical data visualization. JOSS, 6(60), 3021.
