# Install packages
# Each dependency is installed only when it is not already available.
# data.table and jsonlite come from CRAN.
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
if (!requireNamespace("jsonlite", quietly = TRUE)) {
  install.packages("jsonlite")
}
# ComplexHeatmap is installed through BiocManager, so make sure the
# installer itself is present before calling it.
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("ComplexHeatmap")
}
# Load packages
library(data.table)
library(jsonlite)
library(ComplexHeatmap)
Corrplot Big Data
Note
Hiplot website
This page is the tutorial for the source code version of the Hiplot Corrplot Big Data
plugin. You can also use the Hiplot website to achieve no-code plotting. For more information, please see the Hiplot website.
The correlation heat map is a graph that analyzes the correlation between two or more variables.
Setup
System Requirements: Cross-platform (Linux/MacOS/Windows)
Programming language: R
Dependent packages: data.table; jsonlite; ComplexHeatmap
Data Preparation
The loaded data are the gene names and the expression of each sample.
# Load data
# The example table is stored as text inside a JSON document; fetch the JSON,
# pull out the text, and let fread() parse it into a table.
example_url <- "https://hiplot.cn/ui/basic/big-corrplot/data.json"
example_text <- jsonlite::read_json(example_url)$exampleData$textarea[[1]]
data <- data.table::fread(example_text)
data <- as.data.frame(data)

# convert data structure
# Drop rows whose first column (used as the row identifier) is missing.
data <- data[!is.na(data[, 1]), ]
# Row names must be unique, so tag repeated identifiers with a running
# "--dup-<n>" suffix before using them as row names.
idx <- duplicated(data[, 1])
data[idx, 1] <- paste0(data[idx, 1], "--dup-", cumsum(idx)[idx])
rownames(data) <- data[, 1]
# drop = FALSE keeps a data frame even if only one value column remains.
data <- data[, -1, drop = FALSE]

# Coerce every column of a data frame to numeric, keeping its shape and names.
str2num_df <- function(x) {
  x[] <- lapply(x, function(l) as.numeric(l))
  x
}

# Transpose so the original rows become columns; cor() correlates columns.
tmp <- t(str2num_df(data))
corr <- round(cor(tmp, use = "na.or.complete", method = "pearson"), 3)

# View data
# Preview the first (up to) five columns of the correlation matrix.
head(corr[, seq_len(min(5L, ncol(corr))), drop = FALSE])
RGL4 MPP7 UGCG CYSTM1 ANXA2
RGL4 1.000 0.914 0.929 0.936 -0.592
MPP7 0.914 1.000 0.852 0.907 -0.543
UGCG 0.929 0.852 1.000 0.956 -0.440
CYSTM1 0.936 0.907 0.956 1.000 -0.358
ANXA2 -0.592 -0.543 -0.440 -0.358 1.000
ENDOD1 -0.908 -0.862 -0.791 -0.762 0.826
Visualization
# Corrplot Big Data
# Draw the correlation matrix `corr` as a clustered heatmap.
# - col: 50-step palette interpolated from blue through white to red.
# - rows and columns are both clustered with Euclidean distance and the
#   "ward.D2" method; the dendrograms themselves are hidden.
# - row and column labels use an 8 pt font.
p <- ComplexHeatmap::Heatmap(
  corr,
  col = colorRampPalette(c("#4477AA", "#FFFFFF", "#BB4444"))(50),
  clustering_distance_rows = "euclidean",
  clustering_method_rows = "ward.D2",
  clustering_distance_columns = "euclidean",
  clustering_method_columns = "ward.D2",
  show_column_dend = FALSE,
  show_row_dend = FALSE,
  column_names_gp = grid::gpar(fontsize = 8),
  row_names_gp = grid::gpar(fontsize = 8)
)
# Printing the object renders the heatmap.
p

Red indicates a positive correlation between two genes, blue indicates a negative correlation between two genes, and the number in each cell indicates the correlation coefficient.