# Install packages
# Each dependency is installed only when it is not already available, so
# re-running the script does not reinstall anything.
if (!requireNamespace("ggrepel", quietly = TRUE)) {
  install.packages("ggrepel")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}

# Load packages
library(ggrepel)
library(ggplot2)
Line Regression
Hiplot website
This page is the tutorial for the source code version of the Hiplot Line Regression
plugin. You can also use the Hiplot website to achieve no-code plotting. For more information, please see the following link:
Linear regression is a regression method for linear modeling of the relationship between independent variables and dependent variables. If there is only one independent variable, it is called simple regression, and if there is more than one independent variable, it is called multiple regression.
Setup
System Requirements: Cross-platform (Linux/MacOS/Windows)
Programming language: R
Dependent packages:
ggrepel; ggplot2
Data Preparation
The loaded data contain an independent variable (value1), a dependent variable (value2), and a grouping column (group).
# Load data
# Tab-delimited text file whose first row holds the column names.
data <- read.delim("files/Hiplot/094-line-regression-data.txt", header = TRUE)

# Convert data structure
# Turn `group` into a factor whose levels follow the order in which the groups
# first appear in the file (instead of the default alphabetical order).
data$group <- factor(data$group, levels = unique(data$group))

# View data
head(data)
value1 value2 group
1 36.8 29.44 G1
2 54.0 43.20 G1
3 26.0 26.00 G1
4 39.0 31.20 G1
5 33.0 29.70 G1
6 29.0 34.80 G1
Visualization
# Line Regression
## Defining the equation
# Build a plotmath label describing a fitted simple linear model.
#
# Arguments:
#   x      A fitted model with an intercept and one predictor, such as the
#          result of `lm(y ~ x)`; it must work with `coef()` and `summary()`.
#   add_p  If TRUE, the p-value of the slope is appended to the label.
#
# Returns:
#   An expression of the form "y = a + b . x, R^2 = r2" (optionally followed by
#   ", p = pval"), where a, b and r2 are rounded to two decimals and pval is
#   inserted unrounded.
equation <- function(x, add_p = FALSE) {
  xs <- summary(x)
  lm_coef <- list(
    a = as.numeric(round(coef(x)[1], digits = 2)),
    b = as.numeric(round(coef(x)[2], digits = 2)),
    r2 = round(xs$r.squared, digits = 2),
    # Row 2 is the slope, column 4 its p-value. The component name is spelled
    # out in full rather than relying on partial matching of `$coef`.
    pval = xs$coefficients[2, 4]
  )
  # substitute() replaces the symbols a, b, r2 and pval in the plotmath
  # template with the numeric values stored in lm_coef.
  if (add_p) {
    lm_eq <- substitute(
      italic(y) == a + b %.% italic(x) * "," ~ ~
        italic(R)^2 ~ "=" ~ r2 * "," ~ ~ italic(p) ~ "=" ~ pval,
      lm_coef
    )
  } else {
    lm_eq <- substitute(
      italic(y) == a + b %.% italic(x) * "," ~ ~
        italic(R)^2 ~ "=" ~ r2,
      lm_coef
    )
  }
  as.expression(lm_eq)
}
## Plot
# Scatter plot of value2 against value1, coloured by group, with one linear
# fit per group and marginal rugs.
p <- ggplot(data, aes(x = value1, y = value2, colour = group)) +
  geom_point(show.legend = TRUE) +
  # Per-group least-squares line, drawn with its confidence band (se = TRUE).
  geom_smooth(method = "lm", se = TRUE, show.legend = FALSE) +
  # Rug marks along the bottom ("b") and left ("l") axes.
  geom_rug(sides = "bl", size = 1, show.legend = FALSE) +
  # NOTE(review): only two colours are supplied, so data with more than two
  # groups needs additional values here.
  scale_color_manual(values = c("#00468BFF", "#ED0000FF")) +
  ggtitle("Line Reguression Plot") +
  theme_bw() +
  theme(
    text = element_text(family = "Arial"),
    plot.title = element_text(size = 12, hjust = 0.5),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 1),
    legend.position = "right",
    legend.direction = "vertical",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 10)
  )
## Add annotations for each group using ggrepel
# One label per data row; every row stays empty except, for each group, the
# row holding that group's largest value2, which receives the group's
# regression equation.
repels <- rep("", nrow(data))
for (g in unique(data$group)) {
  group_rows <- which(data$group == g)
  fit <- lm(value2 ~ value1, data = data[group_rows, ])
  # Pick the labelled row from within this group only. Matching the maximum
  # against every row of `data` could attach the label to a point of another
  # group that happens to share the same value2.
  label_row <- group_rows[which.max(data$value2[group_rows])]
  repels[label_row] <- equation(fit, add_p = FALSE)
}
p <- p + geom_text_repel(
  data = data,
  label = repels,
  size = 4,
  force = 5,
  label.padding = 5,
  na.rm = TRUE,
  min.segment.length = 100,
  show.legend = FALSE,
  nudge_x = 0,
  nudge_y = 0
)
p

Different colors represent different groups, and the linear regression equation of each group can be added to the plot. The closer R squared is to 1, the better the fitted line matches the observed data.