# Install packages
if (!requireNamespace("survival", quietly = TRUE)) {
install.packages("survival")
}if (!requireNamespace("survminer", quietly = TRUE)) {
install.packages("survminer")
}if (!requireNamespace("ggplot2", quietly = TRUE)) {
install.packages("ggplot2")
}if (!requireNamespace("dplyr", quietly = TRUE)) {
install.packages("dplyr")
}if (!requireNamespace("tidyr", quietly = TRUE)) {
install.packages("tidyr")
}if (!requireNamespace("zoo", quietly = TRUE)) {
install.packages("zoo")
}if (!requireNamespace("patchwork", quietly = TRUE)) {
install.packages("patchwork")
}
# Load packages
library(survival)
library(survminer)
library(ggplot2)
library(dplyr)
library(tidyr)
library(zoo)
library(patchwork)
Kaplan Meier Plot
Example
Survival analysis studies the time that passes before an event occurs. The most common application in clinical research is the estimation of mortality (predicting the survival time of patients), but survival analysis can also be applied to other fields such as mechanical failure time.
Setup
System Requirements: Cross-platform (Linux/MacOS/Windows)
Programming language: R
Dependent packages:
survival
,survminer
,ggplot2
,dplyr
,tidyr
,zoo
,patchwork
Data Preparation
# Using the built-in lung dataset (from the survival package)
data("lung")
# Data preprocessing
<- lung %>%
surv_data mutate(
status = ifelse(status == 2, 1, 0), # Transition state code (1=event)
sex = factor(sex, labels = c("Male", "Female")),
group = sample(c("Treatment", "Placebo"), n(), replace = TRUE)
)
# View data structure
glimpse(surv_data)
Rows: 228
Columns: 11
$ inst <dbl> 3, 3, 3, 5, 1, 12, 7, 11, 1, 7, 6, 16, 11, 21, 12, 1, 22, 16β¦
$ time <dbl> 306, 455, 1010, 210, 883, 1022, 310, 361, 218, 166, 170, 654β¦
$ status <dbl> 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, β¦
$ age <dbl> 74, 68, 56, 57, 60, 74, 68, 71, 53, 61, 57, 68, 68, 60, 57, β¦
$ sex <fct> Male, Male, Male, Male, Male, Male, Female, Female, Male, Maβ¦
$ ph.ecog <dbl> 1, 0, 0, 1, 0, 1, 2, 2, 1, 2, 1, 2, 1, NA, 1, 1, 1, 2, 2, 1,β¦
$ ph.karno <dbl> 90, 90, 90, 90, 100, 50, 70, 60, 70, 70, 80, 70, 90, 60, 80,β¦
$ pat.karno <dbl> 100, 90, 90, 60, 90, 80, 60, 80, 80, 70, 80, 70, 90, 70, 70,β¦
$ meal.cal <dbl> 1175, 1225, NA, 1150, NA, 513, 384, 538, 825, 271, 1025, NA,β¦
$ wt.loss <dbl> NA, 15, 15, 11, 0, 0, 10, 1, 16, 34, 27, 23, 5, 32, 60, 15, β¦
$ group <chr> "Placebo", "Placebo", "Placebo", "Treatment", "Placebo", "Trβ¦
# Survival time distribution
summary(surv_data$time)
Min. 1st Qu. Median Mean 3rd Qu. Max.
5.0 166.8 255.5 305.2 396.5 1022.0
# Fitting survival curves
<- survfit(Surv(time, status) ~ group, data = surv_data)
fit
# Extract curve data
<- surv_summary(fit)
surv_curve
# Calculate the log-rank test P value
<- survdiff(Surv(time, status) ~ group, data = surv_data)
diff <- signif(1 - pchisq(diff$chisq, length(diff$n)-1), 3) p_value
Visualization
1. Basic Plot
Use basic functions to draw the caption and description of the image.
# Basic survival curve
<- ggplot(surv_curve, aes(x = time, y = surv, color = strata)) +
p1 geom_step(linewidth = 1) +
labs(x = "Time (Months)", y = "Survival Probability") +
scale_y_continuous(labels = scales::percent)
p1

# Code Example (Taking the Supplementary Parameter bins as an example)-----
## Custom aesthetic mapping
<- c("Treatment" = "#E64B35", "Placebo" = "#3182BD")
group_colors <- c("Treatment" = "solid", "Placebo" = "dashed")
line_types
# Complete plot code
<- ggplot(surv_curve, aes(x = time, color = group, fill = group)) +
p_km # First draw the confidence interval
geom_ribbon(aes(ymin = lower, ymax = upper),
alpha = 0.15,
colour = NA) +
# Draw the survival curve again
geom_step(aes(y = surv, linetype = group),
linewidth = 1.2) +
# Color Mapping
scale_color_manual(
values = group_colors,
name = "Treatment Group",
labels = c("Placebo", "Treatment")
+
) # Fill color map
scale_fill_manual(
values = group_colors,
guide = "none"
+
) # Line Mapping
scale_linetype_manual(
values = line_types,
guide = "none" # Share legend with colors
+
) # Coordinate axis optimization
scale_x_continuous(
name = "Time (Months)",
expand = c(0, 0),
breaks = seq(0, 1000, by = 100)
+
) scale_y_continuous(
name = "Survival Probability",
labels = scales::percent,
limits = c(0, 1)
+
) # Theme Settings
theme_classic(base_size = 12) +
theme(
legend.position = c(0.85, 0.85),
legend.background = element_rect(fill = "white", colour = "grey50"),
panel.grid.major.y = element_line(colour = "grey90")
) p_km

2. More advanced plot
# ----------------------------
# Risk table data generation
# ----------------------------
# Generate quantile breakpoints (automatically adapt to data distribution)
<- quantile(surv_curve$time,
custom_breaks probs = seq(0, 1, 0.25),
na.rm = TRUE) %>%
unique() # Ensure breakpoints are unique
# How to correctly generate interval labels
<- paste0(
time_labels round(custom_breaks[-length(custom_breaks)]), # Starting point (excluding the last one)
"-",
round(custom_breaks[-1]) # End point (excluding the first one)
)
# Generate segmented risk table data --------------------------------------------------------
<- surv_curve %>%
risk_table_segmented mutate(
time_interval = cut(time,
breaks = custom_breaks,
include.lowest = TRUE,
right = FALSE,
labels = time_labels)
%>%
) group_by(group, time_interval) %>%
summarise(
n_risk = ifelse(all(is.na(n.risk)), 0, max(n.risk, na.rm = TRUE)), # Take the maximum number of people at risk within the interval
.groups = "drop"
%>%
) complete(group, time_interval, fill = list(n_risk = 0)) %>%
# 4. Added total row (fixed syntax)
bind_rows(
group_by(., group) %>%
summarise(
time_interval = "Total",
n_risk = sum(n_risk, na.rm = TRUE),
.groups = "drop"
)%>%
) mutate(
group = factor(group, levels = c("Treatment", "Placebo")),
time_interval = factor(time_interval, levels = c(time_labels, "Total") )
)
# Plot of a segmented risk table with borders ----------------------------------------------------
<- ggplot(risk_table_segmented,
risk_table_plot aes(x = time_interval, y = group)) +
# # Cell background (with border)
# geom_tile(color = "gray50", fill = "white", linewidth = 0.5,
# width = 0.95, height = 0.95) +
# Numeric Label
geom_text(aes(label = n_risk), size = 4.5, fontface = "bold", color = "black") +
# Column header style
scale_x_discrete(
name = "Time Interval (Months)",
position = "top",
expand = expansion(add = 0.5)
+
) scale_y_discrete(
name = NULL,
limits = rev,
labels = c("Treatment\nGroup", "Placebo\nGroup") # Add group labels
+
) # Color system
scale_fill_manual(values = group_colors, guide = "none") +
# Theme System
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 45, hjust = 0, color = "black"),
axis.text.y = element_text(
color = group_colors,
face = "bold",
margin = margin(r = 15)
),panel.grid = element_blank(),
plot.margin = margin(t = 30, b = 5, unit = "pt"),
axis.title.x = element_text(margin = margin(t = 15))
+
) # Add an outer border
annotate("rect",
xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf,
color = "gray30", fill = NA, linewidth = 0.8)
risk_table_plot
# ----------------------------
# Plot parameter settings
# ----------------------------
<- c("Treatment" = "#E64B35", "Placebo" = "#3182BD")
group_colors theme_set(theme_minimal(base_size = 12))
# ----------------------------
# KM survival curve drawing
# ----------------------------
<- ggplot(surv_curve, aes(x = time, color = group, fill = group)) +
p_km # First draw the confidence interval
geom_ribbon(aes(ymin = lower, ymax = upper),
alpha = 0.15,
colour = NA) +
# Draw the survival curve again
geom_step(aes(y = surv, linetype = group),
linewidth = 1.2) +
# Color Mapping
scale_color_manual(
values = group_colors,
name = "Treatment Group",
labels = c("Placebo", "Treatment")
+
) # Fill color map
scale_fill_manual(
values = group_colors,
guide = "none"
+
) # Line Mapping
scale_linetype_manual(
values = line_types,
guide = "none" # Share legend with colors
+
) # Coordinate axis optimization
scale_x_continuous(
name = "Time (Months)",
expand = c(0, 0),
breaks = seq(0, 1000, by = 100)
+
) scale_y_continuous(
name = "Survival Probability",
labels = scales::percent,
limits = c(0, 1)
+
) # Theme Settings
theme_classic(base_size = 12) +
theme(
legend.position = c(0.85, 0.85),
legend.background = element_rect(fill = "white", colour = "grey50"),
panel.grid.major.y = element_line(colour = "grey90")
)
p_km
# ----------------------------
# Graphic Combination
# ----------------------------
<- p_km / risk_table_plot +
final_plot plot_layout(heights = c(3, 0.4))
final_plot

Application

This figure shows the comparison of the generated survival time between the three groups of data: Standard, New Drug, and Combination.
Reference
[1] Kassambara A, et al.Β Survminer: Drawing Survival Curves using ggplot2. JOSS 2017
[2] Wickham H. ggplot2: Elegant Graphics for Data Analysis. Springer 2016