# Install packages
if (!requireNamespace("survival", quietly = TRUE)) {
install.packages("survival")
}
if (!requireNamespace("survminer", quietly = TRUE)) {
install.packages("survminer")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
install.packages("ggplot2")
}
if (!requireNamespace("dplyr", quietly = TRUE)) {
install.packages("dplyr")
}
if (!requireNamespace("tidyr", quietly = TRUE)) {
install.packages("tidyr")
}
if (!requireNamespace("zoo", quietly = TRUE)) {
install.packages("zoo")
}
if (!requireNamespace("patchwork", quietly = TRUE)) {
install.packages("patchwork")
}
# Load packages
library(survival)
library(survminer)
library(ggplot2)
library(dplyr)
library(tidyr)
library(zoo)
library(patchwork)Kaplan Meier Plot
Example

Survival analysis studies the time that passes before an event occurs. The most common application in clinical research is the estimation of mortality (predicting the survival time of patients), but survival analysis can also be applied to other fields such as mechanical failure time.
Setup
System Requirements: Cross-platform (Linux/MacOS/Windows)
Programming language: R
Dependent packages:
survival,survminer,ggplot2,dplyr,tidyr,zoo,patchwork
Data Preparation
# Using the built-in lung dataset (from the survival package)
data("lung")
# Data preprocessing
surv_data <- lung %>%
mutate(
status = ifelse(status == 2, 1, 0), # Transition state code (1=event)
sex = factor(sex, labels = c("Male", "Female")),
group = sample(c("Treatment", "Placebo"), n(), replace = TRUE)
)
# View data structure
glimpse(surv_data)Rows: 228
Columns: 11
$ inst <dbl> 3, 3, 3, 5, 1, 12, 7, 11, 1, 7, 6, 16, 11, 21, 12, 1, 22, 16β¦
$ time <dbl> 306, 455, 1010, 210, 883, 1022, 310, 361, 218, 166, 170, 654β¦
$ status <dbl> 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, β¦
$ age <dbl> 74, 68, 56, 57, 60, 74, 68, 71, 53, 61, 57, 68, 68, 60, 57, β¦
$ sex <fct> Male, Male, Male, Male, Male, Male, Female, Female, Male, Maβ¦
$ ph.ecog <dbl> 1, 0, 0, 1, 0, 1, 2, 2, 1, 2, 1, 2, 1, NA, 1, 1, 1, 2, 2, 1,β¦
$ ph.karno <dbl> 90, 90, 90, 90, 100, 50, 70, 60, 70, 70, 80, 70, 90, 60, 80,β¦
$ pat.karno <dbl> 100, 90, 90, 60, 90, 80, 60, 80, 80, 70, 80, 70, 90, 70, 70,β¦
$ meal.cal <dbl> 1175, 1225, NA, 1150, NA, 513, 384, 538, 825, 271, 1025, NA,β¦
$ wt.loss <dbl> NA, 15, 15, 11, 0, 0, 10, 1, 16, 34, 27, 23, 5, 32, 60, 15, β¦
$ group <chr> "Treatment", "Treatment", "Placebo", "Placebo", "Placebo", "β¦
# Survival time distribution
summary(surv_data$time) Min. 1st Qu. Median Mean 3rd Qu. Max.
5.0 166.8 255.5 305.2 396.5 1022.0
# Fitting survival curves
fit <- survfit(Surv(time, status) ~ group, data = surv_data)
# Extract curve data
surv_curve <- surv_summary(fit)
# Calculate the log-rank test P value
diff <- survdiff(Surv(time, status) ~ group, data = surv_data)
p_value <- signif(1 - pchisq(diff$chisq, length(diff$n)-1), 3)Visualization
1. Basic Plot
Use basic functions to draw the caption and description of the image.
# Basic survival curve
p1 <- ggplot(surv_curve, aes(x = time, y = surv, color = strata)) +
geom_step(linewidth = 1) +
labs(x = "Time (Months)", y = "Survival Probability") +
scale_y_continuous(labels = scales::percent)
p1
# Code Example (Taking the Supplementary Parameter bins as an example)-----
## Custom aesthetic mapping
group_colors <- c("Treatment" = "#E64B35", "Placebo" = "#3182BD")
line_types <- c("Treatment" = "solid", "Placebo" = "dashed")
# Complete plot code
p_km <- ggplot(surv_curve, aes(x = time, color = group, fill = group)) +
# First draw the confidence interval
geom_ribbon(aes(ymin = lower, ymax = upper),
alpha = 0.15,
colour = NA) +
# Draw the survival curve again
geom_step(aes(y = surv, linetype = group),
linewidth = 1.2) +
# Color Mapping
scale_color_manual(
values = group_colors,
name = "Treatment Group",
labels = c("Placebo", "Treatment")
) +
# Fill color map
scale_fill_manual(
values = group_colors,
guide = "none"
) +
# Line Mapping
scale_linetype_manual(
values = line_types,
guide = "none" # Share legend with colors
) +
# Coordinate axis optimization
scale_x_continuous(
name = "Time (Months)",
expand = c(0, 0),
breaks = seq(0, 1000, by = 100)
) +
scale_y_continuous(
name = "Survival Probability",
labels = scales::percent,
limits = c(0, 1)
) +
# Theme Settings
theme_classic(base_size = 12) +
theme(
legend.position = c(0.85, 0.85),
legend.background = element_rect(fill = "white", colour = "grey50"),
panel.grid.major.y = element_line(colour = "grey90")
)
p_km
2. More advanced plot
# ----------------------------
# Risk table data generation
# ----------------------------
# Generate quantile breakpoints (automatically adapt to data distribution)
custom_breaks <- quantile(surv_curve$time,
probs = seq(0, 1, 0.25),
na.rm = TRUE) %>%
unique() # Ensure breakpoints are unique
# How to correctly generate interval labels
time_labels <- paste0(
round(custom_breaks[-length(custom_breaks)]), # Starting point (excluding the last one)
"-",
round(custom_breaks[-1]) # End point (excluding the first one)
)
# Generate segmented risk table data --------------------------------------------------------
risk_table_segmented <- surv_curve %>%
mutate(
time_interval = cut(time,
breaks = custom_breaks,
include.lowest = TRUE,
right = FALSE,
labels = time_labels)
) %>%
group_by(group, time_interval) %>%
summarise(
n_risk = ifelse(all(is.na(n.risk)), 0, max(n.risk, na.rm = TRUE)), # Take the maximum number of people at risk within the interval
.groups = "drop"
) %>%
complete(group, time_interval, fill = list(n_risk = 0)) %>%
# 4. Added total row (fixed syntax)
bind_rows(
group_by(., group) %>%
summarise(
time_interval = "Total",
n_risk = sum(n_risk, na.rm = TRUE),
.groups = "drop"
)
) %>%
mutate(
group = factor(group, levels = c("Treatment", "Placebo")),
time_interval = factor(time_interval, levels = c(time_labels, "Total") )
)
# Plot of a segmented risk table with borders ----------------------------------------------------
risk_table_plot <- ggplot(risk_table_segmented,
aes(x = time_interval, y = group)) +
# # Cell background (with border)
# geom_tile(color = "gray50", fill = "white", linewidth = 0.5,
# width = 0.95, height = 0.95) +
# Numeric Label
geom_text(aes(label = n_risk), size = 4.5, fontface = "bold", color = "black") +
# Column header style
scale_x_discrete(
name = "Time Interval (Months)",
position = "top",
expand = expansion(add = 0.5)
) +
scale_y_discrete(
name = NULL,
limits = rev,
labels = c("Treatment\nGroup", "Placebo\nGroup") # Add group labels
) +
# Color system
scale_fill_manual(values = group_colors, guide = "none") +
# Theme System
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 45, hjust = 0, color = "black"),
axis.text.y = element_text(
color = group_colors,
face = "bold",
margin = margin(r = 15)
),
panel.grid = element_blank(),
plot.margin = margin(t = 30, b = 5, unit = "pt"),
axis.title.x = element_text(margin = margin(t = 15))
) +
# Add an outer border
annotate("rect",
xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf,
color = "gray30", fill = NA, linewidth = 0.8)
risk_table_plot
# ----------------------------
# Plot parameter settings
# ----------------------------
group_colors <- c("Treatment" = "#E64B35", "Placebo" = "#3182BD")
theme_set(theme_minimal(base_size = 12))
# ----------------------------
# KM survival curve drawing
# ----------------------------
p_km <- ggplot(surv_curve, aes(x = time, color = group, fill = group)) +
# First draw the confidence interval
geom_ribbon(aes(ymin = lower, ymax = upper),
alpha = 0.15,
colour = NA) +
# Draw the survival curve again
geom_step(aes(y = surv, linetype = group),
linewidth = 1.2) +
# Color Mapping
scale_color_manual(
values = group_colors,
name = "Treatment Group",
labels = c("Placebo", "Treatment")
) +
# Fill color map
scale_fill_manual(
values = group_colors,
guide = "none"
) +
# Line Mapping
scale_linetype_manual(
values = line_types,
guide = "none" # Share legend with colors
) +
# Coordinate axis optimization
scale_x_continuous(
name = "Time (Months)",
expand = c(0, 0),
breaks = seq(0, 1000, by = 100)
) +
scale_y_continuous(
name = "Survival Probability",
labels = scales::percent,
limits = c(0, 1)
) +
# Theme Settings
theme_classic(base_size = 12) +
theme(
legend.position = c(0.85, 0.85),
legend.background = element_rect(fill = "white", colour = "grey50"),
panel.grid.major.y = element_line(colour = "grey90")
)
p_km
# ----------------------------
# Graphic Combination
# ----------------------------
final_plot <- p_km / risk_table_plot +
plot_layout(heights = c(3, 0.4))
final_plot
Application
This figure shows the comparison of the generated survival time between the three groups of data: Standard, New Drug, and Combination.
Reference
[1] Kassambara A, et al.Β Survminer: Drawing Survival Curves using ggplot2. JOSS 2017
[2] Wickham H. ggplot2: Elegant Graphics for Data Analysis. Springer 2016
