Bar Plot

Authors

A bar plot is a graph that uses the height or length of each bar to represent the value associated with a category.

Example

Bar Plot DEMO

As shown in the figure above, each group of bars reflects the expression of five genes.

Setup

  • System Requirements: Cross-platform (Linux/MacOS/Windows)

  • Programming Language: R

  • Dependencies: magrittr; tidyr; ggplot2; cowplot; forcats; dplyr; hrbrthemes; ggpattern; rstatix; ggpubr; palmerpenguins; readr

# Install packages
# Every CRAN package this tutorial loads or calls. readr is included because
# the data-preparation step calls `readr::read_csv()` without attaching it.
for (pkg in c(
  "magrittr", "tidyr", "ggplot2", "cowplot", "forcats", "dplyr",
  "hrbrthemes", "ggpattern", "ggpubr", "rstatix", "palmerpenguins", "readr"
)) {
  # requireNamespace() tests availability without attaching the package,
  # so packages are only installed when they are actually missing.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Load packages
library(magrittr)
library(tidyr)
library(ggplot2)
library(cowplot)
library(forcats)
library(dplyr)
library(hrbrthemes)
library(ggpattern)
library(rstatix)
library(ggpubr)
library(palmerpenguins)

Data Preparation

The examples use gene-expression data from TCGA together with datasets that ship with R packages (`penguins` from palmerpenguins and `mpg` from ggplot2).

# Download the processed TCGA-BRCA HTSeq count table.
# NOTE(review): the code below assumes one row per gene, with the sample
# columns in positions 3 to 1219 — confirm against the CSV header.
data_TCGA <- readr::read_csv(
  "https://bizard-1301043367.cos.ap-guangzhou.myqcloud.com/TCGA-BRCA.htseq_counts_processed.csv"
)

# Keep the first five rows and reshape them to long format
# (one row per gene/sample pair). pivot_longer() replaces the superseded
# gather(); the per-gene order of values is unchanged, so the summaries
# below are identical.
data_TCGA1 <- data_TCGA[1:5, ] %>%
  pivot_longer(
    cols = 3:1219,
    names_to = "sample",
    values_to = "gene_expression"
  )

# Mean expression of each gene across all samples
data_tcga_mean <- aggregate(
  data_TCGA1$gene_expression,
  by = list(data_TCGA1$gene_name),
  FUN = mean
)
colnames(data_tcga_mean) <- c("gene", "expression")

# Standard deviation of each gene across all samples
data_tcga_sd <- aggregate(
  data_TCGA1$gene_expression,
  by = list(data_TCGA1$gene_name),
  FUN = sd
)
colnames(data_tcga_sd) <- c("gene", "sd")

# One row per gene with both summary statistics (used for the error bars)
data_tcga <- merge(data_tcga_mean, data_tcga_sd, by = "gene")

# Working copy of the penguins data
data_penguins <- penguins

# Mean flipper length for every species/sex combination
data_penguins_flipper_length <- setNames(
  aggregate(
    data_penguins[["flipper_length_mm"]],
    by = list(data_penguins[["species"]], data_penguins[["sex"]]),
    FUN = mean
  ),
  c("species", "sex", "flipper_length_mm")
)

# Working copy of the mpg data
data_mpg <- mpg

Visualization

1. Basic plot

1.1 Basic bar plot

Here we take the TCGA database data as an example.

# Basic bar plot: one bar per gene, bar height = mean expression.
# geom_col() is the shorthand for geom_bar(stat = "identity").
p <- ggplot(data = data_tcga_mean, mapping = aes(x = gene, y = expression)) +
  geom_col()

p
Figure 1: Basic bar plot

The plot shows the mean expression of each of the five genes across all samples.

1.2 Horizontal bar plot

Use coord_flip() to flip the coordinate axes.

# Horizontal bar plot
p <- ggplot(
  # Order the gene factor by expression so the bars appear sorted
  data = mutate(data_tcga_mean, gene = fct_reorder(gene, expression)),
  mapping = aes(x = gene, y = expression)
) +
  geom_bar(stat = "identity", width = 0.5) + # narrower bars
  coord_flip() # swap the x and y axes

p
Figure 2: Horizontal bar plot

1.3 Color settings

ggplot2 can implement a variety of color settings. Here we provide six examples.

# Color settings
# The six panels differ only in how the bars are colored, so the shared
# pieces are defined once and reused.

# Value labels above each bar plus a common y axis, so that the panels are
# directly comparable. Returned as a list: adding a list to a ggplot adds
# each element in turn.
bar_value_labels <- function() {
  list(
    geom_text(
      aes(label = round(expression, 2)), # Add data labels
      position = position_dodge2(width = 0.9, preserve = "single"),
      vjust = -0.2, hjust = 0.5
    ),
    scale_y_continuous(limits = c(0, 17), breaks = seq(0, 17, 5))
  )
}

# Centered title with the legend hidden
centered_title_no_legend <- theme(
  plot.title = element_text(hjust = 0.5),
  legend.position = "none"
)

# Base plot for the panels whose fill is mapped to the gene
bars_by_gene <- ggplot(
  data_tcga_mean,
  aes(x = gene, y = expression, fill = gene)
) +
  geom_bar(stat = "identity")

# Panel A: one fixed fill and outline color for every bar
p1 <- ggplot(data_tcga_mean, aes(x = gene, y = expression)) +
  geom_bar(stat = "identity", color = "blue", fill = rgb(0.1, 0.4, 0.5, 0.7)) +
  labs(title = "Unified Color") +
  theme(plot.title = element_text(hjust = 0.5)) +
  bar_value_labels()

# Panel B: default hue scale with reduced chroma
p2 <- bars_by_gene +
  scale_fill_hue(c = 40) +
  labs(title = "Gradient Color") +
  centered_title_no_legend +
  bar_value_labels()

# Panels C and D: RColorBrewer palettes
p3 <- bars_by_gene +
  scale_fill_brewer(palette = "Set1") +
  labs(title = "RColorBrewer palette 1") +
  centered_title_no_legend +
  bar_value_labels()

p4 <- bars_by_gene +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "RColorBrewer palette 2") +
  centered_title_no_legend +
  bar_value_labels()

# Panel E: grey scale
p5 <- bars_by_gene +
  scale_fill_grey(start = 0.25, end = 0.75) +
  labs(title = "Grayscale") +
  centered_title_no_legend +
  bar_value_labels()

# Panel F: colors chosen by hand, one per gene
p6 <- bars_by_gene +
  scale_fill_manual(values = c("red", "green", "blue", "pink", "yellow")) +
  labs(title = "Manual settings") +
  centered_title_no_legend +
  bar_value_labels()

plot_grid(p1, p2, p3, p4, p5, p6, labels = LETTERS[1:6], ncol = 3)
Figure 3: Color settings

1.4 Grayscale bar plot vs. textured bar plot

Besides handling color settings, ggplot2 can also draw grayscale bar charts and textured bar charts.

# Grayscale bar plot
p <- ggplot(data = data_tcga_mean) +
  aes(x = gene, y = expression, fill = gene) +
  geom_bar(stat = "identity", width = 0.4, alpha = 0.6) +
  # Grey levels run from 0 to 0.8
  scale_fill_grey(start = 0, end = 0.8) +
  theme_bw()
p
Figure 4: Grayscale bar plot
# Textured bar plot: pattern type, angle and spacing are all mapped to gene
p <- ggplot(data = data_tcga_mean, mapping = aes(x = gene, y = expression)) +
  geom_col_pattern(
    mapping = aes(
      pattern = gene,
      pattern_angle = gene,
      pattern_spacing = gene
    ),
    fill = "white",
    colour = "black",
    pattern_density = 0.5,
    pattern_fill = "black",
    pattern_colour = "darkgrey"
  ) +
  theme_bw()
p
Figure 5: textured bar plot
Tip

Key parameters:

  • fill: The base fill color behind the pattern
  • colour: The border color of the bar
  • pattern_density: Approximate fraction of the bar area covered by the pattern
  • pattern_fill: Fill color of the pattern elements
  • pattern_colour: Outline (stroke) color of the pattern elements

2. Variable Width Bar plot

A variable-width bar chart encodes two quantities at once: the bar height shows the value of each group, and the bar width shows the number of samples in that group.

# Variable Width Bar plot----
# One row per group: the plotted value and the number of observations
data <- data.frame(
  group = c("A ", "B ", "C ", "D "),
  value = c(33, 62, 56, 67),
  number_of_obs = c(100, 500, 459, 342)
)

# Left and right edges of each bar: the width equals the number of
# observations, and consecutive bars are separated by a gap of 30 units.
data[["right"]] <- cumsum(data[["number_of_obs"]]) +
  30 * (seq_len(nrow(data)) - 1)
data[["left"]] <- data[["right"]] - data[["number_of_obs"]]

# plot
p <- ggplot(data = data, mapping = aes(ymin = 0)) +
  geom_rect(
    mapping = aes(
      xmin = left, xmax = right, # horizontal extent of each bar
      ymax = value, # bar height
      colour = group, fill = group
    ),
    alpha = 0.8
  ) +
  labs(x = "number of obs", y = "value") +
  theme_ipsum() +
  scale_fill_brewer(palette = "Set1") +
  theme(legend.position = "none")
p
Figure 6: Variable Width Bar plot

The above graph shows the sample size using bar width.

3. Error bar plot

Bar charts allow for error bars to be added, which we can do with geom_errorbar.

3.1 Basic error bar plot

# Basic error bar plot
# Bars show the mean expression per gene; the whiskers span mean - sd to
# mean + sd.
p <- ggplot(data_tcga) +
  geom_bar(
    aes(x = gene, y = expression),
    stat = "identity", fill = "skyblue", alpha = 0.7
  ) +
  geom_errorbar(
    aes(x = gene, ymin = expression - sd, ymax = expression + sd),
    # `linewidth` sets the line thickness; using `size` for lines is
    # deprecated since ggplot2 3.4.0.
    width = 0.4, colour = "orange", alpha = 0.9, linewidth = 1
  ) +
  labs(title = "Basic error bar plot") +
  theme(plot.title = element_text(hjust = 0.5))
p
Figure 7: Basic error bar plot

The above figure shows the expression level of each gene and adds standard deviation error bars.

3.2 Various types of error bar plot

ggplot2 also provides several other geoms for drawing error bars; four examples are shown here.

# Error bar chart (taking standard deviation as an example) ----
## Various types of error bar plot
# Every panel draws the same bars (mean expression per gene) and differs only
# in the geom that marks mean - sd to mean + sd. Line thickness is set with
# `linewidth`; using `size` for lines is deprecated since ggplot2 3.4.0.

# Panel A: box spanning the interval, with a line at the mean
p1 <- ggplot(data_tcga) +
  geom_bar(
    aes(x = gene, y = expression),
    stat = "identity", fill = "skyblue", alpha = 0.7
  ) +
  geom_crossbar(
    aes(x = gene, y = expression, ymin = expression - sd, ymax = expression + sd),
    width = 0.4, colour = "orange", alpha = 0.9, linewidth = 1.3
  ) +
  labs(title = "Box error bar plot") +
  theme(plot.title = element_text(hjust = 0.5))

# Panel B: plain vertical line
p2 <- ggplot(data_tcga) +
  geom_bar(
    aes(x = gene, y = expression),
    stat = "identity", fill = "skyblue", alpha = 0.7
  ) +
  geom_linerange(
    aes(x = gene, ymin = expression - sd, ymax = expression + sd),
    colour = "orange", alpha = 0.9, linewidth = 1.3
  ) +
  labs(title = "Linear error bar plot") +
  theme(plot.title = element_text(hjust = 0.5))

# Panel C: vertical line with a point at the mean.
# `size` is kept as-is here: for geom_pointrange() it is still a valid
# aesthetic (it controls the point), so it raises no deprecation warning.
p3 <- ggplot(data_tcga) +
  geom_bar(
    aes(x = gene, y = expression),
    stat = "identity", fill = "skyblue", alpha = 0.7
  ) +
  geom_pointrange(
    aes(x = gene, y = expression, ymin = expression - sd, ymax = expression + sd),
    colour = "orange", alpha = 0.9, size = 1
  ) +
  labs(title = "Line + point error bar chart") +
  theme(plot.title = element_text(hjust = 0.5))

# Panel D: classic error bars on a flipped coordinate system
p4 <- ggplot(data_tcga) +
  geom_bar(
    aes(x = gene, y = expression),
    stat = "identity", fill = "skyblue", alpha = 0.7
  ) +
  geom_errorbar(
    aes(x = gene, ymin = expression - sd, ymax = expression + sd),
    width = 0.4, colour = "orange", alpha = 0.9, linewidth = 1.3
  ) +
  coord_flip() +
  labs(title = "Horizontal error bar plot") +
  theme(plot.title = element_text(hjust = 0.5))

plot_grid(p1, p2, p3, p4, labels = LETTERS[1:4], ncol = 2)
Figure 8: Various types of error bar plot

3.3 Add differential analysis

# Add differential analysis
# Restrict the long-format data (input to the test) and the summary table
# (input to the bars) to the same three genes.
data_tcga_p <- data_TCGA1 %>%
  filter(gene_name %in% c("A1BG", "A1CF", "A2M"))

data_tcga_plot <- data_tcga %>%
  filter(gene %in% c("A1BG", "A1CF", "A2M"))

# Calculation of p-value between groups
df_p_val <- wilcox_test(data_tcga_p, formula = gene_expression ~ gene_name)
# Translate the p-values into significance symbols
df_p_val <- add_significance(
  df_p_val,
  p.col = "p",
  cutpoints = c(0, 0.001, 0.01, 0.05, 1),
  symbols = c("***", "**", "*", "ns")
)
# Add the coordinates needed to draw the comparison brackets
df_p_val <- add_xy_position(df_p_val)

# Bars with mean - sd to mean + sd error bars, plus significance brackets
# taken from df_p_val.
p <- ggplot(data_tcga_plot) +
  geom_bar(
    aes(x = gene, y = expression),
    stat = "identity", fill = "skyblue", alpha = 0.7
  ) +
  geom_errorbar(
    aes(x = gene, ymin = expression - sd, ymax = expression + sd),
    # `linewidth` sets the line thickness; using `size` for lines is
    # deprecated since ggplot2 3.4.0.
    width = 0.4, colour = "orange", alpha = 0.9, linewidth = 1
  ) +
  stat_pvalue_manual(
    df_p_val,
    label = "{p.signif}", # Adding differential analysis lines
    tip.length = 0.01,
    # Bracket heights are set by hand, one per comparison
    y.position = c(15, 17, 19)
  ) +
  labs(title = "Add differential analysis") +
  theme(plot.title = element_text(hjust = 0.5))
p
Figure 9: Add differential analysis

The figure above adds significance brackets for the pairwise comparisons of expression among the three genes.

4. Stacked bar plot

When each group is further divided into subgroups, we can draw a stacked bar chart.

Take the penguins dataset as an example.

# Stacked bar plot----
p <- ggplot(data = data_penguins_flipper_length) +
  aes(x = species, y = flipper_length_mm, fill = sex) +
  # Reverse the stacking order; geom_col() is geom_bar(stat = "identity")
  geom_col(position = position_stack(reverse = TRUE)) +
  # Reverse the legend order too
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_manual(values = c("#87CEFA", "#8DB6CD"))
p
Figure 10: Stacked bar plot

A stacked bar chart showing the mean flipper length of each penguin species, stacked by sex.

5. Percentage bar plot

Similarly, when performing ratio analysis, you can also choose to draw a percentage bar chart.

# Percentage bar plot----
p <- ggplot(data = data_mpg, mapping = aes(x = class, fill = drv)) +
  # position = "fill" rescales every bar to the same total height,
  # so the segments show proportions instead of counts
  geom_bar(position = "fill") +
  scale_fill_manual(values = c("#87CEFA", "#8DB6CD", "#84B6BD"))
p
Figure 11: Percentage bar plot

The chart above shows the proportion of each drive type (four-wheel, front-wheel, and rear-wheel drive) within each vehicle class.

6. Side-by-side bar plot

When each group is further divided into subgroups, we can also draw a side-by-side bar plot by setting the position parameter.

# Side-by-side bar plot----
p <- ggplot(data = data_mpg, mapping = aes(x = class, fill = drv)) +
  # Dodge the bars; preserve = "single" keeps every bar the same width
  geom_bar(position = position_dodge2(preserve = "single")) +
  scale_fill_manual(values = c("#87CEFA", "#8DB6CD", "#84B6BD"))
p
Figure 12: Side-by-side bar plot

7. Deformation of bar plot

7.1 Pyramid plot

# Pyramid plot
# Toy data: 16 genes, each with a "down" value (negative) and an "up" value
# (positive) of the same magnitude.
df <- tibble(
  gene = factor(
    paste0("gene_", rep(1:16, 2)),
    levels = paste0("gene_", 16:1)
  ),
  stat = rep(c(-1, 1), each = 16) *
    rep(c(seq(10, 100, 10), seq(90, 40, -10)), times = 2),
  direct = rep(c("down", "up"), each = 16)
)

p <- ggplot(data = df, mapping = aes(x = gene, y = stat, fill = direct)) +
  geom_col() +
  coord_flip() +
  # Label both directions with magnitudes (no minus signs)
  scale_y_continuous(
    breaks = seq(-100, 100, 20),
    labels = abs(seq(-100, 100, 20))
  )
p
Figure 13: Pyramid plot

7.2 Deviation Plot

# Deviation Plot
# Toy data: 20 genes, the first ten with positive values ("up") and the last
# ten with negative values ("down").
df <- tibble(
  gene = factor(
    paste0("gene_", 1:20),
    levels = paste0("gene_", 20:1)
  ),
  stat = c(seq(100, 10, -10), -seq(10, 100, 10)),
  direct = factor(
    rep(c("up", "down"), times = c(10, 10)),
    levels = c("up", "down")
  )
)

p <- ggplot(data = df, mapping = aes(x = gene, y = stat, fill = direct)) +
  geom_col() +
  coord_flip()
p
Figure 14: Deviation Plot

Applications

1. Basic bar plot

BarPlotApp1
Figure 15: Applications of basic bar plot

The data are from the TCGA database. The top bar chart shows the number of available data types for each of the 36 cancer types as of January 2015; the bottom bar chart shows the number of cancer types for which each data type was available. [1]

2. Error bar plot

BarPlotApp2
Figure 16: Applications of error bar plot
  (A) is a bar graph showing the collagen volume fractions of the PBS, MSC, GENE, and MSC + GENE groups stained with large areas; (B) is a bar graph showing the collagen volume fractions of the PBS, MSC, GENE, and MSC + GENE groups stained with Sirius red. [2]

3. Side-by-side bar plot

BarPlotApp3
Figure 17: Applications of side-by-side bar plot

Figure B shows the average LV myocardial signal-to-noise ratio (SNR) of all mice in different segments of the six-segment model (A) acquired using a CryoProbe (CP) or room temperature coil (RT); Figure D shows the average LV myocardial SNR of all mice in different slices. [3]

Reference

[1] Kannan L, Ramos M, Re A, El-Hachem N, Safikhani Z, Gendoo DM, Davis S, Gomez-Cabrero D, Castelo R, Hansen KD, Carey VJ, Morgan M, Culhane AC, Haibe-Kains B, Waldron L. Public data and open source tools for multi-assay genomic investigation of disease. Brief Bioinform. 2016 Jul;17(4):603-15. doi: 10.1093/bib/bbv080. Epub 2015 Oct 12. PMID: 26463000; PMCID: PMC4945830.

[2] Yu Q, Fang W, Zhu N, Zheng X, Na R, Liu B, Meng L, Li Z, Li Q, Li X. Beneficial effects of intramyocardial mesenchymal stem cells and VEGF165 plasmid injection in rats with furazolidone induced dilated cardiomyopathy. J Cell Mol Med. 2015 Aug;19(8):1868-76. doi: 10.1111/jcmm.12558. Epub 2015 Mar 5. PMID: 25753859; PMCID: PMC4549037.

[3] Wagenhaus B, Pohlmann A, Dieringer MA, Els A, Waiczies H, Waiczies S, Schulz-Menger J, Niendorf T. Functional and morphological cardiac magnetic resonance imaging of mice using a cryogenic quadrature radiofrequency coil. PLoS One. 2012;7(8):e42383. doi: 10.1371/journal.pone.0042383. Epub 2012 Aug 1. PMID: 22870323; PMCID: PMC3411643.

[4] Bache S, Wickham H (2022). magrittr: A Forward-Pipe Operator for R. R package version 2.0.3, https://CRAN.R-project.org/package=magrittr.

[5] Wickham H, Vaughan D, Girlich M (2024). tidyr: Tidy Messy Data. R package version 1.3.1, https://CRAN.R-project.org/package=tidyr.

[6] H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016.

[7] Wilke C (2024). cowplot: Streamlined Plot Theme and Plot Annotations for ‘ggplot2’. R package version 1.1.3, https://CRAN.R-project.org/package=cowplot.

[8] Wickham H (2023). forcats: Tools for Working with Categorical Variables (Factors). R package version 1.0.0, https://CRAN.R-project.org/package=forcats.

[9] Wickham H, François R, Henry L, Müller K, Vaughan D (2023). dplyr: A Grammar of Data Manipulation. R package version 1.1.4, https://CRAN.R-project.org/package=dplyr.

[10] Rudis B (2024). hrbrthemes: Additional Themes, Theme Components and Utilities for ‘ggplot2’. R package version 0.8.7, https://CRAN.R-project.org/package=hrbrthemes.

[11] FC M, Davis T, ggplot2 authors (2024). ggpattern: ‘ggplot2’ Pattern Geoms. R package version 1.1.1, https://CRAN.R-project.org/package=ggpattern.

[12] Kassambara A (2023). rstatix: Pipe-Friendly Framework for Basic Statistical Tests. R package version 0.7.2, https://CRAN.R-project.org/package=rstatix.