# Install packages
# Install each required package only when it is not already available.
# Every `if` starts on its own line: a closing brace followed by `if` on the
# same line is a parse error at the top level of an R script.
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
if (!requireNamespace("jsonlite", quietly = TRUE)) {
  install.packages("jsonlite")
}
if (!requireNamespace("ggseqlogo", quietly = TRUE)) {
  install.packages("ggseqlogo")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# Load packages
library(data.table)
library(jsonlite)
library(ggseqlogo)
library(ggplot2)
Seqlogo
A sequence logo is a graphic that depicts the sequence pattern of binding sites.
Setup
System Requirements: Cross-platform (Linux/MacOS/Windows)
Programming language: R
Dependent packages: data.table; jsonlite; ggseqlogo; ggplot2
Data Preparation
The loaded data are the binding-site sequences of multiple transcription factors on multiple genes.
# Load data
# Download the example JSON payload, then parse the text table stored in its
# first `textarea` entry into a plain data.frame.
example_json <- jsonlite::read_json(
  "https://hiplot.cn/ui/basic/ggseqlogo/data.json"
)
data <- data.table::fread(example_json$exampleData$textarea[[1]])
data <- as.data.frame(data)
# Convert data structure
# Drop columns that contain only NA, then turn the data.frame into a named
# list with one vector per column and strip the remaining NA entries from
# each vector.
all_na <- vapply(data, function(x) all(is.na(x)), logical(1))
# drop = FALSE keeps a data.frame even if only one column survives, so
# as.list() still yields one named element per column.
data <- data[, !all_na, drop = FALSE]
data <- as.list(data)
data <- lapply(data, function(x) x[!is.na(x)])
# View data
# Show the structure of the first five elements; head() avoids padding the
# result with NULL entries when the list holds fewer than five elements.
str(head(data, 5))
List of 5
$ MA0001.1: chr [1:97] "CCATATATAG" "CCATATATAG" "CCATAAATAG" "CCATAAATAG" ...
$ MA0002.1: chr [1:26] "AATTGTGGTTA" "ATCTGTGGTTA" "AATTGTGGTAA" "TTCTGCGGTTA" ...
$ MA0004.1: chr [1:20] "CACGTG" "CACGTG" "CACGTG" "CACGTG" ...
$ MA0005.1: chr [1:90] "CCTAATTGGGC" "CCTAATTTGGC" "CCTAATCGGGC" "CCTAATCGGGC" ...
$ MA0006.1: chr [1:24] "CGCGTG" "CGCGTG" "CGCGTG" "CGCGTG" ...
Visualization
# Seqlogo
# Build the sequence-logo plot from the list of sequences: DNA alphabet,
# nucleotide colour scheme, heights computed with the "bits" method, and
# panels arranged in 4 columns. The theme centres the plot title.
p <- ggseqlogo(
  data,
  ncol = 4,
  col_scheme = "nucleotide",
  seq_type = "dna",
  method = "bits"
) +
  theme(plot.title = element_text(hjust = 0.5))

# Render the plot.
p

Each set of binding-site sequences is displayed as a sequence logo in its own panel of the chart, with heights computed in bits, so that the bases making up a large proportion of the different sequences can be clearly observed.