ggplot2之扩展内容#

ggplot2有很多扩展包

install.packages(c("sf", "cowplot", "patchwork", "gghighlight", "ggforce", "ggfx"))
还安装相依关系‘proxy’, ‘e1071’, ‘class’, ‘wk’, ‘classInt’, ‘s2’, ‘units’, ‘magick’


Warning message in install.packages(c("sf", "cowplot", "patchwork", "gghighlight", :
“安装程序包‘units’时退出狀態的值不是0”
Warning message in install.packages(c("sf", "cowplot", "patchwork", "gghighlight", :
“安装程序包‘sf’时退出狀態的值不是0”
更新'.Library'里的HTML程序包列表

Making 'packages.html' ...
 做完了。
library(tidyverse)
library(gghighlight)
library(cowplot)
library(patchwork)
library(ggforce)
library(ggridges)
── Attaching core tidyverse packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
 dplyr     1.1.4      readr     2.1.5
 forcats   1.0.0      stringr   1.5.1
 ggplot2   3.5.0      tibble    3.2.1
 lubridate 1.9.3      tidyr     1.3.1
 purrr     1.0.2     
── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
 dplyr::filter() masks stats::filter()
 dplyr::lag()    masks stats::lag()
 Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

载入程辑包:‘cowplot’


The following object is masked from ‘package:lubridate’:

    stamp



载入程辑包:‘patchwork’


The following object is masked from ‘package:cowplot’:

    align_plots
p1 <- ggplot(mpg, aes(x = cty, y = hwy))+
  geom_point()+
  geom_smooth()+
  labs(title = "1: geom_point() + geom_smooth()")+
  theme(plot.title = element_text(face = "bold"))

p2 <- ggplot(mpg, aes(x = cty, y = hwy))+
  geom_bin_2d()+
  labs(title = "2: geom_bin_2d()")+
  guides(fill = FALSE)+
  theme(plot.title = element_text(face = "bold"))

p3 <- ggplot(mpg, aes(x = drv, fill = drv))+
  geom_bar()+
  labs(title = "3: geom_bar()")+
  guides(fill = FALSE)+
  theme(plot.title = element_text(face = "bold"))

p4 <- ggplot(mpg, aes(x = cty))+
  geom_histogram(binwidth = 2, color = "white")+
  labs(title = "4: geom_histogram()")+
  theme(plot.title = element_text(face = "bold"))

p5 <- ggplot(mpg, aes(x = cty, y = drv, fill = drv))+
  geom_violin()+
  labs(title = "5: geom_violin()")+
  guides(fill = FALSE)+
  theme(plot.title = element_text(face = "bold"))

p6 <- ggplot(mpg, aes(x = cty, y = drv, fill = drv))+
  geom_boxplot()+
  labs(title = "6: geom_boxplot()")+
  guides(fill = FALSE)+
  theme(plot.title = element_text(face = "bold"))

p7 <- ggplot(mpg, aes(x = cty, fill = drv))+
  geom_density(alpha = 0.7)+
  labs(title = "7: geom_density")+
  guides(fill = FALSE)+
  theme(plot.title = element_text(face = "bold"))

p8 <- ggplot(mpg, aes(x = cty, y = drv, fill = drv))+
  geom_density_ridges()+
  labs(title = "8: geom_density_ridges")+
  guides(fill = FALSE)+
  theme(plot.title = element_text(face = "bold"))

p9 <- ggplot(mpg, aes(x = cty, y = hwy))+
  geom_density_2d()+
  labs(title = "9: geom_density_2d()")+
  theme(plot.title = element_text(face = "bold"))

p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9

ggsave("plot.png", width = 15, height = 10, dpi = 600)
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Picking joint bandwidth of 0.879

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Picking joint bandwidth of 0.879
../_images/c6f47dc61d98883b44b49ed2e1af908ae8e17caea453382515d2bb2bcbae0cdc.png
mpg %>% head()
A tibble: 6 × 11
manufacturermodeldisplyearcyltransdrvctyhwyflclass
<chr><chr><dbl><int><int><chr><chr><int><int><chr><chr>
audia41.819994auto(l5) f1829pcompact
audia41.819994manual(m5)f2129pcompact
audia42.020084manual(m6)f2031pcompact
audia42.020084auto(av) f2130pcompact
audia42.819996auto(l5) f1626pcompact
audia42.819996manual(m5)f1826pcompact

定制#

1 标签#

ggtitle("My Plot Title")+   xlab("The X variable")+   ylab("The Y variable")

labs(title = "My Plot Title", subtitle = "My Plot subtitle", x = "The X Variable", y = "The Y Variable")

gapdata <- read_csv("./demo_data/gapminder.csv")
Rows: 1704 Columns: 6
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): country, continent
dbl (4): year, lifeExp, pop, gdpPercap

 Use `spec()` to retrieve the full column specification for this data.
 Specify the column types or set `show_col_types = FALSE` to quiet this message.
gapdata %>% head()
A tibble: 6 × 6
countrycontinentyearlifeExppopgdpPercap
<chr><chr><dbl><dbl><dbl><dbl>
AfghanistanAsia195228.801 8425333779.4453
AfghanistanAsia195730.332 9240934820.8530
AfghanistanAsia196231.99710267083853.1007
AfghanistanAsia196734.02011537966836.1971
AfghanistanAsia197236.08813079460739.9811
AfghanistanAsia197738.43814880372786.1134
gapdata %>% 
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent))+
  geom_point()+
  scale_x_log10()+
  ggtitle("My Plot Title")+
  xlab("The X variable")+
  ylab("The Y variable")
../_images/b6160c4cd430e97e4b0c0ee04aea388d0392ec42613860c1a4848de9eefda4f6.png
gapdata %>% 
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent))+
  geom_point()+
  scale_x_log10()+
  labs(title = "My Plot Title",
       subtitle = "My Plot subtitle",
       x = "The X Variable", y = "The Y Variable")
../_images/fcaff3d871e2ab077be1f091577e9243d1faa2685b87893c74e25ee3f72483fa.png

2 定制颜色#

  • 自定义颜色 scale_color_manual()scale_fill_manual()

  • 已有颜色系统

    • 离散型变量 scale_color_viridis_d()

    • 连续型变量 scale_color_viridis_c()

gapdata %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  scale_x_log10() +
  scale_color_manual(values = c("#195744", "#008148", "#C6C013", "#EF8A17", "#EF2917"))
../_images/b90105299e6191e2eafd95098cbaaa31228d8579f533e06d4caa8e091f55723d.png
gapdata %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  scale_x_log10() +
  scale_color_viridis_d()
../_images/dd3a94a3b45ba9223b1978714c501d8f4b08003baf37ee066372911d5cf9a492.png

组合图片#

我们有时候想把多张图组合到一起

1 cowplot#

可以使用 cowplot 宏包的plot_grid()函数完成多张图片的组合,使用方法很简单。

library(cowplot)

p1 <- gapdata %>% 
  ggplot(aes(x = gdpPercap, y = lifeExp))+
  geom_point(aes(color = lifeExp > mean(lifeExp)))+
  scale_x_log10()+
  theme(legend.position = "none")+
  scale_color_manual(values = c("orange", "pink"))+
  labs(title = "My Plot Title", x = "The X Variable", y = "The Y Variable")

p2 <- gapdata %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  scale_x_log10() +
  scale_color_manual(values = c("#195744", "#008148", "#C6C013", "#EF8A17", "#EF2917")) +
  theme(legend.position = "none") +
  labs(title = "My Plot Title",
    x = "The X Variable",
    y = "The Y Variable")

cowplot::plot_grid(p1, p2, labels = c("A", "B"))
../_images/3a90f33e0e211e95d600e59dd7bf695efab9170d4fd47394a95c87a489a69627.png

也可以使用patchwork宏包,方法更简单

plot_layout(guides = "collect") 表示将所有组合在一起的图形共享的图例合并到一个位置显示。

plot_annotation(tag_levels = "A", title = "",subtitle = "",caption = ""),用于对组合图的注释

library(patchwork)

p1 + p2

p1 / p2
../_images/c8c1ff6b6827cf2010a1c083f5e9dfc4d4db17e810e794e475a9aea41984de45.png ../_images/9e20eef975577e51074bbb177a560b98ee61d4464418a10c269568dd8828bfd5.png
p1 + p2 +
  plot_annotation(tag_levels = "A", 
                  title = "The surprising truth about mtcars",
                  subtitle = "These 3 plots will reveal yet-untold secrets about our beloved data-set",
                  caption = "Disclaimer: None of these plots are insightful")
../_images/4c372104282d2e441b55bac8b4b974f5e1eb329fb1278ab165b32eed2c09ea33.png
library(palmerpenguins)

g1 <- penguins %>% 
  ggplot(aes(bill_length_mm, body_mass_g, color = species))+
  geom_point()+
  theme_bw(base_size = 14)+
  labs(x = "Bill length (mm)", y = "Bodu mass (g)",
       tag = "(A)", color = "Species")

g2 <- penguins %>% 
  ggplot(aes(bill_length_mm, bill_depth_mm, color = species))+
  geom_point()+
  theme_bw(base_size = 14)+
  labs(x = "Bill length (mm)", y = "Bill depth (mm)",
       tag = "(B)", color = "Species")

g1 + g2 + patchwork::plot_layout(guides = "collect")
Warning message:
“Removed 2 rows containing missing values or values outside the scale range (`geom_point()`).”
Warning message:
“Removed 2 rows containing missing values or values outside the scale range (`geom_point()`).”
../_images/7ffcc95b957fa6fcf968d7296457ce32a9b9559dc275b2bcb09405551cdb494b.png

patchwork 使用方法很简单image.png

高亮某一组#

画图很容易,然而画一张好图,不容易。图片质量好不好,其原则就是不增加看图者的心智负担,有些图片的色彩很丰富,然而需要看图人配合文字和图注等信息才能看懂作者想表达的意思,这样就失去了图片“一图胜千言”的价值。

分析数据过程中,我们可以使用高亮我们某组数据,突出我们想表达的信息,是非常好的一种可视化探索手段。

1 ggplot2方法#

这种方法是将背景部分高亮部分分两步来画

drop_facet <- function(x) select(x, -continent)

gapdata %>% 
  ggplot()+
  geom_line(data = drop_facet, 
            aes(x = year, y = lifeExp, group = country), color = "grey")+
  geom_line(aes(x = year, y = lifeExp, color = country, group = country))+
  facet_wrap(vars(continent))+
  theme(legend.position = "none")
../_images/a6d9a734b3715ad7fc3c407a6d301dbd7b5925154e72ba4e7ae92c459e8848cc.png
gapdata %>% 
  mutate(group = country) %>% 
  filter(continent == "Asia") %>% 
  ggplot()+
  geom_line(data = function(d) select(d, -country), 
            aes(x = year, y = lifeExp, group = group), color = "grey")+
  geom_line(aes(x = year, y = lifeExp, group = country), color = "red")+
  facet_wrap(vars(country))+
  theme(legend.position = "none")
../_images/d1f638e6d11ae85c7ea12ba9dbfd4f098d9e3cddf01293a73bf0b9e285d86239.png

.2 gghighlight方法#

这里推荐gghighlight宏包

  • dplyr has filter()

  • ggplot has Highlighting

gapdata %>% 
  filter(country == "China")
A spec_tbl_df: 12 × 6
countrycontinentyearlifeExppopgdpPercap
<chr><chr><dbl><dbl><dbl><dbl>
ChinaAsia195244.00000 556263527 400.4486
ChinaAsia195750.54896 637408000 575.9870
ChinaAsia196244.50136 665770000 487.6740
ChinaAsia196758.38112 754550000 612.7057
ChinaAsia197263.11888 862030000 676.9001
ChinaAsia197763.96736 943455000 741.2375
ChinaAsia198265.525001000281000 962.4214
ChinaAsia198767.2740010840350001378.9040
ChinaAsia199268.6900011649700001655.7842
ChinaAsia199770.4260012300750002289.2341
ChinaAsia200272.0280012804000003119.2809
ChinaAsia200772.9610013186830964959.1149
gapdata %>% 
  ggplot(aes(x = year, y = lifeExp, 
             color = continent, group = country))+
  geom_line()+
  gghighlight(country == "China", label_key = country)
Warning message:
“Tried to calculate with group_by(), but the calculation failed.
Falling back to ungrouped filter operation...”
../_images/16d3b0fd6a97c290bfa7e2a065e2ec32812dd815817eae946d0e7e1a918dca70.png
gapdata %>% 
  filter(continent == "Asia") %>% 
  ggplot(aes(year, lifeExp, color = country, group = country))+
  geom_line(size = 1.2, alpha = 0.9, color = "#E58C23")+
  theme_minimal(base_size = 14)+
  theme(legend.position = "none",
        panel.grid.major.x = element_blank(), 
        panel.grid.minor = element_blank()  
        )+
  gghighlight(country %in% c("China", "India", "Japan", "Korea, Rep."),
              use_group_by = FALSE,
              use_direct_label = FALSE, 
              unhighlighted_params = list(color = "grey90")
             )+
  facet_wrap(vars(country))
../_images/7b8422bc20001dacba46ac097995bf621980f0e8633c0b458e21dbcf4349848d.png

3D效果#

ggfx::with_shadow()

library(ggfx)

mtcars %>% 
  ggplot(aes(mpg, disp))+
  ggfx::with_shadow(geom_smooth(alpha = 1), sigma = 4)+
  ggfx::with_shadow(geom_point(), sigma = 4)
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
../_images/52941cb8530afe18d8e80858af959da0a0871b6feddcd0af56d6dbdf5276f4b5.png

弯曲文本#

弯曲文本,使其匹配多种图形的轨迹。

geomtextpath::geom_textdensity()

geomtextpath::geom_labelsmooth()

# install.packages("geomtextpath")
library(geomtextpath)

iris %>% 
  ggplot(aes(x = Sepal.Length, color = Species, label = Species))+
  geomtextpath::geom_textdensity(size = 6, fontface = 2, 
                                 hjust = 0.2 , vjust = 0.3)+
  theme(legend.position = "none")
../_images/2c86ee218b9904d520f8fd6a6c3d6c669da96bd0a8e2108253c5faaacec3865a.png
library(palmerpenguins)

penguins %>% 
  ggplot(aes(x = bill_length_mm, y = bill_depth_mm, color = species))+
  geom_point(alpha = 0.3)+
  geom_labelsmooth(aes(label = species), method = "loess", 
                   size = 5, linewidth = 1)+
  scale_color_manual(values = c("forestgreen", "deepskyblue4", "tomato4"))+
  theme(legend.position = "none")
`geom_smooth()` using formula = 'y ~ x'
Warning message:
“Removed 2 rows containing non-finite outside the scale range (`stat_smooth()`).”
Warning message:
“Removed 2 rows containing missing values or values outside the scale range (`geom_point()`).”
../_images/882f01fe525c5fe5b06b330c938b0253204628dcb83526922903f6d0af2dbb50.png

函数图#

stat_function()

有时候我们想画一个函数图,比如正态分布的函数,可能会想到先产生数据,然后画图,比如下面的代码

tibble(x = seq(from = -3, to = 3, by = 0.01)) %>% 
  mutate(y = dnorm(x, mean = 0, sd = 1)) %>% 
  ggplot(aes(x = x, y = y))+
  geom_line(color = "grey33")
../_images/aba66cc52c7351d305dc0bde85ac69c8bcf72a5d4cede828b51c19feebb0ffd4.png

事实上,stat_function()可以简化这个过程

ggplot(data = data.frame(x = c(-3,3)), aes(x = x))+
  stat_function(fun = dnorm)
../_images/94eb1bb4297d844344ab4c4affba29a7f4c98b70be3f6332313adf851569a4fb.png

支持绘制自定义函数

myfun <- function(x){
    (x -1)**2
}

ggplot(data = data.frame(x = c(-3, 3)), aes(x = x))+
  stat_function(fun = myfun, 
                geom = "line", color = "red")
../_images/9a4a1cd5a5fe377c0a7eb64ba8b776f4d0ac90c10afacde4d5f640295cf259d3.png
d <- tibble(x = rnorm(2000, mean = 2, sd = 4))

ggplot(d, aes(x = x))+
  geom_histogram(aes(y = after_stat(density)))+
  geom_density()+
  stat_function(fun = dnorm, args = list(mean = 2, sd = 4),
                color = "red")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
../_images/c1bb179216c57f604a0f630a50a6ebfce1a7d3e0aaabda11f7f317a0e402409f.png

latex公式#

library(ggplot2)
library(latex2exp)
  
ggplot(mpg, aes(x = displ, y = hwy))+
  geom_point()+
  annotate("text", x = 4, y = 40,
           label = TeX("$\\alpha^2 + $\\theta^2 = \\omega^2 $"),
           size = 9)+
  labs(title = TeX("The ratio of 1 and 2 is $\\,\\, \\frac{1}{2}$"),
       x = TeX("$\\alpha$"),
       y = TeX("$\\alpha^2$"))
Warning message in is.na(x):
“is.na()不适用于类别为'expression'的非串列或非矢量”
../_images/dd00fc77ce6085b9333a15098505635db1f54a07d8b5e4c205142514e2913890.png

“coord_cartesian() 与 scale_x_continuous()”#

乍一看,这两个操作没有区别

p1 <- mtcars %>% 
  ggplot(aes(disp, wt))+
  geom_point()+
  scale_x_continuous(limits = c(325, 500))+
  ggtitle("scale_x_continuous(limits = c(325, 500))")

p2  <- mtcars %>% 
  ggplot(aes(disp, wt))+
  geom_point()+
  coord_cartesian(xlim = c(325, 500))+
  ggtitle("coord_cartesian(xlim = c(325, 500))")


p1 + p2
Warning message:
“Removed 24 rows containing missing values or values outside the scale range (`geom_point()`).”
../_images/b7b6388a6d449df535a885b31bb125f15dd854c72e6afb4eaaa9cb53f9a6cf82.png

实际上这两个操作,区别蛮大的