ggplot2之扩展内容

`ggplot2`之扩展内容#

ggplot2有很多扩展包

install.packages(c("sf", "cowplot", "patchwork", "gghighlight", "ggforce", "ggfx"))

还安装相依关系‘proxy’, ‘e1071’, ‘class’, ‘wk’, ‘classInt’, ‘s2’, ‘units’, ‘magick’


Warning message in install.packages(c("sf", "cowplot", "patchwork", "gghighlight", :
“安装程序包‘units’时退出狀態的值不是0”
Warning message in install.packages(c("sf", "cowplot", "patchwork", "gghighlight", :
“安装程序包‘sf’时退出狀態的值不是0”
更新'.Library'里的HTML程序包列表

Making 'packages.html' ...
 做完了。

library(tidyverse)
library(gghighlight)
library(cowplot)
library(patchwork)
library(ggforce)
library(ggridges)

── Attaching core tidyverse packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

载入程辑包：‘cowplot’


The following object is masked from ‘package:lubridate’:

    stamp



载入程辑包：‘patchwork’


The following object is masked from ‘package:cowplot’:

    align_plots

p1 <- ggplot(mpg, aes(x = cty, y = hwy))+
  geom_point()+
  geom_smooth()+
  labs(title = "1: geom_point() + geom_smooth()")+
  theme(plot.title = element_text(face = "bold"))

p2 <- ggplot(mpg, aes(x = cty, y = hwy))+
  geom_bin_2d()+
  labs(title = "2: geom_bin_2d()")+
  guides(fill = FALSE)+
  theme(plot.title = element_text(face = "bold"))

p3 <- ggplot(mpg, aes(x = drv, fill = drv))+
  geom_bar()+
  labs(title = "3: geom_bar()")+
  guides(fill = FALSE)+
  theme(plot.title = element_text(face = "bold"))

p4 <- ggplot(mpg, aes(x = cty))+
  geom_histogram(binwidth = 2, color = "white")+
  labs(title = "4: geom_histogram()")+
  theme(plot.title = element_text(face = "bold"))

p5 <- ggplot(mpg, aes(x = cty, y = drv, fill = drv))+
  geom_violin()+
  labs(title = "5: geom_violin()")+
  guides(fill = FALSE)+
  theme(plot.title = element_text(face = "bold"))

p6 <- ggplot(mpg, aes(x = cty, y = drv, fill = drv))+
  geom_boxplot()+
  labs(title = "6: geom_boxplot()")+
  guides(fill = FALSE)+
  theme(plot.title = element_text(face = "bold"))

p7 <- ggplot(mpg, aes(x = cty, fill = drv))+
  geom_density(alpha = 0.7)+
  labs(title = "7: geom_density")+
  guides(fill = FALSE)+
  theme(plot.title = element_text(face = "bold"))

p8 <- ggplot(mpg, aes(x = cty, y = drv, fill = drv))+
  geom_density_ridges()+
  labs(title = "8: geom_density_ridges")+
  guides(fill = FALSE)+
  theme(plot.title = element_text(face = "bold"))

p9 <- ggplot(mpg, aes(x = cty, y = hwy))+
  geom_density_2d()+
  labs(title = "9: geom_density_2d()")+
  theme(plot.title = element_text(face = "bold"))

p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9

ggsave("plot.png", width = 15, height = 10, dpi = 600)

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Picking joint bandwidth of 0.879

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Picking joint bandwidth of 0.879

../_images/c6f47dc61d98883b44b49ed2e1af908ae8e17caea453382515d2bb2bcbae0cdc.png

mpg %>% head()

A tibble: 6 × 11
manufacturer	model	displ	year	cyl	trans	drv	cty	hwy	fl	class
<chr>	<chr>	<dbl>	<int>	<int>	<chr>	<chr>	<int>	<int>	<chr>	<chr>
audi	a4	1.8	1999	4	auto(l5)	f	18	29	p	compact
audi	a4	1.8	1999	4	manual(m5)	f	21	29	p	compact
audi	a4	2.0	2008	4	manual(m6)	f	20	31	p	compact
audi	a4	2.0	2008	4	auto(av)	f	21	30	p	compact
audi	a4	2.8	1999	6	auto(l5)	f	16	26	p	compact
audi	a4	2.8	1999	6	manual(m5)	f	18	26	p	compact

定制#

1 标签#

ggtitle("My Plot Title")+ xlab("The X variable")+ ylab("The Y variable")

labs(title = "My Plot Title", subtitle = "My Plot subtitle", x = "The X Variable", y = "The Y Variable")

gapdata <- read_csv("./demo_data/gapminder.csv")

Rows: 1704 Columns: 6
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): country, continent
dbl (4): year, lifeExp, pop, gdpPercap

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

gapdata %>% head()

A tibble: 6 × 6
country	continent	year	lifeExp	pop	gdpPercap
<chr>	<chr>	<dbl>	<dbl>	<dbl>	<dbl>
Afghanistan	Asia	1952	28.801	8425333	779.4453
Afghanistan	Asia	1957	30.332	9240934	820.8530
Afghanistan	Asia	1962	31.997	10267083	853.1007
Afghanistan	Asia	1967	34.020	11537966	836.1971
Afghanistan	Asia	1972	36.088	13079460	739.9811
Afghanistan	Asia	1977	38.438	14880372	786.1134

gapdata %>% 
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent))+
  geom_point()+
  scale_x_log10()+
  ggtitle("My Plot Title")+
  xlab("The X variable")+
  ylab("The Y variable")

../_images/b6160c4cd430e97e4b0c0ee04aea388d0392ec42613860c1a4848de9eefda4f6.png

gapdata %>% 
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent))+
  geom_point()+
  scale_x_log10()+
  labs(title = "My Plot Title",
       subtitle = "My Plot subtitle",
       x = "The X Variable", y = "The Y Variable")

../_images/fcaff3d871e2ab077be1f091577e9243d1faa2685b87893c74e25ee3f72483fa.png

2 定制颜色#

自定义颜色 scale_color_manual() 和 scale_fill_manual()
已有颜色系统
- 离散型变量 scale_color_viridis_d()
- 连续型变量 scale_color_viridis_c()

gapdata %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  scale_x_log10() +
  scale_color_manual(values = c("#195744", "#008148", "#C6C013", "#EF8A17", "#EF2917"))

../_images/b90105299e6191e2eafd95098cbaaa31228d8579f533e06d4caa8e091f55723d.png

gapdata %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  scale_x_log10() +
  scale_color_viridis_d()

../_images/dd3a94a3b45ba9223b1978714c501d8f4b08003baf37ee066372911d5cf9a492.png

组合图片#

我们有时候想把多张图组合到一起

1 `cowplot`#

可以使用 cowplot 宏包的plot_grid()函数完成多张图片的组合，使用方法很简单。

library(cowplot)

p1 <- gapdata %>% 
  ggplot(aes(x = gdpPercap, y = lifeExp))+
  geom_point(aes(color = lifeExp > mean(lifeExp)))+
  scale_x_log10()+
  theme(legend.position = "none")+
  scale_color_manual(values = c("orange", "pink"))+
  labs(title = "My Plot Title", x = "The X Variable", y = "The Y Variable")

p2 <- gapdata %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  scale_x_log10() +
  scale_color_manual(values = c("#195744", "#008148", "#C6C013", "#EF8A17", "#EF2917")) +
  theme(legend.position = "none") +
  labs(title = "My Plot Title",
    x = "The X Variable",
    y = "The Y Variable")

cowplot::plot_grid(p1, p2, labels = c("A", "B"))

../_images/3a90f33e0e211e95d600e59dd7bf695efab9170d4fd47394a95c87a489a69627.png

也可以使用patchwork宏包，方法更简单

plot_layout(guides = "collect") 表示将所有组合在一起的图形共享的图例合并到一个位置显示。

plot_annotation(tag_levels = "A", title = "",subtitle = "",caption = "")，用于对组合图的注释

library(patchwork)

p1 + p2

p1 / p2

../_images/c8c1ff6b6827cf2010a1c083f5e9dfc4d4db17e810e794e475a9aea41984de45.png

../_images/9e20eef975577e51074bbb177a560b98ee61d4464418a10c269568dd8828bfd5.png

p1 + p2 +
  plot_annotation(tag_levels = "A", 
                  title = "The surprising truth about mtcars",
                  subtitle = "These 3 plots will reveal yet-untold secrets about our beloved data-set",
                  caption = "Disclaimer: None of these plots are insightful")

../_images/4c372104282d2e441b55bac8b4b974f5e1eb329fb1278ab165b32eed2c09ea33.png

library(palmerpenguins)

g1 <- penguins %>% 
  ggplot(aes(bill_length_mm, body_mass_g, color = species))+
  geom_point()+
  theme_bw(base_size = 14)+
  labs(x = "Bill length (mm)", y = "Bodu mass (g)",
       tag = "(A)", color = "Species")

g2 <- penguins %>% 
  ggplot(aes(bill_length_mm, bill_depth_mm, color = species))+
  geom_point()+
  theme_bw(base_size = 14)+
  labs(x = "Bill length (mm)", y = "Bill depth (mm)",
       tag = "(B)", color = "Species")

g1 + g2 + patchwork::plot_layout(guides = "collect")

Warning message:
“Removed 2 rows containing missing values or values outside the scale range (`geom_point()`).”
Warning message:
“Removed 2 rows containing missing values or values outside the scale range (`geom_point()`).”

../_images/7ffcc95b957fa6fcf968d7296457ce32a9b9559dc275b2bcb09405551cdb494b.png

patchwork 使用方法很简单

高亮某一组#

画图很容易，然而画一张好图，不容易。图片质量好不好，其原则就是不增加看图者的心智负担，有些图片的色彩很丰富，然而需要看图人配合文字和图注等信息才能看懂作者想表达的意思，这样就失去了图片“一图胜千言”的价值。

分析数据过程中，我们可以使用高亮我们某组数据，突出我们想表达的信息，是非常好的一种可视化探索手段。

1 `ggplot2`方法#

这种方法是将背景部分和高亮部分分两步来画

drop_facet <- function(x) select(x, -continent)

gapdata %>% 
  ggplot()+
  geom_line(data = drop_facet, 
            aes(x = year, y = lifeExp, group = country), color = "grey")+
  geom_line(aes(x = year, y = lifeExp, color = country, group = country))+
  facet_wrap(vars(continent))+
  theme(legend.position = "none")

../_images/a6d9a734b3715ad7fc3c407a6d301dbd7b5925154e72ba4e7ae92c459e8848cc.png

gapdata %>% 
  mutate(group = country) %>% 
  filter(continent == "Asia") %>% 
  ggplot()+
  geom_line(data = function(d) select(d, -country), 
            aes(x = year, y = lifeExp, group = group), color = "grey")+
  geom_line(aes(x = year, y = lifeExp, group = country), color = "red")+
  facet_wrap(vars(country))+
  theme(legend.position = "none")

../_images/d1f638e6d11ae85c7ea12ba9dbfd4f098d9e3cddf01293a73bf0b9e285d86239.png

.2 gghighlight方法#

这里推荐gghighlight宏包

dplyr has filter()
ggplot has Highlighting

gapdata %>% 
  filter(country == "China")

A spec_tbl_df: 12 × 6
country	continent	year	lifeExp	pop	gdpPercap
<chr>	<chr>	<dbl>	<dbl>	<dbl>	<dbl>
China	Asia	1952	44.00000	556263527	400.4486
China	Asia	1957	50.54896	637408000	575.9870
China	Asia	1962	44.50136	665770000	487.6740
China	Asia	1967	58.38112	754550000	612.7057
China	Asia	1972	63.11888	862030000	676.9001
China	Asia	1977	63.96736	943455000	741.2375
China	Asia	1982	65.52500	1000281000	962.4214
China	Asia	1987	67.27400	1084035000	1378.9040
China	Asia	1992	68.69000	1164970000	1655.7842
China	Asia	1997	70.42600	1230075000	2289.2341
China	Asia	2002	72.02800	1280400000	3119.2809
China	Asia	2007	72.96100	1318683096	4959.1149

gapdata %>% 
  ggplot(aes(x = year, y = lifeExp, 
             color = continent, group = country))+
  geom_line()+
  gghighlight(country == "China", label_key = country)

Warning message:
“Tried to calculate with group_by(), but the calculation failed.
Falling back to ungrouped filter operation...”

../_images/16d3b0fd6a97c290bfa7e2a065e2ec32812dd815817eae946d0e7e1a918dca70.png

gapdata %>% 
  filter(continent == "Asia") %>% 
  ggplot(aes(year, lifeExp, color = country, group = country))+
  geom_line(size = 1.2, alpha = 0.9, color = "#E58C23")+
  theme_minimal(base_size = 14)+
  theme(legend.position = "none",
        panel.grid.major.x = element_blank(), 
        panel.grid.minor = element_blank()  
        )+
  gghighlight(country %in% c("China", "India", "Japan", "Korea, Rep."),
              use_group_by = FALSE,
              use_direct_label = FALSE, 
              unhighlighted_params = list(color = "grey90")
             )+
  facet_wrap(vars(country))

../_images/7b8422bc20001dacba46ac097995bf621980f0e8633c0b458e21dbcf4349848d.png

3D效果#

ggfx::with_shadow()

library(ggfx)

mtcars %>% 
  ggplot(aes(mpg, disp))+
  ggfx::with_shadow(geom_smooth(alpha = 1), sigma = 4)+
  ggfx::with_shadow(geom_point(), sigma = 4)

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

../_images/52941cb8530afe18d8e80858af959da0a0871b6feddcd0af56d6dbdf5276f4b5.png

弯曲文本#

弯曲文本，使其匹配多种图形的轨迹。

geomtextpath::geom_textdensity()

geomtextpath::geom_labelsmooth()

# install.packages("geomtextpath")
library(geomtextpath)

iris %>% 
  ggplot(aes(x = Sepal.Length, color = Species, label = Species))+
  geomtextpath::geom_textdensity(size = 6, fontface = 2, 
                                 hjust = 0.2 , vjust = 0.3)+
  theme(legend.position = "none")

../_images/2c86ee218b9904d520f8fd6a6c3d6c669da96bd0a8e2108253c5faaacec3865a.png

library(palmerpenguins)

penguins %>% 
  ggplot(aes(x = bill_length_mm, y = bill_depth_mm, color = species))+
  geom_point(alpha = 0.3)+
  geom_labelsmooth(aes(label = species), method = "loess", 
                   size = 5, linewidth = 1)+
  scale_color_manual(values = c("forestgreen", "deepskyblue4", "tomato4"))+
  theme(legend.position = "none")

`geom_smooth()` using formula = 'y ~ x'
Warning message:
“Removed 2 rows containing non-finite outside the scale range (`stat_smooth()`).”
Warning message:
“Removed 2 rows containing missing values or values outside the scale range (`geom_point()`).”

../_images/882f01fe525c5fe5b06b330c938b0253204628dcb83526922903f6d0af2dbb50.png

函数图#

stat_function()

有时候我们想画一个函数图，比如正态分布的函数，可能会想到先产生数据，然后画图，比如下面的代码

tibble(x = seq(from = -3, to = 3, by = 0.01)) %>% 
  mutate(y = dnorm(x, mean = 0, sd = 1)) %>% 
  ggplot(aes(x = x, y = y))+
  geom_line(color = "grey33")

../_images/aba66cc52c7351d305dc0bde85ac69c8bcf72a5d4cede828b51c19feebb0ffd4.png

事实上，stat_function()可以简化这个过程

ggplot(data = data.frame(x = c(-3,3)), aes(x = x))+
  stat_function(fun = dnorm)

../_images/94eb1bb4297d844344ab4c4affba29a7f4c98b70be3f6332313adf851569a4fb.png

支持绘制自定义函数

myfun <- function(x){
    (x -1)**2
}

ggplot(data = data.frame(x = c(-3, 3)), aes(x = x))+
  stat_function(fun = myfun, 
                geom = "line", color = "red")

../_images/9a4a1cd5a5fe377c0a7eb64ba8b776f4d0ac90c10afacde4d5f640295cf259d3.png

d <- tibble(x = rnorm(2000, mean = 2, sd = 4))

ggplot(d, aes(x = x))+
  geom_histogram(aes(y = after_stat(density)))+
  geom_density()+
  stat_function(fun = dnorm, args = list(mean = 2, sd = 4),
                color = "red")

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

../_images/c1bb179216c57f604a0f630a50a6ebfce1a7d3e0aaabda11f7f317a0e402409f.png

latex公式#

library(ggplot2)
library(latex2exp)
  
ggplot(mpg, aes(x = displ, y = hwy))+
  geom_point()+
  annotate("text", x = 4, y = 40,
           label = TeX("$\\alpha^2 + $\\theta^2 = \\omega^2 $"),
           size = 9)+
  labs(title = TeX("The ratio of 1 and 2 is $\\,\\, \\frac{1}{2}$"),
       x = TeX("$\\alpha$"),
       y = TeX("$\\alpha^2$"))

Warning message in is.na(x):
“is.na()不适用于类别为'expression'的非串列或非矢量”

../_images/dd00fc77ce6085b9333a15098505635db1f54a07d8b5e4c205142514e2913890.png

“coord_cartesian() 与 scale_x_continuous()”#

乍一看，这两个操作没有区别

p1 <- mtcars %>% 
  ggplot(aes(disp, wt))+
  geom_point()+
  scale_x_continuous(limits = c(325, 500))+
  ggtitle("scale_x_continuous(limits = c(325, 500))")

p2  <- mtcars %>% 
  ggplot(aes(disp, wt))+
  geom_point()+
  coord_cartesian(xlim = c(325, 500))+
  ggtitle("coord_cartesian(xlim = c(325, 500))")


p1 + p2

Warning message:
“Removed 24 rows containing missing values or values outside the scale range (`geom_point()`).”

../_images/b7b6388a6d449df535a885b31bb125f15dd854c72e6afb4eaaa9cb53f9a6cf82.png

实际上这两个操作，区别蛮大的