# install.packages("gtsummary")
gtsummary
Using gtsummary package to create publication ready tables
What is gtsummary?
R tool that creates publication ready tables
Summarizes data sets, regression models and other analysis
Highly customizable

Install gtsummary package
Data Source: gapminder
# install.packages("gapminder")
Load in libraries
library(gtsummary)
library(gapminder)
library(tidyverse)
About gapminder
Code
head(gapminder)
# A tibble: 6 × 6
country continent year lifeExp pop gdpPercap
<fct> <fct> <int> <dbl> <int> <dbl>
1 Afghanistan Asia 1952 28.8 8425333 779.
2 Afghanistan Asia 1957 30.3 9240934 821.
3 Afghanistan Asia 1962 32.0 10267083 853.
4 Afghanistan Asia 1967 34.0 11537966 836.
5 Afghanistan Asia 1972 36.1 13079460 740.
6 Afghanistan Asia 1977 38.4 14880372 786.
Code
str(gapminder)
tibble [1,704 × 6] (S3: tbl_df/tbl/data.frame)
$ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
$ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
$ year : int [1:1704] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
$ lifeExp : num [1:1704] 28.8 30.3 32 34 36.1 ...
$ pop : int [1:1704] 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
$ gdpPercap: num [1:1704] 779 821 853 836 740 ...
Create basic gtsummary table
# Basic summary table: default statistics for three measures in 2007.
table1 <- gapminder %>%
  filter(year == 2007) %>%             # keep only the year of interest
  select(gdpPercap, lifeExp, pop) %>%  # variables to summarise
  tbl_summary()                        # build the gtsummary table
table1
| Characteristic | N = 142¹ |
|---|---|
| gdpPercap | 6,124 (1,625, 18,009) |
| lifeExp | 72 (57, 76) |
| pop | 10,517,531 (4,508,034, 31,210,042) |
| 1 Median (IQR) | |
Customization option: by
by: an argument of tbl_summary() that lets users select which variable they would like to use for group comparison.
- In the example below, we chose continents, but another option could have been countries.
# Same 2007 summary, now split by continent.
table1 <- gapminder %>%
  filter(year == 2007) %>%
  select(gdpPercap, lifeExp, pop, continent) %>%
  tbl_summary(by = continent)  # grouping variable for the comparison
table1
| Characteristic | Africa, N = 52¹ | Americas, N = 25¹ | Asia, N = 33¹ | Europe, N = 30¹ | Oceania, N = 2¹ |
|---|---|---|---|---|---|
| gdpPercap | 1,452 (863, 3,994) | 8,948 (5,728, 11,978) | 4,471 (2,452, 22,316) | 28,054 (14,812, 33,818) | 29,810 (27,498, 32,123) |
| lifeExp | 53 (48, 59) | 73 (72, 76) | 72 (65, 76) | 79 (75, 80) | 81 (80, 81) |
| pop | 10,093,311 (2,909,227, 19,363,655) | 9,319,622 (5,675,356, 28,674,757) | 24,821,286 (6,426,679, 69,453,570) | 9,493,598 (4,780,560, 20,849,695) | 12,274,974 (8,195,372, 16,354,575) |
| 1 Median (IQR) | |||||
Customization option: label, modify_header(), bold_labels()
- label: an argument of tbl_summary() that allows you to rename the variables from their default names within the dataset
- modify_header(): Changes the default header label within your table
- in the example below we changed it to the word “Variables” in bold
- bold_labels(): Makes the variable labels (the row names) bold
# Grouped 2007 summary with readable row labels and a custom header.
table1 <- gapminder %>%
  filter(year == 2007) %>%
  select(gdpPercap, lifeExp, pop, continent) %>%
  tbl_summary(
    by = continent,
    # display names shown in place of the raw column names
    label = list(
      gdpPercap ~ "GDP per capita",
      lifeExp ~ "Life expectancy",
      pop ~ "Population"
    )
  ) %>%
  modify_header(label = "**Variables**") %>%  # retitle the label column
  bold_labels()                               # bold the variable labels
table1
| Variables | Africa, N = 52¹ | Americas, N = 25¹ | Asia, N = 33¹ | Europe, N = 30¹ | Oceania, N = 2¹ |
|---|---|---|---|---|---|
| GDP per capita | 1,452 (863, 3,994) | 8,948 (5,728, 11,978) | 4,471 (2,452, 22,316) | 28,054 (14,812, 33,818) | 29,810 (27,498, 32,123) |
| Life expectancy | 53 (48, 59) | 73 (72, 76) | 72 (65, 76) | 79 (75, 80) | 81 (80, 81) |
| Population | 10,093,311 (2,909,227, 19,363,655) | 9,319,622 (5,675,356, 28,674,757) | 24,821,286 (6,426,679, 69,453,570) | 9,493,598 (4,780,560, 20,849,695) | 12,274,974 (8,195,372, 16,354,575) |
| 1 Median (IQR) | |||||
Customization to change the default statistics: statistic, add_p()
- statistic: an argument of tbl_summary() that specifies which summary statistics to calculate; the default for continuous variables is median (IQR)
- statistic options for continuous variables:
- median, mean, sd, var, min, max, sum, p##: any integer percentile (## is an integer from 0 to 100), foo: any function of the form foo(x)
- statistics options categorical variables:
- n: frequency, N: denominator or sample size, p: percentage
- statistics options continuous variables:
- add_p(): Add in p values as a new column; default adds p-value as last column in table
# Grouped 2007 summary reporting the mean GDP per capita, plus p-values.
table1 <- gapminder %>%
  filter(year == 2007) %>%
  select(gdpPercap, lifeExp, pop, continent) %>%
  tbl_summary(
    by = continent,
    label = list(
      gdpPercap ~ "GDP per capita",
      lifeExp ~ "Life expectancy",
      pop ~ "Population"
    ),
    # show the mean for gdpPercap; other variables keep the default statistic
    statistic = list(gdpPercap ~ "{mean}")
  ) %>%
  modify_header(label = "**Variables**") %>%
  add_p()  # append a p-value column comparing the groups
table1
| Variables | Africa, N = 52¹ | Americas, N = 25¹ | Asia, N = 33¹ | Europe, N = 30¹ | Oceania, N = 2¹ | p-value² |
|---|---|---|---|---|---|---|
| GDP per capita | 3,089 | 11,003 | 12,473 | 25,054 | 29,810 | <0.001 |
| Life expectancy | 53 (48, 59) | 73 (72, 76) | 72 (65, 76) | 79 (75, 80) | 81 (80, 81) | <0.001 |
| Population | 10,093,311 (2,909,227, 19,363,655) | 9,319,622 (5,675,356, 28,674,757) | 24,821,286 (6,426,679, 69,453,570) | 9,493,598 (4,780,560, 20,849,695) | 12,274,974 (8,195,372, 16,354,575) | 0.044 |
| 1 Mean; Median (IQR) | ||||||
| 2 Kruskal-Wallis rank sum test | ||||||
Customization to add a column with overall statistics: add_overall()
- add_overall(): Adds a column summarizing all observations together, ignoring the grouping set with the by argument
# Grouped 2007 summary with an overall column and p-values.
table1 <- gapminder %>%
  filter(year == 2007) %>%
  select(gdpPercap, lifeExp, pop, continent) %>%
  tbl_summary(
    by = continent,
    label = list(
      gdpPercap ~ "GDP per capita",
      lifeExp ~ "Life expectancy",
      pop ~ "Population"
    ),
    statistic = list(gdpPercap ~ "{mean}")
  ) %>%
  modify_header(label = "**Variables**") %>%
  add_overall(last = TRUE) %>%  # overall column placed after the groups
  add_p()
table1
| Variables | Africa, N = 52¹ | Americas, N = 25¹ | Asia, N = 33¹ | Europe, N = 30¹ | Oceania, N = 2¹ | Overall, N = 142¹ | p-value² |
|---|---|---|---|---|---|---|---|
| GDP per capita | 3,089 | 11,003 | 12,473 | 25,054 | 29,810 | 11,680 | <0.001 |
| Life expectancy | 53 (48, 59) | 73 (72, 76) | 72 (65, 76) | 79 (75, 80) | 81 (80, 81) | 72 (57, 76) | <0.001 |
| Population | 10,093,311 (2,909,227, 19,363,655) | 9,319,622 (5,675,356, 28,674,757) | 24,821,286 (6,426,679, 69,453,570) | 9,493,598 (4,780,560, 20,849,695) | 12,274,974 (8,195,372, 16,354,575) | 10,517,531 (4,508,034, 31,210,042) | 0.044 |
| 1 Mean; Median (IQR) | |||||||
| 2 Kruskal-Wallis rank sum test | |||||||
Using gtsummary for regression models
- First a linear regression model needs to be created
- the standard R summary output (shown below) is not publication ready, but the gtsummary package makes it look more polished
# Fit life expectancy on year, continent and their interaction,
# then print the standard base-R model summary.
mod1 <- lm(
  formula = lifeExp ~ year + continent + continent * year,
  data = gapminder
)
summary(mod1)
Call:
lm(formula = lifeExp ~ year + continent + continent * year, data = gapminder)
Residuals:
Min 1Q Median 3Q Max
-28.8854 -4.2696 0.3298 3.9835 21.1306
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -524.25785 32.96343 -15.904 < 2e-16 ***
year 0.28953 0.01665 17.387 < 2e-16 ***
continentAmericas -138.84845 57.85058 -2.400 0.01650 *
continentAsia -312.63305 52.90355 -5.909 4.14e-09 ***
continentEurope 156.84685 54.49776 2.878 0.00405 **
continentOceania 182.34988 171.28299 1.065 0.28720
year:continentAmericas 0.07812 0.02922 2.673 0.00758 **
year:continentAsia 0.16359 0.02672 6.121 1.15e-09 ***
year:continentEurope -0.06760 0.02753 -2.455 0.01417 *
year:continentOceania -0.07926 0.08653 -0.916 0.35980
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 7.18 on 1694 degrees of freedom
Multiple R-squared: 0.6927, Adjusted R-squared: 0.6911
F-statistic: 424.3 on 9 and 1694 DF, p-value: < 2.2e-16
tbl_regression() creates a polished summary table for the linear regression model
- modify_caption(): adds a title to the table
- bold_p(): makes all significant p-values bold
# Turn the fitted model into a formatted regression table.
table2 <- mod1 %>%
  tbl_regression() %>%
  bold_labels() %>%
  bold_p() %>%
  # caption shown above the table
  modify_caption("Table 2: Regression results for Life Expectancy")
table2
| Characteristic | Beta | 95% CI¹ | p-value |
|---|---|---|---|
| year | 0.29 | 0.26, 0.32 | <0.001 |
| continent | |||
| Africa | — | — | |
| Americas | -139 | -252, -25 | 0.016 |
| Asia | -313 | -416, -209 | <0.001 |
| Europe | 157 | 50, 264 | 0.004 |
| Oceania | 182 | -154, 518 | 0.3 |
| year * continent | |||
| year * Americas | 0.08 | 0.02, 0.14 | 0.008 |
| year * Asia | 0.16 | 0.11, 0.22 | <0.001 |
| year * Europe | -0.07 | -0.12, -0.01 | 0.014 |
| year * Oceania | -0.08 | -0.25, 0.09 | 0.4 |
| 1 CI = Confidence Interval | |||
Exporting GT Summary tables to Word Documents
- gtsummary is now compatible with Word Documents.
- These are the steps to follow in order to export your table to a Word document
# Export a gtsummary table to a file.
# as_gt() converts the gtsummary object to a gt table; gt::gtsave() then
# chooses the output format from the extension of `filename`.
# For Word, use the extension .docx;
# alternative options: .html, .png, .pdf, .tex, .rtf
table1 %>%
  as_gt() %>%
  gt::gtsave(filename = "table1.docx")
References
Sjoberg, Daniel D., Joseph Larmarange, Michael Curry, Jessica Lavery, Karissa Whiting, Emily C. Zabor, Xing Bai, et al. 2023. “Gtsummary: Presentation-Ready Data Summary and Analytic Result Tables.” https://cran.r-project.org/web/packages/gtsummary/index.html.