# A Simple Way to Visualize Missing Values in R
#' Visualize missing value cells in dataframes.
#'
#' Plots the dataset as a grid of tiles, one tile per cell, where the tile
#' color shows whether the cell is missing (`NA`). Rows of `data` run along
#' the x-axis and variables (columns) along the y-axis, in column order.
#' The subtitle reports the total number of missing cells.
#'
#' Remember to make a distinction between actual and structural
#' missingness due to data-formatting and impossible values.
#' You can also explore the patterns in missingness visually
#' with this function by sorting the dataset differently before plotting.
#'
#' @param data A dataframe.
#' @param title A character string used as the plot title.
#' @param tile_colors A character vector of two colors. The first is used
#'   for present values, the second for missing values.
#' @param text_size A numeric size for the variable names on the y-axis.
#'
#' @return A ggplot2 object of the visualized table.
#'
#' @examples
#' plot_missing(airquality, title = "Missing values in airquality")
#'
#' # Sort first to make missingness patterns easier to spot.
#' sorted <- airquality[order(is.na(airquality$Ozone)), ]
#' plot_missing(sorted, title = "Sorted by missing Ozone")
#' @export
plot_missing <- function(data,
                         title,
                         tile_colors = c("#f2f4fb", "#c30000"),
                         text_size = 9) {
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame.", call. = FALSE)
  }
  if (length(tile_colors) < 2) {
    stop(
      "`tile_colors` must contain two colors (present, missing).",
      call. = FALSE
    )
  }

  # is.na() on a data frame yields a logical matrix with one entry per cell.
  # Testing the cells directly (rather than after coercing to numeric) keeps
  # non-numeric strings from being miscounted as missing.
  missing_mask <- is.na(data)
  missing_count <- sum(missing_mask)

  variable_names <- colnames(data)
  n_rows <- nrow(data)

  # Long-format table of cells. as.vector() unrolls the matrix column by
  # column, so the row index cycles fastest and each variable name repeats
  # once per row. Building this table separately from `data` means columns
  # in `data` named "row", "variable" or "missing" cannot collide with it.
  cells <- data.frame(
    row = rep(seq_len(n_rows), times = length(variable_names)),
    variable = factor(
      rep(variable_names, each = n_rows),
      levels = variable_names
    ),
    missing = as.vector(missing_mask)
  )

  # Name the colors by the logical level they belong to. Unnamed values are
  # assigned to observed levels in order, which would give the "present"
  # color to TRUE when every cell is missing.
  fill_values <- c("FALSE" = tile_colors[[1]], "TRUE" = tile_colors[[2]])

  ggplot2::ggplot(cells) +
    ggplot2::geom_tile(
      ggplot2::aes(x = row, y = variable, fill = missing)
    ) +
    ggplot2::scale_fill_manual(values = fill_values, guide = "none") +
    ggplot2::theme_void() +
    ggplot2::theme(
      # theme_void() removes all axis text; restore only the variable names.
      axis.text.y = ggplot2::element_text(hjust = 1, size = text_size),
      plot.margin = ggplot2::margin(0, 0.5, 0, 0.5, "cm")
    ) +
    ggplot2::scale_x_discrete(position = "top") +
    ggplot2::coord_cartesian(clip = "off") +
    ggplot2::labs(
      title = title,
      subtitle = paste0(missing_count, " missing values")
    )
}