# Skip to main content eteppo

# A Simple Way to Visualize Missing Values in R

# Published: 2023-08-03
# Updated: 2023-08-03

#' Visualize missing value cells in dataframes.
#'
#' Draws the dataset as a grid of tiles, one tile per cell, with the rows of
#' `data` along the x-axis and its variables along the y-axis. The tile color
#' shows whether the cell is missing (`NA`) or not, and the plot subtitle
#' reports the total number of missing cells.
#'
#' Remember to make a distinction between actual and structural
#' missingness due to data-formatting and impossible values.
#' You can also explore the patterns in missingness visually
#' with this function by sorting the dataset differently before plotting.
#'
#' @param data A dataframe.
#' @param title A character of plot title.
#' @param tile_colors A character vector of two colors (e.g. hex values).
#'   The first one is for observed cells, the second one is for the
#'   missing values.
#' @param text_size A number giving the text size of the variable names
#'   on the y-axis.
#'
#' @return A ggplot2 object of the visualized table.
#'
#' @examples
#' plot_missing(airquality, title = "Missing values in airquality")
#'
#' @export

plot_missing <- function(data,
                         title,
                         tile_colors = c("#f2f4fb", "#c30000"),
                         text_size = 9) {

    stopifnot(is.data.frame(data), length(tile_colors) >= 2)

    # is.na() on a data frame returns a logical matrix with one column per
    # variable. Counting on that matrix (rather than after coercing cells to
    # numeric) keeps non-numeric strings from being counted as missing.
    cell_is_missing <- is.na(data)
    missing_count <- sum(cell_is_missing)

    # One row per cell, built from the missingness matrix only. The matrix is
    # flattened column by column, which matches rep(..., each = nrow(data)).
    # The actual cell values are never reshaped, so mixed column types cause
    # no coercion and a column that is itself called "row" is kept.
    variable_names <- colnames(data)
    tiles <- data.frame(
        row = rep(seq_len(nrow(data)), times = ncol(data)),
        variable = factor(
            rep(variable_names, each = nrow(data)),
            levels = variable_names
        ),
        missing = as.vector(cell_is_missing)
    )

    # Name the colors by level so that the missing color is still used for
    # TRUE when the data contain only missing (or only observed) cells.
    fill_values <- c("FALSE" = tile_colors[[1]], "TRUE" = tile_colors[[2]])

    ggplot2::ggplot(tiles) +
        ggplot2::geom_tile(ggplot2::aes(
            x = row,
            y = variable,
            fill = missing
        )) +
        ggplot2::scale_fill_manual(values = fill_values, guide = "none") +
        ggplot2::theme_void() +
        ggplot2::theme(
            axis.text.y = ggplot2::element_text(hjust = 1, size = text_size),
            plot.margin = ggplot2::margin(0, 0.5, 0, 0.5, "cm")
        ) +
        ggplot2::scale_x_discrete(position = "top") +
        ggplot2::coord_cartesian(clip = "off") +
        ggplot2::labs(
            title = title,
            subtitle = paste0(missing_count, " missing values")
        )
}