Skip to main content eteppo

Venn Diagrams for Tidy Data in R

Published: 2023-08-04
Updated: 2023-08-04
# Draw a Venn diagram from tidy set data.
#
# The data is tidy in that each set is a binary variable: each unit has an ID
# and TRUE or FALSE on each set, so an ID can belong to any combination of
# sets. ggVennDiagram wants the sets as a list of member vectors instead, so
# the data is converted with listify_set_data() before plotting.
#
# Arguments:
#   data               Data frame with one row per unit.
#   title              Plot title, passed to labs().
#   unit_variable      Unquoted name of the column holding the unit IDs.
#   set_variable_names Character vector naming the binary set columns.
#   category_names     Labels shown for the sets; defaults to the column names.
#   category_name_size Text size of the set labels (ggVennDiagram's set_size).
#   margin_gap         Margin, in cm, applied to all four sides of the plot.
#
# Returns the plot object produced by ggVennDiagram with the layers below
# added to it.
plot_venn_diagram <- function(data,
                              title,
                              unit_variable,
                              set_variable_names,
                              category_names = set_variable_names,
                              category_name_size = 4,
                              margin_gap = 2) {

  # A white background whose border is white as well ('color' is the border).
  white_background <- ggplot2::element_rect(fill = "white", color = "white")

  venn_plot <- data %>%
    listify_set_data(
      unit_variable = {{ unit_variable }},
      set_variable_names = set_variable_names
    ) %>%
    ggVennDiagram::ggVennDiagram(
      set_size = category_name_size,
      label_color = "black",
      label_alpha = 0.5,
      label_size = 4,
      edge_size = 0.1,
      category.names = category_names
    ) +
    # Improve the default theme a little bit.
    # Turn clipping off so that nothing drawn outside the panel is cut away.
    ggplot2::coord_sf(clip = "off") +
    # White-to-black fill without a legend. The lower limit is pinned to 0 and
    # the upper limit is left to the data. The argument is spelled out in full
    # ('limits') rather than relying on partial argument matching.
    ggplot2::scale_fill_gradient(
      low = "white",
      high = "black",
      guide = "none",
      limits = c(0, NA)
    ) +
    ggplot2::labs(title = title) +
    ggplot2::theme(
      panel.background = white_background,
      plot.background = white_background,
      text = ggplot2::element_text(lineheight = 0.25),
      plot.title = ggplot2::element_text(size = 15),
      plot.margin = grid::unit(rep(margin_gap, times = 4), "cm")
    )

  venn_plot

}

# Convert tidy set data into the list format that ggVennDiagram wants.
#
# Our data has a binary indicator column for each set, whereas the package
# expects a named list with one vector of member IDs per set.
#
# Arguments:
#   data               Data frame with one row per unit.
#   unit_variable      Unquoted name of the column holding the unit IDs.
#   set_variable_names Character vector naming the binary set columns.
#
# Returns a list named after set_variable_names. Each element holds the IDs of
# the units whose indicator equals TRUE for that set, passed through
# remove_na().
listify_set_data <- function(data, unit_variable, set_variable_names) {

  # Pull the ID column out once. Subsetting it per set (below) works for any
  # ID type (character, integer, numeric, ...), where if_else() against
  # NA_character_ would only accept character IDs.
  unit_ids <- pull(data, {{ unit_variable }})

  # Sets of factor IDs are handed over as plain character labels.
  if (is.factor(unit_ids)) {
    unit_ids <- as.character(unit_ids)
  }

  list_data <- data %>%
    select(all_of(set_variable_names)) %>%
    as.list() %>%
    # which() skips FALSE and NA indicators, so only units explicitly flagged
    # TRUE are kept as members.
    map(function(is_member) unit_ids[which(is_member == TRUE)]) %>%
    # remove_na() is defined outside this chunk; presumably it drops NA
    # elements, which here can only be missing IDs -- TODO confirm.
    map(remove_na)

  list_data

}