Skip to main content eteppo

Pretty Numbers in R When Simple Rounding Is Not Enough

Published: 2023-08-04
Updated: 2023-08-04

Making numbers of all sizes presentable in tables is surprisingly difficult in R. I ended up writing my own function for this. The key idea is to conditionally use the scientific format for very small and very large numbers.

#' Format numbers for presentation in tables
#'
#' Rounds each value to `digits` significant digits and renders it as text
#' with "," as the decimal mark and "." as the thousands separator. Values
#' whose magnitude lies outside the interval given by `cutoffs` are written
#' in scientific notation; values inside it (and zero) use fixed notation.
#'
#' @param x A numeric vector, or a character vector that is parsed with
#'   `readr::parse_double()` (readr is only needed for character input).
#' @param digits A single number: how many significant digits to keep.
#' @param cutoffs A numeric vector of length two, `c(lower, upper)`. A
#'   non-zero value is shown in scientific notation when its absolute value
#'   is below `lower` or above `upper`. The bounds themselves count as
#'   inside the interval.
#' @return A character vector with one element per element of `x`. Missing
#'   values become `"-"`. Zero-length input returns the single string
#'   `"EMPTY"`, and character input in which nothing parses returns
#'   `"MISSING AFTER PARSING"` for every element.
#' @examples
#' clean_number(c(0, 0.5, 123.456, 1000, NA))
#' clean_number(0.000012345)
clean_number <- function(x, digits = 2, cutoffs = c(0.001, 1000)) {

  stopifnot(
    is.numeric(digits),
    length(digits) == 1,
    !is.na(digits),
    length(cutoffs) == 2,
    is.numeric(cutoffs)
  )

  if (length(x) == 0) {
    # Obvious in the result even if silent in the code.
    return("EMPTY")
  }

  if (is.character(x)) {
    x <- readr::parse_double(x)
    if (all(is.na(x))) {
      return(rep("MISSING AFTER PARSING", times = length(x)))
    }
  }

  # TRUE where scientific notation is wanted, FALSE where fixed notation is
  # wanted, NA where the value (or a cut-off) is missing. Working on the
  # absolute value applies the same interval to negative numbers; zero is
  # never shown in scientific notation.
  magnitude <- abs(x)
  use_scientific <- x != 0 &
    (magnitude < cutoffs[1] | magnitude > cutoffs[2])

  # format() defaults to 7 significant digits, which would silently cut a
  # larger `digits` request. Stay at 7 for small requests and stop at 15,
  # beyond which doubles no longer carry reliable decimal digits.
  format_digits <- min(max(digits, 7), 15)

  format_one <- function(value, scientific) {
    formatted <- format(
      x = signif(value, digits = digits),
      digits = format_digits,
      scientific = scientific,
      justify = "none",
      drop0trailing = TRUE,
      big.mark = ".",
      decimal.mark = ","
    )
    trimws(formatted, which = "both")
  }

  # Format one element at a time: format() gives all elements of a vector a
  # common number of decimals, so a single tiny value would otherwise force
  # extra (noisy) decimals onto every other value.
  clean_numbers <- rep("-", times = length(x))
  known <- which(!is.na(use_scientific))
  clean_numbers[known] <- vapply(
    known,
    function(i) format_one(x[[i]], scientific = use_scientific[[i]]),
    character(1),
    USE.NAMES = FALSE
  )

  clean_numbers

}