Tidy Uniform Manifold Approximation And Projection in R
learn_umaps <- function(data,
id_var_names,
n_neighbors = c(10, 15),
n_components = 2) {
missing_count <- data %>%
select(-any_of(id_var_names)) %>%
as.matrix() %>%
as.numeric() %>%
is.na() %>%
sum()
assert_that(missing_count < 1)
# umap hyperparameters (umap::umap.defaults)
# n_neighbors: n_neighbors
# n_components: n_components (defaults to 2)
# metric: euclidean
# n_epochs: 200
# input: data
# init: spectral
# min_dist: 0.1
# set_op_mix_ratio: 1
# local_connectivity: 1
# bandwidth: 1
# alpha: 1
# gamma: 1
# negative_sample_rate: 5
# a: NA
# b: NA
# spread: 1
# random_state: NA
# transform_state: NA
# knn_repeats: 1
# verbose: FALSE
# umap_learn_args: NA
fit_umap <- function(data, id_var_names, n_neighbors) {
custom_settings <- umap::umap.defaults
custom_settings$n_neighbors <- n_neighbors
custom_settings$n_components <- n_components
umap_object <- data %>%
select(-any_of(id_var_names)) %>%
umap::umap(config = custom_settings, method = "naive")
return(umap_object)
}
# Extract results from the non-tidy objects.
tidy_umap <- function(umap_object, data, id_var_names) {
column_names <- str_c("umap", 1:ncol(umap_object$layout), sep = "")
umap_features <- umap_object$layout %>%
tibble::as_tibble(.name_repair = ~ column_names)
umap_obs <- data %>%
select(any_of(id_var_names)) %>%
bind_cols(umap_features) %>%
mutate(n_neighbors = umap_object$config$n_neighbors)
return(umap_obs)
}
# Everything is mapped over multiple neighbor values.
# You might want to compare small (5) and larger (30) neighborhoods
# because the meaning of this hyperparameter is not very obvious.
umap_output <- n_neighbors %>%
map(fit_umap, data = data, id_var_names = id_var_names) %>%
map_dfr(tidy_umap, data = data, id_var_names = id_var_names)
return(umap_output)
}