R example¶
Setup a python environment¶
If you already have one just skip this!
Using VENV¶
If you don’t have Anaconda installed (or know what that is) go with the below option:
virtualenv_create(
envname = "ml",
python = NULL,
packages = "scivae",
system_site_packages = getOption("reticulate.virtualenv.system_site_packages",
default = FALSE)
)
You only need to do the above once. This creates a new environment for you and then you can install as below:
use_virtualenv("ml", required = TRUE)
Using Conda¶
Create a new conda environment called ml and install scivae into it.
use_condaenv(condaEnvName, required = TRUE)
library(tidyverse)
library(dplyr)
library(reticulate)
# If things fail here it's because you need to the steps above
use_condaenv("ml", required = TRUE) # OR use_virtualenv("ml", required = TRUE) # depending on how you installed it!
scivae <<- import("scivae") # Make global
df <- read_csv("iris.csv")
labels <- df$label # Keep for later
# Now we want the dataset not to have the gene ID column (i.e. just to be the numeric values)
df_mat <- df %>% select(!(label))
df_mat <- as.matrix(df_mat)
vae <- scivae$VAE(df_mat, df_mat, labels, "config.json", 'vae_rcm', config_as_str=T)
vae$encode('default')
vae$save()
# Load saved data
vae$load()
# Now let's run the VAE on the data
data <- vae$encode_new_data(df_mat, encoding_type="z", scale=T)
# Add in the columns to the old DF
df$VAE0 <- data[, 1]
df$VAE1 <- data[, 2]
df$VAE2 <- data[, 3]
vis <- scivae$Vis(vae, vae$u, NULL)
cols <- c("sepal_length", "sepal_width")
vis$plot_feature_scatters(df, 'label', columns=cols, show_plt=F, fig_type="png", save_fig=T, vae_data=data,
title="cX DepthshadeTrue latent space")
vis$plot_node_hists(show_plt=F, save_fig=T)
vis$plot_node_hists(show_plt=F, save_fig=T, method="z_mean")
vis$plot_node_hists(show_plt=F, save_fig=T, method="z_log_var")
vis$plot_node_feature_correlation(df, 'label', columns=cols, show_plt=F, save_fig=T, vae_data=data)
vis$plot_node_correlation(show_plt=F, save_fig=T)
vis$plot_input_distribution(df, show_plt=F, save_fig=T)
vis$plot_top_values_by_rank(df, c("VAE0", "VAE1", "VAE2"), cols, "label", num_values=as.integer(10), cluster_rows=F)