diff --git a/R/auto_device.R b/R/auto_device.R
index 0220a34..e33bcf0 100644
--- a/R/auto_device.R
+++ b/R/auto_device.R
@@ -1,19 +1,17 @@
 #' @noRd
 # Automatically detect appropriate device
-# Updated 29.01.2024
+# Updated 06.02.2024
 auto_device <- function(device, transformer)
 {
 
   # Set transformer memory (MB)
-  # Numbers derived from overall memory usage on Alex's 1x A6000
-  # Single run of `rag` with each model using 3800 tweets
-  transformer_memory <- round(
-    switch(
-      transformer,
-      "tinyllama" = 5504, "llama-2" = 5964,
-      "mistral-7b" = 30018, "openchat-3.5" = 29238,
-      "orca-2" = 29836, "phi-2" = 13594
-    ), digits = -2
+  # Assume GPU and 16-bit unless otherwise noted
+  transformer_memory <- switch(
+    transformer,
+    "tinyllama" = 2640, "llama-2" = 4200, # supported by 4-bit
+    "mistral-7b" = 16800, "openchat-3.5" = 16800,
+    "orca-2" = 33600, # supported by 32-bit
+    "phi-2" = 6480
   )
 
   # First, check for "auto"
diff --git a/R/rag.R b/R/rag.R
index 8e8c0a4..ae8497e 100644
--- a/R/rag.R
+++ b/R/rag.R
@@ -383,12 +383,6 @@ get_embedding <- function(index, output)
 
   # Loop across documents
   embedding <- do.call(cbind, lapply(output$content$document, index$vector_store$get))
-
-
-
-
-
-
   # Initialize data frame
   content_df <- matrix(
     data = NA, nrow = n_documents, ncol = 3,
@@ -397,7 +391,6 @@ get_embedding <- function(index, output)
     )
   )
 
-
   # Loop over content
   for(i in seq_len(n_documents)){
 
@@ -421,16 +414,77 @@ get_embedding <- function(index, output)
 
 #' @noRd
 # LLAMA-2 ----
-# Updated 28.01.2023
+# Updated 06.02.2024
 setup_llama2 <- function(llama_index, prompt, device)
 {
 
-  # Return model
-  return(
-    llama_index$ServiceContext$from_defaults(
+  # Check for device
+  if(grepl("cuda", device)){
+
+    # Try to setup GPU modules
+    output <- try(setup_gpu_modules(), silent = TRUE)
+
+    # If error, then switch to "cpu"
+    if(is(output, "try-error")){
+      device <- "cpu"
+    }
+
+  }
+
+  # If GPU possible, try different models
+  if(grepl("cuda", device)){
+
+    # Order of models to try
+    MODEL <- c("GPTQ", "AWQ")
+
+    # Loop over and try
+    for(model in MODEL){
+
+      # Set up model
+      model_name <- paste0("TheBloke/Llama-2-7B-Chat-", model)
+
+      # Try to get and load model
+      load_model <- try(
+        llama_index$ServiceContext$from_defaults(
+          llm = llama_index$llms$HuggingFaceLLM(
+            model_name = model_name,
+            tokenizer_name = model_name,
+            query_wrapper_prompt = llama_index$PromptTemplate(
+              paste0(
+                "<|system|>\n", prompt,
+                "\n<|user|>\n{query_str}\n<|assistant|>\n"
+              )
+            ), device_map = device,
+            generate_kwargs = list(
+              temperature = as.double(0.1), do_sample = TRUE
+            )
+          ), context_window = 8192L,
+          embed_model = "local:BAAI/bge-small-en-v1.5"
+        ), silent = TRUE
+      )
+
+      # Check if load model failed
+      if(is(load_model, "try-error")){
+        delete_transformer(gsub("/", "--", model_name), TRUE)
+      }else{ # Successful load, break out of loop
+        break
+      }
+
+    }
+
+    # If by the end, still failing, switch to CPU
+    if(is(load_model, "try-error")){
+      device <- "cpu"
+    }
+
+  }
+
+  # Use CPU model
+  if(device == "cpu"){
+    load_model <- llama_index$ServiceContext$from_defaults(
       llm = llama_index$llms$HuggingFaceLLM(
-        model_name = "TheBloke/Llama-2-7b-Chat-AWQ",
-        tokenizer_name = "TheBloke/Llama-2-7b-Chat-AWQ",
+        model_name = "TheBloke/Llama-2-7B-Chat-fp16",
+        tokenizer_name = "TheBloke/Llama-2-7B-Chat-fp16",
         query_wrapper_prompt = llama_index$PromptTemplate(
           paste0(
             "<|system|>\n", prompt,
@@ -438,18 +492,21 @@ setup_llama2 <- function(llama_index, prompt, device)
           )
         ), device_map = device,
         generate_kwargs = list(
-          "temperature" = as.double(0.1), do_sample = TRUE
+          temperature = as.double(0.1), do_sample = TRUE
         )
       ), context_window = 8192L,
       embed_model = "local:BAAI/bge-small-en-v1.5"
     )
-  )
+  }
+
+  # Return model
+  return(load_model)
 
 }
 
 #' @noRd
 # Mistral-7B ----
-# Updated 28.01.2023
+# Updated 28.01.2024
 setup_mistral <- function(llama_index, prompt, device)
 {
 
@@ -461,7 +518,7 @@ setup_mistral <- function(llama_index, prompt, device)
         tokenizer_name = "mistralai/Mistral-7B-v0.1",
         device_map = device,
         generate_kwargs = list(
-          "temperature" = as.double(0.1), do_sample = TRUE,
+          temperature = as.double(0.1), do_sample = TRUE,
           pad_token_id = 2L, eos_token_id = 2L
         )
       ), context_window = 8192L,
@@ -473,7 +530,7 @@ setup_mistral <- function(llama_index, prompt, device)
 
 #' @noRd
 # OpenChat-3.5 ----
-# Updated 28.01.2023
+# Updated 28.01.2024
 setup_openchat <- function(llama_index, prompt, device)
 {
 
@@ -485,7 +542,7 @@ setup_openchat <- function(llama_index, prompt, device)
         tokenizer_name = "openchat/openchat_3.5",
         device_map = device,
         generate_kwargs = list(
-          "temperature" = as.double(0.1), do_sample = TRUE
+          temperature = as.double(0.1), do_sample = TRUE
         )
       ), context_window = 8192L,
       embed_model = "local:BAAI/bge-small-en-v1.5"
@@ -496,7 +553,7 @@ setup_openchat <- function(llama_index, prompt, device)
 
 #' @noRd
 # Orca-2 ----
-# Updated 28.01.2023
+# Updated 28.01.2024
 setup_orca2 <- function(llama_index, prompt, device)
 {
 
@@ -508,7 +565,7 @@ setup_orca2 <- function(llama_index, prompt, device)
         tokenizer_name = "microsoft/Orca-2-7b",
         device_map = device,
         generate_kwargs = list(
-          "temperature" = as.double(0.1), do_sample = TRUE
+          temperature = as.double(0.1), do_sample = TRUE
         )
       ), context_window = 4096L,
       embed_model = "local:BAAI/bge-small-en-v1.5"
@@ -519,7 +576,7 @@ setup_orca2 <- function(llama_index, prompt, device)
 
 #' @noRd
 # Phi-2 ----
-# Updated 28.01.2023
+# Updated 28.01.2024
 setup_phi2 <- function(llama_index, prompt, device)
 {
 
@@ -531,7 +588,7 @@ setup_phi2 <- function(llama_index, prompt, device)
         tokenizer_name = "microsoft/phi-2",
         device_map = device,
         generate_kwargs = list(
-          "temperature" = as.double(0.1), do_sample = TRUE,
+          temperature = as.double(0.1), do_sample = TRUE,
           pad_token_id = 2L, eos_token_id = 2L
         )
       ), context_window = 2048L,
@@ -543,7 +600,7 @@ setup_phi2 <- function(llama_index, prompt, device)
 
 #' @noRd
 # TinyLLAMA ----
-# Updated 28.01.2023
+# Updated 28.01.2024
 setup_tinyllama <- function(llama_index, prompt, device)
 {
 
diff --git a/R/setup_gpu_modules.R b/R/setup_gpu_modules.R
index 470280e..0f54264 100644
--- a/R/setup_gpu_modules.R
+++ b/R/setup_gpu_modules.R
@@ -9,15 +9,20 @@
 #' @export
 #'
 # Install GPU modules
-# Updated 03.02.2024
+# Updated 06.02.2024
 setup_gpu_modules <- function()
 {
 
   # Set necessary modules
   modules <- c(
-    "autoawq"
+    "autoawq", "auto-gptq", "optimum"
   )
 
+  # Check for Linux
+  if(system.check()$OS == "linux"){
+    modules <- c(modules, "llama-cpp-python")
+  }
+
   # Determine whether any modules need to be installed
   installed_modules <- reticulate::py_list_packages(envname = "transforEmotion")
 
diff --git a/R/setup_miniconda.R b/R/setup_miniconda.R
index 837debd..36d436c 100644
--- a/R/setup_miniconda.R
+++ b/R/setup_miniconda.R
@@ -22,22 +22,22 @@ conda_check <- function(){
 #'
 #' @author Alexander P. Christensen
 #' Aleksandar Tomašević
-#' 
+#'
 #' @export
 #'
 # Install miniconda
 # Updated 15.11.2023
 setup_miniconda <- function()
 {
-  
+
   # Install miniconda
   path_to_miniconda <- try(
     install_miniconda(),
     silent = TRUE
   )
-  
-  if(any(class(path_to_miniconda) != "try-error")){ 
-    message("\nTo uninstall miniconda, use `reticulate::miniconda_uninstall()`") 
+
+  if(any(class(path_to_miniconda) != "try-error")){
+    message("\nTo uninstall miniconda, use `reticulate::miniconda_uninstall()`")
   }
 
   # Create transformEmotion enviroment if it doesn't exist
@@ -52,7 +52,7 @@ setup_miniconda <- function()
 
   # Activate the environment
   reticulate::use_condaenv("transforEmotion", required = TRUE)
-  
+
   print("Installing missing Python libraries...")
   setup_modules()
 }