Commit
used sysinfo as optional dependency
caglacelik committed Nov 18, 2024
1 parent 2feaf13 commit dbd5637
Showing 2 changed files with 112 additions and 61 deletions.
workflows/Cargo.toml (6 changes: 4 additions & 2 deletions)
@@ -30,8 +30,10 @@ rand.workspace = true
 log.workspace = true
 eyre.workspace = true
 
-# system info
-sysinfo = "0.32.0"
+sysinfo = { version = "0.32.0", optional = true }
+
+[features]
+profiling = ["sysinfo"]
 
 [dev-dependencies]
 # only used for tests
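With `sysinfo` now optional and tied to the new `profiling` feature, the dependency is only compiled when the feature is enabled (e.g. `cargo run --bin tps --features profiling`), so every use of the crate has to be feature-gated, as the tps.rs diff below does. A minimal sketch of that gating pattern, separate from this commit (the `report_memory` helper is hypothetical):

```rust
// Hypothetical illustration of cfg-gating an optional dependency:
// the sysinfo-backed path only exists when built with `--features profiling`.
#[cfg(feature = "profiling")]
fn report_memory() {
    use sysinfo::System;

    // fill all system info, then read the memory counters
    let mut system = System::new_all();
    system.refresh_all();
    println!("used memory: {} / {}", system.used_memory(), system.total_memory());
}

#[cfg(not(feature = "profiling"))]
fn report_memory() {
    // compiled without the `profiling` feature: nothing to report
}

fn main() {
    report_memory();
}
```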
workflows/src/bin/tps.rs (167 changes: 108 additions & 59 deletions)
@@ -1,69 +1,116 @@
+use std::vec;
+
 use dkn_workflows::{DriaWorkflowsConfig, OllamaConfig};
 use ollama_workflows::ollama_rs::{
     generation::{completion::request::GenerationRequest, options::GenerationOptions},
     Ollama,
 };
-use sysinfo::{CpuRefreshKind, RefreshKind, System};
+use ollama_workflows::Model;
+
+#[cfg(feature = "profiling")]
+use sysinfo::{CpuRefreshKind, RefreshKind, System, MINIMUM_CPU_UPDATE_INTERVAL};
 
 #[tokio::main]
 async fn main() {
-    // initialize logger
-    env_logger::init();
+    #[cfg(feature = "profiling")]
+    {
+        // initialize logger
+        env_logger::init();
 
-    let cfg = DriaWorkflowsConfig::new_from_csv("finalend/hermes-3-llama-3.1:8b-q8_0,phi3:14b-medium-4k-instruct-q4_1,phi3:14b-medium-128k-instruct-q4_1,phi3.5:3.8b,phi3.5:3.8b-mini-instruct-fp16,gemma2:9b-instruct-q8_0,gemma2:9b-instruct-fp16,llama3.1:latest,llama3.1:8b-instruct-q8_0,llama3.1:8b-instruct-fp16,llama3.1:70b-instruct-q4_0,llama3.1:70b-instruct-q8_0,llama3.2:1b,llama3.2:3b,qwen2.5:7b-instruct-q5_0,qwen2.5:7b-instruct-fp16,qwen2.5:32b-instruct-fp16,qwen2.5-coder:1.5b,qwen2.5-coder:7b-instruct,llama3.2:3b,qwen2.5-coder:7b-instruct-q8_0,qwen2.5-coder:7b-instruct-fp16,deepseek-coder:6.7b,mixtral:8x7b");
-    let config = OllamaConfig::default();
-    let ollama = Ollama::new(config.host, config.port);
+        let models = vec![
+            Model::NousTheta,
+            Model::Phi3Medium,
+            Model::Phi3Medium128k,
+            Model::Phi3_5Mini,
+            Model::Phi3_5MiniFp16,
+            Model::Gemma2_9B,
+            Model::Gemma2_9BFp16,
+            Model::Llama3_1_8B,
+            Model::Llama3_1_8Bq8,
+            Model::Llama3_1_8Bf16,
+            Model::Llama3_1_8BTextQ4KM,
+            Model::Llama3_1_8BTextQ8,
+            Model::Llama3_1_70B,
+            Model::Llama3_1_70Bq8,
+            Model::Llama3_1_70BTextQ4KM,
+            Model::Llama3_2_1B,
+            Model::Llama3_2_3B,
+            Model::Llama3_2_1BTextQ4KM,
+            Model::Qwen2_5_7B,
+            Model::Qwen2_5_7Bf16,
+            Model::Qwen2_5_32Bf16,
+            Model::Qwen2_5Coder1_5B,
+            Model::Qwen2_5coder7B,
+            Model::Qwen2_5oder7Bq8,
+            Model::Qwen2_5coder7Bf16,
+            Model::DeepSeekCoder6_7B,
+            Model::Mixtral8_7b,
+            Model::GPT4Turbo,
+            Model::GPT4o,
+            Model::GPT4oMini,
+            Model::O1Preview,
+            Model::O1Mini,
+            Model::Gemini15ProExp0827,
+            Model::Gemini15Pro,
+            Model::Gemini15Flash,
+            Model::Gemini10Pro,
+            Model::Gemma2_2bIt,
+            Model::Gemma2_27bIt,
+        ];
 
-    log::info!("Starting...");
-    // ensure that all lists of CPUs and processes are filled
-    let mut system = System::new_all();
-    // update all information of the system
-    system.refresh_all();
+        let cfg = DriaWorkflowsConfig::new(models);
+        let config = OllamaConfig::default();
+        let ollama = Ollama::new(config.host, config.port);
+        log::info!("Starting...");
+        // ensure that all lists of CPUs and processes are filled
+        let mut system = System::new_all();
+        // update all information of the system
+        system.refresh_all();
 
-    log::debug!("Getting system information...");
-    let brand = system.cpus()[0].brand().to_string();
-    let os_name = System::name().unwrap_or_else(|| "Unknown".to_string());
-    let os_version = System::long_os_version().unwrap_or_else(|| "Unknown".to_string());
-    let cpu_usage = system.global_cpu_usage();
-    let total_memory = system.total_memory();
-    let used_memory = system.used_memory();
+        log::debug!("Getting system information...");
+        let brand = system.cpus()[0].brand().to_string();
+        let os_name = System::name().unwrap_or_else(|| "Unknown".to_string());
+        let os_version = System::long_os_version().unwrap_or_else(|| "Unknown".to_string());
+        let cpu_usage = system.global_cpu_usage();
+        let total_memory = system.total_memory();
+        let used_memory = system.used_memory();
 
-    for (_, model) in cfg.models {
-        log::info!("Pulling model: {}", model);
+        for (_, model) in cfg.models {
+            log::info!("Pulling model: {}", model);
 
-        // pull model
-        match ollama.pull_model(model.to_string(), false).await {
-            Ok(status) => log::info!("Status: {}", status.message),
-            Err(err) => {
-                log::error!("Failed to pull model {}: {:?}", model, err);
-            }
-        }
+            // pull model
+            match ollama.pull_model(model.to_string(), false).await {
+                Ok(status) => log::info!("Status: {}", status.message),
+                Err(err) => {
+                    log::error!("Failed to pull model {}: {:?}", model, err);
+                }
+            }
 
-        log::debug!("Creating request...");
-        // create dummy request
-        let mut generation_request =
-            GenerationRequest::new(model.to_string(), "compute 6780 * 1200".to_string());
+            log::debug!("Creating request...");
+            // create dummy request
+            let mut generation_request =
+                GenerationRequest::new(model.to_string(), "compute 6780 * 1200".to_string());
 
-        if let Ok(num_thread) = std::env::var("OLLAMA_NUM_THREAD") {
-            generation_request = generation_request.options(
-                GenerationOptions::default().num_thread(
-                    num_thread
-                        .parse()
-                        .expect("num threads should be a positive integer"),
-                ),
-            );
-        }
+            if let Ok(num_thread) = std::env::var("OLLAMA_NUM_THREAD") {
+                generation_request = generation_request.options(
+                    GenerationOptions::default().num_thread(
+                        num_thread
+                            .parse()
+                            .expect("num threads should be a positive integer"),
+                    ),
+                );
+            }
 
-        // generate response
-        match ollama.generate(generation_request).await {
-            Ok(response) => {
-                log::debug!("Got response for model {}", model);
-                // compute TPS
-                let tps = (response.eval_count.unwrap_or_default() as f64)
-                    / (response.eval_duration.unwrap_or(1) as f64)
-                    * 1_000_000_000f64;
-                // report machine info
-                log::info!(
+            // generate response
+            match ollama.generate(generation_request).await {
+                Ok(response) => {
+                    log::debug!("Got response for model {}", model);
+                    // compute TPS
+                    let tps = (response.eval_count.unwrap_or_default() as f64)
+                        / (response.eval_duration.unwrap_or(1) as f64)
+                        * 1_000_000_000f64;
+                    // report machine info
+                    log::info!(
                     "\n Model: {} \n TPS: {} \n OS: {} {} \n Version: {} \n CPU Usage: % {} \n Total Memory: {} KB \n Used Memory: {} KB ",
                     model,
                     tps,
@@ -74,18 +121,20 @@ async fn main() {
                    total_memory,
                    used_memory,
                );
-            }
-            Err(e) => {
-                log::warn!("Ignoring model {}: Workflow failed with error {}", model, e);
-            }
-        }
-        // refresh CPU usage (https://docs.rs/sysinfo/latest/sysinfo/struct.Cpu.html#method.cpu_usage)
-        system =
-            System::new_with_specifics(RefreshKind::new().with_cpu(CpuRefreshKind::everything()));
-        // wait a bit because CPU usage is based on diff
-        std::thread::sleep(sysinfo::MINIMUM_CPU_UPDATE_INTERVAL);
-        // refresh CPUs again to get actual value
-        system.refresh_cpu_usage();
-    }
+                }
+                Err(e) => {
+                    log::warn!("Ignoring model {}: Workflow failed with error {}", model, e);
+                }
+            }
+            // refresh CPU usage (https://docs.rs/sysinfo/latest/sysinfo/struct.Cpu.html#method.cpu_usage)
+            system = System::new_with_specifics(
+                RefreshKind::new().with_cpu(CpuRefreshKind::everything()),
+            );
+            // wait a bit because CPU usage is based on diff
+            std::thread::sleep(MINIMUM_CPU_UPDATE_INTERVAL);
+            // refresh CPUs again to get actual value
+            system.refresh_cpu_usage();
+        }
+        log::info!("Finished");
+    }
 }
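For reference, the TPS value logged above comes from Ollama's generation metadata: `eval_count` is the number of tokens produced and `eval_duration` is measured in nanoseconds, so multiplying the ratio by 1e9 yields tokens per second. A standalone sketch of that arithmetic (the helper function is hypothetical, mirroring the expression in the diff):

```rust
// Tokens-per-second from Ollama metadata: eval_count tokens over
// eval_duration nanoseconds, scaled to seconds. unwrap_or(1) avoids
// dividing by zero when the duration is missing.
fn tokens_per_second(eval_count: Option<u64>, eval_duration_ns: Option<u64>) -> f64 {
    (eval_count.unwrap_or_default() as f64)
        / (eval_duration_ns.unwrap_or(1) as f64)
        * 1_000_000_000f64
}

fn main() {
    // e.g. 512 tokens generated in 8 s (8e9 ns) -> 64 TPS
    println!("{} TPS", tokens_per_second(Some(512), Some(8_000_000_000)));
}
```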
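The loop tail also shows why the `MINIMUM_CPU_UPDATE_INTERVAL` sleep exists: sysinfo derives CPU usage from the difference between two refreshes, so a single snapshot would read as roughly 0%. A minimal sketch of that sampling pattern, assuming the same sysinfo 0.32 calls the diff uses:

```rust
use sysinfo::{CpuRefreshKind, RefreshKind, System, MINIMUM_CPU_UPDATE_INTERVAL};

fn main() {
    // first measurement point: a System that only tracks CPU data
    let mut system =
        System::new_with_specifics(RefreshKind::new().with_cpu(CpuRefreshKind::everything()));
    // CPU usage is computed as a delta, so wait before sampling again
    std::thread::sleep(MINIMUM_CPU_UPDATE_INTERVAL);
    // second refresh produces a meaningful usage value
    system.refresh_cpu_usage();
    println!("global CPU usage: {:.1}%", system.global_cpu_usage());
}
```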
