Merge pull request #104 from firstbatchxyz/erhant/autonat-identify-fixes
use observed_addr, rfk cancellations, smol fixes
erhant authored Aug 27, 2024
2 parents 7fe02d2 + 85e4290 commit 87c1ddb
Showing 17 changed files with 598 additions and 438 deletions.
242 changes: 12 additions & 230 deletions Cargo.lock

Large diffs are not rendered by default.

13 changes: 8 additions & 5 deletions Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "dkn-compute"
version = "0.1.5"
version = "0.1.6"
edition = "2021"
license = "Apache-2.0"
readme = "README.md"
@@ -10,6 +10,9 @@ readme = "README.md"
inherits = "release"
debug = true

[features]
profiling = []

[dependencies]
tokio-util = { version = "0.7.10", features = ["rt"] }
tokio = { version = "1", features = ["macros", "rt-multi-thread", "signal"] }
@@ -28,6 +31,7 @@ url = "2.5.0"
urlencoding = "2.1.3"
uuid = { version = "1.8.0", features = ["v4"] }
rand = "0.8.5"
semver = "1.0.23"

# logging
env_logger = "0.11.3"
@@ -41,10 +45,11 @@ sha3 = "0.10.8"
fastbloom-rs = "0.5.9"

# workflows
ollama-workflows = { git = "https://github.com/andthattoo/ollama-workflows", rev = "25467d2" }
ollama-workflows = { git = "https://github.com/andthattoo/ollama-workflows", rev = "d6b2e1e" }

# peer-to-peer
libp2p = { git = "https://github.com/anilaltuner/rust-libp2p.git", rev = "be2ed55", features = [
# libp2p = { version = "0.54.1", features = [
"dcutr",
"ping",
"relay",
@@ -60,11 +65,9 @@ libp2p = { git = "https://github.com/anilaltuner/rust-libp2p.git", rev = "be2ed5
"quic",
"kad",
] }

libp2p-identity = { version = "0.2.9", features = ["secp256k1", "ed25519"] }
libp2p-identity = { version = "0.2.9", features = ["secp256k1"] }
tracing = { version = "0.1.40" }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
public-ip = "0.2.2"


[dev-dependencies]
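The new `[features] profiling = []` entry above is an empty cargo feature flag. As a rough illustration (not code from this PR; the function names are hypothetical), such a flag is typically consumed with `#[cfg(feature = ...)]` attributes so that profiling-only code is compiled only when `--features=profiling` is passed:

```rust
// Hypothetical sketch: gate profiling-only helpers behind the `profiling` feature.
// Compiled only with `cargo build --features=profiling` (or via the Makefile targets below).
#[cfg(feature = "profiling")]
fn install_profiling_hooks() {
    // e.g. set up extra counters or allocation tracking used only while profiling
    println!("profiling hooks installed");
}

#[cfg(not(feature = "profiling"))]
fn install_profiling_hooks() {
    // no-op in regular release/debug builds
}

fn main() {
    install_profiling_hooks();
}
```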
12 changes: 8 additions & 4 deletions Makefile
@@ -19,15 +19,19 @@ debug:

.PHONY: trace # | Run with crate-level TRACE logging
trace:
RUST_LOG=none,dkn_compute=trace cargo run
RUST_LOG=none,dkn_compute=trace,libp2p=debug cargo run

.PHONY: build # | Build
build:
cargo build

.PHONY: profile # | Profile with flamegraph at dev level
profile:
cargo flamegraph --root --profile=profiling
.PHONY: profile-cpu # | Profile CPU usage with flamegraph
profile-cpu:
cargo flamegraph --root --profile=profiling --features=profiling

.PHONY: profile-mem # | Profile memory usage with instruments
profile-mem:
cargo instruments --profile=profiling --features=profiling -t Leaks

.PHONY: version # | Print version
version:
16 changes: 10 additions & 6 deletions README.md
@@ -28,9 +28,7 @@

## About

A **Dria Compute Node** is a unit of computation within the Dria Knowledge Network. It's purpose is to process tasks given by the **Dria Admin Node**, and receive rewards for providing correct results.

To get started, [setup](#setup) your envrionment and then see [usage](#usage) to run the node.
A **Dria Compute Node** is a unit of computation within the Dria Knowledge Network. Its purpose is to process tasks given by the **Dria Admin Node**. To get started, [setup](#setup) your environment and then see [usage](#usage) to run the node.

### Tasks

@@ -164,6 +162,8 @@ Based on the resources of your machine, you must decide which models that you wi
- `phi3:14b-medium-128k-instruct-q4_1`
- `phi3:3.8b`
- `llama3.1:latest`
- `phi3.5:3.8b`
- `phi3.5:3.8b-mini-instruct-fp16`

#### OpenAI Models

@@ -338,17 +338,21 @@ make format # rustfmt

### Profiling

To create a flamegraph of the application, do:
We would like to profile both CPU and memory usage.

To create a [flamegraph](https://crates.io/crates/flamegraph) of the application, do:

```sh
make profile
make profile-cpu
```

This will create a profiling build that inherits `release` mode, except with debug information.

To profile memory usage, we make use of [cargo-instruments](https://crates.io/crates/cargo-instruments).

> [!NOTE]
>
> Profiling requires superuser access.
> CPU profiling may require super-user access.
## License

5 changes: 4 additions & 1 deletion compose.yml
@@ -4,16 +4,19 @@ services:
image: "firstbatch/dkn-compute-node:latest"
# build: "./" # use this one instead if you want to build locally
environment:
RUST_LOG: ${RUST_LOG:-none,dkn_compute=info}
# Dria
DKN_WALLET_SECRET_KEY: ${DKN_WALLET_SECRET_KEY}
DKN_ADMIN_PUBLIC_KEY: ${DKN_ADMIN_PUBLIC_KEY}
DKN_MODELS: ${DKN_MODELS}
RUST_LOG: ${RUST_LOG-none,dkn_compute=info}
DKN_P2P_LISTEN_ADDR: ${DKN_P2P_LISTEN_ADDR}
DKN_RELAY_NODES: ${DKN_RELAY_NODES}
DKN_BOOTSTRAP_NODES: ${DKN_BOOTSTRAP_NODES}
# Api Keys
OPENAI_API_KEY: ${OPENAI_API_KEY}
SERPER_API_KEY: ${SERPER_API_KEY}
JINA_API_KEY: ${JINA_API_KEY}
# Ollama
OLLAMA_HOST: ${OLLAMA_HOST}
OLLAMA_PORT: ${OLLAMA_PORT}
OLLAMA_AUTO_PULL: ${OLLAMA_AUTO_PULL:-true}
51 changes: 40 additions & 11 deletions src/config/mod.rs
@@ -9,7 +9,7 @@ use ollama::OllamaConfig;
use ollama_workflows::ModelProvider;
use openai::OpenAIConfig;

use std::env;
use std::{env, time::Duration};

#[derive(Debug, Clone)]
pub struct DriaComputeNodeConfig {
@@ -105,34 +105,63 @@ impl DriaComputeNodeConfig {
}
}

/// Check if the required compute services are running, e.g. if Ollama
/// is detected as a provider for the chosen models, it will check that
/// Ollama is running.
pub async fn check_services(&self) -> Result<(), String> {
/// Check if the required compute services are running.
/// This has several steps:
///
/// - If Ollama models are used, hardcoded models are checked locally, and for
/// external models, the workflow is tested with a simple task with timeout.
/// - If OpenAI models are used, the API key is checked and the models are tested
///
/// If both types of models are used, both services are checked.
/// In the end, bad models are filtered out and we simply check whether we are left with any valid models at all.
/// If not, an error is returned.
pub async fn check_services(&mut self) -> Result<(), String> {
log::info!("Checking configured services.");

// TODO: can refactor (provider, model) logic here
let unique_providers = self.model_config.get_providers();

let mut good_models = Vec::new();

// if Ollama is a provider, check that it is running & Ollama models are pulled (or pull them)
if unique_providers.contains(&ModelProvider::Ollama) {
let ollama_models = self
.model_config
.get_models_for_provider(ModelProvider::Ollama);
self.ollama_config
.check(ollama_models.into_iter().map(|m| m.to_string()).collect())

// ensure that the models are pulled / pull them if not
let good_ollama_models = self
.ollama_config
.check(ollama_models, Duration::from_secs(30))
.await?;
good_models.extend(
good_ollama_models
.into_iter()
.map(|m| (ModelProvider::Ollama, m)),
);
}

// if OpenAI is a provider, check that the API key is set
if unique_providers.contains(&ModelProvider::OpenAI) {
let openai_models = self
.model_config
.get_models_for_provider(ModelProvider::OpenAI);
self.openai_config
.check(openai_models.into_iter().map(|m| m.to_string()).collect())
.await?;

let good_openai_models = self.openai_config.check(openai_models).await?;
good_models.extend(
good_openai_models
.into_iter()
.map(|m| (ModelProvider::OpenAI, m)),
);
}

Ok(())
// update good models
if good_models.is_empty() {
return Err("No good models found, please check logs for errors.".into());
} else {
self.model_config.models = good_models;
Ok(())
}
}
}

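To summarize the flow introduced in `check_services` above: probe each configured provider, keep only the models that pass, and fail when nothing remains. Below is a self-contained sketch of that pattern; the types and the pass/fail criterion are stand-ins, not the crate's actual API, and the real implementation defers the per-model checks to `OllamaConfig::check` and `OpenAIConfig::check` as shown in the diff.

```rust
// Self-contained sketch of the "collect good models, fail if none remain" pattern.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Provider {
    Ollama,
    OpenAI,
}

// Stand-in for a real service probe; here a model "passes" if its name is non-empty.
fn check_provider(provider: Provider, models: &[&str]) -> Vec<(Provider, String)> {
    models
        .iter()
        .filter(|m| !m.is_empty()) // placeholder for a real health / pull / API-key check
        .map(|m| (provider, m.to_string()))
        .collect()
}

fn check_services(configured: &[(Provider, &str)]) -> Result<Vec<(Provider, String)>, String> {
    let mut good_models = Vec::new();
    for provider in [Provider::Ollama, Provider::OpenAI] {
        // gather the models configured for this provider
        let models: Vec<&str> = configured
            .iter()
            .filter(|(p, _)| *p == provider)
            .map(|(_, m)| *m)
            .collect();
        if !models.is_empty() {
            good_models.extend(check_provider(provider, &models));
        }
    }
    if good_models.is_empty() {
        Err("No good models found, please check logs for errors.".into())
    } else {
        Ok(good_models)
    }
}

fn main() {
    let configured = [
        (Provider::Ollama, "llama3.1:latest"),
        (Provider::OpenAI, "gpt-4o-mini"),
    ];
    println!("{:?}", check_services(&configured));
}
```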
1 change: 1 addition & 0 deletions src/config/models.rs
@@ -38,6 +38,7 @@ impl ModelConfig {
Self { models }
}

/// Returns the models that belong to a given provider from the config.
pub fn get_models_for_provider(&self, provider: ModelProvider) -> Vec<Model> {
self.models
.iter()
