Merge pull request #104 from firstbatchxyz/erhant/autonat-identify-fixes
use observed_addr, rfk cancellations, smol fixes
erhant authored Aug 27, 2024
2 parents 7fe02d2 + 85e4290 commit 87c1ddb
Showing 17 changed files with 598 additions and 438 deletions.
242 changes: 12 additions & 230 deletions Cargo.lock

Large diffs are not rendered by default.

13 changes: 8 additions & 5 deletions Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "dkn-compute"
version = "0.1.5"
version = "0.1.6"
edition = "2021"
license = "Apache-2.0"
readme = "README.md"
@@ -10,6 +10,9 @@ readme = "README.md"
inherits = "release"
debug = true

[features]
profiling = []

[dependencies]
tokio-util = { version = "0.7.10", features = ["rt"] }
tokio = { version = "1", features = ["macros", "rt-multi-thread", "signal"] }
@@ -28,6 +31,7 @@ url = "2.5.0"
urlencoding = "2.1.3"
uuid = { version = "1.8.0", features = ["v4"] }
rand = "0.8.5"
semver = "1.0.23"

# logging
env_logger = "0.11.3"
@@ -41,10 +45,11 @@ sha3 = "0.10.8"
fastbloom-rs = "0.5.9"

# workflows
ollama-workflows = { git = "https://github.com/andthattoo/ollama-workflows", rev = "25467d2" }
ollama-workflows = { git = "https://github.com/andthattoo/ollama-workflows", rev = "d6b2e1e" }

# peer-to-peer
libp2p = { git = "https://github.com/anilaltuner/rust-libp2p.git", rev = "be2ed55", features = [
# libp2p = { version = "0.54.1", features = [
"dcutr",
"ping",
"relay",
@@ -60,11 +65,9 @@ libp2p = { git = "https://github.com/anilaltuner/rust-libp2p.git", rev = "be2ed5
"quic",
"kad",
] }

libp2p-identity = { version = "0.2.9", features = ["secp256k1", "ed25519"] }
libp2p-identity = { version = "0.2.9", features = ["secp256k1"] }
tracing = { version = "0.1.40" }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
public-ip = "0.2.2"


[dev-dependencies]
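The new `[features] profiling = []` entry above is an empty cargo feature flag. As a rough illustration (not code from this PR; the function names are hypothetical), such a flag is typically consumed with `#[cfg(feature = ...)]` attributes so that profiling-only code is compiled only when `--features=profiling` is passed:

```rust
// Hypothetical sketch: gate profiling-only helpers behind the `profiling` feature.
// Compiled only with `cargo build --features=profiling` (or via the Makefile targets below).
#[cfg(feature = "profiling")]
fn install_profiling_hooks() {
    // e.g. set up extra counters or allocation tracking used only while profiling
    println!("profiling hooks installed");
}

#[cfg(not(feature = "profiling"))]
fn install_profiling_hooks() {
    // no-op in regular release/debug builds
}

fn main() {
    install_profiling_hooks();
}
```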
12 changes: 8 additions & 4 deletions Makefile
@@ -19,15 +19,19 @@ debug:

.PHONY: trace # | Run with crate-level TRACE logging
trace:
RUST_LOG=none,dkn_compute=trace cargo run
RUST_LOG=none,dkn_compute=trace,libp2p=debug cargo run

.PHONY: build # | Build
build:
cargo build

.PHONY: profile # | Profile with flamegraph at dev level
profile:
cargo flamegraph --root --profile=profiling
.PHONY: profile-cpu # | Profile CPU usage with flamegraph
profile-cpu:
cargo flamegraph --root --profile=profiling --features=profiling

.PHONY: profile-mem # | Profile memory usage with instruments
profile-mem:
cargo instruments --profile=profiling --features=profiling -t Leaks

.PHONY: version # | Print version
version:
16 changes: 10 additions & 6 deletions README.md
@@ -28,9 +28,7 @@

## About

A **Dria Compute Node** is a unit of computation within the Dria Knowledge Network. It's purpose is to process tasks given by the **Dria Admin Node**, and receive rewards for providing correct results.

To get started, [setup](#setup) your envrionment and then see [usage](#usage) to run the node.
A **Dria Compute Node** is a unit of computation within the Dria Knowledge Network. Its purpose is to process tasks given by the **Dria Admin Node**. To get started, [setup](#setup) your environment and then see [usage](#usage) to run the node.

### Tasks

@@ -164,6 +162,8 @@ Based on the resources of your machine, you must decide which models that you wi
- `phi3:14b-medium-128k-instruct-q4_1`
- `phi3:3.8b`
- `llama3.1:latest`
- `phi3.5:3.8b`
- `phi3.5:3.8b-mini-instruct-fp16`

#### OpenAI Models

@@ -338,17 +338,21 @@ make format # rustfmt

### Profiling

To create a flamegraph of the application, do:
We would like to profile both CPU and memory usage.

To create a [flamegraph](https://crates.io/crates/flamegraph) of the application, do:

```sh
make profile
make profile-cpu
```

This will create a profiling build that inherits `release` mode, except with debug information.

To profile memory usage, we make use of [cargo-instruments](https://crates.io/crates/cargo-instruments).

> [!NOTE]
>
> Profiling requires superuser access.
> CPU profiling may require super-user access.
## License

5 changes: 4 additions & 1 deletion compose.yml
@@ -4,16 +4,19 @@ services:
image: "firstbatch/dkn-compute-node:latest"
# build: "./" # use this one instead if you want to build locally
environment:
RUST_LOG: ${RUST_LOG:-none,dkn_compute=info}
# Dria
DKN_WALLET_SECRET_KEY: ${DKN_WALLET_SECRET_KEY}
DKN_ADMIN_PUBLIC_KEY: ${DKN_ADMIN_PUBLIC_KEY}
DKN_MODELS: ${DKN_MODELS}
RUST_LOG: ${RUST_LOG-none,dkn_compute=info}
DKN_P2P_LISTEN_ADDR: ${DKN_P2P_LISTEN_ADDR}
DKN_RELAY_NODES: ${DKN_RELAY_NODES}
DKN_BOOTSTRAP_NODES: ${DKN_BOOTSTRAP_NODES}
# Api Keys
OPENAI_API_KEY: ${OPENAI_API_KEY}
SERPER_API_KEY: ${SERPER_API_KEY}
JINA_API_KEY: ${JINA_API_KEY}
# Ollama
OLLAMA_HOST: ${OLLAMA_HOST}
OLLAMA_PORT: ${OLLAMA_PORT}
OLLAMA_AUTO_PULL: ${OLLAMA_AUTO_PULL:-true}
51 changes: 40 additions & 11 deletions src/config/mod.rs
@@ -9,7 +9,7 @@ use ollama::OllamaConfig;
use ollama_workflows::ModelProvider;
use openai::OpenAIConfig;

use std::env;
use std::{env, time::Duration};

#[derive(Debug, Clone)]
pub struct DriaComputeNodeConfig {
@@ -105,34 +105,63 @@ impl DriaComputeNodeConfig {
}
}

/// Check if the required compute services are running, e.g. if Ollama
/// is detected as a provider for the chosen models, it will check that
/// Ollama is running.
pub async fn check_services(&self) -> Result<(), String> {
/// Check if the required compute services are running.
/// This has several steps:
///
/// - If Ollama models are used, hardcoded models are checked locally, and for
/// external models, the workflow is tested with a simple task with timeout.
/// - If OpenAI models are used, the API key is checked and the models are tested
///
/// If both types of models are used, both services are checked.
/// In the end, bad models are filtered out and we simply check whether we are left with any valid models at all.
/// If not, an error is returned.
pub async fn check_services(&mut self) -> Result<(), String> {
log::info!("Checking configured services.");

// TODO: can refactor (provider, model) logic here
let unique_providers = self.model_config.get_providers();

let mut good_models = Vec::new();

// if Ollama is a provider, check that it is running & Ollama models are pulled (or pull them)
if unique_providers.contains(&ModelProvider::Ollama) {
let ollama_models = self
.model_config
.get_models_for_provider(ModelProvider::Ollama);
self.ollama_config
.check(ollama_models.into_iter().map(|m| m.to_string()).collect())

// ensure that the models are pulled / pull them if not
let good_ollama_models = self
.ollama_config
.check(ollama_models, Duration::from_secs(30))
.await?;
good_models.extend(
good_ollama_models
.into_iter()
.map(|m| (ModelProvider::Ollama, m)),
);
}

// if OpenAI is a provider, check that the API key is set
if unique_providers.contains(&ModelProvider::OpenAI) {
let openai_models = self
.model_config
.get_models_for_provider(ModelProvider::OpenAI);
self.openai_config
.check(openai_models.into_iter().map(|m| m.to_string()).collect())
.await?;

let good_openai_models = self.openai_config.check(openai_models).await?;
good_models.extend(
good_openai_models
.into_iter()
.map(|m| (ModelProvider::OpenAI, m)),
);
}

Ok(())
// update good models
if good_models.is_empty() {
return Err("No good models found, please check logs for errors.".into());
} else {
self.model_config.models = good_models;
Ok(())
}
}
}

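To summarize the flow introduced in `check_services` above: probe each configured provider, keep only the models that pass, and fail when nothing remains. Below is a self-contained sketch of that pattern; the types and the pass/fail criterion are stand-ins, not the crate's actual API, and the real implementation defers the per-model checks to `OllamaConfig::check` and `OpenAIConfig::check` as shown in the diff.

```rust
// Self-contained sketch of the "collect good models, fail if none remain" pattern.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Provider {
    Ollama,
    OpenAI,
}

// Stand-in for a real service probe; here a model "passes" if its name is non-empty.
fn check_provider(provider: Provider, models: &[&str]) -> Vec<(Provider, String)> {
    models
        .iter()
        .filter(|m| !m.is_empty()) // placeholder for a real health / pull / API-key check
        .map(|m| (provider, m.to_string()))
        .collect()
}

fn check_services(configured: &[(Provider, &str)]) -> Result<Vec<(Provider, String)>, String> {
    let mut good_models = Vec::new();
    for provider in [Provider::Ollama, Provider::OpenAI] {
        // gather the models configured for this provider
        let models: Vec<&str> = configured
            .iter()
            .filter(|(p, _)| *p == provider)
            .map(|(_, m)| *m)
            .collect();
        if !models.is_empty() {
            good_models.extend(check_provider(provider, &models));
        }
    }
    if good_models.is_empty() {
        Err("No good models found, please check logs for errors.".into())
    } else {
        Ok(good_models)
    }
}

fn main() {
    let configured = [
        (Provider::Ollama, "llama3.1:latest"),
        (Provider::OpenAI, "gpt-4o-mini"),
    ];
    println!("{:?}", check_services(&configured));
}
```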
1 change: 1 addition & 0 deletions src/config/models.rs
@@ -38,6 +38,7 @@ impl ModelConfig {
Self { models }
}

/// Returns the models that belong to a given provider from the config.
pub fn get_models_for_provider(&self, provider: ModelProvider) -> Vec<Model> {
self.models
.iter()
