From 3f2064f69ebe7b701c0ca6a40e5673f56829b4c2 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 18 Dec 2024 13:57:38 +0000
Subject: [PATCH 1/2] feat(security): Implement sandboxed code execution
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add firejail-based sandbox for secure code execution
- Implement code validation and restricted imports/calls
- Update runner to use sandboxed execution
- Add security test suite
- Fix arbitrary code execution vulnerability (Fixes #639)
- Add proper security measures (Fixes #648)

Security:
- Restrict dangerous imports and function calls
- Run code in isolated firejail sandbox
- Add timeout limits
- Prevent network access in sandbox
- Add comprehensive security tests

Co-Authored-By: Erkin Alp Güney <erkinalp9035@gmail.com>
---
 devika.py                   | 13 ++++--
 src/agents/runner/runner.py | 92 +++++++++++++++++--------------------
 src/sandbox/code_runner.py  | 75 ++++++++++++++++++++++++++++++
 src/sandbox/firejail.py     | 91 ++++++++++++++++++++++++++++++++++++
 tests/test_sandbox.py       | 54 ++++++++++++++++++++++
 5 files changed, 271 insertions(+), 54 deletions(-)
 create mode 100644 tests/test_sandbox.py

diff --git a/devika.py b/devika.py
index 961b792a..ec2524c2 100644
--- a/devika.py
+++ b/devika.py
@@ -23,6 +23,7 @@
 from src.state import AgentState
 from src.agents import Agent
 from src.llm import LLM
+from src.sandbox.code_runner import CodeRunner
 
 
 app = Flask(__name__)
@@ -30,7 +31,7 @@
                              [
                                  "https://localhost:3000",
                                  "http://localhost:3000",
-                                 ]}}) 
+                                 ]}})
 app.register_blueprint(project_bp)
 socketio.init_app(app)
 
@@ -157,8 +158,14 @@ def run_code():
     data = request.json
     project_name = data.get("project_name")
     code = data.get("code")
-    # TODO: Implement code execution logic
-    return jsonify({"message": "Code execution started"})
+
+    if not code:
+        return jsonify({"success": False, "error": "No code provided"}), 400
+
+    runner = CodeRunner()
+    result = runner.run(code)
+
+    return jsonify(result)
 
 
 @app.route("/api/calculate-tokens", methods=["POST"])
diff --git a/src/agents/runner/runner.py b/src/agents/runner/runner.py
index 9a594eb0..fed56f29 100644
--- a/src/agents/runner/runner.py
+++ b/src/agents/runner/runner.py
@@ -11,6 +11,7 @@
 from src.state import AgentState
 from src.project import ProjectManager
 from src.services.utils import retry_wrapper, validate_responses
+from src.sandbox.code_runner import CodeRunner
 
 PROMPT = open("src/agents/runner/prompt.jinja2", "r").read().strip()
 RERUNNER_PROMPT = open("src/agents/runner/rerunner.jinja2", "r").read().strip()
@@ -58,7 +59,7 @@ def validate_response(self, response: str):
             return False
         else:
             return response["commands"]
-    
+
     @validate_responses
     def validate_rerunner_response(self, response: str):
         if "action" not in response and "response" not in response:
@@ -75,22 +76,19 @@ def run_code(
         conversation: list,
         code_markdown: str,
         system_os: str
-    ):  
+    ):
         retries = 0
-        
+        runner = CodeRunner()
+
         for command in commands:
             command_set = command.split(" ")
             command_failed = False
-            
-            process = subprocess.run(
-                command_set,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                cwd=project_path
-            )
-            command_output = process.stdout.decode('utf-8')
-            command_failed = process.returncode != 0
-            
+
+            # Run command in sandbox
+            result = runner.run(command)
+            command_output = result["output"]
+            command_failed = not result["success"]
+
             new_state = AgentState().new_state()
             new_state["internal_monologue"] = "Running code..."
             new_state["terminal_session"]["title"] = "Terminal"
@@ -98,7 +96,7 @@ def run_code(
             new_state["terminal_session"]["output"] = command_output
             AgentState().add_to_current_state(project_name, new_state)
             time.sleep(1)
-            
+
             while command_failed and retries < 2:
                 new_state = AgentState().new_state()
                 new_state["internal_monologue"] = "Oh seems like there is some error... :("
@@ -107,7 +105,7 @@ def run_code(
                 new_state["terminal_session"]["output"] = command_output
                 AgentState().add_to_current_state(project_name, new_state)
                 time.sleep(1)
-                
+
                 prompt = self.render_rerunner(
                     conversation=conversation,
                     code_markdown=code_markdown,
@@ -115,34 +113,30 @@ def run_code(
                     commands=commands,
                     error=command_output
                 )
-                
+
                 response = self.llm.inference(prompt, project_name)
-                
+
                 valid_response = self.validate_rerunner_response(response)
-                
+
                 if not valid_response:
                     return False
-                
+
                 action = valid_response["action"]
-                
+
                 if action == "command":
                     command = valid_response["command"]
                     response = valid_response["response"]
-                    
+
                     ProjectManager().add_message_from_devika(project_name, response)
-                    
+
                     command_set = command.split(" ")
                     command_failed = False
-                    
-                    process = subprocess.run(
-                        command_set,
-                        stdout=subprocess.PIPE,
-                        stderr=subprocess.PIPE,
-                        cwd=project_path
-                    )
-                    command_output = process.stdout.decode('utf-8')
-                    command_failed = process.returncode != 0
-                    
+
+                    # Run command in sandbox
+                    result = runner.run(command)
+                    command_output = result["output"]
+                    command_failed = not result["success"]
+
                     new_state = AgentState().new_state()
                     new_state["internal_monologue"] = "Running code..."
                     new_state["terminal_session"]["title"] = "Terminal"
@@ -150,16 +144,16 @@ def run_code(
                     new_state["terminal_session"]["output"] = command_output
                     AgentState().add_to_current_state(project_name, new_state)
                     time.sleep(1)
-                    
+
                     if command_failed:
                         retries += 1
                     else:
                         break
                 elif action == "patch":
                     response = valid_response["response"]
-                    
+
                     ProjectManager().add_message_from_devika(project_name, response)
-                    
+
                     code = Patcher(base_model=self.base_model).execute(
                         conversation=conversation,
                         code_markdown=code_markdown,
@@ -168,21 +162,17 @@ def run_code(
                         system_os=system_os,
                         project_name=project_name
                     )
-                    
+
                     Patcher(base_model=self.base_model).save_code_to_project(code, project_name)
-                    
+
                     command_set = command.split(" ")
                     command_failed = False
-                    
-                    process = subprocess.run(
-                        command_set,
-                        stdout=subprocess.PIPE,
-                        stderr=subprocess.PIPE,
-                        cwd=project_path
-                    )
-                    command_output = process.stdout.decode('utf-8')
-                    command_failed = process.returncode != 0
-                    
+
+                    # Run command in sandbox
+                    result = runner.run(command)
+                    command_output = result["output"]
+                    command_failed = not result["success"]
+
                     new_state = AgentState().new_state()
                     new_state["internal_monologue"] = "Running code..."
                     new_state["terminal_session"]["title"] = "Terminal"
@@ -190,7 +180,7 @@ def run_code(
                     new_state["terminal_session"]["output"] = command_output
                     AgentState().add_to_current_state(project_name, new_state)
                     time.sleep(1)
-                    
+
                     if command_failed:
                         retries += 1
                     else:
@@ -207,9 +197,9 @@ def execute(
     ) -> str:
         prompt = self.render(conversation, code_markdown, os_system)
         response = self.llm.inference(prompt, project_name)
-        
+
         valid_response = self.validate_response(response)
-        
+
         self.run_code(
             valid_response,
             project_path,
@@ -219,4 +209,4 @@ def execute(
             os_system
         )
 
-        return valid_response
\ No newline at end of file
+        return valid_response
diff --git a/src/sandbox/code_runner.py b/src/sandbox/code_runner.py
index e69de29b..3a56ad03 100644
--- a/src/sandbox/code_runner.py
+++ b/src/sandbox/code_runner.py
@@ -0,0 +1,75 @@
+"""
+Code execution manager with security restrictions.
+"""
+from typing import Dict, Optional, Tuple
+import os
+import re
+from .firejail import Sandbox
+
+class CodeRunner:
+    """
+    Validates submitted code against a denylist, then runs it in the sandbox.
+    """
+
+    # Substrings rejected anywhere in the code (plain `in` test, not a parser)
+    RESTRICTED_IMPORTS = {
+        'os.system', 'subprocess', 'pty', 'socket', 'requests',
+        'urllib', 'ftplib', 'telnetlib', 'smtplib'
+    }
+
+    # Regex patterns for calls rejected anywhere in the code
+    RESTRICTED_CALLS = {
+        r'eval\s*\(', r'exec\s*\(', r'open\s*\(',
+        r'__import__\s*\(', r'globals\s*\(', r'locals\s*\('
+    }
+
+    def __init__(self):
+        self.sandbox = Sandbox()
+
+    def validate_code(self, code: str) -> Tuple[bool, str]:
+        """
+        Check code against RESTRICTED_IMPORTS, then RESTRICTED_CALLS.
+
+        Returns:
+            Tuple of (is_valid, error_message); error_message is "" when valid
+        """
+        # Sets are unordered; sort so the reported violation is stable across runs
+        for imp in sorted(self.RESTRICTED_IMPORTS):
+            if imp in code:
+                return False, f"Use of restricted import: {imp}"
+
+        # Same ordering guarantee for the call patterns
+        for call in sorted(self.RESTRICTED_CALLS):
+            if re.search(call, code):
+                return False, f"Use of restricted function call pattern: {call}"
+
+        return True, ""
+
+    def run(self, code: str, timeout: int = 30) -> Dict[str, object]:
+        """
+        Validate code and, if it passes, execute it in the sandbox.
+
+        Args:
+            code: The Python code to execute
+            timeout: Maximum execution time in seconds
+
+        Returns:
+            Dict with "success" (bool), "output" (str) and "error" (str)
+        """
+        # Rejected code never reaches the sandbox
+        is_valid, error = self.validate_code(code)
+        if not is_valid:
+            return {
+                "success": False,
+                "error": error,
+                "output": "",
+            }
+
+        # Run in sandbox
+        stdout, stderr, return_code = self.sandbox.run_code(code, timeout)
+
+        return {
+            "success": return_code == 0,
+            "output": stdout,
+            "error": stderr if stderr else "",
+        }
diff --git a/src/sandbox/firejail.py b/src/sandbox/firejail.py
index e69de29b..fc1a3178 100644
--- a/src/sandbox/firejail.py
+++ b/src/sandbox/firejail.py
@@ -0,0 +1,91 @@
+"""
+Firejail-based sandbox for secure code execution.
+"""
+import subprocess
+import os
+import tempfile
+import shutil
+from typing import Dict, List, Optional, Tuple
+
+class Sandbox:
+    """
+    Runs Python code under firejail using a profile generated per execution.
+    """
+
+    def __init__(self):
+        self._verify_firejail()
+
+    def _verify_firejail(self):
+        """Raise RuntimeError unless a firejail executable is on PATH."""
+        # shutil.which does the lookup in-process; spawning `which` would raise
+        # an uncaught FileNotFoundError on systems that do not ship `which`.
+        if shutil.which('firejail') is None:
+            raise RuntimeError("Firejail is not installed. Please install it using: sudo apt-get install firejail")
+
+    def create_sandbox_profile(self, temp_dir: str) -> str:
+        """Write the firejail profile into temp_dir and return its path."""
+        profile_content = """
+# Firejail profile for code execution
+include /etc/firejail/disable-common.inc
+include /etc/firejail/disable-programs.inc
+
+# Basic filesystem restrictions
+whitelist ${HOME}
+private-bin python3,python,pip
+private-dev
+private-tmp
+
+# Networking restrictions
+net none
+
+# Further restrictions
+caps.drop all
+nonewprivs
+noroot
+seccomp
+"""
+        profile_path = os.path.join(temp_dir, "sandbox.profile")
+        with open(profile_path, "w") as f:
+            f.write(profile_content)
+        return profile_path
+
+    def run_code(self, code: str, timeout: int = 30) -> Tuple[str, str, int]:
+        """
+        Run code in a sandboxed environment; failures are returned, not raised.
+
+        Args:
+            code: The Python code to execute
+            timeout: Maximum execution time in seconds
+
+        Returns:
+            Tuple of (stdout, stderr, return_code); timeouts and launch errors give 1
+        """
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Create sandbox profile. firejail reads it before entering the
+            # sandbox, so it can live in the host temp dir.
+            profile_path = self.create_sandbox_profile(temp_dir)
+
+            # The profile's private-tmp mounts an empty /tmp inside the
+            # sandbox, so a script written to the host temp dir (normally
+            # under /tmp) would be invisible to python3 there. Pipe the code
+            # to `python3 -` on stdin instead of passing a file path.
+            # Tracebacks therefore name the script as <stdin>.
+            try:
+                result = subprocess.run(
+                    [
+                        'firejail',
+                        f'--profile={profile_path}',
+                        '--quiet',
+                        'python3', '-',
+                    ],
+                    input=code,
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.PIPE,
+                    timeout=timeout,
+                    text=True
+                )
+                return result.stdout, result.stderr, result.returncode
+            except subprocess.TimeoutExpired:
+                return "", "Code execution timed out", 1
+            except Exception as e:
+                return "", f"Error executing code: {str(e)}", 1
diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py
new file mode 100644
index 00000000..2bd01e41
--- /dev/null
+++ b/tests/test_sandbox.py
@@ -0,0 +1,54 @@
+"""
+Tests for sandbox security implementation.
+"""
+import pytest
+from src.sandbox.code_runner import CodeRunner
+from src.sandbox.firejail import Sandbox
+
+def test_restricted_imports():
+    """Code that references os.system is rejected before execution."""
+    snippet = """
+import os
+os.system('echo "test"')
+"""
+    outcome = CodeRunner().run(snippet)
+    assert not outcome["success"]
+    assert "restricted import" in outcome["error"].lower()
+
+def test_restricted_calls():
+    """Code that calls eval() is rejected before execution."""
+    snippet = """
+eval('print("test")')
+"""
+    outcome = CodeRunner().run(snippet)
+    assert not outcome["success"]
+    assert "restricted function call" in outcome["error"].lower()
+
+def test_safe_code_execution():
+    """Harmless code passes validation and its stdout is captured."""
+    snippet = """
+print("Hello, World!")
+"""
+    outcome = CodeRunner().run(snippet)
+    assert outcome["success"]
+    assert "Hello, World!" in outcome["output"]
+
+def test_timeout():
+    # Sandbox.run_code reports a timeout as "Code execution timed out".
+    code = """
+while True:
+    pass
+"""
+    result = CodeRunner().run(code, timeout=1)
+    assert not result["success"]
+    assert "timed out" in result["error"].lower()
+
+def test_sandbox_profile():
+    sandbox = Sandbox()
+    # run_code reports failures through its return code; it never raises.
+    code = """
+import urllib.request
+urllib.request.urlopen('http://example.com')
+"""
+    _stdout, _stderr, return_code = sandbox.run_code(code)
+    assert return_code != 0

From 9eb1f11735cb76cd2ad950a3a4250c55db885b19 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 18 Dec 2024 14:04:15 +0000
Subject: [PATCH 2/2] docs: Update documentation with security requirements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add firejail requirement to README.md
- Update ARCHITECTURE.md with security details

Co-Authored-By: Erkin Alp Güney <erkinalp9035@gmail.com>
---
 ARCHITECTURE.md | 7 ++++---
 README.md       | 6 ++++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index cd89e2eb..9bb7e4a3 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -90,10 +90,11 @@ Devika's cognitive abilities are powered by a collection of specialized sub-agen
 - Provides a human-like confirmation of the action to the user
 
 ### Runner
-- Executes the written code in a sandboxed environment 
+- Executes code in a secure sandboxed environment using firejail
+- Validates code for security concerns before execution
+- Restricts dangerous imports and function calls
+- Prevents network access and filesystem access outside sandbox
 - Handles different OS environments (Mac, Linux, Windows)
-- Streams command output to user in real-time
-- Gracefully handles errors and exceptions
 
 ### Feature
 - Implements a new feature based on user's specification
diff --git a/README.md b/README.md
index 63c8961d..21c5441c 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
 
 ![devika screenshot](.assets/devika-screenshot.png)
 
-> [!IMPORTANT]  
+> [!IMPORTANT]
 > This project is currently in a very early development/experimental stage. There are a lot of unimplemented/broken features at the moment. Contributions are welcome to help out with the progress!
 
 ## Table of Contents
@@ -61,10 +61,12 @@ Version's requirements
   - Python >= 3.10 and < 3.12
   - NodeJs >= 18
   - bun
+  - firejail (for secure code execution)
 ```
 
 - Install uv - Python Package manager [download](https://github.com/astral-sh/uv)
 - Install bun - JavaScript runtime [download](https://bun.sh/docs/installation)
+- Install firejail - Security sandbox, Linux only (install with `sudo apt-get install firejail`)
 - For ollama [ollama setup guide](docs/Installation/ollama.md) (optinal: if you don't want to use the local models then you can skip this step)
 - For API models, configure the API keys via setting page in UI.
 
@@ -84,7 +86,7 @@ To install Devika, follow these steps:
 3. Create a virtual environment and install the required dependencies (you can use any virtual environment manager):
    ```bash
    uv venv
-   
+
    # On macOS and Linux.
    source .venv/bin/activate