Skip to content

Commit

Permalink
fix k6
Browse files Browse the repository at this point in the history
  • Loading branch information
sachaarbonel committed Nov 4, 2024
1 parent 4dd2d66 commit 463c753
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 13 deletions.
32 changes: 20 additions & 12 deletions examples/server/script.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,41 +2,49 @@ import http from "k6/http";
import { check } from "k6";
import { FormData } from "https://jslib.k6.io/formdata/0.0.2/index.js";

// Read the WAV sample in binary mode ("b") at the top level of the script;
// the default function below attaches it to every request it sends.
const audioFile = open("../../samples/jfk.wav", "b"); // 11s audio sample

// k6 load-test options: scenario configuration plus a latency threshold.
// NOTE(review): this listing comes from a diff view, so `ramping_load` (never
// closed here) and `constant_load` appear to be the before/after versions of
// the same scenario — confirm against the actual file.
export const options = {
scenarios: {
ramping_load: {
executor: "ramping-arrival-rate",
startRate: 1,
timeUnit: "1s",
preAllocatedVUs: 8, // Pre-allocate 8 VUs
maxVUs: 8,
stages: [
{ target: 8, duration: "10s" }, // Warm-up: target of 8 requests per second over 10s (startRate is 1)
{ target: 1, duration: "50s" }, // Then a target of 1 request per second over 50s, with at most 8 VUs
],
constant_load: {
executor: "constant-arrival-rate",
rate: 20, // 20 requests per second
timeUnit: "1s", // Defines the time unit for the arrival rate
duration: "1m", // Total duration of the test (adjust as needed)
preAllocatedVUs: 20, // Number of VUs to preallocate
maxVUs: 20, // Maximum number of VUs to allow
},
},
// Optionally, you can define thresholds to monitor p90, p95, etc.
// NOTE(review): k6 documents percentile thresholds in the form "p(90)<2000";
// confirm that the "p90<2000" spelling below is accepted.
thresholds: {
http_req_duration: ["p90<2000"], // Example: p90 should be below 2 seconds (2000 ms)
},
};

// Default export: builds a multipart/form-data body containing the audio
// sample and three decoding fields, POSTs it to the local /inference endpoint,
// and checks that the response status is 200.
// NOTE(review): this listing comes from a diff view — the two `data:` lines and
// the two `http.post(` lines appear to be before/after versions of the same
// statement; confirm against the actual file.
export default function () {
// Initialize FormData for multipart/form-data requests
const fd = new FormData();

// Append the audio file as the "file" part of the form
fd.append("file", {
data: new Uint8Array(audioFile).buffer,
data: audioFile, // Pass the value returned by open() directly, without conversion
filename: "jfk.wav",
content_type: "audio/wav",
});

// Append additional form fields as needed
fd.append("temperature", "0.0");
fd.append("temperature_inc", "0.2");
fd.append("response_format", "json");

const res = http.post("http://127.0.0.1:8080/inference", fd.body(), {
// Perform the HTTP POST request; the Content-Type header carries the form's boundary
const res = http.post("http://127.0.0.1:8081/inference", fd.body(), {
headers: { "Content-Type": "multipart/form-data; boundary=" + fd.boundary },
timeout: "30s", // Set a timeout to prevent hanging requests (adjust as needed)
});

// Validate the response status
check(res, {
"status is 200": (r) => r.status === 200,
});
Expand Down
4 changes: 3 additions & 1 deletion examples/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,10 @@ namespace whisper_server {
size_t available_memory = 0;
size_t total_memory = 0;
cudaMemGetInfo( & available_memory, & total_memory);
// remove 1164MiB to account for the memory used by the system
// available_memory -= 1164 * 1024 * 1024;
int calculated_instances = static_cast<int>(available_memory / MODEL_MEMORY_USAGE);
return std::max(1, std::min(17, calculated_instances));
return std::max(1, std::min(15, calculated_instances));
#elif defined(__APPLE__)
// Fixed number for Apple devices
return 8;
Expand Down

0 comments on commit 463c753

Please sign in to comment.