Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[k8s] use ip to get metrics/api in pods #1897

Merged
merged 3 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions javascript/packages/orchestrator/src/metrics/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,14 @@ enum metricKeysMapping {
PeersCount = "sub_libp2p_peers_count",
}

export async function fetchMetrics(metricUri: string): Promise<Metrics> {
export async function fetchMetrics(
metricUri: string,
node_name = "",
): Promise<Metrics> {
let metrics = {}; // empty by default
const debug_msg = node_name ? `[${node_name}]` : "";
try {
debug(`fetching: ${metricUri}`);
debug([debug_msg, `fetching: ${metricUri}`].join(" "));
const fetchResult = await fetch(metricUri, {
signal: TimeoutAbortController(2).signal,
method: "GET",
Expand All @@ -34,6 +38,9 @@ export async function fetchMetrics(metricUri: string): Promise<Metrics> {
});

if (!fetchResult.ok) {
debug(
[debug_msg, `fetch error - status: ${fetchResult.status}`].join(" "),
);
throw new Error(`Error - status: ${fetchResult.status}`);
}

Expand All @@ -43,7 +50,7 @@ export async function fetchMetrics(metricUri: string): Promise<Metrics> {
debug(`ERR: ${err}`);
console.log(
`\n${decorators.red(`Error`)} \t ${decorators.bright(
`fetching metrics from: ${metricUri}`,
`fetching metrics from: ${metricUri} ${debug_msg}`,
)}`,
);
}
Expand Down
12 changes: 8 additions & 4 deletions javascript/packages/orchestrator/src/networkNode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,9 @@ export class NetworkNode implements NetworkNodeInterface {
desiredMetricValue === null ||
compare(comparator!, value, desiredMetricValue)
) {
debug(`value: ${value} ~ desiredMetricValue: ${desiredMetricValue}`);
debug(
`[${this.name}] value: ${value} ~ desiredMetricValue: ${desiredMetricValue}`,
);
return value;
}
}
Expand All @@ -244,7 +246,9 @@ export class NetworkNode implements NetworkNodeInterface {
while (!done && !timedout) {
c++;
await new Promise((resolve) => setTimeout(resolve, 1000));
debug(`fetching metrics - q: ${c} time: ${new Date()}`);
debug(
`[${this.name}] Fetching metrics - q: ${c} time: ${new Date()}`,
);
this.cachedMetrics = await fetchMetrics(this.prometheusUri);
value = this._getMetric(metricName, desiredMetricValue === null);

Expand All @@ -256,7 +260,7 @@ export class NetworkNode implements NetworkNodeInterface {
done = true;
} else {
debug(
`current value: ${value} for metric ${rawMetricName}, keep trying...`,
`[${this.name}] Current value: ${value} for metric ${rawMetricName}, keep trying...`,
);
}
}
Expand All @@ -268,7 +272,7 @@ export class NetworkNode implements NetworkNodeInterface {
setTimeout(() => {
timedout = true;
const err = new Error(
`Timeout(${timeout}), "getting desired metric value ${desiredMetricValue} within ${timeout} secs".`,
`[${this.name}] Timeout(${timeout}), "getting desired metric value ${desiredMetricValue} within ${timeout} secs".`,
);
return resolve(err);
}, timeout * 1000),
Expand Down
10 changes: 7 additions & 3 deletions javascript/packages/orchestrator/src/spawner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,20 @@ export const spawnNode = async (

const endpointPort = RPC_WS_PORT;
if (opts.inCI) {
// UPDATE: 04-10-2024 Since we have had several reports of failures caused by
// being unable to access metrics via DNS, we switched back to using the pod IP.

// In CI we deploy a service (named after the pod) in front of each pod,
// so here we can use that name (as a short DNS name within the namespace) to connect to the pod.
const nodeDns = `${podDef.metadata.name}.${namespace}.svc.cluster.local`;
// const nodeDns = `${podDef.metadata.name}.${namespace}.svc.cluster.local`;
const pod_ip = await client.getNodeIP(node.name);
networkNode = new NetworkNode(
node.name,
WS_URI_PATTERN.replace("{{IP}}", nodeDns).replace(
WS_URI_PATTERN.replace("{{IP}}", pod_ip).replace(
"{{PORT}}",
endpointPort.toString(),
),
METRICS_URI_PATTERN.replace("{{IP}}", nodeDns).replace(
METRICS_URI_PATTERN.replace("{{IP}}", pod_ip).replace(
"{{PORT}}",
PROMETHEUS_PORT.toString(),
),
Expand Down
Loading