Fix Get trained models statistics API types #2763

svalbuena · 2024-08-02T11:13:04Z

Fixes #2762. I tested these changes by downloading the .NET client project, applying the equivalent of the changes made in this spec PR, and using the patched client in the project I'm working on to invoke the GET http://localhost:9200/_ml/trained_models/intfloat__e5-small-v2/_stats endpoint. I now get a successful response.

Sample response of GET http://localhost:9200/_ml/trained_models/intfloat__e5-small-v2/_stats

{
    "count": 1,
    "trained_model_stats": [
        {
            "model_id": "intfloat__e5-small-v2",
            "model_size_stats": {
                "model_size_bytes": 132921240,
                "required_native_memory_bytes": 734682164
            },
            "pipeline_count": 1,
            "ingest": {
                "total": {
                    "count": 0,
                    "time_in_millis": 0,
                    "current": 0,
                    "failed": 0
                },
                "pipelines": {
                    "custom_embedding": {
                        "count": 0,
                        "time_in_millis": 0,
                        "current": 0,
                        "failed": 0,
                        "processors": [
                            {
                                "script": {
                                    "type": "script",
                                    "stats": {
                                        "count": 0,
                                        "time_in_millis": 0,
                                        "current": 0,
                                        "failed": 0
                                    }
                                }
                            },
                            {
                                "foreach": {
                                    "type": "foreach",
                                    "stats": {
                                        "count": 0,
                                        "time_in_millis": 0,
                                        "current": 0,
                                        "failed": 0
                                    }
                                }
                            }
                        ]
                    }
                }
            },
            "inference_stats": {
                "failure_count": 0,
                "inference_count": 13,
                "cache_miss_count": 0,
                "missing_all_fields_count": 0,
                "timestamp": 1722591230386
            },
            "deployment_stats": {
                "deployment_id": "intfloat__e5-small-v2",
                "model_id": "intfloat__e5-small-v2",
                "threads_per_allocation": 1,
                "number_of_allocations": 1,
                "queue_capacity": 1024,
                "state": "started",
                "allocation_status": {
                    "allocation_count": 1,
                    "target_allocation_count": 1,
                    "state": "fully_allocated"
                },
                "cache_size": "126.7mb",
                "priority": "normal",
                "start_time": 1722254553506,
                "inference_count": 13,
                "peak_throughput_per_minute": 5,
                "nodes": [
                    {
                        "node": {
                            "u8L8FqPtSb-eBsbz3yroew": {
                                "name": "d2acd1f5a58c",
                                "ephemeral_id": "q6hDvPcpQJaab7lSOE4sZw",
                                "transport_address": "172.18.0.2:9300",
                                "external_id": "d2acd1f5a58c",
                                "attributes": {
                                    "ml.max_jvm_size": "1073741824",
                                    "ml.allocated_processors_double": "16.0",
                                    "ml.allocated_processors": "16",
                                    "ml.machine_memory": "56860405760",
                                    "transform.config_version": "10.0.0",
                                    "xpack.installed": "true",
                                    "ml.config_version": "12.0.0"
                                },
                                "roles": [
                                    "data",
                                    "data_cold",
                                    "data_content",
                                    "data_frozen",
                                    "data_hot",
                                    "data_warm",
                                    "ingest",
                                    "master",
                                    "ml",
                                    "remote_cluster_client",
                                    "transform"
                                ],
                                "version": "8.12.0",
                                "min_index_version": 7000099,
                                "max_index_version": 8500008
                            }
                        },
                        "routing_state": {
                            "routing_state": "started"
                        },
                        "inference_count": 13,
                        "average_inference_time_ms": 14.615384615384615,
                        "average_inference_time_ms_excluding_cache_hits": 95,
                        "inference_cache_hit_count": 11,
                        "last_access": 1722591142606,
                        "number_of_pending_requests": 0,
                        "start_time": 1722588669854,
                        "threads_per_allocation": 1,
                        "number_of_allocations": 1,
                        "peak_throughput_per_minute": 5,
                        "throughput_last_minute": 2,
                        "average_inference_time_ms_last_minute": 0,
                        "inference_cache_hit_count_last_minute": 2
                    }
                ]
            }
        }
    ]
}

Closes #2770

flobernd

Hi @svalbuena, thanks again for your contribution 🙂 I left some comments.

specification/ml/_types/TrainedModel.ts

flobernd · 2024-08-02T11:25:18Z

specification/ml/_types/TrainedModel.ts

@@ -120,7 +120,7 @@ export class TrainedModelInferenceStats {
  /** The number of inference calls where all the training features for the model were missing. */
  missing_all_fields_count: integer
  /** The time when the statistics were last updated. */
-  timestamp: DateTime
+  timestamp: long


I think DateTime might have been correct here, as it's defined as:

/**
 * A date and time, either as a string whose format can depend on the context (defaulting to ISO 8601), or a
 * number of milliseconds since the Epoch. Elasticsearch accepts both as input, but will generally output a string
 * representation.
 */
export type DateTime = string | EpochTime<UnitMillis>

We have to check the server-code in order to verify if it should be DateTime or just EpochTime<UnitMillis>.

In any case:

The .NET client correctly uses DateTimeOffset as the CLR type, but fails to deserialize it from the epoch-time representation. This is something that must be fixed on the client itself and not in the spec.

Interesting. In this case it seems EpochTime<UnitMillis> is the right type. I've checked other classes that use the same kind of value, such as start_time_in_millis of AsyncSearchResponseBase, and they use EpochTime for the fields that are long. (I couldn't find any example of the start_time field in the tests I've done, nor in https://www.elastic.co/guide/en/elasticsearch/reference/current/async-search.html).

I'll set the field to be EpochTime for now, until you find the right answer or a fix for the .NET client.

Yes, this seems right. I think this should be the corresponding server code:
https://github.com/elastic/elasticsearch/blob/02c494963a59610a0be07c7d54337017a1e5beaf/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/TrainedModelStatsService.java#L282C58-L282C106

So EpochTime<UnitMillis> looks good? Is there anything else to address in this PR?

flobernd

LGTM from my side, thank you!

Let's wait for a second pair of eyes to double-check it.

l-trotta

LGTM as well!

svalbuena · 2024-08-05T18:58:50Z

How can we merge this? @l-trotta @flobernd

pquentin · 2024-08-06T09:10:11Z

@svalbuena Since validation only runs from branches, I opened #2770 with your changes, which allowed validation to pass. Your pull request did not affect the reported failures, since apparently tests did not surface this specific issue. (ML request issues are tracked in #2621.)

Thanks, merging!

* Fix Get trained models statistics API types
* Push schema changes
* Use EpochTime<UnitMillis>

(cherry picked from commit d1a0565)

Fix Get trained models statistics API types

7303fc3

svalbuena requested review from a team as code owners August 2, 2024 11:13

github-actions bot added the specification label Aug 2, 2024

svalbuena mentioned this pull request Aug 2, 2024

MachineLearning.GetTrainedModelsStats throws a deserialization exception part 2 elastic/elasticsearch-net#8281

Closed

Push schema changes

9563caa

svalbuena mentioned this pull request Aug 2, 2024

MachineLearning.GetTrainedModelsStats always throws a deserialization exception elastic/elasticsearch-net#8271

Closed

flobernd reviewed Aug 2, 2024

View reviewed changes

Use EpochTime<UnitMillis>

cabc6a3

svalbuena requested a review from flobernd August 5, 2024 12:03

flobernd requested a review from l-trotta August 5, 2024 13:52

flobernd approved these changes Aug 5, 2024

View reviewed changes

l-trotta approved these changes Aug 5, 2024

View reviewed changes

pquentin mentioned this pull request Aug 6, 2024

Fix Get trained models statistics API types #2770

Closed

pquentin added backport 8.14 backport 8.15 labels Aug 6, 2024

pquentin merged commit d1a0565 into elastic:main Aug 6, 2024
12 of 13 checks passed

github-actions bot pushed a commit that referenced this pull request Aug 6, 2024

Fix Get trained models statistics API types (#2763)

7d40d7f

* Fix Get trained models statistics API types
* Push schema changes
* Use EpochTime<UnitMillis>

(cherry picked from commit d1a0565)

github-actions bot mentioned this pull request Aug 6, 2024

[Backport 8.14] Fix Get trained models statistics API types #2771

Merged

github-actions bot pushed a commit that referenced this pull request Aug 6, 2024

Fix Get trained models statistics API types (#2763)

3e171c2

* Fix Get trained models statistics API types
* Push schema changes
* Use EpochTime<UnitMillis>

(cherry picked from commit d1a0565)

github-actions bot mentioned this pull request Aug 6, 2024

[Backport 8.15] Fix Get trained models statistics API types #2772

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fix Get trained models statistics API types #2763

Fix Get trained models statistics API types #2763

svalbuena commented Aug 2, 2024 •

edited by pquentin

Loading

flobernd left a comment

flobernd Aug 2, 2024 •

edited

Loading

svalbuena Aug 2, 2024 •

edited

Loading

flobernd Aug 2, 2024

svalbuena Aug 2, 2024

flobernd left a comment

l-trotta left a comment

svalbuena commented Aug 5, 2024

pquentin commented Aug 6, 2024 •

edited

Loading

Fix Get trained models statistics API types #2763

Fix Get trained models statistics API types #2763

Conversation

svalbuena commented Aug 2, 2024 • edited by pquentin Loading

flobernd left a comment

Choose a reason for hiding this comment

flobernd Aug 2, 2024 • edited Loading

Choose a reason for hiding this comment

svalbuena Aug 2, 2024 • edited Loading

Choose a reason for hiding this comment

flobernd Aug 2, 2024

Choose a reason for hiding this comment

svalbuena Aug 2, 2024

Choose a reason for hiding this comment

flobernd left a comment

Choose a reason for hiding this comment

l-trotta left a comment

Choose a reason for hiding this comment

svalbuena commented Aug 5, 2024

pquentin commented Aug 6, 2024 • edited Loading

svalbuena commented Aug 2, 2024 •

edited by pquentin

Loading

flobernd Aug 2, 2024 •

edited

Loading

svalbuena Aug 2, 2024 •

edited

Loading

pquentin commented Aug 6, 2024 •

edited

Loading