Skip to content

Commit

Permalink
CA-404597 - rrd: Fix incorrect processing of Gauge and Absolute data …
Browse files Browse the repository at this point in the history
…sources (#6233)

Some recent changes related to RRDs likely exposed a long-standing
latent issue
where the RRD library would process the passed-in values for Gauge and
Absolute
data sources incorrectly leading to constant values changing from update
to
update, for example:
```
$ rrd2csv memory_total_kib
timestamp, AVERAGE:host:8b533333-91e1-4698-bd17-95b9732ffbb6:memory_total_kib
2025-01-15T08:41:40Z, 33351000
2025-01-15T08:41:45Z, 33350000
2025-01-15T08:41:50Z, 33346000
2025-01-15T08:41:55Z, 33352000
```

Instead of treating Gauge and Absolute data sources as a
variation on the rate-based Derive data source type, expecting
time-based
calculations to cancel each other out, do not undertake any calculations
on
non-rate data sources at all.

First commit adds a failing unit test, second makes it pass.

===

I've verified these changes through manual testing, they've also passed
the testcases that discovered this issue: SNMP memory testcases (JobIDs
4197305, 4196759, 4196744) and ShimMemory testcase (4197050). This
branch also passed Ring3 BST+BVT (210577)
  • Loading branch information
last-genius authored Jan 16, 2025
2 parents 6c1e7ea + 73ca3cc commit 43d01ca
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 11 deletions.
36 changes: 26 additions & 10 deletions ocaml/libs/xapi-rrd/lib/rrd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -341,9 +341,10 @@ let rra_update rrd proc_pdp_st elapsed_pdp_st pdps =
Array.iter updatefn rrd.rrd_rras

(* We assume that the data being given is of the form of a rate; that is,
it's dependent on the time interval between updates. To be able to
deal with gauge DSs, we multiply by the interval so that it cancels
the subsequent divide by interval later on *)
it's dependent on the time interval between updates.
Gauge and Absolute data sources are simply kept as is without any
time-based calculations, while Derive data sources will be changed according
to the time passed since the last measurement. (see CA-404597) *)
let process_ds_value ds value interval new_rrd =
if interval > ds.ds_mrhb then
nan
Expand All @@ -360,10 +361,8 @@ let process_ds_value ds value interval new_rrd =

let rate =
match (ds.ds_ty, new_rrd) with
| Absolute, _ | Derive, true ->
| Absolute, _ | Derive, true | Gauge, _ ->
value_raw
| Gauge, _ ->
value_raw *. interval
| Derive, false -> (
match (ds.ds_last, value) with
| VT_Int64 x, VT_Int64 y ->
Expand Down Expand Up @@ -433,7 +432,14 @@ let ds_update rrd timestamp valuesandtransforms new_rrd =
if Utils.isnan value then
ds.ds_unknown_sec <- pre_int
else
ds.ds_value <- ds.ds_value +. (pre_int *. value /. interval)
(* CA-404597 - Gauge and Absolute values should be passed as-is,
without being involved in time-based calculations at all.
This applies to calculations below as well *)
match ds.ds_ty with
| Gauge | Absolute ->
ds.ds_value <- value
| Derive ->
ds.ds_value <- ds.ds_value +. (pre_int *. value /. interval)
)
v2s ;

Expand All @@ -450,7 +456,13 @@ let ds_update rrd timestamp valuesandtransforms new_rrd =
let raw =
let proc_pdp_st = get_float_time last_updated rrd.timestep in
let occu_pdp_st = get_float_time timestamp rrd.timestep in
ds.ds_value /. (occu_pdp_st -. proc_pdp_st -. ds.ds_unknown_sec)

match ds.ds_ty with
| Gauge | Absolute ->
ds.ds_value
| Derive ->
ds.ds_value
/. (occu_pdp_st -. proc_pdp_st -. ds.ds_unknown_sec)
in
(* Apply the transform after the raw value has been calculated *)
let raw = apply_transform_function transform raw in
Expand All @@ -473,8 +485,12 @@ let ds_update rrd timestamp valuesandtransforms new_rrd =
ds.ds_value <- 0.0 ;
ds.ds_unknown_sec <- post_int
) else (
ds.ds_value <- post_int *. value /. interval ;
ds.ds_unknown_sec <- 0.0
ds.ds_unknown_sec <- 0.0 ;
match ds.ds_ty with
| Gauge | Absolute ->
ds.ds_value <- value
| Derive ->
ds.ds_value <- post_int *. value /. interval
)
)
v2s
Expand Down
109 changes: 108 additions & 1 deletion ocaml/libs/xapi-rrd/lib_test/unit_tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,110 @@ let test_length_invariants rrd () =
let check_length dss rra = check_length_of_fring dss rra.rra_data in
Array.iter (check_length rrd.rrd_dss) rrd.rrd_rras

let absolute_rrd =
let rra = rra_create CF_Average 100 1 0.5 in
let rra2 = rra_create CF_Average 100 10 0.5 in
let rra3 = rra_create CF_Average 100 100 0.5 in
let rra4 = rra_create CF_Average 100 1000 0.5 in
let ts = 1000000000.0 in
let ds = ds_create "foo" Absolute ~mrhb:10.0 (VT_Float 0.0) in
let ds2 = ds_create "bar" Absolute ~mrhb:10.0 (VT_Float 0.0) in
let ds3 = ds_create "baz" Absolute ~mrhb:10.0 (VT_Float 0.0) in
let ds4 = ds_create "boo" Absolute ~mrhb:10.0 (VT_Float 0.0) in
let rrd = rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L ts in
let id = Identity in
for i = 1 to 100000 do
let t = 1000000.0 +. (0.7 *. float_of_int i) in
let v1 =
(0, {value= VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))); transform= id})
in
let v2 =
(1, {value= VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))); transform= id})
in
let v3 =
(2, {value= VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))); transform= id})
in
let v4 =
(3, {value= VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))); transform= id})
in
ds_update rrd t [|v1; v2; v3; v4|] false
done ;
rrd

let absolute_rrd_CA_404597 () =
let rra = rra_create CF_Average 100 1 0.5 in
let rra2 = rra_create CF_Average 100 10 0.5 in
let rra3 = rra_create CF_Average 100 100 0.5 in
let rra4 = rra_create CF_Average 100 1000 0.5 in
let ts = 1000000000.0 in
let ds = ds_create "foo" Absolute ~mrhb:10.0 (VT_Float 0.0) in
let ds2 = ds_create "bar" Absolute ~mrhb:10.0 (VT_Float 0.0) in
let ds3 = ds_create "baz" Absolute ~mrhb:10.0 (VT_Float 0.0) in
let ds4 = ds_create "boo" Absolute ~mrhb:10.0 (VT_Float 0.0) in
let rrd = rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L ts in
let id = Identity in
for i = 1 to 100000 do
let t = 1000000.0 +. (0.7 *. float_of_int i) in
let ((_, val1) as v1) =
(0, {value= VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))); transform= id})
in
let ((_, val2) as v2) =
(1, {value= VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))); transform= id})
in
let ((_, val3) as v3) =
(2, {value= VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))); transform= id})
in
let ((_, val4) as v4) =
(3, {value= VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))); transform= id})
in
ds_update rrd t [|v1; v2; v3; v4|] false ;

Array.iter2
(fun ds value ->
compare_float __LOC__ ds.ds_value
(float_of_string (ds_value_to_string value.value))
)
rrd.rrd_dss [|val1; val2; val3; val4|]
done

(** Verify that Gauge data soruce values are correctly handled by the RRD lib
and that timestamps do not cause absolute values to fluctuate *)
let gauge_rrd_CA_404597 () =
let rra = rra_create CF_Average 100 1 0.5 in
let rra2 = rra_create CF_Average 100 10 0.5 in
let rra3 = rra_create CF_Average 100 100 0.5 in
let rra4 = rra_create CF_Average 100 1000 0.5 in
let ts = 1000000000.0 in
let ds = ds_create "foo" Gauge ~mrhb:10.0 (VT_Float 0.0) in
let ds2 = ds_create "bar" Gauge ~mrhb:10.0 (VT_Float 0.0) in
let ds3 = ds_create "baz" Gauge ~mrhb:10.0 (VT_Float 0.0) in
let ds4 = ds_create "boo" Gauge ~mrhb:10.0 (VT_Float 0.0) in
let rrd = rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L ts in
let id = Identity in
for i = 1 to 100000 do
let t = 1000000.0 +. (0.7 *. float_of_int i) in
let ((_, val1) as v1) =
(0, {value= VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))); transform= id})
in
let ((_, val2) as v2) =
(1, {value= VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))); transform= id})
in
let ((_, val3) as v3) =
(2, {value= VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))); transform= id})
in
let ((_, val4) as v4) =
(3, {value= VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))); transform= id})
in
ds_update rrd t [|v1; v2; v3; v4|] false ;

Array.iter2
(fun ds value ->
compare_float __LOC__ ds.ds_value
(float_of_string (ds_value_to_string value.value))
)
rrd.rrd_dss [|val1; val2; val3; val4|]
done

let gauge_rrd =
let rra = rra_create CF_Average 100 1 0.5 in
let rra2 = rra_create CF_Average 100 10 0.5 in
Expand Down Expand Up @@ -328,12 +432,15 @@ let regression_suite =
; ("CA-329043 (1)", `Quick, test_ranges ca_329043_rrd_1)
; ("CA-329043 (2)", `Quick, test_ranges ca_329043_rrd_2)
; ("CA-329813", `Quick, test_ranges ca_329813_rrd)
; ("CA-404597 (1)", `Quick, gauge_rrd_CA_404597)
; ("CA-404597 (2)", `Quick, absolute_rrd_CA_404597)
]

let () =
Alcotest.run "Test RRD library"
[
("Gauge RRD", rrd_suite gauge_rrd)
("Absolute RRD", rrd_suite absolute_rrd)
; ("Gauge RRD", rrd_suite gauge_rrd)
; ("RRD for CA-322008", rrd_suite ca_322008_rrd)
; ("RRD for CA-329043", rrd_suite ca_329043_rrd_1)
; ("RRD for CA-329813", rrd_suite ca_329813_rrd)
Expand Down

0 comments on commit 43d01ca

Please sign in to comment.