Skip to content

Commit

Permalink
CA-404013: do not relock the mutex when backing up rrds (#6215)
Browse files Browse the repository at this point in the history
The point of using try_lock is to not get lock while trying to hold the
mutex.
Releasing it and blocking to lock it defeats the purpose of using
try_lock.
Reorganise the sequence to read and copy all the rrds first while under
the
locked mutex, release it, and then archive the copies.

This doesn't fix CA-404013 directly, so this can wait until the release
is cut. I'm opening otherwise I'll forget to open it when things are
unlocked.

I've run the smoke and validation tests, as well as checking that the
rrd files in the bugtools are well formed for hosts and vms (they are
created suing this function)
  • Loading branch information
robhoes authored Jan 10, 2025
2 parents 96f7cd1 + 77258a4 commit f9b5e52
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 36 deletions.
7 changes: 2 additions & 5 deletions ocaml/database/db_xml.ml
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,9 @@ module From = struct
D.warn "no lifetime information about %s.%s, ignoring" tblname k ;
false
in
if do_not_load then (
D.info
{|dropping column "%s.%s": it has been removed from the datamodel|}
tblname k ;
if do_not_load then
row
) else
else
let column_schema = Schema.Table.find k table_schema in
let value =
Schema.Value.unmarshal column_schema.Schema.Column.ty
Expand Down
52 changes: 21 additions & 31 deletions ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ let archive_rrd vm_uuid (remote_address : string option) : unit =
master host, exclusively. Any attempt to send the rrds to pools outside
the host will fail. *)
let backup_rrds (remote_address : string option) () : unit =
let __FUN = __FUNCTION__ in
let transport =
Option.map
(fun address ->
Expand All @@ -119,50 +120,39 @@ let backup_rrds (remote_address : string option) () : unit =
| Some address ->
Printf.sprintf "host %s" address
in
info "%s: trying to back up RRDs to %s" __FUNCTION__ destination ;
info "%s: trying to back up RRDs to %s" __FUN destination ;
let total_cycles = 5 in
let cycles_tried = ref 0 in
let host_uuid = Inventory.lookup Inventory._installation_uuid in
while !cycles_tried < total_cycles do
if Mutex.try_lock mutex then (
cycles_tried := total_cycles ;
let vrrds =
try Hashtbl.fold (fun k v acc -> (k, v.rrd) :: acc) vm_rrds []
with exn -> Mutex.unlock mutex ; raise exn
in
Mutex.unlock mutex ;
List.iter
(fun (uuid, rrd) ->
debug "%s: saving RRD for VM uuid=%s" __FUNCTION__ uuid ;
let rrd = with_lock mutex (fun () -> Rrd.copy_rrd rrd) in
archive_rrd_internal ~transport ~uuid ~rrd ()
)
vrrds ;
Mutex.lock mutex ;
let srrds =
try Hashtbl.fold (fun k v acc -> (k, v.rrd) :: acc) sr_rrds []
with exn -> Mutex.unlock mutex ; raise exn
let rrds_copy =
[
Hashtbl.fold
(fun k v acc -> ("VM", k, Rrd.copy_rrd v.rrd) :: acc)
vm_rrds []
; Hashtbl.fold
(fun k v acc -> ("SR", k, Rrd.copy_rrd v.rrd) :: acc)
sr_rrds []
; Option.fold ~none:[]
~some:(fun rrdi -> [("host", host_uuid, Rrd.copy_rrd rrdi.rrd)])
!host_rrd
]
|> List.concat
in
Mutex.unlock mutex ;

List.iter
(fun (uuid, rrd) ->
debug "%s: saving RRD for SR uuid=%s" __FUNCTION__ uuid ;
let rrd = with_lock mutex (fun () -> Rrd.copy_rrd rrd) in
(fun (cls, uuid, rrd) ->
debug "%s: saving RRD for %s uuid=%s" __FUN cls uuid ;
archive_rrd_internal ~transport ~uuid ~rrd ()
)
srrds ;
match !host_rrd with
| Some rrdi ->
debug "%s: saving RRD for host" __FUNCTION__ ;
let rrd = with_lock mutex (fun () -> Rrd.copy_rrd rrdi.rrd) in
archive_rrd_internal ~transport
~uuid:(Inventory.lookup Inventory._installation_uuid)
~rrd ()
| None ->
()
rrds_copy
) else (
cycles_tried := 1 + !cycles_tried ;
if !cycles_tried >= total_cycles then
warn "%s: Could not acquire RRD lock, skipping RRD backup" __FUNCTION__
warn "%s: Could not acquire RRD lock, skipping RRD backup" __FUN
else
Thread.delay 1.
)
Expand Down

0 comments on commit f9b5e52

Please sign in to comment.