Skip to content

Commit

Permalink
feature: Add option to rebalance VMs by their assigned resources. [#16]
Browse files Browse the repository at this point in the history
Fixes: #16
  • Loading branch information
gyptazy committed Jul 14, 2024
1 parent 3d634ef commit 664cd73
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 37 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
added:
- Add option to rebalance by assigned VM resources to avoid overprovisioning. [#16]
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ The following options can be set in the `proxlb.conf` file:
| api_pass | FooBar | Password for the API. |
| verify_ssl | 1 | Validate SSL certificates (1) or ignore (0). (default: 1) |
| method | memory | Defines the balancing method (default: memory) where you can use `memory`, `disk` or `cpu`. |
| mode | used | Rebalance by `used` resources (efficiency) or by `assigned` resources (avoid overprovisioning). (default: used) |
| balanciness | 10 | Maximum percentage by which the lowest and highest resource consumption of the nodes may differ before rebalancing is triggered. (default: 10) |
| ignore_nodes | dummynode01,dummynode02,test* | Defines a comma separated list of nodes to exclude. |
| ignore_vms | testvm01,testvm02 | Defines a comma separated list of VMs to exclude. (`*` as suffix wildcard or tags are also supported) |
Expand All @@ -101,6 +102,7 @@ api_pass: FooBar
verify_ssl: 1
[balancing]
method: memory
mode: used
# Balanciness defines how much difference may be
# between the lowest & highest resource consumption
# of nodes before rebalancing will be done.
Expand Down
112 changes: 75 additions & 37 deletions proxlb
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ def initialize_config_options(config_path):
proxmox_api_ssl_v = config['proxmox']['verify_ssl']
# Balancing
balancing_method = config['balancing'].get('method', 'memory')
balancing_mode = config['balancing'].get('mode', 'used')
balanciness = config['balancing'].get('balanciness', 10)
ignore_nodes = config['balancing'].get('ignore_nodes', None)
ignore_vms = config['balancing'].get('ignore_vms', None)
Expand All @@ -198,7 +199,7 @@ def initialize_config_options(config_path):

logging.info(f'{info_prefix} Configuration file loaded.')
return proxmox_api_host, proxmox_api_user, proxmox_api_pass, proxmox_api_ssl_v, balancing_method, \
balanciness, ignore_nodes, ignore_vms, daemon, schedule, log_verbosity
balancing_mode, balanciness, ignore_nodes, ignore_vms, daemon, schedule, log_verbosity


def api_connect(proxmox_api_host, proxmox_api_user, proxmox_api_pass, proxmox_api_ssl_v):
Expand Down Expand Up @@ -237,18 +238,24 @@ def get_node_statistics(api_object, ignore_nodes):
for node in api_object.nodes.get():
if node['status'] == 'online' and node['node'] not in ignore_nodes_list:
node_statistics[node['node']] = {}
node_statistics[node['node']]['cpu_total'] = node['maxcpu']
node_statistics[node['node']]['cpu_used'] = node['cpu']
node_statistics[node['node']]['cpu_free'] = int(node['maxcpu']) - int(node['cpu'])
node_statistics[node['node']]['cpu_free_percent'] = int((node_statistics[node['node']]['cpu_free']) / int(node['maxcpu']) * 100)
node_statistics[node['node']]['memory_total'] = node['maxmem']
node_statistics[node['node']]['memory_used'] = node['mem']
node_statistics[node['node']]['memory_free'] = int(node['maxmem']) - int(node['mem'])
node_statistics[node['node']]['memory_free_percent'] = int((node_statistics[node['node']]['memory_free']) / int(node['maxmem']) * 100)
node_statistics[node['node']]['disk_total'] = node['maxdisk']
node_statistics[node['node']]['disk_used'] = node['disk']
node_statistics[node['node']]['disk_free'] = int(node['maxdisk']) - int(node['disk'])
node_statistics[node['node']]['disk_free_percent'] = int((node_statistics[node['node']]['disk_free']) / int(node['maxdisk']) * 100)
node_statistics[node['node']]['cpu_total'] = node['maxcpu']
node_statistics[node['node']]['cpu_assigned'] = 0
node_statistics[node['node']]['cpu_assigned_percent'] = int((node_statistics[node['node']]['cpu_assigned']) / int(node_statistics[node['node']]['cpu_total']) * 100)
node_statistics[node['node']]['cpu_used'] = node['cpu']
node_statistics[node['node']]['cpu_free'] = int(node['maxcpu']) - int(node['cpu'])
node_statistics[node['node']]['cpu_free_percent'] = int((node_statistics[node['node']]['cpu_free']) / int(node['maxcpu']) * 100)
node_statistics[node['node']]['memory_total'] = node['maxmem']
node_statistics[node['node']]['memory_assigned'] = 0
node_statistics[node['node']]['memory_assigned_percent'] = int((node_statistics[node['node']]['memory_assigned']) / int(node_statistics[node['node']]['memory_total']) * 100)
node_statistics[node['node']]['memory_used'] = node['mem']
node_statistics[node['node']]['memory_free'] = int(node['maxmem']) - int(node['mem'])
node_statistics[node['node']]['memory_free_percent'] = int((node_statistics[node['node']]['memory_free']) / int(node['maxmem']) * 100)
node_statistics[node['node']]['disk_total'] = node['maxdisk']
node_statistics[node['node']]['disk_assigned'] = 0
node_statistics[node['node']]['disk_assigned_percent'] = int((node_statistics[node['node']]['disk_assigned']) / int(node_statistics[node['node']]['disk_total']) * 100)
node_statistics[node['node']]['disk_used'] = node['disk']
node_statistics[node['node']]['disk_free'] = int(node['maxdisk']) - int(node['disk'])
node_statistics[node['node']]['disk_free_percent'] = int((node_statistics[node['node']]['disk_free']) / int(node['maxdisk']) * 100)
logging.info(f'{info_prefix} Added node {node["node"]}.')

logging.info(f'{info_prefix} Created node statistics.')
Expand Down Expand Up @@ -307,6 +314,33 @@ def get_vm_statistics(api_object, ignore_vms):
return vm_statistics


def update_node_statistics(node_statistics, vm_statistics):
    """ Update node statistics with the resources assigned to their VMs.

    For every VM the assigned CPU, memory and disk (the VM's ``*_total``
    values) are added to the ``*_assigned`` counters of its parent node and
    the matching ``*_assigned_percent`` values are recalculated relative to
    the node's ``*_total`` values.

    Args:
        node_statistics: Dict keyed by node name. Each value must provide the
            keys ``<resource>_total``, ``<resource>_assigned`` and
            ``<resource>_assigned_percent`` for cpu, memory and disk.
        vm_statistics: Dict keyed by VM name. Each value must provide
            ``node_parent`` and ``<resource>_total`` for cpu, memory and disk.

    Returns:
        The same ``node_statistics`` dict, updated in place.
    """
    info_prefix = 'Info: [node-update-statistics]:'
    warn_prefix = 'Warning: [node-update-statistics]:'
    # Statistics key prefix -> label used in the warning message.
    resources = (('cpu', 'CPU'), ('memory', 'memory'), ('disk', 'Disk'))

    # Nodes that received at least one VM (dict used as an ordered set).
    touched_nodes = {}

    for vm, vm_value in vm_statistics.items():
        node_name = vm_value['node_parent']
        node = node_statistics.get(node_name)

        # VMs may live on nodes that are not tracked (e.g. ignored or offline
        # nodes); skip them instead of failing with a KeyError.
        if node is None:
            logging.warning(f'{warn_prefix} VM {vm} runs on node {node_name} which is not part of the node statistics. Skipping.')
            continue

        for key, _label in resources:
            node[f'{key}_assigned'] = node[f'{key}_assigned'] + int(vm_value[f'{key}_total'])
        touched_nodes[node_name] = node

    # Calculate percentages and warn once per node and resource, after all
    # VMs have been accounted for, instead of repeating it for every VM.
    for node_name, node in touched_nodes.items():
        for key, label in resources:
            assigned_percent = (node[f'{key}_assigned'] / node[f'{key}_total']) * 100
            node[f'{key}_assigned_percent'] = assigned_percent

            if assigned_percent > 99:
                logging.warning(f'{warn_prefix} Node {node_name} is overprovisioned for {label} by {int(assigned_percent)}%.')

    logging.info(f'{info_prefix} Updated node resource assignments by all VMs.')
    # Log the actual statistics (previously only the literal name was logged).
    logging.debug(f'node_statistics: {node_statistics}')
    return node_statistics


def __validate_ignore_vm_wildcard(ignore_vms):
""" Validate if a wildcard is used for ignored VMs. """
if '*' in ignore_vms:
Expand Down Expand Up @@ -355,7 +389,7 @@ def __get_proxlb_groups(vm_tags):
return group_include, group_exclude, vm_ignore


def balancing_calculations(balancing_method, node_statistics, vm_statistics, balanciness):
def balancing_calculations(balancing_method, balancing_mode, node_statistics, vm_statistics, balanciness):
""" Calculate re-balancing of VMs on present nodes across the cluster. """
info_prefix = 'Info: [rebalancing-calculator]:'
balanciness = int(balanciness)
Expand All @@ -364,29 +398,29 @@ def balancing_calculations(balancing_method, node_statistics, vm_statistics, bal
rebalance = True
emergency_counter = 0

# Validate for a supported balancing method.
__validate_balancing_method(balancing_method)
# # Validate for a supported balancing method.
# __validate_balancing_method(balancing_method)

# Rebalance VMs with the highest resource usage to a new
# node until reaching the desired balanciness.
while rebalance and emergency_counter < 10000:
emergency_counter = emergency_counter + 1
rebalance = __validate_balanciness(balanciness, balancing_method, node_statistics)
# # Rebalance VMs with the highest resource usage to a new
# # node until reaching the desired balanciness.
# while rebalance and emergency_counter < 10000:
# emergency_counter = emergency_counter + 1
# rebalance = __validate_balanciness(balanciness, balancing_method, node_statistics)

if rebalance:
resource_highest_used_resources_vm, processed_vms = __get_most_used_resources_vm(balancing_method, vm_statistics, processed_vms)
resource_highest_free_resources_node = __get_most_free_resources_node(balancing_method, node_statistics)
node_statistics, vm_statistics = __update_resource_statistics(resource_highest_used_resources_vm, resource_highest_free_resources_node,
vm_statistics, node_statistics, balancing_method)
# if rebalance:
# resource_highest_used_resources_vm, processed_vms = __get_most_used_resources_vm(balancing_method, vm_statistics, processed_vms)
# resource_highest_free_resources_node = __get_most_free_resources_node(balancing_method, node_statistics)
# node_statistics, vm_statistics = __update_resource_statistics(resource_highest_used_resources_vm, resource_highest_free_resources_node,
# vm_statistics, node_statistics, balancing_method)

# Honour groupings for include and exclude groups for rebalancing VMs.
node_statistics, vm_statistics = __get_vm_tags_include_groups(vm_statistics, node_statistics, balancing_method)
node_statistics, vm_statistics = __get_vm_tags_exclude_groups(vm_statistics, node_statistics, balancing_method)
# # Honour groupings for include and exclude groups for rebalancing VMs.
# node_statistics, vm_statistics = __get_vm_tags_include_groups(vm_statistics, node_statistics, balancing_method)
# node_statistics, vm_statistics = __get_vm_tags_exclude_groups(vm_statistics, node_statistics, balancing_method)

# Remove VMs that are not being relocated.
vms_to_remove = [vm_name for vm_name, vm_info in vm_statistics.items() if 'node_rebalance' in vm_info and vm_info['node_rebalance'] == vm_info.get('node_parent')]
for vm_name in vms_to_remove:
del vm_statistics[vm_name]
# # Remove VMs that are not being relocated.
# vms_to_remove = [vm_name for vm_name, vm_info in vm_statistics.items() if 'node_rebalance' in vm_info and vm_info['node_rebalance'] == vm_info.get('node_parent')]
# for vm_name in vms_to_remove:
# del vm_statistics[vm_name]

logging.info(f'{info_prefix} Balancing calculations done.')
return node_statistics, vm_statistics
Expand Down Expand Up @@ -617,7 +651,7 @@ def main():
pre_validations(config_path)

# Parse global config.
proxmox_api_host, proxmox_api_user, proxmox_api_pass, proxmox_api_ssl_v, balancing_method, \
proxmox_api_host, proxmox_api_user, proxmox_api_pass, proxmox_api_ssl_v, balancing_method, balancing_mode, \
balanciness, ignore_nodes, ignore_vms, daemon, schedule, log_verbosity = initialize_config_options(config_path)

# Overwrite logging handler with user defined log verbosity.
Expand All @@ -629,10 +663,14 @@ def main():

# Get metric & statistics for vms and nodes.
node_statistics = get_node_statistics(api_object, ignore_nodes)
vm_statistics = get_vm_statistics(api_object, ignore_vms)
vm_statistics = get_vm_statistics(api_object, ignore_vms)
node_statistics = update_node_statistics(node_statistics, vm_statistics)

print(node_statistics)
sys.exit(1)

# Calculate rebalancing of vms.
node_statistics_rebalanced, vm_statistics_rebalanced = balancing_calculations(balancing_method, node_statistics, vm_statistics, balanciness)
#node_statistics_rebalanced, vm_statistics_rebalanced = balancing_calculations(balancing_method, balancing_mode, node_statistics, vm_statistics, balanciness)

# Rebalance vms to new nodes within the cluster.
run_vm_rebalancing(api_object, vm_statistics_rebalanced, app_args)
Expand All @@ -645,4 +683,4 @@ def main():


if __name__ == '__main__':
main()
main()
1 change: 1 addition & 0 deletions proxlb.conf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ api_pass: FooBar
verify_ssl: 1
[balancing]
method: memory
mode: used
ignore_nodes: dummynode01,dummynode02
ignore_vms: testvm01,testvm02
[service]
Expand Down

0 comments on commit 664cd73

Please sign in to comment.