Skip to content

Commit

Permalink
Collectinfo and Feature list Improvement
Browse files Browse the repository at this point in the history
* TOOLS-1089: (ASADM) Modify collectinfo to collect multiple reports for 'ip -s link'.

* TOOLS-1108: (ASADM-HEALTHCHECK) Add check for IO scheduler.

* TOOLS-1130: (ASADM-HEALTHCHECK) Fix Health Check to catch high Disk Utilization.

* TOOLS-1137: (ASADM) Modify collectinfo to ignore first report from iostat output.

* TOOLS-1147: (ASADM) Add TLS and Security to Features list.

* TOOLS-1150: (ASADM) Modify to dump service and services in Json file.

* TOOLS-1168: (ASADM) Update to consider new record storage overhead.

* TOOLS-1175: (ASADM) Fix collectinfo to handle JSON dump error.

* TOOLS-1185: (ASADM) Update collectinfo to dump environment variables.

* TOOLS-1187: (ASADM) Update asadm help to caution against the use of health.

* TOOLS-1188: (ASADM) Add SC in features list.
  • Loading branch information
hbpatre committed Jul 12, 2018
1 parent 08062fd commit 902d894
Show file tree
Hide file tree
Showing 14 changed files with 462 additions and 153 deletions.
4 changes: 2 additions & 2 deletions asadm.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def __init__(self, admin_version, seeds, user=None, password=None, auth_mode=Aut
self.intro = str(self.ctrl.loghdlr)
else:
if user is not None:
if password == "prompt" or password is None:
if password == conf.DEFAULTPASSWORD:
if sys.stdin.isatty():
password = getpass.getpass("Enter Password:")
else:
Expand Down Expand Up @@ -492,7 +492,7 @@ def execute_asinfo_commands(commands_arg, seed, user=None, password=None, auth_m
return

if user is not None:
if password == "prompt":
if password == conf.DEFAULTPASSWORD:
if sys.stdin.isatty():
password = getpass.getpass("Enter Password:")
else:
Expand Down
31 changes: 25 additions & 6 deletions lib/basiccontroller.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from lib.controllerlib import (BaseController, CommandController, CommandHelp,
ShellException)
from lib.getcontroller import (GetConfigController, GetDistributionController,
GetPmapController, GetStatisticsController,
GetPmapController, GetStatisticsController, GetFeaturesController,
get_sindex_stats)
from lib.health.util import (create_health_input_dict, create_snapshot_key,
h_eval)
Expand Down Expand Up @@ -1035,13 +1035,17 @@ def _get_as_metadata(self):
xdr_builds = util.Future(self.cluster.info_XDR_build_version, nodes=self.nodes).start()
node_ids = util.Future(self.cluster.info_node, nodes=self.nodes).start()
ips = util.Future(self.cluster.info_ip_port, nodes=self.nodes).start()
endpoints = util.Future(self.cluster.info_service, nodes=self.nodes).start()
services = util.Future(self.cluster.info_services, nodes=self.nodes).start()
udf_data = util.Future(self.cluster.info_udf_list, nodes=self.nodes).start()

builds = builds.result()
editions = editions.result()
xdr_builds = xdr_builds.result()
node_ids = node_ids.result()
ips = ips.result()
endpoints = endpoints.result()
services = services.result()
udf_data = udf_data.result()

for nodeid in builds:
Expand All @@ -1051,6 +1055,8 @@ def _get_as_metadata(self):
self._get_meta_for_sec(xdr_builds, 'xdr_build', nodeid, metamap)
self._get_meta_for_sec(node_ids, 'node_id', nodeid, metamap)
self._get_meta_for_sec(ips, 'ip', nodeid, metamap)
self._get_meta_for_sec(endpoints, 'endpoints', nodeid, metamap)
self._get_meta_for_sec(services, 'services', nodeid, metamap)
self._get_meta_for_sec(udf_data, 'udf', nodeid, metamap)

return metamap
Expand Down Expand Up @@ -1104,8 +1110,9 @@ def _dump_in_json_file(self, as_logfile_prefix, dump):
self.aslogfile = as_logfile_prefix + 'ascinfo.json'

try:
json_dump = json.dumps(dump, indent=4, separators=(',', ':'))
with open(self.aslogfile, "w") as f:
f.write(json.dumps(dump, indent=4, separators=(',', ':')))
f.write(json_dump)
except Exception as e:
self.logger.error("Failed to write JSON file: " + str(e))

Expand Down Expand Up @@ -1567,7 +1574,7 @@ class FeaturesController(BasicCommandController):

def __init__(self):
self.modifiers = set(['with', 'like'])
self.getter = GetStatisticsController(self.cluster)
self.getter = GetFeaturesController(self.cluster)

def _do_default(self, line):

Expand Down Expand Up @@ -1601,7 +1608,8 @@ def do_scroll(self, line):
CliView.pager = CliView.SCROLL


@CommandHelp('Checks for common inconsistencies and print if there is any')
@CommandHelp('Checks for common inconsistencies and print if there is any.',
'This command is still in beta and its output should not be directly acted upon without further analysis.')
class HealthCheckController(BasicCommandController):
last_snapshot_collection_time = 0
last_snapshot_count = 0
Expand Down Expand Up @@ -1858,6 +1866,8 @@ def _do_default(self, line):
[("CLUSTER", cluster_name), ("NODE", None), ("LSB", None)]),
("environment", "SYSTEM", "ENVIRONMENT", True,
[("CLUSTER", cluster_name), ("NODE", None), ("ENVIRONMENT", None)]),
("scheduler", "SYSTEM", "SCHEDULER", False,
[("CLUSTER", cluster_name), ("NODE", None), (None, None), ("DEVICE", None)]),
]),
}
health_input = {}
Expand Down Expand Up @@ -2011,7 +2021,9 @@ def _do_default(self, line):
namespace_stats = util.Future(self.cluster.info_all_namespace_statistics, nodes=self.nodes).start()
set_stats = util.Future(self.cluster.info_set_statistics, nodes=self.nodes).start()

cluster_configs = util.Future(self.cluster.info_set_statistics, nodes=self.nodes).start()
service_configs = util.Future(self.cluster.info_get_config, nodes=self.nodes, stanza='service').start()
namespace_configs = util.Future(self.cluster.info_get_config, nodes=self.nodes, stanza='namespace').start()
cluster_configs = util.Future(self.cluster.info_get_config, nodes=self.nodes, stanza='cluster').start()

os_version = self.cluster.info_system_statistics(nodes=self.nodes, default_user=default_user, default_pwd=default_pwd, default_ssh_key=default_ssh_key,
default_ssh_port=default_ssh_port, credential_file=credential_file, commands=["lsb"], collect_remote_data=enable_ssh)
Expand All @@ -2024,17 +2036,22 @@ def _do_default(self, line):
service_stats = service_stats.result()
namespace_stats = namespace_stats.result()
set_stats = set_stats.result()
service_configs = service_configs.result()
namespace_configs = namespace_configs.result()
cluster_configs = cluster_configs.result()
server_version = server_version.result()
server_edition = server_edition.result()

metadata = {}
metadata["server_version"] = {}
metadata["server_build"] = {}

for node, version in server_version.iteritems():
if not version or isinstance(version, Exception):
continue

metadata["server_build"][node] = version

if node in server_edition and server_edition[node] and not isinstance(server_edition[node], Exception):
if 'enterprise' in server_edition[node].lower():
metadata["server_version"][node] = "E-%s" % (str(version))
Expand Down Expand Up @@ -2075,5 +2092,7 @@ def _do_default(self, line):
metadata["os_version"] = os_version

return util.Future(self.view.print_summary, common.create_summary(service_stats=service_stats, namespace_stats=namespace_stats,
set_stats=set_stats, metadata=metadata, cluster_configs=cluster_configs),
set_stats=set_stats, metadata=metadata,
service_configs=service_configs, ns_configs=namespace_configs,
cluster_configs=cluster_configs),
list_view=enable_list_view)
42 changes: 22 additions & 20 deletions lib/client/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,24 +131,26 @@ def __init__(self, address, port=3000, tls_name=None, timeout=5,
self.sys_default_pwd = None
self.sys_default_ssh_key = None
self.sys_cmds = [
('hostname', ['hostname -I', 'hostname']),
('top', ['top -n1 -b', 'top -l 1']),
('lsb', ['lsb_release -a', 'ls /etc|grep release|xargs -I f cat /etc/f']),
('meminfo', ['cat /proc/meminfo', 'vmstat -s']),
('interrupts', ['cat /proc/interrupts', '']),
('iostat', ['iostat -x 1 1', '']),
('dmesg', ['dmesg -T', 'dmesg']),
('limits', ['sudo pgrep asd | xargs -I f sh -c "sudo cat /proc/f/limits"', '']),
('lscpu', ['lscpu', '']),
('sysctlall', ['sudo sysctl vm fs', '']),
('iptables', ['sudo iptables -S', '']),
('hdparm', ['sudo fdisk -l |grep Disk |grep dev | cut -d " " -f 2 | cut -d ":" -f 1 | xargs sudo hdparm -I 2>/dev/null', '']),
('df', ['df -h', '']),
('free-m', ['free -m', '']),
('uname', ['uname -a', '']),
# format: (command name as in parser, ignore error, command list)
('hostname', False, ['hostname -I', 'hostname']),
('top', False, ['top -n1 -b', 'top -l 1']),
('lsb', False, ['lsb_release -a', 'ls /etc|grep release|xargs -I f cat /etc/f']),
('meminfo', False, ['cat /proc/meminfo', 'vmstat -s']),
('interrupts', False, ['cat /proc/interrupts', '']),
('iostat', False, ['iostat -y -x 5 1', '']),
('dmesg', False, ['dmesg -T', 'dmesg']),
('limits', False, ['sudo pgrep asd | xargs -I f sh -c "sudo cat /proc/f/limits"', '']),
('lscpu', False, ['lscpu', '']),
('sysctlall', False, ['sudo sysctl vm fs', '']),
('iptables', False, ['sudo iptables -S', '']),
('hdparm', False, ['sudo fdisk -l |grep Disk |grep dev | cut -d " " -f 2 | cut -d ":" -f 1 | xargs sudo hdparm -I 2>/dev/null', '']),
('df', False, ['df -h', '']),
('free-m', False, ['free -m', '']),
('uname', False, ['uname -a', '']),
('scheduler', True, ['ls /sys/block/{sd*,xvd*,nvme*}/queue/scheduler |xargs -I f sh -c "echo f; cat f;"', '']),

# Todo: Add more commands for other cloud platform detection
('environment', ['curl -m 1 -s http://169.254.169.254/1.0/', 'uname']),
('environment', False, ['curl -m 1 -s http://169.254.169.254/1.0/', 'uname']),
]

# hack, _key needs to be defines before info calls... but may have
Expand Down Expand Up @@ -1287,7 +1289,7 @@ def info_system_statistics(self, default_user=None, default_pwd=None, default_ss
if commands:
cmd_list = copy.deepcopy(commands)
else:
cmd_list = [_key for _key, cmds in self.sys_cmds]
cmd_list = [_key for _key, _, _ in self.sys_cmds]

if self.localhost:
return self._get_localhost_system_statistics(cmd_list)
Expand All @@ -1303,13 +1305,13 @@ def info_system_statistics(self, default_user=None, default_pwd=None, default_ss
def _get_localhost_system_statistics(self, commands):
sys_stats = {}

for _key, cmds in self.sys_cmds:
for _key, ignore_error, cmds in self.sys_cmds:
if _key not in commands:
continue

for cmd in cmds:
o, e = util.shell_command([cmd])
if e or not o:
if (e and not ignore_error) or not o:
continue
else:
parse_system_live_command(_key, o, sys_stats)
Expand Down Expand Up @@ -1512,7 +1514,7 @@ def _get_remote_host_system_statistics(self, commands):
continue

try:
for _key, cmds in self.sys_cmds:
for _key, _, cmds in self.sys_cmds:
if _key not in commands:
continue

Expand Down
25 changes: 23 additions & 2 deletions lib/collectinfo_parser/full_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,11 @@ def _get_section_list_for_parsing(imap, available_section):
final_section_list = []
imap_section_list = []
imap_section_list.extend(DERIVED_SECTION_LIST)

if 'section_ids' not in imap:
logger.warning("`section_ids` section missing in section_json.")
return final_section_list

for section_id in imap['section_ids']:
section = SECTION_FILTER_LIST[section_id]
if 'final_section_name' in section:
Expand All @@ -240,6 +242,7 @@ def _get_section_list_for_parsing(imap, available_section):
else:
sec_name = section['final_section_name']
imap_section_list.append(sec_name)

final_section_list = list(set(imap_section_list).intersection(available_section))
return final_section_list

Expand Down Expand Up @@ -567,11 +570,26 @@ def _add_missing_dmesg_data(sys_map, parsed_map, timestamps, node, node_ip_mappi
dmesg_map[node]["dmesg"] = sys_map["dmesg"]
_merge_nodelevel_map_to_mainmap(parsed_map, dmesg_map, timestamps, node_ip_mapping, ["sys_stat"])

def _add_missing_scheduler_data(sys_map, parsed_map, timestamps, node, node_ip_mapping, ignore_exception):
    """
    Copy the IO scheduler details found in sys_map into parsed_map.

    Nothing is done when sys_map is empty or carries no "scheduler" entry.
    Otherwise the scheduler data is wrapped in a node-level map keyed by
    `node` and passed to _merge_nodelevel_map_to_mainmap together with the
    ["sys_stat"] key path.

    NOTE(review): ignore_exception is accepted but not used in this function;
    presumably kept so the signature matches the other _add_missing_* helpers.
    """

    has_scheduler = bool(sys_map) and "scheduler" in sys_map
    if not has_scheduler:
        return

    # Node-level layout: {node: {"scheduler": <scheduler details>}}
    node_level_map = {node: {"scheduler": sys_map["scheduler"]}}
    _merge_nodelevel_map_to_mainmap(parsed_map, node_level_map, timestamps, node_ip_mapping, ["sys_stat"])

# Format: [version, key to identify version changes, parent keys of key till node]
new_additional_field_pointers = [
[1, "node_id", ["as_stat", "meta_data"]],
[2, "dmesg", ["sys_stat"]],
[3, "latency", ["as_stat"]],
[3, "endpoints", ["as_stat", "meta_data"]],
[4, "latency", ["as_stat"]],
]

def _find_missing_data_version(cinfo_map):
Expand Down Expand Up @@ -641,10 +659,13 @@ def _add_missing_data(imap, parsed_map, parsed_conf_map={}, timestamps=[], missi
_merge_nodelevel_map_to_mainmap(parsed_map, meta_map, timestamps, node_to_ip_mapping, ["as_stat", "meta_data"])
_add_missing_as_data(imap, parsed_map, timestamps, node_to_ip_mapping, ignore_exception)
_add_missing_histogram_data(imap, parsed_map, timestamps, node_to_ip_mapping, ignore_exception)
_add_missing_endpoints_data(imap, parsed_map, timestamps, node_to_ip_mapping, ignore_exception)

if missing_version <= 2:
_add_missing_dmesg_data(sys_map, parsed_map, timestamps, node, node_to_ip_mapping, ignore_exception)

if missing_version <= 3:
_add_missing_scheduler_data(sys_map, parsed_map, timestamps, node, node_to_ip_mapping, ignore_exception)
_add_missing_endpoints_data(imap, parsed_map, timestamps, node_to_ip_mapping, ignore_exception)

if missing_version <= 4:
_add_missing_latency_data(imap, parsed_map, timestamps, node_to_ip_mapping, ignore_exception)
5 changes: 3 additions & 2 deletions lib/collectinfo_parser/section_filter_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,8 @@
'ID_100': {
'enable': True,
'raw_section_name': 'scheduler_info',
'regex_new': 'ls /sys/block/{sd[*],xvd[*]}/queue/scheduler [|]xargs -I f sh -c "echo f; cat f;"'
'final_section_name': 'scheduler',
'regex_new': 'ls /sys/block/{.*}/queue/scheduler [|]xargs -I f sh -c "echo f; cat f;"'
# 'parser_func'
},
'ID_101': {
Expand Down Expand Up @@ -819,7 +820,7 @@
'statistics.xdr', 'config', 'config.dc', 'config.xdr', 'config.cluster']
# Other Available sections ['latency', 'sindex_info', 'features']

SYS_SECTION_NAME_LIST = ['top', 'lsb', 'uname', 'meminfo',
SYS_SECTION_NAME_LIST = ['top', 'lsb', 'uname', 'meminfo', 'scheduler',
'hostname', 'df', 'free-m', 'iostat', 'interrupts', 'ip_addr', 'dmesg']
# Meta data have all meta info (asd_build, xdr_build, cluster_name)
DERIVED_SECTION_LIST = ['features']
Expand Down
60 changes: 58 additions & 2 deletions lib/collectinfo_parser/sys_section_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,15 @@ def parse_sys_section(section_list, imap, parsed_map):
elif section == 'environment':
_parse_environment_section(imap, parsed_map)

elif section == 'scheduler':
_parse_scheduler_section(imap, parsed_map)

else:
logger.warning(
"Section unknown, can not be parsed. Check SYS_SECTION_NAME_LIST. Section: " + section)

logger.info(
"Converting basic raw string vals to original vals. Sections: " + str(section_list))
logger.info("Converting basic raw string vals to original vals. Sections: " + str(section_list))

for section in section_list:
if section in parsed_map:
param_map = {section: parsed_map[section]}
Expand Down Expand Up @@ -790,6 +793,59 @@ def _parse_environment_section(imap, parsed_map):

parsed_map[final_section_name]["platform"] = platform

def _parse_scheduler_section(imap, parsed_map):
    """
    Parse the IO scheduler section (ID_100) of a collectinfo dump.

    The raw section is expected to hold alternating lines: a sysfs path such
    as "/sys/block/<device>/queue/scheduler" followed by the content of that
    file, in which the active scheduler is the token wrapped in square
    brackets (for example "noop deadline [cfq]").

    On success, parsed_map[<final section name>]["scheduler_stat"] is set to a
    list of {"device": <device>, "scheduler": <lower-cased scheduler>} dicts.
    Nothing is written when the section is not valid according to
    is_valid_section.
    """
    sec_id = 'ID_100'
    raw_section_name, final_section_name, _ = get_section_name_from_id(sec_id)

    logger.info("Parsing section: " + final_section_name)

    if not is_valid_section(imap, raw_section_name, final_section_name):
        return

    scheduler_section = imap[raw_section_name][0]

    schedulers = []
    device = ""
    for line in scheduler_section:
        line = line.strip()
        # Skip blanks and "ls: cannot access ..." errors produced by glob
        # patterns that matched no device.
        if not line or "cannot access" in line:
            continue

        if "scheduler" in line:
            # str.split always returns at least one element, so test the
            # length explicitly: a line that mentions "scheduler" without the
            # sysfs prefix must be skipped instead of raising IndexError.
            path_parts = line.split("/sys/block/")
            if len(path_parts) < 2:
                continue

            device = path_parts[1].split("/queue/scheduler")[0].strip()
            continue

        if not device:
            # device not found yet, no need to proceed with this line
            continue

        # The active scheduler is the bracketed token; if several tokens are
        # bracketed, the last one wins.
        bracketed = [s for s in line.split()
                     if s.startswith('[') and s.endswith(']')]
        scheduler = bracketed[-1][1:-1].lower() if bracketed else ""

        # if scheduler found, set details and wait for the next device line
        if scheduler:
            schedulers.append({"device": device, "scheduler": scheduler})
            device = ""

    if final_section_name not in parsed_map:
        parsed_map[final_section_name] = {}

    parsed_map[final_section_name]["scheduler_stat"] = schedulers


### "iostat -x 1 10\n",
### "Linux 2.6.32-279.el6.x86_64 (bfs-dl360g8-02) \t02/02/15 \t_x86_64_\t(24 CPU)\n",
Expand Down
Loading

0 comments on commit 902d894

Please sign in to comment.