diff --git a/auto_log/autolog.py b/auto_log/autolog.py index c8865a8..70fbea3 100644 --- a/auto_log/autolog.py +++ b/auto_log/autolog.py @@ -25,22 +25,23 @@ from .env import get_env_info from .utils import Times from .device import MemInfo, SubprocessGetMem +from .smoothed_meter import SmoothedMeter class RunConfig: - def __init(self, + def __init__(self, run_devices="cpu", ir_optim=False, enable_tensorrt=False, enable_mkldnn=False, cpu_threads=0, enable_mem_optim=True): - + self.run_devices = run_devices self.ir_optim = ir_optim self.enable_mkldnn = enable_mkldnn self.enable_tensorrt = enable_tensorrt - self.cpu_math_library_num_threads = self.cpu_threads + self.cpu_math_library_num_threads = cpu_threads self.enable_mem_optim = enable_mem_optim @@ -52,10 +53,12 @@ def __init__(self, data_shape, save_path, inference_config=None, - pids=None, - process_name=None, - gpu_ids=None, - time_keys=['preprocess_time', 'inference_time', 'postprocess_time'], + pids=None, + process_name=None, + gpu_ids=None, + time_keys=[ + 'preprocess_time', 'inference_time', 'postprocess_time' + ], warmup=0, logger=None, **kwargs): @@ -71,17 +74,17 @@ def __init__(self, self.config_status = self.parse_config(self.paddle_infer_config) self.time_keys = time_keys - self.times = Times(keys=time_keys,warmup=warmup) + self.times = Times(keys=time_keys, warmup=warmup) self.get_paddle_info() - + self.logger = self.init_logger() if logger is None else logger self.get_mem = SubprocessGetMem(pid=pids, gpu_id=gpu_ids) self.start_subprocess_get_mem() self.pids = pids self.gpu_ids = gpu_ids - + def start_subprocess_get_mem(self): self.get_mem.get_mem_subprocess_run(0.2) @@ -95,10 +98,10 @@ def end_subprocess_get_mem(self): else: self.gpu_infos = gpu_infos[str(self.gpu_ids)] return self.cpu_infos, self.gpu_infos - + def init_logger(self, name='root', log_level=logging.DEBUG): log_file = self.save_path - + logger = logging.getLogger(name) formatter = logging.Formatter( @@ -128,7 +131,8 @@ def 
parse_config(self, config) -> dict: """ config_status = {} if config is not None and type(config) is not dict: - config_status['runtime_device'] = "gpu" if config.use_gpu() else "cpu" + config_status['runtime_device'] = "gpu" if config.use_gpu( + ) else "cpu" config_status['ir_optim'] = config.ir_optim() config_status['enable_tensorrt'] = config.tensorrt_engine_enabled() config_status['precision'] = self.precision @@ -137,12 +141,18 @@ def parse_config(self, config) -> dict: 'cpu_math_library_num_threads'] = config.cpu_math_library_num_threads( ) elif type(config) is dict: - config_status['runtime_device'] = config['runtime_device'] if 'runtime_device' in config else None - config_status['ir_optim'] = config['ir_optim'] if 'ir_optim' in config else None - config_status['enable_tensorrt'] = config['enable_tensorrt'] if 'enable_tensorrt' in config else None - config_status['precision'] = config['precision'] if 'precision' in config else None - config_status['enable_mkldnn'] = config['enable_mkldnn'] if 'enable_mkldnn' in config else None - config_status['cpu_math_library_num_threads'] = config['cpu_math_library_num_threads'] if 'cpu_math_library_num_threads' in config else None + config_status['runtime_device'] = config[ + 'runtime_device'] if 'runtime_device' in config else None + config_status['ir_optim'] = config[ + 'ir_optim'] if 'ir_optim' in config else None + config_status['enable_tensorrt'] = config[ + 'enable_tensorrt'] if 'enable_tensorrt' in config else None + config_status['precision'] = config[ + 'precision'] if 'precision' in config else None + config_status['enable_mkldnn'] = config[ + 'enable_mkldnn'] if 'enable_mkldnn' in config else None + config_status['cpu_math_library_num_threads'] = config[ + 'cpu_math_library_num_threads'] if 'cpu_math_library_num_threads' in config else None else: config_status['runtime_device'] = "None" config_status['ir_optim'] = "None" @@ -178,10 +188,12 @@ def report(self, identifier=None): # report memory cpu_infos, 
gpu_infos = self.end_subprocess_get_mem() - + cpu_rss_mb = self.cpu_infos['cpu_rss'] - gpu_rss_mb = self.gpu_infos['memory.used'] if self.gpu_ids is not None else None - gpu_util = self.gpu_infos['utilization.gpu'] if self.gpu_ids is not None else None + gpu_rss_mb = self.gpu_infos[ + 'memory.used'] if self.gpu_ids is not None else None + gpu_util = self.gpu_infos[ + 'utilization.gpu'] if self.gpu_ids is not None else None # report env envs = get_env_info() @@ -189,16 +201,19 @@ def report(self, identifier=None): self.logger.info("\n") self.logger.info( "---------------------- Env info ----------------------") - # envs['nvidia_driver_version'] envs['cudnn_version']envs['cuda_version'] envs['os_info'] + # envs['nvidia_driver_version'] envs['cudnn_version']envs['cuda_version'] envs['os_info'] self.logger.info(f"{identifier} OS_version: {envs['os_info']}") self.logger.info(f"{identifier} CUDA_version: {envs['cuda_version']}") - self.logger.info(f"{identifier} CUDNN_version: {envs['cudnn_version']}") - self.logger.info(f"{identifier} drivier_version: {envs['nvidia_driver_version']}") + self.logger.info( + f"{identifier} CUDNN_version: {envs['cudnn_version']}") + self.logger.info( + f"{identifier} drivier_version: {envs['nvidia_driver_version']}") self.logger.info( "---------------------- Paddle info ----------------------") self.logger.info(f"{identifier} paddle_version: {self.paddle_version}") self.logger.info(f"{identifier} paddle_commit: {self.paddle_commit}") - self.logger.info(f"{identifier} log_api_version: {self.autolog_version}") + self.logger.info( + f"{identifier} log_api_version: {self.autolog_version}") self.logger.info( "----------------------- Conf info -----------------------") self.logger.info( @@ -211,7 +226,8 @@ def report(self, identifier=None): f"{identifier} enable_tensorrt: {self.config_status['enable_tensorrt']}" ) self.logger.info( - f"{identifier} enable_mkldnn: {self.config_status['enable_mkldnn']}") + f"{identifier} enable_mkldnn: 
{self.config_status['enable_mkldnn']}" + ) self.logger.info( f"{identifier} cpu_math_library_num_threads: {self.config_status['cpu_math_library_num_threads']}" ) @@ -229,8 +245,7 @@ def report(self, identifier=None): self.logger.info( f"{identifier} cpu_rss(MB): {cpu_rss_mb}, gpu_rss(MB): {gpu_rss_mb}, gpu_util: {gpu_util}%" ) - self.logger.info( - f"{identifier} total time spent(s): {total_time_s}") + self.logger.info(f"{identifier} total time spent(s): {total_time_s}") self.logger.info( f"{identifier} preprocess_time(ms): {preprocess_time_ms}, inference_time(ms): {inference_time_ms}, postprocess_time(ms): {postprocess_time_ms}" ) @@ -259,9 +274,97 @@ def print_help(self): """) +class TrainerLogger(object): + def __init__(self, output_dir=None, mode="train"): + self.mode = mode.upper() + self.logger = self.init_logger(output_dir) + + self.smoothed_infos = dict() + + self.init_fixed_fields() + + def init_fixed_fields(self, ): + fixed_unit_maps = { + "avg_samples": "", + "avg_reader_cost": "sec", + "avg_batch_cost": "sec", + } + if self.mode == "TRAIN": + fixed_unit_maps["loss"] = "" + + for key in fixed_unit_maps: + self.smoothed_infos[key] = SmoothedMeter( + key, unit=fixed_unit_maps[key]) + + def add_record_info(self, key, unit=""): + self.smoothed_infos[key] = SmoothedMeter(key, unit=unit) + + def update(self, samples, reader_cost, run_cost, loss, **kwargs): + self.smoothed_infos["avg_samples"].update(samples) + self.smoothed_infos["avg_reader_cost"].update(reader_cost) + self.smoothed_infos["avg_batch_cost"].update(run_cost + reader_cost) + self.smoothed_infos["loss"].update(loss) + for key in kwargs: + self.smoothed_infos[key].update(kwargs[key]) + + def get_ips_info(self, scope="local"): + if self.smoothed_infos["avg_samples"].get_sum(scope=scope) == 0: + return "" + ips_info = "avg_ips: {:.7f} images/sec".format( + self.smoothed_infos["avg_samples"].get_sum(scope=scope) / + self.smoothed_infos["avg_batch_cost"].get_sum(scope=scope)) + return ips_info + + def 
log(self, epoch, batch_idx=None): + if batch_idx is None: + scope = "global" + epoch_info = "[end of epoch {}]".format(epoch) + else: + scope = "local" + epoch_info = "[epoch {}, batch_idx {}]".format(epoch, batch_idx) + + mode_info = "[{}]".format(self.mode) + smooth_val_info = ", ".join([ + self.smoothed_infos[key].log(scope=scope) + for key in self.smoothed_infos + ]) + ips_info = self.get_ips_info(scope=scope) + + msg = "{}{}{}, {}".format(epoch_info, mode_info, smooth_val_info, + ips_info) + self.logger.info(msg) + self.reset(scope=scope) + + def reset(self, scope="local"): + for key in self.smoothed_infos: + self.smoothed_infos[key].reset(scope=scope) + + def init_logger(self, save_path, name='root', log_level=logging.DEBUG): + os.makedirs(save_path, exist_ok=True) + log_file = os.path.join(save_path, "train.log") + + logger = logging.getLogger(name) + + formatter = logging.Formatter( + '[%(asctime)s] %(name)s %(levelname)s: %(message)s', + datefmt="%Y/%m/%d %H:%M:%S") + + stream_handler = logging.StreamHandler(stream=sys.stdout) + stream_handler.setFormatter(formatter) + logger.addHandler(stream_handler) + if log_file is not None: + dir_name = os.path.dirname(log_file) + if len(dir_name) > 0 and not os.path.exists(dir_name): + os.makedirs(dir_name) + file_handler = logging.FileHandler(log_file, 'w') + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + logger.setLevel(log_level) + return logger + + # if __name__ == "__main__": # get_os_info() # print(envs['os_info']) # get_cudnn_info() # print(envs['cudnn_version']) - diff --git a/auto_log/smoothed_meter.py b/auto_log/smoothed_meter.py new file mode 100644 index 0000000..f9eeacb --- /dev/null +++ b/auto_log/smoothed_meter.py @@ -0,0 +1,59 @@ +class SmoothedMeter(object): + def __init__(self, key, unit=""): + # basic info + self.key = key + self.unit = unit + # value and count for local average + self.local_sum = 0 + self.local_count = 0 + # value and count in all for global average + 
self.global_sum = 0 + self.global_count = 0 + self.fmt = None + + def reset(self, scope="global"): + self.local_sum = 0 + self.local_count = 0 + + if scope == "global": + self.global_sum = 0 + self.global_count = 0 + return + + def update(self, value, n=1): + assert isinstance(value, ( + int, + float)), "value can just be int or float but got {}".format(value) + if self.fmt is None: + self.fmt = "{}: {:.7f} {}" if isinstance(value, + float) else "{}: {} {}" + self.local_count += n + self.local_sum += value * n + + self.global_sum += value * n + self.global_count += n + + def _average(self, scope="local"): + assert self.fmt is not None, "update must be used before add_record" + if scope == "local": + if self.local_count <= 0: + return "" + else: + return self.fmt.format(self.key, self.local_sum / + self.local_count, self.unit) + else: + if self.global_count <= 0: + return "" + else: + return self.fmt.format(self.key, self.global_sum / + self.global_count, self.unit) + + def get_sum(self, scope="local"): + if scope == "local": + return self.local_sum + else: + return self.global_sum + + def log(self, scope="local"): + info = self._average(scope=scope) + return info