[WIP] init trainer log #8

Open · wants to merge 2 commits into main
163 changes: 133 additions & 30 deletions auto_log/autolog.py
@@ -25,22 +25,23 @@
from .env import get_env_info
from .utils import Times
from .device import MemInfo, SubprocessGetMem
+from .smoothed_meter import SmoothedMeter


class RunConfig:
-    def __init(self,
+    def __init__(self,
run_devices="cpu",
ir_optim=False,
enable_tensorrt=False,
enable_mkldnn=False,
cpu_threads=0,
enable_mem_optim=True):

self.run_devices = run_devices
self.ir_optim = ir_optim
self.enable_mkldnn = enable_mkldnn
self.enable_tensorrt = enable_tensorrt
-        self.cpu_math_library_num_threads = self.cpu_threads
+        self.cpu_math_library_num_threads = cpu_threads
self.enable_mem_optim = enable_mem_optim


@@ -52,10 +53,12 @@ def __init__(self,
data_shape,
save_path,
inference_config=None,
-                 pids=None,
-                 process_name=None,
-                 gpu_ids=None,
-                 time_keys=['preprocess_time', 'inference_time', 'postprocess_time'],
+                 pids=None,
+                 process_name=None,
+                 gpu_ids=None,
+                 time_keys=[
+                     'preprocess_time', 'inference_time', 'postprocess_time'
+                 ],
warmup=0,
logger=None,
**kwargs):
@@ -71,17 +74,17 @@ def __init__(self,
self.config_status = self.parse_config(self.paddle_infer_config)

self.time_keys = time_keys
-        self.times = Times(keys=time_keys,warmup=warmup)
+        self.times = Times(keys=time_keys, warmup=warmup)

self.get_paddle_info()

self.logger = self.init_logger() if logger is None else logger

self.get_mem = SubprocessGetMem(pid=pids, gpu_id=gpu_ids)
self.start_subprocess_get_mem()
self.pids = pids
self.gpu_ids = gpu_ids

def start_subprocess_get_mem(self):
self.get_mem.get_mem_subprocess_run(0.2)

@@ -95,10 +98,10 @@ def end_subprocess_get_mem(self):
else:
self.gpu_infos = gpu_infos[str(self.gpu_ids)]
return self.cpu_infos, self.gpu_infos

def init_logger(self, name='root', log_level=logging.DEBUG):
log_file = self.save_path

logger = logging.getLogger(name)

formatter = logging.Formatter(
@@ -128,7 +131,8 @@ def parse_config(self, config) -> dict:
"""
config_status = {}
if config is not None and type(config) is not dict:
config_status['runtime_device'] = "gpu" if config.use_gpu() else "cpu"
config_status['runtime_device'] = "gpu" if config.use_gpu(
) else "cpu"
config_status['ir_optim'] = config.ir_optim()
config_status['enable_tensorrt'] = config.tensorrt_engine_enabled()
config_status['precision'] = self.precision
@@ -137,12 +141,18 @@ def parse_config(self, config) -> dict:
'cpu_math_library_num_threads'] = config.cpu_math_library_num_threads(
)
elif type(config) is dict:
-            config_status['runtime_device'] = config['runtime_device'] if 'runtime_device' in config else None
-            config_status['ir_optim'] = config['ir_optim'] if 'ir_optim' in config else None
-            config_status['enable_tensorrt'] = config['enable_tensorrt'] if 'enable_tensorrt' in config else None
-            config_status['precision'] = config['precision'] if 'precision' in config else None
-            config_status['enable_mkldnn'] = config['enable_mkldnn'] if 'enable_mkldnn' in config else None
-            config_status['cpu_math_library_num_threads'] = config['cpu_math_library_num_threads'] if 'cpu_math_library_num_threads' in config else None
+            # missing keys default to None
+            config_status['runtime_device'] = config.get('runtime_device')
+            config_status['ir_optim'] = config.get('ir_optim')
+            config_status['enable_tensorrt'] = config.get('enable_tensorrt')
+            config_status['precision'] = config.get('precision')
+            config_status['enable_mkldnn'] = config.get('enable_mkldnn')
+            config_status['cpu_math_library_num_threads'] = config.get(
+                'cpu_math_library_num_threads')
else:
config_status['runtime_device'] = "None"
config_status['ir_optim'] = "None"
@@ -178,27 +188,32 @@ def report(self, identifier=None):

# report memory
cpu_infos, gpu_infos = self.end_subprocess_get_mem()

cpu_rss_mb = self.cpu_infos['cpu_rss']
-        gpu_rss_mb = self.gpu_infos['memory.used'] if self.gpu_ids is not None else None
-        gpu_util = self.gpu_infos['utilization.gpu'] if self.gpu_ids is not None else None
+        gpu_rss_mb = (self.gpu_infos['memory.used']
+                      if self.gpu_ids is not None else None)
+        gpu_util = (self.gpu_infos['utilization.gpu']
+                    if self.gpu_ids is not None else None)

# report env
envs = get_env_info()

self.logger.info("\n")
self.logger.info(
"---------------------- Env info ----------------------")
        # envs['nvidia_driver_version'], envs['cudnn_version'], envs['cuda_version'], envs['os_info']
self.logger.info(f"{identifier} OS_version: {envs['os_info']}")
self.logger.info(f"{identifier} CUDA_version: {envs['cuda_version']}")
self.logger.info(f"{identifier} CUDNN_version: {envs['cudnn_version']}")
self.logger.info(f"{identifier} drivier_version: {envs['nvidia_driver_version']}")
self.logger.info(
f"{identifier} CUDNN_version: {envs['cudnn_version']}")
self.logger.info(
f"{identifier} drivier_version: {envs['nvidia_driver_version']}")
self.logger.info(
"---------------------- Paddle info ----------------------")
self.logger.info(f"{identifier} paddle_version: {self.paddle_version}")
self.logger.info(f"{identifier} paddle_commit: {self.paddle_commit}")
self.logger.info(f"{identifier} log_api_version: {self.autolog_version}")
self.logger.info(
f"{identifier} log_api_version: {self.autolog_version}")
self.logger.info(
"----------------------- Conf info -----------------------")
self.logger.info(
@@ -211,7 +226,8 @@ def report(self, identifier=None):
f"{identifier} enable_tensorrt: {self.config_status['enable_tensorrt']}"
)
self.logger.info(
f"{identifier} enable_mkldnn: {self.config_status['enable_mkldnn']}")
f"{identifier} enable_mkldnn: {self.config_status['enable_mkldnn']}"
)
self.logger.info(
f"{identifier} cpu_math_library_num_threads: {self.config_status['cpu_math_library_num_threads']}"
)
@@ -229,8 +245,7 @@ def report(self, identifier=None):
self.logger.info(
f"{identifier} cpu_rss(MB): {cpu_rss_mb}, gpu_rss(MB): {gpu_rss_mb}, gpu_util: {gpu_util}%"
)
-        self.logger.info(
-            f"{identifier} total time spent(s): {total_time_s}")
+        self.logger.info(f"{identifier} total time spent(s): {total_time_s}")
self.logger.info(
f"{identifier} preprocess_time(ms): {preprocess_time_ms}, inference_time(ms): {inference_time_ms}, postprocess_time(ms): {postprocess_time_ms}"
)
@@ -259,9 +274,97 @@ def print_help(self):
""")


class TrainerLogger(object):
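    # Aggregates per-batch training metrics (samples, reader cost, batch cost,
    # loss, and any fields registered via add_record_info) in SmoothedMeter
    # instances, logging "local" averages per logging window and "global"
    # averages at epoch end.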
def __init__(self, output_dir=None, mode="train"):
self.mode = mode.upper()
self.logger = self.init_logger(output_dir)

self.smoothed_infos = dict()

self.init_fixed_fields()

    def init_fixed_fields(self):
fixed_unit_maps = {
"avg_samples": "",
"avg_reader_cost": "sec",
"avg_batch_cost": "sec",
}
if self.mode == "TRAIN":
fixed_unit_maps["loss"] = ""

for key in fixed_unit_maps:
self.smoothed_infos[key] = SmoothedMeter(
key, unit=fixed_unit_maps[key])

def add_record_info(self, key, unit=""):
self.smoothed_infos[key] = SmoothedMeter(key, unit=unit)

def update(self, samples, reader_cost, run_cost, loss, **kwargs):
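        # extra metrics passed through **kwargs must be registered with
        # add_record_info() first, otherwise the key lookup below fails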
self.smoothed_infos["avg_samples"].update(samples)
self.smoothed_infos["avg_reader_cost"].update(reader_cost)
self.smoothed_infos["avg_batch_cost"].update(run_cost + reader_cost)
self.smoothed_infos["loss"].update(loss)
for key in kwargs:
self.smoothed_infos[key].update(kwargs[key])

def get_ips_info(self, scope="local"):
if self.smoothed_infos["avg_samples"].get_sum(scope=scope) == 0:
return ""
ips_info = "avg_ips: {:.7f} images/sec".format(
self.smoothed_infos["avg_samples"].get_sum(scope=scope) /
self.smoothed_infos["avg_batch_cost"].get_sum(scope=scope))
return ips_info

def log(self, epoch, batch_idx=None):
if batch_idx is None:
scope = "global"
epoch_info = "[end of epoch {}]".format(epoch)
else:
scope = "local"
epoch_info = "[epoch {}, batch_idx {}]".format(epoch, epoch)

mode_info = "[{}]".format(self.mode)
smooth_val_info = ", ".join([
self.smoothed_infos[key].log(scope=scope)
for key in self.smoothed_infos
])
ips_info = self.get_ips_info(scope=scope)

msg = "{}{}{}, {}".format(epoch_info, mode_info, smooth_val_info,
ips_info)
self.logger.info(msg)
self.reset(scope=scope)

def reset(self, scope="local"):
for key in self.smoothed_infos:
self.smoothed_infos[key].reset(scope=scope)

    def init_logger(self, save_path, name='root', log_level=logging.DEBUG):
        # output_dir defaults to None, so only set up the log file when a
        # directory is actually given
        log_file = None
        if save_path is not None:
            os.makedirs(save_path, exist_ok=True)
            log_file = os.path.join(save_path, "train.log")

logger = logging.getLogger(name)

formatter = logging.Formatter(
'[%(asctime)s] %(name)s %(levelname)s: %(message)s',
datefmt="%Y/%m/%d %H:%M:%S")

stream_handler = logging.StreamHandler(stream=sys.stdout)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
if log_file is not None:
dir_name = os.path.dirname(log_file)
if len(dir_name) > 0 and not os.path.exists(dir_name):
os.makedirs(dir_name)
file_handler = logging.FileHandler(log_file, 'w')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logger.setLevel(log_level)
return logger


# if __name__ == "__main__":
# get_os_info()
# print(envs['os_info'])
# get_cudnn_info()
# print(envs['cudnn_version'])

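A minimal usage sketch for the new TrainerLogger (not part of the diff; the
output directory, batch values, and import path are assumptions based on the
code above):

    from auto_log.autolog import TrainerLogger

    trainer_log = TrainerLogger(output_dir="./train_log", mode="train")
    for epoch in range(2):
        for batch_idx in range(100):
            # reader_cost / run_cost would normally come from timers around
            # data loading and the train step; constants keep the sketch short
            trainer_log.update(
                samples=32, reader_cost=0.01, run_cost=0.09, loss=0.5)
            if batch_idx % 10 == 0:
                trainer_log.log(epoch, batch_idx)  # local window, then reset
        trainer_log.log(epoch)  # global epoch averages, then full reset
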
59 changes: 59 additions & 0 deletions auto_log/smoothed_meter.py
@@ -0,0 +1,59 @@
class SmoothedMeter(object):
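    # Tracks a weighted running sum/count at two scopes: a "local" window
    # cleared by every reset() call, and a "global" accumulator cleared only
    # by reset(scope="global"); update(value, n) adds value * n and n to both.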
def __init__(self, key, unit=""):
# basic info
self.key = key
self.unit = unit
# value and count for local average
self.local_sum = 0
self.local_count = 0
        # value and count over all updates, for the global average
self.global_sum = 0
self.global_count = 0
self.fmt = None

def reset(self, scope="global"):
self.local_sum = 0
self.local_count = 0

if scope == "global":
self.global_sum = 0
self.global_count = 0
return

def update(self, value, n=1):
        assert isinstance(value, (int, float)), \
            "value must be int or float but got {}".format(value)
if self.fmt is None:
self.fmt = "{}: {:.7f} {}" if isinstance(value,
float) else "{}: {} {}"
self.local_count += n
self.local_sum += value * n

self.global_sum += value * n
self.global_count += n

def _average(self, scope="local"):
        assert self.fmt is not None, "update must be called before _average"
if scope == "local":
if self.local_count <= 0:
return ""
else:
return self.fmt.format(self.key, self.local_sum /
self.local_count, self.unit)
else:
if self.global_count <= 0:
return ""
else:
return self.fmt.format(self.key, self.global_sum /
self.global_count, self.unit)

def get_sum(self, scope="local"):
if scope == "local":
return self.local_sum
else:
return self.global_sum

def log(self, scope="local"):
info = self._average(scope=scope)
return info
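
A minimal sketch of the meter's two scopes (assumed usage, not part of the
diff):

    from auto_log.smoothed_meter import SmoothedMeter

    meter = SmoothedMeter("avg_batch_cost", unit="sec")
    for cost in [0.12, 0.10, 0.11]:
        meter.update(cost)
    print(meter.log(scope="local"))   # avg_batch_cost: 0.1100000 sec
    meter.reset(scope="local")        # clears the window, keeps global sums
    meter.update(0.09)
    print(meter.log(scope="local"))   # avg_batch_cost: 0.0900000 sec
    print(meter.log(scope="global"))  # 0.1050000: average of all four updates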