Add training 2.1 ruleset (#263)
pgmpablo157321 authored Sep 28, 2022
1 parent ae5ae9a commit d12211b
Showing 40 changed files with 2,863 additions and 37 deletions.
10 changes: 10 additions & 0 deletions mlperf_logging/benchmark_meta.py
@@ -74,6 +74,16 @@
'rnnt',
'unet3d',
],
'2.1': [
'bert',
'dlrm',
'maskrcnn',
'minigo',
'resnet',
'ssd',
'rnnt',
'unet3d',
],
},

'hpc': {
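
The dictionary above maps a usage ('training', 'hpc') and a ruleset version to the benchmarks accepted for that round. Below is a minimal sketch of a lookup over that structure; `ALLOWED_BENCHMARKS` and `benchmarks_for` are hypothetical names used only for illustration and are not part of benchmark_meta.py:

```python
# Hypothetical, trimmed-down copy of the nested mapping shown in the diff above.
ALLOWED_BENCHMARKS = {
    'training': {
        '2.1': ['bert', 'dlrm', 'maskrcnn', 'minigo',
                'resnet', 'ssd', 'rnnt', 'unet3d'],
    },
}

def benchmarks_for(usage, ruleset):
    """Return the benchmark list for a usage/ruleset pair; raises KeyError if unknown."""
    return ALLOWED_BENCHMARKS[usage][ruleset]

print(benchmarks_for('training', '2.1'))  # ['bert', 'dlrm', ..., 'unet3d']
```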
42 changes: 21 additions & 21 deletions mlperf_logging/compliance_checker/README.md
@@ -10,9 +10,9 @@ To check a log file for compliance:

python -m mlperf_logging.compliance_checker [--config YAML] [--usage training/hpc] [--ruleset MLPERF_EDITION] FILENAME

By default, 2.0.0 training edition rules are used and the default config is set to `2.0.0/common.yaml`.
By default, 2.1.0 training edition rules are used and the default config is set to `2.1.0/common.yaml`.
This config will check all common keys and enqueue benchmark specific config to be checked as well.
Old training editions still supported are 1.1.0, 1.0.0, 0.7.0 and 0.6.0
Old training editions still supported are 2.0.0, 1.1.0, 1.0.0, 0.7.0 and 0.6.0

To check hpc compliance rules (only 1.0.0 ruleset is supported), set --usage hpc --ruleset 1.0.0.
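
For example, to validate a single training result log against the new 2.1.0 rules (the log file name below is illustrative):

    python -m mlperf_logging.compliance_checker --usage training --ruleset 2.1.0 result_0.txt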

Expand All @@ -22,25 +22,25 @@ As log examples use [NVIDIA's training logs](https://github.com/mlperf/training_

### Existing config files for training submissions

2.0.0/common.yaml - currently the default config file, checks common fields compliance and enqueues benchmark-specific config file
2.0.0/closed_common.yaml - the common rules file for closed submissions. These rules apply to all benchmarks
2.0.0/open_common.yaml - the common rules file for open submissions. These rules apply to all benchmarks
2.0.0/closed_resnet.yaml - Per-benchmark rules, closed submissions.
2.0.0/closed_ssd.yaml
2.0.0/closed_minigo.yaml
2.0.0/closed_maskrcnn.yaml
2.0.0/closed_rnnt.yaml
2.0.0/closed_unet3d.yaml
2.0.0/closed_bert.yaml
2.0.0/closed_dlrm.yaml
2.0.0/open_resnet.yaml - Per-benchmark rules, open submissions.
2.0.0/open_ssd.yaml
2.0.0/open__minigo.yaml
2.0.0/open_maskrcnn.yaml
2.0.0/open_rnnt.yaml
2.0.0/open_unet3d.yaml
2.0.0/open_bert.yaml
2.0.0/open_dlrm.yaml
2.1.0/common.yaml - currently the default config file, checks common fields compliance and enqueues benchmark-specific config file
2.1.0/closed_common.yaml - the common rules file for closed submissions. These rules apply to all benchmarks
2.1.0/open_common.yaml - the common rules file for open submissions. These rules apply to all benchmarks
2.1.0/closed_resnet.yaml - Per-benchmark rules, closed submissions.
2.1.0/closed_ssd.yaml
2.1.0/closed_minigo.yaml
2.1.0/closed_maskrcnn.yaml
2.1.0/closed_rnnt.yaml
2.1.0/closed_unet3d.yaml
2.1.0/closed_bert.yaml
2.1.0/closed_dlrm.yaml
2.1.0/open_resnet.yaml - Per-benchmark rules, open submissions.
2.1.0/open_ssd.yaml
2.1.0/open__minigo.yaml
2.1.0/open_maskrcnn.yaml
2.1.0/open_rnnt.yaml
2.1.0/open_unet3d.yaml
2.1.0/open_bert.yaml
2.1.0/open_dlrm.yaml

### Existing config files for HPC submissions

2 changes: 1 addition & 1 deletion mlperf_logging/compliance_checker/mlp_compliance.py
@@ -304,7 +304,7 @@ def get_parser():
parser.add_argument('--usage', type=str, default='training',
choices=usage_choices(),
help='what WG do the benchmarks come from')
parser.add_argument('--ruleset', type=str, default='2.0.0',
parser.add_argument('--ruleset', type=str, default='2.1.0',
choices=rule_choices(),
help='what version of rules to check the log against')
parser.add_argument('--config', type=str,
7 changes: 5 additions & 2 deletions mlperf_logging/compliance_checker/mlp_parser/__init__.py
@@ -1,8 +1,9 @@
from .ruleset_060 import parse_file as parse_file_060
from .ruleset_070 import parse_file as parse_file_070
from .ruleset_100 import parse_file as parse_file_100
from .ruleset_100 import parse_file as parse_file_110
from .ruleset_100 import parse_file as parse_file_200
from .ruleset_110 import parse_file as parse_file_110
from .ruleset_200 import parse_file as parse_file_200
from .ruleset_210 import parse_file as parse_file_210


def parse_file(filename, ruleset='0.6.0'):
@@ -16,5 +17,7 @@ def parse_file(filename, ruleset='0.6.0'):
return parse_file_110(filename)
elif ruleset == '2.0.0':
return parse_file_200(filename)
elif ruleset == '2.1.0':
return parse_file_210(filename)
else:
raise Exception(f'Ruleset "{ruleset}" is not supported')
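
A small usage sketch of the dispatcher above; `result_0.txt` is an illustrative file name, and the return value mirrors the per-ruleset parsers: a list of parsed log lines plus a list of lines that failed to parse.

```python
from mlperf_logging.compliance_checker.mlp_parser import parse_file

# Dispatches to ruleset_210.parse_file because ruleset is '2.1.0'.
loglines, errors = parse_file('result_0.txt', ruleset='2.1.0')
print(f'parsed {len(loglines)} log lines, {len(errors)} failures')
```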
104 changes: 104 additions & 0 deletions mlperf_logging/compliance_checker/mlp_parser/ruleset_210.py
@@ -0,0 +1,104 @@
'''
Parses a text MLPerf log into a structured format.
'''

from __future__ import print_function

import collections
import json
import re
import sys

from io import open

LogLine = collections.namedtuple('LogLine', [
'full_string', # the complete line as a string
'timestamp', # seconds as a float, e.g. 1234.567
'key', # the string key
'value', # the parsed value associated with the tag, or None if no value
'lineno', # the line number in the file
])


TOKEN = ':::MLLOG '


def parse_line(line):
if not line.startswith(TOKEN):
return None

return json.loads(line[len(TOKEN):])


def string_to_logline(lineno, string):
''' Returns a LogLine or raises a ValueError '''
m = parse_line(string)

if m is None:
raise ValueError('does not match regex')

args = []
args.append(string) # full string

ts = float(m['time_ms']) # may raise error, e.g. "1.2.3"
# TODO check for weird values
args.append(ts)

args.append(m['key']) # key

j = { 'value': m['value'], 'metadata': m['metadata'] }
args.append(j)

args.append(lineno)
return LogLine(*args)


def parse_file(filename):
''' Reads a file by name and returns list of loglines and list of errors'''
with open(filename, encoding='latin-1') as f:
return parse_generator(f)


def strip_and_dedup(gen):
lines = []
for l in gen:
if TOKEN not in l:
continue
lines.append(re.sub(".*"+TOKEN, TOKEN, l))
return lines



def parse_generator(gen):
''' Reads a generator of lines and returns (loglines, errors)
The list of errors are any parsing issues as a tuple (str_line, error_msg)
'''
loglines = []
failed = []
for lineno, line in enumerate(strip_and_dedup(gen)):
line = line.strip()
try:
ll = string_to_logline(lineno, line)
loglines.append(ll)
except ValueError as e:
failed.append((line, str(e)))
return loglines, failed


if __name__ == '__main__':
if len(sys.argv) != 2:
print('usage: mlp_parser.py FILENAME')
print(' tests parsing on the file.')
sys.exit(1)

filename = sys.argv[1]
lines, errors = parse_file(filename)

print('Parsed {} log lines with {} errors.'.format(len(lines), len(errors)))

if len(errors) > 0:
print('Lines which failed to parse:')
for line, error in errors:
print(' Following line failed: {}'.format(error))
print(line)
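
A quick, self-contained check of the parser above on in-memory lines; the `:::MLLOG` payload is a made-up example that only follows the JSON fields (`time_ms`, `key`, `value`, `metadata`) expected by `string_to_logline`:

```python
from mlperf_logging.compliance_checker.mlp_parser import ruleset_210

sample = [
    # A well-formed MLLOG record (contents are illustrative) ...
    ':::MLLOG {"time_ms": 1664400000000, "key": "submission_benchmark", '
    '"value": "bert", "metadata": {"lineno": 1}}',
    # ... and a stray stdout line, which strip_and_dedup silently drops.
    'some unrelated stdout noise',
]

loglines, errors = ruleset_210.parse_generator(sample)
print(loglines[0].key)    # submission_benchmark
print(loglines[0].value)  # {'value': 'bert', 'metadata': {'lineno': 1}}
print(len(errors))        # 0
```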

