Skip to content

Commit

Permalink
Compiler type specific weights and custom weights (#129)
Browse files Browse the repository at this point in the history
  • Loading branch information
snuffysasa authored Jun 19, 2022
1 parent c59030b commit 9215b34
Show file tree
Hide file tree
Showing 12 changed files with 254 additions and 62 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ You'll first need to install a couple of prerequisites: `python3 -m pip install
The permuter expects as input one or more directories containing:
- a .c file with a single function,
- a .o file to match,
- a .sh file that compiles the .c file.
- a .sh file that compiles the .c file,
- a .toml file specifying settings.

For projects with a properly configured makefile, you should be able to set these up by running
```
Expand All @@ -35,6 +36,10 @@ build_system = "ninja"
```
Then `import.py` should work as expected if `build.ninja` is at the root of the project.

All of the possible randomizations are assigned a weight value that affects the frequency with which the randomization is chosen.
The default set of weights is specified in `default_weights.toml` and varies based on the targeted compiler.
These weights can be overridden by modifying `settings.toml` in the input directory.

The .c file may be modified with any of the following macros which affect manual permutation:

- `PERM_GENERAL(a, b, ...)` expands to any of `a`, `b`, ...
Expand Down
1 change: 1 addition & 0 deletions USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ does all this for you. See README.md for more details.

* create a directory that will contain all of the input files for the invocation
* put a compile command into `<dir>/compile.sh` (see e.g. `compile_example.sh`; it will be invoked as `./compile.sh input.c -o output.o`)
* optionally create a toml file at `<dir>/settings.toml` (see `example_settings.toml` for reference)
* `gcc -E -P -I header_dir -D'__attribute__(x)=' orig_c_file.c > <dir>/base.c`
* `python3 strip_other_fns.py <dir>/base.c func_name`
* put asm for `func_name` into `<dir>/target.s`, with the following header:
Expand Down
45 changes: 45 additions & 0 deletions default_weights.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# These are the default weights for each randomization pass for each compiler type.
# They can be overridden by settings.toml in the non-matching directory.

[base]
perm_temp_for_expr = 100
perm_expand_expr = 20
perm_reorder_stmts = 20
perm_add_mask = 15
perm_xor_zero = 10
perm_cast_simple = 10
perm_refer_to_var = 10
perm_float_literal = 3
perm_randomize_internal_type = 10
perm_randomize_external_type = 5
perm_randomize_function_type = 5
perm_split_assignment = 10
perm_sameline = 3
perm_ins_block = 10
perm_struct_ref = 10
perm_empty_stmt = 10
perm_condition = 10
perm_mult_zero = 5
perm_dummy_comma_expr = 5
perm_add_self_assignment = 5
perm_commutative = 5
perm_add_sub = 5
perm_inequalities = 5
perm_compound_assignment = 5
perm_remove_ast = 5
perm_duplicate_assignment = 5
perm_chain_assignment = 5
perm_pad_var_decl = 1

[ido]
perm_float_literal = 10
perm_sameline = 10

[mwcc]
perm_compound_assignment = 0.5
perm_empty_stmt = 0.5
perm_struct_ref = 0.5
perm_sameline = 0.5
perm_xor_zero = 0.5

[gcc]
5 changes: 5 additions & 0 deletions example_settings.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
func_name = "func_800123456"
compiler_type = "ido" # examples: base, ido, mwcc, gcc

[weight_overrides]
perm_temp_for_expr = 100
37 changes: 34 additions & 3 deletions import.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
import subprocess
import sys
import toml
from typing import Callable, Dict, List, Match, Mapping, Optional, Pattern, Set, Tuple
from typing import Callable, Dict, List, Match, Mapping, Optional, Pattern, Tuple
import urllib.request
import urllib.parse

from src import ast_util
from src.compiler import Compiler
from src.error import CandidateConstructionFailure
from src.helpers import get_default_randomization_weights

is_macos = platform.system() == "Darwin"

Expand Down Expand Up @@ -644,6 +645,23 @@ def compile_base(compile_script: str, source: str, c_file: str, out_file: str) -
print("Warning: failed to compile .c file.")


def create_write_settings_toml(
    func_name: str, compiler_type: str, filename: str
) -> None:
    """Write a starter settings file for an imported function to `filename`.

    The file records `func_name` and `compiler_type`, followed by a
    `[weight_overrides]` table in which every default weight for that
    compiler type is listed as a commented-out line the user can enable.
    """
    # Look the weights up before touching the file, so a failure here does
    # not leave an empty settings file behind.
    default_weights = get_default_randomization_weights(compiler_type)

    header_lines = [
        f'func_name = "{func_name}"\n',
        f'compiler_type = "{compiler_type}"\n\n',
        "# uncomment lines below to customize the weights\n",
        "# see README.md\n",
        "[weight_overrides]\n",
    ]
    override_lines = [
        f"# {name} = {value}\n" for name, value in default_weights.items()
    ]

    with open(filename, "w", encoding="utf-8") as out:
        out.writelines(header_lines + override_lines)


def write_to_file(cont: str, filename: str) -> None:
    """Write `cont` to `filename` as UTF-8 text, replacing any existing contents."""
    with open(filename, mode="w", encoding="utf-8") as out:
        out.write(cont)
Expand Down Expand Up @@ -717,11 +735,24 @@ def main() -> None:
settings = toml.load(f)
break

compiler_type = settings.get("compiler_type", "base")
build_system = settings.get("build_system", "make")
compiler = settings.get("compiler_command")
assembler = settings.get("assembler_command")
make_flags = args.make_flags

compiler_type = settings.get("compiler_type")
if compiler_type is not None:
assert isinstance(compiler_type, str)
print(f"Compiler type: {compiler_type}")
else:
compiler_type = "base"
print(
"Warning: Compiler type is missing from this project's permuter settings.\n"
"Defaulting to base compiler randomization settings. For best permutation results,\n"
"please set 'compiler_type' in this project's permuter_settings.toml."
)

func_name, asm_cont = parse_asm(args.asm_file)
print(f"Function name: {func_name}")

Expand Down Expand Up @@ -783,11 +814,11 @@ def main() -> None:
target_s_file = f"{dirname}/target.s"
target_o_file = f"{dirname}/target.o"
compile_script = f"{dirname}/compile.sh"
func_name_file = f"{dirname}/function.txt"
settings_file = f"{dirname}/settings.toml"

try:
write_to_file(source, base_c_file)
write_to_file(func_name, func_name_file)
create_write_settings_toml(func_name, compiler_type, settings_file)
write_compile_command(compiler, root_dir, compile_script)
write_asm(asm_cont, target_s_file)
compile_asm(assembler, root_dir, target_s_file, target_o_file)
Expand Down
2 changes: 2 additions & 0 deletions permuter_settings_example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

build_system = "ninja"

compiler_type = "ido"

[preserve_macros]
"g[DS]P.*" = "void"
"gDma.*" = "void"
Expand Down
10 changes: 7 additions & 3 deletions src/candidate.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import copy
from dataclasses import dataclass, field
import functools
from typing import Optional, Tuple
from typing import Mapping, Optional, Tuple

from pycparser import c_ast as ca

Expand Down Expand Up @@ -54,7 +54,11 @@ def _cached_shared_ast(

@staticmethod
def from_source(
source: str, eval_state: EvalState, fn_name: str, rng_seed: int
source: str,
eval_state: EvalState,
fn_name: str,
randomization_weights: Mapping[str, float],
rng_seed: int,
) -> "Candidate":
# Use the same AST for all instances of the same original source, but
# with the target function deeply copied. Since we never change the
Expand All @@ -70,7 +74,7 @@ def from_source(
ast=ast,
fn_index=fn_index,
rng_seed=rng_seed,
randomizer=Randomizer(rng_seed),
randomizer=Randomizer(randomization_weights, rng_seed),
)

def randomize_ast(self) -> None:
Expand Down
34 changes: 33 additions & 1 deletion src/helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os
from typing import NoReturn
import toml
from typing import NoReturn, Mapping, Dict
from .error import CandidateConstructionFailure


def plural(n: int, noun: str) -> str:
Expand Down Expand Up @@ -29,3 +31,33 @@ def trim_source(source: str, fn_name: str) -> str:
if new_index != -1:
return source[new_index:]
return source


def get_default_randomization_weights(compiler_type: str) -> Mapping[str, float]:
    """Return the default randomization weights for `compiler_type`.

    Reads `default_weights.toml` and returns one entry per key of its `[base]`
    table. Where the `[<compiler_type>]` table defines the same key, its value
    replaces the base value. Keys that appear only in the compiler-specific
    table are not included in the result.

    The file is looked up in the current working directory first. If it is not
    there, the copy in the parent directory of this module's directory is used,
    so the permuter also works when launched from outside the repository root.

    Raises:
        CandidateConstructionFailure: if the file has no table named
            `compiler_type`.
        FileNotFoundError: if `default_weights.toml` is in neither location.
    """
    weights_filename = "default_weights.toml"
    weights_path = weights_filename
    if not os.path.exists(weights_path):
        # NOTE(review): assumes this module sits one directory below the
        # directory that ships default_weights.toml (src/helpers.py vs. the
        # repository root) -- confirm if the layout changes.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        weights_path = os.path.join(os.path.dirname(module_dir), weights_filename)

    weights: Dict[str, float] = {}
    # TOML documents are UTF-8 by specification; do not depend on the locale.
    with open(weights_path, encoding="utf-8") as f:
        all_weights: Mapping[str, object] = toml.load(f)

    base_weights = all_weights.get("base", {})
    assert isinstance(base_weights, Mapping)
    if compiler_type not in all_weights:
        raise CandidateConstructionFailure(
            f"Unable to find compiler type {compiler_type} in default_weights.toml"
        )
    compiler_weights = all_weights[compiler_type]
    assert isinstance(compiler_weights, Mapping)

    # The base table defines the set of known passes; the compiler table may
    # only override their values.
    for key, weight in base_weights.items():
        weight = compiler_weights.get(key, weight)
        assert isinstance(weight, (int, float))
        weights[key] = float(weight)

    return weights


def get_settings(dir: str) -> Mapping[str, object]:
    """Load `settings.toml` from the directory `dir`.

    Returns the parsed TOML document, or an empty mapping when the file does
    not exist.
    """
    settings_path = os.path.join(dir, "settings.toml")
    try:
        with open(settings_path) as settings_file:
            parsed = toml.load(settings_file)
    except FileNotFoundError:
        # A missing settings file is not an error; callers get no settings.
        return {}
    return parsed
53 changes: 38 additions & 15 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,19 @@
import sys
import threading
import time
from typing import (
Callable,
Dict,
Iterable,
Iterator,
List,
Optional,
Tuple,
)

from typing import Callable, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple

from .candidate import CandidateResult
from .compiler import Compiler
from .error import CandidateConstructionFailure
from .helpers import plural, static_assert_unreachable, trim_source
from .helpers import (
get_settings,
get_default_randomization_weights,
plural,
static_assert_unreachable,
trim_source,
)
from .net.client import start_client
from .net.core import ServerError, connect, enable_debug_mode, MAX_PRIO, MIN_PRIO
from .permuter import (
Expand Down Expand Up @@ -291,12 +290,35 @@ def run_inner(options: Options, heartbeat: Callable[[], None]) -> List[int]:
print(f"{compile_cmd} must be marked executable.", file=sys.stderr)
sys.exit(1)

settings: Mapping[str, object] = get_settings(d)

compiler_type = settings.get("compiler_type", "base")
assert isinstance(compiler_type, str)

compiler_weights = get_default_randomization_weights(compiler_type)
weight_overrides = settings.get("weight_overrides", {})
assert isinstance(weight_overrides, Mapping)
final_weights: Dict[str, float] = {}

# Merge compiler weights with user specified weights.
for rand_type, compiler_weight in compiler_weights.items():
if rand_type in weight_overrides:
assert isinstance(weight_overrides[rand_type], (int, float))
final_weights[rand_type] = float(weight_overrides[rand_type])
else:
final_weights[rand_type] = compiler_weight

fn_name: Optional[str] = None
try:
with open(os.path.join(d, "function.txt"), encoding="utf-8") as f:
fn_name = f.read().strip()
except FileNotFoundError:
pass
if "func_name" in settings:
assert isinstance(settings["func_name"], str)
fn_name = settings["func_name"]

if not fn_name:
try:
with open(os.path.join(d, "function.txt"), encoding="utf-8") as f:
fn_name = f.read().strip()
except FileNotFoundError:
pass

if fn_name:
print(f"{base_c} ({fn_name})")
Expand All @@ -321,6 +343,7 @@ def run_inner(options: Options, heartbeat: Callable[[], None]) -> List[int]:
scorer,
base_c,
c_source,
final_weights,
force_seed=force_seed,
force_rng_seed=force_rng_seed,
keep_prob=options.keep_prob,
Expand Down
19 changes: 15 additions & 4 deletions src/net/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,19 @@
import threading
import time
import traceback
from typing import Counter, Dict, List, Optional, Set, Tuple, Union
from typing import Counter, Dict, List, Mapping, Optional, Set, Tuple, Union
import zlib

from nacl.secret import SecretBox

from ..candidate import CandidateResult
from ..compiler import Compiler
from ..error import CandidateConstructionFailure
from ..helpers import exception_to_string, static_assert_unreachable
from ..helpers import (
exception_to_string,
get_default_randomization_weights,
static_assert_unreachable,
)
from ..permuter import EvalError, EvalResult, Permuter
from ..profiler import Profiler
from ..scorer import Scorer
Expand Down Expand Up @@ -63,7 +67,9 @@ def _setup_port(secret: bytes) -> Port:
return port


def _create_permuter(data: PermuterData) -> Permuter:
def _create_permuter(
data: PermuterData, randomization_weights: Mapping[str, float]
) -> Permuter:
fd, path = mkstemp(suffix=".o", prefix="permuter", text=False)
try:
with os.fdopen(fd, "wb") as f:
Expand All @@ -88,6 +94,7 @@ def _create_permuter(data: PermuterData) -> Permuter:
scorer=scorer,
source_file=data.filename,
source=data.source,
randomization_weights=randomization_weights,
force_seed=None,
force_rng_seed=None,
keep_prob=data.keep_prob,
Expand Down Expand Up @@ -308,6 +315,10 @@ def main() -> None:
remaining_work: Counter[str] = Counter()
should_remove: Set[str] = set()
permuters: Dict[str, Permuter] = {}

# TODO pass weights across the network
randomization_weights = get_default_randomization_weights("base")

timestamp = 0

def try_remove(perm_id: str) -> None:
Expand Down Expand Up @@ -338,7 +349,7 @@ def try_remove(perm_id: str) -> None:
try:
# Construct a permuter. This involves a compilation on the main
# thread, which isn't great but we can live with it for now.
permuter = _create_permuter(item.data)
permuter = _create_permuter(item.data, randomization_weights)

if permuter.base_score != item.data.base_score:
_remove_permuter(permuter)
Expand Down
Loading

0 comments on commit 9215b34

Please sign in to comment.