diff --git a/README.md b/README.md index 4f85633d85..aa59531f0c 100644 --- a/README.md +++ b/README.md @@ -312,6 +312,6 @@ There are README files in many subdirectories, like * [oilshell.zulipchat.com][] is for any kind of discussion * Subscribe for updates: * [/r/oilshell on Reddit](https://www.reddit.com/r/oilshell/) - * [@oilshellblog on Twitter](https://twitter.com/oilshellblog) + * [@oilsforunix on Twitter](https://twitter.com/oilsforunix) diff --git a/build/doc.sh b/build/doc.sh index 4055584f76..022dd48918 100755 --- a/build/doc.sh +++ b/build/doc.sh @@ -21,6 +21,9 @@ REPO_ROOT=$(cd $THIS_DIR/.. && pwd) readonly REPO_ROOT +readonly HTML_BASE_DIR=_release/VERSION + + log() { echo "$@" 1>&2 } @@ -167,7 +170,7 @@ split-and-render() { local rel_path=${src%'.md'} # doc/known-differences local tmp_prefix=_tmp/$rel_path # temp dir for splitting - local out=${2:-_release/VERSION/$rel_path.html} + local out=${2:-$HTML_BASE_DIR/$rel_path.html} local web_url=${3:-'../web'} mkdir -v -p $(dirname $out) $tmp_prefix @@ -227,8 +230,6 @@ render-from-kate() { # Do NOT split because we don't want front matter in the markdown source. render-only() { local src=${1:-README.md} - local css_files=${2:-'../web/manual.css ../web/toc.css'} - local title=${3:-'Oils Source Code'} local name case $src in @@ -243,8 +244,11 @@ render-only() { ;; esac + local out=${2:-$HTML_BASE_DIR/doc/$name.html} + local css_files=${3:-'../web/manual.css ../web/toc.css'} + local title=${4:-'Oils Source Code'} + local prefix=_tmp/doc/$name - local out=_release/VERSION/doc/$name.html local meta=${prefix}_meta.json cat >$meta < + + ## Instructions -### Translating and Compiling `oil-native` +### Translating and Compiling `oils-cpp` Running `mycpp` is best done on a Debian / Ubuntu-ish machine. Follow the instructions at to create @@ -102,12 +118,189 @@ More work in this pass: void Foo:method() { ... } + void Bar:method() { ... } Note: I really wish we were not using visitors, but that's inherited from MyPy. 
+## mycpp Idioms / "Creative Hacks" + +Oils is written in typed Python 2. It will run under a stock Python 2 +interpreter, and it will typecheck with stock MyPy. + +However, there are a few language features that don't map cleanly from typed +Python to C++: + +- switch statements (unfortunately we don't have the Python 3 match statement) +- C++ destructors - the RAII pattern +- casting - MyPy has one kind of cast; C++ has `static_cast` and + `reinterpret_cast`. (We don't use C-style casting.) + +So this describes the idioms we use. There are some hacks in +[mycpp/cppgen_pass.py]($oils-src) to handle these cases, and also Python +runtime equivalents in `mycpp/mylib.py`. + +### `with {,tag,str_}switch` → Switch statement + +We have three constructs that translate to a C++ switch statement. They use a +Python context manager `with Xswitch(obj) ...` as a little hack. + +Here are examples like the ones in [mycpp/examples/test_switch.py]($oils-src). +(`ninja mycpp-logs-equal` translates, compiles, and tests all the examples.) + +Simple switch: + + myint = 99 + with switch(myint) as case: + if case(42, 43): + print('forties') + else: + print('other') + +Switch on **object type**, which goes well with ASDL sum types: + + val = value.Str('foo') # type: value_t + with tagswitch(val) as case: + if case(value_e.Str, value_e.Int): + print('string or int') + else: + print('other') + +We usually need to apply the `UP_val` pattern here, described in the next +section. + +Switch on **string**, which generates a fast **two-level dispatch** -- first on +length, and then with `str_equals_c()`: + + s = 'foo' + with str_switch(s) as case: + if case("foo"): + print('FOO') + else: + print('other') + +### `val` → `UP_val` → `val` Downcasting pattern + +Summary: variable names like `UP_*` are **special** in our Python code. 
+ +Consider the downcasts marked BAD: + + val = value.Str('foo') # type: value_t + + with tagswitch(val) as case: + if case(value_e.Str): + val = cast(value.Str, val) # BAD: conflicts with first declaration + print('s = %s' % val.s) + + elif case(value_e.Int): + val = cast(value.Int, val) # BAD: conflicts with both + print('i = %d' % val.i) + + else: + print('other') + +MyPy allows this, but it translates to invalid C++ code. C++ can't have a +variable named `val`, with 2 related types `value_t` and `value::Str`. + +So we use this idiom instead, which takes advantage of **local vars in case +blocks** in C++: + + val = value.Str('foo') # type: value_t + + UP_val = val # temporary variable that will be cast + + with tagswitch(val) as case: + if case(value_e.Str): + val = cast(value.Str, UP_val) # this works + print('s = %s' % val.s) + + elif case(value_e.Int): + val = cast(value.Int, UP_val) # also works + print('i = %d' % val.i) + + else: + print('other') + +This translates to something like: + + value_t* val = Alloc<value::Str>(str42); + value_t* UP_val = val; + + switch (val->tag()) { + case value_e::Str: { + // DIFFERENT local var + value::Str* val = static_cast<value::Str*>(UP_val); + print(StrFormat(str43, val->s)); + } + break; + case value_e::Int: { + // ANOTHER DIFFERENT local var + value::Int* val = static_cast<value::Int*>(UP_val); + print(StrFormat(str44, val->i)); + } + break; + default: + print(str45); + } + +This works because there's no problem having **different** variables with the +same name within each `case { }` block. + +Again, the names `UP_*` are **special**. If the name doesn't start with `UP_`, +the inner blocks will look like: + + case value_e::Str: { + val = static_cast<value::Str*>(val); // BAD: val reused + print(StrFormat(str43, val->s)); + } + +And they will fail to compile. It's not valid C++ because the superclass +`value_t` doesn't have a field `val->s`. Only the subclass `value::Str` has +it. + +(Note that Python has a single flat scope per function, while C++ has nested +scopes.) 
+ +### Python context manager → C++ constructor and destructor (RAII) + +This Python code: + + with ctx_Foo(42): + f() + +translates to this C++ code: + + { + ctx_Foo tmp(42); + f(); + + // destructor ~ctx_Foo implicitly called + } + +## Limitations Requiring Source Rewrites + +mycpp itself may cause limitations on expressiveness, or the C++ language may +not be able to express what we want. + +- C++ doesn't have `try / except / else`, or `finally` + - Use the `with ctx_Foo` pattern instead. +- `if mylist` tests if the pointer is non-NULL; use `if len(mylist)` for + non-empty test +- Functions can have at most one keyword / optional argument. + - We generate two methods: `f(x)` which calls `f(x, y)` with the default + value of `y` + - If there are two or more optional arguments: + - For classes, you can use the "builder pattern", i.e. add an + `Init_MyMember()` method + - If the arguments are booleans, translate it to a single bitfield argument +- C++ has nested scope and Python has flat function scope. This can cause name + collisions. + - Could enforce this if it becomes a problem + +Also see `mycpp/examples/invalid_*` for Python code that fails to translate. + ## WARNING: Assumptions Not Checked ### Global Constants Can't Be Mutated @@ -122,7 +315,7 @@ We translate top level constants to statically initialized C data structures Even though `List` and `Dict` are mutable in general, you should **NOT** mutate these global instances! The C++ code will break at runtime. 
-### Gotcha about Returning Variants (Subclasses) of a type +### Gotcha about Returning Variants (Subclasses) of a Type MyPy will accept this code: @@ -173,21 +366,38 @@ Related: ## More Translation Notes -### "Creative Hacks" - -- `with tagswitch(d) as case` → `switch / case` - - We don't have Python 3 pattern matching -- Scope-based resource management - - `with ctx_Foo(...)` → C++ constructors and destructors +### Hacky Heuristics -### Major Features - -- `callable(arg)` to either → +- `callable(arg)` to either: - function call `f(arg)` - instantiation `Alloc(arg)` -- `name.attr` to either → +- `name.attr` to either: - `obj->member` - `module::Func` +- `cast(MyType, obj)` to either: + - `static_cast<MyType*>(obj)` + - `reinterpret_cast<MyType*>(obj)` + +### Hacky Hard-Coded Names + +These are signs of coupling between mycpp and Oils, which ideally shouldn't +exist. + +- `mycpp_main.py` + - `ModulesToCompile()` -- some files have to be ordered first, like the ASDL + runtime. + - TODO: Pea can respect parameter order? So we do that outside the project? + - Another ordering constraint comes from **inheritance**. The forward + declaration is NOT sufficient in that case. +- `cppgen_pass.py` + - `_GetCastKind()` has some hard-coded names + - `AsdlType::Create()` is special cased to `::`, not `->` + - Default arguments e.g. `scope_e::Local` need a repeated `using`. + +Issue on mycpp improvements: + +### Major Features + - Python `int` and `bool` → C++ `int` and `bool` - `None` → `nullptr` - Statically Typed Python Collections @@ -214,6 +424,9 @@ Related: - Python generators `Iterator[T]` → eager `List` accumulators - Python Exceptions → C++ exceptions - Python Modules → C++ namespace (we assume a 2-level hierarchy) + - TODO: mycpp needs real modules, because our `oils_for_unix.mycpp.cc` + translation unit is getting big. + - And `cpp/preamble.h` is a hack to work around the lack of modules. ### Minor Translations @@ -246,44 +459,7 @@ Neither of them needs any rooting! 
This is because we use **manual collection points** in the interpreter, and these functions don't call any functions that can collect. They are "leaves" in the call tree. -### Hard-Coded Names - -These are signs of coupling between mycpp and Oil, which ideally shouldn't -exist. - -- `mycpp_main.py` - - `ModulesToCompile()` -- some files have to be ordered first, like the ASDL - runtime. - - TODO: Pea can respect parameter order? So we do that outside the project? - - Another ordering constraint comes from **inheritance**. The forward - declaration is NOT sufficient in that case. -- `cppgen_pass.py` - - `_GetCastKind()` has some hard-coded names - - `AsdlType::Create()` is special cased to `::`, not `->` - - Default arguments e.g. `scope_e::Local` need a repeated `using`. - -Issue on mycpp improvements: - -## Limitations Requiring Source Rewrites - -### Due to the Translation or C++ language - -- C++ doesn't have `try / except / else`, or `finally` - - This usually requires some rewriting -- `if mylist` tests if the pointer is non-NULL; use `if len(mylist)` for - non-empty test -- Functions can have at most one keyword / optional argument. - - We generate two methods: `f(x)` which calls `f(x, y)` with the default - value of `y` - - If there are two or more optional arguments: - - For classes, you can use the "builder pattern", i.e. add an - `Init_MyMember()` method - - If the arguments are booleans, translate it to a single bitfield argument -- C++ has nested scope and Python has flat function scope. Can cause name - collisions. - - Could enforce this if it becomes a problem - -## C++ +## C++ Notes ### Gotchas diff --git a/mycpp/cppgen_pass.py b/mycpp/cppgen_pass.py index b904a91edd..4f0b1c9c13 100644 --- a/mycpp/cppgen_pass.py +++ b/mycpp/cppgen_pass.py @@ -412,8 +412,8 @@ def __init__(self, self.virtual = virtual # local_vars: FuncDef node -> list of type, var - # This is different from member_vars because we collect it in the 'decl' - # phase. 
But then write it in the definition phase. + # This is different from member_vars because we collect it in the + # 'decl' phase, and write it in the definition phase. self.local_vars = local_vars self.fmt_ids = fmt_ids self.field_gc = field_gc @@ -1414,8 +1414,11 @@ def visit_assignment_stmt(self, o: 'mypy.nodes.AssignmentStmt') -> T: c_type[:-1]) return - # src = cast(source__SourcedFile, src) - # -> source__SourcedFile* src = static_cast(src) + # is_downcast_and_shadow idiom: + # + # src = cast(source__SourcedFile, UP_src) + # -> source__SourcedFile* src = static_cast(UP_src) + if callee.name == 'cast': assert isinstance(lval, NameExpr) call = o.rvalue @@ -1424,19 +1427,22 @@ def visit_assignment_stmt(self, o: 'mypy.nodes.AssignmentStmt') -> T: cast_kind = _GetCastKind(self.module_path, subtype_name) - # HACK: Distinguish between UP cast and DOWN cast. - # osh/cmd_parse.py _MakeAssignPair does an UP cast within branches. - # _t is the base type, so that means it's an upcast. - if 0: - #if (isinstance(type_expr, NameExpr) and - # type_expr.name.endswith('_t')): + is_downcast_and_shadow = False + to_cast = call.args[1] + if isinstance(to_cast, NameExpr): + if to_cast.name.startswith('UP_'): + is_downcast_and_shadow = True + + if is_downcast_and_shadow: + # Declare NEW local variable inside case, which shadows it + self.def_write_ind('%s %s = %s<%s>(', subtype_name, + lval.name, cast_kind, subtype_name) + else: + # Normal variable if self.decl: self.local_var_list.append((lval.name, subtype_name)) self.def_write_ind('%s = %s<%s>(', lval.name, cast_kind, subtype_name) - else: - self.def_write_ind('%s %s = %s<%s>(', subtype_name, - lval.name, cast_kind, subtype_name) self.accept(call.args[1]) # variable being casted self.def_write(');\n') diff --git a/mycpp/examples/test_cast.py b/mycpp/examples/test_cast.py index 469b5a25ba..09cf7a7420 100755 --- a/mycpp/examples/test_cast.py +++ b/mycpp/examples/test_cast.py @@ -83,8 +83,9 @@ def tag(self): def 
TestSwitchDowncast(val): # type: (value_t) -> None - - # typical UP_ pattern + """ + The common val -> UP_val -> val pattern + """ UP_val = val with tagswitch(val) as case: if case(1): @@ -97,6 +98,24 @@ def TestSwitchDowncast(val): print('other') +def TestSwitchDowncastBad(val): + # type: (value_t) -> None + + #UP_val = val + with tagswitch(val) as case: + if case(1): + val = cast(value__Int, val) + print('Int') + #print('Int = %d' % val.i) + elif case(2): + val = cast(value__Eggex, val) + print('Eggex') + # If we enable this, then it fails to compile + #print('Eggex = %r' % val.ere) + else: + print('other') + + def TestCastInSwitch(): # type: () -> None @@ -125,7 +144,11 @@ def run_tests(): TestCastBufWriter() TestSwitchDowncast(value__Eggex('[0-9]')) TestSwitchDowncast(value__Int(42)) - #TestCastInSwitch() + + TestSwitchDowncastBad(value__Eggex('[0-9]')) + TestSwitchDowncastBad(value__Int(42)) + + TestCastInSwitch() def run_benchmarks():