From 34a55b88efb6b08cd76eda16ac3fe838e71171ae Mon Sep 17 00:00:00 2001 From: k32 <10274441+k32@users.noreply.github.com> Date: Tue, 4 Jul 2023 15:33:57 +0200 Subject: [PATCH] doc: Update README --- README.adoc | 336 ++++++++++++++++++++++++++++++++++++++++++ README.org | 335 ----------------------------------------- doc/src/schema.adoc | 3 +- src/framework/lee.erl | 11 +- 4 files changed, 344 insertions(+), 341 deletions(-) create mode 100644 README.adoc delete mode 100644 README.org diff --git a/README.adoc b/README.adoc new file mode 100644 index 0000000..70f6a13 --- /dev/null +++ b/README.adoc @@ -0,0 +1,336 @@ +:!sectid: += Lee + +== User stories + +- As a power user I want to configure tools without looking into their code. + I want a useful error message instead of a BEAM dump when I make an error in the config. + I want documentation about all configurable parameters, their purpose, default value and the type. + +- As a software designer I want to focus on the business logic instead of dealing with the boring configuration-related stuff. + I want to have a `?magic:get(Key)` function that always returns a value that is guaranteed to be safe. + +- As a software designer I want to work with native Erlang data types. + +There are a few approaches to this conflict: + +image::doc/images/explanation.png[galaxy brain meme explaining how Lee absolutely crashes the competitors by utilizing reflection of dialyzer type specs] + +This library _does_ implement `?magic:get/1` function. 
+ +== Features + +* Configuration validation (completeness, type safety) +** Type checking uses standard Erlang types, so the type safety guarantees can be extended all the way to the code using the configuration +* CLI arguments parser +* Reading configuration from OS environment variables +* Integration with OTP logger +* Automatic syncing of the configuration with the OTP application environment +* Multiple storage backends for configuration data to choose from: +** persistent term +** mnesia +** regular map +** add your own +* Documentation generation (HTML, manpages, PDF, epub, ...) +** Using Asciidoc or DocBook as input +* Transactional configuration changes +** Configuration patches are validated before taking effect +* Automatic validation of the schema (meta-validation) +* Extensive plugin support (in fact, every feature mentioned above is implemented as a plugin) +* ...All in https://github.com/k32/Lee/blob/master/support/linelimit[less than 3000 lines of code] + +== Example model + +Get a taste of what Lee configuration specification looks like: + +[code:erlang] +---- +model() -> + #{ logging => + #{ level => + {[value, os_env, cli_param, logger_level], + #{ oneliner => "Primary log level" + , type => lee_logger:level() + , default => notice + , cli_operand => "log-level" + }} + , default_handler_level => + {[value, os_env, logger_level], + #{ oneliner => "Log level for the default handler" + , type => lee_logger:level() + , default_ref => [logging, level] + , logger_handler => default + }} + } + , listener => + {[map, cli_action, default_instance], + #{ oneliner => "REST API listener" + , key_elements => [ [id] ] + , cli_operand => "listener" + }, + #{ id => + {[value, cli_param], + #{ oneliner => "Unique identifier of the listener" + , type => atom() + , default => local + , cli_operand => "id" + , cli_short => $i + }} + , port => + {[value, cli_param], + #{ oneliner => "Listening interface/port" + , type => typerefl:listen_port_ip4() + , default_str 
=> "0.0.0.0:8017" + , cli_operand => "port" + , cli_short => $p + }} + }} + }. +---- + +Business logic can access values from this model like this: + +[code:erlang] +---- +LogLevel = lee:get(?MY_STORAGE, [logging, level]), +%% List listener IDs: +Listeners = lee:list(?MY_STORAGE, [listener, {}]), +%% Get parameters for a listener with ID='local': +{IP, Port} = lee:get(?MY_STORAGE, [listener, {local}, port]), +... +---- + +where `?MY_STORAGE` is a static term explained later. + +Note that this function returns the value straight away, without wrapping it in an `{ok, ...} | undefined` tuple, +because Lee guarantees that the configuration is always complete. + +== Schema +=== Type reflections + +As advertised, Lee configuration is fully aware of the Dialyzer types. +Lee relies on the https://github.com/k32/typerefl[typerefl] library to reify types. + +=== Model nodes + +Note: we use the word "model" as a synonym for "schema" or "specification". + +Model is the central concept in Lee. +Lee models are made of two basic building blocks: *namespaces* and *nodes*. +A namespace is a regular Erlang map where keys are atoms and values can be either nodes or other namespaces. + +Leaf node is a tuple that looks like this: + +[code:erlang] +---- +{ MetaTypes :: [MetaType :: atom()] +, MetaParameters :: #{atom() => term()} +, Children :: lee:namespace() +} +---- + +or this: + +[code:erlang] +---- +{ MetaTypes :: [atom()] +, MetaParameters :: #{atom() => term()} +} +---- + +(The latter is just a shortcut where `Children` is an empty map.) + +`MetaTypes` is a list of behaviors associated with the node. + +=== Metatypes + +Metatypes are the meat and potatoes of Lee: they define the behaviors associated with the node. +Every feature, such as type checking or CLI parsing, is handled by one of the metatypes. 
+Metatypes are defined by the Erlang modules implementing the https://github.com/k32/Lee/blob/master/src/framework/lee_metatype.erl[`lee_metatype` behavior], +which defines a number of callbacks invoked during different configuration-related workflows. + +Example metatypes: + +* `value` denotes a configurable value that can be accessed using the `lee:get/2` function. +  It defines the type and the default value. + +* `map` denotes that the node is a container for child values. + +* `app_env` syncs values defined in the Lee schema with the OTP application environment. + +* `os_env` reads configurable values from the OS environment variables. + +* `cli_param`, `cli_action`, and `cli_positional` read configurable values from the CLI arguments. + +* `logger_level` automatically sets the logger level. + +* `default_instance` automatically creates the default instance of a map. + +* ... + +And of course users can create custom metatypes. + +=== Metaparameters + +The `MetaParameters` field of the node is a map containing arbitrary data relevant to the metatypes assigned to the node. +There are no strict rules about it. +For example, the `value` metatype requires the `type` metaparameter and accepts an optional `default` metaparameter. + +=== Meta-validation + +Metatype callback modules validate correctness and consistency of the Lee model itself. +This process is called meta-validation. +For example, the `value` metatype checks that the value of the `default` metaparameter has the correct type. + +=== Model compilation + +Lee models have a nice property: they are composable as long as their keys do not clash, so they can be merged together. 
+ +Model modules must be compiled into a machine-friendly form before use, by calling the `lee_model:compile/2` function: + +[code:erlang] +---- +lee_model:compile( [lee:base_metamodel(), lee_metatype:create(lee_cli)] + , [Model] + ) +---- + +It takes two arguments: the second argument is a list of "raw" models to be merged, +and the first one is a list of terms produced by applying the `lee_metatype:create` function to each callback module used by the model. +The most common metatypes, such as `value` and `map`, are provided by the `lee:base_metamodel()` function. + +== Data storage + +Lee provides an abstraction called `lee_storage` that is used as a container for the runtime configuration data. +Any key-value storage (from a proplist to a mnesia table) can serve as a `lee_storage`. +There are a few prepackaged implementations: + +- `lee_map_storage` is the most basic backend, keeping data in a regular map +- `lee_persistent_term_storage` stores data in a persistent term tagged with the specified atom +- `lee_mnesia_storage` uses mnesia as storage, reads are transactional +- `lee_dirty_mnesia_storage` is the same, but reads are dirty (this storage is read-only) + +The contents of the storage can be modified via *patches*. +The following example illustrates how to create a new storage and populate it with some data: + +[code:erlang] +---- +-include_lib("lee/include/lee.hrl"). + +-define(MY_STORAGE_KEY, my_storage_key). +-define(MY_STORAGE, ?lee_persistent_term_storage(?MY_STORAGE_KEY)). + +... + +%% Initialization: +%% Create the model: +{ok, Model} = lee_model:compile(...), +%% Create an empty storage: +?MY_STORAGE = lee_storage:new(lee_persistent_term_storage, ?MY_STORAGE_KEY), +%% Initialize the config. This will read OS environment variables, CLI +%% arguments, etc. and apply this data to the config storage: +lee:init_config(Model, ?MY_STORAGE), + +... 
+ +%% Modify configuration at runtime: +Patch = [ %% Set some values: + {set, [foo], false} + , {set, [bar, quux], [quux]} + %% Delete a value: + , {rm, [bar, baz]} + ], +lee:patch(?MY_STORAGE, Patch) +---- + +The `lee:patch` function first applies the patch to a temporary storage, validates its consistency, and only then transfers the data to `?MY_STORAGE`. + +== Documentation + +Lee helps to generate user manuals using https://docbook.org/[DocBook] as an intermediate format. +(You don't have to write any DocBook by hand, though.) + +In the simplest case, it is possible to embed docstrings directly into the model: + +[code:erlang] +---- +#{ foo => + {[value], + #{ oneliner => "This value controls fooing" %% Very short description in plain text + , type => integer() + , default => 41 + %% Long description in DocBook format: + , doc => "This is a long and elaborate description of + the parameter using docbook markup. + It just goes on and on..." + }} + }. +---- + +`oneliner` is a one-sentence summary, and `doc` is a more elaborate description formatted as DocBook. +Lee does the job of assembling an intermediate DocBook file from the fragments. + +In addition, a node with the `doc_root` metatype, containing information about the application itself, must be present somewhere in the model: + +[code:erlang] +---- +#{ '$doc_root' => + {[doc_root], + #{ oneliner => "An ultimate frobnicator" + , app_name => "Frob" + , doc => "Long and elaborate description of this + tool" + %% Name of executable: + , prog_name => "frob" + }} + }. +---- + +(Note: the location of the doc root node doesn't matter, but there should be one and only one node with the `doc_root` metatype). 
+ +=== Why DocBook + +DocBook is not the most popular or concise markup language, but it was chosen as an intermediate format because of the following properties: + +* It's the easiest format to assemble from small fragments +* Erlang has great support for XML out of the box +* It's easy to validate programmatically, thanks to the XML schema +** It's easier to integrate spell checking +* It's whitespace-insensitive. +  Given that the docstrings may come from string literals embedded into Erlang code, +  formatting of the source code should not affect the resulting documents. +  Also it generally focuses on structure rather than representation, +  which is useful for extracting a diverse range of formats from HTML to manpage. + +=== Asciidoc + +Embedding documentation into code as XML works in simple cases, but it doesn't scale well. +In a large project it's preferable to keep documentation separate, and use a less verbose format. + +Lee supports https://docs.asciidoctor.org/asciidoc/latest/[AsciiDoc] as an alternative format. +<> module makes it possible to "enrich" the model with the docstrings extracted from an external source, +see <>. + +=== Export + +The job of the `lee_doc` module is to produce a DocBook XML file. +Converting that file into end-user formats can be done with XSLT stylesheets: + +* https://docbook.org/tools/ +  Note: many Linux distributions make these stylesheets available in the repositories. +  For example, in Ubuntu the package is called `docbook-xsl`. + +* https://xsltng.docbook.org +  XSLTNG can create very fancy static HTML webpages. + +<> gives an example of how to integrate these tools into the build flow. + +== Name? + +This library is named after Tsung-Dao Lee, a physicist who predicted P-symmetry violation together with Chen-Ning Yang. + +== Design goals +=== Speed + +TL;DR: getting values from config should be very fast, but updating and validating config may be slow. +It should be possible to use the `lee:get` function in hotspots. 
diff --git a/README.org b/README.org deleted file mode 100644 index 28d2825..0000000 --- a/README.org +++ /dev/null @@ -1,335 +0,0 @@ -#+TITLE: Lee - -* User stories - -- As a power user I want to configure tools without looking into their code. - I want a useful error message instead of a BEAM dump when I make an error in the config. - I want documentation about all configurable parameters, their purpose and type. - -- As a software designer I want to focus on the business logic instead of dealing with the boring configuration-related stuff. - I want to have a =?magic:get(Key)= function that returns a value that is guaranteed safe. - -- As a software designer I want to work with native Erlang data types. - -There are a few approaches to this conflict: - -[[file:doc/images/explanation.png]] - -This library /does/ provide =?magic:get/1= function. -The below document explains how. (If you just want to check what the API looks like, skip to [[#gathering-it-all-together][Gathering it all together]]) - -* Features - -- CLI arguments parser -- Reading configuration from OS environment variables -- Syncing configuration with the OTP application environment variables -- Type checking of the runtime configuration using Dialyzer types -- Consistent configuration changes -- Documentation generation (HTML, Markdown, man, texinfo... via [[https:pandoc.org][pandoc]]) -- Configuration schema validation -- Extensive plugin support (in fact, every feature mentioned above is implemented as a plugin) -- ...All in [[https://github.com/k32/Lee/blob/master/support/linelimit][less than 3000 lines of code]] - -* Introduction - -/Lee/ helps creating type-safe, self-documenting configuration for Erlang applications. -It is basically a data modeling DSL, vaguely inspired by [[https://tools.ietf.org/html/rfc7950][YANG]], however scaled down /a lot/. - -Software configuration is a solved problem. 
-The solution is to gather all information about the user-facing commands and parameters in one place called /data model/ (or schema, if you prefer) and generate all kinds of code and documentation from it, instead of trying to keep everything in sync by hand and inevitably failing in the end. - -This approach has been widely used in telecom where the number of configurable parameters per device can easily reach thousands. -Unfortunately the existing solutions are extremely heavy and difficult to deal with, also almost none of them is open source. -One doesn't want to mess with YANG compilers and proprietary libraries for a mere small tool, and it's understandable. -/Lee/ attempts to implement a /reasonably useful/ alternative to these tools as an embedded DSL. -And be fully Erlang-native too. - -The below document explains Lee from the bottom-up. - -* Type reflections - -As advertized, Lee configuration is fully aware of the Dialyzer types. -In order to make use of them to the runtime, Lee relies on [[https://github.com/k32/typerefl][typerefl]] library. - -* Defining models - -/Model/ is the central concept in Lee. -Generally speaking, Lee model can be seen as a schema of the configuration. -The model is a tree-like structure where each node represent some entity. - -Lee models are made of two basic building blocks: /namespaces/ and /mnodes/ (model nodes). -Namespace is a regular Erlang map where values are either mnodes or other namespaces. - -Mnode is a tuple that looks like this: - -#+BEGIN_SRC erlang -{ MetaTypes :: [MetaType :: atom()] -, MetaParameters :: #{atom() => term()} -, Children :: lee:namespace() -} -#+END_SRC - -or this: - -#+BEGIN_SRC erlang -{ MetaTypes :: [atom()] -, MetaParameters :: #{atom() => term()} -} -#+END_SRC - -(The latter is just a shortcut where =Children= is an empty map.) - -=MetaTypes= is a list of /metatype/ IDs which are applicable to the mnode. 
-Meta types are the meat of Lee, as they define the behaviors associated with the mnodes. -Every feature (such as CLI parsing) is handled by one of the metatypes. -Metatypes are defined by the Erlang modules implementing =lee_metatype= behavior, defining a number of callbacks that are called during every operation with the configuration. - -Example metatypes: - -- =value= metatype means the mnode denotes a configurable value - -- =map= metatype denotes that mnode is a container for the values - -Multiple metatypes can be assigned to each mnode. - -=MetaParameters= field contains data relevant to the metatypes assigned to the mnode. -There are no strict rules about it. -For example, =value= metatype requires =type= metaparameter and optional =default= parameter. - -Finally, =Children= field allows nesting of mnodes. - -Any mnode is uniquely identified by the /model key/. -Model key is a list of namespace keys or empty tuples (={}=) separating parent and child parts of the key. - -The following example shows how to define a Lee model: - -#+BEGIN_SRC erlang --spec model() -> lee:lee_module(). -model() -> - #{ foo => - {[value], - #{ type => boolean() - , oneliner => "This value controls fooing" - }} - , bar => - #{ baz => - {[value], - #{ type => integer() - , oneliner => "This value controls bazing" - , default => 42 - }} - , quux => - {[map], - #{ oneliner => "This value controls quuxing" - , key_elements => [[id]] - , #{ id => - {[value], - #{ type => boolean() - }} - }} - } - }. -#+END_SRC - -=[foo]=, =[bar, baz]=, =[bar, quux]= and =[bar, quux, {}, id]= are valid /model keys/ in the above model. -Note the empty tuple in the last key: it separates the part of the key that belongs to the child mnode with key =[id]=. - -Lee models have a nice property: they are /composable/ as long as their keys do not clash, so they can be merged together. 
- -Model modules should be compiled to a machine-friendly form before use using =lee_model:compile/2= function: - -#+begin_src erlang -lee_model:compile( [lee:base_metamodel(), lee_metatype:create(lee_cli)] - , [Model] - ) -#+end_src - -It takes two arguments: the second argument is a list of "raw" models to be merged, -and the first one is a list of =lee_metatype:create= calls for each callback module used by the model. -Most commonly used metatypes such as =value= and =map= are gathered into =lee:base_metamodel()= function. - -* Data storage - -Lee provides an abstraction called =lee_storage= to keep track of the actual configuration data. -Any key-value storage (from proplist to a mnesia table) can serve as a =lee_storage=. -There are a few prepackaged implementations: - -- =lee_map_storage= the most basic backend keeping data in a regular map -- =lee_persistent_term_storage= stores data in a persistent term tagged with a specified atom -- =lee_mnesia_storage= uses mnesia as storage, reads are transactional -- =lee_dirty_mnesia_storage= is the same, but reads are dirty (this storage is read-only) - -The contents of the storage can be modified via /patches/. -The following example illustrates how to create a new storage and populate it with some data: - -#+BEGIN_SRC erlang -%% Create am empty storage: -Data0 = lee_storage:new(lee_map_storage), -%% Define a patch: -Patch = [ %% Set some values: - {set, [foo], false} - , {set, [bar, quux], [quux]} - %% Delete a value (if present): - , {rm, [bar, baz]} - ], -%% Apply the patch: -lee:patch(Model, Data0, Patch) -#+END_SRC - -=lee:patch= function first applies the patch to a temporary storage, validates its consistency, and only then transfers the data to the active configuration. 
- -** Data validation - -Successful validation ensures the following properties of =Data=: - -- All values described in the model are either present in =Data=, or =Model= declares their default values -- All values present in =Data= have correct types -- Any additional properties declared by the metatypes - -** Getting the data - -Now when we know that data is complete and type-safe, getting values becomes extremely simple: - -#+BEGIN_SRC erlang - [quux] = lee:get(Data, [bar, quux]), - false = lee:get(Data, [foo]), -#+END_SRC - -Note that =lee:get= returns plain values rather than something like ={ok, Value} | undefined=. - -This is perfectly safe, as long as the data is validated using =lee:validate=. - -Complete code of the example can be found [[file:doc/example/example_model.erl][here]]. - -* Creating patches and initializing the configuration - -Lee comes with a few modules for reading data: - -- =lee_cli= read data by parsing CLI arguments -- =lee_os_env= read data from environment variables - -In order to utilize these modules one should extend the model with new metatypes and metaparameters: - -#+BEGIN_SRC erlang --spec model() -> lee:lee_module(). -model() -> - #{ foo => - {[value, cli_param], %% This value is read from CLI - #{ type => boolean() - , oneliner => "This value controls fooing" - , cli_short => $f - , cli_operand => "foo" - }} - , bar => - #{ baz => - {[value, os_env], %% This value is read from environment variable - #{ type => integer() - , oneliner => "This value controls bazing" - , default => 42 - }} - , quux => - {[value, cli_param, os_env], %% This value is read from both CLI and environment - #{ type => nonempty_list(atom()) - , oneliner => "This value controls quuxing" - , default => [foo] - , cli_operand => "quux" - , os_env => "QUUX" - }} - } - }. -#+END_SRC - -Reading data is done like this: - -#+BEGIN_SRC erlang -%% Test data: --spec data(lee:model(), [string()]) -> lee:data(). 
-data(Model, CliArgs) -> - %% Create an empty storage: - Data0 = lee_storage:new(lee_map_storage), - %% Read environment variables: - Data1 = lee_os_env:read_to(Model, Data0), - %% Read CLI arguments and return the resulting data: - lee_cli:read_to(Model, CliArgs, Data1). -#+END_SRC - -Full code of the example can be found [[file:doc/example/example_model2.erl][here]]. - -* Extracting documentation from the model - -It is possible to extract user manuals from a Lee model. First, one -has to annotate the model with =oneliner= and =doc= metaparameters, -like in the following example: - -#+BEGIN_SRC erlang -#{ foo => - {[value], - #{ oneliner => "This value controls fooing" - , type => integer() - , default => 41 - , doc => "This is a long and elaborate description of - the parameter using docbook markup. - It just goes on and on..." - }} - }. -#+END_SRC - -=oneliner= is a one sentence summary. =doc= is a more elaborate -description formatted using [[https://docbook.org/][DocBook]] markup. Also element with -=[doc_root]= metatype should be added to the model, that contains -information about the documentation root: - -#+BEGIN_SRC erlang -#{ '$doc_root' => - {[doc_root], - #{ oneliner => "This is a test model for doc extraction" - , app_name => "Lee Test Application" - , doc => "Long and elaborate description of this - application" - %% Name of executable: - , prog_name => "lee_test" - }} - }. -#+END_SRC - -Then Lee does the job of assembling an intermediate DocBook file from -the fragments. Finally, [[https://pandoc.org/][pandoc]] is used to transform DocBook to HTML -([[https://k32.github.io/Lee/Lee%20Test%20Application.html][example]]), manpages, texinfo and what not. 
- -Export of documentation is triggered like this: - -#+BEGIN_SRC erlang -%% List of metatypes that should be mentioned in the documentation -MTs = [ os_env - , cli_param - , value - , map - ], -Config = #{ metatypes => MTs - }, -lee_doc:make_docs(model(), Config) -#+END_SRC - -** Why DocBook - -DocBook is not the most popular and concise markup language, however -it was chosen because of the following properties: - - + It's the easiest format to assemble from small fragments - + It's a supported source format in pandoc - + It's whitespace-insensitive. Given that the docstrings come from - literals embedded into Erlang code, formatting of the source code - should not affect the resulting documents. Also it generally - focuses on structure rather than representation - -* Name? - -This library is named after Tsung-Dao Lee, a physicist who predicted P-symmetry violation together with Chen-Ning Yang. - -* Design goals -** Speed - -Tl;dr: getting values from config should be very fast, but updating and validating config may be slow. - -It should be possible to use =lee:get= in hotspots. -It means any call to =lee:get= should be theoretically possible to implement using at most (N + 1) hashtable lookups (N for the number of configuration overlays and 1 for the defaults). diff --git a/doc/src/schema.adoc b/doc/src/schema.adoc index 10daab5..73fe143 100644 --- a/doc/src/schema.adoc +++ b/doc/src/schema.adoc @@ -1,3 +1,4 @@ +:!sectid: = Documentation [id=quux] @@ -12,4 +13,4 @@ It _uses_ a lot of formatting and *whatnot*. [id=more_stuff.default_ref] == default_ref demonastrates how to use nested keys -And here goes very long text. \ No newline at end of file +And here goes very long text. diff --git a/src/framework/lee.erl b/src/framework/lee.erl index 276cc6d..ac951e7 100644 --- a/src/framework/lee.erl +++ b/src/framework/lee.erl @@ -10,6 +10,7 @@ , patch/2, patch/3 , init_config/2 + , get_model/1 ]). 
-export_type([node_id/0, metatype/0, type/0, mnode/0, model/0, @@ -93,7 +94,7 @@ base_metamodel() -> -spec get(data(), lee:key()) -> term(). get(Data, Key) -> - get(get_bakedin_model(Data), Data, Key). + get(get_model(Data), Data, Key). %% @doc Get a value from the config %% @@ -150,7 +151,7 @@ init_config(Model, Data0) -> -spec patch(data(), patch()) -> patch_result(). patch(Data, Patch) -> - patch(get_bakedin_model(Data), Data, Patch). + patch(get_model(Data), Data, Patch). -spec patch(model(), data(), patch()) -> patch_result(). patch(Model, Data0, Patch) -> @@ -171,7 +172,7 @@ patch(Model, Data0, Patch) -> -spec list(data(), lee:key()) -> [lee:key()]. list(Data, Key) -> - list(get_bakedin_model(Data), Data, Key). + list(get_model(Data), Data, Key). %% @doc List objects in `Data' that can match `Key' %% @@ -362,8 +363,8 @@ meta_validate_node(MT, Model, Key, MNode = #mnode{metaparams = MP}) -> end, lee_lib:inject_error_location(Key, {Errors, Warnings ++ Warn}). --spec get_bakedin_model(data()) -> model(). -get_bakedin_model(Data) -> +-spec get_model(data()) -> model(). +get_model(Data) -> case lee_storage:get(?bakedin_model_key, Data) of {ok, Model} -> Model; undefined -> error("Data has not been initilized properly")