diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 0000000..fa0e24a --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,24 @@ +name: "CodeQL Analysis" +on: + push: + branches: [ main, beta, develop ] + pull_request: + branches: [ main ] + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: python + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 + diff --git a/.github/workflows/update_wiki.yml b/.github/workflows/update_wiki.yml index 4c235f7..c7ad2b3 100644 --- a/.github/workflows/update_wiki.yml +++ b/.github/workflows/update_wiki.yml @@ -5,9 +5,9 @@ on: push: # Trigger only when wiki directory changes paths: - - 'wiki/**' + - 'wiki/pages/**' - # Trigger only on main/beta + # Trigger only on main branches: [ main ] jobs: @@ -16,10 +16,35 @@ jobs: steps: - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install tools to build whl package + run: | + pip install sdist + + - name: Build whl package + run: | + python setup.py sdist bdist_wheel + env: + SEMANTIC_VERSION: 1.0.0 # not actually 1.0.0, but has no impact on wiki-building + + - name: Install do-calculus package + run: | + pip install -e . 
+ env: + SEMANTIC_VERSION: 1.0.0 # not actually 1.0.0, but has no impact on wiki-building + + - name: Build Pages (replace stubs) + run: | + python ./wiki/build_wiki.py + - name: Push Wiki Changes uses: Andrew-Chen-Wang/github-wiki-action@v2 env: - WIKI_DIR: docs/ + WIKI_DIR: wiki/pages/ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_MAIL: ${{ secrets.EMAIL }} GH_NAME: ${{ github.repository_owner }} diff --git a/do/probability/shpitser/Shpitser.py b/debug/Shpitser.py similarity index 97% rename from do/probability/shpitser/Shpitser.py rename to debug/Shpitser.py index ad7cf59..96514dd 100644 --- a/do/probability/shpitser/Shpitser.py +++ b/debug/Shpitser.py @@ -7,7 +7,7 @@ # # ######################################################### -from src.probability.shpitser.identification.IDAlgorithm import ID +from identification.IDAlgorithm import ID from src.probability.shpitser.identification.IDProcessing import parse_shpitser from src.probability.shpitser.latent.LatentProjection import latent_projection from src.probability.shpitser.structures.Distribution import Distribution diff --git a/debug/scrap.py b/debug/scrap.py new file mode 100644 index 0000000..774b931 --- /dev/null +++ b/debug/scrap.py @@ -0,0 +1,26 @@ +from pathlib import Path +from do.structures.Graph import Graph +from yaml import safe_dump + +from gen_distribution import generate_distribution + +V = {"U1", "U2", "U3", "A", "B", "X", "Z", "W", "Y"} +E = { + ("U1", "A"), ("U1", "X"), + ("U2", "A"), ("U2", "Y"), + ("U3", "X"), ("U3", "W"), + ("A", "B"), ("B", "X"), ("X", "Z"), ("X", "W"), ("Z", "Y"), ("W", "Y") +} + +G = Graph(V, E) + +distribution = generate_distribution(G) + +P = Path(".", "shpitser2a.yml") + +with P.open("w") as f: + safe_dump({ + "name": "Shpitser Figure 2a", + "model": distribution + + }, f) diff --git a/debug/shpitser2a.yml b/debug/shpitser2a.yml new file mode 100644 index 0000000..e95df4b --- /dev/null +++ b/debug/shpitser2a.yml @@ -0,0 +1,331 @@ +model: + A: + outcomes: + - a + - ~a + 
parents: + - U1 + - U2 + table: + - - a + - u1 + - u2 + - 0.423 + - - ~a + - u1 + - u2 + - 0.577 + - - a + - u1 + - ~u2 + - 0.1251 + - - ~a + - u1 + - ~u2 + - 0.8749 + - - a + - ~u1 + - u2 + - 0.0919 + - - ~a + - ~u1 + - u2 + - 0.9081 + - - a + - ~u1 + - ~u2 + - 0.6934 + - - ~a + - ~u1 + - ~u2 + - 0.3066 + B: + outcomes: + - b + - ~b + parents: + - A + table: + - - b + - a + - 0.9977 + - - ~b + - a + - 0.0023 + - - b + - ~a + - 0.179 + - - ~b + - ~a + - 0.821 + U1: + latent: true + outcomes: + - u1 + - ~u1 + parents: [] + table: + - - u1 + - 0.7436 + - - ~u1 + - 0.2564 + U2: + latent: true + outcomes: + - u2 + - ~u2 + parents: [] + table: + - - u2 + - 0.2091 + - - ~u2 + - 0.7909 + U3: + latent: true + outcomes: + - u3 + - ~u3 + parents: [] + table: + - - u3 + - 0.3793 + - - ~u3 + - 0.6207 + W: + outcomes: + - w + - ~w + parents: + - U3 + - X + table: + - - w + - u3 + - x + - 0.0042 + - - ~w + - u3 + - x + - 0.9958 + - - w + - u3 + - ~x + - 0.2203 + - - ~w + - u3 + - ~x + - 0.7797 + - - w + - ~u3 + - x + - 0.0979 + - - ~w + - ~u3 + - x + - 0.9021 + - - w + - ~u3 + - ~x + - 0.0332 + - - ~w + - ~u3 + - ~x + - 0.9668 + X: + outcomes: + - x + - ~x + parents: + - B + - U1 + - U3 + table: + - - x + - b + - u1 + - u3 + - 0.5804 + - - ~x + - b + - u1 + - u3 + - 0.4196 + - - x + - b + - u1 + - ~u3 + - 0.8262 + - - ~x + - b + - u1 + - ~u3 + - 0.1738 + - - x + - b + - ~u1 + - u3 + - 0.0599 + - - ~x + - b + - ~u1 + - u3 + - 0.9401 + - - x + - b + - ~u1 + - ~u3 + - 0.611 + - - ~x + - b + - ~u1 + - ~u3 + - 0.389 + - - x + - ~b + - u1 + - u3 + - 0.593 + - - ~x + - ~b + - u1 + - u3 + - 0.407 + - - x + - ~b + - u1 + - ~u3 + - 0.2531 + - - ~x + - ~b + - u1 + - ~u3 + - 0.7469 + - - x + - ~b + - ~u1 + - u3 + - 0.0309 + - - ~x + - ~b + - ~u1 + - u3 + - 0.9691 + - - x + - ~b + - ~u1 + - ~u3 + - 0.3666 + - - ~x + - ~b + - ~u1 + - ~u3 + - 0.6334 + Y: + outcomes: + - y + - ~y + parents: + - U2 + - W + - Z + table: + - - y + - u2 + - w + - z + - 0.1137 + - - ~y + - u2 + - w + - z + - 0.8863 
+ - - y + - u2 + - w + - ~z + - 0.1122 + - - ~y + - u2 + - w + - ~z + - 0.8878 + - - y + - u2 + - ~w + - z + - 0.3202 + - - ~y + - u2 + - ~w + - z + - 0.6798 + - - y + - u2 + - ~w + - ~z + - 0.7963 + - - ~y + - u2 + - ~w + - ~z + - 0.2037 + - - y + - ~u2 + - w + - z + - 0.2066 + - - ~y + - ~u2 + - w + - z + - 0.7934 + - - y + - ~u2 + - w + - ~z + - 0.8179 + - - ~y + - ~u2 + - w + - ~z + - 0.1821 + - - y + - ~u2 + - ~w + - z + - 0.0669 + - - ~y + - ~u2 + - ~w + - z + - 0.9331 + - - y + - ~u2 + - ~w + - ~z + - 0.2198 + - - ~y + - ~u2 + - ~w + - ~z + - 0.7802 + Z: + outcomes: + - z + - ~z + parents: + - X + table: + - - z + - x + - 0.3939 + - - ~z + - x + - 0.6061 + - - z + - ~x + - 0.3917 + - - ~z + - ~x + - 0.6083 +name: Shpitser Figure 2a diff --git a/do/API.py b/do/API.py index 2e83284..8528e7d 100644 --- a/do/API.py +++ b/do/API.py @@ -2,8 +2,9 @@ # probability-code API # ########################################################### -from typing import Collection, List, Optional, Set, Union +from itertools import product from pathlib import Path +from typing import Collection, Optional, Sequence, TextIO, Union from .api.backdoor_paths import api_backdoor_paths from .api.deconfounding_sets import api_deconfounding_sets @@ -13,7 +14,9 @@ from .structures.BackdoorController import BackdoorController from .structures.CausalGraph import CausalGraph from .structures.ConditionalProbabilityTable import ConditionalProbabilityTable -from .structures.Types import Vertex, Vertices +from .structures.Graph import Graph +from .structures.Types import Vertex +from .structures.Exceptions import ProbabilityException from .structures.VariableStructures import Outcome, Intervention from .util.ModelLoader import parse_model @@ -22,16 +25,20 @@ class Do: - def __init__(self, model: dict or None, print_detail=False, print_result=False, log=False, log_fd=None): + def __init__(self, model: Optional[Union[str, bytes, dict, Path]] = None, + print_detail: bool = False, print_result: bool = 
False, + log: bool = False, log_fd: Optional[TextIO] = None): """ Initializer for an instance of the API. - @param model: An optional dictionary of a loaded causal graph model. Can be None, and loaded later. + @param model: An optional causal model. Can be a string path to a file, a pathlib.Path to a file, a dictionary + of a valid model. Can also be specified as None, and loaded later using load_model. @param print_detail: Boolean; whether the computation steps involved in queries should be printed. @param print_result: Boolean; whether the result of a query should be printed. @param log: Boolean; whether the computation steps involved in queries should logged to a file. If this is true, a file must have been set to log to. This can be done by providing a file descriptor either as an argument to log_fd, or can be done later with a call to set_log_fd. - @param log_fd: An open file descriptor to write to, if log_details is enabled. + @raise FileNotFoundError or KeyError if a model is provided but encounters errors in loading. See load_model for + details on when these exceptions occur. """ self._print_result = print_result self._output = OutputLogger(print_result, print_detail, log, log_fd) @@ -48,19 +55,28 @@ def __init__(self, model: dict or None, print_detail=False, print_result=False, # API Modifications # ################################################################ - def load_model(self, data: Union[str, dict, Path]): + def load_model(self, data: Union[str, bytes, dict, Path]): """ - Load a model into the API. - @param data: A dictionary conforming to the required causal model specification to be loaded - into the API. + Parse and load a model into the API. + @param data: Any of a string path or pathlib.Path to a file, or a dictionary conforming to the required causal + model specification. + @raise FileNotFoundError if a string path or pathlib.Path object does not point to a file, or does not point to + a file that can be loaded. 
This can occur if the file does not end in .json, .yml, or .yaml.
+        @raise KeyError on issues relating to parsing the model. This can occur if the model does not conform to the
+            required specification and is missing an attribute.
         """
-        d = parse_model(data)
+        try:
+            d = parse_model(data)
+
+            self._cg: CausalGraph = CausalGraph(output=self._output, **d)
+            self._g: Graph = d["graph"]
+            self._bc: BackdoorController = BackdoorController(self._g.copy())
-        self._cg = CausalGraph(output=self._output, **d)
-        self._g = d["graph"]
-        self._bc = BackdoorController(self._g.copy())
+        except (FileNotFoundError, KeyError) as e:
+            self._output.detail(str(e))
+            raise e
-    def set_print_result(self, to_print: bool):
+    def set_print_result(self, to_print: bool = True):
         """
         Set whether or not to print the result of any API query to standard output
         @param to_print: Boolean; True to print results, False to not print results.
@@ -68,14 +84,14 @@ def set_print_result(self, to_print: bool):
         self._output.set_print_result(to_print)
         self._print_result = to_print
 
-    def set_print_detail(self, to_print: bool):
+    def set_print_detail(self, to_print: bool = True):
         """
         Set whether or not to print the computation steps of any API query to standard output
         @param to_print: Boolean; True to print results, False to not print steps.
         """
         self._output.set_print_detail(to_print)
 
-    def set_logging(self, to_log: bool):
+    def set_logging(self, to_log: bool = True):
         """
         Set whether to log computation steps and results.
         @precondition A file descriptor has been given to the API either in the initializer, or in a call to set_log_fd.
@@ -83,7 +99,7 @@ def set_logging(self, to_log: bool):
         """
         self._output.set_log(to_log)
 
-    def set_log_fd(self, log_fd):
+    def set_log_fd(self, log_fd: Optional[TextIO] = None):
         """
         Set the internal file descriptor to log computation steps to, if this option is enabled.
         @param log_fd: An open file descriptor to write computation details to.
@@ -94,35 +110,41 @@ def set_log_fd(self, log_fd): # Distributions # ################################################################ - def p(self, y: Collection[Outcome], x: Collection[Union[Outcome, Intervention]]) -> Optional[float]: + def p(self, y: Collection[Outcome], x: Optional[Collection[Union[Outcome, Intervention]]] = None) -> float: """ - Compute a probability query of Y, given X. - @param y: Head of query; a set of Outcome objects - @param x: Body of query; a set of Outcome and/or Variable objects - @return: The probability of P(Y | X), in the range [0.0, 1.0] - @raise ProbabilityException when the given probability cannot be computed, such as an invalid Outcome + Compute a probability query of Y, given X. All deconfounding and standard inference rules are handled by the + Causal Graph automatically. + @param y: The head of a query; a collection of Outcome objects. + @param x: The body of a query; a collection of Outcome and/or Intervention objects + @return: The probability, P(Y | X), as a float in the range [0.0, 1.0] + @raise AssertionError If two results differ by a significant margin; this indicates a fault with the software, + not with the model or query. + @raise InvalidOutcome If a given Outcome or Intervention does not exist in the model, or the specified value + is not a valid outcome of the respective Variable. + @raise NoDeconfoundingSet If there does not exist a sufficient set of deconfounding variables in the model to + block all backdoor paths from x->y. + @raise ProbabilityIndeterminableException if the query can not be completed for any reason. With a consistent + model, this should never occur. 
        """
         try:
-            # All deconfounding is handled by the CG
-            result = api_probability_query(self._cg, y, x)
+            result = api_probability_query(self._cg, y, x if x else set())
             self._output.result(result)
             return result
 
-        except AssertionError as e:
+        except (AssertionError, ProbabilityException) as e:
             self._output.detail(e)
-            return None
+            raise e
 
     def joint_distribution_table(self) -> ConditionalProbabilityTable:
         """
-        Compute a joint distribution table across the entire model loaded.
-        @return: A list of tuples, (Outcomes, P), where Outcomes is a unique set of Outcome objects for the model, and
-            P is the corresponding probability.
+        Compute a singular ConditionalProbabilityTable across the joint distribution of the model.
+        @return: A ConditionalProbabilityTable representing the each possible joint outcome of the model.
         """
+        result: ConditionalProbabilityTable = api_joint_distribution_table(self._cg)
         if self._print_result:
-            keys = sorted(self._cg.variables.keys())
-            self._output.result(f"Joint Distribution Table for: {','.join(keys)}")
+            self._output.result(f"Joint Distribution Table for: {','.join(sorted(self._cg.variables.keys()))}")
             self._output.result(f"{result}")
 
         return result
 
@@ -131,17 +153,19 @@ def joint_distribution_table(self) -> ConditionalProbabilityTable:
     # Pathfinding (Backdoor Controller) #
     ################################################################
 
-    def backdoor_paths(self, src: Vertices, dst: Vertices, dcf: Optional[Vertices]) -> List[Path]:
+    def backdoor_paths(self, src: Collection[Vertex], dst: Collection[Vertex], dcf: Optional[Collection[Vertex]] = None) -> Collection[Sequence[str]]:
         """
-        Find all the "backdoor paths" between two sets of variables.
- @param src: A set of (string) vertices defined in the loaded model, which will be the source to begin searching - for paths from, to any vertex in dst - @param dst: A set of (string) vertices defined in the loaded model, which are the destination vertices to be - reached by any vertex in src - @param dcf: A set of (string) vertices which will be considered part of the given deconfounding set as a means - of blocking (or potentially unblocking) backdoor paths - @return: A list of lists, where each sub-list is a backdoor path from some vertex in src to some vertex in dst, - and each vertex within the sub-list is a vertex along this path. + Find all backdoor paths between two collections of variables in the model. + @param src: A collection of variables defined in the model, which will be the source to begin searching for + paths from, to any vertex in dst + @param dst: A collection of variables defined in the model, which are the destination vertices to be reached by + any vertex in src + @param dcf: An optional set of variables defined in the model, which will be considered part of the given + deconfounding set as a means of blocking (or potentially unblocking) backdoor paths. To indicate no + deconfounding variables, an empty collection or None can be specified. + @return: A collection of paths, where each path is represented as a sequence of (string) vertices. Each path + is ordered (endpoints in src and dst included) preserving the order of the path. + @raise: IntersectingSets if any of src, dst, and dcf have any intersection. 
""" result = api_backdoor_paths(self._bc, src, dst, dcf) @@ -153,30 +177,140 @@ def backdoor_paths(self, src: Vertices, dst: Vertices, dcf: Optional[Vertices]) return result - def deconfounding_sets(self, src: set, dst: set) -> List[Set[str]]: + def standard_paths(self, src: Collection[Vertex], dst: Collection[Vertex]) -> Collection[Sequence[str]]: """ - Find the sets of vertices in the loaded model that are sufficient at blocking all backdoor paths from all - vertices in src to any vertices in dst - @param src: A set of (string) vertices defined in the loaded model, acting as the source for backdoor paths - to find, and have blocked by a sufficient deconfounding set of vertices. - @param dst: A set of (string) vertices defined in the loaded model, acting as the destination set of vertices - @return: A list of sets, where each set contains (string) vertices sufficient at blocking all backdoor paths - between any pair of vertices in src X dst + Find all "standard" paths from any pair vertices in the product of some source and destination set of vertices. + @param src: A collection of vertices from which to search for a path to dst. + @param dst: A collection of vertices that will be reached from src. + @return: A collection of paths, where each path is represented as a sequence of string vertices in the graph, + (endpoints in src and dst included), the order of which represents the path. + @raise: IntersectingSets if src and dst have any intersection. 
+ """ + paths = set() + for s, t in product(src, dst): + paths.update(self._bc.all_paths_cumulative(s, t)) + self._output.result(paths) + return paths + + def deconfounding_sets(self, src: Collection[Vertex], dst: Collection[Vertex]) -> Collection[Collection[str]]: """ + Find the sets of vertices in the model that are sufficient at blocking all backdoor paths from all vertices in + src to any vertices in dst + @param src: A collection of vertices defined in the model, acting as the source for backdoor paths to find, + and have blocked by a sufficient deconfounding set of vertices. + @param dst: A collection of vertices defined in the model, acting as the destination set of vertices + @return: A collection of sufficient deconfounding sets, where each deconfounding set consists of a collection of + (string) vertices sufficient at blocking all backdoor paths between any pair of vertices in (src X dst). + @raise: IntersectingSets if src and dst have any intersection. + """ + result = api_deconfounding_sets(self._bc, src, dst) + if self._print_result: for s in result: print("-", ", ".join(map(str, s))) + return result + def independent(self, s: Collection[Vertex], t: Collection[Vertex], dcf: Optional[Collection[Vertex]] = None) -> bool: + """ + Determine if two sets of vertices in the model are conditionally independent, given an optional third set of + deconfounding vertices. + @param s: A collection of vertices in the model. + @param t: A collection of destination vertices in the model. + @param dcf: An optional collection of deconfounding vertices in the model to block backdoor paths between s and + t. This can also be an empty set, or explicitly set to None. + @return: True if all vertices in s and t are conditionally independent. + @raise: IntersectingSets if any of s, t, and dcf have any intersection. 
+ """ + independent = all(self._bc.independent(s, t, dcf if dcf else set()) for (s, t) in product(s, t)) + self._output.result(f"{s} x {t}: {independent}") + return independent + ################################################################ # Graph-Related # ################################################################ - ################################################################ - # Bookkeeping # - ################################################################ + def roots(self) -> Collection[Vertex]: + """ + Find all roots in the graph, where a root is defined as any vertex with no ancestors. This definition contrasts + with some causal inference literature, in which a root is actually defined as any vertex with no descendants. + @return: A collection of all vertices in the graph with no ancestors. + """ + roots = self._g.roots() + self._output.result(roots) + return roots + + def sinks(self) -> Collection[Vertex]: + """ + Find all sinks in the graph, where a sink is defined as any vertex with no descendants. + @return: A collection of all vertices in the graph with no descendants. + """ + sinks = self._g.sinks() + self._output.result(sinks) + return sinks + + def parents(self, v: Vertex) -> Collection[Vertex]: + """ + Find all parents in the graph of some given vertex. A parent is defined as a vertex p such that (p, v) exists + in E, the collection of edges comprising the graph. + @param v: Some vertex defined in the graph. + @return: A collection of all parents of v. + """ + parents = self._g.parents(v) + self._output.result(parents) + return parents + + def children(self, v: Vertex) -> Collection[Vertex]: + """ + Find all children in the graph of some given vertex. A child is defined as a vertex c such that (v, c) exists + in E, the collection of edges comprising the graph. + @param v: Some vertex defined in the graph. + @return: A collection of all children of v. 
+ """ + children = self._g.children(v) + self._output.result(children) + return children + + def ancestors(self, v: Vertex) -> Collection[Vertex]: + """ + Find all ancestors in the graph of some given vertex. An ancestors is defined as a vertex a such that a directed + path (p, x1, ... xi, v) in E, the collection of edges comprising the graph. + @param v: Some vertex defined in the graph. + @return: A collection of all ancestors of v. + """ + ancestors = self._g.ancestors(v) + self._output.result(ancestors) + return ancestors - # TODO - Decorator to require the API to have a model loaded - def require_model(self, function): - ... + def descendants(self, v: Vertex) -> Collection[Vertex]: + """ + Find all descendants in the graph of some given vertex. A descendant is defined as a vertex d such that a + directed path (v, x1, ... xi, d) in E, the collection of edges comprising the graph. + @param v: Some vertex defined in the graph. + @return: A collection of all descendants of v. + """ + descendants = self._g.descendants(v) + self._output.result(descendants) + return descendants + + def topology(self) -> Sequence[Vertex]: + """ + Get a topological ordering of all vertices defined in the graph. A topological ordering is some sequence of the + N vertices in the graph, V1, ..., VN such that for any i, j | 1 <= i < j <= N, Vj is not an ancestor of Vi. + @return: A sequence of vertices defining a topological ordering, V1, ..., VN. + """ + topology = self._g.topology_sort() + self._output.result(topology) + return topology + + def topology_position(self, v: Vertex) -> int: + """ + Find the position of some given vertex in a topological ordering of the graph. + @param v: Some vertex defined in the graph. + @return: An integer i in the range [1, N] representing the index of vertex v such that Vi = v in the + topological ordering of G, given as 1 <= Vi <= Vn. 
+ """ + topology = self._g.get_topology(v) + self._output.result(topology) + return topology diff --git a/do/__init__.py b/do/__init__.py index 604039c..861691f 100644 --- a/do/__init__.py +++ b/do/__init__.py @@ -2,6 +2,7 @@ "api", "config", "probability", + "shpitser", "util", "API", ] diff --git a/do/api/backdoor_paths.py b/do/api/backdoor_paths.py index 2071c8e..76ea13b 100644 --- a/do/api/backdoor_paths.py +++ b/do/api/backdoor_paths.py @@ -1,4 +1,4 @@ -from typing import Collection, Dict, List, Optional +from typing import Collection, Dict, Optional from ..structures.BackdoorController import BackdoorController from ..structures.Types import Path, Vertices @@ -29,7 +29,7 @@ def clean(x): } -def api_backdoor_paths(bc: BackdoorController, src: Vertices, dst: Vertices, dcf: Optional[Vertices]) -> List[Path]: +def api_backdoor_paths(bc: BackdoorController, src: Vertices, dst: Vertices, dcf: Optional[Vertices]) -> Collection[Path]: """ Compute and return all the backdoor paths from any vertex in src to any vertex in dst @param bc: A Backdoor Controller with a graph conforming to the given source and destination sets. diff --git a/do/api/deconfounding_sets.py b/do/api/deconfounding_sets.py index fa79843..428e207 100644 --- a/do/api/deconfounding_sets.py +++ b/do/api/deconfounding_sets.py @@ -22,7 +22,7 @@ def clean(x): } -def api_deconfounding_sets(bc: BackdoorController, src: Vertices, dst: Vertices) -> List[Set[str]]: +def api_deconfounding_sets(bc: BackdoorController, src: Vertices, dst: Vertices) -> Collection[Collection[str]]: """ Compute and return all the backdoor paths from any vertex in src to any vertex is dst @param bc: A Backdoor Controller with a graph conforming to the given source and destination sets. 
diff --git a/do/graphs/test.json b/do/graphs/test.json index 9cacb8e..f161b53 100644 --- a/do/graphs/test.json +++ b/do/graphs/test.json @@ -3,22 +3,15 @@ "model": { "Y": { - "latent": true, - "outcomes": ["y", "~y"], - "table": [ - ["y", 0.6], - ["~y", 0.4] - ] + "outcomes": ["y", "~y"] }, "X":{ "outcomes": ["x", "~x"], "parents": ["Y"], "table": [ - ["x", "y", 0.7], - ["x", "~y", 0.2], - ["~x", "y", 0.3], - ["~x", "~y", 0.8] + ["x", 0.7], + ["~x", 0.3] ] } } diff --git a/do/probability/__init__.py b/do/probability/__init__.py deleted file mode 100644 index 9a419cb..0000000 --- a/do/probability/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -__all__ = [ - "structures" -] diff --git a/do/shpitser/__init__.py b/do/shpitser/__init__.py new file mode 100644 index 0000000..bb882f0 --- /dev/null +++ b/do/shpitser/__init__.py @@ -0,0 +1,5 @@ +__all__ = [ + "identification", + "latent", + "structures" +] diff --git a/do/probability/shpitser/identification/IDAlgorithm.py b/do/shpitser/identification/IDAlgorithm.py similarity index 100% rename from do/probability/shpitser/identification/IDAlgorithm.py rename to do/shpitser/identification/IDAlgorithm.py diff --git a/do/probability/shpitser/identification/IDProcessing.py b/do/shpitser/identification/IDProcessing.py similarity index 99% rename from do/probability/shpitser/identification/IDProcessing.py rename to do/shpitser/identification/IDProcessing.py index c0c1f74..e8d5758 100755 --- a/do/probability/shpitser/identification/IDProcessing.py +++ b/do/shpitser/identification/IDProcessing.py @@ -63,7 +63,7 @@ def parse_shpitser(result: Symbol, cg: CausalGraph, known: dict): # Compute probability elif isinstance(result, Distribution): h = result.tables - b = result.given + b = result.parents head = [] for key in h: diff --git a/do/shpitser/identification/__init__.py b/do/shpitser/identification/__init__.py new file mode 100644 index 0000000..6cebf04 --- /dev/null +++ b/do/shpitser/identification/__init__.py @@ -0,0 +1,4 @@ +__all__ 
= [ + "IDAlgorithm", + "IDProcessing" +] diff --git a/do/probability/shpitser/latent/LatentProjection.py b/do/shpitser/latent/LatentProjection.py similarity index 95% rename from do/probability/shpitser/latent/LatentProjection.py rename to do/shpitser/latent/LatentProjection.py index b2d73f8..9109672 100644 --- a/do/probability/shpitser/latent/LatentProjection.py +++ b/do/shpitser/latent/LatentProjection.py @@ -9,8 +9,8 @@ import itertools -from src.probability.structures.Graph import Graph -from src.probability.shpitser.structures.LatentGraph import LatentGraph +from ...structures.Graph import Graph +from ..structures.LatentGraph import LatentGraph # A method to convert a Graph and a set of unobservable variables into a LatentGraph, # in which all unobservable variables are replaced with bidirected arcs diff --git a/do/probability/shpitser/latent/ProjectionTesting.py b/do/shpitser/latent/ProjectionTesting.py similarity index 100% rename from do/probability/shpitser/latent/ProjectionTesting.py rename to do/shpitser/latent/ProjectionTesting.py diff --git a/do/shpitser/latent/__init__.py b/do/shpitser/latent/__init__.py new file mode 100644 index 0000000..c2e2854 --- /dev/null +++ b/do/shpitser/latent/__init__.py @@ -0,0 +1,4 @@ +__all__ = [ + "LatentProjection", + "ProjectionTesting" +] diff --git a/do/probability/shpitser/structures/Distribution.py b/do/shpitser/structures/Distribution.py similarity index 100% rename from do/probability/shpitser/structures/Distribution.py rename to do/shpitser/structures/Distribution.py diff --git a/do/probability/shpitser/structures/Expressions.py b/do/shpitser/structures/Expressions.py similarity index 100% rename from do/probability/shpitser/structures/Expressions.py rename to do/shpitser/structures/Expressions.py diff --git a/do/probability/shpitser/structures/LatentGraph.py b/do/shpitser/structures/LatentGraph.py similarity index 52% rename from do/probability/shpitser/structures/LatentGraph.py rename to 
do/shpitser/structures/LatentGraph.py index f8512f8..6467491 100644 --- a/do/probability/shpitser/structures/LatentGraph.py +++ b/do/shpitser/structures/LatentGraph.py @@ -7,12 +7,28 @@ # # ######################################################### -from src.probability.structures.Graph import Graph +from random import choice +from typing import Set + +from ...structures.Graph import Graph # A representation of a Latent Graph, in which we have replaced all unobservable # variables with bidirected arcs between the observable variables +class CComponent(set): + + def __init__(self, v: Set[str]): + super(CComponent, self).__init__() + self.v = v + + def __hash__(self): + return sum(map(hash, self.v)) + + def __str__(self): + return str(self.v) + + class LatentGraph(Graph): """ Represents a graph that has had its unobservable variables replaced with bidirected arcs. @@ -23,33 +39,38 @@ def __init__(self, v, e): # Pre-compute all C components self.c_components = dict() # Map V -> C(V) - seen = set() # Prevent infinite loops, just a set of vertices examined - for vertex in v: - if vertex in seen: # No repeats / infinite loops - continue - - # Initialize current C component to begin - component = set() # C Component is a set of vertices, can construct a subgraph - component.add(vertex) # C component is at least a component of 1, the original vertex - queue = list(self.incoming[vertex] | self.outgoing[vertex]) # Start with everything connected to v - - while len(queue) > 0: - current = queue.pop() - if current in seen: - continue - - # Check if this vertex is connected by a bidirected arc to any vertex already in the component - if any(self.bidirected(item, current) for item in component): - component.add(current) - seen.add(current) - # Tentatively queue all vertices connected to this vertex - queue = list(self.incoming[current] | self.outgoing[current]) - - # Map all elements of the c-component to the set representing the full component - for element in component: - 
self.c_components[element] = component - - # print(self.incoming) + + all_components = self._all_c_components() + for component in all_components: + for v in component.v: + self.c_components[v] = component + + def _all_c_components(self): + + all_c_components = set() + + v_working = self.v.copy() + + while len(v_working): + + start = choice(list(v_working)) + component = set() + queue = [start] + + while len(queue): + c = queue.pop() + component.add(c) + + for other in v_working - ({c} | component): + if self.bidirected(c, other): + queue.append(other) + + v_working -= component + + all_c_components.add(CComponent(component)) + + assert len(v_working) == 0 + return all_c_components def __call__(self, v: set): """ @@ -70,8 +91,4 @@ def bidirected(self, s, t) -> bool: return (s, t) in self.e and (t, s) in self.e def all_c_components(self) -> list: - no_duplicates = [] - for component in self.c_components.values(): - if component not in no_duplicates: - no_duplicates.append(component) - return no_duplicates + return list(map(lambda c: c.v, set(self.c_components.values()))) diff --git a/do/shpitser/structures/__init__.py b/do/shpitser/structures/__init__.py new file mode 100644 index 0000000..e28a961 --- /dev/null +++ b/do/shpitser/structures/__init__.py @@ -0,0 +1,5 @@ +__all__ = [ + "Distribution", + "Expressions", + "LatentGraph" +] diff --git a/do/structures/BackdoorController.py b/do/structures/BackdoorController.py index 6f676bb..987f6bc 100755 --- a/do/structures/BackdoorController.py +++ b/do/structures/BackdoorController.py @@ -12,9 +12,10 @@ from .Graph import Graph from .Types import Collection, Path, Vertices, Vertex, V_Type +from .Exceptions import IntersectingSets from ..config.settings import Settings -from ..util.helpers import minimal_sets, power_set, str_map +from ..util.helpers import minimal_sets, power_set, str_map, disjoint class BackdoorController: @@ -46,12 +47,15 @@ def backdoor_paths(self, src: Vertices, dst: Vertices, dcf: Optional[Vertices]) 
string vertices. """ - paths = [] - src_str = str_map(src) dst_str = str_map(dst) dcf_str = str_map(dcf) if dcf else set() + if not disjoint(src_str, dst_str, dcf_str): + raise IntersectingSets + + paths = [] + # Use the product of src, dst to try each possible pairing for s, t in product(src_str, dst_str): paths += self._backdoor_paths_pair(s, t, dcf_str) @@ -93,7 +97,7 @@ def get_backdoor_paths(cur: str, path: list, path_list: list, previous="up") -> if previous == "down": # We can ascend on a controlled collider, OR an ancestor of a controlled collider - if cur in dcf or any(map(lambda v: v in dcf, self.graph.reach(cur))): + if cur in dcf or any(map(lambda v: v in dcf, self.graph.descendants(cur))): for parent in self.graph.parents(cur): path_list = get_backdoor_paths(parent, path + [cur], path_list, "up") @@ -118,7 +122,7 @@ def get_backdoor_paths(cur: str, path: list, path_list: list, previous="up") -> backdoor_paths = get_backdoor_paths(s, [], []) # Filter out the paths that don't "enter" x; see the definition of a backdoor path - return list(filter(lambda l: l[0] in self.graph.children(l[1]) and l[1] != t, backdoor_paths)) + return list(filter(lambda l: len(l) > 2 and l[0] in self.graph.children(l[1]) and l[1] != t, backdoor_paths)) def all_dcf_sets(self, src: Vertices, dst: Vertices) -> List[Collection[str]]: """ @@ -133,7 +137,7 @@ def all_dcf_sets(self, src: Vertices, dst: Vertices) -> List[Collection[str]]: dst_str = str_map(dst) # Can't use anything in src, dst, or any descendant of any vertex in src as a deconfounding/blocking vertex - disallowed_vertices = src_str | dst_str | set().union(*[self.graph.reach(s) for s in src_str]) + disallowed_vertices = src_str | dst_str | set().union(*[self.graph.descendants(s) for s in src_str]) valid_deconfounding_sets = list() diff --git a/do/structures/CausalGraph.py b/do/structures/CausalGraph.py index 9f10d18..ac5ac02 100755 --- a/do/structures/CausalGraph.py +++ b/do/structures/CausalGraph.py @@ -8,12 +8,15 @@ 
######################################################### from itertools import product -from typing import Collection, Union +from typing import Collection, Dict, Union from .BackdoorController import BackdoorController +from .ConditionalProbabilityTable import ConditionalProbabilityTable from .Graph import Graph from .Probability_Engine import ProbabilityEngine -from .VariableStructures import Outcome, Intervention +from .Types import V_Type +from .Exceptions import NoDeconfoundingSet +from .VariableStructures import Variable, Outcome, Intervention from ..config.settings import Settings from ..util.OutputLogger import OutputLogger @@ -99,7 +102,8 @@ def strings(s: Collection[Union[Outcome, Intervention]]): # Filter down the deconfounding sets not overlapping with our query body vertex_dcf = list(filter(lambda s: len(set(s) & strings(body)) == 0, deconfounding_sets)) - assert len(vertex_dcf) != 0, "No deconfounding set Z can exist for the given data." + if len(vertex_dcf) == 0: + raise NoDeconfoundingSet # Compute with every possible deconfounding set as a safety measure; ensuring they all match probability = None # Sentinel value diff --git a/do/structures/ConditionalProbabilityTable.py b/do/structures/ConditionalProbabilityTable.py index 9fe24a0..de67287 100755 --- a/do/structures/ConditionalProbabilityTable.py +++ b/do/structures/ConditionalProbabilityTable.py @@ -10,10 +10,10 @@ from numpy import empty from typing import List +from .Exceptions import MissingTableRow from .VariableStructures import Variable, Outcome, Intervention from ..config.settings import Settings -from ..util.ProbabilityExceptions import MissingTableRow class ConditionalProbabilityTable: @@ -21,25 +21,27 @@ class ConditionalProbabilityTable: A basic conditional probability table that reflects the values of one Variable, and any number of conditional values @param variable: A Variable object, representing the variable this table computes a probability for - @param given: A (possibly empty) 
list of Variables, representing the parents for the variable given + @param parents: A (possibly empty) list of Variables, representing the parents for the variable given @param table_rows: A list of rows in the table, each formatted as [, [", ...],

] """ # Padding units on the left/right sides of each cell padding = 1 - def __init__(self, variable: Variable, given: List[str], table_rows: List): + def __init__(self, variable: Variable, parents: List[str], table_rows: List): self.variable = variable # The LHS of the table, single-variable only - self.given = given # The RHS/body of the table + self.parents = parents # The RHS/body of the table self.table_rows = [] + latent = len(parents) - (len(table_rows) - 2) + # Clean up the rows; Each is formatted as: [outcome of variable, parent_1, parent_2, ..., probability] for row in table_rows: outcome = Outcome(variable.name, row[0]) - parents = row[1:-1] + p = row[1:-1] - self.table_rows.append([outcome, [Outcome(v, x) for v, x in zip(given, parents)], row[-1]]) + self.table_rows.append([outcome, [Outcome(v, x) for v, x in zip(parents[:-latent], p)], row[-1]]) def __str__(self) -> str: """ @@ -50,15 +52,15 @@ def __str__(self) -> str: # Create a snazzy numpy table # Rows: 1 for a header + 1 for each row; Columns: 1 for variable, 1 for each given var, 1 for the probability rows = 1 + len(self.table_rows) - columns = 1 + len(self.given) + 1 + columns = 1 + len(self.parents) + 1 # dtype declaration is better than "str", as str only allows one character in each cell table = empty((rows, columns), dtype=' str: table[i+1][table.shape[1]-1] = "{0:.{prec}f}".format(row[2], prec=Settings.output_levels_of_precision) # Wiggle/Padding, column by column - for column_index in range(1 + len(self.given) + 1): + for column_index in range(1 + len(self.parents) + 1): widest_element = max([len(cell) for cell in table[:, column_index]]) for row_index in range(1 + len(self.table_rows)): cell_value = table[row_index][column_index] diff --git a/do/structures/Exceptions.py b/do/structures/Exceptions.py new file mode 100644 index 0000000..25568c3 --- /dev/null +++ b/do/structures/Exceptions.py @@ -0,0 +1,45 @@ +class ProbabilityException(BaseException): + """ + A base Exception to catch all 
Probability-code-related Exceptions, + but still crash on any other Exceptions as they should be caught" + """ + pass + + +class ProbabilityIndeterminableException(ProbabilityException): + """ + A slightly more specialized Exception for indicating a failure to compute a probability, and inability to + continue/no further options. This should never occur with a consistent model. + """ + pass + + +class MissingTableRow(ProbabilityException): + """ + Raised when a row is missing from a table, but was expected. Can occur during probability queries. + """ + pass + + +class InvalidOutcome(ProbabilityException): + """ + Raised when attempting to evaluate some query, where a given Outcome or Intervention has been assigned an outcome + that is not possible for that respective variable. + """ + pass + + +class NoDeconfoundingSet(ProbabilityException): + """ + Raised when attempting to perform a query on a set of data for which deconfounding is necessary, but no sufficient + set of variables by which to block backdoor paths is possible. + """ + pass + + +class IntersectingSets(ProbabilityException): + """ + Raised when attempting any backdoor-path related searches, where the source, destination, and/or optional deconfounding + set of vertices intersect. 
+ """ + pass diff --git a/do/structures/Graph.py b/do/structures/Graph.py index 0ffe0ea..166adb7 100755 --- a/do/structures/Graph.py +++ b/do/structures/Graph.py @@ -41,22 +41,8 @@ def __init__(self, v: Set[str], e: Set[Tuple[str, str]]): self.outgoing_disabled = set() self.incoming_disabled = set() - self.topology_map = {vertex: 0 for vertex in v} - - def initialize_topology(vertex: V_Type, depth=0): - """ - Helper function to initialize the ordering of the Variables in the graph - @param vertex: A Variable to set the ordering of, and then all its children - @param depth: How many "levels deep"/variables traversed to reach current - """ - label = to_label(vertex) - self.topology_map[label] = max(self.topology_map[label], depth) - for child in [c for c in self.outgoing[label] if c not in self.incoming[label]]: - initialize_topology(child, depth+1) - - # Begin the topological ordering, which is started from every "root" in the graph - for r in [root_node for root_node in v if len(self.incoming[root_node]) == 0]: - initialize_topology(r) + topology = self.topology_sort() + self.topology_map = {vertex: topology.index(vertex) for vertex in v} def __str__(self) -> str: """ @@ -74,7 +60,14 @@ def roots(self) -> Collection[str]: """ return set([x for x in self.v if len(self.parents(x)) == 0]) - def parents(self, v: V_Type) -> Collection[Union[str, V_Type]]: + def sinks(self) -> Collection[str]: + """ + Get the sinks of the graph G. + @return: A collection of string vertices in G that have no descendants. 
+ """ + return set([x for x in self.v if len(self.children(x)) == 0]) + + def parents(self, v: Union[V_Type, str]) -> Collection[Union[str, V_Type]]: """ Get the parents of v, which may actually be currently controlled @param v: A variable in our graph @@ -86,7 +79,7 @@ def parents(self, v: V_Type) -> Collection[Union[str, V_Type]]: return {p for p in self.incoming[label] if p not in self.outgoing_disabled and p not in self.outgoing[label]} - def children(self, v: V_Type) -> Collection[Union[str, V_Type]]: + def children(self, v: Union[V_Type, str]) -> Collection[Union[str, V_Type]]: """ Get the children of v, which may actually be currently controlled @param v: A variable in our graph @@ -98,7 +91,7 @@ def children(self, v: V_Type) -> Collection[Union[str, V_Type]]: return {c for c in self.outgoing[label] if c not in self.incoming_disabled and c not in self.incoming[label]} - def ancestors(self, v: V_Type) -> Collection[Union[str, V_Type]]: + def ancestors(self, v: Union[V_Type, str]) -> Collection[Union[str, V_Type]]: """ Get the ancestors of v, accounting for disabled vertices @param v: The vertex to find all ancestors of @@ -116,7 +109,7 @@ def ancestors(self, v: V_Type) -> Collection[Union[str, V_Type]]: return ancestors - def reach(self, v: V_Type) -> Collection[Union[str, V_Type]]: + def descendants(self, v: Union[V_Type, str]) -> Collection[Union[str, V_Type]]: """ Get the reach of v, accounting for disabled vertices @param v: The vertex to find all descendants of @@ -134,7 +127,7 @@ def reach(self, v: V_Type) -> Collection[Union[str, V_Type]]: return set(children) - def disable_outgoing(self, *disable: V_Type): + def disable_outgoing(self, *disable: Union[V_Type, str]): """ Disable the given vertices' outgoing edges @param disable: Any number of vertices to disable @@ -142,7 +135,7 @@ def disable_outgoing(self, *disable: V_Type): for v in disable: self.outgoing_disabled.add(to_label(v)) - def disable_incoming(self, *disable: V_Type): + def 
disable_incoming(self, *disable: Union[V_Type, str]): """ Disable the given vertices' incoming edges @param disable: Any number of vertices to disable @@ -157,7 +150,7 @@ def reset_disabled(self): self.outgoing_disabled.clear() self.incoming_disabled.clear() - def get_topology(self, v: V_Type) -> int: + def get_topology(self, v: Union[V_Type, str]) -> int: """ Determine the "depth" a given Variable is at in a topological sort of the graph @param v: The variable to determine the depth of @@ -182,19 +175,6 @@ def __copy__(self): copied.outgoing_disabled = self.outgoing_disabled.copy() return copied - def topological_variable_sort(self, variables: Collection[Union[str, V_Type]]) -> Collection[Union[str, V_Type]]: - """ - A helper function to abstract what it means to "sort" a list of Variables/Outcomes/Interventions - @param variables: A list of any number of Variable/Outcome/Intervention instances - @return: A list, sorted (currently in the form of a topological sort) - """ - if len(variables) == 0: - return [] - - largest_topology = max(self.get_topology(v) for v in variables) - sorted_variables = [[v for v in variables if self.get_topology(v) == i] for i in range(largest_topology+1)] - return [item for topology_sublist in sorted_variables for item in topology_sublist] - def descendant_first_sort(self, variables: Collection[Union[str, V_Type]]) -> Collection[Union[str, V_Type]]: """ A helper function to "sort" a list of Variables/Outcomes/Interventions such that no element has a @@ -202,8 +182,24 @@ def descendant_first_sort(self, variables: Collection[Union[str, V_Type]]) -> Co @param variables: A list of any number of Variable/Outcome/Intervention instances @return: A sorted list, such that any instance has no ancestor earlier in the list """ - # We can already do top-down sorting, just reverse the answer - return self.topological_variable_sort(variables)[::-1] + return sorted(variables, key=lambda v: self.get_topology(v)) + + def topology_sort(self): + + topology 
= [] + v = self.v.copy() + e = self.e.copy() + + while len(v) > 0: + + roots = set(filter(lambda s: not any((s, t) in e for t in v), v)) + assert len(roots) > 0 + + topology.extend(sorted(list(roots))) + v -= roots + e -= set(filter(lambda edge: edge[0] in roots, e)) + + return topology def to_label(item: V_Type) -> str: diff --git a/do/structures/Probability_Engine.py b/do/structures/Probability_Engine.py index bd2060d..b3000d3 100755 --- a/do/structures/Probability_Engine.py +++ b/do/structures/Probability_Engine.py @@ -11,12 +11,12 @@ from typing import Collection, Union from .Graph import Graph +from .Exceptions import ProbabilityException, ProbabilityIndeterminableException from .VariableStructures import Outcome, Intervention from ..config.settings import Settings from ..util.OutputLogger import OutputLogger from ..util.helpers import p_str -from ..util.ProbabilityExceptions import ProbabilityException, ProbabilityIndeterminableException class ProbabilityEngine: @@ -46,10 +46,6 @@ def probability(self, head: Collection[Outcome], body: Collection[Union[Outcome, head = set(head) body = set(body) - # Ensure there are no adjustments/interventions in the head - for out in head: - assert not isinstance(out, Intervention), f"Error: {out} is in head; no Interventions should be in head." - # Validate the queried variables and any given # Ensure variable is defined, outcome is possible for that variable, and it's formatted right. 
for out in head | body: @@ -88,7 +84,7 @@ def _compute(self, head: Collection[Outcome], body: Collection[Union[Outcome, In self.output.detail("Querying:", rep, x=depth) # If the calculation has been done and cached, just return it from storage - if rep in self._stored_computations: + if Settings.cache_computation_results and rep in self._stored_computations: result = self._stored_computations[rep] self.output.detail("Computation already calculated:", rep, "=", result, x=depth) return result @@ -114,6 +110,7 @@ def _compute(self, head: Collection[Outcome], body: Collection[Union[Outcome, In self.output.detail(rep, "=", result, x=depth) self._store_computation(rep, result) return result + except ProbabilityException: # coverage: skip self.output.detail("Failed to resolve by reverse product rule.", x=depth) @@ -147,7 +144,7 @@ def _compute(self, head: Collection[Outcome], body: Collection[Union[Outcome, In # p(a|Cd) = p(d|aC) * p(a|C) / p(d|C) # ################################################# - reachable_from_head = set().union(*[self.graph.reach(outcome) for outcome in head]) + reachable_from_head = set().union(*[self.graph.descendants(outcome) for outcome in head]) descendants_in_rhs = set([var.name for var in body]) & reachable_from_head if descendants_in_rhs: diff --git a/do/structures/Types.py b/do/structures/Types.py index c905963..a752116 100644 --- a/do/structures/Types.py +++ b/do/structures/Types.py @@ -1,11 +1,11 @@ -from typing import Collection, List, NewType, Union +from typing import Collection, Sequence, Union from .VariableStructures import Variable, Outcome, Intervention # General -V_Type = NewType("V_Type", Union[Variable, Outcome, Intervention]) +V_Type = Union[Variable, Outcome, Intervention] # Graph-related -Vertex = NewType("Vertex", Union[V_Type, str]) -Vertices = NewType("Vertices", Collection[Vertex]) -Path = NewType("Path", List[Vertex]) +Vertex = Union[V_Type, str] +Vertices = Collection[Vertex] +Path = Sequence[str] diff --git 
a/do/structures/VariableStructures.py b/do/structures/VariableStructures.py index 91b8e21..7eb4e2c 100755 --- a/do/structures/VariableStructures.py +++ b/do/structures/VariableStructures.py @@ -49,13 +49,13 @@ class Variable: Has a name, list of potential outcomes, and some list of parent variables. """ - def __init__(self, name: str, outcomes: list, parents: list, reach=None, topological_order=0): + def __init__(self, name: str, outcomes: list, parents: list, descendants=None, topological_order=0): """ A basic Variable for use in a CPT or Causal Graph @param name: The name of the Variable, "X" @param outcomes: A list of all potential outcomes of the variable: ["x", "~x"] @param parents: A list of strings representing the names of all the parents of this Variable - @param reach: An optional set of Variables which are reachable from this Variable + @param descendants: An optional set of Variables which are reachable from this Variable @param topological_order: Used in the ordering of Variables as defined by a topological sort """ self.name = name.strip() @@ -63,9 +63,9 @@ def __init__(self, name: str, outcomes: list, parents: list, reach=None, topolog self.parents = [parent.strip() for parent in parents] self.topological_order = topological_order - if reach is None: - reach = set() - self.reach = reach + if descendants is None: + descendants = set() + self.descendants = descendants def __str__(self) -> str: return self.name + ": <" + ",".join(self.outcomes) + ">, <-- " + ",".join(self.parents) @@ -82,7 +82,7 @@ def __eq__(self, other) -> bool: set(self.parents) == set(other.parents) def __copy__(self): - return Variable(self.name, self.outcomes.copy(), self.parents.copy(), reach=self.reach.copy()) + return Variable(self.name, self.outcomes.copy(), self.parents.copy(), descendants=self.descendants.copy()) def copy(self): return self.__copy__() diff --git a/do/util/ModelLoader.py b/do/util/ModelLoader.py index 1815f4b..5ac31bf 100755 --- a/do/util/ModelLoader.py +++ 
b/do/util/ModelLoader.py @@ -78,17 +78,18 @@ def parse_model(file: Union[dict, str, Path]): outcomes[name] = v_outcomes outcomes[variable] = v_outcomes - if "latent" in detail and detail["latent"]: + if "table" not in detail: latent.add(name) latent.add(variable) - # Load in the table and construct a CPT - table = detail["table"] - cpt = ConditionalProbabilityTable(variable, v_parents, table) + else: + # Load in the table and construct a CPT + table = detail["table"] + cpt = ConditionalProbabilityTable(variable, v_parents, table) - # Map the name/variable to the table - tables[name] = cpt - tables[variable] = cpt + # Map the name/variable to the table + tables[name] = cpt + tables[variable] = cpt v = set(variables.keys()) e = set() diff --git a/do/util/ProbabilityExceptions.py b/do/util/ProbabilityExceptions.py deleted file mode 100755 index f305c09..0000000 --- a/do/util/ProbabilityExceptions.py +++ /dev/null @@ -1,55 +0,0 @@ -######################################################### -# # -# Probability Exceptions # -# # -# Author: Braden Dubois (braden.dubois@usask.ca) # -# Written for: Dr. Eric Neufeld # -# # -######################################################### - - -class ProbabilityException(Exception): - """ - A base Exception to catch all Probability-code-related Exceptions, - but still crash on any other Exceptions as they should be caught" - """ - pass - - -class ProbabilityIndeterminableException(ProbabilityException): - """ - A slightly more specialized Exception for indicating a failure - to compute a probability, and inability to continue/no further - options - """ - pass - - -class NotFunctionDeterminableException(ProbabilityException): - """ - Raised when a Variable is attempted to be calculated by a - probabilistic function when there is not one. - """ - pass - - -class FunctionFeedbackLoop(ProbabilityException): - """ - Raised when some probabilistic function being evaluated encounters a feedback loop - and will continue into a stack overflow. 
- """ - pass - - -class ExceptionNotFired(ProbabilityException): - """ - For use in testing; raised when we *expect* an exception to be thrown and one is not. - """ - pass - - -class MissingTableRow(ProbabilityException): - """ - Raised when a row is missing from a table and queried. - """ - pass diff --git a/markov_generator/__init__.py b/markov_generator/__init__.py new file mode 100644 index 0000000..a202312 --- /dev/null +++ b/markov_generator/__init__.py @@ -0,0 +1,5 @@ +__all__ = [ + "gen_distribution", + "gen_graph", + "gen_markov_model" +] diff --git a/debug/dataset_generator/distribution_generation.py b/markov_generator/gen_distribution.py similarity index 96% rename from debug/dataset_generator/distribution_generation.py rename to markov_generator/gen_distribution.py index 4518eb4..bb1fd90 100755 --- a/debug/dataset_generator/distribution_generation.py +++ b/markov_generator/gen_distribution.py @@ -1,7 +1,7 @@ import itertools import random -from src.probability.structures import Graph +from do.structures import Graph def sum_to(x, y): diff --git a/debug/dataset_generator/graph_generator.py b/markov_generator/gen_graph.py old mode 100755 new mode 100644 similarity index 86% rename from debug/dataset_generator/graph_generator.py rename to markov_generator/gen_graph.py index f9ed0f1..c206553 --- a/debug/dataset_generator/graph_generator.py +++ b/markov_generator/gen_graph.py @@ -2,10 +2,10 @@ from string import ascii_uppercase -import random -import itertools +from random import choice, choices, randrange +from itertools import product -from src.probability.structures.Graph import Graph +from do.structures.Graph import Graph def cycle(v, e): @@ -62,7 +62,7 @@ def generate_vertex(): name = "" while len(name) < 3: - name += random.choice(ascii_uppercase) + name += choice(ascii_uppercase) if name in v: name = "" return name @@ -75,7 +75,7 @@ def generate_valid_edge(from_set, to_set): assert len(viable) > 0, "No possible edges!" 
- temp = random.choice(list(viable)) + temp = choice(list(viable)) tentative_e.remove(temp) if cycle(v, e | {temp}): @@ -91,8 +91,8 @@ def generate_valid_edge(from_set, to_set): print("Generating vertices... {} of {} : ({:.2f}%)".format(len(v), vertices, len(v) / vertices * 100), end='\r') print() - tentative_e = set(itertools.product(v, v)) - inner = set(random.choice(list(v))) + tentative_e = set(product(v, v)) + inner = set(choice(list(v))) def insert_edge(edge): inner.update(edge) @@ -111,4 +111,4 @@ def insert_edge(edge): def randomized_latent_variables(g: Graph): roots = g.roots() - return random.choices(list(roots), k=random.randrange(1, len(roots)+1)) + return choices(list(roots), k=randrange(1, len(roots)+1)) diff --git a/debug/dataset_generator/model_generator.py b/markov_generator/gen_markov_model.py similarity index 86% rename from debug/dataset_generator/model_generator.py rename to markov_generator/gen_markov_model.py index a0978e4..5aa988b 100755 --- a/debug/dataset_generator/model_generator.py +++ b/markov_generator/gen_markov_model.py @@ -5,13 +5,13 @@ from random import randrange from sys import argv -from src.probability.structures.CausalGraph import CausalGraph -from src.validation.inference.inference_tests import model_inference_validation -from src.util.ModelLoader import parse_model +from do.structures.CausalGraph import CausalGraph +from do.util.ModelLoader import parse_model -from graph_generator import generate_graph, randomized_latent_variables -from distribution_generation import generate_distribution +from tests.inference.inference_tests import model_inference_validation +from .gen_graph import generate_graph, randomized_latent_variables +from .gen_distribution import generate_distribution # Default number of graphs to create N = 10 diff --git a/requirements.txt b/requirements.txt index 8da0451..b0d0cd0 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -numpy~=1.19.4 -PyYAML~=5.3.1 +numpy >= 1.19.4 +PyYAML >= 5.4.0 
diff --git a/setup.cfg b/setup.cfg index 0fc7713..ec97592 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,13 +1,13 @@ [flake8] exclude = .git, - .github, - __pycache__, - docs, - debug, - do/probability/do_calculus, - do/probability/shpitser, + .github + __pycache__ + debug + do/probability/do_calculus + do/shpitser tests/ + wiki/ [coverage:run] relative_files = True @@ -19,7 +19,9 @@ omit = do/__main__.py do/config/generate_config_docs.py do/config/config_manager.py + do/shpitser/* do/util/OutputLogger.py + do/probability [coverage:report] exclude_lines = @@ -29,4 +31,6 @@ exclude_lines = [tool:pytest] minversion = 6.0 norecursedirs = - debug \ No newline at end of file + build + debug + dist diff --git a/tests/__init__.py b/tests/__init__.py index e69de29..3c44522 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,7 @@ +__all__ = [ + "backdoors", + "inference", + "shpitser", + "test_driver", + "test_util" +] diff --git a/tests/backdoors/backdoor_path_tests.py b/tests/backdoors/backdoor_path_tests.py index b96d340..00208ef 100644 --- a/tests/backdoors/backdoor_path_tests.py +++ b/tests/backdoors/backdoor_path_tests.py @@ -4,6 +4,7 @@ from yaml import safe_load as load from do.structures.BackdoorController import BackdoorController +from do.structures.Exceptions import IntersectingSets from do.util.ModelLoader import parse_model from ..test_util import print_test_result @@ -28,34 +29,41 @@ def model_backdoor_validation(bc: BackdoorController, test_data: dict) -> (bool, for test in test_data["tests"]: - if test["type"] == "backdoor-paths": + expect = test["expect"] + src = test["src"] + dst = test["dst"] + dcf = test["dcf"] if "dcf" in test else set() - expected_paths = list(map(sorted, test["expect"])) + try: - src = test["src"] - dst = test["dst"] - dcf = test["dcf"] if "dcf" in test else set() + if test["type"] == "backdoor-paths": - paths = bc.backdoor_paths(src, dst, dcf) - paths = list(map(sorted, paths)) + if expect != "failure": + expect = 
list(map(sorted, expect)) - if test["exhaustive"] and len(paths) != len(expected_paths): # coverage: skip - return False, f"{len(paths)} found, expected {len(expected_paths)}: {paths} vs. Exp: {expected_paths}" + paths = bc.backdoor_paths(src, dst, dcf) + paths = list(map(sorted, paths)) - if not all(map(lambda p: p in paths, expected_paths)): # coverage: skip - missing = list(filter(lambda p: p not in paths, expected_paths)) - return False, f"Missing {len(missing)} paths: {missing}" + if test["exhaustive"] and len(paths) != len(expect): # coverage: skip + return False, f"{len(paths)} found, expected {len(expect)}: {paths} vs. Exp: {expect}" - elif test["type"] == "independence": + if not all(map(lambda p: p in paths, expect)): # coverage: skip + missing = list(filter(lambda p: p not in paths, expect)) + return False, f"Missing {len(missing)} paths: {missing}" - expected = test["expect"] - src = test["src"] - dst = test["dst"] - dcf = test["dcf"] if "dcf" in test else set() - independent = bc.independent(src, dst, dcf) + elif test["type"] == "independence": - if independent != expected: # coverage: skip - return False, f"{src} -> {dst} | {dcf}: {independent}, expected {expected}" + independent = bc.independent(src, dst, dcf) + + if independent != expect: # coverage: skip + return False, f"{src} -> {dst} | {dcf}: {independent}, expected {expect}" + + except IntersectingSets: + + if expect != "failure": # coverage: skip + error = f"Unexpected IntersectingSets exception! {src}, {dst}" + print_test_result(False, error) + return False, error return True, "Backdoor tests passed." 
diff --git a/tests/backdoors/test_files/xi_xj.yml b/tests/backdoors/test_files/xi_xj.yml index afad7a2..15ccace 100644 --- a/tests/backdoors/test_files/xi_xj.yml +++ b/tests/backdoors/test_files/xi_xj.yml @@ -29,3 +29,9 @@ tests: src: [ X5 ] dst: [ X2 ] expect: false + + - type: independence + src: [Xi, X4] + dst: [X4, Xj] + expect: failure + diff --git a/tests/inference/inference_tests.py b/tests/inference/inference_tests.py index fa3464b..babb45e 100755 --- a/tests/inference/inference_tests.py +++ b/tests/inference/inference_tests.py @@ -3,10 +3,10 @@ from yaml import safe_load as load from do.structures.CausalGraph import CausalGraph, Outcome +from do.structures.Exceptions import ProbabilityIndeterminableException, MissingTableRow, NoDeconfoundingSet from do.structures.VariableStructures import parse_outcomes_and_interventions from do.util.helpers import within_precision from do.util.ModelLoader import parse_model -from do.util.ProbabilityExceptions import * from ..test_util import print_test_result @@ -96,8 +96,8 @@ def inference_tests(graph_location: Path) -> (bool, str): result = cg.probability_query(head, body) # Should have raised assertion error... - if expected == "failure": - print_test_result(False, f"Expected test to fail, but it did not! {graph_filename}") + if expected == "failure": # coverage: skip + print_test_result(False, f"Expected test to fail, but it did not! {result} {graph_filename}") test_file_success = False if expected != "failure" and not within_precision(result, expected): # coverage: skip @@ -105,10 +105,14 @@ def inference_tests(graph_location: Path) -> (bool, str): test_file_success = False except AssertionError: - if expected != "failure": + if expected != "failure": # coverage: skip print_test_result(False, f"Unexpected assertion error! {graph_filename}") test_file_success = False + except NoDeconfoundingSet: + if expected != "failure": # coverage: skip + print_test_result(False, f"Unexpected NoDeconfoundingSet exception! 
{graph_filename}") + if test_file_success: print_test_result(True, f"All tests in {test_file}|{graph_filename} passed") else: # coverage: skip diff --git a/tests/inference/test_files/xi_xj.yml b/tests/inference/test_files/xi_xj.yml index 97d2105..2500d8e 100644 --- a/tests/inference/test_files/xi_xj.yml +++ b/tests/inference/test_files/xi_xj.yml @@ -34,4 +34,8 @@ tests: - head: Xi=xi body: do(Xj=xj) - expect: 0.2669 \ No newline at end of file + expect: 0.2669 + + - head: do(Xi=xi) + body: Xj=xj + expect: 0.85265 diff --git a/tests/shpitser/IDParity.py b/tests/shpitser/IDParity.py index 507afd1..936d9e2 100755 --- a/tests/shpitser/IDParity.py +++ b/tests/shpitser/IDParity.py @@ -11,7 +11,7 @@ import os -from do.probability.shpitser.identification.IDAlgorithm import ID +from do.shpitser.identification.IDAlgorithm import ID from do.probability.shpitser.identification.IDProcessing import parse_shpitser from do.probability.shpitser.latent.LatentProjection import latent_projection from do.probability.shpitser.structures.Distribution import Distribution @@ -20,7 +20,6 @@ from do.probability.structures.VariableStructures import Outcome, Intervention from do.util.helpers import power_set -from do.util.ModelLoader import parse_model # A runnable test to ensure / prove the equivalence of results generated by the backdoor-criterion with the diff --git a/tests/shpitser/IDValidation.py b/tests/shpitser/IDValidation.py index 41d56a6..b02b68a 100755 --- a/tests/shpitser/IDValidation.py +++ b/tests/shpitser/IDValidation.py @@ -9,18 +9,12 @@ # # ######################################################### -import os - -from do.probability.shpitser.identification.IDAlgorithm import ID +from do.shpitser.identification.IDAlgorithm import ID from do.probability.shpitser.identification.IDProcessing import parse_shpitser from do.probability.shpitser.latent.LatentProjection import latent_projection from do.probability.shpitser.structures.Expressions import Distribution from 
do.probability.structures.CausalGraph import CausalGraph -from do.probability.structures.VariableStructures import Outcome, Intervention - -from do.util.helpers import power_set -from do.util.ModelLoader import parse_model # A runnable test to ensure / prove the equivalence of results generated by the backdoor-criterion with the diff --git a/tests/test_driver.py b/tests/test_driver.py index cbe4053..5d51342 100644 --- a/tests/test_driver.py +++ b/tests/test_driver.py @@ -11,13 +11,14 @@ from do.structures.CausalGraph import CausalGraph from do.structures.ConditionalProbabilityTable import ConditionalProbabilityTable from do.structures.Graph import Graph, to_label +from do.structures.Exceptions import MissingTableRow from do.structures.VariableStructures import Outcome, Variable, Intervention from do.util.helpers import power_set, disjoint, minimal_sets, within_precision from do.util.ModelLoader import parse_model from tests.backdoors.backdoor_path_tests import backdoor_tests -from tests.inference.inference_tests import inference_tests, MissingTableRow +from tests.inference.inference_tests import inference_tests from tests.shpitser.shpitser_tests import shpitser_tests from tests.test_util import print_test_result @@ -186,6 +187,10 @@ def test_roots(): assert sum(map(lambda v: len(graph.parents(v)), graph.roots())) == 0 +def test_descendants(): + assert sum(map(lambda v: len(graph.children(v)), graph.sinks())) == 0 + + def test_parents(): graph.reset_disabled() roots = graph.roots() @@ -216,13 +221,13 @@ def test_ancestors(): for vertex in graph.v: ancestors = graph.ancestors(vertex) for ancestor in ancestors: - assert vertex in graph.reach(ancestor) + assert vertex in graph.descendants(ancestor) def test_reach(): graph.reset_disabled() for vertex in graph.v: - descendants = graph.reach(vertex) + descendants = graph.descendants(vertex) for descendant in descendants: assert vertex in graph.ancestors(descendant) @@ -233,10 +238,10 @@ def test_disable_outgoing(): for v 
in graph.v: children = graph.children(v) - descendants = graph.reach(v) + descendants = graph.descendants(v) graph.disable_outgoing(v) assert len(graph.children(v)) == 0 - assert len(graph.reach(v)) == 0 + assert len(graph.descendants(v)) == 0 for child in children: assert v not in graph.parents(child) for descendant in descendants: @@ -258,7 +263,7 @@ def test_disable_incoming(): for parent in parents: assert v not in graph.children(parent) for ancestor in ancestors: - assert v not in graph.reach(ancestor) + assert v not in graph.descendants(ancestor) graph.reset_disabled() @@ -340,7 +345,7 @@ def test_variable(): v: Variable assert isinstance(v.name, str) - assert isinstance(v.reach, set) + assert isinstance(v.descendants, set) assert isinstance(v.parents, list) assert isinstance(v.topological_order, int) @@ -351,8 +356,8 @@ def test_variable(): assert v.name == c.name - assert v.reach is not c.reach - assert v.reach == c.reach + assert v.descendants is not c.descendants + assert v.descendants == c.descendants assert v.parents is not c.parents assert v.parents == c.parents @@ -446,6 +451,9 @@ def test_parse_model(): # json parse_model(json_model) + # latent variable + parse_model(Path("do", "graphs", "test.json")) + # validation diff --git a/wiki/Do API.md b/wiki/Do API.md deleted file mode 100644 index 00a6577..0000000 --- a/wiki/Do API.md +++ /dev/null @@ -1,79 +0,0 @@ -Details on the [API](https://en.wikipedia.org/wiki/API) provided in the project. - -This assumes the steps in the [[Installation]] section have been followed, and the project is set up. - -**Note**: For simplicity of import-statements, any examples will *assume* the project was installed as [PyPI](https://pypi.org/project/do-calculus/) package. 
- -## Table of Contents - -* [Importing the **Do** API](#importing) -* [Loading a Model](#loading-a-model) - -## Importing - -To import the package: - -```python -import do -``` - -**Important**: -- The package name on [PyPI](https://pypi.org/) is [do-calculus](https://pypi.org/project/do-calculus/), but the module to import is called ``do``. - -


- -To import *just* the API: - -```python -from do.API import Do -``` - -**Important**: -- The API, represented as a Python class, is called **Do**. -- **Do** is stored in the file ``API``, so it can be imported from ``do.API``. - -## Loading a Model - -Let's create an instance of the API, using the model from [[Installation]]: - -```python -from do.API import Do - -m = { - "name": "Simple Model", - "model": { - "Y": { - "outcomes": ["y", "~y"], - "table": [ - ["y", 0.7], - ["~y", 0.3] - ] - }, - "X": { - "outcomes": ["x", "~x" ], - "parents": [ "Y" ], - "table": [ - ["x", "y", 0.9], - ["x", "~y", 0.75], - ["~x", "y", 0.1], - ["~x", "~y", 0.25] - ] - } - } -} - -x = Do(m) -``` - -**Important**: -- A regular Python dictionary representation of a [[causal model|Causal Models]] is valid input to **Do**. -- Since **Do** is a class, multiple instances of **Do** - each with their own model - can be instantiated in one project at a time. - -## Further - -Now that a model is successfully loaded, one can begin [[querying distributions|Probability Queries]]. 
- -See any of the more specific pages: -* [[Probability Queries]] -* [[Backdoor Paths]] -* [[Deconfounding Sets]] diff --git a/wiki/_Sidebar.md b/wiki/_Sidebar.md deleted file mode 100644 index 8761208..0000000 --- a/wiki/_Sidebar.md +++ /dev/null @@ -1,15 +0,0 @@ -### [[Home]] - -### [[Installation]] -* [[PyPI]] -* [[GitHub]] - -### [[Resources]] -* [[Configuration]] -* [[Causal Models]] -* [[Literature]] - -### [[Do API]] -* [[Probability Queries]] -* [[Backdoor Paths]] -* [[Deconfounding Sets]] diff --git a/wiki/__init__.py b/wiki/__init__.py new file mode 100644 index 0000000..a6af2b8 --- /dev/null +++ b/wiki/__init__.py @@ -0,0 +1,4 @@ +__all__ = [ + "pages", + "build_wiki" +] diff --git a/wiki/build_wiki.py b/wiki/build_wiki.py new file mode 100644 index 0000000..dbd1865 --- /dev/null +++ b/wiki/build_wiki.py @@ -0,0 +1,78 @@ +from inspect import getmembers, getdoc, getsource, isclass, ismethod, signature, Signature +from os import chdir +from os.path import abspath, dirname +from pathlib import Path + +from do.API import Do +import do.structures.Exceptions + + +def api_docstring_description(function_name): + + def parameter_signature(parameter_item): + parameter_key, parameter_value = parameter_item + return f"#### {parameter_key}\n```py\n{parameter_value.annotation}\n```" + + name = str(function_name.__name__) + function_signature = signature(function_name, follow_wrapped=True) + + title = f"## Function Signature - Do.{name}\n" + + source = getsource(function_name) + header = source.split("\n")[0][:-1].split(" ", maxsplit=1)[1].strip(" ") + header = f"### Header\n\n```py\n{header}\n```\n" + + parameters = "### Parameters\n\n" + "\n".join(map(parameter_signature, function_signature.parameters.items())) + if len(function_signature.parameters) == 0: + parameters = "### Parameters\n\n**None**\n" + + if function_signature.return_annotation is not Signature.empty: + return_annotation = function_signature.return_annotation + else: + return_annotation = "None" + + 
return_value = f"### Return Value\n\n```py\n{return_annotation}\n```\n" + + sections = [title, header, parameters, return_value] + + return "\n".join(sections) + "\n
\n" + + +def exception_description(exception_name): + return f"## {exception_name}\n\n> {getdoc(exception_name)}\n\n" + + +def populate_wiki_stubs(): + + chdir(dirname(abspath(__file__))) + + api_signatures = {name: api_docstring_description(method) for (name, method) in + getmembers(Do(model=None), predicate=ismethod)} + + exceptions = {name: exception_description(exception) for (name, exception) in + getmembers(do.structures.Exceptions, predicate=isclass)} + + wiki_dir = Path("pages") + + for file in wiki_dir.iterdir(): + if not file.is_file(): + continue + + text = file.read_text().splitlines() + + found = False + for line, content in enumerate(text): + if content.startswith("STUB"): + stub, replace = content.split("|") + if replace in api_signatures: + text[line] = api_signatures[replace] + elif replace == "exceptions": + text[line] = "\n\n".join(exceptions.values()) + found = True + + if found: + file.write_text("\n".join(text)) + + +if __name__ == "__main__": + populate_wiki_stubs() diff --git a/wiki/pages/Ancestors.md b/wiki/pages/Ancestors.md new file mode 100644 index 0000000..769dec6 --- /dev/null +++ b/wiki/pages/Ancestors.md @@ -0,0 +1,17 @@ +Get the ancestors of some vertex *v*, where an ancestor is some vertex *a* such that a directed path ``(a, ..., v)`` is in the graph. + +STUB|ancestors + +### Example + +```python +from do.API import Do + +model = "models/model1.yml" +do_api = Do(model) + +ancestors = do_api.ancestors("x") +``` + +**Important** +- The ancestors are always returned as a (possibly **empty**) collection of vertices. diff --git a/wiki/Backdoor Paths.md b/wiki/pages/Backdoor Paths.md similarity index 52% rename from wiki/Backdoor Paths.md rename to wiki/pages/Backdoor Paths.md index ef2a768..01bd9e4 100644 --- a/wiki/Backdoor Paths.md +++ b/wiki/pages/Backdoor Paths.md @@ -1,4 +1,6 @@ -How to discover backdoor paths between two sets of variables in a given [[causal model|Causal Models]]. 
+How to discover backdoor paths between two sets of variables in a given [[Markovian model|Markovian Models]]. + +STUB|backdoor_paths ## Basic Backdoor Paths @@ -14,23 +16,21 @@ model = dict() do_api = Do(model) -backdoor_paths = do_api.backdoor_paths({"x"}, {"y"}, set()) +backdoor_paths = do_api.backdoor_paths({"x"}, {"y"}) for path in backdoor_paths: print(f"Backdoor path from x->y!: {path}") ``` -``backdoor_paths`` returns a list of lists, in which each sub-list consists of the vertices (end-points included) connecting some vertex in the ``src`` set to some vertex in the ``dst`` set. +``backdoor_paths`` returns a collection of paths, in which each path consists of the vertices (end-points included) connecting some vertex in the ``src`` collection to some vertex in the ``dst`` collection. - In this example, the return value would be ``[["x", "z", "y"]]``, as this denotes the singular backdoor path ``x <- z -> y``. **Important** -- The first parameter is the set of source variables from which the pathfinding begins. -- The second parameter is the set of destination variables to which the pathfinding attempts to reach. -- A third parameter is a set of *deconfounding* variables by which to "block" backdoor paths. -- The deconfounding set currently must be given, even if empty. -- Each sublist, a backdoor path, is ordered such that the path order is correctly maintained. +- The first parameter is the collection of source variables from which the pathfinding begins. +- The second parameter is the collection of destination variables to which the pathfinding attempts to reach. +- Each path, a backdoor path, is ordered such that the path order is correctly maintained. -## Deconfounding Variables +## Blocking Backdoor Paths Assuming the same graph as defined [above](#basic-backdoor-paths)... 
@@ -42,7 +42,7 @@ model = dict() do_api = Do(model) -backdoor_paths = do_api.backdoor_paths({"x"}, {"y"}, set()) +backdoor_paths = do_api.backdoor_paths({"x"}, {"y"}) for path in backdoor_paths: print(f"Backdoor path from x->y!: {path}") @@ -53,4 +53,6 @@ assert len(blocked) == 0 ``` **Important** +- A third parameter is a collection of *deconfounding* variables by which to "block" backdoor paths. +- To represent that there are no deconfounding variables, an *empty* collection of vertices can be given, explicitly set as ``None``, or *omitted entirely*. - If all backdoor paths are successfully blocked, an **empty list** is returned. diff --git a/wiki/pages/Children.md b/wiki/pages/Children.md new file mode 100644 index 0000000..de2b6c1 --- /dev/null +++ b/wiki/pages/Children.md @@ -0,0 +1,17 @@ +Get the children of some vertex *v*, where a child is some vertex *c* such that the edge ``(v, c)`` is in the graph. + +STUB|children + +### Example + +```python +from do.API import Do + +model = "models/model1.yml" +do_api = Do(model) + +children = do_api.children("x") +``` + +**Important** +- The children are always returned as a (possibly **empty**) collection of vertices. diff --git a/wiki/pages/Conditional Independence.md b/wiki/pages/Conditional Independence.md new file mode 100644 index 0000000..a42153b --- /dev/null +++ b/wiki/pages/Conditional Independence.md @@ -0,0 +1,31 @@ +Determine if two sets of variables in the model are conditionally independent. + +STUB|independent + +## Independent + +```python +from do.API import Do + +# Assume this were a detailed model conforming to the above graph... 
+model = dict() + +do_api = Do(model) + +independent = do_api.independent({"x"}, {"y"}) + +independent_2 = do_api.independent({"x"}, {"y"}, {"z"}) + +independent_3 = do_api.independent({"y"}, {"z"}, dcf=None) + +if independent: + print("Independent!") +else: + print("Not independent!") +``` + +A boolean for whether the two sets are conditionally independent, given some optional deconfounding set, is returned. + +**Important** +- The third parameter, a set of deconfounding variables, can be given, to block backdoor paths from ``s`` to ``t``. +- If there are no deconfounding variables, an empty collection can be provided, *omitted entirely*, or explicitly set to ``None``. diff --git a/wiki/Configuration.md b/wiki/pages/Configuration.md similarity index 100% rename from wiki/Configuration.md rename to wiki/pages/Configuration.md diff --git a/wiki/Deconfounding Sets.md b/wiki/pages/Deconfounding Sets.md similarity index 92% rename from wiki/Deconfounding Sets.md rename to wiki/pages/Deconfounding Sets.md index ab258fe..b660b96 100644 --- a/wiki/Deconfounding Sets.md +++ b/wiki/pages/Deconfounding Sets.md @@ -1,7 +1,7 @@ -# Deconfounding Sets - Finding all deconfounding sets between two sets of vertices. +STUB|deconfounding_sets + ## Basic Example Assuming the basic 3-vertex graph from [[Backdoor Paths]], **G = (V, E)** where: @@ -28,8 +28,7 @@ for deconfounding_set in dcf: ## Usage of Deconfounding Sets -Finding a deconfounding set can be helpful, but any [[probability queries involving interventions|Probability Queries]] automatically handles deconfounding. An easy check to verify each deconfounding set: - +Finding a deconfounding set can be helpful, but any [[probability queries involving interventions|Probability Queries]] automatically handle deconfounding. 
An easy check to verify each deconfounding set: ```python from do.API import Do diff --git a/wiki/pages/Definitions.md b/wiki/pages/Definitions.md new file mode 100644 index 0000000..fe620aa --- /dev/null +++ b/wiki/pages/Definitions.md @@ -0,0 +1,50 @@ +WORK IN PROGRESS (while we iron out who says what!) + +- Vertices +- Edges +- Path +- Backdoor Path +- Confounding / Deconfounding +- Markovian Model / semi-Markovian model / causal Bayesian network +- Parents(V) + +## Tian & Pearl, 2004 + +> The most common such representation involves a Markovian model (also known as a causal Bayesian network). A Markovian model consists of a DAG G over a set V = {V1, ..., Vn } of variables, called a *causal graph*. + +- Tian & Pearl, 2004, p. 562 + +> The probabilistic interpretation views *G* as representing conditional independence assertions: Each variable is independent of all its non-descendants given its direct parents in the graph. These assertions imply that the joint probability function *P(v) = P(v_1, ..., v_n)* factorizes according to the product *P(v) = Π_{i} P(V_i | pa_i)* where *pa_i* are (values of) the parents of variable *V_i* in the graph. + +- Tian & Pearl, 2004, p. 562 + +*pa_i* is **exclusive**. + +> Let *V* and *U* stand for the sets of observed and unobserved variables, respectively. In this paper, we assume that no *U* variable is a descendant of any *V* variable (called a semi-Markovian model). Then the observed probability distribution, *P(v)*, becomes a mixture of products: *P(v) = Σ_{u} Π_{i} P(v_{i} | pa_i, u^i) P(u)* where *Pa_i* and *U^i* stand for the sets of the observed and unobserved parents of *V_i*, and the summation ranges over all the *U* variables. + +- Tian & Pearl, 2004, p. 562 + +## Santu Tikku, 2018 + +> For a directed graph G = (V, E) and a set of vertices W ⊆ V the sets Pa(W)_G , Ch(W)_G, An(W)_G and De(W)_G denote a set that +contains W in addition to its parents, children, ancestors and descendants in G, respectively. 
+- Santtu Tikku, Improving Identification Algorithms in Causal Inference, 2018, p. 8 + +Inclusive with given set. + +> Contrary to usual graph theoretic conventions, we call a vertex without any descendants a root (typically referred to as sink). The root set of G is the set of all roots of G, which is {X ∈ V | De(X)G \ {X} = ∅}. The reason for this reversal of the names of sinks and roots is to retain consistency with relevant literature (e.g. Shpitser and Pearl, 2006b) and other +important definitions. + +- Santtu Tikku, Improving Identification Algorithms in Causal Inference, 2018, p. 8 + +> When a DAG is considered, we can relate an ordering of its vertices to its topological structure. This is useful especially when a causal interpretation is associated with the graph. A topological ordering π of a DAG G = (V, E) is an ordering of its vertices, such that if X is an ancestor of Y in G then X < Y in π. The subset of vertices that are less than V_j in π is denoted by V_π^{j-1}. + +- Santtu Tikku, Improving Identification Algorithms in Causal Inference, 2018, p. 8 + +> An algorithm by Kahn (1962) can be used to derive a topological ordering for any DAG. First, we add the vertices without ancestors to the ordering in any order. At the next stage, we add all vertices such that their parents are already contained in the ordering. This is repeated until every vertex has been included. It should be noted that a DAG may have more than one ordering. + +- Santtu Tikku, Improving Identification Algorithms in Causal Inference, 2018, p. 8 + +- Backdoor Paths +- Definitions +- Sorting for ordering diff --git a/wiki/pages/Descendants.md b/wiki/pages/Descendants.md new file mode 100644 index 0000000..c2e7b30 --- /dev/null +++ b/wiki/pages/Descendants.md @@ -0,0 +1,17 @@ +Get the descendants of some vertex *v*, where a descendant is some vertex *d* such that a directed path ``(v, ..., d)`` is in the graph. 
+ +STUB|descendants + +### Example + +```python +from do.API import Do + +model = "models/model1.yml" +do_api = Do(model) + +descendants = do_api.descendants("x") +``` + +**Important** +- The descendants are always returned as a (possibly **empty**) collection of vertices. diff --git a/wiki/pages/Do API.md b/wiki/pages/Do API.md new file mode 100644 index 0000000..fb8c2d5 --- /dev/null +++ b/wiki/pages/Do API.md @@ -0,0 +1,53 @@ +Details on the [API](https://en.wikipedia.org/wiki/API) provided in the project. + +This assumes the steps in the [[Installation]] section have been followed, and the project is set up. + +**Note**: For simplicity of import-statements, any examples will *assume* the project was installed as [PyPI](https://pypi.org/project/do-calculus/) package. + +## Importing + +To import the package: + +```python +import do +``` + +**Important**: +- The package name on [PyPI](https://pypi.org/) is [do-calculus](https://pypi.org/project/do-calculus/), but the module to import is called ``do``. + +
+ +To import *just* the API: + +```python +from do.API import Do +``` + +**Important**: +- The API, represented as a Python class, is called **Do**. +- **Do** is stored in the file ``API``, so it can be imported from ``do.API``. + +## Further + +See any of the specific pages on API functions provided: +* [[Do.\_\_init\_\_|\_\_init\_\_]] +* [[Do.load_model|Loading a Model]] +* [[Do.p|Probability Queries]] +* [[Do.joint_distribution_table|Joint Distribution Table]] +* [[Do.backdoor_paths|Backdoor Paths]] +* [[Do.standard_paths|Standard Paths]] +* [[Do.deconfounding_sets|Deconfounding Sets]] +* [[Do.independent|Conditional Independence]] +* [[Do.roots|Roots]] +* [[Do.sinks|Sinks]] +* [[Do.parents|Parents]] +* [[Do.children|Children]] +* [[Do.ancestors|Ancestors]] +* [[Do.descendants|Descendants]] +* [[Do.topology|Topology]] +* [[Do.topology_position|Topology]] +* [[Do.set_print_result|Output]] +* [[Do.set_print_detail|Output]] +* [[Do.set_logging|Output]] +* [[Do.set_log_fd|Output]] +* [[Exceptions]] diff --git a/wiki/pages/Exceptions.md b/wiki/pages/Exceptions.md new file mode 100644 index 0000000..75a97bc --- /dev/null +++ b/wiki/pages/Exceptions.md @@ -0,0 +1,5 @@ +Details on the custom exceptions that can be raised when using the [[API|Do API]]. + +Exceptions are stored in ``do/structures/Exceptions``. 
+ +STUB|exceptions diff --git a/wiki/GitHub.md b/wiki/pages/GitHub.md similarity index 100% rename from wiki/GitHub.md rename to wiki/pages/GitHub.md diff --git a/wiki/Home.md b/wiki/pages/Home.md similarity index 100% rename from wiki/Home.md rename to wiki/pages/Home.md diff --git a/wiki/Installation.md b/wiki/pages/Installation.md similarity index 100% rename from wiki/Installation.md rename to wiki/pages/Installation.md diff --git a/wiki/pages/Joint Distribution Table.md b/wiki/pages/Joint Distribution Table.md new file mode 100644 index 0000000..4dc62e8 --- /dev/null +++ b/wiki/pages/Joint Distribution Table.md @@ -0,0 +1,18 @@ +Get a joint distribution table for all possible combination of outcomes for all variables in the model. + +STUB|joint_distribution_table + +## Example + +```python +from do.API import Do + +model = "models/model1.yml" +do_api = Do(model) + +table = do_api.joint_distribution_table() +``` + +**Important** +- This table can be *extremely* computationally intensive if there are many outcomes and/or many variables in the model. +- To improve performance, ensure that [[computation-caching is enabled|Configuration]]. diff --git a/wiki/Literature.md b/wiki/pages/Literature.md similarity index 75% rename from wiki/Literature.md rename to wiki/pages/Literature.md index 151d30b..dd2087e 100644 --- a/wiki/Literature.md +++ b/wiki/pages/Literature.md @@ -9,3 +9,8 @@ TODO - References galore to backdoor paths, deconfounding, and more! ## Papers TODO - Shpitser & Pearl 2004, Thesis, and a few more. + +## Graph Related + +Kahn, A. B. (1962). Topological sorting of large networks. Communications of the ACM, +5(11):558–562. diff --git a/wiki/pages/Loading a Model.md b/wiki/pages/Loading a Model.md new file mode 100644 index 0000000..43372b3 --- /dev/null +++ b/wiki/pages/Loading a Model.md @@ -0,0 +1,43 @@ +How to load a model into an instance of the API. + +All examples will be using the model from [[Markovian Models]]. 
+ +STUB|load_model + +As shown in [[\_\_init\_\_|\_\_init\_\_]], the following forms of models are acceptable: +- a Python dictionary +- a string path to a file +- a [pathlib.Path](https://docs.python.org/3/library/pathlib.html#pathlib.Path) object + +One can have instantiated a **Do**, and wish to replace the model loaded, or one may have deferred providing a model at the time of instantiation, and wish to provide one now. + +## Examples + +### Swapping a Model + +```python +from do.API import Do +from pathlib import Path + +model_1 = "data/graph1.yml" + +do_api = Do(model=model_1) + +model_2 = Path("data/graph2.yml") + +do_api.load_model(model_2) +``` + +**Important**: +- One can mix and match the model argument provided when swapping models; a dictionary could be given, then a path, or vice versa. + +### Deferred Loading a Model + +```python +from do.API import Do + +do_api = Do(model=None) + +model_path = "data/graph1.yml" +do_api.load_model(model_path) +``` diff --git a/wiki/Causal Models.md b/wiki/pages/Markovian Models.md similarity index 56% rename from wiki/Causal Models.md rename to wiki/pages/Markovian Models.md index 4409c8c..b776825 100755 --- a/wiki/Causal Models.md +++ b/wiki/pages/Markovian Models.md @@ -1,18 +1,19 @@ -This document outlines the structure of how to create a causal model for use in the package, such as in the [[API|Do API]]. +This document outlines the structure of how to create and represent a Markovian model for use in the package, such as in the [[API|Do API]]. Models are inherently **DAGs** (Directed Acyclic Graph), where each variable in a model is also represented as a vertex in the DAG. ## Model Structure A model is represented as dictionary, mapping the name of one variable in the model to its detailed information. -- A variable's detailed information consists of the following key-value pairs: - - ``outcomes``: all discrete outcomes the variable may take, represented as a list. 
- - ``parents``: parent variables (also defined in the model) of the current variable, represented as a list. - - If the variable is a root - that is, there are no parents - the list can be left empty, or this key can be absent from this variable entirely. - - ``table``: a list of lists, representing the probability distribution of the variable. Each sub-list is one unique combination of outcomes of the given variable and each of its parents, along with a probability between 0 and 1. - - The order of the parent variables must correspond to the order given in the ``parents`` entry, if there are any. - - ``latent``: a boolean representing whether the variable is unobservable in the given model. - - If this key is absent, it will be assumed ``False`` - that is, assumed observable. + +A variable's detailed information consists of the following key-value pairs: +- ``outcomes``: all discrete outcomes the variable may take, represented as a list. +- ``parents``: parent variables (also defined in the model) of the current variable, represented as a list. + - If the variable is a root - that is, there are no parents - the list can be left empty, or this key can be absent from this variable entirely. + - If there are *unobservable parents* - that is, *latent variables* - they must be listed *after* all observable parents, but in any particular order. +- ``table``: a list of lists, representing the probability distribution of the variable. Each sub-list is one unique combination of outcomes of the given variable and each of its parents, along with a probability between 0 and 1. + - The order of the observable parent variables must correspond to the order given in the ``parents`` entry, if there are any. + - to represent a latent variable, **omit** this key. Additionally, a key ``name`` can be given, corresponding to an arbitrary name for the model. @@ -54,7 +55,6 @@ model: ``` This represents the basic graph of a single edge, (Y, X). 
-- In the absence of any ``latent`` attributes, both variables are observable. - ``Y`` has no parents, it is a root. #### Dictionary @@ -86,4 +86,4 @@ m = { } ``` -Both representations be used in the [[API|Do API]]. +Both representations can be used in the [[API|Do API]]; if a string path to a file is given, an attempt will be made to load and parse it. diff --git a/wiki/pages/Output.md b/wiki/pages/Output.md new file mode 100644 index 0000000..95c72c7 --- /dev/null +++ b/wiki/pages/Output.md @@ -0,0 +1,102 @@ +Control over the output that is printed to standard output from usage of the [[API|Do API]]. + +Here, we will make clear *two* categorizations of output: +1. **Result**: the final result returned from some computation +2. **Detail**: any intermediate information involved in some computation + +## Print Result + +Set whether to print the result of an API call to standard output. + +STUB|set_print_result + +### Example + +```python +from do.API import Do + +do_api = Do("models/model1.yml") + +do_api.set_print_result(True) + +# queries here... +``` + +
+ +## Print Detail + +Set whether to print the detail of an API call to standard output. + +STUB|set_print_detail + +### Example + +```python +from do.API import Do + +do_api = Do("models/model1.yml") +do_api.set_print_detail(True) + +# queries here... +``` + +
+ +## Set Logging + +Set whether to log results and details to some file descriptor. + +Requires a file descriptor to have been set when [[instantiating the API|Loading a Model]], or [explicitly set](#set-log-fd). + +STUB|set_logging + +### Example + +```python +from pathlib import Path +from do.API import Do + +file = Path("output/model1-output") +f = file.open("w") + +do_api = Do("models/model1.yml", log_fd=f) + +do_api.set_logging(True) + +# queries here... + +f.close() +``` + +**Important** +- If logging is enabled, all results and details will be written to the file descriptor set, regardless of settings for whether to *print* results and/or details. + +
+ +## Set Log FD + +Set an open file descriptor as the file descriptor to write to. + +STUB|set_log_fd + +### Example + +```python +from pathlib import Path +from do.API import Do + +do_api = Do("models/model1.yml") + +file = Path("output/model1-output") +f = file.open("w") + +do_api.set_log_fd(f) + +# queries here... + +f.close() +``` + +**Important** +- For this, *any* open file descriptor can be given, as long as the file descriptor object given *has write permission*, and supports a ``.write()`` method that **takes a string as input**. diff --git a/wiki/pages/Parents.md b/wiki/pages/Parents.md new file mode 100644 index 0000000..e97a011 --- /dev/null +++ b/wiki/pages/Parents.md @@ -0,0 +1,17 @@ +Get the parents of some vertex *v*, where a parent is some vertex *p* such that the edge ``(p, v)`` is in the graph. + +STUB|parents + +### Example + +```python +from do.API import Do + +model = "models/model1.yml" +do_api = Do(model) + +parents = do_api.parents("x") +``` + +**Important** +- The parents are always returned as a (possibly **empty**) collection of vertices. diff --git a/wiki/Probability Queries.md b/wiki/pages/Probability Queries.md similarity index 86% rename from wiki/Probability Queries.md rename to wiki/pages/Probability Queries.md index a98d030..4cd91f9 100644 --- a/wiki/Probability Queries.md +++ b/wiki/pages/Probability Queries.md @@ -1,9 +1,13 @@ How to measure probabilities using the **Do** API. -## Making a Query +STUB|p + +## Basic Query For this, we will query a standard probability through the **Do** API. +This will use the [[simple model from Markovian Models|Markovian Models]]. 
+ ```python from do.API import Do from do.structures.VariableStructures import Outcome @@ -36,7 +40,7 @@ do_api = Do(m) x = Outcome("X", "x") y = Outcome("Y", "y") -x_alone = do_api.p({x}, set()) +x_alone = do_api.p({x}) print(f"The probability of X=x, P(X=x) = {x_alone:5}") x_if_y = do_api.p({x}, {y}) @@ -52,10 +56,12 @@ print(f"The probability of P(X=x, Y=y) = {x_and_y:5}") - The Outcome class is located at ``do.structures.VariableStructures``. - The API function provided in **Do** to query a probability is the ``p`` function. - **Do.p** takes *two* arguments, a *Collection of outcome outcomes*, and a *Collection of "given" outcomes*. -- **Do.p** requires an empty set as its "given" outcomes even if there are none. +- **Do.p** can take an *empty collection* if there are no "given" outcomes. +- **Do.p** can *completely omit* the "given" collection of outcomes if there are none. +- **Do.p** can have its "given" collection of outcomes explicitly set to ``None`` if there are none. - **Do.p** returns a *float*, between [0, 1]. -## Querying an Interventional Distribution +## Querying an Interventional Measurement Assume the existence of some more complicated model, ``m_confounded``, in which multiple variables are susceptible to *backdoor paths* or *confounding*, but a sufficient *deconfounding set* can block all backdoor paths. - See [[Literature]] for more details on *backdoor paths* and *deconfounding*. diff --git a/wiki/PyPI.md b/wiki/pages/PyPI.md similarity index 100% rename from wiki/PyPI.md rename to wiki/pages/PyPI.md diff --git a/wiki/Resources.md b/wiki/pages/Resources.md similarity index 53% rename from wiki/Resources.md rename to wiki/pages/Resources.md index 4eb50c3..3e48d99 100644 --- a/wiki/Resources.md +++ b/wiki/pages/Resources.md @@ -1,5 +1,5 @@ A collection of resources for information the project, or *do-calculus* generally. - +* [[Definitions]]: Details on various definitions and terminology used within the project. 
+* [[Markovian Models]]: Details on the structure of a Markovian model for use in the package. * [[Configuration]]: Settings for the project. -* [[Causal Models]]: Details on the structure of a causal model for use in the package. * [[Literature]]: Books and papers referenced in the implementation of this project. diff --git a/wiki/pages/Roots.md b/wiki/pages/Roots.md new file mode 100644 index 0000000..4bf8e36 --- /dev/null +++ b/wiki/pages/Roots.md @@ -0,0 +1,17 @@ +Get all roots in the graph, where a root is defined as a vertex with no parent. + +STUB|roots + +### Example + +```python +from do.API import Do + +model = "models/model1.yml" +do_api = Do(model) + +roots = do_api.roots() +``` + +**Important** +- The roots are always returned as a collection of vertices. diff --git a/wiki/pages/Sinks.md b/wiki/pages/Sinks.md new file mode 100644 index 0000000..84c347a --- /dev/null +++ b/wiki/pages/Sinks.md @@ -0,0 +1,17 @@ +Get all sinks in the graph, where a sink is defined as vertex with no child. + +STUB|sinks + +### Example + +```python +from do.API import Do + +model = "models/model1.yml" +do_api = Do(model) + +sinks = do_api.sinks() +``` + +**Important** +- The sinks are always returned as a collection of vertices. diff --git a/wiki/pages/Standard Paths.md b/wiki/pages/Standard Paths.md new file mode 100644 index 0000000..829eab3 --- /dev/null +++ b/wiki/pages/Standard Paths.md @@ -0,0 +1,18 @@ +Get traditional, directed paths from the *DAG* encoded in the model. + +This includes all standard, directed paths as well-defined in graph terminology, and explicitly does **not** include any backdoor paths. + +STUB|standard_paths + +## Example + +```python +from do.API import Do + +do_api = Do("models/model1.yml") + +paths = do_api.standard_paths({"x", "y"}, {"z"}) +``` + +**Important** +- Since collections of vertices are provided, any path from some vertex in ``src`` to some vertex in ``dst`` is included in the returned collection. 
diff --git a/wiki/pages/Topology.md b/wiki/pages/Topology.md new file mode 100644 index 0000000..42e95ed --- /dev/null +++ b/wiki/pages/Topology.md @@ -0,0 +1,45 @@ +Details on getting the topology of the model. + +See details in [[Definitions]] for information on the topology-ordering. + +## Topology + +Getting a topological ordering of the model. + +STUB|topology + +### Example + +```python +from do.API import Do + +do_api = Do("models/model1.yml") + +topology = do_api.topology() + +for v in topology: + print(v) +``` + +**Important** +- A sequence of *N* vertices is returned. + +
+ +## Topology Position + +Get the position of some vertex in the model in its topological ordering. + +STUB|topology_position + +```python +from do.API import Do + +do_api = Do("models/model1.yml") + +position = do_api.topology_position("x") +print(position) +``` + +**Important** +- The topological ordering begins at V1, so the value returned for a graph of N vertices is in the range \[1, N\]. diff --git a/wiki/pages/_Sidebar.md b/wiki/pages/_Sidebar.md new file mode 100644 index 0000000..a9df452 --- /dev/null +++ b/wiki/pages/_Sidebar.md @@ -0,0 +1,34 @@ +### [[Home]] + +### [[Installation]] +* [[PyPI]] +* [[GitHub]] + +### [[Resources]] +* [[Definitions]] +* [[Markovian Models]] +* [[Configuration]] +* [[Literature]] + +### [[Do API]] +* [[Do.\_\_init\_\_|\_\_init\_\_]] +* [[Do.load_model|Loading a Model]] +* [[Do.p|Probability Queries]] +* [[Do.joint_distribution_table|Joint Distribution Table]] +* [[Do.backdoor_paths|Backdoor Paths]] +* [[Do.standard_paths|Standard Paths]] +* [[Do.deconfounding_sets|Deconfounding Sets]] +* [[Do.independent|Conditional Independence]] +* [[Do.roots|Roots]] +* [[Do.sinks|Sinks]] +* [[Do.parents|Parents]] +* [[Do.children|Children]] +* [[Do.ancestors|Ancestors]] +* [[Do.descendants|Descendants]] +* [[Do.topology|Topology]] +* [[Do.topology_position|Topology]] +* [[Do.set_print_result|Output]] +* [[Do.set_print_detail|Output]] +* [[Do.set_logging|Output]] +* [[Do.set_log_fd|Output]] +* [[Exceptions]] diff --git a/wiki/pages/__init__.md b/wiki/pages/__init__.md new file mode 100644 index 0000000..19597a9 --- /dev/null +++ b/wiki/pages/__init__.md @@ -0,0 +1,119 @@ +How to instantiate the **Do** API. + +STUB|__init__ + +## Examples + +One can provide a model, and specify what details and results to print and/or log to a file.
+ +```python +from pathlib import Path +from do.API import Do + +file = Path("output/model1.yml") +f = file.open("w") + +do_api = Do( + model=m, + print_detail=False, + print_result=True, + log=True, + log_fd=f +) +``` + +**Note**: Here, ``m`` is not defined, but multiple examples will follow, detailing acceptable forms of ``m``. + +**Important**: +- Since **Do** is a class, multiple instances of **Do** - each with their own model - can be instantiated in one project at a time. +- Various parameters of outputting and logging details can be [[tweaked|Output]]. + +### Model: Python dictionary + +One can have a model represented as a dictionary, and pass this as a *constructor argument* to instantiate **Do**. + +```python +from pathlib import Path +from do.API import Do + +m = { + "name": "Simple Model", + "model": { + "Y": { + "outcomes": ["y", "~y"], + "table": [ + ["y", 0.7], + ["~y", 0.3] + ] + }, + "X": { + "outcomes": ["x", "~x" ], + "parents": [ "Y" ], + "table": [ + ["x", "y", 0.9], + ["x", "~y", 0.75], + ["~x", "y", 0.1], + ["~x", "~y", 0.25] + ] + } + } +} + +file = Path("output/model1.yml") +f = file.open("w") + +do_api = Do( + model=m, + print_detail=False, + print_result=True, + log=True, + log_fd=f +) +``` + +**Important** +- A regular Python dictionary representation of a [[Markovian model|Markovian Models]] is valid input to **Do**. + +### Model: string path to a file + +One can also have a file contain a valid model, and pass the *path* to the file as input as well. + +```python +from do.API import Do + +model_path = "data/graph1.yml" +do_api = Do(model_path) # All good! + +fake_path = "does/not/exist.file" +do_api_2 = Do(fake_path) # This will raise an exception! +``` + +**Important**: +- A *string path* is valid to pass to **Do**. +- If the file cannot be found or parsed, an exception will be raised. 
+ +### Model: pathlib.Path + +One can also provide a [Path](https://docs.python.org/3/library/pathlib.html#pathlib.Path) object, as part of the [pathlib library](https://docs.python.org/3/library/pathlib.html). +- **Trivia**: Providing a [string path to a file](#model-string-path-to-a-file) works by attempting to create a [Path](https://docs.python.org/3/library/pathlib.html#pathlib.Path) from the string path. + +```python +from pathlib import Path +from do.API import Do + +model_path = Path("graph2.yml") +do_api = Do(model_path) +``` + +### Model: None + +One can also create an instance of **Do**, in which no model is provided, and instead [[defer loading the model until later|Loading a Model]]. + +```python +from do.API import Do + +do_api = Do(model=None, print_result=True) +``` + +**Important** +- If no model is loaded, any relevant API functionality will fail until a model [[has been loaded|Loading a Model]].