diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..8aa2645 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) [year] [fullname] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..f13b725 --- /dev/null +++ b/README.rst @@ -0,0 +1,34 @@ +GraphScraper +================= + +GraphScraper is a Python 3 library that contains a base graph implementation designed +to be turned into a web scraper for graph data. It has two major features: +1) The graph automatically manages a database (using either SQLAlchemy or + Flask-SQLAlchemy) where it stores all the nodes and edges the graph has seen. +2) The base graph implementation provides hook methods that, if implemented, + turn the graph into a web scraper. 
+ +Demo - igraph +------------------ + +Besides the actual graph implementation, a working demo using the igraph_ library +is also included that shows how you can implement and use an actual graph-scraper. +Instead of actual web-scraping, this demo uses an igraph graph instance as the "remote" +source to scrape data from. + +Dependencies +----------------- + +The project requires SQLAlchemy_ or Flask-SQLAlchemy_ to be installed. +If you wish to use the included igraph-based graph implementation, you will also need +the igraph_ library. + +Contribution +----------------- + +Any form of constructive contribution (feedback, features, bug fixes, tests, additional +documentation, etc.) is welcome. + +.. _Flask-SQLAlchemy: http://flask-sqlalchemy.pocoo.org/ +.. _igraph: http://igraph.org +.. _SQLAlchemy: https://www.sqlalchemy.org/ diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..9644355 --- /dev/null +++ b/setup.py @@ -0,0 +1,37 @@ +from codecs import open +from os import path +from setuptools import setup, find_packages + +# Get the long description from the README file +with open(path.join(path.abspath(path.dirname(__file__)), 'README.rst'), encoding='utf-8') as f: + long_description = f.read() + +setup( + name="graphscraper", + version="0.1.0", + description="Graph implementation that loads graph data (nodes and edges) from external sources " + "and caches the loaded data in a database using sqlalchemy or flask-sqlalchemy.", + long_description=long_description, + url="https://github.com/volfpeter/graphscraper", + author="Peter Volf", + author_email="do.volfp@gmail.com", + license="MIT", + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Natural Language :: English", + "Programming Language :: Python :: 3", + "Topic :: Database", + "Topic :: Internet", + "Topic :: 
Scientific/Engineering", + "Topic :: Software Development :: Libraries", + "Topic :: Utilities" + ], + keywords="graph network webscraping sqlalchemy database db caching", + package_dir={"": "src"}, + packages=find_packages("src"), + python_requires=">=3" +) diff --git a/src/graphscraper/__init__.py b/src/graphscraper/__init__.py new file mode 100755 index 0000000..4acfe22 --- /dev/null +++ b/src/graphscraper/__init__.py @@ -0,0 +1,23 @@ +""" +The root package of the graphscraper project. +""" + +# Imports +# ------------------------------------------------------------ + +# Expose the core modules for ease of use. +from graphscraper import base +from graphscraper import db + +# No need to expose the eventdispatcher module. It won't be needed by the user. +# from graphscraper import eventdispatcher + +# Do not import the rest of the modules, because they have dependencies that +# might not be available on the user's machine. +# from graphscraper import demo +# from graphscraper import igraphwrapper + +# Module constants +# ------------------------------------------------------------ + +__author__ = 'Peter Volf' diff --git a/src/graphscraper/base.py b/src/graphscraper/base.py new file mode 100755 index 0000000..dda5024 --- /dev/null +++ b/src/graphscraper/base.py @@ -0,0 +1,746 @@ +""" +Base graph implementation that builds a graph by loading data from some data source, caching the +retrieved data in a database of your choice in the meantime. + +The building blocks of the graph implementation are nodes and edges that are stored in node and +edge lists respectively that are bound together by the graph itself. + +You can build on this base graph implementation by implementing i) the graph method that turns a +(potentially incorrect) node name into a valid node name and ii) the node method that returns the +neighbors of the given node. 
+""" + +# Imports +# ------------------------------------------------------------ + +from operator import attrgetter +from typing import Dict, List, Optional, Tuple, Union + +from graphscraper.db import DBEdge, DBNode, GraphDatabaseInterface +from graphscraper.eventdispatcher import Event, EventDispatcher + +# Module constants +# ------------------------------------------------------------ + +__author__ = 'Peter Volf' + +# Classes +# ------------------------------------------------------------ + + +class Node(EventDispatcher): + """ + Node implementation that is able to load its neighbors from a data source and + store the loaded data in a local database. + """ + + # Initialization + # ------------------------------------------------------------ + + def __init__(self, graph: "Graph", index: int, name: str): + """ + Initialization. + + Arguments: + graph (Graph): The graph that owns this node. + index (int): The unique index of the node in the graph. + name (str): The name of the node. + """ + EventDispatcher.__init__(self) + + self._are_neighbors_loaded: bool = False + """Whether the neighbors of the node have been loaded from the local cache.""" + self._graph: "Graph" = graph + """The graph that owns this node.""" + self._index: int = index + """The unique index of the node in the graph.""" + self._neighbors: Dict[(int, int), Edge] = {} + """Dictionary mapping node index tuples to the corresponding edge.""" + + self.are_neighbors_cached: bool = False + """Whether the neighbors of the node have already been added to the local cache.""" + self.name = name + """The name of the node.""" + + # Properties + # ------------------------------------------------------------ + + @property + def degree(self) -> int: + """ + The degree of the node. + """ + self._load_neighbors() + return len(self._neighbors) + + @property + def index(self) -> int: + """ + The unique index of the node in the graph. 
+ """ + return self._index + + @property + def neighbors(self) -> List['Node']: + """ + The list of neighbors of the node. + """ + self._load_neighbors() + return [edge.source if edge.source != self else edge.target + for edge in self._neighbors.values()] + + # Public methods + # ------------------------------------------------------------ + + def add_neighbor(self, edge: "Edge") -> None: + """ + Adds a new neighbor to the node. + + Arguments: + edge (Edge): The edge that would connect this node with its neighbor. + """ + if edge is None or (edge.source != self and edge.target != self): + return + + if edge.source == self: + other: Node = edge.target + elif edge.target == self: + other: Node = edge.source + else: + raise ValueError("Tried to add a neighbor with an invalid edge.") + + edge_key: Tuple(int, int) = edge.key + + # The graph is considered undirected, check neighbor existence accordingly. + if self._neighbors.get(edge_key) or self._neighbors.get((edge_key[1], edge_key[0])): + return # The neighbor is already added. + + self._neighbors[edge_key] = edge + self.dispatch_event(NeighborAddedEvent(other)) + + # Private methods + # ------------------------------------------------------------ + + def _load_neighbors(self) -> None: + """ + Loads all neighbors of the node from the local database and + from the external data source if needed. + """ + if not self.are_neighbors_cached: + self._load_neighbors_from_external_source() + db: GraphDatabaseInterface = self._graph.database + db_node: DBNode = db.Node.find_by_name(self.name) + db_node.are_neighbors_cached = True + db.session.commit() + self.are_neighbors_cached = True + if not self._are_neighbors_loaded: + self._load_neighbors_from_database() + + def _load_neighbors_from_database(self) -> None: + """ + Loads the neighbors of the node from the local database. 
+ """ + self._are_neighbors_loaded = True + + graph: Graph = self._graph + neighbor_names: List[str] = graph.database.Node.find_by_name(self.name).neighbor_names + nodes: NodeList = graph.nodes + + for name in neighbor_names: + graph.add_node(name) + neighbor: Node = nodes.get_node_by_name(name) + graph.add_edge(self, neighbor, 1, False) + + def _load_neighbors_from_external_source(self) -> None: + """ + Loads the neighbors of the node from the external data source. + """ + # The default implementation is empty, Node relies on a database filled with data. + # Override this method in child classes to let the node load its neighbors from + # an external data source. + pass + + +class Edge(object): + """ + Graph edge. + """ + + # Initialization + # ------------------------------------------------------------ + + def __init__(self, source: Node, target: Node, weight: float = 1): + """ + Initialization. + + Arguments: + source (Node): The source node of the edge. + target (Node): The target node of the edge. + weight (float): The weight of the edge. + """ + if not isinstance(source, Node): + raise TypeError("Invalid source node: {}".format(source)) + if not isinstance(target, Node): + raise TypeError("Invalid target node: {}".format(target)) + if (not isinstance(weight, float) and not isinstance(weight, int)) or weight <= 0: + raise TypeError("Invalid edge weight: {}".format(weight)) + if source.index == target.index: + raise ValueError("Creating a loop edge is not allowed.") + + self._source: Node = source + """The source node of the edge.""" + self._target: Node = target + """The target node of the edge.""" + self._weight: float = weight + """The weight of the edge.""" + + source.add_neighbor(self) + target.add_neighbor(self) + + # Properties + # ------------------------------------------------------------ + + @property + def key(self) -> Tuple[int, int]: + """ + The unique identifier of the edge consisting of the indexes of its + source and target nodes. 
+ """ + return self._source.index, self._target.index + + @property + def source(self) -> Node: + """ + The source node of the edge. + """ + return self._source + + @property + def target(self) -> Node: + """ + The target node of the edge. + """ + return self._target + + @property + def weight(self) -> float: + """ + The weight of the edge. + """ + return self._weight + + +class NodeList(object): + """ + Container that stores `Node` instances. + """ + + # Initialization + # ------------------------------------------------------------ + + def __init__(self, graph: "Graph"): + """ + Initialization. + + Arguments: + graph (Graph): The graph the node list belongs to. + """ + self._graph: Graph = graph + """The graph the node list belongs to.""" + self._nodes: Dict[int, Node] = {} + """Storage for the nodes of node list as a node index to node instance mapping.""" + self._node_name_map: Dict[str, Node] = {} + """Dictionary that maps node names to node instances.""" + + # Special methods + # ------------------------------------------------------------ + + def __len__(self) -> int: + """ + Returns the number of items in the container. + """ + return len(self._nodes) + + def __getitem__(self, key: Union[int, str]) -> Node: + """ + Returns the node corresponding to the given key. + + If the given key is an integer, then the node with the given index will be returned. + + If the given key is a string, then the node with the given name will be returned. + + Arguments: + key (Union[int, str]): The key that identifies the node to return. + + Raises: + IndexError: If the index is invalid or out of range. 
+ """ + node: Node = None + if isinstance(key, int): + node = self._nodes.get(key) + if isinstance(key, str): + node = self._node_name_map.get(key) + + if node is None: + raise IndexError("Invalid key.") + + return node + + # Public methods + # ------------------------------------------------------------ + + def add_node_by_name(self, node_name: str) -> None: + """ + Adds a new node to the graph if it doesn't exist. + + Arguments: + node_name (str): The name of the node to add. + """ + if node_name is None: + return + + node_name = node_name.strip() + if len(node_name) == 0: + return + + node: Node = self.get_node_by_name(node_name) + if node is None: + self._internal_add_node(node_name=node_name, + are_neighbors_cached=False, + add_to_cache=True) + + def get_node(self, index: int) -> Optional[Node]: + """ + Returns the node with the given index if such a node currently exists in the node list. + + Arguments: + index (int): The index of the queried node. + + Returns: + The node with the given index if such a node currently exists in the node list, + `None` otherwise. + """ + return self._nodes.get(index) + + def get_node_by_name(self, node_name: str, can_validate_and_load: bool = False) -> Optional[Node]: + """ + Returns the node with the given name if it exists either in the graph + or in its database cache or `None` otherwise. + + Arguments: + node_name (str): The name of the node to return. + can_validate_and_load (bool): Whether `self._graph.get_authentic_node_name(node_name)` + can be called to validate the node name and add the node + to the graph if the node name is valid. 
+ + Returns: + The node with the given name if it exists either in the graph + or in its database cache or `None` otherwise + """ + node: Node = self._node_name_map.get(node_name) + if node is not None: + return node + + db_node: DBNode = self._graph.database.Node.find_by_name(node_name) + if db_node is None: + if can_validate_and_load and node_name == self._graph.get_authentic_node_name(node_name): + self._internal_add_node(node_name=node_name, + are_neighbors_cached=False, + add_to_cache=True) + else: + return None + else: + self._internal_add_node(node_name=node_name, + are_neighbors_cached=db_node.are_neighbors_cached, + add_to_cache=False) + node = self._node_name_map.get(node_name) + # Trying to load the cached neighbors of the created node from the database could + # cause a very-very-very deep recursion, so don't even think about doing it here. + return node + + # Private methods + # ------------------------------------------------------------ + + def _create_node(self, index: int, name: str) -> Node: + """ + Returns a new `Node` instance with the given index and name. + + Arguments: + index (int): The index of the node to create. + name (str): The name of the node to create. + """ + return Node(graph=self._graph, index=index, name=name) + + def _internal_add_node(self, + node_name: str, + are_neighbors_cached: bool = False, + add_to_cache: bool = False) -> None: + """ + Adds a node with the given name to the graph without checking whether it already exists or not. + + Arguments: + node_name (str): The name of the node to add. + are_neighbors_cached (bool): Whether the neighbors of the node have already been cached. + add_to_cache (bool): Whether the node should also be created in the local cache. 
+ """ + index: int = len(self) + node: Node = self._create_node(index, node_name) + node.are_neighbors_cached = are_neighbors_cached + self._nodes[index] = node + self._node_name_map[node_name] = node + + if add_to_cache: + db: GraphDatabaseInterface = self._graph.database + db_node: DBNode = db.Node.find_by_name(node_name) + if db_node is None: + db_node = db.Node(node_name) + db_node.are_neighbors_cached = False + db.session.add(db_node) + db.session.commit() + + +class EdgeList(object): + """ + Container that stores `Edge` instances. + """ + + # Initialization + # ------------------------------------------------------------ + + def __init__(self, graph: "Graph"): + """ + Initialization. + + Arguments: + graph (Graph): The graph the edge list belongs to. + """ + self._graph: "Graph" = graph + """The graph the edge list belongs to.""" + self._edges: Dict[(int, int), Edge] = {} + """Edge key to edge instance mapping.""" + + # Special methods + # ------------------------------------------------------------ + + def __len__(self) -> int: + """ + Returns the number of edge instances contained by the edge list. + """ + return len(self._edges) + + def __getitem__(self, key: Union[Tuple[int, int], + Tuple[str, str], + Tuple[Node, Node]]) -> Optional[Edge]: + """ + Returns the edge corresponding to the given key. + + If the given key is a tuple of nodes or node indexes, then the edge connecting the two + nodes will be returned if such an edge exists. + + If the given key is a tuple of node names, then the edge connecting the corresponding + nodes will be returned if such an edge exists. + + Arguments: + key (Union[Tuple[int, int], Tuple[str, str], Tuple[Node, Node]]): The key identifying the edge to return. 
+ """ + if isinstance(key[0], Node) and isinstance(key[1], Node): + return self.get_edge(key[0], key[1]) + elif isinstance(key[0], int) and isinstance(key[1], int): + return self.get_edge_by_index(key[0], key[1]) + elif isinstance(key[0], str) and isinstance(key[1], str): + return self.get_edge_by_name(key[0], key[1]) + raise ValueError("Invalid edge key: {}".format(key)) + + # Properties + # ------------------------------------------------------------ + + @property + def edge_list(self) -> List[Edge]: + """ + The ordered list of edges in the container. + """ + return [edge for edge in sorted(self._edges.values(), key=attrgetter("key"))] + + # Public methods + # ------------------------------------------------------------ + + def add_edge(self, + source: Node, + target: Node, + weight: float = 1, + save_to_cache: bool = True) -> None: + """ + Adds an edge to the edge list that will connect the specified nodes. + + Arguments: + source (Node): The source node of the edge. + target (Node): The target node of the edge. + weight (float): The weight of the created edge. + save_to_cache (bool): Whether the edge should be saved to the local database. 
+ """ + if not isinstance(source, Node): + raise TypeError("Invalid source: expected Node instance, got {}.".format(source)) + if not isinstance(target, Node): + raise TypeError("Invalid target: expected Node instance, got {}.".format(target)) + + if source.index == target.index or\ + self.get_edge_by_index(source.index, target.index) is not None: + return + + self._edges[(source.index, target.index)] = Edge(source, target, weight) + + if save_to_cache: + should_commit: bool = False + database: GraphDatabaseInterface = self._graph.database + db_edge: DBEdge = database.Edge.find_by_name(source.name, target.name) + if db_edge is None: + database.session.add(database.Edge(source.name, target.name, weight)) + should_commit = True + elif db_edge.weight != weight: + db_edge.weight = weight + should_commit = True + + if should_commit: + database.session.commit() + + def get_edge(self, source: Node, target: Node) -> Optional[Edge]: + """ + Returns the edge connecting the given nodes if such an edge exists. + + Arguments: + source (Node): One of the endpoints of the queried edge. + target (Node): The other endpoint of the queried edge. + + Returns: + The edge connecting the given nodes + or `None` if no such edge exists. + """ + return self.get_edge_by_index(source.index, target.index) + + def get_edge_by_index(self, source_index: int, target_index: int) -> Optional[Edge]: + """ + Returns the edge connecting the nodes with the specified indices if such an edge exists. + + Arguments: + source_index (int): The index of one of the endpoints of the queried edge. + target_index (int): The index of the other endpoint of the queried edge. + + Returns: + The edge connecting the nodes with the specified indices + or `None` if no such edge exists. 
+ """ + edge = self._edges.get((source_index, target_index)) + if edge is not None: + return edge + return self._edges.get((target_index, source_index)) + + def get_edge_by_name(self, source_name: str, target_name: str) -> Optional[Edge]: + """ + Returns the edge connecting the nodes with the specified names if such an edge exists. + + Arguments: + source_name (str): The name of one of the endpoints of queried edge. + target_name (str): The name of the other endpoint of the queried edge. + + Returns: + The edge connecting the nodes with the specified names + or `None` if no such node exists. + """ + nodes: NodeList = self._graph.nodes + source: Optional[Node] = nodes.get_node_by_name(source_name) + if source is None: + return None + target: Optional[Node] = nodes.get_node_by_name(target_name) + if target is None: + return None + return self.get_edge_by_index(source.index, target.index) + + +class Graph(object): + """ + Undirected graph implementation. + """ + + # Initialization + # ------------------------------------------------------------ + + def __init__(self, database: GraphDatabaseInterface): + """ + Initialization. + + Arguments: + database (GraphDatabaseInterface): The database interface the graph is using. + """ + self._edges: EdgeList = self._create_edge_ist() + """The edge list of the graph.""" + self._nodes: NodeList = self._create_node_list() + """The node list of the graph.""" + + self.database: GraphDatabaseInterface = database + """The database interface the graph is using.""" + + # Properties + # ------------------------------------------------------------ + + @property + def edges(self) -> EdgeList: + """ + The edge list of the graph. + """ + return self._edges + + @property + def nodes(self) -> NodeList: + """ + The node list of the graph. 
+ """ + return self._nodes + + # Public methods + # ------------------------------------------------------------ + + def add_edge(self, source: Node, target: Node, + weight: float = 1, save_to_cache: bool = True) -> None: + """ + Adds an edge between the specified nodes of the graph. + + Arguments: + source (Node): The source node of the edge to add. + target (Node): The target node of the edge to add. + weight (float): The weight of the edge. + save_to_cache (bool): Whether the edge should be saved to the local database. This + argument is necessary (and `False`) when we load edges from + the local cache. + """ + if self._edges.get_edge(source, target) is not None: + return + + self._edges.add_edge( + source=source, + target=target, + weight=weight, + save_to_cache=save_to_cache + ) + + def add_edge_by_index(self, source_index: int, target_index: int, + weight: float, save_to_cache: bool = True) -> None: + """ + Adds an edge between the nodes with the specified indices to the graph. + + Arguments: + source_index (int): The index of the source node of the edge to add. + target_index (int): The index of the target node of the edge to add. + weight (float): The weight of the edge. + save_to_cache (bool): Whether the edge should be saved to the local database. This + argument is necessary (and `False`) when we load edges from + the local cache. + """ + source: Node = self._nodes.get_node(source_index) + target: Node = self._nodes.get_node(target_index) + if source is None or target is None: + return + + self.add_edge( + source=source, + target=target, + weight=weight, + save_to_cache=save_to_cache + ) + + def add_node(self, node_name: str) -> None: + """ + Adds the node with the given name to the graph. + + Arguments: + node_name (str): The name of the node to add to the graph. 
+ """ + self._nodes.add_node_by_name(node_name) + + def get_authentic_node_name(self, node_name: str) -> Optional[str]: + """ + Returns the exact, authentic node name for the given node name if a node corresponding to + the given name exists in the graph (maybe not locally yet) or `None` otherwise. + + By default, this method checks whether a node with the given name exists locally in the + graph and return `node_name` if it does or `None` otherwise. + + In `Graph` extensions that are used by applications where the user can enter potentially + incorrect node names, this method should be overridden to improve usability. + + Arguments: + node_name (str): The node name to return the authentic node name for. + + Returns: + The authentic name of the node corresponding to the given node name or + `None` if no such node exists. + """ + node: Node = self._nodes.get_node_by_name(node_name) + return node.name if node is not None else None + + def node_exists(self, node_name: str) -> bool: + """ + Returns whether a node with the given name exists in the graph. + + This method relies on the value returned by `get_authentic_node_name()` to decide whether + a node with a certain name exists or not. + + Arguments: + node_name (str): The name of the node to check. + + Returns: + `True` if there is a node with the specified name in the graph, `False` otherwise. + """ + return self.get_authentic_node_name(node_name) is not None + + # Private methods + # ------------------------------------------------------------ + + def _create_edge_ist(self) -> EdgeList: + """ + Called during the initialization of the graph instance, + creates and returns the edge list of the graph. + """ + return EdgeList(self) + + def _create_node_list(self) -> NodeList: + """ + Called during the initialization of the graph instance, + creates and returns the node list of the graph. 
+ """ + return NodeList(self) + + +class NeighborAddedEvent(Event): + """ + The event `Node` instances use to let others know that they have received a new neighbor. + """ + + # Class constants + # ------------------------------------------------------------ + + NEIGHBOR_ADDED = "neighborAdded" + """Dispatched when a neighbor is added to a node through its `add_neighbor()` method.""" + + # Initializations + # ------------------------------------------------------------ + + def __init__(self, neighbor: Node): + """ + Initializations. + + Arguments: + neighbor (Node): The neighbor that was added to the node. + """ + super(NeighborAddedEvent, self).__init__(NeighborAddedEvent.NEIGHBOR_ADDED) + self._neighbor: Node = neighbor + """The neighbor that was added to the node.""" + + # Properties + # ------------------------------------------------------------ + + @property + def neighbor(self) -> Node: + """ + The neighbor that was added to the node that dispatched this event. + """ + return self._neighbor diff --git a/src/graphscraper/db.py b/src/graphscraper/db.py new file mode 100755 index 0000000..41930b3 --- /dev/null +++ b/src/graphscraper/db.py @@ -0,0 +1,547 @@ +""" +Database interface definition for the graph implementation. +""" + +# Imports +# ------------------------------------------------------------ + +from typing import List, Optional + +import datetime + +try: + from sqlalchemy.engine.base import Engine + from sqlalchemy.ext.declarative.api import DeclarativeMeta + from sqlalchemy.orm import RelationshipProperty + from sqlalchemy.orm.session import Session +except ImportError: + # SQLAlchemy imports failed (they are used for typing only, so no problem). 
+ Engine = None + DeclarativeMeta = None + RelationshipProperty = None + Session = None + +# Module constants +# ------------------------------------------------------------ + +__author__ = "Peter Volf" + +# Classes +# ------------------------------------------------------------ + + +class DBNode(object): + """ + Interface specification for the node database object model. + """ + + # Initialization + # ------------------------------------------------------------ + + def __init__(self, node_name: str): + """ + Initialization. + + Arguments: + node_name (str): The name of the node. + """ + # We must not only declare the properties but also initialize them, + # otherwise the IDE will show warnings wherever the properties are accessed. + + self.are_neighbors_cached: bool = False + """Whether the neighbors of the node have already been added to the database.""" + + self.edges_where_source: List["DBEdge"] = [] + """The list of edges in the database where this node is the source.""" + + self.edges_where_target: List["DBEdge"] = [] + """The list of edges in the database where this node is the target.""" + + self.name: str = "" + """The name of the node.""" + raise NotImplementedError("DBNode is just an abstract base class that defines the " + "interface of actual node model objects. {}".format(node_name)) + + # Properties + # ------------------------------------------------------------ + + @property + def creation_date(self) -> datetime.date: + """ + The date when the node was created. + """ + raise NotImplementedError("DBNode is just an abstract base class that defines " + "the interface of actual node model objects.") + + @property + def edges(self) -> List["DBEdge"]: + """ + The edges where this node is one of the endpoints. 
+ """ + raise NotImplementedError("DBNode is just an abstract base class that defines " + "the interface of actual node model objects.") + + @property + def neighbor_names(self) -> List[str]: + """ + The names of the neighbors of the node that are currently in the database. + """ + raise NotImplementedError("DBNode is just an abstract base class that defines " + "the interface of actual node model objects.") + + @property + def neighbors(self) -> List["DBNode"]: + """ + The list of neighbors the node currently has in the database. + """ + raise NotImplementedError("DBNode is just an abstract base class that defines " + "the interface of actual node model objects.") + + # Class methods + # ------------------------------------------------------------ + + @classmethod + def find_by_name(cls, node_name: str) -> Optional["DBNode"]: + """ + Returns the `DBNode` with the given name if such a node exists in the database. + + Arguments: + node_name (str): The queried node name. + + Returns: + The node with the given name if it exists. + """ + raise NotImplementedError("DBNode is just an abstract base class that defines " + "the interface of actual node model objects.") + + +class DBEdge(object): + """ + Interface specification for the edge database object model. + """ + + # Initialization + # ------------------------------------------------------------ + + def __init__(self, source_name: str, target_name: str, weight: float = 1): + """ + Initialization. + + Arguments: + source_name (str): The name of the source node of the edge. + target_name (str): The name of the target node of the edge. + weight (float): The weight of the edge. + """ + # We must not only declare the properties but also initialize them, + # otherwise the IDE will show warnings wherever the properties are accessed. 
+ + self.source_name: str = source_name + """The name of the source node of the edge.""" + + self.target_name: str = target_name + """The name of the target node of the edge.""" + + self.weight: float = weight + """The weight of the edge.""" + + raise NotImplementedError("DBEdge is just an abstract base class that defines " + "the interface of actual edge model objects. " + "{} - {} ({})".format(source_name, target_name, weight)) + + # Class methods + # ------------------------------------------------------------ + + @classmethod + def find_by_name(cls, source_name: str, target_name: str) -> Optional["DBEdge"]: + """ + Returns the `DBEdge` connecting the nodes with the given names if such an edge + currently exists in the database. + + Arguments: + source_name (str): The name of one of the endpoints of the queried edge. + target_name (str): The name of the other endpoint of the queried edge. + + Returns: + The edge connecting the given nodes in the database if such an edge exists. + """ + raise NotImplementedError("DBEdge is just an abstract base class that defines " + "the interface of actual edge model objects.") + + +class GraphDatabaseInterface(object): + """ + Database interface implementation that provides users access to a graph database model + using SQLAlchemy or Flask-SQLAlchemy. + """ + + # Initialization + # ------------------------------------------------------------ + + def __init__(self, + session: Session, + node: DBNode, + edge: DeclarativeMeta): + """ + Initialization. 
def create_graph_database_interface(db: Engine,
                                    session: Session,
                                    declarative_meta: DeclarativeMeta,
                                    relationship: RelationshipProperty) -> GraphDatabaseInterface:
    """
    Creates a graph database interface to the database specified by the input arguments.

    Two declarative models are created on top of `declarative_meta`: a node model stored
    in the `nodes` table and an edge model stored in the `edges` table. Edges are
    undirected: the endpoints of an edge are always stored so that
    `source_name < target_name`.

    Note that the created database model assumes that the database enforces constraints such
    as foreign key validity. This is _not true_ for SQLite databases for example, where
    `PRAGMA foreign_keys` is off by default. If you don't ensure that the database enforces
    constraints, then some of the database tables might end up containing invalid records.

    SQLAlchemy example:
        >>> try:
        >>>     import sqlalchemy
        >>>     from sqlalchemy.ext.declarative import declarative_base
        >>>     from sqlalchemy.orm import sessionmaker
        >>> except ImportError:
        >>>     raise ImportError("SQLAlchemy not found.")
        >>>
        >>> # Import this method
        >>> # import create_graph_database_interface
        >>>
        >>> # Database interface setup.
        >>> Base = declarative_base()
        >>> engine = sqlalchemy.create_engine("sqlite://")
        >>> Session = sessionmaker(bind=engine)
        >>> dbi: GraphDatabaseInterface = create_graph_database_interface(
        >>>     sqlalchemy,
        >>>     Session(),
        >>>     Base,
        >>>     sqlalchemy.orm.relationship
        >>> )
        >>>
        >>> # Complete database reset.
        >>> Base.metadata.drop_all(engine)
        >>> Base.metadata.create_all(engine)
        >>>
        >>> # Database interface test.
        >>> node: DBNode = dbi.Node("Some vertex")
        >>> dbi.session.add(node)
        >>> dbi.session.commit()
        >>>
        >>> node = dbi.Node.find_by_name("Some vertex")
        >>> if node is not None:
        >>>     print("Got it!")

    Flask-SQLAlchemy example:
        >>> try:
        >>>     from flask import Flask
        >>>     from flask_sqlalchemy import SQLAlchemy
        >>> except ImportError:
        >>>     raise ImportError("The flask and flask-sqlalchemy libraries are required in this example.")
        >>>
        >>> # Import this method
        >>> # import create_graph_database_interface
        >>>
        >>> # Set up the Flask application.
        >>> app = Flask(__name__)
        >>> app.config['SQLALCHEMY_DATABASE_URI'] = "sqlite://"
        >>> flask_db = SQLAlchemy(app)
        >>>
        >>> # Create the database interface
        >>> dbi: GraphDatabaseInterface = create_graph_database_interface(
        >>>     flask_db,
        >>>     flask_db.session,
        >>>     flask_db.Model,
        >>>     flask_db.relationship
        >>> )
        >>>
        >>> # Complete database reset.
        >>> flask_db.drop_all()
        >>> flask_db.create_all()
        >>>
        >>> # Database interface test.
        >>> node: DBNode = dbi.Node("Some vertex")
        >>> dbi.session.add(node)
        >>> dbi.session.commit()
        >>>
        >>> node = dbi.Node.find_by_name("Some vertex")
        >>> if node is not None:
        >>>     print("Got it!")

    Arguments:
        db (Engine): The SQLAlchemy or Flask-SQLAlchemy database to use. It must expose
                     the schema constructs used by the models (`Column`, `String`, `Date`,
                     `Boolean`, `Float`, `ForeignKey` and `CheckConstraint`), like the
                     `sqlalchemy` module and the Flask-SQLAlchemy database object in the
                     examples above do.
        session (Session): The session database operations are executed on.
        declarative_meta (DeclarativeMeta): The metaclass that should be the base class
                                            of the database model classes.
        relationship (RelationshipProperty): Relationship property generator.

    Returns:
        A graph database interface object the user can use to query and manipulate the database
        that is specified by the input arguments of the method.
    """
    # Node names are the primary key of the `nodes` table and edge endpoints are foreign
    # keys to that column, so all three columns must be declared with the same type.
    max_name_length = 100

    class DBNode(declarative_meta):
        """
        Declarative database object model representing a node.
        """

        # Metaclass definition
        # ------------------------------------------------------------

        # __bind_key__ = bind_key  # Variable name must not be changed.
        __tablename__ = "nodes"  # Variable name must not be changed.

        _creation_date = db.Column(db.Date,
                                   nullable=False,
                                   default=datetime.date.today)

        are_neighbors_cached = db.Column(db.Boolean,
                                         default=False)
        name = db.Column(db.String(max_name_length),
                         primary_key=True)

        edges_where_source = relationship("DBEdge",
                                          primaryjoin="DBNode.name==DBEdge.source_name",
                                          cascade="all, delete-orphan",
                                          backref="source")
        edges_where_target = relationship("DBEdge",
                                          primaryjoin="DBNode.name==DBEdge.target_name",
                                          cascade="all, delete-orphan",
                                          backref="target")

        # Initialization
        # ------------------------------------------------------------

        def __init__(self, node_name: str):
            """
            Initialization.

            Arguments:
                node_name (str): The name of the node. Leading and trailing whitespace
                                 is removed from it.

            Raises:
                ValueError: If `node_name` is `None` or contains only whitespace.
            """
            if node_name is None:
                raise ValueError("Node name must not be None.")

            node_name = node_name.strip()
            if not node_name:
                raise ValueError("Node name must contain non-whitespace characters.")

            self.name = node_name

        # Special methods
        # ------------------------------------------------------------

        def __repr__(self) -> str:
            """
            The string representation of the object.
            """
            return "DBNode({})".format(self.name)

        # Properties
        # ------------------------------------------------------------

        @property
        def creation_date(self) -> datetime.date:
            """
            The date when the node was created.
            """
            return self._creation_date

        @property
        def edges(self) -> List["DBEdge"]:
            """
            The edges where this node is one of the endpoints: first the edges where
            the node is the target, then the edges where it is the source.
            """
            result: List["DBEdge"] = []
            for edges in (self.edges_where_target, self.edges_where_source):
                if edges is not None:
                    result.extend(edges)

            return result

        @property
        def neighbor_names(self) -> List[str]:
            """
            The names of the neighbors of the node that are currently in the database.
            """
            # The node can be on either side of an edge, so both sides are queried.
            result: List[str] = [edge.source_name
                                 for edge in DBEdge.query.filter_by(target_name=self.name)]
            result.extend(edge.target_name
                          for edge in DBEdge.query.filter_by(source_name=self.name))
            return result

        @property
        def neighbors(self) -> List["DBNode"]:
            """
            The list of neighbors the node currently has in the database.
            """
            # `source` and `target` are the backrefs declared on the node relationships.
            result: List["DBNode"] = [edge.source
                                      for edge in DBEdge.query.filter_by(target_name=self.name)]
            result.extend(edge.target
                          for edge in DBEdge.query.filter_by(source_name=self.name))
            return result

        # Class methods
        # ------------------------------------------------------------

        @classmethod
        def find_by_name(cls, node_name: str) -> Optional["DBNode"]:
            """
            Returns the `DBNode` with the given name if such a node exists in the database.

            Arguments:
                node_name (str): The queried node name.

            Returns:
                The node with the given name if it exists.
            """
            return cls.query.filter_by(name=node_name).first()

    class DBEdge(declarative_meta):
        """
        Declarative database object model representing an edge.
        """

        # Metaclass definition
        # ------------------------------------------------------------

        # __bind_key__ = bind_key  # Variable name must not be changed.
        __tablename__ = "edges"  # Variable name must not be changed.

        # The ordering constraint involves two columns, so it is declared once on the
        # table: column-level CHECK constraints should only refer to their own column.
        __table_args__ = (
            db.CheckConstraint("source_name < target_name"),
        )

        _creation_date = db.Column(db.Date,
                                   nullable=False,
                                   default=datetime.date.today)

        source_name = db.Column(db.String(max_name_length),
                                db.ForeignKey("nodes.name"),
                                primary_key=True)
        target_name = db.Column(db.String(max_name_length),
                                db.ForeignKey("nodes.name"),
                                primary_key=True)
        weight = db.Column(db.Float,
                           db.CheckConstraint("weight > 0"),
                           default=1)

        # Initialization
        # ------------------------------------------------------------

        def __init__(self, source_name: str, target_name: str, weight: float = 1):
            """
            Initialization.

            Arguments:
                source_name (str): The name of the source node of the edge.
                target_name (str): The name of the target node of the edge.
                weight (float): The weight of the edge.

            Raises:
                ValueError: If either name is `None` or the two names are equal.
            """
            if source_name is None or target_name is None or source_name == target_name:
                raise ValueError("Invalid source and target name pair: {} - {}".format(
                    source_name, target_name)
                )

            # Make sure the order is right, we don't want the graph to be directed.
            if source_name < target_name:
                self.source_name = source_name
                self.target_name = target_name
            else:
                self.source_name = target_name
                self.target_name = source_name

            self.weight = weight

        # Special methods
        # ------------------------------------------------------------

        def __repr__(self):
            """
            The string representation of the object.
            """
            return "DBEdge({} <> {})".format(self.source_name, self.target_name)

        # Properties
        # ------------------------------------------------------------

        @property
        def creation_date(self) -> datetime.date:
            """
            The date when the edge was created.
            """
            return self._creation_date

        # Class methods
        # ------------------------------------------------------------

        @classmethod
        def find_by_name(cls, source_name: str, target_name: str) -> Optional["DBEdge"]:
            """
            Returns the `DBEdge` connecting the nodes with the given names if such an edge
            currently exists in the database.

            Arguments:
                source_name (str): The name of one of the endpoints of the queried edge.
                target_name (str): The name of the other endpoint of the queried edge.

            Returns:
                The edge connecting the given nodes in the database if such an edge exists.
            """
            # Edges are stored with ordered endpoints, so the query must use the same order.
            if source_name < target_name:
                return cls.query.filter_by(source_name=source_name, target_name=target_name).first()
            else:
                return cls.query.filter_by(source_name=target_name, target_name=source_name).first()

    return GraphDatabaseInterface(session, DBNode, DBEdge)
class EventDispatcherBase(object):
    """Defines the interface all event dispatchers must implement."""

    # Public methods
    # ------------------------------------------------------------

    def add_event_listener(self, event_type: str, event_handler: types.MethodType):
        """
        Registers the given event handler on the dispatcher for the given event type.

        Args:
            event_type (str): The type of the event to add the event handler to.
                              Must not be `None` or empty string.
            event_handler (types.MethodType): The event handler to register for the given event
                                              type of the dispatcher. Must not be `None`.

        Raises:
            ValueError: If any of the parameters have an incorrect type.
        """
        raise NotImplementedError("EventDispatcherBase is abstract, "
                                  "its child classes must override all its methods.")

    def dispatch_event(self, event: "Event"):
        """
        Dispatches the given event.

        It is the duty of this method to set the target of the dispatched event by calling
        `event.set_target(self)`.

        Args:
            event (Event): The event to dispatch. Must not be `None`.

        Raises:
            TypeError: If the event is `None` or its type is incorrect.
        """
        raise NotImplementedError("EventDispatcherBase is abstract, "
                                  "its child classes must override all its methods.")

    def remove_event_listener(self, event_type: str, event_handler: types.MethodType):
        """
        Removes the given event listener registered on the dispatcher for the given event type.

        Args:
            event_type (str): The type of the event to remove the event handler from.
                              Must not be `None` or empty string.
            event_handler (types.MethodType): The event handler to remove from the given event type
                                              of the dispatcher. Must not be `None`.

        Raises:
            ValueError: If any of the parameters are invalid.
        """
        raise NotImplementedError("EventDispatcherBase is abstract, "
                                  "its child classes must override all its methods.")


class EventDispatcher(EventDispatcherBase):
    """A simple `EventDispatcherBase` implementation."""

    # Initialization
    # ------------------------------------------------------------

    def __init__(self):
        """Constructor."""
        # Maps event types to the handlers registered for them. The inner dict is used
        # as a set of handlers that remembers the order in which they were registered.
        # TODO: use a dict[str, set] instead.
        # NOTE: a plain set would make the order in which handlers are called arbitrary.
        self._registered_listeners: dict[str, dict[types.MethodType, bool]] = {}

    # Public methods
    # ------------------------------------------------------------

    def add_event_listener(self, event_type: str, event_handler: types.MethodType):
        """
        Registers the given event handler on the dispatcher for the given event type.

        Registering the same handler for the same event type more than once has no effect.

        Args:
            event_type (str): The type of the event to add the event handler to.
                              Must not be `None` or empty string.
            event_handler (types.MethodType): The event handler to register for the given event
                                              type of the dispatcher. Must not be `None`.

        Raises:
            ValueError: If any of the parameters have an incorrect type.
        """
        self._validate_listener_arguments(event_type, event_handler)

        # Assigning an already registered handler again keeps its original position,
        # so one listener function can only be added once.
        self._registered_listeners.setdefault(event_type, {})[event_handler] = True

    def dispatch_event(self, event: "Event"):
        """
        Dispatches the given event.

        It is the duty of this method to set the target of the dispatched event by calling
        `event.set_target(self)`.

        Handlers are called in registration order. Handlers may add or remove listeners
        while the event is being dispatched: a handler that is added during the dispatch
        is only called for later events, and a handler that is removed before its turn
        is not called.

        Args:
            event (Event): The event to dispatch. Must not be `None`.

        Raises:
            TypeError: If the event is `None` or its type is incorrect.
        """
        if not isinstance(event, Event):
            raise TypeError("Invalid event: {}".format(event))

        # Set the target of the event if it doesn't have one already. It could happen that
        # we are simply redispatching an event.
        if event.target is None:
            event.set_target(self)

        listeners = self._registered_listeners.get(event.type)
        if not listeners:
            return

        # Iterate over a snapshot: handlers are allowed to change the registered listeners,
        # which would otherwise invalidate the iteration over the dictionary.
        for listener in list(listeners):
            if listener in listeners:
                listener(event)

    def remove_event_listener(self, event_type: str, event_handler: types.MethodType):
        """
        Removes the given event listener registered on the dispatcher for the given event type.

        Nothing happens if the handler is not registered for the given event type.

        Args:
            event_type (str): The type of the event to remove the event handler from.
                              Must not be `None` or empty string.
            event_handler (types.MethodType): The event handler to remove from the given event
                                              type of the dispatcher. Must not be `None`.

        Raises:
            ValueError: If any of the parameters are invalid.
        """
        self._validate_listener_arguments(event_type, event_handler)

        listeners = self._registered_listeners.get(event_type)
        if listeners is not None:
            listeners.pop(event_handler, None)

    # Private methods
    # ------------------------------------------------------------

    @staticmethod
    def _validate_listener_arguments(event_type: str, event_handler: types.MethodType):
        """
        Checks the arguments of the listener registration methods.

        Raises:
            ValueError: If `event_type` is not a non-empty string or `event_handler`
                        is not a bound method.
        """
        # TODO: we should also accept types.FunctionType,
        # don't forget the documentation here and in the interface.
        if not isinstance(event_type, str) or event_type == "" or\
                not isinstance(event_handler, types.MethodType):
            raise ValueError("Invalid arguments: {}, {}".format(event_type, event_handler))


class Event(object):
    """The base event class."""

    # Initialization
    # ------------------------------------------------------------

    def __init__(self, event_type: str):
        """
        Constructor.

        Args:
            event_type (str): The type - string identifier - of the event.
                              Must not be `None` or empty string.

        Raises:
            TypeError: If `event_type` is not a non-empty string.
        """
        if not isinstance(event_type, str) or event_type == "":
            raise TypeError("Invalid event type: {}".format(event_type))

        self._event_type: str = event_type
        # Stays `None` until a dispatcher calls `set_target()`.
        self._target: EventDispatcherBase = None

    # Public properties
    # ------------------------------------------------------------

    @property
    def target(self) -> EventDispatcherBase:
        """
        The event dispatcher object that dispatched this event.
        """
        return self._target

    @property
    def type(self) -> str:
        """
        The type of the event.
        """
        return self._event_type

    # Public methods
    # ------------------------------------------------------------

    def set_target(self, target: EventDispatcherBase):
        """
        This method should be called by the event dispatcher that dispatches this event
        to set its target property.

        Args:
            target (EventDispatcherBase): The event dispatcher that will dispatch this event.

        Raises:
            PermissionError: If the target property of the event has already been set.
            TypeError: If `target` is not an `EventDispatcherBase` instance.
        """
        if self._target is not None:
            raise PermissionError("The target property already has a valid value.")

        if not isinstance(target, EventDispatcherBase):
            raise TypeError("Invalid target type: {}".format(target))

        self._target = target
+ """ + super(IGraphWrapper, self).__init__(self._create_memory_database_interface()) + + if not isinstance(graph, IGraph): + raise ValueError("Invalid graph instance provided to IGraphWrapper") + + self._wrapped_graph: IGraph = graph + """The wrapped igraph `Graph` object.""" + + # Properties + # ------------------------------------------------------------ + + @property + def wrapped_graph(self) -> IGraph: + """ + The wrapped igraph `Graph` object. + """ + return self._wrapped_graph + + # Public methods + # ------------------------------------------------------------ + + def get_authentic_node_name(self, node_name: str) -> Optional[str]: + """ + Returns the exact, authentic node name for the given node name if a node corresponding to + the given name exists in the graph (maybe not locally yet) or `None` otherwise. + + By default, this method checks whether a node with the given name exists locally in the + graph and return `node_name` if it does or `None` otherwise. + + In `Graph` extensions that are used by applications where the user can enter potentially + incorrect node names, this method should be overridden to improve usability. + + Arguments: + node_name (str): The node name to return the authentic node name for. + + Returns: + The authentic name of the node corresponding to the given node name or + `None` if no such node exists. + """ + # Is there a node with the given name? + vertex: IGraphVertex = None + try: + vertex: IGraphVertex = self._wrapped_graph.vs.find(node_name) + except ValueError: + pass + + # Is node_name a node index? 
class IGraphNode(Node):
    """
    `Node` extension that takes its neighbors from the corresponding `IGraphWrapper`'s
    wrapped igraph `Graph`.
    """

    # Initialization
    # ------------------------------------------------------------

    def __init__(self, graph: IGraphWrapper, index: int, name: str):
        """
        Initialization.

        Arguments:
            graph (IGraphWrapper): The graph that owns this node.
            index (int): The unique index of the node in the graph.
            name (str): The name of the node. It must be either the name of a vertex
                        of the wrapped igraph graph or the string form of a vertex index.

        Raises:
            ValueError: If the wrapped igraph graph has no vertex for the given name.
        """
        super(IGraphNode, self).__init__(graph, index, name)

        vertices = graph.wrapped_graph.vs
        try:
            vertex: IGraphVertex = vertices.find(name)
        except ValueError:
            # The lookup by name failed, so `name` is treated as a vertex index. A name
            # that is neither a number nor a valid index must be reported as a missing
            # vertex instead of leaking the conversion or indexing error.
            try:
                vertex = vertices[int(name)]
            except (ValueError, IndexError) as error:
                raise ValueError(
                    "The wrapped igraph graph doesn't have a vertex with the given name."
                ) from error

        self._igraph_index: int = vertex.index
        """The index of the corresponding node in the igraph `Graph` instance."""

    # Properties
    # ------------------------------------------------------------

    @property
    def igraph_index(self) -> int:
        """
        The index of the corresponding node in the igraph `Graph` instance.
        """
        return self._igraph_index

    @property
    def igraph_vertex(self) -> IGraphVertex:
        """
        The vertex in the igraph `Graph` this node represents.
        """
        return self._graph.wrapped_graph.vs[self._igraph_index]

    # Private methods
    # ------------------------------------------------------------

    def _load_neighbors_from_external_source(self) -> None:
        """
        Loads the neighbors of the node from the igraph `Graph` instance that is
        wrapped by the graph that has this node.

        Every neighbor is looked up in (or loaded into) the node list of the graph
        by name and an edge is added between this node and the neighbor.
        """
        graph: IGraphWrapper = self._graph
        ig_vertex: IGraphVertex = graph.wrapped_graph.vs[self._igraph_index]
        ig_neighbors: List[IGraphVertex] = ig_vertex.neighbors()
        for ig_neighbor in ig_neighbors:
            # Vertices without a "name" attribute are identified by their index.
            try:
                name: str = ig_neighbor["name"]
            except KeyError:
                name: str = str(ig_neighbor.index)

            neighbor: IGraphNode = graph.nodes.get_node_by_name(name, can_validate_and_load=True)
            graph.add_edge(self, neighbor)


class IGraphNodeList(NodeList):
    """
    `NodeList` extension that creates `IGraphNode` instances.
    """

    # Private methods
    # ------------------------------------------------------------

    def _create_node(self, index: int, name: str) -> IGraphNode:
        """
        Returns a new `IGraphNode` instance with the given index and name.

        Arguments:
            index (int): The index of the node to create.
            name (str): The name of the node to create.
        """
        return IGraphNode(graph=self._graph, index=index, name=name)