From 4c87ca18df9ab0c4cd360e80b7c283c40654d027 Mon Sep 17 00:00:00 2001
From: Caglar Demir
Date: Fri, 18 Oct 2024 11:01:01 +0200
Subject: [PATCH] We need to ignore sampled learning problems if they lead Top
 to have 0 quality. It pertains to
 https://github.com/dice-group/Ontolearn/issues/447

---
 ontolearn/learners/drill.py | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/ontolearn/learners/drill.py b/ontolearn/learners/drill.py
index abf82af4..8e46aefa 100644
--- a/ontolearn/learners/drill.py
+++ b/ontolearn/learners/drill.py
@@ -51,8 +51,8 @@
 import torch
 from ontolearn.data_struct import PrepareBatchOfTraining, PrepareBatchOfPrediction
 from tqdm import tqdm
-from owlapy.utils import OWLClassExpressionLengthMetric
 from ..utils.static_funcs import make_iterable_verbose
+from owlapy.utils import get_expression_length
 
 
 class Drill(RefinementBasedConceptLearner):  # pragma: no cover
@@ -173,7 +173,8 @@ def initialize_training_class_expression_learning_problem(self,
                                                               neg: FrozenSet[OWLNamedIndividual]) -> RL_State:
         """ Initialize """
         assert isinstance(pos, frozenset) and isinstance(neg, frozenset), "Pos and neg must be sets"
-        assert 0 < len(pos) and 0 < len(neg)
+        assert 0 < len(pos) and 0 < len(neg), ("Positive and negative examples must each contain at least one item.\n"
+                                               f"Currently: Pos: {len(pos)}\tNeg: {len(neg)}\n")
         # print("Initializing learning problem")
         # (2) Obtain embeddings of positive and negative examples.
         self.init_embeddings_of_examples(pos_uri=pos, neg_uri=neg)
@@ -247,7 +248,7 @@ def train(self, dataset: Optional[Iterable[Tuple[str, Set, Set]]] = None,
         """
 
         if isinstance(self.heuristic_func, CeloeBasedReward):
-            print("No training")
+            print("No training...")
             return self.terminate_training()
 
         if self.verbose > 0:
@@ -257,6 +258,9 @@ def train(self, dataset: Optional[Iterable[Tuple[str, Set, Set]]] = None,
         else:
             training_data = self.generate_learning_problems(num_of_target_concepts,
                                                             num_learning_problems)
+        if not isinstance(training_data, Iterable):
+            print(f"Could not generate training data on the given knowledge base ({self.kb})")
+            return self.terminate_training()
 
         for (target_owl_ce, positives, negatives) in training_data:
             print(f"\nGoal Concept:\t {target_owl_ce}\tE^+:[{len(positives)}]\t E^-:[{len(negatives)}]")
@@ -319,7 +323,7 @@ def fit(self, learning_problem: PosNegLPStandard, max_runtime=None):
         root_state = self.initialize_training_class_expression_learning_problem(pos=learning_problem.pos,
                                                                                 neg=learning_problem.neg)
         self.operator.set_input_examples(pos=learning_problem.pos, neg=learning_problem.neg)
-        assert root_state.quality > 0, f"Root state {root_state} must have quality >0"
+        assert root_state.quality > 0, f"Root state {root_state} must have quality > 0"
         # (5) Add root state into search tree
         root_state.heuristic = root_state.quality
         self.search_tree.add(root_state)
@@ -337,7 +341,7 @@ def fit(self, learning_problem: PosNegLPStandard, max_runtime=None):
         for _ in make_iterable_verbose(range(0, self.iter_bound),
                                        verbose=self.verbose,
                                        desc=f"Learning OWL Class Expression at most {self.iter_bound} iteration"):
-            assert len(self.search_tree) > 0
+            assert len(self.search_tree) > 0, "Search tree cannot be empty!"
             self.search_tree.show_current_search_tree()
             # (6.1) Get the most fitting RL-state.
            most_promising = self.next_node_to_expand()
@@ -419,8 +423,7 @@ def create_rl_state(self, c: OWLClassExpression, parent_node: Optional[RL_State]
                         is_root: bool = False) -> RL_State:
         """ Create an RL_State instance."""
         rl_state = RL_State(c, parent_node=parent_node, is_root=is_root)
-        # TODO: Will be fixed by https://github.com/dice-group/owlapy/issues/35
-        rl_state.length = OWLClassExpressionLengthMetric.get_default().length(c)
+        rl_state.length = get_expression_length(c)
         return rl_state
 
     def compute_quality_of_class_expression(self, state: RL_State) -> None:
@@ -455,8 +458,8 @@ def sequence_of_actions(self, root_rl_state: RL_State) \
         current_state = root_rl_state
         path_of_concepts = []
         rewards = []
-        assert current_state.quality > 0
-        assert current_state.heuristic is None
+        assert current_state.quality > 0, f"Root state ({current_state}) must have quality > 0.\tCurrently: {current_state.quality}"
+        assert current_state.heuristic is None, f"Root state ({current_state}) must not have a heuristic value yet.\tCurrently: {current_state.heuristic}"
         # (1)
         for _ in range(self.num_of_sequential_actions):
             assert isinstance(current_state, RL_State)
@@ -745,11 +748,15 @@ def generate_learning_problems(self,
                 individuals_j = set(self.kb.individuals(j))
                 if len(individuals_j) < size_of_examples:
                     continue
+                # Generate learning problems from a single target concept.
                 for _ in range(num_of_target_concepts):
-                    lp = (str_dl_concept_i,
-                          set(random.sample(individuals_i, size_of_examples)),
-                          set(random.sample(individuals_j, size_of_examples)))
+                    sampled_positives = set(random.sample(individuals_i, size_of_examples))
+                    sampled_negatives = set(random.sample(individuals_j, size_of_examples))
+                    if sampled_positives == sampled_negatives:
+                        print("Sampled positives and negatives are identical; skipping this learning problem.")
+                        continue
+                    lp = (str_dl_concept_i, sampled_positives, sampled_negatives)
                     examples.append(lp)
                     counter += 1
                     if counter == num_learning_problems:
                         break
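
Note: for readers who want to see the core idea of the last hunk in isolation, the sketch below reproduces the sampling guard outside of Drill. It is a minimal, self-contained illustration, not Ontolearn API; the helper name `sample_learning_problem` and its signature are made up for this example. As the subject line states, if the sampled positives and negatives coincide, the learning problem would lead Top to have 0 quality and trip the `root_state.quality > 0` assertion in fit(), so the triple is discarded.

    # Minimal sketch (hypothetical helper, not Ontolearn API) of the
    # duplicate-sample guard added to generate_learning_problems.
    import random
    from typing import Optional, Set, Tuple

    def sample_learning_problem(target: str,
                                individuals_i: Set[str],
                                individuals_j: Set[str],
                                size_of_examples: int) -> Optional[Tuple[str, Set[str], Set[str]]]:
        # random.sample requires a sequence on Python 3.11+, hence list(...).
        positives = set(random.sample(list(individuals_i), size_of_examples))
        negatives = set(random.sample(list(individuals_j), size_of_examples))
        if positives == negatives:
            # Identical example sets would lead the root state (Top) to have
            # 0 quality (see issue 447), so the sampled problem is ignored.
            return None
        return target, positives, negatives

    # Degenerate case: both pools identical and fully sampled -> always skipped.
    print(sample_learning_problem("Person", {"a", "b", "c"}, {"a", "b", "c"}, 3))  # None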