diff --git a/docker-compose.yml b/docker-compose.yml index d1857a2d..93808b77 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,7 @@ version: "3.9" services: neo4j: restart: ${RESTART_POLICY:-no} - image: neo4j:5.3.0-community + image: neo4j:5.14.0-community ports: # admin console - "${NEO4J_ADMIN_EXPOSE:-127.0.0.1:7474}:7474" diff --git a/parser/Makefile b/parser/Makefile index 83a0b6d6..dc6e744e 100644 --- a/parser/Makefile +++ b/parser/Makefile @@ -13,7 +13,7 @@ quality: tests: cd .. && docker compose up -d neo4j - pytest . + poetry run pytest . # we do not shutdown neo4j checks: quality tests \ No newline at end of file diff --git a/parser/openfoodfacts_taxonomy_parser/normalizer.py b/parser/openfoodfacts_taxonomy_parser/normalizer.py index 68e1e635..7a16b663 100644 --- a/parser/openfoodfacts_taxonomy_parser/normalizer.py +++ b/parser/openfoodfacts_taxonomy_parser/normalizer.py @@ -7,7 +7,7 @@ import unidecode -def normalizing(line, lang="default", char="-"): +def normalizing(line: str, lang="default", char="-"): """Normalize a string depending on the language code""" line = unicodedata.normalize("NFC", line) diff --git a/parser/openfoodfacts_taxonomy_parser/parser.py b/parser/openfoodfacts_taxonomy_parser/parser.py deleted file mode 100644 index cd346928..00000000 --- a/parser/openfoodfacts_taxonomy_parser/parser.py +++ /dev/null @@ -1,495 +0,0 @@ -import logging -import os -import re -import sys - -import iso639 -from neo4j import GraphDatabase - -from .exception import DuplicateIDError -from .normalizer import normalizing - - -def ellipsis(text, max=20): - """Cut a text adding eventual ellipsis if we do not display it fully""" - return text[:max] + ("..." if len(text) > max else "") - - -class ParserConsoleLogger: - def __init__(self): - self.parsing_warnings = [] # Stores all warning logs - self.parsing_errors = [] # Stores all error logs - - def info(self, msg, *args, **kwargs): - """Stores all parsing info logs""" - logging.info(msg, *args, **kwargs) - - def warning(self, msg, *args, **kwargs): - """Stores all parsing warning logs""" - self.parsing_warnings.append(msg % args) - logging.warning(msg, *args, **kwargs) - - def error(self, msg, *args, **kwargs): - """Stores all parsing error logs""" - self.parsing_errors.append(msg % args) - logging.error(msg, *args, **kwargs) - - -class Parser: - """Parse a taxonomy file and build a neo4j graph""" - - def __init__(self, session): - self.session = session - self.parser_logger = ParserConsoleLogger() - - def create_headernode(self, header, multi_label): - """Create the node for the header""" - query = f""" - CREATE (n:{multi_label}:TEXT) - SET n.id = '__header__' - SET n.preceding_lines= $header - SET n.src_position= 1 - """ - self.session.run(query, header=header) - - def create_node(self, data, multi_label): - """Run the query to create the node with data dictionary""" - position_query = """ - SET n.id = $id - SET n.is_before = $is_before - SET n.preceding_lines = $preceding_lines - SET n.src_position = $src_position - """ - entry_query = "" - if data["id"] == "__footer__": - id_query = f" CREATE (n:{multi_label}:TEXT) \n " - elif data["id"].startswith("synonyms"): - id_query = f" CREATE (n:{multi_label}:SYNONYMS) \n " - elif data["id"].startswith("stopwords"): - id_query = f" CREATE (n:{multi_label}:STOPWORDS) \n " - else: - id_query = f" CREATE (n:{multi_label}:ENTRY) \n " - position_query += " SET n.main_language = $main_language " - if data["parent_tag"]: - entry_query += " SET n.parents = $parent_tag \n" - for 
key in data: - if key.startswith("prop_"): - entry_query += " SET n." + key + " = $" + key + "\n" - - for key in data: - if key.startswith("tags_"): - entry_query += " SET n." + key + " = $" + key + "\n" - - query = id_query + entry_query + position_query - self.session.run(query, data) - - def normalized_filename(self, filename): - """Add the .txt extension if it is missing in the filename""" - return filename + (".txt" if (len(filename) < 4 or filename[-4:] != ".txt") else "") - - def get_project_name(self, taxonomy_name, branch_name): - """Create a project name for given branch and taxonomy""" - return "p_" + taxonomy_name + "_" + branch_name - - def create_multi_label(self, taxonomy_name, branch_name): - """Create a combined label with taxonomy name and branch name""" - project_name = self.get_project_name(taxonomy_name, branch_name) - return project_name + ":" + ("t_" + taxonomy_name) + ":" + ("b_" + branch_name) - - def file_iter(self, filename, start=0): - """Generator to get the file line by line""" - with open(filename, "r", encoding="utf8") as file: - for line_number, line in enumerate(file): - if line_number < start: - continue - # sanitizing - # remove any space characters at end of line - line = line.rstrip() - # replace ’ (typographique quote) to simple quote ' - line = line.replace("’", "'") - # replace commas that have no space around by a lower comma character - # and do the same for escaped comma (preceded by a \) - # (to distinguish them from commas acting as tags separators) - line = re.sub(r"(\d),(\d)", r"\1‚\2", line) - line = re.sub(r"\\,", "\\‚", line) - # removes parenthesis for roman numeral - line = re.sub(r"\(([ivx]+)\)", r"\1", line, flags=re.I) - yield line_number, line - yield line_number, "" # to end the last entry if not ended - - def remove_stopwords(self, lc, words): - """Remove the stopwords that were read at the beginning of the file""" - # First check if this language has stopwords - if lc in self.stopwords: - words_to_remove = self.stopwords[lc] - new_words = [] - for word in words.split("-"): - if word not in words_to_remove: - new_words.append(word) - return ("-").join(new_words) - else: - return words - - def add_line(self, line): - """ - Get a normalized string but keeping the language code "lc:", - used for id and parent tag - """ - lc, line = line.split(":", 1) - new_line = lc + ":" - new_line += self.remove_stopwords(lc, normalizing(line, lc)) - return new_line - - def get_lc_value(self, line): - """Get the language code "lc" and a list of normalized values""" - lc, line = line.split(":", 1) - new_line = [] - for word in line.split(","): - new_line.append(self.remove_stopwords(lc, normalizing(word, lc))) - return lc, new_line - - def new_node_data(self, is_before): - """To create an empty dictionary that will be used to create node""" - data = { - "id": "", - "main_language": "", - "preceding_lines": [], - "parent_tag": [], - "src_position": None, - "is_before": is_before, - } - return data - - def set_data_id(self, data, id, line_number): - if not data["id"]: - data["id"] = id - else: - raise DuplicateIDError(line_number) - return data - - def header_harvest(self, filename): - """ - Harvest the header (comment with #), - it has its own function because some header has multiple blocks - """ - h = 0 - header = [] - for _, line in self.file_iter(filename): - if not (line) or line[0] == "#": - header.append(line) - else: - break - h += 1 - - # we don't want to eat the comments of the next block - # and it removes the last separating line - for i 
in range(len(header)): - if header.pop(): - h -= 1 - else: - break - - return header, h - - def entry_end(self, line, data): - """Return True if the block ended""" - # stopwords and synonyms are one-liner, entries are separated by a blank line - if line.startswith("stopwords") or line.startswith("synonyms") or not line: - # can be the end of an block or just additional line separator, - # file_iter() always end with '' - if data["id"]: # to be sure that it's an end - return True - return False - - def remove_separating_line(self, data): - """ - To remove the one separating line that is always there, - between synonyms part and stopwords part and before each entry - """ - is_before = data["is_before"] - # first, check if there is at least one preceding line - if data["preceding_lines"] and not data["preceding_lines"][0]: - if data["id"].startswith("synonyms"): - # it's a synonyms block, - # if the previous block is a stopwords block, - # there is at least one separating line - if "stopwords" in is_before: - data["preceding_lines"].pop(0) - - elif data["id"].startswith("stopwords"): - # it's a stopwords block, - # if the previous block is a synonyms block, - # there is at least one separating line - if "synonyms" in is_before: - data["preceding_lines"].pop(0) - - else: - # it's an entry block, there is always a separating line - data["preceding_lines"].pop(0) - return data - - def harvest(self, filename): - """Transform data from file to dictionary - """ - saved_nodes = [] - index_stopwords = 0 - index_synonyms = 0 - language_code_prefix = re.compile( - r"[a-zA-Z][a-zA-Z][a-zA-Z]?([-_][a-zA-Z][a-zA-Z][a-zA-Z]?)?:" - ) - # Check if it is correctly written - correctly_written = re.compile(r"\w+\Z") - # stopwords will contain a list of stopwords with their language code as key - self.stopwords = {} - - # header - header, next_line = self.header_harvest(filename) - yield header - - # the other entries - data = self.new_node_data(is_before="__header__") - data["is_before"] = "__header__" - for line_number, line in self.file_iter(filename, next_line): - # yield data if block ended - if self.entry_end(line, data): - if data["id"] in saved_nodes: - msg = ( - "Entry with same id %s already created, " - "duplicate id in file at line %s. " - "Node creation cancelled." - ) - self.parser_logger.error(msg, data['id'], data['src_position']) - else: - data = self.remove_separating_line(data) - yield data # another function will use this dictionary to create a node - saved_nodes.append(data["id"]) - data = self.new_node_data(is_before=data["id"]) - - # harvest the line - if not (line) or line[0] == "#": - # comment or blank - data["preceding_lines"].append(line) - else: - line = line.rstrip(",") - if not data["src_position"]: - data["src_position"] = line_number + 1 - if line.startswith("stopwords"): - # general stopwords definition for a language - id = "stopwords:" + str(index_stopwords) - data = self.set_data_id(data, id, line_number) - index_stopwords += 1 - try: - lc, value = self.get_lc_value(line[10:]) - except ValueError: - self.parser_logger.error( - "Missing language code at line %d ? 
'%s'", - line_number + 1, - ellipsis(line), - ) - else: - data["tags_" + lc] = value - # add the list with its lc - self.stopwords[lc] = value - elif line.startswith("synonyms"): - # general synonyms definition for a language - id = "synonyms:" + str(index_synonyms) - data = self.set_data_id(data, id, line_number) - index_synonyms += 1 - line = line[9:] - tags = [words.strip() for words in line[3:].split(",")] - try: - lc, value = self.get_lc_value(line) - except ValueError: - self.parser_logger.error( - "Missing language code at line %d ? '%s'", - line_number + 1, - ellipsis(line), - ) - else: - data["tags_" + lc] = tags - data["tags_ids_" + lc] = value - elif line[0] == "<": - # parent definition - data["parent_tag"].append(self.add_line(line[1:])) - elif language_code_prefix.match(line): - # synonyms definition - if not data["id"]: - data["id"] = self.add_line(line.split(",", 1)[0]) - # first 2-3 characters before ":" are the language code - data["main_language"] = data["id"].split(":", 1)[0] - # add tags and tagsid - lang, line = line.split(":", 1) - # to transform '-' from language code to '_' - lang = lang.strip().replace("-", "_") - tags_list = [] - tagsids_list = [] - for word in line.split(","): - tags_list.append(word.strip()) - word_normalized = self.remove_stopwords(lang, normalizing(word, lang)) - if word_normalized not in tagsids_list: - # in case 2 normalized synonyms are the same - tagsids_list.append(word_normalized) - data["tags_" + lang] = tags_list - data["tags_ids_" + lang] = tagsids_list - else: - # property definition - property_name = None - try: - property_name, lc, property_value = line.split(":", 2) - except ValueError: - self.parser_logger.error( - "Reading error at line %d, unexpected format: '%s'", - line_number + 1, - ellipsis(line), - ) - else: - # in case there is space before or after the colons - property_name = property_name.strip() - lc = lc.strip().replace("-", "_") - if not ( - correctly_written.match(property_name) and correctly_written.match(lc) - ): - self.parser_logger.error( - "Reading error at line %d, unexpected format: '%s'", - line_number + 1, - ellipsis(line), - ) - if property_name: - data["prop_" + property_name + "_" + lc] = property_value - - data["id"] = "__footer__" - data["preceding_lines"].pop(0) - data["src_position"] = line_number + 1 - len(data["preceding_lines"]) - yield data - - def create_nodes(self, filename, multi_label): - """Adding nodes to database""" - self.parser_logger.info("Creating nodes") - harvested_data = self.harvest(filename) - self.create_headernode(next(harvested_data), multi_label) - for entry in harvested_data: - self.create_node(entry, multi_label) - - def create_previous_link(self, multi_label): - self.parser_logger.info("Creating 'is_before' links") - query = f"MATCH(n:{multi_label}) WHERE n.is_before IS NOT NULL return n.id, n.is_before" - results = self.session.run(query) - for result in results: - id = result["n.id"] - id_previous = result["n.is_before"] - - query = f""" - MATCH(n:{multi_label}) WHERE n.id = $id - MATCH(p:{multi_label}) WHERE p.id= $id_previous - CREATE (p)-[r:is_before]->(n) - RETURN r - """ - results = self.session.run(query, id=id, id_previous=id_previous) - relation = results.values() - if len(relation) > 1: - self.parser_logger.error( - "2 or more 'is_before' links created for ids %s and %s, " - "one of the ids isn't unique", - id, - id_previous, - ) - elif not relation[0]: - self.parser_logger.error("link not created between %s and %s", id, id_previous) - - def parent_search(self, 
multi_label): - """Get the parent and the child to link""" - query = f"MATCH (n:{multi_label}:ENTRY) WHERE SIZE(n.parents)>0 RETURN n.id, n.parents" - results = self.session.run(query) - for result in results: - id = result["n.id"] - parent_list = result["n.parents"] - for parent in parent_list: - yield parent, id - - def create_child_link(self, multi_label): - """Create the relations between nodes""" - self.parser_logger.info("Creating 'is_child_of' links") - for parent, child_id in self.parent_search(multi_label): - lc, parent_id = parent.split(":") - query = f""" MATCH (p:{multi_label}:ENTRY) WHERE $parent_id IN p.tags_ids_""" + lc - query += f""" - MATCH (c:{multi_label}) WHERE c.id= $child_id - CREATE (c)-[r:is_child_of]->(p) - RETURN r - """ - result = self.session.run(query, parent_id=parent_id, child_id=child_id) - if not result.value(): - self.parser_logger.warning( - f"parent not found for child {child_id} with parent {parent_id}" - ) - - def delete_used_properties(self): - query = "MATCH (n) SET n.is_before = null, n.parents = null" - self.session.run(query) - - def create_fulltext_index(self, taxonomy_name, branch_name): - """Create indexes for search""" - project_name = self.get_project_name(taxonomy_name, branch_name) - query = [ - f"""CREATE FULLTEXT INDEX {project_name+'_SearchIds'} IF NOT EXISTS - FOR (n:{project_name}) ON EACH [n.id]\n""" - ] - query.append("""OPTIONS {indexConfig: {`fulltext.analyzer`: 'keyword'}}""") - self.session.run("".join(query)) - - language_codes = [lang.alpha2 for lang in list(iso639.languages) if lang.alpha2 != ""] - tags_prefixed_lc = ["n.tags_" + lc for lc in language_codes] - tags_prefixed_lc = ", ".join(tags_prefixed_lc) - query = f"""CREATE FULLTEXT INDEX {project_name+'_SearchTags'} IF NOT EXISTS - FOR (n:{project_name}) ON EACH [{tags_prefixed_lc}]""" - self.session.run(query) - - def create_parsing_errors_node(self, taxonomy_name, branch_name): - """Create node to list parsing errors""" - multi_label = self.create_multi_label(taxonomy_name, branch_name) - query = f""" - CREATE (n:{multi_label}:ERRORS) - SET n.id = $project_name - SET n.branch_name = $branch_name - SET n.taxonomy_name = $taxonomy_name - SET n.created_at = datetime() - SET n.warnings = $warnings_list - SET n.errors = $errors_list - """ - params = { - "project_name": self.get_project_name(taxonomy_name, branch_name), - "branch_name": branch_name, - "taxonomy_name": taxonomy_name, - "warnings_list": self.parser_logger.parsing_warnings, - "errors_list": self.parser_logger.parsing_errors, - } - self.session.run(query, params) - - def __call__(self, filename, branch_name, taxonomy_name): - """Process the file""" - filename = self.normalized_filename(filename) - branch_name = normalizing(branch_name, char="_") - multi_label = self.create_multi_label(taxonomy_name, branch_name) - self.create_nodes(filename, multi_label) - self.create_child_link(multi_label) - self.create_previous_link(multi_label) - self.create_fulltext_index(taxonomy_name, branch_name) - self.create_parsing_errors_node(taxonomy_name, branch_name) - # self.delete_used_properties() - - -if __name__ == "__main__": - # Setup logs - logging.basicConfig(handlers=[logging.StreamHandler()], level=logging.INFO) - filename = sys.argv[1] if len(sys.argv) > 1 else "test" - branch_name = sys.argv[2] if len(sys.argv) > 1 else "branch" - taxonomy_name = sys.argv[3] if len(sys.argv) > 1 else filename.rsplit(".", 1)[0] - - # Initialize neo4j - uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687") - driver = 
GraphDatabase.driver(uri)
-    session = driver.session()
-
-    # Pass session variable to parser object
-    parse = Parser(session)
-    parse(filename, branch_name, taxonomy_name)
diff --git a/parser/openfoodfacts_taxonomy_parser/parser/__init__.py b/parser/openfoodfacts_taxonomy_parser/parser/__init__.py
new file mode 100644
index 00000000..9da82bc2
--- /dev/null
+++ b/parser/openfoodfacts_taxonomy_parser/parser/__init__.py
@@ -0,0 +1,2 @@
+from .parser import Parser
+from .taxonomy_parser import TaxonomyParser
diff --git a/parser/openfoodfacts_taxonomy_parser/exception.py b/parser/openfoodfacts_taxonomy_parser/parser/exception.py
similarity index 100%
rename from parser/openfoodfacts_taxonomy_parser/exception.py
rename to parser/openfoodfacts_taxonomy_parser/parser/exception.py
diff --git a/parser/openfoodfacts_taxonomy_parser/parser/logger.py b/parser/openfoodfacts_taxonomy_parser/parser/logger.py
new file mode 100644
index 00000000..555c5bb8
--- /dev/null
+++ b/parser/openfoodfacts_taxonomy_parser/parser/logger.py
@@ -0,0 +1,21 @@
+import logging
+
+
+class ParserConsoleLogger:
+    def __init__(self):
+        self.parsing_warnings = []  # Stores all warning logs
+        self.parsing_errors = []  # Stores all error logs
+
+    def info(self, msg, *args, **kwargs):
+        """Stores all parsing info logs"""
+        logging.info(msg, *args, **kwargs)
+
+    def warning(self, msg, *args, **kwargs):
+        """Stores all parsing warning logs"""
+        self.parsing_warnings.append(msg % args)
+        logging.warning(msg, *args, **kwargs)
+
+    def error(self, msg, *args, **kwargs):
+        """Stores all parsing error logs"""
+        self.parsing_errors.append(msg % args)
+        logging.error(msg, *args, **kwargs)
diff --git a/parser/openfoodfacts_taxonomy_parser/parser/parser.py b/parser/openfoodfacts_taxonomy_parser/parser/parser.py
new file mode 100644
index 00000000..d0af686d
--- /dev/null
+++ b/parser/openfoodfacts_taxonomy_parser/parser/parser.py
@@ -0,0 +1,199 @@
+import logging
+import os
+import sys
+
+import iso639
+from neo4j import GraphDatabase, Session
+
+from .logger import ParserConsoleLogger
+from ..normalizer import normalizing
+from .taxonomy_parser import (
+    NodeType,
+    PreviousLink,
+    TaxonomyParser,
+    NodeData,
+    ChildLink,
+)
+
+
+def ellipsis(text, max=20):
+    """Cut the text, adding an ellipsis if it is not displayed in full"""
+    return text[:max] + ("..." if len(text) > max else "")
+
+
+class Parser:
+    """Parse a taxonomy file and build a neo4j graph"""
+
+    def __init__(self, session: Session):
+        self.session = session
+        self.parser_logger = ParserConsoleLogger()
+
+    def _create_headernode(self, header: list[str], multi_label: str):
+        """Create the node for the header"""
+        query = f"""
+            CREATE (n:{multi_label}:TEXT)
+            SET n.id = '__header__'
+            SET n.preceding_lines= $header
+            SET n.src_position= 1
+        """
+        self.session.run(query, header=header)
+
+    def _create_node(self, node_data: NodeData, multi_label: str):
+        """Run the query to create the node with its data dictionary"""
+        position_query = """
+            SET n.id = $id
+            SET n.is_before = $is_before
+            SET n.preceding_lines = $preceding_lines
+            SET n.src_position = $src_position
+        """
+        entry_query = ""
+        if node_data.get_node_type() == NodeType.TEXT:
+            id_query = f" CREATE (n:{multi_label}:TEXT) \n "
+        elif node_data.get_node_type() == NodeType.SYNONYMS:
+            id_query = f" CREATE (n:{multi_label}:SYNONYMS) \n "
+        elif node_data.get_node_type() == NodeType.STOPWORDS:
+            id_query = f" CREATE (n:{multi_label}:STOPWORDS) \n "
+        else:
+            id_query = f" CREATE (n:{multi_label}:ENTRY) \n "
+            position_query += " SET n.main_language = $main_language "
+            if node_data.parent_tag:
+                entry_query += " SET n.parents = $parent_tag \n"
+            for key in node_data.properties:
+                if key.startswith("prop_"):
+                    entry_query += " SET n." + key + " = $" + key + "\n"
+
+        for key in node_data.tags:
+            if key.startswith("tags_"):
+                entry_query += " SET n." + key + " = $" + key + "\n"
+
+        query = id_query + entry_query + position_query
+        self.session.run(query, node_data.to_dict())
+
+    def _get_project_name(self, taxonomy_name: str, branch_name: str):
+        """Create a project name for the given branch and taxonomy"""
+        return "p_" + taxonomy_name + "_" + branch_name
+
+    def _create_multi_label(self, taxonomy_name: str, branch_name: str) -> str:
+        """Create a combined label with taxonomy name and branch name"""
+        project_name = self._get_project_name(taxonomy_name, branch_name)
+        return project_name + ":" + ("t_" + taxonomy_name) + ":" + ("b_" + branch_name)
+
+    def create_nodes(self, nodes: list[NodeData], multi_label: str):
+        """Add the nodes to the database"""
+        self.parser_logger.info("Creating nodes")
+        for node in nodes:
+            if node.id == "__header__":
+                self._create_headernode(node.preceding_lines, multi_label)
+            else:
+                self._create_node(node, multi_label)
+
+    def create_previous_link(self, previous_links: list[PreviousLink], multi_label: str):
+        self.parser_logger.info("Creating 'is_before' links")
+        for previous_link in previous_links:
+            id = previous_link["id"]
+            before_id = previous_link["before_id"]
+
+            query = f"""
+                MATCH(n:{multi_label}) WHERE n.id = $id
+                MATCH(p:{multi_label}) WHERE p.id= $before_id
+                CREATE (p)-[r:is_before]->(n)
+                RETURN r
+            """
+            results = self.session.run(query, id=id, before_id=before_id)
+            relation = results.values()
+            if len(relation) > 1:
+                self.parser_logger.error(
+                    "2 or more 'is_before' links created for ids %s and %s, "
+                    "one of the ids isn't unique",
+                    id,
+                    before_id,
+                )
+            elif not relation[0]:
+                self.parser_logger.error("link not created between %s and %s", id, before_id)
+
+    def create_child_link(self, child_links: list[ChildLink], multi_label: str):
+        """Create the relations between nodes"""
+        self.parser_logger.info("Creating 'is_child_of' links")
+        for child_link in child_links:
+            child_id = child_link["id"]
+            parent = child_link["parent_id"]
+            lc, parent_id = parent.split(":")
+            query = f""" MATCH (p:{multi_label}:ENTRY) WHERE $parent_id IN p.tags_ids_""" + lc
+            query += f"""
+                MATCH (c:{multi_label}) WHERE c.id= $child_id
+                CREATE (c)-[r:is_child_of]->(p)
+                RETURN r
+            """
+            result = self.session.run(query, parent_id=parent_id, child_id=child_id)
+            if not result.value():
+                self.parser_logger.warning(
+                    f"parent not found for child {child_id} with parent {parent_id}"
+                )
+
+    def _create_fulltext_index(self, taxonomy_name: str, branch_name: str):
+        """Create indexes for search"""
+        project_name = self._get_project_name(taxonomy_name, branch_name)
+        query = (
+            f"""CREATE FULLTEXT INDEX {project_name+'_SearchIds'} IF NOT EXISTS
+            FOR (n:{project_name}) ON EACH [n.id]\n"""
+            + """
+            OPTIONS {indexConfig: {`fulltext.analyzer`: 'keyword'}}"""
+        )
+        self.session.run(query)
+
+        language_codes = [lang.alpha2 for lang in list(iso639.languages) if lang.alpha2 != ""]
+        tags_prefixed_lc = ["n.tags_" + lc for lc in language_codes]
+        tags_prefixed_lc = ", ".join(tags_prefixed_lc)
+        query = f"""CREATE FULLTEXT INDEX {project_name+'_SearchTags'} IF NOT EXISTS
+            FOR (n:{project_name}) ON EACH [{tags_prefixed_lc}]"""
+        self.session.run(query)
+
+    def _create_parsing_errors_node(self, taxonomy_name: str, branch_name: str):
+        """Create node to list parsing errors"""
+        multi_label = self._create_multi_label(taxonomy_name, branch_name)
+        query = f"""
+            CREATE (n:{multi_label}:ERRORS)
+            SET n.id = $project_name
+            SET n.branch_name = $branch_name
+            SET n.taxonomy_name = $taxonomy_name
+            SET n.created_at = datetime()
+            SET n.warnings = $warnings_list
+            SET n.errors = $errors_list
+        """
+        params = {
+            "project_name": self._get_project_name(taxonomy_name, branch_name),
+            "branch_name": branch_name,
+            "taxonomy_name": taxonomy_name,
+            "warnings_list": self.parser_logger.parsing_warnings,
+            "errors_list": self.parser_logger.parsing_errors,
+        }
+        self.session.run(query, params)
+
+    def __call__(self, filename: str, branch_name: str, taxonomy_name: str):
+        """Process the file"""
+        branch_name = normalizing(branch_name, char="_")
+        multi_label = self._create_multi_label(taxonomy_name, branch_name)
+        taxonomy_parser = TaxonomyParser()
+        taxonomy = taxonomy_parser.parse_file(filename, self.parser_logger)
+        self.create_nodes([*taxonomy.entry_nodes, *taxonomy.other_nodes], multi_label)
+        self.create_child_link(taxonomy.child_links, multi_label)
+        self.create_previous_link(taxonomy.previous_links, multi_label)
+        self._create_fulltext_index(taxonomy_name, branch_name)
+        self._create_parsing_errors_node(taxonomy_name, branch_name)
+
+
+if __name__ == "__main__":
+    # Setup logs
+    logging.basicConfig(handlers=[logging.StreamHandler()], level=logging.INFO)
+    filename = sys.argv[1] if len(sys.argv) > 1 else "test"
+    branch_name = sys.argv[2] if len(sys.argv) > 1 else "branch"
+    taxonomy_name = sys.argv[3] if len(sys.argv) > 1 else filename.rsplit(".", 1)[0]
+
+    # Initialize neo4j
+    uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
+    driver = GraphDatabase.driver(uri)
+    session = driver.session()
+
+    # Pass session variable to parser object
+    parse = Parser(session)
+    parse(filename, branch_name, taxonomy_name)
diff --git a/parser/openfoodfacts_taxonomy_parser/parser/taxonomy_parser.py b/parser/openfoodfacts_taxonomy_parser/parser/taxonomy_parser.py
new file mode 100644
index 00000000..d46a52fa
--- /dev/null
+++ b/parser/openfoodfacts_taxonomy_parser/parser/taxonomy_parser.py
@@ -0,0 +1,393 @@
+import logging
+import re
+import sys
+import timeit
+from enum import Enum
+from dataclasses import dataclass, field
+from typing import Iterator, TypedDict
+
+from .logger import ParserConsoleLogger
+from .exception import DuplicateIDError
+from ..normalizer import normalizing
+
+
+def ellipsis(text, max=20):
+    """Cut the text, adding an ellipsis if it is not displayed in full"""
+    return text[:max] + ("..." if len(text) > max else "")
+
+
+class NodeType(str, Enum):
+    TEXT = "TEXT"
+    SYNONYMS = "SYNONYMS"
+    STOPWORDS = "STOPWORDS"
+    ENTRY = "ENTRY"
+
+
+@dataclass(slots=True)
+class NodeData:
+    id: str = ""
+    is_before: str | None = None
+    main_language: str | None = None
+    preceding_lines: list[str] = field(default_factory=list)
+    parent_tag: list[str] = field(default_factory=list)
+    src_position: int | None = None
+    properties: dict[str, str] = field(default_factory=dict)
+    tags: dict[str, list[str]] = field(default_factory=dict)
+
+    def to_dict(self):
+        return {
+            "id": self.id,
+            "is_before": self.is_before,
+            "main_language": self.main_language,
+            "preceding_lines": self.preceding_lines,
+            "parent_tag": self.parent_tag,
+            "src_position": self.src_position,
+            **self.properties,
+            **self.tags,
+        }
+
+    def get_node_type(self):
+        if self.id in ["__header__", "__footer__"]:
+            return NodeType.TEXT
+        elif self.id.startswith("synonyms"):
+            return NodeType.SYNONYMS
+        elif self.id.startswith("stopwords"):
+            return NodeType.STOPWORDS
+        else:
+            return NodeType.ENTRY
+
+
+class PreviousLink(TypedDict):
+    before_id: str
+    id: str
+
+
+class ChildLink(TypedDict):
+    parent_id: str
+    id: str
+
+
+@dataclass(slots=True)
+class Taxonomy:
+    entry_nodes: list[NodeData]
+    other_nodes: list[NodeData]
+    previous_links: list[PreviousLink]
+    child_links: list[ChildLink]
+
+
+class TaxonomyParser:
+    """Parse a taxonomy file"""
+
+    def __init__(self):
+        self.parser_logger = ParserConsoleLogger()
+
+    def _normalized_filename(self, filename: str) -> str:
+        """Add the .txt extension if it is missing in the filename"""
+        return filename + (".txt" if (len(filename) < 4 or filename[-4:] != ".txt") else "")
+
+    def _file_iter(self, filename: str, start: int = 0) -> Iterator[tuple[int, str]]:
+        """Generator to get the file line by line"""
+        with open(filename, "r", encoding="utf8") as file:
+            line_count = 0
+            for line_number, line in enumerate(file):
+                if line_number < start:
+                    continue
+                # sanitizing
+                # remove any space characters at end of line
+                line = line.rstrip()
+                # replace ’ (typographic quote) with simple quote '
+                line = line.replace("’", "'")
+                # replace commas that have no space around them with a low comma character
+                # and do the same for escaped commas (preceded by a \)
+                # (to distinguish them from commas acting as tags separators)
+                line = re.sub(r"(\d),(\d)", r"\1‚\2", line)
+                line = re.sub(r"\\,", "\\‚", line)
+                # remove parentheses around roman numerals
+                line = re.sub(r"\(([ivx]+)\)", r"\1", line, flags=re.I)
+                yield line_number, line
+                line_count += 1
+            yield line_count, ""  # to end the last entry if not ended
+
+    def _remove_stopwords(self, lc: str, words: str) -> str:
+        """Remove the stopwords that were read at the beginning of the file"""
+        # First check if this language has stopwords
+        if lc in self.stopwords:
+            words_to_remove = self.stopwords[lc]
+            new_words = []
+            for word in words.split("-"):
+                if word not in words_to_remove:
+                    new_words.append(word)
+            return ("-").join(new_words)
+        else:
+            return words
+
+    def _add_line(self, line: str) -> str:
+        """
+        Get a normalized string but keeping the language code "lc:",
+        used for id and parent tag
+        """
+        lc, line = line.split(":", 1)
+        new_line = lc + ":"
+        new_line += self._remove_stopwords(lc, normalizing(line, lc))
+        return new_line
+
+    def _get_lc_value(self, line: str) -> tuple[str, list[str]]:
+        """Get the language code "lc" and a list of normalized values"""
+        lc, line = line.split(":", 1)
+        new_line: list[str] = []
+        for word in line.split(","):
+            new_line.append(self._remove_stopwords(lc, normalizing(word, lc)))
+        return lc, new_line
+
+    def _set_data_id(self, data: NodeData, id: str, line_number: int) -> NodeData:
+        if not data.id:
+            data.id = id
+        else:
+            raise DuplicateIDError(line_number)
+        return data
+
+    def _header_harvest(self, filename: str) -> tuple[list[str], int]:
+        """
+        Harvest the header (comment with #),
+        it has its own function because some headers have multiple blocks
+        """
+        h = 0
+        header: list[str] = []
+        for _, line in self._file_iter(filename):
+            if not (line) or line[0] == "#":
+                header.append(line)
+            else:
+                break
+            h += 1
+
+        # we don't want to eat the comments of the next block
+        # and this removes the last separating line
+        for i in range(len(header)):
+            if header.pop():
+                h -= 1
+            else:
+                break
+
+        return header, h
+
+    def _entry_end(self, line: str, data: NodeData) -> bool:
+        """Return True if the block ended"""
+        # stopwords and synonyms are one-liners, entries are separated by a blank line
+        if line.startswith("stopwords") or line.startswith("synonyms") or not line:
+            # can be the end of a block or just an additional line separator,
+            # file_iter() always ends with ''
+            if data.id:  # to be sure that it's an end
+                return True
+        return False
+
+    def _remove_separating_line(self, data: NodeData) -> NodeData:
+        """
+        To remove the one separating line that is always there,
+        between synonyms part and stopwords part and before each entry
+        """
+        is_before = data.is_before
+        # first, check if there is at least one preceding line
+        if data.preceding_lines and not data.preceding_lines[0]:
+            if data.id.startswith("synonyms"):
+                # it's a synonyms block,
+                # if the previous block is a stopwords block,
+                # there is at least one separating line
+                if is_before and "stopwords" in is_before:
+                    data.preceding_lines.pop(0)
+
+            elif data.id.startswith("stopwords"):
+                # it's a stopwords block,
+                # if the previous block is a synonyms block,
+                # there is at least one separating line
+                if is_before and "synonyms" in is_before:
+                    data.preceding_lines.pop(0)
+
+            else:
+                # it's an entry block, there is always a separating line
+                data.preceding_lines.pop(0)
+        return data
+
+    def _harvest_entries(self, filename: str, entries_start_line: int) -> Iterator[NodeData]:
+        """Transform the file content into NodeData objects"""
+        saved_nodes = []
+        index_stopwords = 0
+        index_synonyms = 0
+        language_code_prefix = re.compile(
+            r"[a-zA-Z][a-zA-Z][a-zA-Z]?([-_][a-zA-Z][a-zA-Z][a-zA-Z]?)?:"
+        )
+        # Check if it is correctly written
+        correctly_written = re.compile(r"\w+\Z")
+        # stopwords will contain a list of stopwords with their language code as key
+        self.stopwords = {}
+        # the other entries
+        data = NodeData(is_before="__header__")
+        line_number = (
+            entries_start_line  # if the iterator is empty, line_number will not be unbound
+        )
+        for line_number, line in self._file_iter(filename, entries_start_line):
+            # yield data if block ended
+            if self._entry_end(line, data):
+                if data.id in saved_nodes:
+                    msg = (
+                        "Entry with same id %s already created, "
+                        "duplicate id in file at line %s. "
+                        "Node creation cancelled."
+                    )
+                    self.parser_logger.error(msg, data.id, data.src_position)
+                else:
+                    data = self._remove_separating_line(data)
+                    yield data  # another function will use this NodeData to create a node
+                    saved_nodes.append(data.id)
+                data = NodeData(is_before=data.id)
+
+            # harvest the line
+            if not (line) or line[0] == "#":
+                # comment or blank line
+                data.preceding_lines.append(line)
+            else:
+                line = line.rstrip(",")
+                if not data.src_position:
+                    data.src_position = line_number + 1
+                if line.startswith("stopwords"):
+                    # general stopwords definition for a language
+                    id = "stopwords:" + str(index_stopwords)
+                    data = self._set_data_id(data, id, line_number)
+                    index_stopwords += 1
+                    try:
+                        lc, value = self._get_lc_value(line[10:])
+                    except ValueError:
+                        self.parser_logger.error(
+                            "Missing language code at line %d ? '%s'",
+                            line_number + 1,
+                            ellipsis(line),
+                        )
+                    else:
+                        data.tags["tags_" + lc] = value
+                        # add the list with its lc
+                        self.stopwords[lc] = value
+                elif line.startswith("synonyms"):
+                    # general synonyms definition for a language
+                    id = "synonyms:" + str(index_synonyms)
+                    data = self._set_data_id(data, id, line_number)
+                    index_synonyms += 1
+                    line = line[9:]
+                    tags = [words.strip() for words in line[3:].split(",")]
+                    try:
+                        lc, value = self._get_lc_value(line)
+                    except ValueError:
+                        self.parser_logger.error(
+                            "Missing language code at line %d ? '%s'",
+                            line_number + 1,
+                            ellipsis(line),
+                        )
+                    else:
+                        data.tags["tags_" + lc] = tags
+                        data.tags["tags_ids_" + lc] = value
+                elif line[0] == "<":
+                    # parent definition
+                    data.parent_tag.append(self._add_line(line[1:]))
+                elif language_code_prefix.match(line):
+                    # synonyms definition
+                    if not data.id:
+                        data.id = self._add_line(line.split(",", 1)[0])
+                        # first 2-3 characters before ":" are the language code
+                        data.main_language = data.id.split(":", 1)[0]
+                    # add tags and tag ids
+                    lang, line = line.split(":", 1)
+                    # transform '-' in the language code to '_'
+                    lang = lang.strip().replace("-", "_")
+                    tags_list = []
+                    tagsids_list = []
+                    for word in line.split(","):
+                        tags_list.append(word.strip())
+                        word_normalized = self._remove_stopwords(lang, normalizing(word, lang))
+                        if word_normalized not in tagsids_list:
+                            # in case 2 normalized synonyms are the same
+                            tagsids_list.append(word_normalized)
+                    data.tags["tags_" + lang] = tags_list
+                    data.tags["tags_ids_" + lang] = tagsids_list
+                else:
+                    # property definition
+                    property_name = None
+                    try:
+                        property_name, lc, property_value = line.split(":", 2)
+                    except ValueError:
+                        self.parser_logger.error(
+                            "Reading error at line %d, unexpected format: '%s'",
+                            line_number + 1,
+                            ellipsis(line),
+                        )
+                    else:
+                        # in case there is space before or after the colons
+                        property_name = property_name.strip()
+                        lc = lc.strip().replace("-", "_")
+                        if not (
+                            correctly_written.match(property_name) and correctly_written.match(lc)
+                        ):
+                            self.parser_logger.error(
+                                "Reading error at line %d, unexpected format: '%s'",
+                                line_number + 1,
+                                ellipsis(line),
+                            )
+                        if property_name:
+                            data.properties["prop_" + property_name + "_" + lc] = property_value
+
+        data.id = "__footer__"
+        data.preceding_lines.pop(0)
+        data.src_position = line_number + 1 - len(data.preceding_lines)
+        yield data
+
+    def _create_taxonomy(self, filename: str) -> Taxonomy:
+        """Create the taxonomy from the file"""
+        self.parser_logger.info("Parsing taxonomy file %s", filename)
+        harvested_header_data, entries_start_line = self._header_harvest(filename)
+        entry_nodes: list[NodeData] = []
+        other_nodes = [
+            NodeData(id="__header__", preceding_lines=harvested_header_data, src_position=1)
+        ]
+        previous_links: list[PreviousLink] = []
+        child_links: list[ChildLink] = []
+        harvested_data = self._harvest_entries(filename, entries_start_line)
+        for entry in harvested_data:
+            if entry.get_node_type() == NodeType.ENTRY:
+                entry_nodes.append(entry)
+            else:
+                other_nodes.append(entry)
+            if entry.is_before:
+                previous_links.append(PreviousLink(before_id=entry.is_before, id=entry.id))
+            if entry.parent_tag:
+                for parent in entry.parent_tag:
+                    child_links.append(ChildLink(parent_id=parent, id=entry.id))
+        return Taxonomy(
+            entry_nodes=entry_nodes,
+            other_nodes=other_nodes,
+            previous_links=previous_links,
+            child_links=child_links,
+        )
+
+    def parse_file(self, filename: str, logger: ParserConsoleLogger | None = None) -> Taxonomy:
+        """Process the file into a Taxonomy object"""
+        if logger:
+            self.parser_logger = logger
+        start_time = timeit.default_timer()
+        filename = self._normalized_filename(filename)
+        taxonomy = self._create_taxonomy(filename)
+        end_time = timeit.default_timer()
+        self.parser_logger.info("Parsing done in %s seconds", end_time - start_time)
+        self.parser_logger.info(
+            "Found %d nodes", len(taxonomy.entry_nodes) + len(taxonomy.other_nodes)
+        )
+        self.parser_logger.info("Found %d previous links", len(taxonomy.previous_links))
+        self.parser_logger.info("Found %d child links", len(taxonomy.child_links))
+
+        return taxonomy
+
+
+if __name__ == "__main__":
+    # Setup logs
+    logging.basicConfig(handlers=[logging.StreamHandler()], level=logging.INFO)
+    filename = sys.argv[1] if len(sys.argv) > 1 else "test"
+
+    # Parse the given file
+    parse = TaxonomyParser()
+    parse.parse_file(filename)
diff --git a/parser/tests/conftest.py b/parser/tests/conftest.py
index cf53d30b..8e8a87d5 100644
--- a/parser/tests/conftest.py
+++ b/parser/tests/conftest.py
@@ -11,7 +11,7 @@ def neo4j():
     """waiting for neo4j to be ready"""
     uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
     driver = GraphDatabase.driver(uri)
-    session = driver.session()
+    session = driver.session(database="neo4j")
     connected = False
     while not connected:
         try:
diff --git a/parser/tests/integration/test_parse_unparse_integration.py b/parser/tests/integration/test_parse_unparse_integration.py
index a21a7456..6bf91bb8 100644
--- a/parser/tests/integration/test_parse_unparse_integration.py
+++ b/parser/tests/integration/test_parse_unparse_integration.py
@@ -11,21 +11,21 @@
 @pytest.fixture(autouse=True)
 def test_setup(neo4j):
     # delete all the nodes, relations and search indexes in the database
-    query = "MATCH (n:p_test_branch:t_test:b_branch) DETACH DELETE n"
+    query = "MATCH (n:p_test_branch) DETACH DELETE n"
     neo4j.session().run(query)
     query = "DROP INDEX p_test_branch_SearchIds IF EXISTS"
     neo4j.session().run(query)
     query = "DROP INDEX p_test_branch_SearchTags IF EXISTS"
     neo4j.session().run(query)
 
-    query1 = "MATCH (n:p_test_branch1:t_test:b_branch1) DETACH DELETE n"
+    query1 = "MATCH (n:p_test_branch1) DETACH DELETE n"
     neo4j.session().run(query1)
     query1 = "DROP INDEX p_test_branch1_SearchIds IF EXISTS"
     neo4j.session().run(query1)
     query1 = "DROP INDEX p_test_branch1_SearchTags IF EXISTS"
     neo4j.session().run(query1)
 
-    query2 = "MATCH (n:p_test_branch2:t_test:b_branch2) DETACH DELETE n"
+    query2 = "MATCH (n:p_test_branch2) DETACH DELETE n"
     neo4j.session().run(query2)
     query2 = "DROP INDEX p_test_branch2_SearchIds IF EXISTS"
     neo4j.session().run(query2)
@@ -35,22 +35,20 @@ def test_setup(neo4j):
 
 def test_round_trip(neo4j):
     """test parsing and
dumping back a taxonomy""" - session = neo4j.session() - test_parser = parser.Parser(session) + with neo4j.session() as session: + test_parser = parser.Parser(session) - # parse taxonomy - test_parser(TEST_TAXONOMY_TXT, "branch", "test") - # just quick check it runs ok with total number of nodes - query = "MATCH (n:p_test_branch:t_test:b_branch) RETURN COUNT(*)" - result = session.run(query) - number_of_nodes = result.value()[0] - assert number_of_nodes == 14 + # parse taxonomy + test_parser(TEST_TAXONOMY_TXT, "branch", "test") + # just quick check it runs ok with total number of nodes + query = "MATCH (n:p_test_branch) RETURN COUNT(*)" + result = session.run(query) + number_of_nodes = result.value()[0] + assert number_of_nodes == 14 - # dump taxonomy back - test_dumper = unparser.WriteTaxonomy(session) - lines = list(test_dumper.iter_lines("p_test_branch:t_test:b_branch")) - - session.close() + # dump taxonomy back + test_dumper = unparser.WriteTaxonomy(session) + lines = list(test_dumper.iter_lines("p_test_branch")) original_lines = [line.rstrip("\n") for line in open(TEST_TAXONOMY_TXT)] # expected result is close to original file with a few tweaks @@ -75,32 +73,29 @@ def test_round_trip(neo4j): def test_two_branch_round_trip(neo4j): """test parsing and dumping the same taxonomy with two different branches""" - session = neo4j.session() - - test_parser = parser.Parser(session) - - # parse taxonomy with branch1 - test_parser(TEST_TAXONOMY_TXT, "branch1", "test") - # parse taxonomy with branch2 - test_parser(TEST_TAXONOMY_TXT, "branch2", "test") - - # just quick check it runs ok with total number of nodes - query = "MATCH (n:p_test_branch1:t_test:b_branch1) RETURN COUNT(*)" - result = session.run(query) - number_of_nodes = result.value()[0] - assert number_of_nodes == 14 - - query = "MATCH (n:p_test_branch2:t_test:b_branch2) RETURN COUNT(*)" - result = session.run(query) - number_of_nodes = result.value()[0] - assert number_of_nodes == 14 - - # dump taxonomy back - test_dumper = unparser.WriteTaxonomy(session) - lines_branch1 = list(test_dumper.iter_lines("p_test_branch1:t_test:b_branch1")) - lines_branch2 = list(test_dumper.iter_lines("p_test_branch2:t_test:b_branch2")) - - session.close() + with neo4j.session() as session: + test_parser = parser.Parser(session) + + # parse taxonomy with branch1 + test_parser(TEST_TAXONOMY_TXT, "branch1", "test") + # parse taxonomy with branch2 + test_parser(TEST_TAXONOMY_TXT, "branch2", "test") + + # just quick check it runs ok with total number of nodes + query = "MATCH (n:p_test_branch1) RETURN COUNT(*)" + result = session.run(query) + number_of_nodes = result.value()[0] + assert number_of_nodes == 14 + + query = "MATCH (n:p_test_branch2) RETURN COUNT(*)" + result = session.run(query) + number_of_nodes = result.value()[0] + assert number_of_nodes == 14 + + # dump taxonomy back + test_dumper = unparser.WriteTaxonomy(session) + lines_branch1 = list(test_dumper.iter_lines("p_test_branch1")) + lines_branch2 = list(test_dumper.iter_lines("p_test_branch2")) original_lines = [line.rstrip("\n") for line in open(TEST_TAXONOMY_TXT)] # expected result is close to original file with a few tweaks diff --git a/parser/tests/integration/test_parser_integration.py b/parser/tests/integration/test_parser_integration.py index c5223107..5dd4e090 100644 --- a/parser/tests/integration/test_parser_integration.py +++ b/parser/tests/integration/test_parser_integration.py @@ -13,7 +13,9 @@ @pytest.fixture(autouse=True) def test_setup(neo4j): # delete all the nodes and relations 
in the database - query = "MATCH (n:p_test_branch:t_test:b_branch) DETACH DELETE n" + query = "MATCH (n:p_test_branch) DETACH DELETE n" + neo4j.session().run(query) + query = "DROP INDEX p_test_branch_id_index IF EXISTS" neo4j.session().run(query) query = "DROP INDEX p_test_branch_SearchIds IF EXISTS" neo4j.session().run(query) @@ -22,192 +24,185 @@ def test_setup(neo4j): def test_calling(neo4j): - session = neo4j.session() - test_parser = parser.Parser(session) - - # Create node test - test_parser.create_nodes(TEST_TAXONOMY_TXT, "p_test_branch:t_test:b_branch") - - # total number of nodes - query = "MATCH (n:p_test_branch:t_test:b_branch) RETURN COUNT(*)" - result = session.run(query) - number_of_nodes = result.value()[0] - assert number_of_nodes == 13 - - # header correctly added - query = ( - "MATCH (n:p_test_branch:t_test:b_branch) WHERE n.id = '__header__' RETURN n.preceding_lines" - ) - result = session.run(query) - header = result.value()[0] - assert header == ["# test taxonomy"] - - # synonyms correctly added - query = "MATCH (n:p_test_branch:t_test:b_branch:SYNONYMS) RETURN n ORDER BY n.src_position" - results = session.run(query) - expected_synonyms = [ - { - "id": "synonyms:0", - "tags_en": ["passion fruit", "passionfruit"], - "tags_ids_en": ["passion-fruit", "passionfruit"], - "preceding_lines": [], - "src_position": 5, - }, - { - "id": "synonyms:1", - "tags_fr": ["fruit de la passion", "maracuja", "passion"], - "tags_ids_fr": ["fruit-passion", "maracuja", "passion"], - "preceding_lines": [""], - "src_position": 7, - }, - ] - for i, result in enumerate(results): - node = result.value() - for key in expected_synonyms[i]: - assert node[key] == expected_synonyms[i][key] - - # stopwords correctly added - query = "MATCH (n:p_test_branch:t_test:b_branch:STOPWORDS) RETURN n" - results = session.run(query) - expected_stopwords = { - "id": "stopwords:0", - "tags_fr": ["aux", "au", "de", "le", "du", "la", "a", "et"], - "preceding_lines": [], - } - for result in results: - node = result.value() - for key in expected_stopwords: - assert node[key] == expected_stopwords[key] - - # entries correctly added - # check for two of them - query = """ - MATCH (n:p_test_branch:t_test:b_branch:ENTRY) - WHERE n.id='en:banana-yogurts' - OR n.id='en:meat' - RETURN n - ORDER BY n.src_position - """ - results = session.run(query) - expected_entries = [ - { - "tags_en": ["banana yogurts"], - "tags_ids_en": ["banana-yogurts"], - "tags_fr": ["yaourts à la banane"], - "tags_ids_fr": ["yaourts-banane"], + with neo4j.session() as session: + test_parser = parser.Parser(session) + test_parser(TEST_TAXONOMY_TXT, "branch", "test") + + # total number of nodes (TEXT, ENTRY, SYNONYMS, STOPWORDS) + 1 ERROR node + query = "MATCH (n:p_test_branch) RETURN COUNT(*)" + result = session.run(query) + number_of_nodes = result.value()[0] + assert number_of_nodes == 14 + + # header correctly added + query = "MATCH (n:p_test_branch) WHERE n.id = '__header__' RETURN n.preceding_lines" + result = session.run(query) + header = result.value()[0] + assert header == ["# test taxonomy"] + + # synonyms correctly added + query = "MATCH (n:p_test_branch:SYNONYMS) RETURN n ORDER BY n.src_position" + results = session.run(query) + expected_synonyms = [ + { + "id": "synonyms:0", + "tags_en": ["passion fruit", "passionfruit"], + "tags_ids_en": ["passion-fruit", "passionfruit"], + "preceding_lines": [], + "src_position": 5, + }, + { + "id": "synonyms:1", + "tags_fr": ["fruit de la passion", "maracuja", "passion"], + "tags_ids_fr": 
["fruit-passion", "maracuja", "passion"], + "preceding_lines": [""], + "src_position": 7, + }, + ] + for i, result in enumerate(results): + node = result.value() + for key in expected_synonyms[i]: + assert node[key] == expected_synonyms[i][key] + + # stopwords correctly added + query = "MATCH (n:p_test_branch:STOPWORDS) RETURN n" + results = session.run(query) + expected_stopwords = { + "id": "stopwords:0", + "tags_fr": ["aux", "au", "de", "le", "du", "la", "a", "et"], "preceding_lines": [], - }, - { - "tags_en": ["meat"], - "tags_ids_en": ["meat"], - "preceding_lines": ["# meat", ""], - "prop_vegan_en": "no", - "prop_carbon_footprint_fr_foodges_value_fr": "10", - }, - ] - for i, result in enumerate(results): - node = result.value() - for key in expected_entries[i]: - assert node[key] == expected_entries[i][key] - - # Child link test - test_parser.create_child_link("p_test_branch:t_test:b_branch") # nodes already added - query = """ - MATCH (c:p_test_branch:t_test:b_branch)-[:is_child_of]->(p:p_test_branch:t_test:b_branch) - RETURN c.id, p.id - """ - results = session.run(query) - created_pairs = results.values() - - # correct number of links - number_of_links = len(created_pairs) - assert number_of_links == 6 - - # correctly linked - expected_pairs = [ - ["en:banana-yogurts", "en:yogurts"], - ["en:passion-fruit-yogurts", "en:yogurts"], - ["fr:yaourts-fruit-passion-alleges", "en:passion-fruit-yogurts"], - ["en:fake-meat", "en:meat"], - ["en:fake-duck-meat", "en:fake-meat"], - ["en:fake-duck-meat", "en:fake-stuff"], - ] - for pair in created_pairs: - assert pair in expected_pairs - - # Order link test - test_parser.create_previous_link("p_test_branch:t_test:b_branch") - query = """ - MATCH (n:p_test_branch:t_test:b_branch)-[:is_before]->(p:p_test_branch:t_test:b_branch) - RETURN n.id, p.id - """ - results = session.run(query) - created_pairs = results.values() - - # correct number of links - number_of_links = len(created_pairs) - assert number_of_links == 12 - - # correctly linked - expected_pairs = [ - ["__header__", "stopwords:0"], - ["stopwords:0", "synonyms:0"], - ["synonyms:0", "synonyms:1"], - ["synonyms:1", "en:yogurts"], - ["en:yogurts", "en:banana-yogurts"], - ["en:banana-yogurts", "en:passion-fruit-yogurts"], - ["en:passion-fruit-yogurts", "fr:yaourts-fruit-passion-alleges"], - ["fr:yaourts-fruit-passion-alleges", "en:meat"], - ["en:meat", "en:fake-meat"], - ["en:fake-meat", "en:fake-stuff"], - ["en:fake-stuff", "en:fake-duck-meat"], - ["en:fake-duck-meat", "__footer__"], - ] - for pair in created_pairs: - assert pair in expected_pairs - session.close() + } + for result in results: + node = result.value() + for key in expected_stopwords: + assert node[key] == expected_stopwords[key] + + # entries correctly added + # check for two of them + query = """ + MATCH (n:p_test_branch:ENTRY) + WHERE n.id='en:banana-yogurts' + OR n.id='en:meat' + RETURN n + ORDER BY n.src_position + """ + results = session.run(query) + expected_entries = [ + { + "tags_en": ["banana yogurts"], + "tags_ids_en": ["banana-yogurts"], + "tags_fr": ["yaourts à la banane"], + "tags_ids_fr": ["yaourts-banane"], + "preceding_lines": [], + }, + { + "tags_en": ["meat"], + "tags_ids_en": ["meat"], + "preceding_lines": ["# meat", ""], + "prop_vegan_en": "no", + "prop_carbon_footprint_fr_foodges_value_fr": "10", + }, + ] + for i, result in enumerate(results): + node = result.value() + for key in expected_entries[i]: + assert node[key] == expected_entries[i][key] + + query = """ + MATCH 
(c:p_test_branch)-[:is_child_of]->(p:p_test_branch) + RETURN c.id, p.id + """ + results = session.run(query) + created_pairs = results.values() + + # correct number of links + number_of_links = len(created_pairs) + assert number_of_links == 6 + + # correctly linked + expected_pairs = [ + ["en:banana-yogurts", "en:yogurts"], + ["en:passion-fruit-yogurts", "en:yogurts"], + ["fr:yaourts-fruit-passion-alleges", "en:passion-fruit-yogurts"], + ["en:fake-meat", "en:meat"], + ["en:fake-duck-meat", "en:fake-meat"], + ["en:fake-duck-meat", "en:fake-stuff"], + ] + for pair in created_pairs: + assert pair in expected_pairs + + query = """ + MATCH (n:p_test_branch)-[:is_before]->(p:p_test_branch) + RETURN n.id, p.id + """ + results = session.run(query) + created_pairs = results.values() + + # correct number of links + number_of_links = len(created_pairs) + assert number_of_links == 12 + + # correctly linked + expected_pairs = [ + ["__header__", "stopwords:0"], + ["stopwords:0", "synonyms:0"], + ["synonyms:0", "synonyms:1"], + ["synonyms:1", "en:yogurts"], + ["en:yogurts", "en:banana-yogurts"], + ["en:banana-yogurts", "en:passion-fruit-yogurts"], + ["en:passion-fruit-yogurts", "fr:yaourts-fruit-passion-alleges"], + ["fr:yaourts-fruit-passion-alleges", "en:meat"], + ["en:meat", "en:fake-meat"], + ["en:fake-meat", "en:fake-stuff"], + ["en:fake-stuff", "en:fake-duck-meat"], + ["en:fake-duck-meat", "__footer__"], + ] + for pair in created_pairs: + assert pair in expected_pairs def test_error_log(neo4j, tmp_path, caplog): # error entries with same id - session = neo4j.session() - test_parser = parser.Parser(session) - - taxonomy_txt = textwrap.dedent(""" - # a fake taxonomy - stopwords:fr: aux,au,de,le,du,la,a,et - - # meat - en:meat - -