Skip to content

Commit

Permalink
[REF] Test allergens migration
Browse files Browse the repository at this point in the history
  • Loading branch information
legalsylvain committed Apr 9, 2024
1 parent f3d2c0b commit f81d879
Show file tree
Hide file tree
Showing 7 changed files with 259 additions and 131 deletions.
72 changes: 68 additions & 4 deletions product_food/demo/product_allergen.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,74 @@ License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
-->
<odoo>

<record id="allergen_peanut" model="product.allergen">
<field name="code">AR101</field>
<field name="name">Peanut</field>
<field name="website">https://en.wikipedia.org/wiki/Peanut_allergen_powder-dnfp</field>
<record id="allergen_ARA" model="product.allergen">
<field name="code">ARA</field>
<field name="name">Arachide</field>
</record>

<record id="allergen_CEL" model="product.allergen">
<field name="code">CEL</field>
<field name="name">Céleri</field>
</record>

<record id="allergen_CRU" model="product.allergen">
<field name="code">CRU</field>
<field name="name">Crustacés</field>
</record>

<record id="allergen_FAC" model="product.allergen">
<field name="code">FAC</field>
<field name="name">Fruits à coques (oléagineux)</field>
</record>

<record id="allergen_GLU" model="product.allergen">
<field name="code">GLU</field>
<field name="name">Gluten</field>
</record>

<record id="allergen_LAIT" model="product.allergen">
<field name="code">LAIT</field>
<field name="name">Lait</field>
</record>

<record id="allergen_LUP" model="product.allergen">
<field name="code">LUP</field>
<field name="name">Lupin</field>
</record>

<record id="allergen_MOL" model="product.allergen">
<field name="code">MOL</field>
<field name="name">Mollusques</field>
</record>

<record id="allergen_MOU" model="product.allergen">
<field name="code">MOU</field>
<field name="name">Moutarde</field>
</record>

<record id="allergen_POI" model="product.allergen">
<field name="code">POI</field>
<field name="name">Poissons</field>
</record>

<record id="allergen_SES" model="product.allergen">
<field name="code">SES</field>
<field name="name">Sésame</field>
</record>

<record id="allergen_SOJA" model="product.allergen">
<field name="code">SOJA</field>
<field name="name">Soja</field>
</record>

<record id="allergen_SUL" model="product.allergen">
<field name="code">SUL</field>
<field name="name">Sulfites</field>
</record>

<record id="allergen_OEUF" model="product.allergen">
<field name="code">ŒUF</field>
<field name="name">OEUF</field>
</record>

</odoo>
2 changes: 1 addition & 1 deletion product_food/demo/product_product.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
<field name="is_alimentary" eval="True" />
<field name="list_price">2.70</field>
<field name="allergen_ids" eval="[
(4, ref('allergen_peanut')),
(4, ref('allergen_ARA')),
]"/>
<field name="label_ids" eval="[
(4, ref('product_label.label_agriculture_biologique')),
Expand Down
135 changes: 9 additions & 126 deletions product_food/migrations/12.0.2.0.0/post-migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,11 @@


import logging
import re

from bs4 import BeautifulSoup
from openupgradelib import openupgrade
from psycopg2.extensions import AsIs
from unidecode import unidecode

from ...models.tools import get_allergen_data

_logger = logging.getLogger(__name__)

Expand All @@ -36,28 +35,16 @@ def _populate_allergen_ids(env, company, all_allergens):
results = env.cr.fetchall()
_logger.info(f"Found {len(results)} products...")
for (product_id, allergen_html) in results:
soup = BeautifulSoup(allergen_html, features="lxml")
allergen_text = unidecode(soup.get_text(strip=True).lower())
(
allergen_text,
allergens,
residual,
trace_allergens,
trace_residual,
) = get_allergen_data(env, allergen_html, all_allergens)
if not allergen_text:
continue

the_text = _clean_text(" " + allergen_text + " ")
if "==TRACE==" in the_text:
allergen_split = the_text.split("==TRACE==")
allergen_part_text = allergen_split[0]
allergen_trace_text = allergen_split[1]
if len(allergen_split) > 2:
_logger.info("WARNING, many 'trace' found")
else:
allergen_part_text = the_text
allergen_trace_text = ""

# Handle main allergens
allergens, residual = _find_allergens(env, allergen_part_text, all_allergens)
trace_allergens, trace_residual = _find_allergens(
env, allergen_trace_text, all_allergens
)

if allergens or trace_allergens:
vals = {}
product = (
Expand Down Expand Up @@ -87,110 +74,6 @@ def _populate_allergen_ids(env, company, all_allergens):
_logger.warning(f"Found {len(v)} time the word '{k}' ...")


def _find_allergens(env, allergen_text, all_allergens):
    """Pick the allergens whose code appears as a ``-CODE-`` marker in the text.

    :param env: unused; kept so that existing callers keep working.
    :param allergen_text: text in which known words have already been
        rewritten into ``-CODE-`` markers.
    :param all_allergens: collection of allergen records to pick from; it
        must provide ``filtered()`` and its items a ``code`` attribute.
    :return: ``(allergens, residual)`` where ``allergens`` is the filtered
        collection and ``residual`` is the stripped text left once every
        marker has been removed.
    """
    # ``\w`` already matches accented/ligature letters such as "Œ"; the former
    # ``[Œ|\w]`` class additionally accepted a literal "|" by mistake.
    marker_pattern = r"-(\w+)-"
    found_codes = set(re.findall(marker_pattern, allergen_text))
    allergens = all_allergens.filtered(lambda record: record.code in found_codes)
    residual = re.sub(marker_pattern, "", allergen_text).strip()
    return allergens, residual


def _clean_text(allergen_text):
    """Apply every rule of ``_REPLACE``, in order, to ``allergen_text``.

    A rule is either a ``(pattern, replacement)`` sequence or a bare pattern;
    matches of a bare pattern are replaced by a single space.

    :param allergen_text: text to normalise.
    :return: the rewritten text.
    """
    result = allergen_text
    for rule in _REPLACE:
        if isinstance(rule, (list, tuple)):
            pattern, replacement = rule[0], rule[1]
        else:
            pattern, replacement = rule, " "
        result = re.sub(pattern, replacement, result)
    return result


# Ordered rewrite rules applied one after the other by ``_clean_text``.
# An entry is either a ``(pattern, replacement)`` tuple or a bare pattern whose
# matches are replaced by a single space. Replacements of the form ``-XXX-``
# are the markers that ``_find_allergens`` extracts and compares with the
# allergen codes.
_REPLACE = [
    # Turn every "trace(s)" into the marker on which the text is split
    (r"traces?", "==TRACE=="),
    # Removal of punctuation
    r"\.",
    r"\,",
    r"\/",
    # ###############
    # PEANUT (ARA)
    # ###############
    (r"arachides?", "-ARA-"),
    (r"cacahuetes?", "-ARA-"),
    # ###############
    # TREE NUTS (FAC)
    # ###############
    (r"fruits? a coques?", "-FAC-"),
    (r"amandes?", "-FAC-"),
    (r"noisettes?", "-FAC-"),
    (r"(noix de )?cajou", "-FAC-"),
    (r"noix de pecan", "-FAC-"),
    (r"noix de macadamia", "-FAC-"),
    (r"noix du bresil", "-FAC-"),
    (r"noix du queensland", "-FAC-"),
    (r"pistaches?", "-FAC-"),
    # Generic "noix" comes after the more specific "noix de ..." rules
    (r"noix", "-FAC-"),
    # ###############
    # GLUTEN (GLU)
    # ###############
    (r"gluten", "-GLU-"),
    # NOTE(review): "gluten" has already been rewritten by the rule above, so
    # the optional "(gluten de)" prefix looks unreachable - confirm.
    (r"\s(gluten de)?ble( dur)?\s", " -GLU- "),
    (r"seigle", "-GLU-"),
    (r"orge", "-GLU-"),
    (r"avoine", "-GLU-"),
    (r"epeautre", "-GLU-"),
    (r"kamut", "-GLU-"),
    (r"cereales?", "-GLU-"),
    # ###############
    # MILK (LAIT)
    # ###############
    (r"lait", "-LAIT-"),
    (r"beurre", "-LAIT-"),
    (r"lactose", "-LAIT-"),
    # ###############
    # FISH (POI)
    # ###############
    (r"poissons?", "-POI-"),
    (r"thon", "-POI-"),
    # ###############
    # OTHER ALLERGENS
    # ###############
    (r"celeri(-rave)?", "-CEL-"),
    (r"crustaces?", "-CRU-"),
    (r"lupin", "-LUP-"),
    (r"(graines? de )?moutarde", "-MOU-"),
    (r"mollusques?", "-MOL-"),
    (r"oeufs?", "-ŒUF-"),
    (r"(graines? de )?sesame?", "-SES-"),
    # The marker must equal the allergen code ("SOJA") to be recognised;
    # the former "-SOJ-" marker never matched any allergen.
    (r"soja", "-SOJA-"),
    (r"sulfites?", "-SUL-"),
    # Remove little words (articles, conjunctions)
    r"\sl'",
    r"\sd'",
    r"\sles?\s",
    r"\sla\s",
    r"\sdes?\s",
    r"\sdu\s",
    r"\sune?\s",
    r"\set\s",
    r"\sou\s",
    # Remove recurring words that carry no allergen information
    r"\speut\s",
    r"\spresence\s",
    r"\scontenir\s",
    r"\scontient\s",
    r"\scontenant\s",
    r"\sfabrique\s",
    r"\sdans\s",
    r"\satelier\s",
    r"\squi\s",
    r"\sutilise\s",
    r"\spossibles?\s",
    r"\seventuelles?\s",
    r"\sautres?\s",
]


@openupgrade.migrate()
def migrate(env, version):
all_allergens = env["product.allergen"].search([])
Expand Down
1 change: 1 addition & 0 deletions product_food/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
from . import product_category
from . import product_product
from . import product_template
from . import tools
136 changes: 136 additions & 0 deletions product_food/models/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import logging
import re

from bs4 import BeautifulSoup
from unidecode import unidecode

_logger = logging.getLogger(__name__)


def get_allergen_data(env, allergen_html, all_allergens):
    """Extract the allergens mentioned in a free-text HTML description.

    The HTML is reduced to lower-cased, accent-free plain text, rewritten by
    ``_clean_text`` so that known words become ``-CODE-`` markers, and split
    on the ``==TRACE==`` marker: what precedes the first marker describes the
    main allergens, everything after it describes trace allergens.

    :param env: forwarded untouched to ``_find_allergens``.
    :param allergen_html: HTML (or plain text) fragment to analyse.
    :param all_allergens: collection of allergen records to pick from; it
        must provide ``filtered()`` and its items a ``code`` attribute.
    :return: a 5-tuple ``(allergen_text, allergens, residual,
        trace_allergens, trace_residual)``. All five items are ``False``
        when there is no text to analyse.
    """
    no_data = (False, False, False, False, False)
    # Nothing to parse (empty string, None or False): skip the HTML parser.
    if not allergen_html:
        return no_data

    soup = BeautifulSoup(allergen_html, features="lxml")
    allergen_text = unidecode(soup.get_text(strip=True).lower())
    if not allergen_text:
        return no_data

    # The text is padded with spaces, presumably so that the rules delimited
    # by ``\s`` also match the first and the last word.
    cleaned_text = _clean_text(" " + allergen_text + " ")
    main_text, *trace_parts = cleaned_text.split("==TRACE==")
    if len(trace_parts) > 1:
        _logger.warning("WARNING, many 'trace' found")
    # Keep every segment that follows a "trace" marker: only looking at the
    # first one would silently drop the allergens listed after the others.
    trace_text = " ".join(trace_parts)

    allergens, residual = _find_allergens(env, main_text, all_allergens)
    trace_allergens, trace_residual = _find_allergens(
        env, trace_text, all_allergens
    )
    return allergen_text, allergens, residual, trace_allergens, trace_residual


def _find_allergens(env, allergen_text, all_allergens):
    """Pick the allergens whose code appears as a ``-CODE-`` marker in the text.

    :param env: unused; kept so that existing callers keep working.
    :param allergen_text: text in which known words have already been
        rewritten into ``-CODE-`` markers.
    :param all_allergens: collection of allergen records to pick from; it
        must provide ``filtered()`` and its items a ``code`` attribute.
    :return: ``(allergens, residual)`` where ``allergens`` is the filtered
        collection and ``residual`` is the stripped text left once every
        marker has been removed.
    """
    # ``\w`` already matches accented/ligature letters such as "Œ"; the former
    # ``[Œ|\w]`` class additionally accepted a literal "|" by mistake.
    marker_pattern = r"-(\w+)-"
    found_codes = set(re.findall(marker_pattern, allergen_text))
    allergens = all_allergens.filtered(lambda record: record.code in found_codes)
    residual = re.sub(marker_pattern, "", allergen_text).strip()
    return allergens, residual


def _clean_text(allergen_text):
    """Apply every rule of ``_REPLACE``, in order, to ``allergen_text``.

    A rule is either a ``(pattern, replacement)`` sequence or a bare pattern;
    matches of a bare pattern are replaced by a single space.

    :param allergen_text: text to normalise.
    :return: the rewritten text.
    """
    result = allergen_text
    for rule in _REPLACE:
        if isinstance(rule, (list, tuple)):
            pattern, replacement = rule[0], rule[1]
        else:
            pattern, replacement = rule, " "
        result = re.sub(pattern, replacement, result)
    return result


# Ordered rewrite rules applied one after the other by ``_clean_text``.
# An entry is either a ``(pattern, replacement)`` tuple or a bare pattern whose
# matches are replaced by a single space. Replacements of the form ``-XXX-``
# are the markers that ``_find_allergens`` extracts and compares with the
# allergen codes. The text they run against has been lower-cased and passed
# through ``unidecode`` by ``get_allergen_data``, hence the accent-free,
# lower-case patterns.
_REPLACE = [
    # Turn every "trace(s)" into the marker on which the text is split
    (r"traces?", "==TRACE=="),
    # Removal of punctuation
    r"\.",
    r"\,",
    r"\/",
    # ###############
    # PEANUT (ARA)
    # ###############
    (r"arachides?", "-ARA-"),
    (r"cacahuetes?", "-ARA-"),
    # ###############
    # TREE NUTS (FAC)
    # ###############
    (r"fruits? a coques?", "-FAC-"),
    (r"amandes?", "-FAC-"),
    (r"noisettes?", "-FAC-"),
    (r"(noix de )?cajou", "-FAC-"),
    (r"noix de pecan", "-FAC-"),
    (r"noix de macadamia", "-FAC-"),
    (r"noix du bresil", "-FAC-"),
    (r"noix du queensland", "-FAC-"),
    (r"pistaches?", "-FAC-"),
    # Generic "noix" comes after the more specific "noix de ..." rules
    (r"noix", "-FAC-"),
    # ###############
    # GLUTEN (GLU)
    # ###############
    (r"gluten", "-GLU-"),
    # NOTE(review): "gluten" has already been rewritten by the rule above, so
    # the optional "(gluten de)" prefix looks unreachable - confirm.
    (r"\s(gluten de)?ble( dur)?\s", " -GLU- "),
    (r"seigle", "-GLU-"),
    (r"orge", "-GLU-"),
    (r"avoine", "-GLU-"),
    (r"epeautre", "-GLU-"),
    (r"kamut", "-GLU-"),
    (r"cereales?", "-GLU-"),
    # ###############
    # MILK (LAIT)
    # ###############
    (r"lait", "-LAIT-"),
    (r"beurre", "-LAIT-"),
    (r"lactose", "-LAIT-"),
    # ###############
    # FISH (POI)
    # ###############
    (r"poissons?", "-POI-"),
    (r"thon", "-POI-"),
    # ###############
    # OTHER ALLERGENS
    # ###############
    (r"celeri(-rave)?", "-CEL-"),
    (r"crustaces?", "-CRU-"),
    (r"lupin", "-LUP-"),
    (r"(graines? de )?moutarde", "-MOU-"),
    (r"mollusques?", "-MOL-"),
    (r"oeufs?", "-ŒUF-"),
    (r"(graines? de )?sesame?", "-SES-"),
    (r"soja", "-SOJA-"),
    (r"sulfites?", "-SUL-"),
    # Remove little words (articles, conjunctions)
    r"\sl'",
    r"\sd'",
    r"\sles?\s",
    r"\sla\s",
    r"\sdes?\s",
    r"\sdu\s",
    r"\sune?\s",
    r"\set\s",
    r"\sou\s",
    # Remove recurring words that carry no allergen information
    r"\speut\s",
    r"\spresence\s",
    r"\scontenir\s",
    r"\scontient\s",
    r"\scontenant\s",
    r"\sfabrique\s",
    r"\sdans\s",
    r"\satelier\s",
    r"\squi\s",
    r"\sutilise\s",
    r"\spossibles?\s",
    r"\seventuelles?\s",
    r"\sautres?\s",
]
1 change: 1 addition & 0 deletions product_food/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from . import test_module
from . import test_migration
Loading

0 comments on commit f81d879

Please sign in to comment.