Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IMP] start extraction of chunk processing \o/ #80

Open
wants to merge 10 commits into
base: 14.0
Choose a base branch
from
2 changes: 2 additions & 0 deletions chunk_processing/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from . import models
from . import components
28 changes: 28 additions & 0 deletions chunk_processing/__manifest__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2021 Akretion (https://www.akretion.com).
# @author Sébastien BEAU <[email protected]>
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl).


{
"name": "Chunk Processing",
"summary": "Base module for processing chunk",
"version": "14.0.1.0.0",
"category": "Uncategorized",
"website": "https://github.com/shopinvader/pattern-import-export",
"author": " Akretion",
"license": "AGPL-3",
"application": False,
"installable": True,
"external_dependencies": {
"python": [],
"bin": [],
},
"depends": [
"queue_job",
"component",
],
"data": [
"views/chunk_item_view.xml",
],
"demo": [],
}
3 changes: 3 additions & 0 deletions chunk_processing/components/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from . import processor
from . import splitter
from . import splitter_json
14 changes: 14 additions & 0 deletions chunk_processing/components/processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright 2021 Akretion (https://www.akretion.com).
# @author Sébastien BEAU <[email protected]>
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl).


from odoo.addons.component.core import AbstractComponent


class ChunkProcessor(AbstractComponent):
_name = "chunk.processor"
_collection = "chunk.item"

def run(self):
raise NotImplementedError
50 changes: 50 additions & 0 deletions chunk_processing/components/splitter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2021 Akretion (https://www.akretion.com).
# @author Sébastien BEAU <[email protected]>
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl).

from odoo.addons.component.core import AbstractComponent


class ChunkSplitter(AbstractComponent):
_name = "chunk.splitter"
_collection = "chunk.group"

def _parse_data(self, data):
raise NotImplementedError

def _prepare_chunk(self, start_idx, stop_idx, data):
return {
"start_idx": start_idx,
"stop_idx": stop_idx,
"data": data,
"nbr_item": len(data),
"state": "pending",
"group_id": self.collection.id,
}

def _should_create_chunk(self, items, next_item):
"""Customise this code if you want to add some additionnal
item after reaching the limit"""
return len(items) > self.collection.chunk_size

def _create_chunk(self, start_idx, stop_idx, data):
vals = self._prepare_chunk(start_idx, stop_idx, data)
chunk = self.env["chunk.item"].create(vals)
# we enqueue the chunk in case of multi process of if it's the first chunk
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this comment gives no new info, is equivalent to reading the next line

if self.collection.process_multi or len(self.collection.item_ids) == 1:
chunk.with_delay(priority=self.collection.job_priority).run()
return chunk

def run(self, data):
items = []
start_idx = 1
previous_idx = None
for idx, item in self._parse_data(data):
if self._should_create_chunk(items, item):
self._create_chunk(start_idx, previous_idx, items)
items = []
start_idx = idx
items.append((idx, item))
previous_idx = idx
if items:
self._create_chunk(start_idx, idx, items)
18 changes: 18 additions & 0 deletions chunk_processing/components/splitter_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright 2021 Akretion (https://www.akretion.com).
# @author Sébastien BEAU <[email protected]>
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl).

import json

from odoo.addons.component.core import Component


class ChunkSplitterJson(Component):
_inherit = "chunk.splitter"
_name = "chunk.splitter.json"
_usage = "json"

def _parse_data(self, data):
items = json.loads(data.decode("utf-8"))
for idx, item in enumerate(items):
yield idx + 1, item
2 changes: 2 additions & 0 deletions chunk_processing/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from . import chunk_item
from . import chunk_group
69 changes: 69 additions & 0 deletions chunk_processing/models/chunk_group.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright 2021 Akretion (https://www.akretion.com).
# @author Sébastien BEAU <[email protected]>
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl).


from odoo import _, api, fields, models


class ChunkGroup(models.Model):
_inherit = "collection.base"
_name = "chunk.group"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the word "batch" is very standard and expressive for this kind of purpose, instead of "group"


item_ids = fields.One2many("chunk.item", "group_id", "Item")
process_multi = fields.Boolean()
job_priority = fields.Integer(default=20)
chunk_size = fields.Integer(default=500, help="Define the size of the chunk")
progress = fields.Float(compute="_compute_stat")
date_done = fields.Datetime()
data_format = fields.Selection(
[
("json", "Json"),
("xml", "XML"),
]
)
state = fields.Selection(
[("pending", "Pending"), ("failed", "Failed"), ("done", "Done")],
default="pending",
)
info = fields.Char()
nbr_error = fields.Integer(compute="_compute_stat")
nbr_success = fields.Integer(compute="_compute_stat")
apply_on_model = fields.Char()
usage = fields.Char()

@api.depends("item_ids.nbr_error", "item_ids.nbr_success")
def _compute_stat(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpicking: stats

for record in self:
record.nbr_error = sum(record.mapped("item_ids.nbr_error"))
record.nbr_success = sum(record.mapped("item_ids.nbr_success"))
todo = sum(record.mapped("item_ids.nbr_item"))
if todo:
record.progress = (record.nbr_error + record.nbr_success) * 100.0 / todo
else:
record.progress = 0

def _get_data(self):
raise NotImplementedError

def split_in_chunk(self):
"""Split Group into Chunk"""
# purge chunk in case of retring a job
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

retrying

self.item_ids.unlink()
try:
data = self._get_data()
with self.work_on(self._name) as work:
splitter = work.component(usage=self.data_format)
splitter.run(data)
except Exception as e:
self.state = "failed"
self.info = _("Failed to create the chunk: %s") % e
return True

def set_done(self):
for record in self:
if record.nbr_error:
record.state = "failed"
else:
record.state = "done"
record.date_done = fields.Datetime.now()
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
from odoo import fields, models


class PatternChunk(models.Model):
_name = "pattern.chunk"
_description = "Pattern Chunk"
class ChunkItem(models.Model):
_inherit = "collection.base"
_name = "chunk.item"
_description = "Chunk Item"
_order = "start_idx"
_rec_name = "start_idx"

pattern_file_id = fields.Many2one(
"pattern.file", "Pattern File", required=True, ondelete="cascade"
group_id = fields.Many2one(
"chunk.group", "Chunk Group", required=True, ondelete="cascade"
)
start_idx = fields.Integer()
stop_idx = fields.Integer()
Expand All @@ -32,33 +33,31 @@ class PatternChunk(models.Model):
]
)

def run_import(self):
model = self.pattern_file_id.pattern_config_id.model_id.model
res = (
self.with_context(pattern_config={"model": model, "record_ids": []})
.env[model]
.load([], self.data)
)
self.write(self._prepare_chunk_result(res))
config = self.pattern_file_id.pattern_config_id
priority = config.job_priority
if not config.process_multi:
def manual_run(self):
""" Run the import without try/except, easier for debug """
return self._run()

def _run(self):
with self.work_on(self.group_id.apply_on_model) as work:
processor = work.component(usage=self.group_id.usage)
processor.run()
if not self.group_id.process_multi:
next_chunk = self.get_next_chunk()
if next_chunk:
next_chunk.with_delay(priority=priority).run()
next_chunk.with_delay(priority=self.group_id.job_priority).run()
else:
self.with_delay(priority=5).check_last()
else:
self.with_delay(priority=5).check_last()

def run(self):
"""Process Import of Pattern Chunk"""
"""Process Chunk Item in a savepoint"""
cr = self.env.cr
try:
self.state = "started"
cr.commit() # pylint: disable=invalid-commit
with cr.savepoint():
self.run_import()
self._run()
except Exception as e:
self.write(
{
Expand All @@ -70,6 +69,7 @@ def run(self):
self.with_delay().check_last()
return "OK"

# TODO move this in pattern-import
def _prepare_chunk_result(self, res):
# TODO rework this part and add specific test case
nbr_error = len(res["messages"])
Expand Down Expand Up @@ -98,23 +98,19 @@ def _prepare_chunk_result(self, res):
}

def get_next_chunk(self):
return self.search(
[
("pattern_file_id", "=", self.pattern_file_id.id),
("state", "=", "pending"),
],
limit=1,
return fields.first(
self.group_id.item_ids.filtered(lambda s: s.state == "pending")
)

def is_last_job(self):
return not self.pattern_file_id.chunk_ids.filtered(
return not self.group_id.item_ids.filtered(
lambda s: s.state in ("pending", "started")
)

def check_last(self):
"""Check if all chunk have been processed"""
if self.is_last_job():
self.pattern_file_id.set_import_done()
return "Pattern file is done"
self.group_id.set_done()
return "Chunk group is done"
else:
return "There is still some running chunk"
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<?xml version="1.0" encoding="UTF-8" ?>
<odoo>

<record id="pattern_chunk_view_tree" model="ir.ui.view">
<field name="model">pattern.chunk</field>
<record id="chunk_item_view_tree" model="ir.ui.view">
<field name="model">chunk.item</field>
<field name="arch" type="xml">
<tree string="Chunk">
<field name="start_idx" />
Expand All @@ -14,12 +14,12 @@
</field>
</record>

<record id="pattern_chunk_view_form" model="ir.ui.view">
<field name="model">pattern.chunk</field>
<record id="chunk_item_view_form" model="ir.ui.view">
<field name="model">chunk.item</field>
<field name="arch" type="xml">
<form string="Chunk">
<header>
<button name="run_import" type="object" string="Force manual run" />
<button name="manual_run" type="object" string="Force manual run" />
</header>
<sheet>
<group>
Expand Down
1 change: 1 addition & 0 deletions pattern_import_export/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from . import models
from . import wizard
from . import components
4 changes: 2 additions & 2 deletions pattern_import_export/__manifest__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"web_notify",
"base_sparse_field_list_support",
"base_sparse_field",
"chunk_processing",
],
"data": [
"security/pattern_security.xml",
Expand All @@ -23,11 +24,10 @@
"wizard/import_pattern_wizard.xml",
"views/pattern_config.xml",
"views/pattern_file.xml",
"views/pattern_chunk.xml",
"views/menuitems.xml",
"views/templates.xml",
"data/queue_job_channel_data.xml",
"data/queue_job_function_data.xml",
# "data/queue_job_function_data.xml",
],
"demo": ["demo/demo.xml"],
"installable": True,
Expand Down
1 change: 1 addition & 0 deletions pattern_import_export/components/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from . import processor
47 changes: 47 additions & 0 deletions pattern_import_export/components/processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright 2021 Akretion (https://www.akretion.com).
# @author Sébastien BEAU <[email protected]>
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl).

from odoo.addons.component.core import Component


class ChunkProcessorPattern(Component):
_inherit = "chunk.processor"
_name = "chunk.processor.pattern"
_usage = "pattern.import"

def run(self):
model = self.collection.group_id.apply_on_model
res = (
self.env[model]
.with_context(pattern_config={"model": model, "record_ids": []})
.load([], self.collection.data)
)
self.collection.write(self._prepare_chunk_result(res))

def _prepare_chunk_result(self, res):
# TODO rework this part and add specific test case
nbr_error = len(res["messages"])
nbr_success = max(self.collection.nbr_item - nbr_error, 0)

# case where error are not return and record are not imported
nbr_imported = len(res.get("ids") or [])
if nbr_success > nbr_imported:
nbr_success = nbr_imported
nbr_error = self.collection.nbr_item - nbr_imported

if nbr_error:
state = "failed"
else:
state = "done"
result = self.env["ir.qweb"]._render(
"pattern_import_export.format_message", res
)
return {
"record_ids": res.get("ids"),
"messages": res.get("messages"),
"result_info": result,
"state": state,
"nbr_success": nbr_success,
"nbr_error": nbr_error,
}
Loading