Skip to content

Commit

Permalink
feat(joblib): add
Browse files Browse the repository at this point in the history
  • Loading branch information
Caceresenzo committed Aug 30, 2024
1 parent 540e9ac commit 39394bd
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ rw.write(df, "data.parquet")
| [Binary](./readwrite/handlers/binary.py) | `bin` | [Python's `bytes`](https://docs.python.org/3/library/stdtypes.html#bytes) | :heavy_check_mark: | :heavy_check_mark: |
| [Csv](./readwrite/handlers/csv.py) | `csv` | [`pandas`](https://pandas.pydata.org/) | :heavy_check_mark: | :heavy_check_mark: |
| [Excel](./readwrite/handlers/excel.py) | `xlsx` | [`pandas`](https://pandas.pydata.org/) | :heavy_check_mark: | :heavy_check_mark: |
| [Joblib](./readwrite/handlers/joblib.py) | `joblib` | [`joblib`](https://joblib.readthedocs.io/) | :heavy_check_mark: | :heavy_check_mark: |
| [Json](./readwrite/handlers/json.py) | `json` | [Python's `json`](https://docs.python.org/3/library/json.html) | :heavy_check_mark: | :heavy_check_mark: |
| [Parquet](./readwrite/handlers/parquet.py) | `parquet` | [`pandas`](https://pandas.pydata.org/) | :heavy_check_mark: | :heavy_check_mark: |
| [Pickle](./readwrite/handlers/pickle.py) | `pkl`, `pickle` | [Python's `pickle`](https://docs.python.org/3/library/pickle.html) or [`pandas`](https://pandas.pydata.org/) | :heavy_check_mark: | :heavy_check_mark: |
Expand Down
20 changes: 20 additions & 0 deletions readwrite/handlers/joblib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from .base import Handler


class JoblibHandler(Handler):
    """Handler that reads and writes Python objects through ``joblib``.

    ``joblib`` is imported inside each method rather than at module level,
    so the package is only loaded when the handler is actually used.
    """

    def __init__(self):
        # Passes "joblib" and ["joblib"] to the base Handler; presumably the
        # handler name and its file extensions -- confirm against Handler.
        super().__init__(
            "joblib",
            ["joblib"]
        )

    def read(self, path: str, **kwargs):
        """Load and return the object stored at ``path``.

        Args:
            path: Location of the file to load.
            **kwargs: Forwarded unchanged to ``joblib.load``.

        Returns:
            Whatever object ``joblib.load`` deserializes from the file.
        """
        import joblib

        # SECURITY: joblib.load relies on pickle and can execute arbitrary
        # code while deserializing. Only call this on files from a trusted
        # source.
        return joblib.load(path, **kwargs)

    def write(self, x, path: str, **kwargs):
        """Serialize ``x`` to ``path`` with ``joblib.dump``.

        Args:
            x: Object to persist.
            path: Destination file.
            **kwargs: Forwarded unchanged to ``joblib.dump``.
        """
        import joblib

        joblib.dump(x, path, **kwargs)
5 changes: 4 additions & 1 deletion readwrite/registry.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os
import typing

from .constants import LOGGER
from .handlers.base import Handler
from .utils import measure_duration


class UnknownExtension(ValueError):
def __init__(self, extension: str):
super().__init__(f"unknown extension: {extension}")
Expand Down Expand Up @@ -69,6 +69,9 @@ def add_defaults(self):
from .handlers.excel import ExcelHandler
self.add(ExcelHandler())

from .handlers.joblib import JoblibHandler
self.add(JoblibHandler())

from .handlers.json import JsonHandler
self.add(JsonHandler())

Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ tqdm
pyyaml
toml
openpyxl
joblib
Binary file added tests/fixtures/hello.joblib
Binary file not shown.
19 changes: 19 additions & 0 deletions tests/test_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pickle
import unittest

import joblib
import pandas
import toml
import yaml
Expand Down Expand Up @@ -59,6 +60,24 @@ def test_write(self):
self.assertTrue(self.content.equals(pandas.read_csv(path)))


class HandlerJoblibTest(unittest.TestCase):
    """Read and write checks for the handler registered as ``"joblib"``."""

    handler = registry.get("joblib")
    content = {'hello': 'world', 'from': 42}

    def test_read(self):
        """Reading the ``hello.joblib`` fixture yields ``content``."""
        path = fixture_path("hello.joblib")
        x = self.handler.read(path)

        self.assertEqual(self.content, x)

    def test_write(self):
        """Writing ``content`` produces a file ``joblib.load`` reads back."""
        import os
        import tempfile

        # A private temporary directory instead of a fixed "/tmp" path: it
        # works on platforms without /tmp, cannot collide with a concurrent
        # test run, and is removed when the block exits.
        with tempfile.TemporaryDirectory() as directory:
            path = os.path.join(directory, "hello.joblib")
            self.handler.write(self.content, path)

            self.assertEqual(self.content, joblib.load(path))


class HandlerExcelTest(unittest.TestCase):

handler = registry.get("xlsx")
Expand Down

0 comments on commit 39394bd

Please sign in to comment.