Migrated from pydantic==1.10.15 to pydantic>=2.10.5

adivekar-utexas committed Jan 23, 2025
1 parent 9baad03 commit 320a55c
Showing 36 changed files with 252 additions and 274 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -18,4 +18,5 @@ __pycache__/
/doc/_apidoc/
*.swp

-.vscode/settings.json
+.vscode/settings.json
+Test-bears.ipynb
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -24,7 +24,7 @@ dependencies = [
"urllib3",
"pandas==2.*",
"numpy",
"pydantic==1.10.15",
"pydantic>=2.10.5",
"xlrd",
"XlsxWriter",
"openpyxl",
36 changes: 20 additions & 16 deletions src/bears/FileMetadata.py
@@ -5,7 +5,7 @@
from typing import *

import requests
-from pydantic import constr, root_validator
+from pydantic import constr, model_validator

from bears.constants import (
FILE_ENDING_TO_FILE_FORMAT_MAP,
@@ -22,42 +22,45 @@


class FileMetadata(Parameters):
-name: Optional[constr(min_length=1, max_length=63, strip_whitespace=True)]
+name: Optional[constr(min_length=1, max_length=63, strip_whitespace=True)] = None
path: Union[constr(min_length=1, max_length=1023), Any]
-storage: Optional[Storage]
-format: Optional[FileFormat]
-contents: Optional[FileContents]
-file_glob: Optional[str]
-data_schema: Optional[MLTypeSchema]
+storage: Optional[Storage] = None
+format: Optional[FileFormat] = None
+contents: Optional[FileContents] = None
+file_glob: Optional[str] = None
+data_schema: Optional[MLTypeSchema] = None

@classmethod
def of(cls, path: Union[io.IOBase, FileMetadata, Dict, str], **kwargs) -> "FileMetadata":
if isinstance(path, FileMetadata):
-path: Dict = path.dict(exclude=None)
+path: Dict = path.model_dump(exclude=None)
elif isinstance(path, (str, pathlib.Path)):
path: Dict = dict(path=str(path))
elif isinstance(path, io.IOBase):
path: Dict = dict(path=path)
assert isinstance(path, dict)
-path: Dict = {**path, **kwargs}
-return FileMetadata(**path)
+params: Dict = {**path, **kwargs}
+return cls(**params)

-@root_validator(pre=True)
-def set_params(cls, params: Dict):
+@model_validator(mode="before")
+@classmethod
+def _set_params(cls, params: Dict):
Alias.set_format(params)
if params.get("path") is None:
raise ValueError("'path' must be provided.")
if isinstance(params["path"], pathlib.Path):
params["path"]: str = str(params["path"])
if isinstance(params["path"], str) and params["path"].startswith("~"):
params["path"]: str = FileSystemUtil.expand_dir(params["path"])

if "storage" not in params:
if params.get("storage") is None:
params["storage"]: Storage = cls.detect_storage(params["path"])
if params["storage"] is Storage.STREAM:
raise ValueError("Storage cannot be a stream.")
elif params["storage"] is Storage.LOCAL_FILE_SYSTEM:
params["path"]: str = FileSystemUtil.expand_dir(params["path"])

if "format" not in params:
if params.get("format") is None:
format: Optional[FileFormat] = cls.detect_file_format(params["path"], raise_error=False)
if format is not None:
params["format"] = format
@@ -69,7 +72,8 @@ def is_remote_storage(self, remote_storages: Tuple[Storage, ...] = tuple(REMOTE_
@classmethod
@safe_validate_arguments
def detect_storage(
-cls, path: Union[io.IOBase, constr(min_length=1, max_length=1023)]
+cls,
+path: Union[io.IOBase, constr(min_length=1, max_length=1023)],
) -> Optional[Storage]:
if isinstance(path, io.IOBase) and hasattr(path, "read"):
return Storage.STREAM
@@ -239,7 +243,7 @@ def subdir_in_dir(
return self.path
subdir_path: str = self.path_in_dir(path, is_dir=True, **kwargs)
if mkdir:
-FileMetadata(path=subdir_path, **self.dict(exclude={"path"})).mkdir(raise_error=raise_error)
+FileMetadata(path=subdir_path, **self.model_dump(exclude={"path"})).mkdir(raise_error=raise_error)
if return_metadata:
return self.update_params(path=subdir_path)
return subdir_path
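
Two pydantic v2 rules drive most of the FileMetadata changes above: `Optional[X]` fields no longer get an implicit `None` default and need an explicit `= None`, and `@root_validator(pre=True)` becomes `@model_validator(mode="before")` stacked on `@classmethod`. A minimal sketch of the pattern, using a hypothetical standalone model rather than the real `FileMetadata` (which builds on bears' `Parameters` base):

```python
from typing import Any, Dict, Optional

from pydantic import BaseModel, model_validator


class FileRef(BaseModel):  # hypothetical stand-in for FileMetadata
    path: str
    # v2: a bare `Optional[str]` annotation would make this field REQUIRED;
    # the explicit `= None` restores v1's implicit-default behavior.
    name: Optional[str] = None

    @model_validator(mode="before")
    @classmethod
    def _set_params(cls, params: Dict[str, Any]) -> Dict[str, Any]:
        # mode="before" is the v2 analogue of @root_validator(pre=True):
        # it sees the raw input dict before field validation and coercion.
        if params.get("path") is None:
            raise ValueError("'path' must be provided.")
        return params


assert FileRef(path="~/data/f.csv").name is None
```
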
5 changes: 4 additions & 1 deletion src/bears/__init__.py
@@ -5,4 +5,7 @@
from bears.FileMetadata import FileMetadata
from bears.core.frame import ScalableDataFrame, ScalableSeries
from bears.reader import Reader
-from bears.writer import Writer
+from bears.writer import Writer
+
+to_sdf = ScalableDataFrame.of
+to_ss = ScalableSeries.of
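
The `to_sdf`/`to_ss` shorthands move here from `ScalableDataFrame.py` (their old definitions are deleted further down in this diff). Assuming `ScalableDataFrame.of` and `ScalableSeries.of` accept pandas objects, the aliases would be used like this:

```python
import pandas as pd

from bears import to_sdf, to_ss

sdf = to_sdf(pd.DataFrame({"a": [1, 2, 3]}))  # alias for ScalableDataFrame.of
ss = to_ss(pd.Series([1, 2, 3]))              # alias for ScalableSeries.of
```
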
10 changes: 5 additions & 5 deletions src/bears/asset.py
@@ -2,8 +2,7 @@
from typing import *

import numpy as np
-from pydantic import conint, root_validator
-from pydantic.typing import Literal
+from pydantic import conint, model_validator

from bears.constants import (
AVAILABLE_TENSOR_TYPES,
@@ -24,7 +23,8 @@ class Asset(Parameters, Registry, ABC):
data: Any
layout: DataLayout

-@root_validator(pre=True)
+@model_validator(mode="before")
+@classmethod
def validate_params(cls, params: Dict) -> Dict:
params["layout"]: DataLayout = cls.detect_layout(params["data"])
return params
@@ -119,7 +119,7 @@ def to_channels_first(self) -> Asset:
return Image(
data=img,
channels="first",
-**self.dict(exclude={"data", "channels"}),
+**self.model_dump(exclude={"data", "channels"}),
)

def to_channels_last(self) -> Asset:
@@ -135,7 +135,7 @@ def to_channels_last(self) -> Asset:
return Image(
data=img,
channels="last",
-**self.dict(exclude={"data", "channels"}),
+**self.model_dump(exclude={"data", "channels"}),
)


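
`to_channels_first`/`to_channels_last` use the copy-with-overrides idiom, and v1's `.dict(exclude=...)` is renamed to `.model_dump(exclude=...)` in v2. A minimal sketch of the idiom on a hypothetical model (not bears' `Image` class):

```python
from pydantic import BaseModel


class Frame(BaseModel):  # hypothetical example model
    data: list
    channels: str = "last"
    dtype: str = "uint8"


f = Frame(data=[1, 2, 3], channels="first", dtype="float32")

# v1: Frame(data=[...], channels="last", **f.dict(exclude={"data", "channels"}))
# v2 renames .dict() to .model_dump(); the `exclude` semantics are unchanged.
f2 = Frame(data=[4, 5, 6], channels="last", **f.model_dump(exclude={"data", "channels"}))
assert (f2.channels, f2.dtype) == ("last", "float32")
```
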
2 changes: 1 addition & 1 deletion src/bears/constants/_FileConstants.py
@@ -6,7 +6,7 @@


class FileFormat(AutoEnum):
-## Config:
+## Configs:
YAML = auto()
JSON = auto()
## Dataframe:
1 change: 0 additions & 1 deletion src/bears/core/frame/DaskScalableDataFrame.py
@@ -7,7 +7,6 @@
import numpy as np
import pandas as pd
from pydantic import conint
-from pydantic.typing import Literal

from bears.constants import DataLayout, Parallelize
from bears.core.frame.DaskScalableSeries import DaskScalableSeries
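
This file and the next several all delete the same line: the `pydantic.typing` shim module was removed in pydantic v2, so `Literal` now comes from the standard library. A drop-in replacement, assuming Python 3.8+:

```python
# pydantic v1 (module no longer exists in v2):
#   from pydantic.typing import Literal
# pydantic v2 / standard library:
from typing import Literal

Orientation = Literal["rows", "columns"]  # hypothetical usage
```
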
1 change: 0 additions & 1 deletion src/bears/core/frame/DaskScalableSeries.py
@@ -2,7 +2,6 @@

import numpy as np
import pandas as pd
-from pydantic.typing import Literal

from bears.constants import DataLayout
from bears.core.frame.ScalableDataFrame import ScalableDataFrame
1 change: 0 additions & 1 deletion src/bears/core/frame/DatumScalableSeries.py
@@ -4,7 +4,6 @@
import numpy as np
import pandas as pd
from pydantic import conint
-from pydantic.typing import Literal

from bears.constants import DataLayout
from bears.core.frame.ScalableDataFrame import ScalableDataFrame
1 change: 0 additions & 1 deletion src/bears/core/frame/DictScalableDataFrame.py
@@ -3,7 +3,6 @@

import numpy as np
import pandas as pd
-from pydantic.typing import Literal

from bears.constants import DataLayout
from bears.core.frame.NumpyArrayScalableSeries import NumpyArrayScalableSeries
1 change: 0 additions & 1 deletion src/bears/core/frame/ListOfDictScalableDataFrame.py
@@ -4,7 +4,6 @@

import numpy as np
import pandas as pd
-from pydantic.typing import Literal

from bears.constants import DataLayout
from bears.core.frame import RAW_DATA_MEMBER, ScalableDataFrame
1 change: 0 additions & 1 deletion src/bears/core/frame/NumpyArrayScalableSeries.py
@@ -4,7 +4,6 @@
import numpy as np
import pandas as pd
from pydantic import conint
-from pydantic.typing import Literal

from bears.constants import DataLayout
from bears.core.frame.ScalableDataFrame import ScalableDataFrame
1 change: 0 additions & 1 deletion src/bears/core/frame/RecordScalableDataFrame.py
@@ -6,7 +6,6 @@
from bears.constants import DataLayout, Parallelize
from bears.util import String, any_are_not_none, as_list, safe_validate_arguments
from pydantic import conint
-from pydantic.typing import Literal

from bears.core.frame.DatumScalableSeries import DatumScalableSeries
from bears.core.frame.ScalableDataFrame import ScalableDataFrame, ScalableDataFrameOrRaw
12 changes: 6 additions & 6 deletions src/bears/core/frame/ScalableDataFrame.py
@@ -10,8 +10,7 @@

import numpy as np
import pandas as pd
-from pydantic import conint, constr, root_validator
-from pydantic.typing import Literal
+from pydantic import conint, constr, model_validator

from bears.constants import (
LAZY_SDF_DATA_LAYOUTS,
@@ -506,7 +505,7 @@ def _stream_chunks(
might mean we drop a negligible number of rows, which should not affect the overall training procedure.
:return: yield a single smaller ScalableDataFrame.
"""
-## TODO: implement chunk_size: Optional[Union[conint(ge=1), constr(regex=String.FILE_SIZE_REGEX)]] = None
+## TODO: implement chunk_size: Optional[Union[conint(ge=1), constr(pattern=String.FILE_SIZE_REGEX)]] = None
## docstring for chunk_size: maximum size of each ScalableDataFrame in bytes (int) or string (e.g. "10MB").
try:
mapped_sdf_chunks: Deque[Dict[str, Union[int, Future]]] = deque()
@@ -2012,8 +2011,7 @@ def to_npz(self, path, storage, **kwargs):
ScalableDataFrameOrRaw = Union[ScalableDataFrame, ScalableDataFrameRawType]
ScalableOrRaw = Union[ScalableSeriesOrRaw, ScalableDataFrameOrRaw]

-to_sdf: Callable = ScalableDataFrame.of
-to_ss: Callable = ScalableSeries.of
+CompressedScalableDataFrame = "CompressedScalableDataFrame"


class CompressedScalableDataFrame(Parameters):
@@ -2022,8 +2020,10 @@ class CompressedScalableDataFrame(Parameters):
layout: DataLayout
base64_encoding: bool = False

-@root_validator(pre=False)
+@model_validator(mode="before")
+@classmethod
def _set_params(cls, params: Dict) -> Dict:
+cls.set_param_default_values(params)
if params["base64_encoding"] is False and not isinstance(params["payload"], bytes):
raise ValueError(
f"Must pass a bytes `payload` when passing `base64_encoding=False`; "
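
Besides the `model_validator` and import changes, the updated TODO above reflects another v2 rename: `constr(regex=...)` is now `constr(pattern=...)`. A sketch of the annotation the TODO describes, using a hypothetical stand-in for `String.FILE_SIZE_REGEX` and a hypothetical model name:

```python
from typing import Optional, Union

from pydantic import BaseModel, conint, constr

FILE_SIZE_REGEX = r"^\d+(\.\d+)?\s*(B|KB|MB|GB|TB)$"  # hypothetical stand-in


class StreamOptions(BaseModel):  # hypothetical example model
    # v1 spelled this constr(regex=FILE_SIZE_REGEX); v2 renames the kwarg to `pattern`.
    chunk_size: Optional[Union[conint(ge=1), constr(pattern=FILE_SIZE_REGEX)]] = None


assert StreamOptions(chunk_size=1024).chunk_size == 1024
assert StreamOptions(chunk_size="10MB").chunk_size == "10MB"
```
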
1 change: 0 additions & 1 deletion src/bears/core/frame/ScalableSeries.py
@@ -4,7 +4,6 @@
import numpy as np
import pandas as pd
from pydantic import conint
-from pydantic.typing import Literal

from bears.constants import (
SHORTHAND_TO_TENSOR_LAYOUT_MAP,
1 change: 0 additions & 1 deletion src/bears/core/frame/TensorScalableSeries.py
@@ -2,7 +2,6 @@
from typing import *

import pandas as pd
-from pydantic.typing import Literal

from bears.core.frame.ScalableSeries import SS_DEFAULT_NAME, ScalableSeries
from bears.util import get_default, is_function, wrap_fn_output
1 change: 0 additions & 1 deletion src/bears/core/frame/TorchScalableSeries.py
@@ -3,7 +3,6 @@
import numpy as np
import pandas as pd
from pydantic import conint
-from pydantic.typing import Literal

from bears.constants import DataLayout
from bears.core.frame.NumpyArrayScalableSeries import NumpyArrayScalableSeries
17 changes: 8 additions & 9 deletions src/bears/reader/Reader.py
@@ -5,7 +5,7 @@
from typing import *

import numpy as np
-from pydantic import Extra, Field, confloat, conint, constr, root_validator
+from pydantic import ConfigDict, Field, confloat, conint, constr, model_validator

from bears.constants import FILE_FORMAT_TO_FILE_ENDING_MAP, FileContents, FileFormat, MLTypeSchema, Storage
from bears.FileMetadata import FileMetadata
@@ -41,23 +41,22 @@ class Reader(Parameters, Registry, ABC):
retry_wait: confloat(ge=0.0) = 5.0
shuffled_multi_read: bool = True

-class Config(Parameters.Config):
-extra = Extra.ignore
+model_config = ConfigDict(extra="ignore")

class Params(Parameters):
"""
BaseModel for parameters. Expected to be overridden by subclasses.
"""

-class Config(Parameters.Config):
-## Allow extra keyword parameters to be used when initializing the class.
-## These will be forwarded to the respective reader method like .read_csv, .read_json, etc.
-extra = Extra.allow
+## Allow extra keyword parameters to be used when initializing the class.
+## These will be forwarded to the respective reader method like .read_csv, .read_json, etc.
+model_config = ConfigDict(extra="ignore")

params: Params = Field(default_factory=Params)
filter_kwargs: bool = True

-@root_validator(pre=True)
+@model_validator(mode="before")
+@classmethod
def convert_params(cls, params: Dict) -> Dict:
Alias.set_retry(params)
params["params"] = cls._convert_params(cls.Params, params.get("params"))
Expand All @@ -69,7 +68,7 @@ def _registry_keys(cls) -> Optional[Union[List[Any], Any]]:

def filtered_params(self, *reader_fn: Union[Callable, Tuple[Callable, ...]]) -> Dict:
filtered_params: Dict[str, Any] = {
-**self.params.dict(),
+**self.params.model_dump(),
}
if self.filter_kwargs:
filtered_params: Dict[str, Any] = filter_kwargs(reader_fn, **filtered_params)
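
`Reader` shows the remaining recurring v2 pattern: the nested `class Config` with `Extra` enum values becomes a `model_config = ConfigDict(...)` class attribute. A minimal sketch of the two behaviors involved, on plain `BaseModel` rather than bears' `Parameters` base (model names are hypothetical):

```python
from pydantic import BaseModel, ConfigDict


class StrictParams(BaseModel):
    # v1: class Config: extra = Extra.ignore
    model_config = ConfigDict(extra="ignore")
    encoding: str = "utf-8"


class LooseParams(BaseModel):
    # v1: class Config: extra = Extra.allow
    model_config = ConfigDict(extra="allow")
    encoding: str = "utf-8"


# "ignore" silently drops unknown kwargs; "allow" keeps them, so they can be
# forwarded to the underlying reader (e.g. pd.read_csv) via model_dump().
assert "delimiter" not in StrictParams(delimiter="|").model_dump()
assert LooseParams(delimiter="|").model_dump()["delimiter"] == "|"
```
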
3 changes: 1 addition & 2 deletions src/bears/reader/asset/audio/TorchAudioReader.py
@@ -3,7 +3,6 @@

import numpy as np
from pydantic import constr
-from pydantic.typing import Literal

from bears.constants import FileContents, FileFormat, Storage
from bears.reader.asset.audio.AudioReader import AudioReader
@@ -38,7 +37,7 @@ def _read_image(
source: io.BytesIO = io.BytesIO(S3Util.stream_s3_object(source).read())
img: np.ndarray = iio.imread(
source,
-**self.params.dict(),
+**self.params.model_dump(),
)
if not postprocess:
return img
5 changes: 2 additions & 3 deletions src/bears/reader/asset/image/ImageIOReader.py
@@ -3,7 +3,6 @@

import numpy as np
from pydantic import constr
-from pydantic.typing import Literal

from bears.constants import FileContents, FileFormat, Storage
from bears.asset import Image
@@ -39,7 +38,7 @@ def _read_image(
source: io.BytesIO = io.BytesIO(S3Util.stream_s3_object(source).read())
img: np.ndarray = iio.imread(
source,
-**self.params.dict(),
+**self.params.model_dump(),
)
return Image(
path=source if storage in {Storage.S3, Storage.LOCAL_FILE_SYSTEM} else None,
@@ -70,7 +69,7 @@ def _read_image(
source: io.BytesIO = io.BytesIO(S3Util.stream_s3_object(source).read())
img: np.ndarray = iio.imread(
source,
-**self.params.dict(),
+**self.params.model_dump(),
)
if self.channels == "first":
img: np.ndarray = np.moveaxis(img, -1, 0)
1 change: 0 additions & 1 deletion src/bears/reader/asset/image/ImageReader.py
@@ -2,7 +2,6 @@
from abc import ABC, abstractmethod
from typing import *

-from pydantic.typing import Literal

from bears.constants import FileContents, MLType, Storage
from bears.asset import Image