Skip to content

Commit

Permalink
✨ Add get_page_iterator() method to S3 (#1033)
Browse files Browse the repository at this point in the history
* ✨ Added `get_page_iterator` function

* ✨ Added `boto3_session` and `operation_name` param

* ✨ Replaced `wr.s3` with `self.session.client`
  • Loading branch information
Rafalz13 authored Sep 13, 2024
1 parent 0b564ff commit 310a2fa
Showing 1 changed file with 31 additions and 2 deletions.
33 changes: 31 additions & 2 deletions src/viadot/sources/s3.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""A module for working with Amazon S3 as a data source."""

from collections.abc import Iterable
from collections.abc import Iterable, Iterator
import os
from pathlib import Path
from typing import Literal
from typing import Any, Literal


try:
Expand Down Expand Up @@ -310,3 +310,32 @@ def download(self, from_path: str, to_path: str) -> None:
to_path (str): Path to local file(s) to be stored.
"""
wr.s3.download(boto3_session=self.session, path=from_path, local_file=to_path)

def get_page_iterator(
    self,
    bucket_name: str,
    directory_path: str,
    operation_name: str = "list_objects_v2",
    **kwargs: Any,
) -> Iterator[dict[str, Any]]:
    """Return a page iterator over the objects under an S3 bucket prefix.

    Creates an S3 client from ``self.session``, obtains the paginator for
    ``operation_name`` and starts pagination with ``Bucket=bucket_name`` and
    ``Prefix=directory_path``. Any extra keyword arguments are forwarded
    unchanged to the paginator's ``paginate()`` call.

    Args:
        bucket_name (str): The name of the S3 bucket.
        directory_path (str): The directory path (prefix) in the bucket to list
            objects from.
        operation_name (str): The operation name. This is the same name as
            the method name on the client. Because ``Bucket`` and ``Prefix``
            are always sent, the operation must be pageable and accept both
            of these parameters. Defaults to "list_objects_v2".
        **kwargs: Additional arguments to pass to the paginator (optional).
            Must not contain ``Bucket`` or ``Prefix``; those are already
            supplied from ``bucket_name`` and ``directory_path``.

    Returns:
        Iterator: The object returned by the paginator's ``paginate()``;
            iterating over it yields one response page (a dict) at a time.
            NOTE(review): presumably a botocore ``PageIterator`` - confirm
            whether callers rely on ``next()`` or only on ``for`` loops.
    """
    client = self.session.client("s3")
    paginator = client.get_paginator(operation_name=operation_name)

    # Pages are not fetched here; the caller drives the requests by iterating.
    return paginator.paginate(Bucket=bucket_name, Prefix=directory_path, **kwargs)

0 comments on commit 310a2fa

Please sign in to comment.