Add video models + functions #814

dreadatour · 2025-01-13T16:58:56Z

TODO:

Add tests
Update examples (add to https://github.com/iterative/datachain-examples) with all usages
Finish Studio PR for video/clip/frame preview

Video models added

class VideoFile(File):
    """`DataModel` for reading video files.

    Declares no fields of its own in this snippet; it only subclasses `File`.
    """


class VideoClip(VideoFile):
    """`DataModel` for reading video clips.

    Extends `VideoFile` with the two boundaries of the clip.
    """

    # Clip boundaries; presumably offsets in seconds into the video — TODO confirm units.
    start_time: float
    end_time: float


class VideoFrame(VideoFile):
    """`DataModel` for reading video frames.

    Extends `VideoFile` with the position of a single frame.
    """

    # Frame number within the video (the usage example below starts counting at 0).
    frame: int
    # Time position of the frame; the usage example computes it as `frame / fps`,
    # so presumably seconds — TODO confirm.
    timestamp: float

Meta models added

class ImageMeta(DataModel):
    """`DataModel` for image file meta information."""

    # Image dimensions; presumably in pixels — TODO confirm.
    width: int
    height: int
    # Image format name; exact vocabulary is not shown here — TODO confirm.
    format: str


class VideoMeta(DataModel):
    """`DataModel` for video file meta information."""

    # Frame dimensions; presumably in pixels (sample output shows 1080 x 1920).
    width: int
    height: int
    # Frame rate (sample output shows 59.94006 and 60.0).
    fps: float
    # Length of the video; presumably seconds, since in the sample output
    # frames_count / fps is close to duration — TODO confirm.
    duration: float
    # Total number of frames in the video.
    frames_count: int
    # Codec name (sample output shows "hevc" and "h264").
    codec: str


class VideoFrameMeta(DataModel):
    """`DataModel` for video frame image meta information."""

    # Frame number within the source video.
    frame: int
    # Time position of the frame; the upload example computes it as `frame / fps`,
    # so presumably seconds — TODO confirm.
    timestamp: float
    # Dimensions of the frame image; the upload example copies them from the video meta.
    width: int
    height: int
    # Image format name (the upload example passes "jpeg").
    format: str

Couple usage examples

Listing

from datachain import DataChain

# List the files under ./src as videos and persist the listing as the
# "videos" dataset, then print the first three rows.
listing = DataChain.from_storage("./src", type="video")
ds = listing.save("videos")
ds.show(3)

$ python 01-index.py
                                                file                 file       file    file                   file      file                      file     file
                                              source                 path       size version                   etag is_latest             last_modified location
0  file:///Users/vlad/work/iterative/playground/v...  age_16_IMG_3341.MOV  280685520          0x1.9bc2127c00000p+30         1 2024-09-22 20:57:03+00:00     None
1  file:///Users/vlad/work/iterative/playground/v...         yura_big.mp4   32482027          0x1.9bc1de3400000p+30         1 2024-09-22 20:01:17+00:00     None
2  file:///Users/vlad/work/iterative/playground/v...         IMG_6648.mov  404354596          0x1.9bc220c800000p+30         1 2024-09-22 21:12:18+00:00     None

[Limited by 3 rows]

Add meta

from datachain import DataChain
from datachain.lib.video import video_meta

# Apply `video_meta` to every row of the "videos" dataset, storing the result
# in the `meta` column, and persist the outcome as "videos-meta".
videos = DataChain.from_dataset("videos")
ds = videos.map(meta=video_meta).save("videos-meta")
ds.show(3)

$ python 02-meta.py
                                                file                 file       file    file                   file      file                      file     file  meta   meta  \
                                              source                 path       size version                   etag is_latest             last_modified location width height
0  file:///Users/vlad/work/iterative/playground/v...  age_16_IMG_3341.MOV  280685520          0x1.9bc2127c00000p+30         1 2024-09-22 20:57:03+00:00     None  1080   1920
1  file:///Users/vlad/work/iterative/playground/v...         yura_big.mp4   32482027          0x1.9bc1de3400000p+30         1 2024-09-22 20:01:17+00:00     None   848    480
2  file:///Users/vlad/work/iterative/playground/v...         IMG_6648.mov  404354596          0x1.9bc220c800000p+30         1 2024-09-22 21:12:18+00:00     None  1080   1920

       meta        meta         meta  meta
        fps    duration frames_count codec
0  59.94006  124.613333         7472  hevc
1  60.00000  179.826667        10789  h264
2  60.00000  180.415000        10827  hevc

[Limited by 3 rows]

Split video to virtual frames

from typing import Iterator

from datachain import DataChain
from datachain.lib.file import VideoFile, VideoMeta, VideoFrame


def gen_frames(file: VideoFile, meta: VideoMeta) -> Iterator[tuple[VideoFrame, VideoMeta]]:
    """Yield a virtual `VideoFrame` for every 100th frame of a video.

    The video content is not read here: each yielded frame copies the fields
    of `file` and adds the frame number and its timestamp.

    Args:
        file: Source video file whose fields are copied into every frame.
        meta: Video meta information; `frames_count` bounds the iteration
            and `fps` converts a frame number into a timestamp.

    Yields:
        `(video_frame, meta)` pairs, with `meta` passed through unchanged.
    """
    # Iterate the frame numbers directly: range() already produces
    # 0, 100, 200, ... so no separate index or multiplication is needed.
    for frame in range(0, meta.frames_count, 100):
        timestamp = frame / meta.fps
        video_frame = VideoFrame(**file.model_dump(), frame=frame, timestamp=timestamp)
        yield video_frame, meta


# Expand each row of "videos-meta" into virtual frames via `gen_frames`,
# naming the two generated columns "file" and "meta".
source = DataChain.from_dataset("videos-meta")
frames = source.gen(gen_frames, output=("file", "meta"))
ds = frames.save("videos-frames-virtual")
ds.show(3)

$ python 03-frames-virtual.py
                                                file                 file       file    file                   file      file                      file     file  file      file  meta  \
                                              source                 path       size version                   etag is_latest             last_modified location frame timestamp width
0  file:///Users/vlad/work/iterative/playground/v...  age_16_IMG_3341.MOV  280685520          0x1.9bc2127c00000p+30         1 2024-09-22 20:57:03+00:00     None     0  0.000000  1080
1  file:///Users/vlad/work/iterative/playground/v...  age_16_IMG_3341.MOV  280685520          0x1.9bc2127c00000p+30         1 2024-09-22 20:57:03+00:00     None   100  1.668333  1080
2  file:///Users/vlad/work/iterative/playground/v...  age_16_IMG_3341.MOV  280685520          0x1.9bc2127c00000p+30         1 2024-09-22 20:57:03+00:00     None   200  3.336667  1080

    meta      meta        meta         meta  meta
  height       fps    duration frames_count codec
0   1920  59.94006  124.613333         7472  hevc
1   1920  59.94006  124.613333         7472  hevc
2   1920  59.94006  124.613333         7472  hevc

[Limited by 3 rows]

Split video into frames and upload to storage

from typing import Iterator

from datachain import DataChain
from datachain.catalog import get_catalog
from datachain.client import Client
from datachain.lib.file import VideoFile, VideoMeta, VideoFrameMeta, ImageFile
from datachain.lib.video import video_frames


def gen_frames(client: Client, file: VideoFile, meta: VideoMeta) -> Iterator[tuple[VideoFile, ImageFile, VideoFrameMeta]]:
    """Upload every 100th frame of a video and yield a record per frame.

    Each frame produced by `video_frames` is uploaded through `client` under
    the name `<file stem>_<6-digit frame number>.jpg`.

    Args:
        client: Storage client used to upload each frame image.
        file: Source video file.
        meta: Video meta information; supplies `fps`, `width` and `height`.

    Yields:
        `(file, image_file, frame_meta)` triples: the unchanged source video,
        an `ImageFile` built from the uploaded object, and the frame's
        `VideoFrameMeta`.
    """
    # Single source of truth for the sampling step, used both for reading
    # frames and for turning the enumeration index back into a frame number.
    step = 100
    stem = file.get_file_stem()

    for position, image in enumerate(video_frames(file, step=step)):
        frame = position * step
        uploaded = client.upload(f"{stem}_{frame:06d}.jpg", image)
        timestamp = frame / meta.fps

        yield (
            file,
            ImageFile(**uploaded.model_dump()),
            VideoFrameMeta(
                frame=frame,
                timestamp=timestamp,
                width=meta.width,
                height=meta.height,
                format="jpeg",
            ),
        )


# Take one video from "videos-meta", give the generator a storage client for
# gs://videos/frames, upload its frames via `gen_frames`, and persist the
# resulting "video" / "frame" / "meta" columns as "videos-frames-upload".
source = DataChain.from_dataset("videos-meta").limit(1)
with_client = source.setup(client=lambda: get_catalog().get_client("gs://videos/frames"))
frames = with_client.gen(gen_frames, output=("video", "frame", "meta"))
ds = frames.save("videos-frames-upload")
ds.show(3)

$ python 04-frames-upload.py
                                               video                video      video   video                  video     video                     video    video  \
                                              source                 path       size version                   etag is_latest             last_modified location
0  file:///Users/vlad/work/iterative/playground/v...  age_16_IMG_3341.MOV  280685520          0x1.9bc2127c00000p+30         1 2024-09-22 20:57:03+00:00     None
1  file:///Users/vlad/work/iterative/playground/v...  age_16_IMG_3341.MOV  280685520          0x1.9bc2127c00000p+30         1 2024-09-22 20:57:03+00:00     None
2  file:///Users/vlad/work/iterative/playground/v...  age_16_IMG_3341.MOV  280685520          0x1.9bc2127c00000p+30         1 2024-09-22 20:57:03+00:00     None

                frame                       frame   frame             frame             frame     frame                            frame    frame  meta      meta  meta  \
               source                        path    size           version              etag is_latest                    last_modified location frame timestamp width
0  gs://videos/frames  age_16_IMG_3341_000000.jpg  206936  1736786510082205  CJ3h7/eR84oDEAE=         1 2025-01-13 16:41:50.184000+00:00     None     0  0.000000  1080
1  gs://videos/frames  age_16_IMG_3341_000100.jpg  174064  1736786512007892  CNSl5fiR84oDEAE=         1 2025-01-13 16:41:52.118000+00:00     None   100  1.668333  1080
2  gs://videos/frames  age_16_IMG_3341_000200.jpg  149928  1736786513921389  CO2K2vmR84oDEAE=         1 2025-01-13 16:41:54.055000+00:00     None   200  3.336667  1080

    meta   meta
  height format
0   1920   jpeg
1   1920   jpeg
2   1920   jpeg

[Limited by 3 rows]

codecov · 2025-01-13T17:11:06Z

Codecov Report

Attention: Patch coverage is 26.60550% with 80 lines in your changes missing coverage. Please review.

Project coverage is 86.84%. Comparing base (3767173) to head (5892ab9).
Report is 7 commits behind head on main.

Files with missing lines	Patch %	Lines
src/datachain/lib/video.py	0.00%	77 Missing ⚠️
src/datachain/lib/file.py	90.62%	2 Missing and 1 partial ⚠️

Additional details and impacted files

@@            Coverage Diff             @@
##             main     #814      +/-   ##
==========================================
- Coverage   87.42%   86.84%   -0.59%     
==========================================
  Files         128      129       +1     
  Lines       11373    11479     +106     
  Branches     1537     1553      +16     
==========================================
+ Hits         9943     9969      +26     
- Misses       1049     1128      +79     
- Partials      381      382       +1

Flag	Coverage Δ
datachain	`86.78% <26.60%> (-0.59%)`	⬇️

Flags with carried forward coverage won't be shown. Click here to find out more.

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

dmpetrov

Amazing PR!

It would be great to use concise and minimalistic naming and API because we are going to have many file types for multiple domains.

Naming

Keywords like Meta will make it hard for user to remember and use the classes - user have their own meta 🙂

How about this renaming:
VideoFile -> BaseVideo (I assume people won't use this often)
VideoMeta -> Video (the most used class)
VideoClip -> Clip (also, shouldn't it be based on Video with meta?)
VideoFrame -> FrameBase
VideoFrameMeta -> Frame

start_time --> start
end_time --> end
frames_count --> count

Image -> BaseImage
ImageMeta -> Image

FileTypes can be also extended: image (read meta), base_image (do not read meta), video (read meta), base_video (do not read meta), video_clip, base_video_clip , ...

Do we need dummy classes?

I assume that people prefer working with meta information while dealing with images and videos. A followup question - do we really need BaseImages and BaseVideo without any logic? Why don't we clean up API and keep only Meta-enrich version in the API? User still can work with videos as File if meta is not needed.

Do we need singular methods?

save_video_clips() and save_video_clip() How much extra code user needs to get rid of singular form. If one method - let's avoid the singular version.

The same question for video_frames() and video_frames_np()

I assume we can add the methods and classes later if there is a need. But I'd not start with such a rich API for now, and would try my best to keep it minimalistic.

WDYT?

dmpetrov · 2025-01-13T18:15:43Z

src/datachain/lib/file.py

+
+    width: int
+    height: int
+    format: str


How about EXIF and XMP? :)

pyproject.toml

src/datachain/lib/file.py

shcheklein · 2025-01-14T01:04:38Z

src/datachain/lib/video.py

+        yield img
+
+
+def video_frames(


can a lot of these helpers become part of the Video* classes?

Good question 👍 I was thinking about this and tried to implement it this way, but in the end I checked the other types and files in the lib module (images, hf) and did it the same way.

I also considered and tried moving all the models to the datachain.model module, but it turns out that needs more work and may not be backward compatible with the File model. It is a subject for a separate PR.

yeah, we need all of these to become methods of the Video class. Should it be a followup or in this PR?

I'd appreciate more insights on the issues with this approach.

dreadatour · 2025-01-14T01:49:30Z

src/datachain/lib/video.py

+    props = iio.improps(file.stream(), plugin="pyav")
+    frames_count, width, height, _ = props.shape
+
+    meta = iio.immeta(file.stream(), plugin="pyav")


I don't like this part, it looks like we are reading video file twice here. Need to check the other way to get video meta information.

yep, also are we reading the whole file to get meta?

cloudflare-workers-and-pages · 2025-01-14T15:24:52Z

Deploying datachain-documentation with Cloudflare Pages

Latest commit:	`5892ab9`
Status:	✅ Deploy successful!
Preview URL:	https://beaeae60.datachain-documentation.pages.dev
Branch Preview URL:	https://video-models.datachain-documentation.pages.dev

View logs

for more information, see https://pre-commit.ci

dreadatour · 2025-01-14T16:01:28Z

Naming

Keywords like Meta will make it hard for user to remember and use the classes - user have their own meta 🙂

👍

How about this renaming: VideoFile -> BaseVideo (I assume people won't use this often) VideoMeta -> Video (the most used class) VideoClip -> Clip (also, shouldn't it be based on Video with meta?) VideoFrame -> FrameBase VideoFrameMeta -> Frame

For now we have naming with File: TextFile, ImageFile and File itself. I left VideoFile for now, but rename others:

ImageMeta -> Image
VideoClipFile -> VideoClip (I can rename it to Clip as you suggested, just not sure yet, because see next line)
VideoFrameFile -> VideoFrame (I can rename it to Frame to be consistent with Clip, also Frame is already busy, see below)
VideoMeta -> Video
VideoFrameMeta -> Frame

start_time --> start end_time --> end frames_count --> count

Done. Only frames_count became frames, because I am not sure about count, too general, IMO.

Image -> BaseImage ImageMeta -> Image

We don't have Image model, we have ImageFile model, left it as is for now. ImageMeta -> Image done.

FileTypes can be also extended: image (read meta), base_image (do not read meta), video (read meta), base_video (do not read meta), video_clip, base_video_clip , ...

That's a good suggestion, but for now we use FileTypes only in the from_storage method. I am not sure we want to change it to download files and read meta 🤔 Even with an additional param.

Do we need dummy classes?

I assume that people prefer working with meta information while dealing with images and videos. A followup question - do we really need BaseImages and BaseVideo without any logic? Why don't we clean up API and keep only Meta-enrich version in the API? User still can work with videos as File if meta is not needed.

Good question. I've added VideoFile only because we already have ImageFile, just to be consistent. Also it is useful when we use from_storage with type=video, and then we can use VideoFile type in mappers, like this:

def video_meta(file: "VideoFile") -> Video:
    """
    Returns video file meta information.

    Only the signature and docstring are shown in this excerpt; the
    implementation is not visible here.

    Args:
        file (VideoFile): VideoFile object.

    Returns:
        Video: Video file meta information.
    """

Do we need singular methods?

save_video_clips() and save_video_clip() How much extra code user needs to get rid of singular form. If one method - let's avoid the singular version.
The same question for video_frames() and video_frames_np()

Sounds reasonable to me 👍 Will update the code (not done yet).

Default values

Done.

WDYT?

Those are great comments! Love the discussion ❤️

src/datachain/lib/video.py

shcheklein · 2025-01-14T20:49:59Z

src/datachain/lib/file.py

+    """`DataModel` for reading video files."""
+
+
+class VideoClip(VideoFile):


so, how are all these models connected with the helpers? how do I instantiate them? do I have to write my own UDFs to do that (just to instantiate these classes)?

shcheklein · 2025-01-14T20:51:35Z

src/datachain/lib/file.py


    def save(self, destination: str):
        """Writes it's content to destination"""
        self.read().save(destination)


+class Image(DataModel):


why do we need this separate model?

shcheklein · 2025-01-14T20:54:26Z

src/datachain/lib/file.py

+    timestamp: float = Field(default=0)
+
+
+class Video(DataModel):


Should it be a subclass of VideoFile?

dmpetrov

Great improvements.
A few followup questions about moving the methods to Video class and a plural-singular method.

dmpetrov · 2025-01-16T01:47:34Z

src/datachain/lib/file.py

+class VideoClip(VideoFile):
+    """`DataModel` for reading video clips."""
+
+    start: float = Field(default=0)


I'd use some impossible value like -1.0

dmpetrov · 2025-01-16T01:48:13Z

src/datachain/lib/file.py

+    """`DataModel` for reading video frames."""
+
+    frame: int = Field(default=0)
+    timestamp: float = Field(default=0)


-1 and -1.0 as defaults?

dmpetrov · 2025-01-16T01:50:15Z

src/datachain/lib/video.py

+    ) from exc
+
+
+def video_meta(file: "VideoFile") -> Video:


Could you please avoid using the term meta? How about file_to_video(file: File)?
Btw... not just File as input type?

dmpetrov · 2025-01-16T01:52:15Z

src/datachain/lib/video.py

+    return iio.imread(file.stream(), index=frame, plugin="pyav")
+
+
+def video_frame(file: "VideoFile", frame: int, format: str = "jpeg") -> bytes:


we usually use jpg in the codebase, not jpeg

dmpetrov · 2025-01-16T01:52:59Z

src/datachain/lib/video.py

+    file: "VideoFile",
+    frame: int,
+    output_file: Union[str, pathlib.Path],
+    format: str = "jpeg",


dmpetrov · 2025-01-16T01:53:48Z

src/datachain/lib/video.py

+def save_video_frame(
+    file: "VideoFile",
+    frame: int,
+    output_file: Union[str, pathlib.Path],


Should we really support Path?

dmpetrov · 2025-01-16T01:58:05Z

src/datachain/lib/video.py

+        yield img
+
+
+def video_frames(


yeah, we need all of these to become methods of the Video class. Should it be a followup or in this PR?

I'd appreciate more insights on the issues with this approach.

dmpetrov · 2025-01-16T01:58:12Z

src/datachain/lib/video.py

+    start_frame: int = 0,
+    end_frame: Optional[int] = None,
+    step: int = 1,
+    format: str = "jpeg",


dmpetrov · 2025-01-16T02:03:23Z

src/datachain/lib/video.py

+        yield output_file
+
+
+def save_video_clip(


It looks like it needs to be renamed to save_subvideo()
In the class names, we use term Clip for virtual videos (start-end) while in this case you are creating just another Video, not clip.

So, it needs to be renamed or we need to avoid this Clip-as-virtual-reference terminology.

dmpetrov · 2025-01-16T02:12:07Z

src/datachain/lib/video.py

+    output_file: Union[str, pathlib.Path],
+    codec: str = "libx264",
+    audio_codec: str = "aac",
+) -> None:


It would be great to generalize the single and plural methods. We just need to come up with an output format like output="{name}{:06d}.{ext}" and provide a string in case of a single file.

Also, this method will require generalization for writing to cloud like output={source}/tmp/{name}{:06d}.{ext}

Add video models + functions

75877d1

dreadatour requested a review from a team January 13, 2025 16:58

dreadatour self-assigned this Jan 13, 2025

dreadatour temporarily deployed to internal January 13, 2025 16:59 — with GitHub Actions Inactive

dreadatour linked an issue Jan 13, 2025 that may be closed by this pull request

Support Video file and Video clip, Video frame models and operations with them #797

Open

dreadatour mentioned this pull request Jan 13, 2025

Support Video file and Video clip, Video frame models and operations with them #797

Open

dmpetrov requested changes Jan 13, 2025

View reviewed changes

shcheklein reviewed Jan 14, 2025

View reviewed changes

pyproject.toml Show resolved Hide resolved

shcheklein reviewed Jan 14, 2025

View reviewed changes

src/datachain/lib/file.py Outdated Show resolved Hide resolved

shcheklein reviewed Jan 14, 2025

View reviewed changes

src/datachain/lib/file.py Outdated Show resolved Hide resolved

shcheklein reviewed Jan 14, 2025

View reviewed changes

src/datachain/lib/file.py Show resolved Hide resolved

shcheklein reviewed Jan 14, 2025

View reviewed changes

dreadatour commented Jan 14, 2025

View reviewed changes

Code review update

031b9df

dreadatour temporarily deployed to internal January 14, 2025 15:24 — with GitHub Actions Inactive

[pre-commit.ci] auto fixes from pre-commit.com hooks

548bbd5

for more information, see https://pre-commit.ci

pre-commit-ci bot temporarily deployed to internal January 14, 2025 15:25 Inactive

Code review update

b55149a

dreadatour temporarily deployed to internal January 14, 2025 15:28 — with GitHub Actions Inactive

shcheklein reviewed Jan 14, 2025

View reviewed changes

src/datachain/lib/video.py Outdated Show resolved Hide resolved

shcheklein reviewed Jan 14, 2025

View reviewed changes

src/datachain/lib/file.py

timestamp: float = Field(default=0)

class Video(DataModel):

Copy link

Member

shcheklein Jan 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it be a subclass of VideoFile?

Code review update

2cd6d62

dreadatour temporarily deployed to internal January 15, 2025 01:48 — with GitHub Actions Inactive

Small fixes due to work on usage examples

5892ab9

dreadatour temporarily deployed to internal January 15, 2025 16:24 — with GitHub Actions Inactive

dmpetrov requested changes Jan 16, 2025

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add video models + functions #814

Add video models + functions #814

dreadatour commented Jan 13, 2025

codecov bot commented Jan 13, 2025 •

edited

Loading

dmpetrov left a comment

dmpetrov Jan 13, 2025

shcheklein Jan 14, 2025

dreadatour Jan 14, 2025

dmpetrov Jan 16, 2025

dreadatour Jan 14, 2025

shcheklein Jan 14, 2025

cloudflare-workers-and-pages bot commented Jan 14, 2025 •

edited

Loading

dreadatour commented Jan 14, 2025

shcheklein Jan 14, 2025

shcheklein Jan 14, 2025

shcheklein Jan 14, 2025

dmpetrov left a comment

dmpetrov Jan 16, 2025

dmpetrov Jan 16, 2025

dmpetrov Jan 16, 2025

dmpetrov Jan 16, 2025

dmpetrov Jan 16, 2025

dmpetrov Jan 16, 2025

dmpetrov Jan 16, 2025

dmpetrov Jan 16, 2025

dmpetrov Jan 16, 2025

dmpetrov Jan 16, 2025

		"""`DataModel` for reading video files."""


		class VideoClip(VideoFile):

		return iio.imread(file.stream(), index=frame, plugin="pyav")


		def video_frame(file: "VideoFile", frame: int, format: str = "jpeg") -> bytes:

Add video models + functions #814

Are you sure you want to change the base?

Add video models + functions #814

Conversation

dreadatour commented Jan 13, 2025

Video models added

Meta models added

Couple usage examples

Listing

Add meta

Split video to virtual frames

Split video into frames and upload to storage

codecov bot commented Jan 13, 2025 • edited Loading

Codecov Report

dmpetrov left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

cloudflare-workers-and-pages bot commented Jan 14, 2025 • edited Loading

Deploying datachain-documentation with Cloudflare Pages

dreadatour commented Jan 14, 2025

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

dmpetrov left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

codecov bot commented Jan 13, 2025 •

edited

Loading

cloudflare-workers-and-pages bot commented Jan 14, 2025 •

edited

Loading