Package restructuring
pedroodb committed Jun 27, 2022
1 parent ea5bd23 commit f1ee565
Showing 48 changed files with 133 additions and 70 deletions.
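The restructuring is mechanical but wide: every module moves under a new top-level `lsat` package, so internal imports change from `helpers.*`/`type_hints.*` to `lsat.helpers.*`/`lsat.typing.*`, module-level helpers gain a leading underscore, and a minimal `setup.py` plus generated egg-info metadata are added. In import terms (example names taken from the diff below):

```python
# Before this commit:
from helpers.get_cut_paths import get_cut_paths
from type_hints import CutData, SignerData, KeypointData

# After this commit:
from lsat.helpers.get_cut_paths import get_cut_paths
from lsat.typing import CutData, SignerData, KeypointData
```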
9 changes: 9 additions & 0 deletions lsat.egg-info/PKG-INFO
@@ -0,0 +1,9 @@
Metadata-Version: 2.1
Name: lsat
Version: 1.0
Summary: UNKNOWN
License: UNKNOWN
Platform: UNKNOWN

UNKNOWN

7 changes: 7 additions & 0 deletions lsat.egg-info/SOURCES.txt
@@ -0,0 +1,7 @@
README.md
setup.py
lsat/__init__.py
lsat.egg-info/PKG-INFO
lsat.egg-info/SOURCES.txt
lsat.egg-info/dependency_links.txt
lsat.egg-info/top_level.txt
1 change: 1 addition & 0 deletions lsat.egg-info/dependency_links.txt
@@ -0,0 +1 @@

1 change: 1 addition & 0 deletions lsat.egg-info/top_level.txt
@@ -0,0 +1 @@
lsat
Empty file added lsat/__init__.py
Empty file.
50 changes: 34 additions & 16 deletions dataset/LSA_Dataset.py → lsat/dataset/LSA_Dataset.py
@@ -1,34 +1,38 @@
import json
from pathlib import Path
from urllib.request import urlretrieve
+from typing import Callable, Optional, Generator, Literal, Iterable, Iterator

from torch import Tensor
from torch.utils.data import Dataset
from torchvision.io import VideoReader
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
import py7zr

-from typing import Callable, Optional, Generator, Literal, Iterable, Iterator
-from type_hints import CutData, SignerData, KeypointData
-from type_hints.dataset import (
+from lsat.typing import CutData, SignerData, KeypointData
+from lsat.typing import (
    Sample,
    CLIP_HINT,
    KEYPOINTS_HINT,
    LABEL_HINT
)
-from helpers.sample_filters import sample_contains_oov, sample_above_confidence_threshold
-from helpers.get_cut_paths import get_cut_paths
-from helpers.train_test import split_train_test, load_train_test, store_samples_to_csv
+from lsat.helpers.sample_filters import sample_contains_oov, sample_above_confidence_threshold
+from lsat.helpers.get_cut_paths import get_cut_paths
+from lsat.helpers.train_test import split_train_test, load_train_test, store_samples_to_csv
+from lsat.helpers.ProgressBar import ProgressBar


-def yield_tokens(samples: Iterable[Path], tokenizer: Callable[[str], list[str]]) -> Generator[list[str], None, None]:
+def _yield_tokens(samples: Iterable[Path], tokenizer: Callable[[str], list[str]]) -> Generator[list[str], None, None]:
    for sample in samples:
        with sample.open() as data_file:
            data: CutData = json.load(data_file)
        yield tokenizer(data['label'])

-def load_clip_as_tensors(paths: dict[str, Path]) -> Iterable[Tensor]:
+def _load_clip_as_tensors(paths: dict[str, Path]) -> Iterable[Tensor]:
    return (map(lambda frame: frame['data'], VideoReader(str(paths['mp4']), "video")))


class LSA_Dataset(Dataset):

    def __init__(self,
@@ -52,28 +56,42 @@ def __init__(self,
        self.keypoints_transform = keypoints_transform
        self.label_transform = label_transform

-        train_path = self.root.parent / f"min_freq_{words_min_freq}_threshold_{str(signer_confidence_threshold).replace('.','')}" / "train.csv"
-        test_path = self.root.parent / f"test_min_freq_{words_min_freq}_threshold_{str(signer_confidence_threshold).replace('.','')}" / "test.csv"
+        if not self.root.exists() or not any(self.root.iterdir()):
+            self.root.mkdir(exist_ok=True, parents=True)
+            pb = ProgressBar()
+            urlretrieve("http://c1781468.ferozo.com/data/lsa-t.7z", self.root / "lsat.7z", pb)
+            with py7zr.SevenZipFile(self.root / 'lsat.7z', mode='r') as z:
+                z.extractall(self.root)
+
+        splits_path = self.root.parent / "splits" / f"min_freq_{words_min_freq}_threshold_{str(signer_confidence_threshold).replace('.','')}"
+        splits_path.mkdir(exist_ok=True, parents=True)
+        train_path = splits_path / "train.csv"
+        test_path = splits_path / "test.csv"
        sample_paths = map(lambda p: Path(str(p.resolve())[:-3] + "json"), self.root.glob('**/*.mp4'))

        special_symbols = ['<unk>', '<pad>', '<bos>', '<eos>']
        self.tokenizer: Callable[[str], list[str]] = get_tokenizer('spacy', language='es_core_news_lg')
-        self.vocab = build_vocab_from_iterator(yield_tokens(sample_paths, self.tokenizer),
+        full_vocab = build_vocab_from_iterator(_yield_tokens(sample_paths, self.tokenizer),
                                               min_freq = words_min_freq,
                                               specials = special_symbols,
                                               special_first = True)
        # by default returns <unk> index
-        self.vocab.set_default_index(0)
+        full_vocab.set_default_index(0)

        if train_path.exists() and test_path.exists():
            self.train_samples, self.test_samples = load_train_test(train_path, test_path)
        else:
            self.train_samples, self.test_samples = split_train_test(self.root, lambda path:
-                (not sample_contains_oov(path, self.vocab, self.tokenizer))
+                (not sample_contains_oov(path, full_vocab, self.tokenizer))
                and (sample_above_confidence_threshold(path, self.signer_confidence_threshold) if self.signer_confidence_threshold != 0 else True))
            store_samples_to_csv(train_path, self.train_samples)
            store_samples_to_csv(test_path, self.test_samples)
-        self.max_label_len = max(map(len, yield_tokens(self.train_samples + self.test_samples, self.tokenizer)))
+        self.max_label_len = max(map(len, _yield_tokens(self.train_samples + self.test_samples, self.tokenizer)))
+        self.vocab = build_vocab_from_iterator(_yield_tokens(self.train_samples, self.tokenizer),
+                                               min_freq = words_min_freq,
+                                               specials = special_symbols,
+                                               special_first = True)
+        self.vocab.set_default_index(0)

    def __len__(self) -> int:
        return len(self.train_samples if self.mode == "train" else self.test_samples)
@@ -86,8 +104,8 @@ def __getitem__(self, index: int) -> Sample:
            data: CutData = json.load(data_file)
        clip = (
            None if not self.load_clips
-            else load_clip_as_tensors(paths) if self.clip_transform is None
-            else self.clip_transform(load_clip_as_tensors(paths))
+            else _load_clip_as_tensors(paths) if self.clip_transform is None
+            else self.clip_transform(_load_clip_as_tensors(paths))
        )
        keypoints = (
            None if not self.load_keypoints
Empty file added lsat/dataset/__init__.py
Empty file.
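With the package layout in place, constructing the dataset goes through the new import path. A minimal sketch — the full `__init__` signature is collapsed in this diff, so the keyword names below are inferred from the attributes it assigns and should be treated as assumptions:

```python
from pathlib import Path

from lsat.dataset.LSA_Dataset import LSA_Dataset

# Keyword names inferred from __init__'s body (self.root, self.mode,
# words_min_freq, signer_confidence_threshold); treat them as assumptions.
dataset = LSA_Dataset(
    root=Path("data/lsa-t"),
    mode="train",
    words_min_freq=5,
    signer_confidence_threshold=0.5,
)
print(len(dataset))                  # number of train samples
clip, keypoints, label = dataset[0]  # a Sample triple
```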
42 changes: 23 additions & 19 deletions dataset/transforms.py → lsat/dataset/transforms.py
@@ -6,7 +6,7 @@
from torchvision.transforms.functional import crop, resize
from torchtext.vocab import Vocab

-from type_hints import Box, KeypointData
+from lsat.typing import Box, KeypointData

T = TypeVar('T')
def get_frames_reduction_transform(max_frames: int) -> Callable[[list[T]], list[T]]:
@@ -22,21 +22,25 @@ def frames_reduction_transform(clip: list[T]) -> list[T]:
        return frames
    return frames_reduction_transform

-def get_roi_selector_transform(height: int, width: int) -> Callable[[Tensor, Box], Tensor]:
+def _frame_roi_selector_transform(img: Tensor, roi: Box, height: int, width: int) -> Tensor:
+    '''Frame-level transform that crops a given roi from the frame and resizes it to the desired values keeping the aspect ratio and padding with zeros if necessary'''
+    img = crop(img, int(roi['y1']),int(roi['x1']),int(roi['height']),int(roi['width']))
+    pad = torch.zeros(3, height, width, dtype=torch.uint8)
+    if (roi['height'] - height) > (roi['width'] - width):
+        new_width = int(roi['width']*height/roi['height'])
+        img = resize(img, [height, new_width])
+        pad[:, :, int((width - new_width)/2):-int((width - new_width)/2) - (1 if (width - new_width) % 2 == 1 else 0)] = img
+    else:
+        new_height = int(roi['height']*width/roi['width'])
+        img = resize(img, [new_height, width])
+        pad[:, int((height - new_height)/2):-int((height - new_height)/2) - (1 if (height - new_height) % 2 == 1 else 0), :] = img
+    return pad
+
+def get_roi_selector_transform(height: int, width: int, rois: list[Box]) -> Callable[[Tensor], Tensor]:
    '''Given height and width, returns a frame-level roi selector transform'''
-    def roi_selector_transform(img: Tensor, box: Box) -> Tensor:
-        '''Frame-level transform that crops a given roi from the frame and resizes it to the desired values keeping the aspect ratio and padding with zeros if necessary'''
-        img = crop(img, int(box['y1']),int(box['x1']),int(box['height']),int(box['width']))
-        pad = torch.zeros(3, height, width, dtype=torch.uint8)
-        if (box['height'] - height) > (box['width'] - width):
-            new_width = int(box['width']*height/box['height'])
-            img = resize(img, [height, new_width])
-            pad[:, :, int((width - new_width)/2):-int((width - new_width)/2) - (1 if (width - new_width) % 2 == 1 else 0)] = img
-        else:
-            new_height = int(box['height']*width/box['width'])
-            img = resize(img, [new_height, width])
-            pad[:, int((height - new_height)/2):-int((height - new_height)/2) - (1 if (height - new_height) % 2 == 1 else 0), :] = img
-        return pad
+    def roi_selector_transform(frames: Tensor) -> Tensor:
+        cropped_frames = [_frame_roi_selector_transform(frame, roi, height, width) for (roi, frame) in zip(rois, frames)]
+        return stack(cropped_frames)
    return roi_selector_transform

def get_keypoint_format_transform(keypoints_to_use: list[int]) -> Callable[[KeypointData], Tensor]:
@@ -56,7 +60,7 @@ def keypoints_norm_to_nose_transform(keypoints: Tensor) -> Tensor:
        [0]
    ]))

-def __get_interpolated_point__(i: int, points: list[tuple[float, float, float]], threshold: float, default: tuple[float, float] = (0,0)) -> tuple[float, float]:
+def _get_interpolated_point(i: int, points: list[tuple[float, float, float]], threshold: float, default: tuple[float, float] = (0,0)) -> tuple[float, float]:
    '''Returns for a point, if confidence lower than threshold, the interpolation of the next and previous point with confidence over threshold'''
    next_point = next(((point[0], point[1]) for point in points[(i+1):] if point[2] > threshold), None)
    prev_point = next(((point[0], point[1]) for point in reversed(points[:i]) if point[2] > threshold), None)
@@ -66,21 +70,21 @@ def __interpolate_each__(keypoints: list[tuple[float, float, float]],
        )
    )

-def __interpolate_each__(keypoints: list[tuple[float, float, float]], threshold: float, max_missing_percent: float, default: Optional[tuple[float, float]] = None) -> Optional[list[tuple[float, float]]]:
+def _interpolate_each(keypoints: list[tuple[float, float, float]], threshold: float, max_missing_percent: float, default: Optional[tuple[float, float]] = None) -> Optional[list[tuple[float, float]]]:
    '''For a list of points, replaces those with confidence lower than threshold with the interpolation of the next and previous point with confidence over threshold'''
    # keypoints contains [x,y,z] for each frame
    missing = sum(1 for point in keypoints if point[2] < threshold)
    if missing / len(keypoints) <= max_missing_percent:
        return [
-            (each[0], each[1]) if each[2] > threshold else __get_interpolated_point__(i, keypoints, threshold) for i, each in enumerate(keypoints)
+            (each[0], each[1]) if each[2] > threshold else _get_interpolated_point(i, keypoints, threshold) for i, each in enumerate(keypoints)
        ]
    return None if not default else [default for _ in keypoints]

def interpolate_keypoints_transform(keypoints: list[Tensor]) -> list[Tensor]:
    '''For a list of keypoint frames (each in format given by keypoint_format_transform), applies __interpolate_each__ to each frame'''
    # switch dims to keypoints, frames, (x,y,c)
    keypoints_trans = stack(keypoints).permute(2, 0, 1)
-    interpolated_keypoints = Tensor([__interpolate_each__(each.tolist(), 0.2, 0.7, (0, 0)) for each in keypoints_trans])
+    interpolated_keypoints = Tensor([_interpolate_each(each.tolist(), 0.2, 0.7, (0, 0)) for each in keypoints_trans])
    return [
        frame for frame in interpolated_keypoints.permute(1, 2, 0)
    ]
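The key behavioral change here: `get_roi_selector_transform` used to return a per-frame `(img, box)` transform; it now takes the per-frame rois up front and returns a clip-level transform that crops every frame and stacks the result. A sketch of the new usage, with illustrative shapes and roi values:

```python
import torch

from lsat.dataset.transforms import get_roi_selector_transform

# One roi per frame; the values here are illustrative.
rois = [{'x1': 10, 'y1': 20, 'width': 200, 'height': 300} for _ in range(16)]
frames = torch.zeros(16, 3, 480, 640, dtype=torch.uint8)  # dummy 16-frame clip

transform = get_roi_selector_transform(height=256, width=256, rois=rois)
clip = transform(frames)  # (16, 3, 256, 256): aspect ratio kept, zero-padded
```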
2 changes: 1 addition & 1 deletion generation/download.py → lsat/generation/download.py
@@ -2,7 +2,7 @@
from pytube import Playlist
from pytube.cli import on_progress

-from helpers.slugify import slugify
+from lsat.helpers.slugify import slugify

def main():
    'Downloads videos and subtitles into raw folder.'
@@ -2,8 +2,8 @@
from pathlib import Path
from shutil import copyfile

-from helpers.get_cut_paths import get_cut_paths
-from type_hints import SignerData
+from lsat.helpers.get_cut_paths import get_cut_paths
+from lsat.typing import SignerData


def gen_cuts_only_db():
@@ -4,9 +4,9 @@
from typing import Callable
from numpy.typing import ArrayLike

-from helpers.get_cut_paths import get_cut_paths
-from helpers.group_kds import group_kds
-from type_hints import KeypointData, SignerData, Box
+from lsat.helpers.get_cut_paths import get_cut_paths
+from lsat.helpers.group_kds import group_kds
+from lsat.typing import KeypointData, SignerData, Box


def draw_rectangle(box: Box) -> Callable[[ArrayLike], ArrayLike]:
File renamed without changes.
6 changes: 3 additions & 3 deletions generation/process_ap.py → lsat/generation/process_ap.py
@@ -3,9 +3,9 @@
from pathlib import Path
from math import sqrt

-from type_hints import Box, KeypointData
-from helpers.get_cut_paths import get_cut_paths
-from helpers.group_kds import group_kds
+from lsat.typing import Box, KeypointData
+from lsat.helpers.get_cut_paths import get_cut_paths
+from lsat.helpers.group_kds import group_kds


def format_box(box: list[float]) -> Box:
File renamed without changes.
16 changes: 16 additions & 0 deletions lsat/helpers/ProgressBar.py
@@ -0,0 +1,16 @@
import progressbar

class ProgressBar():
    def __init__(self):
        self.pbar = None

    def __call__(self, block_num: int, block_size: int, total_size: int):
        if not self.pbar:
            self.pbar = progressbar.ProgressBar(maxval=total_size)
            self.pbar.start()

        downloaded = block_num * block_size
        if downloaded < total_size:
            self.pbar.update(downloaded)
        else:
            self.pbar.finish()
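`ProgressBar` implements the `reporthook` protocol of `urllib.request.urlretrieve` (block number, block size, total size), which is exactly how `LSA_Dataset.__init__` uses it when downloading the dataset archive:

```python
from urllib.request import urlretrieve

from lsat.helpers.ProgressBar import ProgressBar

# Same call shape as in LSA_Dataset.__init__ above.
urlretrieve("http://c1781468.ferozo.com/data/lsa-t.7z", "lsat.7z", ProgressBar())
```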
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion helpers/get_score.py → lsat/helpers/get_score.py
@@ -2,4 +2,4 @@ def get_score(scores: list[float]) -> float:
    m1 = max(scores)
    scores = scores.copy()
    scores.remove(m1)
-    return 0 if m1 == 0 else (m1 - max(scores)) / m1
\ No newline at end of file
+    return 0 if m1 == 0 else (m1 - max(scores)) / m1
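`get_score` is the relative margin between the two highest scores — 0 for a tie (or an all-zero list), approaching 1 as the top score dominates. A few illustrative values:

```python
from lsat.helpers.get_score import get_score

get_score([0.9, 0.6, 0.3])  # (0.9 - 0.6) / 0.9 ≈ 0.333
get_score([0.5, 0.5])       # 0.0: a tie gives no confidence
get_score([0.0, 0.0])       # 0.0: guarded against division by zero
```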
2 changes: 1 addition & 1 deletion helpers/group_kds.py → lsat/helpers/group_kds.py
@@ -1,4 +1,4 @@
-from type_hints import KeypointData
+from lsat.typing import KeypointData


def group_kds(kds: list[KeypointData]) -> list[list[KeypointData]]:
7 changes: 4 additions & 3 deletions helpers/sample_filters.py → lsat/helpers/sample_filters.py
@@ -4,9 +4,10 @@

from torchtext.vocab import Vocab

-from type_hints import CutData
-from helpers.get_cut_paths import get_cut_paths
-from helpers.get_score import get_score
+from lsat.typing import CutData
+from lsat.helpers.get_cut_paths import get_cut_paths
+from lsat.helpers.get_score import get_score


def sample_contains_oov(data_path: Path, vocab: Vocab, tokenizer: Callable[[str], list[str]]) -> bool:
    with open(data_path) as data_file:
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
3 changes: 3 additions & 0 deletions lsat/typing/__init__.py
@@ -0,0 +1,3 @@
from lsat.typing.Box import Box
from lsat.typing.data_formats import KeypointData, CutData, SignerData
from lsat.typing.dataset import Sample, CLIP_HINT, KEYPOINTS_HINT, LABEL_HINT
2 changes: 1 addition & 1 deletion type_hints/data_formats.py → lsat/typing/data_formats.py
@@ -1,6 +1,6 @@
from typing import TypedDict

-from .Box import Box
+from lsat.typing.Box import Box


class KeypointData(TypedDict):
9 changes: 5 additions & 4 deletions type_hints/dataset.py → lsat/typing/dataset.py
@@ -1,15 +1,16 @@
-from typing import Iterable, Optional, Tuple, TypeVar, Union
+from typing import Iterable, Optional, TypeVar, Union

from torch import Tensor

-from data_formats import KeypointData
+from lsat.typing.data_formats import KeypointData


CLIP_HINT = TypeVar('CLIP_HINT')
KEYPOINTS_HINT = TypeVar('KEYPOINTS_HINT')
LABEL_HINT = TypeVar('LABEL_HINT')

-Sample = Tuple[
+Sample = tuple[
    Optional[Union[Iterable[Tensor], CLIP_HINT]],
    Optional[Union[Iterable[KeypointData], KEYPOINTS_HINT]],
-    Union[str, LABEL_HINT]]
+    Union[str, LABEL_HINT]
+]
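`Sample` is the (clip, keypoints, label) triple returned by `LSA_Dataset.__getitem__`; the first two slots are `None` when loading is disabled, raw iterables when no transform is set, or whatever the corresponding transform returns. For instance:

```python
from lsat.typing import Sample

# A sample with clips and keypoints disabled and the raw label string.
sample: Sample = (None, None, "hola")
clip, keypoints, label = sample
```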
@@ -1,10 +1,9 @@
-import argparse, json, sys
+import argparse, json
import fiftyone as fo
from pathlib import Path
from fiftyone import Sample

-sys.path.append("/mnt/data/datasets/LSA-T")
-from helpers.get_score import get_score
+from lsat.helpers.get_score import get_score


def store_sample(clip_file: Path, dataset):
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
@@ -24,13 +24,14 @@
   },
   "outputs": [
    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "La base de datos contiene:\n",
-      " 64 videos completos\n",
-      " 14880 clips resultantes de dividir los videos de acuerdo a los subtítulos\n",
-      " 21.78172527777782 hs de video\n"
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'lsat'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+      "\u001b[0;32m/tmp/ipykernel_17682/1906465196.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mIPython\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisplay\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdisplay\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mlsat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhelpers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvisualization\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msave_fig\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mlsat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtyping\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mCutData\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'lsat'"
     ]
    }
   ],
@@ -45,9 +46,9 @@
"import matplotlib.pyplot as plt\n",
"from IPython.display import display\n",
"\n",
"sys.path.append(\"/mnt/data/datasets/LSA-T\")\n",
"sys.path.append(\"/mnt/data/datasets/LSA-T/lsat\")\n",
"from helpers.visualization import save_fig\n",
"from type_hints import CutData\n",
"from typing import CutData\n",
"\n",
"\n",
"plt.style.use(\"ggplot\")\n",
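Note that, as committed, the cell appends the package directory itself to `sys.path` and imports `CutData` from `typing`, which collides with the standard-library module of that name; the stored output above records the resulting `ModuleNotFoundError`. A sketch of imports consistent with the new layout, assuming the repository root (not `lsat/`) is made importable:

```python
import sys

# Assumes the repository root is on sys.path; installing the package
# (pip install -e .) would make the append unnecessary.
sys.path.append("/mnt/data/datasets/LSA-T")

from lsat.helpers.visualization import save_fig
from lsat.typing import CutData
```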
File renamed without changes.
File renamed without changes.
5 changes: 5 additions & 0 deletions setup.py
@@ -0,0 +1,5 @@
from distutils.core import setup
setup(name='lsat',
      version='1.0',
      packages=['lsat'],
      )
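As written, `packages=['lsat']` registers only the top-level package — the `lsat.dataset`, `lsat.helpers`, `lsat.typing`, and `lsat.generation` subpackages added in this commit would not be installed — and the generated PKG-INFO above shows UNKNOWN for every metadata field. A hedged setuptools variant (the description string is illustrative, not from the commit):

```python
from setuptools import find_packages, setup

setup(
    name='lsat',
    version='1.0',
    description='Tooling for the LSA-T sign language dataset',  # illustrative
    packages=find_packages(include=['lsat', 'lsat.*']),
)
```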