diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 046c4ec3..c5e0f157 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -9,19 +9,21 @@ on:
 jobs:
   miniconda:
-    name: Miniconda ${{ matrix.os }}
+    name: Miniconda ${{ matrix.os }} - Python ${{ matrix.python-version }}
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         os: ["ubuntu-latest", "windows-latest"]
+        python-version: [ "3.8", "3.12" ]
     steps:
       - uses: actions/checkout@v2
-      - uses: conda-incubator/setup-miniconda@v2
+      - name: Set up Miniconda (Python ${{ matrix.python-version }})
+        uses: conda-incubator/setup-miniconda@v2
         with:
           activate-environment: test
           channels: conda-forge,defaults
           environment-file: environment.yml
-          python-version: 3.8
+          python-version: ${{ matrix.python-version }}
           auto-activate-base: false
       - shell: bash -l {0}
         run: |
@@ -44,6 +46,7 @@ jobs:
           conda install pytest
           py.test . --cov-report=xml --cov=traja -vvv
       - name: Upload coverage to Codecov
+        if: matrix.python-version == '3.8'
         uses: codecov/codecov-action@v1
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/.gitignore b/.gitignore
index c3f65827..841b7464 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,5 @@ docs/source/reference
 
 # Model parameter files
 *.pt
+.python-version
+datasets/
\ No newline at end of file
diff --git a/environment.yml b/environment.yml
index c959db94..56adce84 100644
--- a/environment.yml
+++ b/environment.yml
@@ -21,7 +21,6 @@ dependencies:
   - networkx
   - seaborn
   - pytorch
-  - pytest==6.2.2
+  - pytest>=8.0.0
   - numba>=0.50.0
-  - pyDOE2>=1.3.0
   - statsmodels
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 263feb79..7b597ce0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 pandas>=1.2.0
-numpy==1.18.5
+numpy>=1.22.0
 matplotlib
 shapely
 scipy>=1.4.1
diff --git a/requirements/docs.txt b/requirements/docs.txt
index 7a8b4307..c2dfee6c 100644
--- a/requirements/docs.txt
+++ b/requirements/docs.txt
@@ -1,5 +1,5 @@
 pandas>=1.2.0
-numpy==1.18.5
+numpy>=1.22.0
 matplotlib
 shapely
 scipy
diff --git a/requirements/extra.txt b/requirements/extra.txt
index 9193415a..87ce5253 100644
--- a/requirements/extra.txt
+++ b/requirements/extra.txt
@@ -4,13 +4,7 @@ pytest
 h5py
 ipython
 pre-commit
-shapely
-scipy>=1.4.1
-scikit-learn
-fastdtw
-networkx
-seaborn
-torch
 h5py
-numba>=0.50.0
-pyDOE2>=1.3.0
\ No newline at end of file
+numba>=0.50.1
+black
+isort
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index 674af40a..03faf68a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.0
+current_version = 23.0.0
 
 [yapf]
 column_limit = 120
diff --git a/traja/__init__.py b/traja/__init__.py
index 90fa0d51..306488a6 100644
--- a/traja/__init__.py
+++ b/traja/__init__.py
@@ -1,15 +1,15 @@
 import logging
 
-from traja import dataset
-from traja import models
+from traja import dataset, models
+
 from .accessor import TrajaAccessor
-from .frame import TrajaDataFrame, TrajaCollection
-from .parsers import read_file, from_df
+from .frame import TrajaCollection, TrajaDataFrame
+from .parsers import from_df, read_file
 from .plotting import *
 from .trajectory import *
 
 __author__ = "justinshenk"
-__version__ = "22.0.0"
+__version__ = "23.0.0"
 
 logging.basicConfig(level=logging.INFO)
diff --git a/traja/contrib/rdp.py b/traja/contrib/rdp.py
index e00c4179..a787b877 100644
--- a/traja/contrib/rdp.py
+++ b/traja/contrib/rdp.py
@@ -19,8 +19,9 @@ The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software. """ + from functools import partial -from typing import Union, Callable +from typing import Callable, Union import numpy as np diff --git a/traja/core.py b/traja/core.py new file mode 100644 index 00000000..e3f7fc28 --- /dev/null +++ b/traja/core.py @@ -0,0 +1,8 @@ +import pandas as pd + + +# Check whether pandas series is datetime or timedelta +def is_datetime_or_timedelta_dtype(series: pd.Series) -> bool: + return pd.api.types.is_datetime64_dtype( + series + ) or pd.api.types.is_timedelta64_dtype(series) diff --git a/traja/dataset/__init__.py b/traja/dataset/__init__.py index 0feb17b9..85e6bc3b 100644 --- a/traja/dataset/__init__.py +++ b/traja/dataset/__init__.py @@ -1,3 +1,3 @@ from . import example -from .dataset import TimeSeriesDataset, MultiModalDataLoader +from .dataset import MultiModalDataLoader, TimeSeriesDataset from .pedestrian import load_ped_data, ped_datasets diff --git a/traja/dataset/dataset.py b/traja/dataset/dataset.py index 3a8e1413..5822b13e 100644 --- a/traja/dataset/dataset.py +++ b/traja/dataset/dataset.py @@ -11,6 +11,7 @@ 1. Class distribution in the dataset """ + import logging import math from collections import defaultdict diff --git a/traja/dataset/example.py b/traja/dataset/example.py index 32ad890e..fb3ca9c2 100644 --- a/traja/dataset/example.py +++ b/traja/dataset/example.py @@ -6,5 +6,5 @@ def jaguar(cache_url=default_cache_url): # Sample data data_url = "https://raw.githubusercontent.com/traja-team/traja-research/dataset_und_notebooks/dataset_analysis/jaguar5.csv" - df = pd.read_csv(data_url, error_bad_lines=False) + df = pd.read_csv(data_url, on_bad_lines="skip") return df diff --git a/traja/dataset/pedestrian.py b/traja/dataset/pedestrian.py index 89ad8f26..5a62a92f 100644 --- a/traja/dataset/pedestrian.py +++ b/traja/dataset/pedestrian.py @@ -1,11 +1,12 @@ -import subprocess import glob import os +import subprocess from typing import List + import pandas as pd -from traja.dataset import dataset -import traja +import traja +from traja.dataset import dataset """Convenience module for downloading pedestrian-related datasets.""" diff --git a/traja/dataset/pituitary_gland.py b/traja/dataset/pituitary_gland.py deleted file mode 100644 index ed6c9e63..00000000 --- a/traja/dataset/pituitary_gland.py +++ /dev/null @@ -1,151 +0,0 @@ -import numpy as np -import pandas as pd -from numpy import exp -from numba import jit -from scipy.integrate import odeint -from pyDOE2 import lhs - - -# PyTest will not compute coverage correctly for @jit-compiled code. -# Thus we must explicitly suppress the coverage check. -@jit -def pituitary_ode(w, t, p): # pragma: no cover - """ - Defines the differential equations for the pituirary gland system. - To be used with scipy.integrate.odeint (this is the rhs equation). 
- - Arguments: - w : vector of the state variables: - w = [v, n, f, c] - t : time - p : vector of the parameters: - p = [gk, gcal, gsk, gbk, gl, k] - """ - vca = 60 - vk = -75 - vl = -50 - Cm = 10 - vn = -5 - vm = -20 - vf = -20 - sn = 10 - sm = 12 - sf = 2 - taun = 30 - taubk = 5 - ff = 0.01 - alpha = 0.0015 - ks = 0.4 - auto = 0 - cpar = 0 - noise = 4.0 - - v, n, f, c = w - - gk, gcal, gsk, gbk, gl, kc = p - - cd = (1 - auto) * c + auto * cpar - - phik = 1 / (1 + exp((vn - v) / sn)) - phif = 1 / (1 + exp((vf - v) / sf)) - phical = 1 / (1 + exp((vm - v) / sm)) - cinf = cd ** 2 / (cd ** 2 + ks ** 2) - - ica = gcal * phical * (v - vca) - isk = gsk * cinf * (v - vk) - ibk = gbk * f * (v - vk) - ikdr = gk * n * (v - vk) - ileak = gl * (v - vl) - - ikdrx = ikdr - ibkx = ibk - - ik = isk + ibk + ikdr - inoise = 0 # noise*w #TODO fix - - dv = -(ica + ik + inoise + ileak) / Cm - dn = (phik - n) / taun - df = (phif - f) / taubk - dc = -ff * (alpha * ica + kc * c) - return dv, dn, df, dc - - -def compute_pituitary_gland_df_from_parameters(downsample_rate, - gcal, gsk, gk, gbk, gl, kc, - sample_id, - trim_start=20000): - """ - Computes a Traja dataframe from the pituitary gland simulation. - - It is easier to discuss ion flow in term of conductances than resistances. - If V / R = I, where V is the voltage, R is the resistance and I is the - current, then V * C = I, where C = 1 / R is the conductance. - - Below we specify arguments in terms of maximum conductances, - i.e. the maximum rate at which ion channels let ions through - the cell walls. - - Arguments: - downsample_rate : How much the dataframe will be downsampled (relative - to the original simulation) - gcal : The maximum calcium conductance - gsk : The maximum s-potassiun conductance - gk : The maximum potassium conductance - gbk : The maximum b-potassium conductance - gl : The maximum leak conductance - kc : - sample_id : The ID of this particular sample. Must be unique - trim_start : How much of the start of the sample to trim. - The start of an activation (before converging to a limit cycle - or fixed point) is usually not interesting from a biological - perspective, so the default is to remove it. - """ - - # Initial conditions - v = -60. 
- n = 0.1 - f = 0.01 - c = 0.1 - - p = (gk, gcal, gsk, gbk, gl, kc) - w0 = (v, n, f, c) - abserr = 1.0e-8 - relerr = 1.0e-6 - - t = np.arange(0, 5000, 0.05) - # print("Generating gcal={}, gsk={}, gk={}, gbk={}, gl={}, kc={}".format(gcal, gsk, gk, gbk, gl, kc)) - wsol = odeint(pituitary_ode, w0, t, args=(p,), atol=abserr, rtol=relerr) - df = pd.DataFrame(wsol, columns=['v', 'n', 'f', 'c']) - df = df[trim_start:] - df['ID'] = sample_id - df['gcal'] = gcal - df['gsk'] = gsk - df['gk'] = gk - df['gbk'] = gbk - df['gl'] = gl - df['kc'] = kc - df = df.iloc[::downsample_rate, :] - # df = df.drop(columns=['t', 'ikdrx', 'ibkx']) - - return df - - -def create_latin_hypercube_sampled_pituitary_df(downsample_rate=100, samples=1000): - latin_hypercube_samples = lhs(6, criterion='center', samples=samples) - - # gcal, gsk, gk, gbk, gl, kc, - range_start = (0.5, 0.5, 0.8, 0., 0.05, 0.03) - range_end = (3.5, 3.5, 5.6, 4., 0.35, 0.21) - - parameters = latin_hypercube_samples * range_end - latin_hypercube_samples * range_start - - dataframes = [] - for sample_id, parameter in enumerate(parameters): - gcal, gsk, gk, gbk, gl, kc = parameter - df = compute_pituitary_gland_df_from_parameters(downsample_rate, - gcal, gsk, gk, gbk, gl, kc, - sample_id) - dataframes.append(df) - - num_samples = len(dataframes) - return pd.concat(dataframes), num_samples diff --git a/traja/frame.py b/traja/frame.py index 9c60c872..bf0b7126 100644 --- a/traja/frame.py +++ b/traja/frame.py @@ -1,6 +1,6 @@ import logging -from typing import Optional, Union, Tuple import warnings +from typing import Optional, Tuple, Union import numpy as np import pandas as pd @@ -54,13 +54,13 @@ def __init__(self, *args, **kwargs): args[0]._copy_attrs(self) for name, value in traja_kwargs.items(): self.__dict__[name] = value - - # Initialize + + # Initialize self._convex_hull = None # Initialize metadata like 'fps','spatial_units', etc. 
self._init_metadata() - + @property def _constructor(self): return TrajaDataFrame @@ -171,7 +171,7 @@ def center(self): x = self.x y = self.y return float(x.mean()), float(y.mean()) - + @property def convex_hull(self): """Property of TrajaDataFrame class representing @@ -179,7 +179,7 @@ def convex_hull(self): """ # Calculate if it doesn't exist - if self._convex_hull is None: + if self._convex_hull is None: xy_arr = self.traja.xy point_arr = traja.trajectory.calc_convex_hull(xy_arr) self._convex_hull = point_arr diff --git a/traja/models/__init__.py b/traja/models/__init__.py index 65c12776..3a8b9151 100644 --- a/traja/models/__init__.py +++ b/traja/models/__init__.py @@ -2,6 +2,7 @@ from traja.models.generative_models.vaegan import MultiModelVAEGAN from traja.models.predictive_models.ae import MultiModelAE from traja.models.predictive_models.lstm import LSTM + from .inference import * from .train import HybridTrainer -from .utils import TimeDistributed, read_hyperparameters, save, load +from .utils import TimeDistributed, load, read_hyperparameters, save diff --git a/traja/models/inference.py b/traja/models/inference.py index 2bab46f1..f72defef 100644 --- a/traja/models/inference.py +++ b/traja/models/inference.py @@ -134,6 +134,7 @@ def generate(self, num_steps, classify=True, scaler=None, plot_data=True): elif self.model_type == "vaegan" or "custom": return NotImplementedError + class Predictor: def __init__( self, diff --git a/traja/models/losses.py b/traja/models/losses.py index e0b51752..4dc025ed 100644 --- a/traja/models/losses.py +++ b/traja/models/losses.py @@ -2,6 +2,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu" + class Criterion: """Implements the loss functions of Autoencoders, Variational Autoencoders and LSTM models Huber loss is set as default for reconstruction loss, alternative is to use rmse, @@ -30,7 +31,7 @@ def forecasting_criterion( """ if mu is not None and logvar is not None: - kld = -0.5 * torch.sum(1 + logvar - mu ** 2 - logvar.exp()) + kld = -0.5 * torch.sum(1 + logvar - mu**2 - logvar.exp()) else: kld = 0 diff --git a/traja/models/optimizers.py b/traja/models/optimizers.py index b967583a..d7fbdc34 100644 --- a/traja/models/optimizers.py +++ b/traja/models/optimizers.py @@ -4,7 +4,6 @@ class Optimizer: def __init__(self, model_type, model, optimizer_type, classify=False): - """ Wrapper for setting the model optimizer and learning rate schedulers using ReduceLROnPlateau; If the model type is 'ae' or 'vae' - var optimizers is a dict with separate optimizers for encoder, decoder, @@ -94,7 +93,6 @@ def get_optimizers(self, lr=0.0001): return forecasting_optimizers, classification_optimizers, regression_optimizers def get_lrschedulers(self, factor: float, patience: int): - """Learning rate scheduler for each network in the model NOTE: Scheduler metric should be test set loss diff --git a/traja/models/predictive_models/lstm.py b/traja/models/predictive_models/lstm.py index 1df554e0..aae1cb0d 100644 --- a/traja/models/predictive_models/lstm.py +++ b/traja/models/predictive_models/lstm.py @@ -1,4 +1,5 @@ """Implementation of Multimodel LSTM""" + import torch from traja.models.utils import TimeDistributed @@ -14,7 +15,7 @@ class LSTM(torch.nn.Module): hidden_size: The number of features in the hidden state ``h`` output_size: The number of output dimensions batch_size: Size of batch. Default is 8 - sequence_length: The number of in each sample + sequence_length: The number of in each sample num_layers: Number of recurrent layers. 
E.g., setting ``num_layers=2`` would mean stacking two LSTMs together to form a `stacked LSTM`, with the second LSTM taking in outputs of the first LSTM and @@ -27,13 +28,13 @@ class LSTM(torch.nn.Module): """ def __init__( - self, - input_size: int, + self, + input_size: int, hidden_size: int, output_size: int, num_future: int = 8, batch_size: int = 8, - num_layers: int = 1, + num_layers: int = 1, reset_state: bool = True, bidirectional: bool = False, dropout: float = 0, @@ -47,7 +48,7 @@ def __init__( self.num_future = num_future self.hidden_size = hidden_size self.num_layers = num_layers - self.output_size = output_size + self.output_size = output_size self.dropout = dropout self.batch_first = batch_first self.reset_state = reset_state diff --git a/traja/models/train.py b/traja/models/train.py index 4c753d56..2e4da99a 100644 --- a/traja/models/train.py +++ b/traja/models/train.py @@ -131,7 +131,13 @@ def __str__(self): return f"Training model type {self.model_type}" def fit( - self, dataloaders, model_save_path=None, training_mode="forecasting", epochs=50, test_every=10, validate_every=None + self, + dataloaders, + model_save_path=None, + training_mode="forecasting", + epochs=50, + test_every=10, + validate_every=None, ): """ This method implements the batch- wise training and testing protocol for both time series forecasting and @@ -161,8 +167,8 @@ def fit( train_loader = dataloaders["train_loader"] test_loader = dataloaders["test_loader"] - if 'validation_loader' in dataloaders: - validation_loader = dataloaders['validation_loader'] + if "validation_loader" in dataloaders: + validation_loader = dataloaders["validation_loader"] else: validate_every = None @@ -273,7 +279,7 @@ def fit( correct = 0.0 self.model.eval() for idx, (data, target, ids, parameters, classes) in enumerate( - data_loader_to_evaluate + data_loader_to_evaluate ): if type(ids) == list: ids = ids[0] diff --git a/traja/models/utils.py b/traja/models/utils.py index 2680f2ab..cf32855b 100644 --- a/traja/models/utils.py +++ b/traja/models/utils.py @@ -5,7 +5,7 @@ class TimeDistributed(torch.nn.Module): - """ Time distributed wrapper compatible with linear/dense pytorch layer modules""" + """Time distributed wrapper compatible with linear/dense pytorch layer modules""" def __init__(self, module, batch_first=True): super(TimeDistributed, self).__init__() @@ -37,7 +37,7 @@ def forward(self, x): return out -def save(model, hyperparameters, path:str=""): +def save(model, hyperparameters, path: str = ""): """Save the trained model(.pth) along with its hyperparameters as a json (hyper.json) at the user defined Path Parameters: ----------- @@ -55,7 +55,7 @@ def save(model, hyperparameters, path:str=""): if path == "": path = os.path.join(os.getcwd(), "model.pt") torch.save(model.state_dict(), path) - + hyperdir, _ = os.path.split(path) if hyperparameters is not None: with open(os.path.join(hyperdir, "hypers.json"), "w") as fp: @@ -65,7 +65,7 @@ def save(model, hyperparameters, path:str=""): print(f"Model and hyperparameters saved at {os.path.abspath(hyperdir)}") -def load(model, path: str=""): +def load(model, path: str = ""): """Load trained model from path using the model_hyperparameters saved in the Parameters: ----------- diff --git a/traja/parsers.py b/traja/parsers.py index 4135464a..dd399cfc 100644 --- a/traja/parsers.py +++ b/traja/parsers.py @@ -2,7 +2,8 @@ import numpy as np import pandas as pd -from pandas.core.dtypes.common import is_datetime64_any_dtype, is_timedelta64_dtype +from pandas.core.dtypes.common import 
(is_datetime64_any_dtype, + is_timedelta64_dtype) from traja import TrajaDataFrame @@ -102,7 +103,7 @@ def read_file( converters = {**stripped_cols, **kwargs.pop("converters", {})} # Downcast to float32 # TODO: Benchmark float32 vs float64 for very big dataset - float_cols = df_test.select_dtypes(include=[np.float]).columns + float_cols = df_test.select_dtypes(include=[float]).columns float32_cols = {c: np.float32 for c in float_cols} # Convert string columns to sequence_ids diff --git a/traja/plotting.py b/traja/plotting.py index 391d75bc..f9feffbd 100644 --- a/traja/plotting.py +++ b/traja/plotting.py @@ -1,27 +1,23 @@ import logging +import os from collections import OrderedDict from datetime import timedelta -import os -from typing import Union, Optional, Tuple, List +from typing import List, Optional, Tuple, Union import matplotlib import matplotlib.pyplot as plt import numpy as np import pandas as pd import torch - from matplotlib import dates as md from matplotlib.axes import Axes from matplotlib.collections import PathCollection from matplotlib.figure import Figure -from mpl_toolkits.mplot3d import Axes3D -from pandas.core.dtypes.common import ( - is_datetime_or_timedelta_dtype, - is_datetime64_any_dtype, - is_timedelta64_dtype, -) +from pandas.core.dtypes.common import (is_datetime64_any_dtype, + is_timedelta64_dtype) import traja +from traja.core import is_datetime_or_timedelta_dtype from traja.frame import TrajaDataFrame from traja.trajectory import coords_to_flow @@ -44,8 +40,8 @@ "plot_clustermap", "plot_flow", "plot_pca", - "plot_periodogram", - "plot_quiver", + "plot_periodogram", + "plot_quiver", "plot_stream", "plot_surface", "plot_transition_graph", @@ -497,7 +493,7 @@ def plot_periodogram(trj, coord: str = "y", fs: int = 1, interactive: bool = Tru trj = traja.generate() trj.traja.plot_periodogram() - .. note:: + .. note:: Convenience wrapper for :meth:`scipy.signal.periodogram`. @@ -536,7 +532,7 @@ def plot_autocorrelation( .. plot:: import traja - + df = traja.generate() df.traja.plot_autocorrelation() @@ -553,9 +549,15 @@ def plot_autocorrelation( return plt.gcf() -def plot_pca(trj: TrajaDataFrame, id_col: str="id", bins: tuple = (8,8), three_dims: bool = False, ax = None): +def plot_pca( + trj: TrajaDataFrame, + id_col: str = "id", + bins: tuple = (8, 8), + three_dims: bool = False, + ax=None, +): """Plot PCA comparing animals ids by trip grids. - + Args: trj - Trajectory id_col - column representing animal IDs @@ -565,20 +567,19 @@ def plot_pca(trj: TrajaDataFrame, id_col: str="id", bins: tuple = (8,8), three_d Returns: fig - Figure - + .. 
plot::

        # Load sample jaguar dataset with trajectories for 9 animals
        df = traja.dataset.example.jaguar()

        # Bin trajectory into a trip grid then perform PCA
-        traja.plotting.plot_pca(df, id_col="ID", bins=(8,8))
+        traja.plotting.plot_pca(df, id_col="ID", bins=(8,8))

     """
     from sklearn.decomposition import PCA
     from sklearn.preprocessing import StandardScaler

-
     DIMS = 3 if three_dims else 2

     # Bin trajectories to trip grids
@@ -586,9 +587,9 @@ def plot_pca(trj: TrajaDataFrame, id_col: str="id", bins: tuple = (8,8), three_d
     ids = trj[id_col].unique()

     for id in ids:
-        animal = trj[trj[id_col]==id].copy()
-        animal.drop(columns=[id_col],inplace=True)
-        grid = animal.traja.trip_grid(bins = bins, hist_only=True)[0]
+        animal = trj[trj[id_col] == id].copy()
+        animal.drop(columns=[id_col], inplace=True)
+        grid = animal.traja.trip_grid(bins=bins, hist_only=True)[0]
         grids.append(grid.flatten())

     # Standardize the data
@@ -602,24 +603,35 @@
     # Create plot axes
     if DIMS == 3:
         fig = plt.figure()
-        ax = fig.add_subplot(111, projection='3d')
+        ax = fig.add_subplot(111, projection="3d")
     if not ax:
         _, ax = plt.subplots()
-
+
     # Visualize 2D projection
     for idx, animal in enumerate(X_r):
         if DIMS == 2:
-            ax.scatter(X_r[idx, 0], X_r[idx, 1], color=f'C{idx}', alpha=.8, lw=2, label=idx)
+            ax.scatter(
+                X_r[idx, 0], X_r[idx, 1], color=f"C{idx}", alpha=0.8, lw=2, label=idx
+            )
         elif DIMS == 3:
-            ax.scatter(X_r[idx, 0], X_r[idx, 1], ax.scatter[idx,2], color=f'C{idx}', alpha=.8, lw=2, label=idx)
+            ax.scatter(
+                X_r[idx, 0],
+                X_r[idx, 1],
+                X_r[idx, 2],
+                color=f"C{idx}",
+                alpha=0.8,
+                lw=2,
+                label=idx,
+            )

     plt.title("PCA")
-    plt.legend(title=id_col, loc='best', shadow=False, scatterpoints=1)
+    plt.legend(title=id_col, loc="best", shadow=False, scatterpoints=1)
     plt.xlabel("Principal Component 1")
-    plt.ylabel("Principal Component 2")
+    plt.ylabel("Principal Component 2")

     return plt.gcf()

+
 def plot_collection(
     trjs: Union[pd.DataFrame, TrajaDataFrame],
     id_col: str = "id",
@@ -772,7 +784,7 @@ def plot_contour(
     X, Y, U, V = coords_to_flow(trj, bins)
     Z = np.sqrt(U * U + V * V)

-    if not ax:
+    if not ax:
         _, ax = plt.subplots()

     if filled:
@@ -815,10 +827,8 @@ def plot_surface(
     Z = np.sqrt(U * U + V * V)

     fig = plt.figure()
-    ax = fig.gca(projection="3d")
-    ax.plot_surface(
-        X, Y, Z, cmap= cmap, linewidth=0, **surfaceplot_kws
-    )
+    ax = fig.add_subplot(projection="3d")
+    ax.plot_surface(X, Y, Z, cmap=cmap, linewidth=0, **surfaceplot_kws)

     ax = _label_axes(trj, ax)
     try:
@@ -968,7 +978,7 @@ def trip_grid(
     x, y = zip(*df.values)

     hist, x_edges, y_edges = np.histogram2d(
-        x, y, bins, range=((xmin, xmax), (ymin, ymax)), normed=normalize
+        x, y, bins, range=((xmin, xmax), (ymin, ymax)), density=normalize
     )

     # rotate to keep y as first dimension
@@ -1307,9 +1317,9 @@ def plot_transition_graph(
     """

     try:
+        import graphviz
         import networkx as nx
         import pydot
-        import graphviz
     except ImportError as e:
         raise ImportError(f"{e} - please install it with pip")
@@ -1389,8 +1399,8 @@ def animate(trj: TrajaDataFrame, polar: bool = True, save: bool = False):
         save (bool): save video to ``trajectory.mp4``

     Returns:
-        anim (matplotlib.animation.FuncAnimation): animation
-
+        anim (matplotlib.animation.FuncAnimation): animation
+
     """
     from matplotlib import animation
     from matplotlib.animation import FuncAnimation
@@ -1408,7 +1418,7 @@

     fig = plt.figure(figsize=(8, 6))
     ax1 = plt.subplot(211)
-
fig.add_subplot(ax1) + fig.add_subplot(ax1) if polar: ax2 = plt.subplot(212, polar="projection") ax2.set_theta_zero_location("N") @@ -1436,7 +1446,7 @@ def animate(trj: TrajaDataFrame, polar: bool = True, save: bool = False): ) def update(frame_number): - if frame_number < (XY_STEPS+2): + if frame_number < (XY_STEPS + 2): pass else: ind = frame_number % len(xy) @@ -1483,7 +1493,9 @@ def update(frame_number): bar.set_facecolor(plt.cm.viridis(h / max_height)) bar.set_alpha(0.8 * (idx / POLAR_STEPS)) ax2.set_theta_zero_location("N") - ax2.set_xticklabels(["0", "45", "90", "135", "180", "-135", "-90", "-45"]) + ax2.set_xticklabels( + ["0", "45", "90", "135", "180", "-135", "-90", "-45"] + ) anim = FuncAnimation(fig, update, interval=10, frames=len(xy)) if save: @@ -1493,5 +1505,5 @@ def update(frame_number): raise Exception("FFmpeg not installed, please install it.") else: plt.show() - + return anim diff --git a/traja/stats/brownian.py b/traja/stats/brownian.py index c198ad6d..fdb2a77a 100644 --- a/traja/stats/brownian.py +++ b/traja/stats/brownian.py @@ -1,5 +1,5 @@ -from scipy.stats import norm import numpy as np +from scipy.stats import norm class Brownian: @@ -22,8 +22,10 @@ class Brownian: dt: delta-time between every step. """ - def __init__(self, x0=0, mean_value=0, variance=1, dt=1., length=100000): - assert (type(x0) == float or type(x0) == int or x0 is None), "Expect a float or None for the initial value" + def __init__(self, x0=0, mean_value=0, variance=1, dt=1.0, length=100000): + assert ( + type(x0) == float or type(x0) == int or x0 is None + ), "Expect a float or None for the initial value" self._x0 = float(x0) @@ -42,7 +44,11 @@ def _generate_noise(self): x0 = np.asarray(self._x0) # Generate self._length samples of noise - r = norm.rvs(loc=self._mean_value, scale=self._variance * np.sqrt(self._dt), size=self._length) + r = norm.rvs( + loc=self._mean_value, + scale=self._variance * np.sqrt(self._dt), + size=self._length, + ) out = np.empty(r.shape) # This computes the Brownian motion by forming the cumulative sum of diff --git a/traja/tests/test_dataset.py b/traja/tests/test_dataset.py index 7b2daa00..c70c5b98 100644 --- a/traja/tests/test_dataset.py +++ b/traja/tests/test_dataset.py @@ -1,12 +1,15 @@ -import os +import sys + import pandas as pd import pytest -from traja.dataset import dataset -from traja.dataset.pituitary_gland import create_latin_hypercube_sampled_pituitary_df +from traja.dataset import dataset, load_ped_data -@pytest.mark.skipif(os.name == 'nt', reason="hangs on Windows for unknown reason") +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_time_based_sampling_dataloaders_do_not_overlap(): data = list() num_ids = 140 @@ -49,85 +52,27 @@ def test_time_based_sampling_dataloaders_do_not_overlap(): for data, target, ids, parameters, classes in dataloaders["train_loader"]: for sequence in data: - assert all(sample == -1.0 for sample in sequence[:,0]) + assert all(sample == -1.0 for sample in sequence[:, 0]) for sequence in target: - assert all(sample == -1.0 for sample in sequence[:,0]) + assert all(sample == -1.0 for sample in sequence[:, 0]) for data, target, ids, parameters, classes in dataloaders["test_loader"]: for sequence in data: - assert all(sample == 0 for sample in sequence[:,0]) + assert all(sample == 0 for sample in sequence[:, 0]) for sequence in target: - assert all(sample == 0 for sample in sequence[:,0]) + assert all(sample == 0 for sample in sequence[:, 
0]) for data, target, ids, parameters, classes in dataloaders["validation_loader"]: for sequence in data: - assert all(sample == 1 for sample in sequence[:,0]) + assert all(sample == 1 for sample in sequence[:, 0]) for sequence in target: - assert all(sample == 1 for sample in sequence[:,0]) - - -def test_time_based_sampling_dataloaders_do_not_overlap(): - data = list() - num_ids = 140 - sequence_length = 2000 + assert all(sample == 1 for sample in sequence[:, 0]) - # Hyperparameters - batch_size = 15 - num_past = 10 - num_future = 5 - train_split_ratio = 0.498 - validation_split_ratio = 0.25 - - stride = 5 - - split_by_id = False # The test condition - - # The train[0] column should contain only 1s, the test column should contain 2s and the - # validation column set should contain 3s. - # When scaled, this translates to -1., 0 and 1. respectively. - for sample_id in range(num_ids): - for element in range(round(sequence_length * train_split_ratio) - 6): - data.append([1, element, sample_id]) - for element in range( - round(sequence_length * (1 - train_split_ratio - validation_split_ratio)) - + -4 - ): - data.append([2, element, sample_id]) - for element in range(round(sequence_length * validation_split_ratio) + 10): - data.append([3, element, sample_id]) - - df = pd.DataFrame(data, columns=["x", "y", "ID"]) - - dataloaders = dataset.MultiModalDataLoader( - df, - batch_size=batch_size, - n_past=num_past, - n_future=num_future, - num_workers=1, - train_split_ratio=train_split_ratio, - validation_split_ratio=validation_split_ratio, - split_by_id=split_by_id, - stride=stride, - ) - - for data, target, ids, parameters, classes in dataloaders["train_loader"]: - for sequence in data: - assert all(sample == -1. for sample in sequence[:,0]) - for sequence in target: - assert all(sample == -1. for sample in sequence[:,0]) - - for data, target, ids, parameters, classes in dataloaders["test_loader"]: - for sequence in data: - assert all(sample == 0 for sample in sequence[:,0]) - for sequence in target: - assert all(sample == 0 for sample in sequence[:,0]) - - for data, target, ids, parameters, classes in dataloaders["validation_loader"]: - for sequence in data: - assert all(sample == 1 for sample in sequence[:,0]) - for sequence in target: - assert all(sample == 1 for sample in sequence[:,0]) +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_time_based_sampling_dataloaders_with_stride_one_do_not_overlap(): data = list() num_ids = 2 @@ -174,23 +119,27 @@ def test_time_based_sampling_dataloaders_with_stride_one_do_not_overlap(): for data, target, ids, parameters, classes in dataloaders["train_loader"]: for sequence in data: - assert all(sample == -1. for sample in sequence[:,0]) + assert all(sample == -1.0 for sample in sequence[:, 0]) for sequence in target: - assert all(sample == -1. 
for sample in sequence[:,0]) + assert all(sample == -1.0 for sample in sequence[:, 0]) for data, target, ids, parameters, classes in dataloaders["test_loader"]: for sequence in data: - assert all(sample == 0 for sample in sequence[:,0]) + assert all(sample == 0 for sample in sequence[:, 0]) for sequence in target: - assert all(sample == 0 for sample in sequence[:,0]) + assert all(sample == 0 for sample in sequence[:, 0]) for data, target, ids, parameters, classes in dataloaders["validation_loader"]: for sequence in data: - assert all(sample == 1 for sample in sequence[:,0]) + assert all(sample == 1 for sample in sequence[:, 0]) for sequence in target: - assert all(sample == 1 for sample in sequence[:,0]) + assert all(sample == 1 for sample in sequence[:, 0]) +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_time_based_weighted_sampling_dataloaders_do_not_overlap(): data = list() num_ids = 232 @@ -247,6 +196,10 @@ def test_time_based_weighted_sampling_dataloaders_do_not_overlap(): ) +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_id_wise_sampling_with_few_ids_does_not_put_id_in_multiple_dataloaders(): data = list() num_ids = 5 @@ -282,6 +235,10 @@ def test_id_wise_sampling_with_few_ids_does_not_put_id_in_multiple_dataloaders() ) +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_id_wise_sampling_with_short_sequences_does_not_divide_by_zero(): data = list() num_ids = 283 @@ -323,6 +280,10 @@ def test_id_wise_sampling_with_short_sequences_does_not_divide_by_zero(): ) +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_id_wise_sampling_does_not_put_id_in_multiple_dataloaders(): data = list() num_ids = 150 @@ -358,6 +319,10 @@ def test_id_wise_sampling_does_not_put_id_in_multiple_dataloaders(): ) +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_id_wise_weighted_sampling_does_not_put_id_in_multiple_dataloaders(): data = list() num_ids = 150 @@ -596,6 +561,10 @@ def verify_that_indices_belong_to_precisely_one_loader( ), f"Index {index} is in both the test and validation loaders!" +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_sequential_data_loader_indices_are_sequential(): data = list() num_ids = 46 @@ -648,8 +617,11 @@ def test_sequential_data_loader_indices_are_sequential(): ), "IDs in sequential test loader should increase monotonically!" -def test_pituitary_gland_latin_hypercube_generator_gives_correct_number_of_samples(): - num_samples = 30 - _, num_samples_out = create_latin_hypercube_sampled_pituitary_df(samples=num_samples) - - assert num_samples == num_samples_out, "Hypercube sampler returned the wrong number of samples!" 
+# Load the pedestrian datasets +@pytest.mark.skipif( + sys.platform == "win32", + reason="GitHub actions images don't have wget installed.", +) +def test_ped_datasets(): + dfs = load_ped_data() + assert len(dfs) == 3 diff --git a/traja/tests/test_models.py b/traja/tests/test_models.py index 61a9f157..6544287f 100644 --- a/traja/tests/test_models.py +++ b/traja/tests/test_models.py @@ -1,15 +1,20 @@ +import sys + import numpy as np import pandas as pd +import pytest import traja from traja.dataset import dataset from traja.dataset.example import jaguar -from traja.models import LSTM -from traja.models import MultiModelAE -from traja.models import MultiModelVAE +from traja.models import LSTM, MultiModelAE, MultiModelVAE from traja.models.train import HybridTrainer +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_aevae_jaguar(): """ Test variational autoencoder forecasting with the Jaguar dataset @@ -59,7 +64,14 @@ def test_aevae_jaguar(): trainer = HybridTrainer(model=model, optimizer_type="Adam", loss_type="huber") # Train the model - trainer.fit(data_loaders, model_save_path, epochs=1, training_mode="forecasting", validate_every=5, test_every=10) + trainer.fit( + data_loaders, + model_save_path, + epochs=1, + training_mode="forecasting", + validate_every=5, + test_every=10, + ) scaler = data_loaders["train_loader"].dataset.scaler @@ -80,6 +92,10 @@ def test_aevae_jaguar(): trainer.validate(data_loaders["validation_loader"]) +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_ae_jaguar(): """ Test autoencoder forecasting with the Jaguar dataset @@ -125,13 +141,38 @@ def test_ae_jaguar(): trainer = HybridTrainer(model=model, optimizer_type="Adam", loss_type="huber") # Train the model - trainer.fit(data_loaders, model_save_path, epochs=1, training_mode="forecasting", validate_every=2, test_every=5) - trainer.fit(data_loaders, model_save_path, epochs=1, training_mode="forecasting", validate_every=None, test_every=5) - trainer.fit(data_loaders, model_save_path, epochs=1, training_mode="forecasting", validate_every=2, test_every=None) + trainer.fit( + data_loaders, + model_save_path, + epochs=1, + training_mode="forecasting", + validate_every=2, + test_every=5, + ) + trainer.fit( + data_loaders, + model_save_path, + epochs=1, + training_mode="forecasting", + validate_every=None, + test_every=5, + ) + trainer.fit( + data_loaders, + model_save_path, + epochs=1, + training_mode="forecasting", + validate_every=2, + test_every=None, + ) trainer.validate(data_loaders["sequential_validation_loader"]) +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_lstm_jaguar(): """ Testing method for lstm model used for forecasting. 
@@ -170,22 +211,33 @@ def test_lstm_jaguar(): ) # Model Trainer - trainer = HybridTrainer(model=model, - optimizer_type='Adam', - loss_type='huber') + trainer = HybridTrainer(model=model, optimizer_type="Adam", loss_type="huber") - forecasting_loss_pre_training, _, _ = trainer.validate(data_loaders['train_loader']) - print(f'Loss pre training: {forecasting_loss_pre_training}') + forecasting_loss_pre_training, _, _ = trainer.validate(data_loaders["train_loader"]) + print(f"Loss pre training: {forecasting_loss_pre_training}") # Train the model - trainer.fit(data_loaders, model_save_path, epochs=2, training_mode="forecasting", validate_every=1, test_every=2) + trainer.fit( + data_loaders, + model_save_path, + epochs=2, + training_mode="forecasting", + validate_every=1, + test_every=2, + ) - forecasting_loss_post_training, _, _ = trainer.validate(data_loaders['train_loader']) + forecasting_loss_post_training, _, _ = trainer.validate( + data_loaders["train_loader"] + ) - print(f'Loss post training: {forecasting_loss_post_training}') + print(f"Loss post training: {forecasting_loss_post_training}") assert forecasting_loss_post_training < forecasting_loss_pre_training +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_aevae_regression_network_converges(): """ Test Autoencoder and variational auto encoder models for training/testing/generative network and @@ -251,20 +303,38 @@ def test_aevae_regression_network_converges(): # Model types; "ae" or "vae" trainer = HybridTrainer(model=model, optimizer_type="Adam", loss_type="mse") - _, regression_lost_pre_training, _ = trainer.validate(data_loaders['train_loader']) + _, regression_lost_pre_training, _ = trainer.validate(data_loaders["train_loader"]) - print(f'Loss pre training: {regression_lost_pre_training}') + print(f"Loss pre training: {regression_lost_pre_training}") # Train the model - trainer.fit(data_loaders, model_save_path, epochs=2, training_mode="forecasting", validate_every=1, test_every=2) - trainer.fit(data_loaders, model_save_path, epochs=2, training_mode="regression", validate_every=1, test_every=2) + trainer.fit( + data_loaders, + model_save_path, + epochs=2, + training_mode="forecasting", + validate_every=1, + test_every=2, + ) + trainer.fit( + data_loaders, + model_save_path, + epochs=2, + training_mode="regression", + validate_every=1, + test_every=2, + ) - _, regression_lost_post_training, _ = trainer.validate(data_loaders['train_loader']) + _, regression_lost_post_training, _ = trainer.validate(data_loaders["train_loader"]) - print(f'Loss post training: {regression_lost_post_training}') + print(f"Loss post training: {regression_lost_post_training}") assert regression_lost_post_training < regression_lost_pre_training +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_ae_regression_network_converges(): """ Test that Autoencoder and variational auto encoder models for regression networks converge @@ -288,58 +358,78 @@ def test_ae_regression_network_converges(): num_past = 10 num_future = 5 # Prepare the dataloader - data_loaders = dataset.MultiModalDataLoader(df, - batch_size=batch_size, - n_past=num_past, - n_future=num_future, - train_split_ratio=0.333, - validation_split_ratio=0.333, - num_workers=1, - parameter_columns=parameter_columns, - split_by_id=False, - stride=1) - - model_save_path = './model.pt' - - model = MultiModelAE(input_size=2, - 
output_size=2, - lstm_hidden_size=32, - num_lstm_layers=2, - num_regressor_parameters=len(parameter_columns), - latent_size=10, - dropout=0.1, - num_regressor_layers=4, - regressor_hidden_size=32, - batch_size=batch_size, - num_future=num_future, - num_past=num_past, - bidirectional=False, - batch_first=True, - reset_state=True) + data_loaders = dataset.MultiModalDataLoader( + df, + batch_size=batch_size, + n_past=num_past, + n_future=num_future, + train_split_ratio=0.333, + validation_split_ratio=0.333, + num_workers=1, + parameter_columns=parameter_columns, + split_by_id=False, + stride=1, + ) + + model_save_path = "./model.pt" + + model = MultiModelAE( + input_size=2, + output_size=2, + lstm_hidden_size=32, + num_lstm_layers=2, + num_regressor_parameters=len(parameter_columns), + latent_size=10, + dropout=0.1, + num_regressor_layers=4, + regressor_hidden_size=32, + batch_size=batch_size, + num_future=num_future, + num_past=num_past, + bidirectional=False, + batch_first=True, + reset_state=True, + ) # Test resetting the regressor, to make sure this function works model.reset_regressor(regressor_hidden_size=32, num_regressor_layers=4) # Model Trainer # Model types; "ae" or "vae" - trainer = HybridTrainer(model=model, - optimizer_type='Adam', - loss_type='mse') + trainer = HybridTrainer(model=model, optimizer_type="Adam", loss_type="mse") - _, regression_lost_pre_training, _ = trainer.validate(data_loaders['train_loader']) + _, regression_lost_pre_training, _ = trainer.validate(data_loaders["train_loader"]) - print(f'Loss pre training: {regression_lost_pre_training}') + print(f"Loss pre training: {regression_lost_pre_training}") # Train the model - trainer.fit(data_loaders, model_save_path, epochs=2, training_mode='forecasting', validate_every=1, test_every=2) - trainer.fit(data_loaders, model_save_path, epochs=2, training_mode='regression', validate_every=1, test_every=2) + trainer.fit( + data_loaders, + model_save_path, + epochs=2, + training_mode="forecasting", + validate_every=1, + test_every=2, + ) + trainer.fit( + data_loaders, + model_save_path, + epochs=2, + training_mode="regression", + validate_every=1, + test_every=2, + ) - _, regression_lost_post_training, _ = trainer.validate(data_loaders['train_loader']) + _, regression_lost_post_training, _ = trainer.validate(data_loaders["train_loader"]) - print(f'Loss post training: {regression_lost_post_training}') + print(f"Loss post training: {regression_lost_post_training}") assert regression_lost_post_training < regression_lost_pre_training +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_vae_regression_network_converges(): """ Test that Autoencoder and variational auto encoder models for regression networks converge @@ -354,43 +444,47 @@ def test_vae_regression_network_converges(): parameter_two = 91.235 * sample_id data.append([sequence, sequence, sample_id, parameter_one, parameter_two]) # Sample data - df = pd.DataFrame(data, columns=['x', 'y', 'ID', 'parameter_one', 'parameter_two']) + df = pd.DataFrame(data, columns=["x", "y", "ID", "parameter_one", "parameter_two"]) - parameter_columns = ['parameter_one', 'parameter_two'] + parameter_columns = ["parameter_one", "parameter_two"] # Hyperparameters batch_size = 1 num_past = 10 num_future = 5 # Prepare the dataloader - data_loaders = dataset.MultiModalDataLoader(df, - batch_size=batch_size, - n_past=num_past, - n_future=num_future, - train_split_ratio=0.333, - 
validation_split_ratio=0.333, - num_workers=1, - parameter_columns=parameter_columns, - split_by_id=False, - stride=1) - - model_save_path = './model.pt' - - model = MultiModelVAE(input_size=2, - output_size=2, - lstm_hidden_size=32, - num_lstm_layers=2, - num_regressor_parameters=len(parameter_columns), - latent_size=10, - dropout=0.1, - num_regressor_layers=4, - regressor_hidden_size=32, - batch_size=batch_size, - num_future=num_future, - num_past=num_past, - bidirectional=False, - batch_first=True, - reset_state=True) + data_loaders = dataset.MultiModalDataLoader( + df, + batch_size=batch_size, + n_past=num_past, + n_future=num_future, + train_split_ratio=0.333, + validation_split_ratio=0.333, + num_workers=1, + parameter_columns=parameter_columns, + split_by_id=False, + stride=1, + ) + + model_save_path = "./model.pt" + + model = MultiModelVAE( + input_size=2, + output_size=2, + lstm_hidden_size=32, + num_lstm_layers=2, + num_regressor_parameters=len(parameter_columns), + latent_size=10, + dropout=0.1, + num_regressor_layers=4, + regressor_hidden_size=32, + batch_size=batch_size, + num_future=num_future, + num_past=num_past, + bidirectional=False, + batch_first=True, + reset_state=True, + ) # Test resetting the regressor, to make sure this function works model.reset_regressor(regressor_hidden_size=32, num_regressor_layers=4) @@ -399,20 +493,38 @@ def test_vae_regression_network_converges(): # Model types; "ae" or "vae" trainer = HybridTrainer(model=model, optimizer_type="Adam", loss_type="mse") - _, regression_lost_pre_training, _ = trainer.validate(data_loaders['train_loader']) + _, regression_lost_pre_training, _ = trainer.validate(data_loaders["train_loader"]) - print(f'Loss pre training: {regression_lost_pre_training}') + print(f"Loss pre training: {regression_lost_pre_training}") # Train the model - trainer.fit(data_loaders, model_save_path, epochs=2, training_mode="forecasting", validate_every=1, test_every=2) - trainer.fit(data_loaders, model_save_path, epochs=2, training_mode="regression", validate_every=1, test_every=2) + trainer.fit( + data_loaders, + model_save_path, + epochs=2, + training_mode="forecasting", + validate_every=1, + test_every=2, + ) + trainer.fit( + data_loaders, + model_save_path, + epochs=2, + training_mode="regression", + validate_every=1, + test_every=2, + ) - _, regression_lost_post_training, _ = trainer.validate(data_loaders['train_loader']) + _, regression_lost_post_training, _ = trainer.validate(data_loaders["train_loader"]) - print(f'Loss post training: {regression_lost_post_training}') + print(f"Loss post training: {regression_lost_post_training}") assert regression_lost_post_training < regression_lost_pre_training +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_ae_classification_network_converges(): """ Test that Autoencoder and variational auto encoder models for classification networks converge @@ -428,7 +540,7 @@ def test_ae_classification_network_converges(): yy = sample_class * np.cos(sequence / 20.0) + (sample_class - 1) * sequence data.append([xx, yy, sample_id, sample_class]) # Sample data - df = pd.DataFrame(data, columns=['x', 'y', 'ID', 'class']) + df = pd.DataFrame(data, columns=["x", "y", "ID", "class"]) # Hyperparameters batch_size = 2 num_past = 10 @@ -473,20 +585,42 @@ def test_ae_classification_network_converges(): # Model types; "ae" or "vae" trainer = HybridTrainer(model=model, optimizer_type="Adam", loss_type="mse") - _, _, 
classification_loss_pre_training = trainer.validate(data_loaders['train_loader']) + _, _, classification_loss_pre_training = trainer.validate( + data_loaders["train_loader"] + ) - print(f'Loss pre training: {classification_loss_pre_training}') + print(f"Loss pre training: {classification_loss_pre_training}") # Train the model - trainer.fit(data_loaders, model_save_path, epochs=2, training_mode='forecasting', validate_every=1, test_every=2) - trainer.fit(data_loaders, model_save_path, epochs=2, training_mode='classification', validate_every=1, test_every=2) + trainer.fit( + data_loaders, + model_save_path, + epochs=2, + training_mode="forecasting", + validate_every=1, + test_every=2, + ) + trainer.fit( + data_loaders, + model_save_path, + epochs=2, + training_mode="classification", + validate_every=1, + test_every=2, + ) - _, _, classification_loss_post_training = trainer.validate(data_loaders['train_loader']) + _, _, classification_loss_post_training = trainer.validate( + data_loaders["train_loader"] + ) - print(f'Loss post training: {classification_loss_post_training}') + print(f"Loss post training: {classification_loss_post_training}") assert classification_loss_post_training < classification_loss_pre_training +@pytest.mark.skipif( + sys.platform == "darwin" or sys.platform == "win32", + reason="hangs on Windows and Mac for unknown reason", +) def test_vae_classification_network_converges(): """ Test that Autoencoder and variational auto encoder models for classification networks converge @@ -502,59 +636,79 @@ def test_vae_classification_network_converges(): yy = sample_class * np.cos(sequence / 20.0) + (sample_class - 1) * sequence data.append([xx, yy, sample_id, sample_class]) # Sample data - df = pd.DataFrame(data, columns=['x', 'y', 'ID', 'class']) + df = pd.DataFrame(data, columns=["x", "y", "ID", "class"]) # Hyperparameters batch_size = 2 num_past = 10 num_future = 5 # Prepare the dataloader - data_loaders = dataset.MultiModalDataLoader(df, - batch_size=batch_size, - n_past=num_past, - n_future=num_future, - train_split_ratio=0.333, - validation_split_ratio=0.333, - num_workers=1, - split_by_id=False, - stride=1) - - model_save_path = './model.pt' - - model = MultiModelVAE(input_size=2, - output_size=2, - lstm_hidden_size=32, - num_lstm_layers=2, - num_classes=2, - latent_size=10, - dropout=0.1, - num_classifier_layers=4, - classifier_hidden_size=32, - batch_size=batch_size, - num_future=num_future, - num_past=num_past, - bidirectional=False, - batch_first=True, - reset_state=True) + data_loaders = dataset.MultiModalDataLoader( + df, + batch_size=batch_size, + n_past=num_past, + n_future=num_future, + train_split_ratio=0.333, + validation_split_ratio=0.333, + num_workers=1, + split_by_id=False, + stride=1, + ) + + model_save_path = "./model.pt" + + model = MultiModelVAE( + input_size=2, + output_size=2, + lstm_hidden_size=32, + num_lstm_layers=2, + num_classes=2, + latent_size=10, + dropout=0.1, + num_classifier_layers=4, + classifier_hidden_size=32, + batch_size=batch_size, + num_future=num_future, + num_past=num_past, + bidirectional=False, + batch_first=True, + reset_state=True, + ) # Test resetting the classifier, to make sure this function works model.reset_classifier(classifier_hidden_size=32, num_classifier_layers=4) # Model Trainer # Model types; "ae" or "vae" - trainer = HybridTrainer(model=model, - optimizer_type='Adam', - loss_type='mse') + trainer = HybridTrainer(model=model, optimizer_type="Adam", loss_type="mse") - _, _, classification_loss_pre_training = 
trainer.validate(data_loaders['train_loader']) + _, _, classification_loss_pre_training = trainer.validate( + data_loaders["train_loader"] + ) - print(f'Loss pre training: {classification_loss_pre_training}') + print(f"Loss pre training: {classification_loss_pre_training}") # Train the model - trainer.fit(data_loaders, model_save_path, epochs=2, training_mode="forecasting", validate_every=1, test_every=2) - trainer.fit(data_loaders, model_save_path, epochs=2, training_mode="classification", validate_every=1, test_every=2) + trainer.fit( + data_loaders, + model_save_path, + epochs=2, + training_mode="forecasting", + validate_every=1, + test_every=2, + ) + trainer.fit( + data_loaders, + model_save_path, + epochs=2, + training_mode="classification", + validate_every=1, + test_every=2, + ) - _, _, classification_loss_post_training = trainer.validate(data_loaders['train_loader']) + _, _, classification_loss_post_training = trainer.validate( + data_loaders["train_loader"] + ) - print(f'Loss post training: {classification_loss_post_training}') + print(f"Loss post training: {classification_loss_post_training}") assert classification_loss_post_training < classification_loss_pre_training diff --git a/traja/tests/test_stats.py b/traja/tests/test_stats.py index 84d60141..f153c062 100644 --- a/traja/tests/test_stats.py +++ b/traja/tests/test_stats.py @@ -1,10 +1,12 @@ -from traja.stats.brownian import Brownian import numpy as np +from traja.stats.brownian import Brownian + + def test_brownian_walk_generates_correct_number_of_samples(): length = 1000000 brownian = Brownian(length=length) - assert(len(brownian) == length) + assert len(brownian) == length def test_brownian_motion_with_drift_approximately_sums_to_the_drift(): @@ -58,8 +60,12 @@ def test_brownians_with_different_time_steps_walk_approximately_equally(): drift1 = 0 drift2 = 0 - brownian1 = Brownian(length=length1, mean_value=mean_drift, variance=variance, dt=dt1) - brownian2 = Brownian(length=length2, mean_value=mean_drift, variance=variance, dt=dt2) + brownian1 = Brownian( + length=length1, mean_value=mean_drift, variance=variance, dt=dt1 + ) + brownian2 = Brownian( + length=length2, mean_value=mean_drift, variance=variance, dt=dt2 + ) for i in range(length1): drift1 = brownian1() diff --git a/traja/tests/test_trajadataframe.py b/traja/tests/test_trajadataframe.py index 41949a1c..1897a433 100644 --- a/traja/tests/test_trajadataframe.py +++ b/traja/tests/test_trajadataframe.py @@ -6,7 +6,7 @@ from pandas import DataFrame import traja -from traja import TrajaDataFrame, read_file, TrajaCollection +from traja import TrajaCollection, TrajaDataFrame, read_file class TestDataFrame: diff --git a/traja/tests/test_trajectory.py b/traja/tests/test_trajectory.py index 864889bd..b64800df 100644 --- a/traja/tests/test_trajectory.py +++ b/traja/tests/test_trajectory.py @@ -1,7 +1,7 @@ import numpy as np import numpy.testing as npt import pytest -from pandas.util.testing import assert_series_equal +from pandas.testing import assert_series_equal import traja diff --git a/traja/trajectory.py b/traja/trajectory.py index a28c2305..85b1a08c 100644 --- a/traja/trajectory.py +++ b/traja/trajectory.py @@ -1,20 +1,18 @@ import logging import math from collections import OrderedDict -from typing import Callable, Optional, Union, Tuple +from typing import Callable, Optional, Tuple, Union import numpy as np import pandas as pd -from pandas.core.dtypes.common import ( - is_datetime_or_timedelta_dtype, - is_datetime64_any_dtype, - is_timedelta64_dtype, -) +from 
pandas.core.dtypes.common import (is_datetime64_any_dtype, + is_timedelta64_dtype) from scipy import signal from scipy.spatial.distance import directed_hausdorff, euclidean import traja from traja import TrajaDataFrame +from traja.core import is_datetime_or_timedelta_dtype __all__ = [ "_bins_to_tuple", @@ -201,10 +199,10 @@ def expected_sq_displacement( sl = traja.step_lengths(trj) ta = traja.calc_angle(trj) l1 = np.mean(sl) - l2 = np.mean(sl ** 2) + l2 = np.mean(sl**2) c = np.mean(np.cos(ta)) s = np.mean(np.sin(ta)) - s2 = s ** 2 + s2 = s**2 if eqn1: # Eqn 1 @@ -214,9 +212,9 @@ def expected_sq_displacement( ) * np.sin((n + 1) * alpha) esd = ( n * l2 - + 2 * l1 ** 2 * ((c - c ** 2 - s2) * n - c) / ((1 - c) ** 2 + s2) + + 2 * l1**2 * ((c - c**2 - s2) * n - c) / ((1 - c) ** 2 + s2) + 2 - * l1 ** 2 + * l1**2 * ((2 * s2 + (c + s2) ** ((n + 1) / 2)) / ((1 - c) ** 2 + s2) ** 2) * gamma ) @@ -224,7 +222,7 @@ def expected_sq_displacement( else: logger.info("This method is experimental and requires testing.") # Eqn 2 - esd = n * l2 + 2 * l1 ** 2 * c / (1 - c) * (n - (1 - c ** n) / (1 - c)) + esd = n * l2 + 2 * l1**2 * c / (1 - c) * (n - (1 - c**n) / (1 - c)) return esd @@ -373,7 +371,7 @@ def transition_matrix(grid_indices1D: np.ndarray): M = [[0] * n for _ in range(n)] - for (i, j) in zip(grid_indices1D, grid_indices1D[1:]): + for i, j in zip(grid_indices1D, grid_indices1D[1:]): M[i][j] += 1 # Convert to probabilities @@ -458,7 +456,7 @@ def calc_flow_angles(grid_indices: np.ndarray): M = np.empty((bins[1], bins[0]), dtype=np.ndarray) - for (i, j) in zip(grid_indices, grid_indices[1:]): + for i, j in zip(grid_indices, grid_indices[1:]): # Account for fact that grid indices uses 1-base indexing ix = i[0] - 1 iy = i[1] - 1 @@ -668,7 +666,7 @@ def generate( if random: # Accumulate angular errors - coords = np.zeros(n, dtype=np.complex) + coords = np.zeros(n, dtype=complex) angle = 0 for i in range(n - 1): angle += angular_errors[i] @@ -930,7 +928,7 @@ def _rediscretize_points( V = (curr_result_y - prev_y) * cos_l - (curr_result_x - prev_x) * sin_l # Compute distance H between (X_{i+1}, Y_{i+1}) and (x_{k-1}, y_{k-1}) - H = U + np.sqrt(abs(R ** 2 - V ** 2)) + H = U + np.sqrt(abs(R**2 - V**2)) XIp1 = H * cos_l + prev_x YIp1 = H * sin_l + prev_y @@ -1073,7 +1071,7 @@ def calc_derivatives(trj: TrajaDataFrame): # get cumulative seconds if is_datetime64_any_dtype(trj[time_col]): displacement_time = ( - trj[time_col].astype(int).div(10 ** 9).diff().fillna(0).cumsum() + trj[time_col].astype(int).div(10**9).diff().fillna(0).cumsum() ) else: try:
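Note on the new traja/core.py helper introduced above: `is_datetime_or_timedelta_dtype` is no longer importable from `pandas.core.dtypes.common` in recent pandas releases, which is why `plotting.py` and `trajectory.py` now pull the local replacement from `traja.core` instead. A minimal sketch of the expected behavior, assuming the helper is importable as shown in the diff (the sample series below are illustrative, not taken from the test suite):

    import pandas as pd

    from traja.core import is_datetime_or_timedelta_dtype

    # A datetime64 series and its diff (a timedelta64 series) both match...
    times = pd.Series(pd.date_range("2021-01-01", periods=3, freq="s"))
    assert is_datetime_or_timedelta_dtype(times)
    assert is_datetime_or_timedelta_dtype(times.diff())

    # ...while a plain numeric series does not.
    assert not is_datetime_or_timedelta_dtype(pd.Series([0.0, 1.0, 2.0]))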