diff --git a/aw_research/classify.py b/aw_research/classify.py index 5fa5672..fb52c45 100644 --- a/aw_research/classify.py +++ b/aw_research/classify.py @@ -1,29 +1,32 @@ -import typing -import logging -from typing import List, Dict, Optional, Tuple, Set - import argparse -import re import json -from urllib.parse import urlparse +import logging +import re +import typing from collections import Counter from datetime import datetime, timedelta, timezone from functools import wraps +from typing import ( + Dict, + List, + Optional, + Set, + Tuple, +) +from urllib.parse import urlparse -import toml -import pytz -import pydash +import joblib import matplotlib.pyplot as plt import pandas as pd -import joblib - -from aw_core.models import Event -from aw_transform import flood, filter_period_intersect, union_no_overlap +import pydash +import pytz +import toml from aw_client import ActivityWatchClient +from aw_core.models import Event +from aw_transform import filter_period_intersect, flood, union_no_overlap from .plot_sunburst import sunburst - logger = logging.getLogger(__name__) memory = joblib.Memory("./.cache/joblib") @@ -358,7 +361,8 @@ def _get_events_toggl(since: datetime, filepath: str) -> List[Event]: def _get_events_smartertime(since: datetime, filepath: str = "auto") -> List[Event]: - # TODO: Use aw_research.importers.smartertime to generate json file if filepath is smartertime export (.csv) + # TODO: Use quantifiedme.load.smartertime to generate json file if filepath is smartertime export (.csv) + # NOTE: deprecated, use methods in quantifiedme instead if filepath == "auto": from glob import glob diff --git a/aw_research/importers/smartertime.py b/aw_research/importers/smartertime.py deleted file mode 100644 index ed5755f..0000000 --- a/aw_research/importers/smartertime.py +++ /dev/null @@ -1,115 +0,0 @@ -# Code originally from now deprecated repo: https://github.com/ActivityWatch/aw-importer-smartertime - -import csv -from datetime import datetime, timedelta, 
def parse(filepath):
    """Parse a Smarter Time CSV export into a list of ``Event`` objects.

    Each CSV row must provide the columns ``Timestamp UTC ms``, ``Time``,
    ``Duration ms``, ``Activity``, ``Device``, ``Place`` and ``Room``.

    The event timestamp is the instant given by ``Timestamp UTC ms``
    (milliseconds since the Unix epoch), expressed in the fixed UTC offset
    found in the ``Time`` column (e.g. ``GMT+02:00`` or ``GMT-05:00``).

    Args:
        filepath: Path to the CSV export file.

    Returns:
        A list with one ``Event`` per CSV row, in file order.

    Raises:
        ValueError: If a row's ``Time`` column contains no ``GMT±HH:MM`` offset.
    """
    # Local import: the surrounding module does not import ``re`` at top level.
    import re

    offset_pattern = re.compile(r"GMT([+-])(\d{1,2}):(\d{2})")

    events = []
    # newline="" lets the csv module handle embedded newlines in quoted fields.
    with open(filepath, "r", newline="") as f:
        for row in csv.DictReader(f):
            match = offset_pattern.search(row["Time"])
            if match is None:
                raise ValueError(
                    f"No GMT offset found in Time column: {row['Time']!r}"
                )
            sign, hours, minutes = match.groups()
            offset = timedelta(hours=int(hours), minutes=int(minutes))
            if sign == "-":
                offset = -offset

            # Pass tz explicitly: a bare fromtimestamp() yields the *machine's*
            # local wall-clock time, which must not be relabelled with the
            # record's offset afterwards.
            dt = datetime.fromtimestamp(
                float(row["Timestamp UTC ms"]) / 1000, tz=timezone(offset)
            )
            td = timedelta(milliseconds=float(row["Duration ms"]))
            events.append(
                Event(
                    timestamp=dt,
                    duration=td,
                    data={
                        "activity": row["Activity"],
                        "device": row["Device"],
                        "place": row["Place"],
                        "room": row["Room"],
                    },
                )
            )
    return events
def compute_total_overlap(events: List[Event]) -> Tuple[int, timedelta]:
    """Count overlapping event pairs and sum the time they overlap.

    The events are sorted by timestamp and scanned with two indices: an
    "anchor" event ``i`` and a later "candidate" event ``j``. While the anchor
    is still running when the candidate starts, the pair is counted and the
    candidate index advances; otherwise the anchor index advances.

    Each candidate is only compared against the current anchor, so two events
    that both overlap the same anchor are not additionally compared with each
    other.

    Args:
        events: Objects exposing ``timestamp`` and ``duration`` attributes.
            The list passed in is not modified; a sorted copy is used.

    Returns:
        A tuple ``(n_overlaps, total_overlap)`` with the number of overlapping
        pairs found and their summed overlap duration. With fewer than two
        events there is nothing to compare and ``(0, timedelta(0))`` is
        returned.
    """
    events = sorted(events, key=lambda e: e.timestamp)
    n_events = len(events)
    n_overlaps = 0
    total_overlap = timedelta()

    # Invariant: i < j. For fewer than two events the loop body never runs.
    i, j = 0, 1
    while j < n_events:
        anchor, candidate = events[i], events[j]
        anchor_end = anchor.timestamp + anchor.duration
        if anchor_end > candidate.timestamp:
            n_overlaps += 1
            overlap_start = max(anchor.timestamp, candidate.timestamp)
            overlap_end = min(anchor_end, candidate.timestamp + candidate.duration)
            total_overlap += overlap_end - overlap_start
            j += 1
        else:
            # The anchor ended before the candidate starts: move the anchor
            # forward, and keep the candidate strictly ahead of it.
            i += 1
            if i == j:
                j += 1
    return n_overlaps, total_overlap