Redefine 'active' log files for store
For uploading or zipping, we identify log files we think are not
active. This adds a time component so we don't orphan log files if
events stop logging to them.
rhettg committed Jun 1, 2015
1 parent 3f8f25f commit 2062ab2
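To make the "time component" concrete: the filter subtracts an hour from the current UTC time before taking the date, so a log file from yesterday is still treated as potentially active during the first hour after midnight. A minimal standalone sketch of that cutoff arithmetic (the helper name and timestamps here are made up for illustration; the real logic lives in filter_log_files_for_active below):

import datetime

def is_probably_inactive(last_file_date, now):
    # Same cutoff as filter_log_files_for_active: allow an hour of slack
    # so a file is not treated as inactive right after rotation.
    cutoff_date = (now - datetime.timedelta(hours=1)).date()
    return last_file_date < cutoff_date

yesterday = datetime.date(2015, 5, 31)
print(is_probably_inactive(yesterday, datetime.datetime(2015, 6, 1, 0, 30)))   # False: too close to midnight
print(is_probably_inactive(yesterday, datetime.datetime(2015, 6, 1, 1, 30)))   # True: safely rotated away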
Showing 2 changed files with 78 additions and 26 deletions.
blueox/store.py: 53 changes (43 additions & 10 deletions)
@@ -225,27 +225,60 @@ def list_log_files(log_path):
     return log_files


+def filter_log_files_for_active(log_files):
+    """Filter our list of log files to remove those we expect might be active."""
+    out_log_files = []
+
+    files_by_type = collections.defaultdict(list)
+    for lf in log_files:
+        files_by_type[lf.type_name].append(lf)
+
+    for type_files in files_by_type.values():
+        type_files.sort(key=lambda f: f.sort_dt)
+
+        # We assume only the last log file in the list can be possibly be in
+        # use.
+        last_lf = type_files.pop()
+
+        out_log_files += type_files
+
+        # If that last log file is old, then it's probably not being used either.
+        # We add a buffer of an hour just to make sure everything has rotated
+        # away safely when this is run close to midnight.
+        cutoff_date = (datetime.datetime.utcnow() - datetime.timedelta(hours=1)).date()
+        if last_lf.date < cutoff_date:
+            out_log_files.append(last_lf)
+
+    return out_log_files
+
+
 def filter_log_files_for_zipping(log_files):
     """Identify unzipped log files that are approporate for zipping.
-    Each unique log type found should have the most recent log file unzipped
-    as it's probably still in use.
     """
-    files_by_type = collections.defaultdict(list)
-    for f in log_files:
-        if f.bzip:
+    out_files = []
+    for lf in filter_log_files_for_active(log_files):
+        if lf.bzip:
             continue

-        files_by_type[f.type_name].append(f)
+        out_files.append(lf)

-    out_files = []
-
-    for type_files in files_by_type.values():
-        type_files.sort(key=lambda f: f.sort_dt)
-
-        # We should always leave one unzipped file for each type (the likely
-        # active one)
-        out_files += type_files[:-1]
-
     return out_files
+
+
+def filter_log_files_for_uploading(log_files, zipped_only):
+    """Filter out log files that we shouldn't upload
+    specify zipped_only if we should only bother to upload zipped log files
+    """
+    out_files = []
+    for lf in filter_log_files_for_active(log_files):
+        if zipped_only and not lf.bzip:
+            continue
+
+        out_files.append(lf)
+
+    return out_files

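Taken together, the zipping and uploading paths now share one notion of "not active". A rough usage sketch, assuming blueox is importable and that log_path points at a directory of blueox log files (the path and the print statements are placeholders for the real zip/upload plumbing, which is not part of this commit):

from blueox import store

log_path = '/var/log/blueox'  # hypothetical location

log_files = store.list_log_files(log_path)

# Unzipped files that are safe to compress (likely-active ones are skipped).
for lf in store.filter_log_files_for_zipping(log_files):
    print('would zip', lf)

# Files that are safe to upload; zipped_only=True skips anything not bzipped yet.
for lf in store.filter_log_files_for_uploading(log_files, zipped_only=True):
    print('would upload', lf)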
tests/store_test.py: 51 changes (35 additions & 16 deletions)
@@ -130,46 +130,65 @@ def test_simple(self):
         assert_equal(files[0].type_name, "foo")


-class FilterUnzippedTest(TestCase):
-    def test_no_zipped(self):
-        files = [store.LogFile('foo', date=datetime.date.today(), bzip=True)]
-        out_files = store.filter_log_files_for_zipping(files)
-        assert_equal(len(out_files), 0)
-
+class FilterActiveTest(TestCase):
     def test_leave_active(self):
         files = [store.LogFile('foo', date=datetime.date.today())]
-        out_files = store.filter_log_files_for_zipping(files)
+        out_files = store.filter_log_files_for_active(files)
         assert_equal(len(out_files), 0)

-    def test_zippable(self):
+    def test_only_yesterday(self):
         files = [
             store.LogFile('foo', date=datetime.date.today()),
             store.LogFile('bar', date=datetime.date.today()),
             store.LogFile('bar', date=datetime.date.today() - datetime.timedelta(days=1))
         ]
-        out_files = store.filter_log_files_for_zipping(files)
+        out_files = store.filter_log_files_for_active(files)
         assert_equal(len(out_files), 1)
         assert_equal(out_files[0], files[-1])

-    def test_zippable_hourly(self):
+    def test_hourly(self):
+        now_hourly = datetime.datetime.utcnow().replace(minute=0, second=0)
+
         files = [
-            store.LogFile('bar', dt=datetime.datetime(2015, 5, 21, 19)),
-            store.LogFile('bar', dt=datetime.datetime(2015, 5, 21, 20))
+            store.LogFile('bar', dt=now_hourly - datetime.timedelta(hours=1)),
+            store.LogFile('bar', dt=now_hourly)
         ]
-        out_files = store.filter_log_files_for_zipping(files)
+        out_files = store.filter_log_files_for_active(files)
         assert_equal(len(out_files), 1)
         assert_equal(out_files[0], files[0])

     def test_hourly_and_daily(self):
         files = [
             store.LogFile('bar', date=datetime.date(2015, 5, 20)),
-            store.LogFile('bar', dt=datetime.datetime(2015, 5, 21, 19)),
-            store.LogFile('bar', dt=datetime.datetime(2015, 5, 21, 20))
+            store.LogFile('bar', dt=datetime.datetime(2015, 5, 21, 20)),
+            store.LogFile('bar', dt=datetime.datetime.utcnow().replace(minute=0, second=0))
         ]
-        out_files = store.filter_log_files_for_zipping(files)
+        out_files = store.filter_log_files_for_active(files)
         assert_equal(len(out_files), 2)
+
+
+class FilterUnzippedTest(TestCase):
+    def test_no_zipped(self):
+        date = (datetime.datetime.utcnow() - datetime.timedelta(days=2)).date()
+        files = [store.LogFile('foo', date=date, bzip=True)]
+        out_files = store.filter_log_files_for_zipping(files)
+        assert_equal(len(out_files), 0)
+
+
+class FilterUploadTest(TestCase):
+    def test_only_zipped(self):
+        date = (datetime.datetime.utcnow() - datetime.timedelta(days=2)).date()
+        files = [store.LogFile('foo', date=date, bzip=False)]
+        out_files = store.filter_log_files_for_uploading(files, True)
+        assert_equal(len(out_files), 0)
+
+    def test_any(self):
+        date = (datetime.datetime.utcnow() - datetime.timedelta(days=2)).date()
+        files = [store.LogFile('foo', date=date, bzip=False)]
+        out_files = store.filter_log_files_for_uploading(files, False)
+        assert_equal(len(out_files), 1)


 class ZipLogFileTest(TestCase):
     @setup
     def build_log_directory(self):
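The new FilterActiveTest cases pin down the hold-back behaviour; the situation the commit message calls out, a stale "last" file no longer being orphaned, can be sketched outside the test harness like this (illustrative only, assuming blueox is installed; the LogFile arguments mirror those used in the tests):

import datetime
from blueox import store

files = [
    store.LogFile('foo', date=datetime.date.today() - datetime.timedelta(days=3)),
    store.LogFile('foo', date=datetime.date.today() - datetime.timedelta(days=2)),
]

# The newest 'foo' file is already well past the one-hour buffer, so nothing
# is considered active any more and both files come back.
assert store.filter_log_files_for_active(files) == files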

0 comments on commit 2062ab2
