Skip to content

Commit

Permalink
Compress logs as .tar.xz files.
Browse files Browse the repository at this point in the history
Logs are now stored in:

1. logs/YYYYMMDD_*.gz (while still active)
2. oldlogs/YYYYMMDD_*.xz (when the session expires)
3. logarchive/YYYYMM.tar.xz (created after each month)

Experiments on old logs show that level 2 is 3x smaller than level 1, and
level 3 is 13x smaller than level 1.

logutil.py doesn't support reading level 3 logs yet.

For #123.
  • Loading branch information
TomiBelan committed Dec 22, 2018
1 parent 18b575b commit 780b21c
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 24 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ docs/_build
/logs
/logdb
/oldlogs
/logarchive
*_settings.py
!default_settings.py
*.log
94 changes: 72 additions & 22 deletions votrfront/cron.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,61 @@

import subprocess
import datetime
import gzip
import json
import lzma
import os
import shutil
import subprocess
import tarfile
import time
from . import logutil


def _archivable_logs(app, prefix):
    """Return the sorted names of oldlogs/*.xz files that start with prefix."""
    return sorted(
        filename for filename in os.listdir(app.var_path('oldlogs'))
        if filename.startswith(prefix) and filename.endswith('.xz'))


def create_archive(app, prefix):
    """Bundle the oldlogs files for one prefix into logarchive/<prefix>.tar.xz.

    Every ``oldlogs/<prefix>*.xz`` file is decompressed and stored in the
    tarball as ``logs/<name without .xz>``, with the member mtime taken from
    the first element of the JSON array on the file's last line. The tarball
    is written to ``logarchive/wip.tar.xz`` and renamed into place only once
    it is complete. Afterwards, every oldlogs file that has an entry in the
    finished archive is deleted.

    If the destination archive already exists it is not rebuilt; only the
    clean-up step runs.

    Args:
        app: object whose ``var_path(*parts)`` returns a filesystem path.
        prefix: filename prefix selecting which oldlogs files to archive;
            also used as the archive's base name.

    Raises:
        Exception: with the source filename if a log file is empty or its
            last line does not end with ``]\\n`` (truncated log), or with a
            'Remaining files' message if matching oldlogs files are still
            present after the clean-up.
    """
    wip = app.var_path('logarchive', 'wip.tar.xz')
    dest = app.var_path('logarchive', prefix + '.tar.xz')
    sources = _archivable_logs(app, prefix)

    # A leftover work-in-progress file comes from an interrupted run; discard
    # it so that only complete archives are ever renamed to their final name.
    try:
        os.unlink(wip)
    except FileNotFoundError:
        pass

    if not os.path.exists(dest):
        with tarfile.open(wip, 'w:xz', preset=9) as tar:
            for source in sources:
                with lzma.open(app.var_path('oldlogs', source)) as f:
                    # Read through the whole file: this finds the last line
                    # and leaves f.tell() at the uncompressed size.
                    last = None
                    for line in f:
                        last = line
                    # An empty file has no last line at all; report it the
                    # same way as a truncated one instead of crashing on None.
                    if last is None or not last.endswith(b']\n'):
                        raise Exception(source)
                    mtime = json.loads(last.decode('utf8'))[0]
                    size = f.tell()
                    f.seek(0)

                    # source[:-3] strips the '.xz' suffix.
                    tarinfo = tarfile.TarInfo('logs/' + source[:-3])
                    tarinfo.size = size
                    tarinfo.mtime = mtime
                    tar.addfile(tarinfo, fileobj=f)

        os.rename(wip, dest)

    # Delete only what the finished archive actually contains.
    with tarfile.open(dest, 'r') as tar:
        for entry in tar:
            sessid = entry.name.rpartition('/')[2]
            try:
                os.unlink(app.var_path('oldlogs', sessid + '.xz'))
            except FileNotFoundError:
                pass

    remains = _archivable_logs(app, prefix)
    if remains:
        raise Exception('Remaining files: %r' % remains)


def cron(app):
logutil.process_logfiles(
app, [app.var_path('logs', sessid)
for sessid in os.listdir(app.var_path('logs'))])
app, [app.var_path('logs', filename)
for filename in os.listdir(app.var_path('logs'))])

now = time.time()

Expand All @@ -18,29 +65,32 @@ def cron(app):
if now - mtime > app.settings.session_max_age:
os.unlink(path)

return # TODO: implement new logs structure.

for sessid in os.listdir(app.var_path('logs')):
if os.path.exists(app.var_path('sessions', sessid)):
continue

path = app.var_path('logs', sessid)
for filename in os.listdir(app.var_path('logs')):
if not filename.endswith('.gz'): continue
path = app.var_path('logs', filename)
sessid = filename.partition('.')[0]
mtime = os.path.getmtime(path)
if now - mtime > app.settings.log_max_age:
newpath = app.var_path('oldlogs', sessid[0:2], sessid + '.gz')
if not (now - mtime > app.settings.session_max_age): continue
if os.path.exists(app.var_path('sessions', sessid)): continue

try:
os.mkdir(os.path.dirname(newpath))
except FileExistsError:
pass
newpath = app.var_path('oldlogs', sessid + '.xz')

with open(newpath, 'wb') as output:
# this is slower than using python's gzip library, but
# correctly preserves the original name, mtime etc.
subprocess.check_call(['gzip', '-c', path], stdout=output)
os.utime(newpath, (mtime, mtime))
with gzip.open(path) as src:
with lzma.open(newpath, 'w', preset=9) as dest:
shutil.copyfileobj(src, dest)
os.unlink(path)

os.unlink(path)
this_month = datetime.datetime.utcfromtimestamp(now).strftime('%Y%m')
prefixes = set(
filename[0:6] for filename in os.listdir(app.var_path('oldlogs'))
if filename.endswith('.xz') and not filename.startswith(this_month))

for filename in os.listdir(app.var_path('logs')):
prefixes.discard(filename[0:6])
for filename in os.listdir(app.var_path('sessions')):
prefixes.discard(filename[0:6])
for prefix in prefixes:
create_archive(app, prefix)

cron.help = ' $0 cron'

Expand Down
8 changes: 6 additions & 2 deletions votrfront/logutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import gzip
import io
import json
import lzma
import os
import re
import sqlite3
Expand Down Expand Up @@ -66,9 +67,10 @@ def _connect(app):

def locate(app, sessid):
options = [
app.var_path('logs', sessid),
app.var_path('logs', sessid), # legacy
app.var_path('logs', sessid + '.gz'),
app.var_path('oldlogs', sessid[0:2], sessid + '.gz'),
app.var_path('oldlogs', sessid[0:2], sessid + '.gz'), # legacy
app.var_path('oldlogs', sessid + '.xz'),
sessid,
]
for option in options:
Expand All @@ -80,6 +82,8 @@ def locate(app, sessid):
def open_log(filename):
    """Open a log file for reading as UTF-8 text.

    Files ending in '.gz' are read through gzip, files ending in '.xz'
    through lzma, and anything else is opened as a plain file.
    """
    if filename.endswith('.gz'):
        opener = gzip.open
    elif filename.endswith('.xz'):
        opener = lzma.open
    else:
        return open(filename, encoding='utf8')
    return opener(filename, 'rt', encoding='utf8')

Expand Down
1 change: 1 addition & 0 deletions votrfront/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def serve(app, *args):
os.makedirs(app.var_path('logs'), exist_ok=True)
os.makedirs(app.var_path('logdb'), exist_ok=True)
os.makedirs(app.var_path('oldlogs'), exist_ok=True)
os.makedirs(app.var_path('logarchive'), exist_ok=True)
os.makedirs(app.var_path('sessions'), exist_ok=True)

app.wrap_static()
Expand Down

0 comments on commit 780b21c

Please sign in to comment.