diff --git a/lib/cache.py b/lib/cache.py
index 8f923b4..b7c5179 100644
--- a/lib/cache.py
+++ b/lib/cache.py
@@ -5,12 +5,21 @@ LRU-Cache implementation for formatted (`format=`) answers
 import datetime
 import re
 import time
-import pylru
+import os
+import hashlib
+
 import pytz
+import pylru
+
+from globals import LRU_CACHE
 
 CACHE_SIZE = 10000
 CACHE = pylru.lrucache(CACHE_SIZE)
 
+# strings of at least this length are stored in the file cache
+# instead of RAM
+MIN_SIZE_FOR_FILECACHE = 80
+
 def _update_answer(answer):
     def _now_in_tz(timezone):
         return datetime.datetime.now(pytz.timezone(timezone)).strftime("%H:%M:%S%z")
@@ -26,9 +35,9 @@ def get_signature(user_agent, query_string, client_ip_address, lang):
     `lang`, and `client_ip_address`
     """
 
-    timestamp = int(time.time()) / 1000
+    timestamp = int(time.time() / 1000)
     signature = "%s:%s:%s:%s:%s" % \
         (user_agent, query_string, client_ip_address, lang, timestamp)
     return signature
 
 def get(signature):
@@ -38,13 +47,62 @@ def get(signature):
     the `_update_answer` function.
     """
 
-    if signature in CACHE:
-        return _update_answer(CACHE[signature])
+    value = CACHE.get(signature)
+    if value:
+        if value.startswith("file:"):
+            sighash = value[5:]
+            value = _read_from_file(signature, sighash=sighash)
+            if not value:
+                return None
+        return _update_answer(value)
     return None
 
 def store(signature, value):
     """
     Store in cache `value` for `signature`
     """
-    CACHE[signature] = value
+    # Values that themselves start with "file:" must never be kept inline,
+    # otherwise `get` would mistake them for a pointer to a cache file.
+    if len(value) < MIN_SIZE_FOR_FILECACHE and not value.startswith("file:"):
+        CACHE[signature] = value
+    else:
+        sighash = _store_in_file(signature, value)
+        CACHE[signature] = "file:%s" % sighash
     return _update_answer(value)
+
+def _hash(signature):
+    """Return the hex MD5 digest of `signature` (used as the cache file name)."""
+    return hashlib.md5(signature.encode("utf-8")).hexdigest()
+
+def _store_in_file(signature, value):
+    """Store `value` for `signature` in cache file.
+    Return file name (signature_hash) as the result.
+    """
+
+    signature_hash = _hash(signature)
+    filename = os.path.join(LRU_CACHE, signature_hash)
+    # Create the directory unconditionally and tolerate "already exists":
+    # a separate existence check could race with a concurrent creation.
+    try:
+        os.makedirs(LRU_CACHE)
+    except OSError:
+        if not os.path.isdir(LRU_CACHE):
+            raise
+    with open(filename, "w") as f_cache:
+        f_cache.write(value)
+    return signature_hash
+
+def _read_from_file(signature, sighash=None):
+    """Read value for `signature` from cache file,
+    or return None if file is not found.
+    If `sighash` is specified, do not calculate file name
+    from signature, but use `sighash` instead.
+    """
+
+    signature_hash = sighash or _hash(signature)
+    filename = os.path.join(LRU_CACHE, signature_hash)
+    if not os.path.exists(filename):
+        return None
+
+    with open(filename, "r") as f_cache:
+        return f_cache.read()
diff --git a/lib/globals.py b/lib/globals.py
index b24d791..0664d4a 100644
--- a/lib/globals.py
+++ b/lib/globals.py
@@ -14,6 +14,7 @@ from __future__ import print_function
 
 import logging
 import os
+import re
 
 MYDIR = os.path.abspath(os.path.dirname(os.path.dirname('__file__')))
 
@@ -31,6 +32,7 @@ _LOGDIR = "/wttr.in/log"
 CACHEDIR = os.path.join(_DATADIR, "cache/wego/")
 IP2LCACHE = os.path.join(_DATADIR, "cache/ip2l/")
 PNG_CACHE = os.path.join(_DATADIR, "cache/png")
+LRU_CACHE = os.path.join(_DATADIR, "cache/lru")
 
 LOG_FILE = os.path.join(_LOGDIR, 'main.log')
 