Skip to content

Storage

Caches an amora.storage.Cacheable into the provider selected at settings.STORAGE_CACHE_PROVIDER.

To disable the cache, set the env var AMORA_STORAGE_CACHE_ENABLED to false.

from datetime import datetime
from time import sleep

import pandas as pd

from amora.storage import cache


@cache()
def a_slow_to_build_dataframe():
    """Return a one-row DataFrame after a 3-second pause that stands in for slow work."""
    sleep(3)
    return pd.DataFrame([{"a": 4, "b": 2}])


# First call (assuming nothing is cached yet): the function body runs,
# so the elapsed time includes the 3-second sleep.
t0 = datetime.now()
print(a_slow_to_build_dataframe())
print("Uncached execution:", datetime.now() - t0)

# Second call: the result comes from the cache, so the sleep is skipped.
t0 = datetime.now()
print(a_slow_to_build_dataframe())
print("Cached execution:", datetime.now() - t0)
   a  b
0  4  2
Uncached execution: 0:00:03.011570

   a  b
0  4  2
Cached execution: 0:00:00.005567

If the cached function expects arguments, a suffix function should be provided to the cache decorator. The function will be called with the same arguments as the cached function, and a str should be returned.

# The suffix callable receives the same arguments as the decorated function;
# the string it returns becomes the last segment of the cache key.
@cache(suffix=lambda arg1, arg2: f"{arg1}_{arg2}")
def cacheable_func(arg1, arg2):
    return pd.DataFrame([{"amora": arg1, "storage": arg2}])


# Both calls share the same module, name and checksum; only the suffix differs,
# so each argument combination gets its own cache key.
cacheable_func(4, 2)
# CacheKey -> foo_module.cacheable_func.e6ccc38abffd7081822da108971e9d9c.4_2

cacheable_func(5, 3)
# CacheKey -> foo_module.cacheable_func.e6ccc38abffd7081822da108971e9d9c.5_3

The suffix function can also be used to implement a cache TTL:

from datetime import date


# The suffix is today's ISO date, so the cache key changes whenever the date changes.
@cache(suffix=lambda: date.today().isoformat())
def im_cached_for_one_day():
    sleep(3)  # stands in for slow work
    return pd.DataFrame([[1, 2, 3], [4, 5, 6]])
Source code in amora/storage.py
def cache(suffix: Union[Callable[..., str], None] = None):
    """
    Caches an `amora.storage.Cacheable` into the provider selected
    at `settings.STORAGE_CACHE_PROVIDER`.

    To disable the cache, set the env var `AMORA_STORAGE_CACHE_ENABLED` to false.

    ```python
    from datetime import datetime
    from time import sleep

    import pandas as pd

    from amora.storage import cache


    @cache()
    def a_slow_to_build_dataframe():
        sleep(3)
        return pd.DataFrame([{"a": 4, "b": 2}])


    t0 = datetime.now()
    print(a_slow_to_build_dataframe())
    print("Uncached execution:", datetime.now() - t0)

    t0 = datetime.now()
    print(a_slow_to_build_dataframe())
    print("Cached execution:", datetime.now() - t0)
    ```

    ```
       a  b
    0  4  2
    Uncached execution: 0:00:03.011570

       a  b
    0  4  2
    Cached execution: 0:00:00.005567
    ```

    If the cached function expects arguments, a `suffix` function should be provided
    to the `cache` decorator. The function will be called with the same arguments as the
    cached function, and a `str` should be returned.

    ```python
    @cache(suffix=lambda arg1, arg2: f"{arg1}_{arg2}")
    def cacheable_func(arg1, arg2):
        return pd.DataFrame([{"amora": arg1, "storage": arg2}])


    cacheable_func(4, 2)
    # CacheKey -> foo_module.cacheable_func.e6ccc38abffd7081822da108971e9d9c.4_2

    cacheable_func(5, 3)
    # CacheKey -> foo_module.cacheable_func.e6ccc38abffd7081822da108971e9d9c.5_3
    ```

    The `suffix` function can also be used to implement a cache TTL:

    ```python
    from datetime import date


    @cache(suffix=lambda: date.today().isoformat())
    def im_cached_for_one_day():
        sleep(3)
        return pd.DataFrame([[1, 2, 3], [4, 5, 6]])
    ```

    The cache key is built from the function's module, its name, an MD5 checksum
    of its source code and the optional suffix. When no `suffix` is given, the
    suffix part of the key is an empty string.
    """

    def decorate(fn: Cacheable):
        # The source checksum is computed once, when the function is decorated,
        # and reused for every call.
        source_bytes = inspect.getsource(fn).encode("utf-8")
        func_checksum = hashlib.md5(source_bytes).hexdigest()

        @wraps(fn)
        def cached_call(*args, **kwargs):
            if settings.STORAGE_CACHE_ENABLED:
                if suffix:
                    key_suffix = suffix(*args, **kwargs)
                else:
                    key_suffix = ""

                cache_key = CacheKey(
                    func_module=fn.__module__,
                    func_name=fn.__name__,
                    func_checksum=func_checksum,
                    suffix=key_suffix,
                )

                try:
                    hit = CACHE[cache_key]
                except KeyError:
                    # Cache miss: compute the value, store it, and hand it back.
                    fresh = fn(*args, **kwargs)
                    CACHE[cache_key] = fresh
                    return fresh
                else:
                    return hit

            # Caching is switched off: call straight through.
            return fn(*args, **kwargs)

        return cached_call

    return decorate

E.g:

# A key built from its four fields.
CacheKey(
    func_module="amora.questions",
    func_name="answer_df",
    func_checksum="e6ccc38abffd7081822da108971e9d9c",
    suffix="how_many_data_points_where_acquired",
)

# Its string form: the non-empty fields joined with ".".
"amora.questions.answer_df.e6ccc38abffd7081822da108971e9d9c.how_many_data_points_where_acquired"
Source code in amora/storage.py
class CacheKey(NamedTuple):
    """
    Key of a cache entry: function module, function name, function checksum
    and a suffix.

    Both `str()` and `repr()` render the key as its non-empty fields joined
    with a dot, e.g:

    ```python
    CacheKey(
        func_module="amora.questions",
        func_name="answer_df",
        func_checksum="e6ccc38abffd7081822da108971e9d9c",
        suffix="how_many_data_points_where_acquired",
    )

    "amora.questions.answer_df.e6ccc38abffd7081822da108971e9d9c.how_many_data_points_where_acquired"
    ```

    """

    func_module: str
    func_name: str
    func_checksum: str
    suffix: str

    def __str__(self):
        # Falsy fields (such as an empty suffix) are dropped, so the result
        # has no empty segment and no trailing dot.
        non_empty_parts = filter(None, self)
        return ".".join(non_empty_parts)

    def __repr__(self):
        # The repr is deliberately the same dotted string as str().
        return self.__str__()

__getnewargs__(self) special

Return self as a plain tuple. Used by copy and pickle.

Source code in amora/storage.py
def __getnewargs__(self):
    """Return self as a plain tuple.  Used by copy and pickle."""
    # NOTE(review): `_tuple` is not defined in this view; presumably the
    # builtin `tuple` as aliased by `collections.namedtuple` — confirm.
    return _tuple(self)

__new__(_cls, func_module, func_name, func_checksum, suffix) special staticmethod

Create new instance of CacheKey(func_module, func_name, func_checksum, suffix)


Last update: 2023-11-23