diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..1bae4be --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,120 @@ +# This file is autogenerated by maturin v1.4.0 +# To update, run +# +# maturin generate-ci github +# +name: CI + +on: + push: + branches: + - main + - master + tags: + - '*' + pull_request: + workflow_dispatch: + +permissions: + contents: read + +jobs: + linux: + runs-on: ubuntu-latest + strategy: + matrix: + target: [x86_64, x86, aarch64, armv7, s390x, ppc64le] + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + manylinux: auto + - name: Upload wheels + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + windows: + runs-on: windows-latest + strategy: + matrix: + target: [x64, x86] + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + architecture: ${{ matrix.target }} + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + - name: Upload wheels + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + macos: + runs-on: macos-latest + strategy: + matrix: + target: [x86_64, aarch64] + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + - name: Upload wheels + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + sdist: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Build sdist + uses: PyO3/maturin-action@v1 + with: + command: sdist + 
args: --out dist + - name: Upload sdist + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + release: + name: Release + runs-on: ubuntu-latest + if: "startsWith(github.ref, 'refs/tags/')" + needs: [linux, windows, macos, sdist] + steps: + - uses: actions/download-artifact@v3 + with: + name: wheels + - name: Publish to PyPI + uses: PyO3/maturin-action@v1 + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + with: + command: upload + args: --non-interactive --skip-existing * diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c8f0442 --- /dev/null +++ b/.gitignore @@ -0,0 +1,72 @@ +/target + +# Byte-compiled / optimized / DLL files +__pycache__/ +.pytest_cache/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +.venv/ +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +include/ +man/ +venv/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-selfcheck.json + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +.DS_Store + +# Sphinx documentation +docs/_build/ + +# PyCharm +.idea/ + +# VSCode +.vscode/ + +# Pyenv +.python-version diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..c9bd835 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "priority-expiry-cache" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "priority_expiry_cache" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = "0.19.0" +lru = "0.12.1" diff --git a/Readme.md b/Readme.md new file mode 100644 index 0000000..822b14a --- /dev/null +++ b/Readme.md @@ -0,0 +1,106 @@ +# Priority-expiry-cache 
[![codecov](https://codecov.io/gh/LucaPaterlini/python-wrapper-priority-expiry-cache/graph/badge.svg?token=GIJFVWT7NC)](https://codecov.io/gh/LucaPaterlini/priority-expiry-cache) + +## Python wrapper of the Rust [crate](https://crates.io/crates/priority-expiry-cache) + +## Intro + +This problem is one of the famous questions asked in interviews at companies +such as Tesla, Amazon, BP, etc. + +[Tesla Phone Screen Cache Problem](https://medium.com/double-pointer/tesla-google-facebook-phone-screen-cache-problem-4e24f5b886f8) + + +I personally found this question a bit too much for a one-hour interview, +to go from ideation to writing a full code implementation. +This crate is an attempt to make asking this question obsolete +and spare good candidates from a rejection for a problem which +is based more on intuition than on algorithm and data-structure skills. + +For all of those interested in just having a good priority/expiry cache in place, +feel free to look at the [official crate](https://crates.io/crates/priority-expiry-cache) and send PRs +and tickets to the official GitHub repo. + +All code is released under an informal +[Beerware](https://en.wikipedia.org/wiki/Beerware) licence. + +## Problem Statement + +The problem statement requires us to design a cache with the following methods: + +- get(String key) +- set(String key, String value, int priority, int expiry) +- evictItem(int currentTime) + +The rules by which the cache operates are as follows: + +1. If an expired item is available, remove it. If multiple items have the same expiry, removing any one suffices. +2. If condition #1 can’t be satisfied, remove an item with the least priority. +3. If more than one item satisfies condition #2, remove the least recently used one. +4. Multiple items can have the same priority and expiry. + +Unstated rules: + - All of those operations should be O(1) time and space complexity. 
+ +## 1 Min Solution summary + +It's an extension of the [LRU Cache Wikipedia](https://en.wikipedia.org/wiki/Cache_replacement_policies#Least_recently_used_(LRU)) +as explained in this implementation [LRU Cache Interview Cake](https://www.interviewcake.com/concept/java/lru-cache); +the difference is the addition of a binary tree to keep track of the min and max priority and expiry. + +## Solution + +Assumptions: + - all the parameters have a fixed length, e.g. String = len 1024; Int = u32 + +Data structures used: + - [Doubly linked list](https://en.wikipedia.org/wiki/Doubly_linked_list) + - [Binary Tree](https://en.wikipedia.org/wiki/Binary_tree) + - [Hash map](https://en.wikipedia.org/wiki/Hash_table) + +### Set O(1) time and space +Let's start from set. To reduce the lookup time we are going to use a hash map to store a reference +to the object that will encapsulate the "value" parameters, at a cost of O(K) time and space +assuming the map is pre-initialized. Here K is the length of the String, because it will be the input of our +hash function; given our assumption that it has a fixed length, O(1) will be our cost. + +Now we can make the assumption that the int is a bounded number, say u32; this means we can construct +a binary tree with a depth of 32 and an access time of O(32), therefore O(1) time and space. + +This way we can build 2 binary trees that will give us the min and max priority and expiry at O(1) cost. +To satisfy rule 4 we also need to use a doubly linked list as the leaf level of the binary trees, +so we can have multiple items with the same expiry or the same priority, and to satisfy rule 3 about +the removal of the least recently used. + +The complexity is O(1) for the insertion + O(1) for the insertion into the binary trees of priority and expiry (O(K)x2); +assuming K is constant, then O(1)+O(1)x2 = O(1) time and space. 
+ +### Get O(1) time and space + +The get is simpler because we only have to access the hash map to get the reference to the object, which +we already know happens in O(1) time and space. + +Additionally: + +To keep the least-recently-used doubly linked lists for both expiry and priority consistent, +we are going to move the item to the head of the list; this way we can keep track of the +least recently used item in O(1). + +The complexity is O(1) for the lookup + O(1) for the insertion into the binary trees of priority and expiry (O(K)x2); +assuming K is constant, then O(1)+O(1)x2 = O(1) time and space. + + +### EvictItem O(1) time and space + +Following rule number 1 we are going to get the min expiry time in O(1) thanks to the binary tree mentioned earlier, +and delete the first item; as a policy we are using the least recently used for both expiry and priority. + +As requested, if the item with the min expiry is not yet expired, we are going to get the min priority in O(1), +and we can remove the least recently used item thanks to the doubly linked list. + +The complexity is O(1)x2 for finding the min expiry time and the min priority, and O(1) +to remove the tail of the doubly linked list = O(1) time and space. 
+ + +## Credits +- [Giacomo Sorbi](https://www.linkedin.com/in/giacomosorbi/) for proofreading +- [Maturin](https://pypi.org/project/maturin/) for the wrapper scaffolding \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d47e59b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,21 @@ +[build-system] +requires = ["maturin>=1.4,<2.0"] +build-backend = "maturin" + +[project] +name = "priority-expiry-cache" +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +[project.optional-dependencies] +tests = [ + "pytest", +] +dynamic = ["version"] + +[tool.maturin] +python-source = "python" +features = ["pyo3/extension-module"] diff --git a/python/priority_expiry_cache/__init__.py b/python/priority_expiry_cache/__init__.py new file mode 100644 index 0000000..a54ea54 --- /dev/null +++ b/python/priority_expiry_cache/__init__.py @@ -0,0 +1,6 @@ +from .priority_expiry_cache import * + + +__doc__ = priority_expiry_cache.__doc__ +if hasattr(priority_expiry_cache, "__all__"): + __all__ = priority_expiry_cache.__all__ diff --git a/python/tests/test_all.py b/python/tests/test_all.py new file mode 100644 index 0000000..450a262 --- /dev/null +++ b/python/tests/test_all.py @@ -0,0 +1,81 @@ +from priority_expiry_cache import PECache + +def test_new_cache(): + PECache() + +def test_get_missing_key(): + assert None == PECache().get("") + +def test_evict_from_empty_cache(): + PECache().evict(0) + +def test_get_and_set_single_element(): + cache = PECache() + key,value,priority,expiry = "key","value",1,1 + cache.set(key,value,priority,expiry) + assert cache.get(key) == value + +def test_set_2_items_same_key_override(): + cache = PECache() + key,value,priority,expiry = "key","value",1,1 + cache.set(key, value, priority, expiry) + value1,priority1,expiry = "value1",2,2 + cache.set(key, 
value1, priority1, expiry) + assert cache.get(key) == value1 + assert cache.len() == 1 + +def test_get_and_set_evict_single_element(): + cache = PECache() + key,value,priority,expiry = "key","value",1,1 + cache.set(key,value,priority,expiry) + cache.evict(2) + assert cache.get(key) is None + +def test_insert_2_elements_evict_get_different_time(): + cache = PECache() + key, value, priority, expiry = "key", "value", 1, 0 + cache.set(key, value, priority, expiry) + key1, value1, priority1, expiry1 = ("key1", "value1", 2, 2) + cache.set(key1, value1, priority1, expiry1) + + assert cache.get(key) == value + assert cache.get(key1) == value1 + cache.evict(1) + assert cache.get(key1) == value1 + assert cache.get(key) is None + cache.evict(3) + assert cache.get(key1) is None + assert cache.get(key) is None + cache.evict(0) + +def test_insert_2_elements_evict_by_priority(): + cache = PECache() + key, value, priority, expiry = "z_key", "z_value", 2, 10 + cache.set(key, value, priority, expiry) + key1, value1, priority1, expiry1 = "key1", "value1", 1, 10 + cache.set(key1, value1, priority1, expiry1) + assert cache.get(key) == value + assert cache.get(key1) == value1 + cache.evict(2) + assert cache.get(key1) is None + assert cache.get(key) == value + cache.evict(0) + assert cache.get(key1) is None + assert cache.get(key) is None + cache.evict(0) + +def test_eviction_by_lru(): + cache = PECache() + key, value, priority, expiry = "z_key", "z_value", 2, 10 + cache.set(key, value, priority, expiry) + + key1, value1, priority1, expiry1 = "key1", "value1", 2, 11 + cache.set(key1, value1, priority1, expiry1) + + key2, value2, priority2, expiry2 = "key2", "value2", 2, 12 + cache.set(key2, value2, priority2, expiry2) + cache.get(key) + cache.evict(5) + assert cache.get(key) == value + assert cache.get(key1) is None + assert cache.get(key2) == value2 diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..213fb40 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,105 @@ +use 
pyo3::prelude::*; +extern crate lru; + +use std::collections::{BTreeMap, BTreeSet,HashMap}; +use lru::LruCache; + +/// Formats the sum of two numbers as string. +#[pyfunction] +fn sum_as_string(a: usize, b: usize) -> PyResult { + Ok((a + b).to_string()) +} + +#[pyclass] +struct Page{ + key: String, + value: String, + priority: u32, + expiry: u32, +} + +#[pyclass] +#[derive(Ord,PartialOrd,Eq, PartialEq)] +struct ItemExpiry{ + expiry: u32, + key: String, +} +#[pyclass] +pub struct PECache{ + access_map: HashMap, + evict_expiry: BTreeSet, + evict_priority: BTreeMap>, +} + +#[pymethods] +impl PECache{ + #[new] + pub fn new() -> Self { + Self { + access_map: Default::default(), + evict_expiry: Default::default(), + evict_priority: Default::default(), + } + } + pub fn set(&mut self, key: String, value: String,priority: u32, expiry: u32) { + // addition to the btree for time + let key_expiry = ItemExpiry { + expiry: expiry.clone(), + key: key.clone(), + }; + self.evict_expiry.insert(key_expiry); + // addition to the btree for priority + self.evict_priority.entry(priority.clone()) + .or_insert(LruCache::unbounded()).push(key.clone(), true); + // add to the map + let page = Page { key: key.clone(), value, expiry, priority }; + self.access_map.insert(key, page); + return; + } + pub fn get(&mut self, key: String) -> Option { + if let Some(page) = self.access_map.get(&key) { + // change the order in the last recently data structure + self.evict_priority.get_mut(&page.priority).unwrap().promote(&page.key); + return Some(page.value.clone()); + } + None + } + pub fn evict(&mut self, barrier: u32) { + if self.access_map.len() == 0 { + return; + } + // get key by check firs by time and then by priority/lru + let target_item = self.evict_expiry.first().unwrap(); + + let key_target = if target_item.expiry <= barrier { target_item.key.clone() } else { + let target_item = self.evict_priority.first_entry().unwrap(); + let (key,_) = target_item.get().peek_lru().unwrap(); + key.clone() + 
}; + // delete from the map + let page = self.access_map.remove(&key_target).unwrap(); + // delete from the expiry tree + self.evict_expiry.remove(&ItemExpiry{expiry: page.expiry.clone(),key: page.key.clone()}); + // delete from priority tree + let node_priority = self.evict_priority.get_mut(&page.priority).unwrap(); + node_priority.pop(&page.key); + // delete the node if empty after the item removal + if node_priority.len() == 0 { + self.evict_priority.remove(&page.priority); + } + return + } + pub fn len(&self)->usize{ + self.access_map.len() + } +} + + + +/// A Python module implemented in Rust. +#[pymodule] +fn priority_expiry_cache(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_class::()?; + m.add_function(wrap_pyfunction!(sum_as_string, m)?)?; + Ok(()) +}