I've not read up much about LRU Caching outside of what structures its made of but I am still quite surprised at how much faster it is than a regular hashmap.

I did a test, a recursive combinatorics problem, using a regular hashmap to save the results of outcomes during recursion (dynamic programming), and did the same with the only difference being that an LRU cache implementation (size 1024) was used instead.

The performance dropped from 1 second to 0.006 seconds!

Now this was very surprising, and I had no idea why this was the case. Hashmaps have a O(1) time complexity for most operations and an LRU cache requires both a hashmap and a doubly linked list.

context:

I'm using c for this project. The hashmap in question is an unordered_map with a string as the key and an integer as the value. I have heard something about an unordered_map having a worst case complexity of N or N², but as far as I am aware, it usually performs all operations in O(1).
The LRU cache implementation was copypasted from stack overflow :D

the code

with LRU caching

#include <bits/stdc  .h>

using namespace std;
using namespace std::chrono;

template <typename T,typename U>                                                   
std::pair<T,U> operator (const std::pair<T,U> & l,const std::pair<T,U> & r) {   
    return {l.first r.first,l.second r.second};                                    
}
#pragma GCC optimize ("Ofast")
#pragma GCC target ("avx2")


// LRU Cache implementation
template <class KEY_T, class VAL_T> class LRUCache{
private:
        list< pair<KEY_T,VAL_T> > item_list;
        unordered_map<KEY_T, decltype(item_list.begin()) > item_map;
        size_t cache_size;
private:
        void clean(void){
                while(item_map.size()>cache_size){
                        auto last_it = item_list.end(); last_it --;
                        item_map.erase(last_it->first);
                        item_list.pop_back();
                }
        };
public:
        LRUCache(int cache_size_):cache_size(cache_size_){
                ;
        };

        void put(const KEY_T &key, const VAL_T &val){
                auto it = item_map.find(key);
                if(it != item_map.end()){
                        item_list.erase(it->second);
                        item_map.erase(it);
                }
                item_list.push_front(make_pair(key,val));
                item_map.insert(make_pair(key, item_list.begin()));
                clean();
        };
        bool exist(const KEY_T &key){
                return (item_map.count(key)>0);
        };
        VAL_T get(const KEY_T &key){
                assert(exist(key));
                auto it = item_map.find(key);
                item_list.splice(item_list.begin(), item_list, it->second);
                return it->second->second;
        };

};


// recursive solution to a combinatorics problem

// number of permutations of each parcel
int item_ways(int w, int n, int max_w){
    if (w == 0 and n == 0)
        return 1;
    if (w <= 0 or n <= 0)
        return 0;
    int ways = 0;
    for (int i = 1; i <= max_w; i  )
        ways  = item_ways(w-i, n-1, i);
    return ways;
}   

// total combinations for answer
LRUCache<string,int> dp(1024);
//unordered_map<string,int> dp;
int parcel_ways(int p, int max_w, int n, int w){
    if (p == 0 and n == 0) 
        return 1;
    if (p <= 0 and n <= 0)
        return 0;

    string x; 
    x  = char(p); 
    x  = char(max_w);
    x  = char(n);
    x  = char(w);
    
    if(dp.exist(x)) // caching/dp skips recursion here
    {
        return dp.get(x);
    }

    int ways = 0;
    for (int i = 1; i <= n; i  ){
        ways  = parcel_ways(p-1, max_w, n-i, w) * item_ways(w, i, max_w);
    }
    
    dp.put(x,ways); // cache here
    return ways;
}

// input any 4 numbers for problem
void solve()
{
    auto start = high_resolution_clock::now();
    cout << parcel_ways(5,8,23,17);
    auto stop = high_resolution_clock::now();
    auto duration = duration_cast<microseconds>(stop - start);
    cout << "Time taken by function: "
         << duration.count() << " microseconds" << endl;

}

int main()
{
    solve();
    return 0;
}

with an unordered_map (hashmap)

#include <bits/stdc  .h>

using namespace std;
using namespace std::chrono;

template <typename T,typename U>                                                   
std::pair<T,U> operator (const std::pair<T,U> & l,const std::pair<T,U> & r) {   
    return {l.first r.first,l.second r.second};                                    
}
#pragma GCC optimize ("Ofast")
#pragma GCC target ("avx2")

// number of permutations of each parcel
int item_ways(int w, int n, int max_w){
    if (w == 0 and n == 0)
        return 1;
    if (w <= 0 or n <= 0)
        return 0;
    int ways = 0;
    for (int i = 1; i <= max_w; i  )
        ways  = item_ways(w-i, n-1, i);
    return ways;
}   

// total combinations for answer
unordered_map<string,int> dp;
int parcel_ways(int p, int max_w, int n, int w){
    if (p == 0 and n == 0) 
        return 1;
    if (p <= 0 and n <= 0)
        return 0;

    string x; 
    x  = char(p); 
    x  = char(max_w);
    x  = char(n);
    x  = char(w);
    
    if(dp[x]) // caching/dp skips recursion here
    {
        return dp[x];
    }

    int ways = 0;
    for (int i = 1; i <= n; i  ){
        ways  = parcel_ways(p-1, max_w, n-i, w) * item_ways(w, i, max_w);
    }
    
    dp[x] = ways; // cache here
    return ways;
}

void solve()
{
    auto start = high_resolution_clock::now();
    cout << parcel_ways(5,8,23,17);
    auto stop = high_resolution_clock::now();
    auto duration = duration_cast<microseconds>(stop - start);
    cout << "Time taken by function: "
         << duration.count() << " microseconds" << endl;
}

int main()
{
    solve();
    return 0;
}

CodePudding user response：

There are a lot of things that aren't optimal in your implementation, but the only thing I see that can make the magnitude of difference you are seeing is this:

if(dp[x]) // caching/dp skips recursion here
{
    return dp[x];
}

This will not return if dp[x]==0, so you will recalculate any 0 result.

The version with the LRU cache uses exists, which will do an early return in this case.

This can be done by using dp.contains(x) (or dp.count(x) if you do not have c 20)