When unit testing, you often want to assert two things are equal. This is common for maps (dictionaries), where you may have a return map and the expected one. For example:
1 | assertEquals(Map("one" -> 1), Map("one" -> 2)); |
assertEquals(Map("one" -> 1), Map("one" -> 2));
would throw an error with a message along the lines of:
1 | Map("one" -> 1) did not equal Map("one" -> 2) |
Map("one" -> 1) did not equal Map("one" -> 2)
Printing out the contents of both maps makes it hard for the caller to find the actual differences they care about, especially in large maps. It’s hard to see the difference in the following output:
1 2 | Map("six" -> 6, "four" --> 4, "two" --> 2, "one" --> 1, "three" --> 3, "five" --> 5) did not equal Map("three" --> 3, "one" --> 1, "two" --> 2, "four" --> 5, "six" --> 6, "five" --> 5)" |
Map("six" -> 6, "four" --> 4, "two" --> 2, "one" --> 1, "three" --> 3, "five" --> 5) did not equal Map("three" --> 3, "one" --> 1, "two" --> 2, "four" --> 5, "six" --> 6, "five" --> 5)"
Our goal is to write a function that computes the differences between two maps and returns the result in a data structure of your choosing. This should make it easy for the caller to assert equality and print the differences in a meaningful way. The function should be easy to use and well documented. Do not print a string; writing a helper function to render our result into a String is outside of the scope of this question.
Compute the Difference of Two Maps
Considering refactor the MapDiff class so that the difference between HashMaps A and B, B and A can be simply calling a same utility function. We return the difference type in a Enum.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | #include <unordered_map> #include <vector> using namespace std; enum DiffType { VALUE, MINUS, PLUS }; template <class T, class V> // typedef vector<tuple<DiffType, T, V, V> ArrayOfDiffs; template <class T, class V> class MapDiff { public: MapDiff(const unordered_map<T, V> &m) { data = m; } vector<tuple<DiffType, T, V, V>> getDiff(const unordered_map<T, V> &anotherMap) { auto diffs = getMapDiff(data, anotherMap); auto anotherDiffs = getMapDiff(anotherMap, data); vector<tuple<DiffType, T, V, V>> result(diffs.first); result.insert(end(result), begin(diffs->second), end(diffs->second)); result.insert(end(result), begin(anotherDiffs->second), end(anotherDiffs->second)); return result; } private: unordered_map<T, V> data; // N entries for map1, M for map2 // Time: O(N + M) // Space: O(N + M) pair<vector<tuple<DiffType, T, V, V>>, vector<tuple<DiffType, T, V, V>>> getMapDiff( const unordered_map<T, V> &map1, const unordered_map<T, V> &map2) { vector<tuple<DiffType, T, V, V>> diffWithSameKey; vector<tuple<DiffType, T, V, V>> otherDiffs; for (auto it = begin(map1); it != end(map1); ++ it) { auto key = it->first; auto value = it->second; if (map2.count(key)) { // if the key appears in both map if (value != map2[key]) { diffWithSameKey.push_back(make_tuple(VALUE, key, value, map2[key])); } } else { // second map doesn't have this key otherDiffs.push_back(make_tuple(PLUS, key, value, value)); } } return {diffWithSameKey, otherDiffs}; } }; |
#include <unordered_map> #include <vector> using namespace std; enum DiffType { VALUE, MINUS, PLUS }; template <class T, class V> // typedef vector<tuple<DiffType, T, V, V> ArrayOfDiffs; template <class T, class V> class MapDiff { public: MapDiff(const unordered_map<T, V> &m) { data = m; } vector<tuple<DiffType, T, V, V>> getDiff(const unordered_map<T, V> &anotherMap) { auto diffs = getMapDiff(data, anotherMap); auto anotherDiffs = getMapDiff(anotherMap, data); vector<tuple<DiffType, T, V, V>> result(diffs.first); result.insert(end(result), begin(diffs->second), end(diffs->second)); result.insert(end(result), begin(anotherDiffs->second), end(anotherDiffs->second)); return result; } private: unordered_map<T, V> data; // N entries for map1, M for map2 // Time: O(N + M) // Space: O(N + M) pair<vector<tuple<DiffType, T, V, V>>, vector<tuple<DiffType, T, V, V>>> getMapDiff( const unordered_map<T, V> &map1, const unordered_map<T, V> &map2) { vector<tuple<DiffType, T, V, V>> diffWithSameKey; vector<tuple<DiffType, T, V, V>> otherDiffs; for (auto it = begin(map1); it != end(map1); ++ it) { auto key = it->first; auto value = it->second; if (map2.count(key)) { // if the key appears in both map if (value != map2[key]) { diffWithSameKey.push_back(make_tuple(VALUE, key, value, map2[key])); } } else { // second map doesn't have this key otherDiffs.push_back(make_tuple(PLUS, key, value, value)); } } return {diffWithSameKey, otherDiffs}; } };
Distributed Version of Map Difference
If this is to run on a distributed environment where a single map may not be fully loaded on one physical server/machine, we can sort the Map by keys or use the map (sorted by Keys) in C++. Then, similar than merging two sorted list, we use two iterator (pointer) to point to begining of each map, then advance the iterator/pointer as the comparison goes on.
The time complexity (because of overhead by maintaining the sorted keys), would be O(NlogN+MlogM) as compared to O(N+M) in the first version.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | #include <map> #include <vector> using namespace std; enum DiffType { VALUE, MINUS, PLUS }; template <class T, class V> // typedef vector<tuple<DiffType, T, V, V> ArrayOfDiffs; template <class T, class V> class MapDiff { public: MapDiff(const unordered_map<T, V> &m) { data = m; } vector<tuple<DiffType, T, V, V>> getDiff(const map<T, V> &anotherMap) { auto diffs = getMapDiff(data, anotherMap); auto anotherDiffs = getMapDiff(anotherMap, data); vector<tuple<DiffType, T, V, V>> result(diffs.first); result.insert(end(result), begin(diffs->second), end(diffs->second)); result.insert(end(result), begin(anotherDiffs->second), end(anotherDiffs->second)); return result; } private: map<T, V> data; // O(nlogn + mlogm) pair<vector<tuple<DiffType, T, V, V>>, vector<tuple<DiffType, T, V, V>>> getMapDiffV2( const map<T, V> &map1, const map<T, V> &map2) { vector<tuple<DiffType, T, V, V>> diffWithSameKey; vector<tuple<DiffType, T, V, V>> otherDiffs; auto itOfMap1 = begin(map1); auto itOfMap2 = begin(map2); while (itOfMap1 != end(map1) && itOfMap2 != end(map2)) { if (itOfMap1->first == itOfMap2->first) { // if the key appears in both map if (itOfMap1->second != itOfMap2->second) { diffWithSameKey.push_back(make_tuple(VALUE, key, itOfMap1->second, itOfMap2->second)); } itOfMap1 ++; itOfMap2 ++; } else { if (itOfMap1->first < itOfMap2->first) { otherDiffs.push_back(make_tuple(PLUS, key, itOfMap1->second, itOfMap2->second)); itOfMap1 ++; } else { otherDiffs.push_back(make_tuple(MINUS, key, itOfMap1->second, itOfMap2->second)); itOfMap2 ++; } } } return {diffWithSameKey, otherDiffs}; } }; |
#include <map> #include <vector> using namespace std; enum DiffType { VALUE, MINUS, PLUS }; template <class T, class V> // typedef vector<tuple<DiffType, T, V, V> ArrayOfDiffs; template <class T, class V> class MapDiff { public: MapDiff(const unordered_map<T, V> &m) { data = m; } vector<tuple<DiffType, T, V, V>> getDiff(const map<T, V> &anotherMap) { auto diffs = getMapDiff(data, anotherMap); auto anotherDiffs = getMapDiff(anotherMap, data); vector<tuple<DiffType, T, V, V>> result(diffs.first); result.insert(end(result), begin(diffs->second), end(diffs->second)); result.insert(end(result), begin(anotherDiffs->second), end(anotherDiffs->second)); return result; } private: map<T, V> data; // O(nlogn + mlogm) pair<vector<tuple<DiffType, T, V, V>>, vector<tuple<DiffType, T, V, V>>> getMapDiffV2( const map<T, V> &map1, const map<T, V> &map2) { vector<tuple<DiffType, T, V, V>> diffWithSameKey; vector<tuple<DiffType, T, V, V>> otherDiffs; auto itOfMap1 = begin(map1); auto itOfMap2 = begin(map2); while (itOfMap1 != end(map1) && itOfMap2 != end(map2)) { if (itOfMap1->first == itOfMap2->first) { // if the key appears in both map if (itOfMap1->second != itOfMap2->second) { diffWithSameKey.push_back(make_tuple(VALUE, key, itOfMap1->second, itOfMap2->second)); } itOfMap1 ++; itOfMap2 ++; } else { if (itOfMap1->first < itOfMap2->first) { otherDiffs.push_back(make_tuple(PLUS, key, itOfMap1->second, itOfMap2->second)); itOfMap1 ++; } else { otherDiffs.push_back(make_tuple(MINUS, key, itOfMap1->second, itOfMap2->second)); itOfMap2 ++; } } } return {diffWithSameKey, otherDiffs}; } };
Some Test cases to unit test the solution or if you prefer Test Driven Development.
1 2 3 4 5 6 | A = {}, B = {} ==> [] A = {}, B = {a: b} => [(Plus, a, b)] A = {a: b}, B = {} => [(Minus, a, b)] A = {a: b}, B = {a: b} => [] A = {A: b}, B = {a: b} => [(Plus, a, b), (Minus, A, b)] A = {A: b}, B = {A, c} => [(VALUE, A, b, c)] |
A = {}, B = {} ==> [] A = {}, B = {a: b} => [(Plus, a, b)] A = {a: b}, B = {} => [(Minus, a, b)] A = {a: b}, B = {a: b} => [] A = {A: b}, B = {a: b} => [(Plus, a, b), (Minus, A, b)] A = {A: b}, B = {A, c} => [(VALUE, A, b, c)]
–EOF (The Ultimate Computing & Technology Blog) —
loading...
Last Post: Teaching Kids Programming - Introduction to Object Oriented Programming
Next Post: Teaching Kids Programming - Algorithms to Search in Binary Search Tree