How to Find Top K Frequent Elements via Priority Queue or Sorting?


Suppose if you are given an array of N integers, how could you find out the top K elements based on its frequencies?

For example,
Input: nums = [1,1,1,2,2,3], k = 2
Output: [1,2]

Sorting

std::sort has complexity of O(nlogn). If we sort the original numbers by its frequency, we will have duplicates. And we can add the numbers one by one to the result vector if it has not been added before, which can be checked via std::find. To count the frequencies, we use unordered_map which is hash table that has O(1) insert, update and remove operations.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class Solution {
public:
    vector<int> topKFrequent(vector<int>& nums, int k) {
        unordered_map<int, int> count;
        for (const auto &n: nums) { // count frequencies and store them in hash table
            if (count.find(n) == count.end()) {
                count[n] = 1;
            } else {
                count[n] ++;
            }            
        }
        sort(nums.begin(), nums.end(), [&count](int a, int b) {
            return count[a] > count[b];  // sort by frequency
        });
        vector<int> r; // result vector
        int c = 0;
        for (int i = 0; i < nums.size(); ++ i) {
            if (std::find(r.begin(), r.end(), nums[i]) == r.end()) { // remove duplicates
                r.push_back(nums[i]);
                c ++;
                if (c >= k) break;  // Top K frequent elements found
            }
        }
        return r;
    }
};
class Solution {
public:
    vector<int> topKFrequent(vector<int>& nums, int k) {
        unordered_map<int, int> count;
        for (const auto &n: nums) { // count frequencies and store them in hash table
            if (count.find(n) == count.end()) {
                count[n] = 1;
            } else {
                count[n] ++;
            }            
        }
        sort(nums.begin(), nums.end(), [&count](int a, int b) {
            return count[a] > count[b];  // sort by frequency
        });
        vector<int> r; // result vector
        int c = 0;
        for (int i = 0; i < nums.size(); ++ i) {
            if (std::find(r.begin(), r.end(), nums[i]) == r.end()) { // remove duplicates
                r.push_back(nums[i]);
                c ++;
                if (c >= k) break;  // Top K frequent elements found
            }
        }
        return r;
    }
};

Priority Queue

std::priority_queue has O(nlogn) in building the queue and has O(logn) when it pops the top element from the queue. So, with priority queue, the implement becomes neat.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
class Solution {
public:
    vector<int> topKFrequent(vector<int>& nums, int k) {
        unordered_map<int, int> count; // hash table to count frequencies
        for (const auto &n: nums) {
            if (count.find(n) == count.end()) { // update the frequencies
                count[n] = 1;
            } else {
                count[n] ++;
            }            
        }
        // lambda function to compare according to frequencies
        auto cmp = [&count](int a, int b) { return count[a] < count[b]; };
        priority_queue<int, vector<int>, decltype(cmp)> q(cmp);
        unordered_set<int> flag;
        for (const auto &n: nums) {
            if (!flag.count(n)) { // push only unique integers to queue
                q.push(n);
                flag.insert(n);
            }
        }
        vector<int> r;
        for (int i = 0; i < k; ++ i) {
            auto p = q.top(); // pop K frequent numbers from the priority queue
            q.pop();
            r.push_back(p);
        }
        return r;
    }
};
class Solution {
public:
    vector<int> topKFrequent(vector<int>& nums, int k) {
        unordered_map<int, int> count; // hash table to count frequencies
        for (const auto &n: nums) {
            if (count.find(n) == count.end()) { // update the frequencies
                count[n] = 1;
            } else {
                count[n] ++;
            }            
        }
        // lambda function to compare according to frequencies
        auto cmp = [&count](int a, int b) { return count[a] < count[b]; };
        priority_queue<int, vector<int>, decltype(cmp)> q(cmp);
        unordered_set<int> flag;
        for (const auto &n: nums) {
            if (!flag.count(n)) { // push only unique integers to queue
                q.push(n);
                flag.insert(n);
            }
        }
        vector<int> r;
        for (int i = 0; i < k; ++ i) {
            auto p = q.top(); // pop K frequent numbers from the priority queue
            q.pop();
            r.push_back(p);
        }
        return r;
    }
};

We use std::unordered_set to keep track of the numbers that have been pushed to the priority queue so only unique integers are in the result vector.

–EOF (The Ultimate Computing & Technology Blog) —

GD Star Rating
loading...
498 words
Last Post: How to Transpose a 2D Matrix?
Next Post: The Wiggle Sort Algorithm on Array of Integers

The Permanent URL is: How to Find Top K Frequent Elements via Priority Queue or Sorting?

Leave a Reply