How to Sort Characters By Frequency?


Given a string that contains characters of ASCII no more than 256, sort it in non-ascending order based on the frequency of the characters.

For example, Input “tree” the output should be “eert” or “eetr” because there are two ‘e’s which should appear before ‘t’ and ‘r’.
Also, Given “Aabb” the output should be “bbAa” or “bbaA” because it is case-sensitive and “A” and “a” are treated as different characters.

We can use a hash table e.g. std::unordered_map in C++ to count the frequency of each character. Also, we can create a static array e.g. int count[256] = {0} to serve as a fixed-size hash table or bucket. The advantage of fixed-size table is that it has a constant O(1) space complexity over O(N) unordered_map data structure.

Using customize sorting

Once we have the frequency table, we iterate and push those which have frequency at least 1 to the vector of pair. Next we sort it based on the frequency and finally, we re-construct the string from the most common characters.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
class Solution {
public:
    string frequencySort(string s) {
        int count[256] = {0};
        for (const auto &n: s) {
            count[n] ++; // count the frequency
        }
        vector<pair<int, int>> data;
        for (int i = 0; i < 256; ++ i) {
            if (count[i] > 0) {
                data.emplace_back(i, count[i]);
            }
        }
        sort(begin(data), end(data), [](pair<int, int> a, pair<int, int> b) {
            return a.second > b.second;  // sort it by frequency
        });
        string r = "";
        for (const auto &n: data) {
            for (int i = 0; i < n.second; ++ i) {
                r += string(1, (char)n.first);
            }
        }
        return r;
    }
};
class Solution {
public:
    string frequencySort(string s) {
        int count[256] = {0};
        for (const auto &n: s) {
            count[n] ++; // count the frequency
        }
        vector<pair<int, int>> data;
        for (int i = 0; i < 256; ++ i) {
            if (count[i] > 0) {
                data.emplace_back(i, count[i]);
            }
        }
        sort(begin(data), end(data), [](pair<int, int> a, pair<int, int> b) {
            return a.second > b.second;  // sort it by frequency
        });
        string r = "";
        for (const auto &n: data) {
            for (int i = 0; i < n.second; ++ i) {
                r += string(1, (char)n.first);
            }
        }
        return r;
    }
};

Using priority queue

Instead of customized sorting, we can push the pair to the priority queue based on the frequency. When a pair is popped from the priority queue, it is the currently most common frequent character.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
class Solution {
public:
    string frequencySort(string s) {
        int count[256] = {0};
        for (const auto &n: s) {
            count[n] ++;
        }
        auto cmp = [](pair<int, int> a, pair<int, int> b) {
            return a.second < b.second;  // the most common character pops first
        };
        priority_queue<pair<int, int>, vector<pair<int, int>>, decltype(cmp)> q(cmp);
        for (int i = 0; i < 256; ++ i) {
            if (count[i] > 0) {
                q.emplace(i, count[i]); // push the pair to the priority queue
            }
        }
        string r = "";
        while (!q.empty()) { // pop every elements in priority queue in turn
            auto n = q.top();
            q.pop();
            for (int i = 0; i < n.second; ++ i) {
                r += string(1, (char)n.first);
            }
        }
        return r;
    }
};
class Solution {
public:
    string frequencySort(string s) {
        int count[256] = {0};
        for (const auto &n: s) {
            count[n] ++;
        }
        auto cmp = [](pair<int, int> a, pair<int, int> b) {
            return a.second < b.second;  // the most common character pops first
        };
        priority_queue<pair<int, int>, vector<pair<int, int>>, decltype(cmp)> q(cmp);
        for (int i = 0; i < 256; ++ i) {
            if (count[i] > 0) {
                q.emplace(i, count[i]); // push the pair to the priority queue
            }
        }
        string r = "";
        while (!q.empty()) { // pop every elements in priority queue in turn
            auto n = q.top();
            q.pop();
            for (int i = 0; i < n.second; ++ i) {
                r += string(1, (char)n.first);
            }
        }
        return r;
    }
};

Both sorting and priority queue runs at complexity O(n logn).

–EOF (The Ultimate Computing & Technology Blog) —

GD Star Rating
a WordPress rating system
506 words
Last Post: How to Compute Shortest Distance to a Character in a String?
Next Post: How to Check if Two Strings are Isomorphic?

The Permanent URL is: How to Sort Characters By Frequency?

Leave a Reply