C++ Coding Reference: Partial Sorting with nth_element from Algorithm header


nth_element is a partial sorting algorithm which can be invoked by including the header algorithm. It has following two forms:

1
2
void nth_element(_RanIt _First, _RanIt _Nth, _RanIt _Last);
void nth_element(_RanIt _First, _RanIt _Nth, _RanIt _Last, _Pr _Pred);
void nth_element(_RanIt _First, _RanIt _Nth, _RanIt _Last);
void nth_element(_RanIt _First, _RanIt _Nth, _RanIt _Last, _Pr _Pred);

Calling nth_element will re-arrange the elements in [First, Last) so that the element at [Nth] will be exactly in that place (correct place) in the sorted sequence. You can give a custom comparator (known as _Pred function in above declaration), and the default is std::less comparator when not given.

1
2
3
4
5
template<class _RanIt> inline
    void nth_element(_RanIt _First, _RanIt _Nth, _RanIt _Last)
    {   // order Nth element, using operator<
    _STD nth_element(_First, _Nth, _Last, less<>());
    }
template<class _RanIt> inline
	void nth_element(_RanIt _First, _RanIt _Nth, _RanIt _Last)
	{	// order Nth element, using operator<
	_STD nth_element(_First, _Nth, _Last, less<>());
	}

nth_element does not guarantee to fully sort the sequence in the range of [First, Last). However, the elements before Nth are guaranteed to be smaller and the ones after are to be larger if the predicate function is default the less comparator.

The implementation of the nth_element (based on template generics) is:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
// FUNCTION TEMPLATE nth_element
template<class _RanIt,
    class _Pr> inline
    void nth_element(_RanIt _First, _RanIt _Nth, _RanIt _Last, _Pr _Pred)
    {   // order Nth element, using _Pred
    _Adl_verify_range(_First, _Nth);
    _Adl_verify_range(_Nth, _Last);
    auto _UFirst = _Get_unwrapped(_First);
    const auto _UNth = _Get_unwrapped(_Nth);
    auto _ULast = _Get_unwrapped(_Last);
    if (_UNth == _ULast)
        {
        return; // nothing to do
        }
 
    while (_ISORT_MAX < _ULast - _UFirst)
        {   // divide and conquer, ordering partition containing Nth
        auto _UMid = _Partition_by_median_guess_unchecked(_UFirst, _ULast, _Pass_fn(_Pred));
 
        if (_UMid.second <= _UNth)
            {
            _UFirst = _UMid.second;
            }
        else if (_UMid.first <= _UNth)
            {
            return; // Nth inside fat pivot, done
            }
        else
            {
            _ULast = _UMid.first;
            }
        }
 
    _Insertion_sort_unchecked(_UFirst, _ULast, _Pass_fn(_Pred));    // sort any remainder
    }
// FUNCTION TEMPLATE nth_element
template<class _RanIt,
	class _Pr> inline
	void nth_element(_RanIt _First, _RanIt _Nth, _RanIt _Last, _Pr _Pred)
	{	// order Nth element, using _Pred
	_Adl_verify_range(_First, _Nth);
	_Adl_verify_range(_Nth, _Last);
	auto _UFirst = _Get_unwrapped(_First);
	const auto _UNth = _Get_unwrapped(_Nth);
	auto _ULast = _Get_unwrapped(_Last);
	if (_UNth == _ULast)
		{
		return;	// nothing to do
		}

	while (_ISORT_MAX < _ULast - _UFirst)
		{	// divide and conquer, ordering partition containing Nth
		auto _UMid = _Partition_by_median_guess_unchecked(_UFirst, _ULast, _Pass_fn(_Pred));

		if (_UMid.second <= _UNth)
			{
			_UFirst = _UMid.second;
			}
		else if (_UMid.first <= _UNth)
			{
			return;	// Nth inside fat pivot, done
			}
		else
			{
			_ULast = _UMid.first;
			}
		}

	_Insertion_sort_unchecked(_UFirst, _ULast, _Pass_fn(_Pred));	// sort any remainder
	}

Complexity of C++ nth_element Implementation

On average, the implementations of C++ are based on introspective selection which has O(N) worst running time.

How to use nth_element to Compute the Median of an Array/List/Vector?

Using the nth_element, we can specify the Nth to be the middle, which will be the definition of the median number (in the sorted array).

1
2
3
vector<int> data = { 5, 4, 2, 3, 9, 8, 6, 7, 0, 1 };
nth_element(begin(data), begin(data) + data.size() / 2, end(data));
cout << "Median is " << (data[data.size()/2]);
vector<int> data = { 5, 4, 2, 3, 9, 8, 6, 7, 0, 1 };
nth_element(begin(data), begin(data) + data.size() / 2, end(data));
cout << "Median is " << (data[data.size()/2]);

How to use nth_element to Compute the Second Largest Element in the Array/List/Vector?

We can specify the Nth to be the second position, and use the std::greater to indicate the order should be descending.

1
2
3
vector<int> data = { 5, 4, 2, 3, 9, 8, 6, 7, 0, 1 };
nth_element(begin(data), begin(data) + 1, end(data), std::greater<int>());
cout << "Second Largest is " << (data[1]);
vector<int> data = { 5, 4, 2, 3, 9, 8, 6, 7, 0, 1 };
nth_element(begin(data), begin(data) + 1, end(data), std::greater<int>());
cout << "Second Largest is " << (data[1]);

Note: the std::greater can also be specified in lambda function:

1
[](auto &a, auto &b) { return a > b; }
[](auto &a, auto &b) { return a > b; }

Alternatively, this can be done in ascending order:

1
2
3
4
vector<int> data = { 5, 4, 2, 3, 9, 8, 6, 7, 0, 1 };
// end(data) - 1 is the last element
nth_element(begin(data), end(data) - 2, end(data));
cout << "Second Largest is " << (data[data.size() - 2]);
vector<int> data = { 5, 4, 2, 3, 9, 8, 6, 7, 0, 1 };
// end(data) - 1 is the last element
nth_element(begin(data), end(data) - 2, end(data));
cout << "Second Largest is " << (data[data.size() - 2]);

This can be easily modified to compute the K-th largest or smallest element in the list (array or vector) using the nth_element.

How to Compute the Sum of the Top K elements using nth_element()?

We can use nth_element with N=K, then we know the first N elements after sorting are in correct place. For example,

1
2
3
4
5
6
vector<int> data = { 5, 4, 2, 3, 9, 8, 6, 7, 0, 1 };
nth_element(begin(data), begin(data) + 5, end(data), std::greater<int>());
// Top 5 Sum = 9 + 8 + 7 + 6 + 5
cout << std::accumulate(begin(data), begin(data) + 5, 0, [](auto &a, auto &b) {
    return a + b;
});
vector<int> data = { 5, 4, 2, 3, 9, 8, 6, 7, 0, 1 };
nth_element(begin(data), begin(data) + 5, end(data), std::greater<int>());
// Top 5 Sum = 9 + 8 + 7 + 6 + 5
cout << std::accumulate(begin(data), begin(data) + 5, 0, [](auto &a, auto &b) {
	return a + b;
});

Using std::accumulate to compute the sum is trivial.

–EOF (The Ultimate Computing & Technology Blog) —

GD Star Rating
loading...
734 words
Last Post: Microbit Programming: Introduction to AI - Letting Computer Play the Game
Next Post: How to Compute the Catalan Numbers using Dynamic Programming Algorithm?

The Permanent URL is: C++ Coding Reference: Partial Sorting with nth_element from Algorithm header

Leave a Reply