Seven classic sorting algorithms: summary, analysis and code implementation

Posted by FFFF on Tue, 12 Oct 2021 21:33:15 +0200

        Sorting is a problem that comes up constantly in everyday life. For a programmer, the classic sorting algorithms are usually the first problems encountered when starting to study algorithms. Learning the ideas behind them and implementing them in code is a quick way to appreciate the charm of algorithms. For the purpose of learning and discussion, this article summarizes and analyzes seven classic sorting algorithms.

1, Bubble sort

         Bubble sort imitates bubbles rising in water. Just as air, being less dense than water, rises to the surface, the larger values (assuming ascending order here) gradually move toward the end of the array. Take an array of length N as an example: going from position 1 to position N-1, compare the element at the current position with the element at the next position and, if the current one is larger, exchange them so that the larger value moves to the right. One pass places the largest element at the end of the array. Then repeat the operation over positions 1 to N-2 to place the second-largest element in the second-to-last position, and so on until the whole array is sorted. The code is as follows:

#include <bits/stdc++.h>
int* bubble_sort(int arr[], int len);
void swap(int arr[], int i, int j);
int* get_array(int max_size, int max_num);
void print_array(int arr[], int len);

/*
 * Sorts arr[0..len-1] in place in non-decreasing order using bubble sort.
 *
 * arr: array to sort; a null pointer is returned unchanged.
 * len: number of elements; fewer than two elements means nothing to do.
 *
 * Returns arr itself.
 *
 * Only strictly greater neighbours are exchanged, so equal elements keep
 * their relative order (stable).
 */
int* bubble_sort(int arr[], int len) {
	if(!arr || len < 2)
		return arr;
	for(int end = len - 1; end > 0; --end) {
		// j stops at end - 1 so that j + 1 never goes past index end.
		for(int j = 0; j < end; ++j) {
			if(arr[j] > arr[j + 1]) {
				int tmp = arr[j];
				arr[j] = arr[j + 1];
				arr[j + 1] = tmp;
			}
		}
	}
	return arr;
}

/*
 * Exchanges arr[i] and arr[j] in place.
 *
 * A temporary is used instead of the XOR trick: XOR-swapping an element
 * with itself (i == j) would set it to zero, whereas this version leaves
 * it untouched.
 */
void swap(int arr[], int i, int j) {
	int tmp = arr[i];
	arr[i] = arr[j];
	arr[j] = tmp;
}

/*
 * Allocates an array of random length filled with random values.
 *
 * max_size: the length is max_size * k, with k drawn from 1..10.
 * max_num:  each value is the difference of two draws of rand() % max_num.
 * len:      receives the chosen length, or 0 when the arguments are rejected.
 *
 * Returns a buffer allocated with new[] that the caller must release with
 * delete[]. Returns a null pointer when max_size is not positive or
 * max_num is zero (the latter would divide by zero).
 */
int* get_array(int max_size, int max_num, int &len) {
	len = 0;
	if(max_size <= 0 || max_num == 0)
		return NULL;
	// Seed before the first rand() call; seeding after the length had been
	// drawn made the length identical on every run.
	srand((unsigned)time(0));
	len = max_size * (rand() % 10 + 1);
	int *arr = new int[len];
	for(int i = 0; i < len; i++) {
		arr[i] = rand() % max_num - rand() % max_num;
	}
	return arr;
}

/*
 * Prints the first len elements of arr to stdout as "[a, b, c]" followed by
 * a newline, so that consecutive printouts do not run together on one line.
 * Prints "[]" when len is not positive.
 */
void print_array(int arr[], int len) {
	int i = 0;
	printf("[");
	while(i < len) {
		// The last element is printed without a trailing separator.
		if(i + 1 == len)
			printf("%d", arr[i]);
		else
			printf("%d, ", arr[i]);
		++i;
	}
	printf("]\n");
}

/*
 * Demo driver: builds a random array, prints it, sorts it with bubble_sort
 * and prints the result.
 */
int main()
{
	printf("--------------------------------[bubble_sort]--------------------------------\n");
	int len = 0;
	int *arr = get_array(10, 100, len);
	printf("original:");
	print_array(arr, len);
	bubble_sort(arr, len);
	printf("sorted after:");
	print_array(arr, len);
	// get_array allocates with new[]; release the buffer before exiting.
	delete[] arr;
	return 0;
}

Time complexity: O(n^2) -- double for loop

Space complexity: O(1) - no need to apply for additional auxiliary space

Stability: stable - equal elements are never swapped, so their relative order is preserved

/*In addition, you can make a small optimization by adding a flag in the code to judge whether the pass traversal has been exchanged, which can reduce the time complexity of the best case*/

2, Selection sort

        As the name suggests, the algorithm works by selection: each pass selects the minimum (or maximum) value of the unsorted part and places it at the head of that part. Finding a minimum or maximum requires looking at every remaining element, which is why each pass traverses the whole unsorted part. The code is as follows:

#include <bits/stdc++.h>
int* selection_sort(int arr[], int len);
void swap(int arr[], int i, int j);
int* get_array(int max_size, int max_num);
void print_array(int arr[], int len);

/*
 * Sorts arr[0..len-1] in place in non-decreasing order using selection sort.
 *
 * arr: array to sort; a null pointer is returned unchanged.
 * len: number of elements; fewer than two elements means nothing to do.
 *
 * Returns arr itself.
 *
 * Each pass finds the smallest element of the unsorted tail and exchanges it
 * with the first element of that tail. The long-distance exchange can move
 * an element past an equal one, so the sort is not stable.
 */
int* selection_sort(int arr[], int len) {
	if(!arr || len < 2)
		return arr;
	for(int i = 0; i < len - 1; ++i) {
		int min_index = i;
		for(int j = i + 1; j < len; ++j) {
			if(arr[j] < arr[min_index])
				min_index = j;
		}
		// Skip the exchange when the minimum is already in place.
		if(min_index != i) {
			int tmp = arr[i];
			arr[i] = arr[min_index];
			arr[min_index] = tmp;
		}
	}
	return arr;
}

/*
 * Exchanges arr[i] and arr[j] in place.
 *
 * A temporary is used instead of the XOR trick: XOR-swapping an element
 * with itself (i == j) would set it to zero, whereas this version leaves
 * it untouched.
 */
void swap(int arr[], int i, int j) {
	int tmp = arr[i];
	arr[i] = arr[j];
	arr[j] = tmp;
}

/*
 * Allocates an array of random length filled with random values.
 *
 * max_size: the length is max_size * k, with k drawn from 1..10.
 * max_num:  each value is the difference of two draws of rand() % max_num.
 * len:      receives the chosen length, or 0 when the arguments are rejected.
 *
 * Returns a buffer allocated with new[] that the caller must release with
 * delete[]. Returns a null pointer when max_size is not positive or
 * max_num is zero (the latter would divide by zero).
 */
int* get_array(int max_size, int max_num, int &len) {
	len = 0;
	if(max_size <= 0 || max_num == 0)
		return NULL;
	// Seed before the first rand() call; seeding after the length had been
	// drawn made the length identical on every run.
	srand((unsigned)time(0));
	len = max_size * (rand() % 10 + 1);
	int *arr = new int[len];
	for(int i = 0; i < len; i++) {
		arr[i] = rand() % max_num - rand() % max_num;
	}
	return arr;
}

/*
 * Prints the first len elements of arr to stdout as "[a, b, c]" followed by
 * a newline. Prints "[]" when len is not positive.
 */
void print_array(int arr[], int len) {
	int i = 0;
	printf("[");
	while(i < len) {
		// The last element is printed without a trailing separator.
		if(i + 1 == len)
			printf("%d", arr[i]);
		else
			printf("%d, ", arr[i]);
		++i;
	}
	printf("]\n");
}

/*
 * Demo driver: builds a random array, prints it, sorts it with
 * selection_sort and prints the result.
 */
int main()
{
	printf("--------------------------------[selection_sort]--------------------------------\n");
	int len = 0;
	int *arr = get_array(10, 100, len);
	printf("original:");
	print_array(arr, len);
	selection_sort(arr, len);
	printf("sorted after:");
	print_array(arr, len);
	// get_array allocates with new[]; release the buffer before exiting.
	delete[] arr;
	return 0;
}

Time complexity: O(n^2) -- double for loop

Space complexity: O(1) - no need to apply for additional auxiliary space

Stability: unstable - e.g. sorting (3a, 3b, 1) swaps 3a with 1 and yields (1, 3b, 3a), so the two equal 3s change their relative order

/*In addition, a small optimization is possible: select the maximum and the minimum at the same time in each traversal and place both at their final positions. This improves the constant factor, but it does not change the order of complexity*/

3, Direct insert sort

        The idea of direct insertion sort is to divide the sequence into two parts: a sorted part and a part still to be sorted. In each step, one element is taken from the unsorted part and inserted at its proper place in the sorted part, until all elements have been inserted and the whole sequence is in order. The code is as follows:

#include <bits/stdc++.h>
int* insertion_sort(int arr[], int len);
void swap(int arr[], int i, int j);
int* get_array(int max_size, int max_num);
void print_array(int arr[], int len);

/*
 * Sorts arr[0..len-1] in place in non-decreasing order using direct
 * insertion sort.
 *
 * arr: array to sort; a null pointer is returned unchanged.
 * len: number of elements; fewer than two elements means nothing to do.
 *
 * Returns arr itself.
 *
 * Elements are shifted only past strictly greater values, so equal elements
 * keep their relative order (stable).
 */
int* insertion_sort(int arr[], int len) {
	if(!arr || len < 2)
		return arr;
	for(int i = 1; i < len; ++i) {
		int key = arr[i];
		int j = i - 1;
		// Shift the larger elements of the sorted prefix one slot to the
		// right, then drop the key into the gap.
		while(j >= 0 && arr[j] > key) {
			arr[j + 1] = arr[j];
			--j;
		}
		arr[j + 1] = key;
	}
	return arr;
}

/*
 * Exchanges arr[i] and arr[j] in place.
 *
 * A temporary is used instead of the XOR trick: XOR-swapping an element
 * with itself (i == j) would set it to zero, whereas this version leaves
 * it untouched.
 */
void swap(int arr[], int i, int j) {
	int tmp = arr[i];
	arr[i] = arr[j];
	arr[j] = tmp;
}

/*
 * Allocates an array of random length filled with random values.
 *
 * max_size: the length is max_size * k, with k drawn from 1..10.
 * max_num:  each value is the difference of two draws of rand() % max_num.
 * len:      receives the chosen length, or 0 when the arguments are rejected.
 *
 * Returns a buffer allocated with new[] that the caller must release with
 * delete[]. Returns a null pointer when max_size is not positive or
 * max_num is zero (the latter would divide by zero).
 */
int* get_array(int max_size, int max_num, int &len) {
	len = 0;
	if(max_size <= 0 || max_num == 0)
		return NULL;
	// Seed before the first rand() call; seeding after the length had been
	// drawn made the length identical on every run.
	srand((unsigned)time(0));
	len = max_size * (rand() % 10 + 1);
	int *arr = new int[len];
	for(int i = 0; i < len; i++) {
		arr[i] = rand() % max_num - rand() % max_num;
	}
	return arr;
}

/*
 * Prints the first len elements of arr to stdout as "[a, b, c]" followed by
 * a newline. Prints "[]" when len is not positive.
 */
void print_array(int arr[], int len) {
	int i = 0;
	printf("[");
	while(i < len) {
		// The last element is printed without a trailing separator.
		if(i + 1 == len)
			printf("%d", arr[i]);
		else
			printf("%d, ", arr[i]);
		++i;
	}
	printf("]\n");
}

/*
 * Demo driver: builds a random array, prints it, sorts it with
 * insertion_sort and prints the result.
 */
int main()
{
	printf("--------------------------------[insertion_sort]--------------------------------\n");
	int len = 0;
	int *arr = get_array(10, 100, len);
	printf("original:");
	print_array(arr, len);
	insertion_sort(arr, len);
	printf("sorted after:");
	print_array(arr, len);
	// get_array allocates with new[]; release the buffer before exiting.
	delete[] arr;
	return 0;
}

Time complexity: O(n^2) -- double for loop

Space complexity: O(1) - no need to apply for additional auxiliary space

Stability: stable - equal elements are never swapped, so their relative order is preserved

/*Shell sort, a more efficient algorithm built on direct insertion sort, improves the running time of insertion sort on suitable data. For details, see the video by Wang Zhuo (Qingdao University), "Data Structures and Algorithms - Shell sort"*/

4, Merge sort

        Merge sort uses the idea of divide and conquer: the sequence to be sorted is split into two subsequences, each of which is sorted separately, and the two sorted subsequences are then merged into one ordered sequence. The same procedure is applied recursively to every subsequence, which finally sorts the whole sequence. The code is as follows:

#include <bits/stdc++.h>
void merge_sort(int arr[], int L, int R);
void merge(int arr[], int L, int mid, int R);
int* get_array(int max_size, int max_num);
void print_array(int arr[], int len);

/*
 * Sorts arr[L..R] (both bounds inclusive) in place in non-decreasing order
 * using top-down merge sort.
 *
 * A null array or a range with fewer than two elements is left untouched.
 */
void merge_sort(int arr[], int L, int R) {
	// L >= R also covers the empty range (e.g. L = 0, R = -1 for an empty
	// array); an equality-only test let that case recurse without end.
	if(!arr || L >= R)
		return;
	// Midpoint written as L + half the width to avoid overflowing L + R.
	int mid = L + ((R - L) >> 1);
	merge_sort(arr, L, mid);
	merge_sort(arr, mid + 1, R);
	merge(arr, L, mid, R);
}

/*
 * Merges the two adjacent sorted runs arr[L..mid] and arr[mid+1..R] into a
 * single sorted run stored back in arr[L..R].
 *
 * The merged values are staged in a local variable-length array of
 * R - L + 1 elements and copied back at the end.
 */
void merge(int arr[], int L, int mid, int R) {
	const int total = R - L + 1;
	int merged[total];
	int left = L;
	int right = mid + 1;
	int out = 0;

	// On ties the element of the left run is taken first, which keeps equal
	// elements in their original order.
	for(; left <= mid && right <= R; ++out) {
		if(arr[left] > arr[right])
			merged[out] = arr[right++];
		else
			merged[out] = arr[left++];
	}
	// At most one of the two runs still has elements left.
	for(; left <= mid; ++left, ++out)
		merged[out] = arr[left];
	for(; right <= R; ++right, ++out)
		merged[out] = arr[right];

	for(int k = 0; k < total; ++k)
		arr[L + k] = merged[k];
}

/*
 * Allocates an array of random length filled with random values.
 *
 * max_size: the length is max_size * k, with k drawn from 1..10.
 * max_num:  each value is the difference of two draws of rand() % max_num.
 * len:      receives the chosen length, or 0 when the arguments are rejected.
 *
 * Returns a buffer allocated with new[] that the caller must release with
 * delete[]. Returns a null pointer when max_size is not positive or
 * max_num is zero (the latter would divide by zero).
 */
int* get_array(int max_size, int max_num, int &len) {
	len = 0;
	if(max_size <= 0 || max_num == 0)
		return NULL;
	// Seed before the first rand() call; seeding after the length had been
	// drawn made the length identical on every run.
	srand((unsigned)time(0));
	len = max_size * (rand() % 10 + 1);
	int *arr = new int[len];
	for(int i = 0; i < len; i++) {
		arr[i] = rand() % max_num - rand() % max_num;
	}
	return arr;
}

/*
 * Prints the first len elements of arr to stdout as "[a, b, c]" followed by
 * a newline. Prints "[]" when len is not positive.
 */
void print_array(int arr[], int len) {
	int i = 0;
	printf("[");
	while(i < len) {
		// The last element is printed without a trailing separator.
		if(i + 1 == len)
			printf("%d", arr[i]);
		else
			printf("%d, ", arr[i]);
		++i;
	}
	printf("]\n");
}

/*
 * Demo driver: builds a random array, prints it, sorts it with merge_sort
 * and prints the result.
 */
int main()
{
	printf("--------------------------------[merge_sort]--------------------------------\n");
	int len = 0;
	int *arr = get_array(5, 10, len);
	printf("Before:");
	print_array(arr, len);
	merge_sort(arr, 0, len - 1);
	printf("After :");
	print_array(arr, len);
	// get_array allocates with new[]; release the buffer before exiting.
	delete[] arr;
	return 0;
}

Time complexity: O(n*logn) -- obtained by applying the master theorem to the recurrence T(n) = 2T(n/2) + O(n)

Space complexity: O(n) -- equal length temp [] auxiliary array is required

Stability: stable - keep the original order equal and enter temp []

/*The time complexity improves from n^2 to nlogn. The reason is that the n^2 algorithms waste comparisons: the result of one pass's comparisons is not reused by the next pass. In merge sort, every comparison contributes to building an ordered part, so no comparison is wasted*/

5, Heap sort

        Heap sort uses the heap structure: the sorting process is turned into repeated adjustments of a heap, and the heap hands us the maximum (or minimum) value each time, which reduces the time complexity. The code is as follows:

#include <bits/stdc++.h>
void heap_sort(int arr[], int len);
void heap_insert(int arr[], int index);
void heapify(int arr[], int index, int size);
void swap(int arr[], int i, int j);
int* get_array(int max_size, int max_num);
void print_array(int arr[], int len);

/*
 * Sorts arr[0..len-1] in place in non-decreasing order using heap sort.
 *
 * A null array or fewer than two elements is left untouched.
 */
void heap_sort(int arr[], int len) {
	if(!arr || len < 2)
		return;

	// Phase 1: grow a max-heap by inserting the elements one after another.
	int next = 0;
	while(next < len) {
		heap_insert(arr, next);
		++next;
	}

	// Phase 2: move the current maximum behind the shrinking heap, then
	// restore the heap property on what remains.
	int size = len;
	while(size > 0) {
		--size;
		swap(arr, 0, size);
		heapify(arr, 0, size);
	}
}

/*
 * Sifts the element at position index up a max-heap stored in arr until its
 * parent is no longer smaller than it.
 *
 * For index 0 the parent expression (0 - 1) / 2 truncates to 0, so the
 * element is compared with itself and the loop stops.
 */
void heap_insert(int arr[], int index) {
	int parent = (index - 1) / 2;
	while(arr[index] > arr[parent]) {
		swap(arr, index, parent);
		index = parent;
		parent = (index - 1) / 2;
	}
}

/*
 * Sifts the element at position index down a max-heap that occupies
 * arr[0..size-1], until neither of its children is larger than it.
 */
void heapify(int arr[], int index, int size) {
	for(int child = 2 * index + 1; child < size; child = 2 * index + 1) {
		// Pick the larger of the two children (the right one may not exist).
		int right = child + 1;
		if(right < size && arr[right] > arr[child])
			child = right;
		// The parent already dominates both children: heap property holds.
		if(!(arr[child] > arr[index]))
			return;
		swap(arr, index, child);
		index = child;
	}
}

/*
 * Exchanges arr[i] and arr[j] in place through a temporary, which is also
 * safe when i and j refer to the same element.
 */
void swap(int arr[], int i, int j) {
	const int first = arr[i];
	arr[i] = arr[j];
	arr[j] = first;
}

/*
 * Allocates an array of random length filled with random values.
 *
 * max_size: the length is max_size * k, with k drawn from 1..10.
 * max_num:  each value is rand() % max_num.
 * len:      receives the chosen length, or 0 when the arguments are rejected.
 *
 * Returns a buffer allocated with new[] that the caller must release with
 * delete[]. Returns a null pointer when max_size is not positive or
 * max_num is zero (the latter would divide by zero).
 */
int* get_array(int max_size, int max_num, int &len) {
	len = 0;
	if(max_size <= 0 || max_num == 0)
		return NULL;
	// Seed before the first rand() call; seeding after the length had been
	// drawn made the length identical on every run.
	srand((unsigned)time(0));
	len = max_size * (rand() % 10 + 1);
	int *arr = new int[len];
	for(int i = 0; i < len; i++) {
		arr[i] = rand() % max_num;
	}
	return arr;
}

/*
 * Prints the first len elements of arr to stdout as "[a, b, c]" followed by
 * a newline. Prints "[]" when len is not positive.
 */
void print_array(int arr[], int len) {
	int i = 0;
	printf("[");
	while(i < len) {
		// The last element is printed without a trailing separator.
		if(i + 1 == len)
			printf("%d", arr[i]);
		else
			printf("%d, ", arr[i]);
		++i;
	}
	printf("]\n");
}

/*
 * Demo driver: builds a random array, prints it, sorts it with heap_sort
 * and prints the result.
 */
int main()
{
	printf("--------------------------------[heap_sort]--------------------------------\n");
	int len = 0;
	int *arr = get_array(5, 10, len);
	printf("Before:");
	print_array(arr, len);
	heap_sort(arr, len);
	printf("After :");
	print_array(arr, len);
	// get_array allocates with new[]; release the buffer before exiting.
	delete[] arr;
	return 0;
}

Time complexity: O(n*logn) -- N * adjustment times (binary tree height logn) - > n * logn

Space complexity: O(1) -- sorts in place

Stability: unstable - swapping the heap top with the last element can move one of two equal numbers behind the other

6, Quick sort

        Randomly select a value in the sequence to be sorted as the pivot (choosing randomly, rather than always taking the first or the last element, avoids the worst case on already ordered input). Place the elements smaller than the pivot on the left and those greater than it on the right, then apply quick sort to the left and right parts in turn until the whole sequence is in order. The code is as follows:

#include <bits/stdc++.h>
void quick_sort(int arr[], int L, int R);
void process(int arr[], int num, int L, int R, int &min_index, int &max_index);
void swap(int arr[], int i, int j);
int* get_array(int max_size, int max_num);
void print_array(int arr[], int len);

/*
 * Three-way partition of arr[L..R] (both bounds inclusive) around num.
 *
 * On return:
 *   arr[L .. min_index]                  <  num
 *   arr[min_index + 1 .. max_index - 1]  == num
 *   arr[max_index .. R]                  >  num
 * so min_index is the last index of the "smaller" part (L - 1 when it is
 * empty) and max_index the first index of the "larger" part (R + 1 when it
 * is empty).
 */
void process(int arr[], int num, int L, int R, int &min_index, int &max_index) {
	min_index = L - 1;
	max_index = R + 1;
	// The scan starts at L, not at 0, so elements in front of the range
	// are never touched.
	int i = L;
	while(i < max_index) {
		if(arr[i] < num) {
			++min_index;
			std::swap(arr[i], arr[min_index]);
			++i;
		} else if(arr[i] > num) {
			// The element swapped in from the right is still unexamined,
			// so i does not advance here.
			--max_index;
			std::swap(arr[i], arr[max_index]);
		} else {
			++i;
		}
	}
}

/*
 * Sorts arr[L..R] (both bounds inclusive) in place in non-decreasing order
 * using randomized three-way quick sort.
 *
 * A null array or a range with fewer than two elements is left untouched.
 * The pivot is drawn with rand(); seeding is left to the caller, because
 * calling srand(time(0)) on every recursion level restarts the generator
 * and keeps producing the same pivot offset.
 */
void quick_sort(int arr[], int L, int R) {
	if(!arr || L >= R)
		return;
	int min_index = -1;
	int max_index = -1;
	// R - L + 1 makes every position of the range, including R, a candidate.
	int pivot = arr[L + rand() % (R - L + 1)];
	process(arr, pivot, L, R, min_index, max_index);
	// The block equal to the pivot is already in its final place and is
	// never empty, so both recursive ranges are strictly smaller.
	quick_sort(arr, L, min_index);
	quick_sort(arr, max_index, R);
}

/*
 * Exchanges arr[i] and arr[j] in place; nothing happens when both indices
 * name the same element.
 */
void swap(int arr[], int i, int j) {
	if(i != j) {
		const int held = arr[i];
		arr[i] = arr[j];
		arr[j] = held;
	}
}

/*
 * Allocates an array of random length filled with random values.
 *
 * max_size: the length is max_size * k, with k drawn from 1..2.
 * max_num:  each value is rand() % max_num.
 * len:      receives the chosen length, or 0 when the arguments are rejected.
 *
 * Returns a buffer allocated with new[] that the caller must release with
 * delete[]. Returns a null pointer when max_size is not positive or
 * max_num is zero (the latter would divide by zero).
 */
int* get_array(int max_size, int max_num, int &len) {
	len = 0;
	if(max_size <= 0 || max_num == 0)
		return NULL;
	// Seed before the first rand() call; seeding after the length had been
	// drawn made the length identical on every run.
	srand((unsigned)time(0));
	len = max_size * (rand() % 2 + 1);
	int *arr = new int[len];
	for(int i = 0; i < len; i++) {
		arr[i] = rand() % max_num;
	}
	return arr;
}

/*
 * Prints the first len elements of arr to stdout as "[a, b, c]" followed by
 * a newline. Prints "[]" when len is not positive.
 */
void print_array(int arr[], int len) {
	int i = 0;
	printf("[");
	while(i < len) {
		// The last element is printed without a trailing separator.
		if(i + 1 == len)
			printf("%d", arr[i]);
		else
			printf("%d, ", arr[i]);
		++i;
	}
	printf("]\n");
}

/*
 * Demo driver: builds a random array, prints it, sorts it with quick_sort
 * and prints the result.
 */
int main()
{
	printf("--------------------------------[quick_sort]--------------------------------\n");
	int len = 0;
	int *arr = get_array(10, 10, len);
	printf("Before:");
	print_array(arr, len);
	quick_sort(arr, 0, len - 1);
	printf("After:");
	print_array(arr, len);
	// get_array allocates with new[]; release the buffer before exiting.
	delete[] arr;
	return 0;
}

Time complexity: O(n*logn) on average -- calculated with the master theorem

Space complexity: O(logn) on average -- the elements are sorted in place, but the recursion uses stack space

Stability: unstable - during partitioning, the first of two equal numbers may be swapped behind the second

7, Radix sort (bucket sort)

        Each decimal digit takes one of the ten values 0-9, so ten buckets are used. Starting from the ones digit, every number is put into the bucket that matches its digit at the current position, with missing high digits treated as 0. The order established on each digit is carried forward digit by digit up to the highest one, which yields the final sorted order. The code is as follows:

#include <bits/stdc++.h>
using namespace std;
void radix_sort(int arr[], int len);
void radix_sort(int arr[], int L, int R, int bits);
int get_digit(int num, int index);
int max_bists(int arr[], int len);
int* get_array(int max_size, int max_num);
void print_array(int arr[], int len);
void sort_array_distance_less_k(int arr[], int k);

/*
 * Sorts arr[0..len-1] in place in non-decreasing order using radix sort,
 * processing as many decimal digits as the largest element has.
 *
 * A null array or fewer than two elements is left untouched; without this
 * guard an empty array reached the four-argument overload with an empty
 * range.
 */
void radix_sort(int arr[], int len) {
	if(!arr || len < 2)
		return;
	radix_sort(arr, 0, len - 1, max_bists(arr, len));
}

/*
 * Returns the number of decimal digits of the largest element of
 * arr[0..len-1].
 *
 * A maximum of 0 has zero digits by this count. A null or empty array also
 * yields 0. The running maximum starts from the first element rather than
 * from a fixed sentinel, so it is correct for any int values.
 */
int max_bists(int arr[], int len) {
	if(!arr || len <= 0)
		return 0;
	int max = arr[0];
	for(int i = 1; i < len; ++i) {
		if(arr[i] > max)
			max = arr[i];
	}
	int res = 0;
	while(max != 0) {
		res++;
		max /= 10;
	}
	return res;
}

/*
 * Least-significant-digit radix sort of arr[L..R] (both bounds inclusive),
 * in place, in non-decreasing order.
 *
 * bits: number of decimal digits to process, starting from the ones digit.
 *
 * All values in the range must be non-negative: a negative value would
 * produce a negative digit and index outside the counting table.
 * A null array or a range with fewer than two elements is left untouched.
 */
void radix_sort(int arr[], int L, int R, int bits) {
	enum { RADIX = 10 };
	if(!arr || L >= R)
		return;
	const int len = R - L + 1;
	int bucket[len];	// scratch copy of the range (variable-length array)
	long long divisor = 1;	// 10^d, selects the digit handled in pass d

	// Once divisor exceeds INT_MAX every remaining digit is 0, and a pass
	// over all-zero digits would not reorder anything, so stop early.
	for(int d = 0; d < bits && divisor <= INT_MAX; ++d, divisor *= RADIX) {
		int counts[RADIX] = {0};
		for(int i = L; i <= R; i++)
			counts[(int)((arr[i] / divisor) % RADIX)]++;
		// Prefix sums over the ten digit slots (not over array positions):
		// counts[k] becomes the number of elements whose digit is <= k.
		for(int k = 1; k < RADIX; k++)
			counts[k] += counts[k - 1];
		// Walk backwards so that elements with the same digit keep their
		// relative order, which the later passes rely on.
		for(int i = R; i >= L; i--) {
			int digit = (int)((arr[i] / divisor) % RADIX);
			bucket[--counts[digit]] = arr[i];
		}
		// Copy back into the same range, starting at L rather than at 0.
		for(int i = 0; i < len; i++)
			arr[L + i] = bucket[i];
	}
}

/*
 * Returns the decimal digit of num at position index, where index 1 is the
 * ones digit, 2 the tens digit, and so on.
 *
 * The power of ten is built with integer arithmetic: truncating the
 * floating-point result of pow() can come out one too small. An index below
 * 1 is treated as 1. A position beyond the range of int yields 0.
 */
int get_digit(int num, int index) {
	int divisor = 1;
	for(int i = 1; i < index; ++i) {
		// Another factor of ten would overflow int; no int has a digit
		// at or above this position.
		if(divisor > INT_MAX / 10)
			return 0;
		divisor *= 10;
	}
	return (num / divisor) % 10;
}

/*
 * Allocates an array of random length filled with random values.
 *
 * max_size: the length is max_size * k, with k drawn from 1..10.
 * max_num:  each value is rand() % max_num.
 * len:      receives the chosen length, or 0 when the arguments are rejected.
 *
 * Returns a buffer allocated with new[] that the caller must release with
 * delete[]. Returns a null pointer when max_size is not positive or
 * max_num is zero (the latter would divide by zero).
 */
int* get_array(int max_size, int max_num, int &len) {
	len = 0;
	if(max_size <= 0 || max_num == 0)
		return NULL;
	// Seed before the first rand() call; seeding after the length had been
	// drawn made the length identical on every run.
	srand((unsigned)time(0));
	len = max_size * (rand() % 10 + 1);
	int *arr = new int[len];
	for(int i = 0; i < len; i++) {
		arr[i] = rand() % max_num;
	}
	return arr;
}

/*
 * Prints the first len elements of arr to stdout as "[a, b, c]" followed by
 * a newline. Prints "[]" when len is not positive.
 */
void print_array(int arr[], int len) {
	int i = 0;
	printf("[");
	while(i < len) {
		// The last element is printed without a trailing separator.
		if(i + 1 == len)
			printf("%d", arr[i]);
		else
			printf("%d, ", arr[i]);
		++i;
	}
	printf("]\n");
}

/*
 * Demo driver: builds a random array, prints it, sorts it with radix_sort
 * and prints the result.
 */
int main()
{
	printf("--------------------------------[radix_sort]--------------------------------\n");
	int len = 0;
	int *arr = get_array(5, 1000, len);
	printf("before:");
	print_array(arr, len);
	radix_sort(arr, len);
	printf("after");
	print_array(arr, len);
	// get_array allocates with new[]; release the buffer before exiting.
	delete[] arr;
	return 0;
}

Topics: C++ Algorithm