Big-O and Sorting February 6, 2006
Administrative Stuff Readings for today: Ch Readings for tomorrow: Ch 8
Sorting! Very common to need data in order –Viewing, printing –Faster to search, find min/max, compute median/mode, etc. Lots of different sorting algorithms –From the simple to very complex –Some optimized for certain situations (lots of duplicates, almost sorted, etc.) –Typically sort arrays, but algorithms usually can be adapted for other data structures (e.g. linked lists)
Selection sort Sort by "selecting" smallest and putting in front –Search entire array for minimum value –Min is placed in first slot –Could move elements over to make space, but faster to just swap with current first –Repeat for second smallest, third, and so on
Selection sort code void SelectionSort(int arr[], int n) { for (int i = 0; i < n-1; i++) { int minIndex = i; for (int j = i+1; j < n; j++) { if (arr[j] < arr[minIndex]) minIndex = j; } Swap(arr[i], arr[minIndex]); } }
Analyzing selection sort for (int i = 0; i < n-1; i++) { int minIndex = i; for (int j = i+1; j < n; j++) { if (arr[j] < arr[minIndex]) minIndex = j; } Swap(arr[i], arr[minIndex]); } Count statements –First time inner loop N-1 comparisons –N-2 second time, then N-3, … –Last iteration 1 comparison
Analyzing selection sort N-1 + N-2 + N-3 + … –"Gaussian sum" Add sum to self Sum =
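Analyzing selection sort N-1 + N-2 + N-3 + … + 2 + 1 –"Gaussian sum" Add sum to self (second copy written in reverse order): $$\begin{aligned} S &= (N-1) + (N-2) + (N-3) + \dots + 2 + 1 \\ S &= 1 + 2 + 3 + \dots + (N-2) + (N-1) \\ 2S &= \underbrace{N + N + N + \dots + N + N}_{N-1\ \text{terms}} = (N-1)N \end{aligned}$$ Sum $= \tfrac{1}{2}(N-1)N$, which is $O(N^2)$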
Quadratic growth In clock time – 10,000 3 sec – 20,000 [time lost in extraction] sec – 50,000 [time lost in extraction] sec –100,000 5 min Double input -> 4X time –Feasible for small inputs, quickly unmanageable Halve input -> 1/4 time –Hmm… –If two sorted half-size arrays, how to produce sorted full array?
Mergesort "Divide and conquer" algorithm –Divide array in half –Recursively sort each half –Merge two halves together "Easy-split hard-join" –No complex decision about which goes where, just divide in middle –Merge step preserves ordering from each half
void MergeSort(int array[], int n) { if (n > 1) { int n1 = n/2; int n2 = n - n1; int *arr1 = CopySubArray(array, 0, n1); int *arr2 = CopySubArray(array, n1, n2); MergeSort(arr1, n1); MergeSort(arr2, n2); Merge(array, arr1, n1, arr2, n2); delete[] arr1; delete[] arr2; } }
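CopySubArray // Create a new array in memory void CopyArray(int arr[], int n, int * & copy) { copy = new int[n]; for(int i = 0; i < n; i++) { copy[i] = arr[i]; } } (Note: this CopyArray variant copies the first n elements and hands the new array back through the reference parameter "copy"; the MergeSort slide instead calls CopySubArray(array, start, n) and uses its return value as the new array.)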
Merge code void Merge(int array[], int arr1[], int n1, int arr2[], int n2){ int p = 0, p1 = 0, p2 = 0; while (p1 < n1 && p2 < n2) { if (arr1[p1] < arr2[p2]) array[p++] = arr1[p1++]; else array[p++] = arr2[p2++]; } while (p1 < n1) array[p++] = arr1[p1++]; while (p2 < n2) array[p++] = arr2[p2++]; }
void Merge(int array[], int arr1[], int n1, int arr2[], int n2) { int p, p1, p2; p = p1 = p2 = 0; while (p1 < n1 && p2 < n2) { // Merge until hit if (arr1[p1] < arr2[p2]) { // end of one array array[p++] = arr1[p1++]; } else { array[p++] = arr2[p2++]; } } while (p1 < n1) { // Merge rest of array[p++] = arr1[p1++]; // remaining array } while (p2 < n2) { array[p++] = arr2[p2++]; } } 4 array arr1 arr2 n1 n2 p1 p2 p
Merge sort analysis void MergeSort(int array[], int n) { if (n > 1) { int n1 = n/2; int n2 = n - n1; int *arr1 = CopySubArray(array, 0, n1); int *arr2 = CopySubArray(array, n1, n2); MergeSort(arr1, n1); MergeSort(arr2, n2); Merge(array, arr1, n1, arr2, n2); delete[] arr1; delete[] arr2; } }
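Merge sort analysis – recursion tree for MS(N), work done at each level: top level, MS(N): = N; second level, two calls MS(N/2): N/2 + N/2 = N; third level, four subarrays of size N/4: = 4*N/4 = N; fourth level, eight subarrays of size N/8: = 8*N/8 = N. Each level contributes N...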
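Merge sort analysis – recursion tree: MS(N), then MS(N/2), then N/4, then N/8, …, down to subarrays of size $N/2^K$ after K levels. The recursion stops when $N/2^K = 1$, i.e. $N = 2^K$, so $K = \lg N$. $\lg N$ levels $\times$ $N$ per level $= O(N \lg N)$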
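In clock time Compare SelectionSort to MergeSort (size, SelectionSort time, MergeSort time) – 10,000: 3 sec, .05 sec – 20,000: [time lost in extraction] sec, .15 sec – 50,000: [time lost in extraction] sec, .38 sec – 100,000: 5 min, .81 sec – 200,000: [time lost in extraction] min, 1.7 sec – 1,000,000: 8 hrs (est), 9 sec O(NlgN) is looking pretty good! But can we do even better?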
Can we do even better than MergeSort? O(N log N) is the best possible running time for a comparison-based sort in the general case –So, theoretically, answer is “no” But, we can come up with a different O(N log N) sort that is practically faster Want to avoid overhead of creating new arrays (as is done in MergeSort) –Bring on the QuickSort!
Quicksort
Recursive Insight
select “pivot”
Partition array so: everything smaller than pivot is on left everything greater than or equal to pivot is on right pivot is in-between Recursive Insight
Partition array so: everything smaller than pivot is on left everything greater than or equal to pivot is on right pivot is in-between Recursive Insight
Now recursively sort “red” sub-array
Recursive Insight Now recursively sort “red” sub-array
Recursive Insight Now recursively sort “red” sub-array Then, recursively sort “blue” sub-array
Recursive Insight Now recursively sort “red” sub-array Then, recursively sort “blue” sub-array
Recursive Insight Everything is sorted!
void Quicksort(int arr[], int n) { if (n < 2) return; int boundary = Partition(arr, n); // Sort subarray up to pivot Quicksort(arr, boundary); // Sort subarray after pivot to end Quicksort(arr + boundary + 1, n - boundary - 1); } “boundary” is the index of the pivot. This is equal to the number of elements before the pivot.
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; }
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; }
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh
int Partition(int arr[], int n) { int lh = 1, rh = n - 1; int pivot = arr[0]; while (true) { while (lh < rh && arr[rh] >= pivot) rh--; while (lh < rh && arr[lh] < pivot) lh++; if (lh == rh) break; Swap(arr[lh], arr[rh]); } if (arr[lh] >= pivot) return 0; Swap(arr[0], arr[lh]); return lh; } pivot lh rh Returns 4 (index of pivot)
void Quicksort(int arr[], int n) { if (n < 2) return; int boundary = Partition(arr, n); // Sort subarray up to pivot Quicksort(arr, boundary); // Sort subarray after pivot to end Quicksort(arr + boundary + 1, n - boundary - 1); }
void Quicksort(int arr[], int n) { if (n < 2) return; int boundary = Partition(arr, n); // Sort subarray up to pivot Quicksort(arr, boundary); // Sort subarray after pivot to end Quicksort(arr + boundary + 1, n - boundary - 1); } Cost of each step: base-case test O(1); Partition O(n); each recursive call T(n/2), assuming the pivot splits the array into two roughly equal halves. $T(n) = O(1) + O(n) + 2T(n/2) = O(n) + 2T(n/2)$ Same as MergeSort: $O(n \log n)$
The whole recursion
First partition
Recursive sort {2, 3, 1, 4}
Partition {2, 3, 1, 4}
Recursive sort {1}
base case
Recursive sort {3, 4}
Partition {3, 4}
Recursive sort {4}
base case
Recursive sort {6, 8, 7}
Leap of faith!
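Empirical comparison of MergeSort vs QuickSort (table columns: N, Merge sort, Quicksort). The N values and the Merge sort times were lost in extraction; only Quicksort times survive. Quicksort times, in row order: 0.10 msec, 0.26 msec, 0.52 msec, 1.76 msec, 4.04 msec, 8.85 msec, 26.04 msec, 56.25 msec, followed by two further rows whose times are lost (the last row's N begins "10,").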