How to find the amount of read and write contention and synchronization overheads for a shared memory program? For example, the OpenMP program below: int main(int argc, char** argv) { int* arr = NULL; double start_time, run_time; if (!(argc >= 3)) { printf("Number of array elements must be provided as first argument and number of threads as second argument to program\n"); exit(-1); } long int n = atoi(argv[1]); if (n > MAXSIZE) { n = MAXSIZE; } int p = atoi(argv[2]); if (p > MAXWORKERS) { p = MAXWORKERS; } omp_set_num_threads(p); arr = (int*)malloc(n * sizeof(int)); // Initialize and print the array to be sorted int i; for (i = 0; i < n; i++) arr[i] = ((int)random() % n) + 1; #ifdef DEBUG printf("initial arr: \n"); for (i = 0; i < n; i++) printf(" %3d", arr[i]); printf("\n"); #endif start_time = omp_get_wtime(); // Sorting is done in a parallel region #pragma omp parallel { // The first call to quicksort is done only once #pragma omp single quicksort(0, (n - 1), arr); } run_time = omp_get_wtime() - start_time; // check if the array is sorted and print the sorted array for (i = 0; i < n - 1; i++) if (arr[i] > arr[i + 1]) { printf("The resulting arr is not sorted!\n"); //return(1); } #ifdef DEBUG printf("sorted arr: \n"); for (i = 0; i < n; i++) printf(" %3d", arr[i]); printf("\n"); #endif printPerformanceResults(n, p, run_time); fprintf(stderr, "%f\n", run_time); return 0; } /* Regular quiksort algorithm, with the only exception being that * the recursive step is done in parallel with OpenMp tasks */ void quicksort(int first, int last, int arr[]) { int pivot, i_pivot, temp, left, right; if (first >= last) return; // no need to sort // otherwise select a pivot i_pivot = (first + last) / 2; pivot = arr[i_pivot]; left = first; right = last; while (left <= right) { if (arr[left] > pivot) { // swap left element with right element temp = arr[left]; arr[left] = arr[right]; arr[right] = temp; if (right == i_pivot) { i_pivot = left; } right--; } else { left++; } } // Put the pivot in place (i.e. swap with right element) temp = arr[right]; arr[right] = pivot; arr[i_pivot] = temp; // The recursive steps in quicksort execution are implemented as separate tasks #pragma omp task quicksort(first, (right - 1), arr); #pragma omp task quicksort((right + 1), last, arr); }
How to find the amount of read and write contention and synchronization overheads for a shared memory program?
For example, the OpenMP program below:
int main(int argc, char** argv) {
int* arr = NULL;
double start_time, run_time;
if (!(argc >= 3)) {
printf("Number of array elements must be provided as first argument and number of threads as second argument to program\n");
exit(-1);
}
long int n = atoi(argv[1]);
if (n > MAXSIZE) { n = MAXSIZE; }
int p = atoi(argv[2]);
if (p > MAXWORKERS) { p = MAXWORKERS; }
omp_set_num_threads(p);
arr = (int*)malloc(n * sizeof(int));
// Initialize and print the array to be sorted
int i;
for (i = 0; i < n; i++)
arr[i] = ((int)random() % n) + 1;
#ifdef DEBUG
printf("initial arr: \n");
for (i = 0; i < n; i++)
printf(" %3d", arr[i]);
printf("\n");
#endif
start_time = omp_get_wtime();
// Sorting is done in a parallel region
#pragma omp parallel
{
// The first call to quicksort is done only once
#pragma omp single
quicksort(0, (n - 1), arr);
}
run_time = omp_get_wtime() - start_time;
// check if the array is sorted and print the sorted array
for (i = 0; i < n - 1; i++)
if (arr[i] > arr[i + 1]) {
printf("The resulting arr is not sorted!\n");
//return(1);
}
#ifdef DEBUG
printf("sorted arr: \n");
for (i = 0; i < n; i++)
printf(" %3d", arr[i]);
printf("\n");
#endif
printPerformanceResults(n, p, run_time);
fprintf(stderr, "%f\n", run_time);
return 0;
}
/* Regular quiksort
* the recursive step is done in parallel with OpenMp tasks
*/
void quicksort(int first, int last, int arr[]) {
int pivot, i_pivot, temp, left, right;
if (first >= last) return; // no need to sort
// otherwise select a pivot
i_pivot = (first + last) / 2;
pivot = arr[i_pivot];
left = first;
right = last;
while (left <= right) {
if (arr[left] > pivot) { // swap left element with right element
temp = arr[left];
arr[left] = arr[right];
arr[right] = temp;
if (right == i_pivot) {
i_pivot = left;
}
right--;
}
else {
left++;
}
}
// Put the pivot in place (i.e. swap with right element)
temp = arr[right];
arr[right] = pivot;
arr[i_pivot] = temp;
// The recursive steps in quicksort execution are implemented as separate tasks
#pragma omp task
quicksort(first, (right - 1), arr);
#pragma omp task
quicksort((right + 1), last, arr);
}
Step by step
Solved in 2 steps