From 234a16bee24bcfbabe70b9111db9421cd4e04bf5 Mon Sep 17 00:00:00 2001 From: tong_1001 Date: Wed, 9 Jun 2021 19:52:09 +0800 Subject: [PATCH] Fix data race in packageBinaries() function and prioritize large packages --- ...ata-race-in-packageBinaries-function.patch | 32 +++++ ...port-build-prioritize-large-packages.patch | 132 ++++++++++++++++++ rpm.spec | 10 +- 3 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 backport-Fix-data-race-in-packageBinaries-function.patch create mode 100644 backport-build-prioritize-large-packages.patch diff --git a/backport-Fix-data-race-in-packageBinaries-function.patch b/backport-Fix-data-race-in-packageBinaries-function.patch new file mode 100644 index 0000000..0b5993b --- /dev/null +++ b/backport-Fix-data-race-in-packageBinaries-function.patch @@ -0,0 +1,32 @@ +From c9bb0c30d0eab5ff7db80d920d40c02623732f71 Mon Sep 17 00:00:00 2001 +From: Tom Stellard +Date: Tue, 9 Jun 2020 21:05:16 +0000 +Subject: [PATCH] Fix data race in packageBinaries() function + +The pkg variable used in the parallel loop was declared outside +of the omp parallel construct, so it was shared among tasks. This +had the potential to cause a data race. The gcc openmp implementation +did not hit this problem, but I uncovered it while trying to compile with +clang. My best guess as to what was happening is that after the last +task was launched, all tasks had the same value of pkg and were operating +on the same data at the same time. + +This patch declares the variable inside the omp parallel construct, so each +task gets its own copy of the variable. +--- + build/pack.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/build/pack.c b/build/pack.c +index 1f3d432bb3..8d6f74935e 100644 +--- a/build/pack.c ++++ b/build/pack.c +@@ -765,7 +765,7 @@ rpmRC packageBinaries(rpmSpec spec, const char *cookie, int cheating) + #pragma omp parallel + #pragma omp single + for (int i = 0; i < npkgs; i++) { +- pkg = tasks[i]; ++ Package pkg = tasks[i]; + #pragma omp task untied priority(i) + { + pkg->rc = packageBinary(spec, pkg, cookie, cheating, &pkg->filename); diff --git a/backport-build-prioritize-large-packages.patch b/backport-build-prioritize-large-packages.patch new file mode 100644 index 0000000..de037aa --- /dev/null +++ b/backport-build-prioritize-large-packages.patch @@ -0,0 +1,132 @@ +From 6f6f5e70f16bef21523c3e2f19e7557bfcaa2546 Mon Sep 17 00:00:00 2001 +From: Michal Domonkos +Date: Tue, 21 Apr 2020 11:38:25 +0200 +Subject: [PATCH] build: prioritize large packages + +Binary packages come in different sizes and so their build time can vary +greatly. Dynamic scheduling, which we currently use for parallel +building, is a good strategy to combat such differences and load-balance +the available CPU cores. + +That said, knowing that the build time of a package is proportional to +its size, we can reduce the overall time even further by cleverly +ordering the task queue. + +As an example, consider a set of 5 packages, 4 of which take 1 unit of +time to build and one takes 4 units. If we were to build these on a +dual-core system, one possible unit distribution would look like this: + + TIME ---> +CPU 1 * * * * * * # package 1, 3 and 5 +CPU 2 * * # package 2 and 4 + +Now, compare that to a different distribution where the largest package +5 gets built early on: + + TIME ---> +CPU 1 * * * * # package 5 +CPU 2 * * * * # package 1, 2, 3 and 4 + +It's obvious that processing the largest packages first gives better +results when dealing with such a mix of small and large packages +(typically a regular package and its debuginfo counterpart, +respectively). + +Now, with dynamic scheduling in OpenMP, we cannot directly control the +task queue; we can only generate the tasks and let the runtime system do +its work. What we can do, however, is to provide a hint to the runtime +system for the desired ordering, using the "priority" clause. + +So, in this commit, we use the clause to assign a priority value to each +build task based on the respective package size (the bigger the size, +the higher the priority), to help achieve an optimal execution order. + +Indeed, in my testing, the priorities were followed to the letter (but +remember, that's not guaranteed by the specification). Interestingly, +even without the use of priorities, simply generating the tasks in the +desired order resulted in the same execution order for me, but that's, +again, just an implementation detail. + +Also note that OpenMP is allowed to stop the thread generating the tasks +at any time, and make it execute some of the tasks instead. If the +chosen task happens to be a long-duration one, we might hit a starvation +scenario where the other threads have exhausted the task queue and +there's nobody to generate new tasks. To counter that, this commit also +adds the "untied" clause which allows other threads to pick up where the +generating thread left off, and continue generating new tasks. + +Resolves #1045. +--- + build/pack.c | 38 +++++++++++++++++++++++++++++++++++--- + 1 file changed, 35 insertions(+), 3 deletions(-) + +diff --git a/build/pack.c b/build/pack.c +index a44a3fe9c8..bc40683c4f 100644 +--- a/build/pack.c ++++ b/build/pack.c +@@ -6,6 +6,7 @@ + #include "system.h" + + #include ++#include + #include + + #include /* RPMSIGTAG*, rpmReadPackageFile */ +@@ -726,16 +727,45 @@ static rpmRC packageBinary(rpmSpec spec, Package pkg, const char *cookie, int ch + return rc; + } + ++static int compareBinaries(const void *p1, const void *p2) { ++ Package pkg1 = *(Package *)p1; ++ Package pkg2 = *(Package *)p2; ++ uint64_t size1 = headerGetNumber(pkg1->header, RPMTAG_LONGSIZE); ++ uint64_t size2 = headerGetNumber(pkg2->header, RPMTAG_LONGSIZE); ++ if (size1 > size2) ++ return -1; ++ if (size1 < size2) ++ return 1; ++ return 0; ++} ++ ++/* ++ * Run binary creation in parallel, with task priority based on package size ++ * (largest first) to help achieve an optimal load distribution. ++ */ + rpmRC packageBinaries(rpmSpec spec, const char *cookie, int cheating) + { + rpmRC rc = RPMRC_OK; + Package pkg; ++ Package *tasks; ++ int npkgs = 0; ++ ++ for (pkg = spec->packages; pkg != NULL; pkg = pkg->next) ++ npkgs++; ++ tasks = xcalloc(npkgs, sizeof(Package)); ++ ++ pkg = spec->packages; ++ for (int i = 0; i < npkgs; i++) { ++ tasks[i] = pkg; ++ pkg = pkg->next; ++ } ++ qsort(tasks, npkgs, sizeof(Package), compareBinaries); + +- /* Run binary creation in parallel */ + #pragma omp parallel + #pragma omp single +- for (pkg = spec->packages; pkg != NULL; pkg = pkg->next) { +- #pragma omp task ++ for (int i = 0; i < npkgs; i++) { ++ pkg = tasks[i]; ++ #pragma omp task untied priority(i) + { + pkg->rc = packageBinary(spec, pkg, cookie, cheating, &pkg->filename); + rpmlog(RPMLOG_DEBUG, +@@ -754,6 +784,8 @@ rpmRC packageBinaries(rpmSpec spec, const char *cookie, int cheating) + if (rc == RPMRC_OK) + checkPackageSet(spec->packages); + ++ free(tasks); ++ + return rc; + } + diff --git a/rpm.spec b/rpm.spec index 98eee28..e667473 100644 --- a/rpm.spec +++ b/rpm.spec @@ -1,6 +1,6 @@ Name: rpm Version: 4.15.1 -Release: 24 +Release: 25 Summary: RPM Package Manager License: GPLv2+ URL: http://www.rpm.org/ @@ -51,6 +51,8 @@ Patch40: backport-Use-libelf-for-determining-file-colors.patch Patch41: backport-CVE-2021-20271.patch Patch42: backport-optimize-signature-header-merge-a-bit.patch Patch43: CVE-2021-20266.patch +Patch44: backport-build-prioritize-large-packages.patch +Patch45: backport-Fix-data-race-in-packageBinaries-function.patch BuildRequires: gcc autoconf automake libtool make gawk popt-devel openssl-devel readline-devel libdb-devel BuildRequires: zlib-devel libzstd-devel xz-devel bzip2-devel libarchive-devel ima-evm-utils-devel @@ -299,6 +301,12 @@ make check || (cat tests/rpmtests.log; exit 0) %{_mandir}/man1/gendiff.1* %changelog +* Wed Jun 09 2021 shixuantong - 4.15.1-25 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:Fix data race in packageBinaries() function and prioritize large packages + * Wed Jun 2 2021 guoxiaoqi - 4.15.1-23 - Type:cve - ID:NA