summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLeonard Grey <lgrey@chromium.org>2022-01-12 10:47:04 -0500
committerNico Weber <thakis@chromium.org>2022-01-12 10:47:04 -0500
commit6db04b97e6a2873cbf1252b7b6b7efedf3fceb4e (patch)
tree4e3d3c44f219a94b634e21a3be8b38a9baca1d1e
parent497a4b26c487a9640847c485f9b0c36c110a8545 (diff)
[lld-macho] Port CallGraphSort from COFF/ELF
Depends on D112160 This adds the new options `--call-graph-profile-sort` (default), `--no-call-graph-profile-sort` and `--print-symbol-order=`. If call graph profile sorting is enabled, reads `__LLVM,__cg_profile` sections from object files and uses the resulting graph to put callees and callers close to each other in the final binary via the C3 clustering heuristic. Differential Revision: https://reviews.llvm.org/D112164
-rw-r--r--lld/MachO/CMakeLists.txt1
-rw-r--r--lld/MachO/CallGraphSort.cpp252
-rw-r--r--lld/MachO/CallGraphSort.h22
-rw-r--r--lld/MachO/Config.h8
-rw-r--r--lld/MachO/Driver.cpp28
-rw-r--r--lld/MachO/InputFiles.cpp21
-rw-r--r--lld/MachO/InputFiles.h11
-rw-r--r--lld/MachO/InputSection.h4
-rw-r--r--lld/MachO/Options.td9
-rw-r--r--lld/MachO/Writer.cpp3
-rw-r--r--lld/test/MachO/cgprofile-icf.s46
-rw-r--r--lld/test/MachO/cgprofile-obj.s42
-rw-r--r--lld/test/MachO/cgprofile-print.s34
-rw-r--r--llvm/utils/gn/secondary/lld/MachO/BUILD.gn1
14 files changed, 481 insertions, 1 deletions
diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index abc9e473ef51..61f5c70005b9 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -10,6 +10,7 @@ add_lld_library(lldMachO
Arch/ARM64Common.cpp
Arch/ARM64_32.cpp
Arch/X86_64.cpp
+ CallGraphSort.cpp
ConcatOutputSection.cpp
Driver.cpp
DriverUtils.cpp
diff --git a/lld/MachO/CallGraphSort.cpp b/lld/MachO/CallGraphSort.cpp
new file mode 100644
index 000000000000..7a0192ea691e
--- /dev/null
+++ b/lld/MachO/CallGraphSort.cpp
@@ -0,0 +1,252 @@
+//===- CallGraphSort.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
+/// about the algorithm.
+///
+//===----------------------------------------------------------------------===//
+
+#include "CallGraphSort.h"
+#include "Config.h"
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <numeric>
+
+using namespace llvm;
+using namespace lld;
+using namespace lld::macho;
+
+namespace {
+struct Edge {
+ int from;
+ uint64_t weight;
+};
+
+struct Cluster {
+ Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {}
+
+ double getDensity() const {
+ if (size == 0)
+ return 0;
+ return double(weight) / double(size);
+ }
+
+ int next;
+ int prev;
+ uint64_t size;
+ uint64_t weight = 0;
+ uint64_t initialWeight = 0;
+ Edge bestPred = {-1, 0};
+};
+
+class CallGraphSort {
+public:
+ CallGraphSort();
+
+ DenseMap<const InputSection *, size_t> run();
+
+private:
+ std::vector<Cluster> clusters;
+ std::vector<const InputSection *> sections;
+};
+// Maximum amount the combined cluster density can be worse than the original
+// cluster to consider merging.
+constexpr int MAX_DENSITY_DEGRADATION = 8;
+} // end anonymous namespace
+
+using SectionPair = std::pair<const InputSection *, const InputSection *>;
+
+// Take the edge list in config->callGraphProfile, resolve symbol names to
+// Symbols, and generate a graph between InputSections with the provided
+// weights.
+CallGraphSort::CallGraphSort() {
+ MapVector<SectionPair, uint64_t> &profile = config->callGraphProfile;
+ DenseMap<const InputSection *, int> secToCluster;
+
+ auto getOrCreateCluster = [&](const InputSection *isec) -> int {
+ auto res = secToCluster.try_emplace(isec, clusters.size());
+ if (res.second) {
+ sections.push_back(isec);
+ clusters.emplace_back(clusters.size(), isec->getSize());
+ }
+ return res.first->second;
+ };
+
+ // Create the graph
+ for (std::pair<SectionPair, uint64_t> &c : profile) {
+ const auto fromSec = c.first.first->canonical();
+ const auto toSec = c.first.second->canonical();
+ uint64_t weight = c.second;
+ // Ignore edges between input sections belonging to different output
+ // sections. This is done because otherwise we would end up with clusters
+ // containing input sections that can't actually be placed adjacently in the
+ // output. This messes with the cluster size and density calculations. We
+ // would also end up moving input sections in other output sections without
+ // moving them closer to what calls them.
+ if (fromSec->parent != toSec->parent)
+ continue;
+
+ int from = getOrCreateCluster(fromSec);
+ int to = getOrCreateCluster(toSec);
+
+ clusters[to].weight += weight;
+
+ if (from == to)
+ continue;
+
+ // Remember the best edge.
+ Cluster &toC = clusters[to];
+ if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) {
+ toC.bestPred.from = from;
+ toC.bestPred.weight = weight;
+ }
+ }
+ for (Cluster &c : clusters)
+ c.initialWeight = c.weight;
+}
+
+// It's bad to merge clusters which would degrade the density too much.
+static bool isNewDensityBad(Cluster &a, Cluster &b) {
+ double newDensity = double(a.weight + b.weight) / double(a.size + b.size);
+ return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;
+}
+
+// Find the leader of V's belonged cluster (represented as an equivalence
+// class). We apply union-find path-halving technique (simple to implement) in
+// the meantime as it decreases depths and the time complexity.
+static int getLeader(std::vector<int> &leaders, int v) {
+ while (leaders[v] != v) {
+ leaders[v] = leaders[leaders[v]];
+ v = leaders[v];
+ }
+ return v;
+}
+
+static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx,
+ Cluster &from, int fromIdx) {
+ int tail1 = into.prev, tail2 = from.prev;
+ into.prev = tail2;
+ cs[tail2].next = intoIdx;
+ from.prev = tail1;
+ cs[tail1].next = fromIdx;
+ into.size += from.size;
+ into.weight += from.weight;
+ from.size = 0;
+ from.weight = 0;
+}
+
+// Group InputSections into clusters using the Call-Chain Clustering heuristic
+// then sort the clusters by density.
+DenseMap<const InputSection *, size_t> CallGraphSort::run() {
+ const uint64_t maxClusterSize = target->getPageSize();
+
+ // Cluster indices sorted by density.
+ std::vector<int> sorted(clusters.size());
+ // For union-find.
+ std::vector<int> leaders(clusters.size());
+
+ std::iota(leaders.begin(), leaders.end(), 0);
+ std::iota(sorted.begin(), sorted.end(), 0);
+
+ llvm::stable_sort(sorted, [&](int a, int b) {
+ return clusters[a].getDensity() > clusters[b].getDensity();
+ });
+
+ for (int l : sorted) {
+ // The cluster index is the same as the index of its leader here because
+ // clusters[L] has not been merged into another cluster yet.
+ Cluster &c = clusters[l];
+
+ // Don't consider merging if the edge is unlikely.
+ if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight)
+ continue;
+
+ int predL = getLeader(leaders, c.bestPred.from);
+ // Already in the same cluster.
+ if (l == predL)
+ continue;
+
+ Cluster *predC = &clusters[predL];
+ if (c.size + predC->size > maxClusterSize)
+ continue;
+
+ if (isNewDensityBad(*predC, c))
+ continue;
+
+ leaders[l] = predL;
+ mergeClusters(clusters, *predC, predL, c, l);
+ }
+ // Sort remaining non-empty clusters by density.
+ sorted.clear();
+ for (int i = 0, e = (int)clusters.size(); i != e; ++i)
+ if (clusters[i].size > 0)
+ sorted.push_back(i);
+ llvm::stable_sort(sorted, [&](int a, int b) {
+ return clusters[a].getDensity() > clusters[b].getDensity();
+ });
+
+ DenseMap<const InputSection *, size_t> orderMap;
+
+ // Sections will be sorted by decreasing order. Absent sections will have
+ // priority 0 and be placed at the end of sections.
+ // NB: This is opposite from COFF/ELF to be compatible with the existing
+ // order-file code.
+ int curOrder = clusters.size();
+ for (int leader : sorted) {
+ for (int i = leader;;) {
+ orderMap[sections[i]] = curOrder--;
+ i = clusters[i].next;
+ if (i == leader)
+ break;
+ }
+ }
+ if (!config->printSymbolOrder.empty()) {
+ std::error_code ec;
+ raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None);
+ if (ec) {
+ error("cannot open " + config->printSymbolOrder + ": " + ec.message());
+ return orderMap;
+ }
+ // Print the symbols ordered by C3, in the order of decreasing curOrder
+ // Instead of sorting all the orderMap, just repeat the loops above.
+ for (int leader : sorted)
+ for (int i = leader;;) {
+ const InputSection *isec = sections[i];
+ // Search all the symbols in the file of the section
+ // and find out a Defined symbol with name that is within the
+ // section.
+ for (Symbol *sym : isec->getFile()->symbols) {
+ if (auto *d = dyn_cast_or_null<Defined>(sym)) {
+ if (d->isec == isec)
+ os << sym->getName() << "\n";
+ }
+ }
+ i = clusters[i].next;
+ if (i == leader)
+ break;
+ }
+ }
+
+ return orderMap;
+}
+
+// Sort sections by the profile data provided by __LLVM,__cg_profile sections.
+//
+// This first builds a call graph based on the profile data then merges sections
+// according to the C³ heuristic. All clusters are then sorted by a density
+// metric to further improve locality.
+DenseMap<const InputSection *, size_t> macho::computeCallGraphProfileOrder() {
+ TimeTraceScope timeScope("Call graph profile sort");
+ return CallGraphSort().run();
+}
diff --git a/lld/MachO/CallGraphSort.h b/lld/MachO/CallGraphSort.h
new file mode 100644
index 000000000000..034f54a26089
--- /dev/null
+++ b/lld/MachO/CallGraphSort.h
@@ -0,0 +1,22 @@
+//===- CallGraphSort.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_CALL_GRAPH_SORT_H
+#define LLD_MACHO_CALL_GRAPH_SORT_H
+
+#include "InputSection.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace lld {
+namespace macho {
+
+llvm::DenseMap<const InputSection *, size_t> computeCallGraphProfileOrder();
+} // namespace macho
+} // namespace lld
+
+#endif
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 8c7f16c3d553..7a1c15472f74 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/MachO.h"
@@ -27,6 +28,7 @@
namespace lld {
namespace macho {
+class InputSection;
class Symbol;
struct SymbolPriorityEntry;
@@ -169,6 +171,12 @@ struct Configuration {
std::vector<SegmentProtection> segmentProtections;
llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities;
+ llvm::MapVector<std::pair<const InputSection *, const InputSection *>,
+ uint64_t>
+ callGraphProfile;
+ bool callGraphProfileSort = false;
+ llvm::StringRef printSymbolOrder;
+
SectionRenameMap sectionRenameMap;
SegmentRenameMap segmentRenameMap;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index e1371dbd7298..92946eb41e1a 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1057,6 +1057,25 @@ static void gatherInputSections() {
assert(inputOrder <= UnspecifiedInputOrder);
}
+static void extractCallGraphProfile() {
+ TimeTraceScope timeScope("Extract call graph profile");
+ for (const InputFile *file : inputFiles) {
+ auto *obj = dyn_cast_or_null<ObjFile>(file);
+ if (!obj)
+ continue;
+ for (const CallGraphEntry &entry : obj->callGraph) {
+ assert(entry.fromIndex < obj->symbols.size() &&
+ entry.toIndex < obj->symbols.size());
+ auto *fromSym = dyn_cast_or_null<Defined>(obj->symbols[entry.fromIndex]);
+ auto *toSym = dyn_cast_or_null<Defined>(obj->symbols[entry.toIndex]);
+
+ if (!fromSym || !toSym)
+ continue;
+ config->callGraphProfile[{fromSym->isec, toSym->isec}] += entry.count;
+ }
+ }
+}
+
static void foldIdenticalLiterals() {
// We always create a cStringSection, regardless of whether dedupLiterals is
// true. If it isn't, we simply create a non-deduplicating CStringSection.
@@ -1266,6 +1285,9 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
config->icfLevel != ICFLevel::none;
config->warnDylibInstallName = args.hasFlag(
OPT_warn_dylib_install_name, OPT_no_warn_dylib_install_name, false);
+ config->callGraphProfileSort = args.hasFlag(
+ OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true);
+ config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order);
// FIXME: Add a commandline flag for this too.
config->zeroModTime = getenv("ZERO_AR_DATE");
@@ -1458,8 +1480,10 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
replaceCommonSymbols();
StringRef orderFile = args.getLastArgValue(OPT_order_file);
- if (!orderFile.empty())
+ if (!orderFile.empty()) {
parseOrderFile(orderFile);
+ config->callGraphProfileSort = false;
+ }
referenceStubBinder();
@@ -1509,6 +1533,8 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
}
gatherInputSections();
+ if (config->callGraphProfileSort)
+ extractCallGraphProfile();
if (config->deadStrip)
markLive();
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 74bd463706c2..cfe3c41cb160 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -63,10 +63,12 @@
#include "llvm/ADT/iterator.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/LTO/LTO.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TarWriter.h"
+#include "llvm/Support/TimeProfiler.h"
#include "llvm/TextAPI/Architecture.h"
#include "llvm/TextAPI/InterfaceFile.h"
@@ -325,6 +327,25 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
if (name == section_names::compactUnwind)
compactUnwindSection = &sections.back();
} else if (segname == segment_names::llvm) {
+ if (name == "__cg_profile" && config->callGraphProfileSort) {
+ TimeTraceScope timeScope("Parsing call graph section");
+ BinaryStreamReader reader(data, support::little);
+ while (!reader.empty()) {
+ uint32_t fromIndex, toIndex;
+ uint64_t count;
+ if (Error err = reader.readInteger(fromIndex))
+ fatal(toString(this) + ": Expected 32-bit integer");
+ if (Error err = reader.readInteger(toIndex))
+ fatal(toString(this) + ": Expected 32-bit integer");
+ if (Error err = reader.readInteger(count))
+ fatal(toString(this) + ": Expected 64-bit integer");
+ callGraph.emplace_back();
+ CallGraphEntry &entry = callGraph.back();
+ entry.fromIndex = fromIndex;
+ entry.toIndex = toIndex;
+ entry.count = count;
+ }
+ }
// ld64 does not appear to emit contents from sections within the __LLVM
// segment. Symbols within those sections point to bitcode metadata
// instead of actual symbols. Global symbols within those sections could
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 36da70011170..a29e688c58d1 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -65,6 +65,16 @@ struct Section {
Section(uint64_t addr) : address(addr){};
};
+// Represents a call graph profile edge.
+struct CallGraphEntry {
+ // The index of the caller in the symbol table.
+ uint32_t fromIndex;
+ // The index of the callee in the symbol table.
+ uint32_t toIndex;
+ // Number of calls from callee to caller in the profile.
+ uint64_t count;
+};
+
class InputFile {
public:
enum Kind {
@@ -115,6 +125,7 @@ public:
llvm::DWARFUnit *compileUnit = nullptr;
const uint32_t modTime;
std::vector<ConcatInputSection *> debugSections;
+ std::vector<CallGraphEntry> callGraph;
private:
Section *compactUnwindSection = nullptr;
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 58f794227b61..5535e871c539 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -53,6 +53,7 @@ public:
virtual bool isLive(uint64_t off) const = 0;
virtual void markLive(uint64_t off) = 0;
virtual InputSection *canonical() { return this; }
+ virtual const InputSection *canonical() const { return this; }
OutputSection *parent = nullptr;
@@ -124,6 +125,9 @@ public:
ConcatInputSection *canonical() override {
return replacement ? replacement : this;
}
+ const InputSection *canonical() const override {
+ return replacement ? replacement : this;
+ }
static bool classof(const InputSection *isec) {
return isec->kind() == ConcatKind;
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 7cd4d01a0a5a..b4b9ca6582ee 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -79,6 +79,15 @@ def no_warn_dylib_install_name: Joined<["--"], "no-warn-dylib-install-name">,
def warn_dylib_install_name: Joined<["--"], "warn-dylib-install-name">,
HelpText<"Warn on -install-name if -dylib is not passed">,
Group<grp_lld>;
+def call_graph_profile_sort: Flag<["--"], "call-graph-profile-sort">,
+ HelpText<"Reorder sections with call graph profile (default)">,
+ Group<grp_lld>;
+def no_call_graph_profile_sort : Flag<["--"], "no-call-graph-profile-sort">,
+ HelpText<"Do not reorder sections with call graph profile">,
+ Group<grp_lld>;
+def print_symbol_order: Joined<["--"], "print-symbol-order=">,
+ HelpText<"Print a symbol order specified by --call-graph-profile-sort into the specified file">,
+ Group<grp_lld>;
// This is a complete Options.td compiled from Apple's ld(1) manpage
// dated 2018-03-07 and cross checked with ld64 source code in repo
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index c633dbb02772..efa0c9d90cb4 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "Writer.h"
+#include "CallGraphSort.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputFiles.h"
@@ -865,6 +866,8 @@ static size_t getSymbolPriority(const SymbolPriorityEntry &entry,
// Each section gets assigned the priority of the highest-priority symbol it
// contains.
static DenseMap<const InputSection *, size_t> buildInputSectionPriorities() {
+ if (config->callGraphProfileSort)
+ return computeCallGraphProfileOrder();
DenseMap<const InputSection *, size_t> sectionPriorities;
if (config->priorities.empty())
diff --git a/lld/test/MachO/cgprofile-icf.s b/lld/test/MachO/cgprofile-icf.s
new file mode 100644
index 000000000000..3d34b9336790
--- /dev/null
+++ b/lld/test/MachO/cgprofile-icf.s
@@ -0,0 +1,46 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t
+
+# RUN: %lld -e A %t -o %t.out --icf=all
+# RUN: llvm-nm --numeric-sort %t.out | FileCheck %s
+# RUN: %lld -e A %t -o %t2.out
+# RUN: llvm-nm --numeric-sort %t2.out | FileCheck %s --check-prefix=NOICF
+
+.text
+ .globl D
+D:
+ mov $60, %rax
+ retq
+
+ .globl C
+C:
+ mov $60, %rax
+ retq
+
+ .globl B
+B:
+ mov $2, %rax
+ retq
+
+ .globl A
+A:
+ mov $42, %rax
+ retq
+
+.cg_profile A, B, 100
+.cg_profile A, C, 40
+.cg_profile C, D, 61
+
+.subsections_via_symbols
+
+# CHECK: 0000000100000290 T A
+# CHECK-NEXT: 0000000100000298 T C
+# CHECK-NEXT: 0000000100000298 T D
+# CHECK-NEXT: 00000001000002a0 T B
+
+# NOICF: 0000000100000290 T A
+# NOICF-NEXT: 0000000100000298 T B
+# NOICF-NEXT: 00000001000002a0 T C
+# NOICF-NEXT: 00000001000002a8 T D
+
diff --git a/lld/test/MachO/cgprofile-obj.s b/lld/test/MachO/cgprofile-obj.s
new file mode 100644
index 000000000000..97aaf2de8c18
--- /dev/null
+++ b/lld/test/MachO/cgprofile-obj.s
@@ -0,0 +1,42 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: %lld -lSystem -e A -o %t.out %t.o
+# RUN: llvm-nm --numeric-sort %t.out | FileCheck %s
+# RUN: %lld --no-call-graph-profile-sort -lSystem -e A -o %t.out %t.o
+# RUN: llvm-nm --numeric-sort %t.out | FileCheck %s --check-prefix=NO-CG
+
+.text
+
+D:
+ retq
+.globl C
+C:
+ retq
+.globl B
+B:
+ retq
+.globl A
+A:
+Aa:
+ retq
+.subsections_via_symbols
+
+
+
+ .cg_profile A, B, 10
+ .cg_profile A, B, 10
+ .cg_profile Aa, B, 80
+ .cg_profile A, C, 40
+ .cg_profile B, C, 30
+ .cg_profile C, D, 90
+
+# CHECK: 00000001000002c8 T A
+# CHECK: 00000001000002c9 T B
+# CHECK: 00000001000002ca T C
+# CHECK: 00000001000002cb t D
+
+# NO-CG: 00000001000002c8 t D
+# NO-CG: 00000001000002c9 T C
+# NO-CG: 00000001000002ca T B
+# NO-CG: 00000001000002cb T A
diff --git a/lld/test/MachO/cgprofile-print.s b/lld/test/MachO/cgprofile-print.s
new file mode 100644
index 000000000000..9435891695c9
--- /dev/null
+++ b/lld/test/MachO/cgprofile-print.s
@@ -0,0 +1,34 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t
+# RUN: %lld -e A %t -o %t2 --print-symbol-order=%t3
+# RUN: FileCheck %s --input-file %t3
+
+# CHECK: B
+# CHECK-NEXT: C
+# CHECK-NEXT: D
+# CHECK-NEXT: A
+
+.text
+.globl A
+A:
+ nop
+
+.globl B
+B:
+ nop
+
+.globl C
+C:
+ nop
+
+.globl D
+D:
+ nop
+
+.subsections_via_symbols
+
+.cg_profile A, B, 5
+.cg_profile B, C, 50
+.cg_profile C, D, 40
+.cg_profile D, B, 10
diff --git a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
index 086a71a5b948..4d8a050dcd8c 100644
--- a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
@@ -27,6 +27,7 @@ static_library("MachO") {
"Arch/ARM64Common.cpp",
"Arch/ARM64_32.cpp",
"Arch/X86_64.cpp",
+ "CallGraphSort.cpp",
"ConcatOutputSection.cpp",
"Driver.cpp",
"DriverUtils.cpp",