mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-18 17:56:50 +00:00
[BOLT][Docs] Add Sphinx documentation
Add stub Sphinx documentation, with configuration copy-pasted from lld and index page converted from bolt/README.md. Reviewed By: #bolt, rafauler Differential Revision: https://reviews.llvm.org/D140156
This commit is contained in:
parent
f40d25dd8d
commit
75c069584a
@ -97,9 +97,6 @@ if (LLVM_ENABLE_SPHINX)
|
||||
if (${SPHINX_OUTPUT_HTML})
|
||||
add_sphinx_target(html bolt)
|
||||
endif()
|
||||
if (${SPHINX_OUTPUT_MAN})
|
||||
add_sphinx_target(man bolt)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
13
bolt/docs/README.txt
Normal file
13
bolt/docs/README.txt
Normal file
@ -0,0 +1,13 @@
|
||||
BOLT Documentation
|
||||
====================
|
||||
|
||||
The BOLT documentation is written using the Sphinx documentation generator. It
|
||||
is currently tested with Sphinx 1.1.3.
|
||||
|
||||
To build the documents into html configure BOLT with the following cmake options:
|
||||
|
||||
* -DLLVM_ENABLE_SPHINX=ON
|
||||
* -DBOLT_INCLUDE_DOCS=ON
|
||||
|
||||
After configuring BOLT with these options the make rule `docs-bolt-html` should
|
||||
be available.
|
BIN
bolt/docs/_static/favicon.ico
vendored
Normal file
BIN
bolt/docs/_static/favicon.ico
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.1 KiB |
246
bolt/docs/conf.py
Normal file
246
bolt/docs/conf.py
Normal file
@ -0,0 +1,246 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# BOLT documentation build configuration file.
|
||||
#
|
||||
# This file is execfile()d with the current directory set to its containing dir.
|
||||
#
|
||||
# Note that not all possible configuration values are present in this
|
||||
# autogenerated file.
|
||||
#
|
||||
# All configuration values have a default; values that are commented out
|
||||
# serve to show the default.
|
||||
|
||||
import sys, os
|
||||
from datetime import date
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#sys.path.insert(0, os.path.abspath('.'))
|
||||
|
||||
# -- General configuration -----------------------------------------------------
|
||||
|
||||
# If your documentation needs a minimal Sphinx version, state it here.
|
||||
#needs_sphinx = '1.0'
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be extensions
|
||||
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
|
||||
extensions = ['sphinx.ext.intersphinx', 'sphinx.ext.todo']
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['_templates']
|
||||
|
||||
# The suffix of source filenames.
|
||||
source_suffix = '.rst'
|
||||
|
||||
# The encoding of source files.
|
||||
#source_encoding = 'utf-8-sig'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
|
||||
# General information about the project.
|
||||
project = u'BOLT'
|
||||
copyright = u'2015-%d, BOLT team' % date.today().year
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
#language = None
|
||||
|
||||
# There are two options for replacing |today|: either, you set today to some
|
||||
# non-false value, then it is used:
|
||||
#today = ''
|
||||
# Else, today_fmt is used as the format for a strftime call.
|
||||
today_fmt = '%Y-%m-%d'
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
exclude_patterns = ['_build']
|
||||
|
||||
# The reST default role (used for this markup: `text`) to use for all documents.
|
||||
#default_role = None
|
||||
|
||||
# If true, '()' will be appended to :func: etc. cross-reference text.
|
||||
#add_function_parentheses = True
|
||||
|
||||
# If true, the current module name will be prepended to all description
|
||||
# unit titles (such as .. function::).
|
||||
#add_module_names = True
|
||||
|
||||
# If true, sectionauthor and moduleauthor directives will be shown in the
|
||||
# output. They are ignored by default.
|
||||
show_authors = True
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = 'friendly'
|
||||
|
||||
# A list of ignored prefixes for module index sorting.
|
||||
#modindex_common_prefix = []
|
||||
|
||||
|
||||
# -- Options for HTML output ---------------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
html_theme = 'haiku'
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
#html_theme_options = {}
|
||||
|
||||
# Add any paths that contain custom themes here, relative to this directory.
|
||||
html_theme_path = ["."]
|
||||
|
||||
# The name for this set of Sphinx documents. If None, it defaults to
|
||||
# "<project> v<release> documentation".
|
||||
#html_title = None
|
||||
|
||||
# A shorter title for the navigation bar. Default is the same as html_title.
|
||||
#html_short_title = None
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top
|
||||
# of the sidebar.
|
||||
#html_logo = None
|
||||
|
||||
# If given, this must be the name of an image file (path relative to the
|
||||
# configuration directory) that is the favicon of the docs. Modern browsers use
|
||||
# this as icon for tabs, windows and bookmarks. It should be a Windows-style
|
||||
# icon file (.ico), which is 16x16 or 32x32 pixels large. Default: None. The
|
||||
# image file will be copied to the _static directory of the output HTML, but
|
||||
# only if the file does not already exist there.
|
||||
html_favicon = '_static/favicon.ico'
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ['_static']
|
||||
|
||||
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
||||
# using the given strftime format.
|
||||
html_last_updated_fmt = '%Y-%m-%d'
|
||||
|
||||
# If true, SmartyPants will be used to convert quotes and dashes to
|
||||
# typographically correct entities.
|
||||
#html_use_smartypants = True
|
||||
|
||||
# Custom sidebar templates, maps document names to template names.
|
||||
html_sidebars = {'index': ['indexsidebar.html']}
|
||||
|
||||
# Additional templates that should be rendered to pages, maps page names to
|
||||
# template names.
|
||||
# html_additional_pages = {'index': 'index.html'}
|
||||
|
||||
# If false, no module index is generated.
|
||||
#html_domain_indices = True
|
||||
|
||||
# If false, no index is generated.
|
||||
#html_use_index = True
|
||||
|
||||
# If true, the index is split into individual pages for each letter.
|
||||
#html_split_index = False
|
||||
|
||||
# If true, links to the reST sources are added to the pages.
|
||||
html_show_sourcelink = True
|
||||
|
||||
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
||||
#html_show_sphinx = True
|
||||
|
||||
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
|
||||
#html_show_copyright = True
|
||||
|
||||
# If true, an OpenSearch description file will be output, and all pages will
|
||||
# contain a <link> tag referring to it. The value of this option must be the
|
||||
# base URL from which the finished HTML is served.
|
||||
#html_use_opensearch = ''
|
||||
|
||||
# This is the file name suffix for HTML files (e.g. ".xhtml").
|
||||
#html_file_suffix = None
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = 'boltdoc'
|
||||
|
||||
|
||||
# -- Options for LaTeX output --------------------------------------------------
|
||||
|
||||
latex_elements = {
|
||||
# The paper size ('letterpaper' or 'a4paper').
|
||||
#'papersize': 'letterpaper',
|
||||
|
||||
# The font size ('10pt', '11pt' or '12pt').
|
||||
#'pointsize': '10pt',
|
||||
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
#'preamble': '',
|
||||
}
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title, author, documentclass [howto/manual]).
|
||||
latex_documents = [
|
||||
('contents', 'bolt.tex', u'BOLT Documentation',
|
||||
u'LLVM project', 'manual'),
|
||||
]
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top of
|
||||
# the title page.
|
||||
#latex_logo = None
|
||||
|
||||
# For "manual" documents, if this is true, then toplevel headings are parts,
|
||||
# not chapters.
|
||||
#latex_use_parts = False
|
||||
|
||||
# If true, show page references after internal links.
|
||||
#latex_show_pagerefs = False
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
#latex_show_urls = False
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
#latex_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
#latex_domain_indices = True
|
||||
|
||||
|
||||
# -- Options for manual page output --------------------------------------------
|
||||
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [
|
||||
('contents', 'bolt', u'BOLT Documentation',
|
||||
[u'LLVM project'], 1)
|
||||
]
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
#man_show_urls = False
|
||||
|
||||
|
||||
# -- Options for Texinfo output ------------------------------------------------
|
||||
|
||||
# Grouping the document tree into Texinfo files. List of tuples
|
||||
# (source start file, target name, title, author,
|
||||
# dir menu entry, description, category)
|
||||
texinfo_documents = [
|
||||
('contents', 'BOLT', u'BOLT Documentation',
|
||||
u'LLVM project', 'BOLT', 'Binary Optimization and Layout Tool',
|
||||
'Miscellaneous'),
|
||||
]
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
#texinfo_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
#texinfo_domain_indices = True
|
||||
|
||||
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
||||
#texinfo_show_urls = 'footnote'
|
||||
|
||||
|
||||
# FIXME: Define intersphinx configuration.
|
||||
intersphinx_mapping = {}
|
||||
|
||||
|
||||
# -- Options for extensions ----------------------------------------------------
|
||||
|
||||
# Enable this if you want TODOs to show up in the generated documentation.
|
||||
todo_include_todos = True
|
257
bolt/docs/index.rst
Normal file
257
bolt/docs/index.rst
Normal file
@ -0,0 +1,257 @@
|
||||
BOLT
|
||||
====
|
||||
|
||||
BOLT is a post-link optimizer developed to speed up large applications.
|
||||
It achieves the improvements by optimizing application’s code layout
|
||||
based on execution profile gathered by sampling profiler, such as Linux
|
||||
``perf`` tool. An overview of the ideas implemented in BOLT along with a
|
||||
discussion of its potential and current results is available in `CGO’19
|
||||
paper <https://research.fb.com/publications/bolt-a-practical-binary-optimizer-for-data-centers-and-beyond/>`__.
|
||||
|
||||
Input Binary Requirements
|
||||
-------------------------
|
||||
|
||||
BOLT operates on X86-64 and AArch64 ELF binaries. At the minimum, the
|
||||
binaries should have an unstripped symbol table, and, to get maximum
|
||||
performance gains, they should be linked with relocations
|
||||
(``--emit-relocs`` or ``-q`` linker flag).
|
||||
|
||||
BOLT disassembles functions and reconstructs the control flow graph
|
||||
(CFG) before it runs optimizations. Since this is a nontrivial task,
|
||||
especially when indirect branches are present, we rely on certain
|
||||
heuristics to accomplish it. These heuristics have been tested on a code
|
||||
generated with Clang and GCC compilers. The main requirement for C/C++
|
||||
code is not to rely on code layout properties, such as function pointer
|
||||
deltas. Assembly code can be processed too. Requirements for it include
|
||||
a clear separation of code and data, with data objects being placed into
|
||||
data sections/segments. If indirect jumps are used for intra-function
|
||||
control transfer (e.g., jump tables), the code patterns should be
|
||||
matching those generated by Clang/GCC.
|
||||
|
||||
NOTE: BOLT is currently incompatible with the
|
||||
``-freorder-blocks-and-partition`` compiler option. Since GCC8 enables
|
||||
this option by default, you have to explicitly disable it by adding
|
||||
``-fno-reorder-blocks-and-partition`` flag if you are compiling with
|
||||
GCC8 or above.
|
||||
|
||||
NOTE2: DWARF v5 is the new debugging format generated by the latest LLVM
|
||||
and GCC compilers. It offers several benefits over the previous DWARF
|
||||
v4. Currently, the support for v5 is a work in progress for BOLT. While
|
||||
you will be able to optimize binaries produced by the latest compilers,
|
||||
until the support is complete, you will not be able to update the debug
|
||||
info with ``-update-debug-sections``. To temporarily work around the
|
||||
issue, we recommend compiling binaries with ``-gdwarf-4`` option that
|
||||
forces DWARF v4 output.
|
||||
|
||||
PIE and .so support has been added recently. Please report bugs if you
|
||||
encounter any issues.
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
Docker Image
|
||||
~~~~~~~~~~~~
|
||||
|
||||
You can build and use the docker image containing BOLT using our `docker
|
||||
file <utils/docker/Dockerfile>`__. Alternatively, you can build BOLT
|
||||
manually using the steps below.
|
||||
|
||||
Manual Build
|
||||
~~~~~~~~~~~~
|
||||
|
||||
BOLT heavily uses LLVM libraries, and by design, it is built as one of
|
||||
LLVM tools. The build process is not much different from a regular LLVM
|
||||
build. The following instructions are assuming that you are running
|
||||
under Linux.
|
||||
|
||||
Start with cloning LLVM repo:
|
||||
|
||||
::
|
||||
|
||||
> git clone https://github.com/llvm/llvm-project.git
|
||||
> mkdir build
|
||||
> cd build
|
||||
> cmake -G Ninja ../llvm-project/llvm -DLLVM_TARGETS_TO_BUILD="X86;AArch64" -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_ENABLE_PROJECTS="bolt"
|
||||
> ninja bolt
|
||||
|
||||
``llvm-bolt`` will be available under ``bin/``. Add this directory to
|
||||
your path to ensure the rest of the commands in this tutorial work.
|
||||
|
||||
Optimizing BOLT’s Performance
|
||||
-----------------------------
|
||||
|
||||
BOLT runs many internal passes in parallel. If you foresee heavy usage
|
||||
of BOLT, you can improve the processing time by linking against one of
|
||||
memory allocation libraries with good support for concurrency. E.g. to
|
||||
use jemalloc:
|
||||
|
||||
::
|
||||
|
||||
> sudo yum install jemalloc-devel
|
||||
> LD_PRELOAD=/usr/lib64/libjemalloc.so llvm-bolt ....
|
||||
|
||||
Or if you rather use tcmalloc:
|
||||
|
||||
::
|
||||
|
||||
> sudo yum install gperftools-devel
|
||||
> LD_PRELOAD=/usr/lib64/libtcmalloc_minimal.so llvm-bolt ....
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
For a complete practical guide of using BOLT see `Optimizing Clang with
|
||||
BOLT <docs/OptimizingClang.md>`__.
|
||||
|
||||
Step 0
|
||||
~~~~~~
|
||||
|
||||
In order to allow BOLT to re-arrange functions (in addition to
|
||||
re-arranging code within functions) in your program, it needs a little
|
||||
help from the linker. Add ``--emit-relocs`` to the final link step of
|
||||
your application. You can verify the presence of relocations by checking
|
||||
for ``.rela.text`` section in the binary. BOLT will also report if it
|
||||
detects relocations while processing the binary.
|
||||
|
||||
Step 1: Collect Profile
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
This step is different for different kinds of executables. If you can
|
||||
invoke your program to run on a representative input from a command
|
||||
line, then check **For Applications** section below. If your program
|
||||
typically runs as a server/service, then skip to **For Services**
|
||||
section.
|
||||
|
||||
The version of ``perf`` command used for the following steps has to
|
||||
support ``-F brstack`` option. We recommend using ``perf`` version 4.5
|
||||
or later.
|
||||
|
||||
For Applications
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
This assumes you can run your program from a command line with a typical
|
||||
input. In this case, simply prepend the command line invocation with
|
||||
``perf``:
|
||||
|
||||
::
|
||||
|
||||
$ perf record -e cycles:u -j any,u -o perf.data -- <executable> <args> ...
|
||||
|
||||
For Services
|
||||
^^^^^^^^^^^^
|
||||
|
||||
Once you get the service deployed and warmed-up, it is time to collect
|
||||
perf data with LBR (branch information). The exact perf command to use
|
||||
will depend on the service. E.g., to collect the data for all processes
|
||||
running on the server for the next 3 minutes use:
|
||||
|
||||
::
|
||||
|
||||
$ perf record -e cycles:u -j any,u -a -o perf.data -- sleep 180
|
||||
|
||||
Depending on the application, you may need more samples to be included
|
||||
with your profile. It’s hard to tell upfront what would be a sweet spot
|
||||
for your application. We recommend the profile to cover 1B instructions
|
||||
as reported by BOLT ``-dyno-stats`` option. If you need to increase the
|
||||
number of samples in the profile, you can either run the ``sleep``
|
||||
command for longer and use ``-F<N>`` option with ``perf`` to increase
|
||||
sampling frequency.
|
||||
|
||||
Note that for profile collection we recommend using cycle events and not
|
||||
``BR_INST_RETIRED.*``. Empirically we found it to produce better
|
||||
results.
|
||||
|
||||
If the collection of a profile with branches is not available, e.g.,
|
||||
when you run on a VM or on hardware that does not support it, then you
|
||||
can use only sample events, such as cycles. In this case, the quality of
|
||||
the profile information would not be as good, and performance gains with
|
||||
BOLT are expected to be lower.
|
||||
|
||||
With instrumentation
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
If perf record is not available to you, you may collect profile by first
|
||||
instrumenting the binary with BOLT and then running it.
|
||||
|
||||
::
|
||||
|
||||
llvm-bolt <executable> -instrument -o <instrumented-executable>
|
||||
|
||||
After you run instrumented-executable with the desired workload, its
|
||||
BOLT profile should be ready for you in ``/tmp/prof.fdata`` and you can
|
||||
skip **Step 2**.
|
||||
|
||||
Run BOLT with the ``-help`` option and check the category “BOLT
|
||||
instrumentation options” for a quick reference on instrumentation knobs.
|
||||
|
||||
Step 2: Convert Profile to BOLT Format
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
NOTE: you can skip this step and feed ``perf.data`` directly to BOLT
|
||||
using experimental ``-p perf.data`` option.
|
||||
|
||||
For this step, you will need ``perf.data`` file collected from the
|
||||
previous step and a copy of the binary that was running. The binary has
|
||||
to be either unstripped, or should have a symbol table intact (i.e.,
|
||||
running ``strip -g`` is okay).
|
||||
|
||||
Make sure ``perf`` is in your ``PATH``, and execute ``perf2bolt``:
|
||||
|
||||
::
|
||||
|
||||
$ perf2bolt -p perf.data -o perf.fdata <executable>
|
||||
|
||||
This command will aggregate branch data from ``perf.data`` and store it
|
||||
in a format that is both more compact and more resilient to binary
|
||||
modifications.
|
||||
|
||||
If the profile was collected without LBRs, you will need to add ``-nl``
|
||||
flag to the command line above.
|
||||
|
||||
Step 3: Optimize with BOLT
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Once you have ``perf.fdata`` ready, you can use it for optimizations
|
||||
with BOLT. Assuming your environment is setup to include the right path,
|
||||
execute ``llvm-bolt``:
|
||||
|
||||
::
|
||||
|
||||
$ llvm-bolt <executable> -o <executable>.bolt -data=perf.fdata -reorder-blocks=ext-tsp -reorder-functions=hfsort -split-functions -split-all-cold -split-eh -dyno-stats
|
||||
|
||||
If you do need an updated debug info, then add
|
||||
``-update-debug-sections`` option to the command above. The processing
|
||||
time will be slightly longer.
|
||||
|
||||
For a full list of options see ``-help``/``-help-hidden`` output.
|
||||
|
||||
The input binary for this step does not have to 100% match the binary
|
||||
used for profile collection in **Step 1**. This could happen when you
|
||||
are doing active development, and the source code constantly changes,
|
||||
yet you want to benefit from profile-guided optimizations. However,
|
||||
since the binary is not precisely the same, the profile information
|
||||
could become invalid or stale, and BOLT will report the number of
|
||||
functions with a stale profile. The higher the number, the less
|
||||
performance improvement should be expected. Thus, it is crucial to
|
||||
update ``.fdata`` for release branches.
|
||||
|
||||
Multiple Profiles
|
||||
-----------------
|
||||
|
||||
Suppose your application can run in different modes, and you can
|
||||
generate multiple profiles for each one of them. To generate a single
|
||||
binary that can benefit all modes (assuming the profiles don’t
|
||||
contradict each other) you can use ``merge-fdata`` tool:
|
||||
|
||||
::
|
||||
|
||||
$ merge-fdata *.fdata > combined.fdata
|
||||
|
||||
Use ``combined.fdata`` for **Step 3** above to generate a universally
|
||||
optimized binary.
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
BOLT is licensed under the `Apache License v2.0 with LLVM
|
||||
Exceptions <./LICENSE.TXT>`__.
|
Loading…
x
Reference in New Issue
Block a user