[clang-format] Add an option to format integer literal separators

Closes #58949.

Differential Revision: https://reviews.llvm.org/D140543
This commit is contained in:
Owen Pan 2022-12-22 01:21:17 -08:00
parent bbe402b33a
commit 46c94e5067
9 changed files with 552 additions and 0 deletions

View File

@ -3159,6 +3159,37 @@ the configuration (without a prefix: ``Auto``).
**IntegerLiteralSeparator** (``IntegerLiteralSeparatorStyle``) :versionbadge:`clang-format 16`
Format integer literal separators (``'`` for C++ and ``_`` for C#, Java,
and JavaScript).
Nested configuration flags:
Separator format of integer literals of different bases.
<0: Remove separators.
0: Leave the literal as is.
>0: Insert separators between digits, starting from the rightmost digit.
* ``int8_t Binary`` .. code-block:: c++
-1: 0b100111101101
0: 0b10011'11'0110'1
3: 0b100'111'101'101
4: 0b1001'1110'1101
* ``int8_t Decimal`` .. code-block:: c++
-1: 18446744073709550592ull
0: 184467'440737'0'95505'92ull
3: 18'446'744'073'709'550'592ull
* ``int8_t Hex`` .. code-block:: c++
-1: 0xDEADBEEFDEADBEEFuz
0: 0xDEAD'BEEF'DE'AD'BEE'Fuz
2: 0xDE'AD'BE'EF'DE'AD'BE'EFuz
**JavaImportGroups** (``List of Strings``) :versionbadge:`clang-format 8`
A vector of prefixes ordered by the desired groups for Java imports.

View File

@ -867,6 +867,8 @@ clang-format
- Add ``RequiresExpressionIndentation`` option for configuring the alignment of requires-expressions.
The default value of this option is ``OuterScope``, which differs in behavior from clang-format 15.
To match the default behavior of clang-format 15, use the ``Keyword`` value.
- Add ``IntegerLiteralSeparator`` option for fixing integer literal separators
in C++, C#, Java, and JavaScript.
clang-extdef-mapping
--------------------

View File

@ -2450,6 +2450,37 @@ struct FormatStyle {
/// \version 11
TrailingCommaStyle InsertTrailingCommas;
/// Separator format of integer literals of different bases.
/// Each field holds the setting for literals of one base:
/// <0: Remove separators.
/// 0: Leave the literal as is.
/// >0: Insert separators between digits, starting from the rightmost digit.
/// A positive value is used as the number of digits per group.
struct IntegerLiteralSeparatorStyle {
/// Format separators in binary literals.
/// \code
/// -1: 0b100111101101
/// 0: 0b10011'11'0110'1
/// 3: 0b100'111'101'101
/// 4: 0b1001'1110'1101
/// \endcode
int8_t Binary;
/// Format separators in decimal literals.
/// \code
/// -1: 18446744073709550592ull
/// 0: 184467'440737'0'95505'92ull
/// 3: 18'446'744'073'709'550'592ull
/// \endcode
int8_t Decimal;
/// Format separators in hexadecimal literals.
/// \code
/// -1: 0xDEADBEEFDEADBEEFuz
/// 0: 0xDEAD'BEEF'DE'AD'BEE'Fuz
/// 2: 0xDE'AD'BE'EF'DE'AD'BE'EFuz
/// \endcode
int8_t Hex;
};
/// Format integer literal separators (``'`` for C++ and ``_`` for C#, Java,
/// and JavaScript).
/// \version 16
IntegerLiteralSeparatorStyle IntegerLiteralSeparator;
/// A vector of prefixes ordered by the desired groups for Java imports.
///
/// One group's prefix can be a subset of another - the longest prefix is
@ -4089,6 +4120,10 @@ struct FormatStyle {
IndentWidth == R.IndentWidth &&
IndentWrappedFunctionNames == R.IndentWrappedFunctionNames &&
InsertBraces == R.InsertBraces &&
IntegerLiteralSeparator.Binary == R.IntegerLiteralSeparator.Binary &&
IntegerLiteralSeparator.Decimal ==
R.IntegerLiteralSeparator.Decimal &&
IntegerLiteralSeparator.Hex == R.IntegerLiteralSeparator.Hex &&
JavaImportGroups == R.JavaImportGroups &&
JavaScriptQuotes == R.JavaScriptQuotes &&
JavaScriptWrapImports == R.JavaScriptWrapImports &&

View File

@ -8,6 +8,7 @@ add_clang_library(clangFormat
Format.cpp
FormatToken.cpp
FormatTokenLexer.cpp
IntegerLiteralSeparatorFixer.cpp
MacroCallReconstructor.cpp
MacroExpander.cpp
NamespaceEndCommentsFixer.cpp

View File

@ -20,6 +20,7 @@
#include "FormatInternal.h"
#include "FormatToken.h"
#include "FormatTokenLexer.h"
#include "IntegerLiteralSeparatorFixer.h"
#include "NamespaceEndCommentsFixer.h"
#include "QualifierAlignmentFixer.h"
#include "SortJavaScriptImports.h"
@ -335,6 +336,14 @@ struct ScalarEnumerationTraits<FormatStyle::IndentExternBlockStyle> {
}
};
// YAML mapping for the nested ``IntegerLiteralSeparator`` option. Each base is
// registered under its own key ("Binary", "Decimal", "Hex") with mapOptional,
// so a configuration may specify any subset of the three.
template <> struct MappingTraits<FormatStyle::IntegerLiteralSeparatorStyle> {
static void mapping(IO &IO, FormatStyle::IntegerLiteralSeparatorStyle &Base) {
IO.mapOptional("Binary", Base.Binary);
IO.mapOptional("Decimal", Base.Decimal);
IO.mapOptional("Hex", Base.Hex);
}
};
template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) {
IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave);
@ -881,6 +890,7 @@ template <> struct MappingTraits<FormatStyle> {
Style.IndentWrappedFunctionNames);
IO.mapOptional("InsertBraces", Style.InsertBraces);
IO.mapOptional("InsertTrailingCommas", Style.InsertTrailingCommas);
IO.mapOptional("IntegerLiteralSeparator", Style.IntegerLiteralSeparator);
IO.mapOptional("JavaImportGroups", Style.JavaImportGroups);
IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
IO.mapOptional("JavaScriptWrapImports", Style.JavaScriptWrapImports);
@ -1335,6 +1345,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
LLVMStyle.IndentWrappedFunctionNames = false;
LLVMStyle.InsertBraces = false;
LLVMStyle.InsertTrailingCommas = FormatStyle::TCS_None;
LLVMStyle.IntegerLiteralSeparator = {/*Binary=*/0, /*Decimal=*/0, /*Hex=*/0};
LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
LLVMStyle.JavaScriptWrapImports = true;
LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
@ -3391,6 +3402,10 @@ reformat(const FormatStyle &Style, StringRef Code,
AnalyzerPass;
SmallVector<AnalyzerPass, 8> Passes;
Passes.emplace_back([&](const Environment &Env) {
return IntegerLiteralSeparatorFixer().process(Env, Expanded);
});
if (Style.isCpp()) {
if (Style.QualifierAlignment != FormatStyle::QAS_Leave) {
Passes.emplace_back([&](const Environment &Env) {

View File

@ -0,0 +1,201 @@
//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements IntegerLiteralSeparatorFixer that fixes integer
/// literal separators in C++, Objective-C, C#, Java, and JavaScript.
///
//===----------------------------------------------------------------------===//
#include "IntegerLiteralSeparatorFixer.h"
namespace clang {
namespace format {
/// Radix of a numeric literal as indicated by its first two characters.
/// Other is used for a leading '0' that is not followed by b/B or x/X.
enum class Base { Binary, Decimal, Hex, Other };

/// Classifies \p IntegerLiteral by its prefix.
///
/// The literal must have at least two characters and is expected to start
/// with a decimal digit (both are asserted). A non-zero first digit means
/// decimal; otherwise the second character selects binary, hex, or Other.
static Base getBase(const StringRef IntegerLiteral) {
  assert(IntegerLiteral.size() > 1);

  const char Leading = IntegerLiteral[0];
  if (Leading > '0') {
    assert(Leading <= '9');
    return Base::Decimal;
  }
  assert(Leading == '0');

  const char Marker = IntegerLiteral[1];
  if (Marker == 'b' || Marker == 'B')
    return Base::Binary;
  if (Marker == 'x' || Marker == 'X')
    return Base::Hex;
  return Base::Other;
}
std::pair<tooling::Replacements, unsigned>
IntegerLiteralSeparatorFixer::process(const Environment &Env,
const FormatStyle &Style) {
switch (Style.Language) {
case FormatStyle::LK_Cpp:
case FormatStyle::LK_ObjC:
Separator = '\'';
break;
case FormatStyle::LK_CSharp:
case FormatStyle::LK_Java:
case FormatStyle::LK_JavaScript:
Separator = '_';
break;
default:
return {};
}
const auto &Option = Style.IntegerLiteralSeparator;
const auto Binary = Option.Binary;
const auto Decimal = Option.Decimal;
const auto Hex = Option.Hex;
const bool SkipBinary = Binary == 0;
const bool SkipDecimal = Decimal == 0;
const bool SkipHex = Hex == 0;
if (SkipBinary && SkipDecimal && SkipHex)
return {};
const auto ID = Env.getFileID();
const auto &SourceMgr = Env.getSourceManager();
AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
std::unique_ptr<Lexer> Lex;
Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr,
getFormattingLangOpts(Style)));
Lex->SetCommentRetentionState(true);
Token Tok;
Lex->LexFromRawLexer(Tok);
tooling::Replacements Result;
for (bool Skip = false; Tok.isNot(tok::eof); Lex->LexFromRawLexer(Tok)) {
auto Length = Tok.getLength();
if (Length < 2)
continue;
auto Location = Tok.getLocation();
auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
if (Tok.is(tok::comment)) {
if (Text == "// clang-format off" || Text == "/* clang-format off */")
Skip = true;
else if (Text == "// clang-format on" || Text == "/* clang-format on */")
Skip = false;
continue;
}
if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
!AffectedRangeMgr.affectsCharSourceRange(CharSourceRange::getCharRange(
Location, Location.getLocWithOffset(Length)))) {
continue;
}
const auto B = getBase(Text);
const bool IsBase2 = B == Base::Binary;
const bool IsBase10 = B == Base::Decimal;
const bool IsBase16 = B == Base::Hex;
if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
(IsBase16 && SkipHex) || B == Base::Other) {
continue;
}
if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
(IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
continue;
}
if (((IsBase2 && Binary < 0) || (IsBase10 && Decimal < 0) ||
(IsBase16 && Hex < 0)) &&
Text.find(Separator) == StringRef::npos) {
continue;
}
const auto Start = Text[0] == '0' ? 2 : 0;
auto End = Text.find_first_of("uUlLzZn");
if (End == StringRef::npos)
End = Length;
if (Start > 0 || End < Length) {
Length = End - Start;
Text = Text.substr(Start, Length);
}
auto DigitsPerGroup = Decimal;
if (IsBase2)
DigitsPerGroup = Binary;
else if (IsBase16)
DigitsPerGroup = Hex;
if (DigitsPerGroup > 0 && checkSeparator(Text, DigitsPerGroup))
continue;
if (Start > 0)
Location = Location.getLocWithOffset(Start);
cantFail(Result.add(tooling::Replacement(SourceMgr, Location, Length,
format(Text, DigitsPerGroup))));
}
return {Result, 0};
}
/// Returns true if the digits in \p IntegerLiteral (prefix and suffix already
/// removed) are grouped exactly \p DigitsPerGroup at a time, counting from the
/// rightmost digit, so that the literal does not need to be rewritten.
///
/// Every separator must be preceded (to its right) by a full group, and no
/// run of digits may be longer than a group. The leftmost group may be
/// shorter. \p DigitsPerGroup must be positive.
bool IntegerLiteralSeparatorFixer::checkSeparator(
    const StringRef IntegerLiteral, int DigitsPerGroup) const {
  assert(DigitsPerGroup > 0);

  // Number of digits seen since the last separator, scanning right to left.
  int I = 0;
  for (auto C : llvm::reverse(IntegerLiteral)) {
    if (C == Separator) {
      // A separator after fewer than DigitsPerGroup digits is misplaced.
      if (I < DigitsPerGroup)
        return false;
      I = 0;
    } else {
      // Test before counting this digit: a group that is exactly full is
      // fine; only a further digit without a separator in between is not.
      if (I == DigitsPerGroup)
        return false;
      ++I;
    }
  }

  return true;
}
std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
int DigitsPerGroup) const {
assert(DigitsPerGroup != 0);
std::string Formatted;
if (DigitsPerGroup < 0) {
for (auto C : IntegerLiteral)
if (C != Separator)
Formatted.push_back(C);
return Formatted;
}
int DigitCount = 0;
for (auto C : IntegerLiteral)
if (C != Separator)
++DigitCount;
int Remainder = DigitCount % DigitsPerGroup;
int I = 0;
for (auto C : IntegerLiteral) {
if (C == Separator)
continue;
if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
Formatted.push_back(Separator);
I = 0;
Remainder = 0;
}
Formatted.push_back(C);
++I;
}
return Formatted;
}
} // namespace format
} // namespace clang

View File

@ -0,0 +1,38 @@
//===--- IntegerLiteralSeparatorFixer.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file declares IntegerLiteralSeparatorFixer that fixes integer
/// literal separators in C++, Objective-C, C#, Java, and JavaScript.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_FORMAT_INTEGERLITERALSEPARATORFIXER_H
#define LLVM_CLANG_LIB_FORMAT_INTEGERLITERALSEPARATORFIXER_H
#include "TokenAnalyzer.h"
namespace clang {
namespace format {
/// Produces replacements that insert, regroup or remove the digit separators
/// of integer literals as requested by FormatStyle::IntegerLiteralSeparator.
class IntegerLiteralSeparatorFixer {
public:
  /// Computes the replacements for the file in \p Env using the settings in
  /// \p Style. Also selects the separator character for the style's language.
  std::pair<tooling::Replacements, unsigned> process(const Environment &Env,
                                                     const FormatStyle &Style);

private:
  /// Returns true only if the digits in \p IntegerLiteral are already grouped
  /// as requested and the literal can be skipped. \p DigitsPerGroup must be
  /// positive.
  bool checkSeparator(const StringRef IntegerLiteral, int DigitsPerGroup) const;

  /// Returns the digits of \p IntegerLiteral with separators removed
  /// (negative \p DigitsPerGroup) or re-inserted every \p DigitsPerGroup
  /// digits from the right (positive). \p DigitsPerGroup must not be 0.
  std::string format(const StringRef IntegerLiteral, int DigitsPerGroup) const;

  /// Separator character for the language being formatted; process() assigns
  /// it before the helpers run. Initialized so it never holds an
  /// indeterminate value, even when process() returns early.
  char Separator = '\'';
};
} // end namespace format
} // end namespace clang
#endif

View File

@ -21,6 +21,7 @@ add_clang_unittest(FormatTests
FormatTestTableGen.cpp
FormatTestTextProto.cpp
FormatTestVerilog.cpp
IntegerLiteralSeparatorTest.cpp
MacroCallReconstructorTest.cpp
MacroExpanderTest.cpp
NamespaceEndCommentsFixerTest.cpp

View File

@ -0,0 +1,228 @@
//===- unittest/Format/IntegerLiteralSeparatorTest.cpp --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Format/Format.h"
#include "../Tooling/ReplacementTest.h"
#include "FormatTestUtils.h"
#define DEBUG_TYPE "integer-literal-separator-test"
namespace clang {
namespace format {
namespace {
// TODO:
// Refactor the class declaration, which is copied from BracesInserterTest.cpp.
/// Fixture that runs reformat() over snippets and compares the outcome with
/// the expected text.
class IntegerLiteralSeparatorTest : public ::testing::Test {
protected:
  /// Reformats \p Code with \p Style, restricted to \p Ranges (the whole
  /// buffer if \p Ranges is empty), and returns the resulting text.
  std::string format(llvm::StringRef Code, const FormatStyle &Style,
                     const std::vector<tooling::Range> &Ranges) {
    LLVM_DEBUG(llvm::errs() << "---\n");
    LLVM_DEBUG(llvm::errs() << Code << "\n\n");

    // No explicit ranges means a single range spanning all of Code.
    std::vector<tooling::Range> EffectiveRanges = Ranges;
    if (EffectiveRanges.empty())
      EffectiveRanges.assign(1, tooling::Range(0, Code.size()));

    FormattingAttemptStatus Status;
    const tooling::Replacements Replaces =
        reformat(Style, Code, EffectiveRanges, "<stdin>", &Status);
    EXPECT_EQ(true, Status.FormatComplete) << Code << "\n\n";
    ReplacementCount = Replaces.size();

    auto Formatted = applyAllReplacements(Code, Replaces);
    EXPECT_TRUE(static_cast<bool>(Formatted));
    LLVM_DEBUG(llvm::errs() << "\n" << *Formatted << "\n\n");
    return *Formatted;
  }

  /// Checks that \p Expected is a fixed point of formatting and that \p Code
  /// formats to \p Expected. For C++ styles without explicit ranges, the
  /// messed-up \p Code is additionally checked as Objective-C.
  void _verifyFormat(const char *File, int Line, llvm::StringRef Expected,
                     llvm::StringRef Code,
                     const FormatStyle &Style = getLLVMStyle(),
                     const std::vector<tooling::Range> &Ranges = {}) {
    testing::ScopedTrace Trace(File, Line, ::testing::Message() << Code.str());

    EXPECT_EQ(Expected.str(), format(Expected, Style, Ranges))
        << "Expected code is not stable";
    EXPECT_EQ(Expected.str(), format(Code, Style, Ranges));

    const bool CheckObjC =
        Style.Language == FormatStyle::LK_Cpp && Ranges.empty();
    if (!CheckObjC)
      return;

    // Objective-C++ is a superset of C++, so everything checked for C++
    // needs to be checked for Objective-C++ as well.
    FormatStyle ObjCStyle = Style;
    ObjCStyle.Language = FormatStyle::LK_ObjC;
    EXPECT_EQ(Expected.str(), format(test::messUp(Code), ObjCStyle, Ranges));
  }

  /// Convenience overload: \p Code must already be formatted, i.e. formatting
  /// it must leave it unchanged.
  void _verifyFormat(const char *File, int Line, llvm::StringRef Code,
                     const FormatStyle &Style = getLLVMStyle(),
                     const std::vector<tooling::Range> &Ranges = {}) {
    _verifyFormat(File, Line, Code, Code, Style, Ranges);
  }

  /// Number of replacements produced by the most recent format() call.
  int ReplacementCount;
};
#define verifyFormat(...) _verifyFormat(__FILE__, __LINE__, __VA_ARGS__)
// Exercises the ' separator with the default (C++) LLVM style. Style is
// mutated step by step, so every check runs with all earlier settings still
// in effect.
TEST_F(IntegerLiteralSeparatorTest, SingleQuoteAsSeparator) {
FormatStyle Style = getLLVMStyle();
// The LLVM defaults leave literals of every base untouched.
EXPECT_EQ(Style.Language, FormatStyle::LK_Cpp);
EXPECT_EQ(Style.IntegerLiteralSeparator.Binary, 0);
EXPECT_EQ(Style.IntegerLiteralSeparator.Decimal, 0);
EXPECT_EQ(Style.IntegerLiteralSeparator.Hex, 0);
// Binary: unchanged with 0, stripped with -1, regrouped with 1 and 4. The u
// suffix must be preserved.
const StringRef Binary("b = 0b10011'11'0110'1u;");
verifyFormat(Binary, Style);
Style.IntegerLiteralSeparator.Binary = -1;
verifyFormat("b = 0b100111101101u;", Binary, Style);
Style.IntegerLiteralSeparator.Binary = 1;
verifyFormat("b = 0b1'0'0'1'1'1'1'0'1'1'0'1u;", Binary, Style);
Style.IntegerLiteralSeparator.Binary = 4;
verifyFormat("b = 0b1001'1110'1101u;", Binary, Style);
// Decimal is still 0 here, so the first check expects no change even though
// Binary is now 4.
const StringRef Decimal("d = 184467'440737'0'95505'92Ull;");
verifyFormat(Decimal, Style);
Style.IntegerLiteralSeparator.Decimal = -1;
verifyFormat("d = 18446744073709550592Ull;", Decimal, Style);
Style.IntegerLiteralSeparator.Decimal = 3;
verifyFormat("d = 18'446'744'073'709'550'592Ull;", Decimal, Style);
// Hex is still 0 for the first check; then stripped and grouped by 2.
const StringRef Hex("h = 0xDEAD'BEEF'DE'AD'BEE'Fuz;");
verifyFormat(Hex, Style);
Style.IntegerLiteralSeparator.Hex = -1;
verifyFormat("h = 0xDEADBEEFDEADBEEFuz;", Hex, Style);
Style.IntegerLiteralSeparator.Hex = 2;
verifyFormat("h = 0xDE'AD'BE'EF'DE'AD'BE'EFuz;", Hex, Style);
// Literals with a leading 0 and no b/x prefix must stay as they are, even
// with all three bases enabled.
verifyFormat("o0 = 0;\n"
"o1 = 07;\n"
"o5 = 012345",
Style);
}
// Exercises the _ separator for C#, Java and JavaScript, followed by checks
// on n-suffixed literals that run with the JavaScript settings left over from
// the last TestUnderscore call.
TEST_F(IntegerLiteralSeparatorTest, UnderscoreAsSeparator) {
FormatStyle Style = getLLVMStyle();
const StringRef Binary("B = 0B10011_11_0110_1;");
const StringRef Decimal("d = 184467_440737_0_95505_92;");
const StringRef Hex("H = 0XDEAD_BEEF_DE_AD_BEE_F;");
// Runs the same leave / strip / regroup sequence for one language. Each
// base is reset to 0 before its own checks; settings of bases tested
// earlier stay in effect.
auto TestUnderscore = [&](auto Language) {
Style.Language = Language;
Style.IntegerLiteralSeparator.Binary = 0;
verifyFormat(Binary, Style);
Style.IntegerLiteralSeparator.Binary = -1;
verifyFormat("B = 0B100111101101;", Binary, Style);
Style.IntegerLiteralSeparator.Binary = 4;
verifyFormat("B = 0B1001_1110_1101;", Binary, Style);
Style.IntegerLiteralSeparator.Decimal = 0;
verifyFormat(Decimal, Style);
Style.IntegerLiteralSeparator.Decimal = -1;
verifyFormat("d = 18446744073709550592;", Decimal, Style);
Style.IntegerLiteralSeparator.Decimal = 3;
verifyFormat("d = 18_446_744_073_709_550_592;", Decimal, Style);
Style.IntegerLiteralSeparator.Hex = 0;
verifyFormat(Hex, Style);
Style.IntegerLiteralSeparator.Hex = -1;
verifyFormat("H = 0XDEADBEEFDEADBEEF;", Hex, Style);
Style.IntegerLiteralSeparator.Hex = 2;
verifyFormat("H = 0XDE_AD_BE_EF_DE_AD_BE_EF;", Hex, Style);
};
TestUnderscore(FormatStyle::LK_CSharp);
TestUnderscore(FormatStyle::LK_Java);
TestUnderscore(FormatStyle::LK_JavaScript);
// From here on: Language is JavaScript, Decimal is 3 and Hex is 2. The n
// suffix must be kept and must not be counted as a digit.
verifyFormat("d = 9_007_199_254_740_995n;", Style);
verifyFormat("d = 9_007_199_254_740_995n;", "d = 9007199254740995n;", Style);
Style.IntegerLiteralSeparator.Binary = 8;
verifyFormat(
"b = 0b100000_00000000_00000000_00000000_00000000_00000000_00000011n;",
"b = 0b100000000000000000000000000000000000000000000000000011n;", Style);
verifyFormat("h = 0x20_00_00_00_00_00_03n;", Style);
verifyFormat("h = 0x20_00_00_00_00_00_03n;", "h = 0x20000000000003n;", Style);
// A 0o-prefixed literal is expected to be left alone.
verifyFormat("o = 0o400000000000000003n;", Style);
}
// Checks that only literals inside the requested ranges are rewritten and
// that literals between "clang-format off" and "clang-format on" are never
// touched.
TEST_F(IntegerLiteralSeparatorTest, FixRanges) {
FormatStyle Style = getLLVMStyle();
Style.IntegerLiteralSeparator.Decimal = 3;
const StringRef Code("i = -12'34;\n"
"// clang-format off\n"
"j = 123'4;\n"
"// clang-format on\n"
"k = +1'23'4;");
// Result of formatting the whole buffer: lines 1 and 5 are fixed, line 3 is
// protected by the off/on comments.
const StringRef Expected("i = -1'234;\n"
"// clang-format off\n"
"j = 123'4;\n"
"// clang-format on\n"
"k = +1'23'4;");
verifyFormat(Expected, Code, Style);
// Only line 1 is in range, so line 5 keeps its original grouping.
verifyFormat("i = -1'234;\n"
"// clang-format off\n"
"j = 123'4;\n"
"// clang-format on\n"
"k = +1'23'4;",
Code, Style, {tooling::Range(0, 11)}); // line 1
// Line 3 is in range but inside the disabled region: nothing changes.
verifyFormat(Code, Style, {tooling::Range(32, 10)}); // line 3
// NOTE(review): by character count line 5 appears to start at offset 62, so
// this range begins on the preceding newline; it still covers the literal.
verifyFormat("i = -12'34;\n"
"// clang-format off\n"
"j = 123'4;\n"
"// clang-format on\n"
"k = +1'234;",
Code, Style, {tooling::Range(61, 12)}); // line 5
verifyFormat(Expected, Code, Style,
{tooling::Range(0, 11), tooling::Range(61, 12)}); // lines 1, 5
}
// Checks that floating-point literals are left unchanged even though decimal
// and hex grouping are enabled. Every snippet is expected to be returned as
// is, for each of the languages set below.
TEST_F(IntegerLiteralSeparatorTest, FloatingPoint) {
FormatStyle Style = getLLVMStyle();
Style.IntegerLiteralSeparator.Decimal = 3;
Style.IntegerLiteralSeparator.Hex = 2;
// C++ (default language): leading/trailing period, exponent, hex exponent.
verifyFormat("d0 = .0;\n"
"d1 = 0.;\n"
"y = 7890.;\n"
"E = 3456E2;\n"
"p = 0xABCp2;",
Style);
Style.Language = FormatStyle::LK_JavaScript;
verifyFormat("y = 7890.;\n"
"e = 3456e2;",
Style);
// Java additionally uses f and D suffixes here.
Style.Language = FormatStyle::LK_Java;
verifyFormat("y = 7890.;\n"
"E = 3456E2;\n"
"P = 0xABCP2;\n"
"f = 1234f;\n"
"D = 5678D;",
Style);
// C# additionally uses F, d and M suffixes here.
Style.Language = FormatStyle::LK_CSharp;
verifyFormat("y = 7890.;\n"
"e = 3456e2;\n"
"F = 1234F;\n"
"d = 5678d;\n"
"M = 9012M",
Style);
}
} // namespace
} // namespace format
} // namespace clang