mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-24 02:46:05 +00:00
[clangd] Introduce intermediate representation of formatted text
Summary: That can render to markdown or plain text. Used for findHover requests. Reviewers: malaperle, sammccall, kadircet Reviewed By: sammccall Subscribers: mgorny, MaskRay, jkorous, arphaman, kadircet, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D58547 llvm-svn: 360151
This commit is contained in:
parent
4b81e9f8d1
commit
5b0872fcfd
@ -50,6 +50,7 @@ add_clang_library(clangDaemon
|
||||
FileDistance.cpp
|
||||
FS.cpp
|
||||
FSProvider.cpp
|
||||
FormattedString.cpp
|
||||
FuzzyMatch.cpp
|
||||
GlobalCompilationDatabase.cpp
|
||||
Headers.cpp
|
||||
|
173
clang-tools-extra/clangd/FormattedString.cpp
Normal file
173
clang-tools-extra/clangd/FormattedString.cpp
Normal file
@ -0,0 +1,173 @@
|
||||
//===--- FormattedString.cpp --------------------------------*- C++-*------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "FormattedString.h"
|
||||
#include "clang/Basic/CharInfo.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
|
||||
namespace {
|
||||
/// Escape a markdown text block. Ensures the punctuation will not introduce
|
||||
/// any of the markdown constructs.
|
||||
static std::string renderText(llvm::StringRef Input) {
|
||||
// Escaping ASCII punctiation ensures we can't start a markdown construct.
|
||||
constexpr llvm::StringLiteral Punctuation =
|
||||
R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
|
||||
|
||||
std::string R;
|
||||
for (size_t From = 0; From < Input.size();) {
|
||||
size_t Next = Input.find_first_of(Punctuation, From);
|
||||
R += Input.substr(From, Next - From);
|
||||
if (Next == llvm::StringRef::npos)
|
||||
break;
|
||||
R += "\\";
|
||||
R += Input[Next];
|
||||
|
||||
From = Next + 1;
|
||||
}
|
||||
return R;
|
||||
}
|
||||
|
||||
/// Renders \p Input as an inline block of code in markdown. The returned value
|
||||
/// is surrounded by backticks and the inner contents are properly escaped.
|
||||
static std::string renderInlineBlock(llvm::StringRef Input) {
|
||||
std::string R;
|
||||
// Double all backticks to make sure we don't close the inline block early.
|
||||
for (size_t From = 0; From < Input.size();) {
|
||||
size_t Next = Input.find("`", From);
|
||||
R += Input.substr(From, Next - From);
|
||||
if (Next == llvm::StringRef::npos)
|
||||
break;
|
||||
R += "``"; // double the found backtick.
|
||||
|
||||
From = Next + 1;
|
||||
}
|
||||
// If results starts with a backtick, add spaces on both sides. The spaces
|
||||
// are ignored by markdown renderers.
|
||||
if (llvm::StringRef(R).startswith("`") || llvm::StringRef(R).endswith("`"))
|
||||
return "` " + std::move(R) + " `";
|
||||
// Markdown render should ignore first and last space if both are there. We
|
||||
// add an extra pair of spaces in that case to make sure we render what the
|
||||
// user intended.
|
||||
if (llvm::StringRef(R).startswith(" ") && llvm::StringRef(R).endswith(" "))
|
||||
return "` " + std::move(R) + " `";
|
||||
return "`" + std::move(R) + "`";
|
||||
}
|
||||
/// Render \p Input as markdown code block with a specified \p Language. The
|
||||
/// result is surrounded by >= 3 backticks. Although markdown also allows to use
|
||||
/// '~' for code blocks, they are never used.
|
||||
static std::string renderCodeBlock(llvm::StringRef Input,
|
||||
llvm::StringRef Language) {
|
||||
// Count the maximum number of consecutive backticks in \p Input. We need to
|
||||
// start and end the code block with more.
|
||||
unsigned MaxBackticks = 0;
|
||||
unsigned Backticks = 0;
|
||||
for (char C : Input) {
|
||||
if (C == '`') {
|
||||
++Backticks;
|
||||
continue;
|
||||
}
|
||||
MaxBackticks = std::max(MaxBackticks, Backticks);
|
||||
Backticks = 0;
|
||||
}
|
||||
MaxBackticks = std::max(Backticks, MaxBackticks);
|
||||
// Use the corresponding number of backticks to start and end a code block.
|
||||
std::string BlockMarker(/*Repeat=*/std::max(3u, MaxBackticks + 1), '`');
|
||||
return BlockMarker + Language.str() + "\n" + Input.str() + "\n" + BlockMarker;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void FormattedString::appendText(std::string Text) {
|
||||
// We merge consecutive blocks of text to simplify the overall structure.
|
||||
if (Chunks.empty() || Chunks.back().Kind != ChunkKind::PlainText) {
|
||||
Chunk C;
|
||||
C.Kind = ChunkKind::PlainText;
|
||||
Chunks.push_back(C);
|
||||
}
|
||||
// FIXME: ensure there is a whitespace between the chunks.
|
||||
Chunks.back().Contents += Text;
|
||||
}
|
||||
|
||||
void FormattedString::appendCodeBlock(std::string Code, std::string Language) {
|
||||
Chunk C;
|
||||
C.Kind = ChunkKind::CodeBlock;
|
||||
C.Contents = std::move(Code);
|
||||
C.Language = std::move(Language);
|
||||
Chunks.push_back(std::move(C));
|
||||
}
|
||||
|
||||
void FormattedString::appendInlineCode(std::string Code) {
|
||||
Chunk C;
|
||||
C.Kind = ChunkKind::InlineCodeBlock;
|
||||
C.Contents = std::move(Code);
|
||||
Chunks.push_back(std::move(C));
|
||||
}
|
||||
|
||||
std::string FormattedString::renderAsMarkdown() const {
|
||||
std::string R;
|
||||
for (const auto &C : Chunks) {
|
||||
switch (C.Kind) {
|
||||
case ChunkKind::PlainText:
|
||||
R += renderText(C.Contents);
|
||||
continue;
|
||||
case ChunkKind::InlineCodeBlock:
|
||||
// Make sure we don't glue two backticks together.
|
||||
if (llvm::StringRef(R).endswith("`"))
|
||||
R += " ";
|
||||
R += renderInlineBlock(C.Contents);
|
||||
continue;
|
||||
case ChunkKind::CodeBlock:
|
||||
if (!R.empty() && !llvm::StringRef(R).endswith("\n"))
|
||||
R += "\n";
|
||||
R += renderCodeBlock(C.Contents, C.Language);
|
||||
R += "\n";
|
||||
continue;
|
||||
}
|
||||
llvm_unreachable("unhanlded ChunkKind");
|
||||
}
|
||||
return R;
|
||||
}
|
||||
|
||||
std::string FormattedString::renderAsPlainText() const {
|
||||
std::string R;
|
||||
auto EnsureWhitespace = [&]() {
|
||||
if (R.empty() || isWhitespace(R.back()))
|
||||
return;
|
||||
R += " ";
|
||||
};
|
||||
for (const auto &C : Chunks) {
|
||||
switch (C.Kind) {
|
||||
case ChunkKind::PlainText:
|
||||
EnsureWhitespace();
|
||||
R += C.Contents;
|
||||
continue;
|
||||
case ChunkKind::InlineCodeBlock:
|
||||
EnsureWhitespace();
|
||||
R += C.Contents;
|
||||
continue;
|
||||
case ChunkKind::CodeBlock:
|
||||
if (!R.empty())
|
||||
R += "\n\n";
|
||||
R += C.Contents;
|
||||
if (!llvm::StringRef(C.Contents).endswith("\n"))
|
||||
R += "\n";
|
||||
continue;
|
||||
}
|
||||
llvm_unreachable("unhanlded ChunkKind");
|
||||
}
|
||||
while (!R.empty() && isWhitespace(R.back()))
|
||||
R.pop_back();
|
||||
return R;
|
||||
}
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
57
clang-tools-extra/clangd/FormattedString.h
Normal file
57
clang-tools-extra/clangd/FormattedString.h
Normal file
@ -0,0 +1,57 @@
|
||||
//===--- FormattedString.h ----------------------------------*- C++-*------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// A simple intermediate representation of formatted text that could be
|
||||
// converted to plaintext or markdown.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMATTEDSTRING_H
|
||||
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMATTEDSTRING_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
|
||||
/// A structured string representation that could be converted to markdown or
/// plaintext upon request.
class FormattedString {
public:
  /// Append plain text to the end of the string.
  void appendText(std::string Text);

  /// Append a block of code written in \p Language (C++ unless specified
  /// otherwise). This translates to a ``` block in markdown. In a plain text
  /// representation, the code block will be surrounded by newlines.
  void appendCodeBlock(std::string Code, std::string Language = "cpp");

  /// Append an inline block of code. This translates to the ` block in
  /// markdown.
  void appendInlineCode(std::string Code);

  /// Render everything appended so far as markdown.
  std::string renderAsMarkdown() const;

  /// Render everything appended so far as plain text.
  std::string renderAsPlainText() const;

private:
  enum class ChunkKind {
    /// A plain text paragraph.
    PlainText,
    /// A block of code.
    CodeBlock,
    /// An inline block of code.
    InlineCodeBlock,
  };

  struct Chunk {
    ChunkKind Kind = ChunkKind::PlainText;
    std::string Contents;
    /// Language for code block chunks. Ignored for other chunks.
    std::string Language;
  };

  std::vector<Chunk> Chunks;
};
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
||||
#endif
|
@ -38,6 +38,7 @@ add_unittest(ClangdUnitTests ClangdTests
|
||||
FileDistanceTests.cpp
|
||||
FileIndexTests.cpp
|
||||
FindSymbolsTests.cpp
|
||||
FormattedStringTests.cpp
|
||||
FSTests.cpp
|
||||
FunctionTests.cpp
|
||||
FuzzyMatchTests.cpp
|
||||
|
156
clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
Normal file
156
clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
Normal file
@ -0,0 +1,156 @@
|
||||
//===-- FormattedStringTests.cpp ------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "FormattedString.h"
|
||||
#include "clang/Basic/LLVM.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
namespace {
|
||||
|
||||
TEST(FormattedString, Basic) {
  // A string without chunks renders to nothing in both formats.
  FormattedString Empty;
  EXPECT_EQ(Empty.renderAsPlainText(), "");
  EXPECT_EQ(Empty.renderAsMarkdown(), "");

  // Plain text without punctuation is passed through unchanged.
  FormattedString Text;
  Text.appendText("foobar");
  EXPECT_EQ(Text.renderAsPlainText(), "foobar");
  EXPECT_EQ(Text.renderAsMarkdown(), "foobar");

  // Inline code gets backticks in markdown only.
  FormattedString Inline;
  Inline.appendInlineCode("foobar");
  EXPECT_EQ(Inline.renderAsPlainText(), "foobar");
  EXPECT_EQ(Inline.renderAsMarkdown(), "`foobar`");

  // A code block gets a fence with the default language in markdown only.
  FormattedString Block;
  Block.appendCodeBlock("foobar");
  EXPECT_EQ(Block.renderAsPlainText(), "foobar");
  EXPECT_EQ(Block.renderAsMarkdown(), "```cpp\n"
                                      "foobar\n"
                                      "```\n");
}
|
||||
|
||||
TEST(FormattedString, CodeBlocks) {
  // Two consecutive code blocks, the second with an explicit language.
  FormattedString Blocks;
  Blocks.appendCodeBlock("foobar");
  Blocks.appendCodeBlock("bazqux", "javascript");

  EXPECT_EQ(Blocks.renderAsPlainText(), "foobar\n\n\nbazqux");
  std::string ExpectedMarkdown = R"md(```cpp
foobar
```
```javascript
bazqux
```
)md";
  EXPECT_EQ(Blocks.renderAsMarkdown(), ExpectedMarkdown);

  // Two consecutive inline blocks are separated by a space in both formats.
  FormattedString Inlines;
  Inlines.appendInlineCode("foobar");
  Inlines.appendInlineCode("bazqux");
  EXPECT_EQ(Inlines.renderAsPlainText(), "foobar bazqux");
  EXPECT_EQ(Inlines.renderAsMarkdown(), "`foobar` `bazqux`");

  // Inline code between text: plain text inserts spaces, markdown does not.
  FormattedString Mixed;
  Mixed.appendText("foo");
  Mixed.appendInlineCode("bar");
  Mixed.appendText("baz");

  EXPECT_EQ(Mixed.renderAsPlainText(), "foo bar baz");
  EXPECT_EQ(Mixed.renderAsMarkdown(), "foo`bar`baz");
}
|
||||
|
||||
TEST(FormattedString, Escaping) {
  // Check some ASCII punctuation
  FormattedString S;
  S.appendText("*!`");
  EXPECT_EQ(S.renderAsMarkdown(), "\\*\\!\\`");

  // Check all ASCII punctuation.
  S = FormattedString();
  std::string Punctuation = R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
  // Same text, with each character escaped.
  std::string EscapedPunctuation;
  EscapedPunctuation.reserve(2 * Punctuation.size());
  for (char C : Punctuation)
    EscapedPunctuation += std::string("\\") + C;
  S.appendText(Punctuation);
  EXPECT_EQ(S.renderAsMarkdown(), EscapedPunctuation);

  // In code blocks we don't need to escape ASCII punctuation.
  S = FormattedString();
  S.appendInlineCode("* foo !+ bar * baz");
  EXPECT_EQ(S.renderAsMarkdown(), "`* foo !+ bar * baz`");
  S = FormattedString();
  S.appendCodeBlock("#define FOO\n* foo !+ bar * baz");
  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
                                  "#define FOO\n* foo !+ bar * baz\n"
                                  "```\n");

  // But we have to escape the backticks.
  S = FormattedString();
  S.appendInlineCode("foo`bar`baz");
  EXPECT_EQ(S.renderAsMarkdown(), "`foo``bar``baz`");

  S = FormattedString();
  S.appendCodeBlock("foo`bar`baz");
  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
                                  "foo`bar`baz\n"
                                  "```\n");

  // Inline code blocks starting or ending with backticks should add spaces.
  S = FormattedString();
  S.appendInlineCode("`foo");
  EXPECT_EQ(S.renderAsMarkdown(), "` ``foo `");
  S = FormattedString();
  S.appendInlineCode("foo`");
  EXPECT_EQ(S.renderAsMarkdown(), "` foo`` `");
  S = FormattedString();
  S.appendInlineCode("`foo`");
  EXPECT_EQ(S.renderAsMarkdown(), "` ``foo`` `");

  // Should also add extra spaces if the block starts and ends with spaces.
  // The input keeps its own spaces and gains one more on each side, hence two
  // spaces between each backtick and "foo".
  S = FormattedString();
  S.appendInlineCode(" foo ");
  EXPECT_EQ(S.renderAsMarkdown(), "`  foo  `");
  // A space on one side only needs no padding.
  S = FormattedString();
  S.appendInlineCode("foo ");
  EXPECT_EQ(S.renderAsMarkdown(), "`foo `");
  S = FormattedString();
  S.appendInlineCode(" foo");
  EXPECT_EQ(S.renderAsMarkdown(), "` foo`");

  // Code blocks might need more than 3 backticks.
  S = FormattedString();
  S.appendCodeBlock("foobarbaz `\nqux");
  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
                                  "foobarbaz `\nqux\n"
                                  "```\n");
  S = FormattedString();
  S.appendCodeBlock("foobarbaz ``\nqux");
  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
                                  "foobarbaz ``\nqux\n"
                                  "```\n");
  S = FormattedString();
  S.appendCodeBlock("foobarbaz ```\nqux");
  EXPECT_EQ(S.renderAsMarkdown(), "````cpp\n"
                                  "foobarbaz ```\nqux\n"
                                  "````\n");
  S = FormattedString();
  S.appendCodeBlock("foobarbaz ` `` ``` ```` `\nqux");
  EXPECT_EQ(S.renderAsMarkdown(), "`````cpp\n"
                                  "foobarbaz ` `` ``` ```` `\nqux\n"
                                  "`````\n");
}
|
||||
|
||||
} // namespace
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
Loading…
x
Reference in New Issue
Block a user