mirror of
https://github.com/llvm/llvm-project.git
synced 2025-05-10 11:26:06 +00:00

* The getLine and getColumn functions need to update the position, or they will return stale data for buffered streams. This fixes a bug in the clang -analyzer-checker-option-help option, which was not wrapping the help text correctly when stdout is not a TTY.
* If the stream contains multi-byte UTF-8 sequences, then the whole sequence needs to be considered to be a single character. This has the edge case that the buffer might fill up and be flushed part way through a character.
* If the stream contains East Asian wide characters, these will be rendered twice as wide as other characters, so we need to increase the column count to match.

This doesn't attempt to handle everything Unicode can do (combining characters, right-to-left markers, ...), but hopefully covers most things likely to be common in messages and source code we might want to print.

Differential revision: https://reviews.llvm.org/D76291
156 lines
5.3 KiB
C++
156 lines
5.3 KiB
C++
//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file contains the implementation of formatted_raw_ostream.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Support/FormattedStream.h"
|
|
#include "llvm/Support/ConvertUTF.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/Unicode.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include <algorithm>
|
|
|
|
using namespace llvm;
|
|
|
|
/// UpdatePosition - Examine the given char sequence and figure out which
|
|
/// column we end up in after output, and how many line breaks are contained.
|
|
/// This assumes that the input string is well-formed UTF-8, and takes into
|
|
/// account Unicode characters which render as multiple columns wide.
|
|
void formatted_raw_ostream::UpdatePosition(const char *Ptr, size_t Size) {
|
|
unsigned &Column = Position.first;
|
|
unsigned &Line = Position.second;
|
|
|
|
auto ProcessUTF8CodePoint = [&Line, &Column](StringRef CP) {
|
|
int Width = sys::unicode::columnWidthUTF8(CP);
|
|
if (Width != sys::unicode::ErrorNonPrintableCharacter)
|
|
Column += Width;
|
|
|
|
// The only special whitespace characters we care about are single-byte.
|
|
if (CP.size() > 1)
|
|
return;
|
|
|
|
switch (CP[0]) {
|
|
case '\n':
|
|
Line += 1;
|
|
LLVM_FALLTHROUGH;
|
|
case '\r':
|
|
Column = 0;
|
|
break;
|
|
case '\t':
|
|
// Assumes tab stop = 8 characters.
|
|
Column += (8 - (Column & 0x7)) & 0x7;
|
|
break;
|
|
}
|
|
};
|
|
|
|
// If we have a partial UTF-8 sequence from the previous buffer, check that
|
|
// first.
|
|
if (PartialUTF8Char.size()) {
|
|
size_t BytesFromBuffer =
|
|
getNumBytesForUTF8(PartialUTF8Char[0]) - PartialUTF8Char.size();
|
|
if (Size < BytesFromBuffer) {
|
|
// If we still don't have enough bytes for a complete code point, just
|
|
// append what we have.
|
|
PartialUTF8Char.append(StringRef(Ptr, Size));
|
|
return;
|
|
} else {
|
|
// The first few bytes from the buffer will complete the code point.
|
|
// Concatenate them and process their effect on the line and column
|
|
// numbers.
|
|
PartialUTF8Char.append(StringRef(Ptr, BytesFromBuffer));
|
|
ProcessUTF8CodePoint(PartialUTF8Char);
|
|
PartialUTF8Char.clear();
|
|
Ptr += BytesFromBuffer;
|
|
Size -= BytesFromBuffer;
|
|
}
|
|
}
|
|
|
|
// Now scan the rest of the buffer.
|
|
unsigned NumBytes;
|
|
for (const char *End = Ptr + Size; Ptr < End; Ptr += NumBytes) {
|
|
NumBytes = getNumBytesForUTF8(*Ptr);
|
|
|
|
// The buffer might end part way through a UTF-8 code unit sequence for a
|
|
// Unicode scalar value if it got flushed. If this happens, we can't know
|
|
// the display width until we see the rest of the code point. Stash the
|
|
// bytes we do have, so that we can reconstruct the whole code point later,
|
|
// even if the buffer is being flushed.
|
|
if ((End - Ptr) < NumBytes) {
|
|
PartialUTF8Char = StringRef(Ptr, End - Ptr);
|
|
return;
|
|
}
|
|
|
|
ProcessUTF8CodePoint(StringRef(Ptr, NumBytes));
|
|
}
|
|
}
|
|
|
|
/// ComputePosition - Examine the current output and update line and column
|
|
/// counts.
|
|
void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) {
|
|
// If our previous scan pointer is inside the buffer, assume we already
|
|
// scanned those bytes. This depends on raw_ostream to not change our buffer
|
|
// in unexpected ways.
|
|
if (Ptr <= Scanned && Scanned <= Ptr + Size)
|
|
// Scan all characters added since our last scan to determine the new
|
|
// column.
|
|
UpdatePosition(Scanned, Size - (Scanned - Ptr));
|
|
else
|
|
UpdatePosition(Ptr, Size);
|
|
|
|
// Update the scanning pointer.
|
|
Scanned = Ptr + Size;
|
|
}
|
|
|
|
/// PadToColumn - Align the output to some column number.
|
|
///
|
|
/// \param NewCol - The column to move to.
|
|
///
|
|
formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
|
|
// Figure out what's in the buffer and add it to the column count.
|
|
ComputePosition(getBufferStart(), GetNumBytesInBuffer());
|
|
|
|
// Output spaces until we reach the desired column.
|
|
indent(std::max(int(NewCol - getColumn()), 1));
|
|
return *this;
|
|
}
|
|
|
|
/// write_impl - Update the line and column counts for the bytes being
/// written, then hand those bytes on to the wrapped stream.
///
/// \param Ptr - Start of the bytes to write.
/// \param Size - Number of bytes to write.
void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
  // Fold the outgoing bytes into the line/column counts first.
  ComputePosition(Ptr, Size);

  // Forward the data to the underlying stream (which is unbuffered, so the
  // data will be immediately written out).
  TheStream->write(Ptr, Size);

  // Drop the scan pointer so the next ComputePosition call scans its whole
  // range from the start.
  Scanned = nullptr;
}
|
|
|
|
/// fouts() - This returns a reference to a formatted_raw_ostream for
|
|
/// standard output. Use it like: fouts() << "foo" << "bar";
|
|
formatted_raw_ostream &llvm::fouts() {
|
|
static formatted_raw_ostream S(outs());
|
|
return S;
|
|
}
|
|
|
|
/// ferrs() - This returns a reference to a formatted_raw_ostream for
|
|
/// standard error. Use it like: ferrs() << "foo" << "bar";
|
|
formatted_raw_ostream &llvm::ferrs() {
|
|
static formatted_raw_ostream S(errs());
|
|
return S;
|
|
}
|
|
|
|
/// fdbgs() - This returns a reference to a formatted_raw_ostream for
|
|
/// the debug stream. Use it like: fdbgs() << "foo" << "bar";
|
|
formatted_raw_ostream &llvm::fdbgs() {
|
|
static formatted_raw_ostream S(dbgs());
|
|
return S;
|
|
}
|