mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-26 04:46:08 +00:00

Original-commit: flang-compiler/f18@d01af89506 Reviewed-on: https://github.com/flang-compiler/f18/pull/9 Tree-same-pre-rewrite: false
576 lines
15 KiB
C++
576 lines
15 KiB
C++
#include "prescan.h"
|
|
#include "idioms.h"
|
|
#include "message.h"
|
|
#include "preprocessor.h"
|
|
#include "source.h"
|
|
#include "token-sequence.h"
|
|
#include <cctype>
|
|
#include <cstring>
|
|
#include <sstream>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
namespace Fortran {
|
|
namespace parser {
|
|
|
|
// Stores the message sink, the cooked-source output, and the preprocessor
// pointers.  No other member is named in this initializer list.
Prescanner::Prescanner(
    Messages *messages, CookedSource *cooked, Preprocessor *preprocessor)
    : messages_{messages},
      cooked_{cooked},
      preprocessor_{preprocessor} {}
|
|
|
|
// Copy constructor.  Duplicates the three shared pointers and the option
// settings of `that` (source form, fixed-form column limit, old debug lines,
// backslash escapes).  Members not listed here are not copied; they
// presumably take their in-class defaults -- confirm against the header.
Prescanner::Prescanner(const Prescanner &that)
    : messages_{that.messages_},
      cooked_{that.cooked_},
      preprocessor_{that.preprocessor_},
      inFixedForm_{that.inFixedForm_},
      fixedFormColumnLimit_{that.fixedFormColumnLimit_},
      enableOldDebugLines_{that.enableOldDebugLines_},
      enableBackslashEscapesInCharLiterals_{
          that.enableBackslashEscapesInCharLiterals_} {}
|
|
|
|
// Prescans the source bytes designated by `range` and emits the result to
// cooked_.  For every non-comment, non-directive line: tokenize it, attempt
// macro replacement, emit the (possibly replaced) tokens with case
// conversion, then emit a newline and any owed newlines.
// Returns true when no fatal error was recorded.
bool Prescanner::Prescan(ProvenanceRange range) {
  AllSources *sources{cooked_->allSources()};
  startProvenance_ = range.start;
  ProvenanceRange around{sources->GetContiguousRangeAround(startProvenance_)};
  CHECK(startProvenance_ + range.bytes <= around.start + around.bytes);
  const SourceFile *source{sources->GetSourceFile(startProvenance_)};
  size_t offset{startProvenance_ - around.start};
  start_ = source->content() + offset;
  lineStart_ = start_;
  limit_ = start_ + range.bytes;
  BeginSourceLine(start_);

  TokenSequence tokens;
  TokenSequence preprocessed;
  while (lineStart_ < limit_) {
    if (CommentLinesAndPreprocessorDirectives() && lineStart_ >= limit_) {
      break;  // the newline debt is settled right after the loop
    }
    BeginSourceLineAndAdvance();
    if (inFixedForm_) {
      LabelField(&tokens);
    } else {
      SkipSpaces();
    }
    while (NextToken(&tokens)) {
    }
    if (!preprocessor_->MacroReplacement(tokens, *this, &preprocessed)) {
      tokens.EmitWithCaseConversion(cooked_);
    } else {
      // A newline is appended temporarily so that the comment predicates
      // can find the end of the replaced line.
      // NOTE(review): those predicates also compare their argument with
      // limit_, which bounds the source buffer; preprocessed.data()
      // presumably points elsewhere -- confirm that comparison is intended.
      preprocessed.PutNextTokenChar('\n', newlineProvenance_);
      preprocessed.CloseToken();
      const bool becameComment{IsFixedFormCommentLine(preprocessed.data()) ||
          IsFreeFormComment(preprocessed.data())};
      if (becameComment) {
        ++newlineDebt_;
      } else {
        preprocessed.pop_back();  // clip the newline added above
        preprocessed.EmitWithCaseConversion(cooked_);
      }
      preprocessed.clear();
    }
    tokens.clear();
    cooked_->Put('\n', newlineProvenance_);
    PayNewlineDebt();
  }
  PayNewlineDebt();
  return !anyFatalErrors_;
}
|
|
|
|
std::optional<TokenSequence> Prescanner::NextTokenizedLine() {
|
|
if (lineStart_ >= limit_) {
|
|
return {};
|
|
}
|
|
bool wasInPreprocessorDirective{inPreprocessorDirective_};
|
|
auto saveAt = at_;
|
|
inPreprocessorDirective_ = true;
|
|
BeginSourceLineAndAdvance();
|
|
TokenSequence tokens;
|
|
while (NextToken(&tokens)) {
|
|
}
|
|
inPreprocessorDirective_ = wasInPreprocessorDirective;
|
|
at_ = saveAt;
|
|
return {std::move(tokens)};
|
|
}
|
|
|
|
void Prescanner::Complain(const std::string &message) {
|
|
messages_->Put({GetCurrentProvenance(), message});
|
|
}
|
|
|
|
void Prescanner::NextLine() {
|
|
void *vstart{static_cast<void *>(const_cast<char *>(lineStart_))};
|
|
void *v{std::memchr(vstart, '\n', limit_ - lineStart_)};
|
|
if (v == nullptr) {
|
|
lineStart_ = limit_;
|
|
} else {
|
|
const char *nl{const_cast<const char *>(static_cast<char *>(v))};
|
|
lineStart_ = nl + 1;
|
|
}
|
|
}
|
|
|
|
// Scans the fixed-form label field (columns 1 through 6) of the current
// line.  Non-blank characters are emitted as one token; the field is then
// padded with spaces, as a second token, to a total of six characters.  A
// tab ends the field early and sets column_ to 7.
void Prescanner::LabelField(TokenSequence *token) {
  int nextOutputColumn{1};
  while (*at_ != '\n' && column_ <= 6) {
    const char ch{*at_};
    if (ch == '\t') {
      ++at_;
      column_ = 7;
      break;
    }
    // '0' in column 6 becomes space
    const bool zeroInColumn6{ch == '0' && column_ == 6};
    if (ch != ' ' && !zeroInColumn6) {
      EmitChar(token, ch);
      ++nextOutputColumn;
    }
    ++column_;
    ++at_;
  }
  if (nextOutputColumn > 1) {
    token->CloseToken();
  }
  if (nextOutputColumn < 7) {
    do {
      token->PutNextTokenChar(' ', spaceProvenance_);
    } while (++nextOutputColumn < 7);
    token->CloseToken();
  }
}
|
|
|
|
void Prescanner::NextChar() {
|
|
CHECK(*at_ != '\n');
|
|
++at_;
|
|
++column_;
|
|
if (inPreprocessorDirective_) {
|
|
while (*at_ == '/' && at_[1] == '*') {
|
|
char star{' '}, slash{' '};
|
|
at_ += 2;
|
|
column_ += 2;
|
|
while ((*at_ != '\n' || slash == '\\') && (star != '*' || slash != '/')) {
|
|
star = slash;
|
|
slash = *at_++;
|
|
++column_;
|
|
}
|
|
}
|
|
while (*at_ == '\\' && at_ + 2 < limit_ && at_[1] == '\n') {
|
|
BeginSourceLineAndAdvance();
|
|
++newlineDebt_;
|
|
}
|
|
} else {
|
|
if ((inFixedForm_ && column_ > fixedFormColumnLimit_ &&
|
|
!tabInCurrentLine_) ||
|
|
(*at_ == '!' && !inCharLiteral_)) {
|
|
while (*at_ != '\n') {
|
|
++at_;
|
|
}
|
|
}
|
|
while (*at_ == '\n' || *at_ == '&') {
|
|
if ((inFixedForm_ && !FixedFormContinuation()) ||
|
|
(!inFixedForm_ && !FreeFormContinuation())) {
|
|
return;
|
|
}
|
|
}
|
|
if (*at_ == '\t') {
|
|
tabInCurrentLine_ = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
void Prescanner::SkipSpaces() {
|
|
while (*at_ == ' ' || *at_ == '\t') {
|
|
NextChar();
|
|
}
|
|
}
|
|
|
|
// Reports whether `ch` may appear in a name token: an alphanumeric
// character, '_', '$', or '@'.
static inline bool IsNameChar(char ch) {
  // The <cctype> classifiers have undefined behavior when given a value that
  // is neither EOF nor representable as unsigned char, which is what a
  // plain (signed) char holding a non-ASCII byte would be.  Convert first.
  return isalnum(static_cast<unsigned char>(ch)) || ch == '_' || ch == '$' ||
      ch == '@';
}
|
|
|
|
// Scans one token starting at at_ and appends it to `tokens`.
// Returns false, appending nothing, when the end of the line is reached;
// returns true after appending and closing one token.
// In free form a run of blanks yields a single-space token unless it is
// followed by the end of the line; in fixed form blanks are skipped.
// Token kinds handled: quoted character literals, numeric literals
// (including Hollerith), '.'-led tokens, names, '*' / '**', and other
// one- or two-character operators and delimiters.
bool Prescanner::NextToken(TokenSequence *tokens) {
  // The <cctype> classifiers have undefined behavior for arguments that are
  // neither EOF nor representable as unsigned char, so plain char values
  // (possibly negative for non-ASCII bytes) are converted before use.
  const auto isDigit = [](char ch) {
    return isdigit(static_cast<unsigned char>(ch)) != 0;
  };
  const auto isLetter = [](char ch) {
    return isalpha(static_cast<unsigned char>(ch)) != 0;
  };

  CHECK(at_ >= start_ && at_ < limit_);
  if (inFixedForm_) {
    SkipSpaces();
  } else if (*at_ == ' ' || *at_ == '\t') {
    Provenance here{GetCurrentProvenance()};
    NextChar();
    SkipSpaces();
    if (*at_ != '\n') {
      tokens->PutNextTokenChar(' ', here);
      tokens->CloseToken();
      return true;
    }
  }
  if (*at_ == '\n') {
    return false;
  }

  if (*at_ == '\'' || *at_ == '"') {
    QuotedCharacterLiteral(tokens);
    preventHollerith_ = false;
  } else if (isDigit(*at_)) {
    int n{0};
    // Accumulation of n stops at this bound, which keeps n from overflowing
    // on very long digit strings.
    static constexpr int maxHollerith = 256 * (132 - 6);
    do {
      if (n < maxHollerith) {
        n = 10 * n + *at_ - '0';
      }
      EmitCharAndAdvance(tokens, *at_);
      if (inFixedForm_) {
        SkipSpaces();
      }
    } while (isDigit(*at_));
    if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
        !preventHollerith_) {
      // Hollerith: the next n characters belong to the literal.
      EmitCharAndAdvance(tokens, 'h');
      inCharLiteral_ = true;
      while (n-- > 0) {
        if (PadOutCharacterLiteral()) {
          tokens->PutNextTokenChar(' ', spaceProvenance_);
        } else {
          if (*at_ == '\n') {
            break;  // TODO error
          }
          EmitCharAndAdvance(tokens, *at_);
        }
      }
      inCharLiteral_ = false;
    } else if (*at_ == '.') {
      while (isDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (ExponentAndKind(tokens)) {
    } else if (isLetter(*at_)) {
      // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
      // we don't misrecognize I9HOLLERITH as an identifier in the next case.
      EmitCharAndAdvance(tokens, *at_);
    }
    preventHollerith_ = false;
  } else if (*at_ == '.') {
    char nch{EmitCharAndAdvance(tokens, '.')};
    if (isDigit(nch)) {
      while (isDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') {
      EmitCharAndAdvance(tokens, '.');  // variadic macro definition ellipsis
    }
    preventHollerith_ = false;
  } else if (IsNameChar(*at_)) {
    while (IsNameChar(EmitCharAndAdvance(tokens, *at_))) {
    }
    // A quote directly after a name is scanned into the same token.
    if (*at_ == '\'' || *at_ == '"') {
      QuotedCharacterLiteral(tokens);
    }
    preventHollerith_ = false;
  } else if (*at_ == '*') {
    if (EmitCharAndAdvance(tokens, '*') == '*') {
      EmitCharAndAdvance(tokens, '*');
    } else {
      preventHollerith_ = true;  // ambiguity: CHARACTER*2H
    }
  } else {
    char ch{*at_};
    // Track bracket depth; the continuation-line logic consults it.
    if (ch == '(' || ch == '[') {
      ++delimiterNesting_;
    } else if ((ch == ')' || ch == ']') && delimiterNesting_ > 0) {
      --delimiterNesting_;
    }
    char nch{EmitCharAndAdvance(tokens, ch)};
    preventHollerith_ = false;
    if ((nch == '=' &&
            (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) ||
        (ch == nch &&
            (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' ||
                ch == '|' || ch == '<' || ch == '>')) ||
        (ch == '=' && nch == '>')) {
      // token comprises two characters
      EmitCharAndAdvance(tokens, nch);
    }
  }
  tokens->CloseToken();
  return true;
}
|
|
|
|
// Scans an optional exponent and kind suffix of a numeric literal into the
// current token.  Returns false, consuming nothing, unless the current
// character is 'e', 'E', 'd', or 'D'.  Otherwise emits the exponent letter
// in lower case, an optional sign, any digits, and, when an underscore
// follows, the underscore plus trailing name characters; then returns true.
bool Prescanner::ExponentAndKind(TokenSequence *tokens) {
  // tolower() and isdigit() have undefined behavior for arguments that are
  // neither EOF nor representable as unsigned char; convert the plain char
  // (negative for non-ASCII bytes when char is signed) before calling them.
  const char ed{
      static_cast<char>(tolower(static_cast<unsigned char>(*at_)))};
  if (ed != 'e' && ed != 'd') {
    return false;
  }
  EmitCharAndAdvance(tokens, ed);
  if (*at_ == '+' || *at_ == '-') {
    EmitCharAndAdvance(tokens, *at_);
  }
  while (isdigit(static_cast<unsigned char>(*at_))) {
    EmitCharAndAdvance(tokens, *at_);
  }
  if (*at_ == '_') {
    while (IsNameChar(EmitCharAndAdvance(tokens, *at_))) {
    }
  }
  return true;
}
|
|
|
|
// Scans a character literal that begins with the quote character at at_
// into the current token.  Scanning stops after a closing quote that is not
// immediately doubled, or at a newline.  inCharLiteral_ is set for the
// duration of the scan.
void Prescanner::QuotedCharacterLiteral(TokenSequence *tokens) {
  const char delimiter{*at_};
  // Emits blanks for as long as PadOutCharacterLiteral() asks for padding.
  const auto padOut = [this, tokens]() {
    while (PadOutCharacterLiteral()) {
      tokens->PutNextTokenChar(' ', spaceProvenance_);
    }
  };

  inCharLiteral_ = true;
  for (;;) {
    EmitCharAndAdvance(tokens, *at_);
    padOut();
    if (*at_ == '\\' && enableBackslashEscapesInCharLiterals_) {
      // Emit the backslash now; the escaped character is emitted at the
      // top of the next iteration without being inspected.
      EmitCharAndAdvance(tokens, '\\');
      padOut();
    } else if (*at_ == delimiter) {
      // A doubled quote mark becomes a single instance of the quote
      // character in the literal later.
      EmitCharAndAdvance(tokens, delimiter);
      if (inFixedForm_) {
        SkipSpaces();
      }
      if (*at_ != delimiter) {
        break;
      }
    }
    if (*at_ == '\n') {
      break;
    }
  }
  inCharLiteral_ = false;
}
|
|
|
|
bool Prescanner::PadOutCharacterLiteral() {
|
|
if (inFixedForm_ && !tabInCurrentLine_ && *at_ == '\n' &&
|
|
column_ < fixedFormColumnLimit_) {
|
|
++column_;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool Prescanner::IsFixedFormCommentLine(const char *start) {
|
|
if (start >= limit_ || !inFixedForm_) {
|
|
return false;
|
|
}
|
|
const char *p{start};
|
|
char ch{*p};
|
|
if (ch == '*' || ch == 'C' || ch == 'c' ||
|
|
ch == '%' || // VAX %list, %eject, &c.
|
|
((ch == 'D' || ch == 'd') && !enableOldDebugLines_)) {
|
|
return true;
|
|
}
|
|
bool anyTabs{false};
|
|
while (true) {
|
|
ch = *p;
|
|
if (ch == ' ') {
|
|
++p;
|
|
} else if (ch == '\t') {
|
|
anyTabs = true;
|
|
++p;
|
|
} else if (ch == '0' && !anyTabs && p == start + 5) {
|
|
++p; // 0 in column 6 must treated as a space
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
if (!anyTabs && p >= start + fixedFormColumnLimit_) {
|
|
return true;
|
|
}
|
|
if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) {
|
|
return true;
|
|
}
|
|
return *p == '\n';
|
|
}
|
|
|
|
bool Prescanner::IsFreeFormComment(const char *p) {
|
|
if (p >= limit_ || inFixedForm_) {
|
|
return false;
|
|
}
|
|
while (*p == ' ' || *p == '\t') {
|
|
++p;
|
|
}
|
|
return *p == '!' || *p == '\n';
|
|
}
|
|
|
|
bool Prescanner::IncludeLine(const char *p) {
|
|
if (p >= limit_) {
|
|
return false;
|
|
}
|
|
const char *start{p};
|
|
while (*p == ' ' || *p == '\t') {
|
|
++p;
|
|
}
|
|
for (char ch : "include"s) {
|
|
if (tolower(*p++) != ch) {
|
|
return false;
|
|
}
|
|
}
|
|
while (*p == ' ' || *p == '\t') {
|
|
++p;
|
|
}
|
|
if (*p != '"' && *p != '\'') {
|
|
return false;
|
|
}
|
|
char quote{*p};
|
|
std::string path;
|
|
for (++p; *p != '\n'; ++p) {
|
|
if (*p == quote) {
|
|
if (p[1] != quote) {
|
|
break;
|
|
}
|
|
++p;
|
|
}
|
|
path += *p;
|
|
}
|
|
if (*p != quote) {
|
|
messages_->Put({GetProvenance(p), "malformed path name string"});
|
|
anyFatalErrors_ = true;
|
|
return true;
|
|
}
|
|
for (++p; *p == ' ' || *p == '\t'; ++p) {
|
|
}
|
|
if (*p != '\n' && *p != '!') {
|
|
messages_->Put({GetProvenance(p), "excess characters after path name"});
|
|
}
|
|
std::stringstream error;
|
|
Provenance provenance{GetProvenance(start)};
|
|
AllSources *allSources{cooked_->allSources()};
|
|
const SourceFile *included{allSources->Open(path, &error)};
|
|
if (included == nullptr) {
|
|
messages_->Put({provenance, error.str()});
|
|
anyFatalErrors_ = true;
|
|
return true;
|
|
}
|
|
ProvenanceRange includeLineRange{provenance, static_cast<size_t>(p - start)};
|
|
ProvenanceRange fileRange{
|
|
allSources->AddIncludedFile(*included, includeLineRange)};
|
|
anyFatalErrors_ |= !Prescanner{*this}.Prescan(fileRange);
|
|
return true;
|
|
}
|
|
|
|
bool Prescanner::IsPreprocessorDirectiveLine(const char *start) {
|
|
const char *p{start};
|
|
if (p >= limit_ || inPreprocessorDirective_) {
|
|
return false;
|
|
}
|
|
for (; *p == ' '; ++p) {
|
|
}
|
|
if (*p == '#') {
|
|
return !inFixedForm_ || p != start + 5;
|
|
}
|
|
for (; *p == ' ' || *p == '\t'; ++p) {
|
|
}
|
|
return *p == '#';
|
|
}
|
|
|
|
bool Prescanner::CommentLines() {
|
|
bool any{false};
|
|
while (lineStart_ < limit_) {
|
|
if (IsFixedFormCommentLine(lineStart_) || IsFreeFormComment(lineStart_)) {
|
|
NextLine();
|
|
++newlineDebt_;
|
|
any = true;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
return any;
|
|
}
|
|
|
|
bool Prescanner::CommentLinesAndPreprocessorDirectives() {
|
|
bool any{false};
|
|
while (lineStart_ < limit_) {
|
|
if (IsFixedFormCommentLine(lineStart_) || IsFreeFormComment(lineStart_) ||
|
|
IncludeLine(lineStart_)) {
|
|
NextLine();
|
|
} else if (IsPreprocessorDirectiveLine(lineStart_)) {
|
|
if (std::optional<TokenSequence> tokens{NextTokenizedLine()}) {
|
|
anyFatalErrors_ |= !preprocessor_->Directive(*tokens, this);
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
++newlineDebt_;
|
|
any = true;
|
|
}
|
|
return any;
|
|
}
|
|
|
|
const char *Prescanner::FixedFormContinuationLine() {
|
|
const char *p{lineStart_};
|
|
if (p >= limit_) {
|
|
return nullptr;
|
|
}
|
|
tabInCurrentLine_ = false;
|
|
if (*p == '&') {
|
|
return p + 1; // extension
|
|
}
|
|
if (*p == '\t' && p[1] >= '1' && p[1] <= '9') {
|
|
tabInCurrentLine_ = true;
|
|
return p + 2; // VAX extension
|
|
}
|
|
if (p[0] == ' ' && p[1] == ' ' && p[2] == ' ' && p[3] == ' ' && p[4] == ' ') {
|
|
char col6{p[5]};
|
|
if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') {
|
|
return p + 6;
|
|
}
|
|
}
|
|
if (delimiterNesting_ > 0) {
|
|
return p;
|
|
}
|
|
return nullptr; // not a continuation line
|
|
}
|
|
|
|
bool Prescanner::FixedFormContinuation() {
|
|
CommentLines();
|
|
const char *cont{FixedFormContinuationLine()};
|
|
if (cont == nullptr) {
|
|
return false;
|
|
}
|
|
BeginSourceLine(cont);
|
|
column_ = 7;
|
|
++newlineDebt_;
|
|
NextLine();
|
|
return true;
|
|
}
|
|
|
|
bool Prescanner::FreeFormContinuation() {
|
|
while (*at_ == ' ' || *at_ == '\t') {
|
|
++at_;
|
|
}
|
|
const char *p{at_};
|
|
bool ampersand{*p == '&'};
|
|
if (ampersand) {
|
|
for (++p; *p == ' ' || *p == '\t'; ++p) {
|
|
}
|
|
}
|
|
if (*p != '\n' && (inCharLiteral_ || *p != '!')) {
|
|
return false;
|
|
}
|
|
CommentLines();
|
|
p = lineStart_;
|
|
if (p >= limit_) {
|
|
return false;
|
|
}
|
|
int column{1};
|
|
for (; *p == ' ' || *p == '\t'; ++p) {
|
|
++column;
|
|
}
|
|
if (*p == '&') {
|
|
++p;
|
|
++column;
|
|
} else if (ampersand || delimiterNesting_ > 0) {
|
|
if (p > lineStart_) {
|
|
--p;
|
|
--column;
|
|
}
|
|
} else {
|
|
return false; // not a continuation
|
|
}
|
|
at_ = p;
|
|
column_ = column;
|
|
tabInCurrentLine_ = false;
|
|
++newlineDebt_;
|
|
NextLine();
|
|
return true;
|
|
}
|
|
|
|
void Prescanner::PayNewlineDebt() {
|
|
for (; newlineDebt_ > 0; --newlineDebt_) {
|
|
cooked_->Put('\n', newlineProvenance_);
|
|
}
|
|
}
|
|
} // namespace parser
|
|
} // namespace Fortran
|