2021-12-29 11:31:02 +00:00
|
|
|
//===-- Transfer.cpp --------------------------------------------*- C++ -*-===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file defines transfer functions that evaluate program statements and
|
|
|
|
// update an environment accordingly.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "clang/Analysis/FlowSensitive/Transfer.h"
|
|
|
|
#include "clang/AST/Decl.h"
|
|
|
|
#include "clang/AST/DeclBase.h"
|
2022-01-13 13:53:52 +00:00
|
|
|
#include "clang/AST/DeclCXX.h"
|
2021-12-29 11:31:02 +00:00
|
|
|
#include "clang/AST/Expr.h"
|
2022-01-11 12:15:53 +00:00
|
|
|
#include "clang/AST/ExprCXX.h"
|
2022-01-04 13:47:14 +00:00
|
|
|
#include "clang/AST/OperationKinds.h"
|
2021-12-29 11:31:02 +00:00
|
|
|
#include "clang/AST/Stmt.h"
|
|
|
|
#include "clang/AST/StmtVisitor.h"
|
2022-07-26 17:54:13 +00:00
|
|
|
#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
|
2021-12-29 11:31:02 +00:00
|
|
|
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
|
2022-07-26 17:54:13 +00:00
|
|
|
#include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
|
2023-06-20 08:00:01 +00:00
|
|
|
#include "clang/Analysis/FlowSensitive/RecordOps.h"
|
2022-03-25 20:01:18 +00:00
|
|
|
#include "clang/Analysis/FlowSensitive/Value.h"
|
2022-04-01 12:51:23 +00:00
|
|
|
#include "clang/Basic/Builtins.h"
|
2022-01-04 13:47:14 +00:00
|
|
|
#include "clang/Basic/OperatorKinds.h"
|
2021-12-29 11:31:02 +00:00
|
|
|
#include "llvm/Support/Casting.h"
|
2023-09-07 07:02:57 +00:00
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include <assert.h>
|
2021-12-29 11:31:02 +00:00
|
|
|
#include <cassert>
|
2023-09-07 07:02:57 +00:00
|
|
|
|
|
|
|
#define DEBUG_TYPE "dataflow"
|
2021-12-29 11:31:02 +00:00
|
|
|
|
|
|
|
namespace clang {
|
|
|
|
namespace dataflow {
|
|
|
|
|
2023-03-28 07:56:24 +00:00
|
|
|
/// Looks up the environment for the CFG block that contains `S`.
///
/// Returns null if that block is unreachable or if no state has been recorded
/// for it. If the block is the one identified by `CurBlockID`, the environment
/// of `CurState` is returned instead of the entry in `BlockToState`.
const Environment *StmtToEnvMap::getEnvironment(const Stmt &S) const {
  const auto &StmtToBlock = CFCtx.getStmtToBlock();
  auto It = StmtToBlock.find(&ignoreCFGOmittedNodes(S));
  assert(It != StmtToBlock.end());

  const auto *Block = It->getSecond();
  if (!CFCtx.isBlockReachable(*Block))
    return nullptr;

  // Statements of the current block are answered from the current state rather
  // than from the per-block state table.
  if (Block->getBlockID() == CurBlockID)
    return &CurState.Env;

  const auto &State = BlockToState[Block->getBlockID()];
  return State ? &State->Env : nullptr;
}
|
|
|
|
|
2022-03-25 20:01:18 +00:00
|
|
|
/// Builds a boolean value that models the result of `LHS == RHS`.
///
/// The result is:
///  - the literal `true` if both operands are associated with the very same
///    (non-null) `Value`, or if both operands have type `nullptr_t`;
///  - `LHS <=> RHS` if both operands are associated with `BoolValue`s;
///  - a fresh atomic boolean otherwise, i.e. nothing is known about the
///    outcome of the comparison.
static BoolValue &evaluateBooleanEquality(const Expr &LHS, const Expr &RHS,
                                          Environment &Env) {
  Value *LHSValue = Env.getValue(LHS);
  Value *RHSValue = Env.getValue(RHS);

  // A null `Value` means the operand is not modeled. Two unmodeled operands
  // both yield null, but that must not be mistaken for "same value": only
  // operands that share an actual `Value` are known to compare equal.
  if (LHSValue != nullptr && LHSValue == RHSValue)
    return Env.getBoolLiteralValue(true);

  // Operands of type `nullptr_t` may have no associated `Value`, yet they
  // always compare equal to each other.
  if (LHS.getType()->isNullPtrType() && RHS.getType()->isNullPtrType())
    return Env.getBoolLiteralValue(true);

  if (auto *LHSBool = dyn_cast_or_null<BoolValue>(LHSValue))
    if (auto *RHSBool = dyn_cast_or_null<BoolValue>(RHSValue))
      return Env.makeIff(*LHSBool, *RHSBool);

  return Env.makeAtomicBoolValue();
}
|
|
|
|
|
2022-10-06 17:56:41 +00:00
|
|
|
/// Returns `V` unchanged unless it is a `TopBoolValue`; in that case returns a
/// boolean value, created in the analysis context's arena, that refers to the
/// atom of the `TopBoolValue`.
static BoolValue &unpackValue(BoolValue &V, Environment &Env) {
  auto *Top = llvm::dyn_cast<TopBoolValue>(&V);
  if (Top == nullptr)
    return V;

  auto &A = Env.getDataflowAnalysisContext().arena();
  return A.makeBoolValue(A.makeAtomRef(Top->getAtom()));
}
|
|
|
|
|
|
|
|
// Unpacks the value (if any) associated with `E` and updates `E` to the new
|
2023-01-03 15:41:38 +00:00
|
|
|
// value, if any unpacking occured. Also, does the lvalue-to-rvalue conversion,
|
|
|
|
// by skipping past the reference.
|
2022-10-06 17:56:41 +00:00
|
|
|
static Value *maybeUnpackLValueExpr(const Expr &E, Environment &Env) {
|
2023-07-31 12:37:01 +00:00
|
|
|
auto *Loc = Env.getStorageLocation(E);
|
2022-10-06 17:56:41 +00:00
|
|
|
if (Loc == nullptr)
|
|
|
|
return nullptr;
|
|
|
|
auto *Val = Env.getValue(*Loc);
|
|
|
|
|
|
|
|
auto *B = dyn_cast_or_null<BoolValue>(Val);
|
|
|
|
if (B == nullptr)
|
|
|
|
return Val;
|
|
|
|
|
|
|
|
auto &UnpackedVal = unpackValue(*B, Env);
|
|
|
|
if (&UnpackedVal == Val)
|
|
|
|
return Val;
|
|
|
|
Env.setValue(*Loc, UnpackedVal);
|
|
|
|
return &UnpackedVal;
|
|
|
|
}
|
|
|
|
|
2023-05-22 06:17:17 +00:00
|
|
|
static void propagateValue(const Expr &From, const Expr &To, Environment &Env) {
|
[clang][dataflow] Reverse course on `getValue()` deprecation.
In the [value categories RFC](https://discourse.llvm.org/t/70086), I proposed that the end state of the migration should be that `getValue()` should only be legal to call on prvalues.
As a stepping stone, to allow migrating off existing calls to `getValue()`, I proposed introducing `getValueStrict()`, which would already have the new semantics.
However, I've now reconsidered this. Any expression, whether prvalue or glvalue, has a value, so really there isn't any reason to forbid calling `getValue()` on glvalues. I'm therefore removing the deprecation from `getValue()` and transitioning existing `getValueStrict()` calls back to `getValue()`.
The other "strict" accessors are a different case. `setValueStrict()` should only be called on prvalues because glvalues need to have a storage location associated with them; it doesn't make sense to only set a value for them. And, of course, `getStorageLocationStrict()` and `setStorageLocationStrict()` should obviously only be called on glvalues because prvalues don't have storage locations.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D155921
2023-07-26 12:30:53 +00:00
|
|
|
if (auto *Val = Env.getValue(From))
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(To, *Val);
|
2023-05-22 06:17:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void propagateStorageLocation(const Expr &From, const Expr &To,
|
|
|
|
Environment &Env) {
|
2023-07-31 12:37:01 +00:00
|
|
|
if (auto *Loc = Env.getStorageLocation(From))
|
|
|
|
Env.setStorageLocation(To, *Loc);
|
2023-05-22 06:17:17 +00:00
|
|
|
}
|
|
|
|
|
2023-06-28 08:36:06 +00:00
|
|
|
// Propagates the value or storage location of `From` to `To` in cases where
|
2023-05-22 06:17:17 +00:00
|
|
|
// `From` may be either a glvalue or a prvalue. `To` must be a glvalue iff
|
|
|
|
// `From` is a glvalue.
|
|
|
|
static void propagateValueOrStorageLocation(const Expr &From, const Expr &To,
|
|
|
|
Environment &Env) {
|
|
|
|
assert(From.isGLValue() == To.isGLValue());
|
|
|
|
if (From.isGLValue())
|
|
|
|
propagateStorageLocation(From, To, Env);
|
|
|
|
else
|
|
|
|
propagateValue(From, To, Env);
|
|
|
|
}
|
|
|
|
|
2023-03-28 08:07:51 +00:00
|
|
|
namespace {
|
|
|
|
|
2021-12-29 11:31:02 +00:00
|
|
|
class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
|
|
|
|
public:
|
2022-12-27 17:34:30 +00:00
|
|
|
TransferVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env)
|
|
|
|
: StmtToEnv(StmtToEnv), Env(Env) {}
|
2021-12-29 11:31:02 +00:00
|
|
|
|
2022-01-04 13:47:14 +00:00
|
|
|
void VisitBinaryOperator(const BinaryOperator *S) {
|
2022-05-04 21:08:43 +00:00
|
|
|
const Expr *LHS = S->getLHS();
|
2022-03-11 11:52:53 +00:00
|
|
|
assert(LHS != nullptr);
|
|
|
|
|
2022-05-04 21:08:43 +00:00
|
|
|
const Expr *RHS = S->getRHS();
|
2022-03-11 11:52:53 +00:00
|
|
|
assert(RHS != nullptr);
|
|
|
|
|
2022-02-16 16:47:37 +00:00
|
|
|
switch (S->getOpcode()) {
|
|
|
|
case BO_Assign: {
|
2023-07-31 12:37:01 +00:00
|
|
|
auto *LHSLoc = Env.getStorageLocation(*LHS);
|
2022-01-04 13:47:14 +00:00
|
|
|
if (LHSLoc == nullptr)
|
2022-02-16 16:47:37 +00:00
|
|
|
break;
|
2022-01-04 13:47:14 +00:00
|
|
|
|
[clang][dataflow] Reverse course on `getValue()` deprecation.
In the [value categories RFC](https://discourse.llvm.org/t/70086), I proposed that the end state of the migration should be that `getValue()` should only be legal to call on prvalues.
As a stepping stone, to allow migrating off existing calls to `getValue()`, I proposed introducing `getValueStrict()`, which would already have the new semantics.
However, I've now reconsidered this. Any expression, whether prvalue or glvalue, has a value, so really there isn't any reason to forbid calling `getValue()` on glvalues. I'm therefore removing the deprecation from `getValue()` and transitioning existing `getValueStrict()` calls back to `getValue()`.
The other "strict" accessors are a different case. `setValueStrict()` should only be called on prvalues because glvalues need to have a storage location associated with them; it doesn't make sense to only set a value for them. And, of course, `getStorageLocationStrict()` and `setStorageLocationStrict()` should obviously only be called on glvalues because prvalues don't have storage locations.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D155921
2023-07-26 12:30:53 +00:00
|
|
|
auto *RHSVal = Env.getValue(*RHS);
|
2022-01-04 13:47:14 +00:00
|
|
|
if (RHSVal == nullptr)
|
2022-02-16 16:47:37 +00:00
|
|
|
break;
|
2022-01-04 13:47:14 +00:00
|
|
|
|
|
|
|
// Assign a value to the storage location of the left-hand side.
|
|
|
|
Env.setValue(*LHSLoc, *RHSVal);
|
|
|
|
|
|
|
|
// Assign a storage location for the whole expression.
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setStorageLocation(*S, *LHSLoc);
|
2022-02-16 16:47:37 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case BO_LAnd:
|
|
|
|
case BO_LOr: {
|
2023-05-25 09:22:37 +00:00
|
|
|
BoolValue &LHSVal = getLogicOperatorSubExprValue(*LHS);
|
|
|
|
BoolValue &RHSVal = getLogicOperatorSubExprValue(*RHS);
|
2023-03-23 07:45:40 +00:00
|
|
|
|
2022-02-16 16:47:37 +00:00
|
|
|
if (S->getOpcode() == BO_LAnd)
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, Env.makeAnd(LHSVal, RHSVal));
|
2022-02-16 16:47:37 +00:00
|
|
|
else
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, Env.makeOr(LHSVal, RHSVal));
|
2022-02-16 16:47:37 +00:00
|
|
|
break;
|
|
|
|
}
|
2022-03-25 20:01:18 +00:00
|
|
|
case BO_NE:
|
|
|
|
case BO_EQ: {
|
|
|
|
auto &LHSEqRHSValue = evaluateBooleanEquality(*LHS, *RHS, Env);
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, S->getOpcode() == BO_EQ ? LHSEqRHSValue
|
|
|
|
: Env.makeNot(LHSEqRHSValue));
|
2022-03-25 20:01:18 +00:00
|
|
|
break;
|
|
|
|
}
|
2022-06-16 21:57:28 +00:00
|
|
|
case BO_Comma: {
|
2023-05-17 13:27:35 +00:00
|
|
|
propagateValueOrStorageLocation(*RHS, *S, Env);
|
2022-06-16 21:57:28 +00:00
|
|
|
break;
|
|
|
|
}
|
2022-02-16 16:47:37 +00:00
|
|
|
default:
|
|
|
|
break;
|
2022-01-04 13:47:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void VisitDeclRefExpr(const DeclRefExpr *S) {
|
2022-12-07 16:03:37 +00:00
|
|
|
const ValueDecl *VD = S->getDecl();
|
|
|
|
assert(VD != nullptr);
|
2023-06-29 06:39:39 +00:00
|
|
|
|
2023-08-23 10:54:28 +00:00
|
|
|
// Some `DeclRefExpr`s aren't glvalues, so we can't associate them with a
|
|
|
|
// `StorageLocation`, and there's also no sensible `Value` that we can
|
|
|
|
// assign to them. Examples:
|
|
|
|
// - Non-static member variables
|
|
|
|
// - Non static member functions
|
|
|
|
// Note: Member operators are an exception to this, but apparently only
|
|
|
|
// if the `DeclRefExpr` is used within the callee of a
|
|
|
|
// `CXXOperatorCallExpr`. In other cases, for example when applying the
|
|
|
|
// address-of operator, the `DeclRefExpr` is a prvalue.
|
|
|
|
if (!S->isGLValue())
|
2023-06-29 06:39:39 +00:00
|
|
|
return;
|
|
|
|
|
2023-05-08 06:38:42 +00:00
|
|
|
auto *DeclLoc = Env.getStorageLocation(*VD);
|
2022-01-04 13:47:14 +00:00
|
|
|
if (DeclLoc == nullptr)
|
|
|
|
return;
|
|
|
|
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setStorageLocation(*S, *DeclLoc);
|
2022-01-04 13:47:14 +00:00
|
|
|
}
|
|
|
|
|
2021-12-29 11:31:02 +00:00
|
|
|
void VisitDeclStmt(const DeclStmt *S) {
|
2022-01-14 18:27:39 +00:00
|
|
|
// Group decls are converted into single decls in the CFG so the cast below
|
|
|
|
// is safe.
|
|
|
|
const auto &D = *cast<VarDecl>(S->getSingleDecl());
|
2022-02-23 13:38:51 +00:00
|
|
|
|
[clang][dataflow] Eliminate intermediate `ReferenceValue`s from `Environment::DeclToLoc`.
For the wider context of this change, see the RFC at
https://discourse.llvm.org/t/70086.
After this change, global and local variables of reference type are associated
directly with the `StorageLocation` of the referenced object instead of the
`StorageLocation` of a `ReferenceValue`.
Some tests that explicitly check for an existence of `ReferenceValue` for a
variable of reference type have been modified accordingly.
As discussed in the RFC, I have added an assertion to `Environment::join()` to
check that if both environments contain an entry for the same declaration in
`DeclToLoc`, they both map to the same `StorageLocation`. As discussed in
https://discourse.llvm.org/t/70086/5, this also necessitates removing
declarations from `DeclToLoc` when they go out of scope.
In the RFC, I proposed a gradual migration for this change, but it appears
that all of the callers of `Environment::setStorageLocation(const ValueDecl &,
SkipPast` are in the dataflow framework itself, and that there are only a few of
them.
As this is the function whose semantics are changing in a way that callers
potentially need to adapt to, I've decided to change the semantics of the
function directly.
The semantics of `getStorageLocation(const ValueDecl &, SkipPast SP` now no
longer depend on the behavior of the `SP` parameter. (There don't appear to be
any callers that use `SkipPast::ReferenceThenPointer`, so I've added an
assertion that forbids this usage.)
This patch adds a default argument for the `SP` parameter and removes the
explicit `SP` argument at the callsites that are touched by this change. A
followup patch will remove the argument from the remaining callsites,
allowing the `SkipPast` parameter to be removed entirely. (I don't want to do
that in this patch so that semantics-changing changes can be reviewed separately
from semantics-neutral changes.)
Reviewed By: ymandel, xazax.hun, gribozavr2
Differential Revision: https://reviews.llvm.org/D149144
2023-05-04 07:42:05 +00:00
|
|
|
ProcessVarDecl(D);
|
|
|
|
}
|
|
|
|
|
|
|
|
void ProcessVarDecl(const VarDecl &D) {
|
2022-02-23 13:38:51 +00:00
|
|
|
// Static local vars are already initialized in `Environment`.
|
|
|
|
if (D.hasGlobalStorage())
|
|
|
|
return;
|
|
|
|
|
2023-07-17 06:27:59 +00:00
|
|
|
// If this is the holding variable for a `BindingDecl`, we may already
|
|
|
|
// have a storage location set up -- so check. (See also explanation below
|
|
|
|
// where we process the `BindingDecl`.)
|
|
|
|
if (D.getType()->isReferenceType() && Env.getStorageLocation(D) != nullptr)
|
|
|
|
return;
|
|
|
|
|
|
|
|
assert(Env.getStorageLocation(D) == nullptr);
|
|
|
|
|
|
|
|
Env.setStorageLocation(D, Env.createObject(D));
|
2022-02-24 17:07:41 +00:00
|
|
|
|
2023-01-03 15:41:38 +00:00
|
|
|
// `DecompositionDecl` must be handled after we've interpreted the loc
|
|
|
|
// itself, because the binding expression refers back to the
|
|
|
|
// `DecompositionDecl` (even though it has no written name).
|
2022-02-24 17:07:41 +00:00
|
|
|
if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) {
|
|
|
|
// If VarDecl is a DecompositionDecl, evaluate each of its bindings. This
|
|
|
|
// needs to be evaluated after initializing the values in the storage for
|
|
|
|
// VarDecl, as the bindings refer to them.
|
|
|
|
// FIXME: Add support for ArraySubscriptExpr.
|
2022-12-07 16:03:37 +00:00
|
|
|
// FIXME: Consider adding AST nodes used in BindingDecls to the CFG.
|
2022-02-24 17:07:41 +00:00
|
|
|
for (const auto *B : Decomp->bindings()) {
|
2022-12-07 16:03:37 +00:00
|
|
|
if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) {
|
|
|
|
auto *DE = dyn_cast_or_null<DeclRefExpr>(ME->getBase());
|
|
|
|
if (DE == nullptr)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// ME and its base haven't been visited because they aren't included
|
|
|
|
// in the statements of the CFG basic block.
|
|
|
|
VisitDeclRefExpr(DE);
|
|
|
|
VisitMemberExpr(ME);
|
|
|
|
|
2023-07-31 12:37:01 +00:00
|
|
|
if (auto *Loc = Env.getStorageLocation(*ME))
|
2022-12-07 16:03:37 +00:00
|
|
|
Env.setStorageLocation(*B, *Loc);
|
|
|
|
} else if (auto *VD = B->getHoldingVar()) {
|
[clang][dataflow] Eliminate intermediate `ReferenceValue`s from `Environment::DeclToLoc`.
For the wider context of this change, see the RFC at
https://discourse.llvm.org/t/70086.
After this change, global and local variables of reference type are associated
directly with the `StorageLocation` of the referenced object instead of the
`StorageLocation` of a `ReferenceValue`.
Some tests that explicitly check for an existence of `ReferenceValue` for a
variable of reference type have been modified accordingly.
As discussed in the RFC, I have added an assertion to `Environment::join()` to
check that if both environments contain an entry for the same declaration in
`DeclToLoc`, they both map to the same `StorageLocation`. As discussed in
https://discourse.llvm.org/t/70086/5, this also necessitates removing
declarations from `DeclToLoc` when they go out of scope.
In the RFC, I proposed a gradual migration for this change, but it appears
that all of the callers of `Environment::setStorageLocation(const ValueDecl &,
SkipPast` are in the dataflow framework itself, and that there are only a few of
them.
As this is the function whose semantics are changing in a way that callers
potentially need to adapt to, I've decided to change the semantics of the
function directly.
The semantics of `getStorageLocation(const ValueDecl &, SkipPast SP` now no
longer depend on the behavior of the `SP` parameter. (There don't appear to be
any callers that use `SkipPast::ReferenceThenPointer`, so I've added an
assertion that forbids this usage.)
This patch adds a default argument for the `SP` parameter and removes the
explicit `SP` argument at the callsites that are touched by this change. A
followup patch will remove the argument from the remaining callsites,
allowing the `SkipPast` parameter to be removed entirely. (I don't want to do
that in this patch so that semantics-changing changes can be reviewed separately
from semantics-neutral changes.)
Reviewed By: ymandel, xazax.hun, gribozavr2
Differential Revision: https://reviews.llvm.org/D149144
2023-05-04 07:42:05 +00:00
|
|
|
// Holding vars are used to back the `BindingDecl`s of tuple-like
|
|
|
|
// types. The holding var declarations appear after the
|
|
|
|
// `DecompositionDecl`, so we have to explicitly process them here
|
|
|
|
// to know their storage location. They will be processed a second
|
|
|
|
// time when we visit their `VarDecl`s, so we have code that protects
|
|
|
|
// against this above.
|
|
|
|
ProcessVarDecl(*VD);
|
|
|
|
auto *VDLoc = Env.getStorageLocation(*VD);
|
|
|
|
assert(VDLoc != nullptr);
|
|
|
|
Env.setStorageLocation(*B, *VDLoc);
|
2022-12-07 16:03:37 +00:00
|
|
|
}
|
2022-02-24 17:07:41 +00:00
|
|
|
}
|
|
|
|
}
|
2021-12-29 11:31:02 +00:00
|
|
|
}
|
|
|
|
|
2022-01-04 13:47:14 +00:00
|
|
|
void VisitImplicitCastExpr(const ImplicitCastExpr *S) {
|
2022-05-02 21:36:04 +00:00
|
|
|
const Expr *SubExpr = S->getSubExpr();
|
2022-01-13 13:53:52 +00:00
|
|
|
assert(SubExpr != nullptr);
|
|
|
|
|
|
|
|
switch (S->getCastKind()) {
|
2022-04-04 15:31:52 +00:00
|
|
|
case CK_IntegralToBoolean: {
|
|
|
|
// This cast creates a new, boolean value from the integral value. We
|
|
|
|
// model that with a fresh value in the environment, unless it's already a
|
|
|
|
// boolean.
|
[clang][dataflow] Add `Strict` versions of `Value` and `StorageLocation` accessors.
This is part of the gradual migration to strict handling of value categories, as described in the RFC at https://discourse.llvm.org/t/70086.
This patch migrates some representative calls of the newly deprecated accessors to the new `Strict` functions. Followup patches will migrate the remaining callers. (There are a large number of callers, with some subtlety involved in some of them, so it makes sense to split this up into multiple patches rather than migrating all callers in one go.)
The `Strict` accessors as implemented here have some differences in semantics compared to the semantics originally proposed in the RFC; specifically:
* `setStorageLocationStrict()`: The RFC proposes to create an intermediate
`ReferenceValue` that then refers to the `StorageLocation` for the glvalue.
It turns out though that, even today, most places in the code are not doing
this but are instead associating glvalues directly with their
`StorageLocation`. It therefore didn't seem to make sense to introduce new
`ReferenceValue`s where there were none previously, so I have chosen to
instead make `setStorageLocationStrict()` simply call through to
`setStorageLocation(const Expr &, StorageLocation &)` and merely add the
assertion that the expression must be a glvalue.
* `getStorageLocationStrict()`: The RFC proposes that this should assert that
the storage location for the glvalue expression is associated with an
intermediate `ReferenceValue`, but, as explained, this is often not true.
The current state is inconsistent: Sometimes the intermediate
`ReferenceValue` is there, sometimes it isn't. For this reason,
`getStorageLocationStrict()` skips past a `ReferenceValue` if it is there but
otherwise directly returns the storage location associated with the
expression. This behavior is equivalent to the existing behavior of
`SkipPast::Reference`.
* `setValueStrict()`: The RFC proposes that this should always create the same
`StorageLocation` for a given `Value`, but, in fact, the transfer functions
that exist today don't guarantee this; almost all transfer functions
unconditionally create a new `StorageLocation` when associating an expression
with a `Value`.
There appears to be one special case:
`TerminatorVisitor::extendFlowCondition()` checks whether the expression is
already associated with a `StorageLocation` and, if so, reuses the existing
`StorageLocation` instead of creating a new one.
For this reason, `setValueStrict()` implements this logic (preserve an
existing `StorageLocation`) but makes no attempt to always associate the same
`StorageLocation` with a given `Value`, as nothing in the framework appers to
require this.
As `TerminatorVisitor::extendFlowCondition()` is an interesting special case,
the `setValue()` call there is among the ones that this patch migrates to
`setValueStrict()`.
Reviewed By: sammccall, ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D150653
2023-05-17 09:12:46 +00:00
|
|
|
if (auto *SubExprVal =
|
[clang][dataflow] Reverse course on `getValue()` deprecation.
In the [value categories RFC](https://discourse.llvm.org/t/70086), I proposed that the end state of the migration should be that `getValue()` should only be legal to call on prvalues.
As a stepping stone, to allow migrating off existing calls to `getValue()`, I proposed introducing `getValueStrict()`, which would already have the new semantics.
However, I've now reconsidered this. Any expression, whether prvalue or glvalue, has a value, so really there isn't any reason to forbid calling `getValue()` on glvalues. I'm therefore removing the deprecation from `getValue()` and transitioning existing `getValueStrict()` calls back to `getValue()`.
The other "strict" accessors are a different case. `setValueStrict()` should only be called on prvalues because glvalues need to have a storage location associated with them; it doesn't make sense to only set a value for them. And, of course, `getStorageLocationStrict()` and `setStorageLocationStrict()` should obviously only be called on glvalues because prvalues don't have storage locations.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D155921
2023-07-26 12:30:53 +00:00
|
|
|
dyn_cast_or_null<BoolValue>(Env.getValue(*SubExpr)))
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, *SubExprVal);
|
2022-04-04 15:31:52 +00:00
|
|
|
else
|
|
|
|
// FIXME: If integer modeling is added, then update this code to create
|
|
|
|
// the boolean based on the integer model.
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, Env.makeAtomicBoolValue());
|
2022-04-04 15:31:52 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2022-01-13 13:53:52 +00:00
|
|
|
case CK_LValueToRValue: {
|
2022-10-06 17:56:41 +00:00
|
|
|
// When an L-value is used as an R-value, it may result in sharing, so we
|
2023-08-29 13:31:48 +00:00
|
|
|
// need to unpack any nested `Top`s.
|
2022-10-06 17:56:41 +00:00
|
|
|
auto *SubExprVal = maybeUnpackLValueExpr(*SubExpr, Env);
|
2022-01-04 13:47:14 +00:00
|
|
|
if (SubExprVal == nullptr)
|
2022-01-13 13:53:52 +00:00
|
|
|
break;
|
2022-01-04 13:47:14 +00:00
|
|
|
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, *SubExprVal);
|
2022-01-13 13:53:52 +00:00
|
|
|
break;
|
|
|
|
}
|
2022-04-04 15:31:52 +00:00
|
|
|
|
|
|
|
case CK_IntegralCast:
|
|
|
|
// FIXME: This cast creates a new integral value from the
|
|
|
|
// subexpression. But, because we don't model integers, we don't
|
|
|
|
// distinguish between this new value and the underlying one. If integer
|
|
|
|
// modeling is added, then update this code to create a fresh location and
|
|
|
|
// value.
|
2022-03-14 14:52:35 +00:00
|
|
|
case CK_UncheckedDerivedToBase:
|
|
|
|
case CK_ConstructorConversion:
|
|
|
|
case CK_UserDefinedConversion:
|
|
|
|
// FIXME: Add tests that excercise CK_UncheckedDerivedToBase,
|
|
|
|
// CK_ConstructorConversion, and CK_UserDefinedConversion.
|
2022-01-13 13:53:52 +00:00
|
|
|
case CK_NoOp: {
|
|
|
|
// FIXME: Consider making `Environment::getStorageLocation` skip noop
|
2023-05-17 13:27:35 +00:00
|
|
|
// expressions (this and other similar expressions in the file) instead
|
|
|
|
// of assigning them storage locations.
|
|
|
|
propagateValueOrStorageLocation(*SubExpr, *S, Env);
|
2022-01-13 13:53:52 +00:00
|
|
|
break;
|
|
|
|
}
|
2023-06-29 06:39:39 +00:00
|
|
|
case CK_NullToPointer: {
|
2022-06-27 14:14:01 +02:00
|
|
|
auto &NullPointerVal =
|
|
|
|
Env.getOrCreateNullPointerValue(S->getType()->getPointeeType());
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, NullPointerVal);
|
2022-06-27 14:14:01 +02:00
|
|
|
break;
|
|
|
|
}
|
2023-06-29 06:39:39 +00:00
|
|
|
case CK_NullToMemberPointer:
|
|
|
|
// FIXME: Implement pointers to members. For now, don't associate a value
|
|
|
|
// with this expression.
|
|
|
|
break;
|
2023-07-05 07:46:52 +00:00
|
|
|
case CK_FunctionToPointerDecay: {
|
2023-07-31 12:37:01 +00:00
|
|
|
StorageLocation *PointeeLoc = Env.getStorageLocation(*SubExpr);
|
2023-04-18 04:49:38 +00:00
|
|
|
if (PointeeLoc == nullptr)
|
|
|
|
break;
|
|
|
|
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, Env.create<PointerValue>(*PointeeLoc));
|
2023-04-18 04:49:38 +00:00
|
|
|
break;
|
|
|
|
}
|
2023-07-05 07:46:52 +00:00
|
|
|
case CK_BuiltinFnToFnPtr:
|
|
|
|
// Despite its name, the result type of `BuiltinFnToFnPtr` is a function,
|
|
|
|
// not a function pointer. In addition, builtin functions can only be
|
|
|
|
// called directly; it is not legal to take their address. We therefore
|
|
|
|
// don't need to create a value or storage location for them.
|
|
|
|
break;
|
2022-01-13 13:53:52 +00:00
|
|
|
default:
|
|
|
|
break;
|
2022-01-04 13:47:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void VisitUnaryOperator(const UnaryOperator *S) {
|
2022-05-02 21:36:04 +00:00
|
|
|
const Expr *SubExpr = S->getSubExpr();
|
2022-01-17 16:23:24 +00:00
|
|
|
assert(SubExpr != nullptr);
|
|
|
|
|
|
|
|
switch (S->getOpcode()) {
|
|
|
|
case UO_Deref: {
|
2023-12-21 09:02:20 +01:00
|
|
|
const auto *SubExprVal = Env.get<PointerValue>(*SubExpr);
|
2022-01-04 13:47:14 +00:00
|
|
|
if (SubExprVal == nullptr)
|
2022-01-17 16:23:24 +00:00
|
|
|
break;
|
2022-01-04 13:47:14 +00:00
|
|
|
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setStorageLocation(*S, SubExprVal->getPointeeLoc());
|
2022-01-17 16:23:24 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case UO_AddrOf: {
|
2023-06-29 06:39:39 +00:00
|
|
|
// FIXME: Model pointers to members.
|
|
|
|
if (S->getType()->isMemberPointerType())
|
2022-01-17 16:23:24 +00:00
|
|
|
break;
|
|
|
|
|
2023-07-31 12:37:01 +00:00
|
|
|
if (StorageLocation *PointeeLoc = Env.getStorageLocation(*SubExpr))
|
|
|
|
Env.setValue(*S, Env.create<PointerValue>(*PointeeLoc));
|
2022-01-17 16:23:24 +00:00
|
|
|
break;
|
|
|
|
}
|
2022-02-16 16:47:37 +00:00
|
|
|
case UO_LNot: {
|
[clang][dataflow] Reverse course on `getValue()` deprecation.
In the [value categories RFC](https://discourse.llvm.org/t/70086), I proposed that the end state of the migration should be that `getValue()` should only be legal to call on prvalues.
As a stepping stone, to allow migrating off existing calls to `getValue()`, I proposed introducing `getValueStrict()`, which would already have the new semantics.
However, I've now reconsidered this. Any expression, whether prvalue or glvalue, has a value, so really there isn't any reason to forbid calling `getValue()` on glvalues. I'm therefore removing the deprecation from `getValue()` and transitioning existing `getValueStrict()` calls back to `getValue()`.
The other "strict" accessors are a different case. `setValueStrict()` should only be called on prvalues because glvalues need to have a storage location associated with them; it doesn't make sense to only set a value for them. And, of course, `getStorageLocationStrict()` and `setStorageLocationStrict()` should obviously only be called on glvalues because prvalues don't have storage locations.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D155921
2023-07-26 12:30:53 +00:00
|
|
|
auto *SubExprVal = dyn_cast_or_null<BoolValue>(Env.getValue(*SubExpr));
|
2022-02-16 16:47:37 +00:00
|
|
|
if (SubExprVal == nullptr)
|
2022-02-17 09:37:02 +00:00
|
|
|
break;
|
2022-02-16 16:47:37 +00:00
|
|
|
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, Env.makeNot(*SubExprVal));
|
2022-02-17 09:37:02 +00:00
|
|
|
break;
|
2022-02-16 16:47:37 +00:00
|
|
|
}
|
2022-01-17 16:23:24 +00:00
|
|
|
default:
|
|
|
|
break;
|
2022-01-04 13:47:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-11 12:15:53 +00:00
|
|
|
void VisitCXXThisExpr(const CXXThisExpr *S) {
|
|
|
|
auto *ThisPointeeLoc = Env.getThisPointeeStorageLocation();
|
2022-05-25 19:21:08 +00:00
|
|
|
if (ThisPointeeLoc == nullptr)
|
|
|
|
// Unions are not supported yet, and will not have a location for the
|
|
|
|
// `this` expression's pointee.
|
|
|
|
return;
|
2022-01-11 12:15:53 +00:00
|
|
|
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, Env.create<PointerValue>(*ThisPointeeLoc));
|
2022-01-11 12:15:53 +00:00
|
|
|
}
|
|
|
|
|
2023-04-18 03:42:24 +00:00
|
|
|
void VisitCXXNewExpr(const CXXNewExpr *S) {
|
|
|
|
if (Value *Val = Env.createValue(S->getType()))
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, *Val);
|
2023-04-18 03:42:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void VisitCXXDeleteExpr(const CXXDeleteExpr *S) {
  // Intentionally empty.
  // Deletes are deliberately not modeled by the framework. Diagnosing
  // problems such as double deletes is the job of a specific analysis, not
  // of the framework itself.
}
|
|
|
|
|
2022-08-04 17:42:01 +00:00
|
|
|
void VisitReturnStmt(const ReturnStmt *S) {
|
2023-05-02 00:08:30 +00:00
|
|
|
if (!Env.getDataflowAnalysisContext().getOptions().ContextSensitiveOpts)
|
2022-12-20 19:41:19 +00:00
|
|
|
return;
|
|
|
|
|
2022-08-04 17:42:01 +00:00
|
|
|
auto *Ret = S->getRetValue();
|
|
|
|
if (Ret == nullptr)
|
|
|
|
return;
|
|
|
|
|
2023-05-23 09:35:52 +00:00
|
|
|
if (Ret->isPRValue()) {
|
[clang][dataflow] Reverse course on `getValue()` deprecation.
In the [value categories RFC](https://discourse.llvm.org/t/70086), I proposed that the end state of the migration should be that `getValue()` should only be legal to call on prvalues.
As a stepping stone, to allow migrating off existing calls to `getValue()`, I proposed introducing `getValueStrict()`, which would already have the new semantics.
However, I've now reconsidered this. Any expression, whether prvalue or glvalue, has a value, so really there isn't any reason to forbid calling `getValue()` on glvalues. I'm therefore removing the deprecation from `getValue()` and transitioning existing `getValueStrict()` calls back to `getValue()`.
The other "strict" accessors are a different case. `setValueStrict()` should only be called on prvalues because glvalues need to have a storage location associated with them; it doesn't make sense to only set a value for them. And, of course, `getStorageLocationStrict()` and `setStorageLocationStrict()` should obviously only be called on glvalues because prvalues don't have storage locations.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D155921
2023-07-26 12:30:53 +00:00
|
|
|
auto *Val = Env.getValue(*Ret);
|
2023-05-23 09:35:52 +00:00
|
|
|
if (Val == nullptr)
|
|
|
|
return;
|
2022-08-04 17:42:01 +00:00
|
|
|
|
2023-05-23 09:35:52 +00:00
|
|
|
// FIXME: Model NRVO.
|
|
|
|
Env.setReturnValue(Val);
|
|
|
|
} else {
|
2023-07-31 12:37:01 +00:00
|
|
|
auto *Loc = Env.getStorageLocation(*Ret);
|
2023-05-23 09:35:52 +00:00
|
|
|
if (Loc == nullptr)
|
|
|
|
return;
|
2022-12-20 19:41:19 +00:00
|
|
|
|
2023-05-23 09:35:52 +00:00
|
|
|
// FIXME: Model NRVO.
|
|
|
|
Env.setReturnStorageLocation(Loc);
|
|
|
|
}
|
2022-08-04 17:42:01 +00:00
|
|
|
}
|
|
|
|
|
2022-01-11 12:15:53 +00:00
|
|
|
void VisitMemberExpr(const MemberExpr *S) {
|
|
|
|
ValueDecl *Member = S->getMemberDecl();
|
|
|
|
assert(Member != nullptr);
|
|
|
|
|
|
|
|
// FIXME: Consider assigning pointer values to function member expressions.
|
|
|
|
if (Member->isFunctionOrFunctionTemplate())
|
|
|
|
return;
|
|
|
|
|
2023-01-09 22:54:53 +00:00
|
|
|
// FIXME: if/when we add support for modeling enums, use that support here.
|
|
|
|
if (isa<EnumConstantDecl>(Member))
|
|
|
|
return;
|
|
|
|
|
2022-02-23 13:38:51 +00:00
|
|
|
if (auto *D = dyn_cast<VarDecl>(Member)) {
|
|
|
|
if (D->hasGlobalStorage()) {
|
2023-05-08 06:38:42 +00:00
|
|
|
auto *VarDeclLoc = Env.getStorageLocation(*D);
|
2022-02-23 13:38:51 +00:00
|
|
|
if (VarDeclLoc == nullptr)
|
|
|
|
return;
|
|
|
|
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setStorageLocation(*S, *VarDeclLoc);
|
2022-02-23 13:38:51 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[clang][dataflow] Rename `AggregateStorageLocation` to `RecordStorageLocation` and `StructValue` to `RecordValue`.
- Both of these constructs are used to represent structs, classes, and unions;
Clang uses the collective term "record" for these.
- The term "aggregate" in `AggregateStorageLocation` implies that, at some
point, the intention may have been to use it also for arrays, but it don't
think it's possible to use it for arrays. Records and arrays are very
different and therefore need to be modeled differently. Records have a fixed
set of named fields, which can have different type; arrays have a variable
number of elements, but they all have the same type.
- Futhermore, "aggregate" has a very specific meaning in C++
(https://en.cppreference.com/w/cpp/language/aggregate_initialization).
Aggregates of class type may not have any user-declared or inherited
constructors, no private or protected non-static data members, no virtual
member functions, and so on, but we use `AggregateStorageLocations` to model all objects of class type.
In addition, for consistency, we also rename the following:
- `getAggregateLoc()` (in `RecordValue`, formerly known as `StructValue`) to
simply `getLoc()`.
- `refreshStructValue()` to `refreshRecordValue()`
We keep the old names around as deprecated synonyms to enable clients to be migrated to the new names.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D156788
2023-08-01 13:23:37 +00:00
|
|
|
RecordStorageLocation *BaseLoc = getBaseObjectLocation(*S, Env);
|
2022-01-11 12:15:53 +00:00
|
|
|
if (BaseLoc == nullptr)
|
|
|
|
return;
|
|
|
|
|
2023-07-20 11:12:39 +00:00
|
|
|
auto *MemberLoc = BaseLoc->getChild(*Member);
|
|
|
|
if (MemberLoc == nullptr)
|
|
|
|
return;
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setStorageLocation(*S, *MemberLoc);
|
2022-01-11 12:15:53 +00:00
|
|
|
}
|
|
|
|
|
2022-01-12 16:28:59 +00:00
|
|
|
void VisitCXXDefaultInitExpr(const CXXDefaultInitExpr *S) {
|
|
|
|
const Expr *InitExpr = S->getExpr();
|
|
|
|
assert(InitExpr != nullptr);
|
2023-06-28 08:36:06 +00:00
|
|
|
propagateValueOrStorageLocation(*InitExpr, *S, Env);
|
2022-01-12 16:28:59 +00:00
|
|
|
}
|
|
|
|
|
2022-01-13 13:53:52 +00:00
|
|
|
void VisitCXXConstructExpr(const CXXConstructExpr *S) {
|
|
|
|
const CXXConstructorDecl *ConstructorDecl = S->getConstructor();
|
|
|
|
assert(ConstructorDecl != nullptr);
|
|
|
|
|
|
|
|
if (ConstructorDecl->isCopyOrMoveConstructor()) {
|
2023-02-22 10:36:39 +00:00
|
|
|
// It is permissible for a copy/move constructor to have additional
|
|
|
|
// parameters as long as they have default arguments defined for them.
|
|
|
|
assert(S->getNumArgs() != 0);
|
2022-01-13 13:53:52 +00:00
|
|
|
|
|
|
|
const Expr *Arg = S->getArg(0);
|
|
|
|
assert(Arg != nullptr);
|
|
|
|
|
2023-12-21 09:02:20 +01:00
|
|
|
auto *ArgLoc = Env.get<RecordStorageLocation>(*Arg);
|
2023-06-20 08:00:01 +00:00
|
|
|
if (ArgLoc == nullptr)
|
|
|
|
return;
|
2022-01-13 13:53:52 +00:00
|
|
|
|
2023-06-20 08:00:01 +00:00
|
|
|
if (S->isElidable()) {
|
2023-07-31 12:35:44 +00:00
|
|
|
if (Value *Val = Env.getValue(*ArgLoc))
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, *Val);
|
2023-07-31 12:35:44 +00:00
|
|
|
} else {
|
[clang][dataflow] Rename `AggregateStorageLocation` to `RecordStorageLocation` and `StructValue` to `RecordValue`.
- Both of these constructs are used to represent structs, classes, and unions;
Clang uses the collective term "record" for these.
- The term "aggregate" in `AggregateStorageLocation` implies that, at some
point, the intention may have been to use it also for arrays, but it don't
think it's possible to use it for arrays. Records and arrays are very
different and therefore need to be modeled differently. Records have a fixed
set of named fields, which can have different type; arrays have a variable
number of elements, but they all have the same type.
- Futhermore, "aggregate" has a very specific meaning in C++
(https://en.cppreference.com/w/cpp/language/aggregate_initialization).
Aggregates of class type may not have any user-declared or inherited
constructors, no private or protected non-static data members, no virtual
member functions, and so on, but we use `AggregateStorageLocations` to model all objects of class type.
In addition, for consistency, we also rename the following:
- `getAggregateLoc()` (in `RecordValue`, formerly known as `StructValue`) to
simply `getLoc()`.
- `refreshStructValue()` to `refreshRecordValue()`
We keep the old names around as deprecated synonyms to enable clients to be migrated to the new names.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D156788
2023-08-01 13:23:37 +00:00
|
|
|
auto &Val = *cast<RecordValue>(Env.createValue(S->getType()));
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, Val);
|
[clang][dataflow] Rename `AggregateStorageLocation` to `RecordStorageLocation` and `StructValue` to `RecordValue`.
- Both of these constructs are used to represent structs, classes, and unions;
Clang uses the collective term "record" for these.
- The term "aggregate" in `AggregateStorageLocation` implies that, at some
point, the intention may have been to use it also for arrays, but it don't
think it's possible to use it for arrays. Records and arrays are very
different and therefore need to be modeled differently. Records have a fixed
set of named fields, which can have different type; arrays have a variable
number of elements, but they all have the same type.
- Futhermore, "aggregate" has a very specific meaning in C++
(https://en.cppreference.com/w/cpp/language/aggregate_initialization).
Aggregates of class type may not have any user-declared or inherited
constructors, no private or protected non-static data members, no virtual
member functions, and so on, but we use `AggregateStorageLocations` to model all objects of class type.
In addition, for consistency, we also rename the following:
- `getAggregateLoc()` (in `RecordValue`, formerly known as `StructValue`) to
simply `getLoc()`.
- `refreshStructValue()` to `refreshRecordValue()`
We keep the old names around as deprecated synonyms to enable clients to be migrated to the new names.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D156788
2023-08-01 13:23:37 +00:00
|
|
|
copyRecord(*ArgLoc, Val.getLoc(), Env);
|
2022-01-13 13:53:52 +00:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2023-07-27 08:35:57 +00:00
|
|
|
// `CXXConstructExpr` can have array type if default-initializing an array
|
|
|
|
// of records, and we currently can't create values for arrays. So check if
|
|
|
|
// we've got a record type.
|
|
|
|
if (S->getType()->isRecordType()) {
|
[clang][dataflow] Rename `AggregateStorageLocation` to `RecordStorageLocation` and `StructValue` to `RecordValue`.
- Both of these constructs are used to represent structs, classes, and unions;
Clang uses the collective term "record" for these.
- The term "aggregate" in `AggregateStorageLocation` implies that, at some
point, the intention may have been to use it also for arrays, but it don't
think it's possible to use it for arrays. Records and arrays are very
different and therefore need to be modeled differently. Records have a fixed
set of named fields, which can have different type; arrays have a variable
number of elements, but they all have the same type.
- Futhermore, "aggregate" has a very specific meaning in C++
(https://en.cppreference.com/w/cpp/language/aggregate_initialization).
Aggregates of class type may not have any user-declared or inherited
constructors, no private or protected non-static data members, no virtual
member functions, and so on, but we use `AggregateStorageLocations` to model all objects of class type.
In addition, for consistency, we also rename the following:
- `getAggregateLoc()` (in `RecordValue`, formerly known as `StructValue`) to
simply `getLoc()`.
- `refreshStructValue()` to `refreshRecordValue()`
We keep the old names around as deprecated synonyms to enable clients to be migrated to the new names.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D156788
2023-08-01 13:23:37 +00:00
|
|
|
auto &InitialVal = *cast<RecordValue>(Env.createValue(S->getType()));
|
2023-08-28 11:50:03 +00:00
|
|
|
Env.setValue(*S, InitialVal);
|
2023-07-27 08:35:57 +00:00
|
|
|
}
|
2022-08-10 14:01:18 +00:00
|
|
|
|
|
|
|
transferInlineCall(S, ConstructorDecl);
|
2022-01-13 13:53:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *S) {
|
|
|
|
if (S->getOperator() == OO_Equal) {
|
|
|
|
assert(S->getNumArgs() == 2);
|
|
|
|
|
|
|
|
const Expr *Arg0 = S->getArg(0);
|
|
|
|
assert(Arg0 != nullptr);
|
|
|
|
|
|
|
|
const Expr *Arg1 = S->getArg(1);
|
|
|
|
assert(Arg1 != nullptr);
|
|
|
|
|
|
|
|
// Evaluate only copy and move assignment operators.
|
2023-04-24 13:11:02 +00:00
|
|
|
const auto *Method =
|
|
|
|
dyn_cast_or_null<CXXMethodDecl>(S->getDirectCallee());
|
|
|
|
if (!Method)
|
|
|
|
return;
|
|
|
|
if (!Method->isCopyAssignmentOperator() &&
|
|
|
|
!Method->isMoveAssignmentOperator())
|
2022-01-13 13:53:52 +00:00
|
|
|
return;
|
|
|
|
|
2023-11-07 09:48:40 +01:00
|
|
|
RecordStorageLocation *LocSrc = nullptr;
|
|
|
|
if (Arg1->isPRValue()) {
|
2023-12-21 09:02:20 +01:00
|
|
|
if (auto *Val = Env.get<RecordValue>(*Arg1))
|
2023-11-07 09:48:40 +01:00
|
|
|
LocSrc = &Val->getLoc();
|
|
|
|
} else {
|
2023-12-21 09:02:20 +01:00
|
|
|
LocSrc = Env.get<RecordStorageLocation>(*Arg1);
|
2023-11-07 09:48:40 +01:00
|
|
|
}
|
2023-12-21 09:02:20 +01:00
|
|
|
auto *LocDst = Env.get<RecordStorageLocation>(*Arg0);
|
2022-01-13 13:53:52 +00:00
|
|
|
|
2023-09-20 06:28:21 +02:00
|
|
|
if (LocSrc == nullptr || LocDst == nullptr)
|
|
|
|
return;
|
|
|
|
|
2023-09-14 20:45:56 +02:00
|
|
|
// The assignment operators are different from the type of the destination
|
2023-09-20 06:28:21 +02:00
|
|
|
// in this model (i.e. in one of their base classes). This must be very
|
|
|
|
// rare and we just bail.
|
2022-01-27 13:55:08 +01:00
|
|
|
if (Method->getFunctionObjectParameterType()
|
|
|
|
.getCanonicalType()
|
|
|
|
.getUnqualifiedType() !=
|
2023-09-14 20:45:56 +02:00
|
|
|
LocDst->getType().getCanonicalType().getUnqualifiedType())
|
|
|
|
return;
|
|
|
|
|
2023-09-20 06:28:21 +02:00
|
|
|
copyRecord(*LocSrc, *LocDst, Env);
|
|
|
|
Env.setStorageLocation(*S, *LocDst);
|
2024-01-31 17:18:16 +01:00
|
|
|
return;
|
2022-01-13 13:53:52 +00:00
|
|
|
}
|
2024-01-31 17:18:16 +01:00
|
|
|
|
|
|
|
// CXXOperatorCallExpr can be prvalues. Call `VisitCallExpr`() to create
|
|
|
|
// a `RecordValue` for them so that `Environment::getResultObjectLocation()`
|
|
|
|
// can return a value.
|
|
|
|
VisitCallExpr(S);
|
2022-01-13 13:53:52 +00:00
|
|
|
}
|
|
|
|
|
2024-02-08 08:38:35 +01:00
|
|
|
void VisitCXXRewrittenBinaryOperator(const CXXRewrittenBinaryOperator *RBO) {
|
|
|
|
propagateValue(*RBO->getSemanticForm(), *RBO, Env);
|
|
|
|
}
|
|
|
|
|
2022-01-13 13:53:52 +00:00
|
|
|
void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *S) {
|
|
|
|
if (S->getCastKind() == CK_ConstructorConversion) {
|
|
|
|
const Expr *SubExpr = S->getSubExpr();
|
|
|
|
assert(SubExpr != nullptr);
|
|
|
|
|
2023-05-22 06:17:17 +00:00
|
|
|
propagateValue(*SubExpr, *S, Env);
|
2022-01-13 13:53:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void VisitCXXTemporaryObjectExpr(const CXXTemporaryObjectExpr *S) {
|
2022-01-17 15:17:05 +00:00
|
|
|
if (Value *Val = Env.createValue(S->getType()))
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, *Val);
|
2022-01-13 13:53:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void VisitCallExpr(const CallExpr *S) {
|
2022-04-01 12:51:23 +00:00
|
|
|
// Of clang's builtins, only `__builtin_expect` is handled explicitly, since
|
|
|
|
// others (like trap, debugtrap, and unreachable) are handled by CFG
|
|
|
|
// construction.
|
2022-01-13 13:53:52 +00:00
|
|
|
if (S->isCallToStdMove()) {
|
|
|
|
assert(S->getNumArgs() == 1);
|
|
|
|
|
|
|
|
const Expr *Arg = S->getArg(0);
|
|
|
|
assert(Arg != nullptr);
|
|
|
|
|
2023-07-31 12:37:01 +00:00
|
|
|
auto *ArgLoc = Env.getStorageLocation(*Arg);
|
2022-01-13 13:53:52 +00:00
|
|
|
if (ArgLoc == nullptr)
|
|
|
|
return;
|
|
|
|
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setStorageLocation(*S, *ArgLoc);
|
2022-04-01 12:51:23 +00:00
|
|
|
} else if (S->getDirectCallee() != nullptr &&
|
|
|
|
S->getDirectCallee()->getBuiltinID() ==
|
|
|
|
Builtin::BI__builtin_expect) {
|
|
|
|
assert(S->getNumArgs() > 0);
|
|
|
|
assert(S->getArg(0) != nullptr);
|
2023-07-31 12:35:44 +00:00
|
|
|
auto *ArgVal = Env.getValue(*S->getArg(0));
|
|
|
|
if (ArgVal == nullptr)
|
2022-04-01 12:51:23 +00:00
|
|
|
return;
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, *ArgVal);
|
2022-07-26 17:54:13 +00:00
|
|
|
} else if (const FunctionDecl *F = S->getDirectCallee()) {
|
2022-08-10 14:01:18 +00:00
|
|
|
transferInlineCall(S, F);
|
2023-12-18 09:10:03 +01:00
|
|
|
|
|
|
|
// If this call produces a prvalue of record type, make sure that we have
|
|
|
|
// a `RecordValue` for it. This is required so that
|
|
|
|
// `Environment::getResultObjectLocation()` is able to return a location
|
|
|
|
// for this `CallExpr`.
|
|
|
|
if (S->getType()->isRecordType() && S->isPRValue())
|
|
|
|
if (Env.getValue(*S) == nullptr)
|
|
|
|
refreshRecordValue(*S, Env);
|
2022-01-13 13:53:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *S) {
|
|
|
|
const Expr *SubExpr = S->getSubExpr();
|
|
|
|
assert(SubExpr != nullptr);
|
|
|
|
|
[clang][dataflow] Reverse course on `getValue()` deprecation.
In the [value categories RFC](https://discourse.llvm.org/t/70086), I proposed that the end state of the migration should be that `getValue()` should only be legal to call on prvalues.
As a stepping stone, to allow migrating off existing calls to `getValue()`, I proposed introducing `getValueStrict()`, which would already have the new semantics.
However, I've now reconsidered this. Any expression, whether prvalue or glvalue, has a value, so really there isn't any reason to forbid calling `getValue()` on glvalues. I'm therefore removing the deprecation from `getValue()` and transitioning existing `getValueStrict()` calls back to `getValue()`.
The other "strict" accessors are a different case. `setValueStrict()` should only be called on prvalues because glvalues need to have a storage location associated with them; it doesn't make sense to only set a value for them. And, of course, `getStorageLocationStrict()` and `setStorageLocationStrict()` should obviously only be called on glvalues because prvalues don't have storage locations.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D155921
2023-07-26 12:30:53 +00:00
|
|
|
Value *SubExprVal = Env.getValue(*SubExpr);
|
2023-05-22 06:17:17 +00:00
|
|
|
if (SubExprVal == nullptr)
|
2022-01-13 13:53:52 +00:00
|
|
|
return;
|
|
|
|
|
[clang][dataflow] Rename `AggregateStorageLocation` to `RecordStorageLocation` and `StructValue` to `RecordValue`.
- Both of these constructs are used to represent structs, classes, and unions;
Clang uses the collective term "record" for these.
- The term "aggregate" in `AggregateStorageLocation` implies that, at some
point, the intention may have been to use it also for arrays, but it don't
think it's possible to use it for arrays. Records and arrays are very
different and therefore need to be modeled differently. Records have a fixed
set of named fields, which can have different type; arrays have a variable
number of elements, but they all have the same type.
- Futhermore, "aggregate" has a very specific meaning in C++
(https://en.cppreference.com/w/cpp/language/aggregate_initialization).
Aggregates of class type may not have any user-declared or inherited
constructors, no private or protected non-static data members, no virtual
member functions, and so on, but we use `AggregateStorageLocations` to model all objects of class type.
In addition, for consistency, we also rename the following:
- `getAggregateLoc()` (in `RecordValue`, formerly known as `StructValue`) to
simply `getLoc()`.
- `refreshStructValue()` to `refreshRecordValue()`
We keep the old names around as deprecated synonyms to enable clients to be migrated to the new names.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D156788
2023-08-01 13:23:37 +00:00
|
|
|
if (RecordValue *RecordVal = dyn_cast<RecordValue>(SubExprVal)) {
|
|
|
|
Env.setStorageLocation(*S, RecordVal->getLoc());
|
2023-07-20 11:12:39 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
StorageLocation &Loc = Env.createStorageLocation(*S);
|
2023-05-22 06:17:17 +00:00
|
|
|
Env.setValue(Loc, *SubExprVal);
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setStorageLocation(*S, Loc);
|
2022-01-13 13:53:52 +00:00
|
|
|
}
|
|
|
|
|
2022-01-14 18:27:39 +00:00
|
|
|
void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *S) {
|
|
|
|
const Expr *SubExpr = S->getSubExpr();
|
|
|
|
assert(SubExpr != nullptr);
|
2022-01-04 13:47:14 +00:00
|
|
|
|
2023-05-22 06:17:17 +00:00
|
|
|
propagateValue(*SubExpr, *S, Env);
|
2022-01-14 18:27:39 +00:00
|
|
|
}
|
2022-01-13 13:53:52 +00:00
|
|
|
|
2022-01-14 18:27:39 +00:00
|
|
|
void VisitCXXStaticCastExpr(const CXXStaticCastExpr *S) {
|
|
|
|
if (S->getCastKind() == CK_NoOp) {
|
|
|
|
const Expr *SubExpr = S->getSubExpr();
|
|
|
|
assert(SubExpr != nullptr);
|
2022-01-04 13:47:14 +00:00
|
|
|
|
2023-05-22 06:17:17 +00:00
|
|
|
propagateValueOrStorageLocation(*SubExpr, *S, Env);
|
2022-01-04 13:47:14 +00:00
|
|
|
}
|
2021-12-29 11:31:02 +00:00
|
|
|
}
|
|
|
|
|
2022-01-19 13:56:21 +00:00
|
|
|
void VisitConditionalOperator(const ConditionalOperator *S) {
|
|
|
|
// FIXME: Revisit this once flow conditions are added to the framework. For
|
|
|
|
// `a = b ? c : d` we can add `b => a == c && !b => a == d` to the flow
|
|
|
|
// condition.
|
2023-11-22 16:34:24 +01:00
|
|
|
// When we do this, we will need to retrieve the values of the operands from
|
|
|
|
// the environments for the basic blocks they are computed in, in a similar
|
|
|
|
// way to how this is done for short-circuited logical operators in
|
|
|
|
// `getLogicOperatorSubExprValue()`.
|
2023-07-20 11:12:39 +00:00
|
|
|
if (S->isGLValue())
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setStorageLocation(*S, Env.createObject(S->getType()));
|
2023-07-20 11:12:39 +00:00
|
|
|
else if (Value *Val = Env.createValue(S->getType()))
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, *Val);
|
2022-01-19 13:56:21 +00:00
|
|
|
}
|
|
|
|
|
2022-01-24 16:17:22 +00:00
|
|
|
void VisitInitListExpr(const InitListExpr *S) {
|
|
|
|
QualType Type = S->getType();
|
|
|
|
|
2023-07-20 11:12:39 +00:00
|
|
|
if (!Type->isStructureOrClassType()) {
|
|
|
|
if (auto *Val = Env.createValue(Type))
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, *Val);
|
2022-01-24 16:17:22 +00:00
|
|
|
|
|
|
|
return;
|
2023-07-20 11:12:39 +00:00
|
|
|
}
|
2022-01-24 16:17:22 +00:00
|
|
|
|
2023-07-21 12:51:01 +00:00
|
|
|
// In case the initializer list is transparent, we just need to propagate
|
|
|
|
// the value that it contains.
|
|
|
|
if (S->isSemanticForm() && S->isTransparent()) {
|
|
|
|
propagateValue(*S->getInit(0), *S, Env);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2023-07-20 11:12:39 +00:00
|
|
|
llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
|
|
|
|
|
2023-09-07 07:02:57 +00:00
|
|
|
// This only contains the direct fields for the given type.
|
|
|
|
std::vector<FieldDecl *> FieldsForInit =
|
|
|
|
getFieldsForInitListExpr(Type->getAsRecordDecl());
|
2023-07-20 11:12:39 +00:00
|
|
|
|
2023-09-07 07:02:57 +00:00
|
|
|
// `S->inits()` contains all the initializer expressions, including the
|
|
|
|
// ones for direct base classes.
|
|
|
|
auto Inits = S->inits();
|
|
|
|
size_t InitIdx = 0;
|
|
|
|
|
|
|
|
// Initialize base classes.
|
|
|
|
if (auto* R = S->getType()->getAsCXXRecordDecl()) {
|
|
|
|
assert(FieldsForInit.size() + R->getNumBases() == Inits.size());
|
|
|
|
for ([[maybe_unused]] const CXXBaseSpecifier &Base : R->bases()) {
|
|
|
|
assert(InitIdx < Inits.size());
|
|
|
|
auto Init = Inits[InitIdx++];
|
|
|
|
assert(Base.getType().getCanonicalType() ==
|
|
|
|
Init->getType().getCanonicalType());
|
2023-12-21 09:02:20 +01:00
|
|
|
auto *BaseVal = Env.get<RecordValue>(*Init);
|
2023-09-07 07:02:57 +00:00
|
|
|
if (!BaseVal)
|
|
|
|
BaseVal = cast<RecordValue>(Env.createValue(Init->getType()));
|
|
|
|
// Take ownership of the fields of the `RecordValue` for the base class
|
|
|
|
// and incorporate them into the "flattened" set of fields for the
|
|
|
|
// derived class.
|
|
|
|
auto Children = BaseVal->getLoc().children();
|
|
|
|
FieldLocs.insert(Children.begin(), Children.end());
|
|
|
|
}
|
2022-01-24 16:17:22 +00:00
|
|
|
}
|
2023-07-20 11:12:39 +00:00
|
|
|
|
2023-09-07 07:02:57 +00:00
|
|
|
assert(FieldsForInit.size() == Inits.size() - InitIdx);
|
|
|
|
for (auto Field : FieldsForInit) {
|
|
|
|
assert(InitIdx < Inits.size());
|
|
|
|
auto Init = Inits[InitIdx++];
|
|
|
|
assert(
|
|
|
|
// The types are same, or
|
|
|
|
Field->getType().getCanonicalType().getUnqualifiedType() ==
|
2023-11-09 10:57:04 -05:00
|
|
|
Init->getType().getCanonicalType().getUnqualifiedType() ||
|
2023-09-07 07:02:57 +00:00
|
|
|
// The field's type is T&, and initializer is T
|
|
|
|
(Field->getType()->isReferenceType() &&
|
2023-11-09 10:57:04 -05:00
|
|
|
Field->getType().getCanonicalType()->getPointeeType() ==
|
|
|
|
Init->getType().getCanonicalType()));
|
2023-09-07 07:02:57 +00:00
|
|
|
auto& Loc = Env.createObject(Field->getType(), Init);
|
|
|
|
FieldLocs.insert({Field, &Loc});
|
|
|
|
}
|
|
|
|
|
2023-09-26 08:49:11 +02:00
|
|
|
// Check that we satisfy the invariant that a `RecordStorageLocation`
|
|
|
|
// contains exactly the set of modeled fields for that type.
|
|
|
|
// `ModeledFields` includes fields from all the bases, but only the
|
|
|
|
// modeled ones. However, if a class type is initialized with an
|
|
|
|
// `InitListExpr`, all fields in the class, including those from base
|
|
|
|
// classes, are included in the set of modeled fields. The code above
|
|
|
|
// should therefore populate exactly the modeled fields.
|
[clang][dataflow] Add synthetic fields to `RecordStorageLocation` (#73860)
Synthetic fields are intended to model the internal state of a class
(e.g. the value stored in a `std::optional`) without having to depend on
that class's implementation details.
Today, this is typically done with properties on `RecordValue`s, but
these have several drawbacks:
* Care must be taken to call `refreshRecordValue()` before modifying a
property so that the modified property values aren’t seen by other
environments that may have access to the same `RecordValue`.
* Properties aren’t associated with a storage location. If an analysis
needs to associate a location with the value stored in a property (e.g.
to model the reference returned by `std::optional::value()`), it needs
to manually add an indirection using a `PointerValue`. (See for example
the way this is done in UncheckedOptionalAccessModel.cpp, specifically
in `maybeInitializeOptionalValueMember()`.)
* Properties don’t participate in the builtin compare, join, and widen
operations. If an analysis needs to apply these operations to
properties, it needs to override the corresponding methods of
`ValueModel`.
* Longer-term, we plan to eliminate `RecordValue`, as by-value
operations on records aren’t really “a thing” in C++ (see
https://discourse.llvm.org/t/70086#changed-structvalue-api-14). This
would obviously eliminate the ability to set properties on
`RecordValue`s.
To demonstrate the advantages of synthetic fields, this patch converts
UncheckedOptionalAccessModel.cpp to synthetic fields. This greatly
simplifies the implementation of the check.
This PR is pretty big; to make it easier to review, I have broken it
down into a stack of three commits, each of which contains a set of
logically related changes. I considered submitting each of these as a
separate PR, but the commits only really make sense when taken together.
To review, I suggest first looking at the changes in
UncheckedOptionalAccessModel.cpp. This gives a flavor for how the
various API changes work together in the context of an analysis. Then,
review the rest of the changes.
2023-12-04 09:29:22 +01:00
|
|
|
assert(containsSameFields(
|
|
|
|
Env.getDataflowAnalysisContext().getModeledFields(Type), FieldLocs));
|
|
|
|
|
|
|
|
RecordStorageLocation::SyntheticFieldMap SyntheticFieldLocs;
|
|
|
|
for (const auto &Entry :
|
|
|
|
Env.getDataflowAnalysisContext().getSyntheticFields(Type)) {
|
|
|
|
SyntheticFieldLocs.insert(
|
|
|
|
{Entry.getKey(), &Env.createObject(Entry.getValue())});
|
|
|
|
}
|
|
|
|
|
|
|
|
auto &Loc = Env.getDataflowAnalysisContext().createRecordStorageLocation(
|
|
|
|
Type, std::move(FieldLocs), std::move(SyntheticFieldLocs));
|
[clang][dataflow] Rename `AggregateStorageLocation` to `RecordStorageLocation` and `StructValue` to `RecordValue`.
- Both of these constructs are used to represent structs, classes, and unions;
Clang uses the collective term "record" for these.
- The term "aggregate" in `AggregateStorageLocation` implies that, at some
point, the intention may have been to use it also for arrays, but I don't
think it's possible to use it for arrays. Records and arrays are very
different and therefore need to be modeled differently. Records have a fixed
set of named fields, which can have different type; arrays have a variable
number of elements, but they all have the same type.
- Furthermore, "aggregate" has a very specific meaning in C++
(https://en.cppreference.com/w/cpp/language/aggregate_initialization).
Aggregates of class type may not have any user-declared or inherited
constructors, no private or protected non-static data members, no virtual
member functions, and so on, but we use `AggregateStorageLocations` to model all objects of class type.
In addition, for consistency, we also rename the following:
- `getAggregateLoc()` (in `RecordValue`, formerly known as `StructValue`) to
simply `getLoc()`.
- `refreshStructValue()` to `refreshRecordValue()`
We keep the old names around as deprecated synonyms to enable clients to be migrated to the new names.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D156788
2023-08-01 13:23:37 +00:00
|
|
|
RecordValue &RecordVal = Env.create<RecordValue>(Loc);
|
2023-07-20 11:12:39 +00:00
|
|
|
|
[clang][dataflow] Rename `AggregateStorageLocation` to `RecordStorageLocation` and `StructValue` to `RecordValue`.
- Both of these constructs are used to represent structs, classes, and unions;
Clang uses the collective term "record" for these.
- The term "aggregate" in `AggregateStorageLocation` implies that, at some
point, the intention may have been to use it also for arrays, but I don't
think it's possible to use it for arrays. Records and arrays are very
different and therefore need to be modeled differently. Records have a fixed
set of named fields, which can have different type; arrays have a variable
number of elements, but they all have the same type.
- Furthermore, "aggregate" has a very specific meaning in C++
(https://en.cppreference.com/w/cpp/language/aggregate_initialization).
Aggregates of class type may not have any user-declared or inherited
constructors, no private or protected non-static data members, no virtual
member functions, and so on, but we use `AggregateStorageLocations` to model all objects of class type.
In addition, for consistency, we also rename the following:
- `getAggregateLoc()` (in `RecordValue`, formerly known as `StructValue`) to
simply `getLoc()`.
- `refreshStructValue()` to `refreshRecordValue()`
We keep the old names around as deprecated synonyms to enable clients to be migrated to the new names.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D156788
2023-08-01 13:23:37 +00:00
|
|
|
Env.setValue(Loc, RecordVal);
|
2023-07-20 11:12:39 +00:00
|
|
|
|
[clang][dataflow] Rename `AggregateStorageLocation` to `RecordStorageLocation` and `StructValue` to `RecordValue`.
- Both of these constructs are used to represent structs, classes, and unions;
Clang uses the collective term "record" for these.
- The term "aggregate" in `AggregateStorageLocation` implies that, at some
point, the intention may have been to use it also for arrays, but I don't
think it's possible to use it for arrays. Records and arrays are very
different and therefore need to be modeled differently. Records have a fixed
set of named fields, which can have different type; arrays have a variable
number of elements, but they all have the same type.
- Furthermore, "aggregate" has a very specific meaning in C++
(https://en.cppreference.com/w/cpp/language/aggregate_initialization).
Aggregates of class type may not have any user-declared or inherited
constructors, no private or protected non-static data members, no virtual
member functions, and so on, but we use `AggregateStorageLocations` to model all objects of class type.
In addition, for consistency, we also rename the following:
- `getAggregateLoc()` (in `RecordValue`, formerly known as `StructValue`) to
simply `getLoc()`.
- `refreshStructValue()` to `refreshRecordValue()`
We keep the old names around as deprecated synonyms to enable clients to be migrated to the new names.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D156788
2023-08-01 13:23:37 +00:00
|
|
|
Env.setValue(*S, RecordVal);
|
2023-07-20 11:12:39 +00:00
|
|
|
|
2022-01-24 16:17:22 +00:00
|
|
|
// FIXME: Implement array initialization.
|
|
|
|
}
|
|
|
|
|
2022-01-26 12:10:38 +00:00
|
|
|
void VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *S) {
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, Env.getBoolLiteralValue(S->getValue()));
|
2022-01-26 12:10:38 +00:00
|
|
|
}
|
2022-01-14 18:27:39 +00:00
|
|
|
|
2023-06-15 19:07:05 +00:00
|
|
|
void VisitIntegerLiteral(const IntegerLiteral *S) {
|
2023-07-31 12:37:01 +00:00
|
|
|
Env.setValue(*S, Env.getIntLiteralValue(S->getValue()));
|
2023-06-15 19:07:05 +00:00
|
|
|
}
|
|
|
|
|
2022-05-04 21:08:43 +00:00
|
|
|
void VisitParenExpr(const ParenExpr *S) {
|
|
|
|
// The CFG does not contain `ParenExpr` as top-level statements in basic
|
|
|
|
// blocks, however manual traversal to sub-expressions may encounter them.
|
|
|
|
// Redirect to the sub-expression.
|
|
|
|
auto *SubExpr = S->getSubExpr();
|
|
|
|
assert(SubExpr != nullptr);
|
|
|
|
Visit(SubExpr);
|
|
|
|
}
|
|
|
|
|
|
|
|
void VisitExprWithCleanups(const ExprWithCleanups *S) {
|
|
|
|
// The CFG does not contain `ExprWithCleanups` as top-level statements in
|
|
|
|
// basic blocks, however manual traversal to sub-expressions may encounter
|
|
|
|
// them. Redirect to the sub-expression.
|
|
|
|
auto *SubExpr = S->getSubExpr();
|
|
|
|
assert(SubExpr != nullptr);
|
|
|
|
Visit(SubExpr);
|
|
|
|
}
|
|
|
|
|
2022-01-14 18:27:39 +00:00
|
|
|
private:
|
2023-05-25 09:22:37 +00:00
|
|
|
/// Returns the value for the sub-expression `SubExpr` of a logic operator.
|
|
|
|
BoolValue &getLogicOperatorSubExprValue(const Expr &SubExpr) {
|
2022-03-11 11:52:53 +00:00
|
|
|
// `SubExpr` and its parent logic operator might be part of different basic
|
|
|
|
// blocks. We try to access the value that is assigned to `SubExpr` in the
|
|
|
|
// corresponding environment.
|
2023-05-25 09:22:37 +00:00
|
|
|
if (const Environment *SubExprEnv = StmtToEnv.getEnvironment(SubExpr))
|
|
|
|
if (auto *Val =
|
[clang][dataflow] Reverse course on `getValue()` deprecation.
In the [value categories RFC](https://discourse.llvm.org/t/70086), I proposed that the end state of the migration should be that `getValue()` should only be legal to call on prvalues.
As a stepping stone, to allow migrating off existing calls to `getValue()`, I proposed introducing `getValueStrict()`, which would already have the new semantics.
However, I've now reconsidered this. Any expression, whether prvalue or glvalue, has a value, so really there isn't any reason to forbid calling `getValue()` on glvalues. I'm therefore removing the deprecation from `getValue()` and transitioning existing `getValueStrict()` calls back to `getValue()`.
The other "strict" accessors are a different case. `setValueStrict()` should only be called on prvalues because glvalues need to have a storage location associated with them; it doesn't make sense to only set a value for them. And, of course, `getStorageLocationStrict()` and `setStorageLocationStrict()` should obviously only be called on glvalues because prvalues don't have storage locations.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D155921
2023-07-26 12:30:53 +00:00
|
|
|
dyn_cast_or_null<BoolValue>(SubExprEnv->getValue(SubExpr)))
|
2023-05-25 09:22:37 +00:00
|
|
|
return *Val;
|
|
|
|
|
|
|
|
// The sub-expression may lie within a basic block that isn't reachable,
|
|
|
|
// even if we need it to evaluate the current (reachable) expression
|
|
|
|
// (see https://discourse.llvm.org/t/70775). In this case, visit `SubExpr`
|
|
|
|
// within the current environment and then try to get the value that gets
|
|
|
|
// assigned to it.
|
[clang][dataflow] Reverse course on `getValue()` deprecation.
In the [value categories RFC](https://discourse.llvm.org/t/70086), I proposed that the end state of the migration should be that `getValue()` should only be legal to call on prvalues.
As a stepping stone, to allow migrating off existing calls to `getValue()`, I proposed introducing `getValueStrict()`, which would already have the new semantics.
However, I've now reconsidered this. Any expression, whether prvalue or glvalue, has a value, so really there isn't any reason to forbid calling `getValue()` on glvalues. I'm therefore removing the deprecation from `getValue()` and transitioning existing `getValueStrict()` calls back to `getValue()`.
The other "strict" accessors are a different case. `setValueStrict()` should only be called on prvalues because glvalues need to have a storage location associated with them; it doesn't make sense to only set a value for them. And, of course, `getStorageLocationStrict()` and `setStorageLocationStrict()` should obviously only be called on glvalues because prvalues don't have storage locations.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D155921
2023-07-26 12:30:53 +00:00
|
|
|
if (Env.getValue(SubExpr) == nullptr)
|
2022-05-17 18:08:25 +00:00
|
|
|
Visit(&SubExpr);
|
[clang][dataflow] Reverse course on `getValue()` deprecation.
In the [value categories RFC](https://discourse.llvm.org/t/70086), I proposed that the end state of the migration should be that `getValue()` should only be legal to call on prvalues.
As a stepping stone, to allow migrating off existing calls to `getValue()`, I proposed introducing `getValueStrict()`, which would already have the new semantics.
However, I've now reconsidered this. Any expression, whether prvalue or glvalue, has a value, so really there isn't any reason to forbid calling `getValue()` on glvalues. I'm therefore removing the deprecation from `getValue()` and transitioning existing `getValueStrict()` calls back to `getValue()`.
The other "strict" accessors are a different case. `setValueStrict()` should only be called on prvalues because glvalues need to have a storage location associated with them; it doesn't make sense to only set a value for them. And, of course, `getStorageLocationStrict()` and `setStorageLocationStrict()` should obviously only be called on glvalues because prvalues don't have storage locations.
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D155921
2023-07-26 12:30:53 +00:00
|
|
|
if (auto *Val = dyn_cast_or_null<BoolValue>(Env.getValue(SubExpr)))
|
2023-05-25 09:22:37 +00:00
|
|
|
return *Val;
|
2022-03-11 11:52:53 +00:00
|
|
|
|
|
|
|
// If the value of `SubExpr` is still unknown, we create a fresh symbolic
|
|
|
|
// boolean value for it.
|
2023-05-25 09:22:37 +00:00
|
|
|
return Env.makeAtomicBoolValue();
|
2022-03-11 11:52:53 +00:00
|
|
|
}
|
|
|
|
|
2022-08-10 14:01:18 +00:00
|
|
|
// If context sensitivity is enabled, try to analyze the body of the callee
|
|
|
|
// `F` of `S`. The type `E` must be either `CallExpr` or `CXXConstructExpr`.
|
|
|
|
template <typename E>
|
|
|
|
void transferInlineCall(const E *S, const FunctionDecl *F) {
|
2023-05-02 00:08:30 +00:00
|
|
|
const auto &Options = Env.getDataflowAnalysisContext().getOptions();
|
2022-08-15 19:58:23 +00:00
|
|
|
if (!(Options.ContextSensitiveOpts &&
|
|
|
|
Env.canDescend(Options.ContextSensitiveOpts->Depth, F)))
|
2022-08-10 14:01:18 +00:00
|
|
|
return;
|
|
|
|
|
2023-05-02 00:08:30 +00:00
|
|
|
const ControlFlowContext *CFCtx =
|
|
|
|
Env.getDataflowAnalysisContext().getControlFlowContext(F);
|
2022-08-10 14:01:18 +00:00
|
|
|
if (!CFCtx)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// FIXME: We don't support context-sensitive analysis of recursion, so
|
|
|
|
// we should return early here if `F` is the same as the `FunctionDecl`
|
|
|
|
// holding `S` itself.
|
|
|
|
|
|
|
|
auto ExitBlock = CFCtx->getCFG().getExit().getBlockID();
|
|
|
|
|
|
|
|
auto CalleeEnv = Env.pushCall(S);
|
|
|
|
|
|
|
|
// FIXME: Use the same analysis as the caller for the callee. Note,
|
|
|
|
// though, that doing so would require support for changing the analysis's
|
|
|
|
// ASTContext.
|
2023-08-02 07:32:00 +00:00
|
|
|
auto Analysis = NoopAnalysis(CFCtx->getDecl().getASTContext(),
|
2022-08-15 19:58:23 +00:00
|
|
|
DataflowAnalysisOptions{Options});
|
2022-08-10 14:01:18 +00:00
|
|
|
|
|
|
|
auto BlockToOutputState =
|
|
|
|
dataflow::runDataflowAnalysis(*CFCtx, Analysis, CalleeEnv);
|
|
|
|
assert(BlockToOutputState);
|
|
|
|
assert(ExitBlock < BlockToOutputState->size());
|
|
|
|
|
2023-06-24 02:45:17 +02:00
|
|
|
auto &ExitState = (*BlockToOutputState)[ExitBlock];
|
2022-08-10 14:01:18 +00:00
|
|
|
assert(ExitState);
|
|
|
|
|
2023-05-23 09:35:52 +00:00
|
|
|
Env.popCall(S, ExitState->Env);
|
2022-08-10 14:01:18 +00:00
|
|
|
}
|
|
|
|
|
2022-02-16 16:47:37 +00:00
|
|
|
const StmtToEnvMap &StmtToEnv;
|
2021-12-29 11:31:02 +00:00
|
|
|
Environment &Env;
|
|
|
|
};
|
|
|
|
|
2023-03-28 08:07:51 +00:00
|
|
|
} // namespace
|
|
|
|
|
2022-12-27 17:34:30 +00:00
|
|
|
void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env) {
|
|
|
|
TransferVisitor(StmtToEnv, Env).Visit(&S);
|
2021-12-29 11:31:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace dataflow
|
|
|
|
} // namespace clang
|