[libc++] Fix {std, ranges}::copy for vector<bool> with small storage types (#131545)

The current implementation of `{std, ranges}::copy` fails to copy
`vector<bool>` correctly when the underlying storage type
(`__storage_type`) is smaller than `int`, such as `unsigned char`,
`unsigned short`, `uint8_t` and `uint16_t`. The root cause is that the
unsigned small storage type undergoes integer promotion to (signed)
`int`, which is then left and right shifted, leading to UB (before
C++20) and sign-bit extension (since C++20) respectively. As a result,
the underlying bit mask evaluations become incorrect, causing erroneous
copying behavior.

This patch resolves the issue by correcting the internal bitwise
operations, ensuring that `{std, ranges}::copy` operates correctly for
`vector<bool>` with any custom (unsigned) storage types.

Fixes #131692.
This commit is contained in:
Peng Liu 2025-03-19 11:55:51 -04:00 committed by GitHub
parent 480202f0d1
commit a5b3d3a03f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 308 additions and 10 deletions

View File

@ -53,7 +53,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> _
unsigned __clz = __bits_per_word - __first.__ctz_;
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
__n -= __dn;
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
__storage_type __m = std::__middle_mask<__storage_type>(__clz - __dn, __first.__ctz_);
__storage_type __b = *__first.__seg_ & __m;
*__result.__seg_ &= ~__m;
*__result.__seg_ |= __b;
@ -73,7 +73,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> _
// do last word
if (__n > 0) {
__first.__seg_ += __nw;
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
__storage_type __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
__storage_type __b = *__first.__seg_ & __m;
*__result.__seg_ &= ~__m;
*__result.__seg_ |= __b;
@ -98,11 +98,11 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> _
unsigned __clz_f = __bits_per_word - __first.__ctz_;
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
__n -= __dn;
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
__storage_type __m = std::__middle_mask<__storage_type>(__clz_f - __dn, __first.__ctz_);
__storage_type __b = *__first.__seg_ & __m;
unsigned __clz_r = __bits_per_word - __result.__ctz_;
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
__m = std::__middle_mask<__storage_type>(__clz_r - __ddn, __result.__ctz_);
*__result.__seg_ &= ~__m;
if (__result.__ctz_ > __first.__ctz_)
*__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_);
@ -112,7 +112,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> _
__result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
__dn -= __ddn;
if (__dn > 0) {
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
__m = std::__trailing_mask<__storage_type>(__bits_per_word - __dn);
*__result.__seg_ &= ~__m;
*__result.__seg_ |= __b >> (__first.__ctz_ + __ddn);
__result.__ctz_ = static_cast<unsigned>(__dn);
@ -123,7 +123,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> _
// __first.__ctz_ == 0;
// do middle words
unsigned __clz_r = __bits_per_word - __result.__ctz_;
__storage_type __m = ~__storage_type(0) << __result.__ctz_;
__storage_type __m = std::__leading_mask<__storage_type>(__result.__ctz_);
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
__storage_type __b = *__first.__seg_;
*__result.__seg_ &= ~__m;
@ -134,17 +134,17 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> _
}
// do last word
if (__n > 0) {
__m = ~__storage_type(0) >> (__bits_per_word - __n);
__m = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
__storage_type __b = *__first.__seg_ & __m;
__storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
__m = std::__middle_mask<__storage_type>(__clz_r - __dn, __result.__ctz_);
*__result.__seg_ &= ~__m;
*__result.__seg_ |= __b << __result.__ctz_;
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
__n -= __dn;
if (__n > 0) {
__m = ~__storage_type(0) >> (__bits_per_word - __n);
__m = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
*__result.__seg_ &= ~__m;
*__result.__seg_ |= __b >> __dn;
__result.__ctz_ = static_cast<unsigned>(__n);

View File

@ -28,7 +28,7 @@ struct __size_difference_type_traits;
template <class _StoragePointer>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
__fill_masked_range(_StoragePointer __word, unsigned __ctz, unsigned __clz, bool __fill_val);
__fill_masked_range(_StoragePointer __word, unsigned __clz, unsigned __ctz, bool __fill_val);
template <class _StorageType>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _StorageType __trailing_mask(unsigned __clz);

View File

@ -12,10 +12,13 @@
// constexpr OutIter // constexpr after C++17
// copy(InIter first, InIter last, OutIter result);
// XFAIL: FROZEN-CXX03-HEADERS-FIXME
#include <algorithm>
#include <cassert>
#include <vector>
#include "sized_allocator.h"
#include "test_macros.h"
#include "test_iterators.h"
#include "type_algorithms.h"
@ -109,6 +112,153 @@ TEST_CONSTEXPR_CXX20 bool test() {
assert(test_vector_bool(256));
}
// Validate std::copy with std::vector<bool> iterators and custom storage types.
// Ensure that assigned bits hold the intended values, while unassigned bits stay unchanged.
// Related issue: https://github.com/llvm/llvm-project/issues/131692.
{
//// Tests for std::copy with aligned bits
{ // Test the first (partial) word for uint8_t
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
std::vector<bool, Alloc> in(8, false, Alloc(1));
std::vector<bool, Alloc> out(8, true, Alloc(1));
std::copy(in.begin() + 1, in.begin() + 2, out.begin() + 1); // out[1] = false
assert(out[1] == false);
for (std::size_t i = 0; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
if (i != 1)
assert(out[i] == true);
}
{ // Test the last (partial) word for uint8_t
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
std::vector<bool, Alloc> in(8, false, Alloc(1));
std::vector<bool, Alloc> out(8, true, Alloc(1));
std::copy(in.begin(), in.begin() + 1, out.begin()); // out[0] = false
assert(out[0] == false);
for (std::size_t i = 1; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
assert(out[i] == true);
}
{ // Test middle (whole) words for uint8_t
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
std::vector<bool, Alloc> in(32, true, Alloc(1));
for (std::size_t i = 0; i < in.size(); i += 2)
in[i] = false;
std::vector<bool, Alloc> out(32, false, Alloc(1));
std::copy(in.begin() + 4, in.end() - 4, out.begin() + 4);
for (std::size_t i = 4; i < static_cast<std::size_t>(in.size() - 4); ++i)
assert(in[i] == out[i]);
for (std::size_t i = 0; i < 4; ++i)
assert(out[i] == false);
for (std::size_t i = 28; i < out.size(); ++i)
assert(out[i] == false);
}
{ // Test the first (partial) word for uint16_t
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
std::vector<bool, Alloc> in(16, false, Alloc(1));
std::vector<bool, Alloc> out(16, true, Alloc(1));
std::copy(in.begin() + 1, in.begin() + 3, out.begin() + 1); // out[1..2] = false
assert(out[1] == false);
assert(out[2] == false);
for (std::size_t i = 0; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
if (i != 1 && i != 2)
assert(out[i] == true);
}
{ // Test the last (partial) word for uint16_t
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
std::vector<bool, Alloc> in(16, false, Alloc(1));
std::vector<bool, Alloc> out(16, true, Alloc(1));
std::copy(in.begin(), in.begin() + 2, out.begin()); // out[0..1] = false
assert(out[0] == false);
assert(out[1] == false);
for (std::size_t i = 2; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
assert(out[i] == true);
}
{ // Test middle (whole) words for uint16_t
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
std::vector<bool, Alloc> in(64, true, Alloc(1));
for (std::size_t i = 0; i < in.size(); i += 2)
in[i] = false;
std::vector<bool, Alloc> out(64, false, Alloc(1));
std::copy(in.begin() + 8, in.end() - 8, out.begin() + 8);
for (std::size_t i = 8; i < static_cast<std::size_t>(in.size() - 8); ++i)
assert(in[i] == out[i]);
for (std::size_t i = 0; i < 8; ++i)
assert(out[i] == false);
for (std::size_t i = static_cast<std::size_t>(out.size() - 8); i < out.size(); ++i)
assert(out[i] == false);
}
//// Tests for std::copy with unaligned bits
{ // Test the first (partial) word for uint8_t
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
std::vector<bool, Alloc> in(8, false, Alloc(1));
std::vector<bool, Alloc> out(8, true, Alloc(1));
std::copy(in.begin() + 7, in.end(), out.begin()); // out[0] = false
assert(out[0] == false);
for (std::size_t i = 1; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
assert(out[i] == true);
}
{ // Test the last (partial) word for uint8_t
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
std::vector<bool, Alloc> in(8, false, Alloc(1));
std::vector<bool, Alloc> out(8, true, Alloc(1));
std::copy(in.begin(), in.begin() + 1, out.begin() + 2); // out[2] = false
assert(out[2] == false);
for (std::size_t i = 1; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
if (i != 2)
assert(out[i] == true);
}
{ // Test middle (whole) words for uint8_t
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
std::vector<bool, Alloc> in(36, true, Alloc(1));
for (std::size_t i = 0; i < in.size(); i += 2)
in[i] = false;
std::vector<bool, Alloc> out(40, false, Alloc(1));
std::copy(in.begin(), in.end(), out.begin() + 4);
for (std::size_t i = 0; i < in.size(); ++i)
assert(in[i] == out[i + 4]);
for (std::size_t i = 0; i < 4; ++i)
assert(out[i] == false);
}
{ // Test the first (partial) word for uint16_t
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
std::vector<bool, Alloc> in(16, false, Alloc(1));
std::vector<bool, Alloc> out(16, true, Alloc(1));
std::copy(in.begin() + 14, in.end(), out.begin()); // out[0..1] = false
assert(out[0] == false);
assert(out[1] == false);
for (std::size_t i = 2; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
assert(out[i] == true);
}
{ // Test the last (partial) word for uint16_t
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
std::vector<bool, Alloc> in(16, false, Alloc(1));
std::vector<bool, Alloc> out(16, true, Alloc(1));
std::copy(in.begin(), in.begin() + 2, out.begin() + 1); // out[1..2] = false
assert(out[1] == false);
assert(out[2] == false);
for (std::size_t i = 0; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
if (i != 1 && i != 2)
assert(out[i] == true);
}
{ // Test middle (whole) words for uint16_t
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
std::vector<bool, Alloc> in(72, true, Alloc(1));
for (std::size_t i = 0; i < in.size(); i += 2)
in[i] = false;
std::vector<bool, Alloc> out(80, false, Alloc(1));
std::copy(in.begin(), in.end(), out.begin() + 4);
for (std::size_t i = 0; i < in.size(); ++i)
assert(in[i] == out[i + 4]);
for (std::size_t i = 0; i < 4; ++i)
assert(out[i] == false);
for (std::size_t i = in.size() + 4; i < out.size(); ++i)
assert(out[i] == false);
}
}
return true;
}

View File

@ -25,6 +25,7 @@
#include <vector>
#include "almost_satisfies_types.h"
#include "sized_allocator.h"
#include "test_iterators.h"
#include "test_macros.h"
#include "type_algorithms.h"
@ -237,6 +238,153 @@ constexpr bool test() {
assert(test_vector_bool(199));
assert(test_vector_bool(256));
}
// Validate std::ranges::copy with std::vector<bool> iterators and custom storage types.
// Ensure that assigned bits hold the intended values, while unassigned bits stay unchanged.
// Related issue: https://github.com/llvm/llvm-project/issues/131692.
{
//// Tests for std::ranges::copy with aligned bits
{ // Test the first (partial) word for uint8_t
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
std::vector<bool, Alloc> in(8, false, Alloc(1));
std::vector<bool, Alloc> out(8, true, Alloc(1));
std::ranges::copy(std::ranges::subrange(in.begin() + 1, in.begin() + 2), out.begin() + 1); // out[1] = false
assert(out[1] == false);
for (std::size_t i = 0; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
if (i != 1)
assert(out[i] == true);
}
{ // Test the last (partial) word for uint8_t
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
std::vector<bool, Alloc> in(8, false, Alloc(1));
std::vector<bool, Alloc> out(8, true, Alloc(1));
std::ranges::copy(std::ranges::subrange(in.begin(), in.begin() + 1), out.begin()); // out[0] = false
assert(out[0] == false);
for (std::size_t i = 1; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
assert(out[i] == true);
}
{ // Test middle (whole) words for uint8_t
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
std::vector<bool, Alloc> in(32, true, Alloc(1));
for (std::size_t i = 0; i < in.size(); i += 2)
in[i] = false;
std::vector<bool, Alloc> out(32, false, Alloc(1));
std::ranges::copy(std::ranges::subrange(in.begin() + 4, in.end() - 4), out.begin() + 4);
for (std::size_t i = 4; i < static_cast<std::size_t>(in.size() - 4); ++i)
assert(in[i] == out[i]);
for (std::size_t i = 0; i < 4; ++i)
assert(out[i] == false);
for (std::size_t i = 28; i < out.size(); ++i)
assert(out[i] == false);
}
{ // Test the first (partial) word for uint16_t
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
std::vector<bool, Alloc> in(16, false, Alloc(1));
std::vector<bool, Alloc> out(16, true, Alloc(1));
std::ranges::copy(std::ranges::subrange(in.begin() + 1, in.begin() + 3), out.begin() + 1); // out[1..2] = false
assert(out[1] == false);
assert(out[2] == false);
for (std::size_t i = 0; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
if (i != 1 && i != 2)
assert(out[i] == true);
}
{ // Test the last (partial) word for uint16_t
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
std::vector<bool, Alloc> in(16, false, Alloc(1));
std::vector<bool, Alloc> out(16, true, Alloc(1));
std::ranges::copy(std::ranges::subrange(in.begin(), in.begin() + 2), out.begin()); // out[0..1] = false
assert(out[0] == false);
assert(out[1] == false);
for (std::size_t i = 2; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
assert(out[i] == true);
}
{ // Test middle (whole) words for uint16_t
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
std::vector<bool, Alloc> in(64, true, Alloc(1));
for (std::size_t i = 0; i < in.size(); i += 2)
in[i] = false;
std::vector<bool, Alloc> out(64, false, Alloc(1));
std::ranges::copy(std::ranges::subrange(in.begin() + 8, in.end() - 8), out.begin() + 8);
for (std::size_t i = 8; i < static_cast<std::size_t>(in.size() - 8); ++i)
assert(in[i] == out[i]);
for (std::size_t i = 0; i < 8; ++i)
assert(out[i] == false);
for (std::size_t i = static_cast<std::size_t>(out.size() - 8); i < out.size(); ++i)
assert(out[i] == false);
}
//// Tests for std::ranges::copy with unaligned bits
{ // Test the first (partial) word for uint8_t
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
std::vector<bool, Alloc> in(8, false, Alloc(1));
std::vector<bool, Alloc> out(8, true, Alloc(1));
std::ranges::copy(std::ranges::subrange(in.begin() + 7, in.end()), out.begin()); // out[0] = false
assert(out[0] == false);
for (std::size_t i = 1; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
assert(out[i] == true);
}
{ // Test the last (partial) word for uint8_t
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
std::vector<bool, Alloc> in(8, false, Alloc(1));
std::vector<bool, Alloc> out(8, true, Alloc(1));
std::ranges::copy(std::ranges::subrange(in.begin(), in.begin() + 1), out.begin() + 2); // out[2] = false
assert(out[2] == false);
for (std::size_t i = 1; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
if (i != 2)
assert(out[i] == true);
}
{ // Test middle (whole) words for uint8_t
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
std::vector<bool, Alloc> in(36, true, Alloc(1));
for (std::size_t i = 0; i < in.size(); i += 2)
in[i] = false;
std::vector<bool, Alloc> out(40, false, Alloc(1));
std::ranges::copy(std::ranges::subrange(in.begin(), in.end()), out.begin() + 4);
for (std::size_t i = 0; i < in.size(); ++i)
assert(in[i] == out[i + 4]);
for (std::size_t i = 0; i < 4; ++i)
assert(out[i] == false);
}
{ // Test the first (partial) word for uint16_t
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
std::vector<bool, Alloc> in(16, false, Alloc(1));
std::vector<bool, Alloc> out(16, true, Alloc(1));
std::ranges::copy(std::ranges::subrange(in.begin() + 14, in.end()), out.begin()); // out[0..1] = false
assert(out[0] == false);
assert(out[1] == false);
for (std::size_t i = 2; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
assert(out[i] == true);
}
{ // Test the last (partial) word for uint16_t
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
std::vector<bool, Alloc> in(16, false, Alloc(1));
std::vector<bool, Alloc> out(16, true, Alloc(1));
std::ranges::copy(std::ranges::subrange(in.begin(), in.begin() + 2), out.begin() + 1); // out[1..2] = false
assert(out[1] == false);
assert(out[2] == false);
for (std::size_t i = 0; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
if (i != 1 && i != 2)
assert(out[i] == true);
}
{ // Test middle (whole) words for uint16_t
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
std::vector<bool, Alloc> in(72, true, Alloc(1));
for (std::size_t i = 0; i < in.size(); i += 2)
in[i] = false;
std::vector<bool, Alloc> out(80, false, Alloc(1));
std::ranges::copy(std::ranges::subrange(in.begin(), in.end()), out.begin() + 4);
for (std::size_t i = 0; i < in.size(); ++i)
assert(in[i] == out[i + 4]);
for (std::size_t i = 0; i < 4; ++i)
assert(out[i] == false);
for (std::size_t i = in.size() + 4; i < out.size(); ++i)
assert(out[i] == false);
}
}
#endif
return true;