[OffloadBundler] Expose function to parse compressed bundle headers (#130284)

In COMGR we hash the header of compressed bundles. For this we take the
first bytes of the buffer (according to the maximum header size) and
hash them.

To have a more stable API, and to be able to pick only the hash field of the
header (the only field we are actually interested in), we propose a
version-independent header representation that is common to all versions.
This commit is contained in:
Juan Manuel Martinez Caamaño 2025-03-19 09:10:40 +01:00 committed by GitHub
parent e9988c36ed
commit 614d8557dc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 127 additions and 101 deletions

View File

@ -107,50 +107,20 @@ struct OffloadTargetInfo {
// - Compressed Data (variable length).
class CompressedOffloadBundle {
private:
static inline const size_t MagicSize = 4;
static inline const size_t VersionFieldSize = sizeof(uint16_t);
static inline const size_t MethodFieldSize = sizeof(uint16_t);
// Legacy size fields for V1/V2
static inline const size_t FileSizeFieldSizeV2 = sizeof(uint32_t);
static inline const size_t UncompressedSizeFieldSizeV2 = sizeof(uint32_t);
// New size fields for V3
static inline const size_t FileSizeFieldSizeV3 = sizeof(uint64_t);
static inline const size_t UncompressedSizeFieldSizeV3 = sizeof(uint64_t);
static inline const size_t HashFieldSize = sizeof(uint64_t);
// Keep V1 header size for backward compatibility
static inline const size_t V1HeaderSize =
MagicSize + VersionFieldSize + MethodFieldSize +
UncompressedSizeFieldSizeV2 + HashFieldSize;
// Keep V2 header size for backward compatibility
static inline const size_t V2HeaderSize =
MagicSize + VersionFieldSize + FileSizeFieldSizeV2 + MethodFieldSize +
UncompressedSizeFieldSizeV2 + HashFieldSize;
// Add V3 header size with 64-bit fields
static inline const size_t V3HeaderSize =
MagicSize + VersionFieldSize + FileSizeFieldSizeV3 + MethodFieldSize +
UncompressedSizeFieldSizeV3 + HashFieldSize;
static inline const llvm::StringRef MagicNumber = "CCOB";
public:
static inline const uint16_t DefaultVersion = 2;
// Header fields of a compressed bundle, stored in one form regardless of the
// version the header was serialized with.
struct CompressedBundleHeader {
// Header format version.
unsigned Version;
// Compression format the payload was compressed with.
llvm::compression::Format CompressionFormat;
// File size field from the header; optional, so it may be absent.
std::optional<size_t> FileSize;
// Uncompressed size field from the header.
size_t UncompressedFileSize;
// Hash field from the header.
uint64_t Hash;
// Helper method to get header size based on version
// Versions 1, 2 and 3 are handled; any other value is treated as
// unreachable.
static size_t getHeaderSize(uint16_t Version) {
switch (Version) {
case 1:
return V1HeaderSize;
case 2:
return V2HeaderSize;
case 3:
return V3HeaderSize;
default:
llvm_unreachable("Unsupported version");
}
}
// Parses a header from the given buffer, returning either the header or an
// error.
static llvm::Expected<CompressedBundleHeader> tryParse(llvm::StringRef);
};
static inline const uint16_t DefaultVersion = 2;
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,

View File

@ -29,6 +29,7 @@
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
@ -1127,13 +1128,116 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
}
// Use packed structs to avoid padding, such that the structs map the serialized
// format.
LLVM_PACKED_START
union RawCompressedBundleHeader {
struct CommonFields {
uint32_t Magic;
uint16_t Version;
uint16_t Method;
};
struct V1Header {
CommonFields Common;
uint32_t UncompressedFileSize;
uint64_t Hash;
};
struct V2Header {
CommonFields Common;
uint32_t FileSize;
uint32_t UncompressedFileSize;
uint64_t Hash;
};
struct V3Header {
CommonFields Common;
uint64_t FileSize;
uint64_t UncompressedFileSize;
uint64_t Hash;
};
CommonFields Common;
V1Header V1;
V2Header V2;
V3Header V3;
};
LLVM_PACKED_END
// Size in bytes of the serialized header for the given format version.
// Only versions 1, 2 and 3 are supported; any other value is treated as
// unreachable, so callers must pass a supported version.
static size_t getHeaderSize(uint16_t Version) {
  if (Version == 1)
    return sizeof(RawCompressedBundleHeader::V1Header);
  if (Version == 2)
    return sizeof(RawCompressedBundleHeader::V2Header);
  if (Version == 3)
    return sizeof(RawCompressedBundleHeader::V3Header);
  llvm_unreachable("Unsupported version");
}
// Parse the header at the start of a compressed bundle into its
// version-independent form.
//
// Blob must hold at least the fields common to every header version and must
// be identified as a compressed offload bundle; both conditions are asserted
// rather than reported as errors.
//
// Returns the normalized header, or an error when:
//  - the version is not 1, 2 or 3,
//  - Blob is shorter than the header of its version, or
//  - the compression method is neither zlib nor zstd.
Expected<CompressedOffloadBundle::CompressedBundleHeader>
CompressedOffloadBundle::CompressedBundleHeader::tryParse(StringRef Blob) {
  assert(Blob.size() >= sizeof(RawCompressedBundleHeader::CommonFields));
  assert(llvm::identify_magic(Blob) ==
         llvm::file_magic::offload_bundle_compressed);

  // Blob may be shorter than the largest header layout, so copy only the
  // bytes that are there. The bytes are copied as-is, with no byte-order
  // conversion. Version-specific fields are read only after the size check
  // below has confirmed they were copied.
  RawCompressedBundleHeader Header;
  memcpy(&Header, Blob.data(), std::min(Blob.size(), sizeof(Header)));

  CompressedBundleHeader Normalized;
  Normalized.Version = Header.Common.Version;

  // Reject unknown versions before asking for the header size: getHeaderSize
  // treats an unsupported version as unreachable, so calling it first would
  // turn a malformed input into undefined behavior instead of an error.
  if (Normalized.Version < 1 || Normalized.Version > 3)
    return createStringError(inconvertibleErrorCode(),
                             "Unknown compressed bundle version");

  size_t RequiredSize = getHeaderSize(Normalized.Version);
  if (Blob.size() < RequiredSize)
    return createStringError(inconvertibleErrorCode(),
                             "Compressed bundle header size too small");

  switch (Normalized.Version) {
  case 1:
    // Version 1 has no file size field; FileSize stays unset.
    Normalized.UncompressedFileSize = Header.V1.UncompressedFileSize;
    Normalized.Hash = Header.V1.Hash;
    break;
  case 2:
    Normalized.FileSize = Header.V2.FileSize;
    Normalized.UncompressedFileSize = Header.V2.UncompressedFileSize;
    Normalized.Hash = Header.V2.Hash;
    break;
  case 3:
    Normalized.FileSize = Header.V3.FileSize;
    Normalized.UncompressedFileSize = Header.V3.UncompressedFileSize;
    Normalized.Hash = Header.V3.Hash;
    break;
  default:
    // Already excluded by the range check above; kept as a defensive error
    // in case the two ever get out of sync.
    return createStringError(inconvertibleErrorCode(),
                             "Unknown compressed bundle version");
  }

  // Determine compression format
  switch (Header.Common.Method) {
  case static_cast<uint16_t>(compression::Format::Zlib):
  case static_cast<uint16_t>(compression::Format::Zstd):
    Normalized.CompressionFormat =
        static_cast<compression::Format>(Header.Common.Method);
    break;
  default:
    return createStringError(inconvertibleErrorCode(),
                             "Unknown compressing method");
  }

  return Normalized;
}
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
bool Verbose) {
StringRef Blob = Input.getBuffer();
// Check minimum header size (using V1 as it's the smallest)
if (Blob.size() < V1HeaderSize)
if (Blob.size() < sizeof(RawCompressedBundleHeader::CommonFields))
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
if (llvm::identify_magic(Blob) !=
@ -1143,68 +1247,20 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
}
size_t CurrentOffset = MagicSize;
Expected<CompressedBundleHeader> HeaderOrErr =
CompressedBundleHeader::tryParse(Blob);
if (!HeaderOrErr)
return HeaderOrErr.takeError();
// Read version
uint16_t ThisVersion;
memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
CurrentOffset += VersionFieldSize;
const CompressedBundleHeader &Normalized = *HeaderOrErr;
unsigned ThisVersion = Normalized.Version;
size_t HeaderSize = getHeaderSize(ThisVersion);
// Verify header size based on version
if (ThisVersion >= 2 && ThisVersion <= 3) {
size_t RequiredSize = (ThisVersion == 2) ? V2HeaderSize : V3HeaderSize;
if (Blob.size() < RequiredSize)
return createStringError(inconvertibleErrorCode(),
"Compressed bundle header size too small");
}
llvm::compression::Format CompressionFormat = Normalized.CompressionFormat;
// Read compression method
uint16_t CompressionMethod;
memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
CurrentOffset += MethodFieldSize;
// Read total file size (version 2+)
uint64_t TotalFileSize = 0;
if (ThisVersion >= 2) {
if (ThisVersion == 2) {
uint32_t TotalFileSize32;
memcpy(&TotalFileSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
TotalFileSize = TotalFileSize32;
CurrentOffset += FileSizeFieldSizeV2;
} else { // Version 3
memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
CurrentOffset += FileSizeFieldSizeV3;
}
}
// Read uncompressed size
uint64_t UncompressedSize = 0;
if (ThisVersion <= 2) {
uint32_t UncompressedSize32;
memcpy(&UncompressedSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
UncompressedSize = UncompressedSize32;
CurrentOffset += UncompressedSizeFieldSizeV2;
} else { // Version 3
memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
CurrentOffset += UncompressedSizeFieldSizeV3;
}
// Read hash
uint64_t StoredHash;
memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
CurrentOffset += HashFieldSize;
// Determine compression format
llvm::compression::Format CompressionFormat;
if (CompressionMethod ==
static_cast<uint16_t>(llvm::compression::Format::Zlib))
CompressionFormat = llvm::compression::Format::Zlib;
else if (CompressionMethod ==
static_cast<uint16_t>(llvm::compression::Format::Zstd))
CompressionFormat = llvm::compression::Format::Zstd;
else
return createStringError(inconvertibleErrorCode(),
"Unknown compressing method");
size_t TotalFileSize = Normalized.FileSize.value_or(0);
size_t UncompressedSize = Normalized.UncompressedFileSize;
auto StoredHash = Normalized.Hash;
llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
*ClangOffloadBundlerTimerGroup);
@ -1212,7 +1268,7 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
DecompressTimer.startTimer();
SmallVector<uint8_t, 0> DecompressedData;
StringRef CompressedData = Blob.substr(CurrentOffset);
StringRef CompressedData = Blob.substr(HeaderSize);
if (llvm::Error DecompressionError = llvm::compression::decompress(
CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
DecompressedData, UncompressedSize))