mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-16 13:46:37 +00:00
[OffloadBundler] Expose function to parse compressed bundle headers (#130284)
In COMGR we hash the header of compressed bundles. To do so, we take the first bytes of the buffer (up to the maximum header size) and hash them. To have a more stable API, and to be able to pick out only the hash field of the header (the only field we are actually interested in), we propose a version-independent header representation that is common to all header versions.
This commit is contained in:
parent
e9988c36ed
commit
614d8557dc
@ -107,50 +107,20 @@ struct OffloadTargetInfo {
|
||||
// - Compressed Data (variable length).
|
||||
class CompressedOffloadBundle {
|
||||
private:
|
||||
static inline const size_t MagicSize = 4;
|
||||
static inline const size_t VersionFieldSize = sizeof(uint16_t);
|
||||
static inline const size_t MethodFieldSize = sizeof(uint16_t);
|
||||
// Legacy size fields for V1/V2
|
||||
static inline const size_t FileSizeFieldSizeV2 = sizeof(uint32_t);
|
||||
static inline const size_t UncompressedSizeFieldSizeV2 = sizeof(uint32_t);
|
||||
// New size fields for V3
|
||||
static inline const size_t FileSizeFieldSizeV3 = sizeof(uint64_t);
|
||||
static inline const size_t UncompressedSizeFieldSizeV3 = sizeof(uint64_t);
|
||||
static inline const size_t HashFieldSize = sizeof(uint64_t);
|
||||
|
||||
// Keep V1 header size for backward compatibility
|
||||
static inline const size_t V1HeaderSize =
|
||||
MagicSize + VersionFieldSize + MethodFieldSize +
|
||||
UncompressedSizeFieldSizeV2 + HashFieldSize;
|
||||
|
||||
// Keep V2 header size for backward compatibility
|
||||
static inline const size_t V2HeaderSize =
|
||||
MagicSize + VersionFieldSize + FileSizeFieldSizeV2 + MethodFieldSize +
|
||||
UncompressedSizeFieldSizeV2 + HashFieldSize;
|
||||
|
||||
// Add V3 header size with 64-bit fields
|
||||
static inline const size_t V3HeaderSize =
|
||||
MagicSize + VersionFieldSize + FileSizeFieldSizeV3 + MethodFieldSize +
|
||||
UncompressedSizeFieldSizeV3 + HashFieldSize;
|
||||
|
||||
static inline const llvm::StringRef MagicNumber = "CCOB";
|
||||
|
||||
public:
|
||||
static inline const uint16_t DefaultVersion = 2;
|
||||
struct CompressedBundleHeader {
|
||||
unsigned Version;
|
||||
llvm::compression::Format CompressionFormat;
|
||||
std::optional<size_t> FileSize;
|
||||
size_t UncompressedFileSize;
|
||||
uint64_t Hash;
|
||||
|
||||
// Helper method to get header size based on version
|
||||
static size_t getHeaderSize(uint16_t Version) {
|
||||
switch (Version) {
|
||||
case 1:
|
||||
return V1HeaderSize;
|
||||
case 2:
|
||||
return V2HeaderSize;
|
||||
case 3:
|
||||
return V3HeaderSize;
|
||||
default:
|
||||
llvm_unreachable("Unsupported version");
|
||||
}
|
||||
}
|
||||
static llvm::Expected<CompressedBundleHeader> tryParse(llvm::StringRef);
|
||||
};
|
||||
|
||||
static inline const uint16_t DefaultVersion = 2;
|
||||
|
||||
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
|
||||
compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "llvm/Object/Binary.h"
|
||||
#include "llvm/Object/ObjectFile.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/Compression.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/EndianStream.h"
|
||||
@ -1127,13 +1128,116 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
|
||||
llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
|
||||
}
|
||||
|
||||
// Use packed structs to avoid padding, such that the structs map the serialized
|
||||
// format.
|
||||
LLVM_PACKED_START
|
||||
union RawCompressedBundleHeader {
|
||||
struct CommonFields {
|
||||
uint32_t Magic;
|
||||
uint16_t Version;
|
||||
uint16_t Method;
|
||||
};
|
||||
|
||||
struct V1Header {
|
||||
CommonFields Common;
|
||||
uint32_t UncompressedFileSize;
|
||||
uint64_t Hash;
|
||||
};
|
||||
|
||||
struct V2Header {
|
||||
CommonFields Common;
|
||||
uint32_t FileSize;
|
||||
uint32_t UncompressedFileSize;
|
||||
uint64_t Hash;
|
||||
};
|
||||
|
||||
struct V3Header {
|
||||
CommonFields Common;
|
||||
uint64_t FileSize;
|
||||
uint64_t UncompressedFileSize;
|
||||
uint64_t Hash;
|
||||
};
|
||||
|
||||
CommonFields Common;
|
||||
V1Header V1;
|
||||
V2Header V2;
|
||||
V3Header V3;
|
||||
};
|
||||
LLVM_PACKED_END
|
||||
|
||||
// Helper method to get header size based on version
|
||||
static size_t getHeaderSize(uint16_t Version) {
|
||||
switch (Version) {
|
||||
case 1:
|
||||
return sizeof(RawCompressedBundleHeader::V1Header);
|
||||
case 2:
|
||||
return sizeof(RawCompressedBundleHeader::V2Header);
|
||||
case 3:
|
||||
return sizeof(RawCompressedBundleHeader::V3Header);
|
||||
default:
|
||||
llvm_unreachable("Unsupported version");
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse the header at the start of a compressed offload bundle into a
/// version-independent CompressedBundleHeader.
///
/// \param Blob Buffer that starts with the bundle header. The caller is
///             expected to have verified that it holds at least the common
///             fields and carries the compressed-bundle magic; both
///             conditions are asserted here.
/// \returns The normalized header on success. An error is returned when the
///          version is not 1, 2 or 3, when \p Blob is shorter than the header
///          of the indicated version, or when the compression method is
///          neither Zlib nor Zstd.
Expected<CompressedOffloadBundle::CompressedBundleHeader>
CompressedOffloadBundle::CompressedBundleHeader::tryParse(StringRef Blob) {
  assert(Blob.size() >= sizeof(RawCompressedBundleHeader::CommonFields));
  assert(llvm::identify_magic(Blob) ==
         llvm::file_magic::offload_bundle_compressed);

  // Copy rather than cast: Blob carries no alignment guarantee. Only the bytes
  // actually present are copied; the size check below ensures no field past
  // the end of Blob is ever read.
  RawCompressedBundleHeader Header;
  memcpy(&Header, Blob.data(), std::min(Blob.size(), sizeof(Header)));

  CompressedBundleHeader Normalized;
  Normalized.Version = Header.Common.Version;

  // The version is read straight from the input, so it must be validated
  // before calling getHeaderSize(), which hits llvm_unreachable on anything
  // other than 1, 2 or 3.
  if (Normalized.Version < 1 || Normalized.Version > 3)
    return createStringError(inconvertibleErrorCode(),
                             "Unknown compressed bundle version");

  size_t RequiredSize = getHeaderSize(Normalized.Version);
  if (Blob.size() < RequiredSize)
    return createStringError(inconvertibleErrorCode(),
                             "Compressed bundle header size too small");

  switch (Normalized.Version) {
  case 1:
    // V1 has no total-file-size field; FileSize stays std::nullopt.
    Normalized.UncompressedFileSize = Header.V1.UncompressedFileSize;
    Normalized.Hash = Header.V1.Hash;
    break;
  case 2:
    Normalized.FileSize = Header.V2.FileSize;
    Normalized.UncompressedFileSize = Header.V2.UncompressedFileSize;
    Normalized.Hash = Header.V2.Hash;
    break;
  case 3:
    Normalized.FileSize = Header.V3.FileSize;
    Normalized.UncompressedFileSize = Header.V3.UncompressedFileSize;
    Normalized.Hash = Header.V3.Hash;
    break;
  default:
    // Kept as a defensive fallback should the range check above and this
    // switch ever get out of sync.
    return createStringError(inconvertibleErrorCode(),
                             "Unknown compressed bundle version");
  }

  // Determine compression format
  switch (Header.Common.Method) {
  case static_cast<uint16_t>(compression::Format::Zlib):
  case static_cast<uint16_t>(compression::Format::Zstd):
    Normalized.CompressionFormat =
        static_cast<compression::Format>(Header.Common.Method);
    break;
  default:
    return createStringError(inconvertibleErrorCode(),
                             "Unknown compressing method");
  }

  return Normalized;
}
|
||||
|
||||
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
|
||||
CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
|
||||
bool Verbose) {
|
||||
StringRef Blob = Input.getBuffer();
|
||||
|
||||
// Check minimum header size (using V1 as it's the smallest)
|
||||
if (Blob.size() < V1HeaderSize)
|
||||
if (Blob.size() < sizeof(RawCompressedBundleHeader::CommonFields))
|
||||
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
|
||||
|
||||
if (llvm::identify_magic(Blob) !=
|
||||
@ -1143,68 +1247,20 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
|
||||
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
|
||||
}
|
||||
|
||||
size_t CurrentOffset = MagicSize;
|
||||
Expected<CompressedBundleHeader> HeaderOrErr =
|
||||
CompressedBundleHeader::tryParse(Blob);
|
||||
if (!HeaderOrErr)
|
||||
return HeaderOrErr.takeError();
|
||||
|
||||
// Read version
|
||||
uint16_t ThisVersion;
|
||||
memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
|
||||
CurrentOffset += VersionFieldSize;
|
||||
const CompressedBundleHeader &Normalized = *HeaderOrErr;
|
||||
unsigned ThisVersion = Normalized.Version;
|
||||
size_t HeaderSize = getHeaderSize(ThisVersion);
|
||||
|
||||
// Verify header size based on version
|
||||
if (ThisVersion >= 2 && ThisVersion <= 3) {
|
||||
size_t RequiredSize = (ThisVersion == 2) ? V2HeaderSize : V3HeaderSize;
|
||||
if (Blob.size() < RequiredSize)
|
||||
return createStringError(inconvertibleErrorCode(),
|
||||
"Compressed bundle header size too small");
|
||||
}
|
||||
llvm::compression::Format CompressionFormat = Normalized.CompressionFormat;
|
||||
|
||||
// Read compression method
|
||||
uint16_t CompressionMethod;
|
||||
memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
|
||||
CurrentOffset += MethodFieldSize;
|
||||
|
||||
// Read total file size (version 2+)
|
||||
uint64_t TotalFileSize = 0;
|
||||
if (ThisVersion >= 2) {
|
||||
if (ThisVersion == 2) {
|
||||
uint32_t TotalFileSize32;
|
||||
memcpy(&TotalFileSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
|
||||
TotalFileSize = TotalFileSize32;
|
||||
CurrentOffset += FileSizeFieldSizeV2;
|
||||
} else { // Version 3
|
||||
memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
|
||||
CurrentOffset += FileSizeFieldSizeV3;
|
||||
}
|
||||
}
|
||||
|
||||
// Read uncompressed size
|
||||
uint64_t UncompressedSize = 0;
|
||||
if (ThisVersion <= 2) {
|
||||
uint32_t UncompressedSize32;
|
||||
memcpy(&UncompressedSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
|
||||
UncompressedSize = UncompressedSize32;
|
||||
CurrentOffset += UncompressedSizeFieldSizeV2;
|
||||
} else { // Version 3
|
||||
memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
|
||||
CurrentOffset += UncompressedSizeFieldSizeV3;
|
||||
}
|
||||
|
||||
// Read hash
|
||||
uint64_t StoredHash;
|
||||
memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
|
||||
CurrentOffset += HashFieldSize;
|
||||
|
||||
// Determine compression format
|
||||
llvm::compression::Format CompressionFormat;
|
||||
if (CompressionMethod ==
|
||||
static_cast<uint16_t>(llvm::compression::Format::Zlib))
|
||||
CompressionFormat = llvm::compression::Format::Zlib;
|
||||
else if (CompressionMethod ==
|
||||
static_cast<uint16_t>(llvm::compression::Format::Zstd))
|
||||
CompressionFormat = llvm::compression::Format::Zstd;
|
||||
else
|
||||
return createStringError(inconvertibleErrorCode(),
|
||||
"Unknown compressing method");
|
||||
size_t TotalFileSize = Normalized.FileSize.value_or(0);
|
||||
size_t UncompressedSize = Normalized.UncompressedFileSize;
|
||||
auto StoredHash = Normalized.Hash;
|
||||
|
||||
llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
|
||||
*ClangOffloadBundlerTimerGroup);
|
||||
@ -1212,7 +1268,7 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
|
||||
DecompressTimer.startTimer();
|
||||
|
||||
SmallVector<uint8_t, 0> DecompressedData;
|
||||
StringRef CompressedData = Blob.substr(CurrentOffset);
|
||||
StringRef CompressedData = Blob.substr(HeaderSize);
|
||||
if (llvm::Error DecompressionError = llvm::compression::decompress(
|
||||
CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
|
||||
DecompressedData, UncompressedSize))
|
||||
|
Loading…
x
Reference in New Issue
Block a user