about summary refs log tree commit diff
path: root/pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch
diff options
context:
space:
mode:
Diffstat (limited to 'pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch')
-rw-r--r--pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch1191
1 files changed, 1191 insertions, 0 deletions
diff --git a/pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch b/pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch
new file mode 100644
index 0000000000000..8589ac527dcb8
--- /dev/null
+++ b/pkgs/development/rocm-modules/6/llvm/add-compression-to-clang-offload-bundler.patch
@@ -0,0 +1,1191 @@
+From 058d9ba2f54a8c7c47b6522db808db87fed82e78 Mon Sep 17 00:00:00 2001
+From: "Yaxun (Sam) Liu" <yaxun.liu@amd.com>
+Date: Thu, 4 Apr 2024 03:25:28 +0200
+Subject: [PATCH] backport 7e2823438e920d25364ff92b62ad90020c31bb59
+
+Reland "[HIP] Support compressing device binary"
+
+Original PR: https://github.com/llvm/llvm-project/pull/67162
+
+The commit was reverted due to UB detected by santizer:
+
+https://lab.llvm.org/buildbot/#/builders/238/builds/5955
+
+clang/lib/Driver/OffloadBundler.cpp:1012:25: runtime error:
+ load of misaligned address 0xaaaae2d90e7c for type
+ 'const uint64_t' (aka 'const unsigned long'), which
+ requires 8 byte alignment
+
+It was fixed by using memcpy instead of dereferencing int*
+casted from unaligned char*.
+
+Co-Authored-By: Martin Schwaighofer <mschwaig@users.noreply.github.com>
+(only did the backport)
+---
+ clang/docs/ClangOffloadBundler.rst            |  27 ++
+ clang/include/clang/Driver/OffloadBundler.h   |  37 ++
+ clang/include/clang/Driver/Options.td         |   5 +
+ clang/lib/Driver/OffloadBundler.cpp           | 346 +++++++++++++++---
+ clang/lib/Driver/ToolChains/Clang.cpp         |   7 +
+ clang/lib/Driver/ToolChains/HIPUtility.cpp    |   6 +
+ .../test/Driver/clang-offload-bundler-zlib.c  |  75 ++++
+ .../test/Driver/clang-offload-bundler-zstd.c  |  72 ++++
+ .../test/Driver/hip-offload-compress-zlib.hip |  45 +++
+ .../test/Driver/hip-offload-compress-zstd.hip |  45 +++
+ .../clang-offload-bundler/CMakeLists.txt      |   1 +
+ .../ClangOffloadBundler.cpp                   |  10 +
+ llvm/include/llvm/BinaryFormat/Magic.h        |  28 +-
+ llvm/lib/BinaryFormat/Magic.cpp               |  11 +
+ llvm/lib/Object/Binary.cpp                    |   2 +
+ llvm/lib/Object/ObjectFile.cpp                |   2 +
+ 16 files changed, 659 insertions(+), 60 deletions(-)
+ create mode 100644 clang/test/Driver/clang-offload-bundler-zlib.c
+ create mode 100644 clang/test/Driver/clang-offload-bundler-zstd.c
+ create mode 100644 clang/test/Driver/hip-offload-compress-zlib.hip
+ create mode 100644 clang/test/Driver/hip-offload-compress-zstd.hip
+
+diff --git a/clang/docs/ClangOffloadBundler.rst b/clang/docs/ClangOffloadBundler.rst
+index 432da787249b..d47997bf718d 100644
+--- a/clang/docs/ClangOffloadBundler.rst
++++ b/clang/docs/ClangOffloadBundler.rst
+@@ -498,3 +498,30 @@ target by comparing bundle ID's. Two bundle ID's are considered compatible if:
+   Verbose printing of matched/unmatched comparisons between bundle entry id of
+   a device binary from HDA and bundle entry ID of a given target processor
+   (see :ref:`compatibility-bundle-entry-id`).
++
++Compression and Decompression
++=============================
++
++``clang-offload-bundler`` provides features to compress and decompress the full
++bundle, leveraging inherent redundancies within the bundle entries. Use the
++`-compress` command-line option to enable this compression capability.
++
++The compressed offload bundle begins with a header followed by the compressed binary data:
++
++- **Magic Number (4 bytes)**:
++    This is a unique identifier to distinguish compressed offload bundles. The value is the string 'CCOB' (Compressed Clang Offload Bundle).
++
++- **Version Number (16-bit unsigned int)**:
++    This denotes the version of the compressed offload bundle format. The current version is `1`.
++
++- **Compression Method (16-bit unsigned int)**:
++    This field indicates the compression method used. The value corresponds to either `zlib` or `zstd`, represented as a 16-bit unsigned integer cast from the LLVM compression enumeration.
++
++- **Uncompressed Binary Size (32-bit unsigned int)**:
++    This is the size (in bytes) of the binary data before it was compressed.
++
++- **Hash (64-bit unsigned int)**:
++    This is a 64-bit truncated MD5 hash of the uncompressed binary data. It serves for verification and caching purposes.
++
++- **Compressed Data**:
++    The actual compressed binary data follows the header. Its size can be inferred from the total size of the file minus the header size.
+diff --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h
+index fe263f0540b9..fc96f200414d 100644
+--- a/clang/include/clang/Driver/OffloadBundler.h
++++ b/clang/include/clang/Driver/OffloadBundler.h
+@@ -19,6 +19,7 @@
+ 
+ #include "llvm/Support/Error.h"
+ #include "llvm/TargetParser/Triple.h"
++#include <llvm/Support/MemoryBuffer.h>
+ #include <string>
+ #include <vector>
+ 
+@@ -26,11 +27,15 @@ namespace clang {
+ 
+ class OffloadBundlerConfig {
+ public:
++  OffloadBundlerConfig();
++
+   bool AllowNoHost = false;
+   bool AllowMissingBundles = false;
+   bool CheckInputArchive = false;
+   bool PrintExternalCommands = false;
+   bool HipOpenmpCompatible = false;
++  bool Compress = false;
++  bool Verbose = false;
+ 
+   unsigned BundleAlignment = 1;
+   unsigned HostInputIndex = ~0u;
+@@ -82,6 +87,38 @@ struct OffloadTargetInfo {
+   std::string str() const;
+ };
+ 
++// CompressedOffloadBundle represents the format for the compressed offload
++// bundles.
++//
++// The format is as follows:
++// - Magic Number (4 bytes) - A constant "CCOB".
++// - Version (2 bytes)
++// - Compression Method (2 bytes) - Uses the values from
++// llvm::compression::Format.
++// - Uncompressed Size (4 bytes).
++// - Truncated MD5 Hash (8 bytes).
++// - Compressed Data (variable length).
++
++class CompressedOffloadBundle {
++private:
++  static inline const size_t MagicSize = 4;
++  static inline const size_t VersionFieldSize = sizeof(uint16_t);
++  static inline const size_t MethodFieldSize = sizeof(uint16_t);
++  static inline const size_t SizeFieldSize = sizeof(uint32_t);
++  static inline const size_t HashFieldSize = 8;
++  static inline const size_t HeaderSize = MagicSize + VersionFieldSize +
++                                          MethodFieldSize + SizeFieldSize +
++                                          HashFieldSize;
++  static inline const llvm::StringRef MagicNumber = "CCOB";
++  static inline const uint16_t Version = 1;
++
++public:
++  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
++  compress(const llvm::MemoryBuffer &Input, bool Verbose = false);
++  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
++  decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
++};
++
+ } // namespace clang
+ 
+ #endif // LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
+diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
+index c206ab84508f..f8df98678396 100644
+--- a/clang/include/clang/Driver/Options.td
++++ b/clang/include/clang/Driver/Options.td
+@@ -984,6 +984,11 @@ def fconvergent_functions : Flag<["-"], "fconvergent-functions">, Group<f_Group>
+ def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
+   InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "
+                               "'-aux-target-cpu' and '-aux-target-feature'.">;
++
++def offload_compress : Flag<["--"], "offload-compress">,
++  HelpText<"Compress offload device binaries (HIP only)">;
++def no_offload_compress : Flag<["--"], "no-offload-compress">;
++
+ def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[NoXarchOption]>,
+   HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
+ def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[NoXarchOption]>,
+diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
+index 29250c49eb0c..69e14679da1c 100644
+--- a/clang/lib/Driver/OffloadBundler.cpp
++++ b/clang/lib/Driver/OffloadBundler.cpp
+@@ -21,24 +21,29 @@
+ #include "llvm/ADT/ArrayRef.h"
+ #include "llvm/ADT/SmallString.h"
+ #include "llvm/ADT/SmallVector.h"
++#include "llvm/ADT/StringExtras.h"
+ #include "llvm/ADT/StringMap.h"
+ #include "llvm/ADT/StringRef.h"
++#include "llvm/BinaryFormat/Magic.h"
+ #include "llvm/Object/Archive.h"
+ #include "llvm/Object/ArchiveWriter.h"
+ #include "llvm/Object/Binary.h"
+ #include "llvm/Object/ObjectFile.h"
+ #include "llvm/Support/Casting.h"
++#include "llvm/Support/Compression.h"
+ #include "llvm/Support/Debug.h"
+ #include "llvm/Support/EndianStream.h"
+ #include "llvm/Support/Errc.h"
+ #include "llvm/Support/Error.h"
+ #include "llvm/Support/ErrorOr.h"
+ #include "llvm/Support/FileSystem.h"
++#include "llvm/Support/MD5.h"
+ #include "llvm/Support/MemoryBuffer.h"
+ #include "llvm/Support/Path.h"
+ #include "llvm/Support/Program.h"
+ #include "llvm/Support/Signals.h"
+ #include "llvm/Support/StringSaver.h"
++#include "llvm/Support/Timer.h"
+ #include "llvm/Support/WithColor.h"
+ #include "llvm/Support/raw_ostream.h"
+ #include "llvm/TargetParser/Host.h"
+@@ -48,6 +53,7 @@
+ #include <cstddef>
+ #include <cstdint>
+ #include <forward_list>
++#include <llvm/Support/Process.h>
+ #include <memory>
+ #include <set>
+ #include <string>
+@@ -58,6 +64,10 @@ using namespace llvm;
+ using namespace llvm::object;
+ using namespace clang;
+ 
++static llvm::TimerGroup
++    ClangOffloadBundlerTimerGroup("Clang Offload Bundler Timer Group",
++                                  "Timer group for clang offload bundler");
++
+ /// Magic string that marks the existence of offloading data.
+ #define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
+ 
+@@ -229,20 +239,22 @@ public:
+ 
+   /// Write the header of the bundled file to \a OS based on the information
+   /// gathered from \a Inputs.
+-  virtual Error WriteHeader(raw_fd_ostream &OS,
++  virtual Error WriteHeader(raw_ostream &OS,
+                             ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) = 0;
+ 
+   /// Write the marker that initiates a bundle for the triple \a TargetTriple to
+   /// \a OS.
+-  virtual Error WriteBundleStart(raw_fd_ostream &OS,
+-                                 StringRef TargetTriple) = 0;
++  virtual Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) = 0;
+ 
+   /// Write the marker that closes a bundle for the triple \a TargetTriple to \a
+   /// OS.
+-  virtual Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) = 0;
++  virtual Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) = 0;
+ 
+   /// Write the bundle from \a Input into \a OS.
+-  virtual Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) = 0;
++  virtual Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) = 0;
++
++  /// Finalize output file.
++  virtual Error finalizeOutputFile() { return Error::success(); }
+ 
+   /// List bundle IDs in \a Input.
+   virtual Error listBundleIDs(MemoryBuffer &Input) {
+@@ -330,7 +342,7 @@ static uint64_t Read8byteIntegerFromBuffer(StringRef Buffer, size_t pos) {
+ }
+ 
+ /// Write 8-byte integers to a buffer in little-endian format.
+-static void Write8byteIntegerToBuffer(raw_fd_ostream &OS, uint64_t Val) {
++static void Write8byteIntegerToBuffer(raw_ostream &OS, uint64_t Val) {
+   llvm::support::endian::write(OS, Val, llvm::support::little);
+ }
+ 
+@@ -378,8 +390,7 @@ public:
+       return Error::success();
+ 
+     // Check if no magic was found.
+-    StringRef Magic(FC.data(), sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1);
+-    if (!Magic.equals(OFFLOAD_BUNDLER_MAGIC_STR))
++    if (llvm::identify_magic(FC) != llvm::file_magic::offload_bundle)
+       return Error::success();
+ 
+     // Read number of bundles.
+@@ -454,7 +465,7 @@ public:
+     return Error::success();
+   }
+ 
+-  Error WriteHeader(raw_fd_ostream &OS,
++  Error WriteHeader(raw_ostream &OS,
+                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
+ 
+     // Compute size of the header.
+@@ -491,19 +502,27 @@ public:
+     return Error::success();
+   }
+ 
+-  Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
++  Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
+     CurWriteBundleTarget = TargetTriple.str();
+     return Error::success();
+   }
+ 
+-  Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
++  Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
+     return Error::success();
+   }
+ 
+-  Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
++  Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
+     auto BI = BundlesInfo[CurWriteBundleTarget];
+-    OS.seek(BI.Offset);
++
++    // Pad with 0 to reach specified offset.
++    size_t CurrentPos = OS.tell();
++    size_t PaddingSize = BI.Offset > CurrentPos ? BI.Offset - CurrentPos : 0;
++    for (size_t I = 0; I < PaddingSize; ++I)
++      OS.write('\0');
++    assert(OS.tell() == BI.Offset);
++
+     OS.write(Input.getBufferStart(), Input.getBufferSize());
++
+     return Error::success();
+   }
+ };
+@@ -560,7 +579,7 @@ class ObjectFileHandler final : public FileHandler {
+       return NameOrErr.takeError();
+ 
+     // If it does not start with the reserved suffix, just skip this section.
+-    if (!NameOrErr->startswith(OFFLOAD_BUNDLER_MAGIC_STR))
++    if (llvm::identify_magic(*NameOrErr) != llvm::file_magic::offload_bundle)
+       return std::nullopt;
+ 
+     // Return the triple that is right after the reserved prefix.
+@@ -625,7 +644,7 @@ public:
+     return Error::success();
+   }
+ 
+-  Error WriteHeader(raw_fd_ostream &OS,
++  Error WriteHeader(raw_ostream &OS,
+                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
+     assert(BundlerConfig.HostInputIndex != ~0u &&
+            "Host input index not defined.");
+@@ -635,12 +654,16 @@ public:
+     return Error::success();
+   }
+ 
+-  Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
++  Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
+     ++NumberOfProcessedInputs;
+     return Error::success();
+   }
+ 
+-  Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
++  Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
++    return Error::success();
++  }
++
++  Error finalizeOutputFile() final {
+     assert(NumberOfProcessedInputs <= NumberOfInputs &&
+            "Processing more inputs that actually exist!");
+     assert(BundlerConfig.HostInputIndex != ~0u &&
+@@ -658,10 +681,6 @@ public:
+     assert(BundlerConfig.ObjcopyPath != "" &&
+            "llvm-objcopy path not specified");
+ 
+-    // We write to the output file directly. So, we close it and use the name
+-    // to pass down to llvm-objcopy.
+-    OS.close();
+-
+     // Temporary files that need to be removed.
+     TempFileHandlerRAII TempFiles;
+ 
+@@ -702,7 +721,7 @@ public:
+     return Error::success();
+   }
+ 
+-  Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
++  Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
+     return Error::success();
+   }
+ 
+@@ -799,22 +818,22 @@ protected:
+     return Error::success();
+   }
+ 
+-  Error WriteHeader(raw_fd_ostream &OS,
++  Error WriteHeader(raw_ostream &OS,
+                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
+     return Error::success();
+   }
+ 
+-  Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
++  Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
+     OS << BundleStartString << TargetTriple << "\n";
+     return Error::success();
+   }
+ 
+-  Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
++  Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
+     OS << BundleEndString << TargetTriple << "\n";
+     return Error::success();
+   }
+ 
+-  Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
++  Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
+     OS << Input.getBuffer();
+     return Error::success();
+   }
+@@ -899,6 +918,187 @@ CreateFileHandler(MemoryBuffer &FirstInput,
+                            "'" + FilesType + "': invalid file type specified");
+ }
+ 
++OffloadBundlerConfig::OffloadBundlerConfig() {
++  auto IgnoreEnvVarOpt =
++      llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
++  if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
++    return;
++
++  auto VerboseEnvVarOpt = llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_VERBOSE");
++  if (VerboseEnvVarOpt.has_value())
++    Verbose = VerboseEnvVarOpt.value() == "1";
++
++  auto CompressEnvVarOpt =
++      llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
++  if (CompressEnvVarOpt.has_value())
++    Compress = CompressEnvVarOpt.value() == "1";
++}
++
++llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
++CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
++                                  bool Verbose) {
++  llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
++                        ClangOffloadBundlerTimerGroup);
++  if (Verbose)
++    HashTimer.startTimer();
++  llvm::MD5 Hash;
++  llvm::MD5::MD5Result Result;
++  Hash.update(Input.getBuffer());
++  Hash.final(Result);
++  uint64_t TruncatedHash = Result.low();
++  if (Verbose)
++    HashTimer.stopTimer();
++
++  SmallVector<uint8_t, 0> CompressedBuffer;
++  auto BufferUint8 = llvm::ArrayRef<uint8_t>(
++      reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
++      Input.getBuffer().size());
++
++  llvm::compression::Format CompressionFormat;
++
++  if (llvm::compression::zstd::isAvailable())
++    CompressionFormat = llvm::compression::Format::Zstd;
++  else if (llvm::compression::zlib::isAvailable())
++    CompressionFormat = llvm::compression::Format::Zlib;
++  else
++    return createStringError(llvm::inconvertibleErrorCode(),
++                             "Compression not supported");
++
++  llvm::Timer CompressTimer("Compression Timer", "Compression time",
++                            ClangOffloadBundlerTimerGroup);
++  if (Verbose)
++    CompressTimer.startTimer();
++  llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer);
++  if (Verbose)
++    CompressTimer.stopTimer();
++
++  uint16_t CompressionMethod = static_cast<uint16_t>(CompressionFormat);
++  uint32_t UncompressedSize = Input.getBuffer().size();
++
++  SmallVector<char, 0> FinalBuffer;
++  llvm::raw_svector_ostream OS(FinalBuffer);
++  OS << MagicNumber;
++  OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
++  OS.write(reinterpret_cast<const char *>(&CompressionMethod),
++           sizeof(CompressionMethod));
++  OS.write(reinterpret_cast<const char *>(&UncompressedSize),
++           sizeof(UncompressedSize));
++  OS.write(reinterpret_cast<const char *>(&TruncatedHash),
++           sizeof(TruncatedHash));
++  OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
++           CompressedBuffer.size());
++
++  if (Verbose) {
++    auto MethodUsed =
++        CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib";
++    llvm::errs() << "Compressed bundle format version: " << Version << "\n"
++                 << "Compression method used: " << MethodUsed << "\n"
++                 << "Binary size before compression: " << UncompressedSize
++                 << " bytes\n"
++                 << "Binary size after compression: " << CompressedBuffer.size()
++                 << " bytes\n"
++                 << "Truncated MD5 hash: "
++                 << llvm::format_hex(TruncatedHash, 16) << "\n";
++  }
++
++  return llvm::MemoryBuffer::getMemBufferCopy(
++      llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
++}
++
++llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
++CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
++                                    bool Verbose) {
++
++  StringRef Blob = Input.getBuffer();
++
++  if (Blob.size() < HeaderSize) {
++    return llvm::MemoryBuffer::getMemBufferCopy(Blob);
++  }
++  if (llvm::identify_magic(Blob) !=
++      llvm::file_magic::offload_bundle_compressed) {
++    if (Verbose)
++      llvm::errs() << "Uncompressed bundle.\n";
++    return llvm::MemoryBuffer::getMemBufferCopy(Blob);
++  }
++
++  uint16_t ThisVersion;
++  uint16_t CompressionMethod;
++  uint32_t UncompressedSize;
++  uint64_t StoredHash;
++  memcpy(&ThisVersion, Input.getBuffer().data() + MagicNumber.size(),
++         sizeof(uint16_t));
++  memcpy(&CompressionMethod, Blob.data() + MagicSize + VersionFieldSize,
++         sizeof(uint16_t));
++  memcpy(&UncompressedSize,
++         Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize,
++         sizeof(uint32_t));
++  memcpy(&StoredHash,
++         Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize +
++             SizeFieldSize,
++         sizeof(uint64_t));
++
++  llvm::compression::Format CompressionFormat;
++  if (CompressionMethod ==
++      static_cast<uint16_t>(llvm::compression::Format::Zlib))
++    CompressionFormat = llvm::compression::Format::Zlib;
++  else if (CompressionMethod ==
++           static_cast<uint16_t>(llvm::compression::Format::Zstd))
++    CompressionFormat = llvm::compression::Format::Zstd;
++  else
++    return createStringError(inconvertibleErrorCode(),
++                             "Unknown compressing method");
++
++  llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
++                              ClangOffloadBundlerTimerGroup);
++  if (Verbose)
++    DecompressTimer.startTimer();
++
++  SmallVector<uint8_t, 0> DecompressedData;
++  StringRef CompressedData = Blob.substr(HeaderSize);
++  if (llvm::Error DecompressionError = llvm::compression::decompress(
++          CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
++          DecompressedData, UncompressedSize))
++    return createStringError(inconvertibleErrorCode(),
++                             "Could not decompress embedded file contents: " +
++                                 llvm::toString(std::move(DecompressionError)));
++
++  if (Verbose) {
++    DecompressTimer.stopTimer();
++
++    // Recalculate MD5 hash
++    llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
++                                "Hash recalculation time",
++                                ClangOffloadBundlerTimerGroup);
++    HashRecalcTimer.startTimer();
++    llvm::MD5 Hash;
++    llvm::MD5::MD5Result Result;
++    Hash.update(llvm::ArrayRef<uint8_t>(DecompressedData.data(),
++                                        DecompressedData.size()));
++    Hash.final(Result);
++    uint64_t RecalculatedHash = Result.low();
++    HashRecalcTimer.stopTimer();
++    bool HashMatch = (StoredHash == RecalculatedHash);
++
++    llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
++                 << "Decompression method: "
++                 << (CompressionFormat == llvm::compression::Format::Zlib
++                         ? "zlib"
++                         : "zstd")
++                 << "\n"
++                 << "Size before decompression: " << CompressedData.size()
++                 << " bytes\n"
++                 << "Size after decompression: " << UncompressedSize
++                 << " bytes\n"
++                 << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
++                 << "Recalculated hash: "
++                 << llvm::format_hex(RecalculatedHash, 16) << "\n"
++                 << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n";
++  }
++
++  return llvm::MemoryBuffer::getMemBufferCopy(
++      llvm::toStringRef(DecompressedData));
++}
++
+ // List bundle IDs. Return true if an error was found.
+ Error OffloadBundler::ListBundleIDsInFile(
+     StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) {
+@@ -908,28 +1108,35 @@ Error OffloadBundler::ListBundleIDsInFile(
+   if (std::error_code EC = CodeOrErr.getError())
+     return createFileError(InputFileName, EC);
+ 
+-  MemoryBuffer &Input = **CodeOrErr;
++  // Decompress the input if necessary.
++  Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
++      CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
++  if (!DecompressedBufferOrErr)
++    return createStringError(
++        inconvertibleErrorCode(),
++        "Failed to decompress input: " +
++            llvm::toString(DecompressedBufferOrErr.takeError()));
++
++  MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr;
+ 
+   // Select the right files handler.
+   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
+-      CreateFileHandler(Input, BundlerConfig);
++      CreateFileHandler(DecompressedInput, BundlerConfig);
+   if (!FileHandlerOrErr)
+     return FileHandlerOrErr.takeError();
+ 
+   std::unique_ptr<FileHandler> &FH = *FileHandlerOrErr;
+   assert(FH);
+-  return FH->listBundleIDs(Input);
++  return FH->listBundleIDs(DecompressedInput);
+ }
+ 
+ /// Bundle the files. Return true if an error was found.
+ Error OffloadBundler::BundleFiles() {
+   std::error_code EC;
+ 
+-  // Create output file.
+-  raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC,
+-                            sys::fs::OF_None);
+-  if (EC)
+-    return createFileError(BundlerConfig.OutputFileNames.front(), EC);
++  // Create a buffer to hold the content before compressing.
++  SmallVector<char, 0> Buffer;
++  llvm::raw_svector_ostream BufferStream(Buffer);
+ 
+   // Open input files.
+   SmallVector<std::unique_ptr<MemoryBuffer>, 8u> InputBuffers;
+@@ -956,22 +1163,46 @@ Error OffloadBundler::BundleFiles() {
+   assert(FH);
+ 
+   // Write header.
+-  if (Error Err = FH->WriteHeader(OutputFile, InputBuffers))
++  if (Error Err = FH->WriteHeader(BufferStream, InputBuffers))
+     return Err;
+ 
+   // Write all bundles along with the start/end markers. If an error was found
+   // writing the end of the bundle component, abort the bundle writing.
+   auto Input = InputBuffers.begin();
+   for (auto &Triple : BundlerConfig.TargetNames) {
+-    if (Error Err = FH->WriteBundleStart(OutputFile, Triple))
++    if (Error Err = FH->WriteBundleStart(BufferStream, Triple))
+       return Err;
+-    if (Error Err = FH->WriteBundle(OutputFile, **Input))
++    if (Error Err = FH->WriteBundle(BufferStream, **Input))
+       return Err;
+-    if (Error Err = FH->WriteBundleEnd(OutputFile, Triple))
++    if (Error Err = FH->WriteBundleEnd(BufferStream, Triple))
+       return Err;
+     ++Input;
+   }
+-  return Error::success();
++
++  raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC,
++                            sys::fs::OF_None);
++  if (EC)
++    return createFileError(BundlerConfig.OutputFileNames.front(), EC);
++
++  SmallVector<char, 0> CompressedBuffer;
++  if (BundlerConfig.Compress) {
++    std::unique_ptr<llvm::MemoryBuffer> BufferMemory =
++        llvm::MemoryBuffer::getMemBufferCopy(
++            llvm::StringRef(Buffer.data(), Buffer.size()));
++    auto CompressionResult =
++        CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose);
++    if (auto Error = CompressionResult.takeError())
++      return Error;
++
++    auto CompressedMemBuffer = std::move(CompressionResult.get());
++    CompressedBuffer.assign(CompressedMemBuffer->getBufferStart(),
++                            CompressedMemBuffer->getBufferEnd());
++  } else
++    CompressedBuffer = Buffer;
++
++  OutputFile.write(CompressedBuffer.data(), CompressedBuffer.size());
++
++  return FH->finalizeOutputFile();
+ }
+ 
+ // Unbundle the files. Return true if an error was found.
+@@ -982,7 +1213,16 @@ Error OffloadBundler::UnbundleFiles() {
+   if (std::error_code EC = CodeOrErr.getError())
+     return createFileError(BundlerConfig.InputFileNames.front(), EC);
+ 
+-  MemoryBuffer &Input = **CodeOrErr;
++  // Decompress the input if necessary.
++  Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
++      CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
++  if (!DecompressedBufferOrErr)
++    return createStringError(
++        inconvertibleErrorCode(),
++        "Failed to decompress input: " +
++            llvm::toString(DecompressedBufferOrErr.takeError()));
++
++  MemoryBuffer &Input = **DecompressedBufferOrErr;
+ 
+   // Select the right files handler.
+   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
+@@ -1357,22 +1597,34 @@ Error OffloadBundler::UnbundleArchive() {
+     if (!CodeObjectBufferRefOrErr)
+       return CodeObjectBufferRefOrErr.takeError();
+ 
+-    auto CodeObjectBuffer =
++    auto TempCodeObjectBuffer =
+         MemoryBuffer::getMemBuffer(*CodeObjectBufferRefOrErr, false);
+ 
++    // Decompress the buffer if necessary.
++    Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
++        CompressedOffloadBundle::decompress(*TempCodeObjectBuffer,
++                                            BundlerConfig.Verbose);
++    if (!DecompressedBufferOrErr)
++      return createStringError(
++          inconvertibleErrorCode(),
++          "Failed to decompress code object: " +
++              llvm::toString(DecompressedBufferOrErr.takeError()));
++
++    MemoryBuffer &CodeObjectBuffer = **DecompressedBufferOrErr;
++
+     Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
+-        CreateFileHandler(*CodeObjectBuffer, BundlerConfig);
++        CreateFileHandler(CodeObjectBuffer, BundlerConfig);
+     if (!FileHandlerOrErr)
+       return FileHandlerOrErr.takeError();
+ 
+     std::unique_ptr<FileHandler> &FileHandler = *FileHandlerOrErr;
+     assert(FileHandler);
+ 
+-    if (Error ReadErr = FileHandler->ReadHeader(*CodeObjectBuffer))
++    if (Error ReadErr = FileHandler->ReadHeader(CodeObjectBuffer))
+       return ReadErr;
+ 
+     Expected<std::optional<StringRef>> CurBundleIDOrErr =
+-        FileHandler->ReadBundleStart(*CodeObjectBuffer);
++        FileHandler->ReadBundleStart(CodeObjectBuffer);
+     if (!CurBundleIDOrErr)
+       return CurBundleIDOrErr.takeError();
+ 
+@@ -1393,7 +1645,7 @@ Error OffloadBundler::UnbundleArchive() {
+                                              BundlerConfig)) {
+         std::string BundleData;
+         raw_string_ostream DataStream(BundleData);
+-        if (Error Err = FileHandler->ReadBundle(DataStream, *CodeObjectBuffer))
++        if (Error Err = FileHandler->ReadBundle(DataStream, CodeObjectBuffer))
+           return Err;
+ 
+         for (auto &CompatibleTarget : CompatibleTargets) {
+@@ -1431,11 +1683,11 @@ Error OffloadBundler::UnbundleArchive() {
+         }
+       }
+ 
+-      if (Error Err = FileHandler->ReadBundleEnd(*CodeObjectBuffer))
++      if (Error Err = FileHandler->ReadBundleEnd(CodeObjectBuffer))
+         return Err;
+ 
+       Expected<std::optional<StringRef>> NextTripleOrErr =
+-          FileHandler->ReadBundleStart(*CodeObjectBuffer);
++          FileHandler->ReadBundleStart(CodeObjectBuffer);
+       if (!NextTripleOrErr)
+         return NextTripleOrErr.takeError();
+ 
+diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
+index 9e1f07d78bf1..274875c631c5 100644
+--- a/clang/lib/Driver/ToolChains/Clang.cpp
++++ b/clang/lib/Driver/ToolChains/Clang.cpp
+@@ -8400,6 +8400,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
+     }
+     CmdArgs.push_back(TCArgs.MakeArgString(UB));
+   }
++  if (TCArgs.hasFlag(options::OPT_offload_compress,
++                     options::OPT_no_offload_compress, false))
++    CmdArgs.push_back("-compress");
++  if (TCArgs.hasArg(options::OPT_v))
++    CmdArgs.push_back("-verbose");
+   // All the inputs are encoded as commands.
+   C.addCommand(std::make_unique<Command>(
+       JA, *this, ResponseFileSupport::None(),
+@@ -8494,6 +8499,8 @@ void OffloadBundler::ConstructJobMultipleOutputs(
+   }
+   CmdArgs.push_back("-unbundle");
+   CmdArgs.push_back("-allow-missing-bundles");
++  if (TCArgs.hasArg(options::OPT_v))
++    CmdArgs.push_back("-verbose");
+ 
+   // All the inputs are encoded as commands.
+   C.addCommand(std::make_unique<Command>(
+diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp
+index 8b9d8db90ffa..04efdcba20ea 100644
+--- a/clang/lib/Driver/ToolChains/HIPUtility.cpp
++++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp
+@@ -84,6 +84,12 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
+       Args.MakeArgString(std::string("-output=").append(Output));
+   BundlerArgs.push_back(BundlerOutputArg);
+ 
++  if (Args.hasFlag(options::OPT_offload_compress,
++                   options::OPT_no_offload_compress, false))
++    BundlerArgs.push_back("-compress");
++  if (Args.hasArg(options::OPT_v))
++    BundlerArgs.push_back("-verbose");
++
+   const char *Bundler = Args.MakeArgString(
+       T.getToolChain().GetProgramPath("clang-offload-bundler"));
+   C.addCommand(std::make_unique<Command>(
+diff --git a/clang/test/Driver/clang-offload-bundler-zlib.c b/clang/test/Driver/clang-offload-bundler-zlib.c
+new file mode 100644
+index 000000000000..c46c32a4a053
+--- /dev/null
++++ b/clang/test/Driver/clang-offload-bundler-zlib.c
+@@ -0,0 +1,75 @@
++// REQUIRES: zlib
++// REQUIRES: x86-registered-target
++// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}
++
++//
++// Generate the host binary to be bundled.
++//
++// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc
++
++//
++// Generate an empty file to help with the checks of empty files.
++//
++// RUN: touch %t.empty
++
++//
++// Generate device binaries to be bundled.
++//
++// RUN: echo 'Content of device file 1' > %t.tgt1
++// RUN: echo 'Content of device file 2' > %t.tgt2
++
++//
++// Check compression/decompression of offload bundle.
++//
++// RUN: env OFFLOAD_BUNDLER_COMPRESS=1 OFFLOAD_BUNDLER_VERBOSE=1 \
++// RUN:   clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
++// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc 2>&1 | \
++// RUN:   FileCheck -check-prefix=COMPRESS %s
++// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
++// RUN: env OFFLOAD_BUNDLER_VERBOSE=1 \
++// RUN:   clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
++// RUN:   -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle 2>&1 | \
++// RUN:   FileCheck -check-prefix=DECOMPRESS %s
++// RUN: diff %t.tgt1 %t.res.tgt1
++// RUN: diff %t.tgt2 %t.res.tgt2
++
++//
++// COMPRESS: Compression method used:
++// DECOMPRESS: Decompression method:
++// NOHOST-NOT: host-
++// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
++// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
++//
++
++//
++// Check -bundle-align option.
++//
++
++// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress
++// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle
++// RUN: diff %t.bc %t.res.bc
++// RUN: diff %t.tgt1 %t.res.tgt1
++// RUN: diff %t.tgt2 %t.res.tgt2
++
++//
++// Check unbundling archive.
++//
++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
++// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress
++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
++// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress
++// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc
++// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
++// RUN:   -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a
++// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s
++// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s
++// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900
++// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900
++// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906
++// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906
++
++// Some code so that we can create a binary out of this file.
++int A = 0;
++void test_func(void) {
++  ++A;
++}
+diff --git a/clang/test/Driver/clang-offload-bundler-zstd.c b/clang/test/Driver/clang-offload-bundler-zstd.c
+new file mode 100644
+index 000000000000..b2b588b72d4d
+--- /dev/null
++++ b/clang/test/Driver/clang-offload-bundler-zstd.c
+@@ -0,0 +1,72 @@
++// REQUIRES: zstd
++// REQUIRES: x86-registered-target
++// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}
++
++//
++// Generate the host binary to be bundled.
++//
++// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc
++
++//
++// Generate an empty file to help with the checks of empty files.
++//
++// RUN: touch %t.empty
++
++//
++// Generate device binaries to be bundled.
++//
++// RUN: echo 'Content of device file 1' > %t.tgt1
++// RUN: echo 'Content of device file 2' > %t.tgt2
++
++//
++// Check compression/decompression of offload bundle.
++//
++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
++// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose 2>&1 | \
++// RUN:   FileCheck -check-prefix=COMPRESS %s
++// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
++// RUN:   -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle -verbose 2>&1 | \
++// RUN:   FileCheck -check-prefix=DECOMPRESS %s
++// RUN: diff %t.tgt1 %t.res.tgt1
++// RUN: diff %t.tgt2 %t.res.tgt2
++//
++// COMPRESS: Compression method used
++// DECOMPRESS: Decompression method
++// NOHOST-NOT: host-
++// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
++// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
++//
++
++//
++// Check -bundle-align option.
++//
++
++// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress
++// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle
++// RUN: diff %t.bc %t.res.bc
++// RUN: diff %t.tgt1 %t.res.tgt1
++// RUN: diff %t.tgt2 %t.res.tgt2
++
++//
++// Check unbundling archive.
++//
++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
++// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress
++// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
++// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress
++// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc
++// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
++// RUN:   -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a
++// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s
++// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s
++// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900
++// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900
++// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906
++// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906
++
++// Some code so that we can create a binary out of this file.
++int A = 0;
++void test_func(void) {
++  ++A;
++}
+diff --git a/clang/test/Driver/hip-offload-compress-zlib.hip b/clang/test/Driver/hip-offload-compress-zlib.hip
+new file mode 100644
+index 000000000000..a29b6d037350
+--- /dev/null
++++ b/clang/test/Driver/hip-offload-compress-zlib.hip
+@@ -0,0 +1,45 @@
++// REQUIRES: zlib
++// REQUIRES: x86-registered-target
++// REQUIRES: amdgpu-registered-target
++
++// Test compress bundled bitcode.
++
++// RUN: rm -rf %T/a.bc
++// RUN: %clang -c -v --target=x86_64-linux-gnu \
++// RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
++// RUN:   -fgpu-rdc -nogpuinc -nogpulib \
++// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
++// RUN:   --offload-compress --offload-device-only --gpu-bundle-output \
++// RUN:   -o %T/a.bc \
++// RUN: 2>&1 | FileCheck %s
++
++// CHECK: clang-offload-bundler{{.*}} -type=bc
++// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
++// CHECK-SAME: -compress -verbose
++// CHECK: Compressed bundle format
++
++// Test uncompress of bundled bitcode.
++
++// RUN: %clang --hip-link -### -v --target=x86_64-linux-gnu \
++// RUN:   --offload-arch=gfx1100 --offload-arch=gfx1101 \
++// RUN:   -fgpu-rdc -nogpulib \
++// RUN:   %T/a.bc --offload-device-only \
++// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s
++
++// UNBUNDLE: clang-offload-bundler{{.*}} "-type=bc"
++// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
++// UNBUNDLE-SAME: -unbundle
++// UNBUNDLE-SAME: -verbose
++
++// Test compress bundled code objects.
++
++// RUN: %clang -c -### -v --target=x86_64-linux-gnu \
++// RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
++// RUN:   -nogpuinc -nogpulib \
++// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
++// RUN:   --offload-compress \
++// RUN: 2>&1 | FileCheck -check-prefix=CO %s
++
++// CO: clang-offload-bundler{{.*}} "-type=o"
++// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101
++// CO-SAME: "-compress" "-verbose"
+diff --git a/clang/test/Driver/hip-offload-compress-zstd.hip b/clang/test/Driver/hip-offload-compress-zstd.hip
+new file mode 100644
+index 000000000000..688c2c85329c
+--- /dev/null
++++ b/clang/test/Driver/hip-offload-compress-zstd.hip
+@@ -0,0 +1,45 @@
++// REQUIRES: zstd
++// REQUIRES: x86-registered-target
++// REQUIRES: amdgpu-registered-target
++
++// Test compress bundled bitcode.
++
++// RUN: rm -rf %T/a.bc
++// RUN: %clang -c -v --target=x86_64-linux-gnu \
++// RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
++// RUN:   -fgpu-rdc -nogpuinc -nogpulib \
++// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
++// RUN:   --offload-compress --offload-device-only --gpu-bundle-output \
++// RUN:   -o %T/a.bc \
++// RUN: 2>&1 | FileCheck %s
++
++// CHECK: clang-offload-bundler{{.*}} -type=bc
++// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
++// CHECK-SAME: -compress -verbose
++// CHECK: Compressed bundle format
++
++// Test uncompress of bundled bitcode.
++
++// RUN: %clang --hip-link -### -v --target=x86_64-linux-gnu \
++// RUN:   --offload-arch=gfx1100 --offload-arch=gfx1101 \
++// RUN:   -fgpu-rdc -nogpulib \
++// RUN:   %T/a.bc --offload-device-only \
++// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s
++
++// UNBUNDLE: clang-offload-bundler{{.*}} "-type=bc"
++// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
++// UNBUNDLE-SAME: -unbundle
++// UNBUNDLE-SAME: -verbose
++
++// Test compress bundled code objects.
++
++// RUN: %clang -c -### -v --target=x86_64-linux-gnu \
++// RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
++// RUN:   -nogpuinc -nogpulib \
++// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
++// RUN:   --offload-compress \
++// RUN: 2>&1 | FileCheck -check-prefix=CO %s
++
++// CO: clang-offload-bundler{{.*}} "-type=o"
++// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101
++// CO-SAME: "-compress" "-verbose"
+diff --git a/clang/tools/clang-offload-bundler/CMakeLists.txt b/clang/tools/clang-offload-bundler/CMakeLists.txt
+index dabd82382cdf..dec2881589a5 100644
+--- a/clang/tools/clang-offload-bundler/CMakeLists.txt
++++ b/clang/tools/clang-offload-bundler/CMakeLists.txt
+@@ -1,4 +1,5 @@
+ set(LLVM_LINK_COMPONENTS
++  BinaryFormat
+   Object
+   Support
+   TargetParser
+diff --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
+index c02b5854bded..68f29807b219 100644
+--- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
++++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
+@@ -141,6 +141,11 @@ int main(int argc, const char **argv) {
+     cl::desc("Treat hip and hipv4 offload kinds as "
+              "compatible with openmp kind, and vice versa.\n"),
+     cl::init(false), cl::cat(ClangOffloadBundlerCategory));
++  cl::opt<bool> Compress("compress",
++                         cl::desc("Compress output file when bundling.\n"),
++                         cl::init(false), cl::cat(ClangOffloadBundlerCategory));
++  cl::opt<bool> Verbose("verbose", cl::desc("Print debug information.\n"),
++                        cl::init(false), cl::cat(ClangOffloadBundlerCategory));
+ 
+   // Process commandline options and report errors
+   sys::PrintStackTraceOnErrorSignal(argv[0]);
+@@ -169,6 +174,11 @@ int main(int argc, const char **argv) {
+   BundlerConfig.BundleAlignment = BundleAlignment;
+   BundlerConfig.FilesType = FilesType;
+   BundlerConfig.ObjcopyPath = "";
++  // Do not override the default value Compress and Verbose in BundlerConfig.
++  if (Compress.getNumOccurrences() > 0)
++    BundlerConfig.Compress = Compress;
++  if (Verbose.getNumOccurrences() > 0)
++    BundlerConfig.Verbose = Verbose;
+ 
+   BundlerConfig.TargetNames = TargetNames;
+   BundlerConfig.InputFileNames = InputFileNames;
+diff --git a/llvm/include/llvm/BinaryFormat/Magic.h b/llvm/include/llvm/BinaryFormat/Magic.h
+index 329c96f5c14c..a28710dcdfaf 100644
+--- a/llvm/include/llvm/BinaryFormat/Magic.h
++++ b/llvm/include/llvm/BinaryFormat/Magic.h
+@@ -42,19 +42,21 @@ struct file_magic {
+     macho_universal_binary,                   ///< Mach-O universal binary
+     macho_file_set,                           ///< Mach-O file set binary
+     minidump,                                 ///< Windows minidump file
+-    coff_cl_gl_object,   ///< Microsoft cl.exe's intermediate code file
+-    coff_object,         ///< COFF object file
+-    coff_import_library, ///< COFF import library
+-    pecoff_executable,   ///< PECOFF executable file
+-    windows_resource,    ///< Windows compiled resource file (.res)
+-    xcoff_object_32,     ///< 32-bit XCOFF object file
+-    xcoff_object_64,     ///< 64-bit XCOFF object file
+-    wasm_object,         ///< WebAssembly Object file
+-    pdb,                 ///< Windows PDB debug info file
+-    tapi_file,           ///< Text-based Dynamic Library Stub file
+-    cuda_fatbinary,      ///< CUDA Fatbinary object file
+-    offload_binary,      ///< LLVM offload object file
+-    dxcontainer_object,  ///< DirectX container file
++    coff_cl_gl_object,         ///< Microsoft cl.exe's intermediate code file
++    coff_object,               ///< COFF object file
++    coff_import_library,       ///< COFF import library
++    pecoff_executable,         ///< PECOFF executable file
++    windows_resource,          ///< Windows compiled resource file (.res)
++    xcoff_object_32,           ///< 32-bit XCOFF object file
++    xcoff_object_64,           ///< 64-bit XCOFF object file
++    wasm_object,               ///< WebAssembly Object file
++    pdb,                       ///< Windows PDB debug info file
++    tapi_file,                 ///< Text-based Dynamic Library Stub file
++    cuda_fatbinary,            ///< CUDA Fatbinary object file
++    offload_binary,            ///< LLVM offload object file
++    dxcontainer_object,        ///< DirectX container file
++    offload_bundle,            ///< Clang offload bundle file
++    offload_bundle_compressed, ///< Compressed clang offload bundle file
+   };
+ 
+   bool is_object() const { return V != unknown; }
+diff --git a/llvm/lib/BinaryFormat/Magic.cpp b/llvm/lib/BinaryFormat/Magic.cpp
+index aa84bc36bfde..76aa1a602aa8 100644
+--- a/llvm/lib/BinaryFormat/Magic.cpp
++++ b/llvm/lib/BinaryFormat/Magic.cpp
+@@ -87,6 +87,10 @@ file_magic llvm::identify_magic(StringRef Magic) {
+     if (startswith(Magic, "BC\xC0\xDE"))
+       return file_magic::bitcode;
+     break;
++  case 'C':
++    if (startswith(Magic, "CCOB"))
++      return file_magic::offload_bundle_compressed;
++    break;
+   case '!':
+     if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
+       return file_magic::archive;
+@@ -246,6 +250,13 @@ file_magic llvm::identify_magic(StringRef Magic) {
+       return file_magic::coff_object;
+     break;
+ 
++  case '_': {
++    const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__";
++    if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic))
++      return file_magic::offload_bundle;
++    break;
++  }
++
+   default:
+     break;
+   }
+diff --git a/llvm/lib/Object/Binary.cpp b/llvm/lib/Object/Binary.cpp
+index d18aed8b3b8c..0ee9f7fac448 100644
+--- a/llvm/lib/Object/Binary.cpp
++++ b/llvm/lib/Object/Binary.cpp
+@@ -87,6 +87,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
+   case file_magic::cuda_fatbinary:
+   case file_magic::coff_cl_gl_object:
+   case file_magic::dxcontainer_object:
++  case file_magic::offload_bundle:
++  case file_magic::offload_bundle_compressed:
+     // Unrecognized object file format.
+     return errorCodeToError(object_error::invalid_file_type);
+   case file_magic::offload_binary:
+diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp
+index 56a1d09097d4..4cc95ea32f60 100644
+--- a/llvm/lib/Object/ObjectFile.cpp
++++ b/llvm/lib/Object/ObjectFile.cpp
+@@ -154,6 +154,8 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type,
+   case file_magic::cuda_fatbinary:
+   case file_magic::offload_binary:
+   case file_magic::dxcontainer_object:
++  case file_magic::offload_bundle:
++  case file_magic::offload_bundle_compressed:
+     return errorCodeToError(object_error::invalid_file_type);
+   case file_magic::tapi_file:
+     return errorCodeToError(object_error::invalid_file_type);
+-- 
+2.43.0
+