summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRich Ercolani <214141+rincebrain@users.noreply.github.com>2021-08-30 17:13:46 -0400
committerTony Hutter <hutter2@llnl.gov>2021-09-14 15:05:55 -0700
commit72a989cf60b4c7b9a46dc5854c0bd2561ce6b576 (patch)
tree8133d7ee2002161fe3f3c20c7020d3d7c5b15e5c
parent6bb6410570e6aeb19ae82d9287922927432f8c74 (diff)
Fix cross-endian interoperability of zstd
It turns out that layouts of union bitfields are a pain, and the current code results in an inconsistent layout between BE and LE systems, leading to zstd-active datasets on one erroring out on the other. Switch everyone over to the LE layout, and add compatibility code to read both. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Signed-off-by: Rich Ercolani <rincebrain@gmail.com> Closes #12008 Closes #12022
-rw-r--r--cmd/zdb/zdb.c6
-rw-r--r--include/sys/zstd/zstd.h148
-rw-r--r--module/zstd/Makefile.in1
-rw-r--r--module/zstd/include/sparc_compat.h4
-rw-r--r--module/zstd/zfs_zstd.c14
-rw-r--r--module/zstd/zstd_sparc.c11
6 files changed, 165 insertions, 19 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index c548936bc..e964e3ba8 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -2218,7 +2218,8 @@ snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen,
(void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf),
" ZSTD:size=%u:version=%u:level=%u:EMBEDDED",
- zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level);
+ zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr),
+ zfs_get_hdrlevel(&zstd_hdr));
return;
}
@@ -2242,7 +2243,8 @@ snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen,
(void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf),
" ZSTD:size=%u:version=%u:level=%u:NORMAL",
- zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level);
+ zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr),
+ zfs_get_hdrlevel(&zstd_hdr));
abd_return_buf_copy(pabd, buf, BP_GET_LSIZE(bp));
}
diff --git a/include/sys/zstd/zstd.h b/include/sys/zstd/zstd.h
index 8fe4a2482..e87dda1b1 100644
--- a/include/sys/zstd/zstd.h
+++ b/include/sys/zstd/zstd.h
@@ -56,22 +56,25 @@ typedef struct zfs_zstd_header {
/*
* Version and compression level
- * We use a union to be able to big endian encode a single 32 bit
- * unsigned integer, but still access the individual bitmasked
- * components easily.
+ * We used to use a union to reference compression level
+ * and version easily, but as it turns out, relying on the
+ * ordering of bitfields is not remotely portable.
+ * So now we have get/set functions in zfs_zstd.c for
+ * manipulating this in just the right way forever.
*/
- union {
- uint32_t raw_version_level;
- struct {
- uint32_t version : 24;
- uint8_t level;
- };
- };
-
+ uint32_t raw_version_level;
char data[];
} zfs_zstdhdr_t;
/*
+ * Simple struct to pass the data from raw_version_level around.
+ */
+typedef struct zfs_zstd_meta {
+ uint8_t level;
+ uint32_t version;
+} zfs_zstdmeta_t;
+
+/*
* kstat helper macros
*/
#define ZSTDSTAT(stat) (zstd_stats.stat.value.ui64)
@@ -94,6 +97,129 @@ int zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len,
size_t d_len, int n);
void zfs_zstd_cache_reap_now(void);
+/*
+ * So, the reason we have all these complicated set/get functions is that
+ * originally, in the zstd "header" we wrote out to disk, we used a 32-bit
+ * bitfield to store the "level" (8 bits) and "version" (24 bits).
+ *
+ * Unfortunately, bitfields make few promises about how they're arranged in
+ * memory...
+ *
+ * By way of example, if we were using version 1.4.5 and level 3, it'd be
+ * level = 0x03, version = 10405/0x0028A5, which gets broken into Vhigh = 0x00,
+ * Vmid = 0x28, Vlow = 0xA5. We include these positions below to help follow
+ * which data winds up where.
+ *
+ * As a consequence, we wound up with little endian platforms with a layout
+ * like this in memory:
+ *
+ * 0 8 16 24 32
+ * +-------+-------+-------+-------+
+ * | Vlow | Vmid | Vhigh | level |
+ * +-------+-------+-------+-------+
+ * =A5 =28 =00 =03
+ *
+ * ...and then, after being run through BE_32(), serializing this out to
+ * disk:
+ *
+ * 0 8 16 24 32
+ * +-------+-------+-------+-------+
+ * | level | Vhigh | Vmid | Vlow |
+ * +-------+-------+-------+-------+
+ * =03 =00 =28 =A5
+ *
+ * while on big-endian systems, since BE_32() is a noop there, both in
+ * memory and on disk, we wind up with:
+ *
+ * 0 8 16 24 32
+ * +-------+-------+-------+-------+
+ * | Vhigh | Vmid | Vlow | level |
+ * +-------+-------+-------+-------+
+ * =00 =28 =A5 =03
+ *
+ * (Vhigh is always 0 until version exceeds 6.55.35. Vmid and Vlow are the
+ * other two bytes of the "version" data.)
+ *
+ * So now we use the BF32_SET macros to get consistent behavior (the
+ * ondisk LE encoding, since x86 currently rules the world) across
+ * platforms, but the "get" behavior requires that we check each of the
+ * bytes in the aforementioned former-bitfield for 0x00, and from there,
+ * we can know which possible layout we're dealing with. (Only the two
+ * that have been observed in the wild are illustrated above, but handlers
+ * for all 4 positions of 0x00 are implemented.
+ */
+
+static inline void
+zfs_get_hdrmeta(const zfs_zstdhdr_t *blob, zfs_zstdmeta_t *res)
+{
+ uint32_t raw = blob->raw_version_level;
+ uint8_t findme = 0xff;
+ int shift;
+ for (shift = 0; shift < 4; shift++) {
+ findme = BF32_GET(raw, 8*shift, 8);
+ if (findme == 0)
+ break;
+ }
+ switch (shift) {
+ case 0:
+ res->level = BF32_GET(raw, 24, 8);
+ res->version = BSWAP_32(raw);
+ res->version = BF32_GET(res->version, 8, 24);
+ break;
+ case 1:
+ res->level = BF32_GET(raw, 0, 8);
+ res->version = BSWAP_32(raw);
+ res->version = BF32_GET(res->version, 0, 24);
+ break;
+ case 2:
+ res->level = BF32_GET(raw, 24, 8);
+ res->version = BF32_GET(raw, 0, 24);
+ break;
+ case 3:
+ res->level = BF32_GET(raw, 0, 8);
+ res->version = BF32_GET(raw, 8, 24);
+ break;
+ default:
+ res->level = 0;
+ res->version = 0;
+ break;
+ }
+}
+
+static inline uint8_t
+zfs_get_hdrlevel(const zfs_zstdhdr_t *blob)
+{
+ uint8_t level = 0;
+ zfs_zstdmeta_t res;
+ zfs_get_hdrmeta(blob, &res);
+ level = res.level;
+ return (level);
+}
+
+static inline uint32_t
+zfs_get_hdrversion(const zfs_zstdhdr_t *blob)
+{
+ uint32_t version = 0;
+ zfs_zstdmeta_t res;
+ zfs_get_hdrmeta(blob, &res);
+ version = res.version;
+ return (version);
+
+}
+
+static inline void
+zfs_set_hdrversion(zfs_zstdhdr_t *blob, uint32_t version)
+{
+ BF32_SET(blob->raw_version_level, 0, 24, version);
+}
+
+static inline void
+zfs_set_hdrlevel(zfs_zstdhdr_t *blob, uint8_t level)
+{
+ BF32_SET(blob->raw_version_level, 24, 8, level);
+}
+
+
#ifdef __cplusplus
}
#endif
diff --git a/module/zstd/Makefile.in b/module/zstd/Makefile.in
index f67db710f..091f7cea3 100644
--- a/module/zstd/Makefile.in
+++ b/module/zstd/Makefile.in
@@ -33,6 +33,7 @@ $(obj)/zfs_zstd.o: c_flags += -include $(zstd_include)/zstd_compat_wrapper.h
$(MODULE)-objs += zfs_zstd.o
$(MODULE)-objs += lib/zstd.o
+$(MODULE)-objs += zstd_sparc.o
all:
mkdir -p lib
diff --git a/module/zstd/include/sparc_compat.h b/module/zstd/include/sparc_compat.h
new file mode 100644
index 000000000..14c1bdde9
--- /dev/null
+++ b/module/zstd/include/sparc_compat.h
@@ -0,0 +1,4 @@
+#if defined(__sparc)
+uint64_t __bswapdi2(uint64_t in);
+uint32_t __bswapsi2(uint32_t in);
+#endif
diff --git a/module/zstd/zfs_zstd.c b/module/zstd/zfs_zstd.c
index a91ed1be9..2c698716c 100644
--- a/module/zstd/zfs_zstd.c
+++ b/module/zstd/zfs_zstd.c
@@ -380,6 +380,7 @@ zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
return (1);
}
+
/* Compress block using zstd */
size_t
zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
@@ -477,8 +478,8 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
* As soon as such incompatibility occurs, handling code needs to be
* added, differentiating between the versions.
*/
- hdr->version = ZSTD_VERSION_NUMBER;
- hdr->level = level;
+ zfs_set_hdrversion(hdr, ZSTD_VERSION_NUMBER);
+ zfs_set_hdrlevel(hdr, level);
hdr->raw_version_level = BE_32(hdr->raw_version_level);
return (c_len + sizeof (*hdr));
@@ -504,6 +505,7 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
* not modify the original data that may be used again later.
*/
hdr_copy.raw_version_level = BE_32(hdr->raw_version_level);
+ uint8_t curlevel = zfs_get_hdrlevel(&hdr_copy);
/*
* NOTE: We ignore the ZSTD version for now. As soon as any
@@ -516,13 +518,13 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
* An invalid level is a strong indicator for data corruption! In such
* case return an error so the upper layers can try to fix it.
*/
- if (zstd_enum_to_level(hdr_copy.level, &zstd_level)) {
+ if (zstd_enum_to_level(curlevel, &zstd_level)) {
ZSTDSTAT_BUMP(zstd_stat_dec_inval);
return (1);
}
ASSERT3U(d_len, >=, s_len);
- ASSERT3U(hdr_copy.level, !=, ZIO_COMPLEVEL_INHERIT);
+ ASSERT3U(curlevel, !=, ZIO_COMPLEVEL_INHERIT);
/* Invalid compressed buffer size encoded at start */
if (c_len + sizeof (*hdr) > s_len) {
@@ -553,7 +555,7 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
}
if (level) {
- *level = hdr_copy.level;
+ *level = curlevel;
}
return (0);
@@ -783,7 +785,7 @@ module_exit(zstd_fini);
ZFS_MODULE_DESCRIPTION("ZSTD Compression for ZFS");
ZFS_MODULE_LICENSE("Dual BSD/GPL");
-ZFS_MODULE_VERSION(ZSTD_VERSION_STRING);
+ZFS_MODULE_VERSION(ZSTD_VERSION_STRING "a");
EXPORT_SYMBOL(zfs_zstd_compress);
EXPORT_SYMBOL(zfs_zstd_decompress_level);
diff --git a/module/zstd/zstd_sparc.c b/module/zstd/zstd_sparc.c
new file mode 100644
index 000000000..463df99bd
--- /dev/null
+++ b/module/zstd/zstd_sparc.c
@@ -0,0 +1,11 @@
+#ifdef __sparc__
+#include <stdint.h>
+#include <sys/byteorder.h>
+#include "include/sparc_compat.h"
+uint64_t __bswapdi2(uint64_t in) {
+ return (BSWAP_64(in));
+}
+uint32_t __bswapsi2(uint32_t in) {
+ return (BSWAP_32(in));
+}
+#endif