summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJitendra Patidar <53164267+jsai20@users.noreply.github.com>2022-02-23 02:36:43 +0530
committerGitHub <noreply@github.com>2022-02-22 13:06:43 -0800
commit361a7e821178e105c8e1206ead4479de83c2a617 (patch)
treee03d4741dc67a9a7aa469fdbb38104c9fe62f40f
parentccdcc1dbe8b2b741194cdbc5b81bdb8b58cc7142 (diff)
log xattr=sa create/remove/update to ZIL
As such, there are no specific synchronous semantics defined for the xattrs. But for xattr=on, it does log to ZIL and zil_commit() is done, if sync=always is set on dataset. This provides sync semantics for xattr=on with sync=always set on dataset. For the xattr=sa implementation, it doesn't log to ZIL, so, even with sync=always, xattrs are not guaranteed to be synced before xattr call returns to caller. So, xattr can be lost if system crash happens, before txg carrying xattr transaction is synced. This change adds xattr=sa logging to ZIL on xattr create/remove/update and xattrs are synced to ZIL (zil_commit() done) for sync=always. This makes xattr=sa behavior similar to xattr=on. Implementation notes: The actual logging is fairly straight-forward and does not warrant additional explanation. However, it has been 14 years since we last added new TX types to the ZIL [1], hence this is the first time we do it after the introduction of zpool features. Therefore, here is an overview of the feature activation and deactivation workflow: 1. The feature must be enabled. Otherwise, we don't log the new record type. This ensures compatibility with older software. 2. The feature is activated per-dataset, since the ZIL is per-dataset. 3. If the feature is enabled and dataset is not for zvol, any append to the ZIL chain will activate the feature for the dataset. Likewise for starting a new ZIL chain. 4. A dataset that doesn't have a ZIL chain has the feature deactivated. We ensure (3) by activating on the first zil_commit() after the feature was enabled. Since activating the features requires waiting for txg sync, the first zil_commit() after enabling the feature will be slower than usual. The downside is that this is really a conservative approximation: even if we never append a 'TX_SETSAXATTR' to the ZIL chain, we pay the penalty for feature activation. The upside is that the user is in control of when we pay the penalty, i.e., upon enabling the feature. 
We ensure (4) by hooking into zil_sync(), where ZIL destroy actually happens.

One more piece on feature activation, since it's spread across multiple functions:

zil_commit()
  zil_process_commit_list()
    if lwb == NULL // first zil_commit since zil_open
      zil_create()
        if no log block pointer in ZIL header:
          if feature enabled and not active:
            // CASE 1
            enable, COALESCE txg wait with dmu_tx that allocated the log block
        else // log block was allocated earlier than this zil_open
          if feature enabled and not active:
            // CASE 2
            enable, EXPLICIT txg wait
    else
      // already have an in-DRAM LWB
      if feature enabled and not active:
        // this happens when we enable the feature after zil_create
        // CASE 3
        enable, EXPLICIT txg wait

[1] https://github.com/illumos/illumos-gate/commit/da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Christian Schwarz <christian.schwarz@nutanix.com>
Reviewed-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Reviewed-by: Ryan Moeller <freqlabs@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Jitendra Patidar <jitendra.patidar@nutanix.com>
Closes #8768
Closes #9078
-rw-r--r--cmd/zdb/zdb_il.c25
-rw-r--r--cmd/ztest/ztest.c1
-rw-r--r--include/sys/zfs_sa.h2
-rw-r--r--include/sys/zfs_znode.h2
-rw-r--r--include/sys/zil.h13
-rw-r--r--include/zfeature_common.h1
-rw-r--r--lib/libzfs/libzfs.abi9
-rw-r--r--man/man4/zfs.410
-rw-r--r--man/man7/zpool-features.718
-rw-r--r--module/os/freebsd/zfs/zfs_vnops_os.c6
-rw-r--r--module/os/linux/zfs/zpl_xattr.c2
-rw-r--r--module/zcommon/zfeature_common.c12
-rw-r--r--module/zfs/zfs_log.c34
-rw-r--r--module/zfs/zfs_replay.c83
-rw-r--r--module/zfs/zfs_sa.c29
-rw-r--r--module/zfs/zil.c74
-rw-r--r--tests/runfiles/common.run2
-rw-r--r--tests/zfs-tests/include/tunables.cfg1
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg1
-rw-r--r--tests/zfs-tests/tests/functional/slog/Makefile.am3
-rwxr-xr-xtests/zfs-tests/tests/functional/slog/slog_016_pos.ksh157
21 files changed, 470 insertions, 15 deletions
diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c
index d6f588d83..76b1d64d7 100644
--- a/cmd/zdb/zdb_il.c
+++ b/cmd/zdb/zdb_il.c
@@ -266,6 +266,29 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, const void *arg)
}
+/*
+ * Print a TX_SETSAXATTR log record: the target object id, the xattr name
+ * stored immediately after the fixed-size record, and the lr_size value
+ * bytes that follow the NUL-terminated name.  A record with lr_size == 0 is
+ * printed with a NULL value.  'zilog' and 'txtype' are unused.
+ */
static void
+zil_prt_rec_setsaxattr(zilog_t *zilog, int txtype, const void *arg)
+{
+	(void) zilog, (void) txtype;
+	const lr_setsaxattr_t *lr = arg;
+	const char *name = (const char *)(lr + 1);
+
+	(void) printf("%sfoid %llu\n", tab_prefix,
+	    (u_longlong_t)lr->lr_foid);
+	(void) printf("%sXAT_NAME %s\n", tab_prefix, name);
+
+	if (lr->lr_size == 0) {
+		(void) printf("%sXAT_VALUE NULL\n", tab_prefix);
+		return;
+	}
+
+	/* The value bytes start right after the name's terminating NUL. */
+	const char *val = name + strlen(name) + 1;
+
+	(void) printf("%sXAT_VALUE ", tab_prefix);
+	/* lr_size is a uint64_t, so index with a matching unsigned type. */
+	for (uint64_t i = 0; i < lr->lr_size; i++)
+		(void) printf("%c", val[i]);
+	/* End the line so the next record does not run onto this one. */
+	(void) printf("\n");
+}
+
+static void
zil_prt_rec_acl(zilog_t *zilog, int txtype, const void *arg)
{
(void) zilog, (void) txtype;
@@ -304,6 +327,8 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
{.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ATTR "},
{.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ACL_ATTR "},
{.zri_print = zil_prt_rec_write, .zri_name = "TX_WRITE2 "},
+ {.zri_print = zil_prt_rec_setsaxattr,
+ .zri_name = "TX_SETSAXATTR "},
};
static int
diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index ed60d065c..0daaab69c 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -2386,6 +2386,7 @@ zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
NULL, /* TX_MKDIR_ATTR */
NULL, /* TX_MKDIR_ACL_ATTR */
NULL, /* TX_WRITE2 */
+ NULL, /* TX_SETSAXATTR */
};
/*
diff --git a/include/sys/zfs_sa.h b/include/sys/zfs_sa.h
index a0c383807..6b0336997 100644
--- a/include/sys/zfs_sa.h
+++ b/include/sys/zfs_sa.h
@@ -138,7 +138,7 @@ void zfs_sa_symlink(struct znode *, char *link, int len, dmu_tx_t *);
void zfs_sa_get_scanstamp(struct znode *, xvattr_t *);
void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *);
int zfs_sa_get_xattr(struct znode *);
-int zfs_sa_set_xattr(struct znode *);
+int zfs_sa_set_xattr(struct znode *, const char *, const void *, size_t);
void zfs_sa_upgrade(struct sa_handle *, dmu_tx_t *);
void zfs_sa_upgrade_txholds(dmu_tx_t *, struct znode *);
void zfs_sa_init(void);
diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h
index 1bf25a77d..e20c18cc2 100644
--- a/include/sys/zfs_znode.h
+++ b/include/sys/zfs_znode.h
@@ -286,6 +286,8 @@ extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx);
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
+extern void zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
+ znode_t *zp, const char *name, const void *value, size_t size);
extern void zfs_znode_update_vfs(struct znode *);
diff --git a/include/sys/zil.h b/include/sys/zil.h
index 8e5a49da2..05e3647e6 100644
--- a/include/sys/zil.h
+++ b/include/sys/zil.h
@@ -162,7 +162,8 @@ typedef enum zil_create {
#define TX_MKDIR_ATTR 18 /* mkdir with attr */
#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */
#define TX_WRITE2 20 /* dmu_sync EALREADY write */
-#define TX_MAX_TYPE 21 /* Max transaction type */
+#define TX_SETSAXATTR 21 /* Set sa xattrs on file */
+#define TX_MAX_TYPE 22 /* Max transaction type */
/*
* The transactions for mkdir, symlink, remove, rmdir, link, and rename
@@ -182,7 +183,8 @@ typedef enum zil_create {
(txtype) == TX_SETATTR || \
(txtype) == TX_ACL_V0 || \
(txtype) == TX_ACL || \
- (txtype) == TX_WRITE2)
+ (txtype) == TX_WRITE2 || \
+ (txtype) == TX_SETSAXATTR)
/*
* The number of dnode slots consumed by the object is stored in the 8
@@ -337,6 +339,13 @@ typedef struct {
+/*
+ * Log record for TX_SETSAXATTR.  The fixed-size part below is followed in
+ * the same record by the NUL-terminated xattr name and then lr_size bytes
+ * of xattr value.
+ */
typedef struct {
lr_t lr_common; /* common portion of log record */
+ uint64_t lr_foid; /* file object to change attributes */
+ uint64_t lr_size; /* xattr value length; 0 = no value logged */
+ /* NUL-terminated xattr name, then the value bytes, follow */
+} lr_setsaxattr_t;
+
+typedef struct {
+ lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* obj id of file */
uint64_t lr_aclcnt; /* number of acl entries */
/* lr_aclcnt number of ace_t entries follow this */
diff --git a/include/zfeature_common.h b/include/zfeature_common.h
index 874cbd9ff..580f5ff3e 100644
--- a/include/zfeature_common.h
+++ b/include/zfeature_common.h
@@ -75,6 +75,7 @@ typedef enum spa_feature {
SPA_FEATURE_DEVICE_REBUILD,
SPA_FEATURE_ZSTD_COMPRESS,
SPA_FEATURE_DRAID,
+ SPA_FEATURE_ZILSAXATTR,
SPA_FEATURES
} spa_feature_t;
diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi
index 8f586804c..a34811b58 100644
--- a/lib/libzfs/libzfs.abi
+++ b/lib/libzfs/libzfs.abi
@@ -595,7 +595,7 @@
<elf-symbol name='fletcher_4_superscalar4_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='fletcher_4_superscalar_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
- <elf-symbol name='spa_feature_table' size='1904' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+ <elf-symbol name='spa_feature_table' size='1960' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@@ -1854,8 +1854,8 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='../../module/zcommon/zfeature_common.c' language='LANG_C99'>
- <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='15232' id='d96379d0'>
- <subrange length='34' type-id='7359adad' id='6a6a7e00'/>
+ <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='15680' id='d96379d0'>
+ <subrange length='35' type-id='7359adad' id='6a6a7e00'/>
</array-type-def>
<enum-decl name='spa_feature' id='33ecb627'>
<underlying-type type-id='9cac1fee'/>
@@ -1894,7 +1894,8 @@
<enumerator name='SPA_FEATURE_DEVICE_REBUILD' value='31'/>
<enumerator name='SPA_FEATURE_ZSTD_COMPRESS' value='32'/>
<enumerator name='SPA_FEATURE_DRAID' value='33'/>
- <enumerator name='SPA_FEATURES' value='34'/>
+ <enumerator name='SPA_FEATURE_ZILSAXATTR' value='34'/>
+ <enumerator name='SPA_FEATURES' value='35'/>
</enum-decl>
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
<enum-decl name='zfeature_flags' id='6db816a4'>
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index 2f7a18ea6..01e9c5de4 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -2099,6 +2099,16 @@ Limit SLOG write size per commit executed with synchronous priority.
Any writes above that will be executed with lower (asynchronous) priority
to limit potential SLOG device abuse by single active ZIL writer.
.
+.It Sy zfs_zil_saxattr Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Setting this tunable to zero disables ZIL logging of new
+.Sy xattr Ns = Ns Sy sa
+records if the
+.Sy org.openzfs:zilsaxattr
+feature is enabled on the pool.
+This would only be necessary to work around bugs in the ZIL logging or replay
+code for this record type.
+The tunable has no effect if the feature is disabled.
+.
.It Sy zfs_embedded_slog_min_ms Ns = Ns Sy 64 Pq int
Usually, one metaslab from each normal-class vdev is dedicated for use by
the ZIL to log synchronous writes.
diff --git a/man/man7/zpool-features.7 b/man/man7/zpool-features.7
index d5148fb48..9a202ca8a 100644
--- a/man/man7/zpool-features.7
+++ b/man/man7/zpool-features.7
@@ -778,6 +778,24 @@ by user and group.
\*[instant-never]
\*[remount-upgrade]
.
+.feature org.openzfs zilsaxattr yes extensible_dataset
+This feature enables
+.Sy xattr Ns = Ns Sy sa
+extended attribute logging in the ZIL.
+If enabled, extended attribute changes
+.Pq both Sy xattr Ns = Ns Sy dir No and Sy xattr Ns = Ns Sy sa
+are guaranteed to be durable if either the dataset had
+.Sy sync Ns = Ns Sy always
+set at the time the changes were made, or
+.Xr sync 2
+is called on the dataset after the changes were made.
+.Pp
+This feature becomes
+.Sy active
+when a ZIL is created for at least one dataset and will be returned to the
+.Sy enabled
+state when it is destroyed for all datasets that use this feature.
+.
.feature com.delphix zpool_checkpoint yes
This feature enables the
.Nm zpool Cm checkpoint
diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c
index 1b0f53c6d..21d121a15 100644
--- a/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -5549,7 +5549,7 @@ zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
if (error != 0)
error = SET_ERROR(error);
else
- error = zfs_sa_set_xattr(zp);
+ error = zfs_sa_set_xattr(zp, attrname, NULL, 0);
if (error != 0) {
zp->z_xattr_cached = NULL;
nvlist_free(nvl);
@@ -5706,9 +5706,9 @@ zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
if (error != 0)
error = SET_ERROR(error);
}
- kmem_free(buf, entry_size);
if (error == 0)
- error = zfs_sa_set_xattr(zp);
+ error = zfs_sa_set_xattr(zp, attrname, buf, entry_size);
+ kmem_free(buf, entry_size);
if (error != 0) {
zp->z_xattr_cached = NULL;
nvlist_free(nvl);
diff --git a/module/os/linux/zfs/zpl_xattr.c b/module/os/linux/zfs/zpl_xattr.c
index ce1815771..3b8ac517a 100644
--- a/module/os/linux/zfs/zpl_xattr.c
+++ b/module/os/linux/zfs/zpl_xattr.c
@@ -578,7 +578,7 @@ zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
* will be reconstructed from the ARC when next accessed.
*/
if (error == 0)
- error = -zfs_sa_set_xattr(zp);
+ error = -zfs_sa_set_xattr(zp, name, value, size);
if (error) {
nvlist_free(nvl);
diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c
index 529c52316..13dbccae2 100644
--- a/module/zcommon/zfeature_common.c
+++ b/module/zcommon/zfeature_common.c
@@ -695,6 +695,18 @@ zpool_feature_init(void)
"org.openzfs:draid", "draid", "Support for distributed spare RAID",
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures);
+ {
+ static const spa_feature_t zilsaxattr_deps[] = {
+ SPA_FEATURE_EXTENSIBLE_DATASET,
+ SPA_FEATURE_NONE
+ };
+ zfeature_register(SPA_FEATURE_ZILSAXATTR,
+ "org.openzfs:zilsaxattr", "zilsaxattr",
+ "Support for xattr=sa extended attribute logging in ZIL.",
+ ZFEATURE_FLAG_PER_DATASET | ZFEATURE_FLAG_READONLY_COMPAT,
+ ZFEATURE_TYPE_BOOLEAN, zilsaxattr_deps, sfeatures);
+ }
+
zfs_mod_list_supported_free(sfeatures);
}
diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c
index babf0406e..9df801870 100644
--- a/module/zfs/zfs_log.c
+++ b/module/zfs/zfs_log.c
@@ -721,6 +721,40 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
}
/*
+ * Handles TX_SETSAXATTR transactions.
+ *
+ * Builds an in-memory log record (itx) describing an xattr=sa change on
+ * 'zp' and assigns it to 'zilog' under 'tx'.  The record payload is the
+ * NUL-terminated attribute 'name' followed by 'size' bytes of 'value'.
+ * A NULL 'value' is logged with lr_size == 0 and no value bytes.
+ *
+ * Nothing is logged while the ZIL is being replayed or when the znode is
+ * marked unlinked.
+ */
+void
+zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
+    znode_t *zp, const char *name, const void *value, size_t size)
+{
+	itx_t *itx;
+	lr_setsaxattr_t *lr;
+	char *payload;
+	size_t namelen;
+
+	if (zil_replaying(zilog, tx) || zp->z_unlinked)
+		return;
+
+	/*
+	 * Without a value there is nothing to copy, so do not reserve
+	 * 'size' bytes in the record that this function would never fill in.
+	 */
+	if (value == NULL)
+		size = 0;
+
+	namelen = strlen(name) + 1;	/* include the terminating NUL */
+	itx = zil_itx_create(txtype, sizeof (*lr) + namelen + size);
+	lr = (lr_setsaxattr_t *)&itx->itx_lr;
+	lr->lr_foid = zp->z_id;
+	lr->lr_size = size;
+
+	/* Payload layout: name (with NUL) immediately followed by value. */
+	payload = (char *)(lr + 1);
+	bcopy(name, payload, namelen);
+	if (size != 0)
+		bcopy(value, payload + namelen, size);
+
+	itx->itx_sync = (zp->z_sync_cnt != 0);
+	zil_itx_assign(zilog, itx, tx);
+}
+
+/*
* Handles TX_ACL transactions.
*/
void
diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c
index 860ca5929..3ccd96dc2 100644
--- a/module/zfs/zfs_replay.c
+++ b/module/zfs/zfs_replay.c
@@ -47,6 +47,8 @@
#include <sys/atomic.h>
#include <sys/cred.h>
#include <sys/zpl.h>
+#include <sys/dmu_objset.h>
+#include <sys/zfeature.h>
/*
* NB: FreeBSD expects to be able to do vnode locking in lookup and
@@ -869,6 +871,86 @@ zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
}
+/*
+ * Replay a TX_SETSAXATTR log record.
+ *
+ * arg1 is the zfsvfs_t and arg2 the lr_setsaxattr_t record; when 'byteswap'
+ * is set the fixed-size part of the record is byteswapped in place first.
+ * The znode named by lr_foid is looked up, the logged change is applied to
+ * its cached xattr nvlist (lr_size == 0 removes the named entry, otherwise
+ * the entry is added with the logged value), and the result is written back
+ * through zfs_sa_set_xattr().
+ *
+ * Returns 0 on success or the error from the first step that failed.
+ */
static int
+zfs_replay_setsaxattr(void *arg1, void *arg2, boolean_t byteswap)
+{
+ zfsvfs_t *zfsvfs = arg1;
+ lr_setsaxattr_t *lr = arg2;
+ znode_t *zp;
+ nvlist_t *nvl;
+ size_t sa_size;
+ char *name;
+ char *value;
+ size_t size;
+ int error = 0;
+
+ ASSERT(spa_feature_is_active(zfsvfs->z_os->os_spa,
+ SPA_FEATURE_ZILSAXATTR));
+ /* Only sizeof (*lr) is swapped; the name/value payload is not. */
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
+ return (error);
+
+ /*
+ * z_xattr_lock is taken as writer here and held until 'out'.
+ * z_lock is held only while the xattr nvlist cache is loaded.
+ */
+ rw_enter(&zp->z_xattr_lock, RW_WRITER);
+ mutex_enter(&zp->z_lock);
+ if (zp->z_xattr_cached == NULL)
+ error = zfs_sa_get_xattr(zp);
+ mutex_exit(&zp->z_lock);
+
+ if (error)
+ goto out;
+
+ ASSERT(zp->z_xattr_cached);
+ nvl = zp->z_xattr_cached;
+
+ /* Get xattr name, value and size from log record */
+ size = lr->lr_size;
+ name = (char *)(lr + 1);
+ if (size == 0) {
+ /* A zero-length record means: remove the named xattr. */
+ value = NULL;
+ error = nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
+ } else {
+ /* The value starts right after the name's terminating NUL. */
+ value = name + strlen(name) + 1;
+ /* Limited to 32k to keep nvpair memory allocations small */
+ if (size > DXATTR_MAX_ENTRY_SIZE) {
+ error = SET_ERROR(EFBIG);
+ goto out;
+ }
+
+ /* Prevent the DXATTR SA from consuming the entire SA region */
+ error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
+ if (error)
+ goto out;
+
+ /* Checked against the encoded size before the new entry. */
+ if (sa_size > DXATTR_MAX_SA_SIZE) {
+ error = SET_ERROR(EFBIG);
+ goto out;
+ }
+
+ error = nvlist_add_byte_array(nvl, name, (uchar_t *)value,
+ size);
+ }
+
+ /*
+ * Update the SA for additions, modifications, and removals. On
+ * error drop the inconsistent cached version of the nvlist, it
+ * will be reconstructed from the ARC when next accessed.
+ */
+ if (error == 0)
+ error = zfs_sa_set_xattr(zp, name, value, size);
+
+ if (error) {
+ nvlist_free(nvl);
+ zp->z_xattr_cached = NULL;
+ }
+
+out:
+ /* Common exit: drop the xattr lock and the hold from zfs_zget(). */
+ rw_exit(&zp->z_xattr_lock);
+ zrele(zp);
+ return (error);
+}
+
+static int
zfs_replay_acl_v0(void *arg1, void *arg2, boolean_t byteswap)
{
zfsvfs_t *zfsvfs = arg1;
@@ -989,4 +1071,5 @@ zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE] = {
zfs_replay_create, /* TX_MKDIR_ATTR */
zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */
zfs_replay_write2, /* TX_WRITE2 */
+ zfs_replay_setsaxattr, /* TX_SETSAXATTR */
};
diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c
index 817f63048..1f15cae00 100644
--- a/module/zfs/zfs_sa.c
+++ b/module/zfs/zfs_sa.c
@@ -29,6 +29,7 @@
#include <sys/zfs_sa.h>
#include <sys/dmu_objset.h>
#include <sys/sa_impl.h>
+#include <sys/zfeature.h>
/*
* ZPL attribute registration table.
@@ -69,7 +70,10 @@ const sa_attr_reg_t zfs_attr_table[ZPL_END+1] = {
{NULL, 0, 0, 0}
};
+
#ifdef _KERNEL
+static int zfs_zil_saxattr = 1;
+
int
zfs_sa_readlink(znode_t *zp, zfs_uio_t *uio)
{
@@ -219,13 +223,14 @@ zfs_sa_get_xattr(znode_t *zp)
}
int
-zfs_sa_set_xattr(znode_t *zp)
+zfs_sa_set_xattr(znode_t *zp, const char *name, const void *value, size_t vsize)
{
zfsvfs_t *zfsvfs = ZTOZSB(zp);
+ zilog_t *zilog;
dmu_tx_t *tx;
char *obj;
size_t size;
- int error;
+ int error, logsaxattr = 0;
ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
ASSERT(zp->z_xattr_cached);
@@ -244,6 +249,17 @@ zfs_sa_set_xattr(znode_t *zp)
if (error)
goto out_free;
+ zilog = zfsvfs->z_log;
+
+ /*
+ * Users enable ZIL logging of xattr=sa operations by enabling the
+ * SPA_FEATURE_ZILSAXATTR feature on the pool. Feature is activated
+ * during zil_process_commit_list/zil_create, if enabled.
+ */
+ if (spa_feature_is_enabled(zfsvfs->z_os->os_spa,
+ SPA_FEATURE_ZILSAXATTR) && zfs_zil_saxattr)
+ logsaxattr = 1;
+
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa_create(tx, size);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
@@ -256,6 +272,10 @@ zfs_sa_set_xattr(znode_t *zp)
sa_bulk_attr_t bulk[2];
uint64_t ctime[2];
+ if (logsaxattr)
+ zfs_log_setsaxattr(zilog, tx, TX_SETSAXATTR, zp, name,
+ value, vsize);
+
zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DXATTR(zfsvfs),
NULL, obj, size);
@@ -264,6 +284,8 @@ zfs_sa_set_xattr(znode_t *zp)
VERIFY0(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
dmu_tx_commit(tx);
+ if (logsaxattr && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ zil_commit(zilog, 0);
}
out_free:
vmem_free(obj, size);
@@ -433,6 +455,9 @@ zfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp)
}
}
+/* Module parameter for the zil_saxattr tunable (xattr=sa ZIL logging). */
+ZFS_MODULE_PARAM(zfs, zfs_, zil_saxattr, INT, ZMOD_RW,
+	"Disable xattr=sa extended attribute logging in ZIL by setting 0.");
+
EXPORT_SYMBOL(zfs_attr_table);
EXPORT_SYMBOL(zfs_sa_readlink);
EXPORT_SYMBOL(zfs_sa_symlink);
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index 87a50a5c4..10f89c916 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -664,6 +664,38 @@ zilog_is_dirty(zilog_t *zilog)
}
/*
+ * Called in zil_commit context (zil_process_commit_list()/zil_create()).
+ * Activates the SPA_FEATURE_ZILSAXATTR feature for this dataset when the
+ * feature is enabled on the pool, the objset is not a zvol, and the feature
+ * is not already active for the dataset.
+ * The dsl_dataset_feature_is_active() check avoids a txg_wait_synced() on
+ * every zil_commit.
+ */
+static void
+zil_commit_activate_saxattr_feature(zilog_t *zilog)
+{
+ dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
+ uint64_t txg = 0;
+ dmu_tx_t *tx = NULL;
+
+ if (spa_feature_is_enabled(zilog->zl_spa,
+ SPA_FEATURE_ZILSAXATTR) &&
+ dmu_objset_type(zilog->zl_os) != DMU_OST_ZVOL &&
+ !dsl_dataset_feature_is_active(ds,
+ SPA_FEATURE_ZILSAXATTR)) {
+ /* Dirty the dataset in a fresh tx and remember its txg. */
+ tx = dmu_tx_create(zilog->zl_os);
+ VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
+ dsl_dataset_dirty(ds, tx);
+ txg = dmu_tx_get_txg(tx);
+
+ /* Flag the feature for activation under ds_lock. */
+ mutex_enter(&ds->ds_lock);
+ ds->ds_feature_activation[SPA_FEATURE_ZILSAXATTR] =
+ (void *)B_TRUE;
+ mutex_exit(&ds->ds_lock);
+ dmu_tx_commit(tx);
+ /* Block until the txg recorded above has synced. */
+ txg_wait_synced(zilog->zl_dmu_pool, txg);
+ }
+}
+
+/*
* Create an on-disk intent log.
*/
static lwb_t *
@@ -677,6 +709,8 @@ zil_create(zilog_t *zilog)
int error = 0;
boolean_t fastwrite = FALSE;
boolean_t slog = FALSE;
+ dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
+
/*
* Wait for any previous destroy to complete.
@@ -724,9 +758,33 @@ zil_create(zilog_t *zilog)
* (zh is part of the MOS, so we cannot modify it in open context.)
*/
if (tx != NULL) {
+ /*
+ * If "zilsaxattr" feature is enabled on zpool, then activate
+ * it now when we're creating the ZIL chain. We can't wait with
+ * this until we write the first xattr log record because we
+ * need to wait for the feature activation to sync out.
+ */
+ if (spa_feature_is_enabled(zilog->zl_spa,
+ SPA_FEATURE_ZILSAXATTR) && dmu_objset_type(zilog->zl_os) !=
+ DMU_OST_ZVOL) {
+ mutex_enter(&ds->ds_lock);
+ ds->ds_feature_activation[SPA_FEATURE_ZILSAXATTR] =
+ (void *)B_TRUE;
+ mutex_exit(&ds->ds_lock);
+ }
+
dmu_tx_commit(tx);
txg_wait_synced(zilog->zl_dmu_pool, txg);
+ } else {
+ /*
+ * This branch covers the case where we enable the feature on a
+ * zpool that has existing ZIL headers.
+ */
+ zil_commit_activate_saxattr_feature(zilog);
}
+ IMPLY(spa_feature_is_enabled(zilog->zl_spa, SPA_FEATURE_ZILSAXATTR) &&
+ dmu_objset_type(zilog->zl_os) != DMU_OST_ZVOL,
+ dsl_dataset_feature_is_active(ds, SPA_FEATURE_ZILSAXATTR));
ASSERT(error != 0 || bcmp(&blk, &zh->zh_log, sizeof (blk)) == 0);
IMPLY(error == 0, lwb != NULL);
@@ -2297,6 +2355,11 @@ zil_process_commit_list(zilog_t *zilog)
if (lwb == NULL) {
lwb = zil_create(zilog);
} else {
+ /*
+ * Activate SPA_FEATURE_ZILSAXATTR for the cases where ZIL will
+ * have already been created (zl_lwb_list not empty).
+ */
+ zil_commit_activate_saxattr_feature(zilog);
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED);
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE);
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE);
@@ -3075,6 +3138,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
if (zilog->zl_destroy_txg == txg) {
blkptr_t blk = zh->zh_log;
+ dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
ASSERT(list_head(&zilog->zl_lwb_list) == NULL);
@@ -3092,6 +3156,16 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
*/
zil_init_log_chain(zilog, &blk);
zh->zh_log = blk;
+ } else {
+ /*
+ * A destroyed ZIL chain can't contain any TX_SETSAXATTR
+ * records. So, deactivate the feature for this dataset.
+ * We activate it again when we start a new ZIL chain.
+ */
+ if (dsl_dataset_feature_is_active(ds,
+ SPA_FEATURE_ZILSAXATTR))
+ dsl_dataset_deactivate_feature(ds,
+ SPA_FEATURE_ZILSAXATTR, tx);
}
}
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index a7ddb146e..95deef010 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -854,7 +854,7 @@ tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos',
'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg',
'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg',
'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs_001',
- 'slog_replay_fs_002', 'slog_replay_volume']
+ 'slog_replay_fs_002', 'slog_replay_volume', 'slog_016_pos']
tags = ['functional', 'slog']
[tests/functional/snapshot]
diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
index eea2af2ed..d3838cb7c 100644
--- a/tests/zfs-tests/include/tunables.cfg
+++ b/tests/zfs-tests/include/tunables.cfg
@@ -91,6 +91,7 @@ XATTR_COMPAT xattr_compat zfs_xattr_compat
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
ZIO_SLOW_IO_MS zio.slow_io_ms zio_slow_io_ms
+ZIL_SAXATTR zil_saxattr zfs_zil_saxattr
%%%%
while read name FreeBSD Linux; do
eval "export ${name}=\$${UNAME}"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
index accbf69cf..fac96e26e 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
@@ -97,5 +97,6 @@ if is_linux || is_freebsd; then
"feature@bookmark_v2"
"feature@livelist"
"feature@zstd_compress"
+ "feature@zilsaxattr"
)
fi \ No newline at end of file
diff --git a/tests/zfs-tests/tests/functional/slog/Makefile.am b/tests/zfs-tests/tests/functional/slog/Makefile.am
index 33e3a6d3a..92c3fd6c8 100644
--- a/tests/zfs-tests/tests/functional/slog/Makefile.am
+++ b/tests/zfs-tests/tests/functional/slog/Makefile.am
@@ -19,7 +19,8 @@ dist_pkgdata_SCRIPTS = \
slog_015_neg.ksh \
slog_replay_fs_001.ksh \
slog_replay_fs_002.ksh \
- slog_replay_volume.ksh
+ slog_replay_volume.ksh \
+ slog_016_pos.ksh
dist_pkgdata_DATA = \
slog.cfg \
diff --git a/tests/zfs-tests/tests/functional/slog/slog_016_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_016_pos.ksh
new file mode 100755
index 000000000..75f78c800
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/slog/slog_016_pos.ksh
@@ -0,0 +1,157 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2021 by Nutanix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/slog/slog.kshlib
+
+#
+# DESCRIPTION:
+# Verify saxattr logging in to ZIL works
+#
+# STRATEGY:
+# 1. Create an empty file system (TESTFS)
+# 2. Freeze TESTFS
+# 3. Create Xattrs.
+# 4. Unmount filesystem
+# <at this stage TESTFS is empty again and unfrozen, and the
+# intent log contains a complete set of deltas to replay it>
+# 5. Remount TESTFS <which replays the intent log>
+# 6. Check xattrs.
+#
+
+verify_runnable "global"
+
+#
+# Exit handler: run the suite's cleanup and restore the ZIL_SAXATTR tunable
+# to the value saved in orig_zil_saxattr.
+#
+function cleanup_testenv
+{
+ cleanup
+ log_must set_tunable32 ZIL_SAXATTR $orig_zil_saxattr
+}
+
+log_assert "Verify saxattr logging in to ZIL works"
+
+# Save the current tunable value so cleanup_testenv can restore it on exit.
+orig_zil_saxattr=$(get_tunable ZIL_SAXATTR)
+
+log_onexit cleanup_testenv
+log_must setup
+
+# Number of directories, and of files, that get an xattr in each run.
+NFILES=10
+#
+# Run one freeze/replay cycle and verify whether xattr=sa xattrs survive it.
+#
+# $1 - value to set the ZIL_SAXATTR tunable to for this run
+# $2 - optional; "disabled" creates the pool with feature@zilsaxattr=disabled
+#
+# The xattrs are expected to be present after replay only when the feature
+# was not disabled and $1 is not 0; otherwise they are expected to be lost.
+#
+function validate_zil_saxattr
+{
+	typeset saxattrzil=$1
+	typeset zilsaxattr_feature_disabled=0
+	typeset zpoolcreateflags=""
+	typeset i
+
+	if [[ "$2" == "disabled" ]]; then
+		zilsaxattr_feature_disabled=1
+		zpoolcreateflags="-ofeature@zilsaxattr=disabled"
+	fi
+
+	log_must set_tunable32 ZIL_SAXATTR $saxattrzil
+
+	#
+	# 1. Create an empty file system (TESTFS)
+	#
+	log_must zpool create $zpoolcreateflags $TESTPOOL $VDEV log mirror $LDEV
+	log_must zfs set compression=on $TESTPOOL
+	log_must zfs create -o xattr=sa $TESTPOOL/$TESTFS
+	log_must mkdir -p $TESTDIR
+
+	#
+	# This dd command works around an issue where ZIL records aren't created
+	# after freezing the pool unless a ZIL header already exists. Create a
+	# file synchronously to force ZFS to write one out.
+	#
+	log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \
+	    conv=fdatasync,fsync bs=1 count=1
+
+	#
+	# 2. Freeze TESTFS
+	#
+	log_must zpool freeze $TESTPOOL
+
+	# The helper file has served its purpose; fail loudly if it is stuck.
+	log_must rm /$TESTPOOL/$TESTFS/sync
+
+	#
+	# 3. Create xattrs
+	#
+	for i in $(seq $NFILES); do
+		log_must mkdir /$TESTPOOL/$TESTFS/xattr.d.$i
+		log_must set_xattr test test /$TESTPOOL/$TESTFS/xattr.d.$i
+
+		log_must touch /$TESTPOOL/$TESTFS/xattr.f.$i
+		log_must set_xattr test test /$TESTPOOL/$TESTFS/xattr.f.$i
+	done
+
+	#
+	# 4. Unmount filesystem and export the pool
+	#
+	# At this stage TESTFS is empty again and unfrozen, and the
+	# intent log contains a complete set of deltas to replay it.
+	#
+	log_must zfs unmount /$TESTPOOL/$TESTFS
+
+	log_note "Verify transactions to replay:"
+	log_must zdb -iv $TESTPOOL/$TESTFS
+
+	log_must zpool export $TESTPOOL
+
+	#
+	# 5. Remount TESTFS <which replays the intent log>
+	#
+	# Import the pool to unfreeze it and claim log blocks. It has to be
+	# `zpool import -f` because we can't write a frozen pool's labels!
+	#
+	log_must zpool import -f -d $VDIR $TESTPOOL
+
+	#
+	# 6. Verify xattrs
+	# If zilsaxattr_feature_disabled=1 or saxattrzil=0, then xattr=sa
+	# logging in the ZIL is not enabled, so the xattrs are lost.
+	# If zilsaxattr_feature_disabled=0 and saxattrzil=1, then xattr=sa
+	# logging in the ZIL is enabled, so the xattrs must not be lost.
+	#
+	for i in $(seq $NFILES); do
+		if [[ $zilsaxattr_feature_disabled -eq 1 ||
+		    $saxattrzil -eq 0 ]]; then
+			log_mustnot get_xattr test /$TESTPOOL/$TESTFS/xattr.d.$i
+			log_mustnot get_xattr test /$TESTPOOL/$TESTFS/xattr.f.$i
+		else
+			log_must get_xattr test /$TESTPOOL/$TESTFS/xattr.d.$i
+			log_must get_xattr test /$TESTPOOL/$TESTFS/xattr.f.$i
+		fi
+	done
+
+	# Reset the test environment so the next run starts clean.
+	cleanup
+	log_must setup
+}
+
+
+# Pool created without disabling the zilsaxattr feature: run once with the
+# ZIL_SAXATTR tunable set to 0 and once with it set to 1.
+validate_zil_saxattr 0
+validate_zil_saxattr 1
+# Pool created with feature@zilsaxattr=disabled: same two tunable values.
+validate_zil_saxattr 0 disabled
+validate_zil_saxattr 1 disabled
+
+log_pass "Verify saxattr logging in to ZIL works"