From 3e484434c0ed128d61dd2c602c9f1b81b4d32039 Mon Sep 17 00:00:00 2001 From: Sherin T George Date: Thu, 12 Mar 2026 13:12:54 +0800 Subject: [PATCH] DAOS-18682 dfs: Hardlinks support in dfs and dfuse This commit adds support for creating, managing, and resolving hardlinks in DFS and dfuse. Includes metadata updates, link count handling, and initial tests. Signed-off-by: Sherin T George --- src/client/dfs/common.c | 711 ++++- src/client/dfs/cont.c | 619 ++++- src/client/dfs/dfs_internal.h | 46 +- src/client/dfs/dir.c | 14 +- src/client/dfs/file.c | 83 +- src/client/dfs/lookup.c | 33 +- src/client/dfs/metrics.h | 2 + src/client/dfs/mnt.c | 40 +- src/client/dfs/obj.c | 526 +++- src/client/dfs/rename.c | 33 +- src/client/dfs/xattr.c | 481 +++- src/client/dfuse/SConscript | 1 + src/client/dfuse/dfuse.h | 153 +- src/client/dfuse/dfuse_core.c | 279 +- src/client/dfuse/dfuse_fuseops.c | 27 +- src/client/dfuse/inval.c | 39 +- src/client/dfuse/ops/link.c | 48 + src/client/dfuse/ops/lookup.c | 44 +- src/client/dfuse/ops/open.c | 9 +- src/client/dfuse/ops/opendir.c | 11 +- src/client/dfuse/ops/rename.c | 43 +- src/client/dfuse/ops/setxattr.c | 11 +- src/client/dfuse/ops/unlink.c | 75 +- src/common/misc.c | 8 +- src/include/daos/dfs_lib_int.h | 22 +- src/include/daos_fs.h | 19 +- src/tests/suite/dfs_unit_test.c | 4210 +++++++++++++++++++++++++++++- 27 files changed, 7126 insertions(+), 461 deletions(-) create mode 100644 src/client/dfuse/ops/link.c diff --git a/src/client/dfs/common.c b/src/client/dfs/common.c index 72cae4dceb5..a2fd39f5a9a 100644 --- a/src/client/dfs/common.c +++ b/src/client/dfs/common.c @@ -278,6 +278,8 @@ fetch_entry(dfs_layout_ver_t ver, daos_handle_t oh, daos_handle_t th, const char *exists = false; else *exists = true; + + entry->ref_cnt = 1; out: if (xnr) { if (pxnames) { @@ -293,36 +295,348 @@ fetch_entry(dfs_layout_ver_t ver, daos_handle_t oh, daos_handle_t th, const char } int -remove_entry(dfs_t *dfs, daos_handle_t th, daos_handle_t parent_oh, const 
char *name, size_t len, - struct dfs_entry entry) +hlm_fetch_entry(daos_handle_t hlm_oh, daos_handle_t th, daos_obj_id_t *oid, struct dfs_entry *entry) { - daos_key_t dkey; - daos_handle_t oh; - int rc; + d_sg_list_t sgl; + d_iov_t sg_iovs[HLM_INODE_AKEYS]; + daos_iod_t iod; + daos_recx_t recx; + daos_key_t dkey; + unsigned int i; + int rc; - if (S_ISLNK(entry.mode)) - goto punch_entry; + if (oid == NULL || entry == NULL) + return EINVAL; - rc = daos_obj_open(dfs->coh, entry.oid, DAOS_OO_RW, &oh, NULL); - if (rc) + /* Set dkey as the file's OID */ + d_iov_set(&dkey, oid, sizeof(daos_obj_id_t)); + + d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + iod.iod_nr = 1; + recx.rx_idx = 0; + recx.rx_nr = END_HLM_IDX; + iod.iod_recxs = &recx; + iod.iod_type = DAOS_IOD_ARRAY; + iod.iod_size = 1; + + i = 0; + d_iov_set(&sg_iovs[i++], &entry->mode, sizeof(mode_t)); + d_iov_set(&sg_iovs[i++], &entry->oid, sizeof(daos_obj_id_t)); + d_iov_set(&sg_iovs[i++], &entry->mtime, sizeof(uint64_t)); + d_iov_set(&sg_iovs[i++], &entry->ctime, sizeof(uint64_t)); + d_iov_set(&sg_iovs[i++], &entry->chunk_size, sizeof(daos_size_t)); + d_iov_set(&sg_iovs[i++], &entry->oclass, sizeof(daos_oclass_id_t)); + d_iov_set(&sg_iovs[i++], &entry->mtime_nano, sizeof(uint64_t)); + d_iov_set(&sg_iovs[i++], &entry->ctime_nano, sizeof(uint64_t)); + d_iov_set(&sg_iovs[i++], &entry->uid, sizeof(uid_t)); + d_iov_set(&sg_iovs[i++], &entry->gid, sizeof(gid_t)); + d_iov_set(&sg_iovs[i++], &entry->value_len, sizeof(daos_size_t)); + d_iov_set(&sg_iovs[i++], &entry->obj_hlc, sizeof(uint64_t)); + d_iov_set(&sg_iovs[i++], &entry->ref_cnt, sizeof(uint64_t)); + + sgl.sg_nr = i; + sgl.sg_nr_out = 0; + sgl.sg_iovs = sg_iovs; + + rc = daos_obj_fetch(hlm_oh, th, DAOS_COND_DKEY_FETCH, &dkey, 1, &iod, &sgl, NULL, NULL); + if (rc == -DER_NONEXIST) { + D_ERROR("Entry not found in HLM\n"); + return ENOENT; + } else if (rc) { + D_ERROR("Failed to fetch entry from HLM " DF_RC "\n", DP_RC(rc)); return 
daos_der2errno(rc); + } - rc = daos_obj_punch(oh, th, 0, NULL); + if (sgl.sg_nr_out == 0) { + D_ERROR("Entry not found in HLM\n"); + return ENOENT; + } + + return 0; +} + +/** + * Fetch entry for a dfs object, handling hardlinks transparently. + * If the object is a hardlink, fetches from HLM and sets parent_oh to DAOS_HDL_INVAL. + * Otherwise, opens the parent object and fetches the entry from the parent directory. + * If during fetch we discover the entry has become a hardlink (converted by another + * DFS instance), we update obj->mode and retry from HLM. + * + * \param dfs DFS handle + * \param th Transaction handle (can be DAOS_TX_NONE) + * \param obj DFS object to fetch entry for + * \param parent_oh Output parent object handle (DAOS_HDL_INVAL if hardlink) + * \param entry Output entry structure + * \return 0 on success, errno on failure + */ +int +dfsobj_fetch_entry(dfs_t *dfs, daos_handle_t th, dfs_obj_t *obj, daos_handle_t *parent_oh, + struct dfs_entry *entry) +{ + bool exists; + int rc; + +retry: + if (dfs_is_hardlink(obj->mode)) { + /* Fetch entry from HLM using obj's oid as dkey */ + rc = hlm_fetch_entry(dfs->hlm_oh, th, &obj->oid, entry); + if (rc) { + D_ERROR("Failed to fetch entry from HLM (%d)\n", rc); + return rc; + } + *parent_oh = DAOS_HDL_INVAL; + } else { + /* Open the parent directory object */ + rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RW, parent_oh, NULL); + if (rc) { + D_ERROR("daos_obj_open() failed (%d)\n", rc); + return daos_der2errno(rc); + } + + /* Fetch entry from parent directory */ + rc = fetch_entry(dfs->layout_v, *parent_oh, th, obj->name, strlen(obj->name), false, + &exists, entry, 0, NULL, NULL, NULL); + if (rc) { + D_ERROR("Failed to fetch entry (%d)\n", rc); + daos_obj_close(*parent_oh, NULL); + *parent_oh = DAOS_HDL_INVAL; + return rc; + } + if (!exists) { + daos_obj_close(*parent_oh, NULL); + *parent_oh = DAOS_HDL_INVAL; + return ENOENT; + } + + /* Verify OID matches - entry may have been replaced */ + if 
(obj->oid.hi != entry->oid.hi || obj->oid.lo != entry->oid.lo) { + daos_obj_close(*parent_oh, NULL); + *parent_oh = DAOS_HDL_INVAL; + return ENOENT; + } + + /* + * Check if the entry has become a hardlink (another DFS instance + * may have converted it). If so, update obj->mode and retry. + */ + if (unlikely(dfs_is_hardlink(entry->mode))) { + daos_obj_close(*parent_oh, NULL); + *parent_oh = DAOS_HDL_INVAL; + obj->mode = entry->mode; + goto retry; + } + } + + return 0; +} + +/** + * Punch the dkey entry from HLM object. + */ +static int +hlm_punch_entry(dfs_t *dfs, daos_handle_t th, daos_obj_id_t *oid) +{ + daos_key_t dkey; + int rc; + + d_iov_set(&dkey, oid, sizeof(daos_obj_id_t)); + rc = daos_obj_punch_dkeys(dfs->hlm_oh, th, 0, 1, &dkey, NULL); if (rc) { - daos_obj_close(oh, NULL); + D_ERROR("Failed to punch HLM entry " DF_RC "\n", DP_RC(rc)); return daos_der2errno(rc); } - rc = daos_obj_close(oh, NULL); + return 0; +} + +/** + * Update (increment or decrement) the reference count in HLM for a hardlinked file. + * Updates entry with ref_cnt, ctime, and ctime_nano that are written to HLM. + * \param delta Positive to increment, negative to decrement. 
+ */ +int +hlm_update_ref_cnt(dfs_t *dfs, daos_handle_t th, struct dfs_entry *entry, int delta) +{ + daos_key_t dkey; + d_sg_list_t sgl; + d_iov_t sg_iovs[3]; + daos_iod_t iod; + daos_recx_t recxs[3]; + struct timespec now; + int rc; + + if (dfs == NULL || entry == NULL) + return EINVAL; + + /* Protect against underflow */ + if (delta < 0 && entry->ref_cnt < (uint64_t)(-delta)) { + D_ERROR("ref_cnt underflow: ref_cnt=%lu, delta=%d\n", entry->ref_cnt, delta); + return EINVAL; + } + + /* Update ref_cnt */ + entry->ref_cnt += delta; + + /* Set ctime to current time */ + rc = clock_gettime(CLOCK_REALTIME, &now); if (rc) + return errno; + + entry->ctime = now.tv_sec; + entry->ctime_nano = now.tv_nsec; + + /* Update ref_cnt and ctime in HLM */ + d_iov_set(&dkey, &entry->oid, sizeof(daos_obj_id_t)); + d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + iod.iod_nr = 3; + recxs[0].rx_idx = REF_CNT_IDX; + recxs[0].rx_nr = sizeof(uint64_t); + recxs[1].rx_idx = CTIME_IDX; + recxs[1].rx_nr = sizeof(uint64_t); + recxs[2].rx_idx = CTIME_NSEC_IDX; + recxs[2].rx_nr = sizeof(uint64_t); + iod.iod_recxs = recxs; + iod.iod_type = DAOS_IOD_ARRAY; + iod.iod_size = 1; + + d_iov_set(&sg_iovs[0], &entry->ref_cnt, sizeof(uint64_t)); + d_iov_set(&sg_iovs[1], &entry->ctime, sizeof(uint64_t)); + d_iov_set(&sg_iovs[2], &entry->ctime_nano, sizeof(uint64_t)); + sgl.sg_nr = 3; + sgl.sg_nr_out = 0; + sgl.sg_iovs = sg_iovs; + + rc = daos_obj_update(dfs->hlm_oh, th, DAOS_COND_DKEY_UPDATE, &dkey, 1, &iod, &sgl, NULL); + if (rc) { + D_ERROR("Failed to update ref_cnt in HLM " DF_RC "\n", DP_RC(rc)); return daos_der2errno(rc); + } -punch_entry: + return 0; +} + +int +remove_entry(dfs_t *dfs, daos_handle_t th, daos_handle_t parent_oh, const char *name, size_t len, + struct dfs_entry entry, bool *deleted) +{ + daos_key_t dkey; + daos_handle_t oh; + daos_handle_t local_th = th; + bool own_tx = false; + int rc; + + /* Assume file is deleted unless it's a hardlink with ref_cnt > 1 */ + if 
(deleted) + *deleted = true; + + /* + * For hardlinks, if no transaction was passed, create a local one to ensure + * consistency across multiple updates (dentry punch, HLM update, file punch). + */ + if (dfs_is_hardlink(entry.mode) && !daos_handle_is_valid(th)) { + rc = daos_tx_open(dfs->coh, &local_th, 0, NULL); + if (rc) { + D_ERROR("daos_tx_open() failed (%d)\n", rc); + return daos_der2errno(rc); + } + own_tx = true; + } + +restart: + /* First, punch the dentry from parent */ d_iov_set(&dkey, (void *)name, len); /** we only need a conditional dkey punch if we are not using a DTX */ - rc = - daos_obj_punch_dkeys(parent_oh, th, dfs->use_dtx ? 0 : DAOS_COND_PUNCH, 1, &dkey, NULL); - return daos_der2errno(rc); + rc = daos_obj_punch_dkeys(parent_oh, local_th, + daos_handle_is_valid(local_th) ? 0 : DAOS_COND_PUNCH, 1, &dkey, + NULL); + if (rc) { + rc = daos_der2errno(rc); + D_GOTO(out, rc); + } + + if (S_ISLNK(entry.mode)) + D_GOTO(commit, rc = 0); + + /* If hardlink bit is set, handle hardlink-specific logic */ + if (dfs_is_hardlink(entry.mode)) { + struct dfs_entry hlm_entry = {0}; + + /* Fetch entry from HLM to check ref_cnt */ + rc = hlm_fetch_entry(dfs->hlm_oh, local_th, &entry.oid, &hlm_entry); + if (rc) + D_GOTO(out, rc); + + /* Only the last link deletes the file; recomputed on every pass through restart: so a retried TX never reports a stale value */ + if (deleted) + *deleted = (hlm_entry.ref_cnt == 1); + if (hlm_entry.ref_cnt == 1) { + /* Punch the dkey entry from HLM */ + rc = hlm_punch_entry(dfs, local_th, &entry.oid); + if (rc) { + D_ERROR("Failed to punch HLM entry (%d)\n", rc); + D_GOTO(out, rc); + } + + /* Punch the file object */ + rc = daos_obj_open(dfs->coh, entry.oid, DAOS_OO_RW, &oh, NULL); + if (rc) { + rc = daos_der2errno(rc); + D_GOTO(out, rc); + } + + rc = daos_obj_punch(oh, local_th, 0, NULL); + daos_obj_close(oh, NULL); + if (rc) { + rc = daos_der2errno(rc); + D_GOTO(out, rc); + } + } else { + /* Decrement ref_cnt in HLM - file is NOT deleted */ + rc = hlm_update_ref_cnt(dfs, local_th, &hlm_entry, -1); + if (rc) { + D_ERROR("Failed
to decrement ref_cnt in HLM (%d)\n", rc); + D_GOTO(out, rc); + } + } + D_GOTO(commit, rc = 0); + } + + /* Regular file (not hardlink) - punch the file object */ + rc = daos_obj_open(dfs->coh, entry.oid, DAOS_OO_RW, &oh, NULL); + if (rc) { + rc = daos_der2errno(rc); + D_GOTO(out, rc); + } + + rc = daos_obj_punch(oh, local_th, 0, NULL); + daos_obj_close(oh, NULL); + if (rc) { + rc = daos_der2errno(rc); + D_GOTO(out, rc); + } + +commit: + if (own_tx) { + rc = daos_tx_commit(local_th, NULL); + if (rc) { + if (rc != -DER_TX_RESTART) + D_ERROR("daos_tx_commit() failed (%d)\n", rc); + rc = daos_der2errno(rc); + D_GOTO(out, rc); + } + } + +out: + if (own_tx && daos_handle_is_valid(local_th)) { + if (rc == ERESTART) { + int rc2 = daos_tx_restart(local_th, NULL); + if (rc2 == 0) + goto restart; + rc = daos_der2errno(rc2); + } + daos_tx_close(local_th, NULL); + } + return rc; } int @@ -506,17 +820,39 @@ entry_stat(dfs_t *dfs, daos_handle_t th, daos_handle_t oh, const char *name, siz memset(stbuf, 0, sizeof(struct stat)); - /** Check if parent has the entry. */ - rc = fetch_entry(dfs->layout_v, oh, th, name, len, false, &exists, &entry, 0, NULL, NULL, - NULL); - if (rc) - return rc; + /* If obj is provided and already known to be a hardlink, fetch directly from HLM */ + if (obj && dfs_is_hardlink(obj->mode)) { + rc = hlm_fetch_entry(dfs->hlm_oh, th, &obj->oid, &entry); + if (rc) { + D_ERROR("Failed to fetch entry '%s' oid=" DF_OID " from HLM (%d)\n", name, + DP_OID(obj->oid), rc); + return rc; + } + } else { + /** Check if parent has the entry. 
*/ + rc = fetch_entry(dfs->layout_v, oh, th, name, len, false, &exists, &entry, 0, NULL, + NULL, NULL); + if (rc) + return rc; - if (!exists) - return ENOENT; + if (!exists) + return ENOENT; - if (obj && (obj->oid.hi != entry.oid.hi || obj->oid.lo != entry.oid.lo)) - return ENOENT; + if (obj && (obj->oid.hi != entry.oid.hi || obj->oid.lo != entry.oid.lo)) + return ENOENT; + + /* If dentry indicates hardlink, update obj->mode and retry from HLM */ + if (dfs_is_hardlink(entry.mode)) { + if (obj) + obj->mode = entry.mode; + rc = hlm_fetch_entry(dfs->hlm_oh, th, &entry.oid, &entry); + if (rc) { + D_ERROR("Failed to fetch entry '%s' oid=" DF_OID " from HLM (%d)\n", + name, DP_OID(entry.oid), rc); + return rc; + } + } + } switch (entry.mode & S_IFMT) { case S_IFDIR: { @@ -532,7 +868,8 @@ entry_stat(dfs_t *dfs, daos_handle_t th, daos_handle_t oh, const char *name, siz return daos_der2errno(rc); } - rc = daos_obj_query_max_epoch(dir_oh, th, &ep, NULL); + /** Use DAOS_TX_NONE - query doesn't need to be part of DTX */ + rc = daos_obj_query_max_epoch(dir_oh, DAOS_TX_NONE, &ep, NULL); if (rc) { daos_obj_close(dir_oh, NULL); return daos_der2errno(rc); @@ -614,9 +951,9 @@ entry_stat(dfs_t *dfs, daos_handle_t th, daos_handle_t oh, const char *name, siz return EINVAL; } - stbuf->st_nlink = 1; + stbuf->st_nlink = entry.ref_cnt; stbuf->st_size = size; - stbuf->st_mode = entry.mode; + stbuf->st_mode = entry.mode & ~MODE_HARDLINK_BIT; stbuf->st_uid = entry.uid; stbuf->st_gid = entry.gid; if (tspec_gt(stbuf->st_ctim, stbuf->st_mtim)) { @@ -1000,3 +1337,321 @@ dfs_suggest_oclass(dfs_t *dfs, const char *hint, daos_oclass_id_t *cid) D_FREE(local); return rc; } + +/** + * Copy extended attributes from src dentry to HLM object. + * The src uses name as dkey, but dst uses the OID directly as binary dkey. 
+ */ +int +hlm_copy_xattr(daos_handle_t src_oh, const char *src_name, daos_handle_t hlm_oh, + daos_obj_id_t *dst_oid, daos_handle_t th) +{ + daos_key_t src_dkey, dst_dkey; + daos_anchor_t anchor = {0}; + d_sg_list_t sgl, fsgl; + d_iov_t iov, fiov; + daos_iod_t iod; + void *val_buf; + char enum_buf[ENUM_XDESC_BUF]; + daos_key_desc_t kds[ENUM_DESC_NR]; + int rc = 0; + + /* Set dkey for src entry name */ + d_iov_set(&src_dkey, (void *)src_name, strlen(src_name)); + + /* Set dkey for dst as binary OID */ + d_iov_set(&dst_dkey, dst_oid, sizeof(daos_obj_id_t)); + + /* Set IOD descriptor for fetching every xattr */ + iod.iod_nr = 1; + iod.iod_recxs = NULL; + iod.iod_type = DAOS_IOD_SINGLE; + iod.iod_size = DFS_MAX_XATTR_LEN; + + /* Set sgl for fetch - use a preallocated buf to avoid a roundtrip */ + D_ALLOC(val_buf, DFS_MAX_XATTR_LEN); + if (val_buf == NULL) + return ENOMEM; + fsgl.sg_nr = 1; + fsgl.sg_nr_out = 0; + fsgl.sg_iovs = &fiov; + + /* Set sgl for akey_list */ + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + d_iov_set(&iov, enum_buf, ENUM_XDESC_BUF); + sgl.sg_iovs = &iov; + + /* Iterate over every akey to look for xattrs */ + while (!daos_anchor_is_eof(&anchor)) { + uint32_t number = ENUM_DESC_NR; + uint32_t i; + char *ptr; + + memset(enum_buf, 0, ENUM_XDESC_BUF); + rc = daos_obj_list_akey(src_oh, th, &src_dkey, &number, kds, &sgl, &anchor, NULL); + if (rc) { + D_ERROR("daos_obj_list_akey() failed (%d)\n", rc); + D_GOTO(out, rc = daos_der2errno(rc)); + } + + if (number == 0) + continue; + + /* + * For every entry enumerated, check if it's an xattr, and + * insert it in the HLM entry. 
+ */ + for (ptr = enum_buf, i = 0; i < number; i++) { + /* If not an xattr, go to next entry */ + if (strncmp("x:", ptr, 2) != 0) { + ptr += kds[i].kd_key_len; + continue; + } + + /* Set akey as the xattr name */ + d_iov_set(&iod.iod_name, ptr, kds[i].kd_key_len); + d_iov_set(&fiov, val_buf, DFS_MAX_XATTR_LEN); + + /* Fetch the xattr value from the src */ + rc = daos_obj_fetch(src_oh, th, 0, &src_dkey, 1, &iod, &fsgl, NULL, NULL); + if (rc) { + D_ERROR("daos_obj_fetch() failed (%d)\n", rc); + D_GOTO(out, rc = daos_der2errno(rc)); + } + + d_iov_set(&fiov, val_buf, iod.iod_size); + + /* Add it to the HLM destination */ + rc = daos_obj_update(hlm_oh, th, 0, &dst_dkey, 1, &iod, &fsgl, NULL); + if (rc) { + D_ERROR("daos_obj_update() failed (%d)\n", rc); + D_GOTO(out, rc = daos_der2errno(rc)); + } + ptr += kds[i].kd_key_len; + } + } + +out: + D_FREE(val_buf); + return rc; +} + +/** + * Copy the dentry and extended attributes to the HLM object. + * The dkey in HLM is the OID of the file object. + * Updates entry with ctime, ctime_nano, and ref_cnt that are written to HLM. 
+ */ +int +hlm_copy_entry(dfs_t *dfs, daos_handle_t th, daos_handle_t parent_oh, const char *name, + struct dfs_entry *entry) +{ + daos_key_t dkey; + d_sg_list_t sgl; + d_iov_t sg_iovs[HLM_INODE_AKEYS]; + daos_iod_t iod; + daos_recx_t recx; + unsigned int i; + struct timespec now; + int rc; + + /* Set ctime to current time */ + rc = clock_gettime(CLOCK_REALTIME, &now); + if (rc) + return errno; + + /* Update entry with values being written to HLM */ + entry->ctime = now.tv_sec; + entry->ctime_nano = now.tv_nsec; + entry->ref_cnt = 2; + entry->mode |= MODE_HARDLINK_BIT; + + /* Set dkey as the file's OID in HLM object */ + d_iov_set(&dkey, &entry->oid, sizeof(daos_obj_id_t)); + + /* Set up iod for the inode akey */ + d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + iod.iod_nr = 1; + recx.rx_idx = 0; + recx.rx_nr = END_HLM_IDX; + iod.iod_recxs = &recx; + iod.iod_type = DAOS_IOD_ARRAY; + iod.iod_size = 1; + + /* Populate sg_iovs with entry fields */ + i = 0; + d_iov_set(&sg_iovs[i++], &entry->mode, sizeof(mode_t)); + d_iov_set(&sg_iovs[i++], &entry->oid, sizeof(daos_obj_id_t)); + d_iov_set(&sg_iovs[i++], &entry->mtime, sizeof(uint64_t)); + d_iov_set(&sg_iovs[i++], &entry->ctime, sizeof(uint64_t)); + d_iov_set(&sg_iovs[i++], &entry->chunk_size, sizeof(daos_size_t)); + d_iov_set(&sg_iovs[i++], &entry->oclass, sizeof(daos_oclass_id_t)); + d_iov_set(&sg_iovs[i++], &entry->mtime_nano, sizeof(uint64_t)); + d_iov_set(&sg_iovs[i++], &entry->ctime_nano, sizeof(uint64_t)); + d_iov_set(&sg_iovs[i++], &entry->uid, sizeof(uid_t)); + d_iov_set(&sg_iovs[i++], &entry->gid, sizeof(gid_t)); + d_iov_set(&sg_iovs[i++], &entry->value_len, sizeof(daos_size_t)); + d_iov_set(&sg_iovs[i++], &entry->obj_hlc, sizeof(uint64_t)); + /* ref_cnt is 2 because we have the original entry and the new hardlink */ + d_iov_set(&sg_iovs[i++], &entry->ref_cnt, sizeof(uint64_t)); + + sgl.sg_nr = i; + sgl.sg_nr_out = 0; + sgl.sg_iovs = sg_iovs; + + /* Insert dentry into HLM object */ + 
rc = daos_obj_update(dfs->hlm_oh, th, DAOS_COND_DKEY_INSERT, &dkey, 1, &iod, &sgl, NULL); + if (rc) { + D_ERROR("Failed to insert entry into HLM " DF_RC "\n", DP_RC(rc)); + return daos_der2errno(rc); + } + + /* Copy extended attributes from parent dentry to HLM */ + rc = hlm_copy_xattr(parent_oh, name, dfs->hlm_oh, &entry->oid, th); + if (rc) { + D_ERROR("Failed to copy xattrs to HLM (%d)\n", rc); + return rc; + } + + return 0; +} + +/** + * Remove extended attributes from a dentry (not from HLM). + */ +int +remove_xattrs_from_entry(dfs_t *dfs, daos_handle_t th, daos_handle_t parent_oh, const char *name) +{ + daos_key_t dkey; + daos_key_t akey; + daos_anchor_t anchor = {0}; + d_sg_list_t sgl; + d_iov_t iov; + char enum_buf[ENUM_XDESC_BUF]; + daos_key_desc_t kds[ENUM_DESC_NR]; + int rc = 0; + + d_iov_set(&dkey, (void *)name, strlen(name)); + + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + d_iov_set(&iov, enum_buf, ENUM_XDESC_BUF); + sgl.sg_iovs = &iov; + + while (!daos_anchor_is_eof(&anchor)) { + uint32_t number = ENUM_DESC_NR; + uint32_t i; + char *ptr; + + memset(enum_buf, 0, ENUM_XDESC_BUF); + rc = daos_obj_list_akey(parent_oh, th, &dkey, &number, kds, &sgl, &anchor, NULL); + if (rc) { + D_ERROR("daos_obj_list_akey() failed (%d)\n", rc); + return daos_der2errno(rc); + } + + if (number == 0) + continue; + + for (ptr = enum_buf, i = 0; i < number; i++) { + /* Only remove xattrs (prefixed with "x:") */ + if (strncmp("x:", ptr, 2) != 0) { + ptr += kds[i].kd_key_len; + continue; + } + + d_iov_set(&akey, ptr, kds[i].kd_key_len); + rc = daos_obj_punch_akeys(parent_oh, th, + DAOS_COND_DKEY_UPDATE | DAOS_COND_PUNCH, &dkey, 1, + &akey, NULL); + if (rc) { + D_ERROR("Failed to punch xattr akey (%d)\n", rc); + return daos_der2errno(rc); + } + ptr += kds[i].kd_key_len; + } + } + + return 0; +} + +/** + * Update the mode of an entry to set the hardlink bit. 
+ */ +int +set_hardlink_bit(dfs_t *dfs, daos_handle_t th, daos_handle_t parent_oh, dfs_obj_t *obj, mode_t mode) +{ + daos_key_t dkey; + d_sg_list_t sgl; + d_iov_t sg_iov; + daos_iod_t iod; + daos_recx_t recx; + mode_t new_mode; + int rc; + + new_mode = mode | MODE_HARDLINK_BIT; + + d_iov_set(&dkey, (void *)obj->name, strlen(obj->name)); + d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + iod.iod_nr = 1; + recx.rx_idx = MODE_IDX; + recx.rx_nr = sizeof(mode_t); + iod.iod_recxs = &recx; + iod.iod_type = DAOS_IOD_ARRAY; + iod.iod_size = 1; + + d_iov_set(&sg_iov, &new_mode, sizeof(mode_t)); + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + sgl.sg_iovs = &sg_iov; + + rc = daos_obj_update(parent_oh, th, DAOS_COND_DKEY_UPDATE, &dkey, 1, &iod, &sgl, NULL); + if (rc) { + D_ERROR("Failed to set hardlink bit " DF_RC "\n", DP_RC(rc)); + return daos_der2errno(rc); + } + obj->mode = new_mode; + + return 0; +} + +/** + * Update the mode of an entry to clear the hardlink bit. + */ +int +clear_hardlink_bit(dfs_t *dfs, daos_handle_t th, daos_handle_t parent_oh, dfs_obj_t *obj, + mode_t mode) +{ + daos_key_t dkey; + d_sg_list_t sgl; + d_iov_t sg_iov; + daos_iod_t iod; + daos_recx_t recx; + mode_t new_mode; + int rc; + + new_mode = mode & ~MODE_HARDLINK_BIT; + + d_iov_set(&dkey, (void *)obj->name, strlen(obj->name)); + d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + iod.iod_nr = 1; + recx.rx_idx = MODE_IDX; + recx.rx_nr = sizeof(mode_t); + iod.iod_recxs = &recx; + iod.iod_type = DAOS_IOD_ARRAY; + iod.iod_size = 1; + + d_iov_set(&sg_iov, &new_mode, sizeof(mode_t)); + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + sgl.sg_iovs = &sg_iov; + + rc = daos_obj_update(parent_oh, th, DAOS_COND_DKEY_UPDATE, &dkey, 1, &iod, &sgl, NULL); + if (rc) { + D_ERROR("Failed to clear hardlink bit " DF_RC "\n", DP_RC(rc)); + return daos_der2errno(rc); + } + obj->mode = new_mode; + + return 0; +} diff --git a/src/client/dfs/cont.c b/src/client/dfs/cont.c index 
b01d527c112..3a0567b4a00 100644 --- a/src/client/dfs/cont.c +++ b/src/client/dfs/cont.c @@ -288,8 +288,18 @@ dfs_cont_create(daos_handle_t poh, uuid_t *cuuid, dfs_attr_t *attr, daos_handle_ D_GOTO(err_prop, rc = daos_der2errno(rc)); } - /* store SB & root OIDs as container property */ - roots.cr_oids[2] = roots.cr_oids[3] = DAOS_OBJ_NIL; + /* select oclass and generate HLM (hardlink metadata) OID */ + roots.cr_oids[2].lo = RESERVED_LO; + roots.cr_oids[2].hi = HLM_HI; + rc = daos_obj_generate_oid_by_rf(poh, rf, &roots.cr_oids[2], 0, dattr.da_dir_oclass_id, + dir_oclass_hint, 0, pa_domain); + if (rc) { + D_ERROR("Failed to generate HLM OID " DF_RC "\n", DP_RC(rc)); + D_GOTO(err_prop, rc = daos_der2errno(rc)); + } + + /* store SB, root & HLM OIDs as container property */ + roots.cr_oids[3] = DAOS_OBJ_NIL; if (roots_entry == NULL) { /* need to add roots prop to list */ @@ -460,16 +470,299 @@ dfs_cont_create_with_label(daos_handle_t poh, const char *label, dfs_attr_t *att #define DFS_ELAPSED_TIME 30 struct dfs_oit_args { - daos_handle_t oit; - uint64_t flags; - uint64_t snap_epoch; - uint64_t skipped; - uint64_t failed; - time_t start_time; - time_t print_time; - uint64_t num_scanned; + daos_handle_t oit; + uint64_t flags; + uint64_t snap_epoch; + uint64_t skipped; + uint64_t failed; + time_t start_time; + time_t print_time; + uint64_t num_scanned; + struct d_hash_table *hlm_hash; +}; + +/** Hash table entry to track HLM objects for hardlink verification */ +struct hlm_check_entry { + daos_obj_id_t hce_oid; /**< Object ID (key) */ + uint64_t hce_stored_linkcnt; /**< Link count stored in HLM (ref_cnt) */ + uint64_t hce_cur_linkcnt; /**< Current link count from traversal */ + d_list_t hce_link; /**< Hash table link */ +}; + +static inline struct hlm_check_entry * +hlm_check_obj(d_list_t *rlink) +{ + return container_of(rlink, struct hlm_check_entry, hce_link); +} + +static bool +hlm_check_key_cmp(struct d_hash_table *htable, d_list_t *rlink, const void *key, unsigned int 
ksize) +{ + struct hlm_check_entry *entry = hlm_check_obj(rlink); + + D_ASSERT(ksize == sizeof(daos_obj_id_t)); + return daos_oid_cmp(entry->hce_oid, *(daos_obj_id_t *)key) == 0; +} + +static uint32_t +hlm_check_rec_hash(struct d_hash_table *htable, d_list_t *rlink) +{ + struct hlm_check_entry *entry = hlm_check_obj(rlink); + + return d_hash_string_u32((const char *)&entry->hce_oid, sizeof(daos_obj_id_t)); +} + +static void +hlm_check_rec_free(struct d_hash_table *htable, d_list_t *rlink) +{ + struct hlm_check_entry *entry = hlm_check_obj(rlink); + + D_FREE(entry); +} + +static d_hash_table_ops_t hlm_check_hash_ops = { + .hop_key_cmp = hlm_check_key_cmp, + .hop_rec_hash = hlm_check_rec_hash, + .hop_rec_free = hlm_check_rec_free, }; +#define HLM_CHECK_HASH_BITS 10 + +/** + * Iterate through HLM object and populate the hash table with all entries. + * Each entry in HLM has the OID as dkey and stores ref_cnt among other fields. + */ +static int +hlm_populate_check_hash(dfs_t *dfs, struct d_hash_table *hlm_hash, uint64_t flags) +{ + daos_anchor_t anchor = {0}; + daos_key_desc_t *kds = NULL; + char *enum_buf = NULL; + d_sg_list_t sgl; + d_iov_t iov; + int rc = 0; + + /* If HLM is not enabled for this container, nothing to do */ + if (daos_obj_id_is_nil(dfs->hlm_oid)) + return 0; + + D_ALLOC_ARRAY(kds, DFS_ITER_NR); + if (kds == NULL) + return ENOMEM; + + D_ALLOC_ARRAY(enum_buf, DFS_ITER_NR * sizeof(daos_obj_id_t)); + if (enum_buf == NULL) { + D_FREE(kds); + return ENOMEM; + } + + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + d_iov_set(&iov, enum_buf, DFS_ITER_NR * sizeof(daos_obj_id_t)); + sgl.sg_iovs = &iov; + + while (!daos_anchor_is_eof(&anchor)) { + uint32_t nr = DFS_ITER_NR; + uint32_t i; + char *ptr; + + rc = daos_obj_list_dkey(dfs->hlm_oh, DAOS_TX_NONE, &nr, kds, &sgl, &anchor, NULL); + if (rc) { + D_ERROR("daos_obj_list_dkey() on HLM failed " DF_RC "\n", DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + + if (nr == 0) + continue; + + ptr = enum_buf; + for (i = 
0; i < nr; i++) { + struct hlm_check_entry *entry; + daos_obj_id_t *oid_ptr; + struct dfs_entry hlm_entry = {0}; + + if (kds[i].kd_key_len != sizeof(daos_obj_id_t)) { + D_ERROR("Unexpected dkey size in HLM: %lu\n", + (unsigned long)kds[i].kd_key_len); + D_GOTO(out, rc = EINVAL); + } + + oid_ptr = (daos_obj_id_t *)ptr; + + /* Fetch the ref_cnt from HLM for this OID */ + rc = hlm_fetch_entry(dfs->hlm_oh, DAOS_TX_NONE, oid_ptr, &hlm_entry); + if (rc) { + D_ERROR("Failed to fetch HLM entry for " DF_OID ": %d\n", + DP_OID(*oid_ptr), rc); + D_GOTO(out, rc); + } + + /** + * Only regular files (array type) should be in HLM. If we find a + * non-array object, this is corruption - punch the HLM entry. + */ + if (!daos_is_array_type(daos_obj_id2type(*oid_ptr))) { + D_ERROR("Invalid object type in HLM. OID: " DF_OID ", type: %d\n", + DP_OID(*oid_ptr), daos_obj_id2type(*oid_ptr)); + if (flags & (DFS_CHECK_REMOVE | DFS_CHECK_RELINK)) { + daos_key_t dkey; + + D_PRINT("Removing invalid object from HLM entry\n"); + d_iov_set(&dkey, oid_ptr, sizeof(daos_obj_id_t)); + rc = daos_obj_punch_dkeys(dfs->hlm_oh, DAOS_TX_NONE, 0, 1, + &dkey, NULL); + if (rc) { + D_ERROR("Failed to punch invalid HLM entry " DF_OID + " " DF_RC "\n", + DP_OID(*oid_ptr), DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + } + ptr += kds[i].kd_key_len; + continue; + } + + D_ALLOC_PTR(entry); + if (entry == NULL) + D_GOTO(out, rc = ENOMEM); + + oid_cp(&entry->hce_oid, *oid_ptr); + entry->hce_stored_linkcnt = hlm_entry.ref_cnt; + entry->hce_cur_linkcnt = 0; + + rc = d_hash_rec_insert(hlm_hash, &entry->hce_oid, sizeof(daos_obj_id_t), + &entry->hce_link, true); + if (rc) { + D_ERROR("Failed to insert HLM entry into hash " DF_RC "\n", + DP_RC(rc)); + D_FREE(entry); + D_GOTO(out, rc = daos_der2errno(rc)); + } + + ptr += kds[i].kd_key_len; + } + } + +out: + D_FREE(kds); + D_FREE(enum_buf); + return rc; +} + +/** Context for HLM link count verification traverse callback */ +struct hlm_verify_arg { + dfs_t *dfs; + 
uint64_t flags; + uint64_t mismatches; + uint64_t fixed; +}; + +/** + * Callback for traversing HLM hash table to verify and fix link counts. + * If hce_cur_linkcnt != hce_stored_linkcnt, update the HLM entry. + */ +static int +hlm_verify_linkcnt_cb(d_list_t *link, void *arg) +{ + struct hlm_verify_arg *varg = (struct hlm_verify_arg *)arg; + struct hlm_check_entry *hce = hlm_check_obj(link); + dfs_t *dfs = varg->dfs; + daos_key_t dkey; + d_sg_list_t sgl; + d_iov_t sg_iovs[3]; + daos_iod_t iod; + daos_recx_t recxs[3]; + struct timespec now; + int rc; + + /** + * No directory entries point to this OID - it's an orphan in HLM. + * This could happen if: + * 1. Object exists but unmarked - handled by Pass 1 (REMOVE) or Pass 2 (RELINK) + * 2. Object doesn't exist - HLM entry is garbage, punch it + * 3. HLM entry has ref_cnt=0 stored (invalid state) + * + * We punch the HLM entry here if repair flags are set. If the object + * exists and gets relinked in Pass 2, the HLM entry will be recreated. + */ + if (hce->hce_cur_linkcnt == 0) { + D_PRINT("HLM entry " DF_OID " has no directory entries (stored=%lu, cur=%lu)\n", + DP_OID(hce->hce_oid), (unsigned long)hce->hce_stored_linkcnt, + (unsigned long)hce->hce_cur_linkcnt); + + varg->mismatches++; + if (varg->flags & (DFS_CHECK_REMOVE | DFS_CHECK_RELINK)) { + d_iov_set(&dkey, &hce->hce_oid, sizeof(daos_obj_id_t)); + rc = daos_obj_punch_dkeys(dfs->hlm_oh, DAOS_TX_NONE, 0, 1, &dkey, NULL); + if (rc && rc != -DER_NONEXIST) { + D_ERROR("Failed to punch orphan HLM entry " DF_OID " " DF_RC "\n", + DP_OID(hce->hce_oid), DP_RC(rc)); + return daos_der2errno(rc); + } + varg->fixed++; + } + return 0; + } + + /** No mismatch - counts match */ + if (hce->hce_cur_linkcnt == hce->hce_stored_linkcnt) + return 0; + + varg->mismatches++; + + /** + * Link count mismatch detected. The stored link count in HLM doesn't + * match the actual number of directory entries pointing to this OID. + * Update HLM with the correct (current) link count. 
+ */ + D_PRINT("HLM entry " DF_OID " link count mismatch (stored=%lu, cur=%lu)\n", + DP_OID(hce->hce_oid), (unsigned long)hce->hce_stored_linkcnt, + (unsigned long)hce->hce_cur_linkcnt); + + /** Only fix if repair flags are set */ + if (!(varg->flags & (DFS_CHECK_REMOVE | DFS_CHECK_RELINK))) + return 0; + + /* Get current time for ctime update */ + rc = clock_gettime(CLOCK_REALTIME, &now); + if (rc) { + D_ERROR("clock_gettime() failed: %d (%s)\n", errno, strerror(errno)); + return errno; + } + + /* Update ref_cnt and ctime in HLM */ + d_iov_set(&dkey, &hce->hce_oid, sizeof(daos_obj_id_t)); + d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); + iod.iod_nr = 3; + recxs[0].rx_idx = REF_CNT_IDX; + recxs[0].rx_nr = sizeof(uint64_t); + recxs[1].rx_idx = CTIME_IDX; + recxs[1].rx_nr = sizeof(uint64_t); + recxs[2].rx_idx = CTIME_NSEC_IDX; + recxs[2].rx_nr = sizeof(uint64_t); + iod.iod_recxs = recxs; + iod.iod_type = DAOS_IOD_ARRAY; + iod.iod_size = 1; + + d_iov_set(&sg_iovs[0], &hce->hce_cur_linkcnt, sizeof(uint64_t)); + d_iov_set(&sg_iovs[1], &now.tv_sec, sizeof(uint64_t)); + d_iov_set(&sg_iovs[2], &now.tv_nsec, sizeof(uint64_t)); + sgl.sg_nr = 3; + sgl.sg_nr_out = 0; + sgl.sg_iovs = sg_iovs; + + rc = daos_obj_update(dfs->hlm_oh, DAOS_TX_NONE, DAOS_COND_DKEY_UPDATE, &dkey, 1, &iod, &sgl, + NULL); + if (rc) { + D_ERROR("Failed to update HLM ref_cnt for " DF_OID " " DF_RC "\n", + DP_OID(hce->hce_oid), DP_RC(rc)); + return daos_der2errno(rc); + } + + varg->fixed++; + return 0; +} + static int fetch_mark_oids(daos_handle_t coh, daos_obj_id_t oid, daos_key_desc_t *kds, char *enum_buf, struct dfs_oit_args *args) @@ -564,7 +857,9 @@ oit_mark_cb(dfs_t *dfs, dfs_obj_t *parent, const char name[], void *args) d_iov_t marker; bool mark_data = true; struct timespec current_time; + d_list_t *rlink; int rc; + int rc2; rc = clock_gettime(CLOCK_REALTIME, &current_time); if (rc) @@ -578,7 +873,7 @@ oit_mark_cb(dfs_t *dfs, dfs_obj_t *parent, const char name[], void *args) }
/** open the entry name and get the oid */ - rc = dfs_lookup_rel(dfs, parent, name, O_RDONLY | O_NOFOLLOW, &obj, NULL, NULL); + rc = dfs_lookup_rel(dfs, parent, name, O_RDWR | O_NOFOLLOW, &obj, NULL, NULL); if (rc) { D_ERROR("dfs_lookup_rel() of %s failed: %d\n", name, rc); return rc; @@ -588,6 +883,78 @@ oit_mark_cb(dfs_t *dfs, dfs_obj_t *parent, const char name[], void *args) if (rc) D_GOTO(out_obj, rc); + /** Check if the OID exists in HLM hash table */ + if (oit_args->hlm_hash != NULL) { + rlink = d_hash_rec_find(oit_args->hlm_hash, &oid, sizeof(daos_obj_id_t)); + if (rlink != NULL) { + struct hlm_check_entry *hce = hlm_check_obj(rlink); + + /** Increment current link count */ + hce->hce_cur_linkcnt++; + + /** + * File found in HLM but dentry missing hardlink bit. + * Note: Only array-type objects are added to hlm_hash + * (directories/symlinks are filtered in hlm_populate_check_hash). + */ + if (!dfs_is_hardlink(obj->mode)) { + D_PRINT("OID " DF_OID " (parent " DF_OID ", name '%s') in HLM but " + "dentry missing hardlink bit\n", + DP_OID(oid), DP_OID(parent->oid), name); + if (oit_args->flags & (DFS_CHECK_REMOVE | DFS_CHECK_RELINK)) { + rc = set_hardlink_bit(dfs, DAOS_TX_NONE, parent->oh, obj, + obj->mode); + if (rc) { + D_ERROR("Failed to set hardlink bit for " DF_OID + ": %d\n", + DP_OID(oid), rc); + D_GOTO(out_obj, rc); + } + } + } + } else if (dfs_is_hardlink(obj->mode)) { + /** + * Dentry has hardlink bit set but no HLM entry exists. + * This is corruption - the file claims to be a hardlink + * but has no metadata in HLM to support it. Clear the bit + * so the file becomes accessible as a regular file. 
+ */ + D_PRINT("OID " DF_OID " (parent " DF_OID ", name '%s') has hardlink bit " + "set but no HLM entry exists\n", + DP_OID(oid), DP_OID(parent->oid), name); + if (oit_args->flags & (DFS_CHECK_REMOVE | DFS_CHECK_RELINK)) { + rc = clear_hardlink_bit(dfs, DAOS_TX_NONE, parent->oh, obj, + obj->mode); + if (rc) { + D_ERROR("Failed to clear hardlink bit for " DF_OID ": %d\n", + DP_OID(oid), rc); + D_GOTO(out_obj, rc); + } + } + } + } + + /** + * Directories and symlinks should never have the hardlink bit set. + * This is corruption - clear the bit. + * Note: We check MODE_HARDLINK_BIT directly instead of dfs_is_hardlink() + * because dfs_is_hardlink() requires S_ISREG() which is false here. + */ + if ((S_ISDIR(obj->mode) || S_ISLNK(obj->mode)) && (obj->mode & MODE_HARDLINK_BIT)) { + D_WARN("OID " DF_OID " (parent " DF_OID ", name '%s') is %s but has " + "hardlink bit set - needs clearing\n", + DP_OID(oid), DP_OID(parent->oid), name, + S_ISDIR(obj->mode) ? "directory" : "symlink"); + if (oit_args->flags & (DFS_CHECK_REMOVE | DFS_CHECK_RELINK)) { + rc = clear_hardlink_bit(dfs, DAOS_TX_NONE, parent->oh, obj, obj->mode); + if (rc) { + D_ERROR("Failed to clear hardlink bit for " DF_OID ": %d\n", + DP_OID(oid), rc); + D_GOTO(out_obj, rc); + } + } + } + if (oit_args->flags & DFS_CHECK_VERIFY) { rc = daos_obj_verify(dfs->coh, oid, oit_args->snap_epoch); if (rc == -DER_NOSYS) { @@ -613,6 +980,7 @@ oit_mark_cb(dfs_t *dfs, dfs_obj_t *parent, const char name[], void *args) D_ERROR("Failed to mark OID in OIT: " DF_RC "\n", DP_RC(rc)); D_GOTO(out_obj, rc = daos_der2errno(rc)); } + rc = 0; /** descend into directories */ if (S_ISDIR(obj->mode)) { @@ -631,8 +999,8 @@ oit_mark_cb(dfs_t *dfs, dfs_obj_t *parent, const char name[], void *args) } out_obj: - rc = dfs_release(obj); - return rc; + rc2 = dfs_release(obj); + return rc ? 
rc : rc2; } static int @@ -719,6 +1087,7 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * char *dkey_enum_buf = NULL; char *entry_enum_buf = NULL; uint64_t unmarked_entries = 0; + struct d_hash_table *hlm_hash = NULL; d_iov_t marker; bool mark_data = true; daos_epoch_range_t epr; @@ -796,6 +1165,12 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_ERROR("Failed to mark ROOT OID in OIT: " DF_RC "\n", DP_RC(rc)); D_GOTO(out_oit, rc = daos_der2errno(rc)); } + /** mark the HLM object as reachable */ + rc = daos_oit_mark(oit_args->oit, dfs->hlm_oid, &marker, NULL); + if (rc && rc != -DER_NONEXIST) { + D_ERROR("Failed to mark HLM OID in OIT: " DF_RC "\n", DP_RC(rc)); + D_GOTO(out_oit, rc = daos_der2errno(rc)); + } rc = 0; if (flags & DFS_CHECK_VERIFY) { @@ -824,8 +1199,42 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_ERROR("daos_obj_verify() failed " DF_RC "\n", DP_RC(rc)); D_GOTO(out_oit, rc = daos_der2errno(rc)); } + + if (!daos_obj_id_is_nil(dfs->hlm_oid)) { + rc = daos_obj_verify(coh, dfs->hlm_oid, snap_epoch); + if (rc == -DER_NOSYS) { + oit_args->skipped++; + } else if (rc == -DER_MISMATCH) { + oit_args->failed++; + if (flags & DFS_CHECK_PRINT) + D_PRINT("HLM Object " DF_OID + " failed data consistency check!\n", + DP_OID(dfs->hlm_oid)); + } else if (rc) { + D_ERROR("daos_obj_verify() failed " DF_RC "\n", DP_RC(rc)); + D_GOTO(out_oit, rc = daos_der2errno(rc)); + } + } + } + + /** Create and populate hash table with HLM entries for hardlink verification */ + rc = d_hash_table_create(D_HASH_FT_NOLOCK, HLM_CHECK_HASH_BITS, NULL, &hlm_check_hash_ops, + &hlm_hash); + if (rc) { + D_ERROR("Failed to create HLM check hash table " DF_RC "\n", DP_RC(rc)); + D_GOTO(out_oit, rc = daos_der2errno(rc)); + } + + D_PRINT("DFS checker: Populating HLM hash table for hardlink verification\n"); + rc = hlm_populate_check_hash(dfs, hlm_hash, flags); + if (rc) { + D_ERROR("Failed 
to populate HLM check hash table: %d\n", rc); + D_GOTO(out_hlm_hash, rc); } + /** Set the hash table in oit_args so oit_mark_cb can access it */ + oit_args->hlm_hash = hlm_hash; + D_PRINT("DFS checker: Iterating namespace and marking objects\n"); oit_args->num_scanned = 2; /** iterate through the namespace and mark OITs starting from the root object */ @@ -834,7 +1243,7 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * oit_mark_cb, oit_args); if (rc) { D_ERROR("dfs_iterate() failed: %d\n", rc); - D_GOTO(out_oit, rc); + D_GOTO(out_hlm_hash, rc); } nr_entries = DFS_ITER_NR; @@ -842,7 +1251,7 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * rc = clock_gettime(CLOCK_REALTIME, &current_time); if (rc) - D_GOTO(out_oit, rc = errno); + D_GOTO(out_hlm_hash, rc = errno); D_PRINT("DFS checker: marked " DF_U64 " files/directories (runtime: " DF_U64 " sec))\n", oit_args->num_scanned, current_time.tv_sec - oit_args->start_time); @@ -852,7 +1261,7 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * &lf); if (rc) { D_ERROR("Failed to create/open lost+found directory: %d\n", rc); - D_GOTO(out_oit, rc); + D_GOTO(out_hlm_hash, rc); } if (name == NULL) { @@ -908,9 +1317,11 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_lf2, rc = daos_der2errno(rc)); } - clock_gettime(CLOCK_REALTIME, &current_time); - if (rc) + rc = clock_gettime(CLOCK_REALTIME, &current_time); + if (rc) { + D_ERROR("clock_gettime() failed: %d (%s)\n", errno, strerror(errno)); D_GOTO(out_lf2, rc = errno); + } oit_args->num_scanned += nr_entries; if (current_time.tv_sec - oit_args->print_time >= DFS_ELAPSED_TIME) { D_PRINT("DFS checker: Checked " DF_U64 " objects (runtime: " DF_U64 @@ -934,9 +1345,24 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * continue; } - if (flags & DFS_CHECK_PRINT) - D_PRINT("oid[" DF_U64 "]: " DF_OID "\n", unmarked_entries, -
DP_OID(oids[i])); + if (flags & DFS_CHECK_PRINT) { + d_list_t *hlink = NULL; + + if (hlm_hash != NULL) + hlink = d_hash_rec_find(hlm_hash, &oids[i], + sizeof(daos_obj_id_t)); + if (hlink != NULL) { + struct hlm_check_entry *hce = hlm_check_obj(hlink); + + D_PRINT("oid[" DF_U64 "]: " DF_OID " (hardlink, " + "HLM ref_cnt=%lu)\n", + unmarked_entries, DP_OID(oids[i]), + (unsigned long)hce->hce_stored_linkcnt); + } else { + D_PRINT("oid[" DF_U64 "]: " DF_OID "\n", unmarked_entries, + DP_OID(oids[i])); + } + } if (flags & DFS_CHECK_VERIFY) { rc = daos_obj_verify(dfs->coh, oids[i], snap_epoch); @@ -950,26 +1376,42 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * DP_OID(oids[i])); } else if (rc) { D_ERROR("daos_obj_verify() failed " DF_RC "\n", DP_RC(rc)); - D_GOTO(out_oit, rc = daos_der2errno(rc)); + D_GOTO(out_hlm_hash, rc = daos_der2errno(rc)); } } if (flags & DFS_CHECK_REMOVE) { daos_handle_t oh; + daos_key_t hlm_dkey; rc = daos_obj_open(dfs->coh, oids[i], DAOS_OO_RW, &oh, NULL); if (rc) - D_GOTO(out_oit, rc = daos_der2errno(rc)); + D_GOTO(out_hlm_hash, rc = daos_der2errno(rc)); rc = daos_obj_punch(oh, DAOS_TX_NONE, 0, NULL); if (rc) { daos_obj_close(oh, NULL); - D_GOTO(out_oit, rc = daos_der2errno(rc)); + D_GOTO(out_hlm_hash, rc = daos_der2errno(rc)); } rc = daos_obj_close(oh, NULL); if (rc) - D_GOTO(out_oit, rc = daos_der2errno(rc)); + D_GOTO(out_hlm_hash, rc = daos_der2errno(rc)); + + /** Also punch any HLM entry for this OID */ + if (!daos_obj_id_is_nil(dfs->hlm_oid)) { + d_iov_set(&hlm_dkey, &oids[i], sizeof(daos_obj_id_t)); + rc = daos_obj_punch_dkeys(dfs->hlm_oh, DAOS_TX_NONE, 0, 1, + &hlm_dkey, NULL); + if (rc && rc != -DER_NONEXIST) { + D_ERROR("Failed to punch HLM entry " DF_RC "\n", + DP_RC(rc)); + D_GOTO(out_hlm_hash, rc = daos_der2errno(rc)); + } + } + + /** Remove from HLM check hash table if it exists */ + d_hash_rec_delete(hlm_hash, &oids[i], sizeof(daos_obj_id_t)); } unmarked_entries++; @@ -991,9 +1433,11 @@ 
dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * D_GOTO(out_lf2, rc = daos_der2errno(rc)); } - clock_gettime(CLOCK_REALTIME, &current_time); - if (rc) + rc = clock_gettime(CLOCK_REALTIME, &current_time); + if (rc) { + D_ERROR("clock_gettime() failed: %d (%s)\n", errno, strerror(errno)); D_GOTO(out_lf2, rc = errno); + } oit_args->num_scanned += nr_entries; if (current_time.tv_sec - oit_args->print_time >= DFS_ELAPSED_TIME) { D_PRINT("DFS checker: Checked " DF_U64 " objects (runtime: " DF_U64 @@ -1007,9 +1451,24 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * enum daos_otype_t otype = daos_obj_id2type(oids[i]); char oid_name[DFS_MAX_NAME + 1]; - if (flags & DFS_CHECK_PRINT) - D_PRINT("oid[" DF_U64 "]: " DF_OID "\n", unmarked_entries, - DP_OID(oids[i])); + if (flags & DFS_CHECK_PRINT) { + d_list_t *hlink = NULL; + + if (hlm_hash != NULL) + hlink = d_hash_rec_find(hlm_hash, &oids[i], + sizeof(daos_obj_id_t)); + if (hlink != NULL) { + struct hlm_check_entry *hce = hlm_check_obj(hlink); + + D_PRINT("oid[" DF_U64 "]: " DF_OID " (hardlink, " + "HLM ref_cnt=%lu)\n", + unmarked_entries, DP_OID(oids[i]), + (unsigned long)hce->hce_stored_linkcnt); + } else { + D_PRINT("oid[" DF_U64 "]: " DF_OID "\n", unmarked_entries, + DP_OID(oids[i])); + } + } if (flags & DFS_CHECK_VERIFY) { rc = daos_obj_verify(dfs->coh, oids[i], snap_epoch); @@ -1038,6 +1497,55 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * entry.mtime_nano = entry.ctime_nano = now.tv_nsec; entry.chunk_size = dfs->attr.da_chunk_size; + /** + * Check if this OID has an HLM entry. If so, this is a hardlink + * that lost all its directory entries. Set the hardlink bit and + * update the HLM link count to 1.
+ */ + if (hlm_hash != NULL) { + d_list_t *hlink; + + hlink = d_hash_rec_find(hlm_hash, &oids[i], sizeof(daos_obj_id_t)); + if (hlink != NULL) { + struct hlm_check_entry *hce = hlm_check_obj(hlink); + struct dfs_entry hlm_entry; + int delta; + + /* Set hardlink bit in the new entry */ + entry.mode |= MODE_HARDLINK_BIT; + + /* Calculate delta to set link count to 1 */ + delta = 1 - (int)hce->hce_stored_linkcnt; + + /* Fetch HLM entry and update link count using delta */ + rc = hlm_fetch_entry(dfs->hlm_oh, DAOS_TX_NONE, &oids[i], + &hlm_entry); + if (rc) { + D_ERROR("Failed to fetch HLM entry for " DF_OID + ": %d\n", + DP_OID(oids[i]), rc); + D_GOTO(out_lf2, rc); + } + rc = hlm_update_ref_cnt(dfs, DAOS_TX_NONE, &hlm_entry, + delta); + if (rc) { + D_ERROR("Failed to update HLM ref_cnt for " DF_OID + ": %d\n", + DP_OID(oids[i]), rc); + D_GOTO(out_lf2, rc); + } + + /* Update hash entry to reflect new link count */ + hce->hce_cur_linkcnt = 1; + hce->hce_stored_linkcnt = 1; + + if (flags & DFS_CHECK_PRINT) + D_PRINT("Restoring hardlink " DF_OID + " with link count 1\n", + DP_OID(oids[i])); + } + } + /* * If this is a regular file / array object, the user might have used a * different chunk size than the default one. 
Since we lost the directory @@ -1075,6 +1583,25 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * } done: + /** Verify and fix HLM link counts */ + if (hlm_hash != NULL) { + struct hlm_verify_arg varg = {0}; + + D_PRINT("DFS checker: Verifying HLM link counts\n"); + varg.dfs = dfs; + varg.flags = flags; + rc = d_hash_table_traverse(hlm_hash, hlm_verify_linkcnt_cb, &varg); + if (rc) { + D_ERROR("HLM link count verification failed: %d\n", rc); + D_GOTO(out_lf2, rc); + } + if (varg.mismatches > 0) { + D_PRINT("DFS checker: Found " DF_U64 " HLM link count mismatches, " + "fixed " DF_U64 "\n", + varg.mismatches, varg.fixed); + } + } + rc = clock_gettime(CLOCK_REALTIME, &current_time); if (rc) D_GOTO(out_lf2, rc = errno); @@ -1103,6 +1630,12 @@ dfs_cont_check(daos_handle_t poh, const char *cont, uint64_t flags, const char * if (rc == 0) rc = rc2; } +out_hlm_hash: + if (hlm_hash != NULL) { + rc2 = d_hash_table_destroy(hlm_hash, true); + if (rc2) + D_ERROR("Failed to destroy HLM check hash table " DF_RC "\n", DP_RC(rc2)); + } out_oit: rc2 = daos_oit_close(oit_args->oit, NULL); if (rc == 0) @@ -1293,8 +1826,10 @@ dfs_relink_root(daos_handle_t coh) int dfs_obj_fix_type(dfs_t *dfs, dfs_obj_t *parent, const char *name) { - struct dfs_entry entry = {0}; + struct dfs_entry entry = {0}; + struct dfs_entry hlm_entry = {0}; bool exists; + bool is_hardlink = false; daos_key_t dkey; size_t len; enum daos_otype_t otype; @@ -1334,6 +1869,17 @@ dfs_obj_fix_type(dfs_t *dfs, dfs_obj_t *parent, const char *name) if (daos_is_array_type(otype)) { mode |= S_IFREG; D_PRINT("Setting entry type to S_IFREG\n"); + + /** Check if this is a hardlink by looking up the HLM */ + rc = hlm_fetch_entry(dfs->hlm_oh, DAOS_TX_NONE, &entry.oid, &hlm_entry); + if (rc == 0) { + mode |= MODE_HARDLINK_BIT; + is_hardlink = true; + D_PRINT("Entry is a hardlink, setting hardlink bit\n"); + } else if (rc != ENOENT) { + D_ERROR("Failed to fetch HLM entry (%d)\n", rc); + D_GOTO(out, rc); + }
} else if (entry.value_len) { mode |= S_IFLNK; D_PRINT("Setting entry type to S_IFLNK\n"); @@ -1362,6 +1908,17 @@ dfs_obj_fix_type(dfs_t *dfs, dfs_obj_t *parent, const char *name) D_GOTO(out, rc = daos_der2errno(rc)); } + /** Update mode bits in HLM entry if this is a hardlink */ + if (is_hardlink) { + d_iov_set(&dkey, &entry.oid, sizeof(daos_obj_id_t)); + rc = daos_obj_update(dfs->hlm_oh, DAOS_TX_NONE, DAOS_COND_DKEY_UPDATE, &dkey, 1, + &iod, &sgl, NULL); + if (rc) { + D_ERROR("Failed to update HLM entry with new mode " DF_RC "\n", DP_RC(rc)); + D_GOTO(out, rc = daos_der2errno(rc)); + } + } + out: if (entry.value) D_FREE(entry.value); diff --git a/src/client/dfs/dfs_internal.h b/src/client/dfs/dfs_internal.h index 41be576c349..f1ea163d861 100644 --- a/src/client/dfs/dfs_internal.h +++ b/src/client/dfs/dfs_internal.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -63,6 +64,8 @@ /** Number of A-keys for attributes in any object entry */ #define INODE_AKEYS 12 +/** Number of A-keys for HLM entries (includes ref_cnt) */ +#define HLM_INODE_AKEYS 13 #define INODE_AKEY_NAME "DFS_INODE" #define SLINK_AKEY_NAME "DFS_SLINK" #define MODE_IDX 0 @@ -78,6 +81,8 @@ #define SIZE_IDX (GID_IDX + sizeof(gid_t)) #define HLC_IDX (SIZE_IDX + sizeof(daos_size_t)) #define END_IDX (HLC_IDX + sizeof(uint64_t)) +#define REF_CNT_IDX END_IDX +#define END_HLM_IDX (REF_CNT_IDX + sizeof(uint64_t)) /* * END IDX for layout V2 (2.0) is at the current offset where we store the mtime nsec, but also need @@ -94,10 +99,21 @@ #define RESERVED_LO 0 #define SB_HI 0 #define ROOT_HI 1 +#define HLM_HI 2 /** DFS mode mask (3rd bit) */ #define MODE_MASK (1 << 2) +/** Hardlink mode bit - highest bit in mode_t to indicate file is a hardlink */ +#define MODE_HARDLINK_BIT (1U << 31) +
+/** True only when mode has MODE_HARDLINK_BIT set and is a regular file (S_ISREG) */
+static inline bool
+dfs_is_hardlink(mode_t mode)
+{
+	return (mode & MODE_HARDLINK_BIT) != 0 && S_ISREG(mode);
+}
+ /** Max recursion depth for symlinks */ #define DFS_MAX_RECURSION 40 @@ -175,6 +191,10 @@ struct dfs { daos_obj_id_t super_oid; /** Open object handle of SB */ daos_handle_t super_oh; + /** Hardlink metadata object OID */ + daos_obj_id_t hlm_oid; + /** Open object handle of HLM */ + daos_handle_t hlm_oh; /** Root object info */ dfs_obj_t root; /** DFS container attributes (Default chunk size, oclass, etc.) */ @@ -223,6 +243,8 @@ struct dfs_entry { gid_t gid; /** Sym Link value */ char *value; + /** Number of hardlinks to the same file object */ + uint64_t ref_cnt; }; /** enum for hash entry type */ @@ -411,8 +433,30 @@ fetch_entry(dfs_layout_ver_t ver, daos_handle_t oh, daos_handle_t th, const char bool fetch_sym, bool *exists, struct dfs_entry *entry, int xnr, char *xnames[], void *xvals[], daos_size_t *xsizes); int +hlm_fetch_entry(daos_handle_t hlm_oh, daos_handle_t th, daos_obj_id_t *oid, + struct dfs_entry *entry); +int +dfsobj_fetch_entry(dfs_t *dfs, daos_handle_t th, dfs_obj_t *obj, daos_handle_t *parent_oh, + struct dfs_entry *entry); +int +hlm_update_ref_cnt(dfs_t *dfs, daos_handle_t th, struct dfs_entry *entry, int delta); +int remove_entry(dfs_t *dfs, daos_handle_t th, daos_handle_t parent_oh, const char *name, size_t len, - struct dfs_entry entry); + struct dfs_entry entry, bool *deleted); +int +hlm_copy_xattr(daos_handle_t src_oh, const char *src_name, daos_handle_t hlm_oh, + daos_obj_id_t *dst_oid, daos_handle_t th); +int +hlm_copy_entry(dfs_t *dfs, daos_handle_t th, daos_handle_t parent_oh, const char *name, + struct dfs_entry *entry); +int +remove_xattrs_from_entry(dfs_t *dfs, daos_handle_t th, daos_handle_t parent_oh, const char *name); +int +set_hardlink_bit(dfs_t *dfs, daos_handle_t th, daos_handle_t parent_oh, dfs_obj_t *obj, + mode_t mode); +int +clear_hardlink_bit(dfs_t *dfs, daos_handle_t th, daos_handle_t parent_oh, dfs_obj_t *obj, + mode_t mode); int
open_dir(dfs_t *dfs, dfs_obj_t *parent, int flags, daos_oclass_id_t cid, struct dfs_entry *entry, size_t len, dfs_obj_t *dir); diff --git a/src/client/dfs/dir.c b/src/client/dfs/dir.c index 188b79c917d..325ad310811 100644 --- a/src/client/dfs/dir.c +++ b/src/client/dfs/dir.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -123,7 +124,7 @@ remove_dir_contents(dfs_t *dfs, daos_handle_t th, struct dfs_entry entry) D_GOTO(out, rc); } - rc = remove_entry(dfs, th, oh, ptr, kds[i].kd_key_len, child_entry); + rc = remove_entry(dfs, th, oh, ptr, kds[i].kd_key_len, child_entry, NULL); if (rc) D_GOTO(out, rc); } @@ -135,7 +136,8 @@ remove_dir_contents(dfs_t *dfs, daos_handle_t th, struct dfs_entry entry) } int -dfs_remove(dfs_t *dfs, dfs_obj_t *parent, const char *name, bool force, daos_obj_id_t *oid) +dfs_remove_internal(dfs_t *dfs, dfs_obj_t *parent, const char *name, bool force, daos_obj_id_t *oid, + bool *deleted) { struct dfs_entry entry = {0}; daos_handle_t th = DAOS_TX_NONE; @@ -205,7 +207,7 @@ dfs_remove(dfs_t *dfs, dfs_obj_t *parent, const char *name, bool force, daos_obj } } - rc = remove_entry(dfs, th, parent->oh, name, len, entry); + rc = remove_entry(dfs, th, parent->oh, name, len, entry, deleted); if (rc) D_GOTO(out, rc); @@ -229,6 +231,12 @@ dfs_remove(dfs_t *dfs, dfs_obj_t *parent, const char *name, bool force, daos_obj return rc; }
+/**
+ * Thin wrapper around dfs_remove_internal() for callers that do not need the
+ * "deleted" output: forwards every argument unchanged and passes NULL for it.
+ * Returns whatever dfs_remove_internal() returns.
+ */
+int
+dfs_remove(dfs_t *dfs, dfs_obj_t *parent, const char *name, bool force, daos_obj_id_t *oid)
+{
+	int rc;
+
+	rc = dfs_remove_internal(dfs, parent, name, force, oid, NULL);
+	return rc;
+}
+ int dfs_obj_set_oclass(dfs_t *dfs, dfs_obj_t *obj, int flags, daos_oclass_id_t cid) { diff --git a/src/client/dfs/file.c b/src/client/dfs/file.c index 2e63310dd96..9e46f822bd2 100644 --- a/src/client/dfs/file.c +++ b/src/client/dfs/file.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation.
+ * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -47,21 +48,15 @@ dfs_get_chunk_size(dfs_obj_t *obj, daos_size_t *chunk_size) static int set_chunk_size(dfs_t *dfs, dfs_obj_t *obj, daos_size_t csize) { - daos_handle_t oh; - d_sg_list_t sgl; - d_iov_t sg_iov; - daos_iod_t iod; - daos_recx_t recx; - daos_key_t dkey; - int rc; - - /** Open parent object and fetch entry of obj from it */ - rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RW, &oh, NULL); - if (rc) - return daos_der2errno(rc); - - /** set dkey as the entry name */ - d_iov_set(&dkey, (void *)obj->name, strlen(obj->name)); + daos_handle_t oh = DAOS_HDL_INVAL; + d_sg_list_t sgl; + d_iov_t sg_iov; + daos_iod_t iod; + daos_recx_t recx; + daos_key_t dkey; + struct dfs_entry entry = {0}; + bool exists; + int rc; /** set akey as the inode name */ d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); @@ -78,15 +73,57 @@ set_chunk_size(dfs_t *dfs, dfs_obj_t *obj, daos_size_t csize) sgl.sg_nr_out = 0; sgl.sg_iovs = &sg_iov; - rc = daos_obj_update(oh, DAOS_TX_NONE, DAOS_COND_DKEY_UPDATE, &dkey, 1, &iod, &sgl, NULL); - if (rc) { - D_ERROR("Failed to update chunk size: " DF_RC "\n", DP_RC(rc)); - D_GOTO(out, rc = daos_der2errno(rc)); +retry: + if (dfs_is_hardlink(obj->mode)) { + /* For hardlinks, update chunk size in HLM with OID as dkey */ + d_iov_set(&dkey, &obj->oid, sizeof(daos_obj_id_t)); + + rc = daos_obj_update(dfs->hlm_oh, DAOS_TX_NONE, DAOS_COND_DKEY_UPDATE, &dkey, 1, + &iod, &sgl, NULL); + if (rc) { + D_ERROR("Failed to update chunk size in HLM: " DF_RC "\n", DP_RC(rc)); + return daos_der2errno(rc); + } + } else { + /** Open parent object and fetch entry of obj from it */ + rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RW, &oh, NULL); + if (rc) + return daos_der2errno(rc); + + /** set dkey as the entry name */ + d_iov_set(&dkey, (void *)obj->name, strlen(obj->name)); + + rc = daos_obj_update(oh, DAOS_TX_NONE, 
DAOS_COND_DKEY_UPDATE, &dkey, 1, &iod, &sgl, + NULL); + if (rc) { + D_ERROR("Failed to update chunk size: " DF_RC "\n", DP_RC(rc)); + daos_obj_close(oh, NULL); + return daos_der2errno(rc); + } + + if (S_ISREG(obj->mode)) { + /* + * Check if the entry became a hardlink (another DFS instance may have + * converted it) + */ + rc = fetch_entry(dfs->layout_v, oh, dfs->th, obj->name, strlen(obj->name), + false, &exists, &entry, 0, NULL, NULL, NULL); + daos_obj_close(oh, NULL); + if (rc) { + D_ERROR("Failed to fetch entry: %d\n", rc); + return rc; + } + if (exists && dfs_is_hardlink(entry.mode)) { + /* Entry became a hardlink, update obj->mode and retry from HLM */ + obj->mode = entry.mode; + goto retry; + } + } else { + daos_obj_close(oh, NULL); + } } -out: - daos_obj_close(oh, NULL); - return rc; + return 0; } int @@ -126,7 +163,7 @@ dfs_file_update_chunk_size(dfs_t *dfs, dfs_obj_t *obj, daos_size_t csize) rc = set_chunk_size(dfs, obj, csize); if (rc) - return daos_der2errno(rc); + return rc; /* need to update the array handle chunk size */ rc = daos_array_update_chunk_size(obj->oh, csize); diff --git a/src/client/dfs/lookup.c b/src/client/dfs/lookup.c index 7e792989245..50ab4bd52a7 100644 --- a/src/client/dfs/lookup.c +++ b/src/client/dfs/lookup.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation. 
+ * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -136,6 +137,18 @@ lookup_rel_path(dfs_t *dfs, dfs_obj_t *root, const char *path, int flags, dfs_ob /** if entry is a file, open the array object and return */ if (S_ISREG(entry.mode)) { + /* If entry is a hardlink, fetch metadata from HLM */ + if (dfs_is_hardlink(entry.mode)) { + rc = hlm_fetch_entry(dfs->hlm_oh, dfs->th, &entry.oid, &entry); + if (rc) { + D_ERROR("Failed to fetch entry '%s' oid=" DF_OID + " from HLM (%d)\n", + token, DP_OID(entry.oid), rc); + D_GOTO(err_obj, rc); + } + obj->mode = entry.mode; + } + /* if there are more entries, then file is not a dir */ if (strtok_r(NULL, "/", &sptr) != NULL) { D_ERROR("%s is not a directory\n", obj->name); @@ -168,6 +181,8 @@ lookup_rel_path(dfs_t *dfs, dfs_obj_t *root, const char *path, int flags, dfs_ob D_GOTO(err_obj, rc = daos_der2errno(rc)); } + stbuf->st_nlink = entry.ref_cnt; + stbuf->st_mode = entry.mode & ~MODE_HARDLINK_BIT; stbuf->st_size = array_stbuf.st_size; stbuf->st_blocks = (stbuf->st_size + (1 << 9) - 1) >> 9; @@ -339,8 +354,8 @@ lookup_rel_path(dfs_t *dfs, dfs_obj_t *root, const char *path, int flags, dfs_ob } memcpy(stbuf, &dfs->root_stbuf, sizeof(struct stat)); } else { - stbuf->st_nlink = 1; - stbuf->st_mode = obj->mode; + stbuf->st_nlink = entry.ref_cnt; + stbuf->st_mode = obj->mode & ~MODE_HARDLINK_BIT; stbuf->st_uid = entry.uid; stbuf->st_gid = entry.gid; if (tspec_gt(stbuf->st_ctim, stbuf->st_mtim)) { @@ -429,6 +444,16 @@ lookup_rel_int(dfs_t *dfs, dfs_obj_t *parent, const char *name, int flags, dfs_o if (!exists) return ENOENT; + /* If entry is a hardlink, fetch metadata from HLM */ + if (dfs_is_hardlink(entry.mode)) { + rc = hlm_fetch_entry(dfs->hlm_oh, dfs->th, &entry.oid, &entry); + if (rc) { + D_ERROR("Failed to fetch entry '%s' oid=" DF_OID " from HLM (%d)\n", name, + DP_OID(entry.oid), rc); + return rc; + } + } + if (stbuf) memset(stbuf, 0, sizeof(struct stat)); 
@@ -557,8 +582,8 @@ lookup_rel_int(dfs_t *dfs, dfs_obj_t *parent, const char *name, int flags, dfs_o *mode = obj->mode; if (stbuf) { - stbuf->st_nlink = 1; - stbuf->st_mode = obj->mode; + stbuf->st_nlink = entry.ref_cnt; + stbuf->st_mode = obj->mode & ~MODE_HARDLINK_BIT; stbuf->st_uid = entry.uid; stbuf->st_gid = entry.gid; if (tspec_gt(stbuf->st_ctim, stbuf->st_mtim)) { diff --git a/src/client/dfs/metrics.h b/src/client/dfs/metrics.h index 722a57590d7..ffedfc1b187 100644 --- a/src/client/dfs/metrics.h +++ b/src/client/dfs/metrics.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -45,6 +46,7 @@ extern "C" { ACTION(SETXATTR) \ ACTION(STAT) \ ACTION(SYMLINK) \ + ACTION(LINK) \ ACTION(SYNC) \ ACTION(TRUNCATE) \ ACTION(UNLINK) \ diff --git a/src/client/dfs/mnt.c b/src/client/dfs/mnt.c index a4955bbde1d..dc82dbd4dfd 100644 --- a/src/client/dfs/mnt.c +++ b/src/client/dfs/mnt.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation. 
+ * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -633,6 +634,7 @@ dfs_mount_int(daos_handle_t poh, daos_handle_t coh, int flags, daos_epoch_t epoc dfs->super_oid = roots->cr_oids[0]; dfs->root.oid = roots->cr_oids[1]; dfs->root.parent_oid = dfs->super_oid; + dfs->hlm_oid = roots->cr_oids[2]; /** Verify SB */ rc = open_sb(coh, false, false, omode, dfs->super_oid, &dfs->attr, &dfs->super_oh, @@ -711,17 +713,26 @@ dfs_mount_int(daos_handle_t poh, daos_handle_t coh, int flags, daos_epoch_t epoc dfs->root_stbuf.st_atim.tv_nsec = root_dir.mtime_nano; } + /** Open hardlink metadata object */ + if (!daos_obj_id_is_nil(dfs->hlm_oid)) { + rc = daos_obj_open(dfs->coh, dfs->hlm_oid, omode, &dfs->hlm_oh, NULL); + if (rc) { + D_ERROR("Failed to open hlm object: " DF_RC "\n", DP_RC(rc)); + D_GOTO(err_root, rc = daos_der2errno(rc)); + } + } + /** if RW, allocate an OID for the namespace */ if (amode == O_RDWR) { dfs->last_hi = (unsigned int)d_rand(); - /** Avoid potential conflict with SB or ROOT */ - if (dfs->last_hi <= 1) - dfs->last_hi = 2; + /** Avoid potential conflict with SB, ROOT, or HLM */ + if (dfs->last_hi <= HLM_HI) + dfs->last_hi = HLM_HI + 1; rc = daos_cont_alloc_oids(coh, 1, &dfs->oid.lo, NULL); if (rc) { D_ERROR("daos_cont_alloc_oids() Failed, " DF_RC "\n", DP_RC(rc)); - D_GOTO(err_root, rc = daos_der2errno(rc)); + D_GOTO(err_hlm, rc = daos_der2errno(rc)); } dfs->oid.hi = dfs->last_hi; @@ -737,6 +748,9 @@ dfs_mount_int(daos_handle_t poh, daos_handle_t coh, int flags, daos_epoch_t epoc daos_prop_free(prop); return rc; +err_hlm: + if (daos_handle_is_valid(dfs->hlm_oh)) + daos_obj_close(dfs->hlm_oh, NULL); err_root: daos_obj_close(dfs->root.oh, NULL); err_super: @@ -844,6 +858,8 @@ dfs_umount(dfs_t *dfs) if (daos_handle_is_valid(dfs->th)) daos_tx_close(dfs->th, NULL); + if (daos_handle_is_valid(dfs->hlm_oh)) + daos_obj_close(dfs->hlm_oh, NULL); daos_obj_close(dfs->root.oh, NULL); 
daos_obj_close(dfs->super_oh, NULL); @@ -956,6 +972,7 @@ struct dfs_glob { uuid_t coh_uuid; daos_obj_id_t super_oid; daos_obj_id_t root_oid; + daos_obj_id_t hlm_oid; daos_epoch_t th_epoch; }; @@ -1039,6 +1056,7 @@ dfs_local2global(dfs_t *dfs, d_iov_t *glob) dfs_params->amode = dfs->amode; dfs_params->super_oid = dfs->super_oid; dfs_params->root_oid = dfs->root.oid; + dfs_params->hlm_oid = dfs->hlm_oid; dfs_params->uid = dfs->uid; dfs_params->gid = dfs->gid; dfs_params->id = dfs->attr.da_id; @@ -1118,6 +1136,7 @@ dfs_global2local(daos_handle_t poh, daos_handle_t coh, int flags, d_iov_t glob, dfs->super_oid = dfs_params->super_oid; dfs->root.oid = dfs_params->root_oid; dfs->root.parent_oid = dfs->super_oid; + dfs->hlm_oid = dfs_params->hlm_oid; if (daos_obj_id_is_nil(dfs->super_oid) || daos_obj_id_is_nil(dfs->root.oid)) { D_ERROR("Invalid superblock or root object ID\n"); D_FREE(dfs); @@ -1153,6 +1172,17 @@ dfs_global2local(daos_handle_t poh, daos_handle_t coh, int flags, d_iov_t glob, D_GOTO(err_dfs, rc = daos_der2errno(rc)); } + /* Open HLM (hardlink metadata) Object */ + if (!daos_obj_id_is_nil(dfs->hlm_oid)) { + rc = daos_obj_open(coh, dfs->hlm_oid, obj_mode, &dfs->hlm_oh, NULL); + if (rc) { + D_ERROR("daos_obj_open() failed for hlm, " DF_RC "\n", DP_RC(rc)); + daos_obj_close(dfs->super_oh, NULL); + daos_obj_close(dfs->root.oh, NULL); + D_GOTO(err_dfs, rc = daos_der2errno(rc)); + } + } + /** Create transaction handle */ dfs->th_epoch = dfs_params->th_epoch; if (dfs->th_epoch == DAOS_EPOCH_MAX) { @@ -1163,6 +1193,8 @@ dfs_global2local(daos_handle_t poh, daos_handle_t coh, int flags, d_iov_t glob, D_ERROR("daos_tx_open_snap() failed, " DF_RC "\n", DP_RC(rc)); daos_obj_close(dfs->super_oh, NULL); daos_obj_close(dfs->root.oh, NULL); + if (daos_handle_is_valid(dfs->hlm_oh)) + daos_obj_close(dfs->hlm_oh, NULL); D_GOTO(err_dfs, rc = daos_der2errno(rc)); } } diff --git a/src/client/dfs/obj.c b/src/client/dfs/obj.c index c85a8f84f9c..d9b34b62e08 100644 --- 
a/src/client/dfs/obj.c +++ b/src/client/dfs/obj.c @@ -841,7 +841,7 @@ struct statx_op_args { daos_iod_t iod; daos_recx_t recx; d_sg_list_t sgl; - d_iov_t sg_iovs[INODE_AKEYS]; + d_iov_t sg_iovs[HLM_INODE_AKEYS]; struct dfs_entry entry; daos_array_stbuf_t array_stbuf; }; @@ -851,6 +851,7 @@ ostatx_cb(tse_task_t *task, void *data) { struct dfs_statx_args *args = daos_task_get_args(task); struct statx_op_args *op_args = *((struct statx_op_args **)data); + bool is_obj_hardlink = dfs_is_hardlink(args->obj->mode); int rc2, rc = task->dt_result; if (rc != 0) { @@ -864,6 +865,21 @@ ostatx_cb(tse_task_t *task, void *data) args->obj->oid.lo != op_args->entry.oid.lo) D_GOTO(out, rc = -DER_ENOENT); + /* + * If we fetched from dentry (not HLM) and the entry has hardlink bit set, + * we need to fetch the actual metadata from HLM. + */ + if (!is_obj_hardlink && dfs_is_hardlink(op_args->entry.mode)) { + rc = hlm_fetch_entry(args->dfs->hlm_oh, args->dfs->th, &op_args->entry.oid, + &op_args->entry); + if (rc) { + D_ERROR("Failed to fetch entry from HLM (%d)\n", rc); + D_GOTO(out, rc = daos_errno2der(rc)); + } + /* Update obj->mode to reflect hardlink status */ + args->obj->mode |= MODE_HARDLINK_BIT; + } + rc = update_stbuf_times(op_args->entry, op_args->array_stbuf.st_max_epoch, args->stbuf, NULL); if (rc) @@ -880,8 +896,8 @@ ostatx_cb(tse_task_t *task, void *data) args->stbuf->st_size = op_args->entry.value_len; } - args->stbuf->st_nlink = 1; - args->stbuf->st_mode = op_args->entry.mode; + args->stbuf->st_nlink = op_args->entry.ref_cnt ? 
op_args->entry.ref_cnt : 1; + args->stbuf->st_mode = op_args->entry.mode & ~MODE_HARDLINK_BIT; args->stbuf->st_uid = op_args->entry.uid; args->stbuf->st_gid = op_args->entry.gid; if (tspec_gt(args->stbuf->st_ctim, args->stbuf->st_mtim)) { @@ -894,9 +910,12 @@ ostatx_cb(tse_task_t *task, void *data) out: D_FREE(op_args); - rc2 = daos_obj_close(args->parent_oh, NULL); - if (rc == 0) - rc = rc2; + /* Only close parent_oh if we opened it (non-hardlink case) */ + if (!is_obj_hardlink && daos_handle_is_valid(args->parent_oh)) { + rc2 = daos_obj_close(args->parent_oh, NULL); + if (rc == 0) + rc = rc2; + } if (rc == 0) DFS_OP_STAT_INCR(args->dfs, DOS_STAT); return rc; @@ -917,7 +936,8 @@ statx_task(tse_task_t *task) D_ALLOC_PTR(op_args); if (op_args == NULL) { - daos_obj_close(args->parent_oh, NULL); + if (!dfs_is_hardlink(args->obj->mode)) + daos_obj_close(args->parent_oh, NULL); return -DER_NOMEM; } @@ -928,12 +948,23 @@ statx_task(tse_task_t *task) D_GOTO(err1_out, rc); } - /** set obj_fetch parameters */ - d_iov_set(&op_args->dkey, (void *)args->obj->name, strlen(args->obj->name)); + /* + * Set up fetch parameters based on whether obj is already known to be a hardlink. + * For hardlinks, fetch from HLM using OID as dkey. + * For non-hardlinks, fetch from parent dir using name as dkey. 
+ */ + if (dfs_is_hardlink(args->obj->mode)) { + /* For hardlinks, use OID as dkey and fetch from HLM */ + d_iov_set(&op_args->dkey, &args->obj->oid, sizeof(daos_obj_id_t)); + op_args->recx.rx_nr = END_HLM_IDX; + } else { + /* For non-hardlinks, use name as dkey and fetch from parent */ + d_iov_set(&op_args->dkey, (void *)args->obj->name, strlen(args->obj->name)); + op_args->recx.rx_nr = END_IDX; + } d_iov_set(&op_args->iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); op_args->iod.iod_nr = 1; op_args->recx.rx_idx = 0; - op_args->recx.rx_nr = END_IDX; op_args->iod.iod_recxs = &op_args->recx; op_args->iod.iod_type = DAOS_IOD_ARRAY; op_args->iod.iod_size = 1; @@ -950,12 +981,16 @@ statx_task(tse_task_t *task) d_iov_set(&op_args->sg_iovs[i++], &op_args->entry.gid, sizeof(gid_t)); d_iov_set(&op_args->sg_iovs[i++], &op_args->entry.value_len, sizeof(daos_size_t)); d_iov_set(&op_args->sg_iovs[i++], &op_args->entry.obj_hlc, sizeof(uint64_t)); + /* For HLM fetch, also get ref_cnt */ + if (dfs_is_hardlink(args->obj->mode)) + d_iov_set(&op_args->sg_iovs[i++], &op_args->entry.ref_cnt, sizeof(uint64_t)); op_args->sgl.sg_nr = i; op_args->sgl.sg_nr_out = 0; op_args->sgl.sg_iovs = op_args->sg_iovs; fetch_arg = daos_task_get_args(fetch_task); - fetch_arg->oh = args->parent_oh; + /* Use HLM object handle for hardlinks, parent object handle otherwise */ + fetch_arg->oh = dfs_is_hardlink(args->obj->mode) ? 
args->dfs->hlm_oh : args->parent_oh; fetch_arg->th = args->dfs->th; fetch_arg->flags = DAOS_COND_DKEY_FETCH; fetch_arg->dkey = &op_args->dkey; @@ -1029,7 +1064,8 @@ statx_task(tse_task_t *task) tse_task_complete(fetch_task, rc); err1_out: D_FREE(op_args); - daos_obj_close(args->parent_oh, NULL); + if (!dfs_is_hardlink(args->obj->mode)) + daos_obj_close(args->parent_oh, NULL); return rc; } @@ -1037,9 +1073,10 @@ statx_task(tse_task_t *task) int dfs_ostatx(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, daos_event_t *ev) { - daos_handle_t oh; + daos_handle_t oh = DAOS_HDL_INVAL; tse_task_t *task; struct dfs_statx_args *args; + bool is_hardlink; int rc; if (dfs == NULL || !dfs->mounted) @@ -1049,13 +1086,20 @@ dfs_ostatx(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, daos_event_t *ev) if (ev == NULL) return dfs_ostat(dfs, obj, stbuf); - rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RO, &oh, NULL); - if (rc) - return daos_der2errno(rc); + /* Check if obj is already known to be a hardlink */ + is_hardlink = dfs_is_hardlink(obj->mode); + + /* Only need to open parent if not a hardlink (hardlinks fetch from HLM) */ + if (!is_hardlink) { + rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RO, &oh, NULL); + if (rc) + return daos_der2errno(rc); + } rc = dc_task_create(statx_task, NULL, ev, &task); if (rc) { - daos_obj_close(oh, NULL); + if (!is_hardlink) + daos_obj_close(oh, NULL); return daos_der2errno(rc); } D_ASSERT(ev); @@ -1114,6 +1158,16 @@ dfs_access(dfs_t *dfs, dfs_obj_t *parent, const char *name, int mask) if (!exists) return ENOENT; + /* If entry is a hardlink, fetch metadata from HLM */ + if (dfs_is_hardlink(entry.mode)) { + rc = hlm_fetch_entry(dfs->hlm_oh, dfs->th, &entry.oid, &entry); + if (rc) { + D_ERROR("Failed to fetch entry '%s' oid=" DF_OID " from HLM (%d)\n", name, + DP_OID(entry.oid), rc); + return rc; + } + } + if (!S_ISLNK(entry.mode)) { if (mask == F_OK) return 0; @@ -1155,6 +1209,10 @@ dfs_chmod(dfs_t *dfs, dfs_obj_t *parent, const char 
*name, mode_t mode) mode_t orig_mode; const char *entry_name; struct timespec now; + bool is_hardlink; + daos_obj_id_t hlm_oid; + daos_handle_t dir_oh; + size_t dir_len; int rc; if (dfs == NULL || !dfs->mounted) @@ -1186,14 +1244,40 @@ dfs_chmod(dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode) return ENOTSUP; } + if (dfs->use_dtx) { + rc = daos_tx_open(dfs->coh, &th, 0, NULL); + if (rc) { + D_ERROR("daos_tx_open() failed (%d)\n", rc); + return daos_der2errno(rc); + } + } + /* oh and len are overwritten when a symlink is followed; keep them for TX restarts */ + dir_oh = oh; + dir_len = len; + +restart: + oh = dir_oh; + len = dir_len; + is_hardlink = false; /* Check if parent has the entry */ - rc = fetch_entry(dfs->layout_v, oh, dfs->th, name, len, true, &exists, &entry, 0, NULL, - NULL, NULL); + rc = fetch_entry(dfs->layout_v, oh, th, name, len, true, &exists, &entry, 0, NULL, NULL, + NULL); if (rc) - return rc; + D_GOTO(out, rc); if (!exists) - return ENOENT; + D_GOTO(out, rc = ENOENT); + + /* If entry is a hardlink, fetch metadata from HLM */ + if (dfs_is_hardlink(entry.mode)) { + is_hardlink = true; + oid_cp(&hlm_oid, entry.oid); + rc = hlm_fetch_entry(dfs->hlm_oh, th, &hlm_oid, &entry); + if (rc) { + D_ERROR("Failed to fetch entry '%s' from HLM (%d)\n", name, rc); + D_GOTO(out, rc); + } + } /** resolve symlink */ if (S_ISLNK(entry.mode)) { @@ -1203,14 +1287,20 @@ dfs_chmod(dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode) if (rc) { D_ERROR("Failed to lookup symlink %s\n", entry.value); D_FREE(entry.value); - return rc; + D_GOTO(out, rc); } rc = daos_obj_open(dfs->coh, sym->parent_oid, DAOS_OO_RW, &oh, NULL); D_FREE(entry.value); if (rc) { dfs_release(sym); - return daos_der2errno(rc); + D_GOTO(out, rc = daos_der2errno(rc)); + } + + /* Check if symlink target is a hardlink */ + if (dfs_is_hardlink(sym->mode)) { + is_hardlink = true; + oid_cp(&hlm_oid, sym->oid); } orig_mode = sym->mode; @@ -1223,14 +1313,20 @@ dfs_chmod(dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode) if ((mode & S_IFMT) && (orig_mode & S_IFMT) != (mode & S_IFMT)) { D_ERROR("Cannot change entry type\n"); - D_GOTO(out, rc = EINVAL); + D_GOTO(out_sym, 
rc = EINVAL); } /** set the type mode in case user has not passed it */ mode |= orig_mode & S_IFMT; - /** set dkey as the entry name */ - d_iov_set(&dkey, (void *)entry_name, len); + /** For hardlinks, update HLM with OID as dkey; otherwise use parent dir with name as dkey + */ + if (is_hardlink) { + /* oh is left alone: it is still needed for restarts and symlink cleanup */ + d_iov_set(&dkey, &hlm_oid, sizeof(daos_obj_id_t)); + } else { + d_iov_set(&dkey, (void *)entry_name, len); + } d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); iod.iod_recxs = recxs; iod.iod_type = DAOS_IOD_ARRAY; @@ -1245,7 +1341,7 @@ dfs_chmod(dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode) rc = clock_gettime(CLOCK_REALTIME, &now); if (rc) - D_GOTO(out, rc = errno); + D_GOTO(out_sym, rc = errno); /** set sgl for update */ sgl.sg_nr = 3; @@ -1258,15 +1354,44 @@ dfs_chmod(dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode) - rc = daos_obj_update(oh, th, DAOS_COND_DKEY_UPDATE, &dkey, 1, &iod, &sgl, NULL); + rc = daos_obj_update(is_hardlink ? dfs->hlm_oh : oh, th, DAOS_COND_DKEY_UPDATE, &dkey, 1, + &iod, &sgl, NULL); if (rc) { D_ERROR("Failed to update mode, " DF_RC "\n", DP_RC(rc)); - D_GOTO(out, rc = daos_der2errno(rc)); + D_GOTO(out_sym, rc = daos_der2errno(rc)); + } + + if (dfs->use_dtx) { + rc = daos_tx_commit(th, NULL); + if (rc) { + if (rc != -DER_TX_RESTART) + D_ERROR("daos_tx_commit() failed (%d)\n", rc); + if (rc == -DER_TX_RESTART) { + rc = daos_tx_restart(th, NULL); + if (rc) { + D_ERROR("daos_tx_restart() failed (%d)\n", rc); + D_GOTO(out_sym, rc = daos_der2errno(rc)); + } + if (S_ISLNK(entry.mode)) { + dfs_release(sym); + daos_obj_close(oh, NULL); + } + goto restart; + } + D_GOTO(out_sym, rc = daos_der2errno(rc)); + } } DFS_OP_STAT_INCR(dfs, DOS_CHMOD); -out: +out_sym: if (S_ISLNK(entry.mode)) { dfs_release(sym); daos_obj_close(oh, NULL); } +out: + if (dfs->use_dtx) { + if (rc) + daos_tx_abort(th, NULL); + /* always close the handle; dfs_link() does the same after an abort */ + daos_tx_close(th, NULL); + } return rc; } @@ -1287,6 +1412,10 @@ dfs_chown(dfs_t *dfs, dfs_obj_t *parent, const char *name, uid_t uid, gid_t gid, const char *entry_name; int i; struct 
timespec now; + bool is_hardlink = false; + daos_obj_id_t hlm_oid; + daos_handle_t dir_oh; + size_t dir_len; int rc; if (dfs == NULL || !dfs->mounted) @@ -1312,14 +1441,40 @@ dfs_chown(dfs_t *dfs, dfs_obj_t *parent, const char *name, uid_t uid, gid_t gid, oh = parent->oh; } + if (dfs->use_dtx) { + rc = daos_tx_open(dfs->coh, &th, 0, NULL); + if (rc) { + D_ERROR("daos_tx_open() failed (%d)\n", rc); + return daos_der2errno(rc); + } + } + /* oh and len are overwritten when a symlink is followed; keep them for TX restarts */ + dir_oh = oh; + dir_len = len; + +restart: + oh = dir_oh; + len = dir_len; + is_hardlink = false; /* Check if parent has the entry */ - rc = fetch_entry(dfs->layout_v, oh, DAOS_TX_NONE, name, len, true, &exists, &entry, 0, NULL, - NULL, NULL); + rc = fetch_entry(dfs->layout_v, oh, th, name, len, true, &exists, &entry, 0, NULL, NULL, + NULL); if (rc) - return rc; + D_GOTO(out, rc); if (!exists) - return ENOENT; + D_GOTO(out, rc = ENOENT); + + /* If entry is a hardlink, fetch metadata from HLM */ + if (dfs_is_hardlink(entry.mode)) { + is_hardlink = true; + oid_cp(&hlm_oid, entry.oid); + rc = hlm_fetch_entry(dfs->hlm_oh, th, &hlm_oid, &entry); + if (rc) { + D_ERROR("Failed to fetch entry '%s' from HLM (%d)\n", name, rc); + D_GOTO(out, rc); + } + } if (uid == -1 && gid == -1) D_GOTO(out, rc = 0); @@ -1332,15 +1487,22 @@ dfs_chown(dfs_t *dfs, dfs_obj_t *parent, const char *name, uid_t uid, gid_t gid, D_DEBUG(DB_TRACE, "Failed to lookup symlink '%s': %d (%s)\n", entry.value, rc, strerror(rc)); D_FREE(entry.value); - return rc; + D_GOTO(out, rc); } rc = daos_obj_open(dfs->coh, sym->parent_oid, DAOS_OO_RW, &oh, NULL); D_FREE(entry.value); if (rc) { dfs_release(sym); - return daos_der2errno(rc); + D_GOTO(out, rc = daos_der2errno(rc)); + } + + /* Check if symlink target is a hardlink */ + if (dfs_is_hardlink(sym->mode)) { + is_hardlink = true; + oid_cp(&hlm_oid, sym->oid); } + entry_name = sym->name; len = strlen(entry_name); } else { @@ -1351,7 +1513,7 @@ dfs_chown(dfs_t *dfs, dfs_obj_t *parent, const char *name, uid_t uid, gid_t gid, rc = clock_gettime(CLOCK_REALTIME, &now); if (rc) - D_GOTO(out, rc = errno); + D_GOTO(out_sym, rc = errno); i = 0; recxs[i].rx_idx = 
CTIME_IDX; @@ -1378,8 +1540,14 @@ dfs_chown(dfs_t *dfs, dfs_obj_t *parent, const char *name, uid_t uid, gid_t gid, i++; } - /** set dkey as the entry name */ - d_iov_set(&dkey, (void *)entry_name, len); + /** For hardlinks, update HLM with OID as dkey; otherwise use parent dir with name as dkey + */ + if (is_hardlink) { + /* oh is left alone: it is still needed for restarts and symlink cleanup */ + d_iov_set(&dkey, &hlm_oid, sizeof(daos_obj_id_t)); + } else { + d_iov_set(&dkey, (void *)entry_name, len); + } d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); iod.iod_nr = i; iod.iod_recxs = recxs; @@ -1394,15 +1562,44 @@ dfs_chown(dfs_t *dfs, dfs_obj_t *parent, const char *name, uid_t uid, gid_t gid, - rc = daos_obj_update(oh, th, DAOS_COND_DKEY_UPDATE, &dkey, 1, &iod, &sgl, NULL); + rc = daos_obj_update(is_hardlink ? dfs->hlm_oh : oh, th, DAOS_COND_DKEY_UPDATE, &dkey, 1, + &iod, &sgl, NULL); if (rc) { D_ERROR("Failed to update owner/group, " DF_RC "\n", DP_RC(rc)); - D_GOTO(out, rc = daos_der2errno(rc)); + D_GOTO(out_sym, rc = daos_der2errno(rc)); + } + + if (dfs->use_dtx) { + rc = daos_tx_commit(th, NULL); + if (rc) { + if (rc != -DER_TX_RESTART) + D_ERROR("daos_tx_commit() failed (%d)\n", rc); + if (rc == -DER_TX_RESTART) { + rc = daos_tx_restart(th, NULL); + if (rc) { + D_ERROR("daos_tx_restart() failed (%d)\n", rc); + D_GOTO(out_sym, rc = daos_der2errno(rc)); + } + if (!(flags & O_NOFOLLOW) && S_ISLNK(entry.mode)) { + dfs_release(sym); + daos_obj_close(oh, NULL); + } + goto restart; + } + D_GOTO(out_sym, rc = daos_der2errno(rc)); + } } DFS_OP_STAT_INCR(dfs, DOS_CHOWN); -out: +out_sym: if (!(flags & O_NOFOLLOW) && S_ISLNK(entry.mode)) { dfs_release(sym); daos_obj_close(oh, NULL); } +out: + if (dfs->use_dtx) { + if (rc) + daos_tx_abort(th, NULL); + /* always close the handle; dfs_link() does the same after an abort */ + daos_tx_close(th, NULL); + } return rc; } @@ -1411,7 +1608,7 @@ dfs_osetattr(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, int flags) { daos_handle_t th = DAOS_TX_NONE; daos_key_t dkey; - daos_handle_t oh; + daos_handle_t oh = DAOS_HDL_INVAL; d_sg_list_t sgl; d_iov_t sg_iovs[10]; daos_iod_t iod; @@ -1424,7 +1621,10 @@ dfs_osetattr(dfs_t *dfs, dfs_obj_t 
*obj, struct stat *stbuf, int flags) uint64_t obj_hlc = 0; struct stat rstat = {}; daos_array_stbuf_t array_stbuf = {0}; + int saved_flags; int rc; + mode_t st_mode; + bool is_hardlink; if (dfs == NULL || !dfs->mounted) return EINVAL; @@ -1445,12 +1628,42 @@ dfs_osetattr(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, int flags) } } - /** Open parent object and fetch entry of obj from it */ - rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RW, &oh, NULL); - if (rc) - return daos_der2errno(rc); + saved_flags = flags; + + if (dfs->use_dtx) { + rc = daos_tx_open(dfs->coh, &th, 0, NULL); + if (rc) { + D_ERROR("daos_tx_open() failed (%d)\n", rc); + return daos_der2errno(rc); + } + } - len = strlen(obj->name); +restart: + len = strlen(obj->name); + flags = saved_flags; + i = 0; + hlc_recx_idx = 0; + set_size = false; + set_mtime = false; + set_ctime = false; + st_mode = 0; + is_hardlink = false; + + /** Set up dkey and object handle based on hardlink status */ + if (dfs_is_hardlink(obj->mode)) { + /* For hardlinks, attrs are stored in HLM with OID as dkey */ + d_iov_set(&dkey, &obj->oid, sizeof(daos_obj_id_t)); + oh = dfs->hlm_oh; + st_mode = MODE_HARDLINK_BIT; + is_hardlink = true; + } else { + /** Open parent object and fetch entry of obj from it */ + rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RW, &oh, NULL); + if (rc) + D_GOTO(out, rc = daos_der2errno(rc)); + /** set dkey as the entry name */ + d_iov_set(&dkey, (void *)obj->name, strlen(obj->name)); + } /* * Fetch the remote entry first so we can check the oid, then keep track locally of what has @@ -1463,8 +1676,15 @@ dfs_osetattr(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, int flags) if (rc) D_GOTO(out_obj, rc); - /** set dkey as the entry name */ - d_iov_set(&dkey, (void *)obj->name, len); + /** entry_stat would update the hardlink bit if set remotely */ + if (!is_hardlink && dfs_is_hardlink(obj->mode)) { + daos_obj_close(oh, NULL); + d_iov_set(&dkey, &obj->oid, sizeof(daos_obj_id_t)); + oh = 
dfs->hlm_oh; + st_mode = MODE_HARDLINK_BIT; + is_hardlink = true; + } + d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); iod.iod_recxs = recxs; iod.iod_type = DAOS_IOD_ARRAY; @@ -1494,7 +1714,8 @@ dfs_osetattr(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, int flags) } if (flags & DFS_SET_ATTR_MODE) { - d_iov_set(&sg_iovs[i], &stbuf->st_mode, sizeof(mode_t)); + st_mode |= stbuf->st_mode; + d_iov_set(&sg_iovs[i], &st_mode, sizeof(mode_t)); recxs[i].rx_idx = MODE_IDX; recxs[i].rx_nr = sizeof(mode_t); i++; @@ -1565,7 +1786,7 @@ dfs_osetattr(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, int flags) D_GOTO(out_obj, rc = EINVAL); if (set_size) { - rc = daos_array_set_size(obj->oh, th, stbuf->st_size, NULL); + rc = daos_array_set_size(obj->oh, DAOS_TX_NONE, stbuf->st_size, NULL); if (rc) D_GOTO(out_obj, rc = daos_der2errno(rc)); @@ -1579,7 +1800,7 @@ dfs_osetattr(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, int flags) * an array stat for the hlc. */ /** TODO - need an array API to just stat the max epoch without size */ - rc = daos_array_stat(obj->oh, th, &array_stbuf, NULL); + rc = daos_array_stat(obj->oh, DAOS_TX_NONE, &array_stbuf, NULL); if (rc) D_GOTO(out_obj, rc = daos_der2errno(rc)); @@ -1618,11 +1839,38 @@ dfs_osetattr(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, int flags) D_GOTO(out_obj, rc = daos_der2errno(rc)); } + if (dfs->use_dtx) { + rc = daos_tx_commit(th, NULL); + if (rc) { + if (rc != -DER_TX_RESTART) + D_ERROR("daos_tx_commit() failed (%d)\n", rc); + if (rc == -DER_TX_RESTART) { + rc = daos_tx_restart(th, NULL); + if (rc) { + D_ERROR("daos_tx_restart() failed (%d)\n", rc); + D_GOTO(out_obj, rc = daos_der2errno(rc)); + } + if (!is_hardlink) + daos_obj_close(oh, NULL); + goto restart; + } + D_GOTO(out_obj, rc = daos_der2errno(rc)); + } + } + DFS_OP_STAT_INCR(dfs, DOS_SETATTR); out_stat: *stbuf = rstat; out_obj: - daos_obj_close(oh, NULL); + if (!is_hardlink) + daos_obj_close(oh, NULL); +out: + if (dfs->use_dtx) { + if (rc) 
+ daos_tx_abort(th, NULL); + /* always close the handle; dfs_link() does the same after an abort */ + daos_tx_close(th, NULL); + } return rc; } @@ -1723,6 +1971,180 @@ dfs_get_symlink_value(dfs_obj_t *obj, char *buf, daos_size_t *size) return 0; } +int +dfs_link(dfs_t *dfs, dfs_obj_t *obj, dfs_obj_t *parent, const char *name, dfs_obj_t **_new_obj, + struct stat *stbuf) +{ + struct dfs_entry entry = {0}; + dfs_obj_t *new_obj = NULL; + daos_handle_t th = DAOS_TX_NONE; + daos_handle_t oh = DAOS_HDL_INVAL; + size_t len; + int rc, rc2; + + if (dfs == NULL || !dfs->mounted) + return EINVAL; + if (dfs->amode != O_RDWR) + return EPERM; + if (obj == NULL) + return EINVAL; + /* Hardlinks are only supported for regular files */ + if (!S_ISREG(obj->mode)) + return EPERM; + if (parent == NULL) + parent = &dfs->root; + else if (!S_ISDIR(parent->mode)) + return ENOTDIR; + + /* Check if HLM object is available */ + if (!daos_handle_is_valid(dfs->hlm_oh)) { + D_ERROR("HLM object not available for hardlink support\n"); + return ENOTSUP; + } + + rc = check_name(name, &len); + if (rc) + return rc; + + /* Always use DTX for hardlink operations to ensure consistency */ + rc = daos_tx_open(dfs->coh, &th, 0, NULL); + if (rc) { + D_ERROR("daos_tx_open() failed (%d)\n", rc); + return daos_der2errno(rc); + } + +restart: + /* Fetch the target entry, handling hardlinks transparently */ + oh = DAOS_HDL_INVAL; + rc = dfsobj_fetch_entry(dfs, th, obj, &oh, &entry); + if (rc) { + D_ERROR("Failed to fetch target entry (%d)\n", rc); + D_GOTO(out, rc); + } + + /* Allocate new_obj and open array early so failures can be handled in tx */ + if (_new_obj) { + D_ALLOC_PTR(new_obj); + if (new_obj == NULL) + D_GOTO(out_close, rc = ENOMEM); + + /* Open the array object for the file */ + rc = daos_array_open_with_attr(dfs->coh, entry.oid, th, DAOS_OO_RW, 1, + entry.chunk_size ? 
entry.chunk_size + : dfs->attr.da_chunk_size, + &new_obj->oh, NULL); + if (rc) { + D_ERROR("daos_array_open_with_attr() failed " DF_RC "\n", DP_RC(rc)); + D_FREE(new_obj); + new_obj = NULL; + D_GOTO(out_close, rc = daos_der2errno(rc)); + } + } + + /* Step 1: If hardlink bit is not set, migrate metadata to HLM */ + if (!dfs_is_hardlink(entry.mode)) { + /* 1a. Copy dentry and xattrs to HLM object with dkey as oid */ + rc = hlm_copy_entry(dfs, th, oh, obj->name, &entry); + if (rc) { + D_ERROR("Failed to copy entry to HLM (%d)\n", rc); + D_GOTO(out_close, rc); + } + + /* 1b. Set hardlink bit in the target dentry */ + rc = set_hardlink_bit(dfs, th, oh, obj, entry.mode); + if (rc) { + D_ERROR("Failed to set hardlink bit (%d)\n", rc); + D_GOTO(out_close, rc); + } + + /* 1c. Remove extended attributes from target dentry (not HLM) */ + rc = remove_xattrs_from_entry(dfs, th, oh, obj->name); + if (rc) { + D_ERROR("Failed to remove xattrs from entry (%d)\n", rc); + D_GOTO(out_close, rc); + } + } else { + /* Entry is already a hardlink, increment ref_cnt in HLM */ + rc = hlm_update_ref_cnt(dfs, th, &entry, 1); + if (rc) { + D_ERROR("Failed to increment ref_cnt in HLM (%d)\n", rc); + D_GOTO(out_close, rc); + } + } + + /* Step 2: Create dentry for link_name with same value as target */ + /* Set the hardlink bit in the new entry's mode as well */ + entry.mode |= MODE_HARDLINK_BIT; + rc = insert_entry(dfs->layout_v, parent->oh, th, name, len, DAOS_COND_DKEY_INSERT, &entry); + if (rc) { + D_ERROR("Failed to create link entry (%d)\n", rc); + D_GOTO(out_close, rc); + } + + if (daos_handle_is_valid(oh)) + daos_obj_close(oh, NULL); + oh = DAOS_HDL_INVAL; + + rc = daos_tx_commit(th, NULL); + if (rc) { + if (rc != -DER_TX_RESTART) + D_ERROR("daos_tx_commit() failed (%d)\n", rc); + D_GOTO(out_close, rc = daos_der2errno(rc)); + } + + daos_tx_close(th, NULL); + th = DAOS_TX_NONE; + + /* Populate new_obj fields after successful commit */ + if (new_obj) { + strncpy(new_obj->name, name, len + 
1); + oid_cp(&new_obj->parent_oid, parent->oid); + oid_cp(&new_obj->oid, entry.oid); + new_obj->mode = entry.mode; + new_obj->dfs = dfs; + new_obj->flags = obj->flags; + *_new_obj = new_obj; + } + + /* Populate stbuf if requested */ + if (stbuf) { + memset(stbuf, 0, sizeof(struct stat)); + stbuf->st_nlink = entry.ref_cnt; + stbuf->st_mode = entry.mode & ~MODE_HARDLINK_BIT; + stbuf->st_uid = entry.uid; + stbuf->st_gid = entry.gid; + stbuf->st_mtim.tv_sec = entry.mtime; + stbuf->st_mtim.tv_nsec = entry.mtime_nano; + stbuf->st_ctim.tv_sec = entry.ctime; + stbuf->st_ctim.tv_nsec = entry.ctime_nano; + stbuf->st_atim = stbuf->st_mtim; + stbuf->st_blksize = entry.chunk_size ? entry.chunk_size : dfs->attr.da_chunk_size; + } + + DFS_OP_STAT_INCR(dfs, DOS_LINK); + return 0; + +out_close: + if (new_obj) { + daos_array_close(new_obj->oh, NULL); + D_FREE(new_obj); + } + if (daos_handle_is_valid(oh)) + daos_obj_close(oh, NULL); +out: + if (daos_handle_is_valid(th)) { + if (rc == ERESTART) { + rc2 = daos_tx_restart(th, NULL); + if (rc2 == 0) + goto restart; + rc = daos_der2errno(rc2); + } + daos_tx_abort(th, NULL); + daos_tx_close(th, NULL); + } + return rc; +} + int dfs_sync(dfs_t *dfs) { diff --git a/src/client/dfs/rename.c b/src/client/dfs/rename.c index cb6991e0e0b..3a7bfe25c62 100644 --- a/src/client/dfs/rename.c +++ b/src/client/dfs/rename.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -111,11 +112,14 @@ xattr_copy(daos_handle_t src_oh, const char *src_name, daos_handle_t dst_oh, con return rc; } -/* Returns oids for both moved and clobbered files, but does not check either of them */ +/* Returns oids for both moved and clobbered files, but does not check either of them. + * deleted indicates if the clobbered file was actually deleted (last link or regular file), + * or just a hardlink was removed (file still has other links). 
+ */ int dfs_move_internal(dfs_t *dfs, unsigned int flags, dfs_obj_t *parent, const char *name, dfs_obj_t *new_parent, const char *new_name, daos_obj_id_t *moid, - daos_obj_id_t *oid) + daos_obj_id_t *oid, bool *deleted) { struct dfs_entry entry = {0}, new_entry = {0}; daos_handle_t th = DAOS_TX_NONE; @@ -125,6 +129,9 @@ dfs_move_internal(dfs_t *dfs, unsigned int flags, dfs_obj_t *parent, const char size_t new_len; int rc; + if (deleted) + *deleted = true; /* Default to true for non-hardlink files */ + if (dfs == NULL || !dfs->mounted) return EINVAL; if (dfs->amode != O_RDWR) @@ -227,7 +234,7 @@ dfs_move_internal(dfs_t *dfs, unsigned int flags, dfs_obj_t *parent, const char D_GOTO(out, rc = ENOTEMPTY); } - rc = remove_entry(dfs, th, new_parent->oh, new_name, new_len, new_entry); + rc = remove_entry(dfs, th, new_parent->oh, new_name, new_len, new_entry, deleted); if (rc) { D_ERROR("Failed to remove entry %s (%d)\n", new_name, rc); D_GOTO(out, rc); @@ -239,7 +246,7 @@ dfs_move_internal(dfs_t *dfs, unsigned int flags, dfs_obj_t *parent, const char /** rename symlink */ if (S_ISLNK(entry.mode)) { - rc = remove_entry(dfs, th, parent->oh, name, len, entry); + rc = remove_entry(dfs, th, parent->oh, name, len, entry, NULL); if (rc) { D_ERROR("Failed to remove entry %s (%d)\n", name, rc); D_GOTO(out, rc); @@ -268,13 +275,15 @@ dfs_move_internal(dfs_t *dfs, unsigned int flags, dfs_obj_t *parent, const char D_GOTO(out, rc); } - /** cp the extended attributes if they exist */ - rc = xattr_copy(parent->oh, name, new_parent->oh, new_name, th); - if (rc == ERESTART) { - D_GOTO(out, rc); - } else if (rc) { - D_ERROR("Failed to copy extended attributes (%d)\n", rc); - D_GOTO(out, rc); + /** cp the extended attributes if they exist (skip for hardlinks - xattrs are in HLM) */ + if (!dfs_is_hardlink(entry.mode)) { + rc = xattr_copy(parent->oh, name, new_parent->oh, new_name, th); + if (rc == ERESTART) { + D_GOTO(out, rc); + } else if (rc) { + D_ERROR("Failed to copy extended 
attributes (%d)\n", rc); + D_GOTO(out, rc); + } } /** remove the old entry from the old parent (just the dkey) */ @@ -318,7 +327,7 @@ int dfs_move(dfs_t *dfs, dfs_obj_t *parent, const char *name, dfs_obj_t *new_parent, const char *new_name, daos_obj_id_t *oid) { - return dfs_move_internal(dfs, 0, parent, name, new_parent, new_name, NULL, oid); + return dfs_move_internal(dfs, 0, parent, name, new_parent, new_name, NULL, oid, NULL); } int diff --git a/src/client/dfs/xattr.c b/src/client/dfs/xattr.c index b3a13a31a8d..88703f524ae 100644 --- a/src/client/dfs/xattr.c +++ b/src/client/dfs/xattr.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -18,17 +19,20 @@ int dfs_setxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, const void *value, daos_size_t size, int flags) { - char *xname = NULL; - daos_handle_t th = DAOS_TX_NONE; - d_sg_list_t sgls[2]; - d_iov_t sg_iovs[3]; - daos_iod_t iods[2]; - daos_recx_t recxs[2]; - daos_key_t dkey; - daos_handle_t oh; - uint64_t cond = 0; - struct timespec now; - int rc; + char *xname = NULL; + daos_handle_t th = DAOS_TX_NONE; + d_sg_list_t sgls[2]; + d_iov_t sg_iovs[3]; + daos_iod_t iods[2]; + daos_recx_t recxs[2]; + daos_key_t dkey; + daos_handle_t oh = DAOS_HDL_INVAL; + uint64_t cond = 0; + uint64_t saved_cond; + struct timespec now; + struct dfs_entry entry = {0}; + bool exists; + int rc; if (dfs == NULL || !dfs->mounted) return EINVAL; @@ -48,14 +52,6 @@ dfs_setxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, const void *value, da if (xname == NULL) return ENOMEM; - /** Open parent object and insert xattr in the entry of the object */ - rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RW, &oh, NULL); - if (rc) - D_GOTO(free, rc = daos_der2errno(rc)); - - /** set dkey as the entry name */ - d_iov_set(&dkey, (void *)obj->name, strlen(obj->name)); - /** add xattr iod & sgl */ 
d_iov_set(&iods[0].iod_name, xname, strlen(xname)); iods[0].iod_nr = 1; @@ -80,14 +76,9 @@ dfs_setxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, const void *value, da recxs[0].rx_nr = sizeof(uint64_t); recxs[1].rx_idx = CTIME_NSEC_IDX; recxs[1].rx_nr = sizeof(uint64_t); - rc = clock_gettime(CLOCK_REALTIME, &now); - if (rc) - D_GOTO(out, rc = errno); sgls[1].sg_nr = 2; sgls[1].sg_nr_out = 0; sgls[1].sg_iovs = &sg_iovs[1]; - d_iov_set(&sg_iovs[1], &now.tv_sec, sizeof(uint64_t)); - d_iov_set(&sg_iovs[2], &now.tv_nsec, sizeof(uint64_t)); /** if not default flag, check for xattr existence */ if (flags != 0) { @@ -98,34 +89,118 @@ dfs_setxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, const void *value, da } cond |= DAOS_COND_DKEY_UPDATE; + d_iov_set(&sg_iovs[1], &now.tv_sec, sizeof(uint64_t)); + d_iov_set(&sg_iovs[2], &now.tv_nsec, sizeof(uint64_t)); + + saved_cond = cond; + + if (dfs->use_dtx) { + rc = daos_tx_open(dfs->coh, &th, 0, NULL); + if (rc) { + D_ERROR("daos_tx_open() failed (%d)\n", rc); + D_GOTO(out, rc = daos_der2errno(rc)); + } + } + +retry: + /** Part 1: Set up dkey and object handle based on hardlink status */ + cond = saved_cond; + if (dfs_is_hardlink(obj->mode)) { + /* For hardlinks, xattrs are stored in HLM with OID as dkey */ + d_iov_set(&dkey, &obj->oid, sizeof(daos_obj_id_t)); + oh = dfs->hlm_oh; + } else { + /** Open parent object and insert xattr in the entry of the object */ + rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RW, &oh, NULL); + if (rc) + D_GOTO(out, rc = daos_der2errno(rc)); + + /** + * Check if entry is a hardlink (another DFS instance may have + * converted it). If so, update obj->mode and retry from HLM. 
+ */ + if (S_ISREG(obj->mode)) { + rc = fetch_entry(dfs->layout_v, oh, th, obj->name, strlen(obj->name), false, + &exists, &entry, 0, NULL, NULL, NULL); + if (rc) { + D_ERROR("Failed to fetch entry '%s' (%d)\n", obj->name, rc); + D_GOTO(out_obj, rc); + } + if (!exists) + D_GOTO(out_obj, rc = ENOENT); + if (dfs_is_hardlink(entry.mode)) { + obj->mode = entry.mode; + daos_obj_close(oh, NULL); + oh = DAOS_HDL_INVAL; + goto retry; + } + } + + /** set dkey as the entry name */ + d_iov_set(&dkey, (void *)obj->name, strlen(obj->name)); + } + + /** Part 2: Perform the update operation */ + rc = clock_gettime(CLOCK_REALTIME, &now); + if (rc) + D_GOTO(out_obj, rc = errno); + /** update ctime in a separate update if DAOS_COND_AKEY_INSERT is used for the xattr */ if (cond & DAOS_COND_AKEY_INSERT) { /** insert the xattr */ rc = daos_obj_update(oh, th, cond, &dkey, 1, &iods[0], &sgls[0], NULL); if (rc) { D_ERROR("Failed to insert extended attribute %s\n", name); - D_GOTO(out, rc = daos_der2errno(rc)); + D_GOTO(out_obj, rc = daos_der2errno(rc)); } /** update the ctime */ rc = daos_obj_update(oh, th, DAOS_COND_DKEY_UPDATE, &dkey, 1, &iods[1], &sgls[1], NULL); if (rc) { D_ERROR("Failed to update ctime %s\n", name); - D_GOTO(out, rc = daos_der2errno(rc)); + D_GOTO(out_obj, rc = daos_der2errno(rc)); } } else { /** replace the xattr and update the ctime */ rc = daos_obj_update(oh, th, cond, &dkey, 2, iods, sgls, NULL); if (rc) { D_ERROR("Failed to insert extended attribute %s\n", name); - D_GOTO(out, rc = daos_der2errno(rc)); + D_GOTO(out_obj, rc = daos_der2errno(rc)); + } + } + + if (dfs->use_dtx) { + rc = daos_tx_commit(th, NULL); + if (rc) { + if (rc != -DER_TX_RESTART) + D_ERROR("daos_tx_commit() failed (%d)\n", rc); + if (rc == -DER_TX_RESTART) { + rc = daos_tx_restart(th, NULL); + if (rc) { + D_ERROR("daos_tx_restart() failed (%d)\n", rc); + D_GOTO(out_obj, rc = daos_der2errno(rc)); + } + if (!dfs_is_hardlink(obj->mode)) { + daos_obj_close(oh, NULL); + oh = DAOS_HDL_INVAL; + } + 
goto retry; + } + D_GOTO(out_obj, rc = daos_der2errno(rc)); } } DFS_OP_STAT_INCR(dfs, DOS_SETXATTR); +out_obj: + if (!dfs_is_hardlink(obj->mode) && daos_handle_is_valid(oh)) + daos_obj_close(oh, NULL); out: - daos_obj_close(oh, NULL); -free: + if (dfs->use_dtx && daos_handle_is_valid(th)) { + if (rc) + daos_tx_abort(th, NULL); + /* nothing to release if daos_tx_open() failed; otherwise always close */ + daos_tx_close(th, NULL); + } D_FREE(xname); return rc; } @@ -133,13 +208,15 @@ dfs_setxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, const void *value, da int dfs_getxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, void *value, daos_size_t *size) { - char *xname = NULL; - d_sg_list_t sgl; - d_iov_t sg_iov; - daos_iod_t iod; - daos_key_t dkey; - daos_handle_t oh; - int rc; + char *xname = NULL; + d_sg_list_t sgl; + d_iov_t sg_iov; + daos_iod_t iod; + daos_key_t dkey; + daos_handle_t oh = DAOS_HDL_INVAL; + struct dfs_entry entry = {0}; + bool exists; + int rc; if (dfs == NULL || !dfs->mounted) return EINVAL; @@ -154,14 +231,6 @@ dfs_getxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, void *value, daos_siz if (xname == NULL) return ENOMEM; - /** Open parent object and get xattr from the entry of the object */ - rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RO, &oh, NULL); - if (rc) - D_GOTO(out, rc = daos_der2errno(rc)); - - /** set dkey as the entry name */ - d_iov_set(&dkey, (void *)obj->name, strlen(obj->name)); - /** set akey as the xattr name */ d_iov_set(&iod.iod_name, xname, strlen(xname)); iod.iod_nr = 1; @@ -179,26 +248,75 @@ dfs_getxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, void *value, daos_siz sgl.sg_nr = 1; sgl.sg_nr_out = 0; sgl.sg_iovs = &sg_iov; - - rc = daos_obj_fetch(oh, dfs->th, DAOS_COND_AKEY_FETCH, &dkey, 1, &iod, &sgl, NULL, - NULL); } else { iod.iod_size = DAOS_REC_ANY; - - rc = daos_obj_fetch(oh, dfs->th, DAOS_COND_AKEY_FETCH, &dkey, 1, &iod, NULL, NULL, - NULL); } - if (rc) { - DL_CDEBUG(rc == -DER_NONEXIST, DLOG_DBG, DLOG_ERR, rc, "Failed to fetch xattr '%s'", - name); - D_GOTO(close, rc = 
daos_der2errno(rc)); + +retry: + if (dfs_is_hardlink(obj->mode)) { + /* For hardlinks, xattrs are stored in HLM with OID as dkey */ + d_iov_set(&dkey, &obj->oid, sizeof(daos_obj_id_t)); + + if (*size) + rc = daos_obj_fetch(dfs->hlm_oh, dfs->th, DAOS_COND_AKEY_FETCH, &dkey, 1, + &iod, &sgl, NULL, NULL); + else + rc = daos_obj_fetch(dfs->hlm_oh, dfs->th, DAOS_COND_AKEY_FETCH, &dkey, 1, + &iod, NULL, NULL, NULL); + if (rc) { + DL_CDEBUG(rc == -DER_NONEXIST, DLOG_DBG, DLOG_ERR, rc, + "Failed to fetch xattr '%s' from HLM", name); + D_GOTO(out, rc = daos_der2errno(rc)); + } + } else { + /** Open parent object and get xattr from the entry of the object */ + rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RO, &oh, NULL); + if (rc) + D_GOTO(out, rc = daos_der2errno(rc)); + + /** set dkey as the entry name */ + d_iov_set(&dkey, (void *)obj->name, strlen(obj->name)); + + if (*size) + rc = daos_obj_fetch(oh, dfs->th, DAOS_COND_AKEY_FETCH, &dkey, 1, &iod, &sgl, + NULL, NULL); + else + rc = daos_obj_fetch(oh, dfs->th, DAOS_COND_AKEY_FETCH, &dkey, 1, &iod, NULL, + NULL, NULL); + if (rc == -DER_NONEXIST && S_ISREG(obj->mode)) { + /* + * xattr not found - check if the entry became a hardlink + * (another DFS instance may have converted it and moved xattrs to HLM) + */ + rc = fetch_entry(dfs->layout_v, oh, dfs->th, obj->name, strlen(obj->name), + false, &exists, &entry, 0, NULL, NULL, NULL); + daos_obj_close(oh, NULL); + oh = DAOS_HDL_INVAL; + if (rc) { + D_ERROR("Failed to fetch entry '%s' (%d)\n", obj->name, rc); + D_GOTO(out, rc); + } + if (!exists) + D_GOTO(out, rc = ENODATA); + if (dfs_is_hardlink(entry.mode)) { + /* Entry became a hardlink, update obj->mode and retry from HLM */ + obj->mode = entry.mode; + goto retry; + } + D_GOTO(out, rc = ENODATA); + } + if (rc) { + DL_CDEBUG(rc == -DER_NONEXIST, DLOG_DBG, DLOG_ERR, rc, + "Failed to fetch xattr '%s'", name); + daos_obj_close(oh, NULL); + D_GOTO(out, rc = daos_der2errno(rc)); + } + daos_obj_close(oh, NULL); } *size = 
iod.iod_size; DFS_OP_STAT_INCR(dfs, DOS_GETXATTR); -close: - daos_obj_close(oh, NULL); out: D_FREE(xname); if (rc == ENOENT) @@ -209,17 +327,19 @@ dfs_getxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, void *value, daos_siz int dfs_removexattr(dfs_t *dfs, dfs_obj_t *obj, const char *name) { - char *xname = NULL; - daos_handle_t th = DAOS_TX_NONE; - daos_key_t dkey, akey; - daos_handle_t oh; - uint64_t cond = 0; - d_sg_list_t sgl; - d_iov_t sg_iovs[2]; - daos_iod_t iod; - daos_recx_t recxs[2]; - struct timespec now; - int rc; + char *xname = NULL; + daos_handle_t th = DAOS_TX_NONE; + daos_key_t dkey, akey; + daos_handle_t oh = DAOS_HDL_INVAL; + uint64_t cond = 0; + d_sg_list_t sgl; + d_iov_t sg_iovs[2]; + daos_iod_t iod; + daos_recx_t recxs[2]; + struct timespec now; + struct dfs_entry entry = {0}; + bool exists; + int rc; if (dfs == NULL || !dfs->mounted) return EINVAL; @@ -236,25 +356,10 @@ dfs_removexattr(dfs_t *dfs, dfs_obj_t *obj, const char *name) if (xname == NULL) return ENOMEM; - /** Open parent object and remove xattr from the entry of the object */ - rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RW, &oh, NULL); - if (rc) - D_GOTO(free, rc = daos_der2errno(rc)); - - /** set dkey as the entry name */ - d_iov_set(&dkey, (void *)obj->name, strlen(obj->name)); /** set akey as the xattr name */ d_iov_set(&akey, xname, strlen(xname)); - cond = DAOS_COND_DKEY_UPDATE | DAOS_COND_PUNCH; - rc = daos_obj_punch_akeys(oh, th, cond, &dkey, 1, &akey, NULL); - if (rc) { - D_CDEBUG(rc == -DER_NONEXIST, DLOG_DBG, DLOG_ERR, - "Failed to punch extended attribute '%s'\n", name); - D_GOTO(out, rc = daos_der2errno(rc)); - } - - /** update ctime */ + /** setup iod/sgl for ctime update */ d_iov_set(&iod.iod_name, INODE_AKEY_NAME, sizeof(INODE_AKEY_NAME) - 1); iod.iod_recxs = recxs; iod.iod_type = DAOS_IOD_ARRAY; @@ -264,25 +369,128 @@ dfs_removexattr(dfs_t *dfs, dfs_obj_t *obj, const char *name) recxs[0].rx_nr = sizeof(uint64_t); recxs[1].rx_idx = CTIME_NSEC_IDX; 
recxs[1].rx_nr = sizeof(uint64_t); - rc = clock_gettime(CLOCK_REALTIME, &now); - if (rc) - D_GOTO(out, rc = errno); - sgl.sg_nr = 2; - sgl.sg_nr_out = 0; - sgl.sg_iovs = &sg_iovs[0]; - d_iov_set(&sg_iovs[0], &now.tv_sec, sizeof(uint64_t)); - d_iov_set(&sg_iovs[1], &now.tv_nsec, sizeof(uint64_t)); - - rc = daos_obj_update(oh, th, DAOS_COND_DKEY_UPDATE, &dkey, 1, &iod, &sgl, NULL); - if (rc) { - D_ERROR("Failed to update mode, " DF_RC "\n", DP_RC(rc)); - D_GOTO(out, rc = daos_der2errno(rc)); + sgl.sg_nr = 2; + sgl.sg_nr_out = 0; + sgl.sg_iovs = &sg_iovs[0]; + + cond = DAOS_COND_DKEY_UPDATE | DAOS_COND_PUNCH; + + if (dfs->use_dtx) { + rc = daos_tx_open(dfs->coh, &th, 0, NULL); + if (rc) { + D_ERROR("daos_tx_open() failed (%d)\n", rc); + D_GOTO(out, rc = daos_der2errno(rc)); + } + } + +retry: + if (dfs_is_hardlink(obj->mode)) { + /* For hardlinks, xattrs are stored in HLM with OID as dkey */ + d_iov_set(&dkey, &obj->oid, sizeof(daos_obj_id_t)); + + rc = daos_obj_punch_akeys(dfs->hlm_oh, th, cond, &dkey, 1, &akey, NULL); + if (rc) { + D_CDEBUG(rc == -DER_NONEXIST, DLOG_DBG, DLOG_ERR, + "Failed to punch extended attribute '%s' from HLM\n", name); + D_GOTO(out_tx, rc = daos_der2errno(rc)); + } + + /** update ctime in HLM */ + rc = clock_gettime(CLOCK_REALTIME, &now); + if (rc) + D_GOTO(out_tx, rc = errno); + d_iov_set(&sg_iovs[0], &now.tv_sec, sizeof(uint64_t)); + d_iov_set(&sg_iovs[1], &now.tv_nsec, sizeof(uint64_t)); + + rc = daos_obj_update(dfs->hlm_oh, th, DAOS_COND_DKEY_UPDATE, &dkey, 1, &iod, &sgl, + NULL); + if (rc) { + D_ERROR("Failed to update ctime in HLM, " DF_RC "\n", DP_RC(rc)); + D_GOTO(out_tx, rc = daos_der2errno(rc)); + } + } else { + /** Open parent object and remove xattr from the entry of the object */ + rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RW, &oh, NULL); + if (rc) + D_GOTO(out_tx, rc = daos_der2errno(rc)); + + /** set dkey as the entry name */ + d_iov_set(&dkey, (void *)obj->name, strlen(obj->name)); + + rc = daos_obj_punch_akeys(oh, 
th, cond, &dkey, 1, &akey, NULL); + if (rc == -DER_NONEXIST && S_ISREG(obj->mode)) { + /* + * xattr not found - check if the entry became a hardlink + * (another DFS instance may have converted it and moved xattrs to HLM) + */ + rc = fetch_entry(dfs->layout_v, oh, th, obj->name, strlen(obj->name), false, + &exists, &entry, 0, NULL, NULL, NULL); + daos_obj_close(oh, NULL); + oh = DAOS_HDL_INVAL; + if (rc) { + D_ERROR("Failed to fetch entry '%s' (%d)\n", obj->name, rc); + D_GOTO(out_tx, rc); + } + if (!exists) + D_GOTO(out_tx, rc = ENODATA); + if (dfs_is_hardlink(entry.mode)) { + /* Entry became a hardlink, update obj->mode and retry from HLM */ + obj->mode = entry.mode; + goto retry; + } + D_GOTO(out_tx, rc = ENODATA); + } + if (rc) { + D_CDEBUG(rc == -DER_NONEXIST, DLOG_DBG, DLOG_ERR, + "Failed to punch extended attribute '%s'\n", name); + daos_obj_close(oh, NULL); + D_GOTO(out_tx, rc = daos_der2errno(rc)); + } + + /** update ctime */ + rc = clock_gettime(CLOCK_REALTIME, &now); + if (rc) { + daos_obj_close(oh, NULL); + D_GOTO(out_tx, rc = errno); + } + d_iov_set(&sg_iovs[0], &now.tv_sec, sizeof(uint64_t)); + d_iov_set(&sg_iovs[1], &now.tv_nsec, sizeof(uint64_t)); + + rc = daos_obj_update(oh, th, DAOS_COND_DKEY_UPDATE, &dkey, 1, &iod, &sgl, NULL); + if (rc) { + D_ERROR("Failed to update ctime, " DF_RC "\n", DP_RC(rc)); + daos_obj_close(oh, NULL); + D_GOTO(out_tx, rc = daos_der2errno(rc)); + } + daos_obj_close(oh, NULL); + } + + if (dfs->use_dtx) { + rc = daos_tx_commit(th, NULL); + if (rc) { + if (rc != -DER_TX_RESTART) + D_ERROR("daos_tx_commit() failed (%d)\n", rc); + if (rc == -DER_TX_RESTART) { + rc = daos_tx_restart(th, NULL); + if (rc) { + D_ERROR("daos_tx_restart() failed (%d)\n", rc); + D_GOTO(out_tx, rc = daos_der2errno(rc)); + } + goto retry; + } + D_GOTO(out_tx, rc = daos_der2errno(rc)); + } } DFS_OP_STAT_INCR(dfs, DOS_RMXATTR); +out_tx: + if (dfs->use_dtx) { + if (rc == 0) + daos_tx_close(th, NULL); + else + daos_tx_abort(th, NULL); + } out: - 
daos_obj_close(oh, NULL); -free: D_FREE(xname); return rc; } @@ -290,31 +498,46 @@ dfs_removexattr(dfs_t *dfs, dfs_obj_t *obj, const char *name) int dfs_listxattr(dfs_t *dfs, dfs_obj_t *obj, char *list, daos_size_t *size) { - daos_key_t dkey; - daos_handle_t oh; - daos_key_desc_t kds[ENUM_DESC_NR]; - daos_anchor_t anchor = {0}; - daos_size_t list_size, ret_size; - char *ptr_list; - int rc; + daos_key_t dkey; + daos_handle_t oh = DAOS_HDL_INVAL; + daos_handle_t list_oh; + daos_key_desc_t kds[ENUM_DESC_NR]; + daos_anchor_t anchor = {0}; + daos_size_t list_size, ret_size; + char *ptr_list; + struct dfs_entry entry = {0}; + bool exists; + bool is_hardlink; + int rc = 0; if (dfs == NULL || !dfs->mounted) return EINVAL; if (obj == NULL) return EINVAL; - /** Open parent object and list from entry */ - rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RW, &oh, NULL); - if (rc) - return daos_der2errno(rc); - - /** set dkey as the entry name */ - d_iov_set(&dkey, (void *)obj->name, strlen(obj->name)); - list_size = *size; ret_size = 0; ptr_list = list; +retry: + is_hardlink = dfs_is_hardlink(obj->mode); + memset(&anchor, 0, sizeof(anchor)); + + if (is_hardlink) { + /* For hardlinks, xattrs are stored in HLM with OID as dkey */ + d_iov_set(&dkey, &obj->oid, sizeof(daos_obj_id_t)); + list_oh = dfs->hlm_oh; + } else { + /** Open parent object and list from entry */ + rc = daos_obj_open(dfs->coh, obj->parent_oid, DAOS_OO_RW, &oh, NULL); + if (rc) + return daos_der2errno(rc); + + /** set dkey as the entry name */ + d_iov_set(&dkey, (void *)obj->name, strlen(obj->name)); + list_oh = oh; + } + while (!daos_anchor_is_eof(&anchor)) { uint32_t number = ENUM_DESC_NR; uint32_t i; @@ -328,9 +551,12 @@ dfs_listxattr(dfs_t *dfs, dfs_obj_t *obj, char *list, daos_size_t *size) d_iov_set(&iov, enum_buf, ENUM_DESC_BUF); sgl.sg_iovs = &iov; - rc = daos_obj_list_akey(oh, dfs->th, &dkey, &number, kds, &sgl, &anchor, NULL); - if (rc) + rc = daos_obj_list_akey(list_oh, dfs->th, &dkey, &number, 
kds, &sgl, &anchor, NULL); + if (rc) { + if (!is_hardlink) + daos_obj_close(oh, NULL); D_GOTO(out, rc = daos_der2errno(rc)); + } if (number == 0) continue; @@ -356,9 +582,32 @@ dfs_listxattr(dfs_t *dfs, dfs_obj_t *obj, char *list, daos_size_t *size) } } + /* + * If no xattrs found, not already a hardlink, and object is a regular file, + * check if entry became a hardlink (another DFS instance may have converted + * it and moved xattrs to HLM) + */ + if (ret_size == 0 && !is_hardlink && S_ISREG(obj->mode)) { + rc = fetch_entry(dfs->layout_v, oh, dfs->th, obj->name, strlen(obj->name), false, + &exists, &entry, 0, NULL, NULL, NULL); + daos_obj_close(oh, NULL); + oh = DAOS_HDL_INVAL; + if (rc) { + D_ERROR("Failed to fetch entry '%s' (%d)\n", obj->name, rc); + D_GOTO(out, rc); + } + if (exists && dfs_is_hardlink(entry.mode)) { + /* Entry became a hardlink, update obj->mode and retry from HLM */ + obj->mode = entry.mode; + goto retry; + } + /* Entry doesn't exist or not a hardlink - return empty list */ + } else if (!is_hardlink) { + daos_obj_close(oh, NULL); + } + *size = ret_size; DFS_OP_STAT_INCR(dfs, DOS_LSXATTR); out: - daos_obj_close(oh, NULL); return rc; } diff --git a/src/client/dfuse/SConscript b/src/client/dfuse/SConscript index c699dd2acbb..347e80f4183 100644 --- a/src/client/dfuse/SConscript +++ b/src/client/dfuse/SConscript @@ -15,6 +15,7 @@ OPS_SRC = ['create', 'fgetattr', 'forget', 'getxattr', + 'link', 'listxattr', 'ioctl', 'lookup', diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index 4095aea3c55..68dc5eae2fd 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -386,6 +386,8 @@ struct dfuse_inode_ops { const char *newname, unsigned int flags); void (*symlink)(fuse_req_t req, const char *link, struct dfuse_inode_entry *parent, const char *name); + void (*hardlink)(fuse_req_t req, struct dfuse_inode_entry *inode, + struct dfuse_inode_entry *parent, const char *name); void (*unlink)(fuse_req_t req, struct dfuse_inode_entry *parent, const char *name); void (*setxattr)(fuse_req_t req, struct dfuse_inode_entry *inode, @@ -472,6 +474,7 @@ struct dfuse_pool { ACTION(UNLINK) \ ACTION(READDIR) \ ACTION(SYMLINK) \ + ACTION(LINK) \ ACTION(READLINK) \ ACTION(OPENDIR) \ ACTION(SETXATTR) \ @@ -927,6 +930,26 @@ dfuse_loop(struct dfuse_info *dfuse_info); #define DFUSE_REPLY_IOCTL(desc, req, arg) DFUSE_REPLY_IOCTL_SIZE(desc, req, &(arg), sizeof(arg)) +/** + * Directory entry for tracking hardlink names. + * + * For hardlinks, a single inode can have multiple names in different + * directories. This structure tracks each (parent, name) pair. + */ +struct dfuse_dentry { + /** + * Link in the inode's ie_dentries list, or list head when used + * to hold released dentries from dfuse_ie_dentry_replace() or dfuse_ie_dentry_clear(). + */ + d_list_t dd_list; + + /** The parent inode number */ + fuse_ino_t dd_parent; + + /** The name of this entry */ + char dd_name[NAME_MAX + 1]; +}; + /** * Inode handle. * @@ -963,6 +986,12 @@ struct dfuse_inode_entry { */ fuse_ino_t ie_parent; + /** List of all directory entries (dentries) for this inode. + * For hardlinks, an inode can have multiple names in different + * directories. Each dentry tracks a (parent, name) pair. + */ + d_list_t ie_dentries; + struct dfuse_cont *ie_dfs; /** Hash table of inodes @@ -1010,6 +1039,12 @@ struct dfuse_inode_entry { * acquired and released to flush outstanding writes for getattr, close and forget. 
*/ pthread_rwlock_t ie_wlock; + + /* Lock protecting ie_parent, ie_name, and ie_dentries from concurrent access. + * This is needed because ival_loop runs in a separate thread and accesses these fields. + */ + pthread_spinlock_t ie_dentry_lock; + /** Last file closed in this directory was read linearly. Directories only. * * Set on close() of a file in the directory to the value of linear_read from the fh. @@ -1106,7 +1141,102 @@ dfuse_inode_decref(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie) d_hash_rec_decref(&dfuse_info->dpi_iet, &ie->ie_htl); } -/* Drop a reference on an inode. */ +/** + * Add a dentry for an inode if it does not already exist. + * + * If the primary dentry (ie_parent, ie_name) is not set, this becomes + * the primary dentry. Otherwise, if the dentry doesn't already exist + * in ie_dentries list, it is added. + * + * \param[in] ie The inode entry + * \param[in] parent The parent inode number + * \param[in] name The name of the entry + * + * \return 0 on success, -DER_NOMEM on allocation failure + */ +int +dfuse_ie_dentry_add(struct dfuse_inode_entry *ie, fuse_ino_t parent, const char *name); + +/** + * Remove a dentry from an inode. + * + * If the matching dentry is the primary, promote the first entry from + * ie_dentries to be the new primary. If ie_dentries is empty, clear + * the primary (ie_name[0] = '\0'). + * + * \param[in] ie The inode entry + * \param[in] parent The parent inode number + * \param[in] name The name of the entry + */ +void +dfuse_ie_dentry_remove(struct dfuse_inode_entry *ie, fuse_ino_t parent, const char *name); + +/** + * Replace a dentry or release all dentries and set a new primary. + * + * If a dentry matching old_parent/old_name exists, replace it with + * new_parent/new_name. If not found, copy the current primary to + * released->dd_parent/dd_name, move ie_dentries to released->dd_list, + * and set new_parent/new_name as the new primary. 
+ * + * \param[in] ie The inode entry + * \param[in] old_parent The old parent inode number to find + * \param[in] old_name The old name to find + * \param[in] new_parent The new parent inode number + * \param[in] new_name The new name + * \param[out] released Structure to receive released dentries if old not found + */ +void +dfuse_ie_dentry_replace(struct dfuse_inode_entry *ie, fuse_ino_t old_parent, const char *old_name, + fuse_ino_t new_parent, const char *new_name, struct dfuse_dentry *released); + +/** + * Copy all dentries from an inode to a released structure. + * + * Copies the primary dentry to released->dd_parent/dd_name, + * moves ie_dentries to released->dd_list. The primary dentry + * in the inode is not cleared for now. + * + * \param[in] ie The inode entry + * \param[out] released Structure to receive all dentries + */ +void +dfuse_ie_dentry_clear(struct dfuse_inode_entry *ie, struct dfuse_dentry *released); + +/** + * Invalidate and free all dentries in a released structure. + * + * Calls fuse_lowlevel_notify_inval_entry() for the primary dentry (if set) + * and each secondary dentry in dd_list, then frees the secondary entries. + * The released struct itself is not freed. No spinlock is acquired. + * + * \param[in] dfuse_info The dfuse info structure (for session) + * \param[in] released Structure containing dentries to invalidate + * + * \return 0 on success, -EBADF if fuse session is dead + */ +int +dfuse_ie_dentry_inval(struct dfuse_info *dfuse_info, struct dfuse_dentry *released); + +/** + * Delete and free all dentries in a released structure. + * + * Calls fuse_lowlevel_notify_inval_inode() for the inode, then + * fuse_lowlevel_notify_delete() for the primary dentry (if set) + * and each secondary dentry in dd_list, then frees the secondary entries. + * The released struct itself is not freed. No spinlock is acquired. + * Skip the dentry matching exclude_parent/exclude_name (kernel already knows about it). 
+ * + * \param[in] dfuse_info The dfuse info structure (for session) + * \param[in] ie The inode entry (for inode number) + * \param[in] released Structure containing dentries to delete + * \param[in] exclude_parent Parent inode of dentry to skip + * \param[in] exclude_name Name of dentry to skip + */ +void + dfuse_ie_inode_delete(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, + struct dfuse_dentry *released, fuse_ino_t exclude_parent, + const char *exclude_name); extern char *duns_xattr_name; @@ -1292,6 +1422,9 @@ void dfuse_cb_symlink(fuse_req_t, const char *, struct dfuse_inode_entry *, const char *); +void +dfuse_cb_link(fuse_req_t, struct dfuse_inode_entry *, struct dfuse_inode_entry *, const char *); + void dfuse_cb_setxattr(fuse_req_t, struct dfuse_inode_entry *, const char *, const char *, size_t, int); @@ -1333,10 +1466,20 @@ dfuse_reply_entry(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *inode int _dfuse_mode_update(fuse_req_t req, struct dfuse_inode_entry *parent, mode_t *_mode); -/* Mark object as removed and invalidate any kernel data for it */ +/* Mark object as removed and invalidate any kernel data for it. + * This function should only be called when the file is actually deleted. + * For hardlink removal where file still exists, use dfuse_hardlink_removed() instead. + */ +void +dfuse_oid_removed(struct dfuse_info *dfuse_info, fuse_req_t req, daos_obj_id_t *oid, + struct dfuse_inode_entry *parent, const char *name); + +/* Handle a hardlink being removed but the file still exists (other links remain). + * Removes the dentry from the inode's tracking and replies to fuse. 
+ */ void -dfuse_oid_unlinked(struct dfuse_info *dfuse_info, fuse_req_t req, daos_obj_id_t *oid, - struct dfuse_inode_entry *parent, const char *name); +dfuse_hardlink_removed(struct dfuse_info *dfuse_info, fuse_req_t req, daos_obj_id_t *oid, + struct dfuse_inode_entry *parent, const char *name); /* dfuse_cont.c */ void diff --git a/src/client/dfuse/dfuse_core.c b/src/client/dfuse/dfuse_core.c index d2081432eae..4d2d13fbe58 100644 --- a/src/client/dfuse/dfuse_core.c +++ b/src/client/dfuse/dfuse_core.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Google LLC. * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -1284,7 +1284,9 @@ dfuse_ie_init(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie) atomic_init(&ie->ie_linear_read, true); atomic_fetch_add_relaxed(&dfuse_info->di_inode_count, 1); D_INIT_LIST_HEAD(&ie->ie_evict_entry); + D_INIT_LIST_HEAD(&ie->ie_dentries); D_RWLOCK_INIT(&ie->ie_wlock, 0); + D_SPIN_INIT(&ie->ie_dentry_lock, PTHREAD_PROCESS_PRIVATE); } void @@ -1324,6 +1326,16 @@ dfuse_ie_close(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie) d_hash_rec_decref(dfp->dfp_cont_table, &dfc->dfs_entry); } + /* Free any remaining secondary dentries */ + while (!d_list_empty(&ie->ie_dentries)) { + struct dfuse_dentry *dd; + + dd = d_list_entry(ie->ie_dentries.next, struct dfuse_dentry, dd_list); + d_list_del(&dd->dd_list); + D_FREE(dd); + } + + D_SPIN_DESTROY(&ie->ie_dentry_lock); dfuse_ie_free(dfuse_info, ie); } @@ -1790,3 +1802,268 @@ dfuse_fs_fini(struct dfuse_info *dfuse_info) return rc; } + +/** + * Add a dentry for an inode if it does not already exist. 
+ */ +int +dfuse_ie_dentry_add(struct dfuse_inode_entry *ie, fuse_ino_t parent, const char *name) +{ + struct dfuse_dentry *dd; + struct dfuse_dentry *new_dd = NULL; + + /* Allocate before taking ie_dentry_lock so nothing is allocated under the spinlock */ + D_ALLOC_PTR(new_dd); + if (new_dd == NULL) + return -DER_NOMEM; + + D_SPIN_LOCK(&ie->ie_dentry_lock); + + /* An empty ie_name means no primary dentry is set yet */ + if (ie->ie_name[0] == '\0') { + /* Record this (parent, name) pair as the primary dentry */ + ie->ie_parent = parent; + strncpy(ie->ie_name, name, NAME_MAX); + ie->ie_name[NAME_MAX] = '\0'; + goto out; + } + + /* Already recorded as the primary dentry: nothing to add */ + if (ie->ie_parent == parent && strncmp(ie->ie_name, name, NAME_MAX) == 0) + goto out; + + /* Already recorded in the secondary dentry list: nothing to add */ + d_list_for_each_entry(dd, &ie->ie_dentries, dd_list) { + if (dd->dd_parent == parent && strncmp(dd->dd_name, name, NAME_MAX) == 0) + goto out; + } + + /* Not found: fill in the preallocated dentry and append it to ie_dentries */ + new_dd->dd_parent = parent; + strncpy(new_dd->dd_name, name, NAME_MAX); + new_dd->dd_name[NAME_MAX] = '\0'; + d_list_add_tail(&new_dd->dd_list, &ie->ie_dentries); + new_dd = NULL; /* Now owned by ie_dentries; do not free it below */ + +out: + D_SPIN_UNLOCK(&ie->ie_dentry_lock); + + if (new_dd != NULL) + D_FREE(new_dd); + + return 0; +} + +/** + * Remove a dentry from an inode. 
+ */ +void +dfuse_ie_dentry_remove(struct dfuse_inode_entry *ie, fuse_ino_t parent, const char *name) +{ + struct dfuse_dentry *dd; + struct dfuse_dentry *free_dd = NULL; + + D_SPIN_LOCK(&ie->ie_dentry_lock); + + /* Check if this matches the primary dentry */ + if (ie->ie_parent == parent && strncmp(ie->ie_name, name, NAME_MAX) == 0) { + /* Primary matches: promote the first secondary dentry, or clear the primary if none */ + if (!d_list_empty(&ie->ie_dentries)) { + dd = d_list_entry(ie->ie_dentries.next, struct dfuse_dentry, dd_list); + ie->ie_parent = dd->dd_parent; + strncpy(ie->ie_name, dd->dd_name, NAME_MAX); + ie->ie_name[NAME_MAX] = '\0'; + d_list_del(&dd->dd_list); + free_dd = dd; + } else { + ie->ie_parent = 0; + ie->ie_name[0] = '\0'; + } + goto out; + } + + /* Not the primary: unlink the matching secondary dentry, if there is one */ + d_list_for_each_entry(dd, &ie->ie_dentries, dd_list) { + if (dd->dd_parent == parent && strncmp(dd->dd_name, name, NAME_MAX) == 0) { + d_list_del(&dd->dd_list); + free_dd = dd; + goto out; + } + } + +out: + D_SPIN_UNLOCK(&ie->ie_dentry_lock); + + if (free_dd != NULL) + D_FREE(free_dd); +} + +/** + * Replace a dentry or release all dentries and set a new primary. 
+ */ +void +dfuse_ie_dentry_replace(struct dfuse_inode_entry *ie, fuse_ino_t old_parent, const char *old_name, + fuse_ino_t new_parent, const char *new_name, struct dfuse_dentry *released) +{ + struct dfuse_dentry *dd; + + /* Start with an empty released set; it stays empty when the old dentry is found */ + released->dd_parent = 0; + released->dd_name[0] = '\0'; + D_INIT_LIST_HEAD(&released->dd_list); + + D_SPIN_LOCK(&ie->ie_dentry_lock); + + /* Check if old dentry matches the primary */ + if (ie->ie_parent == old_parent && strncmp(ie->ie_name, old_name, NAME_MAX) == 0) { + /* Replace primary with new values */ + ie->ie_parent = new_parent; + strncpy(ie->ie_name, new_name, NAME_MAX); + ie->ie_name[NAME_MAX] = '\0'; + goto out; + } + + /* Otherwise look for the old dentry among the secondary dentries */ + d_list_for_each_entry(dd, &ie->ie_dentries, dd_list) { + if (dd->dd_parent == old_parent && strncmp(dd->dd_name, old_name, NAME_MAX) == 0) { + /* Replace this entry with new values */ + dd->dd_parent = new_parent; + strncpy(dd->dd_name, new_name, NAME_MAX); + dd->dd_name[NAME_MAX] = '\0'; + goto out; + } + } + + /* Old dentry not found - hand the current primary and all secondaries to the caller */ + released->dd_parent = ie->ie_parent; + strncpy(released->dd_name, ie->ie_name, NAME_MAX); + released->dd_name[NAME_MAX] = '\0'; + + /* Move ie_dentries to released->dd_list */ + d_list_splice_init(&ie->ie_dentries, &released->dd_list); + + /* The new (parent, name) pair becomes the only dentry of the inode */ + ie->ie_parent = new_parent; + strncpy(ie->ie_name, new_name, NAME_MAX); + ie->ie_name[NAME_MAX] = '\0'; + +out: + D_SPIN_UNLOCK(&ie->ie_dentry_lock); +} + +/** + * Collect all dentries of an inode into a released structure (the primary stays set). 
+ */ +void +dfuse_ie_dentry_clear(struct dfuse_inode_entry *ie, struct dfuse_dentry *released) +{ + /* Initialize released structure */ + released->dd_parent = 0; + released->dd_name[0] = '\0'; + D_INIT_LIST_HEAD(&released->dd_list); + + D_SPIN_LOCK(&ie->ie_dentry_lock); + + /* Copy the primary dentry to released; the inode keeps its own copy */ + released->dd_parent = ie->ie_parent; + strncpy(released->dd_name, ie->ie_name, NAME_MAX); + released->dd_name[NAME_MAX] = '\0'; + + /* Move ie_dentries to released->dd_list */ + d_list_splice_init(&ie->ie_dentries, &released->dd_list); + + /* Let's not clear the primary for now: ie_parent and ie_name are left untouched */ + + D_SPIN_UNLOCK(&ie->ie_dentry_lock); +} + +/** + * Invalidate and free all dentries in a released structure. + * + * Returns -EBADF if fuse session is dead, 0 otherwise. -ENOENT is ignored, other errors are logged. + */ +int +dfuse_ie_dentry_inval(struct dfuse_info *dfuse_info, struct dfuse_dentry *released) +{ + struct dfuse_dentry *dd; + struct dfuse_dentry *ddn; + int rc; + int ret = 0; + + /* Invalidate primary dentry if set */ + if (released->dd_name[0] != '\0') { + rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, released->dd_parent, + released->dd_name, + strnlen(released->dd_name, NAME_MAX)); + if (rc == -EBADF) + ret = -EBADF; + else if (rc != 0 && rc != -ENOENT) + DS_ERROR(-rc, "inval_entry() error"); + } + + /* Invalidate and free secondary dentries */ + d_list_for_each_entry_safe(dd, ddn, &released->dd_list, dd_list) { + rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, dd->dd_parent, + dd->dd_name, strnlen(dd->dd_name, NAME_MAX)); + if (rc == -EBADF) + ret = -EBADF; + else if (rc != 0 && rc != -ENOENT) + DS_ERROR(-rc, "inval_entry() error"); + + d_list_del(&dd->dd_list); + D_FREE(dd); + } + + return ret; +} + +/** + * Delete and free all dentries in a released structure. + * Skip the dentry matching exclude_parent/exclude_name (kernel already knows about it). 
+ */ +void +dfuse_ie_inode_delete(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, + struct dfuse_dentry *released, fuse_ino_t exclude_parent, + const char *exclude_name) +{ + struct dfuse_dentry *dd; + struct dfuse_dentry *ddn; + fuse_ino_t ino = ie->ie_stat.st_ino; + int rc; + + /* Invalidate the data and attribute caches. As this came from an unlink/rename call + * the kernel will have just done a lookup and knows what was likely unlinked so will + * destroy it anyway, but there is a race here so try and destroy it even though most + * of the time we expect this to fail. + */ + rc = fuse_lowlevel_notify_inval_inode(dfuse_info->di_session, ino, 0, 0); + if (rc != 0 && rc != -ENOENT) + DS_ERROR(-rc, "inval_inode() error"); + + /* Delete primary dentry if set and not excluded */ + if (released->dd_name[0] != '\0') { + if (released->dd_parent != exclude_parent || + strncmp(released->dd_name, exclude_name, NAME_MAX) != 0) { + rc = fuse_lowlevel_notify_delete( + dfuse_info->di_session, released->dd_parent, ino, released->dd_name, + strnlen(released->dd_name, NAME_MAX)); + if (rc != 0 && rc != -ENOENT) + DS_ERROR(-rc, "notify_delete() error"); + } + } + + /* Notify delete for each non-excluded secondary dentry; every secondary is freed */ + d_list_for_each_entry_safe(dd, ddn, &released->dd_list, dd_list) { + if (dd->dd_parent != exclude_parent || + strncmp(dd->dd_name, exclude_name, NAME_MAX) != 0) { + rc = fuse_lowlevel_notify_delete(dfuse_info->di_session, dd->dd_parent, ino, + dd->dd_name, + strnlen(dd->dd_name, NAME_MAX)); + if (rc != 0 && rc != -ENOENT) + DS_ERROR(-rc, "notify_delete() error"); + } + + d_list_del(&dd->dd_list); + D_FREE(dd); + } +} diff --git a/src/client/dfuse/dfuse_fuseops.c b/src/client/dfuse/dfuse_fuseops.c index d3183f480cc..a9c181c4670 100644 --- a/src/client/dfuse/dfuse_fuseops.c +++ b/src/client/dfuse/dfuse_fuseops.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -398,6 +398,34 @@ df_ll_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, const char *n DFUSE_REPLY_ERR_RAW(dfuse_info, req, rc); } +/* FUSE link handler: create hard link newname in directory newparent for inode ino */ +static void +df_ll_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, const char *newname) +{ + struct dfuse_info *dfuse_info = fuse_req_userdata(req); + struct dfuse_inode_entry *inode; + struct dfuse_inode_entry *parent_inode; + int rc; + + inode = dfuse_inode_lookup_nf(dfuse_info, ino); + parent_inode = dfuse_inode_lookup_nf(dfuse_info, newparent); + + if (!parent_inode->ie_dfs->dfs_ops->hardlink) + D_GOTO(err, rc = ENOTSUP); + + /* Both inodes must share the same ie_dfs container; a hard link cannot span two of them */ + if (inode->ie_dfs != parent_inode->ie_dfs) + D_GOTO(err, rc = EXDEV); + + DFUSE_IE_STAT_ADD(parent_inode, DS_LINK); + + parent_inode->ie_dfs->dfs_ops->hardlink(req, inode, parent_inode, newname); + + return; +err: + DFUSE_REPLY_ERR_RAW(dfuse_info, req, rc); +} + /* Do not allow security xattrs to be set or read, see DAOS-14639 */ #define XATTR_SEC "security." /* Do not allow either system.posix_acl_default or system.posix_acl_access */ @@ -603,6 +631,7 @@ const struct dfuse_inode_ops dfuse_dfs_ops = { .create = dfuse_cb_create, .rename = dfuse_cb_rename, .symlink = dfuse_cb_symlink, + .hardlink = dfuse_cb_link, .setxattr = dfuse_cb_setxattr, .getxattr = dfuse_cb_getxattr, .listxattr = dfuse_cb_listxattr, @@ -638,6 +667,7 @@ const struct dfuse_inode_ops dfuse_pool_ops = { ACTION(mknod, df_ll_mknod, true) \ ACTION(rename, df_ll_rename, true) \ ACTION(symlink, df_ll_symlink, true) \ + ACTION(link, df_ll_link, true) \ ACTION(setxattr, df_ll_setxattr, true) \ ACTION(getxattr, df_ll_getxattr, false) \ ACTION(listxattr, df_ll_listxattr, false) \ diff --git a/src/client/dfuse/inval.c b/src/client/dfuse/inval.c index 3ddcc052d86..e6f63a2a8a5 100644 --- a/src/client/dfuse/inval.c +++ b/src/client/dfuse/inval.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
+ * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -90,20 +91,9 @@ struct dfuse_time_entry { /* Core data structure, maintains a list of struct dfuse_time_entry lists */ struct dfuse_ival { - d_list_t time_entry_list; - struct fuse_session *session; - bool session_dead; -}; - -/* The core data from struct dfuse_inode_entry. No additional inode references are held on inodes - * because of there place on invalidate lists, rather inodes are removed from any list on close. - * Therefore once a decision is made to evict an inode then a copy of the data is needed as once - * the ival_lock is dropped the inode could be freed. This is not a problem if this happens as the - * kernel will simply return ENOENT. - */ -struct inode_core { - char name[NAME_MAX + 1]; - fuse_ino_t parent; + d_list_t time_entry_list; + struct dfuse_info *dfuse_info; + bool session_dead; }; /* Number of dentries to invalidate per iteration. 
This value affects how long the lock is held, @@ -127,10 +117,13 @@ static bool ival_loop(int *sleep_time) { struct dfuse_time_entry *dte, *dtep; - struct inode_core ic[EVICT_COUNT] = {}; + struct dfuse_dentry ic[EVICT_COUNT] = {}; int idx = 0; double sleep = (60 * 1) - 1; + for (int i = 0; i < EVICT_COUNT; i++) + D_INIT_LIST_HEAD(&ic[i].dd_list); + D_MUTEX_LOCK(&ival_lock); /* Walk the list, oldest first */ @@ -162,9 +155,8 @@ ival_loop(int *sleep_time) continue; } - ic[idx].parent = inode->ie_parent; - strncpy(ic[idx].name, inode->ie_name, NAME_MAX + 1); - ic[idx].name[NAME_MAX] = '\0'; + /* Clear all dentries from the inode for invalidation */ + dfuse_ie_dentry_clear(inode, &ic[idx]); d_list_del_init(&inode->ie_evict_entry); @@ -186,13 +178,10 @@ ival_loop(int *sleep_time) for (int i = 0; i < idx; i++) { int rc; - DFUSE_TRA_DEBUG(&ival_data, "Evicting entry %#lx " DF_DE, ic[i].parent, - DP_DE(ic[i].name)); + DFUSE_TRA_DEBUG(&ival_data, "Evicting entry %#lx " DF_DE, ic[i].dd_parent, + DP_DE(ic[i].dd_name)); - rc = fuse_lowlevel_notify_inval_entry(ival_data.session, ic[i].parent, ic[i].name, - strnlen(ic[i].name, NAME_MAX)); - if (rc && rc != -ENOENT && rc != -EBADF) - DHS_ERROR(&ival_data, -rc, "notify_inval_entry() failed"); + rc = dfuse_ie_dentry_inval(ival_data.dfuse_info, &ic[i]); if (rc == -EBADF) ival_data.session_dead = true; } @@ -290,7 +279,7 @@ ival_thread_start(struct dfuse_info *dfuse_info) { int rc; - ival_data.session = dfuse_info->di_session; + ival_data.dfuse_info = dfuse_info; rc = pthread_create(&ival_thread, NULL, ival_thread_fn, NULL); if (rc != 0) diff --git a/src/client/dfuse/ops/link.c b/src/client/dfuse/ops/link.c new file mode 100644 index 00000000000..c0eaf13516c --- /dev/null +++ b/src/client/dfuse/ops/link.c @@ -0,0 +1,48 @@ +/** + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#include "dfuse_common.h" +#include "dfuse.h" + +void 
+dfuse_cb_link(fuse_req_t req, struct dfuse_inode_entry *inode, struct dfuse_inode_entry *parent, + const char *name) +{ + struct dfuse_info *dfuse_info = fuse_req_userdata(req); + struct dfuse_inode_entry *ie; + int rc; + + D_ALLOC_PTR(ie); + if (!ie) + D_GOTO(err, rc = ENOMEM); + + DFUSE_TRA_UP(ie, parent, "inode"); + + dfuse_ie_init(dfuse_info, ie); + + rc = dfs_link(parent->ie_dfs->dfs_ns, inode->ie_obj, parent->ie_obj, name, &ie->ie_obj, + &ie->ie_stat); + if (rc != 0) + D_GOTO(err, rc); + + DFUSE_TRA_DEBUG(ie, "obj is %p", ie->ie_obj); + + strncpy(ie->ie_name, name, NAME_MAX); + ie->ie_name[NAME_MAX] = '\0'; + ie->ie_parent = parent->ie_stat.st_ino; + ie->ie_dfs = parent->ie_dfs; + + dfs_obj2id(ie->ie_obj, &ie->ie_oid); + + dfuse_compute_inode(ie->ie_dfs, &ie->ie_oid, &ie->ie_stat.st_ino); + + dfuse_reply_entry(dfuse_info, ie, NULL, true, req); + + return; +err: + DFUSE_REPLY_ERR_RAW(parent, req, rc); + dfuse_ie_free(dfuse_info, ie); +} diff --git a/src/client/dfuse/ops/lookup.c b/src/client/dfuse/ops/lookup.c index 1f1f25201b9..b7293be43a3 100644 --- a/src/client/dfuse/ops/lookup.c +++ b/src/client/dfuse/ops/lookup.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -18,10 +18,11 @@ dfuse_reply_entry(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, { struct fuse_entry_param entry = {0}; d_list_t *rlink; - ino_t wipe_parent = 0; - char wipe_name[NAME_MAX + 1]; + struct dfuse_dentry released = {0}; int rc; + D_INIT_LIST_HEAD(&released.dd_list); + D_ASSERT(ie->ie_parent); D_ASSERT(ie->ie_dfs); @@ -110,21 +111,20 @@ dfuse_reply_entry(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie, if (ie->ie_stat.st_ino == ie->ie_dfs->dfs_ino) { DFUSE_TRA_DEBUG(inode, "Not updating parent"); - } else if ((inode->ie_parent != ie->ie_parent) || - (strncmp(inode->ie_name, ie->ie_name, NAME_MAX) != 0)) { - DFUSE_TRA_DEBUG(inode, "File has moved from " DF_DE " to " DF_DE, - DP_DE(inode->ie_name), DP_DE(ie->ie_name)); - - dfs_update_parent(inode->ie_obj, ie->ie_obj, ie->ie_name); - - /* Save the old name so that we can invalidate it in later */ - wipe_parent = inode->ie_parent; - strncpy(wipe_name, inode->ie_name, NAME_MAX); - wipe_name[NAME_MAX] = '\0'; - - inode->ie_parent = ie->ie_parent; - strncpy(inode->ie_name, ie->ie_name, NAME_MAX); - inode->ie_name[NAME_MAX] = '\0'; + } else { + if (ie->ie_stat.st_nlink > 1) { + /* Hardlink: add as additional dentry */ + rc = dfuse_ie_dentry_add(inode, ie->ie_parent, ie->ie_name); + if (rc != 0) + DHL_ERROR(inode, rc, "dentry_add failed"); + dfs_update_parent(inode->ie_obj, ie->ie_obj, ie->ie_name); + } else { + /* Single link: replace dentry, release old ones */ + dfuse_ie_dentry_replace(inode, ie->ie_parent, ie->ie_name, + ie->ie_parent, ie->ie_name, &released); + if (released.dd_name[0] != '\0') + dfs_update_parent(inode->ie_obj, ie->ie_obj, ie->ie_name); + } } atomic_fetch_sub_relaxed(&ie->ie_ref, 1); dfuse_ie_close(dfuse_info, ie); @@ -160,13 +160,7 @@ dfuse_reply_entry(struct dfuse_info 
*dfuse_info, struct dfuse_inode_entry *ie, DFUSE_REPLY_ENTRY(ie, req, entry); } - if (wipe_parent == 0) - return; - - rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, wipe_parent, wipe_name, - strnlen(wipe_name, NAME_MAX)); - if (rc && rc != -ENOENT) - DS_ERROR(-rc, "inval_entry() failed"); + dfuse_ie_dentry_inval(dfuse_info, &released); return; out_err: diff --git a/src/client/dfuse/ops/open.c b/src/client/dfuse/ops/open.c index 6abe707a907..8c086ae0f43 100644 --- a/src/client/dfuse/ops/open.c +++ b/src/client/dfuse/ops/open.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -231,11 +232,11 @@ dfuse_cb_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) dfuse_inode_decref(dfuse_info, oh->doh_parent_dir); } if (ie) { - rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, ie->ie_parent, - ie->ie_name, strnlen(ie->ie_name, NAME_MAX)); + struct dfuse_dentry released = {0}; - if (rc != 0 && rc != -ENOENT) - DHS_ERROR(ie, -rc, "inval_entry() error"); + D_INIT_LIST_HEAD(&released.dd_list); + dfuse_ie_dentry_clear(ie, &released); + dfuse_ie_dentry_inval(dfuse_info, &released); dfuse_inode_decref(dfuse_info, ie); } dfuse_oh_free(dfuse_info, oh); diff --git a/src/client/dfuse/ops/opendir.c b/src/client/dfuse/ops/opendir.c index 60d1e6ab4d5..4c139519f73 100644 --- a/src/client/dfuse/ops/opendir.c +++ b/src/client/dfuse/ops/opendir.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
+ * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -81,13 +82,11 @@ dfuse_cb_releasedir(fuse_req_t req, struct dfuse_inode_entry *ino, struct fuse_f DFUSE_REPLY_ZERO_OH(oh, req); if (ie) { - int rc; + struct dfuse_dentry released = {0}; - rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, ie->ie_parent, - ie->ie_name, strnlen(ie->ie_name, NAME_MAX)); - - if (rc != 0 && rc != -ENOENT) - DHS_ERROR(ie, -rc, "inval_entry() error"); + D_INIT_LIST_HEAD(&released.dd_list); + dfuse_ie_dentry_clear(ie, &released); + dfuse_ie_dentry_inval(dfuse_info, &released); dfuse_inode_decref(dfuse_info, ie); } dfuse_oh_free(dfuse_info, oh); diff --git a/src/client/dfuse/ops/rename.c b/src/client/dfuse/ops/rename.c index fcf95b82466..d142d2e51a6 100644 --- a/src/client/dfuse/ops/rename.c +++ b/src/client/dfuse/ops/rename.c @@ -1,11 +1,13 @@ /** * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ #include "dfuse_common.h" #include "dfuse.h" +#include /* Handle a file that has been moved. 
* @@ -18,9 +20,11 @@ dfuse_oid_moved(struct dfuse_info *dfuse_info, daos_obj_id_t *oid, struct dfuse_ const char *name, struct dfuse_inode_entry *newparent, const char *newname) { struct dfuse_inode_entry *ie; - int rc; + struct dfuse_dentry released = {0}; ino_t ino; + D_INIT_LIST_HEAD(&released.dd_list); + dfuse_compute_inode(parent->ie_dfs, oid, &ino); DFUSE_TRA_DEBUG(dfuse_info, "Renamed file was %#lx", ino); @@ -29,25 +33,16 @@ dfuse_oid_moved(struct dfuse_info *dfuse_info, daos_obj_id_t *oid, struct dfuse_ if (!ie) return; - /* If the move is not from where we thought the file was then invalidate the old entry */ - if ((ie->ie_parent != parent->ie_stat.st_ino) || - (strncmp(ie->ie_name, name, NAME_MAX) != 0)) { - DFUSE_TRA_DEBUG(ie, "Invalidating old name"); - - rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, ie->ie_parent, - ie->ie_name, strnlen(ie->ie_name, NAME_MAX)); - - if (rc && rc != -ENOENT) - DFUSE_TRA_ERROR(ie, "inval_entry() returned: %d (%s)", rc, strerror(-rc)); - } - - /* Update the inode entry data */ - ie->ie_parent = newparent->ie_stat.st_ino; - strncpy(ie->ie_name, newname, NAME_MAX); + /* Replace old dentry with new, releasing any stale dentries */ + dfuse_ie_dentry_replace(ie, parent->ie_stat.st_ino, name, newparent->ie_stat.st_ino, + newname, &released); - /* Set the new parent and name */ + /* Set the new parent and name in the DFS object */ dfs_update_parentfd(ie->ie_obj, newparent->ie_obj, newname); + /* Invalidate any released dentries from the cache */ + dfuse_ie_dentry_inval(dfuse_info, &released); + /* Drop the ref again */ dfuse_inode_decref(dfuse_info, ie); } @@ -60,6 +55,7 @@ dfuse_cb_rename(fuse_req_t req, struct dfuse_inode_entry *parent, struct dfuse_info *dfuse_info = fuse_req_userdata(req); daos_obj_id_t moid = {}; daos_obj_id_t oid = {}; + bool deleted = true; int rc; if (flags != 0) { @@ -86,7 +82,7 @@ dfuse_cb_rename(fuse_req_t req, struct dfuse_inode_entry *parent, } rc = 
dfs_move_internal(parent->ie_dfs->dfs_ns, flags, parent->ie_obj, (char *)name, - newparent->ie_obj, (char *)newname, &moid, &oid); + newparent->ie_obj, (char *)newname, &moid, &oid, &deleted); if (rc) D_GOTO(out, rc); @@ -96,10 +92,15 @@ dfuse_cb_rename(fuse_req_t req, struct dfuse_inode_entry *parent, dfuse_oid_moved(dfuse_info, &moid, parent, name, newparent, newname); /* Check if a file was unlinked and see if anything needs updating */ - if (oid.lo || oid.hi) - dfuse_oid_unlinked(dfuse_info, req, &oid, newparent, newname); - else + if (oid.lo || oid.hi) { + if (!deleted) { + dfuse_hardlink_removed(dfuse_info, req, &oid, newparent, newname); + } else { + dfuse_oid_removed(dfuse_info, req, &oid, newparent, newname); + } + } else { DFUSE_REPLY_ZERO(newparent, req); + } return; diff --git a/src/client/dfuse/ops/setxattr.c b/src/client/dfuse/ops/setxattr.c index e50c010b377..8d752080228 100644 --- a/src/client/dfuse/ops/setxattr.c +++ b/src/client/dfuse/ops/setxattr.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2022 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -56,12 +57,12 @@ dfuse_cb_setxattr(fuse_req_t req, struct dfuse_inode_entry *inode, * will be skipped. 
*/ if (duns_attr && inode->ie_dfs->dfc_dentry_dir_timeout > 0) { - struct dfuse_info *dfuse_info = fuse_req_userdata(req); + struct dfuse_info *dfuse_info = fuse_req_userdata(req); + struct dfuse_dentry released = {0}; - rc = fuse_lowlevel_notify_inval_entry(dfuse_info->di_session, - inode->ie_parent, inode->ie_name, - strnlen(inode->ie_name, NAME_MAX)); - DFUSE_TRA_INFO(inode, "inval_entry() rc is %d", rc); + D_INIT_LIST_HEAD(&released.dd_list); + dfuse_ie_dentry_clear(inode, &released); + dfuse_ie_dentry_inval(dfuse_info, &released); } DFUSE_REPLY_ZERO(inode, req); return; diff --git a/src/client/dfuse/ops/unlink.c b/src/client/dfuse/ops/unlink.c index 778d44e0bed..cc183563954 100644 --- a/src/client/dfuse/ops/unlink.c +++ b/src/client/dfuse/ops/unlink.c @@ -1,26 +1,51 @@ /** * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ #include "dfuse_common.h" #include "dfuse.h" +#include + +/* Handle a hardlink being removed but the file still exists (other links remain). + * Removes the dentry from the inode's tracking and replies to fuse. + */ +void +dfuse_hardlink_removed(struct dfuse_info *dfuse_info, fuse_req_t req, daos_obj_id_t *oid, + struct dfuse_inode_entry *parent, const char *name) +{ + struct dfuse_inode_entry *ie; + fuse_ino_t ino; + + dfuse_compute_inode(parent->ie_dfs, oid, &ino); + ie = dfuse_inode_lookup(dfuse_info, ino); + if (ie) { + dfuse_ie_dentry_remove(ie, parent->ie_stat.st_ino, name); + dfuse_inode_decref(dfuse_info, ie); + } + DFUSE_REPLY_ZERO(parent, req); +} /* Handle a file that has been unlinked via dfuse. This means that either a unlink or rename call * caused the file to be deleted. * Takes the oid of the deleted file, and the parent/name where the delete happened. + * This is only called when the file was actually deleted (last link removed or regular file).
+ * When other links remain, callers use dfuse_hardlink_removed() instead. * * Will always call DFUSE_REPLY_ZERO() after updating local state but before updating kernel. */ void -dfuse_oid_unlinked(struct dfuse_info *dfuse_info, fuse_req_t req, daos_obj_id_t *oid, - struct dfuse_inode_entry *parent, const char *name) +dfuse_oid_removed(struct dfuse_info *dfuse_info, fuse_req_t req, daos_obj_id_t *oid, + struct dfuse_inode_entry *parent, const char *name) { struct dfuse_inode_entry *ie; - int rc; + struct dfuse_dentry released = {0}; fuse_ino_t ino; - ino_t parent_ino; + fuse_ino_t parent_ino = parent->ie_stat.st_ino; + + D_INIT_LIST_HEAD(&released.dd_list); dfuse_compute_inode(parent->ie_dfs, oid, &ino); @@ -31,38 +56,17 @@ dfuse_oid_unlinked(struct dfuse_info *dfuse_info, fuse_req_t req, daos_obj_id_t } DFUSE_TRA_DEBUG(ie, "Setting inode as deleted"); - ie->ie_unlinked = true; - - parent_ino = parent->ie_stat.st_ino; + /* Clear all dentries for deletion notification */ + dfuse_ie_dentry_clear(ie, &released); /* At this point the request is complete so the kernel is free to drop any refs on parent * so it should not be accessed. */ DFUSE_REPLY_ZERO(parent, req); - /* If caching is enabled then invalidate the data and attribute caches. As this came a - * unlink/rename call the kernel will have just done a lookup and knows what was likely - * unlinked so will destroy it anyway, but there is a race here so try and destroy it - * even though most of the time we expect this to fail. - */ - rc = fuse_lowlevel_notify_inval_inode(dfuse_info->di_session, ino, 0, 0); - if (rc && rc != -ENOENT) - DHS_ERROR(ie, -rc, "inval_inode() error"); - - /* If the kernel was aware of this inode at an old location then remove that which should - * trigger a forget call. Checking the test logs shows that we do see the forget anyway - * for cases where the kernel knows which file it deleted.
- */ - if ((ie->ie_parent != parent_ino) || (strncmp(ie->ie_name, name, NAME_MAX) != 0)) { - DFUSE_TRA_DEBUG(ie, "Telling kernel to forget %#lx " DF_DE, ie->ie_parent, - DP_DE(ie->ie_name)); - - rc = fuse_lowlevel_notify_delete(dfuse_info->di_session, ie->ie_parent, ino, - ie->ie_name, strnlen(ie->ie_name, NAME_MAX)); - if (rc && rc != -ENOENT) - DHS_ERROR(ie, -rc, "notify_delete() error"); - } + /* Delete all dentries from the kernel */ + dfuse_ie_inode_delete(dfuse_info, ie, &released, parent_ino, name); /* Drop the ref again */ dfuse_inode_decref(dfuse_info, ie); @@ -73,11 +77,13 @@ dfuse_cb_unlink(fuse_req_t req, struct dfuse_inode_entry *parent, const char *na { struct dfuse_info *dfuse_info = fuse_req_userdata(req); int rc; - daos_obj_id_t oid = {}; + daos_obj_id_t oid = {}; + bool deleted = true; dfuse_cache_evict_dir(dfuse_info, parent); - rc = dfs_remove(parent->ie_dfs->dfs_ns, parent->ie_obj, name, false, &oid); + rc = dfs_remove_internal(parent->ie_dfs->dfs_ns, parent->ie_obj, name, false, &oid, + &deleted); if (rc != 0) { DFUSE_REPLY_ERR_RAW(parent, req, rc); return; @@ -85,5 +91,10 @@ dfuse_cb_unlink(fuse_req_t req, struct dfuse_inode_entry *parent, const char *na D_ASSERT(oid.lo || oid.hi); - dfuse_oid_unlinked(dfuse_info, req, &oid, parent, name); + if (!deleted) { + dfuse_hardlink_removed(dfuse_info, req, &oid, parent, name); + return; + } + + dfuse_oid_removed(dfuse_info, req, &oid, parent, name); } diff --git a/src/common/misc.c b/src/common/misc.c index 12b6d6c788d..5884230f3d4 100644 --- a/src/common/misc.c +++ b/src/common/misc.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -105,9 +105,9 @@ daos_sgls_copy_internal(d_sg_list_t *dst_sgl, uint32_t dst_nr, if (src_sgl[i].sg_iovs[j].iov_len > dst_sgl[i].sg_iovs[j].iov_buf_len) { - D_ERROR("%d:%d "DF_U64" > "DF_U64"\n", - i, j, src_sgl[i].sg_iovs[j].iov_len, - src_sgl[i].sg_iovs[j].iov_buf_len); + D_ERROR("%d:%d " DF_U64 " > " DF_U64 "\n", i, j, + src_sgl[i].sg_iovs[j].iov_len, + dst_sgl[i].sg_iovs[j].iov_buf_len); D_GOTO(out, rc = -DER_INVAL); } memcpy(dst_sgl[i].sg_iovs[j].iov_buf, diff --git a/src/include/daos/dfs_lib_int.h b/src/include/daos/dfs_lib_int.h index 7df6d09a5e4..d13c14543ac 100644 --- a/src/include/daos/dfs_lib_int.h +++ b/src/include/daos/dfs_lib_int.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2023 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -56,13 +57,15 @@ dfs_lookupx(dfs_t *dfs, dfs_obj_t *parent, const char *name, int flags, dfs_obj_ daos_size_t *xsizes); /* moid is moved oid, oid is clobbered file. + * deleted indicates if the clobbered file was actually deleted (last link or regular file), + * or just a hardlink was removed (file still has other links). * This isn't yet fully compatible with dfuse because we also want to pass in a flag for if the * destination exists. 
*/ int dfs_move_internal(dfs_t *dfs, unsigned int flags, dfs_obj_t *parent, const char *name, dfs_obj_t *new_parent, const char *new_name, daos_obj_id_t *moid, - daos_obj_id_t *oid); + daos_obj_id_t *oid, bool *deleted); /* Set the in-memory parent, but takes the parent, rather than another file object */ void @@ -102,6 +105,23 @@ dfs_relink_root(daos_handle_t coh); int dfs_ostatx(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, daos_event_t *ev); +/** + * Internal routine for remove that returns whether the file was actually deleted. + * For hardlinks, the file is only deleted when the last link is removed. + * + * \param[in] dfs DFS handle + * \param[in] parent Parent directory object + * \param[in] name Name of entry to remove + * \param[in] force If true, remove directory contents recursively + * \param[out] oid OID of the removed entry (optional) + * \param[out] deleted Set to true if the file was actually deleted, false if only a link was + * removed (hardlink case). Optional, can be NULL. + * \return 0 on success, errno on failure + */ +int +dfs_remove_internal(dfs_t *dfs, dfs_obj_t *parent, const char *name, bool force, daos_obj_id_t *oid, + bool *deleted); + #if defined(__cplusplus) } #endif diff --git a/src/include/daos_fs.h b/src/include/daos_fs.h index 53efcd8a711..d8b76dc9461 100644 --- a/src/include/daos_fs.h +++ b/src/include/daos_fs.h @@ -1,6 +1,6 @@ /* * (C) Copyright 2018-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -894,6 +894,23 @@ int dfs_exchange(dfs_t *dfs, dfs_obj_t *parent1, const char *name1, dfs_obj_t *parent2, const char *name2); +/** + * Create a hard link to an existing file. + * + * \param[in] dfs Pointer to the mounted file system. + * \param[in] obj Open object to create a hard link to. 
+ * \param[in] parent Opened parent directory object where the new link will be created. + * If NULL, use root obj. + * \param[in] name Link name of the new hard link. + * \param[out] new_obj Optional: pointer to return the new opened object handle. + * \param[out] stbuf Optional: stat struct of the linked object. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_link(dfs_t *dfs, dfs_obj_t *obj, dfs_obj_t *parent, const char *name, dfs_obj_t **new_obj, + struct stat *stbuf); + /** * Retrieve mode of an open object. * diff --git a/src/tests/suite/dfs_unit_test.c b/src/tests/suite/dfs_unit_test.c index 6a537ffa048..bf42b2c40f8 100644 --- a/src/tests/suite/dfs_unit_test.c +++ b/src/tests/suite/dfs_unit_test.c @@ -3558,63 +3558,4161 @@ dfs_test_pipeline_find(void **state) test_pipeline_find(state, OC_RP_3GX); } +/** + * Test hardlink functionality: + * 1. Create 2 directories + * 2. Create a file in the first directory + * 3. Stat the file and verify nlink == 1 + * 4. Create a hardlink to the file in the same directory + * 5. Stat both files - verify nlink == 2 and other properties match + * 6. Create a hardlink in the second directory + * 7. Stat all 3 files - verify nlink == 3 and properties match + * 8. Delete first file - verify nlink == 2 + * 9. Delete third file - verify nlink == 1 + * 10. 
Delete remaining file - verify object is removed (stat returns ENOENT) + */ +static void +dfs_test_hardlink(void **state) +{ + test_arg_t *arg = *state; + dfs_obj_t *dir1, *dir2; + dfs_obj_t *file1, *file2, *file3; + struct stat stbuf1, stbuf2, stbuf3; + struct stat stbuf_orig, stbuf_prev; + daos_obj_id_t oid1, oid2, oid3, removed_oid; + int rc; + + if (arg->myrank != 0) + return; + + print_message("Creating 2 directories...\n"); + + /* Step 1: Create 2 directories */ + rc = dfs_open(dfs_mt, NULL, "hldir1", S_IFDIR | S_IWUSR | S_IRUSR | S_IXUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &dir1); + assert_int_equal(rc, 0); + + rc = dfs_open(dfs_mt, NULL, "hldir2", S_IFDIR | S_IWUSR | S_IRUSR | S_IXUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &dir2); + assert_int_equal(rc, 0); + + /* Step 2: Create a file in the first directory */ + print_message("Creating file in first directory...\n"); + rc = dfs_open(dfs_mt, dir1, "testfile", S_IFREG | S_IWUSR | S_IRUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &file1); + assert_int_equal(rc, 0); + + /* Get the object ID for later comparison */ + rc = dfs_obj2id(file1, &oid1); + assert_int_equal(rc, 0); + + /* Step 3: Stat the file and save info - nlink should be 1 */ + print_message("Stat original file - expecting nlink=1...\n"); + rc = dfs_ostat(dfs_mt, file1, &stbuf_orig); + assert_int_equal(rc, 0); + assert_int_equal(stbuf_orig.st_nlink, 1); + print_message(" st_nlink = %lu (expected 1)\n", (unsigned long)stbuf_orig.st_nlink); + print_message(" st_ino = %lu\n", (unsigned long)stbuf_orig.st_ino); + print_message(" st_mode = 0%o\n", stbuf_orig.st_mode); + print_message(" st_uid = %u\n", stbuf_orig.st_uid); + print_message(" st_gid = %u\n", stbuf_orig.st_gid); + print_message(" st_mtim = %ld.%09ld\n", stbuf_orig.st_mtim.tv_sec, + stbuf_orig.st_mtim.tv_nsec); + print_message(" st_ctim = %ld.%09ld\n", stbuf_orig.st_ctim.tv_sec, + stbuf_orig.st_ctim.tv_nsec); + + /* Save for later comparison */ + stbuf_prev = stbuf_orig; + + /* Step 
4: Create a hardlink in the same directory */ + print_message("Creating hardlink in same directory...\n"); + rc = dfs_link(dfs_mt, file1, dir1, "testfile_link1", &file2, &stbuf2); + assert_int_equal(rc, 0); + + /* Get the object ID - should match original */ + rc = dfs_obj2id(file2, &oid2); + assert_int_equal(rc, 0); + assert_true(oid1.lo == oid2.lo && oid1.hi == oid2.hi); + + /* Step 5: Stat first and second files - nlink should be 2 */ + print_message("Stat both files - expecting nlink=2...\n"); + rc = dfs_ostat(dfs_mt, file1, &stbuf1); + assert_int_equal(rc, 0); + assert_int_equal(stbuf1.st_nlink, 2); + print_message(" file1: st_nlink = %lu (expected 2)\n", (unsigned long)stbuf1.st_nlink); + + /* stbuf2 was already filled by dfs_link */ + assert_int_equal(stbuf2.st_nlink, 2); + print_message(" file2: st_nlink = %lu (expected 2)\n", (unsigned long)stbuf2.st_nlink); + + /* Verify other properties match (inode, mode, uid, gid should be same) */ + assert_int_equal(stbuf1.st_ino, stbuf2.st_ino); + assert_int_equal(stbuf1.st_ino, stbuf_orig.st_ino); + assert_int_equal(stbuf1.st_mode & ~S_IFMT, stbuf_orig.st_mode & ~S_IFMT); + assert_int_equal(stbuf1.st_uid, stbuf_orig.st_uid); + assert_int_equal(stbuf1.st_gid, stbuf_orig.st_gid); + + /* + * POSIX: Creating a hardlink updates ctime (metadata changed). + * mtime should NOT change (file content not modified). 
+ */ + print_message(" Checking ctime/mtime after first hardlink creation...\n"); + assert_true(check_ts(stbuf_prev.st_ctim, stbuf1.st_ctim)); + print_message(" Verified: ctime updated (prev < current)\n"); + /* mtime should remain unchanged */ + assert_int_equal(stbuf1.st_mtim.tv_sec, stbuf_orig.st_mtim.tv_sec); + assert_int_equal(stbuf1.st_mtim.tv_nsec, stbuf_orig.st_mtim.tv_nsec); + print_message(" Verified: mtime unchanged\n"); + print_message(" Verified: ino, mode, uid, gid match original\n"); + + /* Save current state for next comparison */ + stbuf_prev = stbuf1; + + /* Step 6: Create a hardlink in the second directory */ + print_message("Creating hardlink in second directory...\n"); + rc = dfs_link(dfs_mt, file1, dir2, "testfile_link2", &file3, &stbuf3); + assert_int_equal(rc, 0); + + /* Get the object ID - should match original */ + rc = dfs_obj2id(file3, &oid3); + assert_int_equal(rc, 0); + assert_true(oid1.lo == oid3.lo && oid1.hi == oid3.hi); + + /* Step 7: Stat all 3 files - nlink should be 3 */ + print_message("Stat all 3 files - expecting nlink=3...\n"); + rc = dfs_ostat(dfs_mt, file1, &stbuf1); + assert_int_equal(rc, 0); + assert_int_equal(stbuf1.st_nlink, 3); + print_message(" file1: st_nlink = %lu (expected 3)\n", (unsigned long)stbuf1.st_nlink); + + rc = dfs_ostat(dfs_mt, file2, &stbuf2); + assert_int_equal(rc, 0); + assert_int_equal(stbuf2.st_nlink, 3); + print_message(" file2: st_nlink = %lu (expected 3)\n", (unsigned long)stbuf2.st_nlink); + + /* stbuf3 was already filled by dfs_link, but let's re-stat to be sure */ + rc = dfs_ostat(dfs_mt, file3, &stbuf3); + assert_int_equal(rc, 0); + assert_int_equal(stbuf3.st_nlink, 3); + print_message(" file3: st_nlink = %lu (expected 3)\n", (unsigned long)stbuf3.st_nlink); + + /* All should have same inode */ + assert_int_equal(stbuf1.st_ino, stbuf2.st_ino); + assert_int_equal(stbuf2.st_ino, stbuf3.st_ino); + assert_int_equal(stbuf1.st_ino, stbuf_orig.st_ino); + print_message(" Verified: all files have 
same inode\n"); + + /* + * POSIX: Creating a hardlink updates ctime (metadata changed). + * mtime should NOT change (file content not modified). + */ + print_message(" Checking ctime/mtime after second hardlink creation...\n"); + assert_true(check_ts(stbuf_prev.st_ctim, stbuf1.st_ctim)); + print_message(" Verified: ctime updated (prev < current)\n"); + /* mtime should remain unchanged from original */ + assert_int_equal(stbuf1.st_mtim.tv_sec, stbuf_orig.st_mtim.tv_sec); + assert_int_equal(stbuf1.st_mtim.tv_nsec, stbuf_orig.st_mtim.tv_nsec); + print_message(" Verified: mtime unchanged from original\n"); + + /* Save current state for next comparison */ + stbuf_prev = stbuf1; + + /* Step 8: Delete the first file and check stat */ + print_message("Removing first file...\n"); + rc = dfs_remove(dfs_mt, dir1, "testfile", false, &removed_oid); + assert_int_equal(rc, 0); + + /* file2 and file3 should now have nlink=2 */ + rc = dfs_ostat(dfs_mt, file2, &stbuf2); + assert_int_equal(rc, 0); + assert_int_equal(stbuf2.st_nlink, 2); + print_message(" file2: st_nlink = %lu (expected 2)\n", (unsigned long)stbuf2.st_nlink); + + rc = dfs_ostat(dfs_mt, file3, &stbuf3); + assert_int_equal(rc, 0); + assert_int_equal(stbuf3.st_nlink, 2); + print_message(" file3: st_nlink = %lu (expected 2)\n", (unsigned long)stbuf3.st_nlink); + + /* file1 handle should still be valid but stat via name should fail */ + rc = dfs_stat(dfs_mt, dir1, "testfile", &stbuf1); + assert_int_equal(rc, ENOENT); + print_message(" Verified: original name no longer exists (ENOENT)\n"); + + /* + * POSIX: Removing a hardlink updates ctime (metadata changed - link count decreased). + * mtime should NOT change (file content not modified). 
+ */ + print_message(" Checking ctime/mtime after first unlink...\n"); + assert_true(check_ts(stbuf_prev.st_ctim, stbuf2.st_ctim)); + print_message(" Verified: ctime updated (prev < current)\n"); + /* mtime should remain unchanged from original */ + assert_int_equal(stbuf2.st_mtim.tv_sec, stbuf_orig.st_mtim.tv_sec); + assert_int_equal(stbuf2.st_mtim.tv_nsec, stbuf_orig.st_mtim.tv_nsec); + print_message(" Verified: mtime unchanged from original\n"); + + /* Save current state for next comparison */ + stbuf_prev = stbuf2; + + /* Step 9: Delete the third file (in dir2) and check stat */ + print_message("Removing third file (from dir2)...\n"); + rc = dfs_remove(dfs_mt, dir2, "testfile_link2", false, NULL); + assert_int_equal(rc, 0); + + /* file2 should now have nlink=1 */ + rc = dfs_ostat(dfs_mt, file2, &stbuf2); + assert_int_equal(rc, 0); + assert_int_equal(stbuf2.st_nlink, 1); + print_message(" file2: st_nlink = %lu (expected 1)\n", (unsigned long)stbuf2.st_nlink); + + /* + * POSIX: Removing a hardlink updates ctime (metadata changed - link count decreased). + * mtime should NOT change (file content not modified). + */ + print_message(" Checking ctime/mtime after second unlink...\n"); + assert_true(check_ts(stbuf_prev.st_ctim, stbuf2.st_ctim)); + print_message(" Verified: ctime updated (prev < current)\n"); + /* mtime should remain unchanged from original */ + assert_int_equal(stbuf2.st_mtim.tv_sec, stbuf_orig.st_mtim.tv_sec); + assert_int_equal(stbuf2.st_mtim.tv_nsec, stbuf_orig.st_mtim.tv_nsec); + print_message(" Verified: mtime unchanged from original\n"); + + /* Step 10: Delete the last remaining file */ + print_message("Removing last file...\n"); + rc = dfs_remove(dfs_mt, dir1, "testfile_link1", false, &removed_oid); + assert_int_equal(rc, 0); + + /* + * Verify the object is really deleted by trying to stat via + * the open handle - should return ENOENT after the object + * is punched/deleted from the backend. 
+ */ + rc = dfs_ostat(dfs_mt, file2, &stbuf2); + assert_int_equal(rc, ENOENT); + print_message(" Verified: object is deleted (stat on handle returns ENOENT)\n"); + + /* Also verify stat by name fails */ + rc = dfs_stat(dfs_mt, dir1, "testfile_link1", &stbuf2); + assert_int_equal(rc, ENOENT); + print_message(" Verified: name no longer exists (ENOENT)\n"); + + /* Cleanup: release all handles */ + rc = dfs_release(file1); + assert_int_equal(rc, 0); + rc = dfs_release(file2); + assert_int_equal(rc, 0); + rc = dfs_release(file3); + assert_int_equal(rc, 0); + + /* Remove directories */ + rc = dfs_release(dir1); + assert_int_equal(rc, 0); + rc = dfs_release(dir2); + assert_int_equal(rc, 0); + + rc = dfs_remove(dfs_mt, NULL, "hldir1", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "hldir2", false, NULL); + assert_int_equal(rc, 0); + + print_message("Hardlink test completed successfully!\n"); +} + +static void +dfs_test_hardlink_chmod_chown(void **state) +{ + test_arg_t *arg = *state; + dfs_obj_t *dir1, *dir2; + dfs_obj_t *file1, *file2, *file3; + struct stat stbuf1, stbuf2, stbuf3; + struct stat stbuf_orig, stbuf_prev; + daos_obj_id_t oid1, oid2, oid3; + mode_t orig_mode, new_mode; + uid_t orig_uid, new_uid; + gid_t orig_gid, new_gid; + int rc; + + if (arg->myrank != 0) + return; + + print_message("=== Hardlink chmod/chown test ===\n"); + + /* Step 1: Create 2 directories */ + print_message("Creating 2 directories...\n"); + rc = dfs_open(dfs_mt, NULL, "hl_chmod_dir1", S_IFDIR | S_IWUSR | S_IRUSR | S_IXUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &dir1); + assert_int_equal(rc, 0); + + rc = dfs_open(dfs_mt, NULL, "hl_chmod_dir2", S_IFDIR | S_IWUSR | S_IRUSR | S_IXUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &dir2); + assert_int_equal(rc, 0); + + /* Step 2: Create original file in dir1 */ + print_message("Creating original file in dir1...\n"); + rc = dfs_open(dfs_mt, dir1, "original", S_IFREG | S_IWUSR | S_IRUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, 
NULL, &file1); + assert_int_equal(rc, 0); + + rc = dfs_obj2id(file1, &oid1); + assert_int_equal(rc, 0); + + /* Get original stats */ + rc = dfs_ostat(dfs_mt, file1, &stbuf_orig); + assert_int_equal(rc, 0); + orig_mode = stbuf_orig.st_mode; + orig_uid = stbuf_orig.st_uid; + orig_gid = stbuf_orig.st_gid; + print_message(" Original: mode=0%o, uid=%u, gid=%u, nlink=%lu\n", orig_mode, orig_uid, + orig_gid, (unsigned long)stbuf_orig.st_nlink); + assert_int_equal(stbuf_orig.st_nlink, 1); + + /* Step 3: Create hardlink in same directory (dir1) */ + print_message("Creating hardlink 'link1' in same directory (dir1)...\n"); + rc = dfs_link(dfs_mt, file1, dir1, "link1", &file2, &stbuf2); + assert_int_equal(rc, 0); + + rc = dfs_obj2id(file2, &oid2); + assert_int_equal(rc, 0); + assert_true(oid1.lo == oid2.lo && oid1.hi == oid2.hi); + print_message(" Verified: OIDs match\n"); + + /* Step 4: Create hardlink in different directory (dir2) */ + print_message("Creating hardlink 'link2' in different directory (dir2)...\n"); + rc = dfs_link(dfs_mt, file1, dir2, "link2", &file3, &stbuf3); + assert_int_equal(rc, 0); + + rc = dfs_obj2id(file3, &oid3); + assert_int_equal(rc, 0); + assert_true(oid1.lo == oid3.lo && oid1.hi == oid3.hi); + print_message(" Verified: OIDs match\n"); + + /* Verify all have nlink=3 */ + rc = dfs_ostat(dfs_mt, file1, &stbuf1); + assert_int_equal(rc, 0); + assert_int_equal(stbuf1.st_nlink, 3); + rc = dfs_ostat(dfs_mt, file2, &stbuf2); + assert_int_equal(rc, 0); + assert_int_equal(stbuf2.st_nlink, 3); + rc = dfs_ostat(dfs_mt, file3, &stbuf3); + assert_int_equal(rc, 0); + assert_int_equal(stbuf3.st_nlink, 3); + print_message(" All 3 files have nlink=3\n"); + + /* Save state for ctime comparison */ + stbuf_prev = stbuf1; + + /* ========== CHMOD TESTS ========== */ + print_message("\n--- Testing chmod on hardlink ---\n"); + + /* Step 5: chmod on the second hardlink (link1 in dir1) */ + new_mode = S_IFREG | S_IRWXU | S_IRGRP | S_IXGRP; /* rwx for user, rx for group */ + 
print_message("Calling chmod on 'link1' to mode 0%o...\n", new_mode & ~S_IFMT); + rc = dfs_chmod(dfs_mt, dir1, "link1", new_mode); + assert_int_equal(rc, 0); + + /* Verify mode changed on ALL hardlinks */ + print_message("Verifying mode change visible on all hardlinks...\n"); + rc = dfs_ostat(dfs_mt, file1, &stbuf1); + assert_int_equal(rc, 0); + assert_int_equal(stbuf1.st_mode, new_mode); + print_message(" file1 (original): mode=0%o - PASS\n", stbuf1.st_mode); + + rc = dfs_ostat(dfs_mt, file2, &stbuf2); + assert_int_equal(rc, 0); + assert_int_equal(stbuf2.st_mode, new_mode); + print_message(" file2 (link1): mode=0%o - PASS\n", stbuf2.st_mode); + + rc = dfs_ostat(dfs_mt, file3, &stbuf3); + assert_int_equal(rc, 0); + assert_int_equal(stbuf3.st_mode, new_mode); + print_message(" file3 (link2 in dir2): mode=0%o - PASS\n", stbuf3.st_mode); + + /* Verify ctime updated (chmod changes metadata) */ + assert_true(check_ts(stbuf_prev.st_ctim, stbuf1.st_ctim)); + print_message(" Verified: ctime updated after chmod\n"); + + /* Also verify via path lookup */ + rc = dfs_stat(dfs_mt, dir1, "original", &stbuf1); + assert_int_equal(rc, 0); + assert_int_equal(stbuf1.st_mode, new_mode); + rc = dfs_stat(dfs_mt, dir2, "link2", &stbuf3); + assert_int_equal(rc, 0); + assert_int_equal(stbuf3.st_mode, new_mode); + print_message(" Verified via path lookup as well\n"); + + stbuf_prev = stbuf1; + + /* ========== CHOWN TESTS ========== */ + print_message("\n--- Testing chown on hardlink ---\n"); + + /* Step 6: chown on the third hardlink (link2 in dir2) */ + new_uid = 1000; + new_gid = 2000; + print_message("Calling chown on 'link2' (in dir2) to uid=%u, gid=%u...\n", new_uid, + new_gid); + rc = dfs_chown(dfs_mt, dir2, "link2", new_uid, new_gid, 0); + assert_int_equal(rc, 0); + + /* Verify uid/gid changed on ALL hardlinks */ + print_message("Verifying uid/gid change visible on all hardlinks...\n"); + rc = dfs_ostat(dfs_mt, file1, &stbuf1); + assert_int_equal(rc, 0); + 
assert_int_equal(stbuf1.st_uid, new_uid); + assert_int_equal(stbuf1.st_gid, new_gid); + print_message(" file1 (original): uid=%u, gid=%u - PASS\n", stbuf1.st_uid, stbuf1.st_gid); + + rc = dfs_ostat(dfs_mt, file2, &stbuf2); + assert_int_equal(rc, 0); + assert_int_equal(stbuf2.st_uid, new_uid); + assert_int_equal(stbuf2.st_gid, new_gid); + print_message(" file2 (link1): uid=%u, gid=%u - PASS\n", stbuf2.st_uid, stbuf2.st_gid); + + rc = dfs_ostat(dfs_mt, file3, &stbuf3); + assert_int_equal(rc, 0); + assert_int_equal(stbuf3.st_uid, new_uid); + assert_int_equal(stbuf3.st_gid, new_gid); + print_message(" file3 (link2 in dir2): uid=%u, gid=%u - PASS\n", stbuf3.st_uid, + stbuf3.st_gid); + + /* Verify ctime updated (chown changes metadata) */ + assert_true(check_ts(stbuf_prev.st_ctim, stbuf1.st_ctim)); + print_message(" Verified: ctime updated after chown\n"); + + stbuf_prev = stbuf1; + + /* ========== REMOVE FILE USED FOR CHMOD AND VERIFY PERSISTENCE ========== */ + print_message("\n--- Removing link1 (chmod target) and verifying persistence ---\n"); + + rc = dfs_release(file2); + assert_int_equal(rc, 0); + + rc = dfs_remove(dfs_mt, dir1, "link1", false, NULL); + assert_int_equal(rc, 0); + print_message(" Removed 'link1' from dir1\n"); + + /* Re-open via another path for verification */ + rc = dfs_lookup_rel(dfs_mt, dir1, "original", O_RDWR, &file2, NULL, &stbuf2); + assert_int_equal(rc, 0); + + /* Verify changes still visible on remaining hardlinks */ + rc = dfs_ostat(dfs_mt, file1, &stbuf1); + assert_int_equal(rc, 0); + assert_int_equal(stbuf1.st_nlink, 2); + assert_int_equal(stbuf1.st_mode, new_mode); + assert_int_equal(stbuf1.st_uid, new_uid); + assert_int_equal(stbuf1.st_gid, new_gid); + print_message(" file1: nlink=%lu, mode=0%o, uid=%u, gid=%u - PASS\n", + (unsigned long)stbuf1.st_nlink, stbuf1.st_mode, stbuf1.st_uid, stbuf1.st_gid); + + rc = dfs_ostat(dfs_mt, file3, &stbuf3); + assert_int_equal(rc, 0); + assert_int_equal(stbuf3.st_nlink, 2); + 
assert_int_equal(stbuf3.st_mode, new_mode); + assert_int_equal(stbuf3.st_uid, new_uid); + assert_int_equal(stbuf3.st_gid, new_gid); + print_message(" file3: nlink=%lu, mode=0%o, uid=%u, gid=%u - PASS\n", + (unsigned long)stbuf3.st_nlink, stbuf3.st_mode, stbuf3.st_uid, stbuf3.st_gid); + + /* Verify ctime updated (unlink changes metadata - link count) */ + assert_true(check_ts(stbuf_prev.st_ctim, stbuf1.st_ctim)); + print_message(" Verified: ctime updated after unlink\n"); + + stbuf_prev = stbuf1; + + /* ========== REMOVE ORIGINAL FILE AND VERIFY PERSISTENCE ========== */ + print_message("\n--- Removing original file and verifying persistence ---\n"); + + rc = dfs_release(file1); + assert_int_equal(rc, 0); + + rc = dfs_remove(dfs_mt, dir1, "original", false, NULL); + assert_int_equal(rc, 0); + print_message(" Removed 'original' from dir1\n"); + + /* Verify changes still visible on last remaining hardlink */ + rc = dfs_ostat(dfs_mt, file3, &stbuf3); + assert_int_equal(rc, 0); + assert_int_equal(stbuf3.st_nlink, 1); + assert_int_equal(stbuf3.st_mode, new_mode); + assert_int_equal(stbuf3.st_uid, new_uid); + assert_int_equal(stbuf3.st_gid, new_gid); + print_message(" file3 (last link): nlink=%lu, mode=0%o, uid=%u, gid=%u - PASS\n", + (unsigned long)stbuf3.st_nlink, stbuf3.st_mode, stbuf3.st_uid, stbuf3.st_gid); + + /* Verify via path lookup as well */ + rc = dfs_stat(dfs_mt, dir2, "link2", &stbuf3); + assert_int_equal(rc, 0); + assert_int_equal(stbuf3.st_nlink, 1); + assert_int_equal(stbuf3.st_mode, new_mode); + assert_int_equal(stbuf3.st_uid, new_uid); + assert_int_equal(stbuf3.st_gid, new_gid); + print_message(" Verified via path lookup: link2 in dir2 has correct attributes\n"); + + /* Verify ctime updated (unlink changes metadata - link count) */ + assert_true(check_ts(stbuf_prev.st_ctim, stbuf3.st_ctim)); + print_message(" Verified: ctime updated after unlink\n"); + + /* ========== CLEANUP ========== */ + print_message("\n--- Cleanup ---\n"); + + rc = 
dfs_release(file2); + assert_int_equal(rc, 0); + rc = dfs_release(file3); + assert_int_equal(rc, 0); + + /* Remove last file */ + rc = dfs_remove(dfs_mt, dir2, "link2", false, NULL); + assert_int_equal(rc, 0); + + /* Remove directories */ + rc = dfs_release(dir1); + assert_int_equal(rc, 0); + rc = dfs_release(dir2); + assert_int_equal(rc, 0); + + rc = dfs_remove(dfs_mt, NULL, "hl_chmod_dir1", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "hl_chmod_dir2", false, NULL); + assert_int_equal(rc, 0); + + print_message("\nHardlink chmod/chown test completed successfully!\n"); +}
+
+/* Size in bytes of the data pattern written to every file of the hardlink rename test. */
+enum { HL_RENAME_DATA_LEN = 64 };
+
+/* Create directory @name under the root and return an open handle to it in @dir. */
+static void
+hl_rename_mkdir(const char *name, dfs_obj_t **dir)
+{
+    int rc;
+
+    rc = dfs_mkdir(dfs_mt, NULL, name, S_IFDIR | S_IRWXU, 0);
+    assert_int_equal(rc, 0);
+    rc = dfs_lookup_rel(dfs_mt, NULL, name, O_RDWR, dir, NULL, NULL);
+    assert_int_equal(rc, 0);
+}
+
+/* Release the handle @dir, then remove the directory @name from the root. */
+static void
+hl_rename_rmdir(dfs_obj_t *dir, const char *name)
+{
+    int rc;
+
+    rc = dfs_release(dir);
+    assert_int_equal(rc, 0);
+    rc = dfs_remove(dfs_mt, NULL, name, false, NULL);
+    assert_int_equal(rc, 0);
+}
+
+/*
+ * Create regular file @name in @dir, write HL_RENAME_DATA_LEN bytes of @data at offset 0 and,
+ * when @xname is not NULL, set xattr @xname to @xval (terminating NUL included). The object id
+ * is returned in @oid; the file handle is released before returning.
+ */
+static void
+hl_rename_create_file(dfs_obj_t *dir, const char *name, char *data, const char *xname,
+                      const char *xval, daos_obj_id_t *oid)
+{
+    dfs_obj_t  *obj;
+    d_iov_t     iov;
+    d_sg_list_t sgl = {.sg_nr = 1, .sg_nr_out = 1, .sg_iovs = &iov};
+    int         rc;
+
+    rc = dfs_open(dfs_mt, dir, name, S_IFREG | S_IWUSR | S_IRUSR, O_RDWR | O_CREAT | O_EXCL, 0,
+                  0, NULL, &obj);
+    assert_int_equal(rc, 0);
+
+    rc = dfs_obj2id(obj, oid);
+    assert_int_equal(rc, 0);
+
+    d_iov_set(&iov, data, HL_RENAME_DATA_LEN);
+    rc = dfs_write(dfs_mt, obj, &sgl, 0, NULL);
+    assert_int_equal(rc, 0);
+
+    if (xname != NULL) {
+        rc = dfs_setxattr(dfs_mt, obj, xname, xval, strlen(xval) + 1, 0);
+        assert_int_equal(rc, 0);
+    }
+
+    rc = dfs_release(obj);
+    assert_int_equal(rc, 0);
+}
+
+/*
+ * Re-open @dir/@name and hardlink it as @link_dir/@link_name. The stat of the new link is
+ * returned in @stbuf; both handles are released before returning.
+ */
+static void
+hl_rename_add_link(dfs_obj_t *dir, const char *name, dfs_obj_t *link_dir, const char *link_name,
+                   struct stat *stbuf)
+{
+    dfs_obj_t *obj;
+    dfs_obj_t *link;
+    int        rc;
+
+    rc = dfs_lookup_rel(dfs_mt, dir, name, O_RDWR, &obj, NULL, NULL);
+    assert_int_equal(rc, 0);
+
+    rc = dfs_link(dfs_mt, obj, link_dir, link_name, &link, stbuf);
+    assert_int_equal(rc, 0);
+
+    rc = dfs_release(obj);
+    assert_int_equal(rc, 0);
+    rc = dfs_release(link);
+    assert_int_equal(rc, 0);
+}
+
+/* Rename @sdir/@sname to @ddir/@dname and verify that the source name is gone afterwards. */
+static void
+hl_rename_move(dfs_obj_t *sdir, const char *sname, dfs_obj_t *ddir, const char *dname)
+{
+    struct stat stbuf;
+    int         rc;
+
+    rc = dfs_move(dfs_mt, sdir, sname, ddir, dname, NULL);
+    assert_int_equal(rc, 0);
+
+    rc = dfs_stat(dfs_mt, sdir, sname, &stbuf);
+    assert_int_equal(rc, ENOENT);
+}
+
+/* Look up @dir/@name with open @flags and return the handle; the lookup must succeed. */
+static dfs_obj_t *
+hl_rename_lookup(dfs_obj_t *dir, const char *name, int flags)
+{
+    dfs_obj_t *obj = NULL;
+    int        rc;
+
+    rc = dfs_lookup_rel(dfs_mt, dir, name, flags, &obj, NULL, NULL);
+    assert_int_equal(rc, 0);
+    return obj;
+}
+
+/*
+ * Verify that @obj has object id @expect and link count @nlink. The id and stat that were
+ * actually read are returned in @oid and @stbuf so that the caller can log them.
+ */
+static void
+hl_rename_check_obj(dfs_obj_t *obj, daos_obj_id_t expect, nlink_t nlink, daos_obj_id_t *oid,
+                    struct stat *stbuf)
+{
+    int rc;
+
+    rc = dfs_obj2id(obj, oid);
+    assert_int_equal(rc, 0);
+    rc = dfs_ostat(dfs_mt, obj, stbuf);
+    assert_int_equal(rc, 0);
+    assert_true(oid->lo == expect.lo && oid->hi == expect.hi);
+    assert_int_equal(stbuf->st_nlink, nlink);
+}
+
+/* Fetch xattr @xname of @obj into @buf (@buf_len bytes) and verify that it equals @xval. */
+static void
+hl_rename_check_xattr(dfs_obj_t *obj, const char *xname, const char *xval, char *buf,
+                      daos_size_t buf_len)
+{
+    daos_size_t size = buf_len;
+    int         rc;
+
+    memset(buf, 0, buf_len);
+    rc = dfs_getxattr(dfs_mt, obj, xname, buf, &size);
+    assert_int_equal(rc, 0);
+    assert_string_equal(buf, xval);
+}
+
+/*
+ * Read HL_RENAME_DATA_LEN bytes from offset 0 of @obj and verify that the full length was
+ * returned and that the content equals @expect.
+ */
+static void
+hl_rename_check_data(dfs_obj_t *obj, const char *expect)
+{
+    char        buf[HL_RENAME_DATA_LEN] = {0};
+    d_iov_t     iov;
+    d_sg_list_t sgl       = {.sg_nr = 1, .sg_nr_out = 1, .sg_iovs = &iov};
+    daos_size_t read_size = 0;
+    int         rc;
+
+    d_iov_set(&iov, buf, sizeof(buf));
+    rc = dfs_read(dfs_mt, obj, &sgl, 0, &read_size, NULL);
+    assert_int_equal(rc, 0);
+    /* A short read must not go unnoticed. */
+    assert_int_equal(read_size, sizeof(buf));
+    assert_memory_equal(buf, expect, sizeof(buf));
+}
+
+/*
+ * Rename (dfs_move) involving hardlinked files. Every file and every hardlink lives in its own
+ * directory. Source files carry the 'S' data pattern and an xattr, destination files carry the
+ * 'D' pattern. Four scenarios are exercised:
+ *   1. source has a hardlink, destination is a regular file without hardlinks
+ *   2. source has no hardlink, destination has a hardlink
+ *   3. both source and destination have a hardlink
+ *   4. source has a hardlink, destination does not exist
+ * After each rename the test checks the object id and link count seen through every remaining
+ * name, and that the xattr and file content are reachable through the expected names.
+ * Only rank 0 runs the test.
+ */
+static void
+dfs_test_hardlink_rename(void **state)
+{
+    test_arg_t   *arg = *state;
+    dfs_obj_t    *obj;
+    dfs_obj_t    *link;
+    dfs_obj_t    *dir_src, *dir_src_link, *dir_dst, *dir_dst_link;
+    struct stat   stbuf;
+    char          src_data[HL_RENAME_DATA_LEN], dst_data[HL_RENAME_DATA_LEN];
+    daos_obj_id_t oid_src, oid_dst, oid_tmp;
+    const char   *xattr_name = "user.rename_test";
+    const char   *xattr_val  = "xattr_preserved_after_rename";
+    char          xattr_buf[64];
+    int           rc;
+
+    if (arg->myrank != 0)
+        return;
+
+    print_message("=== Hardlink rename test ===\n");
+    print_message("All files and hardlinks in different directories, with xattr validation\n");
+
+    /* Distinct data patterns make it obvious which object a name resolves to. */
+    memset(src_data, 'S', sizeof(src_data));
+    memset(dst_data, 'D', sizeof(dst_data));
+
+    /*
+     * Scenario 1: source has a hardlink, destination is a regular file.
+     *   /dir_src1/src1 (xattr) is also linked as /dir_src1_link/src1_link
+     *   /dir_dst1/dst1 has no hardlinks
+     *   rename(src1 -> dst1)
+     */
+    print_message("\n--- Scenario 1: Source has hardlinks, dest is regular file ---\n");
+
+    hl_rename_mkdir("dir_src1", &dir_src);
+    hl_rename_mkdir("dir_src1_link", &dir_src_link);
+    hl_rename_mkdir("dir_dst1", &dir_dst);
+    print_message(" Created directories: dir_src1/, dir_src1_link/, dir_dst1/\n");
+
+    hl_rename_create_file(dir_src, "src1", src_data, xattr_name, xattr_val, &oid_src);
+    print_message(" Created /dir_src1/src1, oid=" DF_OID "\n", DP_OID(oid_src));
+    print_message(" Set xattr '%s' = '%s' on src1\n", xattr_name, xattr_val);
+
+    hl_rename_add_link(dir_src, "src1", dir_src_link, "src1_link", &stbuf);
+    print_message(" Created hardlink /dir_src1_link/src1_link, nlink=%lu\n",
+                  (unsigned long)stbuf.st_nlink);
+
+    hl_rename_create_file(dir_dst, "dst1", dst_data, NULL, NULL, &oid_dst);
+    print_message(" Created /dir_dst1/dst1, oid=" DF_OID "\n", DP_OID(oid_dst));
+
+    print_message(" Renaming /dir_src1/src1 -> /dir_dst1/dst1...\n");
+    hl_rename_move(dir_src, "src1", dir_dst, "dst1");
+    print_message(" /dir_src1/src1 no longer exists - PASS\n");
+
+    /* The surviving link still names the source object and its xattr. */
+    obj = hl_rename_lookup(dir_src_link, "src1_link", O_RDWR);
+    hl_rename_check_obj(obj, oid_src, 2, &oid_tmp, &stbuf);
+    print_message(" /dir_src1_link/src1_link: oid=" DF_OID ", nlink=%lu - PASS\n",
+                  DP_OID(oid_tmp), (unsigned long)stbuf.st_nlink);
+    hl_rename_check_xattr(obj, xattr_name, xattr_val, xattr_buf, sizeof(xattr_buf));
+    print_message(" xattr via src1_link: '%s' = '%s' - PASS\n", xattr_name, xattr_buf);
+    rc = dfs_release(obj);
+    assert_int_equal(rc, 0);
+
+    /* The destination name now resolves to the source object. */
+    obj = hl_rename_lookup(dir_dst, "dst1", O_RDONLY);
+    hl_rename_check_obj(obj, oid_src, 2, &oid_tmp, &stbuf);
+    print_message(" /dir_dst1/dst1: oid=" DF_OID ", nlink=%lu - PASS\n", DP_OID(oid_tmp),
+                  (unsigned long)stbuf.st_nlink);
+    hl_rename_check_xattr(obj, xattr_name, xattr_val, xattr_buf, sizeof(xattr_buf));
+    print_message(" xattr via dst1: '%s' = '%s' - PASS\n", xattr_name, xattr_buf);
+    hl_rename_check_data(obj, src_data);
+    print_message(" dst1 content matches original src1 data - PASS\n");
+    rc = dfs_release(obj);
+    assert_int_equal(rc, 0);
+
+    /* Cleanup scenario 1 */
+    rc = dfs_remove(dfs_mt, dir_dst, "dst1", false, NULL);
+    assert_int_equal(rc, 0);
+    rc = dfs_remove(dfs_mt, dir_src_link, "src1_link", false, NULL);
+    assert_int_equal(rc, 0);
+    hl_rename_rmdir(dir_src, "dir_src1");
+    hl_rename_rmdir(dir_src_link, "dir_src1_link");
+    hl_rename_rmdir(dir_dst, "dir_dst1");
+
+    /*
+     * Scenario 2: source is a regular file, destination has a hardlink.
+     *   /dir_src2/src2 (xattr) has no hardlinks
+     *   /dir_dst2/dst2 is also linked as /dir_dst2_link/dst2_link
+     *   rename(src2 -> dst2)
+     */
+    print_message("\n--- Scenario 2: Source is regular, dest has hardlinks ---\n");
+
+    hl_rename_mkdir("dir_src2", &dir_src);
+    hl_rename_mkdir("dir_dst2", &dir_dst);
+    hl_rename_mkdir("dir_dst2_link", &dir_dst_link);
+    print_message(" Created directories: dir_src2/, dir_dst2/, dir_dst2_link/\n");
+
+    hl_rename_create_file(dir_src, "src2", src_data, xattr_name, xattr_val, &oid_src);
+    print_message(" Created /dir_src2/src2, oid=" DF_OID "\n", DP_OID(oid_src));
+    print_message(" Set xattr '%s' on src2\n", xattr_name);
+
+    hl_rename_create_file(dir_dst, "dst2", dst_data, NULL, NULL, &oid_dst);
+    print_message(" Created /dir_dst2/dst2, oid=" DF_OID "\n", DP_OID(oid_dst));
+
+    hl_rename_add_link(dir_dst, "dst2", dir_dst_link, "dst2_link", &stbuf);
+    print_message(" Created hardlink /dir_dst2_link/dst2_link, nlink=%lu\n",
+                  (unsigned long)stbuf.st_nlink);
+
+    print_message(" Renaming /dir_src2/src2 -> /dir_dst2/dst2...\n");
+    hl_rename_move(dir_src, "src2", dir_dst, "dst2");
+    print_message(" /dir_src2/src2 no longer exists - PASS\n");
+
+    /* dst2 now names the source object, which never had a second link. */
+    obj = hl_rename_lookup(dir_dst, "dst2", O_RDONLY);
+    hl_rename_check_obj(obj, oid_src, 1, &oid_tmp, &stbuf);
+    print_message(" /dir_dst2/dst2: oid=" DF_OID " (src's), nlink=%lu - PASS\n",
+                  DP_OID(oid_tmp), (unsigned long)stbuf.st_nlink);
+    rc = dfs_release(obj);
+    assert_int_equal(rc, 0);
+
+    /* The old destination object survives through its remaining link only. */
+    link = hl_rename_lookup(dir_dst_link, "dst2_link", O_RDONLY);
+    hl_rename_check_obj(link, oid_dst, 1, &oid_tmp, &stbuf);
+    print_message(" /dir_dst2_link/dst2_link: oid=" DF_OID " (old dst), nlink=%lu - PASS\n",
+                  DP_OID(oid_tmp), (unsigned long)stbuf.st_nlink);
+
+    obj = hl_rename_lookup(dir_dst, "dst2", O_RDONLY);
+    hl_rename_check_xattr(obj, xattr_name, xattr_val, xattr_buf, sizeof(xattr_buf));
+    print_message(" xattr via dst2: '%s' = '%s' - PASS\n", xattr_name, xattr_buf);
+    hl_rename_check_data(obj, src_data);
+    print_message(" dst2 content matches original src2 data - PASS\n");
+    rc = dfs_release(obj);
+    assert_int_equal(rc, 0);
+
+    hl_rename_check_data(link, dst_data);
+    print_message(" dst2_link content matches original dst2 data - PASS\n");
+
+    /* Cleanup scenario 2 */
+    rc = dfs_release(link);
+    assert_int_equal(rc, 0);
+    rc = dfs_remove(dfs_mt, dir_dst, "dst2", false, NULL);
+    assert_int_equal(rc, 0);
+    rc = dfs_remove(dfs_mt, dir_dst_link, "dst2_link", false, NULL);
+    assert_int_equal(rc, 0);
+    hl_rename_rmdir(dir_src, "dir_src2");
+    hl_rename_rmdir(dir_dst, "dir_dst2");
+    hl_rename_rmdir(dir_dst_link, "dir_dst2_link");
+
+    /*
+     * Scenario 3: both source and destination have a hardlink.
+     *   /dir_a/file_a (xattr) is also linked as /dir_a_link/file_a_link
+     *   /dir_b/file_b is also linked as /dir_b_link/file_b_link
+     *   rename(file_a -> file_b)
+     */
+    print_message("\n--- Scenario 3: Both source and dest have hardlinks ---\n");
+
+    hl_rename_mkdir("dir_a", &dir_src);
+    hl_rename_mkdir("dir_a_link", &dir_src_link);
+    hl_rename_mkdir("dir_b", &dir_dst);
+    hl_rename_mkdir("dir_b_link", &dir_dst_link);
+    print_message(" Created directories: dir_a/, dir_a_link/, dir_b/, dir_b_link/\n");
+
+    hl_rename_create_file(dir_src, "file_a", src_data, xattr_name, xattr_val, &oid_src);
+    print_message(" Created /dir_a/file_a, oid=" DF_OID "\n", DP_OID(oid_src));
+    print_message(" Set xattr '%s' on file_a\n", xattr_name);
+
+    hl_rename_add_link(dir_src, "file_a", dir_src_link, "file_a_link", &stbuf);
+    print_message(" Created hardlink /dir_a_link/file_a_link, nlink=%lu\n",
+                  (unsigned long)stbuf.st_nlink);
+
+    hl_rename_create_file(dir_dst, "file_b", dst_data, NULL, NULL, &oid_dst);
+    print_message(" Created /dir_b/file_b, oid=" DF_OID "\n", DP_OID(oid_dst));
+
+    hl_rename_add_link(dir_dst, "file_b", dir_dst_link, "file_b_link", &stbuf);
+    print_message(" Created hardlink /dir_b_link/file_b_link, nlink=%lu\n",
+                  (unsigned long)stbuf.st_nlink);
+
+    print_message(" Renaming /dir_a/file_a -> /dir_b/file_b...\n");
+    hl_rename_move(dir_src, "file_a", dir_dst, "file_b");
+    print_message(" /dir_a/file_a no longer exists - PASS\n");
+
+    /* file_a's object keeps both of its links: file_a_link and (now) file_b. */
+    obj = hl_rename_lookup(dir_src_link, "file_a_link", O_RDWR);
+    hl_rename_check_obj(obj, oid_src, 2, &oid_tmp, &stbuf);
+    print_message(" /dir_a_link/file_a_link: oid=" DF_OID ", nlink=%lu - PASS\n",
+                  DP_OID(oid_tmp), (unsigned long)stbuf.st_nlink);
+    hl_rename_check_xattr(obj, xattr_name, xattr_val, xattr_buf, sizeof(xattr_buf));
+    print_message(" xattr via file_a_link: '%s' = '%s' - PASS\n", xattr_name, xattr_buf);
+    rc = dfs_release(obj);
+    assert_int_equal(rc, 0);
+
+    obj = hl_rename_lookup(dir_dst, "file_b", O_RDONLY);
+    hl_rename_check_obj(obj, oid_src, 2, &oid_tmp, &stbuf);
+    print_message(" /dir_b/file_b: oid=" DF_OID " (file_a's), nlink=%lu - PASS\n",
+                  DP_OID(oid_tmp), (unsigned long)stbuf.st_nlink);
+    hl_rename_check_xattr(obj, xattr_name, xattr_val, xattr_buf, sizeof(xattr_buf));
+    print_message(" xattr via file_b: '%s' = '%s' - PASS\n", xattr_name, xattr_buf);
+    hl_rename_check_data(obj, src_data);
+    print_message(" file_b content matches original file_a data - PASS\n");
+    rc = dfs_release(obj);
+    assert_int_equal(rc, 0);
+
+    /* The old file_b object lost one link and keeps its original content. */
+    link = hl_rename_lookup(dir_dst_link, "file_b_link", O_RDWR);
+    hl_rename_check_obj(link, oid_dst, 1, &oid_tmp, &stbuf);
+    print_message(" /dir_b_link/file_b_link: oid=" DF_OID " (old file_b), nlink=%lu - PASS\n",
+                  DP_OID(oid_tmp), (unsigned long)stbuf.st_nlink);
+    hl_rename_check_data(link, dst_data);
+    print_message(" file_b_link content matches original file_b data - PASS\n");
+
+    /* Cleanup scenario 3 */
+    rc = dfs_release(link);
+    assert_int_equal(rc, 0);
+    rc = dfs_remove(dfs_mt, dir_dst, "file_b", false, NULL);
+    assert_int_equal(rc, 0);
+    rc = dfs_remove(dfs_mt, dir_src_link, "file_a_link", false, NULL);
+    assert_int_equal(rc, 0);
+    rc = dfs_remove(dfs_mt, dir_dst_link, "file_b_link", false, NULL);
+    assert_int_equal(rc, 0);
+    hl_rename_rmdir(dir_src, "dir_a");
+    hl_rename_rmdir(dir_src_link, "dir_a_link");
+    hl_rename_rmdir(dir_dst, "dir_b");
+    hl_rename_rmdir(dir_dst_link, "dir_b_link");
+
+    /*
+     * Scenario 4: source has a hardlink, destination does not exist.
+     *   /dir_src4/src4 (xattr) is also linked as /dir_src4_link/src4_link
+     *   /dir_dst4/dst4 does not exist
+     *   rename(src4 -> dst4)
+     */
+    print_message("\n--- Scenario 4: Source has hardlinks, dest doesn't exist ---\n");
+
+    hl_rename_mkdir("dir_src4", &dir_src);
+    hl_rename_mkdir("dir_src4_link", &dir_src_link);
+    hl_rename_mkdir("dir_dst4", &dir_dst);
+    print_message(" Created directories: dir_src4/, dir_src4_link/, dir_dst4/\n");
+
+    hl_rename_create_file(dir_src, "src4", src_data, xattr_name, xattr_val, &oid_src);
+    print_message(" Created /dir_src4/src4, oid=" DF_OID "\n", DP_OID(oid_src));
+    print_message(" Set xattr '%s' on src4\n", xattr_name);
+
+    hl_rename_add_link(dir_src, "src4", dir_src_link, "src4_link", &stbuf);
+    print_message(" Created hardlink /dir_src4_link/src4_link, nlink=%lu\n",
+                  (unsigned long)stbuf.st_nlink);
+
+    rc = dfs_stat(dfs_mt, dir_dst, "dst4", &stbuf);
+    assert_int_equal(rc, ENOENT);
+    print_message(" /dir_dst4/dst4 does not exist - confirmed\n");
+
+    print_message(" Renaming /dir_src4/src4 -> /dir_dst4/dst4...\n");
+    hl_rename_move(dir_src, "src4", dir_dst, "dst4");
+    print_message(" /dir_src4/src4 no longer exists - PASS\n");
+
+    obj = hl_rename_lookup(dir_dst, "dst4", O_RDONLY);
+    hl_rename_check_obj(obj, oid_src, 2, &oid_tmp, &stbuf);
+    print_message(" /dir_dst4/dst4: oid=" DF_OID ", nlink=%lu - PASS\n", DP_OID(oid_tmp),
+                  (unsigned long)stbuf.st_nlink);
+    rc = dfs_release(obj);
+    assert_int_equal(rc, 0);
+
+    obj = hl_rename_lookup(dir_src_link, "src4_link", O_RDWR);
+    hl_rename_check_obj(obj, oid_src, 2, &oid_tmp, &stbuf);
+    print_message(" /dir_src4_link/src4_link: oid=" DF_OID ", nlink=%lu - PASS\n",
+                  DP_OID(oid_tmp), (unsigned long)stbuf.st_nlink);
+    hl_rename_check_xattr(obj, xattr_name, xattr_val, xattr_buf, sizeof(xattr_buf));
+    print_message(" xattr via src4_link: '%s' = '%s' - PASS\n", xattr_name, xattr_buf);
+    rc = dfs_release(obj);
+    assert_int_equal(rc, 0);
+
+    obj = hl_rename_lookup(dir_dst, "dst4", O_RDONLY);
+    hl_rename_check_xattr(obj, xattr_name, xattr_val, xattr_buf, sizeof(xattr_buf));
+    print_message(" xattr via dst4: '%s' = '%s' - PASS\n", xattr_name, xattr_buf);
+    hl_rename_check_data(obj, src_data);
+    print_message(" dst4 content matches original src4 data - PASS\n");
+    rc = dfs_release(obj);
+    assert_int_equal(rc, 0);
+
+    /* Cleanup scenario 4 */
+    rc = dfs_remove(dfs_mt, dir_dst, "dst4", false, NULL);
+    assert_int_equal(rc, 0);
+    rc = dfs_remove(dfs_mt, dir_src_link, "src4_link", false, NULL);
+    assert_int_equal(rc, 0);
+    hl_rename_rmdir(dir_src, "dir_src4");
+    hl_rename_rmdir(dir_src_link, "dir_src4_link");
+    hl_rename_rmdir(dir_dst, "dir_dst4");
+
+    print_message("\nHardlink rename test completed successfully!\n");
+}
+
+static void +dfs_test_hardlink_xattr(void **state) +{ + test_arg_t *arg = *state; + dfs_obj_t *file1, *file2; + struct stat stbuf; + const char *xname1 = "user.attr1"; + const char *xname2 = "user.attr2"; + const char *xval1 = "value1"; + const char *xval2 = "value2"; + daos_size_t size; + char buf[64]; + daos_obj_id_t oid_orig, oid_tmp; + int rc; + + if (arg->myrank != 0) + return; + + print_message("=== Hardlink xattr test ===\n"); + + /* + * ============================================================ + * Part 1: Test xattr sharing with hardlinks + * ============================================================ + * - Create file, set xname1 + * - Create hardlink, set xname2 on hardlink + * - Both xnames should be visible on both files + * - Delete first file + * - Both xattrs still visible on second file + */ + print_message("\n--- Part 1: xattr sharing across hardlinks ---\n"); + + /* Create first file */ + rc = dfs_open(dfs_mt, NULL, "xattr_file1", S_IFREG | S_IWUSR | S_IRUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &file1); + assert_int_equal(rc, 0); + + /* Record oid immediately after creation */ + rc = dfs_obj2id(file1, &oid_orig); + assert_int_equal(rc, 0); + print_message(" Created xattr_file1, oid=" DF_OID "\n", DP_OID(oid_orig)); + + /* Set xname1 on first file BEFORE hardlink is created */ + rc = dfs_setxattr(dfs_mt, file1, xname1, xval1, strlen(xval1) + 1, 0); + assert_int_equal(rc, 0); + print_message(" Set xattr '%s' = '%s' on file1\n", xname1, xval1); + + /* Verify xname1 is set */ + size = sizeof(buf); + memset(buf, 0, sizeof(buf)); + rc = dfs_getxattr(dfs_mt, file1, xname1, buf, &size); + assert_int_equal(rc, 0); + assert_string_equal(buf, xval1); + print_message(" Verified xattr on file1 - PASS\n"); + + /* Release handle */ + rc = dfs_release(file1); + assert_int_equal(rc, 0); + + /* Re-open and create hardlink */ + rc = dfs_lookup_rel(dfs_mt, NULL, "xattr_file1", O_RDWR, &file1, NULL, NULL); +
assert_int_equal(rc, 0); + + rc = dfs_link(dfs_mt, file1, NULL, "xattr_file2", &file2, &stbuf); + assert_int_equal(rc, 0); + print_message(" Created hardlink xattr_file2\n"); + + /* The link must report the original oid; the stat filled by dfs_link must show nlink=2 */ + rc = dfs_obj2id(file2, &oid_tmp); + assert_int_equal(rc, 0); + assert_true(oid_tmp.lo == oid_orig.lo && oid_tmp.hi == oid_orig.hi); + assert_int_equal(stbuf.st_nlink, 2); + print_message(" Hardlink oid=" DF_OID " (matches), nlink=%lu\n", DP_OID(oid_tmp), + (unsigned long)stbuf.st_nlink); + + /* Set xname2 through the hardlink handle (file2) only; file1 never sets it */ + rc = dfs_setxattr(dfs_mt, file2, xname2, xval2, strlen(xval2) + 1, 0); + assert_int_equal(rc, 0); + print_message(" Set xattr '%s' = '%s' on file2 (hardlink)\n", xname2, xval2); + + /* Both xattrs, including xname2 set via file2, must be readable through file1 */ + print_message(" Verifying both xattrs visible on file1...\n"); + size = sizeof(buf); + memset(buf, 0, sizeof(buf)); + rc = dfs_getxattr(dfs_mt, file1, xname1, buf, &size); + assert_int_equal(rc, 0); + assert_string_equal(buf, xval1); + print_message(" file1 has '%s' = '%s' - PASS\n", xname1, xval1); + + size = sizeof(buf); + memset(buf, 0, sizeof(buf)); + rc = dfs_getxattr(dfs_mt, file1, xname2, buf, &size); + assert_int_equal(rc, 0); + assert_string_equal(buf, xval2); + print_message(" file1 has '%s' = '%s' - PASS\n", xname2, xval2); + + /* Both xattrs, including xname1 set before the link existed, must be readable via file2 */ + print_message(" Verifying both xattrs visible on file2...\n"); + size = sizeof(buf); + memset(buf, 0, sizeof(buf)); + rc = dfs_getxattr(dfs_mt, file2, xname1, buf, &size); + assert_int_equal(rc, 0); + assert_string_equal(buf, xval1); + print_message(" file2 has '%s' = '%s' - PASS\n", xname1, xval1); + + size = sizeof(buf); + memset(buf, 0, sizeof(buf)); + rc = dfs_getxattr(dfs_mt, file2, xname2, buf, &size); + assert_int_equal(rc, 0); + assert_string_equal(buf, xval2); + print_message(" file2 has '%s' = '%s' - PASS\n", xname2, xval2); + + /* listxattr on file1 must return a list sized for both names, each with its NUL */ + size = sizeof(buf); + memset(buf, 0, 
sizeof(buf)); + rc = dfs_listxattr(dfs_mt, file1, buf, &size); + assert_int_equal(rc, 0); + assert_int_equal(size, strlen(xname1) + 1 + strlen(xname2) + 1); + print_message(" listxattr on file1 shows both xattrs - PASS\n"); + + /* Release handles */ + rc = dfs_release(file1); + assert_int_equal(rc, 0); + rc = dfs_release(file2); + assert_int_equal(rc, 0); + + /* Delete first file */ + print_message(" Removing xattr_file1...\n"); + rc = dfs_remove(dfs_mt, NULL, "xattr_file1", false, NULL); + assert_int_equal(rc, 0); + + /* Re-open file2 and verify xattrs still exist */ + rc = dfs_lookup_rel(dfs_mt, NULL, "xattr_file2", O_RDWR, &file2, NULL, NULL); + assert_int_equal(rc, 0); + + /* Verify oid and nlink */ + rc = dfs_obj2id(file2, &oid_tmp); + assert_int_equal(rc, 0); + rc = dfs_ostat(dfs_mt, file2, &stbuf); + assert_int_equal(rc, 0); + assert_true(oid_tmp.lo == oid_orig.lo && oid_tmp.hi == oid_orig.hi); + assert_int_equal(stbuf.st_nlink, 1); + print_message(" file2 after deletion: oid=" DF_OID ", nlink=%lu\n", DP_OID(oid_tmp), + (unsigned long)stbuf.st_nlink); + + /* Verify both xattrs still visible on file2 */ + print_message(" Verifying xattrs still visible after file1 deletion...\n"); + size = sizeof(buf); + memset(buf, 0, sizeof(buf)); + rc = dfs_getxattr(dfs_mt, file2, xname1, buf, &size); + assert_int_equal(rc, 0); + assert_string_equal(buf, xval1); + print_message(" file2 still has '%s' = '%s' - PASS\n", xname1, xval1); + + size = sizeof(buf); + memset(buf, 0, sizeof(buf)); + rc = dfs_getxattr(dfs_mt, file2, xname2, buf, &size); + assert_int_equal(rc, 0); + assert_string_equal(buf, xval2); + print_message(" file2 still has '%s' = '%s' - PASS\n", xname2, xval2); + + /* Cleanup part 1 */ + rc = dfs_release(file2); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "xattr_file2", false, NULL); + assert_int_equal(rc, 0); + + /* + * ============================================================ + * Part 2: Test xattr removal with hardlinks + * 
============================================================ + * - Create file, set xname1 + * - Create hardlink, set xname2 on hardlink + * - Remove xattr from first file + * - Verify removal is visible in both (xattr gone from both) + */ + print_message("\n--- Part 2: xattr removal across hardlinks ---\n"); + + /* Create first file */ + rc = dfs_open(dfs_mt, NULL, "xattr_rm_file1", S_IFREG | S_IWUSR | S_IRUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &file1); + assert_int_equal(rc, 0); + + /* Record oid immediately after creation */ + rc = dfs_obj2id(file1, &oid_orig); + assert_int_equal(rc, 0); + print_message(" Created xattr_rm_file1, oid=" DF_OID "\n", DP_OID(oid_orig)); + + /* Set xname1 on first file */ + rc = dfs_setxattr(dfs_mt, file1, xname1, xval1, strlen(xval1) + 1, 0); + assert_int_equal(rc, 0); + print_message(" Set xattr '%s' = '%s' on file1\n", xname1, xval1); + + /* Release handle */ + rc = dfs_release(file1); + assert_int_equal(rc, 0); + + /* Re-open and create hardlink */ + rc = dfs_lookup_rel(dfs_mt, NULL, "xattr_rm_file1", O_RDWR, &file1, NULL, NULL); + assert_int_equal(rc, 0); + + rc = dfs_link(dfs_mt, file1, NULL, "xattr_rm_file2", &file2, &stbuf); + assert_int_equal(rc, 0); + print_message(" Created hardlink xattr_rm_file2\n"); + + /* Set xname2 on the hardlink (file2) */ + rc = dfs_setxattr(dfs_mt, file2, xname2, xval2, strlen(xval2) + 1, 0); + assert_int_equal(rc, 0); + print_message(" Set xattr '%s' = '%s' on file2 (hardlink)\n", xname2, xval2); + + /* Verify both xattrs visible on both files */ + print_message(" Verifying both xattrs on both files before removal...\n"); + size = sizeof(buf); + rc = dfs_getxattr(dfs_mt, file1, xname1, buf, &size); + assert_int_equal(rc, 0); + size = sizeof(buf); + rc = dfs_getxattr(dfs_mt, file1, xname2, buf, &size); + assert_int_equal(rc, 0); + size = sizeof(buf); + rc = dfs_getxattr(dfs_mt, file2, xname1, buf, &size); + assert_int_equal(rc, 0); + size = sizeof(buf); + rc = dfs_getxattr(dfs_mt, file2, 
xname2, buf, &size); + assert_int_equal(rc, 0); + print_message(" Both files have both xattrs - PASS\n"); + + /* Remove xname1 from file1 */ + print_message(" Removing xattr '%s' from file1...\n", xname1); + rc = dfs_removexattr(dfs_mt, file1, xname1); + assert_int_equal(rc, 0); + + /* Verify xname1 is gone from file1 */ + size = sizeof(buf); + rc = dfs_getxattr(dfs_mt, file1, xname1, buf, &size); + assert_int_equal(rc, ENODATA); + print_message(" file1: '%s' removed (ENODATA) - PASS\n", xname1); + + /* Verify xname1 is also gone from file2 (shared xattr) */ + size = sizeof(buf); + rc = dfs_getxattr(dfs_mt, file2, xname1, buf, &size); + assert_int_equal(rc, ENODATA); + print_message(" file2: '%s' also removed (ENODATA) - PASS\n", xname1); + + /* Verify xname2 still exists on both */ + size = sizeof(buf); + memset(buf, 0, sizeof(buf)); + rc = dfs_getxattr(dfs_mt, file1, xname2, buf, &size); + assert_int_equal(rc, 0); + assert_string_equal(buf, xval2); + print_message(" file1: '%s' still exists - PASS\n", xname2); + + size = sizeof(buf); + memset(buf, 0, sizeof(buf)); + rc = dfs_getxattr(dfs_mt, file2, xname2, buf, &size); + assert_int_equal(rc, 0); + assert_string_equal(buf, xval2); + print_message(" file2: '%s' still exists - PASS\n", xname2); + + /* Verify listxattr shows only xname2 on both files */ + size = sizeof(buf); + memset(buf, 0, sizeof(buf)); + rc = dfs_listxattr(dfs_mt, file1, buf, &size); + assert_int_equal(rc, 0); + assert_int_equal(size, strlen(xname2) + 1); + assert_string_equal(buf, xname2); + print_message(" file1 listxattr shows only '%s' - PASS\n", xname2); + + size = sizeof(buf); + memset(buf, 0, sizeof(buf)); + rc = dfs_listxattr(dfs_mt, file2, buf, &size); + assert_int_equal(rc, 0); + assert_int_equal(size, strlen(xname2) + 1); + assert_string_equal(buf, xname2); + print_message(" file2 listxattr shows only '%s' - PASS\n", xname2); + + /* Cleanup part 2 */ + rc = dfs_release(file1); + assert_int_equal(rc, 0); + rc = dfs_release(file2); + 
assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "xattr_rm_file1", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "xattr_rm_file2", false, NULL); + assert_int_equal(rc, 0); + + print_message("\nHardlink xattr test completed successfully!\n"); +} + +static void +dfs_test_exchange(void **state) +{ + test_arg_t *arg = *state; + dfs_obj_t *file_a, *file_a_link, *file_b, *file_b_link; + dfs_obj_t *tmp_obj; + dfs_obj_t *dir_a, *dir_a_link, *dir_b, *dir_b_link; + struct stat stbuf; + d_sg_list_t sgl; + d_iov_t iov; + char data_a[64], data_b[64], read_buf[64]; + daos_size_t read_size; + daos_obj_id_t oid_a, oid_b, oid_tmp; + int rc; + + if (arg->myrank != 0) + return; + + print_message("=== dfs_exchange test ===\n"); + + /* Prepare unique data patterns */ + memset(data_a, 'A', sizeof(data_a)); + memset(data_b, 'B', sizeof(data_b)); + + sgl.sg_nr = 1; + sgl.sg_nr_out = 1; + sgl.sg_iovs = &iov; + + /* + * ============================================================ + * Scenario 1: Basic exchange (no hardlinks) + * ============================================================ + * /exch_dir_a1/file_a and /exch_dir_b1/file_b are regular files + * exchange(file_a, file_b) should swap their directory entries + */ + print_message("\n--- Scenario 1: Basic exchange (no hardlinks) ---\n"); + + /* Create directories for file_a and file_b */ + rc = dfs_mkdir(dfs_mt, NULL, "exch_dir_a1", S_IFDIR | S_IRWXU, 0); + assert_int_equal(rc, 0); + rc = dfs_lookup_rel(dfs_mt, NULL, "exch_dir_a1", O_RDWR, &dir_a, NULL, NULL); + assert_int_equal(rc, 0); + + rc = dfs_mkdir(dfs_mt, NULL, "exch_dir_b1", S_IFDIR | S_IRWXU, 0); + assert_int_equal(rc, 0); + rc = dfs_lookup_rel(dfs_mt, NULL, "exch_dir_b1", O_RDWR, &dir_b, NULL, NULL); + assert_int_equal(rc, 0); + print_message(" Created directories: exch_dir_a1/, exch_dir_b1/\n"); + + /* Create file_a and write data */ + rc = dfs_open(dfs_mt, dir_a, "exch_a1", S_IFREG | S_IWUSR | S_IRUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, 
NULL, &file_a); + assert_int_equal(rc, 0); + + rc = dfs_obj2id(file_a, &oid_a); + assert_int_equal(rc, 0); + print_message(" Created /exch_dir_a1/exch_a1, oid=" DF_OID "\n", DP_OID(oid_a)); + + d_iov_set(&iov, data_a, sizeof(data_a)); + rc = dfs_write(dfs_mt, file_a, &sgl, 0, NULL); + assert_int_equal(rc, 0); + print_message(" Wrote data 'A' to exch_a1\n"); + + rc = dfs_release(file_a); + assert_int_equal(rc, 0); + + /* Create file_b and write data */ + rc = dfs_open(dfs_mt, dir_b, "exch_b1", S_IFREG | S_IWUSR | S_IRUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &file_b); + assert_int_equal(rc, 0); + + rc = dfs_obj2id(file_b, &oid_b); + assert_int_equal(rc, 0); + print_message(" Created /exch_dir_b1/exch_b1, oid=" DF_OID "\n", DP_OID(oid_b)); + + d_iov_set(&iov, data_b, sizeof(data_b)); + rc = dfs_write(dfs_mt, file_b, &sgl, 0, NULL); + assert_int_equal(rc, 0); + print_message(" Wrote data 'B' to exch_b1\n"); + + rc = dfs_release(file_b); + assert_int_equal(rc, 0); + + /* Exchange file_a and file_b - names swap directories */ + print_message(" Exchanging /exch_dir_a1/exch_a1 <-> /exch_dir_b1/exch_b1...\n"); + rc = dfs_exchange(dfs_mt, dir_a, "exch_a1", dir_b, "exch_b1"); + assert_int_equal(rc, 0); + + /* After exchange: exch_a1 is now in dir_b, exch_b1 is now in dir_a */ + /* Verify exch_a1 (now in dir_b) still has oid_a and data_a */ + rc = dfs_lookup_rel(dfs_mt, dir_b, "exch_a1", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + rc = dfs_obj2id(tmp_obj, &oid_tmp); + assert_int_equal(rc, 0); + assert_true(oid_tmp.lo == oid_a.lo && oid_tmp.hi == oid_a.hi); + print_message(" /exch_dir_b1/exch_a1: oid=" DF_OID " (still oid_a) - PASS\n", + DP_OID(oid_tmp)); + + memset(read_buf, 0, sizeof(read_buf)); + d_iov_set(&iov, read_buf, sizeof(read_buf)); + rc = dfs_read(dfs_mt, tmp_obj, &sgl, 0, &read_size, NULL); + assert_int_equal(rc, 0); + assert_memory_equal(read_buf, data_a, sizeof(data_a)); + print_message(" /exch_dir_b1/exch_a1: content is 'A' - PASS\n"); + 
rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + /* Verify exch_b1 (now in dir_a) still has oid_b and data_b */ + rc = dfs_lookup_rel(dfs_mt, dir_a, "exch_b1", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + rc = dfs_obj2id(tmp_obj, &oid_tmp); + assert_int_equal(rc, 0); + assert_true(oid_tmp.lo == oid_b.lo && oid_tmp.hi == oid_b.hi); + print_message(" /exch_dir_a1/exch_b1: oid=" DF_OID " (still oid_b) - PASS\n", + DP_OID(oid_tmp)); + + memset(read_buf, 0, sizeof(read_buf)); + d_iov_set(&iov, read_buf, sizeof(read_buf)); + rc = dfs_read(dfs_mt, tmp_obj, &sgl, 0, &read_size, NULL); + assert_int_equal(rc, 0); + assert_memory_equal(read_buf, data_b, sizeof(data_b)); + print_message(" /exch_dir_a1/exch_b1: content is 'B' - PASS\n"); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + /* Cleanup scenario 1 - files are now in swapped directories */ + rc = dfs_remove(dfs_mt, dir_b, "exch_a1", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, dir_a, "exch_b1", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_release(dir_a); + assert_int_equal(rc, 0); + rc = dfs_release(dir_b); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "exch_dir_a1", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "exch_dir_b1", false, NULL); + assert_int_equal(rc, 0); + + /* + * ============================================================ + * Scenario 2: Exchange where one file has hardlinks + * ============================================================ + * /exch_dir_a2/exch_a2 has a hardlink /exch_linkdir_a2/exch_a2_link + * /exch_dir_b2/exch_b2 is a regular file (no hardlinks) + * exchange(exch_a2, exch_b2) is expected to: + * - move entry exch_a2 into exch_dir_b2, still resolving to oid_a (nlink=2) + * - move entry exch_b2 into exch_dir_a2, still resolving to oid_b (nlink=1) + * - leave exch_a2_link in place, still resolving to oid_a (nlink=2) + */ + print_message("\n--- Scenario 2: Exchange where one file has hardlinks ---\n"); + + /* Create directory for file_a */ + rc = dfs_mkdir(dfs_mt, 
NULL, "exch_dir_a2", S_IFDIR | S_IRWXU, 0); + assert_int_equal(rc, 0); + rc = dfs_lookup_rel(dfs_mt, NULL, "exch_dir_a2", O_RDWR, &dir_a, NULL, NULL); + assert_int_equal(rc, 0); + + /* Create directory for file_a's hardlink */ + rc = dfs_mkdir(dfs_mt, NULL, "exch_linkdir_a2", S_IFDIR | S_IRWXU, 0); + assert_int_equal(rc, 0); + rc = dfs_lookup_rel(dfs_mt, NULL, "exch_linkdir_a2", O_RDWR, &dir_a_link, NULL, NULL); + assert_int_equal(rc, 0); + + /* Create directory for file_b */ + rc = dfs_mkdir(dfs_mt, NULL, "exch_dir_b2", S_IFDIR | S_IRWXU, 0); + assert_int_equal(rc, 0); + rc = dfs_lookup_rel(dfs_mt, NULL, "exch_dir_b2", O_RDWR, &dir_b, NULL, NULL); + assert_int_equal(rc, 0); + print_message(" Created directories: exch_dir_a2/, exch_linkdir_a2/, exch_dir_b2/\n"); + + /* Create file_a in its directory */ + rc = dfs_open(dfs_mt, dir_a, "exch_a2", S_IFREG | S_IWUSR | S_IRUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &file_a); + assert_int_equal(rc, 0); + + rc = dfs_obj2id(file_a, &oid_a); + assert_int_equal(rc, 0); + print_message(" Created /exch_dir_a2/exch_a2, oid=" DF_OID "\n", DP_OID(oid_a)); + + d_iov_set(&iov, data_a, sizeof(data_a)); + rc = dfs_write(dfs_mt, file_a, &sgl, 0, NULL); + assert_int_equal(rc, 0); + + /* Create hardlink in separate directory */ + rc = dfs_link(dfs_mt, file_a, dir_a_link, "exch_a2_link", &file_a_link, &stbuf); + assert_int_equal(rc, 0); + print_message(" Created hardlink /exch_linkdir_a2/exch_a2_link, nlink=%lu\n", + (unsigned long)stbuf.st_nlink); + + rc = dfs_release(file_a); + assert_int_equal(rc, 0); + rc = dfs_release(file_a_link); + assert_int_equal(rc, 0); + + /* Create file_b in its directory (no hardlink) */ + rc = dfs_open(dfs_mt, dir_b, "exch_b2", S_IFREG | S_IWUSR | S_IRUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &file_b); + assert_int_equal(rc, 0); + + rc = dfs_obj2id(file_b, &oid_b); + assert_int_equal(rc, 0); + print_message(" Created /exch_dir_b2/exch_b2, oid=" DF_OID "\n", DP_OID(oid_b)); + + d_iov_set(&iov, data_b, 
sizeof(data_b)); + rc = dfs_write(dfs_mt, file_b, &sgl, 0, NULL); + assert_int_equal(rc, 0); + + rc = dfs_release(file_b); + assert_int_equal(rc, 0); + + /* Exchange file_a and file_b - names swap directories */ + print_message(" Exchanging /exch_dir_a2/exch_a2 <-> /exch_dir_b2/exch_b2...\n"); + rc = dfs_exchange(dfs_mt, dir_a, "exch_a2", dir_b, "exch_b2"); + assert_int_equal(rc, 0); + + /* After exchange: exch_a2 is now in dir_b, exch_b2 is now in dir_a */ + /* Verify exch_a2 (now in dir_b) still has oid_a, nlink=2 (has hardlink) */ + rc = dfs_lookup_rel(dfs_mt, dir_b, "exch_a2", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + rc = dfs_obj2id(tmp_obj, &oid_tmp); + assert_int_equal(rc, 0); + rc = dfs_ostat(dfs_mt, tmp_obj, &stbuf); + assert_int_equal(rc, 0); + assert_true(oid_tmp.lo == oid_a.lo && oid_tmp.hi == oid_a.hi); + assert_int_equal(stbuf.st_nlink, 2); + print_message(" /exch_dir_b2/exch_a2: oid=" DF_OID ", nlink=%lu (still oid_a) - PASS\n", + DP_OID(oid_tmp), (unsigned long)stbuf.st_nlink); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + /* Verify exch_b2 (now in dir_a) still has oid_b, nlink=1 */ + rc = dfs_lookup_rel(dfs_mt, dir_a, "exch_b2", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + rc = dfs_obj2id(tmp_obj, &oid_tmp); + assert_int_equal(rc, 0); + rc = dfs_ostat(dfs_mt, tmp_obj, &stbuf); + assert_int_equal(rc, 0); + assert_true(oid_tmp.lo == oid_b.lo && oid_tmp.hi == oid_b.hi); + assert_int_equal(stbuf.st_nlink, 1); + print_message(" /exch_dir_a2/exch_b2: oid=" DF_OID ", nlink=%lu (still oid_b) - PASS\n", + DP_OID(oid_tmp), (unsigned long)stbuf.st_nlink); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + /* Verify exch_linkdir_a2/exch_a2_link still has oid_a, nlink=2 */ + rc = dfs_lookup_rel(dfs_mt, dir_a_link, "exch_a2_link", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + rc = dfs_obj2id(tmp_obj, &oid_tmp); + assert_int_equal(rc, 0); + rc = dfs_ostat(dfs_mt, tmp_obj, &stbuf); + 
assert_int_equal(rc, 0); + assert_true(oid_tmp.lo == oid_a.lo && oid_tmp.hi == oid_a.hi); + assert_int_equal(stbuf.st_nlink, 2); + print_message(" /exch_linkdir_a2/exch_a2_link: oid=" DF_OID + ", nlink=%lu (same as exch_a2) - PASS\n", + DP_OID(oid_tmp), (unsigned long)stbuf.st_nlink); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + /* Verify content of exch_a2 (now in dir_b, should be data_a) */ + rc = dfs_lookup_rel(dfs_mt, dir_b, "exch_a2", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + memset(read_buf, 0, sizeof(read_buf)); + d_iov_set(&iov, read_buf, sizeof(read_buf)); + rc = dfs_read(dfs_mt, tmp_obj, &sgl, 0, &read_size, NULL); + assert_int_equal(rc, 0); + assert_memory_equal(read_buf, data_a, sizeof(data_a)); + print_message(" /exch_dir_b2/exch_a2: content is 'A' - PASS\n"); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + /* Verify content of exch_b2 (now in dir_a, should be data_b) */ + rc = dfs_lookup_rel(dfs_mt, dir_a, "exch_b2", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + memset(read_buf, 0, sizeof(read_buf)); + d_iov_set(&iov, read_buf, sizeof(read_buf)); + rc = dfs_read(dfs_mt, tmp_obj, &sgl, 0, &read_size, NULL); + assert_int_equal(rc, 0); + assert_memory_equal(read_buf, data_b, sizeof(data_b)); + print_message(" /exch_dir_a2/exch_b2: content is 'B' - PASS\n"); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + /* Verify content of exch_a2_link (should be data_a, same as exch_a2) */ + rc = dfs_lookup_rel(dfs_mt, dir_a_link, "exch_a2_link", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + memset(read_buf, 0, sizeof(read_buf)); + d_iov_set(&iov, read_buf, sizeof(read_buf)); + rc = dfs_read(dfs_mt, tmp_obj, &sgl, 0, &read_size, NULL); + assert_int_equal(rc, 0); + assert_memory_equal(read_buf, data_a, sizeof(data_a)); + print_message(" /exch_linkdir_a2/exch_a2_link: content is 'A' (same as exch_a2) - PASS\n"); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + /* 
Cleanup scenario 2 - files are now in swapped directories */ + rc = dfs_remove(dfs_mt, dir_b, "exch_a2", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, dir_a, "exch_b2", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, dir_a_link, "exch_a2_link", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_release(dir_a); + assert_int_equal(rc, 0); + rc = dfs_release(dir_a_link); + assert_int_equal(rc, 0); + rc = dfs_release(dir_b); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "exch_dir_a2", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "exch_linkdir_a2", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "exch_dir_b2", false, NULL); + assert_int_equal(rc, 0); + + /* + * ============================================================ + * Scenario 3: Exchange where both files have hardlinks + * ============================================================ + * /exch_dir_a3/exch_a3 has hardlink /exch_linkdir_a3/exch_a3_link + * /exch_dir_b3/exch_b3 has hardlink /exch_linkdir_b3/exch_b3_link + * exchange(exch_a3, exch_b3) is expected to: + * - move entry exch_a3 into exch_dir_b3, still resolving to oid_a (nlink=2) + * - move entry exch_b3 into exch_dir_a3, still resolving to oid_b (nlink=2) + * - leave exch_a3_link in place, still resolving to oid_a with content A + * - leave exch_b3_link in place, still resolving to oid_b with content B + */ + print_message("\n--- Scenario 3: Exchange where both files have hardlinks ---\n"); + + /* Create directory for file_a */ + rc = dfs_mkdir(dfs_mt, NULL, "exch_dir_a3", S_IFDIR | S_IRWXU, 0); + assert_int_equal(rc, 0); + rc = dfs_lookup_rel(dfs_mt, NULL, "exch_dir_a3", O_RDWR, &dir_a, NULL, NULL); + assert_int_equal(rc, 0); + + /* Create directory for file_a's hardlink */ + rc = dfs_mkdir(dfs_mt, NULL, "exch_linkdir_a3", S_IFDIR | S_IRWXU, 0); + assert_int_equal(rc, 0); + rc = dfs_lookup_rel(dfs_mt, NULL, "exch_linkdir_a3", O_RDWR, &dir_a_link, NULL, NULL); + assert_int_equal(rc, 0); + + /* Create directory for 
file_b */ + rc = dfs_mkdir(dfs_mt, NULL, "exch_dir_b3", S_IFDIR | S_IRWXU, 0); + assert_int_equal(rc, 0); + rc = dfs_lookup_rel(dfs_mt, NULL, "exch_dir_b3", O_RDWR, &dir_b, NULL, NULL); + assert_int_equal(rc, 0); + + /* Create directory for file_b's hardlink */ + rc = dfs_mkdir(dfs_mt, NULL, "exch_linkdir_b3", S_IFDIR | S_IRWXU, 0); + assert_int_equal(rc, 0); + rc = dfs_lookup_rel(dfs_mt, NULL, "exch_linkdir_b3", O_RDWR, &dir_b_link, NULL, NULL); + assert_int_equal(rc, 0); + print_message(" Created directories: exch_dir_a3/, exch_linkdir_a3/, exch_dir_b3/, " + "exch_linkdir_b3/\n"); + + /* Create file_a in its directory */ + rc = dfs_open(dfs_mt, dir_a, "exch_a3", S_IFREG | S_IWUSR | S_IRUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &file_a); + assert_int_equal(rc, 0); + + rc = dfs_obj2id(file_a, &oid_a); + assert_int_equal(rc, 0); + print_message(" Created /exch_dir_a3/exch_a3, oid=" DF_OID "\n", DP_OID(oid_a)); + + d_iov_set(&iov, data_a, sizeof(data_a)); + rc = dfs_write(dfs_mt, file_a, &sgl, 0, NULL); + assert_int_equal(rc, 0); + + /* Create hardlink in separate directory */ + rc = dfs_link(dfs_mt, file_a, dir_a_link, "exch_a3_link", &file_a_link, &stbuf); + assert_int_equal(rc, 0); + print_message(" Created hardlink /exch_linkdir_a3/exch_a3_link\n"); + rc = dfs_release(file_a); + assert_int_equal(rc, 0); + rc = dfs_release(file_a_link); + assert_int_equal(rc, 0); + + /* Create file_b in its directory */ + rc = dfs_open(dfs_mt, dir_b, "exch_b3", S_IFREG | S_IWUSR | S_IRUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &file_b); + assert_int_equal(rc, 0); + + rc = dfs_obj2id(file_b, &oid_b); + assert_int_equal(rc, 0); + print_message(" Created /exch_dir_b3/exch_b3, oid=" DF_OID "\n", DP_OID(oid_b)); + + d_iov_set(&iov, data_b, sizeof(data_b)); + rc = dfs_write(dfs_mt, file_b, &sgl, 0, NULL); + assert_int_equal(rc, 0); + + /* Create hardlink in separate directory */ + rc = dfs_link(dfs_mt, file_b, dir_b_link, "exch_b3_link", &file_b_link, &stbuf); + 
assert_int_equal(rc, 0); + print_message(" Created hardlink /exch_linkdir_b3/exch_b3_link\n"); + rc = dfs_release(file_b); + assert_int_equal(rc, 0); + rc = dfs_release(file_b_link); + assert_int_equal(rc, 0); + + /* Exchange file_a and file_b - names swap directories */ + print_message(" Exchanging /exch_dir_a3/exch_a3 <-> /exch_dir_b3/exch_b3...\n"); + rc = dfs_exchange(dfs_mt, dir_a, "exch_a3", dir_b, "exch_b3"); + assert_int_equal(rc, 0); + + /* After exchange: exch_a3 is now in dir_b, exch_b3 is now in dir_a */ + /* Verify exch_a3 (now in dir_b) still has oid_a, nlink=2 */ + rc = dfs_lookup_rel(dfs_mt, dir_b, "exch_a3", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + rc = dfs_obj2id(tmp_obj, &oid_tmp); + assert_int_equal(rc, 0); + rc = dfs_ostat(dfs_mt, tmp_obj, &stbuf); + assert_int_equal(rc, 0); + assert_true(oid_tmp.lo == oid_a.lo && oid_tmp.hi == oid_a.hi); + assert_int_equal(stbuf.st_nlink, 2); + print_message(" /exch_dir_b3/exch_a3: oid=" DF_OID " (still oid_a), nlink=%lu - PASS\n", + DP_OID(oid_tmp), (unsigned long)stbuf.st_nlink); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + /* Verify exch_b3 (now in dir_a) still has oid_b, nlink=2 */ + rc = dfs_lookup_rel(dfs_mt, dir_a, "exch_b3", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + rc = dfs_obj2id(tmp_obj, &oid_tmp); + assert_int_equal(rc, 0); + rc = dfs_ostat(dfs_mt, tmp_obj, &stbuf); + assert_int_equal(rc, 0); + assert_true(oid_tmp.lo == oid_b.lo && oid_tmp.hi == oid_b.hi); + assert_int_equal(stbuf.st_nlink, 2); + print_message(" /exch_dir_a3/exch_b3: oid=" DF_OID " (still oid_b), nlink=%lu - PASS\n", + DP_OID(oid_tmp), (unsigned long)stbuf.st_nlink); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + /* Verify exch_linkdir_a3/exch_a3_link still has oid_a (same as exch_a3) */ + rc = dfs_lookup_rel(dfs_mt, dir_a_link, "exch_a3_link", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + rc = dfs_obj2id(tmp_obj, &oid_tmp); + assert_int_equal(rc, 
0); + rc = dfs_ostat(dfs_mt, tmp_obj, &stbuf); + assert_int_equal(rc, 0); + assert_true(oid_tmp.lo == oid_a.lo && oid_tmp.hi == oid_a.hi); + assert_int_equal(stbuf.st_nlink, 2); + print_message(" /exch_linkdir_a3/exch_a3_link: oid=" DF_OID + " (oid_a, same as exch_a3) - PASS\n", + DP_OID(oid_tmp)); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + /* Verify exch_linkdir_b3/exch_b3_link still has oid_b (same as exch_b3) */ + rc = dfs_lookup_rel(dfs_mt, dir_b_link, "exch_b3_link", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + rc = dfs_obj2id(tmp_obj, &oid_tmp); + assert_int_equal(rc, 0); + rc = dfs_ostat(dfs_mt, tmp_obj, &stbuf); + assert_int_equal(rc, 0); + assert_true(oid_tmp.lo == oid_b.lo && oid_tmp.hi == oid_b.hi); + assert_int_equal(stbuf.st_nlink, 2); + print_message(" /exch_linkdir_b3/exch_b3_link: oid=" DF_OID + " (oid_b, same as exch_b3) - PASS\n", + DP_OID(oid_tmp)); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + /* Verify content: exch_a3 (now in dir_b) and exch_a3_link should have data_a */ + rc = dfs_lookup_rel(dfs_mt, dir_b, "exch_a3", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + memset(read_buf, 0, sizeof(read_buf)); + d_iov_set(&iov, read_buf, sizeof(read_buf)); + rc = dfs_read(dfs_mt, tmp_obj, &sgl, 0, &read_size, NULL); + assert_int_equal(rc, 0); + assert_memory_equal(read_buf, data_a, sizeof(data_a)); + print_message(" /exch_dir_b3/exch_a3: content is 'A' - PASS\n"); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + rc = dfs_lookup_rel(dfs_mt, dir_a_link, "exch_a3_link", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + memset(read_buf, 0, sizeof(read_buf)); + d_iov_set(&iov, read_buf, sizeof(read_buf)); + rc = dfs_read(dfs_mt, tmp_obj, &sgl, 0, &read_size, NULL); + assert_int_equal(rc, 0); + assert_memory_equal(read_buf, data_a, sizeof(data_a)); + print_message(" /exch_linkdir_a3/exch_a3_link: content is 'A' (same as exch_a3) - PASS\n"); + rc = dfs_release(tmp_obj); + 
assert_int_equal(rc, 0); + + /* Verify content: exch_b3 (now in dir_a) and exch_b3_link should have data_b */ + rc = dfs_lookup_rel(dfs_mt, dir_a, "exch_b3", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + memset(read_buf, 0, sizeof(read_buf)); + d_iov_set(&iov, read_buf, sizeof(read_buf)); + rc = dfs_read(dfs_mt, tmp_obj, &sgl, 0, &read_size, NULL); + assert_int_equal(rc, 0); + assert_memory_equal(read_buf, data_b, sizeof(data_b)); + print_message(" /exch_dir_a3/exch_b3: content is 'B' - PASS\n"); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + rc = dfs_lookup_rel(dfs_mt, dir_b_link, "exch_b3_link", O_RDONLY, &tmp_obj, NULL, NULL); + assert_int_equal(rc, 0); + memset(read_buf, 0, sizeof(read_buf)); + d_iov_set(&iov, read_buf, sizeof(read_buf)); + rc = dfs_read(dfs_mt, tmp_obj, &sgl, 0, &read_size, NULL); + assert_int_equal(rc, 0); + assert_memory_equal(read_buf, data_b, sizeof(data_b)); + print_message(" /exch_linkdir_b3/exch_b3_link: content is 'B' (same as exch_b3) - PASS\n"); + rc = dfs_release(tmp_obj); + assert_int_equal(rc, 0); + + /* Cleanup scenario 3 - files are now in swapped directories */ + rc = dfs_remove(dfs_mt, dir_b, "exch_a3", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, dir_a, "exch_b3", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, dir_a_link, "exch_a3_link", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, dir_b_link, "exch_b3_link", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_release(dir_a); + assert_int_equal(rc, 0); + rc = dfs_release(dir_a_link); + assert_int_equal(rc, 0); + rc = dfs_release(dir_b); + assert_int_equal(rc, 0); + rc = dfs_release(dir_b_link); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "exch_dir_a3", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "exch_linkdir_a3", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "exch_dir_b3", false, NULL); + assert_int_equal(rc, 0); 
+ rc = dfs_remove(dfs_mt, NULL, "exch_linkdir_b3", false, NULL); + assert_int_equal(rc, 0); + + print_message("\ndfs_exchange test completed successfully!\n"); +} + +static void +dfs_test_hardlink_access(void **state) +{ + test_arg_t *arg = *state; + dfs_obj_t *file, *link1, *link2, *symlink_obj; + dfs_obj_t *dir_file, *dir_link1, *dir_link2, *dir_symlink; + struct stat stbuf; + daos_obj_id_t oid_file; + int rc; + + if (arg->myrank != 0) + return; + + print_message("=== Hardlink dfs_access test ===\n"); + print_message("Testing that dfs_access returns consistent results across hardlinks\n"); + + /* + * Create structure: + * /access_dir_file/access_file - original file + * /access_dir_link1/access_link1 - hardlink 1 + * /access_dir_link2/access_link2 - hardlink 2 + * /access_dir_symlink/access_symlink -> ../access_dir_link2/access_link2 + */ + + /* Create directories */ + rc = dfs_mkdir(dfs_mt, NULL, "access_dir_file", S_IFDIR | S_IRWXU, 0); + assert_int_equal(rc, 0); + rc = dfs_lookup_rel(dfs_mt, NULL, "access_dir_file", O_RDWR, &dir_file, NULL, NULL); + assert_int_equal(rc, 0); + print_message(" Created directory /access_dir_file/\n"); + + rc = dfs_mkdir(dfs_mt, NULL, "access_dir_link1", S_IFDIR | S_IRWXU, 0); + assert_int_equal(rc, 0); + rc = dfs_lookup_rel(dfs_mt, NULL, "access_dir_link1", O_RDWR, &dir_link1, NULL, NULL); + assert_int_equal(rc, 0); + print_message(" Created directory /access_dir_link1/\n"); + + rc = dfs_mkdir(dfs_mt, NULL, "access_dir_link2", S_IFDIR | S_IRWXU, 0); + assert_int_equal(rc, 0); + rc = dfs_lookup_rel(dfs_mt, NULL, "access_dir_link2", O_RDWR, &dir_link2, NULL, NULL); + assert_int_equal(rc, 0); + print_message(" Created directory /access_dir_link2/\n"); + + rc = dfs_mkdir(dfs_mt, NULL, "access_dir_symlink", S_IFDIR | S_IRWXU, 0); + assert_int_equal(rc, 0); + rc = dfs_lookup_rel(dfs_mt, NULL, "access_dir_symlink", O_RDWR, &dir_symlink, NULL, NULL); + assert_int_equal(rc, 0); + print_message(" Created directory /access_dir_symlink/\n"); 
+ + /* Create file with read-write permissions */ + rc = dfs_open(dfs_mt, dir_file, "access_file", S_IFREG | S_IRUSR | S_IWUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &file); + assert_int_equal(rc, 0); + + rc = dfs_obj2id(file, &oid_file); + assert_int_equal(rc, 0); + rc = dfs_ostat(dfs_mt, file, &stbuf); + assert_int_equal(rc, 0); + print_message(" Created /access_dir_file/access_file, oid=" DF_OID ", mode=0%o\n", + DP_OID(oid_file), stbuf.st_mode & 0777); + + /* Create hardlinks in different directories */ + rc = dfs_link(dfs_mt, file, dir_link1, "access_link1", &link1, &stbuf); + assert_int_equal(rc, 0); + print_message(" Created hardlink /access_dir_link1/access_link1, nlink=%lu\n", + (unsigned long)stbuf.st_nlink); + + rc = dfs_link(dfs_mt, file, dir_link2, "access_link2", &link2, &stbuf); + assert_int_equal(rc, 0); + print_message(" Created hardlink /access_dir_link2/access_link2, nlink=%lu\n", + (unsigned long)stbuf.st_nlink); + + /* Create symlink pointing to link2 */ + rc = dfs_open(dfs_mt, dir_symlink, "access_symlink", S_IFLNK, O_RDWR | O_CREAT | O_EXCL, 0, + 0, "../access_dir_link2/access_link2", &symlink_obj); + assert_int_equal(rc, 0); + print_message(" Created symlink /access_dir_symlink/access_symlink -> " + "../access_dir_link2/access_link2\n"); + + rc = dfs_release(file); + assert_int_equal(rc, 0); + rc = dfs_release(link1); + assert_int_equal(rc, 0); + rc = dfs_release(link2); + assert_int_equal(rc, 0); + rc = dfs_release(symlink_obj); + assert_int_equal(rc, 0); + + /* + * Test 1: F_OK (file existence) on all paths + */ + print_message("\n--- Test 1: F_OK (existence check) ---\n"); + + rc = dfs_access(dfs_mt, dir_file, "access_file", F_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_file/access_file, F_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_link1, "access_link1", F_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_link1/access_link1, F_OK) = %d - PASS\n", rc); + + rc = 
dfs_access(dfs_mt, dir_link2, "access_link2", F_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_link2/access_link2, F_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_symlink, "access_symlink", F_OK); + assert_int_equal(rc, 0); + print_message( + " dfs_access(/access_dir_symlink/access_symlink, F_OK) = %d - PASS (via symlink)\n", + rc); + + /* + * Test 2: R_OK (read permission) on all paths + */ + print_message("\n--- Test 2: R_OK (read permission) ---\n"); + + rc = dfs_access(dfs_mt, dir_file, "access_file", R_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_file/access_file, R_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_link1, "access_link1", R_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_link1/access_link1, R_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_link2, "access_link2", R_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_link2/access_link2, R_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_symlink, "access_symlink", R_OK); + assert_int_equal(rc, 0); + print_message( + " dfs_access(/access_dir_symlink/access_symlink, R_OK) = %d - PASS (via symlink)\n", + rc); + + /* + * Test 3: W_OK (write permission) on all paths + */ + print_message("\n--- Test 3: W_OK (write permission) ---\n"); + + rc = dfs_access(dfs_mt, dir_file, "access_file", W_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_file/access_file, W_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_link1, "access_link1", W_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_link1/access_link1, W_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_link2, "access_link2", W_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_link2/access_link2, W_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_symlink, "access_symlink", W_OK); + assert_int_equal(rc, 0); + print_message( + " 
dfs_access(/access_dir_symlink/access_symlink, W_OK) = %d - PASS (via symlink)\n", + rc); + + /* + * Test 4: R_OK | W_OK combined on all paths + */ + print_message("\n--- Test 4: R_OK | W_OK (read+write permission) ---\n"); + + rc = dfs_access(dfs_mt, dir_file, "access_file", R_OK | W_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_file/access_file, R_OK|W_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_link1, "access_link1", R_OK | W_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_link1/access_link1, R_OK|W_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_link2, "access_link2", R_OK | W_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_link2/access_link2, R_OK|W_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_symlink, "access_symlink", R_OK | W_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_symlink/access_symlink, R_OK|W_OK) = %d - PASS " + "(via symlink)\n", + rc); + + /* + * Test 5: Change permissions via one link, verify via all links + */ + print_message("\n--- Test 5: chmod via one link, verify access via all links ---\n"); + + /* Change to read-only via link1 */ + rc = dfs_chmod(dfs_mt, dir_link1, "access_link1", S_IRUSR); + assert_int_equal(rc, 0); + print_message(" Changed mode to read-only (0400) via /access_dir_link1/access_link1\n"); + + /* Verify R_OK succeeds on all */ + rc = dfs_access(dfs_mt, dir_file, "access_file", R_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_file/access_file, R_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_link1, "access_link1", R_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_link1/access_link1, R_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_link2, "access_link2", R_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_link2/access_link2, R_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_symlink, 
"access_symlink", R_OK); + assert_int_equal(rc, 0); + print_message( + " dfs_access(/access_dir_symlink/access_symlink, R_OK) = %d - PASS (via symlink)\n", + rc); + + /* Verify W_OK fails on all (read-only now) */ + rc = dfs_access(dfs_mt, dir_file, "access_file", W_OK); + assert_int_equal(rc, EACCES); + print_message(" dfs_access(/access_dir_file/access_file, W_OK) = EACCES - PASS\n"); + + rc = dfs_access(dfs_mt, dir_link1, "access_link1", W_OK); + assert_int_equal(rc, EACCES); + print_message(" dfs_access(/access_dir_link1/access_link1, W_OK) = EACCES - PASS\n"); + + rc = dfs_access(dfs_mt, dir_link2, "access_link2", W_OK); + assert_int_equal(rc, EACCES); + print_message(" dfs_access(/access_dir_link2/access_link2, W_OK) = EACCES - PASS\n"); + + rc = dfs_access(dfs_mt, dir_symlink, "access_symlink", W_OK); + assert_int_equal(rc, EACCES); + print_message(" dfs_access(/access_dir_symlink/access_symlink, W_OK) = EACCES - PASS (via " + "symlink)\n"); + + /* + * Test 6: Restore permissions via original file, verify via all links + */ + print_message("\n--- Test 6: Restore permissions, verify access ---\n"); + + /* Restore read-write via original file */ + rc = dfs_chmod(dfs_mt, dir_file, "access_file", S_IRUSR | S_IWUSR); + assert_int_equal(rc, 0); + print_message(" Restored mode to read-write (0600) via /access_dir_file/access_file\n"); + + /* Verify W_OK now succeeds on all */ + rc = dfs_access(dfs_mt, dir_file, "access_file", W_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_file/access_file, W_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_link1, "access_link1", W_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_link1/access_link1, W_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_link2, "access_link2", W_OK); + assert_int_equal(rc, 0); + print_message(" dfs_access(/access_dir_link2/access_link2, W_OK) = %d - PASS\n", rc); + + rc = dfs_access(dfs_mt, dir_symlink, "access_symlink", W_OK); + 
assert_int_equal(rc, 0); + print_message( + " dfs_access(/access_dir_symlink/access_symlink, W_OK) = %d - PASS (via symlink)\n", + rc); + + /* Cleanup */ + rc = dfs_remove(dfs_mt, dir_file, "access_file", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, dir_link1, "access_link1", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, dir_link2, "access_link2", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, dir_symlink, "access_symlink", false, NULL); + assert_int_equal(rc, 0); + + rc = dfs_release(dir_file); + assert_int_equal(rc, 0); + rc = dfs_release(dir_link1); + assert_int_equal(rc, 0); + rc = dfs_release(dir_link2); + assert_int_equal(rc, 0); + rc = dfs_release(dir_symlink); + assert_int_equal(rc, 0); + + rc = dfs_remove(dfs_mt, NULL, "access_dir_file", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "access_dir_link1", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "access_dir_link2", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "access_dir_symlink", false, NULL); + assert_int_equal(rc, 0); + + print_message("\nHardlink dfs_access test completed successfully!\n"); +} + +/** + * Test dfs_ostatx with hardlinks. + * + * This test verifies that dfs_ostatx works correctly when: + * 1. A file is converted to a hardlink (another DFS instance scenario) + * 2. A stale handle (opened before hardlink creation) is used + * + * Test steps: + * 1. Create two subdirectories (dir1 and dir2) + * 2. Create a file in dir1 and open two handles to it + * 3. Using handle1, create a hardlink in dir2 (converts file to hardlink) + * 4. Using handle1, stat both files - they should have identical metadata + * 5. Write some data using handle1, stat both files - size should match + * 6. 
Use handle2 (opened before hardlink) to call dfs_ostatx - should work + */ +static void +dfs_test_hardlink_ostatx(void **state) +{ + test_arg_t *arg = *state; + dfs_obj_t *dir1 = NULL, *dir2 = NULL; + dfs_obj_t *file_handle1 = NULL, *file_handle2 = NULL; + dfs_obj_t *link_handle = NULL; + struct stat stbuf1, stbuf_link, stbuf_handle2; + daos_obj_id_t oid1, oid2, oid_link; + char *write_buf = "Hello, hardlink ostatx test!"; + daos_size_t write_size; + d_sg_list_t sgl; + d_iov_t iov; + daos_event_t ev, *evp; + int rc; + + if (arg->myrank != 0) + return; + + print_message("\n=== Test: dfs_ostatx with hardlinks ===\n"); + + /* + * Step 1: Create two subdirectories + */ + print_message("\nStep 1: Creating two subdirectories...\n"); + rc = dfs_open(dfs_mt, NULL, "ostatx_dir1", S_IFDIR | S_IRWXU, O_RDWR | O_CREAT | O_EXCL, 0, + 0, NULL, &dir1); + assert_int_equal(rc, 0); + print_message(" Created /ostatx_dir1\n"); + + rc = dfs_open(dfs_mt, NULL, "ostatx_dir2", S_IFDIR | S_IRWXU, O_RDWR | O_CREAT | O_EXCL, 0, + 0, NULL, &dir2); + assert_int_equal(rc, 0); + print_message(" Created /ostatx_dir2\n"); + + /* + * Step 2: Create a file in dir1 and open TWO handles to it + */ + print_message("\nStep 2: Creating file and opening two handles...\n"); + + /* First handle - will be used to create hardlink */ + rc = dfs_open(dfs_mt, dir1, "testfile", S_IFREG | S_IWUSR | S_IRUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &file_handle1); + assert_int_equal(rc, 0); + rc = dfs_obj2id(file_handle1, &oid1); + assert_int_equal(rc, 0); + print_message(" Created /ostatx_dir1/testfile (handle1)\n"); + + /* Second handle - opened BEFORE hardlink creation (simulates stale handle) */ + rc = dfs_lookup(dfs_mt, "/ostatx_dir1/testfile", O_RDWR, &file_handle2, NULL, NULL); + assert_int_equal(rc, 0); + rc = dfs_obj2id(file_handle2, &oid2); + assert_int_equal(rc, 0); + print_message(" Opened second handle to same file (handle2)\n"); + + /* Verify both handles point to the same OID */ + assert_true(oid1.lo 
== oid2.lo && oid1.hi == oid2.hi); + print_message(" Verified: both handles have same OID\n"); + + /* Stat using handle1 before hardlink - nlink should be 1 */ + rc = dfs_ostat(dfs_mt, file_handle1, &stbuf1); + assert_int_equal(rc, 0); + assert_int_equal(stbuf1.st_nlink, 1); + print_message(" Initial stat via handle1: nlink=%lu, size=%lu\n", + (unsigned long)stbuf1.st_nlink, (unsigned long)stbuf1.st_size); + + /* + * Step 3: Using handle1, create a hardlink in dir2 + * This converts the file to a hardlink (metadata moves to HLM) + */ + print_message("\nStep 3: Creating hardlink using handle1...\n"); + rc = dfs_link(dfs_mt, file_handle1, dir2, "testfile_link", &link_handle, &stbuf_link); + assert_int_equal(rc, 0); + rc = dfs_obj2id(link_handle, &oid_link); + assert_int_equal(rc, 0); + assert_true(oid1.lo == oid_link.lo && oid1.hi == oid_link.hi); + print_message(" Created /ostatx_dir2/testfile_link\n"); + print_message(" Verified: link has same OID as original\n"); + assert_int_equal(stbuf_link.st_nlink, 2); + print_message(" Link stat: nlink=%lu (expected 2)\n", (unsigned long)stbuf_link.st_nlink); + + /* + * Step 4: Using handle1, stat both files - they should have identical data + */ + print_message("\nStep 4: Stat both files via handle1 - verifying identical metadata...\n"); + + rc = dfs_ostat(dfs_mt, file_handle1, &stbuf1); + assert_int_equal(rc, 0); + print_message(" Original file: nlink=%lu, size=%lu, mode=0%o\n", + (unsigned long)stbuf1.st_nlink, (unsigned long)stbuf1.st_size, + stbuf1.st_mode); + + rc = dfs_ostat(dfs_mt, link_handle, &stbuf_link); + assert_int_equal(rc, 0); + print_message(" Link file: nlink=%lu, size=%lu, mode=0%o\n", + (unsigned long)stbuf_link.st_nlink, (unsigned long)stbuf_link.st_size, + stbuf_link.st_mode); + + /* Verify identical metadata */ + assert_int_equal(stbuf1.st_nlink, 2); + assert_int_equal(stbuf_link.st_nlink, 2); + assert_int_equal(stbuf1.st_mode, stbuf_link.st_mode); + assert_int_equal(stbuf1.st_uid, stbuf_link.st_uid); + 
assert_int_equal(stbuf1.st_gid, stbuf_link.st_gid); + assert_int_equal(stbuf1.st_size, stbuf_link.st_size); + print_message(" Verified: both files have identical metadata\n"); + + /* + * Step 5: Write some data and repeat stat - size should match + * Use dfs_ostatx with event handle to test async path + */ + print_message("\nStep 5: Writing data and verifying size (using dfs_ostatx async)...\n"); + + write_size = strlen(write_buf); + d_iov_set(&iov, write_buf, write_size); + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + sgl.sg_iovs = &iov; + + rc = dfs_write(dfs_mt, file_handle1, &sgl, 0, NULL); + assert_int_equal(rc, 0); + print_message(" Wrote %lu bytes via handle1\n", (unsigned long)write_size); + + /* Stat original file using dfs_ostatx with event */ + rc = daos_event_init(&ev, arg->eq, NULL); + assert_rc_equal(rc, 0); + + rc = dfs_ostatx(dfs_mt, file_handle1, &stbuf1, &ev); + assert_int_equal(rc, 0); + + rc = daos_eq_poll(arg->eq, 0, DAOS_EQ_WAIT, 1, &evp); + assert_rc_equal(rc, 1); + assert_ptr_equal(evp, &ev); + assert_int_equal(evp->ev_error, 0); + + rc = daos_event_fini(&ev); + assert_rc_equal(rc, 0); + + print_message(" Original file after write: nlink=%lu, size=%lu\n", + (unsigned long)stbuf1.st_nlink, (unsigned long)stbuf1.st_size); + + /* Stat link file using dfs_ostatx with event */ + rc = daos_event_init(&ev, arg->eq, NULL); + assert_rc_equal(rc, 0); + + rc = dfs_ostatx(dfs_mt, link_handle, &stbuf_link, &ev); + assert_int_equal(rc, 0); + + rc = daos_eq_poll(arg->eq, 0, DAOS_EQ_WAIT, 1, &evp); + assert_rc_equal(rc, 1); + assert_ptr_equal(evp, &ev); + assert_int_equal(evp->ev_error, 0); + + rc = daos_event_fini(&ev); + assert_rc_equal(rc, 0); + + print_message(" Link file after write: nlink=%lu, size=%lu\n", + (unsigned long)stbuf_link.st_nlink, (unsigned long)stbuf_link.st_size); + + /* Verify size is correct on both */ + assert_int_equal(stbuf1.st_size, write_size); + assert_int_equal(stbuf_link.st_size, write_size); + assert_int_equal(stbuf1.st_nlink, 2); + 
assert_int_equal(stbuf_link.st_nlink, 2); + print_message(" Verified: both files show correct size (%lu bytes)\n", + (unsigned long)write_size); + + /* + * Step 6: Use handle2 (opened BEFORE hardlink creation) to call dfs_ostatx + * This simulates the case where a DFS handle was opened before another + * DFS instance converted the file to a hardlink. + * dfs_ostatx should detect the hardlink bit and fetch from HLM. + * We use an event handle to test the async code path. + */ + print_message("\nStep 6: Using stale handle2 with dfs_ostatx (async)...\n"); + print_message(" (handle2 was opened before hardlink creation)\n"); + + /* Initialize event for async operation */ + rc = daos_event_init(&ev, arg->eq, NULL); + assert_rc_equal(rc, 0); + + rc = dfs_ostatx(dfs_mt, file_handle2, &stbuf_handle2, &ev); + assert_int_equal(rc, 0); + + /* Wait for async completion */ + rc = daos_eq_poll(arg->eq, 0, DAOS_EQ_WAIT, 1, &evp); + assert_rc_equal(rc, 1); + assert_ptr_equal(evp, &ev); + assert_int_equal(evp->ev_error, 0); + + rc = daos_event_fini(&ev); + assert_rc_equal(rc, 0); + + print_message(" dfs_ostatx via handle2: nlink=%lu, size=%lu, mode=0%o\n", + (unsigned long)stbuf_handle2.st_nlink, (unsigned long)stbuf_handle2.st_size, + stbuf_handle2.st_mode); + + /* Verify handle2 sees the same data as handle1 */ + assert_int_equal(stbuf_handle2.st_nlink, 2); + assert_int_equal(stbuf_handle2.st_size, write_size); + assert_int_equal(stbuf_handle2.st_mode, stbuf1.st_mode); + assert_int_equal(stbuf_handle2.st_uid, stbuf1.st_uid); + assert_int_equal(stbuf_handle2.st_gid, stbuf1.st_gid); + print_message(" Verified: handle2 sees identical metadata as handle1\n"); + print_message(" SUCCESS: dfs_ostatx correctly detected hardlink and fetched from HLM\n"); + + /* + * Cleanup + */ + print_message("\nCleaning up...\n"); + + rc = dfs_release(file_handle1); + assert_int_equal(rc, 0); + rc = dfs_release(file_handle2); + assert_int_equal(rc, 0); + rc = dfs_release(link_handle); + 
assert_int_equal(rc, 0); + + rc = dfs_remove(dfs_mt, dir1, "testfile", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, dir2, "testfile_link", false, NULL); + assert_int_equal(rc, 0); + + rc = dfs_release(dir1); + assert_int_equal(rc, 0); + rc = dfs_release(dir2); + assert_int_equal(rc, 0); + + rc = dfs_remove(dfs_mt, NULL, "ostatx_dir1", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "ostatx_dir2", false, NULL); + assert_int_equal(rc, 0); + + print_message("\nHardlink dfs_ostatx test completed successfully!\n"); +} + +static void +dfs_test_hardlink_osetattr(void **state) +{ + test_arg_t *arg = *state; + dfs_obj_t *dir1 = NULL, *dir2 = NULL; + dfs_obj_t *file_handle1 = NULL, *file_handle2 = NULL; + dfs_obj_t *link_handle = NULL; + struct stat stbuf1, stbuf2, stbuf_link; + daos_obj_id_t oid1, oid2, oid_link; + mode_t new_mode; + uid_t new_uid; + gid_t new_gid; + daos_size_t new_size; + daos_event_t ev, *evp; + int rc; + + if (arg->myrank != 0) + return; + + print_message("\n=== Test: dfs_osetattr with hardlinks ===\n"); + + /* + * Step 1: Create two subdirectories + */ + print_message("\nStep 1: Creating two subdirectories...\n"); + rc = dfs_open(dfs_mt, NULL, "osetattr_dir1", S_IFDIR | S_IRWXU, O_RDWR | O_CREAT | O_EXCL, + 0, 0, NULL, &dir1); + assert_int_equal(rc, 0); + print_message(" Created /osetattr_dir1\n"); + + rc = dfs_open(dfs_mt, NULL, "osetattr_dir2", S_IFDIR | S_IRWXU, O_RDWR | O_CREAT | O_EXCL, + 0, 0, NULL, &dir2); + assert_int_equal(rc, 0); + print_message(" Created /osetattr_dir2\n"); + + /* + * Step 2: Create a file in dir1 (not root) and open TWO handles to it + */ + print_message("\nStep 2: Creating file in subdirectory and opening two handles...\n"); + + /* First handle */ + rc = dfs_open(dfs_mt, dir1, "testfile", S_IFREG | S_IWUSR | S_IRUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &file_handle1); + assert_int_equal(rc, 0); + rc = dfs_obj2id(file_handle1, &oid1); + assert_int_equal(rc, 0); + 
print_message(" Created /osetattr_dir1/testfile (handle1)\n"); + + /* Second handle - opened before hardlink creation */ + rc = dfs_lookup(dfs_mt, "/osetattr_dir1/testfile", O_RDWR, &file_handle2, NULL, NULL); + assert_int_equal(rc, 0); + rc = dfs_obj2id(file_handle2, &oid2); + assert_int_equal(rc, 0); + print_message(" Opened second handle to same file (handle2)\n"); + + /* Verify both handles point to the same OID */ + assert_true(oid1.lo == oid2.lo && oid1.hi == oid2.hi); + print_message(" Verified: both handles have same OID\n"); + + /* + * Step 3: Using handle1, convert to hardlink by creating link in dir2 + */ + print_message("\nStep 3: Converting to hardlink using handle1...\n"); + rc = dfs_link(dfs_mt, file_handle1, dir2, "testfile_link", &link_handle, &stbuf_link); + assert_int_equal(rc, 0); + rc = dfs_obj2id(link_handle, &oid_link); + assert_int_equal(rc, 0); + assert_true(oid1.lo == oid_link.lo && oid1.hi == oid_link.hi); + print_message(" Created /osetattr_dir2/testfile_link\n"); + print_message(" Verified: link has same OID as original\n"); + assert_int_equal(stbuf_link.st_nlink, 2); + print_message(" Link stat: nlink=%lu (expected 2)\n", (unsigned long)stbuf_link.st_nlink); + + /* + * Step 4: Using handle1, set mode, ownership and size using dfs_osetattr() + */ + print_message( + "\nStep 4: Setting mode, ownership and size via dfs_osetattr() using handle1...\n"); + + new_mode = S_IFREG | S_IRWXU | S_IRGRP | S_IXGRP; /* rwxr-x--- = 0750 */ + new_uid = 1001; + new_gid = 2002; + new_size = 4096; /* Set size to 4K */ + + memset(&stbuf1, 0, sizeof(stbuf1)); + stbuf1.st_mode = new_mode; + stbuf1.st_uid = new_uid; + stbuf1.st_gid = new_gid; + stbuf1.st_size = new_size; + + print_message(" Setting mode=0%o, uid=%u, gid=%u, size=%lu\n", new_mode & ~S_IFMT, new_uid, + new_gid, (unsigned long)new_size); + + rc = dfs_osetattr(dfs_mt, file_handle1, &stbuf1, + DFS_SET_ATTR_MODE | DFS_SET_ATTR_UID | DFS_SET_ATTR_GID | + DFS_SET_ATTR_SIZE); + assert_int_equal(rc, 0); 
+ print_message(" dfs_osetattr() completed successfully\n"); + + /* + * Step 5: Call dfs_ostat() using handle1 and verify stat values are correct + */ + print_message("\nStep 5: Verifying attributes via dfs_ostat() using handle1...\n"); + + memset(&stbuf1, 0, sizeof(stbuf1)); + rc = dfs_ostat(dfs_mt, file_handle1, &stbuf1); + assert_int_equal(rc, 0); + + print_message(" handle1 stat: mode=0%o, uid=%u, gid=%u, size=%lu, nlink=%lu\n", + stbuf1.st_mode & ~S_IFMT, stbuf1.st_uid, stbuf1.st_gid, + (unsigned long)stbuf1.st_size, (unsigned long)stbuf1.st_nlink); + + assert_int_equal(stbuf1.st_mode, new_mode); + assert_int_equal(stbuf1.st_uid, new_uid); + assert_int_equal(stbuf1.st_gid, new_gid); + assert_int_equal(stbuf1.st_size, new_size); + assert_int_equal(stbuf1.st_nlink, 2); + print_message(" Verified: all attributes are correctly set via handle1\n"); + + /* + * Step 6: Call dfs_ostatx() using handle2 (opened before hardlink creation) + * The output should be the same as handle1 + */ + print_message("\nStep 6: Verifying attributes via dfs_ostatx() using handle2...\n"); + print_message(" (handle2 was opened before hardlink creation)\n"); + + /* Initialize event for async operation */ + rc = daos_event_init(&ev, arg->eq, NULL); + assert_rc_equal(rc, 0); + + memset(&stbuf2, 0, sizeof(stbuf2)); + rc = dfs_ostatx(dfs_mt, file_handle2, &stbuf2, &ev); + assert_int_equal(rc, 0); + + /* Wait for async completion */ + rc = daos_eq_poll(arg->eq, 0, DAOS_EQ_WAIT, 1, &evp); + assert_rc_equal(rc, 1); + assert_ptr_equal(evp, &ev); + assert_int_equal(evp->ev_error, 0); + + rc = daos_event_fini(&ev); + assert_rc_equal(rc, 0); + + print_message(" handle2 stat: mode=0%o, uid=%u, gid=%u, size=%lu, nlink=%lu\n", + stbuf2.st_mode & ~S_IFMT, stbuf2.st_uid, stbuf2.st_gid, + (unsigned long)stbuf2.st_size, (unsigned long)stbuf2.st_nlink); + + /* Verify handle2 sees the same data as handle1 */ + assert_int_equal(stbuf2.st_mode, new_mode); + assert_int_equal(stbuf2.st_uid, new_uid); + 
assert_int_equal(stbuf2.st_gid, new_gid); + assert_int_equal(stbuf2.st_size, new_size); + assert_int_equal(stbuf2.st_nlink, 2); + assert_int_equal(stbuf2.st_ino, stbuf1.st_ino); + print_message(" Verified: handle2 (via dfs_ostatx) sees identical metadata as handle1\n"); + print_message( + " SUCCESS: dfs_osetattr changes visible through stale handle via dfs_ostatx\n"); + + /* + * Cleanup + */ + print_message("\nCleaning up...\n"); + + rc = dfs_release(file_handle1); + assert_int_equal(rc, 0); + rc = dfs_release(file_handle2); + assert_int_equal(rc, 0); + rc = dfs_release(link_handle); + assert_int_equal(rc, 0); + + rc = dfs_remove(dfs_mt, dir1, "testfile", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, dir2, "testfile_link", false, NULL); + assert_int_equal(rc, 0); + + rc = dfs_release(dir1); + assert_int_equal(rc, 0); + rc = dfs_release(dir2); + assert_int_equal(rc, 0); + + rc = dfs_remove(dfs_mt, NULL, "osetattr_dir1", false, NULL); + assert_int_equal(rc, 0); + rc = dfs_remove(dfs_mt, NULL, "osetattr_dir2", false, NULL); + assert_int_equal(rc, 0); + + print_message("\nHardlink dfs_osetattr test completed successfully!\n"); +} + +/** + * Test dfs_cont_check with HLM (Hardlink Metadata) inconsistencies. 
+ * + * This test covers five scenarios: + * + * SCENARIO 1: Orphan HLM entries + * - Creates a file with a hardlink (which creates an HLM entry) + * - Punches ALL directory entries using low-level DAOS APIs + * - HLM entry becomes orphaned (no directory entries point to the OID) + * - Verifies PRINT mode reports the orphan + * - Verifies REMOVE mode removes HLM entry and punches the object + * - Verifies RELINK mode restores file in lost+found with correct nlink=1 + * - Additional test: Empty file orphan - RELINK deletes HLM entry instead of relinking + * + * SCENARIO 2: HLM link count mismatch + * - Creates a file with a hardlink (nlink=2 stored in HLM) + * - Punches only ONE directory entry using low-level DAOS APIs + * - HLM has link count=2 but only 1 dentry exists + * - Verifies PRINT mode reports the mismatch (stored=2, cur=1) + * - Verifies RELINK mode fixes link count from 2 to 1 + * + * SCENARIO 3: Missing hardlink bit in directory entry + * - Creates a file with mode 555, then creates a hardlink + * - Changes mode to 500 via the hardlink + * - Clears the hardlink bit from one dentry using low-level DAOS APIs + * - Verifies PRINT mode reports the missing hardlink bit + * - Verifies RELINK mode restores the hardlink bit + * - Verifies both files show mode 500 after repair + * + * SCENARIO 4: Spurious hardlink bit on regular file + * - Creates a regular file (not a hardlink) + * - Sets the hardlink bit on the dentry using low-level DAOS APIs + * - Verifies dfs_osetattr() fails on the corrupted file + * - Verifies PRINT mode reports the spurious hardlink bit + * - Verifies RELINK mode clears the spurious hardlink bit + * - Verifies dfs_osetattr() succeeds after repair + * + * SCENARIO 5: Hardlink bit on directory or symlink + * - Creates a directory and a symlink + * - Sets the hardlink bit on both using low-level DAOS APIs + * - Verifies PRINT mode reports the spurious hardlink bit + * - Verifies RELINK mode clears the hardlink bit from both + * - Fetches 
dentries directly and verifies hardlink bit is cleared + */ +static void +dfs_test_checker_hlm(void **state) +{ + test_arg_t *arg = *state; + dfs_t *dfs; + dfs_obj_t *dir, *file1, *file2; + daos_obj_id_t file_oid, dir_oid; + daos_handle_t coh; + struct stat stbuf; + uint64_t nr_oids = 0; + char *cname = "cont_chkr_hlm"; + int rc; + + if (arg->myrank != 0) + return; + + print_message("Testing dfs_cont_check with HLM inconsistencies...\n"); + + /* + * ========================================================================== + * SCENARIO 1: Orphan HLM entries (both directory entries punched) + * ========================================================================== + */ + print_message("\n=== SCENARIO 1: Orphan HLM entries ===\n"); + + /* + * Part 1.1: Setup - Create container, file with hardlink + */ + print_message("Creating container and file with hardlink...\n"); + rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_CREAT | O_RDWR, NULL, &dfs); + assert_int_equal(rc, 0); + + /* Create a directory */ + rc = dfs_open(dfs, NULL, "hlm_test_dir", S_IFDIR | S_IWUSR | S_IRUSR | S_IXUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &dir); + assert_int_equal(rc, 0); + + /* Create a file */ + rc = dfs_open(dfs, dir, "testfile", S_IFREG | S_IWUSR | S_IRUSR, O_RDWR | O_CREAT | O_EXCL, + 0, 0, NULL, &file1); + assert_int_equal(rc, 0); + + /* Write some data to the file */ + { + d_sg_list_t sgl; + d_iov_t iov; + char *buf; + + D_ALLOC(buf, 1024); + assert_non_null(buf); + memset(buf, 'A', 1024); + sgl.sg_nr = 1; + sgl.sg_nr_out = 1; + sgl.sg_iovs = &iov; + d_iov_set(&iov, buf, 1024); + rc = dfs_write(dfs, file1, &sgl, 0, NULL); + assert_int_equal(rc, 0); + D_FREE(buf); + print_message(" Wrote 1024 bytes of data to file\n"); + } + + /* Get the file OID */ + rc = dfs_obj2id(file1, &file_oid); + assert_int_equal(rc, 0); + + /* Create a hardlink - this creates the HLM entry */ + rc = dfs_link(dfs, file1, dir, "testfile_link", 
&file2, &stbuf); + assert_int_equal(rc, 0); + assert_int_equal(stbuf.st_nlink, 2); + print_message(" Created file " DF_OID " with nlink=2\n", DP_OID(file_oid)); + + /* Get the directory OID for later use */ + rc = dfs_obj2id(dir, &dir_oid); + assert_int_equal(rc, 0); + + rc = dfs_release(file1); + assert_int_equal(rc, 0); + rc = dfs_release(file2); + assert_int_equal(rc, 0); + rc = dfs_release(dir); + assert_int_equal(rc, 0); + + rc = dfs_disconnect(dfs); + assert_int_equal(rc, 0); + /** have to call fini to release the cached container handle for the checker to work */ + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* + * Part 1.2: Corrupt - Punch all directory entries using low-level API + * This leaves the HLM entry orphaned + */ + print_message("Punching all directory entries (leaving HLM orphaned)...\n"); + rc = daos_cont_open(arg->pool.poh, cname, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, 0); + + { + daos_handle_t dir_oh; + d_iov_t dkey; + + /* Punch the file entries from the directory object */ + rc = daos_obj_open(coh, dir_oid, DAOS_OO_RW, &dir_oh, NULL); + assert_rc_equal(rc, 0); + + /* Punch "testfile" entry */ + d_iov_set(&dkey, "testfile", strlen("testfile")); + rc = daos_obj_punch_dkeys(dir_oh, DAOS_TX_NONE, DAOS_COND_PUNCH, 1, &dkey, NULL); + assert_rc_equal(rc, 0); + + /* Punch "testfile_link" entry */ + d_iov_set(&dkey, "testfile_link", strlen("testfile_link")); + rc = daos_obj_punch_dkeys(dir_oh, DAOS_TX_NONE, DAOS_COND_PUNCH, 1, &dkey, NULL); + assert_rc_equal(rc, 0); + + rc = daos_obj_close(dir_oh, NULL); + assert_rc_equal(rc, 0); + } + + rc = daos_cont_close(coh, NULL); + assert_rc_equal(rc, 0); + + /* + * Part 1.3: Test PRINT mode - should report orphan HLM entry + */ + print_message("Testing PRINT mode - should report orphan...\n"); + rc = dfs_cont_check(arg->pool.poh, cname, DFS_CHECK_PRINT, NULL); + assert_int_equal(rc, 0); + + /* Objects should still exist (PRINT doesn't modify anything) */ + get_nr_oids(arg->pool.poh, cname, 
&nr_oids); + print_message(" Number of OIDs after PRINT: %lu\n", (unsigned long)nr_oids); + /* Should be: SB + root + dir + file + HLM = 5 (HLM is a reserved object) */ + assert_int_equal(nr_oids, 5); + + /* + * Part 1.4: Test REMOVE mode - should remove HLM entry and punch object + */ + print_message("Testing REMOVE mode - should remove orphan HLM and object...\n"); + rc = dfs_cont_check(arg->pool.poh, cname, DFS_CHECK_PRINT | DFS_CHECK_REMOVE, NULL); + assert_int_equal(rc, 0); + + /* File object should be removed (unmarked OID gets punched) */ + get_nr_oids(arg->pool.poh, cname, &nr_oids); + print_message(" Number of OIDs after REMOVE: %lu\n", (unsigned long)nr_oids); + /* Should be: SB + root + dir + HLM = 4 (file is gone) */ + assert_int_equal(nr_oids, 4); + + /* + * Part 1.5: Recreate corruption and test RELINK mode + */ + print_message("Recreating file with hardlink for RELINK test...\n"); + rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_RDWR, NULL, &dfs); + assert_int_equal(rc, 0); + + /* Create a new file with hardlink */ + rc = dfs_lookup(dfs, "/hlm_test_dir", O_RDWR, &dir, NULL, NULL); + assert_int_equal(rc, 0); + + rc = dfs_open(dfs, dir, "testfile2", S_IFREG | S_IWUSR | S_IRUSR, O_RDWR | O_CREAT | O_EXCL, + 0, 0, NULL, &file1); + assert_int_equal(rc, 0); + + /* Write some data to the file */ + { + d_sg_list_t sgl; + d_iov_t iov; + char *buf; + + D_ALLOC(buf, 1024); + assert_non_null(buf); + memset(buf, 'A', 1024); + sgl.sg_nr = 1; + sgl.sg_nr_out = 1; + sgl.sg_iovs = &iov; + d_iov_set(&iov, buf, 1024); + rc = dfs_write(dfs, file1, &sgl, 0, NULL); + assert_int_equal(rc, 0); + D_FREE(buf); + print_message(" Wrote 1024 bytes of data to file\n"); + } + + rc = dfs_obj2id(file1, &file_oid); + assert_int_equal(rc, 0); + + rc = dfs_link(dfs, file1, dir, "testfile2_link", &file2, &stbuf); + assert_int_equal(rc, 0); + assert_int_equal(stbuf.st_nlink, 2); + print_message(" Created file " DF_OID " with 
nlink=2\n", DP_OID(file_oid)); + + /* Get dir_oid before disconnecting */ + rc = dfs_obj2id(dir, &dir_oid); + assert_int_equal(rc, 0); + + rc = dfs_release(file1); + assert_int_equal(rc, 0); + rc = dfs_release(file2); + assert_int_equal(rc, 0); + rc = dfs_release(dir); + assert_int_equal(rc, 0); + rc = dfs_disconnect(dfs); + assert_int_equal(rc, 0); + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* Punch directory entries again */ + rc = daos_cont_open(arg->pool.poh, cname, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, 0); + + { + daos_handle_t dir_oh; + d_iov_t dkey; + + rc = daos_obj_open(coh, dir_oid, DAOS_OO_RW, &dir_oh, NULL); + assert_rc_equal(rc, 0); + + d_iov_set(&dkey, "testfile2", strlen("testfile2")); + rc = daos_obj_punch_dkeys(dir_oh, DAOS_TX_NONE, DAOS_COND_PUNCH, 1, &dkey, NULL); + assert_rc_equal(rc, 0); + + d_iov_set(&dkey, "testfile2_link", strlen("testfile2_link")); + rc = daos_obj_punch_dkeys(dir_oh, DAOS_TX_NONE, DAOS_COND_PUNCH, 1, &dkey, NULL); + assert_rc_equal(rc, 0); + + rc = daos_obj_close(dir_oh, NULL); + assert_rc_equal(rc, 0); + } + + rc = daos_cont_close(coh, NULL); + assert_rc_equal(rc, 0); + + print_message("Testing RELINK mode - should restore file in lost+found...\n"); + rc = dfs_cont_check(arg->pool.poh, cname, DFS_CHECK_PRINT | DFS_CHECK_RELINK, "lf_orphan"); + assert_int_equal(rc, 0); + + /* Verify file is in lost+found */ + rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_RDWR, NULL, &dfs); + assert_int_equal(rc, 0); + + { + char fpath[128]; + + /* Construct expected path in lost+found */ + sprintf(fpath, "/lost+found/lf_orphan/%" PRIu64 ".%" PRIu64 "", file_oid.hi, + file_oid.lo); + + print_message(" Looking for restored file at %s\n", fpath); + rc = dfs_lookup(dfs, fpath, O_RDONLY, &file1, NULL, &stbuf); + assert_int_equal(rc, 0); + + /* Verify it's a regular file with the hardlink bit set and nlink=1 */ + assert_true(S_ISREG(stbuf.st_mode)); + 
assert_int_equal(stbuf.st_nlink, 1); + print_message(" Found file with nlink=%lu\n", (unsigned long)stbuf.st_nlink); + + rc = dfs_release(file1); + assert_int_equal(rc, 0); + } + + rc = dfs_disconnect(dfs); + assert_int_equal(rc, 0); + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* Cleanup scenario 1 - destroy container */ + rc = daos_cont_destroy(arg->pool.poh, cname, 1, NULL); + assert_rc_equal(rc, 0); + + /* + * Part 1.5: RELINK with empty file - should delete HLM entry instead of relinking + */ + print_message( + "\n--- Scenario 1b: Empty orphan file - RELINK should delete HLM entry ---\n"); + + /* Create container with empty file (no data) that has a hardlink */ + print_message("Creating container with empty hardlinked file...\n"); + rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_CREAT | O_RDWR, NULL, &dfs); + assert_int_equal(rc, 0); + + /* Create directory */ + rc = dfs_open(dfs, NULL, "hlm_test_dir", S_IFDIR | S_IWUSR | S_IRUSR | S_IXUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &dir); + assert_int_equal(rc, 0); + + /* Create empty file (no data written) */ + rc = dfs_open(dfs, dir, "empty_file", S_IFREG | 0644, O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, + &file1); + assert_int_equal(rc, 0); + + /* Create a hardlink to make it an HLM entry (link count > 1) */ + rc = dfs_link(dfs, file1, dir, "empty_file_link", &file2, NULL); + assert_int_equal(rc, 0); + print_message(" Created empty file with hardlink (nlink=2)\n"); + + /* Get the file OID */ + rc = dfs_obj2id(file1, &file_oid); + assert_int_equal(rc, 0); + rc = dfs_obj2id(dir, &dir_oid); + assert_int_equal(rc, 0); + + rc = dfs_release(file1); + assert_int_equal(rc, 0); + rc = dfs_release(file2); + assert_int_equal(rc, 0); + rc = dfs_release(dir); + assert_int_equal(rc, 0); + + rc = dfs_disconnect(dfs); + assert_int_equal(rc, 0); + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* Punch ALL directory entries to make HLM entry orphaned */ + 
print_message("Punching all directory entries to orphan the HLM entry...\n"); + rc = daos_cont_open(arg->pool.poh, cname, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, 0); + + { + daos_handle_t dir_oh; + d_iov_t dkey; + + rc = daos_obj_open(coh, dir_oid, DAOS_OO_RW, &dir_oh, NULL); + assert_rc_equal(rc, 0); + + d_iov_set(&dkey, "empty_file", strlen("empty_file")); + rc = daos_obj_punch_dkeys(dir_oh, DAOS_TX_NONE, DAOS_COND_PUNCH, 1, &dkey, NULL); + assert_rc_equal(rc, 0); + + d_iov_set(&dkey, "empty_file_link", strlen("empty_file_link")); + rc = daos_obj_punch_dkeys(dir_oh, DAOS_TX_NONE, DAOS_COND_PUNCH, 1, &dkey, NULL); + assert_rc_equal(rc, 0); + + rc = daos_obj_close(dir_oh, NULL); + assert_rc_equal(rc, 0); + } + + rc = daos_cont_close(coh, NULL); + assert_rc_equal(rc, 0); + + /* Run RELINK mode - should delete the HLM entry (file is empty, not worth relinking) */ + print_message("Testing RELINK mode - should delete HLM entry for empty file...\n"); + rc = dfs_cont_check(arg->pool.poh, cname, DFS_CHECK_PRINT | DFS_CHECK_RELINK, "lf_empty"); + assert_int_equal(rc, 0); + + /* Verify HLM object has no dkeys by querying container roots and listing dkeys */ + print_message("Verifying HLM object has no entries...\n"); + rc = daos_cont_open(arg->pool.poh, cname, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, 0); + + { + daos_prop_t *prop = NULL; + struct daos_prop_entry *entry; + struct daos_prop_co_roots *roots; + daos_obj_id_t hlm_oid; + daos_handle_t hlm_oh; + daos_anchor_t anchor = {0}; + uint32_t nr_dkeys; + daos_key_desc_t kds[1]; + d_sg_list_t sgl; + d_iov_t iov; + char dkey_buf[64]; + + /* Get HLM OID from container roots */ + prop = daos_prop_alloc(1); + assert_non_null(prop); + prop->dpp_entries[0].dpe_type = DAOS_PROP_CO_ROOTS; + rc = daos_cont_query(coh, NULL, prop, NULL); + assert_rc_equal(rc, 0); + entry = daos_prop_entry_get(prop, DAOS_PROP_CO_ROOTS); + assert_non_null(entry); + roots = (struct daos_prop_co_roots *)entry->dpe_val_ptr; + 
hlm_oid = roots->cr_oids[2]; + print_message(" HLM OID: " DF_OID "\n", DP_OID(hlm_oid)); + + /* Open HLM object and list dkeys */ + rc = daos_obj_open(coh, hlm_oid, DAOS_OO_RO, &hlm_oh, NULL); + assert_rc_equal(rc, 0); + + d_iov_set(&iov, dkey_buf, sizeof(dkey_buf)); + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + sgl.sg_iovs = &iov; + nr_dkeys = 1; + rc = daos_obj_list_dkey(hlm_oh, DAOS_TX_NONE, &nr_dkeys, kds, &sgl, &anchor, NULL); + /* rc might be 0 or -DER_NONEXIST if no dkeys */ + if (rc == 0 && nr_dkeys == 0) { + print_message(" HLM object has no entries (as expected)\n"); + } else if (rc == -DER_NONEXIST) { + print_message(" HLM object has no entries (DER_NONEXIST)\n"); + rc = 0; + } else { + print_message(" ERROR: HLM object has %u entries (expected 0)\n", + nr_dkeys); + assert_int_equal(nr_dkeys, 0); + } + + rc = daos_obj_close(hlm_oh, NULL); + assert_rc_equal(rc, 0); + + daos_prop_free(prop); + } + + rc = daos_cont_close(coh, NULL); + assert_rc_equal(rc, 0); + + /* Cleanup scenario 1b - destroy container */ + rc = daos_cont_destroy(arg->pool.poh, cname, 1, NULL); + assert_rc_equal(rc, 0); + + print_message("Scenario 1 (orphan HLM) completed successfully!\n"); + + /* + * ========================================================================== + * SCENARIO 2: HLM link count mismatch (one directory entry punched) + * ========================================================================== + */ + print_message("\n=== SCENARIO 2: HLM link count mismatch ===\n"); + + /* + * Part 2.1: Setup - Create container, file with 2 hardlinks + */ + print_message("Creating container and file with 2 hardlinks...\n"); + rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_CREAT | O_RDWR, NULL, &dfs); + assert_int_equal(rc, 0); + + /* Create a directory */ + rc = dfs_open(dfs, NULL, "hlm_test_dir", S_IFDIR | S_IWUSR | S_IRUSR | S_IXUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &dir); + assert_int_equal(rc, 0); + + /* Create a 
file */ + rc = dfs_open(dfs, dir, "testfile", S_IFREG | S_IWUSR | S_IRUSR, O_RDWR | O_CREAT | O_EXCL, + 0, 0, NULL, &file1); + assert_int_equal(rc, 0); + + /* Write some data to the file */ + { + d_sg_list_t sgl; + d_iov_t iov; + char *buf; + + D_ALLOC(buf, 1024); + assert_non_null(buf); + memset(buf, 'B', 1024); + sgl.sg_nr = 1; + sgl.sg_nr_out = 1; + sgl.sg_iovs = &iov; + d_iov_set(&iov, buf, 1024); + rc = dfs_write(dfs, file1, &sgl, 0, NULL); + assert_int_equal(rc, 0); + D_FREE(buf); + print_message(" Wrote 1024 bytes of data to file\n"); + } + + /* Get the file OID */ + rc = dfs_obj2id(file1, &file_oid); + assert_int_equal(rc, 0); + + /* Create a hardlink - this creates the HLM entry with link count = 2 */ + rc = dfs_link(dfs, file1, dir, "testfile_link", &file2, &stbuf); + assert_int_equal(rc, 0); + assert_int_equal(stbuf.st_nlink, 2); + print_message(" Created file " DF_OID " with nlink=2\n", DP_OID(file_oid)); + + /* Get the directory OID for later use */ + rc = dfs_obj2id(dir, &dir_oid); + assert_int_equal(rc, 0); + + rc = dfs_release(file1); + assert_int_equal(rc, 0); + rc = dfs_release(file2); + assert_int_equal(rc, 0); + rc = dfs_release(dir); + assert_int_equal(rc, 0); + + rc = dfs_disconnect(dfs); + assert_int_equal(rc, 0); + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* + * Part 2.2: Corrupt - Punch only ONE directory entry using low-level API + * This leaves the file with 1 dentry but HLM has link count = 2 + */ + print_message("Punching one directory entry (creating link count mismatch)...\n"); + rc = daos_cont_open(arg->pool.poh, cname, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, 0); + + { + daos_handle_t dir_oh; + d_iov_t dkey; + + /* Punch the file entries from the directory object */ + rc = daos_obj_open(coh, dir_oid, DAOS_OO_RW, &dir_oh, NULL); + assert_rc_equal(rc, 0); + + /* Punch only "testfile_link" entry - keep "testfile" */ + d_iov_set(&dkey, "testfile_link", strlen("testfile_link")); + rc = daos_obj_punch_dkeys(dir_oh, 
DAOS_TX_NONE, DAOS_COND_PUNCH, 1, &dkey, NULL); + assert_rc_equal(rc, 0); + + rc = daos_obj_close(dir_oh, NULL); + assert_rc_equal(rc, 0); + } + + rc = daos_cont_close(coh, NULL); + assert_rc_equal(rc, 0); + + /* + * Part 2.3: Test PRINT mode - should report link count mismatch (stored=2, cur=1) + */ + print_message("Testing PRINT mode - should report link count mismatch...\n"); + rc = dfs_cont_check(arg->pool.poh, cname, DFS_CHECK_PRINT, NULL); + assert_int_equal(rc, 0); + + /* Verify the file is still accessible and has wrong nlink */ + rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_RDWR, NULL, &dfs); + assert_int_equal(rc, 0); + + rc = dfs_lookup(dfs, "/hlm_test_dir/testfile", O_RDONLY, &file1, NULL, &stbuf); + assert_int_equal(rc, 0); + /* nlink should still be 2 because PRINT doesn't fix anything */ + print_message(" File nlink after PRINT mode: %lu (expected 2, unfixed)\n", + (unsigned long)stbuf.st_nlink); + assert_int_equal(stbuf.st_nlink, 2); + rc = dfs_release(file1); + assert_int_equal(rc, 0); + + rc = dfs_disconnect(dfs); + assert_int_equal(rc, 0); + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* + * Part 2.4: Test RELINK mode - should fix link count from 2 to 1 + */ + print_message("Testing RELINK mode - should fix link count to 1...\n"); + rc = + dfs_cont_check(arg->pool.poh, cname, DFS_CHECK_PRINT | DFS_CHECK_RELINK, "lf_mismatch"); + assert_int_equal(rc, 0); + + /* Verify the file now has correct nlink = 1 */ + rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_RDWR, NULL, &dfs); + assert_int_equal(rc, 0); + + rc = dfs_lookup(dfs, "/hlm_test_dir/testfile", O_RDONLY, &file1, NULL, &stbuf); + assert_int_equal(rc, 0); + /* nlink should now be 1 because RELINK fixed the link count */ + print_message(" File nlink after RELINK mode: %lu (expected 1, fixed)\n", + (unsigned long)stbuf.st_nlink); + assert_int_equal(stbuf.st_nlink, 1); + rc = 
dfs_release(file1); + assert_int_equal(rc, 0); + + /* + * Verify that lost+found/lf_mismatch directory is empty since + * no files were orphaned - only link count was fixed. + * Note: dfs_cont_check creates the directory regardless, so we check it's empty. + */ + rc = dfs_lookup(dfs, "/lost+found/lf_mismatch", O_RDONLY, &dir, NULL, NULL); + assert_int_equal(rc, 0); + { + daos_anchor_t anchor = {0}; + uint32_t nr = 1; + struct dirent ents[1]; + + rc = dfs_readdir(dfs, dir, &anchor, &nr, ents); + assert_int_equal(rc, 0); + if (nr == 0) { + print_message(" /lost+found/lf_mismatch is empty (as expected)\n"); + } else { + print_message( + " ERROR: /lost+found/lf_mismatch has entries (expected 0)\n"); + assert_int_equal(nr, 0); + } + } + rc = dfs_release(dir); + assert_int_equal(rc, 0); + + rc = dfs_disconnect(dfs); + assert_int_equal(rc, 0); + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* Cleanup scenario 2 - destroy container */ + rc = daos_cont_destroy(arg->pool.poh, cname, 1, NULL); + assert_rc_equal(rc, 0); + + print_message("Scenario 2 (link count mismatch) completed successfully!\n"); + + /* + * ========================================================================== + * SCENARIO 3: Missing hardlink bit in directory entry + * ========================================================================== + */ + print_message("\n=== SCENARIO 3: Missing hardlink bit in dentry ===\n"); + + /* + * Part 3.1: Setup - Create container, file with mode 555, then create hardlink + */ + print_message("Creating container and file with mode 555...\n"); + rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_CREAT | O_RDWR, NULL, &dfs); + assert_int_equal(rc, 0); + + /* Create a directory */ + rc = dfs_open(dfs, NULL, "hlm_test_dir", S_IFDIR | S_IWUSR | S_IRUSR | S_IXUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &dir); + assert_int_equal(rc, 0); + + /* Create a file with mode 555 */ + rc = dfs_open(dfs, dir, "testfile", 
S_IFREG | 0555, O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, + &file1); + assert_int_equal(rc, 0); + + /* Write some data to the file */ + { + d_sg_list_t sgl; + d_iov_t iov; + char *buf; + + D_ALLOC(buf, 1024); + assert_non_null(buf); + memset(buf, 'C', 1024); + sgl.sg_nr = 1; + sgl.sg_nr_out = 1; + sgl.sg_iovs = &iov; + d_iov_set(&iov, buf, 1024); + rc = dfs_write(dfs, file1, &sgl, 0, NULL); + assert_int_equal(rc, 0); + D_FREE(buf); + print_message(" Wrote 1024 bytes of data to file\n"); + } + + /* Verify initial mode is 555 */ + rc = dfs_stat(dfs, dir, "testfile", &stbuf); + assert_int_equal(rc, 0); + print_message(" Initial file mode: 0%o\n", stbuf.st_mode & 0777); + assert_int_equal(stbuf.st_mode & 0777, 0555); + + /* Get the file OID */ + rc = dfs_obj2id(file1, &file_oid); + assert_int_equal(rc, 0); + + /* Create a hardlink */ + rc = dfs_link(dfs, file1, dir, "testfile_link", &file2, &stbuf); + assert_int_equal(rc, 0); + assert_int_equal(stbuf.st_nlink, 2); + print_message(" Created hardlink with nlink=2\n"); + + /* Change mode to 500 via the second handle */ + stbuf.st_mode = S_IFREG | 0500; + rc = dfs_osetattr(dfs, file2, &stbuf, DFS_SET_ATTR_MODE); + assert_int_equal(rc, 0); + print_message(" Changed mode to 500 via hardlink\n"); + + /* Verify both files now show mode 500 */ + rc = dfs_stat(dfs, dir, "testfile", &stbuf); + assert_int_equal(rc, 0); + print_message(" file1 mode after chmod: 0%o\n", stbuf.st_mode & 0777); + assert_int_equal(stbuf.st_mode & 0777, 0500); + + rc = dfs_stat(dfs, dir, "testfile_link", &stbuf); + assert_int_equal(rc, 0); + print_message(" file2 mode after chmod: 0%o\n", stbuf.st_mode & 0777); + assert_int_equal(stbuf.st_mode & 0777, 0500); + + /* Get the directory OID for later use */ + rc = dfs_obj2id(dir, &dir_oid); + assert_int_equal(rc, 0); + + rc = dfs_release(file1); + assert_int_equal(rc, 0); + rc = dfs_release(file2); + assert_int_equal(rc, 0); + rc = dfs_release(dir); + assert_int_equal(rc, 0); + + rc = dfs_disconnect(dfs); + 
assert_int_equal(rc, 0); + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* + * Part 3.2: Corrupt - Clear the hardlink bit from testfile's dentry + */ + print_message("Clearing hardlink bit from testfile's dentry...\n"); + rc = daos_cont_open(arg->pool.poh, cname, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, 0); + + { + daos_handle_t dir_oh; + d_iov_t dkey; + d_iov_t akey; + d_sg_list_t sgl; + d_iov_t iov; + daos_iod_t iod; + daos_recx_t recx; + mode_t mode; + + /* Open the directory object */ + rc = daos_obj_open(coh, dir_oid, DAOS_OO_RW, &dir_oh, NULL); + assert_rc_equal(rc, 0); + + /* Fetch the current mode */ + d_iov_set(&dkey, "testfile", strlen("testfile")); + d_iov_set(&akey, "DFS_INODE", strlen("DFS_INODE")); + d_iov_set(&iov, &mode, sizeof(mode_t)); + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + sgl.sg_iovs = &iov; + recx.rx_idx = 0; /* MODE_IDX = 0 */ + recx.rx_nr = sizeof(mode_t); + iod.iod_name = akey; + iod.iod_nr = 1; + iod.iod_recxs = &recx; + iod.iod_type = DAOS_IOD_ARRAY; + iod.iod_size = 1; + + rc = daos_obj_fetch(dir_oh, DAOS_TX_NONE, 0, &dkey, 1, &iod, &sgl, NULL, NULL); + assert_rc_equal(rc, 0); + print_message(" Fetched mode from dentry: 0x%x\n", mode); + + /* Clear the hardlink bit (MODE_HARDLINK_BIT = 1U << 31) */ + mode &= ~(1U << 31); + print_message(" Mode after clearing hardlink bit: 0x%x\n", mode); + + /* Update the mode */ + rc = daos_obj_update(dir_oh, DAOS_TX_NONE, 0, &dkey, 1, &iod, &sgl, NULL); + assert_rc_equal(rc, 0); + print_message(" Updated dentry with cleared hardlink bit\n"); + + rc = daos_obj_close(dir_oh, NULL); + assert_rc_equal(rc, 0); + } + + rc = daos_cont_close(coh, NULL); + assert_rc_equal(rc, 0); + + /* + * Part 3.3: Test PRINT mode - should report missing hardlink bit + */ + print_message("Testing PRINT mode - should report missing hardlink bit...\n"); + rc = dfs_cont_check(arg->pool.poh, cname, DFS_CHECK_PRINT, NULL); + assert_int_equal(rc, 0); + + /* Verify the file still works but has inconsistent state */ + 
rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_RDWR, NULL, &dfs); + assert_int_equal(rc, 0); + + rc = dfs_lookup(dfs, "/hlm_test_dir", O_RDWR, &dir, NULL, NULL); + assert_int_equal(rc, 0); + rc = dfs_release(dir); + assert_int_equal(rc, 0); + + rc = dfs_disconnect(dfs); + assert_int_equal(rc, 0); + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* + * Part 3.4: Test RELINK mode - should fix the hardlink bit + */ + print_message("Testing RELINK mode - should fix hardlink bit...\n"); + rc = dfs_cont_check(arg->pool.poh, cname, DFS_CHECK_PRINT | DFS_CHECK_RELINK, "lf_hlbit"); + assert_int_equal(rc, 0); + + /* Verify the hardlink bit is restored by fetching the dentry mode directly */ + rc = daos_cont_open(arg->pool.poh, cname, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, 0); + + { + daos_handle_t dir_oh; + d_iov_t dkey; + d_iov_t akey; + d_sg_list_t sgl; + d_iov_t iov; + daos_iod_t iod; + daos_recx_t recx; + mode_t mode; + + /* Open the directory object */ + rc = daos_obj_open(coh, dir_oid, DAOS_OO_RO, &dir_oh, NULL); + assert_rc_equal(rc, 0); + + /* Fetch the mode */ + d_iov_set(&dkey, "testfile", strlen("testfile")); + d_iov_set(&akey, "DFS_INODE", strlen("DFS_INODE")); + d_iov_set(&iov, &mode, sizeof(mode_t)); + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + sgl.sg_iovs = &iov; + recx.rx_idx = 0; /* MODE_IDX = 0 */ + recx.rx_nr = sizeof(mode_t); + iod.iod_name = akey; + iod.iod_nr = 1; + iod.iod_recxs = &recx; + iod.iod_type = DAOS_IOD_ARRAY; + iod.iod_size = 1; + + rc = daos_obj_fetch(dir_oh, DAOS_TX_NONE, 0, &dkey, 1, &iod, &sgl, NULL, NULL); + assert_rc_equal(rc, 0); + print_message(" Dentry mode after RELINK: 0x%x\n", mode); + /* Verify hardlink bit is set (MODE_HARDLINK_BIT = 1U << 31) */ + assert_true((mode & (1U << 31)) != 0); + print_message(" Hardlink bit is now set!\n"); + + rc = daos_obj_close(dir_oh, NULL); + assert_rc_equal(rc, 0); + } + + rc = daos_cont_close(coh, NULL); + assert_rc_equal(rc, 
0); + + /* + * Part 3.5: Verify both files show mode 500 via DFS APIs + */ + rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_RDWR, NULL, &dfs); + assert_int_equal(rc, 0); + + rc = dfs_lookup(dfs, "/hlm_test_dir", O_RDWR, &dir, NULL, NULL); + assert_int_equal(rc, 0); + + rc = dfs_stat(dfs, dir, "testfile", &stbuf); + assert_int_equal(rc, 0); + print_message(" file1 mode after repair: 0%o (expected 0500)\n", stbuf.st_mode & 0777); + assert_int_equal(stbuf.st_mode & 0777, 0500); + + rc = dfs_stat(dfs, dir, "testfile_link", &stbuf); + assert_int_equal(rc, 0); + print_message(" file2 mode after repair: 0%o (expected 0500)\n", stbuf.st_mode & 0777); + assert_int_equal(stbuf.st_mode & 0777, 0500); + + rc = dfs_release(dir); + assert_int_equal(rc, 0); + + rc = dfs_disconnect(dfs); + assert_int_equal(rc, 0); + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* Cleanup scenario 3 - destroy container */ + rc = daos_cont_destroy(arg->pool.poh, cname, 1, NULL); + assert_rc_equal(rc, 0); + + print_message("Scenario 3 (missing hardlink bit) completed successfully!\n"); + +#if 0 /* REVISIT */ + /* + * ========================================================================== + * SCENARIO 4: Spurious hardlink bit on non-hardlink file + * ========================================================================== + */ + print_message("\n=== SCENARIO 4: Spurious hardlink bit on regular file ===\n"); + + /* + * Part 4.1: Setup - Create container and a regular file (no hardlink) + */ + print_message("Creating container and regular file (no hardlink)...\n"); + rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_CREAT | O_RDWR, NULL, &dfs); + assert_int_equal(rc, 0); + + /* Create a directory */ + rc = dfs_open(dfs, NULL, "hlm_test_dir", S_IFDIR | S_IWUSR | S_IRUSR | S_IXUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &dir); + assert_int_equal(rc, 0); + + /* Create a regular file (NOT 
a hardlink) */ + rc = dfs_open(dfs, dir, "testfile", S_IFREG | 0644, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &file1); + assert_int_equal(rc, 0); + + /* Write some data to the file */ + { + d_sg_list_t sgl; + d_iov_t iov; + char *buf; + + D_ALLOC(buf, 1024); + assert_non_null(buf); + memset(buf, 'D', 1024); + sgl.sg_nr = 1; + sgl.sg_nr_out = 1; + sgl.sg_iovs = &iov; + d_iov_set(&iov, buf, 1024); + rc = dfs_write(dfs, file1, &sgl, 0, NULL); + assert_int_equal(rc, 0); + D_FREE(buf); + print_message(" Wrote 1024 bytes of data to file\n"); + } + + /* Get the directory OID for later use */ + rc = dfs_obj2id(dir, &dir_oid); + assert_int_equal(rc, 0); + + rc = dfs_release(file1); + assert_int_equal(rc, 0); + rc = dfs_release(dir); + assert_int_equal(rc, 0); + + rc = dfs_disconnect(dfs); + assert_int_equal(rc, 0); + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* + * Part 4.2: Corrupt - Set the hardlink bit on the dentry + */ + print_message("Setting hardlink bit on regular file's dentry...\n"); + rc = daos_cont_open(arg->pool.poh, cname, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, 0); + + { + daos_handle_t dir_oh; + d_iov_t dkey; + d_iov_t akey; + d_sg_list_t sgl; + d_iov_t iov; + daos_iod_t iod; + daos_recx_t recx; + mode_t mode; + + /* Open the directory object */ + rc = daos_obj_open(coh, dir_oid, DAOS_OO_RW, &dir_oh, NULL); + assert_rc_equal(rc, 0); + + /* Fetch the current mode */ + d_iov_set(&dkey, "testfile", strlen("testfile")); + d_iov_set(&akey, "DFS_INODE", strlen("DFS_INODE")); + d_iov_set(&iov, &mode, sizeof(mode_t)); + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + sgl.sg_iovs = &iov; + recx.rx_idx = 0; /* MODE_IDX = 0 */ + recx.rx_nr = sizeof(mode_t); + iod.iod_name = akey; + iod.iod_nr = 1; + iod.iod_recxs = &recx; + iod.iod_type = DAOS_IOD_ARRAY; + iod.iod_size = 1; + + rc = daos_obj_fetch(dir_oh, DAOS_TX_NONE, 0, &dkey, 1, &iod, &sgl, NULL, NULL); + assert_rc_equal(rc, 0); + print_message(" Fetched mode from dentry: 0x%x\n", mode); + + /* Set the 
hardlink bit (MODE_HARDLINK_BIT = 1U << 31) */ + mode |= (1U << 31); + print_message(" Mode after setting hardlink bit: 0x%x\n", mode); + + /* Update the mode */ + rc = daos_obj_update(dir_oh, DAOS_TX_NONE, 0, &dkey, 1, &iod, &sgl, NULL); + assert_rc_equal(rc, 0); + print_message(" Updated dentry with spurious hardlink bit\n"); + + rc = daos_obj_close(dir_oh, NULL); + assert_rc_equal(rc, 0); + } + + rc = daos_cont_close(coh, NULL); + assert_rc_equal(rc, 0); + + /* + * Part 4.3: Verify dfs_osetattr fails on corrupted file + */ + print_message("Verifying dfs_osetattr fails on corrupted file...\n"); + rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_RDWR, NULL, &dfs); + assert_int_equal(rc, 0); + + rc = dfs_lookup(dfs, "/hlm_test_dir", O_RDWR, &dir, NULL, NULL); + assert_int_equal(rc, 0); + + rc = dfs_lookup_rel(dfs, dir, "testfile", O_RDWR, &file1, NULL, NULL); + assert_int_equal(rc, 0); + + /* Try to set mode - should fail because file claims to be hardlink but no HLM entry */ + stbuf.st_mode = S_IFREG | 0600; + rc = dfs_osetattr(dfs, file1, &stbuf, DFS_SET_ATTR_MODE); + print_message(" dfs_osetattr returned: %d (expected non-zero failure)\n", rc); + assert_int_not_equal(rc, 0); + + rc = dfs_release(file1); + assert_int_equal(rc, 0); + rc = dfs_release(dir); + assert_int_equal(rc, 0); + + rc = dfs_disconnect(dfs); + assert_int_equal(rc, 0); + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* + * Part 4.4: Test PRINT mode - should report spurious hardlink bit + */ + print_message("Testing PRINT mode - should report spurious hardlink bit...\n"); + rc = dfs_cont_check(arg->pool.poh, cname, DFS_CHECK_PRINT, NULL); + assert_int_equal(rc, 0); + + /* + * Part 4.5: Test RELINK mode - should clear the hardlink bit + */ + print_message("Testing RELINK mode - should clear spurious hardlink bit...\n"); + rc = dfs_cont_check(arg->pool.poh, cname, DFS_CHECK_PRINT | DFS_CHECK_RELINK, "lf_spurious"); + assert_int_equal(rc, 0); 
+ + /* + * Part 4.6: Verify dfs_osetattr now succeeds + */ + print_message("Verifying dfs_osetattr succeeds after repair...\n"); + rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_RDWR, NULL, &dfs); + assert_int_equal(rc, 0); + + rc = dfs_lookup(dfs, "/hlm_test_dir", O_RDWR, &dir, NULL, NULL); + assert_int_equal(rc, 0); + + rc = dfs_lookup_rel(dfs, dir, "testfile", O_RDWR, &file1, NULL, NULL); + assert_int_equal(rc, 0); + + /* Now dfs_osetattr should succeed because hardlink bit is cleared */ + stbuf.st_mode = S_IFREG | 0600; + rc = dfs_osetattr(dfs, file1, &stbuf, DFS_SET_ATTR_MODE); + print_message(" dfs_osetattr returned: %d (expected 0)\n", rc); + assert_int_equal(rc, 0); + + /* Verify mode was changed */ + rc = dfs_ostat(dfs, file1, &stbuf); + assert_int_equal(rc, 0); + print_message(" File mode after dfs_osetattr: 0%o (expected 0600)\n", stbuf.st_mode & 0777); + assert_int_equal(stbuf.st_mode & 0777, 0600); + + rc = dfs_release(file1); + assert_int_equal(rc, 0); + rc = dfs_release(dir); + assert_int_equal(rc, 0); + + rc = dfs_disconnect(dfs); + assert_int_equal(rc, 0); + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* Cleanup scenario 4 - destroy container */ + rc = daos_cont_destroy(arg->pool.poh, cname, 1, NULL); + assert_rc_equal(rc, 0); + + print_message("Scenario 4 (spurious hardlink bit) completed successfully!\n"); +#endif + + /* + * ========================================================================== + * SCENARIO 5: Hardlink bit on directory or symlink + * ========================================================================== + */ + print_message("\n=== SCENARIO 5: Hardlink bit on directory or symlink ===\n"); + + /* + * Part 5.1: Setup - Create container with a directory and a symlink + */ + print_message("Creating container with directory and symlink...\n"); + rc = dfs_init(); + assert_int_equal(rc, 0); + rc = dfs_connect(arg->pool.pool_str, arg->group, cname, O_CREAT | O_RDWR, 
NULL, &dfs); + assert_int_equal(rc, 0); + + /* Create a parent directory */ + rc = dfs_open(dfs, NULL, "parent_dir", S_IFDIR | S_IWUSR | S_IRUSR | S_IXUSR, + O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, &dir); + assert_int_equal(rc, 0); + + /* Create a test directory */ + rc = dfs_open(dfs, dir, "test_dir", S_IFDIR | 0755, O_RDWR | O_CREAT | O_EXCL, 0, 0, NULL, + &file1); + assert_int_equal(rc, 0); + rc = dfs_release(file1); + assert_int_equal(rc, 0); + + /* Create a symlink */ + rc = dfs_open(dfs, dir, "test_symlink", S_IFLNK | 0777, O_RDWR | O_CREAT | O_EXCL, 0, 0, + "target", &file2); + assert_int_equal(rc, 0); + rc = dfs_release(file2); + assert_int_equal(rc, 0); + + /* Get the parent directory OID for later use */ + rc = dfs_obj2id(dir, &dir_oid); + assert_int_equal(rc, 0); + + rc = dfs_release(dir); + assert_int_equal(rc, 0); + + rc = dfs_disconnect(dfs); + assert_int_equal(rc, 0); + rc = dfs_fini(); + assert_int_equal(rc, 0); + + /* + * Part 5.2: Corrupt - Set the hardlink bit on directory and symlink + */ + print_message("Setting hardlink bit on directory and symlink dentries...\n"); + rc = daos_cont_open(arg->pool.poh, cname, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, 0); + + { + daos_handle_t parent_oh; + d_iov_t dkey; + d_iov_t akey; + d_sg_list_t sgl; + d_iov_t iov; + daos_iod_t iod; + daos_recx_t recx; + mode_t mode; + + /* Open the parent directory object */ + rc = daos_obj_open(coh, dir_oid, DAOS_OO_RW, &parent_oh, NULL); + assert_rc_equal(rc, 0); + + /* Set hardlink bit on directory "test_dir" */ + d_iov_set(&dkey, "test_dir", strlen("test_dir")); + d_iov_set(&akey, "DFS_INODE", strlen("DFS_INODE")); + d_iov_set(&iov, &mode, sizeof(mode_t)); + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + sgl.sg_iovs = &iov; + recx.rx_idx = 0; /* MODE_IDX = 0 */ + recx.rx_nr = sizeof(mode_t); + iod.iod_name = akey; + iod.iod_nr = 1; + iod.iod_recxs = &recx; + iod.iod_type = DAOS_IOD_ARRAY; + iod.iod_size = 1; + + rc = daos_obj_fetch(parent_oh, DAOS_TX_NONE, 0, &dkey, 
1, &iod, &sgl, NULL, NULL); + assert_rc_equal(rc, 0); + print_message(" Directory mode before: 0x%x\n", mode); + + mode |= (1U << 31); /* MODE_HARDLINK_BIT */ + print_message(" Directory mode after setting hardlink bit: 0x%x\n", mode); + + rc = daos_obj_update(parent_oh, DAOS_TX_NONE, 0, &dkey, 1, &iod, &sgl, NULL); + assert_rc_equal(rc, 0); + + /* Set hardlink bit on symlink "test_symlink" */ + d_iov_set(&dkey, "test_symlink", strlen("test_symlink")); + + rc = daos_obj_fetch(parent_oh, DAOS_TX_NONE, 0, &dkey, 1, &iod, &sgl, NULL, NULL); + assert_rc_equal(rc, 0); + print_message(" Symlink mode before: 0x%x\n", mode); + + mode |= (1U << 31); /* MODE_HARDLINK_BIT */ + print_message(" Symlink mode after setting hardlink bit: 0x%x\n", mode); + + rc = daos_obj_update(parent_oh, DAOS_TX_NONE, 0, &dkey, 1, &iod, &sgl, NULL); + assert_rc_equal(rc, 0); + + print_message(" Updated both dentries with spurious hardlink bit\n"); + + rc = daos_obj_close(parent_oh, NULL); + assert_rc_equal(rc, 0); + } + + rc = daos_cont_close(coh, NULL); + assert_rc_equal(rc, 0); + + /* + * Part 5.3: Test PRINT mode - should report spurious hardlink bits + */ + print_message("Testing PRINT mode - should report spurious hardlink bits...\n"); + rc = dfs_cont_check(arg->pool.poh, cname, DFS_CHECK_PRINT, NULL); + assert_int_equal(rc, 0); + + /* + * Part 5.4: Test RELINK mode - should clear the hardlink bits + */ + print_message("Testing RELINK mode - should clear spurious hardlink bits...\n"); + rc = dfs_cont_check(arg->pool.poh, cname, DFS_CHECK_PRINT | DFS_CHECK_RELINK, NULL); + assert_int_equal(rc, 0); + + /* + * Part 5.5: Verify hardlink bit is cleared by fetching dentries directly + */ + print_message("Verifying hardlink bit cleared via direct dentry fetch...\n"); + rc = daos_cont_open(arg->pool.poh, cname, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, 0); + + { + daos_handle_t parent_oh; + d_iov_t dkey; + d_iov_t akey; + d_sg_list_t sgl; + d_iov_t iov; + daos_iod_t iod; + daos_recx_t 
recx; + mode_t mode; + + /* Open the parent directory object */ + rc = daos_obj_open(coh, dir_oid, DAOS_OO_RO, &parent_oh, NULL); + assert_rc_equal(rc, 0); + + /* Fetch directory mode and verify hardlink bit is cleared */ + d_iov_set(&dkey, "test_dir", strlen("test_dir")); + d_iov_set(&akey, "DFS_INODE", strlen("DFS_INODE")); + d_iov_set(&iov, &mode, sizeof(mode_t)); + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + sgl.sg_iovs = &iov; + recx.rx_idx = 0; /* MODE_IDX = 0 */ + recx.rx_nr = sizeof(mode_t); + iod.iod_name = akey; + iod.iod_nr = 1; + iod.iod_recxs = &recx; + iod.iod_type = DAOS_IOD_ARRAY; + iod.iod_size = 1; + + rc = daos_obj_fetch(parent_oh, DAOS_TX_NONE, 0, &dkey, 1, &iod, &sgl, NULL, NULL); + assert_rc_equal(rc, 0); + print_message(" Directory mode from dentry: 0x%x\n", mode); + print_message(" Expected: S_IFDIR | 0755 = 0x%x (no hardlink bit)\n", + S_IFDIR | 0755); + assert_true(S_ISDIR(mode)); + assert_int_equal(mode & (1U << 31), 0); /* Hardlink bit should be cleared */ + assert_int_equal(mode & 0777, 0755); + print_message(" Directory hardlink bit verified cleared\n"); + + /* Fetch symlink mode and verify hardlink bit is cleared */ + d_iov_set(&dkey, "test_symlink", strlen("test_symlink")); + + rc = daos_obj_fetch(parent_oh, DAOS_TX_NONE, 0, &dkey, 1, &iod, &sgl, NULL, NULL); + assert_rc_equal(rc, 0); + print_message(" Symlink mode from dentry: 0x%x\n", mode); + assert_true(S_ISLNK(mode)); + assert_int_equal(mode & (1U << 31), 0); /* Hardlink bit should be cleared */ + print_message(" Symlink hardlink bit verified cleared\n"); + + rc = daos_obj_close(parent_oh, NULL); + assert_rc_equal(rc, 0); + } + + rc = daos_cont_close(coh, NULL); + assert_rc_equal(rc, 0); + + /* Cleanup scenario 5 - destroy container */ + rc = daos_cont_destroy(arg->pool.poh, cname, 1, NULL); + assert_rc_equal(rc, 0); + + print_message("Scenario 5 (hardlink bit on directory/symlink) completed successfully!\n"); + print_message("\ndfs_test_checker_hlm completed successfully!\n"); +} + 
+/*
+ * Table of DFS unit tests handed to cmocka.
+ *
+ * Each entry is {name, test function, setup function, teardown function}.
+ * The setup function selects sync/async mode (async_disable or async_enable)
+ * and test_case_teardown is the per-test teardown.  Every entry must supply
+ * both: with only three initializers the third one lands in the setup slot
+ * and the teardown slot is left NULL.
+ */
static const struct CMUnitTest dfs_unit_tests[] = {
-	{ "DFS_UNIT_TEST1: DFS mount / umount",
-	  dfs_test_mount, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST2: DFS container modes",
-	  dfs_test_modes, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST3: DFS lookup / lookup_rel",
-	  dfs_test_lookup, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST4: Simple Symlinks",
-	  dfs_test_syml, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST5: Symlinks with / without O_NOFOLLOW",
-	  dfs_test_syml_follow, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST6: multi-threads read shared file",
-	  dfs_test_read_shared_file, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST7: DFS lookupx",
-	  dfs_test_lookupx, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST8: DFS IO sync error code",
-	  dfs_test_io_error_code, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST9: DFS IO async error code",
-	  dfs_test_io_error_code, async_enable, test_case_teardown},
-	{ "DFS_UNIT_TEST10: multi-threads mkdir same dir",
-	  dfs_test_mt_mkdir, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST11: Simple rename",
-	  dfs_test_rename, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST12: DFS API compat",
-	  dfs_test_compat, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST13: DFS l2g/g2l_all",
-	  dfs_test_handles, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST14: multi-threads connect to same container",
-	  dfs_test_mt_connect, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST15: DFS chown",
-	  dfs_test_chown, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST16: DFS stat mtime",
-	  dfs_test_mtime, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST17: multi-threads async IO",
-	  dfs_test_async_io_th, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST18: async IO",
-	  dfs_test_async_io, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST19: DFS readdir",
-	  dfs_test_readdir, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST20: dfs oclass hints",
-	  dfs_test_oclass_hints, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST21: dfs multiple pools",
-	  dfs_test_multiple_pools, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST22: dfs extended attributes",
-	  dfs_test_xattrs, test_case_teardown},
-	{ "DFS_UNIT_TEST23: dfs MWC container checker",
-	  dfs_test_checker, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST24: dfs MWC SB fix",
-	  dfs_test_fix_sb, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST25: dfs MWC root fix",
-	  dfs_test_relink_root, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST26: dfs MWC chunk size fix",
-	  dfs_test_fix_chunk_size, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST27: dfs pipeline find",
-	  dfs_test_pipeline_find, async_disable, test_case_teardown},
-	{ "DFS_UNIT_TEST28: dfs open/lookup flags",
-	  dfs_test_oflags, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST1: DFS mount / umount", dfs_test_mount, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST2: DFS container modes", dfs_test_modes, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST3: DFS lookup / lookup_rel", dfs_test_lookup, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST4: Simple Symlinks", dfs_test_syml, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST5: Symlinks with / without O_NOFOLLOW", dfs_test_syml_follow, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST6: multi-threads read shared file", dfs_test_read_shared_file, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST7: DFS lookupx", dfs_test_lookupx, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST8: DFS IO sync error code", dfs_test_io_error_code, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST9: DFS IO async error code", dfs_test_io_error_code, async_enable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST10: multi-threads mkdir same dir", dfs_test_mt_mkdir, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST11: Simple rename", dfs_test_rename, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST12: DFS API compat", dfs_test_compat, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST13: DFS l2g/g2l_all", dfs_test_handles, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST14: multi-threads connect to same container", dfs_test_mt_connect, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST15: DFS chown", dfs_test_chown, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST16: DFS stat mtime", dfs_test_mtime, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST17: multi-threads async IO", dfs_test_async_io_th, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST18: async IO", dfs_test_async_io, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST19: DFS readdir", dfs_test_readdir, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST20: dfs oclass hints", dfs_test_oclass_hints, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST21: dfs multiple pools", dfs_test_multiple_pools, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST22: dfs extended attributes", dfs_test_xattrs, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST23: dfs MWC container checker", dfs_test_checker, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST24: dfs MWC SB fix", dfs_test_fix_sb, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST25: dfs MWC root fix", dfs_test_relink_root, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST26: dfs MWC chunk size fix", dfs_test_fix_chunk_size, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST27: dfs pipeline find", dfs_test_pipeline_find, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST28: dfs open/lookup flags", dfs_test_oflags, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST29: dfs hardlink", dfs_test_hardlink, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST30: dfs hardlink chmod/chown", dfs_test_hardlink_chmod_chown, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST31: dfs hardlink rename", dfs_test_hardlink_rename, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST32: dfs hardlink xattr", dfs_test_hardlink_xattr, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST33: dfs exchange", dfs_test_exchange, async_disable, test_case_teardown},
+    {"DFS_UNIT_TEST34: dfs hardlink access", dfs_test_hardlink_access, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST35: dfs hardlink ostatx", dfs_test_hardlink_ostatx, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST36: dfs hardlink osetattr", dfs_test_hardlink_osetattr, async_disable,
+     test_case_teardown},
+    {"DFS_UNIT_TEST37: dfs checker HLM", dfs_test_checker_hlm, async_disable, test_case_teardown},
};

static int