Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 153 additions & 32 deletions src/chk/chk_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,6 @@ btr_ops_t chk_pool_ops = {
struct chk_pending_bundle {
struct chk_instance *cpb_ins;
d_list_t *cpb_pool_head;
d_list_t *cpb_rank_head;
uuid_t cpb_uuid;
d_rank_t cpb_rank;
uint32_t cpb_class;
Expand Down Expand Up @@ -247,11 +246,6 @@ chk_pending_alloc(struct btr_instance *tins, d_iov_t *key_iov, d_iov_t *val_iov,

d_list_add_tail(&cpr->cpr_ins_link, &cpb->cpb_ins->ci_pending_list);

if (cpb->cpb_rank_head != NULL)
d_list_add_tail(&cpr->cpr_rank_link, cpb->cpb_rank_head);
else
D_INIT_LIST_HEAD(&cpr->cpr_rank_link);

rec->rec_off = umem_ptr2off(&tins->ti_umm, cpr);
d_list_add_tail(&cpr->cpr_pool_link, cpb->cpb_pool_head);

Expand Down Expand Up @@ -279,7 +273,6 @@ chk_pending_free(struct btr_instance *tins, struct btr_record *rec, void *args)

rec->rec_off = UMOFF_NULL;
d_list_del_init(&cpr->cpr_pool_link);
d_list_del_init(&cpr->cpr_rank_link);
d_list_del_init(&cpr->cpr_ins_link);

if (val_iov != NULL) {
Expand Down Expand Up @@ -931,30 +924,9 @@ chk_pool_shard_cleanup(struct chk_instance *ins)
}
}

/**
 * Find the pending record registered under sequence number \a seq.
 *
 * The lookup against the instance's pending tree is done while holding
 * ins->ci_abt_lock in read mode; the lock is dropped before returning, so the
 * pointer handed back through \a cpr is used by the caller without that lock.
 *
 * \param[in]  ins	Check instance that owns the pending tree.
 * \param[in]  seq	Sequence number used as the tree key.
 * \param[out] cpr	Set to the found record on success, NULL otherwise.
 *
 * \return		The return value of dbtree_lookup(): zero when the
 *			record was found, non-zero otherwise.
 */
int
chk_pending_lookup(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr)
{
	d_iov_t	key;
	d_iov_t	val;
	int	rc;

	d_iov_set(&key, &seq, sizeof(seq));
	d_iov_set(&val, NULL, 0);

	ABT_rwlock_rdlock(ins->ci_abt_lock);
	rc = dbtree_lookup(ins->ci_pending_hdl, &key, &val);
	ABT_rwlock_unlock(ins->ci_abt_lock);

	/* Never leave the output pointer undefined, whatever the lookup result. */
	*cpr = (rc == 0) ? (struct chk_pending_rec *)val.iov_buf : NULL;

	return rc;
}

int
chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid,
uint64_t seq, uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options,
static int
chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, uuid_t uuid, uint64_t seq,
uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options,
struct chk_pending_rec **cpr)
{
struct chk_pending_bundle rbund;
Expand All @@ -967,7 +939,6 @@ chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_he

uuid_copy(rbund.cpb_uuid, uuid);
rbund.cpb_pool_head = pool_head;
rbund.cpb_rank_head = rank_head;
rbund.cpb_ins = ins;
rbund.cpb_seq = seq;
rbund.cpb_rank = rank;
Expand Down Expand Up @@ -1059,6 +1030,156 @@ chk_pending_wakeup(struct chk_instance *ins, struct chk_pending_rec *cpr)
return rc;
}

/**
 * Report one check result and, for interactive reports, optionally wait for
 * the decision.
 *
 * Steps, in order:
 *  - A zero cru_result is replaced with CHK__CHECK_RESULT__DRY_RUN when the
 *    instance has the CF_DRYRUN flag set.
 *  - A new sequence number is generated when *seq is zero, or when adding the
 *    pending record returns -DER_AGAIN.
 *  - For CIA_INTERACT reports, the pool record is looked up by cru_pool and a
 *    pending record is attached to that pool's pending list.
 *  - The report is forwarded through chk_report_upcall().
 *  - If \a decision is not NULL and a pending record exists, the caller blocks
 *    on the record's condition variable until its action is no longer
 *    CIA_INTERACT, or until the scheduler/record is marked as exiting.
 *
 * \param[in]	  ins		Check instance.
 * \param[in,out] cru		Report unit; cru_result may be rewritten for dry-run.
 * \param[in,out] seq		Report sequence; generated here when zero on entry.
 * \param[out]	  decision	Optional. When non-NULL, receives the chosen action
 *				once the interaction completes.
 *
 * \return	Zero on success, a non-zero error from the lookup/add/upcall steps,
 *		-DER_INVAL for an interactive report without pool, or 1 when the
 *		wait was abandoned because of exiting.
 */
int
chk_report(struct chk_instance *ins, struct chk_report_unit *cru, uint64_t *seq, int *decision)
{
	struct chk_pool_rec	*pool_rec = NULL;
	struct chk_pending_rec	*pending = NULL;
	d_iov_t			 key;
	d_iov_t			 val;
	int			 rc;

	CHK_IS_READY(ins);

	if (cru->cru_result == 0 && (ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) != 0)
		cru->cru_result = CHK__CHECK_RESULT__DRY_RUN;

	/* Keep a caller-supplied sequence; only generate one when none was given. */
	if (*seq != 0)
		goto have_seq;

new_seq:
	*seq = chk_report_seq_gen(ins);

have_seq:
	D_INFO("Report on %u (%s) with seq " DF_X64 " class %u, action %u, %s, result %d\n",
	       cru->cru_rank, ins->ci_is_leader ? "leader" : "engine", *seq, cru->cru_cla,
	       cru->cru_act, cru->cru_msg, cru->cru_result);

	if (cru->cru_act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) {
		/* An interactive report must be bound to a pool. */
		if (cru->cru_pool == NULL)
			D_GOTO(log, rc = -DER_INVAL);

		d_iov_set(&key, cru->cru_pool, sizeof(uuid_t));
		d_iov_set(&val, NULL, 0);
		rc = dbtree_lookup(ins->ci_pool_hdl, &key, &val);
		if (rc != 0)
			goto log;

		pool_rec = (struct chk_pool_rec *)val.iov_buf;

		rc = chk_pending_add(ins, &pool_rec->cpr_pending_list, *cru->cru_pool, *seq,
				     cru->cru_rank, cru->cru_cla, cru->cru_option_nr,
				     cru->cru_options, &pending);
		/* -DER_AGAIN: retry the whole sequence with a freshly generated seq. */
		if (unlikely(rc == -DER_AGAIN))
			goto new_seq;

		if (rc != 0)
			goto log;
	}

	rc = chk_report_upcall(cru->cru_gen, *seq, cru->cru_cla, cru->cru_act, cru->cru_result,
			       cru->cru_rank, cru->cru_target, cru->cru_pool, cru->cru_pool_label,
			       cru->cru_cont, cru->cru_cont_label, cru->cru_obj, cru->cru_dkey,
			       cru->cru_akey, cru->cru_msg, cru->cru_option_nr, cru->cru_options,
			       cru->cru_detail_nr, cru->cru_details);
	/*
	 * A racing "dmg check repair" may already have replaced the pending action,
	 * so only mark the pool as pending while the record still asks for interaction.
	 */
	if (rc == 0 && pool_rec != NULL &&
	    likely(pending->cpr_action == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT))
		pool_rec->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_PENDING;

log:
	if (rc != 0) {
		D_ERROR("Failed to handle report from rank %u (%s) with seq " DF_X64 ", class %u, "
			"action %u, handle_rc %d, report_rc %d\n",
			cru->cru_rank, ins->ci_is_leader ? "leader" : "engine", *seq, cru->cru_cla,
			cru->cru_act, cru->cru_result, rc);
		goto out;
	}

	/* Wait only when the caller wants the decision and there is a pending record. */
	if (decision != NULL && pending != NULL) {
		D_ASSERT(pending->cpr_busy);

		D_INFO("Need interaction for class %u with seq " DF_X64 "\n", cru->cru_cla, *seq);

		ABT_mutex_lock(pending->cpr_mutex);

		for (;;) {
			/* Some action other than CIA_INTERACT has been set: done. */
			if (pending->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) {
				*decision = pending->cpr_action;
				break;
			}

			/* Give up waiting when the scheduler or the record is going away. */
			if (!ins->ci_sched_running || ins->ci_sched_exiting ||
			    pending->cpr_exiting) {
				rc = 1;
				break;
			}

			ABT_cond_wait(pending->cpr_cond, pending->cpr_mutex);
		}

		ABT_mutex_unlock(pending->cpr_mutex);
	}

out:
	/*
	 * The pending record is kept only when the report succeeded and the caller
	 * did not ask to wait for the decision.
	 */
	if (pending != NULL && (rc != 0 || decision != NULL))
		chk_pending_destroy(ins, pending);

	/* Once nothing is pending for the pool any more, move it back to checking. */
	if (pool_rec != NULL &&
	    pool_rec->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING &&
	    d_list_empty(&pool_rec->cpr_pending_list))
		pool_rec->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING;

	return rc;
}

/**
 * Apply the decision \a act to the pending record identified by \a seq and
 * wake up whoever is waiting on it.
 *
 * \param[in] ins	Check instance.
 * \param[in] seq	Sequence number of the pending report.
 * \param[in] act	Action to store into the pending record.
 *
 * \return		Zero on success, otherwise the non-zero value returned
 *			by chk_pending_del().
 */
int
chk_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act)
{
	struct chk_pending_rec	*pending = NULL;
	int			 rc;

	CHK_IS_READY(ins);

	rc = chk_pending_del(ins, seq, &pending);
	if (rc != 0)
		return rc;

	/*
	 * The record is not released here: the waiter blocked on cpr_cond is
	 * the one that destroys it after being woken up.
	 */
	D_ASSERT(pending->cpr_busy);

	ABT_mutex_lock(pending->cpr_mutex);
	/*
	 * The decision is stored as is. The control plane is responsible for
	 * passing an action that is valid for the report options; otherwise the
	 * related inconsistency will be ignored.
	 */
	pending->cpr_action = act;
	ABT_cond_broadcast(pending->cpr_cond);
	ABT_mutex_unlock(pending->cpr_mutex);

	return rc;
}

/**
 * Entry point for applying a repair decision to a pending report.
 *
 * CIA_INTERACT is not accepted as a decision and yields -DER_INVAL. Any other
 * action is routed to the leader or to the engine handler, depending on what
 * chk_report_seq_leader() says about \a seq.
 *
 * \param[in] seq	Sequence number of the pending report.
 * \param[in] act	Decided action.
 *
 * \return		Zero on success; -DER_NONEXIST and -DER_NO_HDL from the
 *			handlers are also reported as zero. Other non-zero
 *			values are returned unchanged.
 */
int
chk_act(uint64_t seq, uint32_t act)
{
	int	rc;
	int	benign;

	if (likely(act != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT))
		rc = chk_report_seq_leader(seq) ? chk_leader_act(seq, act)
						: chk_engine_act(seq, act);
	else
		rc = -DER_INVAL;

	/* These two codes are logged at info level and are not passed to the caller. */
	benign = (rc == -DER_NONEXIST || rc == -DER_NO_HDL);

	D_CDEBUG(rc != 0 && !benign, DLOG_ERR, DLOG_INFO,
		 "CHK repair on rank %u, act %u, seq " DF_X64 ": " DF_RC "\n", dss_self_rank(), act,
		 seq, DP_RC(rc));

	return benign ? 0 : rc;
}

int
chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_property *prop)
{
Expand Down
Loading
Loading