Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/migtd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ pub mod driver;
pub mod event_log;
pub mod mig_policy;
pub mod migration;
pub mod quote;
pub mod ratls;
pub mod spdm;

Expand Down
4 changes: 1 addition & 3 deletions src/migtd/src/mig_policy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,7 @@ mod v2 {
LOCAL_TCB_INFO
.try_call_once(|| {
let policy = get_verified_policy().ok_or(PolicyError::InvalidParameter)?;
let tdx_report = tdx_tdcall::tdreport::tdcall_report(&[0u8; 64])
.map_err(|_| PolicyError::GetTdxReport)?;
let quote = attestation::get_quote(tdx_report.as_bytes())
let (quote, _report) = crate::quote::get_quote_with_retry(&[0u8; 64])
.map_err(|_| PolicyError::QuoteGeneration)?;
let (fmspc, suppl_data) = verify_quote(&quote, policy.get_collaterals())?;
setup_evaluation_data(fmspc, &suppl_data, policy, policy.get_collaterals())
Expand Down
114 changes: 114 additions & 0 deletions src/migtd/src/quote.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Copyright (c) Microsoft Corporation
//
// SPDX-License-Identifier: BSD-2-Clause-Patent

//! Quote generation with retry logic for handling security updates
//!
//! This module provides a resilient GetQuote flow that can handle impactless security
//! updates. If an update happens after the REPORT is retrieved but before the QUOTE
//! is generated, the Quoting Enclave may reject the REPORT. This module handles
//! such scenarios with simple exponential backoff retry.

#![cfg(feature = "attestation")]

use alloc::vec::Vec;

#[cfg(not(feature = "AzCVMEmu"))]
use tdx_tdcall::tdreport::tdcall_report;

#[cfg(feature = "AzCVMEmu")]
use tdx_tdcall_emu::tdreport::tdcall_report;

/// Delay before the first retry, in milliseconds (5 seconds).
///
/// The retry loop doubles this value after every failed quote attempt.
const INITIAL_DELAY_MS: u64 = 5_000;

/// Upper bound on the number of quote attempts before giving up.
///
/// Nine attempts allow at most eight waits (5 s, 10 s, ... 640 s), so the total
/// wait time is up to ~21 minutes with the 5 s initial delay.
const MAX_ATTEMPTS: u32 = 9;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bodzhang, please review this and confirm that a total wait of up to ~21 minutes is OK.


/// Error type for quote generation with retry
///
/// The type is a plain field-less enum, so it is cheap to copy and can be compared
/// directly in callers and tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuoteError {
    /// Failed to generate TD report
    ReportGenerationFailed,
    /// Quote generation failed after all retry attempts
    QuoteGenerationFailed,
}

impl core::fmt::Display for QuoteError {
    /// Writes a short human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let text = match self {
            QuoteError::ReportGenerationFailed => "failed to generate TD report",
            QuoteError::QuoteGenerationFailed => {
                "quote generation failed after all retry attempts"
            }
        };
        f.write_str(text)
    }
}

/// Get a quote with retry logic to handle potential security updates
///
/// Each attempt fetches a fresh TD REPORT and requests a quote over it. When the quote
/// request fails, the function waits and then retries with a new TD REPORT; the wait
/// starts at `INITIAL_DELAY_MS` and doubles after every failed attempt (exponential
/// backoff). A failure to obtain the TD REPORT itself is not retried.
///
/// # Arguments
/// * `additional_data` - The 64-byte additional data to include in the TD REPORT
///
/// # Returns
/// * `Ok((quote, report))` - The generated quote and the bytes of the TD REPORT used
///   for the successful attempt
/// * `Err(QuoteError::ReportGenerationFailed)` - If `tdcall_report` fails on any attempt
/// * `Err(QuoteError::QuoteGenerationFailed)` - If `attestation::get_quote` fails on all
///   `MAX_ATTEMPTS` attempts
pub fn get_quote_with_retry(additional_data: &[u8; 64]) -> Result<(Vec<u8>, Vec<u8>), QuoteError> {
// Wait applied before the next retry; doubled after each failed quote attempt.
let mut delay_ms = INITIAL_DELAY_MS;

for attempt in 1..=MAX_ATTEMPTS {
// Get TD REPORT (a new one on every attempt; a failure here aborts without retry)
let current_report = tdcall_report(additional_data).map_err(|e| {
log::error!("Failed to get TD report: {:?}\n", e);
QuoteError::ReportGenerationFailed
})?;

let report_bytes = current_report.as_bytes();

// Attempt to get quote
match attestation::get_quote(report_bytes) {
Ok(quote) => {
log::info!("Quote generated successfully\n");
// Return the report alongside the quote so the caller sees the exact report used.
return Ok((quote, report_bytes.to_vec()));
}
Err(e) => {
// Only wait when another attempt will follow; the final failure returns at once.
if attempt < MAX_ATTEMPTS {
log::warn!(
"GetQuote failed (attempt {}/{}): {:?}, retrying with delay of {}ms\n",
attempt,
MAX_ATTEMPTS,
e,
delay_ms
);
delay_milliseconds(delay_ms);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am OK with the retry.
@haitaohuang, Question on validation: Do we have test to trigger the retry flow, and ensure it works as expected?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did test on a lab blade where getQuote fails and the mechanism worked as expected. Also had a mock test that add a forced failure on first try in my branch but took out because I did not want to mix with production code and make it harder to read. If you want, I can add back.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see not problem to add more unit test (mock). You can add a separated commit for adding mock test.
Do you mean you wait for about 21 min and finally it can finish?

// Exponential backoff: the next retry waits twice as long.
delay_ms *= 2;
} else {
log::error!("GetQuote failed after {} attempts: {:?}\n", MAX_ATTEMPTS, e);
return Err(QuoteError::QuoteGenerationFailed);
}
}
}
}

// Should be unreachable because the final attempt returns above on failure.
Err(QuoteError::QuoteGenerationFailed)
}

/// Delay for the specified number of milliseconds
///
/// Emulation build: puts the calling thread to sleep via the standard library.
#[cfg(feature = "AzCVMEmu")]
fn delay_milliseconds(ms: u64) {
    let pause = std::time::Duration::from_millis(ms);
    std::thread::sleep(pause);
}

/// Delay for the specified number of milliseconds
///
/// Non-emulated build: synchronously drives a `Timer` future to completion by polling
/// it by hand with a no-op waker, since no async executor is involved here.
#[cfg(not(feature = "AzCVMEmu"))]
fn delay_milliseconds(ms: u64) {
    use crate::driver::ticks::Timer;
    use core::future::Future;
    use core::pin::Pin;
    use core::task::{Context, Waker};
    use core::time::Duration;
    use td_payload::arch::apic::{disable, enable_and_hlt};

    let mut deadline = Timer::after(Duration::from_millis(ms));
    let noop_waker = Waker::noop();
    let mut poll_cx = Context::from_waker(&noop_waker);

    // Re-poll the timer after each wake-up until it reports completion.
    // NOTE(review): `enable_and_hlt` presumably enables interrupts and halts the CPU
    // until one arrives, and `disable` masks them again — confirm against td_payload.
    while Pin::new(&mut deadline).poll(&mut poll_cx).is_pending() {
        enable_and_hlt();
        disable();
    }
}
18 changes: 13 additions & 5 deletions src/migtd/src/ratls/server_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,12 +223,20 @@ pub fn client_rebinding<T: AsyncRead + AsyncWrite + Unpin>(
}

/// Generates a quote bound to `public_key`.
///
/// The SHA-384 digest of `public_key` is copied to the start of a zero-filled 64-byte
/// buffer, which is passed as the TD REPORT additional data to
/// `crate::quote::get_quote_with_retry`. The TD REPORT returned alongside the quote is
/// not needed here and is discarded.
///
/// # Errors
/// * Propagates the error from `digest_sha384` if hashing fails.
/// * Returns `RatlsError::GetQuote` if `get_quote_with_retry` fails.
fn gen_quote(public_key: &[u8]) -> Result<Vec<u8>> {
    let hash = digest_sha384(public_key).map_err(|e| {
        log::error!("Failed to compute SHA384 digest: {:?}\n", e);
        e
    })?;

    // The digest occupies the leading bytes; the remainder of the buffer stays zero.
    let mut additional_data = [0u8; 64];
    additional_data[..hash.len()].copy_from_slice(hash.as_ref());

    let (quote, _report) = crate::quote::get_quote_with_retry(&additional_data).map_err(|e| {
        log::error!("get_quote_with_retry failed: {:?}\n", e);
        RatlsError::GetQuote
    })?;

    Ok(quote)
}

pub fn gen_tdreport(public_key: &[u8]) -> Result<TdxReport> {
Expand Down Expand Up @@ -819,7 +827,6 @@ mod verify {
use crypto::ecdsa::ecdsa_verify;
use crypto::{Error as CryptoError, Result as CryptoResult};
use policy::PolicyError;
use tdx_tdcall::tdreport::TdxReport;

#[cfg(not(feature = "policy_v2"))]
pub fn verify_peer_cert(
Expand Down Expand Up @@ -1227,6 +1234,7 @@ mod verify {

#[cfg(feature = "policy_v2")]
fn verify_public_key_with_tdreport(tdreport: &[u8], public_key: &[u8]) -> CryptoResult<()> {
use tdx_tdcall::tdreport::TdxReport;
if cfg!(feature = "AzCVMEmu") {
// In AzCVMEmu mode, REPORTDATA is constructed differently.
// Bypass public key hash check in this development environment.
Expand Down
10 changes: 6 additions & 4 deletions src/migtd/src/spdm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,13 @@ pub fn gen_quote_spdm(report_data: &[u8]) -> Result<Vec<u8>, MigrationResult> {
// Generate the TD Report that contains the public key hash as nonce
let mut additional_data = [0u8; 64];
additional_data[..hash.len()].copy_from_slice(hash.as_ref());
let td_report = tdx_tdcall::tdreport::tdcall_report(&additional_data)?;

let res =
attestation::get_quote(td_report.as_bytes()).map_err(|_| MigrationResult::Unsupported)?;
Ok(res)
let (quote, _report) = crate::quote::get_quote_with_retry(&additional_data).map_err(|e| {
log::error!("get_quote_with_retry failed: {:?}\n", e);
MigrationResult::MutualAttestationError
})?;

Ok(quote)
}

const ECDSA_P384_SHA384_PRIVATE_KEY_LENGTH: usize = 0xb9;
Expand Down
Loading