Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions kernel/src/net/socket/inet/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,38 @@ impl BoundInner {
}
}

/// Check that a send destination is compatible with the concrete local address
/// an AF_INET6 socket is already bound to (Linux dual-stack rule).
///
/// Per Linux 6.6, an IPv6 socket bound to a specific address keeps that
/// address family for later sends:
/// - local is native IPv6, peer is IPv4-mapped: `ENETUNREACH`
/// - local is IPv4-mapped, peer is native IPv6: `EAFNOSUPPORT`
///
/// NOTE(review): IPv4-mapped addresses are presumably already normalized to the
/// `Ipv4` variant by the caller; this function only compares variants.
///
/// An unspecified local address (`::`) stays dual-stack, so it is accepted
/// without looking at the remote family.
///
/// # Errors
///
/// Returns `SystemError::ENETUNREACH` or `SystemError::EAFNOSUPPORT` for the two
/// mismatching family combinations listed above; every other combination is
/// `Ok(())`.
#[inline]
pub fn ensure_bound_dual_stack_remote_compatible(
    local_addr: smoltcp::wire::IpAddress,
    remote_addr: smoltcp::wire::IpAddress,
) -> Result<(), SystemError> {
    use smoltcp::wire::IpAddress;

    match (local_addr, remote_addr) {
        // Wildcard bind: nothing pins the family, any peer is acceptable.
        (local, _) if local.is_unspecified() => Ok(()),
        // Bound to an IPv6 address, peer is IPv4.
        (IpAddress::Ipv6(_), IpAddress::Ipv4(_)) => Err(SystemError::ENETUNREACH),
        // Bound to an IPv4 address, peer is IPv6.
        (IpAddress::Ipv4(_), IpAddress::Ipv6(_)) => Err(SystemError::EAFNOSUPPORT),
        // Same family on both sides.
        _ => Ok(()),
    }
}

#[inline]
pub fn get_iface_to_bind(
ip_addr: &smoltcp::wire::IpAddress,
Expand Down
23 changes: 22 additions & 1 deletion kernel/src/net/socket/inet/datagram/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ use core::sync::atomic::{
};
use smoltcp::wire::{IpAddress::*, IpEndpoint, IpListenEndpoint, IpVersion};

use super::{InetSocket, UNSPECIFIED_LOCAL_ENDPOINT_V4, UNSPECIFIED_LOCAL_ENDPOINT_V6};
use super::{
common::ensure_bound_dual_stack_remote_compatible, InetSocket, UNSPECIFIED_LOCAL_ENDPOINT_V4,
UNSPECIFIED_LOCAL_ENDPOINT_V6,
};

mod option;

Expand Down Expand Up @@ -288,6 +291,23 @@ impl UdpSocket {
}
}

/// Reject a send destination whose address family conflicts with the address
/// this bound socket is using.
///
/// Only sockets whose `ip_version` is `IpVersion::Ipv6` are checked; for any
/// other version the destination is accepted as-is. When the bound endpoint
/// carries no concrete address, there is nothing to compare against and the
/// destination is accepted as well.
///
/// # Errors
///
/// Propagates whatever `ensure_bound_dual_stack_remote_compatible` returns for
/// the (bound local address, `dest.addr`) pair.
#[inline]
fn validate_bound_send_dest(
    &self,
    bound: &inner::BoundUdp,
    dest: IpEndpoint,
) -> Result<(), SystemError> {
    // The family check comes first so the bound endpoint is only queried for
    // IPv6 sockets.
    if self.ip_version == IpVersion::Ipv6 {
        match bound.endpoint().addr {
            Some(local_addr) => ensure_bound_dual_stack_remote_compatible(local_addr, dest.addr),
            None => Ok(()),
        }
    } else {
        Ok(())
    }
}

fn loopback_accepts_with_preconnect(
&self,
pkt: &LoopbackPacket,
Expand Down Expand Up @@ -858,6 +878,7 @@ impl UdpSocket {
let dest = to
.or_else(|| bound.remote_endpoint().ok())
.ok_or(SystemError::EDESTADDRREQ)?;
self.validate_bound_send_dest(bound, dest)?;
let bound_iface = bound.inner().iface().clone();
let is_multicast = dest.addr.is_multicast();
let mcast_ifindex = if is_multicast {
Expand Down
137 changes: 107 additions & 30 deletions kernel/src/net/socket/inet/stream/inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,40 @@ impl Init {
}

/// # `listen`
pub(super) fn listen(self, backlog: usize) -> Result<Listening, (Self, SystemError)> {
let (inner, local) = match self {
Init::Unbound(_) => {
return Err((self, SystemError::EINVAL));
///
/// Linux semantics: calling `listen()` on an unbound TCP socket auto-binds
/// to `INADDR_ANY` (or `::`) with an ephemeral port, just like an implicit
/// `bind(0.0.0.0:0)` before `listen()`.
pub(super) fn listen(
self,
backlog: usize,
netns: Arc<NetNamespace>,
) -> Result<Listening, (Self, SystemError)> {
// If unbound, auto-bind to INADDR_ANY:ephemeral (Linux compat).
let bound_self = if matches!(self, Init::Unbound(_)) {
let ver = match &self {
Init::Unbound((_, v)) => *v,
_ => unreachable!(),
};
let unspec_addr = match ver {
smoltcp::wire::IpVersion::Ipv4 => {
smoltcp::wire::IpAddress::from(smoltcp::wire::Ipv4Address::UNSPECIFIED)
}
smoltcp::wire::IpVersion::Ipv6 => {
smoltcp::wire::IpAddress::from(smoltcp::wire::Ipv6Address::UNSPECIFIED)
}
};
let auto_bind_ep = smoltcp::wire::IpEndpoint::new(unspec_addr, 0);
match self.bind(auto_bind_ep, netns.clone()) {
Ok(bound) => bound,
Err(err) => return Err((Init::new(ver), err)),
}
} else {
self
};
let (inner, local) = match bound_self {
Init::Bound(inner) => inner,
Init::Unbound(_) => unreachable!(),
};
let listen_addr = if local.addr.is_unspecified() {
smoltcp::wire::IpListenEndpoint::from(local.port)
Expand All @@ -218,23 +246,55 @@ impl Init {
let backlog = core::cmp::min(if backlog == 0 { 1 } else { backlog }, 8);

let mut inners = Vec::new();
let is_any_addr = listen_addr.addr.is_none();

if let Err(err) = || -> Result<(), SystemError> {
let additional_sockets = backlog.saturating_sub(1);
for _ in 0..additional_sockets {
// -1 because the first one is already bound
// log::debug!("loop {:?}", _i);
let new_listen = socket::inet::BoundInner::bind(
new_listen_smoltcp_socket(listen_addr)?,
listen_addr
.addr
.as_ref()
.unwrap_or(&smoltcp::wire::IpAddress::from(
smoltcp::wire::Ipv4Address::UNSPECIFIED,
)),
inner.netns(),
)?;
inners.push(new_listen);
if is_any_addr {
// INADDR_ANY / [::]: smoltcp uses per-interface SocketSets, so we must
// create at least one listen socket on *every* interface; otherwise a SYN
// arriving on an interface without a listen socket gets no response (RST
// or silent drop depending on smoltcp version).
//
// Strategy: place ≥1 listen socket on each interface. Any remaining
// backlog slots go to the primary interface.
let device_list = netns.device_list();
for (_, iface) in device_list.iter() {
if alloc::sync::Arc::ptr_eq(iface, inner.iface()) {
continue; // primary inner already covers this iface
}
let new_listen = socket::inet::BoundInner::bind_on_iface(
new_listen_smoltcp_socket(listen_addr)?,
iface.clone(),
Comment on lines +265 to +267
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Keep port-owning listener first for INADDR_ANY

This branch inserts non-primary interface listeners into inners before the original bound listener, but Listening::close() still frees the bound TCP port via self.inners[0].iface().port_manager().unbind_port(...), which assumes index 0 owns the reservation. On multi-interface INADDR_ANY listeners, close will unbind the wrong iface and leak the actual port binding on the primary iface, so subsequent bind/listen on the same port can fail with EADDRINUSE.

Useful? React with 👍 / 👎.

inner.netns(),
)?;
inners.push(new_listen);
}
// Fill remaining backlog slots on the primary interface.
let remaining = backlog.saturating_sub(1 + inners.len());
for _ in 0..remaining {
let new_listen = socket::inet::BoundInner::bind_on_iface(
new_listen_smoltcp_socket(listen_addr)?,
inner.iface().clone(),
inner.netns(),
)?;
inners.push(new_listen);
}
} else {
// Specific address: all backlog sockets go to the same interface.
let additional_sockets = backlog.saturating_sub(1);
for _ in 0..additional_sockets {
let new_listen = socket::inet::BoundInner::bind(
new_listen_smoltcp_socket(listen_addr)?,
listen_addr
.addr
.as_ref()
.unwrap_or(&smoltcp::wire::IpAddress::from(
smoltcp::wire::Ipv4Address::UNSPECIFIED,
)),
inner.netns(),
)?;
inners.push(new_listen);
}
}
Ok(())
}() {
Expand Down Expand Up @@ -574,16 +634,27 @@ impl Listening {

// log::debug!("local at {:?}", local_endpoint);

let mut new_listen = socket::inet::BoundInner::bind(
new_listen_smoltcp_socket(self.listen_addr)?,
self.listen_addr
.addr
.as_ref()
.unwrap_or(&smoltcp::wire::IpAddress::from(
smoltcp::wire::Ipv4Address::UNSPECIFIED,
)),
connected.netns(),
)?;
// Create a replacement listen socket on the *same* interface as the one
// that just accepted a connection. This is critical for INADDR_ANY listeners
// where each interface has its own listen socket in the smoltcp SocketSet.
let mut new_listen = if self.listen_addr.addr.is_none() {
socket::inet::BoundInner::bind_on_iface(
new_listen_smoltcp_socket(self.listen_addr)?,
connected.iface().clone(),
connected.netns(),
)?
} else {
socket::inet::BoundInner::bind(
new_listen_smoltcp_socket(self.listen_addr)?,
self.listen_addr
.addr
.as_ref()
.unwrap_or(&smoltcp::wire::IpAddress::from(
smoltcp::wire::Ipv4Address::UNSPECIFIED,
)),
connected.netns(),
)?
};

// swap the connected socket with the new_listen socket
// TODO is smoltcp socket swappable?
Expand Down Expand Up @@ -630,7 +701,13 @@ impl Listening {
for inner in self.inners.iter() {
inner.with_mut::<smoltcp::socket::tcp::Socket, _, _>(|socket| socket.close());
}
self.inners[0]
// The original port-owning socket is always the *last* element in `inners`
// (pushed last during listen() construction). We must unbind from its
// port_manager, not inners[0] which may belong to a different iface for
// INADDR_ANY listeners.
self.inners
.last()
.expect("Listening socket must have at least one inner")
.iface()
.port_manager()
.unbind_port(Types::Tcp, port);
Expand Down
83 changes: 64 additions & 19 deletions kernel/src/net/socket/inet/stream/lifecycle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,27 @@ impl TcpSocket {
let inner = writer.take().expect("Tcp inner::Inner is None");
let (listening, err) = match inner {
inner::Inner::Init(init) => {
let listen_result = init.listen(backlog);
let listen_result = init.listen(backlog, self.netns());
match listen_result {
Ok(listening) => {
// DragonOS backlog emulation: listener is represented by multiple
// smoltcp TCP sockets. When all LISTEN sockets are consumed,
// Linux commonly drops incoming SYN (no RST). To implement this
// without changing smoltcp semantics, register the active listen port
// in the iface common registry.
//
// For INADDR_ANY listeners, listen sockets span multiple interfaces,
// so register on each unique interface.
let port = listening.get_name().port;
if let Some(b) = listening.inners.first() {
b.iface().common().register_tcp_listen_port(port, backlog);
let me = self.self_ref.upgrade().unwrap();
let mut registered_ifaces: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
for b in &listening.inners {
let nic_id = b.iface().nic_id();
if !registered_ifaces.contains(&nic_id) {
b.iface().common().register_tcp_listen_port(port, backlog);
b.iface().common().bind_socket(me.clone());
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Unbind listener from every iface during teardown

Registering the listening socket on each unique iface here is not matched by symmetric unregistration: close/shutdown paths only call unbind_socket() through a single iface (inner.iface() / inners[0]). For INADDR_ANY listeners spanning multiple interfaces, extra IfaceCommon::bounds entries remain after close, causing leaked socket references and repeated stale notifications on those interfaces.

Useful? React with 👍 / 👎.

registered_ifaces.push(nic_id);
}
}
(inner::Inner::Listening(listening), None)
}
Expand All @@ -66,14 +76,21 @@ impl TcpSocket {

pub fn try_accept(&self) -> Result<(Arc<TcpSocket>, smoltcp::wire::IpEndpoint), SystemError> {
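// Actively drive the protocol stack: do not rely on the background poll thread, so that accept can make progress even when no event notification arrives.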
if let Some(iface) = self
.inner
.read()
.as_ref()
.and_then(|inner| inner.iface())
.cloned()
// For INADDR_ANY listeners, poll all interfaces that have listen sockets.
{
iface.poll();
let reader = self.inner.read();
if let Some(inner::Inner::Listening(listening)) = reader.as_ref() {
let mut polled_nics: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
for b in &listening.inners {
let nic_id = b.iface().nic_id();
if !polled_nics.contains(&nic_id) {
b.iface().poll();
polled_nics.push(nic_id);
}
}
} else if let Some(iface) = reader.as_ref().and_then(|inner| inner.iface()).cloned() {
iface.poll();
}
}

match self
Expand Down Expand Up @@ -112,7 +129,6 @@ impl TcpSocket {
&self,
remote_endpoint: smoltcp::wire::IpEndpoint,
) -> Result<(), SystemError> {
// log::debug!("TcpSocket::start_connect: remote={:?}", remote_endpoint);
let mut writer = self.inner.write();
let inner = writer.take().expect("Tcp inner::Inner is None");
let (init, result) = match inner {
Expand Down Expand Up @@ -316,8 +332,19 @@ impl TcpSocket {
let local = listening.get_name();
let port = local.port;

if let Some(b) = listening.inners.first() {
b.iface().common().unregister_tcp_listen_port(port);
// Unregister listen port and unbind socket from all unique interfaces.
// For INADDR_ANY listeners, listen sockets span multiple interfaces.
{
let me = self.self_ref.upgrade().unwrap();
let mut unregistered: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
for b in &listening.inners {
let nic_id = b.iface().nic_id();
if !unregistered.contains(&nic_id) {
b.iface().common().unregister_tcp_listen_port(port);
b.iface().common().unbind_socket(me.clone());
unregistered.push(nic_id);
}
}
}

for bound in &listening.inners {
Expand Down Expand Up @@ -454,10 +481,16 @@ impl TcpSocket {
// close(fd) must not break in-flight syscalls that already hold a
// reference to this socket object (gVisor ClosedWriteBlockingSocket).
// So we do NOT leave self.inner as None; we always reinsert it below.
if let Some(iface) = inner.iface() {
iface
.common()
.unbind_socket(self.self_ref.upgrade().unwrap());
//
// For Listening sockets, unbind_socket must be done per-iface inside the
// Listening match arm (INADDR_ANY spans multiple interfaces). For all other
// states, inner.iface() returns the single owning iface.
if !matches!(inner, inner::Inner::Listening(_)) {
if let Some(iface) = inner.iface() {
iface
.common()
.unbind_socket(self.self_ref.upgrade().unwrap());
}
}

match inner {
Expand Down Expand Up @@ -526,8 +559,20 @@ impl TcpSocket {
inner::Inner::Listening(mut ls) => {
// close(listen_fd) should stop listening on the port.
let port = ls.get_name().port;
if let Some(b) = ls.inners.first() {
b.iface().common().unregister_tcp_listen_port(port);
// Unregister listen port and unbind socket from all unique interfaces.
// For INADDR_ANY listeners, listen sockets span multiple interfaces,
// so we must clean up each one.
{
let me = self.self_ref.upgrade().unwrap();
let mut cleaned: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
for b in &ls.inners {
let nic_id = b.iface().nic_id();
if !cleaned.contains(&nic_id) {
b.iface().common().unregister_tcp_listen_port(port);
b.iface().common().unbind_socket(me.clone());
cleaned.push(nic_id);
}
}
}
ls.close();
// IMPORTANT:
Expand Down
Loading
Loading