diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..59cd09a23 --- /dev/null +++ b/.clang-format @@ -0,0 +1,23 @@ +--- +BasedOnStyle: Google +IndentWidth: 2 +ColumnLimit: 79 +AlignAfterOpenBracket: DontAlign +SpacesInConditionalStatement: true +SortIncludes: false +BreakBeforeBraces: Linux +SpaceBeforeCpp11BracedList: true +SpaceBeforeParens: Never +SpaceAfterCStyleCast: true +AllowShortBlocksOnASingleLine: Empty +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +SpacesInContainerLiterals: true +Cpp11BracedListStyle: false +MaxEmptyLinesToKeep: 2 +AlignConsecutiveMacros: AcrossEmptyLinesAndComments +AlignConsecutiveBitFields: AcrossEmptyLinesAndComments +AlignTrailingComments: true +SpacesBeforeTrailingComments: 5 +SpaceAfterLogicalNot: true diff --git a/.gitignore b/.gitignore index 1aa7cf694..64735be2d 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,5 @@ Thumbs.db /src/lib/zf/doxygen/html.zip /src/lib/zf/doxygen/latex /src/lib/zf/doxygen/rtf + +/test_programs/mock_xnic/xnic_data diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..9472eba41 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "files.associations": { + "ip_internal.h": "c", + "*.tmpl": "c", + "functional": "c" + } +} diff --git a/mk/before.mk b/mk/before.mk index c419be348..6b0b4f8ca 100644 --- a/mk/before.mk +++ b/mk/before.mk @@ -47,7 +47,6 @@ MMAKE_INCLUDE_DIR := $(TOPPATH)/src/include MMAKE_INCLUDE := -I. -I$(BUILD)/include -I$(MMAKE_INCLUDE_DIR) - ###################################################################### # Some useful commands. # @@ -106,3 +105,13 @@ default_all: all nullstring:= space=$(nullstring) #<-do not edit this line + +# Definition of all of the DPDK libs. If subdirectories need to link to DPDK they can just reference +# this variable to include it. 
This matches the same setup that we have for REPL +DPDK_STATIC_LOCATIONS = -L$(RTE_SDK)/build/lib -L$(RTE_SDK)/build/drivers +DPDK_STATIC_LIBS = -lrte_hash -lrte_cmdline -lrte_pci -lrte_bus_pci -lrte_bus_vdev -lrte_mempool_ring -lrte_kni -lrte_ethdev -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring -lrte_kvargs -lrte_pmd_bond -lrte_pmd_virtio -lrte_pmd_enic -lrte_pmd_i40e -lrte_pmd_ixgbe -lrte_net -lrte_pmd_e1000 -lrte_pmd_ring -lrte_pmd_af_packet -lrte_pmd_mlx4 -lrte_pmd_mlx5 -lrte_pmd_ena -lrte_pmd_failsafe -lrte_pmd_netvsc -lrte_pmd_vdev_netvsc -lrte_bus_vmbus -lrte_pmd_tap -lrte_gso -lrte_timer -lrte_meter +DPDK_DYN_LIBS = -lm -ldl -lnuma -libverbs -lmlx4 -lmlx5 +DPDK_STATIC = -Wl,-Bstatic -Wl,--whole-archive +DPDK_DYNAMIC = -Wl,--no-whole-archive -Wl,-Bdynamic +DEFAULT_DPDK := $(DPDK_STATIC_LOCATIONS) $(DPDK_STATIC) $(DPDK_STATIC_LIBS) $(DPDK_DYNAMIC) $(DPDK_DYN_LIBS) + diff --git a/mk/linux_gcc.mk b/mk/linux_gcc.mk index 45e09d0eb..524eb8c13 100644 --- a/mk/linux_gcc.mk +++ b/mk/linux_gcc.mk @@ -94,8 +94,8 @@ cwarnings += -Wno-deprecated-declarations endif -MMAKE_CFLAGS += $(MMAKE_CARCH) $(cwarnings) -MMAKE_CXXFLAGS += $(MMAKE_CARCH) $(cxxwarnings) +MMAKE_CFLAGS += $(MMAKE_CARCH) $(cwarnings) -msse3 +MMAKE_CXXFLAGS += $(MMAKE_CARCH) $(cxxwarnings) -msse3 MMAKE_CPPFLAGS := MMAKE_CFLAGS_DLL := -fPIC @@ -195,7 +195,7 @@ endef define MMakeLinkCApp set -x; \ $(CLINK) $(MMAKE_CARCH) $(CFLAGS) -Wl,-E $(MMAKE_DIR_LINKFLAGS) $(filter %.o,$^) \ - $$libs -lm -lpthread -lrt -lresolv -lanl -o $@ + $$libs -lm -lpthread -lrt -lresolv -lanl $(MMAKE_DPDK_LIBS) -o $@ endef diff --git a/mk/platform/gnu_x86_64.mk b/mk/platform/gnu_x86_64.mk index 7f9272b74..6ff17a0d1 100644 --- a/mk/platform/gnu_x86_64.mk +++ b/mk/platform/gnu_x86_64.mk @@ -2,7 +2,7 @@ # X-SPDX-Copyright-Text: (c) Copyright 2004-2020 Xilinx, Inc. 
GNU := 1 MMAKE_CARCH ?= -mtune=native -MMAKE_CARCH := -m64 $(MMAKE_CTUNE) +MMAKE_CARCH := -march=native -m64 $(MMAKE_CTUNE) MMAKE_RELOCATABLE_LIB := -z combreloc diff --git a/scripts/onload_mkdist b/scripts/onload_mkdist index a0f588eb7..dd75f346e 100755 --- a/scripts/onload_mkdist +++ b/scripts/onload_mkdist @@ -226,12 +226,12 @@ done < <(find . -type d) preprocess_source verstub=$(echo "$version" | sed -e 's/-test.*//' -e 's/-rc.*//') -if [ -f ChangeLog ] && ! grep -q "$verstub" ChangeLog; then - bad "Nothing in the ChangeLog for this version" -fi -if [ -f ReleaseNotes ] && ! grep -q "$verstub" ReleaseNotes; then - bad "Nothing in the ReleaseNotes for this version" -fi +#if [ -f ChangeLog ] && ! grep -q "$verstub" ChangeLog; then +# bad "Nothing in the ChangeLog for this version" +#fi +#if [ -f ReleaseNotes ] && ! grep -q "$verstub" ReleaseNotes; then +# bad "Nothing in the ReleaseNotes for this version" +#fi # Get rid of junk! find . -type f -name '.#*' -exec rm "{}" \; diff --git a/scripts/onload_profiles/swxtch.opf b/scripts/onload_profiles/swxtch.opf new file mode 100644 index 000000000..7331f5e4c --- /dev/null +++ b/scripts/onload_profiles/swxtch.opf @@ -0,0 +1,21 @@ +onload_set EF_SPIN_USEC 10000000000000 +onload_set EF_TCP_CONNECT_SPIN 1 +onload_set EF_TCP_ACCEPT_SPIN 1 +onload_set EF_TCP_RECV_SPIN 1 +onload_set EF_TCP_SEND_SPIN 1 +onload_set EF_UDP_RECV_SPIN 1 +onload_set EF_UDP_SEND_SPIN 1 +onload_set EF_MAX_ENDPOINTS 16384 +onload_set EF_TXQ_SIZE 2048 +onload_set EF_CLUSTER_SIZE 0 +#onload_set EF_MAX_PACKETS 2097152 +#onload_set EF_MAX_TX_PACKETS 1048576 +#onload_set EF_PREALLOC_PACKETS 1 +onload_set EF_USE_HUGE_PAGES 2 +#onload_set EF_UDP_FORCE_REUSEPORT 0 +#onload_set EF_UNIX_LOG 0xFFFFFFFFFFFFFFFFFFFF # this is a crappy bitmask that can be found in opts_citp_def.h and it only seems to kind of work +#onload_set EF_CTPIO 0 +#onload_set EF_PIO 0 +#onload_set EF_EPOLL_MT_SAFE 1 +#onload_set EF_INT_DRIVEN 0 +#onload_set EF_HIGH_THROUGHPUT_MODE 1 diff 
--git a/scripts/swxtch_init.sh b/scripts/swxtch_init.sh new file mode 100755 index 000000000..5daf9bbf2 --- /dev/null +++ b/scripts/swxtch_init.sh @@ -0,0 +1,4 @@ +#! /bin/bash + +sudo ~/onload/build/x86_64_linux-$(uname -r)/driver/linux/load.sh onload +echo eth1 | sudo tee /sys/module/sfc_resource/afxdp/register diff --git a/src/include/ci/efhw/device.h b/src/include/ci/efhw/device.h index 5b197f2b0..1e19e2c72 100644 --- a/src/include/ci/efhw/device.h +++ b/src/include/ci/efhw/device.h @@ -5,10 +5,11 @@ /* NB: this enum must be aligned with enum ef_vi_arch */ enum efhw_arch { - EFHW_ARCH_EF10 = 1, - EFHW_ARCH_EF100, - EFHW_ARCH_EFCT, - EFHW_ARCH_AF_XDP, + EFHW_ARCH_EF10 = 1, + EFHW_ARCH_EF100, + EFHW_ARCH_EFCT, + EFHW_ARCH_AF_XDP, + EFHW_ARCH_SWXTCH, }; /*---------------------------------------------------------------------------- @@ -18,15 +19,15 @@ enum efhw_arch { *---------------------------------------------------------------------------*/ enum efhw_function { - EFHW_FUNCTION_PF, - EFHW_FUNCTION_VF, + EFHW_FUNCTION_PF, + EFHW_FUNCTION_VF, }; struct efhw_device_type { - int arch; /* enum efhw_arch */ - char variant; /* 'A', 'B', ... */ - int revision; /* 0, 1, ... */ - int function; /* enum efhw_function */ + int arch; /* enum efhw_arch */ + char variant; /* 'A', 'B', ... */ + int revision; /* 0, 1, ... */ + int function; /* enum efhw_function */ }; #endif diff --git a/src/include/ci/internal/ip.h b/src/include/ci/internal/ip.h index eaa24c625..e6f409b28 100644 --- a/src/include/ci/internal/ip.h +++ b/src/include/ci/internal/ip.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2003-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author djr ** \brief Decls & defs for IP library internal to our libraries. 
@@ -48,9 +48,9 @@ #endif #ifdef __KERNEL__ -# include -# include -# include +#include +#include +#include #endif #include @@ -93,44 +93,40 @@ extern const char* oo_uk_intf_ver; **********************************************************************/ #if ! CI_CFG_DETAILED_CHECKS || defined(NDEBUG) -# define CHECK_NI(ni) -# define CHECK_TS(ni,ts) -# define CHECK_TLS(ni,ts) -# define CHECK_TEP(ep) -# define CHECK_US(ni,ts) -# define CHECK_UEP(ep) -# define CHECK_TIMERS(ni) -# define CHECK_FREEPKTS(ni) -# define CHECK_TEP_NNL(ep) +#define CHECK_NI(ni) +#define CHECK_TS(ni, ts) +#define CHECK_TLS(ni, ts) +#define CHECK_TEP(ep) +#define CHECK_US(ni, ts) +#define CHECK_UEP(ep) +#define CHECK_TIMERS(ni) +#define CHECK_FREEPKTS(ni) +#define CHECK_TEP_NNL(ep) #else -# define CHECK_NI(ni) \ - ci_netif_assert_valid(ni,__FILE__,__LINE__) +#define CHECK_NI(ni) ci_netif_assert_valid(ni, __FILE__, __LINE__) -# define CHECK_TS(ni,ts) \ - ci_tcp_state_assert_valid((ni), (ts), __FILE__,__LINE__) +#define CHECK_TS(ni, ts) \ + ci_tcp_state_assert_valid((ni), (ts), __FILE__, __LINE__) -# define CHECK_TLS(ni, tls) \ - ci_tcp_state_listen_assert_valid((ni), (tls), __FILE__,__LINE__) +#define CHECK_TLS(ni, tls) \ + ci_tcp_state_listen_assert_valid((ni), (tls), __FILE__, __LINE__) -# define CHECK_TEP(ep) \ - ci_tcp_ep_assert_valid(ep, __FILE__, __LINE__) +#define CHECK_TEP(ep) ci_tcp_ep_assert_valid(ep, __FILE__, __LINE__) -# define CHECK_US(ni,ts) \ - ci_udp_state_assert_valid((ni), (ts), __FILE__, __LINE__) +#define CHECK_US(ni, ts) \ + ci_udp_state_assert_valid((ni), (ts), __FILE__, __LINE__) -# define CHECK_UEP(ep) \ - ci_udp_ep_assert_valid(ep, __FILE__, __LINE__) +#define CHECK_UEP(ep) ci_udp_ep_assert_valid(ep, __FILE__, __LINE__) -# define CHECK_TIMERS(ni) \ - ci_ip_timer_state_assert_valid((ni), __FILE__, __LINE__) +#define CHECK_TIMERS(ni) \ + ci_ip_timer_state_assert_valid((ni), __FILE__, __LINE__) -# define CHECK_FREEPKTS(ni) \ - ci_netif_verify_freepkts(ni, __FILE__, __LINE__); 
+#define CHECK_FREEPKTS(ni) ci_netif_verify_freepkts(ni, __FILE__, __LINE__); /* This is intended to allow some checking to be done without the netif * lock held. At the moment it does nothing. */ -# define CHECK_TEP_NNL(ep) +#define CHECK_TEP_NNL(ep) #endif @@ -140,62 +136,60 @@ extern const char* oo_uk_intf_ver; *********************************************************************/ /* It should be equal to EFX_MAX_MTU from driver/linux_net/net_driver.h */ -#define CI_PMTU_MAX_MTU (9 * 1024) +#define CI_PMTU_MAX_MTU (9 * 1024) -#define LOG_PMTU(x) LOG_IPP(x) +#define LOG_PMTU(x) LOG_IPP(x) /* Path MTU plateau table entries (stored in ci_tcp_state) */ -#define CI_PMTU_PLATEAU_ENTRIES \ - { 68, 296, 508, 1006, 1492, 2002, 4352, 8166, 32000, 65535 } -#define CI_PMTU_PLATEAU_ENTRY_MAX 9 +#define CI_PMTU_PLATEAU_ENTRIES \ + { \ + 68, 296, 508, 1006, 1492, 2002, 4352, 8166, 32000, 65535 \ + } +#define CI_PMTU_PLATEAU_ENTRY_MAX 9 -extern void -ci_pmtu_state_init(ci_netif* ni, ci_sock_cmn *s, oo_p pmtu_sp, - ci_pmtu_state_t* pmtus, int func_code); -extern void ci_pmtu_set(ci_netif *ni, ci_pmtu_state_t *pmtus, unsigned pmtu); +extern void ci_pmtu_state_init(ci_netif* ni, ci_sock_cmn* s, oo_p pmtu_sp, + ci_pmtu_state_t* pmtus, int func_code); +extern void ci_pmtu_set(ci_netif* ni, ci_pmtu_state_t* pmtus, unsigned pmtu); /*! 
IP timer callback for Path MTU discovery process */ extern void ci_pmtu_timeout_pmtu(ci_netif* ni, ci_pmtu_state_t* pmtu) CI_HF; -extern void ci_pmtu_update_fast(ci_netif *ni, ci_pmtu_state_t *pmtus, - ci_ip_cached_hdrs *ipcache, - unsigned mtu) CI_HF; -extern void ci_pmtu_update_slow(ci_netif *ni, ci_pmtu_state_t *pmtus, - ci_ip_cached_hdrs *ipcache, - unsigned mtu) CI_HF; +extern void ci_pmtu_update_fast(ci_netif* ni, ci_pmtu_state_t* pmtus, + ci_ip_cached_hdrs* ipcache, unsigned mtu) CI_HF; +extern void ci_pmtu_update_slow(ci_netif* ni, ci_pmtu_state_t* pmtus, + ci_ip_cached_hdrs* ipcache, unsigned mtu) CI_HF; -#define CI_PMTU_STOP_TIMER ((ci_iptime_t)0) -#define CI_PMTU_IMMEDIATE_TIMEOUT ((ci_iptime_t)1) +#define CI_PMTU_STOP_TIMER ((ci_iptime_t) 0) +#define CI_PMTU_IMMEDIATE_TIMEOUT ((ci_iptime_t) 1) -#define CI_PMTU_TIMER_SET_FAST(ni, p) \ +#define CI_PMTU_TIMER_SET_FAST(ni, p) \ ci_pmtu_discover_timer((ni), (p), NI_CONF(ni).tconst_pmtu_discover_fast) -#define CI_PMTU_TIMER_SET_SLOW(ni, p) \ +#define CI_PMTU_TIMER_SET_SLOW(ni, p) \ ci_pmtu_discover_timer((ni), (p), NI_CONF(ni).tconst_pmtu_discover_slow) -#define CI_PMTU_TIMER_SET_RECOVER(ni, p) \ +#define CI_PMTU_TIMER_SET_RECOVER(ni, p) \ ci_pmtu_discover_timer((ni), (p), NI_CONF(ni).tconst_pmtu_discover_recover) -#define CI_PMTU_TIMER_KILL(ni, p) \ - ci_pmtu_discover_timer( (ni), (p), CI_PMTU_STOP_TIMER ) -#define CI_PMTU_TIMER_NOW(ni, p) \ - ci_pmtu_discover_timer( (ni), (p), CI_PMTU_IMMEDIATE_TIMEOUT ) - +#define CI_PMTU_TIMER_KILL(ni, p) \ + ci_pmtu_discover_timer((ni), (p), CI_PMTU_STOP_TIMER) +#define CI_PMTU_TIMER_NOW(ni, p) \ + ci_pmtu_discover_timer((ni), (p), CI_PMTU_IMMEDIATE_TIMEOUT) /*! 
Initializes an IP cache * (to use this macro include ) */ -#define ci_ip_cache_init_common(ipcache, af) \ -do { \ - ci_ip_cache_invalidate(ipcache); \ - (ipcache)->status = retrrc_noroute; \ - (ipcache)->intf_i = -1; \ - (ipcache)->hwport = CI_HWPORT_ID_BAD; \ - (ipcache)->ether_type = ci_af2ethertype(af); \ - (ipcache)->flags = 0; \ - (ipcache)->nexthop = addr_any; \ - ipcache_ttl(ipcache) = CI_IPX_DFLT_TTL_HOPLIMIT(af); \ -} while (0) +#define ci_ip_cache_init_common(ipcache, af) \ + do { \ + ci_ip_cache_invalidate(ipcache); \ + (ipcache)->status = retrrc_noroute; \ + (ipcache)->intf_i = -1; \ + (ipcache)->hwport = CI_HWPORT_ID_BAD; \ + (ipcache)->ether_type = ci_af2ethertype(af); \ + (ipcache)->flags = 0; \ + (ipcache)->nexthop = addr_any; \ + ipcache_ttl(ipcache) = CI_IPX_DFLT_TTL_HOPLIMIT(af); \ + } while( 0 ) #define ci_ip_cache_init(ipcache, af) ci_ip_cache_init_common(ipcache, af) @@ -221,8 +215,7 @@ static inline cp_fwd_table_id ci_ni_fwd_table_id(ci_netif* ni) /*! Invalidates a ci_ip_cached_hdrs struct i.e. all state becomes out-of-date. 
*/ -ci_inline void -ci_ip_cache_invalidate(ci_ip_cached_hdrs* ipcache) +ci_inline void ci_ip_cache_invalidate(ci_ip_cached_hdrs* ipcache) { oo_cp_verinfo_init(&ipcache->fwd_ver); oo_cp_verinfo_init(&ipcache->fwd_ver_init_net); @@ -230,16 +223,15 @@ ci_ip_cache_invalidate(ci_ip_cached_hdrs* ipcache) } -static inline int -oo_cp_ipcache_is_valid(ci_netif* ni, ci_ip_cached_hdrs* ipcache) +static inline int oo_cp_ipcache_is_valid( + ci_netif* ni, ci_ip_cached_hdrs* ipcache) { - int rc = oo_cp_verinfo_is_valid(ni->cplane, &ipcache->fwd_ver, - ci_ni_fwd_table_id(ni)); + int rc = oo_cp_verinfo_is_valid( + ni->cplane, &ipcache->fwd_ver, ci_ni_fwd_table_id(ni)); if( rc && ipcache->fwd_ver_init_net.id != CICP_MAC_ROWID_UNUSED ) { rc = ni->cplane_init_net != NULL && oo_cp_verinfo_is_valid(ni->cplane_init_net, - &ipcache->fwd_ver_init_net, - ci_ni_fwd_table_id(ni)); + &ipcache->fwd_ver_init_net, ci_ni_fwd_table_id(ni)); } return rc; } @@ -253,78 +245,77 @@ oo_cp_ipcache_is_valid(ci_netif* ni, ci_ip_cached_hdrs* ipcache) #define TX_PKT_LEN(pkt) (pkt)->pay_len /* Offset of current buffer position from start of TCP payload. */ -#define PKT_RX_BUF_OFF(pkt) \ - ((ci_uint32)(oo_offbuf_ptr(&(pkt)->buf) - CI_TCP_PAYLOAD(PKT_TCP_HDR(pkt)))) +#define PKT_RX_BUF_OFF(pkt) \ + ((ci_uint32) (oo_offbuf_ptr(&(pkt)->buf) - CI_TCP_PAYLOAD(PKT_TCP_HDR(pkt)))) -#define PKT_IPX_RX_BUF_OFF(af, pkt) \ - ((ci_uint32)(oo_offbuf_ptr(&(pkt)->buf) - \ - CI_TCP_PAYLOAD(PKT_IPX_TCP_HDR(af, pkt)))) +#define PKT_IPX_RX_BUF_OFF(af, pkt) \ + ((ci_uint32) (oo_offbuf_ptr(&(pkt)->buf) - \ + CI_TCP_PAYLOAD(PKT_IPX_TCP_HDR(af, pkt)))) /* Sequence number at the current buffer position. 
*/ -#define PKT_RX_BUF_SEQ(pkt) \ +#define PKT_RX_BUF_SEQ(pkt) \ (CI_BSWAP_BE32(PKT_TCP_HDR(pkt)->tcp_seq_be32) + PKT_RX_BUF_OFF(pkt)) -#define PKT_IPX_RX_BUF_SEQ(af, pkt) \ +#define PKT_IPX_RX_BUF_SEQ(af, pkt) \ (CI_BSWAP_BE32(PKT_IPX_TCP_HDR(af, pkt)->tcp_seq_be32) + \ - PKT_IPX_RX_BUF_OFF(af, pkt)) + PKT_IPX_RX_BUF_OFF(af, pkt)) -#define PKT_TCP_RX_BUF_ASSERT_VALID(ni, pkt) \ - OO_OFFBUF_ASSERT_VALID(&(pkt)->buf, PKT_START(pkt), \ - (pkt) + CI_CFG_PKT_BUF_SIZE) +#define PKT_TCP_RX_BUF_ASSERT_VALID(ni, pkt) \ + OO_OFFBUF_ASSERT_VALID( \ + &(pkt)->buf, PKT_START(pkt), (pkt) + CI_CFG_PKT_BUF_SIZE) -#define PKT_START(pkt) ((char*) oo_ether_hdr(pkt)) +#define PKT_START(pkt) ((char*) oo_ether_hdr(pkt)) -#define PKT_TCP_HDR(pkt) ((ci_tcp_hdr*) oo_ip_data(pkt)) +#define PKT_TCP_HDR(pkt) ((ci_tcp_hdr*) oo_ip_data(pkt)) static inline ci_tcp_hdr* ci_pkt_ipx_tcp_hdr(int af, ci_ip_pkt_fmt* pkt) - { return oo_ipx_data(af, pkt); } +{ + return oo_ipx_data(af, pkt); +} #define PKT_IPX_TCP_HDR(af, pkt) ci_pkt_ipx_tcp_hdr(af, pkt) /*! 
Find the amount of data in an outgoing packet */ -#define PKT_TCP_TX_SEQ_SPACE(pkt) \ - (SEQ_SUB((pkt)->pf.tcp_tx.end_seq, (pkt)->pf.tcp_tx.start_seq)) +#define PKT_TCP_TX_SEQ_SPACE(pkt) \ + (SEQ_SUB((pkt)->pf.tcp_tx.end_seq, (pkt)->pf.tcp_tx.start_seq)) -#define TX_PKT_TCP(pkt) ((ci_tcp_hdr*) oo_tx_ipx_data(oo_pkt_af(pkt), pkt)) -#define TX_PKT_UDP(pkt) ((ci_udp_hdr*) oo_tx_ipx_data(oo_pkt_af(pkt), pkt)) -#define TX_PKT_SPORT_BE16(pkt) (((ci_uint16*) oo_tx_ip_data(pkt))[0]) -#define TX_PKT_DPORT_BE16(pkt) (((ci_uint16*) oo_tx_ip_data(pkt))[1]) +#define TX_PKT_TCP(pkt) ((ci_tcp_hdr*) oo_tx_ipx_data(oo_pkt_af(pkt), pkt)) +#define TX_PKT_UDP(pkt) ((ci_udp_hdr*) oo_tx_ipx_data(oo_pkt_af(pkt), pkt)) +#define TX_PKT_SPORT_BE16(pkt) (((ci_uint16*) oo_tx_ip_data(pkt))[0]) +#define TX_PKT_DPORT_BE16(pkt) (((ci_uint16*) oo_tx_ip_data(pkt))[1]) #define TX_PKT_IPX_SPORT(af, pkt) (((ci_uint16*) oo_tx_ipx_data(af, pkt))[0]) #define TX_PKT_IPX_DPORT(af, pkt) (((ci_uint16*) oo_tx_ipx_data(af, pkt))[1]) -#define TX_PKT_IPX_HDR(af, pkt) ((ci_ipx_hdr_t*) (oo_tx_ipx_hdr(af, pkt))) +#define TX_PKT_IPX_HDR(af, pkt) ((ci_ipx_hdr_t*) (oo_tx_ipx_hdr(af, pkt))) -#define TX_PKT_PROTOCOL(af, pkt) ipx_hdr_protocol(af, TX_PKT_IPX_HDR(af, pkt)) -#define TX_PKT_TTL(af, pkt) ipx_hdr_ttl(af, TX_PKT_IPX_HDR(af, pkt)) -#define TX_PKT_SADDR(af, pkt) ipx_hdr_saddr(af, TX_PKT_IPX_HDR(af, pkt)) -#define TX_PKT_DADDR(af, pkt) ipx_hdr_daddr(af, TX_PKT_IPX_HDR(af, pkt)) +#define TX_PKT_PROTOCOL(af, pkt) ipx_hdr_protocol(af, TX_PKT_IPX_HDR(af, pkt)) +#define TX_PKT_TTL(af, pkt) ipx_hdr_ttl(af, TX_PKT_IPX_HDR(af, pkt)) +#define TX_PKT_SADDR(af, pkt) ipx_hdr_saddr(af, TX_PKT_IPX_HDR(af, pkt)) +#define TX_PKT_DADDR(af, pkt) ipx_hdr_daddr(af, TX_PKT_IPX_HDR(af, pkt)) #define TX_PKT_SET_SADDR(af, pkt, addr) \ - ipx_hdr_set_saddr(af, TX_PKT_IPX_HDR(af, pkt), (addr)) + ipx_hdr_set_saddr(af, TX_PKT_IPX_HDR(af, pkt), (addr)) #define TX_PKT_SET_DADDR(af, pkt, addr) \ - ipx_hdr_set_daddr(af, TX_PKT_IPX_HDR(af, pkt), 
(addr)) + ipx_hdr_set_daddr(af, TX_PKT_IPX_HDR(af, pkt), (addr)) #define TX_PKT_SET_FLOWLABEL(af, pkt, flowlabel) \ - ipx_hdr_set_flowlabel(af, TX_PKT_IPX_HDR(af, pkt), (flowlabel)) + ipx_hdr_set_flowlabel(af, TX_PKT_IPX_HDR(af, pkt), (flowlabel)) #define RX_PKT_IPX_HDR(pkt) oo_ipx_hdr(pkt) #define RX_PKT_PROTOCOL(pkt) \ ipx_hdr_protocol(oo_pkt_af(pkt), RX_PKT_IPX_HDR(pkt)) -#define RX_PKT_TTL(pkt) \ - ipx_hdr_ttl(oo_pkt_af(pkt), RX_PKT_IPX_HDR(pkt)) -#define RX_PKT_SADDR(pkt) \ - ipx_hdr_saddr(oo_pkt_af(pkt), RX_PKT_IPX_HDR(pkt)) -#define RX_PKT_DADDR(pkt) \ - ipx_hdr_daddr(oo_pkt_af(pkt), RX_PKT_IPX_HDR(pkt)) +#define RX_PKT_TTL(pkt) ipx_hdr_ttl(oo_pkt_af(pkt), RX_PKT_IPX_HDR(pkt)) +#define RX_PKT_SADDR(pkt) ipx_hdr_saddr(oo_pkt_af(pkt), RX_PKT_IPX_HDR(pkt)) +#define RX_PKT_DADDR(pkt) ipx_hdr_daddr(oo_pkt_af(pkt), RX_PKT_IPX_HDR(pkt)) #define RX_PKT_PAYLOAD_LEN(pkt) \ ipx_hdr_tot_len(oo_pkt_af(pkt), RX_PKT_IPX_HDR(pkt)) -static inline ci_udp_hdr* ci_tx_pkt_ipx_udp(int af, ci_ip_pkt_fmt* pkt, - bool is_frag) +static inline ci_udp_hdr* ci_tx_pkt_ipx_udp( + int af, ci_ip_pkt_fmt* pkt, bool is_frag) { if( IS_AF_INET6(af) && is_frag ) - return (ci_udp_hdr*)((uint8_t*)oo_tx_ipx_data(af, pkt) + - sizeof(ci_ip6_frag_hdr)); + return (ci_udp_hdr*) ((uint8_t*) oo_tx_ipx_data(af, pkt) + + sizeof(ci_ip6_frag_hdr)); else return oo_tx_ipx_data(af, pkt); } @@ -335,29 +326,28 @@ static inline ci_tcp_hdr* ci_tx_pkt_ipx_tcp(int af, ci_ip_pkt_fmt* pkt) } #define TX_PKT_IPX_UDP(af, pkt, is_frag) ci_tx_pkt_ipx_udp(af, pkt, is_frag) -#define TX_PKT_IPX_TCP(af, pkt) ci_tx_pkt_ipx_tcp(af, pkt) +#define TX_PKT_IPX_TCP(af, pkt) ci_tx_pkt_ipx_tcp(af, pkt) static inline void* ci_ipx_data_ptr(int af, ci_ipx_hdr_t* hdr) { #if CI_CFG_IPV6 if( af == AF_INET6 ) { return &hdr->ip6 + 1; - } - else + } else #endif { - return (uint8_t*)&hdr->ip4 + CI_IP4_IHL(&hdr->ip4); + return (uint8_t*) &hdr->ip4 + CI_IP4_IHL(&hdr->ip4); } } -static inline ci_uint16 ci_tx_pkt_ipx_tcp_payload_len(int af, 
ci_ip_pkt_fmt* pkt) +static inline ci_uint16 ci_tx_pkt_ipx_tcp_payload_len( + int af, ci_ip_pkt_fmt* pkt) { ci_uint16 len; #if CI_CFG_IPV6 if( af == AF_INET6 ) { len = oo_tx_l3_len(pkt) - sizeof(ci_ip6_hdr); - } - else + } else #endif { ci_ip4_hdr* ip = oo_tx_ip_hdr(pkt); @@ -371,7 +361,7 @@ static inline ci_uint16 ci_tx_pkt_ipx_tcp_payload_len(int af, ci_ip_pkt_fmt* pkt /*! Get tsval from timestamp option. This had better be a TCP packet with ** a timestamp option! (Horribly inefficient; only use for logging). */ -#define PKT_TCP_TSO_TSVAL(pkt) \ +#define PKT_TCP_TSO_TSVAL(pkt) \ CI_BSWAP_BE32(*(ci_uint32*) (CI_TCP_HDR_OPTS(PKT_TCP_HDR(pkt)) + 4)) #define PKT_IPX_TCP_TSO_TSVAL(af, pkt) \ @@ -380,10 +370,12 @@ static inline ci_uint16 ci_tx_pkt_ipx_tcp_payload_len(int af, ci_ip_pkt_fmt* pkt /* TODO: replace PKT_UDP_HDR and PKT_IOVEC_UDP_PFX by TX-specific and * generic function; see oo_tx_ip_hdr() performance notes. */ -#define PKT_UDP_HDR(pkt) ((ci_udp_hdr*)oo_ip_data(pkt)) +#define PKT_UDP_HDR(pkt) ((ci_udp_hdr*) oo_ip_data(pkt)) static inline ci_udp_hdr* ci_pkt_ipx_udp_hdr(int af, ci_ip_pkt_fmt* pkt) - { return oo_ipx_data(af, pkt); } +{ + return oo_ipx_data(af, pkt); +} #define PKT_IPX_UDP_HDR(af, pkt) ci_pkt_ipx_udp_hdr(af, pkt) @@ -394,58 +386,57 @@ static inline ci_udp_hdr* ci_pkt_ipx_udp_hdr(int af, ci_ip_pkt_fmt* pkt) typedef struct { struct ci_netif_poll_state* poll_state; - ci_netif* ni; + ci_netif* ni; ci_ip_pkt_fmt* pkt; - ci_tcp_hdr* tcp; + ci_tcp_hdr* tcp; /* [flags] can take any of the following values, or any of the tcp ** options flags (e.g. CI_TCPT_FLAG_*). 
*/ -#define CI_TCP_PAWS_FAILED 0x80000000 -#define CI_TCP_SACKED 0x20000000 /* Something is newly SACKed */ -#define CI_TCP_DSACK 0x10000000 /* First SACK block is duplicate */ - - ci_uint32 flags; - ci_uint32 timestamp; /* pointer to timeval, host endian */ - ci_uint32 timestamp_echo; /* pointer to timeval, host endian */ - ci_uint32 sack[8]; /* pointer to first block, host endian */ - ci_int32 sack_blocks; - ci_uint32 ack,seq; /* ACK and SEQ values in host endian */ - ci_uint32 hash; /* hash for l/r addr/port */ +#define CI_TCP_PAWS_FAILED 0x80000000 +#define CI_TCP_SACKED 0x20000000 /* Something is newly SACKed */ +#define CI_TCP_DSACK 0x10000000 /* First SACK block is duplicate */ + + ci_uint32 flags; + ci_uint32 timestamp; /* pointer to timeval, host endian */ + ci_uint32 timestamp_echo; /* pointer to timeval, host endian */ + ci_uint32 sack[8]; /* pointer to first block, host endian */ + ci_int32 sack_blocks; + ci_uint32 ack, seq; /* ACK and SEQ values in host endian */ + ci_uint32 hash; /* hash for l/r addr/port */ } ciip_tcp_rx_pkt; - /********************************************************************** ************************** Network interface ************************** **********************************************************************/ /* The following are used in netic_init.c to decode EF_UDP_OPTIONS */ -#define CI_EF_UDP_UL_RECV_M 0x00000007 -#define CI_EF_UDP_UL_RECV_S 0 +#define CI_EF_UDP_UL_RECV_M 0x00000007 +#define CI_EF_UDP_UL_RECV_S 0 -#define CI_EF_UDP_RECV_FAST_M 0x00000008 -#define CI_EF_UDP_RECV_FAST_S 3 +#define CI_EF_UDP_RECV_FAST_M 0x00000008 +#define CI_EF_UDP_RECV_FAST_S 3 -#define CI_EF_UDP_UL_POLL_M 0x00000070 -#define CI_EF_UDP_UL_POLL_S 4 +#define CI_EF_UDP_UL_POLL_M 0x00000070 +#define CI_EF_UDP_UL_POLL_S 4 -#define NI_ID(ni) ((ni)->state->stack_id) -#define NI_CONF(ni) ((ni)->state->conf) +#define NI_ID(ni) ((ni)->state->stack_id) +#define NI_CONF(ni) ((ni)->state->conf) #ifdef __KERNEL__ -# define NI_OPTS(ni) ((ni)->opts) 
+#define NI_OPTS(ni) ((ni)->opts) #else -# define NI_OPTS(ni) ((ni)->state->opts) +#define NI_OPTS(ni) ((ni)->state->opts) #endif -#define NI_IPID(ni) (&(ni)->state->ipid) +#define NI_IPID(ni) (&(ni)->state->ipid) #ifdef __KERNEL__ -# define NI_PKT_SET(ni) \ - ( (ni)->packets->id < 0 ? 0 : \ - (ni)->packets->id >= (ni)->pkt_sets_n ? \ - (ni)->pkt_sets_n - 1 : (ni)->packets->id ) +#define NI_PKT_SET(ni) \ + ((ni)->packets->id < 0 ? 0 \ + : (ni)->packets->id >= (ni)->pkt_sets_n ? (ni)->pkt_sets_n - 1 \ + : (ni)->packets->id) #else -# define NI_PKT_SET(ni) ((ni)->packets->id) +#define NI_PKT_SET(ni) ((ni)->packets->id) #endif @@ -453,51 +444,52 @@ extern void ci_netif_config_opts_rangecheck(ci_netif_config_opts* opts) CI_HF; extern void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) CI_HF; extern void ci_netif_config_opts_defaults(ci_netif_config_opts* opts) CI_HF; #ifdef __KERNEL__ -extern void ci_netif_state_init(ci_netif* ni, int cpu_khz, - const char* name) CI_HF; -extern int ci_netif_ctor(ci_netif**, const ci_netif_config_opts*, - unsigned flags) CI_HF; +extern void ci_netif_state_init( + ci_netif* ni, int cpu_khz, const char* name) CI_HF; +extern int ci_netif_ctor( + ci_netif**, const ci_netif_config_opts*, unsigned flags) CI_HF; #else -extern int ci_netif_ctor(ci_netif*, ef_driver_handle, const char* name, - unsigned flags) CI_HF; +extern int ci_netif_ctor( + ci_netif*, ef_driver_handle, const char* name, unsigned flags) CI_HF; extern void ci_netif_cluster_prefault(ci_netif* ni) CI_HF; #endif -extern int ci_netif_restore_id(ci_netif*, unsigned stack_id, bool is_service) CI_HF; -extern int citp_netif_by_id(ci_uint32 stack_id, ci_netif** out_ni, int locked) CI_HF; -extern int ci_netif_restore_name(ci_netif*, const char*) CI_HF; -extern int ci_netif_restore(ci_netif* ni, ci_fd_t fd, - unsigned netif_mmap_bytes) CI_HF; -extern int ci_netif_dtor(ci_netif*) CI_HF; +extern int ci_netif_restore_id( + ci_netif*, unsigned stack_id, bool is_service) CI_HF; 
+extern int citp_netif_by_id( + ci_uint32 stack_id, ci_netif** out_ni, int locked) CI_HF; +extern int ci_netif_restore_name(ci_netif*, const char*) CI_HF; +extern int ci_netif_restore( + ci_netif* ni, ci_fd_t fd, unsigned netif_mmap_bytes) CI_HF; +extern int ci_netif_dtor(ci_netif*) CI_HF; extern unsigned ci_netif_build_future_intf_mask(ci_netif* ni) CI_HF; -extern void ci_netif_error_detected(ci_netif*, unsigned error_flag, - const char* caller) CI_HF; +extern void ci_netif_error_detected( + ci_netif*, unsigned error_flag, const char* caller) CI_HF; #if OO_DO_STACK_POLL #ifndef __KERNEL__ -extern int ci_netif_poll_intf_future(ci_netif*, int intf_i, ci_uint64 now_frc) - CI_HF; +extern int ci_netif_poll_intf_future( + ci_netif*, int intf_i, ci_uint64 now_frc) CI_HF; #endif -extern int ci_netif_poll_n(ci_netif*, int max_evs) CI_HF; -#define ci_netif_poll(ni) ci_netif_poll_n((ni), NI_OPTS(ni).evs_per_poll) +extern int ci_netif_poll_n(ci_netif*, int max_evs) CI_HF; +#define ci_netif_poll(ni) ci_netif_poll_n((ni), NI_OPTS(ni).evs_per_poll) extern void ci_netif_loopback_pkts_send(ci_netif* ni) CI_HF; #if CI_CFG_WANT_BPF_NATIVE #ifdef __KERNEL__ /* in-kernel backend for ci_netif_evq_poll_k */ -extern int ci_netif_evq_poll(ci_netif*, int intf); +extern int ci_netif_evq_poll(ci_netif*, int intf); #else /* makes syscall to invoke ci_netif_evq_poll */ -extern int ci_netif_evq_poll_k(ci_netif* ni, int intf_i); +extern int ci_netif_evq_poll_k(ci_netif* ni, int intf_i); #endif #endif -extern void ci_netif_tx_pkt_complete(ci_netif*, struct ci_netif_poll_state*, - ci_ip_pkt_fmt*); +extern void ci_netif_tx_pkt_complete( + ci_netif*, struct ci_netif_poll_state*, ci_ip_pkt_fmt*); /* Fake TX complete function called when a packet was deferred because of * no destination MAC, and dropped as a response to various error. 
*/ -ci_inline void -cicp_pkt_complete_fake(ci_netif* ni, ci_ip_pkt_fmt* pkt) +ci_inline void cicp_pkt_complete_fake(ci_netif* ni, ci_ip_pkt_fmt* pkt) { ni->state->nic[pkt->intf_i].tx_bytes_removed -= TX_PKT_LEN(pkt); ci_netif_tx_pkt_complete(ni, NULL, pkt); @@ -513,68 +505,63 @@ ci_inline void ci_netif_send(ci_netif* ni, ci_ip_pkt_fmt* pkt) __ci_netif_send(ni, pkt); } extern bool ci_netif_send_immediate(ci_netif* netif, ci_ip_pkt_fmt* pkt, - const struct ef_vi_tx_extra* extra) CI_HF; + const struct ef_vi_tx_extra* extra) CI_HF; extern int ci_netif_rx_post(ci_netif* netif, int nic_index, ef_vi* vi) CI_HF; #ifdef __KERNEL__ -extern int ci_netif_set_rxq_limit(ci_netif*) CI_HF; -extern int ci_netif_init_fill_rx_rings(ci_netif*) CI_HF; +extern int ci_netif_set_rxq_limit(ci_netif*) CI_HF; +extern int ci_netif_init_fill_rx_rings(ci_netif*) CI_HF; #endif extern ci_uint64 ci_netif_purge_deferred_socket_list(ci_netif* ni) CI_HF; extern void ci_netif_merge_atomic_counters(ci_netif* ni) CI_HF; extern void ci_netif_mem_pressure_pkt_pool_fill(ci_netif*) CI_HF; -extern int ci_netif_mem_pressure_try_exit(ci_netif*) CI_HF; +extern int ci_netif_mem_pressure_try_exit(ci_netif*) CI_HF; extern void ci_netif_timeout_remove(ci_netif* ni, ci_tcp_state* ts) CI_HF; extern void ci_netif_timeout_leave(ci_netif* ni, ci_tcp_state* ts) CI_HF; -extern void ci_netif_timeout_reap(ci_netif* ni) CI_HF; +extern void ci_netif_timeout_reap(ci_netif* ni) CI_HF; extern void ci_netif_timeout_state(ci_netif* ni) CI_HF; -extern void ci_netif_timeout_restart(ci_netif *ni, ci_tcp_state *ts) CI_HF; +extern void ci_netif_timeout_restart(ci_netif* ni, ci_tcp_state* ts) CI_HF; extern void ci_netif_timewait_enter(ci_netif* ni, ci_tcp_state* ts) CI_HF; -extern int ci_netif_timewait_try_to_free_filter(ci_netif* ni) CI_HF; +extern int ci_netif_timewait_try_to_free_filter(ci_netif* ni) CI_HF; extern void ci_netif_fin_timeout_enter(ci_netif* ni, ci_tcp_state* ts) CI_HF; extern void ci_netif_dump(ci_netif* ni) CI_HF; 
extern void ci_vi_info_dump(ci_netif* ni) CI_HF; -extern void ci_netif_dump_to_logger(ci_netif* ni, oo_dump_log_fn_t logger, - void* log_arg) CI_HF; +extern void ci_netif_dump_to_logger( + ci_netif* ni, oo_dump_log_fn_t logger, void* log_arg) CI_HF; extern void ci_netif_dump_vi_stats(ci_netif* ni) CI_HF; -extern void ci_netif_dump_vi_stats_to_logger(ci_netif* ni, - oo_dump_log_fn_t logger, - void* log_arg) CI_HF; +extern void ci_netif_dump_vi_stats_to_logger( + ci_netif* ni, oo_dump_log_fn_t logger, void* log_arg) CI_HF; extern void ci_netif_dump_extra(ci_netif* ni) CI_HF; -extern void ci_netif_dump_extra_to_logger(ci_netif* ni, - oo_dump_log_fn_t logger, - void *log_arg) CI_HF; +extern void ci_netif_dump_extra_to_logger( + ci_netif* ni, oo_dump_log_fn_t logger, void* log_arg) CI_HF; extern void ci_netif_dump_sockets(ci_netif* ni) CI_HF; -extern void ci_netif_dump_sockets_to_logger(ci_netif* ni, - oo_dump_log_fn_t logger, - void *log_arg) CI_HF; -extern void ci_netif_netstat_sockets_to_logger(ci_netif* ni, - oo_dump_log_fn_t logger, - void *log_arg) CI_HF; +extern void ci_netif_dump_sockets_to_logger( + ci_netif* ni, oo_dump_log_fn_t logger, void* log_arg) CI_HF; +extern void ci_netif_netstat_sockets_to_logger( + ci_netif* ni, oo_dump_log_fn_t logger, void* log_arg) CI_HF; extern void ci_netif_print_sockets(ci_netif* ni) CI_HF; extern void ci_netif_dump_dmaq(ci_netif* ni, int dump) CI_HF; extern void ci_netif_dump_timeoutq(ci_netif* ni) CI_HF; extern void ci_netif_dump_reap_list(ci_netif* ni, int verbose) CI_HF; -extern void ci_netif_config_opts_dump(ci_netif_config_opts* opts, - oo_dump_log_fn_t logger, - void* log_arg) CI_HF; -extern void ci_stack_time_dump(ci_netif* ni, oo_dump_log_fn_t logger, - void* log_arg) CI_HF; +extern void ci_netif_config_opts_dump( + ci_netif_config_opts* opts, oo_dump_log_fn_t logger, void* log_arg) CI_HF; +extern void ci_stack_time_dump( + ci_netif* ni, oo_dump_log_fn_t logger, void* log_arg) CI_HF; extern void 
ci_netif_pkt_dump_all(ci_netif* ni) CI_HF; -extern void ci_netif_pkt_queue_dump(ci_netif* ni, ci_ip_pkt_queue* q, - int is_recv, int dump) CI_HF; -extern void ci_netif_pkt_list_dump(ci_netif* ni, oo_pkt_p head, - int is_recv, int dump) CI_HF; -extern void ci_netif_pkt_dump(ci_netif* ni, ci_ip_pkt_fmt*, int is_recv, - int dump) CI_HF; -extern int ci_netif_bad_hwport(ci_netif*, ci_hwport_id_t) CI_HF; +extern void ci_netif_pkt_queue_dump( + ci_netif* ni, ci_ip_pkt_queue* q, int is_recv, int dump) CI_HF; +extern void ci_netif_pkt_list_dump( + ci_netif* ni, oo_pkt_p head, int is_recv, int dump) CI_HF; +extern void ci_netif_pkt_dump( + ci_netif* ni, ci_ip_pkt_fmt*, int is_recv, int dump) CI_HF; +extern int ci_netif_bad_hwport(ci_netif*, ci_hwport_id_t) CI_HF; extern void ci_tcp_rx_checks(ci_netif*, ci_tcp_state*, ci_ip_pkt_fmt*) CI_HF; -extern void ci_tcp_listen_rx_checks(ci_netif*, ci_tcp_socket_listen*, - ci_ip_pkt_fmt*) CI_HF; +extern void ci_tcp_listen_rx_checks( + ci_netif*, ci_tcp_socket_listen*, ci_ip_pkt_fmt*) CI_HF; -extern int ci_netif_force_wake(ci_netif* ni, int everyone) CI_HF; +extern int ci_netif_force_wake(ci_netif* ni, int everyone) CI_HF; #if CI_CFG_EPOLL3 #ifndef __KERNEL__ @@ -585,22 +572,27 @@ extern void ci_netif_free_ready_lists(ci_netif* ni); #endif CI_DEBUG(extern void ci_netif_assert_valid(ci_netif*, const char*, int);) -CI_DEBUG(extern void ci_netif_verify_freepkts(ci_netif *, const char *, int);) +CI_DEBUG(extern void ci_netif_verify_freepkts(ci_netif*, const char*, int);) -#define ASSERT_VALID_NETIF_ADDR(ni, addr, size) do{ \ - ci_assert(ci_to_int(addr) >= 0); \ - ci_assert((addr) < (ni)->state->netif_mmap_bytes); \ - ci_assert((addr) + (size) <= (ni)->state->netif_mmap_bytes); \ - }while(0) +#define ASSERT_VALID_NETIF_ADDR(ni, addr, size) \ + do { \ + ci_assert(ci_to_int(addr) >= 0); \ + ci_assert((addr) < (ni)->state->netif_mmap_bytes); \ + ci_assert((addr) + (size) <= (ni)->state->netif_mmap_bytes); \ + } while( 0 ) ci_inline int 
ci_netif_num_vis(ci_netif* ni) { #if CI_CFG_TCP_OFFLOAD_RECYCLER || CI_CFG_TX_CRC_OFFLOAD switch( NI_OPTS(ni).tcp_offload_plugin ) { - case CITP_TCP_OFFLOAD_OFF: return 1; - case CITP_TCP_OFFLOAD_NVME: return 1; - case CITP_TCP_OFFLOAD_RAW_TCP: return 2; - default: return 2 + CI_CFG_TCP_PLUGIN_EXTRA_VIS; + case CITP_TCP_OFFLOAD_OFF: + return 1; + case CITP_TCP_OFFLOAD_NVME: + return 1; + case CITP_TCP_OFFLOAD_RAW_TCP: + return 2; + default: + return 2 + CI_CFG_TCP_PLUGIN_EXTRA_VIS; } #endif return 1; @@ -611,7 +603,7 @@ ci_inline int ci_netif_num_vis(ci_netif* ni) *********************************************************************/ /* Assert packet is empty - but may contain payload */ -#ifndef NDEBUG +#ifndef NDEBUG #define ASSERT_PKT_PAYLOAD_EMPTY(p) ci_assert_equal(p->pay_len, 0); #else #define ASSERT_PKT_PAYLOAD_EMPTY(p) @@ -619,26 +611,26 @@ ci_inline int ci_netif_num_vis(ci_netif* ni) /* True if [id] is in legal range. */ -#define IS_VALID_PKT_ID(ni, pp) OO_PP_EQ((pp), VALID_PKT_ID((ni), (pp))) +#define IS_VALID_PKT_ID(ni, pp) OO_PP_EQ((pp), VALID_PKT_ID((ni), (pp))) /* Validate packet id. 
*/ -#define ASSERT_VALID_PKT_ID(ni, id) \ +#define ASSERT_VALID_PKT_ID(ni, id) \ ci_ss_assert((ni), IS_VALID_PKT_ID((ni), (id))) -#define __ASSERT_VALID_PKT_ID(ni, id, file, line) \ +#define __ASSERT_VALID_PKT_ID(ni, id, file, line) \ ci_ss_assertfl((ni), IS_VALID_PKT_ID((ni), (id)), file, line) #ifdef NDEBUG -# define ASSERT_VALID_PKT(ni, pkt) -# define ASSERT_VALID_PKT_NNL(ni, pkt) -# define ASSERT_VALID_PKT_MAYBE_NNL(ni, pkt, ni_locked) +#define ASSERT_VALID_PKT(ni, pkt) +#define ASSERT_VALID_PKT_NNL(ni, pkt) +#define ASSERT_VALID_PKT_MAYBE_NNL(ni, pkt, ni_locked) #else -# define ASSERT_VALID_PKT(ni, pkt) \ - ci_assert_valid_pkt((ni), (pkt), CI_TRUE, __FILE__, __LINE__) -# define ASSERT_VALID_PKT_NNL(ni, pkt) \ - ci_assert_valid_pkt((ni), (pkt), CI_FALSE, __FILE__, __LINE__) -# define ASSERT_VALID_PKT_MAYBE_NNL(ni, pkt, ni_locked) \ - ci_assert_valid_pkt((ni), (pkt), (ni_locked), __FILE__, __LINE__) +#define ASSERT_VALID_PKT(ni, pkt) \ + ci_assert_valid_pkt((ni), (pkt), CI_TRUE, __FILE__, __LINE__) +#define ASSERT_VALID_PKT_NNL(ni, pkt) \ + ci_assert_valid_pkt((ni), (pkt), CI_FALSE, __FILE__, __LINE__) +#define ASSERT_VALID_PKT_MAYBE_NNL(ni, pkt, ni_locked) \ + ci_assert_valid_pkt((ni), (pkt), (ni_locked), __FILE__, __LINE__) #endif @@ -647,31 +639,31 @@ ci_inline int ci_netif_num_vis(ci_netif* ni) *********************************************************************/ #ifdef __KERNEL__ -# define IS_VALID_SOCK_ID(ni, id) ((unsigned) (id) < (ni)->ep_tbl_n) +#define IS_VALID_SOCK_ID(ni, id) ((unsigned) (id) < (ni)->ep_tbl_n) #else -# define IS_VALID_SOCK_ID(ni, id) ((unsigned) (id) < (ni)->state->n_ep_bufs) +#define IS_VALID_SOCK_ID(ni, id) ((unsigned) (id) < (ni)->state->n_ep_bufs) #endif -#define IS_VALID_SOCK_P(ni, sockp) IS_VALID_SOCK_ID((ni), OO_SP_TO_INT(sockp)) +#define IS_VALID_SOCK_P(ni, sockp) IS_VALID_SOCK_ID((ni), OO_SP_TO_INT(sockp)) -# define PKT_DBG_FMT "[id=%d flgs=%x]" -# define PKT_DBG_ARGS(p) OO_PKT_FMT(p), (p)->flags +#define PKT_DBG_FMT "[id=%d 
flgs=%x]" +#define PKT_DBG_ARGS(p) OO_PKT_FMT(p), (p)->flags extern int ci_netif_pktset_best(ci_netif* ni) CI_HF; -extern void ci_netif_pkt_free(ci_netif* ni, ci_ip_pkt_fmt* pkt - CI_KERNEL_ARG(int* p_netif_is_locked)) CI_HF; +extern void ci_netif_pkt_free(ci_netif* ni, + ci_ip_pkt_fmt* pkt CI_KERNEL_ARG(int* p_netif_is_locked)) CI_HF; #define CI_PKT_ALLOC_FOR_TCP_TX 1 #define CI_PKT_ALLOC_USE_NONB 2 #define CI_PKT_ALLOC_NO_REAP 4 extern ci_ip_pkt_fmt* ci_netif_pkt_alloc_slow(ci_netif*, int flags) CI_HF; -extern int ci_netif_pkt_try_to_free(ci_netif* ni, int desperation, - int stop_once_freed_n) CI_HF; +extern int ci_netif_pkt_try_to_free( + ci_netif* ni, int desperation, int stop_once_freed_n) CI_HF; extern void ci_netif_try_to_reap(ci_netif* ni, int stop_once_freed_n) CI_HF; -extern void ci_netif_rxq_low_on_recv(ci_netif*, ci_sock_cmn*, - int bytes_freed) CI_HF; +extern void ci_netif_rxq_low_on_recv( + ci_netif*, ci_sock_cmn*, int bytes_freed) CI_HF; /*! Allocate a packet buffer, blocking if necessary. If can_block=FALSE * this function returns 0 or -ENOBUFS. At userlevel this @@ -682,10 +674,8 @@ extern void ci_netif_rxq_low_on_recv(ci_netif*, ci_sock_cmn*, * does *not* "exit" the library, so if any signals occur when called from * userspace, they are deferred. */ -extern int ci_netif_pkt_alloc_block(ci_netif*, ci_sock_cmn*, - int* ni_locked, - int can_block, - ci_ip_pkt_fmt** p_pkt) CI_HF; +extern int ci_netif_pkt_alloc_block(ci_netif*, ci_sock_cmn*, int* ni_locked, + int can_block, ci_ip_pkt_fmt** p_pkt) CI_HF; /*! Sleeps until a packet buffer becomes available, returning 0 on success. * At userlevel this function will never fail. 
In the kernel it may return @@ -701,10 +691,10 @@ extern int ci_netif_pkt_alloc_block(ci_netif*, ci_sock_cmn*, */ extern int ci_netif_pkt_wait(ci_netif*, ci_sock_cmn* s, int lock_flags) CI_HF; -extern ci_ip_pkt_fmt* __ci_netif_pkt_rx_to_tx(ci_netif*, ci_ip_pkt_fmt*, - const char*) CI_HF; -#define ci_netif_pkt_rx_to_tx(ni, pkt) \ - __ci_netif_pkt_rx_to_tx((ni), (pkt), __FUNCTION__) +extern ci_ip_pkt_fmt* __ci_netif_pkt_rx_to_tx( + ci_netif*, ci_ip_pkt_fmt*, const char*) CI_HF; +#define ci_netif_pkt_rx_to_tx(ni, pkt) \ + __ci_netif_pkt_rx_to_tx((ni), (pkt), __FUNCTION__) extern int ci_netif_pkt_pass_to_kernel(ci_netif* ni, ci_ip_pkt_fmt* pkt); @@ -717,24 +707,24 @@ extern void oo_sock_cplane_init(struct oo_sock_cplane*) CI_HF; extern void ci_sock_cmn_init(ci_netif*, ci_sock_cmn*, int can_poison) CI_HF; extern void ci_sock_cmn_reinit(ci_netif*, ci_sock_cmn*) CI_HF; extern void ci_sock_cmn_dump(ci_netif*, ci_sock_cmn*, const char* pf, - oo_dump_log_fn_t logger, void* log_arg) CI_HF; + oo_dump_log_fn_t logger, void* log_arg) CI_HF; -# define S_SP(ss) ((ss)->s.b.bufid) -# define SC_SP(s) ((s)->b.bufid) -# define W_SP(w) ((w)->bufid) +#define S_SP(ss) ((ss)->s.b.bufid) +#define SC_SP(s) ((s)->b.bufid) +#define W_SP(w) ((w)->bufid) -#define S_ID(ss) OO_SP_TO_INT(S_SP(ss)) -#define SC_ID(s) OO_SP_TO_INT(SC_SP(s)) -#define W_ID(w) OO_SP_TO_INT(W_SP(w)) +#define S_ID(ss) OO_SP_TO_INT(S_SP(ss)) +#define SC_ID(s) OO_SP_TO_INT(SC_SP(s)) +#define W_ID(w) OO_SP_TO_INT(W_SP(w)) -#define S_FMT(ss) OO_SP_FMT(S_SP(ss)) -#define SC_FMT(s) OO_SP_FMT(SC_SP(s)) -#define W_FMT(w) OO_SP_FMT(W_SP(w)) +#define S_FMT(ss) OO_SP_FMT(S_SP(ss)) +#define SC_FMT(s) OO_SP_FMT(SC_SP(s)) +#define W_FMT(w) OO_SP_FMT(W_SP(w)) /* Wrappers to determine whether a socket has been bound, explicitly or * implicitly */ -#define CI_SOCK_NOT_BOUND(s) (!sock_lport_be16((s))) +#define CI_SOCK_NOT_BOUND(s) (! 
sock_lport_be16((s))) void ci_ipcache_set_saddr(ci_ip_cached_hdrs* ipcache, ci_addr_t addr); void ci_ipcache_set_daddr(ci_ip_cached_hdrs* ipcache, ci_addr_t addr); @@ -758,15 +748,19 @@ ci_addr_t sock_raddr(ci_sock_cmn* s); /* Note: we do not require a lock to set the so_error field */ /* [s] = ci_sock_cmn*, [e] is >= 0 error value */ -#define CI_SET_SO_ERROR(s,e) do { \ - ci_assert_ge(e,0); if((e)) (s)->so_error=(e); } while(0) +#define CI_SET_SO_ERROR(s, e) \ + do { \ + ci_assert_ge(e, 0); \ + if( (e) ) \ + (s)->so_error = (e); \ + } while( 0 ) /* [t] = ci_tcp_state*, [e] = +ve error value */ -#define CI_SET_TCP_SO_ERROR(t,e) CI_SET_SO_ERROR(&(t)->s,(e)) +#define CI_SET_TCP_SO_ERROR(t, e) CI_SET_SO_ERROR(&(t)->s, (e)) /* [u] = ci_udp_state*, [e] = +ve error value */ -#define CI_SET_UDP_SO_ERROR(u,e) CI_SET_SO_ERROR(&(u)->s,(e)) +#define CI_SET_UDP_SO_ERROR(u, e) CI_SET_SO_ERROR(&(u)->s, (e)) -ci_inline ci_int32 ci_get_so_error(ci_sock_cmn *s) +ci_inline ci_int32 ci_get_so_error(ci_sock_cmn* s) { ci_int32 rc; do { @@ -779,22 +773,20 @@ ci_inline ci_int32 ci_get_so_error(ci_sock_cmn *s) ************************ rx_errno & tx_errno ************************* **********************************************************************/ -#define SOCK_TX_ERRNO(s) ((s)->tx_errno) -#define SOCK_RX_ERRNO(s) ((s)->rx_errno & 0x3fff) +#define SOCK_TX_ERRNO(s) ((s)->tx_errno) +#define SOCK_RX_ERRNO(s) ((s)->rx_errno & 0x3fff) /********************************************************************** **************************** ICMP/Errors ***************************** **********************************************************************/ -extern int -ci_icmp_send(ci_netif *ni, ci_ip_pkt_fmt *tx_pkt, - const ci_addr_t saddr, const ci_addr_t daddr, - const ci_mac_addr_t *mac_dest, - ci_uint8 type, ci_uint8 code, ci_uint16 data_len) CI_HF; +extern int ci_icmp_send(ci_netif* ni, ci_ip_pkt_fmt* tx_pkt, + const ci_addr_t saddr, const ci_addr_t daddr, + const ci_mac_addr_t* mac_dest, 
ci_uint8 type, ci_uint8 code, + ci_uint16 data_len) CI_HF; extern int __ci_icmp_send_error(ci_netif* ni, int af, ci_ipx_hdr_t* ipx, - struct oo_eth_hdr* rx_eth, ci_uint8 type, - ci_uint8 code) CI_HF; + struct oo_eth_hdr* rx_eth, ci_uint8 type, ci_uint8 code) CI_HF; /********************************************************************** ********************************* UDP ********************************* @@ -809,48 +801,47 @@ extern int __ci_icmp_send_error(ci_netif* ni, int af, ci_ipx_hdr_t* ipx, #define CI_UDP_MAX_PAYLOAD_BYTES(af) \ (0xffff - sizeof(ci_udp_hdr) - (IS_AF_INET6(af) ? 0 : sizeof(ci_ip4_hdr))) -#define UDP_FLAGS(us) ((us)->udpflags) +#define UDP_FLAGS(us) ((us)->udpflags) -#define UDP_SET_FLAG(us,f) ((us)->udpflags|=(f)) -#define UDP_CLR_FLAG(us,f) ((us)->udpflags&=~(f)) -#define UDP_GET_FLAG(us,f) ((us)->udpflags&(f)) +#define UDP_SET_FLAG(us, f) ((us)->udpflags |= (f)) +#define UDP_CLR_FLAG(us, f) ((us)->udpflags &= ~(f)) +#define UDP_GET_FLAG(us, f) ((us)->udpflags & (f)) -#define UDP_IP_HDR(us) (&(us)->s.pkt.ipx.ip4) +#define UDP_IP_HDR(us) (&(us)->s.pkt.ipx.ip4) -#define udp_lport_be16(us) (sock_lport_be16(&us->s)) -#define udp_laddr_be32(us) (sock_laddr_be32(&us->s)) -#define udp_frag_off_be16(us) (UDP_IP_HDR((us))->ip_frag_off_be16) -#define udp_rport_be16(us) (sock_rport_be16(&us->s)) -#define udp_raddr_be32(us) (sock_raddr_be32(&us->s)) +#define udp_lport_be16(us) (sock_lport_be16(&us->s)) +#define udp_laddr_be32(us) (sock_laddr_be32(&us->s)) +#define udp_frag_off_be16(us) (UDP_IP_HDR((us))->ip_frag_off_be16) +#define udp_rport_be16(us) (sock_rport_be16(&us->s)) +#define udp_raddr_be32(us) (sock_raddr_be32(&us->s)) #if CI_CFG_IPV6 -#define udp_ip6_laddr(us) (sock_ip6_laddr(&us->s)) -#define udp_ip6_raddr(us) (sock_ip6_raddr(&us->s)) +#define udp_ip6_laddr(us) (sock_ip6_laddr(&us->s)) +#define udp_ip6_raddr(us) (sock_ip6_raddr(&us->s)) #endif -#define sock_ipx_laddr(s) ((s)->laddr) -#define sock_ipx_raddr(s) ipcache_raddr(&(s)->pkt) 
+#define sock_ipx_laddr(s) ((s)->laddr) +#define sock_ipx_raddr(s) ipcache_raddr(&(s)->pkt) -#define udp_ipx_laddr(us) sock_ipx_laddr(&(us)->s) -#define udp_ipx_raddr(us) sock_ipx_raddr(&(us)->s) +#define udp_ipx_laddr(us) sock_ipx_laddr(&(us)->s) +#define udp_ipx_raddr(us) sock_ipx_raddr(&(us)->s) -#define UDP_TX_ERRNO(us) (SOCK_TX_ERRNO(&(us)->s)) -#define UDP_RX_ERRNO(us) (SOCK_RX_ERRNO(&(us)->s)) -#define UDP_IS_SHUT_RD(us) ((us)->s.rx_errno & CI_SHUT_RD) -#define UDP_IS_SHUT_WR(us) ((us)->s.rx_errno & CI_SHUT_WR) -#define UDP_IS_SHUT_RDWR(us) \ - (((us)->s.rx_errno & (CI_SHUT_RD | CI_SHUT_WR)) == \ - (CI_SHUT_RD | CI_SHUT_WR)) +#define UDP_TX_ERRNO(us) (SOCK_TX_ERRNO(&(us)->s)) +#define UDP_RX_ERRNO(us) (SOCK_RX_ERRNO(&(us)->s)) +#define UDP_IS_SHUT_RD(us) ((us)->s.rx_errno & CI_SHUT_RD) +#define UDP_IS_SHUT_WR(us) ((us)->s.rx_errno & CI_SHUT_WR) +#define UDP_IS_SHUT_RDWR(us) \ + (((us)->s.rx_errno & (CI_SHUT_RD | CI_SHUT_WR)) == (CI_SHUT_RD | CI_SHUT_WR)) /*** udp.c ***/ extern void ci_udp_state_dump(ci_netif*, ci_udp_state*, const char* pf, - oo_dump_log_fn_t logger, void* log_arg) CI_HF; + oo_dump_log_fn_t logger, void* log_arg) CI_HF; /* Set the source IP address & port */ -ci_inline void -ci_sock_cmn_set_laddr(ci_sock_cmn* s, ci_addr_t addr, int lport_be16) +ci_inline void ci_sock_cmn_set_laddr( + ci_sock_cmn* s, ci_addr_t addr, int lport_be16) { ci_sock_set_laddr_port(s, addr, lport_be16); s->cp.lport_be16 = lport_be16; @@ -859,46 +850,42 @@ ci_sock_cmn_set_laddr(ci_sock_cmn* s, ci_addr_t addr, int lport_be16) s->cp.laddr = ip4_addr_any; else s->cp.laddr = addr; - } -extern void ci_udp_state_assert_valid(ci_netif*, ci_udp_state* ts, - const char* file, int line) CI_HF; +extern void ci_udp_state_assert_valid( + ci_netif*, ci_udp_state* ts, const char* file, int line) CI_HF; -extern void ci_udp_ep_assert_valid(citp_socket* ep, - const char* file, int line) CI_HF; +extern void ci_udp_ep_assert_valid( + citp_socket* ep, const char* file, int line) CI_HF; 
/*** udp_rx.c ***/ -extern void ci_udp_handle_rx(ci_netif*, ci_ip_pkt_fmt* pkt, ci_udp_hdr*, - int ip_paylen) CI_HF; +extern void ci_udp_handle_rx( + ci_netif*, ci_ip_pkt_fmt* pkt, ci_udp_hdr*, int ip_paylen) CI_HF; -ci_inline -void ci_pkt_init_from_ipcache_len(ci_ip_pkt_fmt *pkt, - const ci_ip_cached_hdrs *ipcache, - size_t header_len) +ci_inline void ci_pkt_init_from_ipcache_len( + ci_ip_pkt_fmt* pkt, const ci_ip_cached_hdrs* ipcache, size_t header_len) { - if( !ipcache_is_ipv6(ipcache) ) { + if( ! ipcache_is_ipv6(ipcache) ) { ci_assert_equal(CI_IP4_IHL(&ipcache->ipx.ip4), sizeof(ci_ip4_hdr)); ci_assert_equal(ipcache->ether_type, CI_ETHERTYPE_IP); } oo_tx_pkt_layout_update(pkt, ipcache->ether_offset); memcpy(oo_tx_ether_hdr(pkt), ci_ip_cache_ether_hdr(ipcache), - header_len + oo_tx_ether_hdr_size(pkt)); - if( !ipcache_is_ipv6(ipcache) ) { + header_len + oo_tx_ether_hdr_size(pkt)); + if( ! ipcache_is_ipv6(ipcache) ) { ci_assert_equal(CI_IP4_IHL(oo_tx_ip_hdr(pkt)), sizeof(ci_ip4_hdr)); ci_assert_equal(oo_tx_ether_type_get(pkt), CI_ETHERTYPE_IP); } } -ci_inline -void ci_pkt_init_from_ipcache(ci_ip_pkt_fmt *pkt, - const ci_ip_cached_hdrs *ipcache) +ci_inline void ci_pkt_init_from_ipcache( + ci_ip_pkt_fmt* pkt, const ci_ip_cached_hdrs* ipcache) { - ci_pkt_init_from_ipcache_len(pkt, ipcache, - CI_IPX_HDR_SIZE(ipcache_af(ipcache)) + sizeof(ci_tcp_hdr)); + ci_pkt_init_from_ipcache_len( + pkt, ipcache, CI_IPX_HDR_SIZE(ipcache_af(ipcache)) + sizeof(ci_tcp_hdr)); } @@ -919,51 +906,47 @@ struct cmsg_state { int* p_msg_flags; }; -extern void ci_put_cmsg(struct cmsg_state *cmsg_state, int level, int type, - socklen_t len, const void *data) CI_HF; +extern void ci_put_cmsg(struct cmsg_state* cmsg_state, int level, int type, + socklen_t len, const void* data) CI_HF; /* info_out contains a pointer to struct in_pktinfo or struct in6_pktinfo */ extern int ci_ip_cmsg_send(const struct msghdr*, void** info_out) CI_HF; extern void ci_ip_cmsg_finish(struct cmsg_state* cmsg_state) CI_HF; 
#ifndef __KERNEL__ -extern ci_fd_t ci_udp_ep_ctor(citp_socket* ep, ci_netif* sh, - int domain, int type) CI_HF; +extern ci_fd_t ci_udp_ep_ctor( + citp_socket* ep, ci_netif* sh, int domain, int type) CI_HF; extern int ci_udp_bind_start(citp_socket* ep, ci_fd_t fd, - const struct sockaddr* addr, socklen_t addrlen, - ci_uint16* lport) CI_HF; + const struct sockaddr* addr, socklen_t addrlen, ci_uint16* lport) CI_HF; extern int ci_udp_bind_conclude(citp_socket* ep, const struct sockaddr* addr, - socklen_t addrlen, ci_uint16 lport); + socklen_t addrlen, ci_uint16 lport); #if CI_CFG_ENDPOINT_MOVE extern void ci_udp_handle_force_reuseport(ci_fd_t fd, citp_socket* ep, - const struct sockaddr* sa, - socklen_t sa_len) CI_HF; + const struct sockaddr* sa, socklen_t sa_len) CI_HF; extern int ci_udp_reuseport_bind(citp_socket* ep, ci_fd_t fd, - const struct sockaddr* sa, - socklen_t sa_len, ci_uint16 lport) CI_HF; + const struct sockaddr* sa, socklen_t sa_len, ci_uint16 lport) CI_HF; #endif -extern int ci_udp_connect(citp_socket*, ci_fd_t fd, - const struct sockaddr*, socklen_t addrlen) CI_HF; +extern int ci_udp_connect( + citp_socket*, ci_fd_t fd, const struct sockaddr*, socklen_t addrlen) CI_HF; extern int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd, - const struct sockaddr* serv_addr, - socklen_t addrlen, ci_fd_t os_sock) CI_HF; + const struct sockaddr* serv_addr, socklen_t addrlen, + ci_fd_t os_sock) CI_HF; extern int ci_udp_shutdown(citp_socket*, ci_fd_t fd, int how) CI_HF; extern int __ci_udp_shutdown(ci_netif* netif, ci_udp_state* us, int how) CI_HF; -extern int ci_udp_getpeername(citp_socket*, struct sockaddr*,socklen_t*) CI_HF; +extern int ci_udp_getpeername( + citp_socket*, struct sockaddr*, socklen_t*) CI_HF; extern int ci_udp_getsockopt(citp_socket* ep, ci_fd_t fd, int level, - int optname, void *optval, socklen_t *optlen ) CI_HF; + int optname, void* optval, socklen_t* optlen) CI_HF; extern int ci_udp_setsockopt(citp_socket* ep, ci_fd_t fd, int level, - int 
optname, const void*optval, socklen_t optlen) CI_HF; + int optname, const void* optval, socklen_t optlen) CI_HF; extern int ci_udp_ioctl(citp_socket*, ci_fd_t, int request, void* arg) CI_HF; #endif /* Send/recv called from within kernel & user-library, so outside above #if */ -extern int ci_udp_sendmsg(ci_udp_iomsg_args *a, - const ci_msghdr*, int) CI_HF; -extern int ci_udp_recvmsg(ci_udp_iomsg_args *a, ci_msghdr*, - int flags) CI_HF; +extern int ci_udp_sendmsg(ci_udp_iomsg_args* a, const ci_msghdr*, int) CI_HF; +extern int ci_udp_recvmsg(ci_udp_iomsg_args* a, ci_msghdr*, int flags) CI_HF; extern void ci_udp_set_no_unicast(citp_socket* ep) CI_HF; @@ -987,39 +970,35 @@ typedef enum { #ifndef __KERNEL__ struct mmsghdr; -extern int ci_udp_recvmmsg(ci_udp_iomsg_args *a, struct mmsghdr* mmsg, - unsigned int vlen, int flags, - const struct timespec* timeout - CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) CI_HF; +extern int ci_udp_recvmmsg(ci_udp_iomsg_args* a, struct mmsghdr* mmsg, + unsigned int vlen, int flags, + const struct timespec* timeout CI_KERNEL_ARG( + ci_addr_spc_t addr_spc)) CI_HF; struct onload_zc_mmsg; -extern int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, - struct onload_zc_mmsg* msgs, int flags); +extern int ci_tcp_zc_send( + ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msgs, int flags); struct onload_zc_recv_args; int ci_udp_zc_recv(ci_udp_iomsg_args* a, struct onload_zc_recv_args* args); /* A special version of recvmsg to grab data from kernel stack when - * doing zero-copy + * doing zero-copy */ -extern int ci_udp_recvmsg_kernel(int fd, ci_netif* ni, ci_udp_state* us, - struct msghdr* msg, int flags); +extern int ci_udp_recvmsg_kernel( + int fd, ci_netif* ni, ci_udp_state* us, struct msghdr* msg, int flags); -extern enum onload_delegated_send_rc -ci_tcp_ds_fill_headers(ci_netif* ni, ci_tcp_state* ts, unsigned flags, - void* headers, int* headers_len_inout, - int* ip_tcp_hdr_len_out, - int* tcp_seq_offset_out, int* ip_len_offset_out); -extern int 
ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, - const ci_iovec *iov, int iovlen, int flags); +extern enum onload_delegated_send_rc ci_tcp_ds_fill_headers(ci_netif* ni, + ci_tcp_state* ts, unsigned flags, void* headers, int* headers_len_inout, + int* ip_tcp_hdr_len_out, int* tcp_seq_offset_out, int* ip_len_offset_out); +extern int ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, const ci_iovec* iov, + int iovlen, int flags); -extern int -ci_netif_raw_send(ci_netif* ni, int intf_i, - const ci_iovec *iov, int iovlen); +extern int ci_netif_raw_send( + ci_netif* ni, int intf_i, const ci_iovec* iov, int iovlen); #endif extern void ci_ip_cmsg_recv(ci_netif*, ci_udp_state*, const ci_ip_pkt_fmt*, - struct msghdr*, int netif_locked, - int *p_msg_flags) CI_HF; + struct msghdr*, int netif_locked, int* p_msg_flags) CI_HF; #if OO_DO_STACK_POLL extern void ci_udp_all_fds_gone(ci_netif* netif, oo_sp, int do_free); #endif @@ -1028,11 +1007,12 @@ extern void ci_udp_state_try_free(ci_netif*, ci_udp_state*) CI_HF; extern int ci_udp_csum_correct(ci_ip_pkt_fmt* pkt, ci_udp_hdr* udp) CI_HF; extern void ci_udp_sendmsg_send_async_q(ci_netif*, ci_udp_state*) CI_HF; -extern void ci_udp_perform_deferred_socket_work(ci_netif*, ci_udp_state*)CI_HF; -extern int ci_udp_try_to_free_pkts(ci_netif*, ci_udp_state*, - int desperation) CI_HF; +extern void ci_udp_perform_deferred_socket_work( + ci_netif*, ci_udp_state*) CI_HF; +extern int ci_udp_try_to_free_pkts( + ci_netif*, ci_udp_state*, int desperation) CI_HF; -#define CI_PIPE_ZC_WRITE_FLAG_FORCE (1<<16) +#define CI_PIPE_ZC_WRITE_FLAG_FORCE (1 << 16) struct ci_pipe_pkt_list { ci_ip_pkt_fmt* head; @@ -1040,284 +1020,260 @@ struct ci_pipe_pkt_list { ci_uint32 count; }; -typedef int (*ci_pipe_zc_read_cb)(void* context, struct iovec* iovec, - int iov_num, int flags); - -extern int ci_pipe_read(ci_netif*, struct oo_pipe*, const struct iovec*, - size_t iovlen) CI_HF; -extern int oo_pipe_write_block(ci_netif* ni, struct oo_pipe* p, int flags) CI_HF; -extern 
int ci_pipe_write(ci_netif*, struct oo_pipe*, const struct iovec*, - size_t iovlen) CI_HF; -extern int ci_pipe_zc_read(ci_netif* ni, struct oo_pipe* p, int len, - int flags, ci_pipe_zc_read_cb cb, void* ctx) CI_HF; +typedef int (*ci_pipe_zc_read_cb)( + void* context, struct iovec* iovec, int iov_num, int flags); + +extern int ci_pipe_read( + ci_netif*, struct oo_pipe*, const struct iovec*, size_t iovlen) CI_HF; +extern int oo_pipe_write_block( + ci_netif* ni, struct oo_pipe* p, int flags) CI_HF; +extern int ci_pipe_write( + ci_netif*, struct oo_pipe*, const struct iovec*, size_t iovlen) CI_HF; +extern int ci_pipe_zc_read(ci_netif* ni, struct oo_pipe* p, int len, int flags, + ci_pipe_zc_read_cb cb, void* ctx) CI_HF; extern int ci_pipe_zc_move(ci_netif* ni, struct oo_pipe* pipe_src, - struct oo_pipe* pipe_dest, int len, int flags) CI_HF; + struct oo_pipe* pipe_dest, int len, int flags) CI_HF; extern int ci_pipe_zc_write(ci_netif* ni, struct oo_pipe* p, - struct ci_pipe_pkt_list* pkts, - int len, int flags) CI_HF; -extern int ci_pipe_zc_alloc_buffers(ci_netif* ni, - struct oo_pipe* p, - int flags, - int count, - struct ci_pipe_pkt_list* pkts_out) CI_HF; -extern int ci_pipe_zc_release_buffers(ci_netif* ni, - struct oo_pipe* p, - struct ci_pipe_pkt_list* pkts) CI_HF; -extern int ci_pipe_set_size(ci_netif* ni, struct oo_pipe* p, - size_t size) CI_HF; + struct ci_pipe_pkt_list* pkts, int len, int flags) CI_HF; +extern int ci_pipe_zc_alloc_buffers(ci_netif* ni, struct oo_pipe* p, int flags, + int count, struct ci_pipe_pkt_list* pkts_out) CI_HF; +extern int ci_pipe_zc_release_buffers( + ci_netif* ni, struct oo_pipe* p, struct ci_pipe_pkt_list* pkts) CI_HF; +extern int ci_pipe_set_size( + ci_netif* ni, struct oo_pipe* p, size_t size) CI_HF; extern void oo_pipe_dump(ci_netif*, struct oo_pipe*, const char* pf, - oo_dump_log_fn_t logger, void* log_arg) CI_HF; + oo_dump_log_fn_t logger, void* log_arg) CI_HF; extern int ci_pipe_list_to_iovec(ci_netif* ni, struct oo_pipe* p, - 
struct iovec* iov, - int* iov_num, - struct ci_pipe_pkt_list* pkts, - int len); + struct iovec* iov, int* iov_num, struct ci_pipe_pkt_list* pkts, int len); /********************************************************************** ********************************* TCP ******************************** **********************************************************************/ -#define SEQ_EQ(s1, s2) ((ci_uint32)((s1) - (s2)) == 0u) -#define SEQ_LT(s1, s2) ((ci_int32)((s1) - (s2)) < 0) -#define SEQ_LE(s1, s2) ((ci_int32)((s1) - (s2)) <= 0) -#define SEQ_GT(s1, s2) ((ci_int32)((s1) - (s2)) > 0) -#define SEQ_GE(s1, s2) ((ci_int32)((s1) - (s2)) >= 0) -#define SEQ_SUB(s1, s2) ((ci_int32)((s1) - (s2))) -#define SEQ(s) ((unsigned) (s)) +#define SEQ_EQ(s1, s2) ((ci_uint32) ((s1) - (s2)) == 0u) +#define SEQ_LT(s1, s2) ((ci_int32) ((s1) - (s2)) < 0) +#define SEQ_LE(s1, s2) ((ci_int32) ((s1) - (s2)) <= 0) +#define SEQ_GT(s1, s2) ((ci_int32) ((s1) - (s2)) > 0) +#define SEQ_GE(s1, s2) ((ci_int32) ((s1) - (s2)) >= 0) +#define SEQ_SUB(s1, s2) ((ci_int32) ((s1) - (s2))) +#define SEQ(s) ((unsigned) (s)) /* Is [s] between [sl] and [sh] (inclusive) */ -#define SEQ_BTW(s, sl, sh) ((sh) - (sl) >= (s) - (sl)) +#define SEQ_BTW(s, sl, sh) ((sh) - (sl) >= (s) - (sl)) -#define SEQ_MIN(x, y) (SEQ_LE(x, y) ? (x) : (y)) -#define SEQ_MAX(x, y) (SEQ_LE(x, y) ? (y) : (x)) +#define SEQ_MIN(x, y) (SEQ_LE(x, y) ? (x) : (y)) +#define SEQ_MAX(x, y) (SEQ_LE(x, y) ? (y) : (x)) /* Flags for connection states. These are used to determine whether ** certain things can/should be done in the current state. 
*/ -#define CI_TCP_STATE_SYNCHRONISED 0x001 -#define CI_TCP_STATE_SLOW_PATH 0x002 -#define CI_TCP_STATE_NOT_CONNECTED 0x004 -#define CI_TCP_STATE_RECVD_FIN 0x008 -#define CI_TCP_STATE_ACCEPT_DATA 0x010 -#define CI_TCP_STATE_TXQ_ACTIVE 0x020 -#define CI_TCP_STATE_NO_TIMERS 0x040 -#define CI_TCP_STATE_TIMEOUT_ORPHAN 0x080 -#define CI_TCP_STATE_TCP_CONN 0x100 -#define CI_TCP_STATE_TCP 0x200 -#define CI_TCP_STATE_INVALID 0x400 +#define CI_TCP_STATE_SYNCHRONISED 0x001 +#define CI_TCP_STATE_SLOW_PATH 0x002 +#define CI_TCP_STATE_NOT_CONNECTED 0x004 +#define CI_TCP_STATE_RECVD_FIN 0x008 +#define CI_TCP_STATE_ACCEPT_DATA 0x010 +#define CI_TCP_STATE_TXQ_ACTIVE 0x020 +#define CI_TCP_STATE_NO_TIMERS 0x040 +#define CI_TCP_STATE_TIMEOUT_ORPHAN 0x080 +#define CI_TCP_STATE_TCP_CONN 0x100 +#define CI_TCP_STATE_TCP 0x200 +#define CI_TCP_STATE_INVALID 0x400 /* 0x800 is unused */ /* Connection states. See also [tcp_misc.c] if you change these. */ -#define CI_TCP_CLOSED (0x0000 | CI_TCP_STATE_TCP \ - | CI_TCP_STATE_SLOW_PATH \ - | CI_TCP_STATE_NOT_CONNECTED \ - | CI_TCP_STATE_NO_TIMERS) -#define CI_TCP_LISTEN (0x1000 | CI_TCP_STATE_TCP \ - | CI_TCP_STATE_SLOW_PATH \ - | CI_TCP_STATE_NOT_CONNECTED \ - | CI_TCP_STATE_NO_TIMERS) -#define CI_TCP_INVALID (CI_TCP_LISTEN | CI_TCP_STATE_INVALID) -#define CI_TCP_SYN_SENT (0x2000 | CI_TCP_STATE_TCP \ - | CI_TCP_STATE_TCP_CONN \ - | CI_TCP_STATE_SLOW_PATH \ - | CI_TCP_STATE_NOT_CONNECTED \ - | CI_TCP_STATE_TXQ_ACTIVE) -#define CI_TCP_ESTABLISHED (0x3000 | CI_TCP_STATE_TCP \ - | CI_TCP_STATE_TCP_CONN \ - | CI_TCP_STATE_SYNCHRONISED \ - | CI_TCP_STATE_ACCEPT_DATA \ - | CI_TCP_STATE_TXQ_ACTIVE ) -#define CI_TCP_CLOSE_WAIT (0x4000 | CI_TCP_STATE_TCP \ - | CI_TCP_STATE_TCP_CONN \ - | CI_TCP_STATE_SYNCHRONISED \ - | CI_TCP_STATE_SLOW_PATH \ - | CI_TCP_STATE_RECVD_FIN \ - | CI_TCP_STATE_TXQ_ACTIVE ) -#define CI_TCP_LAST_ACK (0x5000 | CI_TCP_STATE_TCP \ - | CI_TCP_STATE_TCP_CONN \ - | CI_TCP_STATE_SYNCHRONISED \ - | CI_TCP_STATE_SLOW_PATH \ - | 
CI_TCP_STATE_NOT_CONNECTED \ - | CI_TCP_STATE_RECVD_FIN \ - | CI_TCP_STATE_TXQ_ACTIVE \ - | CI_TCP_STATE_TIMEOUT_ORPHAN ) -#define CI_TCP_FIN_WAIT1 (0x6000 | CI_TCP_STATE_TCP \ - | CI_TCP_STATE_TCP_CONN \ - | CI_TCP_STATE_SLOW_PATH \ - | CI_TCP_STATE_SYNCHRONISED \ - | CI_TCP_STATE_ACCEPT_DATA \ - | CI_TCP_STATE_TXQ_ACTIVE \ - | CI_TCP_STATE_TIMEOUT_ORPHAN ) -#define CI_TCP_FIN_WAIT2 (0x7000 | CI_TCP_STATE_TCP \ - | CI_TCP_STATE_TCP_CONN \ - | CI_TCP_STATE_SLOW_PATH \ - | CI_TCP_STATE_SYNCHRONISED \ - | CI_TCP_STATE_ACCEPT_DATA \ - | CI_TCP_STATE_TIMEOUT_ORPHAN ) -#define CI_TCP_CLOSING (0x8000 | CI_TCP_STATE_TCP \ - | CI_TCP_STATE_TCP_CONN \ - | CI_TCP_STATE_SYNCHRONISED \ - | CI_TCP_STATE_SLOW_PATH \ - | CI_TCP_STATE_NOT_CONNECTED \ - | CI_TCP_STATE_RECVD_FIN \ - | CI_TCP_STATE_TXQ_ACTIVE \ - | CI_TCP_STATE_TIMEOUT_ORPHAN ) -#define CI_TCP_TIME_WAIT (0x9000 | CI_TCP_STATE_TCP \ - | CI_TCP_STATE_TCP_CONN \ - | CI_TCP_STATE_SYNCHRONISED \ - | CI_TCP_STATE_SLOW_PATH \ - | CI_TCP_STATE_NOT_CONNECTED \ - | CI_TCP_STATE_RECVD_FIN \ - /* 2MSL timer doesn't count */ \ - | CI_TCP_STATE_NO_TIMERS) +#define CI_TCP_CLOSED \ + (0x0000 | CI_TCP_STATE_TCP | CI_TCP_STATE_SLOW_PATH | \ + CI_TCP_STATE_NOT_CONNECTED | CI_TCP_STATE_NO_TIMERS) +#define CI_TCP_LISTEN \ + (0x1000 | CI_TCP_STATE_TCP | CI_TCP_STATE_SLOW_PATH | \ + CI_TCP_STATE_NOT_CONNECTED | CI_TCP_STATE_NO_TIMERS) +#define CI_TCP_INVALID (CI_TCP_LISTEN | CI_TCP_STATE_INVALID) +#define CI_TCP_SYN_SENT \ + (0x2000 | CI_TCP_STATE_TCP | CI_TCP_STATE_TCP_CONN | \ + CI_TCP_STATE_SLOW_PATH | CI_TCP_STATE_NOT_CONNECTED | \ + CI_TCP_STATE_TXQ_ACTIVE) +#define CI_TCP_ESTABLISHED \ + (0x3000 | CI_TCP_STATE_TCP | CI_TCP_STATE_TCP_CONN | \ + CI_TCP_STATE_SYNCHRONISED | CI_TCP_STATE_ACCEPT_DATA | \ + CI_TCP_STATE_TXQ_ACTIVE) +#define CI_TCP_CLOSE_WAIT \ + (0x4000 | CI_TCP_STATE_TCP | CI_TCP_STATE_TCP_CONN | \ + CI_TCP_STATE_SYNCHRONISED | CI_TCP_STATE_SLOW_PATH | \ + CI_TCP_STATE_RECVD_FIN | CI_TCP_STATE_TXQ_ACTIVE) +#define 
CI_TCP_LAST_ACK \ + (0x5000 | CI_TCP_STATE_TCP | CI_TCP_STATE_TCP_CONN | \ + CI_TCP_STATE_SYNCHRONISED | CI_TCP_STATE_SLOW_PATH | \ + CI_TCP_STATE_NOT_CONNECTED | CI_TCP_STATE_RECVD_FIN | \ + CI_TCP_STATE_TXQ_ACTIVE | CI_TCP_STATE_TIMEOUT_ORPHAN) +#define CI_TCP_FIN_WAIT1 \ + (0x6000 | CI_TCP_STATE_TCP | CI_TCP_STATE_TCP_CONN | \ + CI_TCP_STATE_SLOW_PATH | CI_TCP_STATE_SYNCHRONISED | \ + CI_TCP_STATE_ACCEPT_DATA | CI_TCP_STATE_TXQ_ACTIVE | \ + CI_TCP_STATE_TIMEOUT_ORPHAN) +#define CI_TCP_FIN_WAIT2 \ + (0x7000 | CI_TCP_STATE_TCP | CI_TCP_STATE_TCP_CONN | \ + CI_TCP_STATE_SLOW_PATH | CI_TCP_STATE_SYNCHRONISED | \ + CI_TCP_STATE_ACCEPT_DATA | CI_TCP_STATE_TIMEOUT_ORPHAN) +#define CI_TCP_CLOSING \ + (0x8000 | CI_TCP_STATE_TCP | CI_TCP_STATE_TCP_CONN | \ + CI_TCP_STATE_SYNCHRONISED | CI_TCP_STATE_SLOW_PATH | \ + CI_TCP_STATE_NOT_CONNECTED | CI_TCP_STATE_RECVD_FIN | \ + CI_TCP_STATE_TXQ_ACTIVE | CI_TCP_STATE_TIMEOUT_ORPHAN) +#define CI_TCP_TIME_WAIT \ + (0x9000 | CI_TCP_STATE_TCP | CI_TCP_STATE_TCP_CONN | \ + CI_TCP_STATE_SYNCHRONISED | CI_TCP_STATE_SLOW_PATH | \ + CI_TCP_STATE_NOT_CONNECTED | \ + CI_TCP_STATE_RECVD_FIN /* 2MSL timer doesn't count */ \ + | CI_TCP_STATE_NO_TIMERS) /* Set in a socket that is freed. 
*/ -#define CI_TCP_STATE_FREE (0xa000 | CI_TCP_STATE_NO_TIMERS) +#define CI_TCP_STATE_FREE (0xa000 | CI_TCP_STATE_NO_TIMERS) /* Set in a socket that is UDP */ -#define CI_TCP_STATE_UDP (0xb000) +#define CI_TCP_STATE_UDP (0xb000) /* Set in a waitable which is in fact a pipe meta (not a pipe end) */ -#define CI_TCP_STATE_PIPE (0xc000) +#define CI_TCP_STATE_PIPE (0xc000) /* This endpoint is used to store aux buffers (synrecv states & some * others) */ -#define CI_TCP_STATE_AUXBUF (0xd000) +#define CI_TCP_STATE_AUXBUF (0xd000) /* Set in a socket that is used as the owner for an active wild filter */ #define CI_TCP_STATE_ACTIVE_WILD (0xe000) /* Convert state to number in range 0->0xe */ -#define CI_TCP_STATE_NUM(s) (((s) & 0xf000) >> 12u) +#define CI_TCP_STATE_NUM(s) (((s) &0xf000) >> 12u) /* Flags we don't expect to see in normal data packets. */ -#define CI_TCP_OOB_FLAGS (CI_TCP_FLAG_SYN|CI_TCP_FLAG_FIN| \ - CI_TCP_FLAG_RST|CI_TCP_FLAG_ECE| \ - CI_TCP_FLAG_CWR) +#define CI_TCP_OOB_FLAGS \ + (CI_TCP_FLAG_SYN | CI_TCP_FLAG_FIN | CI_TCP_FLAG_RST | CI_TCP_FLAG_ECE | \ + CI_TCP_FLAG_CWR) /* Flags to check for a socket */ -#define CI_TCP_STATE_IS_SOCKET(s) ((s) == CI_TCP_STATE_UDP || \ - (s) & CI_TCP_STATE_TCP) +#define CI_TCP_STATE_IS_SOCKET(s) \ + ((s) == CI_TCP_STATE_UDP || (s) &CI_TCP_STATE_TCP) /* For the fast path check we inspect header length and all flags other ** than PSH. */ -#define CI_TCP_FAST_PATH_MASK CI_BSWAPC_BE32(0xf0f70000) -#define CI_TCP_FAST_PATH_WORD(hdr) (((ci_uint32*)(hdr))[3]) +#define CI_TCP_FAST_PATH_MASK CI_BSWAPC_BE32(0xf0f70000) +#define CI_TCP_FAST_PATH_WORD(hdr) (((ci_uint32*) (hdr))[3]) -#ifndef MSG_NOSIGNAL /* Introduced in glibc3. */ -# define MSG_NOSIGNAL 0 +#ifndef MSG_NOSIGNAL /* Introduced in glibc3. 
*/ +#define MSG_NOSIGNAL 0 #endif -#define tcp_outgoing_opts_len(ts) \ +#define tcp_outgoing_opts_len(ts) \ ((ts)->outgoing_hdrs_len - sizeof(ci_ip4_hdr) - sizeof(ci_tcp_hdr)) #define tcp_ipx_outgoing_opts_len(af, ts) \ ((ts)->outgoing_hdrs_len - CI_IPX_HDR_SIZE(af) - sizeof(ci_tcp_hdr)) /* These names match the terminology used in the RFCs etc. */ -#define tcp_snd_una(ts) ((ts)->snd_una) -#define tcp_snd_nxt(ts) ((ts)->snd_nxt) -#define tcp_snd_wnd(ts) SEQ_SUB((ts)->snd_max, (ts)->snd_una) -#define tcp_snd_up(ts) ((ts)->snd_up) - -#define tcp_rcv_nxt(ts) (TS_IPX_TCP(ts)->tcp_ack_be32) -#define tcp_rcv_usr(ts) ((ts)->rcv_added - (ts)->rcv_delivered) -#define tcp_rcv_up(ts) ((ts)->rcv_up) -#define tcp_rcv_wnd_advertised(ts) ((ts)->rcv_wnd_advertised) -#define tcp_rcv_wnd_right_edge_sent(ts) ((ts)->rcv_wnd_right_edge_sent) +#define tcp_snd_una(ts) ((ts)->snd_una) +#define tcp_snd_nxt(ts) ((ts)->snd_nxt) +#define tcp_snd_wnd(ts) SEQ_SUB((ts)->snd_max, (ts)->snd_una) +#define tcp_snd_up(ts) ((ts)->snd_up) + +#define tcp_rcv_nxt(ts) (TS_IPX_TCP(ts)->tcp_ack_be32) +#define tcp_rcv_usr(ts) ((ts)->rcv_added - (ts)->rcv_delivered) +#define tcp_rcv_up(ts) ((ts)->rcv_up) +#define tcp_rcv_wnd_advertised(ts) ((ts)->rcv_wnd_advertised) +#define tcp_rcv_wnd_right_edge_sent(ts) ((ts)->rcv_wnd_right_edge_sent) #define tcp_rcv_wnd_current(ts) \ - CI_MIN((ts)->rcv_window_max, (ts)->s.so.rcvbuf - tcp_rcv_usr(ts)) + CI_MIN((ts)->rcv_window_max, (ts)->s.so.rcvbuf - tcp_rcv_usr(ts)) /* TCP packet urgent offset - named urgent offset to differantiate it from snd_up of the tcp state */ -#define tcp_snd_urg_off(ts,tcp) \ - ( (ci_uint16) (tcp_snd_up(ts) - CI_BSWAP_BE32((tcp)->tcp_seq_be32)) ) +#define tcp_snd_urg_off(ts, tcp) \ + ((ci_uint16) (tcp_snd_up(ts) - CI_BSWAP_BE32((tcp)->tcp_seq_be32))) /* Sequence number of next data to be inserted into TX queue. 
*/ #define tcp_enq_nxt(ts) (TS_IPX_TCP(ts)->tcp_seq_be32) /* TCP urgent data definitions */ #define tcp_urg_data(ts) ((ts)->urg_data) -#define tcp_urg_data_invalidate(ts) ((ts)->urg_data &=~ \ - (CI_TCP_URG_IS_HERE|CI_TCP_URG_PTR_VALID|CI_TCP_URG_DATA_MASK)); +#define tcp_urg_data_invalidate(ts) \ + ((ts)->urg_data &= \ + ~(CI_TCP_URG_IS_HERE | CI_TCP_URG_PTR_VALID | CI_TCP_URG_DATA_MASK)); /*! Returns true if we are not expecting an urgent byte. */ #define tcp_rx_urg_fast_path(ts) (~tcp_urg_data(ts) & CI_TCP_URG_COMING) /* conversion from scaled sa and sv to real srtt and rttvar */ -#define tcp_srtt(ts) ((ts)->sa >> 3u) -#define tcp_rttvar(ts) ((ts)->sv >> 2u) +#define tcp_srtt(ts) ((ts)->sa >> 3u) +#define tcp_rttvar(ts) ((ts)->sv >> 2u) -#define CI_SHUT_RD 0x8000 -#define CI_SHUT_WR 0x4000 -#define TCP_RX_DONE(ts) ((ts)->s.rx_errno) +#define CI_SHUT_RD 0x8000 +#define CI_SHUT_WR 0x4000 +#define TCP_RX_DONE(ts) ((ts)->s.rx_errno) -#define TCP_RX_ERRNO(ts) (SOCK_RX_ERRNO(&(ts)->s)) -#define TCP_TX_ERRNO(ts) (SOCK_TX_ERRNO(&(ts)->s)) +#define TCP_RX_ERRNO(ts) (SOCK_RX_ERRNO(&(ts)->s)) +#define TCP_TX_ERRNO(ts) (SOCK_TX_ERRNO(&(ts)->s)) /* We never transmit IP options (at the moment). */ -#define S_TCP_HDR(s) ((ci_tcp_hdr*) (&(s)->pkt.ipx.ip4 + 1)) -#define TS_TCP(ts) S_TCP_HDR(&(ts)->s) +#define S_TCP_HDR(s) ((ci_tcp_hdr*) (&(s)->pkt.ipx.ip4 + 1)) +#define TS_TCP(ts) S_TCP_HDR(&(ts)->s) #if CI_CFG_IPV6 #define S_IP6_TCP_HDR(s) ((ci_tcp_hdr*) (&(s)->pkt.ipx.ip6 + 1)) -#define TS_IP6_TCP(ts) S_IP6_TCP_HDR(&(ts)->s) +#define TS_IP6_TCP(ts) S_IP6_TCP_HDR(&(ts)->s) #endif #if CI_CFG_IPV6 -#define S_IPX_TCP_HDR(s) ((ipcache_is_ipv6(&(s)->pkt)) ? \ - S_IP6_TCP_HDR(s) : S_TCP_HDR(s)) -#define TS_IPX_TCP(ts) ((ipcache_is_ipv6(&(ts)->s.pkt)) ? \ - TS_IP6_TCP(ts) : TS_TCP(ts)) +#define S_IPX_TCP_HDR(s) \ + ((ipcache_is_ipv6(&(s)->pkt)) ? S_IP6_TCP_HDR(s) : S_TCP_HDR(s)) +#define TS_IPX_TCP(ts) \ + ((ipcache_is_ipv6(&(ts)->s.pkt)) ? 
TS_IP6_TCP(ts) : TS_TCP(ts)) #else #define S_IPX_TCP_HDR(s) S_TCP_HDR(s) -#define TS_IPX_TCP(ts) TS_TCP(ts) +#define TS_IPX_TCP(ts) TS_TCP(ts) #endif /** Macro that initialises RX queue offset */ -#define TS_QUEUE_RX_SET(ts, name) \ +#define TS_QUEUE_RX_SET(ts, name) \ ((ts)->recv_off = CI_MEMBER_OFFSET(ci_tcp_state, name)) /** Get active RX queue (fast/slow as appropriate) from TCP state */ -#define TS_QUEUE_RX(ts) \ - ((ci_ip_pkt_queue*) \ - ((ci_uintptr_t) (ts) + (ts)->recv_off)) +#define TS_QUEUE_RX(ts) \ + ((ci_ip_pkt_queue*) ((ci_uintptr_t) (ts) + (ts)->recv_off)) /** Offset of TS within netif state. */ -#define TS_OFF(ni, ts) oo_sockp_to_statep((ni),S_SP(ts)) -#define TS_MEMBER_OFF(ni, ts, member) ((ci_uint32)((char *)&(member) \ - - (char*)(ts) \ - + TS_OFF((ni),(ts)))) -#define TS_FMT "%d(%u)" -#define TS_ARG(ni,ts) (S_SP(ts)), (unsigned) TS_OFF((ni),(ts)) - -#define TCP_STATE_FROM_LINK(lnk) \ +#define TS_OFF(ni, ts) oo_sockp_to_statep((ni), S_SP(ts)) +#define TS_MEMBER_OFF(ni, ts, member) \ + ((ci_uint32) ((char*) &(member) - (char*) (ts) + TS_OFF((ni), (ts)))) +#define TS_FMT "%d(%u)" +#define TS_ARG(ni, ts) (S_SP(ts)), (unsigned) TS_OFF((ni), (ts)) + +#define TCP_STATE_FROM_LINK(lnk) \ CI_CONTAINER(ci_tcp_state, timeout_q_link, (lnk)) /* Macros for controlling delayed ACK state */ -#define TCP_FORCE_ACK(ts) ((ts)->acks_pending |= CI_TCP_ACK_FORCED_FLAG) -#define TCP_NEED_ACK(ts) (++(ts)->acks_pending) -#define TCP_ACK_FORCED(ts) ((ts)->acks_pending & CI_TCP_ACK_FORCED_FLAG) +#define TCP_FORCE_ACK(ts) ((ts)->acks_pending |= CI_TCP_ACK_FORCED_FLAG) +#define TCP_NEED_ACK(ts) (++(ts)->acks_pending) +#define TCP_ACK_FORCED(ts) ((ts)->acks_pending & CI_TCP_ACK_FORCED_FLAG) /* macros for getting source and dest addresses and ports */ #if CI_CFG_IPV6 -#define ipcache_ttl(ipcache) (*(ipcache_is_ipv6(ipcache) ? \ - &(ipcache)->ipx.ip6.hop_limit : &(ipcache)->ipx.ip4.ip_ttl)) +#define ipcache_ttl(ipcache) \ + (*(ipcache_is_ipv6(ipcache) ? 
&(ipcache)->ipx.ip6.hop_limit \ + : &(ipcache)->ipx.ip4.ip_ttl)) #else #define ipcache_ttl(ipcache) ((ipcache)->ipx.ip4.ip_ttl) #endif #if CI_CFG_IPV6 -#define ipcache_protocol(ipcache) (*(ipcache_is_ipv6(ipcache) ? \ - &(ipcache)->ipx.ip6.next_hdr : &(ipcache)->ipx.ip4.ip_protocol)) +#define ipcache_protocol(ipcache) \ + (*(ipcache_is_ipv6(ipcache) ? &(ipcache)->ipx.ip6.next_hdr \ + : &(ipcache)->ipx.ip4.ip_protocol)) #else #define ipcache_protocol(ipcache) ((ipcache)->ipx.ip4.ip_protocol) #endif @@ -1331,38 +1287,38 @@ extern int ci_pipe_list_to_iovec(ci_netif* ni, struct oo_pipe* p, #endif #if CI_CFG_IPV6 +#define ipcache_lport_be16(ipcache) \ + ((ipcache_is_ipv6(ipcache) ? ((ci_uint16*) (&(ipcache)->ipx.ip6 + 1)) \ + : ((ci_uint16*) (&(ipcache)->ipx.ip4 + 1)))[0]) +#define ipcache_rport_be16(ipcache) \ + ((ipcache_is_ipv6(ipcache) ? ((ci_uint16*) (&(ipcache)->ipx.ip6 + 1)) \ + : ((ci_uint16*) (&(ipcache)->ipx.ip4 + 1)))[1]) +#else #define ipcache_lport_be16(ipcache) \ - ((ipcache_is_ipv6(ipcache) ? \ - ((ci_uint16*) (&(ipcache)->ipx.ip6 + 1)) : \ - ((ci_uint16*) (&(ipcache)->ipx.ip4 + 1)) )[0]) + (((ci_uint16*) (&(ipcache)->ipx.ip4 + 1))[0]) #define ipcache_rport_be16(ipcache) \ - ((ipcache_is_ipv6(ipcache) ? \ - ((ci_uint16*) (&(ipcache)->ipx.ip6 + 1)) : \ - ((ci_uint16*) (&(ipcache)->ipx.ip4 + 1)) )[1]) -#else -#define ipcache_lport_be16(ipcache) (((ci_uint16*) (&(ipcache)->ipx.ip4 + 1))[0]) -#define ipcache_rport_be16(ipcache) (((ci_uint16*) (&(ipcache)->ipx.ip4 + 1))[1]) + (((ci_uint16*) (&(ipcache)->ipx.ip4 + 1))[1]) /* NB. Above two assume no IP options (which is true for now). */ #endif #define sock_lport_be16(s) ipcache_lport_be16(&(s)->pkt) #define sock_rport_be16(s) ipcache_rport_be16(&(s)->pkt) -#define sock_protocol(s) ipcache_protocol(&(s)->pkt) +#define sock_protocol(s) ipcache_protocol(&(s)->pkt) #define sock_tos_tclass(af, cp) \ - ( WITH_CI_CFG_IPV6( IS_AF_INET6(af) ? (cp)->tclass : ) (cp)->ip_tos ) + (WITH_CI_CFG_IPV6(IS_AF_INET6(af) ? 
(cp)->tclass :)(cp)->ip_tos) #define sock_cp_ttl_hoplimit(af, cp) \ - ( WITH_CI_CFG_IPV6( IS_AF_INET6(af) ? (cp)->hop_limit : ) (cp)->ip_ttl ) + (WITH_CI_CFG_IPV6(IS_AF_INET6(af) ? (cp)->hop_limit :)(cp)->ip_ttl) #if CI_CFG_IPV6 ci_inline int sock_af_space(ci_sock_cmn* s) { /* Fixme: do we want to cache sock_af_space() somewhere in the socket * state? */ - if( !CI_IS_ADDR_IP6(s->laddr) ) + if( ! CI_IS_ADDR_IP6(s->laddr) ) return AF_SPACE_FLAG_IP4; /* IPv6: are we bound to a specific IPv6 address? */ - if( !CI_IPX_ADDR_IS_ANY(s->laddr) ) + if( ! CI_IPX_ADDR_IS_ANY(s->laddr) ) return AF_SPACE_FLAG_IP6; /* Bound to :::. Is V6ONLY set? */ @@ -1372,47 +1328,45 @@ ci_inline int sock_af_space(ci_sock_cmn* s) return AF_SPACE_FLAG_IP6 | AF_SPACE_FLAG_IP4; } #else -#define sock_af_space(s) AF_SPACE_FLAG_IP4 +#define sock_af_space(s) AF_SPACE_FLAG_IP4 #endif -#define tcp_laddr_be32(ts) sock_laddr_be32(&(ts)->s) -#define tcp_raddr_be32(ts) sock_raddr_be32(&(ts)->s) +#define tcp_laddr_be32(ts) sock_laddr_be32(&(ts)->s) +#define tcp_raddr_be32(ts) sock_raddr_be32(&(ts)->s) #if CI_CFG_IPV6 -#define tcp_ip6_laddr(ts) sock_ip6_laddr(&(ts)->s) -#define tcp_ip6_raddr(ts) sock_ip6_raddr(&(ts)->s) +#define tcp_ip6_laddr(ts) sock_ip6_laddr(&(ts)->s) +#define tcp_ip6_raddr(ts) sock_ip6_raddr(&(ts)->s) #endif -#define tcp_ipx_laddr(ts) sock_ipx_laddr(&(ts)->s) -#define tcp_ipx_raddr(ts) sock_ipx_raddr(&(ts)->s) +#define tcp_ipx_laddr(ts) sock_ipx_laddr(&(ts)->s) +#define tcp_ipx_raddr(ts) sock_ipx_raddr(&(ts)->s) -#define tcp_protocol(ts) sock_protocol(&(ts)->s) -#define tcp_lport_be16(ts) sock_lport_be16(&(ts)->s) -#define tcp_rport_be16(ts) sock_rport_be16(&(ts)->s) +#define tcp_protocol(ts) sock_protocol(&(ts)->s) +#define tcp_lport_be16(ts) sock_lport_be16(&(ts)->s) +#define tcp_rport_be16(ts) sock_rport_be16(&(ts)->s) /* Enable / disable the TCP fast path. 
*/ -# define ci_tcp_can_use_fast_path(ts) \ - ((~ts->s.b.state & CI_TCP_STATE_SLOW_PATH) && \ - ci_ip_queue_is_empty(&(ts)->rob) && \ - tcp_rx_urg_fast_path(ts) && \ - tcp_rcv_wnd_advertised(ts) ) +#define ci_tcp_can_use_fast_path(ts) \ + ((~ts->s.b.state & CI_TCP_STATE_SLOW_PATH) && \ + ci_ip_queue_is_empty(&(ts)->rob) && tcp_rx_urg_fast_path(ts) && \ + tcp_rcv_wnd_advertised(ts)) /* is state in CI_TCP_STATE_TIMEOUT_ORPHAN and orphaned - * if so we timeout */ #if CI_CFG_FD_CACHING -#define ci_tcp_is_timeout_orphan(ts) \ - (((ts)->s.b.state & CI_TCP_STATE_TIMEOUT_ORPHAN) && \ - (((ts)->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN) | \ - ((ts)->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE))) +#define ci_tcp_is_timeout_orphan(ts) \ + (((ts)->s.b.state & CI_TCP_STATE_TIMEOUT_ORPHAN) && \ + (((ts)->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN) | \ + ((ts)->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE))) #else -#define ci_tcp_is_timeout_orphan(ts) \ - (((ts)->s.b.state & CI_TCP_STATE_TIMEOUT_ORPHAN) && \ - ((ts)->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN)) +#define ci_tcp_is_timeout_orphan(ts) \ + (((ts)->s.b.state & CI_TCP_STATE_TIMEOUT_ORPHAN) && \ + ((ts)->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN)) #endif -static inline bool -ci_tcp_is_pluginized(ci_tcp_state* ts) +static inline bool ci_tcp_is_pluginized(ci_tcp_state* ts) { #if CI_CFG_TCP_OFFLOAD_RECYCLER return (ts->s.s_flags & CI_SOCK_FLAG_TCP_OFFLOAD) != 0; @@ -1440,30 +1394,31 @@ extern ci_tcp_state* ci_tcp_get_state_buf(ci_netif*) CI_HF; extern ci_tcp_state* ci_tcp_get_state_buf_from_cache(ci_netif*, int pid) CI_HF; #endif extern ci_udp_state* ci_udp_get_state_buf(ci_netif*) CI_HF; -extern void ci_tcp_state_init(ci_netif* netif, ci_tcp_state* ts, - int from_cache) CI_HF; -extern void ci_tcp_state_tcb_reinit_minimal(ci_netif* netif, - ci_tcp_state* ts) CI_HF; +extern void ci_tcp_state_init( + ci_netif* netif, ci_tcp_state* ts, int from_cache) CI_HF; +extern void ci_tcp_state_tcb_reinit_minimal( + ci_netif* netif, ci_tcp_state* ts) CI_HF; extern void 
ci_tcp_state_reinit(ci_netif* netif, ci_tcp_state* ts) CI_HF; extern void ci_tcp_init_rcv_wnd(ci_tcp_state*, const char* caller) CI_HF; extern void ci_tcp_drop(ci_netif*, ci_tcp_state*, int so_error) CI_HF; extern void ci_tcp_drop_rob(ci_netif*, ci_tcp_state*) CI_HF; -extern int ci_tcp_try_to_free_pkts(ci_netif* ni, ci_tcp_state* ts, - int desperation) CI_HF; +extern int ci_tcp_try_to_free_pkts( + ci_netif* ni, ci_tcp_state* ts, int desperation) CI_HF; extern void ci_tcp_state_free(ci_netif* ni, ci_tcp_state* ts) CI_HF; extern void ci_tcp_state_free_to_cache(ci_netif* ni, ci_tcp_state* ts) CI_HF; #if OO_DO_STACK_POLL -extern void ci_tcp_listen_all_fds_gone(ci_netif*, ci_tcp_socket_listen*, - int do_free) CI_HF; -extern void ci_tcp_all_fds_gone(ci_netif* netif, ci_tcp_state*, - int do_free) CI_HF; +extern void ci_tcp_listen_all_fds_gone( + ci_netif*, ci_tcp_socket_listen*, int do_free) CI_HF; +extern void ci_tcp_all_fds_gone( + ci_netif* netif, ci_tcp_state*, int do_free) CI_HF; #endif extern void ci_tcp_all_fds_gone_common(ci_netif* netif, ci_tcp_state*) CI_HF; extern void ci_tcp_rx_reap_rxq_bufs(ci_netif* netif, ci_tcp_state* ts) CI_HF; -extern void ci_tcp_rx_reap_rxq_last_buf(ci_netif* netif, ci_tcp_state* ts) CI_HF; +extern void ci_tcp_rx_reap_rxq_last_buf( + ci_netif* netif, ci_tcp_state* ts) CI_HF; -static inline void -ci_tcp_rx_reap_rxq_bufs_socklocked(ci_netif* netif, ci_tcp_state* ts) +static inline void ci_tcp_rx_reap_rxq_bufs_socklocked( + ci_netif* netif, ci_tcp_state* ts) { ci_tcp_rx_reap_rxq_bufs(netif, ts); ci_assert(OO_PP_EQ(ts->recv1.head, ts->recv1_extract)); @@ -1471,19 +1426,17 @@ ci_tcp_rx_reap_rxq_bufs_socklocked(ci_netif* netif, ci_tcp_state* ts) ci_tcp_rx_reap_rxq_last_buf(netif, ts); } -extern void ci_tcp_state_dump(ci_netif*, ci_tcp_state*, const char *pf, - oo_dump_log_fn_t logger, void* log_arg) CI_HF; +extern void ci_tcp_state_dump(ci_netif*, ci_tcp_state*, const char* pf, + oo_dump_log_fn_t logger, void* log_arg) CI_HF; extern void 
ci_tcp_state_dump_id(ci_netif* ni, int ep_id) CI_HF; extern void ci_tcp_state_dump_qs(ci_netif*, int ep_id, int hex_dump) CI_HF; extern void ci_tcp_state_dump_rob(ci_netif* ni, ci_tcp_state* ts) CI_HF; extern void ci_tcp_state_dump_retrans_blocks(ci_netif*, ci_tcp_state*) CI_HF; extern void ci_tcp_state_dump_retrans(ci_netif* ni, ci_tcp_state* ts) CI_HF; -extern void ci_tcp_pkt_dump(ci_netif* ni, ci_ip_pkt_fmt* pkt, int is_recv, - int dump) CI_HF; +extern void ci_tcp_pkt_dump( + ci_netif* ni, ci_ip_pkt_fmt* pkt, int is_recv, int dump) CI_HF; extern void ci_tcp_socket_listen_dump(ci_netif*, ci_tcp_socket_listen*, - const char* pf, - oo_dump_log_fn_t logger, - void* log_arg) CI_HF; + const char* pf, oo_dump_log_fn_t logger, void* log_arg) CI_HF; /* If all the packets have size of amss, the maximum number of packets is * (ts)->s.so.rcvbuf / (ts)->amss. We allow the sum of receive queue to @@ -1498,43 +1451,37 @@ extern void ci_tcp_socket_listen_dump(ci_netif*, ci_tcp_socket_listen*, static inline int ci_tcp_rcvbuf_abused(ci_netif* ni, ci_tcp_state* ts) { return NI_OPTS(ni).tcp_rcvbuf_strict && - ts->recv1.num + ts->recv2.num + ts->rob.num > - (ts->s.so.rcvbuf + ts->rcv_window_max) / ts->amss; + ts->recv1.num + ts->recv2.num + ts->rob.num > + (ts->s.so.rcvbuf + ts->rcv_window_max) / ts->amss; } -extern void ci_tcp_rcvbuf_unabuse(ci_netif* ni, ci_tcp_state* ts, - int sock_already_locked) CI_HF; +extern void ci_tcp_rcvbuf_unabuse( + ci_netif* ni, ci_tcp_state* ts, int sock_already_locked) CI_HF; -extern void -ci_tcp_syncookie_syn(ci_netif* netif, ci_tcp_socket_listen* tls, - ci_tcp_state_synrecv* tsr); -extern void -ci_tcp_syncookie_ack(ci_netif* netif, ci_tcp_socket_listen* tls, - ciip_tcp_rx_pkt* rxp, - ci_tcp_state_synrecv **tsr_p); +extern void ci_tcp_syncookie_syn( + ci_netif* netif, ci_tcp_socket_listen* tls, ci_tcp_state_synrecv* tsr); +extern void ci_tcp_syncookie_ack(ci_netif* netif, ci_tcp_socket_listen* tls, + ciip_tcp_rx_pkt* rxp, ci_tcp_state_synrecv** 
tsr_p); extern void ci_tcp_set_sndbuf(ci_netif* ni, ci_tcp_state* ts); extern void ci_tcp_set_sndbuf_from_sndbuf_pkts(ci_netif* ni, ci_tcp_state* ts); -extern int -ci_tcp_use_mac_filter_listen(ci_netif* ni, ci_sock_cmn* s, ci_ifid_t ifindex); +extern int ci_tcp_use_mac_filter_listen( + ci_netif* ni, ci_sock_cmn* s, ci_ifid_t ifindex); #ifndef __KERNEL__ -extern int -ci_tcp_can_set_filter_in_ul(ci_netif *ni, ci_sock_cmn* s); +extern int ci_tcp_can_set_filter_in_ul(ci_netif* ni, ci_sock_cmn* s); #endif -extern int -ci_tcp_sock_set_stack_filter(ci_netif *ni, ci_sock_cmn* s); +extern int ci_tcp_sock_set_stack_filter(ci_netif* ni, ci_sock_cmn* s); -extern void -ci_tcp_sock_clear_stack_filter(ci_netif *ni, ci_tcp_state* ts); +extern void ci_tcp_sock_clear_stack_filter(ci_netif* ni, ci_tcp_state* ts); #if CI_CFG_FD_CACHING extern int /*bool*/ ci_tcp_is_cacheable_active_wild_sharer(ci_sock_cmn*); #endif -extern int -ci_tcp_offload_get_stream_id(ci_netif* ni, ci_tcp_state* ts, int intf_i); +extern int ci_tcp_offload_get_stream_id( + ci_netif* ni, ci_tcp_state* ts, int intf_i); extern void ci_tcp_prev_seq_remember(ci_netif*, ci_tcp_state*); extern ci_uint32 ci_tcp_prev_seq_lookup(ci_netif*, const ci_tcp_state*); @@ -1544,17 +1491,16 @@ extern ci_uint32 ci_tcp_prev_seq_lookup(ci_netif*, const ci_tcp_state*); **********************************************************************/ #if OO_DO_STACK_POLL -extern void ci_pipe_all_fds_gone(ci_netif* netif, struct oo_pipe* p, - int do_free); +extern void ci_pipe_all_fds_gone( + ci_netif* netif, struct oo_pipe* p, int do_free); #endif /********************************************************************** *************************** ACTIVE WILD ******************************* **********************************************************************/ -ci_inline void -ci_addr_simple_hash(ci_addr_t addr, ci_uint32 entries, - ci_uint32* hash1_out, ci_uint32* hash2_out) +ci_inline void ci_addr_simple_hash(ci_addr_t addr, ci_uint32 entries, + 
ci_uint32* hash1_out, ci_uint32* hash2_out) { /* Convert address to uint32. Without IPv6 there is no conversion. */ ci_uint32 hash0 = 0; @@ -1562,26 +1508,25 @@ ci_addr_simple_hash(ci_addr_t addr, ci_uint32 entries, if( CI_IS_ADDR_IP6(addr) ) { int i; - for( i = 0; i < sizeof(ci_addr_t) / 4; i++) { + for( i = 0; i < sizeof(ci_addr_t) / 4; i++ ) { hash0 ^= addr.u32[i]; } - } - else + } else #endif hash0 = addr.ip4; - *hash2_out = (hash0 | 1) & (entries - 1); + *hash2_out = (hash0 | 1) & (entries - 1); /* Spread the entropy (such as it is) from the higher-order bits of the * address down a bit. */ hash0 = CI_BSWAP_BE32(hash0); hash0 = hash0 ^ (hash0 >> 8); - *hash1_out = hash0 & (entries - 1); + *hash1_out = hash0 & (entries - 1); } extern ci_active_wild* ci_active_wild_get_state_buf(ci_netif* netif); -extern void ci_active_wild_all_fds_gone(ci_netif* ni, ci_active_wild* aw, - int do_free); +extern void ci_active_wild_all_fds_gone( + ci_netif* ni, ci_active_wild* aw, int do_free); /********************************************************************* ************************** citp_waitable_obj ************************* @@ -1598,229 +1543,198 @@ extern void citp_waitable_obj_free_to_cache(ci_netif*, citp_waitable*) CI_HF; #endif #if OO_DO_STACK_POLL extern void citp_waitable_all_fds_gone(ci_netif*, oo_sp) CI_HF; -extern void citp_waitable_cleanup(ci_netif* ni, citp_waitable_obj* wo, - int do_free); +extern void citp_waitable_cleanup( + ci_netif* ni, citp_waitable_obj* wo, int do_free); #endif extern const char* citp_waitable_type_str(citp_waitable* w) CI_HF; extern void citp_waitable_dump(ci_netif*, citp_waitable*, const char*) CI_HF; extern void citp_waitable_dump_to_logger(ci_netif* ni, citp_waitable* w, - const char* pf, oo_dump_log_fn_t logger, - void* log_arg) CI_HF; -extern void citp_waitable_print_to_logger(ci_netif*, citp_waitable*, - oo_dump_log_fn_t logger, - void* log_arg) CI_HF; -extern void -ci_tcp_listenq_print_to_logger(ci_netif* ni, 
ci_tcp_socket_listen* tls, - oo_dump_log_fn_t logger, void *log_arg); + const char* pf, oo_dump_log_fn_t logger, void* log_arg) CI_HF; +extern void citp_waitable_print_to_logger( + ci_netif*, citp_waitable*, oo_dump_log_fn_t logger, void* log_arg) CI_HF; +extern void ci_tcp_listenq_print_to_logger(ci_netif* ni, + ci_tcp_socket_listen* tls, oo_dump_log_fn_t logger, void* log_arg); /********************************************************************* *********************************************************************/ -extern void ci_tcp_listenq_insert(ci_netif*, ci_tcp_socket_listen*, - ci_tcp_state_synrecv*) CI_HF; -extern void ci_tcp_listenq_remove(ci_netif*, ci_tcp_socket_listen*, - ci_tcp_state_synrecv*) CI_HF; -extern void ci_tcp_listenq_drop(ci_netif*, ci_tcp_socket_listen*, - ci_tcp_state_synrecv*) CI_HF; -extern ci_tcp_state_synrecv* ci_tcp_listenq_lookup(ci_netif* netif, - ci_tcp_socket_listen* tls, - ciip_tcp_rx_pkt*) CI_HF; +extern void ci_tcp_listenq_insert( + ci_netif*, ci_tcp_socket_listen*, ci_tcp_state_synrecv*) CI_HF; +extern void ci_tcp_listenq_remove( + ci_netif*, ci_tcp_socket_listen*, ci_tcp_state_synrecv*) CI_HF; +extern void ci_tcp_listenq_drop( + ci_netif*, ci_tcp_socket_listen*, ci_tcp_state_synrecv*) CI_HF; +extern ci_tcp_state_synrecv* ci_tcp_listenq_lookup( + ci_netif* netif, ci_tcp_socket_listen* tls, ciip_tcp_rx_pkt*) CI_HF; extern void ci_tcp_listenq_drop_oldest(ci_netif*, ci_tcp_socket_listen*) CI_HF; extern int ci_tcp_listenq_drop_all(ci_netif*, ci_tcp_socket_listen*) CI_HF; extern int ci_tcp_listenq_try_promote(ci_netif*, ci_tcp_socket_listen*, - ci_tcp_state_synrecv*, - ci_ip_cached_hdrs*, - ci_ip_pkt_fmt*, - ci_tcp_state**) CI_HF; + ci_tcp_state_synrecv*, ci_ip_cached_hdrs*, ci_ip_pkt_fmt*, + ci_tcp_state**) CI_HF; extern const char* ci_tcp_state_num_str(int state) CI_HF; -#define ci_tcp_state_str(state) ci_tcp_state_num_str(CI_TCP_STATE_NUM(state)) -#define state_str(ts) ci_tcp_state_str((ts)->s.b.state) +#define 
ci_tcp_state_str(state) ci_tcp_state_num_str(CI_TCP_STATE_NUM(state)) +#define state_str(ts) ci_tcp_state_str((ts)->s.b.state) #ifndef NDEBUG -extern void ci_tcp_state_verify_no_timers(ci_netif *ni, ci_tcp_state *ts); +extern void ci_tcp_state_verify_no_timers(ci_netif* ni, ci_tcp_state* ts); #else -#define ci_tcp_state_verify_no_timers(ni,ts) +#define ci_tcp_state_verify_no_timers(ni, ts) #endif extern const char* ci_tcp_congstate_str(unsigned state) CI_HF; -#define congstate_str(ts) ci_tcp_congstate_str((ts)->congstate) +#define congstate_str(ts) ci_tcp_congstate_str((ts)->congstate) extern void ci_tcp_handle_rx(ci_netif*, struct ci_netif_poll_state*, - ci_ip_pkt_fmt*, ci_tcp_hdr*, int ip_paylen) CI_HF; -extern void ci_tcp_rx_deliver2(ci_tcp_state*,ci_netif*,ciip_tcp_rx_pkt*) CI_HF; -extern void ci_tcp_rx_plugin_meta(ci_netif*, struct ci_netif_poll_state*, - ci_ip_pkt_fmt* pkt) CI_HF; + ci_ip_pkt_fmt*, ci_tcp_hdr*, int ip_paylen) CI_HF; +extern void ci_tcp_rx_deliver2( + ci_tcp_state*, ci_netif*, ciip_tcp_rx_pkt*) CI_HF; +extern void ci_tcp_rx_plugin_meta( + ci_netif*, struct ci_netif_poll_state*, ci_ip_pkt_fmt* pkt) CI_HF; extern void ci_tcp_tx_change_mss(ci_netif*, ci_tcp_state*) CI_HF; -extern void ci_tcp_enqueue_no_data(ci_tcp_state* ts, ci_netif* netif, - ci_ip_pkt_fmt* pkt) CI_HF; +extern void ci_tcp_enqueue_no_data( + ci_tcp_state* ts, ci_netif* netif, ci_ip_pkt_fmt* pkt) CI_HF; extern int ci_tcp_send_sim_synack(ci_netif* netif, ci_tcp_state* ts) CI_HF; extern int ci_tcp_synrecv_send(ci_netif* netif, ci_tcp_socket_listen* tls, - ci_tcp_state_synrecv* tsr, - ci_ip_pkt_fmt* pkt, ci_uint8 tcp_flags, - ci_ip_cached_hdrs* ipcache_opt) CI_HF; + ci_tcp_state_synrecv* tsr, ci_ip_pkt_fmt* pkt, ci_uint8 tcp_flags, + ci_ip_cached_hdrs* ipcache_opt) CI_HF; extern int ci_tcp_unsacked_segments_in_flight(ci_netif*, ci_tcp_state*) CI_HF; -extern int ci_tcp_retrans_one(ci_tcp_state* ts, ci_netif* netif, - ci_ip_pkt_fmt* pkt) CI_HF; +extern int ci_tcp_retrans_one( + 
ci_tcp_state* ts, ci_netif* netif, ci_ip_pkt_fmt* pkt) CI_HF; extern int ci_tcp_retrans(ci_netif* ni, ci_tcp_state* ts, int seq_limit, - int before_sacked_only, int* seq_used) CI_HF; -extern void ci_tcp_retrans_recover(ci_netif* ni, ci_tcp_state* ts, - int force_retrans_first) CI_HF; + int before_sacked_only, int* seq_used) CI_HF; +extern void ci_tcp_retrans_recover( + ci_netif* ni, ci_tcp_state* ts, int force_retrans_first) CI_HF; extern int /*bool*/ ci_tcp_maybe_enter_fast_recovery(ci_netif* ni, ci_tcp_state* ts) CI_HF; extern void ci_tcp_recovered(ci_netif* ni, ci_tcp_state* ts) CI_HF; extern void ci_tcp_clear_sacks(ci_netif* ni, ci_tcp_state* ts) CI_HF; -extern void ci_tcp_retrans_init_ptrs(ci_netif* ni, ci_tcp_state* ts, - unsigned* recover_seq_out) CI_HF; -extern void ci_tcp_get_fack(ci_netif* ni, ci_tcp_state* ts, - unsigned* fack_out, int* retrans_data_out) CI_HF; +extern void ci_tcp_retrans_init_ptrs( + ci_netif* ni, ci_tcp_state* ts, unsigned* recover_seq_out) CI_HF; +extern void ci_tcp_get_fack(ci_netif* ni, ci_tcp_state* ts, unsigned* fack_out, + int* retrans_data_out) CI_HF; -extern void ci_tcp_retrans_coalesce_block(ci_netif* ni, ci_tcp_state* ts, - ci_ip_pkt_fmt* pkt) CI_HF; +extern void ci_tcp_retrans_coalesce_block( + ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_fmt* pkt) CI_HF; extern int ci_tcp_tx_coalesce(ci_netif* ni, ci_tcp_state* ts, - ci_ip_pkt_queue* q, ci_ip_pkt_fmt* pkt, - ci_boolean_t is_sendq) CI_HF; + ci_ip_pkt_queue* q, ci_ip_pkt_fmt* pkt, ci_boolean_t is_sendq) CI_HF; extern void ci_tcp_tx_insert_option_space(ci_netif* ni, ci_tcp_state* ts, - ci_ip_pkt_fmt* pkt, int hdrlen, - int extra_opts) CI_HF; + ci_ip_pkt_fmt* pkt, int hdrlen, int extra_opts) CI_HF; extern int ci_tcp_tx_split(ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_queue* qu, - ci_ip_pkt_fmt* pkt, int new_paylen, - ci_boolean_t is_sendq) CI_HF; + ci_ip_pkt_fmt* pkt, int new_paylen, ci_boolean_t is_sendq) CI_HF; extern void ci_tcp_tx_advance(ci_tcp_state* ts, ci_netif* netif) 
CI_HF; extern void ci_tcp_tx_advance_to(ci_netif* ni, ci_tcp_state* ts, - unsigned right_edge, ci_uint32* p_stop_cntr) CI_HF; -extern void ci_tcp_send_rst_with_flags(ci_netif*, ci_tcp_state*, - ci_uint8 extra_flags) CI_HF; + unsigned right_edge, ci_uint32* p_stop_cntr) CI_HF; +extern void ci_tcp_send_rst_with_flags( + ci_netif*, ci_tcp_state*, ci_uint8 extra_flags) CI_HF; extern void ci_tcp_send_rst(ci_netif* netif, ci_tcp_state* ts) CI_HF; -extern void -ci_tcp_reply_with_rst(ci_netif* netif, const struct oo_sock_cplane* sock_cp, - ciip_tcp_rx_pkt* rxp) CI_HF; -extern int ci_tcp_reset_untrusted(ci_netif *netif, ci_tcp_state *ts) CI_HF; +extern void ci_tcp_reply_with_rst(ci_netif* netif, + const struct oo_sock_cplane* sock_cp, ciip_tcp_rx_pkt* rxp) CI_HF; +extern int ci_tcp_reset_untrusted(ci_netif* netif, ci_tcp_state* ts) CI_HF; extern void ci_tcp_send_zwin_probe(ci_netif* netif, ci_tcp_state* ts) CI_HF; extern void ci_tcp_set_established_state(ci_netif*, ci_tcp_state*) CI_HF; extern void ci_tcp_expand_sndbuf(ci_netif*, ci_tcp_state*) CI_HF; extern bool ci_tcp_should_expand_sndbuf(ci_netif*, ci_tcp_state*) CI_HF; -extern void ci_tcp_moderate_sndbuf(ci_netif* , ci_tcp_state*) CI_HF; +extern void ci_tcp_moderate_sndbuf(ci_netif*, ci_tcp_state*) CI_HF; extern void ci_tcp_set_slow_state(ci_netif*, ci_tcp_state*, int state) CI_HF; -extern int ci_tcp_parse_options(ci_netif*, ciip_tcp_rx_pkt*, - ci_tcp_options*) CI_HF; +extern int ci_tcp_parse_options( + ci_netif*, ciip_tcp_rx_pkt*, ci_tcp_options*) CI_HF; -extern void ci_ipx_hdr_init_fixed(ci_ipx_hdr_t* ip, int af, int protocol, - int ttl, unsigned tos) CI_HF; +extern void ci_ipx_hdr_init_fixed( + ci_ipx_hdr_t* ip, int af, int protocol, int ttl, unsigned tos) CI_HF; extern void ci_tcp_send_ack_rx(ci_netif*, ci_tcp_state*, ci_ip_pkt_fmt*, - int sock_locked, int update_wnd) CI_HF; -ci_inline void ci_tcp_send_ack(ci_netif* netif, ci_tcp_state* ts, - ci_ip_pkt_fmt* pkt, int sock_locked) + int sock_locked, int update_wnd) 
CI_HF; +ci_inline void ci_tcp_send_ack( + ci_netif* netif, ci_tcp_state* ts, ci_ip_pkt_fmt* pkt, int sock_locked) { ci_tcp_send_ack_rx(netif, ts, pkt, sock_locked, 1); } -extern int ci_tcp_send_challenge_ack(ci_netif*, ci_tcp_state*, - ci_ip_pkt_fmt*) CI_HF; -extern int/*bool*/ +extern int ci_tcp_send_challenge_ack( + ci_netif*, ci_tcp_state*, ci_ip_pkt_fmt*) CI_HF; +extern int /*bool*/ ci_tcp_may_send_ack_ratelimited(ci_netif* netif, ci_tcp_state* ts) CI_HF; extern void ci_tcp_send_ack_loopback(ci_netif* netif, ci_tcp_state* ts) CI_HF; -extern int ci_tcp_send_wnd_update(ci_netif*, ci_tcp_state*, - int sock_locked) CI_HF; +extern int ci_tcp_send_wnd_update( + ci_netif*, ci_tcp_state*, int sock_locked) CI_HF; /* TCP/UDP filter insertion */ extern void ci_netif_filter_init(ci_netif* ni, int size_lg2) CI_HF; #if CI_CFG_IPV6 -void ci_ip6_netif_filter_init(ci_ip6_netif_filter_table* tbl, - int size_lg2) CI_HF; +void ci_ip6_netif_filter_init( + ci_ip6_netif_filter_table* tbl, int size_lg2) CI_HF; #endif -extern ci_sock_cmn* -__ci_netif_filter_lookup(ci_netif* netif, int af_space, - ci_addr_t daddr, unsigned dport, - ci_addr_t saddr, unsigned sport, - unsigned prot) CI_HF; +extern ci_sock_cmn* __ci_netif_filter_lookup(ci_netif* netif, int af_space, + ci_addr_t daddr, unsigned dport, ci_addr_t saddr, unsigned sport, + unsigned prot) CI_HF; #if CI_CFG_IPV6 -extern int -ci_ip6_netif_filter_lookup(ci_netif* netif, ci_addr_t laddr, unsigned lport, - ci_addr_t raddr, unsigned rport, unsigned prot) CI_HF; -extern int -__ci_ip6_netif_filter_lookup(ci_netif* netif, ci_addr_t laddr, unsigned lport, - ci_addr_t raddr, unsigned rport, unsigned prot) CI_HF; -#endif -extern oo_sp -ci_netif_filter_lookup(ci_netif* netif, int af_space, - ci_addr_t laddr, unsigned lport, - ci_addr_t raddr, unsigned rport, - unsigned protocol); +extern int ci_ip6_netif_filter_lookup(ci_netif* netif, ci_addr_t laddr, + unsigned lport, ci_addr_t raddr, unsigned rport, unsigned prot) CI_HF; +extern int 
__ci_ip6_netif_filter_lookup(ci_netif* netif, ci_addr_t laddr, + unsigned lport, ci_addr_t raddr, unsigned rport, unsigned prot) CI_HF; +#endif +extern oo_sp ci_netif_filter_lookup(ci_netif* netif, int af_space, + ci_addr_t laddr, unsigned lport, ci_addr_t raddr, unsigned rport, + unsigned protocol); /* Returns socket index, or OO_SP_NULL if lookup failed. */ -extern oo_sp -ci_netif_listener_lookup(ci_netif* netif, int af_space, - ci_addr_t laddr, unsigned lport) CI_HF; +extern oo_sp ci_netif_listener_lookup( + ci_netif* netif, int af_space, ci_addr_t laddr, unsigned lport) CI_HF; /* Invokes the callback on each socket that matches the supplied addressing * fields. If the callback returns non-zero, then the search is * terminated. * Returns 1 if the search was terminated, 0 otherwise. */ -extern int -ci_netif_filter_for_each_match(ci_netif*, unsigned laddr, unsigned lport, - unsigned raddr, unsigned rport, - unsigned protocol, int intf_i, int vlan, - int (*callback)(ci_sock_cmn*, void*), - void* callback_arg, ci_uint32* hash_out) CI_HF; +extern int ci_netif_filter_for_each_match(ci_netif*, unsigned laddr, + unsigned lport, unsigned raddr, unsigned rport, unsigned protocol, + int intf_i, int vlan, int (*callback)(ci_sock_cmn*, void*), + void* callback_arg, ci_uint32* hash_out) CI_HF; #if CI_CFG_IPV6 -extern int -ci_netif_filter_for_each_match_ip6(ci_netif* ni, - const ci_addr_t* laddr, unsigned lport, - const ci_addr_t* raddr, unsigned rport, - unsigned protocol, int intf_i, int vlan, - int (*callback)(ci_sock_cmn*, void*), - void* callback_arg, ci_uint32* hash_out) CI_HF; -#endif - -extern ci_uint32 -ci_netif_filter_hash(ci_netif* ni, ci_addr_t laddr, unsigned lport, - ci_addr_t raddr, unsigned rport, - unsigned protocol) CI_HF; - -extern int -ci_netif_filter_insert(ci_netif* netif, oo_sp sock_id, int af_space, - const ci_addr_t laddr, unsigned lport, - const ci_addr_t raddr, unsigned rport, - unsigned protocol) CI_HF; - -extern void 
-ci_netif_filter_remove(ci_netif* netif, oo_sp tcp_id, int af_space, - const ci_addr_t laddr, unsigned lport, - const ci_addr_t raddr, unsigned rport, - unsigned protocol) CI_HF; +extern int ci_netif_filter_for_each_match_ip6(ci_netif* ni, + const ci_addr_t* laddr, unsigned lport, const ci_addr_t* raddr, + unsigned rport, unsigned protocol, int intf_i, int vlan, + int (*callback)(ci_sock_cmn*, void*), void* callback_arg, + ci_uint32* hash_out) CI_HF; +#endif + +extern ci_uint32 ci_netif_filter_hash(ci_netif* ni, ci_addr_t laddr, + unsigned lport, ci_addr_t raddr, unsigned rport, unsigned protocol) CI_HF; + +extern int ci_netif_filter_insert(ci_netif* netif, oo_sp sock_id, int af_space, + const ci_addr_t laddr, unsigned lport, const ci_addr_t raddr, + unsigned rport, unsigned protocol) CI_HF; + +extern void ci_netif_filter_remove(ci_netif* netif, oo_sp tcp_id, int af_space, + const ci_addr_t laddr, unsigned lport, const ci_addr_t raddr, + unsigned rport, unsigned protocol) CI_HF; #if CI_CFG_UL_INTERRUPT_HELPER || defined(__KERNEL__) -ci_inline void -oo_sw_filter_apply(ci_netif* ni, struct oo_sw_filter_op* op) +ci_inline void oo_sw_filter_apply(ci_netif* ni, struct oo_sw_filter_op* op) { if( op->op == OO_SW_FILTER_OP_ADD ) { - ci_netif_filter_insert(ni, op->sock_id, op->af_space, - op->laddr, op->lport, - op->raddr, op->rport, op->protocol); - } - else { - ci_netif_filter_remove(ni, op->sock_id, op->af_space, - op->laddr, op->lport, - op->raddr, op->rport, op->protocol); + ci_netif_filter_insert(ni, op->sock_id, op->af_space, op->laddr, op->lport, + op->raddr, op->rport, op->protocol); + } else { + ci_netif_filter_remove(ni, op->sock_id, op->af_space, op->laddr, op->lport, + op->raddr, op->rport, op->protocol); } } #endif @@ -1846,19 +1760,17 @@ ci_inline ci_uint32 ci_netif_filter_table_size(ci_netif* ni) #if CI_CFG_TCP_SHARED_LOCAL_PORTS #ifndef __KERNEL__ extern oo_sp ci_netif_active_wild_get(ci_netif* ni, ci_addr_t laddr, - ci_addr_t raddr, unsigned lport, - 
ci_uint16* port_out, - ci_uint32* prev_seq_out); + ci_addr_t raddr, unsigned lport, ci_uint16* port_out, + ci_uint32* prev_seq_out); #endif extern void ci_netif_active_wild_sharer_closed(ci_netif* ni, ci_sock_cmn* s); #define RSS_HASH_SIZE 0x80 #define RSS_HASH_MASK (RSS_HASH_SIZE - 1) -extern int ci_netif_active_wild_nic_hash(ci_netif *ni, - ci_addr_t laddr, ci_uint16 lport, - ci_addr_t raddr, ci_uint16 rport); +extern int ci_netif_active_wild_nic_hash(ci_netif* ni, ci_addr_t laddr, + ci_uint16 lport, ci_addr_t raddr, ci_uint16 rport); -extern struct oo_p_dllink_state -ci_netif_get_active_wild_list(ci_netif* ni, int aw_pool, ci_addr_t laddr); +extern struct oo_p_dllink_state ci_netif_get_active_wild_list( + ci_netif* ni, int aw_pool, ci_addr_t laddr); #endif /* Bind RX of socket to given interface. Used by implementation of @@ -1867,17 +1779,16 @@ ci_netif_get_active_wild_list(ci_netif* ni, int aw_pool, ci_addr_t laddr); */ extern int ci_sock_rx_bind2dev(ci_netif*, ci_sock_cmn*, ci_ifid_t) CI_HF; -extern int -__ci_tcp_shutdown(ci_netif*, ci_tcp_state*, int how) CI_HF; +extern int __ci_tcp_shutdown(ci_netif*, ci_tcp_state*, int how) CI_HF; extern void __ci_tcp_listen_shutdown(ci_netif*, ci_tcp_socket_listen*) CI_HF; -extern void ci_tcp_listen_shutdown_queues(ci_netif* netif, - ci_tcp_socket_listen* tls) CI_HF; +extern void ci_tcp_listen_shutdown_queues( + ci_netif* netif, ci_tcp_socket_listen* tls) CI_HF; #if CI_CFG_FD_CACHING -extern void -ci_tcp_listen_uncache_fds(ci_netif* netif, ci_tcp_socket_listen* tls) CI_HF; +extern void ci_tcp_listen_uncache_fds( + ci_netif* netif, ci_tcp_socket_listen* tls) CI_HF; extern void ci_tcp_epcache_drop_cache(ci_netif* ni) CI_HF; -extern void ci_tcp_listen_update_cached(ci_netif* netif, - ci_tcp_socket_listen* tls) CI_HF; +extern void ci_tcp_listen_update_cached( + ci_netif* netif, ci_tcp_socket_listen* tls) CI_HF; extern void ci_tcp_active_cache_drop_cache(ci_netif* ni) CI_HF; extern void 
ci_tcp_passive_scalable_cache_drop_cache(ci_netif* ni) CI_HF; #endif @@ -1885,8 +1796,8 @@ extern void __ci_tcp_listen_to_normal(ci_netif*, ci_tcp_socket_listen*) CI_HF; extern void ci_netif_filter_dump(ci_netif*) CI_HF; -extern unsigned int ci_tcp_wscl_by_buff(ci_netif *netif, - ci_int32 rcv_buff) CI_HF; +extern unsigned int ci_tcp_wscl_by_buff( + ci_netif* netif, ci_int32 rcv_buff) CI_HF; extern ci_int32 ci_tcp_rcvbuf_established(ci_netif* ni, ci_sock_cmn* s) CI_HF; @@ -1898,11 +1809,11 @@ extern void ci_udp_ipcache_convert(int af, ci_udp_state* ts) CI_HF; #endif /* timer handlers */ -#define ci_tcp_time_now(ni) ci_ip_time_now(ni) +#define ci_tcp_time_now(ni) ci_ip_time_now(ni) #define ci_tcp_time_ms2ticks(ni, x) ci_ip_time_ms2ticks(ni, (x)) extern void ci_tcp_timer_init(ci_netif* netif) CI_HF; -extern void ci_tcp_timeout_listen(ci_netif* netif, - ci_tcp_socket_listen* tls) CI_HF; +extern void ci_tcp_timeout_listen( + ci_netif* netif, ci_tcp_socket_listen* tls) CI_HF; extern void ci_tcp_timeout_kalive(ci_netif* netif, ci_tcp_state* ts) CI_HF; extern void ci_tcp_timeout_zwin(ci_netif* netif, ci_tcp_state* ts) CI_HF; extern void ci_tcp_timeout_delack(ci_netif* netif, ci_tcp_state* ts) CI_HF; @@ -1910,15 +1821,15 @@ extern void ci_tcp_timeout_rto(ci_netif* netif, ci_tcp_state* ts) CI_HF; extern void ci_tcp_timeout_cork(ci_netif* netif, ci_tcp_state* ts) CI_HF; extern void ci_tcp_timeout_recycle(ci_netif* netif, ci_tcp_state* ts) CI_HF; extern void ci_tcp_stop_timers(ci_netif* netif, ci_tcp_state* ts) CI_HF; -extern void ci_tcp_send_corked_packets(ci_netif* netif, ci_tcp_state* ts) CI_HF; +extern void ci_tcp_send_corked_packets( + ci_netif* netif, ci_tcp_state* ts) CI_HF; extern void ci_tcp_tx_pkt_assert_valid(ci_netif* ni, ci_tcp_state* ts, - ci_ip_pkt_fmt*, - const char* f, int l) CI_HF; -extern void ci_tcp_state_assert_valid(ci_netif*, ci_tcp_state* ts, - const char* file, int line) CI_HF; -extern void ci_tcp_state_listen_assert_valid(ci_netif*, 
ci_tcp_socket_listen*, - const char* file, int line) CI_HF; + ci_ip_pkt_fmt*, const char* f, int l) CI_HF; +extern void ci_tcp_state_assert_valid( + ci_netif*, ci_tcp_state* ts, const char* file, int line) CI_HF; +extern void ci_tcp_state_listen_assert_valid( + ci_netif*, ci_tcp_socket_listen*, const char* file, int line) CI_HF; extern void ci_tcp_ep_assert_valid(citp_socket*, const char*, int ln) CI_HF; @@ -1928,50 +1839,47 @@ extern void ci_tcp_ep_assert_valid(citp_socket*, const char*, int ln) CI_HF; #ifndef __KERNEL__ -extern int -ci_opt_is_setting_reuseport(int level, int optname, const void* optval, - socklen_t optlen) CI_HF; -extern int -ci_setsockopt_os_fail_ignore(ci_netif* ni, ci_sock_cmn* s, int err, - int level, int optname, - const void* optval, socklen_t optlen) CI_HF; +extern int ci_opt_is_setting_reuseport( + int level, int optname, const void* optval, socklen_t optlen) CI_HF; +extern int ci_setsockopt_os_fail_ignore(ci_netif* ni, ci_sock_cmn* s, int err, + int level, int optname, const void* optval, socklen_t optlen) CI_HF; struct oo_per_thread; typedef void (*citp_init_thread_callback)(struct oo_per_thread*); typedef void (*oo_signal_terminate_fn)(int signum); extern int ci_tp_init(citp_init_thread_callback cb, - oo_signal_terminate_fn signal_terminate_fn) CI_HF; + oo_signal_terminate_fn signal_terminate_fn) CI_HF; extern int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr, - socklen_t addrlen, ci_fd_t fd) CI_HF; + socklen_t addrlen, ci_fd_t fd) CI_HF; #if CI_CFG_ENDPOINT_MOVE extern int ci_tcp_reuseport_bind(ci_sock_cmn* sock, ci_fd_t fd) CI_HF; #endif -extern void ci_tcp_get_peer_addr(ci_tcp_state* ts, struct sockaddr* name, - socklen_t* namelen) CI_HF; -extern int ci_tcp_getpeername(citp_socket*, struct sockaddr*, socklen_t*) CI_HF; -extern int ci_tcp_getsockname(citp_socket*, ci_fd_t, struct sockaddr*, - socklen_t*) CI_HF; +extern void ci_tcp_get_peer_addr( + ci_tcp_state* ts, struct sockaddr* name, socklen_t* namelen) CI_HF; 
+extern int ci_tcp_getpeername( + citp_socket*, struct sockaddr*, socklen_t*) CI_HF; +extern int ci_tcp_getsockname( + citp_socket*, ci_fd_t, struct sockaddr*, socklen_t*) CI_HF; -extern int ci_tcp_getsockopt(citp_socket* ep, ci_fd_t fd, int level, int optname, - void *optval, socklen_t *optlen) CI_HF; -extern int ci_tcp_setsockopt(citp_socket* ep, ci_fd_t fd, int level, int optname, - const void*optval, socklen_t optlen) CI_HF; -extern int ci_tcp_ioctl(citp_socket* ep, ci_fd_t fd, int request, void* arg) CI_HF; +extern int ci_tcp_getsockopt(citp_socket* ep, ci_fd_t fd, int level, + int optname, void* optval, socklen_t* optlen) CI_HF; +extern int ci_tcp_setsockopt(citp_socket* ep, ci_fd_t fd, int level, + int optname, const void* optval, socklen_t optlen) CI_HF; +extern int ci_tcp_ioctl( + citp_socket* ep, ci_fd_t fd, int request, void* arg) CI_HF; struct oo_msg_template; struct onload_template_msg_update_iovec; extern int ci_tcp_tmpl_alloc(ci_netif* ni, ci_tcp_state* ts, - struct oo_msg_template** omt_pp, - const struct iovec* initial_msg, int mlen, - unsigned flags) CI_HF; -extern int -ci_tcp_tmpl_update(ci_netif* ni, ci_tcp_state* ts, - struct oo_msg_template* omt, - const struct onload_template_msg_update_iovec* updates, - int ulen, unsigned flags) CI_HF; -extern int ci_tcp_tmpl_abort(ci_netif* ni, ci_tcp_state* ts, - struct oo_msg_template* omt) CI_HF; + struct oo_msg_template** omt_pp, const struct iovec* initial_msg, int mlen, + unsigned flags) CI_HF; +extern int ci_tcp_tmpl_update(ci_netif* ni, ci_tcp_state* ts, + struct oo_msg_template* omt, + const struct onload_template_msg_update_iovec* updates, int ulen, + unsigned flags) CI_HF; +extern int ci_tcp_tmpl_abort( + ci_netif* ni, ci_tcp_state* ts, struct oo_msg_template* omt) CI_HF; extern int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog) CI_HF; @@ -1979,27 +1887,26 @@ extern int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog) CI_HF; #ifdef __KERNEL__ extern void ci_tcp_linger(ci_netif*, 
ci_tcp_state*) CI_HF; -extern int ci_tcp_sync_sockopts_to_os_sock(ci_netif* ni, oo_sp sock_id, - struct socket* sock) CI_HF; +extern int ci_tcp_sync_sockopts_to_os_sock( + ci_netif* ni, oo_sp sock_id, struct socket* sock) CI_HF; #endif -extern int ci_tcp_listen_init(ci_netif *ni, ci_tcp_socket_listen *tls) CI_HF; +extern int ci_tcp_listen_init(ci_netif* ni, ci_tcp_socket_listen* tls) CI_HF; /* Send/recv called from within kernel & user-library, so outside above #if */ extern int ci_tcp_recvmsg(const ci_tcp_recvmsg_args*) CI_HF; struct onload_zc_recv_args; -extern int ci_tcp_zc_recvmsg(const ci_tcp_recvmsg_args*, - struct onload_zc_recv_args* args) CI_HF; -extern int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, - const ci_iovec* iov, unsigned long iovlen, - int flags - CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) CI_HF; -extern void ci_tcp_sendmsg_enqueue_prequeue_deferred(ci_netif*, - ci_tcp_state*) CI_HF; -extern void ci_tcp_sendmsg_enqueue_prequeue(ci_netif* ni, - ci_tcp_state* ts, - int/*bool*/ shutdown) CI_HF; -extern void ci_tcp_perform_deferred_socket_work(ci_netif*, ci_tcp_state*)CI_HF; +extern int ci_tcp_zc_recvmsg( + const ci_tcp_recvmsg_args*, struct onload_zc_recv_args* args) CI_HF; +extern int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, const ci_iovec* iov, + unsigned long iovlen, + int flags CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) CI_HF; +extern void ci_tcp_sendmsg_enqueue_prequeue_deferred( + ci_netif*, ci_tcp_state*) CI_HF; +extern void ci_tcp_sendmsg_enqueue_prequeue( + ci_netif* ni, ci_tcp_state* ts, int /*bool*/ shutdown) CI_HF; +extern void ci_tcp_perform_deferred_socket_work( + ci_netif*, ci_tcp_state*) CI_HF; /* Guarantees that deferred work will be performed at some point in the * near future, either by the calling thread (in this call), or deferred to @@ -2007,51 +1914,49 @@ extern void ci_tcp_perform_deferred_socket_work(ci_netif*, ci_tcp_state*)CI_HF; * * Returns 1 if the stack lock was grabbed, else 0. 
*/ -extern int ci_netif_lock_or_defer_work(ci_netif*, citp_waitable*) CI_HF; +extern int ci_netif_lock_or_defer_work(ci_netif*, citp_waitable*) CI_HF; #ifndef __KERNEL__ extern int ci_tcp_connect(citp_socket*, const struct sockaddr*, socklen_t, - ci_fd_t fd, int *p_moved) CI_HF; + ci_fd_t fd, int* p_moved) CI_HF; extern int ci_tcp_shutdown(citp_socket*, int how, ci_fd_t fd) CI_HF; #endif -extern oo_sp ci_tcp_connect_find_local_peer(ci_netif *ni, int locked, - ci_addr_t dst_addr, - int dport_be16) CI_HF; +extern oo_sp ci_tcp_connect_find_local_peer( + ci_netif* ni, int locked, ci_addr_t dst_addr, int dport_be16) CI_HF; #ifdef __KERNEL__ -extern int ci_tcp_connect_lo_samestack(ci_netif *ni, ci_tcp_state *ts, - oo_sp tls_id, int *stack_locked) CI_HF; -extern int ci_tcp_connect_lo_toconn(ci_netif *c_ni, oo_sp c_id, ci_addr_t dst, - ci_netif *l_ni, oo_sp l_id) CI_HF; +extern int ci_tcp_connect_lo_samestack( + ci_netif* ni, ci_tcp_state* ts, oo_sp tls_id, int* stack_locked) CI_HF; +extern int ci_tcp_connect_lo_toconn(ci_netif* c_ni, oo_sp c_id, ci_addr_t dst, + ci_netif* l_ni, oo_sp l_id) CI_HF; #endif #if CI_CFG_LIMIT_AMSS || CI_CFG_LIMIT_SMSS -extern ci_uint16 ci_tcp_limit_mss(ci_uint16 mss, ci_netif* ni, - const char* caller) CI_HF; +extern ci_uint16 ci_tcp_limit_mss( + ci_uint16 mss, ci_netif* ni, const char* caller) CI_HF; #endif extern unsigned ci_tcp_amss(ci_netif* ni, const ci_tcp_socket_cmn* c, - ci_ip_cached_hdrs* ipcache, - const char* caller) CI_HF; + ci_ip_cached_hdrs* ipcache, const char* caller) CI_HF; /********************************************************************** ************************** Misc and tracing ************************** **********************************************************************/ -extern const char* /*??ci_*/ip_addr_str(unsigned addr_be32) CI_HF; - /* Note that this function is not reentrant. However, it won't cause - ** seg-faults. Two buffers are used alternately, so it can be used twice - ** in an list of arguments. 
- */ +extern const char* /*??ci_*/ ip_addr_str(unsigned addr_be32) CI_HF; +/* Note that this function is not reentrant. However, it won't cause +** seg-faults. Two buffers are used alternately, so it can be used twice +** in an list of arguments. +*/ -extern const char* /*??ci_*/domain_str(int domain) CI_HF; -extern const char* /*??ci_*/type_str(int type) CI_HF; +extern const char* /*??ci_*/ domain_str(int domain) CI_HF; +extern const char* /*??ci_*/ type_str(int type) CI_HF; #define CI_SOCK_TYPE_FMT "%s%s%s" -#define CI_SOCK_TYPE_ARGS(type) \ - type_str(type), type & SOCK_CLOEXEC ? " | SOCK_CLOEXEC" : "", \ - type & SOCK_NONBLOCK ? " | SOCK_NONBLOCK" : "" +#define CI_SOCK_TYPE_ARGS(type) \ + type_str(type), type &SOCK_CLOEXEC ? " | SOCK_CLOEXEC" : "", \ + type &SOCK_NONBLOCK ? " | SOCK_NONBLOCK" : "" #ifndef __KERNEL__ /* Linux defines it in its in-kernel header only; we use it both in module @@ -2063,19 +1968,19 @@ extern const char* /*??ci_*/type_str(int type) CI_HF; * be handled by the L5 stack. * On failure, the return value is the error condition. 
*/ -extern int ci_can_handle_addr(ci_netif *netif, ci_uint32 ip_be32, - unsigned int proto, ci_uint32 *src_ip_be32_out, - unsigned *nic_i_out, unsigned *mtu_out) CI_HF; +extern int ci_can_handle_addr(ci_netif* netif, ci_uint32 ip_be32, + unsigned int proto, ci_uint32* src_ip_be32_out, unsigned* nic_i_out, + unsigned* mtu_out) CI_HF; -#define NETIF_MAGIC 0xd +#define NETIF_MAGIC 0xd /********************************************************************** ************************** Per-socket locks *************************** **********************************************************************/ -extern int ci_sock_lock_slow(ci_netif* ni, citp_waitable* w) CI_HF; +extern int ci_sock_lock_slow(ci_netif* ni, citp_waitable* w) CI_HF; extern void ci_sock_unlock_slow(ci_netif*, citp_waitable*) CI_HF; @@ -2083,10 +1988,10 @@ extern void ci_sock_unlock_slow(ci_netif*, citp_waitable*) CI_HF; ******************************* Sleeping ****************************** **********************************************************************/ -#define CI_SLEEP_NETIF_LOCKED 0x1 -#define CI_SLEEP_SOCK_LOCKED 0x2 -#define CI_SLEEP_NETIF_RQ 0x4 -#define CI_SLEEP_SOCK_RQ 0x8 +#define CI_SLEEP_NETIF_LOCKED 0x1 +#define CI_SLEEP_SOCK_LOCKED 0x2 +#define CI_SLEEP_NETIF_RQ 0x4 +#define CI_SLEEP_SOCK_RQ 0x8 #if OO_DO_STACK_POLL /*! Sleep until something happens. @@ -2107,8 +2012,7 @@ extern void ci_sock_unlock_slow(ci_netif*, citp_waitable*) CI_HF; ** of this function). 
*/ extern int ci_sock_sleep(ci_netif* ni, citp_waitable* w, ci_bits why, - unsigned lock_flags, ci_uint64 sleep_seq, - ci_uint32 *timeout_ms_p) CI_HF; + unsigned lock_flags, ci_uint64 sleep_seq, ci_uint32* timeout_ms_p) CI_HF; #endif @@ -2116,9 +2020,9 @@ extern int ci_sock_sleep(ci_netif* ni, citp_waitable* w, ci_bits why, ******************************* Polling ******************************* **********************************************************************/ -#define ci_netif_is_contention(ni) \ - (ef_eplock_flags(&(ni)->state->lock) & \ - (CI_EPLOCK_FL_NEED_WAKE | CI_EPLOCK_NETIF_SOCKET_LIST)) +#define ci_netif_is_contention(ni) \ + (ef_eplock_flags(&(ni)->state->lock) & \ + (CI_EPLOCK_FL_NEED_WAKE | CI_EPLOCK_NETIF_SOCKET_LIST)) /********************************************************************** @@ -2130,42 +2034,44 @@ extern int ci_sock_sleep(ci_netif* ni, citp_waitable* w, ci_bits why, /* ***************** - * Errno wrappers - note that errno should not be accessed directly + * Errno wrappers - note that errno should not be accessed directly */ #ifdef __KERNEL__ -# define CI_SET_ERROR(rc, e) \ - do{ \ - CI_BUILD_ASSERT_CONSTANT_NON_NEGATIVE((int)(e)-1); \ - ci_assert_gt((int)(e), 0); \ - (rc) = -(e); \ - } while(0) -# define CI_GET_ERROR(rc) (-(rc)) +#define CI_SET_ERROR(rc, e) \ + do { \ + CI_BUILD_ASSERT_CONSTANT_NON_NEGATIVE((int) (e) -1); \ + ci_assert_gt((int) (e), 0); \ + (rc) = -(e); \ + } while( 0 ) +#define CI_GET_ERROR(rc) (-(rc)) #else -# define CI_SET_ERROR(rc, e) \ - do{ \ - CI_BUILD_ASSERT_CONSTANT_NON_NEGATIVE((int)(e)-1); \ - ci_assert_gt((int)(e), 0); \ - errno = (e); \ - (rc) = CI_SOCKET_ERROR; \ - } while(0) -# define CI_GET_ERROR(rc) (errno) +#define CI_SET_ERROR(rc, e) \ + do { \ + CI_BUILD_ASSERT_CONSTANT_NON_NEGATIVE((int) (e) -1); \ + ci_assert_gt((int) (e), 0); \ + errno = (e); \ + (rc) = CI_SOCKET_ERROR; \ + } while( 0 ) +#define CI_GET_ERROR(rc) (errno) #endif /* Sets errno to specified value and returns CI_SOCKET_ERROR */ 
-#define RET_WITH_ERRNO(_errno) do { \ - int rc_; \ - CI_SET_ERROR(rc_, _errno); \ - return rc_; } while (0) +#define RET_WITH_ERRNO(_errno) \ + do { \ + int rc_; \ + CI_SET_ERROR(rc_, _errno); \ + return rc_; \ + } while( 0 ) /********************************************************************** **************************** OS-specific ***************************** -**********************************************************************/ + **********************************************************************/ /* ************************** */ /* Unix Implementation */ /* ************************** */ /*! \i_ossock Return value from failed socket calls. */ -#define CI_SOCKET_ERROR -1 +#define CI_SOCKET_ERROR -1 /*! \i_ossock Verify that a file descriptor/handle is valid */ #define CI_IS_VALID_SOCKET(fd) ((fd) >= 0) @@ -2182,8 +2088,11 @@ extern int ci_sock_sleep(ci_netif* ni, citp_waitable* w, ci_bits why, * \param fd [in] OS fd to release * \return nothing */ -#define ci_rel_os_sock_fd(fd) do { if(CI_IS_VALID_SOCKET(fd)) \ - ci_tcp_helper_rel_sock_fd((fd)); } while(0) +#define ci_rel_os_sock_fd(fd) \ + do { \ + if( CI_IS_VALID_SOCKET(fd) ) \ + ci_tcp_helper_rel_sock_fd((fd)); \ + } while( 0 ) /********************************************************************** @@ -2194,31 +2103,31 @@ extern int ci_sock_sleep(ci_netif* ni, citp_waitable* w, ci_bits why, /* Copy data from [piov] into [pkt]. [buf] identifies the buffer space ** into which data can be copied. 
*/ -extern int __ci_copy_iovec_to_pkt(ci_netif*, ci_ip_pkt_fmt*, ci_iovec_ptr* - CI_KERNEL_ARG(ci_addr_spc_t)) CI_HF; +extern int __ci_copy_iovec_to_pkt(ci_netif*, ci_ip_pkt_fmt*, + ci_iovec_ptr* CI_KERNEL_ARG(ci_addr_spc_t)) CI_HF; #if defined(__KERNEL__) -# define ci_copy_iovec_to_pkt(ni, pkt, piov, addr_spc) \ - __ci_copy_iovec_to_pkt((ni), (pkt), (piov), (addr_spc)) +#define ci_copy_iovec_to_pkt(ni, pkt, piov, addr_spc) \ + __ci_copy_iovec_to_pkt((ni), (pkt), (piov), (addr_spc)) #else -# define ci_copy_iovec_to_pkt(ni, pkt, piov) \ - __ci_copy_iovec_to_pkt((ni), (pkt), (piov)) +#define ci_copy_iovec_to_pkt(ni, pkt, piov) \ + __ci_copy_iovec_to_pkt((ni), (pkt), (piov)) #endif -# define ci_ip_copy_pkt_to_user __ci_ip_copy_pkt_to_user -extern ssize_t __ci_ip_copy_pkt_to_user(ci_netif*, ci_iovec*, - ci_ip_pkt_fmt*, int peek_off) CI_HF; +#define ci_ip_copy_pkt_to_user __ci_ip_copy_pkt_to_user +extern ssize_t __ci_ip_copy_pkt_to_user( + ci_netif*, ci_iovec*, ci_ip_pkt_fmt*, int peek_off) CI_HF; #if defined(__KERNEL__) -# define ci_ip_copy_pkt_from_piov __ci_ip_copy_pkt_from_piov -extern size_t __ci_ip_copy_pkt_from_piov(ci_netif*, ci_ip_pkt_fmt*, - ci_iovec_ptr*, ci_addr_spc_t) CI_HF; +#define ci_ip_copy_pkt_from_piov __ci_ip_copy_pkt_from_piov +extern size_t __ci_ip_copy_pkt_from_piov( + ci_netif*, ci_ip_pkt_fmt*, ci_iovec_ptr*, ci_addr_spc_t) CI_HF; #else /* __KERNEL__ */ -# define ci_ip_copy_pkt_from_piov(ni, pkt, iov, aspc) \ +#define ci_ip_copy_pkt_from_piov(ni, pkt, iov, aspc) \ __ci_ip_copy_pkt_from_piov((ni), (pkt), (iov)) -extern size_t __ci_ip_copy_pkt_from_piov(ci_netif*, ci_ip_pkt_fmt*, - ci_iovec_ptr*) CI_HF; +extern size_t __ci_ip_copy_pkt_from_piov( + ci_netif*, ci_ip_pkt_fmt*, ci_iovec_ptr*) CI_HF; #endif @@ -2236,25 +2145,31 @@ extern int ci_setup_ipstack_params(void); **********************************************************************/ #if CI_CFG_STATS_NETIF -# define CITP_STATS_NETIF(x) x -# define CITP_STATS_NETIF_INC(ni,x) do{ 
++(ni)->state->stats.x; }while(0) -# define CITP_STATS_NETIF_ADD(ni,x,v) do{ (ni)->state->stats.x += v; }while(0) +#define CITP_STATS_NETIF(x) x +#define CITP_STATS_NETIF_INC(ni, x) \ + do { \ + ++(ni)->state->stats.x; \ + } while( 0 ) +#define CITP_STATS_NETIF_ADD(ni, x, v) \ + do { \ + (ni)->state->stats.x += v; \ + } while( 0 ) #else -# define CITP_STATS_NETIF(x) -# define CITP_STATS_NETIF_INC(ni,x) -# define CITP_STATS_NETIF_ADD(ni,x,v) +#define CITP_STATS_NETIF(x) +#define CITP_STATS_NETIF_INC(ni, x) +#define CITP_STATS_NETIF_ADD(ni, x, v) #endif #if CI_CFG_STATS_TCP_LISTEN -# define CITP_STATS_TCP_LISTEN(x) x +#define CITP_STATS_TCP_LISTEN(x) x #else -# define CITP_STATS_TCP_LISTEN(x) +#define CITP_STATS_TCP_LISTEN(x) #endif #if CI_CFG_DETAILED_CHECKS -# define CITP_DETAILED_CHECKS(x) x +#define CITP_DETAILED_CHECKS(x) x #else -# define CITP_DETAILED_CHECKS(x) +#define CITP_DETAILED_CHECKS(x) #endif @@ -2262,12 +2177,12 @@ extern int ci_setup_ipstack_params(void); ******************** Accessing trusted kernel state ******************* **********************************************************************/ -#define ci_netif_ep_get(ni, s) ((ni)->ep_tbl[OO_SP_TO_INT(s)]) -#define ci_trs_ep_get(trs, s) ci_netif_ep_get(&(trs)->netif, (s)) +#define ci_netif_ep_get(ni, s) ((ni)->ep_tbl[OO_SP_TO_INT(s)]) +#define ci_trs_ep_get(trs, s) ci_netif_ep_get(&(trs)->netif, (s)) -#define ci_netif_get_valid_ep(ni, sockp) \ +#define ci_netif_get_valid_ep(ni, sockp) \ ((ni)->ep_tbl[TRUSTED_SOCK_ID_FROM_P((ni), (sockp))]) -#define ci_trs_get_valid_ep(trs, sock_id) \ +#define ci_trs_get_valid_ep(trs, sock_id) \ ci_netif_get_valid_ep(&(trs)->netif, (sock_id)) @@ -2276,19 +2191,20 @@ extern int ci_setup_ipstack_params(void); **********************************************************************/ /* note bitno is undefined after loop */ -#define OO_FOR_EACH_BIT(init_mask, mask, bitno) \ - for( (mask) = (init_mask), (bitno) = __builtin_ctz(mask); \ - (mask) ; \ - (mask) = (mask) & 
((mask) - 1), (bitno) = __builtin_ctz(mask) ) +#define OO_FOR_EACH_BIT(init_mask, mask, bitno) \ + for( (mask) = (init_mask), (bitno) = __builtin_ctz(mask); (mask); \ + (mask) = (mask) & ((mask) -1), (bitno) = __builtin_ctz(mask) ) CI_BUILD_ASSERT(sizeof(unsigned) * 8 >= CI_CFG_MAX_INTERFACES); -#define OO_STACK_FOR_EACH_INTF_I(_ni, _intf_i) \ +#define OO_STACK_FOR_EACH_INTF_I(_ni, _intf_i) \ for( (_intf_i) = 0; (_intf_i) < oo_stack_intf_max(_ni); ++(_intf_i) ) -#define OO_STACK_FOR_EACH_FUTURE_INTF_I(_ni, _mask, _intf_i) \ - ci_assert_lt((_ni)->future_intf_mask, 1u << oo_stack_intf_max(_ni)); \ - { CI_BUILD_ASSERT(sizeof(_mask) >= sizeof((_ni)->future_intf_mask)); } \ +#define OO_STACK_FOR_EACH_FUTURE_INTF_I(_ni, _mask, _intf_i) \ + ci_assert_lt((_ni)->future_intf_mask, 1u << oo_stack_intf_max(_ni)); \ + { \ + CI_BUILD_ASSERT(sizeof(_mask) >= sizeof((_ni)->future_intf_mask)); \ + } \ OO_FOR_EACH_BIT((_ni)->future_intf_mask, (_mask), (_intf_i)) /* @@ -2317,7 +2233,8 @@ CI_BUILD_ASSERT(sizeof(unsigned) * 8 >= CI_CFG_MAX_INTERFACES); ************************ Runtime config options ********************** *********************************************************************/ -ci_inline ef_driver_handle ci_netif_get_driver_handle(const ci_netif* ni) { +ci_inline ef_driver_handle ci_netif_get_driver_handle(const ci_netif* ni) +{ #ifdef __KERNEL__ return 0; #else @@ -2325,7 +2242,8 @@ ci_inline ef_driver_handle ci_netif_get_driver_handle(const ci_netif* ni) { #endif } -ci_inline int oo_stack_intf_max(ci_netif* ni) { +ci_inline int oo_stack_intf_max(ci_netif* ni) +{ #if defined(__KERNEL__) return ni->nic_n; #else @@ -2333,7 +2251,8 @@ ci_inline int oo_stack_intf_max(ci_netif* ni) { #endif } -ci_inline ef_vi* ci_netif_vi(ci_netif* ni, int nic_i) { +ci_inline ef_vi* ci_netif_vi(ci_netif* ni, int nic_i) +{ ci_assert_ge(nic_i, 0); ci_assert_lt(nic_i, oo_stack_intf_max(ni)); @@ -2344,8 +2263,8 @@ ci_inline ef_vi* ci_netif_vi(ci_netif* ni, int nic_i) { #if CI_CFG_IPV6 extern 
ci_uint32 ci_make_flowlabel(ci_netif* ni, ci_addr_t saddr, ci_uint16 sport, ci_addr_t daddr, ci_uint16 dport, ci_uint8 proto) CI_HF; -extern ci_uint32 ci_ipcache_make_flowlabel(ci_netif* ni, - ci_ip_cached_hdrs* ipcache) CI_HF; +extern ci_uint32 ci_ipcache_make_flowlabel( + ci_netif* ni, ci_ip_cached_hdrs* ipcache) CI_HF; extern void ci_ipcache_update_flowlabel(ci_netif* ni, ci_sock_cmn* s) CI_HF; #else #define ci_ipcache_update_flowlabel(ni, s) @@ -2355,19 +2274,19 @@ extern void ci_ipcache_update_flowlabel(ci_netif* ni, ci_sock_cmn* s) CI_HF; * negative, because rxq_limit changes dynamically. */ ci_inline int ci_netif_rx_vi_space(ci_netif* ni, ef_vi* vi) -{ return ni->state->rxq_limit - ef_vi_receive_fill_level(vi); } - +{ + return ni->state->rxq_limit - ef_vi_receive_fill_level(vi); +} extern void ci_netif_send_plugin_app_ctrl(ci_netif* ni, int nic_index, - ci_ip_pkt_fmt* pkt, - const void* payload, size_t paylen); + ci_ip_pkt_fmt* pkt, const void* payload, size_t paylen); -extern void __ci_netif_ring_plugin_app_doorbell(ci_netif* netif, - int nic_index); +extern void __ci_netif_ring_plugin_app_doorbell( + ci_netif* netif, int nic_index); -ci_inline void ci_netif_ring_ceph_doorbell(ci_netif* netif, - int nic_index, int n) +ci_inline void ci_netif_ring_ceph_doorbell( + ci_netif* netif, int nic_index, int n) { #if CI_CFG_TCP_OFFLOAD_RECYCLER if( NI_OPTS(netif).tcp_offload_plugin == CITP_TCP_OFFLOAD_CEPH ) { @@ -2393,7 +2312,7 @@ ci_inline void ci_netif_rx_post_all_batch(ci_netif* netif, int nic_index) if( ci_netif_rx_vi_space(netif, vi) >= CI_CFG_RX_DESC_BATCH ) n_posted = ci_netif_rx_post(netif, nic_index, vi); } - (void)n_posted; + (void) n_posted; #if CI_CFG_TCP_OFFLOAD_RECYCLER /* The VI owning the doorbell is always the last VI, so the loop above has * the effect of ignoring n_posted for all the others and only passing the @@ -2407,7 +2326,8 @@ ci_inline void ci_netif_rx_post_all_batch(ci_netif* netif, int nic_index) * Handling return from ci_netif_pkt_wait() 
and ci_netif_lock(). */ -ci_inline int ci_netif_pkt_wait_was_interrupted(int rc) { +ci_inline int ci_netif_pkt_wait_was_interrupted(int rc) +{ #ifdef __KERNEL__ ci_assert(rc == 0 || rc == -ERESTARTSYS); return rc < 0; @@ -2418,7 +2338,8 @@ ci_inline int ci_netif_pkt_wait_was_interrupted(int rc) { } -ci_inline int ci_netif_lock_was_interrupted(int rc) { +ci_inline int ci_netif_lock_was_interrupted(int rc) +{ #ifdef __KERNEL__ ci_assert(rc == 0 || rc == -ERESTARTSYS); return rc < 0; @@ -2453,45 +2374,55 @@ ci_inline int oo_bit_array_get(ci_uint32* array, int id) ************************** citp_waitable_obj ************************* *********************************************************************/ -ci_inline ci_udp_state* SOCK_TO_UDP(ci_sock_cmn* s) { +ci_inline ci_udp_state* SOCK_TO_UDP(ci_sock_cmn* s) +{ ci_assert_equal(s->b.state, CI_TCP_STATE_UDP); return CI_CONTAINER(ci_udp_state, s, s); } ci_inline ci_tcp_state* __SOCK_TO_TCP(ci_sock_cmn* s) -{ return CI_CONTAINER(ci_tcp_state, s, s); } -ci_inline ci_tcp_socket_listen* __SOCK_TO_TCP_LISTEN(ci_sock_cmn* s) { +{ + return CI_CONTAINER(ci_tcp_state, s, s); +} +ci_inline ci_tcp_socket_listen* __SOCK_TO_TCP_LISTEN(ci_sock_cmn* s) +{ return CI_CONTAINER(ci_tcp_socket_listen, s, s); } #ifdef NDEBUG #define SOCK_TO_TCP __SOCK_TO_TCP #else -ci_inline ci_tcp_state* SOCK_TO_TCP_DEBUG(ci_sock_cmn*s, const char*file, - int line) { +ci_inline ci_tcp_state* SOCK_TO_TCP_DEBUG( + ci_sock_cmn* s, const char* file, int line) +{ _ci_assert(s->b.state & CI_TCP_STATE_TCP, file, line); - _ci_assert(s->b.state == CI_TCP_CLOSED || - (s->b.state & CI_TCP_STATE_TCP_CONN), file, line); + _ci_assert( + s->b.state == CI_TCP_CLOSED || (s->b.state & CI_TCP_STATE_TCP_CONN), + file, line); return __SOCK_TO_TCP(s); } -# define SOCK_TO_TCP(s) SOCK_TO_TCP_DEBUG(s, __FILE__, __LINE__) +#define SOCK_TO_TCP(s) SOCK_TO_TCP_DEBUG(s, __FILE__, __LINE__) #endif -ci_inline ci_tcp_socket_listen* SOCK_TO_TCP_LISTEN(ci_sock_cmn* s) { +ci_inline 
ci_tcp_socket_listen* SOCK_TO_TCP_LISTEN(ci_sock_cmn* s) +{ ci_assert_equal(s->b.state, CI_TCP_LISTEN); return __SOCK_TO_TCP_LISTEN(s); } ci_inline citp_waitable_obj* SOCK_TO_WAITABLE_OBJ(ci_sock_cmn* s) -{ return CI_CONTAINER(citp_waitable_obj, sock, s); } +{ + return CI_CONTAINER(citp_waitable_obj, sock, s); +} /********************************************************************* ************************** UDP Receive queue ************************* *********************************************************************/ -ci_inline void ci_udp_recv_q_init(ci_udp_recv_q* q) { +ci_inline void ci_udp_recv_q_init(ci_udp_recv_q* q) +{ q->head = q->extract = OO_PP_NULL; q->pkts_reaped = q->pkts_delivered = q->pkts_added = 0; } @@ -2502,17 +2433,17 @@ ci_inline int ci_udp_recv_q_is_empty(ci_udp_recv_q* q) } ci_inline int ci_udp_recv_q_not_empty(ci_udp_recv_q* q) -{ +{ return q->pkts_added != q->pkts_delivered; } ci_inline int ci_udp_recv_q_pkts(ci_udp_recv_q* q) { - return q->pkts_added - q->pkts_delivered; + return q->pkts_added - q->pkts_delivered; } ci_inline int ci_udp_recv_q_reapable(ci_udp_recv_q* q) -{ +{ return q->pkts_delivered - q->pkts_reaped; } @@ -2523,10 +2454,12 @@ ci_inline int ci_udp_recv_q_reapable(ci_udp_recv_q* q) /*! This function gets the current cached time in ticks ** \param its A pointer to the ci_ip_timer_state management block -** \return The current cached time in ticks +** \return The current cached time in ticks */ -ci_inline ci_iptime_t ci_ip_time_now(ci_netif *ni) -{ return IPTIMER_STATE(ni)->ci_ip_time_real_ticks; } +ci_inline ci_iptime_t ci_ip_time_now(ci_netif* ni) +{ + return IPTIMER_STATE(ni)->ci_ip_time_real_ticks; +} /* Returns true if [a] is before [b]. */ @@ -2537,21 +2470,23 @@ ci_ip_time_before(ci_iptime_t a, ci_iptime_t b) } -/*! This function sets the initial current free cycle counter time in ticks +/*! 
This function sets the initial current free cycle counter time in ticks ** \param its A pointer to the ci_ip_timer_state management block */ -ci_inline void ci_ip_time_initial_sync(ci_ip_timer_state* its) { -#if defined(CI_HAVE_FRC64) - ci_frc64(&its->frc); +ci_inline void ci_ip_time_initial_sync(ci_ip_timer_state* its) +{ +#if defined(CI_HAVE_FRC64) + ci_frc64(&its->frc); its->ci_ip_time_real_ticks = - (ci_iptime_t)(its->frc >> its->ci_ip_time_frc2tick); + (ci_iptime_t) (its->frc >> its->ci_ip_time_frc2tick); #else -# error need a frc64 routine to compile iptimer support -#endif +#error need a frc64 routine to compile iptimer support +#endif } -ci_inline void ci_ip_time_update(ci_ip_timer_state* its, ci_uint64 new_frc) { - if(CI_LIKELY( new_frc >= its->frc )) { +ci_inline void ci_ip_time_update(ci_ip_timer_state* its, ci_uint64 new_frc) +{ + if( CI_LIKELY(new_frc >= its->frc) ) { ci_iptime_t new_ticks; new_ticks = (ci_iptime_t) (new_frc >> its->ci_ip_time_frc2tick); its->ci_ip_time_real_ticks = new_ticks; @@ -2559,52 +2494,55 @@ ci_inline void ci_ip_time_update(ci_ip_timer_state* its, ci_uint64 new_frc) { } } -/*! This function updates the current free cycle counter time in ticks +/*! This function updates the current free cycle counter time in ticks ** \param its A pointer to the ci_ip_timer_state management block */ -ci_inline void ci_ip_time_resync(ci_ip_timer_state* its) { +ci_inline void ci_ip_time_resync(ci_ip_timer_state* its) +{ ci_uint64 new_frc; - ci_frc64(&new_frc); + ci_frc64(&new_frc); ci_ip_time_update(its, new_frc); } -/*! This function gets the current free cycle counter time in ticks +/*! 
This function gets the current free cycle counter time in ticks ** \param its A pointer to the ci_ip_timer_state management block ** \param t An out parameter to write the return into */ -ci_inline void ci_ip_time_get(ci_ip_timer_state* its, ci_iptime_t* ticks) { -#if defined(CI_HAVE_FRC64) +ci_inline void ci_ip_time_get(ci_ip_timer_state* its, ci_iptime_t* ticks) +{ +#if defined(CI_HAVE_FRC64) ci_uint64 frc; - ci_frc64(&frc); - *ticks = (ci_iptime_t)(frc >> its->ci_ip_time_frc2tick); + ci_frc64(&frc); + *ticks = (ci_iptime_t) (frc >> its->ci_ip_time_frc2tick); #else -# error need a frc64 routine to compile iptimer support -#endif +#error need a frc64 routine to compile iptimer support +#endif } /*! This function gets the current free cycle counter time in us ** \param its A pointer to the ci_ip_timer_state management block ** \param t An out parameter to write the return into */ -ci_inline void ci_ip_time_get_us(ci_ip_timer_state* its, ci_iptime_t* t) { -#if defined(CI_HAVE_FRC64) +ci_inline void ci_ip_time_get_us(ci_ip_timer_state* its, ci_iptime_t* t) +{ +#if defined(CI_HAVE_FRC64) ci_uint64 frc; - ci_frc64(&frc); - *t = (ci_iptime_t)(frc >> its->ci_ip_time_frc2us); + ci_frc64(&frc); + *t = (ci_iptime_t) (frc >> its->ci_ip_time_frc2us); #else -# error need a frc64 routine to compile iptimer support -#endif +#error need a frc64 routine to compile iptimer support +#endif } /*! 
Convert a time measure in ms to the number of ticks -** \param ni A pointer to the netif +** \param ni A pointer to the netif ** \param t The time in ms ** \return The time t in ticks */ -ci_inline ci_iptime_t ci_ip_time_ms2ticks(ci_netif *ni, ci_uint32 t) +ci_inline ci_iptime_t ci_ip_time_ms2ticks(ci_netif* ni, ci_uint32 t) { - ci_ip_timer_state *its = IPTIMER_STATE(ni); - t = (ci_iptime_t) ( (((ci_uint64)t) * its->ci_ip_time_ms2tick_fxp) >> 32 ); + ci_ip_timer_state* its = IPTIMER_STATE(ni); + t = (ci_iptime_t) ((((ci_uint64) t) * its->ci_ip_time_ms2tick_fxp) >> 32); /* rounds up 0 timers... */ return t ? t : 1; } @@ -2614,11 +2552,12 @@ ci_inline ci_iptime_t ci_ip_time_ms2ticks(ci_netif *ni, ci_uint32 t) ** \param t The time in ticks ** \return The time t in ms */ -ci_inline ci_uint32 ci_ip_time_ticks2ms(ci_netif* ni, ci_iptime_t t) { - ci_ip_timer_state *its = IPTIMER_STATE(ni); +ci_inline ci_uint32 ci_ip_time_ticks2ms(ci_netif* ni, ci_iptime_t t) +{ + ci_ip_timer_state* its = IPTIMER_STATE(ni); /* As for now the function is not used on fast path should this change * then use of multiplication and inversed factor to be considered */ - t = (ci_iptime_t) ( (((ci_uint64)t << 32) / its->ci_ip_time_ms2tick_fxp) ); + t = (ci_iptime_t) ((((ci_uint64) t << 32) / its->ci_ip_time_ms2tick_fxp)); return t; } @@ -2626,10 +2565,10 @@ ci_inline ci_uint32 ci_ip_time_ticks2ms(ci_netif* ni, ci_iptime_t t) { /* Convert Herz (per-second value) to per tick. */ ci_inline ci_uint32 ci_ip_time_freq_hz2tick(ci_netif* ni, ci_uint32 hz) { - ci_ip_timer_state *its = IPTIMER_STATE(ni); + ci_ip_timer_state* its = IPTIMER_STATE(ni); /* We assume that 1024==1000, and khz = hz >> 10. * Then we use the expression from ci_ip_time_ms2ticks(). 
*/ - return ((ci_uint64)hz << 22) / its->ci_ip_time_ms2tick_fxp; + return ((ci_uint64) hz << 22) / its->ci_ip_time_ms2tick_fxp; } @@ -2643,7 +2582,8 @@ ci_inline const cicp_hwport_mask_t ci_netif_get_hwport_mask(ci_netif* ni) } -ci_inline const ci_int8* ci_netif_get_hwport_to_intf_i(ci_netif* ni) { +ci_inline const ci_int8* ci_netif_get_hwport_to_intf_i(ci_netif* ni) +{ #ifdef __KERNEL__ return ni->hwport_to_intf_i; #else @@ -2652,21 +2592,21 @@ ci_inline const ci_int8* ci_netif_get_hwport_to_intf_i(ci_netif* ni) { } -ci_inline int __ci_hwport_to_intf_i(ci_netif* ni, ci_hwport_id_t hwport) { +ci_inline int __ci_hwport_to_intf_i(ci_netif* ni, ci_hwport_id_t hwport) +{ ci_assert((unsigned) hwport < CI_CFG_MAX_HWPORTS); return ci_netif_get_hwport_to_intf_i(ni)[hwport]; } -ci_inline int ci_hwport_to_intf_i(ci_netif* ni, ci_hwport_id_t hwport) { - if(CI_LIKELY( (unsigned) hwport < CI_CFG_MAX_HWPORTS )) +ci_inline int ci_hwport_to_intf_i(ci_netif* ni, ci_hwport_id_t hwport) +{ + if( CI_LIKELY((unsigned) hwport < CI_CFG_MAX_HWPORTS) ) return ci_netif_get_hwport_to_intf_i(ni)[hwport]; return ci_netif_bad_hwport(ni, hwport); } - - ci_inline int ci_netif_may_poll(ci_netif* ni) { return NI_OPTS(ni).poll_on_demand; @@ -2696,33 +2636,40 @@ ci_inline int ci_netif_is_spinner(ci_netif* ni) ** generate an interrupt when an event next arrives. */ ci_inline int ci_netif_is_primed(ci_netif* ni) -{ return ni->nic_set.nics == ni->state->evq_primed; } +{ + return ni->nic_set.nics == ni->state->evq_primed; +} /* Cheap test that returns true if the stack is not "primed". i.e. Not all ** event queues have been primed to generate an interrupt when the next ** event arrives. */ ci_inline int ci_netif_not_primed(ci_netif* ni) -{ return ni->nic_set.nics != ni->state->evq_primed; } +{ + return ni->nic_set.nics != ni->state->evq_primed; +} /* Returns true if there are any hardware events outstanding on the given * interface. 
*/ ci_inline int ci_netif_intf_has_event(ci_netif* ni, int intf_i) -{ return ef_eventq_has_event(ci_netif_vi(ni, intf_i)); } +{ + return ef_eventq_has_event(ci_netif_vi(ni, intf_i)); +} /* Returns true if there are any hardware events outstanding on any * interface. */ -ci_inline int ci_netif_has_event(ci_netif* ni) { +ci_inline int ci_netif_has_event(ci_netif* ni) +{ int intf_i, rc = 0; OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - if( ci_netif_intf_has_event(ni, intf_i) ) { - rc = 1; - break; - } + if( ci_netif_intf_has_event(ni, intf_i) ) { + rc = 1; + break; + } if( OO_PP_NOT_NULL(ni->state->looppkts) ) rc = 1; return rc; @@ -2730,13 +2677,14 @@ ci_inline int ci_netif_has_event(ci_netif* ni) { /* Returns true if there are many hardware events outstanding. */ -ci_inline int ci_netif_has_many_events(ci_netif* ni, int lookahead) { +ci_inline int ci_netif_has_many_events(ci_netif* ni, int lookahead) +{ int intf_i, rc = 0; OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - if( ef_eventq_has_many_events(ci_netif_vi(ni, intf_i), lookahead) ) { - rc = 1; - break; - } + if( ef_eventq_has_many_events(ci_netif_vi(ni, intf_i), lookahead) ) { + rc = 1; + break; + } return rc; } @@ -2746,10 +2694,11 @@ ci_inline int ci_netif_has_many_events(ci_netif* ni, int lookahead) { * This must not match the first four bytes of the packet. Anything that does * not match our OUI or multicast addresses will do. If we add support for * third-party NICs, we may want a per-NIC poison value to ensure a mismatch. - * If we add support for detecting subsequent cache lines, or if packet prefixes - * are enabled, there will be the possibility of deciding falsely that a packet - * is still poisonous when in fact it is not, but there is very little that we - * can do about that. It would not cause a functional problem in any case. 
+ * If we add support for detecting subsequent cache lines, or if packet + * prefixes are enabled, there will be the possibility of deciding falsely that + * a packet is still poisonous when in fact it is not, but there is very little + * that we can do about that. It would not cause a functional problem in any + * case. * * For EFCT, this must match the value provided to the driver in * efct_nic_rxq_bind. TODO EFCT centralise the definition of this value. @@ -2757,7 +2706,7 @@ ci_inline int ci_netif_has_many_events(ci_netif* ni, int lookahead) { #define CI_PKT_RX_POISON 0xFFA0C09Bu ci_inline volatile uint32_t* ci_netif_poison_location(ci_ip_pkt_fmt* pkt) { - return (volatile uint32_t*)pkt->dma_start; + return (volatile uint32_t*) pkt->dma_start; } ci_inline void ci_netif_poison_rx_pkt(ci_ip_pkt_fmt* pkt) { @@ -2797,8 +2746,8 @@ ci_inline ci_ip_pkt_fmt* ci_netif_intf_next_rx_pkt(ci_netif* ni, ef_vi* vi) * * The function does not require a stack lock. */ -ci_inline const volatile uint32_t* -ci_netif_intf_rx_future(ci_netif* ni, int intf_i, const uint32_t* poison) +ci_inline const volatile uint32_t* ci_netif_intf_rx_future( + ci_netif* ni, int intf_i, const uint32_t* poison) { ci_ip_pkt_fmt* pkt; ci_uint8* p; @@ -2819,8 +2768,9 @@ ci_netif_intf_rx_future(ci_netif* ni, int intf_i, const uint32_t* poison) if( pkt == NULL ) return poison; - /* FIXME: colocate all the fields used by the rx path to reduce cache usage */ - for( p = (ci_uint8*)pkt; p < pkt->dma_start; p += CI_CACHE_LINE_SIZE ) + /* FIXME: colocate all the fields used by the rx path to reduce cache usage + */ + for( p = (ci_uint8*) pkt; p < pkt->dma_start; p += CI_CACHE_LINE_SIZE ) ci_prefetch(p); return ci_netif_poison_location(pkt); @@ -2828,24 +2778,24 @@ ci_netif_intf_rx_future(ci_netif* ni, int intf_i, const uint32_t* poison) #endif -ci_inline int ci_netif_need_timer_prime(ci_netif* ni, ci_uint64 frc_now) { +ci_inline int ci_netif_need_timer_prime(ci_netif* ni, ci_uint64 frc_now) +{ return frc_now - 
ni->state->evq_last_prime > ni->state->timer_prime_cycles; } ci_inline int ci_netif_need_poll_spinning(ci_netif* ni, ci_uint64 frc_now) { - return ci_netif_has_event(ni) || - ci_netif_need_timer_prime(ni, frc_now); + return ci_netif_has_event(ni) || ci_netif_need_timer_prime(ni, frc_now); } /* See ci_netif_need_poll() for description. Use this when you already ** know a recent frc. */ -ci_inline int ci_netif_need_poll_frc(ci_netif* ni, ci_uint64 frc_now) { - return ci_netif_not_primed(ni) && - ci_netif_need_poll_spinning(ni, frc_now); +ci_inline int ci_netif_need_poll_frc(ci_netif* ni, ci_uint64 frc_now) +{ + return ci_netif_not_primed(ni) && ci_netif_need_poll_spinning(ni, frc_now); } @@ -2857,11 +2807,14 @@ ci_inline int ci_netif_need_poll_frc(ci_netif* ni, ci_uint64 frc_now) { ** outstanding or it has been a while since the stack was last polled. */ ci_inline int ci_netif_need_poll(ci_netif* ni) -{ return ci_netif_need_poll_frc(ni, ci_frc64_get()); } +{ + return ci_netif_need_poll_frc(ni, ci_frc64_get()); +} -ci_inline int ci_netif_need_poll_maybe_spinning(ci_netif* ni, ci_uint64 frc_now, - int spinning) { +ci_inline int ci_netif_need_poll_maybe_spinning( + ci_netif* ni, ci_uint64 frc_now, int spinning) +{ if( spinning ) return ci_netif_need_poll_spinning(ni, frc_now); else @@ -2871,48 +2824,49 @@ ci_inline int ci_netif_need_poll_maybe_spinning(ci_netif* ni, ci_uint64 frc_now, #if CI_CFG_TCP_SHARED_LOCAL_PORTS ci_inline int ci_netif_should_allocate_tcp_shared_local_ports(ci_netif* ni) { - return - NI_OPTS(ni).tcp_shared_local_ports > 0 && - NI_OPTS(ni).scalable_filter_enable != CITP_SCALABLE_FILTERS_ENABLE_WORKER; + return NI_OPTS(ni).tcp_shared_local_ports > 0 && + NI_OPTS(ni).scalable_filter_enable != + CITP_SCALABLE_FILTERS_ENABLE_WORKER; } #endif -ci_inline int oo_tx_zc_payload_size(ci_netif* ni) { +ci_inline int oo_tx_zc_payload_size(ci_netif* ni) +{ return sizeof(struct ci_pkt_zc_payload) + sizeof(ef_addr) * oo_stack_intf_max(ni); } -ci_inline struct 
ci_pkt_zc_payload* -oo_tx_zc_payload_next(ci_netif* ni, struct ci_pkt_zc_payload* zcp) +ci_inline struct ci_pkt_zc_payload* oo_tx_zc_payload_next( + ci_netif* ni, struct ci_pkt_zc_payload* zcp) { if( zcp->is_remote ) - return (void*)((char*)zcp + oo_tx_zc_payload_size(ni)); - return (void*)(zcp->local + CI_ALIGN_FWD(zcp->len, CI_PKT_ZC_PAYLOAD_ALIGN)); + return (void*) ((char*) zcp + oo_tx_zc_payload_size(ni)); + return ( + void*) (zcp->local + CI_ALIGN_FWD(zcp->len, CI_PKT_ZC_PAYLOAD_ALIGN)); } -#define OO_TX_FOR_EACH_ZC_PAYLOAD(ni, zch, zcp) \ - for( (zcp) = (zch)->data; \ - (char*)(zcp) - (char*)(zch) < (zch)->end; \ +#define OO_TX_FOR_EACH_ZC_PAYLOAD(ni, zch, zcp) \ + for( (zcp) = (zch)->data; (char*) (zcp) - (char*) (zch) < (zch)->end; \ (zcp) = oo_tx_zc_payload_next(ni, zcp) ) #if CI_CFG_TX_CRC_OFFLOAD /* Initializes an id pool for the NVMe CRC plugin ids. * According to the ci_fifo2 specification, the queue capacity is cap-1 */ -ci_inline void -ci_nvme_plugin_crc_id_init(struct nvme_crc_plugin_idp_t* idp, unsigned base, unsigned limit) +ci_inline void ci_nvme_plugin_crc_id_init( + struct nvme_crc_plugin_idp_t* idp, unsigned base, unsigned limit) { int i = 0; if( limit == 0 || limit >= (1 << ZC_NVME_CRC_IDP_CAP) ) limit = (1 << ZC_NVME_CRC_IDP_CAP) - 1; ci_fifo2_init(idp, 1 << ZC_NVME_CRC_IDP_CAP); for( i = 0; i < limit; i++ ) - ci_fifo2_put(idp, i + base); + ci_fifo2_put(idp, i + base); } -ci_inline int -ci_nvme_plugin_crc_id_alloc(struct nvme_crc_plugin_idp_t* idp, unsigned *id) +ci_inline int ci_nvme_plugin_crc_id_alloc( + struct nvme_crc_plugin_idp_t* idp, unsigned* id) { if( ci_fifo2_is_empty(idp) ) return -EAGAIN; @@ -2920,34 +2874,35 @@ ci_nvme_plugin_crc_id_alloc(struct nvme_crc_plugin_idp_t* idp, unsigned *id) return 0; } -ci_inline void -ci_nvme_plugin_crc_id_release(struct nvme_crc_plugin_idp_t* idp, unsigned id) +ci_inline void ci_nvme_plugin_crc_id_release( + struct nvme_crc_plugin_idp_t* idp, unsigned id) { - ci_assert(!ci_fifo2_is_full(idp)); + 
ci_assert(! ci_fifo2_is_full(idp)); ci_assert_nequal(id, ZC_NVME_CRC_ID_INVALID); ci_fifo2_put(idp, id); } -ci_inline bool -ci_nvme_plugin_crc_last_byte_sent(const struct ci_pkt_zc_payload* zcp) +ci_inline bool ci_nvme_plugin_crc_last_byte_sent( + const struct ci_pkt_zc_payload* zcp) { return zcp->crc_insert_first_byte + zcp->crc_insert_n_bytes == 4; } /* The acked ids are released when the flag is INSERT and the last * CRC byte has been sent */ -ci_inline void -ci_nvme_plugin_crc_free_acked_ids(ci_netif* ni, ci_ip_pkt_fmt* pkt) +ci_inline void ci_nvme_plugin_crc_free_acked_ids( + ci_netif* ni, ci_ip_pkt_fmt* pkt) { struct ci_pkt_zc_payload* zcp; struct ci_pkt_zc_header* zch = oo_tx_zc_header(pkt); int intf_i = pkt->intf_i; - OO_TX_FOR_EACH_ZC_PAYLOAD(ni, zch, zcp) { + OO_TX_FOR_EACH_ZC_PAYLOAD(ni, zch, zcp) + { if( zcp->zcp_flags & ZC_PAYLOAD_FLAG_INSERT_CRC && ci_nvme_plugin_crc_last_byte_sent(zcp) ) { - ci_nvme_plugin_crc_id_release(&ni->state->nvme_crc_plugin_idp[intf_i], - zcp->crc_id); + ci_nvme_plugin_crc_id_release( + &ni->state->nvme_crc_plugin_idp[intf_i], zcp->crc_id); zcp->crc_id = ZC_NVME_CRC_ID_INVALID; } } @@ -2958,9 +2913,8 @@ ci_nvme_plugin_crc_free_acked_ids(ci_netif* ni, ci_ip_pkt_fmt* pkt) * needed for cleanup when the ID pool is exhausted mid-packet or when TX of * the packet fails. The value of ts->current_crc_id should have been restored * by the caller to its pre-failure value before calling this function. 
*/ -ci_inline void -ci_nvme_plugin_crc_packet_cleanup(ci_netif* ni, ci_tcp_state* ts, - ci_ip_pkt_fmt* pkt) +ci_inline void ci_nvme_plugin_crc_packet_cleanup( + ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_fmt* pkt) { #if CI_CFG_TX_CRC_OFFLOAD struct ci_pkt_zc_header* zch = oo_tx_zc_header(pkt); @@ -2968,17 +2922,17 @@ ci_nvme_plugin_crc_packet_cleanup(ci_netif* ni, ci_tcp_state* ts, ci_uint32 prev_id = ZC_NVME_CRC_ID_INVALID; int intf_i = pkt->intf_i; - OO_TX_FOR_EACH_ZC_PAYLOAD(ni, zch, zcp) { - if( zcp->zcp_flags & (ZC_PAYLOAD_FLAG_ACCUM_CRC | - ZC_PAYLOAD_FLAG_INSERT_CRC) ) { + OO_TX_FOR_EACH_ZC_PAYLOAD(ni, zch, zcp) + { + if( zcp->zcp_flags & + (ZC_PAYLOAD_FLAG_ACCUM_CRC | ZC_PAYLOAD_FLAG_INSERT_CRC) ) { if( zcp->crc_id == ZC_NVME_CRC_ID_INVALID ) break; /* Free this ID if it was different from the preceding one, and therefore * newly allocated. */ - if( zcp->crc_id != ts->current_crc_id && - zcp->crc_id != prev_id ) { - ci_nvme_plugin_crc_id_release(&ni->state->nvme_crc_plugin_idp[intf_i], - zcp->crc_id); + if( zcp->crc_id != ts->current_crc_id && zcp->crc_id != prev_id ) { + ci_nvme_plugin_crc_id_release( + &ni->state->nvme_crc_plugin_idp[intf_i], zcp->crc_id); prev_id = zcp->crc_id; } zcp->crc_id = ZC_NVME_CRC_ID_INVALID; @@ -2991,9 +2945,8 @@ ci_nvme_plugin_crc_packet_cleanup(ci_netif* ni, ci_tcp_state* ts, * which need to be freed. Those ids are the current id and the * previously-allocated ones. 
*/ -ci_inline void ci_nvme_plugin_idp_dropped_queue_cleanup(ci_netif* ni, - ci_tcp_state* ts, - ci_ip_pkt_queue *qu) +ci_inline void ci_nvme_plugin_idp_dropped_queue_cleanup( + ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_queue* qu) { #if CI_CFG_TX_CRC_OFFLOAD oo_pkt_p pp = qu->head; @@ -3006,8 +2959,8 @@ ci_inline void ci_nvme_plugin_idp_dropped_queue_cleanup(ci_netif* ni, pp = p->next; } if( ts->current_crc_id != ZC_NVME_CRC_ID_INVALID ) { - ci_nvme_plugin_crc_id_release(&ni->state->nvme_crc_plugin_idp[intf_i], - ts->current_crc_id); + ci_nvme_plugin_crc_id_release( + &ni->state->nvme_crc_plugin_idp[intf_i], ts->current_crc_id); ts->current_crc_id = ZC_NVME_CRC_ID_INVALID; } #endif @@ -3017,7 +2970,7 @@ ci_inline void ci_nvme_plugin_idp_dropped_queue_cleanup(ci_netif* ni, ********************** Packet buffer allocation ********************** *********************************************************************/ -ci_inline void __ci_netif_pkt_clean(ci_ip_pkt_fmt* pkt) +ci_inline void __ci_netif_pkt_clean(ci_ip_pkt_fmt* pkt) { pkt->flags &= CI_PKT_FLAG_NONB_POOL; pkt->rx_flags = 0; @@ -3064,7 +3017,7 @@ ci_inline void ci_netif_pkt_put(ci_netif* ni, ci_ip_pkt_fmt* pkt) /* If we have too few free packets in the now-current set, * we should allocate another set. * Fixme: is set_size/2 a good margin or should it be tunable? */ -ci_inline int/*bool*/ +ci_inline int /*bool*/ ci_netif_pkt_set_is_underfilled(ci_netif* ni, int bufset_id) { return ni->packets->set[bufset_id].n_free < CI_CFG_PKT_SET_LOW_WATER; @@ -3077,15 +3030,14 @@ ci_netif_pkt_set_is_underfilled(ci_netif* ni, int bufset_id) * If the caller need to allocate a lot of packets, there is no need call * ci_netif_pkt_set_change() until all the allocations are done. 
*/ -ci_inline void ci_netif_pkt_set_change(ci_netif* ni, int bufset_id, - int/*bool*/ is_underfilled) +ci_inline void ci_netif_pkt_set_change( + ci_netif* ni, int bufset_id, int /*bool*/ is_underfilled) { ni->packets->id = bufset_id; ci_assert_equal(bufset_id, NI_PKT_SET(ni)); if( ni->packets->sets_n < ni->packets->sets_max && is_underfilled ) - ef_eplock_holder_set_flag(&ni->state->lock, - CI_EPLOCK_NETIF_NEED_PKT_SET); + ef_eplock_holder_set_flag(&ni->state->lock, CI_EPLOCK_NETIF_NEED_PKT_SET); /* When we are called from ci_netif_rx_post(), we could already consume * all available packets. Let's set NEED_PKT_SET flag above and exit. */ @@ -3094,12 +3046,13 @@ ci_inline void ci_netif_pkt_set_change(ci_netif* ni, int bufset_id, ci_assert(OO_PP_NOT_NULL(ni->packets->set[bufset_id].free)); } -ci_inline ci_ip_pkt_fmt* ci_netif_pkt_alloc(ci_netif* ni, int flags) { +ci_inline ci_ip_pkt_fmt* ci_netif_pkt_alloc(ci_netif* ni, int flags) +{ ci_ip_pkt_fmt* pkt; int bufset_id; - ci_assert( ci_netif_is_locked(ni) ); + ci_assert(ci_netif_is_locked(ni)); bufset_id = NI_PKT_SET(ni); - if(CI_LIKELY( ni->packets->set[bufset_id].n_free > 0 )) + if( CI_LIKELY(ni->packets->set[bufset_id].n_free > 0) ) pkt = ci_netif_pkt_get(ni, bufset_id); else pkt = ci_netif_pkt_alloc_slow(ni, flags); @@ -3108,33 +3061,39 @@ ci_inline ci_ip_pkt_fmt* ci_netif_pkt_alloc(ci_netif* ni, int flags) { ci_inline int ci_netif_pkt_nonb_pool_is_empty(ci_netif* ni) -{ return (ni->state->nonb_pkt_pool & 0xffffffff) == 0xffffffff; } +{ + return (ni->state->nonb_pkt_pool & 0xffffffff) == 0xffffffff; +} ci_inline int ci_netif_pkt_nonb_pool_not_empty(ci_netif* ni) -{ return (ni->state->nonb_pkt_pool & 0xffffffff) != 0xffffffff; } - +{ + return (ni->state->nonb_pkt_pool & 0xffffffff) != 0xffffffff; +} -#define CI_NETIF_PKT_POOL_MIN_LEVEL 512 +#define CI_NETIF_PKT_POOL_MIN_LEVEL 512 /* Number of packet either allocated to rx or reserved to rx */ -ci_inline int ci_netif_pkt_rx_n(ci_netif* ni) { +ci_inline int 
ci_netif_pkt_rx_n(ci_netif* ni) +{ return ni->state->n_rx_pkts + ni->state->reserved_pktbufs; } /* Number of packet bufs that can get allocated */ -ci_inline int ci_netif_pkt_free_n(ci_netif* ni) { - return - ((ni->packets->sets_max - ni->packets->sets_n) << CI_CFG_PKTS_PER_SET_S) + - ni->packets->n_free; +ci_inline int ci_netif_pkt_free_n(ci_netif* ni) +{ + return ((ni->packets->sets_max - ni->packets->sets_n) + << CI_CFG_PKTS_PER_SET_S) + + ni->packets->n_free; } /* Number of packet bufs currently allocated to TX paths. Packets in the * non-blocking free pool count as being allocated to TX. */ -ci_inline int ci_netif_pkt_tx_n(ci_netif* ni) { - return ni->packets->n_pkts_allocated - ni->state->n_rx_pkts - - ni->packets->n_free; +ci_inline int ci_netif_pkt_tx_n(ci_netif* ni) +{ + return ni->packets->n_pkts_allocated - ni->state->n_rx_pkts - + ni->packets->n_free; } @@ -3146,24 +3105,27 @@ ci_inline int ci_netif_pkt_tx_n(ci_netif* ni) { * it is possible at the moment -- there may be none free. See * ci_netif_pkt_tx_can_alloc_now(). */ -ci_inline int ci_netif_pkt_tx_may_alloc(ci_netif* ni) { +ci_inline int ci_netif_pkt_tx_may_alloc(ci_netif* ni) +{ int n_tx_pkts = ci_netif_pkt_tx_n(ni); + // ci_log("n tx pkts: %d", n_tx_pkts); return - /* TX is not yet using all of the packet buffers that are exclusively - * reserved for its use. - */ - (n_tx_pkts < NI_OPTS(ni).max_packets - NI_OPTS(ni).max_rx_packets) || - /* The RX rings are nice and full, and TX hasn't hit its limit. */ - (ni->state->mem_pressure == 0 && n_tx_pkts < NI_OPTS(ni).max_tx_packets); + /* TX is not yet using all of the packet buffers that are exclusively + * reserved for its use. + */ + (n_tx_pkts < NI_OPTS(ni).max_packets - NI_OPTS(ni).max_rx_packets) || + /* The RX rings are nice and full, and TX hasn't hit its limit. 
*/ + (ni->state->mem_pressure == 0 && n_tx_pkts < NI_OPTS(ni).max_tx_packets); } /* Returns true if it is permitted and possible to allocate a packet buffer * for the TX path -- either from the free pool of the non-blocking pool. */ -ci_inline int ci_netif_pkt_tx_can_alloc_now(ci_netif* ni) { - return ( (ci_netif_pkt_tx_may_alloc(ni) && ni->packets->n_free > 0) || - ci_netif_pkt_nonb_pool_not_empty(ni) ); +ci_inline int ci_netif_pkt_tx_can_alloc_now(ci_netif* ni) +{ + return ((ci_netif_pkt_tx_may_alloc(ni) && ni->packets->n_free > 0) || + ci_netif_pkt_nonb_pool_not_empty(ni)); } @@ -3171,47 +3133,53 @@ ci_inline int ci_netif_pkt_tx_can_alloc_now(ci_netif* ni) { * the send queue or retransmit queue for a long time, so we must be * careful not to deplete the pool of free buffers too much. */ -ci_inline ci_ip_pkt_fmt* -ci_netif_pkt_tx_tcp_alloc(ci_netif* ni, ci_tcp_state* ts) { +ci_inline ci_ip_pkt_fmt* ci_netif_pkt_tx_tcp_alloc( + ci_netif* ni, ci_tcp_state* ts) +{ int bufset_id; ci_assert(ci_netif_is_locked(ni)); bufset_id = NI_PKT_SET(ni); - if(CI_LIKELY( ci_netif_pkt_tx_may_alloc(ni) && - ni->packets->set[bufset_id].n_free > 0 )) { + if( CI_LIKELY(ci_netif_pkt_tx_may_alloc(ni) && + ni->packets->set[bufset_id].n_free > 0) ) { + LOG_S(ci_log( + "Packets Allocated: %d, Packets free: %d, Set free: %d, Num sets: %d, " + "Pkts per set: %d, Bufset Id: %d", + ni->packets->n_pkts_allocated, ni->packets->n_free, + ni->packets->set[bufset_id].n_free, ni->packets->sets_n, PKTS_PER_SET, + bufset_id)); return ci_netif_pkt_get(ni, bufset_id); - } - else { + } else { if( (! ci_netif_pkt_tx_may_alloc(ni)) && - (NI_OPTS(ni).tcp_sndbuf_mode == 2) && - (ts != NULL) ) + (NI_OPTS(ni).tcp_sndbuf_mode == 2) && (ts != NULL) ) ci_tcp_moderate_sndbuf(ni, ts); /* TCP TX path is always allowed to allocate from the non-blocking pool * because those packet buffers are already allocated to TX. 
*/ - return ci_netif_pkt_alloc_slow(ni, CI_PKT_ALLOC_FOR_TCP_TX | - CI_PKT_ALLOC_USE_NONB); + return ci_netif_pkt_alloc_slow( + ni, CI_PKT_ALLOC_FOR_TCP_TX | CI_PKT_ALLOC_USE_NONB); } } -ci_inline ci_ip_pkt_fmt* ci_netif_pkt_alloc_nonb(ci_netif* ni) +ci_inline ci_ip_pkt_fmt* ci_netif_pkt_alloc_nonb(ci_netif* ni) { - volatile ci_uint64 *nonb_pkt_pool_ptr; + volatile ci_uint64* nonb_pkt_pool_ptr; ci_uint64 link, new_link; unsigned id; ci_ip_pkt_fmt* pkt; oo_pkt_p pp; nonb_pkt_pool_ptr = &(ni->state->nonb_pkt_pool); - again: +again: pkt = NULL; link = *nonb_pkt_pool_ptr; id = link & 0xffffffff; if( id != 0xffffffff ) { OO_PP_INIT(ni, pp, id); pkt = PKT(ni, pp); - new_link = ((unsigned)OO_PP_ID(pkt->next)) | (link & 0xffffffff00000000llu); + new_link = + ((unsigned) OO_PP_ID(pkt->next)) | (link & 0xffffffff00000000llu); if( ci_cas64u_fail(nonb_pkt_pool_ptr, link, new_link) ) goto again; ci_assert_equal(pkt->refcount, 0); @@ -3222,10 +3190,10 @@ ci_inline ci_ip_pkt_fmt* ci_netif_pkt_alloc_nonb(ci_netif* ni) } -ci_inline void ci_netif_pkt_free_nonb_list(ci_netif *ni, oo_pkt_p pkt_list, - ci_ip_pkt_fmt *pkt_list_tail) +ci_inline void ci_netif_pkt_free_nonb_list( + ci_netif* ni, oo_pkt_p pkt_list, ci_ip_pkt_fmt* pkt_list_tail) { - volatile ci_uint64 *nonb_pkt_pool_ptr; + volatile ci_uint64* nonb_pkt_pool_ptr; ci_uint64 new_link, link; nonb_pkt_pool_ptr = &(ni->state->nonb_pkt_pool); @@ -3233,34 +3201,38 @@ ci_inline void ci_netif_pkt_free_nonb_list(ci_netif *ni, oo_pkt_p pkt_list, ci_assert_equal(pkt_list_tail->refcount, 0); link = *nonb_pkt_pool_ptr; OO_PP_INIT(ni, pkt_list_tail->next, link & 0xffffffff); - new_link = ((unsigned)OO_PP_ID(pkt_list)) | - ((link + 0x0000000100000000llu) & 0xffffffff00000000llu); + new_link = ((unsigned) OO_PP_ID(pkt_list)) | + ((link + 0x0000000100000000llu) & 0xffffffff00000000llu); } while( ci_cas64u_fail(nonb_pkt_pool_ptr, link, new_link) ); } -ci_inline void ci_netif_pkt_hold(ci_netif* ni, ci_ip_pkt_fmt* pkt) { +ci_inline void 
ci_netif_pkt_hold(ci_netif* ni, ci_ip_pkt_fmt* pkt) +{ ci_assert_gt(pkt->refcount, 0); ++pkt->refcount; } #ifdef __KERNEL__ -ci_inline void ci_netif_pkt_release_mnl(ci_netif* ni, ci_ip_pkt_fmt* pkt, - int* p_netif_is_locked) { +ci_inline void ci_netif_pkt_release_mnl( + ci_netif* ni, ci_ip_pkt_fmt* pkt, int* p_netif_is_locked) +{ ci_assert_gt(pkt->refcount, 0); if( --pkt->refcount == 0 ) ci_netif_pkt_free(ni, pkt, p_netif_is_locked); } -ci_inline void ci_netif_pkt_release(ci_netif* ni, ci_ip_pkt_fmt* pkt) { +ci_inline void ci_netif_pkt_release(ci_netif* ni, ci_ip_pkt_fmt* pkt) +{ int is_locked = 1; - ci_assert( ci_netif_is_locked(ni) ); + ci_assert(ci_netif_is_locked(ni)); ci_netif_pkt_release_mnl(ni, pkt, &is_locked); } #else -ci_inline void ci_netif_pkt_release(ci_netif* ni, ci_ip_pkt_fmt* pkt) { +ci_inline void ci_netif_pkt_release(ci_netif* ni, ci_ip_pkt_fmt* pkt) +{ ci_assert_gt(pkt->refcount, 0); - ci_assert( ci_netif_is_locked(ni) ); + ci_assert(ci_netif_is_locked(ni)); if( --pkt->refcount == 0 ) ci_netif_pkt_free(ni, pkt); } @@ -3270,15 +3242,13 @@ ci_inline void ci_netif_pkt_release(ci_netif* ni, ci_ip_pkt_fmt* pkt) { /* This is an optimised route for freeing packets when we know there is ** only one reference. 
*/ -#define ci_netif_pkt_release_1ref(ni, pkt) \ - ci_netif_pkt_release(ni, pkt) +#define ci_netif_pkt_release_1ref(ni, pkt) ci_netif_pkt_release(ni, pkt) -#define ci_netif_pkt_release_rx_1ref(ni, pkt) \ +#define ci_netif_pkt_release_rx_1ref(ni, pkt) \ ci_netif_pkt_release_1ref(ni, pkt) -#define ci_netif_pkt_release_rx(ni, pkt) \ - ci_netif_pkt_release(ni, pkt) +#define ci_netif_pkt_release_rx(ni, pkt) ci_netif_pkt_release(ni, pkt) ci_inline int ci_netif_pkt_release_check_keep(ci_netif* ni, ci_ip_pkt_fmt* pkt) { @@ -3288,10 +3258,9 @@ ci_inline int ci_netif_pkt_release_check_keep(ci_netif* ni, ci_ip_pkt_fmt* pkt) */ if( (pkt->rx_flags & CI_PKT_RX_FLAG_KEEP) ) { /* Remove flag so other context (app or reap) will free it */ - pkt->rx_flags &=~ CI_PKT_RX_FLAG_KEEP; + pkt->rx_flags &= ~CI_PKT_RX_FLAG_KEEP; return 0; - } - else { + } else { ci_netif_pkt_release(ni, pkt); return 1; } @@ -3301,33 +3270,33 @@ ci_inline int ci_netif_pkt_release_check_keep(ci_netif* ni, ci_ip_pkt_fmt* pkt) *************************** pktbuf reserve accounting **************** *********************************************************************/ -ci_inline unsigned -__ci_tcp_rx_buf_count(ci_netif* netif, ci_tcp_state* ts) +ci_inline unsigned __ci_tcp_rx_buf_count(ci_netif* netif, ci_tcp_state* ts) { return ts->recv1.num + ts->recv2.num + ts->rob.num; } -ci_inline unsigned -__ci_tcp_rx_reserved_bufs(ci_netif* netif, ci_tcp_state* ts, int allocated_pkts) +ci_inline unsigned __ci_tcp_rx_reserved_bufs( + ci_netif* netif, ci_tcp_state* ts, int allocated_pkts) { - int reserved_bufs = ts->s.b.state != CI_TCP_ESTABLISHED ? 0 : - NI_OPTS(netif).endpoint_packet_reserve; + int reserved_bufs = ts->s.b.state != CI_TCP_ESTABLISHED + ? 
0 + : NI_OPTS(netif).endpoint_packet_reserve; reserved_bufs -= allocated_pkts; /* this many buffers of this socket should have been added to * ns->reserved_pktbufs already */ return CI_MAX(0, reserved_bufs); } -ci_inline unsigned -ci_tcp_rx_reserved_bufs(ci_netif* netif, ci_tcp_state* ts) +ci_inline unsigned ci_tcp_rx_reserved_bufs(ci_netif* netif, ci_tcp_state* ts) { - return __ci_tcp_rx_reserved_bufs(netif, ts, __ci_tcp_rx_buf_count(netif, ts)); + return __ci_tcp_rx_reserved_bufs( + netif, ts, __ci_tcp_rx_buf_count(netif, ts)); } /* adjusts per-nic count of reserved buffers * needs to be called BEFORE any of tcp recv queues gets to be modified */ -ci_inline void -ci_tcp_rx_buf_adjust(ci_netif* netif, ci_tcp_state* ts, ci_ip_pkt_queue* q, int delta) +ci_inline void ci_tcp_rx_buf_adjust( + ci_netif* netif, ci_tcp_state* ts, ci_ip_pkt_queue* q, int delta) { int m, n; @@ -3338,7 +3307,8 @@ ci_tcp_rx_buf_adjust(ci_netif* netif, ci_tcp_state* ts, ci_ip_pkt_queue* q, int return; m = ci_tcp_rx_reserved_bufs(netif, ts); - n = __ci_tcp_rx_reserved_bufs(netif, ts, __ci_tcp_rx_buf_count(netif, ts) + delta); + n = __ci_tcp_rx_reserved_bufs( + netif, ts, __ci_tcp_rx_buf_count(netif, ts) + delta); netif->state->reserved_pktbufs += n - m; ci_assert(ci_netif_is_locked(netif)); @@ -3347,17 +3317,16 @@ ci_tcp_rx_buf_adjust(ci_netif* netif, ci_tcp_state* ts, ci_ip_pkt_queue* q, int extern void ci_ip_queue_drop(ci_netif*, ci_ip_pkt_queue*) CI_HF; -ci_inline void -ci_tcp_rx_queue_drop(ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_queue* q) +ci_inline void ci_tcp_rx_queue_drop( + ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_queue* q) { ci_tcp_rx_buf_adjust(ni, ts, q, -q->num); ci_ip_queue_drop(ni, q); } -ci_inline void -ci_tcp_rx_buf_account_begin(ci_netif* netif, ci_tcp_state* ts) +ci_inline void ci_tcp_rx_buf_account_begin(ci_netif* netif, ci_tcp_state* ts) { - ci_assert( ci_netif_is_locked(netif) ); + ci_assert(ci_netif_is_locked(netif)); ci_assert_nflags(netif->state->flags, 
CI_NETIF_FLAG_PKT_ACCOUNT_PENDING); CI_DEBUG(netif->state->flags |= CI_NETIF_FLAG_PKT_ACCOUNT_PENDING); @@ -3368,10 +3337,9 @@ ci_tcp_rx_buf_account_begin(ci_netif* netif, ci_tcp_state* ts) ci_assert_ge(netif->state->reserved_pktbufs, 0); } -ci_inline void -ci_tcp_rx_buf_account_end(ci_netif* netif, ci_tcp_state* ts) +ci_inline void ci_tcp_rx_buf_account_end(ci_netif* netif, ci_tcp_state* ts) { - ci_assert( ci_netif_is_locked(netif) ); + ci_assert(ci_netif_is_locked(netif)); ci_assert_flags(netif->state->flags, CI_NETIF_FLAG_PKT_ACCOUNT_PENDING); CI_DEBUG(netif->state->flags &= ~CI_NETIF_FLAG_PKT_ACCOUNT_PENDING); @@ -3383,20 +3351,25 @@ ci_tcp_rx_buf_account_end(ci_netif* netif, ci_tcp_state* ts) *************************** ci_ip_pkt_queue ************************** *********************************************************************/ -#define CI_IP_QUEUE_UNLIMITED INT_MAX +#define CI_IP_QUEUE_UNLIMITED INT_MAX -ci_inline void ci_ip_queue_init(ci_ip_pkt_queue *qu) { +ci_inline void ci_ip_queue_init(ci_ip_pkt_queue* qu) +{ qu->num = 0; qu->head = OO_PP_NULL; /* tail undefined when queue is empty */ } -ci_inline int ci_ip_queue_is_empty(ci_ip_pkt_queue *qu) -{ return qu->num == 0; } +ci_inline int ci_ip_queue_is_empty(ci_ip_pkt_queue* qu) +{ + return qu->num == 0; +} -ci_inline int ci_ip_queue_not_empty(ci_ip_pkt_queue *qu) -{ return qu->num; } +ci_inline int ci_ip_queue_not_empty(ci_ip_pkt_queue* qu) +{ + return qu->num; +} ci_inline int ci_ip_queue_is_valid(ci_netif* netif, ci_ip_pkt_queue* qu) @@ -3404,39 +3377,38 @@ ci_inline int ci_ip_queue_is_valid(ci_netif* netif, ci_ip_pkt_queue* qu) if( qu->num == 0 ) return OO_PP_IS_NULL(qu->head); else - return IS_VALID_PKT_ID(netif, qu->head) && - IS_VALID_PKT_ID(netif, qu->tail) && - OO_PP_IS_NULL(PKT(netif, qu->tail)->next); + return IS_VALID_PKT_ID(netif, qu->head) && + IS_VALID_PKT_ID(netif, qu->tail) && + OO_PP_IS_NULL(PKT(netif, qu->tail)->next); } #ifndef NDEBUG /* This function should be NEVER used in 
production! * For temporary debugging only! */ -ci_inline int ci_ip_queue_is_valid_long(ci_netif* netif, ci_ip_pkt_queue* qu, - const char *name) +ci_inline int ci_ip_queue_is_valid_long( + ci_netif* netif, ci_ip_pkt_queue* qu, const char* name) { int i = 0, found_tail = 0; oo_pkt_p id; - if( !ci_ip_queue_is_valid(netif, qu) ) + if( ! ci_ip_queue_is_valid(netif, qu) ) return 0; - for( id = qu->head; OO_PP_NOT_NULL(id); - id = PKT(netif, id)->next ) { - ci_ip_pkt_fmt *pkt = PKT(netif, id); + for( id = qu->head; OO_PP_NOT_NULL(id); id = PKT(netif, id)->next ) { + ci_ip_pkt_fmt* pkt = PKT(netif, id); i++; ci_log("%s queue %d: %d %08x-%08x", name, i, OO_PP_FMT(id), - pkt->pf.tcp_tx.start_seq, pkt->pf.tcp_tx.end_seq); + pkt->pf.tcp_tx.start_seq, pkt->pf.tcp_tx.end_seq); if( OO_PP_EQ(qu->tail, id) ) found_tail = 1; } if( i != qu->num ) { - ci_log("validation of %s queue failed: %d real members, %d declared", - name, i, qu->num); + ci_log("validation of %s queue failed: %d real members, %d declared", name, + i, qu->num); return 0; } if( i && ! 
found_tail ) { - ci_log("validation of %s queue failed: tail %d not in queue", - name, OO_PP_FMT(qu->tail)); + ci_log("validation of %s queue failed: tail %d not in queue", name, + OO_PP_FMT(qu->tail)); return 0; } return 1; @@ -3444,14 +3416,13 @@ ci_inline int ci_ip_queue_is_valid_long(ci_netif* netif, ci_ip_pkt_queue* qu, #endif -ci_inline void __ci_ip_queue_enqueue(ci_netif* netif, ci_ip_pkt_queue* qu, - ci_ip_pkt_fmt* pkt) +ci_inline void __ci_ip_queue_enqueue( + ci_netif* netif, ci_ip_pkt_queue* qu, ci_ip_pkt_fmt* pkt) { if( ci_ip_queue_is_empty(qu) ) { ci_assert(OO_PP_IS_NULL(qu->head)); qu->head = OO_PKT_P(pkt); - } - else { + } else { ci_assert(OO_PP_NOT_NULL(qu->head)); /* This assumes the netif lock is held, so use ci_ip_queue_enqueue_nnl() if it's not */ @@ -3462,24 +3433,24 @@ ci_inline void __ci_ip_queue_enqueue(ci_netif* netif, ci_ip_pkt_queue* qu, } -ci_inline void __ci_tcp_rx_queue_enqueue(ci_netif* netif, ci_tcp_state* ts, - ci_ip_pkt_queue* qu, ci_ip_pkt_fmt* pkt) +ci_inline void __ci_tcp_rx_queue_enqueue( + ci_netif* netif, ci_tcp_state* ts, ci_ip_pkt_queue* qu, ci_ip_pkt_fmt* pkt) { ci_tcp_rx_buf_adjust(netif, ts, qu, 1); __ci_ip_queue_enqueue(netif, qu, pkt); } -ci_inline void ci_ip_queue_enqueue(ci_netif* netif, ci_ip_pkt_queue* qu, - ci_ip_pkt_fmt* pkt) +ci_inline void ci_ip_queue_enqueue( + ci_netif* netif, ci_ip_pkt_queue* qu, ci_ip_pkt_fmt* pkt) { pkt->next = OO_PP_NULL; __ci_ip_queue_enqueue(netif, qu, pkt); } -ci_inline void ci_ip_queue_dequeue(ci_netif* netif, ci_ip_pkt_queue* qu, - ci_ip_pkt_fmt* head) +ci_inline void ci_ip_queue_dequeue( + ci_netif* netif, ci_ip_pkt_queue* qu, ci_ip_pkt_fmt* head) { ci_assert(IS_VALID_PKT_ID(netif, qu->head)); ci_assert_gt(qu->num, 0); @@ -3493,7 +3464,7 @@ ci_inline void ci_ip_queue_dequeue(ci_netif* netif, ci_ip_pkt_queue* qu, ci_inline void ci_tcp_rx_queue_dequeue(ci_netif* netif, ci_tcp_state* ts, - ci_ip_pkt_queue* qu, ci_ip_pkt_fmt* head) + ci_ip_pkt_queue* qu, ci_ip_pkt_fmt* head) { 
ci_tcp_rx_buf_adjust(netif, ts, qu, -1); ci_ip_queue_dequeue(netif, qu, head); @@ -3503,8 +3474,7 @@ ci_inline void ci_tcp_rx_queue_dequeue(ci_netif* netif, ci_tcp_state* ts, ** must point at the last packet in the chain to be moved. */ ci_inline void ci_ip_queue_move(ci_netif* netif, ci_ip_pkt_queue* from, - ci_ip_pkt_queue *to, - ci_ip_pkt_fmt *last, int num) + ci_ip_pkt_queue* to, ci_ip_pkt_fmt* last, int num) { oo_pkt_p originalfromhead; @@ -3519,8 +3489,8 @@ ci_inline void ci_ip_queue_move(ci_netif* netif, ci_ip_pkt_queue* from, from->num -= num; last->next = OO_PP_NULL; ci_wmb(); - /* - * cat the to list and the new list + /* + * cat the to list and the new list */ if( ci_ip_queue_is_empty(to) ) to->head = originalfromhead; @@ -3532,8 +3502,8 @@ ci_inline void ci_ip_queue_move(ci_netif* netif, ci_ip_pkt_queue* from, /* Move entire queue [from] to [to] and re-init [from]. [to] need not * be initialised. */ -ci_inline void ci_ip_queue_move_all(ci_netif* netif, ci_ip_pkt_queue* from, - ci_ip_pkt_queue *to) +ci_inline void ci_ip_queue_move_all( + ci_netif* netif, ci_ip_pkt_queue* from, ci_ip_pkt_queue* to) { ci_assert(netif); ci_assert(from); @@ -3548,13 +3518,13 @@ ci_inline void ci_ip_queue_move_all(ci_netif* netif, ci_ip_pkt_queue* from, **********************************************************************/ /* Limited IPID handling - just run around the block we get at start-up */ -# include -# define NEXT_IP_ID(ni) (NI_IPID(ni)->base | \ - (NI_IPID(ni)->next++ & CI_IPID_BLOCK_MASK)) +#include +#define NEXT_IP_ID(ni) \ + (NI_IPID(ni)->base | (NI_IPID(ni)->next++ & CI_IPID_BLOCK_MASK)) #if CI_CFG_IPV6 -# define NEXT_IP6_ID(ni) (NI_IPID(ni)->ip6_base | \ - (NI_IPID(ni)->ip6_next++ & CI_IP6ID_BLOCK_MASK)) +#define NEXT_IP6_ID(ni) \ + (NI_IPID(ni)->ip6_base | (NI_IPID(ni)->ip6_next++ & CI_IP6ID_BLOCK_MASK)) #endif typedef union { @@ -3564,8 +3534,7 @@ typedef union { ci_uint16 ip4; } ci_ipx_id_t; -ci_inline ci_ipx_id_t -ci_next_ipx_id_be(int af, ci_netif* ni) 
+ci_inline ci_ipx_id_t ci_next_ipx_id_be(int af, ci_netif* ni) { ci_ipx_id_t ipx_id; #if CI_CFG_IPV6 @@ -3580,50 +3549,46 @@ ci_next_ipx_id_be(int af, ci_netif* ni) /* Return true if ip options contain badness. * Badness: Unknown option, source routing, invalid option lengths. */ -extern int ci_ip_options_parse(ci_netif* netif, ci_ip4_hdr* ip, - const int hdr_size); +extern int ci_ip_options_parse( + ci_netif* netif, ci_ip4_hdr* ip, const int hdr_size); /********************************************************************** **************************** citp_waitable **************************** **********************************************************************/ -ci_inline int -citp_waitable_lock_or_set_flag(citp_waitable* w, ci_uint32 flag) +ci_inline int citp_waitable_lock_or_set_flag(citp_waitable* w, ci_uint32 flag) { ci_uint32 l, new_l; int rc; do { l = w->lock.wl_val; - if( ! (l & OO_WAITABLE_LK_LOCKED) ){ + if( ! (l & OO_WAITABLE_LK_LOCKED) ) { new_l = l | OO_WAITABLE_LK_LOCKED; rc = 1; - } - else{ + } else { new_l = l | flag; rc = 0; } - } while(CI_UNLIKELY( ci_cas32u_fail(&w->lock.wl_val, l, new_l) )); + } while( CI_UNLIKELY(ci_cas32u_fail(&w->lock.wl_val, l, new_l)) ); return rc; } -ci_inline void -citp_waitable_lock_set_flag(citp_waitable* w, ci_uint32 flag) +ci_inline void citp_waitable_lock_set_flag(citp_waitable* w, ci_uint32 flag) { ci_uint32 l; do { l = w->lock.wl_val; - } while(CI_UNLIKELY( ci_cas32u_fail(&w->lock.wl_val, l, l | flag) )); + } while( CI_UNLIKELY(ci_cas32u_fail(&w->lock.wl_val, l, l | flag)) ); } -ci_inline void -citp_waitable_lock_clear_flag(citp_waitable* w, ci_uint32 flag) +ci_inline void citp_waitable_lock_clear_flag(citp_waitable* w, ci_uint32 flag) { ci_uint32 l; do { l = w->lock.wl_val; - } while(CI_UNLIKELY( ci_cas32u_fail(&w->lock.wl_val, l, l & ~flag) )); + } while( CI_UNLIKELY(ci_cas32u_fail(&w->lock.wl_val, l, l & ~flag)) ); } @@ -3631,13 +3596,11 @@ citp_waitable_lock_clear_flag(citp_waitable* w, ci_uint32 flag) 
***************************** ICMP/Errors ***************************** **********************************************************************/ -#define ci_icmp_send_error(ni, rx_pkt, type, code) \ - __ci_icmp_send_error((ni), oo_pkt_af(rx_pkt), \ - oo_ipx_hdr(rx_pkt), \ - oo_ether_hdr(rx_pkt), (type), (code)) +#define ci_icmp_send_error(ni, rx_pkt, type, code) \ + __ci_icmp_send_error((ni), oo_pkt_af(rx_pkt), oo_ipx_hdr(rx_pkt), \ + oo_ether_hdr(rx_pkt), (type), (code)) -ci_inline int -ci_icmp_send_port_unreach(ci_netif *ni, ci_ip_pkt_fmt* rx_pkt) +ci_inline int ci_icmp_send_port_unreach(ci_netif* ni, ci_ip_pkt_fmt* rx_pkt) { ci_uint8 type, code; @@ -3645,8 +3608,7 @@ ci_icmp_send_port_unreach(ci_netif *ni, ci_ip_pkt_fmt* rx_pkt) if( IS_AF_INET6(oo_pkt_af(rx_pkt)) ) { type = CI_ICMPV6_DEST_UNREACH; code = CI_ICMPV6_DU_PORT_UNREACH; - } - else + } else #endif { type = CI_ICMP_DEST_UNREACH; @@ -3660,7 +3622,8 @@ ci_icmp_send_port_unreach(ci_netif *ni, ci_ip_pkt_fmt* rx_pkt) ********************************* UDP ********************************* **********************************************************************/ -ci_inline void ci_udp_dec_tx_count(ci_udp_state* us, ci_ip_pkt_fmt* pkt) { +ci_inline void ci_udp_dec_tx_count(ci_udp_state* us, ci_ip_pkt_fmt* pkt) +{ ci_assert(pkt->flags & CI_PKT_FLAG_UDP); ci_assert_ge((int) us->tx_count, (int) pkt->pf.udp.tx_length); us->tx_count -= pkt->pf.udp.tx_length; @@ -3672,7 +3635,9 @@ ci_inline void ci_udp_dec_tx_count(ci_udp_state* us, ci_ip_pkt_fmt* pkt) { ** when to indicate writable in select() and poll(). 
*/ ci_inline int ci_udp_tx_advertise_space(ci_udp_state* us) -{ return (int) (us->s.so.sndbuf - us->tx_count) > (int) (us->tx_count >> 1u); } +{ + return (int) (us->s.so.sndbuf - us->tx_count) > (int) (us->tx_count >> 1u); +} /********************************************************************* @@ -3681,19 +3646,18 @@ ci_inline int ci_udp_tx_advertise_space(ci_udp_state* us) extern void ci_udp_recv_q_drop(ci_netif*, ci_udp_recv_q*) CI_HF; extern int ci_udp_recv_q_reap(ci_netif*, ci_udp_recv_q*) CI_HF; -extern void ci_udp_recvq_dump(ci_netif* ni, ci_udp_recv_q* q, - const char* pf1, const char* pf2, - oo_dump_log_fn_t logger, void* log_arg) CI_HF; +extern void ci_udp_recvq_dump(ci_netif* ni, ci_udp_recv_q* q, const char* pf1, + const char* pf2, oo_dump_log_fn_t logger, void* log_arg) CI_HF; #if CI_CFG_TIMESTAMPING -extern int ci_udp_timestamp_q_enqueue(ci_netif* ni, ci_udp_state* us, - ci_ip_pkt_fmt* pkt); +extern int ci_udp_timestamp_q_enqueue( + ci_netif* ni, ci_udp_state* us, ci_ip_pkt_fmt* pkt); #endif /* Put a packet into recv_q but don't mark it as visible to the consumer yet. * Stack should be locked. */ -ci_inline void ci_udp_recv_q_put_pending(ci_netif* ni, ci_udp_recv_q* q, - ci_ip_pkt_fmt* pkt) +ci_inline void ci_udp_recv_q_put_pending( + ci_netif* ni, ci_udp_recv_q* q, ci_ip_pkt_fmt* pkt) { ci_assert(ci_netif_is_locked(ni)); @@ -3701,7 +3665,7 @@ ci_inline void ci_udp_recv_q_put_pending(ci_netif* ni, ci_udp_recv_q* q, /* Changing [pkt->rx_flags] without the socket lock is safe as long as we * ensure that we do so before posting [pkt] to the recvq. 
* This is required for proper functioning ci_udp_recv_q_get() */ - pkt->rx_flags &=~ CI_PKT_RX_FLAG_RECV_Q_CONSUMED; + pkt->rx_flags &= ~CI_PKT_RX_FLAG_RECV_Q_CONSUMED; } pkt->udp_rx_next = OO_PP_NULL; @@ -3715,8 +3679,7 @@ ci_inline void ci_udp_recv_q_put_pending(ci_netif* ni, ci_udp_recv_q* q, if( OO_PP_NOT_NULL(q->head) ) { PKT_CHK(ni, q->tail)->udp_rx_next = OO_PKT_P(pkt); ci_udp_recv_q_reap(ni, q); - } - else { + } else { ci_assert(OO_PP_IS_NULL(q->extract)); /* q->extract is modified here without proper lock * (q->extract is proteced by socket lock). @@ -3742,8 +3705,8 @@ ci_inline void ci_udp_recv_q_put_complete(ci_udp_recv_q* q, unsigned n_buffers) /* Put a packet into recv_q. Stack should be locked. */ -ci_inline void ci_udp_recv_q_put(ci_netif* ni, ci_udp_recv_q* q, - ci_ip_pkt_fmt* pkt) +ci_inline void ci_udp_recv_q_put( + ci_netif* ni, ci_udp_recv_q* q, ci_ip_pkt_fmt* pkt) { ci_udp_recv_q_put_pending(ni, q, pkt); ci_udp_recv_q_put_complete(q, pkt->n_buffers); @@ -3751,13 +3714,12 @@ ci_inline void ci_udp_recv_q_put(ci_netif* ni, ci_udp_recv_q* q, /* Get a packet from recv_q. Socket should be locked. */ -ci_inline ci_ip_pkt_fmt* ci_udp_recv_q_get(ci_netif* ni, - ci_udp_recv_q* q) +ci_inline ci_ip_pkt_fmt* ci_udp_recv_q_get(ci_netif* ni, ci_udp_recv_q* q) { ci_ip_pkt_fmt* pkt; if( ci_udp_recv_q_is_empty(q) ) - return NULL; + return NULL; /* Full barrier needed here: we need to prevent reordering of access to * q->extract before the above check, and also, since bumping q->extract @@ -3773,20 +3735,19 @@ ci_inline ci_ip_pkt_fmt* ci_udp_recv_q_get(ci_netif* ni, */ q->extract = OO_ACCESS_ONCE(pkt->udp_rx_next); pkt = PKT_CHK_NNL(ni, q->extract); - ci_assert( !(pkt->rx_flags & CI_PKT_RX_FLAG_RECV_Q_CONSUMED) ); + ci_assert(! 
(pkt->rx_flags & CI_PKT_RX_FLAG_RECV_Q_CONSUMED)); } return pkt; } -ci_inline void ci_udp_recv_q_deliver(ci_netif* ni, ci_udp_recv_q* q, - ci_ip_pkt_fmt* pkt) +ci_inline void ci_udp_recv_q_deliver( + ci_netif* ni, ci_udp_recv_q* q, ci_ip_pkt_fmt* pkt) { - q->pkts_delivered += pkt->n_buffers; + q->pkts_delivered += pkt->n_buffers; pkt->rx_flags |= CI_PKT_RX_FLAG_RECV_Q_CONSUMED; } -ci_inline ci_ip_pkt_fmt* ci_udp_recv_q_next(ci_netif* ni, - ci_ip_pkt_fmt* pkt) +ci_inline ci_ip_pkt_fmt* ci_udp_recv_q_next(ci_netif* ni, ci_ip_pkt_fmt* pkt) { /* This function is called without the stack lock, and so we had better be * certain that the packet is not going to be reaped under our feet. */ @@ -3818,85 +3779,92 @@ static inline int ci_tcp_retransq_is_empty(ci_tcp_state* ts) ! (ts->tcpflags & CI_TCPT_FLAG_FIN_PENDING); } -ci_inline void ci_tcp_rto_check_and_set(ci_netif* netif, ci_tcp_state* ts) { +ci_inline void ci_tcp_rto_check_and_set(ci_netif* netif, ci_tcp_state* ts) +{ /* shouldn't set an RTO if no data to send */ - ci_assert(!ci_tcp_retransq_is_empty(ts)); + ci_assert(! ci_tcp_retransq_is_empty(ts)); /* shouldn't set an RTO timer in a state that doesn't allow them */ - ci_assert(!(ts->s.b.state & CI_TCP_STATE_NO_TIMERS)); + ci_assert(! (ts->s.b.state & CI_TCP_STATE_NO_TIMERS)); if( ! ci_ip_timer_pending(netif, &ts->rto_tid) ) { #if CI_CFG_TAIL_DROP_PROBE - ts->tcpflags &=~ CI_TCPT_FLAG_TAIL_DROP_TIMING; + ts->tcpflags &= ~CI_TCPT_FLAG_TAIL_DROP_TIMING; #endif ci_ip_timer_set(netif, &ts->rto_tid, ci_tcp_time_now(netif) + ts->rto); } } ci_inline void ci_tcp_rto_clear(ci_netif* netif, ci_tcp_state* ts) -{ ci_ip_timer_clear(netif, &ts->rto_tid); } +{ + ci_ip_timer_clear(netif, &ts->rto_tid); +} -ci_inline void ci_tcp_rto_restart(ci_netif* netif, ci_tcp_state* ts) { +ci_inline void ci_tcp_rto_restart(ci_netif* netif, ci_tcp_state* ts) +{ /* shouldn't set an RTO if retrans queue is empty */ - ci_assert(!ci_tcp_retransq_is_empty(ts)); + ci_assert(! 
ci_tcp_retransq_is_empty(ts)); /* shouldn't set an RTO timer in a state that doesn't allow them */ - ci_assert(!(ts->s.b.state & CI_TCP_STATE_NO_TIMERS)); + ci_assert(! (ts->s.b.state & CI_TCP_STATE_NO_TIMERS)); #if CI_CFG_TAIL_DROP_PROBE - ts->tcpflags &=~ CI_TCPT_FLAG_TAIL_DROP_TIMING; + ts->tcpflags &= ~CI_TCPT_FLAG_TAIL_DROP_TIMING; #endif ci_ip_timer_modify(netif, &ts->rto_tid, ci_tcp_time_now(netif) + ts->rto); } -ci_inline void ci_tcp_rto_set_with_timeout(ci_netif* netif, ci_tcp_state* ts, - ci_iptime_t timeout) { +ci_inline void ci_tcp_rto_set_with_timeout( + ci_netif* netif, ci_tcp_state* ts, ci_iptime_t timeout) +{ /* shouldn't set an RTO if retrans queue is empty */ - ci_assert(!ci_tcp_retransq_is_empty(ts)); + ci_assert(! ci_tcp_retransq_is_empty(ts)); /* shouldn't set an RTO timer in a state that doesn't allow them */ - ci_assert(!(ts->s.b.state & CI_TCP_STATE_NO_TIMERS)); + ci_assert(! (ts->s.b.state & CI_TCP_STATE_NO_TIMERS)); ci_ip_timer_set(netif, &ts->rto_tid, ci_tcp_time_now(netif) + timeout); } -#define ci_tcp_rto_set(ni, ts) ci_tcp_rto_set_with_timeout((ni), (ts), \ - (ts)->rto) +#define ci_tcp_rto_set(ni, ts) \ + ci_tcp_rto_set_with_timeout((ni), (ts), (ts)->rto) -ci_inline void ci_tcp_rto_bound(ci_netif* netif, ci_tcp_state* ts) { +ci_inline void ci_tcp_rto_bound(ci_netif* netif, ci_tcp_state* ts) +{ ts->rto = CI_MIN(NI_CONF(netif).tconst_rto_max, ts->rto); ts->rto = CI_MAX(NI_CONF(netif).tconst_rto_min, ts->rto); } /* delayed ack timers */ -ci_inline void ci_tcp_delack_check_and_set(ci_netif* netif, - ci_tcp_state* ts) { +ci_inline void ci_tcp_delack_check_and_set(ci_netif* netif, ci_tcp_state* ts) +{ /* shouldn't set a timer in a state that doesn't allow them */ - ci_assert(!(ts->s.b.state & CI_TCP_STATE_NO_TIMERS)); - if( !ci_ip_timer_pending(netif, &ts->delack_tid) ) - ci_ip_timer_set(netif, &ts->delack_tid, ci_tcp_time_now(netif) + - NI_CONF(netif).tconst_delack); + ci_assert(! (ts->s.b.state & CI_TCP_STATE_NO_TIMERS)); + if( ! 
ci_ip_timer_pending(netif, &ts->delack_tid) ) + ci_ip_timer_set(netif, &ts->delack_tid, + ci_tcp_time_now(netif) + NI_CONF(netif).tconst_delack); } ci_inline void ci_tcp_delack_clear(ci_netif* netif, ci_tcp_state* ts) -{ ci_ip_timer_clear(netif, &ts->delack_tid); } +{ + ci_ip_timer_clear(netif, &ts->delack_tid); +} #if CI_CFG_DYNAMIC_ACK_RATE -ci_inline void ci_tcp_delack_soon(ci_netif* netif, ci_tcp_state* ts) +ci_inline void ci_tcp_delack_soon(ci_netif* netif, ci_tcp_state* ts) { /* shouldn't set a timer in a state that doesn't allow them */ - ci_assert(!(ts->s.b.state & CI_TCP_STATE_NO_TIMERS)); + ci_assert(! (ts->s.b.state & CI_TCP_STATE_NO_TIMERS)); ci_assert_gt(ts->acks_pending & CI_TCP_ACKS_PENDING_MASK, - NI_OPTS(netif).delack_thresh); + NI_OPTS(netif).delack_thresh); ts->acks_pending |= CI_TCP_DELACK_SOON_FLAG; if( ci_ip_timer_pending(netif, &ts->delack_tid) ) - ci_ip_timer_modify(netif, &ts->delack_tid, ci_tcp_time_now(netif)+1); + ci_ip_timer_modify(netif, &ts->delack_tid, ci_tcp_time_now(netif) + 1); else - ci_ip_timer_set(netif, &ts->delack_tid, ci_tcp_time_now(netif)+1); + ci_ip_timer_set(netif, &ts->delack_tid, ci_tcp_time_now(netif) + 1); } #endif #if CI_CFG_TAIL_DROP_PROBE -ci_inline int ci_tcp_taildrop_probe_enabled(const ci_netif* ni, - const ci_tcp_state* ts) +ci_inline int ci_tcp_taildrop_probe_enabled( + const ci_netif* ni, const ci_tcp_state* ts) { - return NI_OPTS(ni).tail_drop_probe && - (ts->tcpflags & CI_TCPT_FLAG_SACK) && + return NI_OPTS(ni).tail_drop_probe && (ts->tcpflags & CI_TCPT_FLAG_SACK) && ts->congstate == CI_TCP_CONG_OPEN && (ts->s.b.state & CI_TCP_STATE_SYNCHRONISED); } @@ -3918,8 +3886,8 @@ ci_inline int ci_tcp_taildrop_probe_enabled(const ci_netif* ni, */ #define TCP_TIMEOUT_MIN(netif) (NI_CONF(netif).tconst_rto_min / 10) -ci_inline unsigned ci_tcp_taildrop_timeout(const ci_netif* netif, - const ci_tcp_state* ts ) +ci_inline unsigned ci_tcp_taildrop_timeout( + const ci_netif* netif, const ci_tcp_state* ts) { unsigned offset; 
@@ -3934,11 +3902,10 @@ ci_inline unsigned ci_tcp_taildrop_timeout(const ci_netif* netif, offset += NI_CONF(netif).tconst_rto_min; else offset += TCP_TIMEOUT_MIN(netif); - } - else { + } else { /* ts->sa = 0 at start of day; it can be too small when 1 or 2 packets * were acked. */ - offset = NI_CONF(netif).tconst_rto_initial; + offset = NI_CONF(netif).tconst_rto_initial; } return CI_MIN(offset, ts->rto); } @@ -3946,14 +3913,14 @@ ci_inline unsigned ci_tcp_taildrop_timeout(const ci_netif* netif, #else -ci_inline int ci_tcp_taildrop_probe_enabled(const ci_netif* ni, - const ci_tcp_state* ts) +ci_inline int ci_tcp_taildrop_probe_enabled( + const ci_netif* ni, const ci_tcp_state* ts) { return 0; } -ci_inline unsigned ci_tcp_taildrop_timeout(const ci_netif* netif, - const ci_tcp_state* ts ) +ci_inline unsigned ci_tcp_taildrop_timeout( + const ci_netif* netif, const ci_tcp_state* ts) { ci_assert(0); return 0; @@ -3971,43 +3938,47 @@ ci_inline unsigned ci_tcp_taildrop_timeout(const ci_netif* netif, * @param ts TCP control block * @param t Relative time when keepalive timer should expire */ -ci_inline void ci_tcp_kalive_restart(ci_netif *netif, ci_tcp_state* ts, - ci_iptime_t t) { - /* - * Actually, if there are such situations remove this assert, +ci_inline void ci_tcp_kalive_restart( + ci_netif* netif, ci_tcp_state* ts, ci_iptime_t t) +{ + /* + * Actually, if there are such situations remove this assert, * but now I can't see any cases when this is false. */ /* ?? Why not use ts->s.b.state & CI_TCP_STATE_NO_TIMERS?? 
*/ - ci_assert( ts->s.b.state != CI_TCP_CLOSED && - ts->s.b.state != CI_TCP_LISTEN ); + ci_assert(ts->s.b.state != CI_TCP_CLOSED && ts->s.b.state != CI_TCP_LISTEN); if( ts->s.s_flags & CI_SOCK_FLAG_KALIVE ) ci_ip_timer_modify(netif, &ts->kalive_tid, ci_tcp_time_now(netif) + t); else /* * ka_probes is not cleared somewhere, as soon as with disabled - * keepalive feature this field should be zero + * keepalive feature this field should be zero */ ci_assert(ts->ka_probes == 0); } /* - * Gets the value of keepalive IDLE time - time before start sending + * Gets the value of keepalive IDLE time - time before start sending * keepalive probes. * * @param ts TCP control block */ ci_inline ci_iptime_t ci_tcp_kalive_idle_get(ci_tcp_state* ts) -{ return ts->c.t_ka_time; } +{ + return ts->c.t_ka_time; +} /* - * Gets the value of keepalive IDLE time in seconds - time before + * Gets the value of keepalive IDLE time in seconds - time before * start sending keepalive probes. * * @param ts TCP control block */ ci_inline ci_iptime_t ci_tcp_kalive_idle_in_secs_get(ci_tcp_state* ts) -{ return ts->c.t_ka_time_in_secs; } +{ + return ts->c.t_ka_time_in_secs; +} /* * Gets the value of keepalive probe interval - interval between two @@ -4016,14 +3987,14 @@ ci_inline ci_iptime_t ci_tcp_kalive_idle_in_secs_get(ci_tcp_state* ts) * @param ts TCP control block * @param netif netif */ -ci_inline ci_iptime_t ci_tcp_kalive_intvl_get(ci_netif* netif, - ci_tcp_state* ts) +ci_inline ci_iptime_t ci_tcp_kalive_intvl_get( + ci_netif* netif, ci_tcp_state* ts) { return ts->c.t_ka_intvl; } /* - * Gets the value of keepalive probe interval in seconds- interval between + * Gets the value of keepalive probe interval in seconds- interval between * two consequent probes. 
* * @param ts TCP control block @@ -4039,16 +4010,20 @@ ci_inline ci_iptime_t ci_tcp_kalive_intvl_in_secs_get(ci_tcp_state* ts) * @param ts TCP control block */ ci_inline unsigned ci_tcp_kalive_probes_get(ci_tcp_state* ts) -{ return ts->c.ka_probe_th; } +{ + return ts->c.ka_probe_th; +} ci_inline void ci_tcp_kalive_check_and_clear(ci_netif* netif, ci_tcp_state* ts) -{ ci_ip_timer_clear(netif, &ts->kalive_tid); } +{ + ci_ip_timer_clear(netif, &ts->kalive_tid); +} /* Sort out the keepalive timer when an ACK is received */ -ci_inline void ci_tcp_kalive_reset(ci_netif *netif, ci_tcp_state *ts) +ci_inline void ci_tcp_kalive_reset(ci_netif* netif, ci_tcp_state* ts) { - if (ts->ka_probes) { + if( ts->ka_probes ) { /* This is a bit pointless, but necessary to get through WHQL for * chimney. We have to restart the timer here rather than just * let it expire and sort things out then because if the interval @@ -4056,7 +4031,7 @@ ci_inline void ci_tcp_kalive_reset(ci_netif *netif, ci_tcp_state *ts) * fail some WHQL tests. For now we ignore the time that has * expired on this timer so far */ ci_tcp_kalive_restart(netif, ts, ci_tcp_kalive_idle_get(ts)); - } + } ts->ka_probes = 0; } @@ -4065,8 +4040,8 @@ ci_inline void ci_tcp_kalive_reset(ci_netif *netif, ci_tcp_state *ts) ci_inline void ci_tcp_zwin_set(ci_netif* netif, ci_tcp_state* ts) { ci_iptime_t t; - ci_assert( ! (ts->s.b.state & CI_TCP_STATE_NO_TIMERS) ); - ci_assert( OO_SP_IS_NULL(ts->local_peer) ); + ci_assert(! (ts->s.b.state & CI_TCP_STATE_NO_TIMERS)); + ci_assert(OO_SP_IS_NULL(ts->local_peer)); if( ts->zwin_probes == 0 ) t = ts->rto << ts->zwin_acks; else @@ -4078,23 +4053,23 @@ ci_inline void ci_tcp_zwin_set(ci_netif* netif, ci_tcp_state* ts) /* Put ts on the 'some recycling needs to be done for this socket' timer * queue, starting the timer if needed. 
*/ -ci_inline void ci_tcp_recycle_reset(ci_netif* netif, ci_tcp_state* ts) { +ci_inline void ci_tcp_recycle_reset(ci_netif* netif, ci_tcp_state* ts) +{ #if CI_CFG_TCP_OFFLOAD_RECYCLER - struct oo_p_dllink_state link = oo_p_dllink_sb(netif, &ts->s.b, - &ts->recycle_link); + struct oo_p_dllink_state link = + oo_p_dllink_sb(netif, &ts->s.b, &ts->recycle_link); ci_assert(ci_ip_queue_not_empty(&ts->rob)); if( ! oo_p_dllink_is_empty(netif, link) ) return; - oo_p_dllink_add(netif, - oo_p_dllink_ptr(netif, &netif->state->recycle_retry_q), - link); + oo_p_dllink_add( + netif, oo_p_dllink_ptr(netif, &netif->state->recycle_retry_q), link); if( ! ci_ip_timer_pending(netif, &netif->state->recycle_tid) ) { /* This recycle timer exists to deal with the possibility of drops * and/or queue overflows in the link between plugin and host. Since * that's guaranteed to be a very fast link, we hard-code the minimum * possible timeout and share the timer across all sockets. */ - ci_ip_timer_set(netif, &netif->state->recycle_tid, - ci_tcp_time_now(netif) + 1); + ci_ip_timer_set( + netif, &netif->state->recycle_tid, ci_tcp_time_now(netif) + 1); } #endif } @@ -4105,13 +4080,17 @@ ci_inline void ci_tcp_recycle_reset(ci_netif* netif, ci_tcp_state* ts) { **********************************************************************/ ci_inline int ci_tcp_sendq_is_empty(ci_tcp_state* ts) -{ return ci_ip_queue_is_empty(&ts->send); } +{ + return ci_ip_queue_is_empty(&ts->send); +} ci_inline int ci_tcp_sendq_not_empty(ci_tcp_state* ts) -{ return ci_ip_queue_not_empty(&ts->send); } +{ + return ci_ip_queue_not_empty(&ts->send); +} ci_inline void ci_tcp_sendq_drop(ci_netif* ni, ci_tcp_state* ts) -{ +{ ts->send_out += ts->send.num; ci_ip_queue_drop(ni, &ts->send); } @@ -4131,7 +4110,7 @@ static inline int ci_tcp_resend_fin(ci_tcp_state* ts, ci_netif* netif) { tcp_enq_nxt(ts) -= 1; if( ci_tcp_add_fin(ts, netif) == 0 ) { - ts->tcpflags &=~ CI_TCPT_FLAG_FIN_PENDING; + ts->tcpflags &= ~CI_TCPT_FLAG_FIN_PENDING; 
CITP_STATS_NETIF_INC(netif, tcp_cant_fin_resolved); return 1; } @@ -4147,8 +4126,8 @@ static inline int ci_tcp_resend_fin(ci_tcp_state* ts, ci_netif* netif) ci_inline int ci_tcp_is_cached(ci_tcp_state* ts) { #if CI_CFG_FD_CACHING - ci_assert_equal(!!(ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE), - !!(ts->cached_on_fd != -1)); + ci_assert_equal(! ! (ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE), + ! ! (ts->cached_on_fd != -1)); return ts->cached_on_fd != -1; #else return 0; @@ -4156,25 +4135,29 @@ ci_inline int ci_tcp_is_cached(ci_tcp_state* ts) } -ci_inline ci_uint16 tcp_eff_mss(const ci_tcp_state* ts) { +ci_inline ci_uint16 tcp_eff_mss(const ci_tcp_state* ts) +{ if( ts->s.b.state != CI_TCP_CLOSED ) { ci_assert(ts->s.b.state != CI_TCP_LISTEN); ci_assert_gt(CI_CFG_TCP_MINIMUM_MSS, tcp_outgoing_opts_len(ts)); - ci_assert_ge(ts->eff_mss,CI_CFG_TCP_MINIMUM_MSS-tcp_outgoing_opts_len(ts)); + ci_assert_ge( + ts->eff_mss, CI_CFG_TCP_MINIMUM_MSS - tcp_outgoing_opts_len(ts)); } return ts->eff_mss; } -ci_inline void ci_tcp_fast_path_enable(ci_tcp_state* ts) { - ts->fast_path_check = ts->incoming_tcp_hdr_len << 26u; +ci_inline void ci_tcp_fast_path_enable(ci_tcp_state* ts) +{ + ts->fast_path_check = ts->incoming_tcp_hdr_len << 26u; ts->fast_path_check |= CI_TCP_FLAG_ACK << 16u; - ts->fast_path_check = CI_BSWAP_BE32(ts->fast_path_check); + ts->fast_path_check = CI_BSWAP_BE32(ts->fast_path_check); ci_assert(ci_tcp_can_use_fast_path(ts)); } -ci_inline void ci_tcp_fast_path_disable(ci_tcp_state* ts) { - ci_assert(!ci_tcp_can_use_fast_path(ts)); +ci_inline void ci_tcp_fast_path_disable(ci_tcp_state* ts) +{ + ci_assert(! ci_tcp_can_use_fast_path(ts)); ts->fast_path_check = ~CI_TCP_FAST_PATH_MASK; } @@ -4187,16 +4170,15 @@ ci_inline int ci_tcp_recv_not_blocked(ci_tcp_state* ts) */ int bytes = tcp_rcv_usr(ts); return TCP_RX_DONE(ts) || - (bytes >= ts->s.so.rcvlowat + - ((tcp_urg_data(ts) & CI_TCP_URG_IS_HERE) ? 
1 : 0)); + (bytes >= ts->s.so.rcvlowat + + ((tcp_urg_data(ts) & CI_TCP_URG_IS_HERE) ? 1 : 0)); } -ci_inline -ci_iptime_t ci_tcp_isn2tick(ci_netif* ni, ci_uint32 isn) +ci_inline ci_iptime_t ci_tcp_isn2tick(ci_netif* ni, ci_uint32 isn) { ci_uint64 ticks = isn; - ticks <<= IPTIMER_STATE(ni)->ci_ip_time_frc2isn; /* isn -> frc */ + ticks <<= IPTIMER_STATE(ni)->ci_ip_time_frc2isn; /* isn -> frc */ ticks >>= IPTIMER_STATE(ni)->ci_ip_time_frc2tick; /* frc -> tick */ return ticks; } @@ -4208,14 +4190,12 @@ ci_iptime_t ci_tcp_isn2tick(ci_netif* ni, ci_uint32 isn) * that use the sequence number to decide whether a SYN matching a TIME_WAIT is * acceptable. */ -ci_inline unsigned -ci_tcp_future_isn(ci_netif* ni, ci_addr_t laddr, ci_uint16 lport_be, - ci_addr_t raddr, ci_uint16 rport_be, - ci_uint64 future_delta_ticks) +ci_inline unsigned ci_tcp_future_isn(ci_netif* ni, ci_addr_t laddr, + ci_uint16 lport_be, ci_addr_t raddr, ci_uint16 rport_be, + ci_uint64 future_delta_ticks) { ci_uint64 frc; - ci_uint32 hash = onload_hash3(laddr, lport_be, - raddr, rport_be, IPPROTO_TCP); + ci_uint32 hash = onload_hash3(laddr, lport_be, raddr, rport_be, IPPROTO_TCP); ci_frc64(&frc); frc += future_delta_ticks << IPTIMER_STATE(ni)->ci_ip_time_frc2tick; @@ -4225,29 +4205,26 @@ ci_tcp_future_isn(ci_netif* ni, ci_addr_t laddr, ci_uint16 lport_be, return hash + (frc >> IPTIMER_STATE(ni)->ci_ip_time_frc2isn); } -ci_inline unsigned -ci_tcp_initial_seqno(ci_netif* ni, ci_addr_t laddr, ci_uint16 lport_be, - ci_addr_t raddr, ci_uint16 rport_be) +ci_inline unsigned ci_tcp_initial_seqno(ci_netif* ni, ci_addr_t laddr, + ci_uint16 lport_be, ci_addr_t raddr, ci_uint16 rport_be) { return ci_tcp_future_isn(ni, laddr, lport_be, raddr, rport_be, 0); } /* Returns non-scaled value of the receive window. 
*/ -ci_inline ci_uint32 ci_tcp_rcvbuf2window(ci_uint32 so_rcvbuf, - ci_uint32 amss, - ci_uint8 rcv_wscl) +ci_inline ci_uint32 ci_tcp_rcvbuf2window( + ci_uint32 so_rcvbuf, ci_uint32 amss, ci_uint8 rcv_wscl) { ci_assert(amss); so_rcvbuf = CI_MAX(so_rcvbuf, amss); if( so_rcvbuf % amss ) so_rcvbuf += amss - (so_rcvbuf % amss); - so_rcvbuf = CI_MIN(so_rcvbuf, (unsigned)CI_CFG_TCP_MAX_WINDOW << rcv_wscl); + so_rcvbuf = CI_MIN(so_rcvbuf, (unsigned) CI_CFG_TCP_MAX_WINDOW << rcv_wscl); return so_rcvbuf; } -ci_inline ci_uint16 ci_tcp_calc_rcv_wnd_syn(ci_uint32 so_rcvbuf, - ci_uint16 amss, - ci_uint8 rcv_wscl) +ci_inline ci_uint16 ci_tcp_calc_rcv_wnd_syn( + ci_uint32 so_rcvbuf, ci_uint16 amss, ci_uint8 rcv_wscl) { /* We shouldn't scale the window in any SYN packet, so our max claimable * window is the full 16 bits. If our unscaled window is more than that @@ -4258,57 +4235,74 @@ ci_inline ci_uint16 ci_tcp_calc_rcv_wnd_syn(ci_uint32 so_rcvbuf, ci_inline void ci_tcp_set_rcvbuf(ci_netif* ni, ci_tcp_state* ts) { - ts->rcv_window_max = ci_tcp_rcvbuf2window(ts->s.so.rcvbuf, ts->amss, - ts->rcv_wscl); - if( CI_UNLIKELY( ts->rcv_window_max > ts->s.so.rcvbuf ) ) + ts->rcv_window_max = + ci_tcp_rcvbuf2window(ts->s.so.rcvbuf, ts->amss, ts->rcv_wscl); + if( CI_UNLIKELY(ts->rcv_window_max > ts->s.so.rcvbuf) ) ts->s.so.rcvbuf = ts->rcv_window_max; } -ci_inline void ci_tcp_set_flags(ci_tcp_state* ts, unsigned flags) { +ci_inline void ci_tcp_set_flags(ci_tcp_state* ts, unsigned flags) +{ ci_tcp_hdr* tcp = TS_IPX_TCP(ts); - tcp->tcp_flags = (ci_uint8)flags; + tcp->tcp_flags = (ci_uint8) flags; } -ci_inline void ci_tcp_set_hdr_len(ci_tcp_state* ts, unsigned len) { +ci_inline void ci_tcp_set_hdr_len(ci_tcp_state* ts, unsigned len) +{ ci_tcp_hdr* tcp = TS_IPX_TCP(ts); CI_TCP_HDR_SET_LEN(tcp, len); } -ci_inline void ci_tcp_set_peer(ci_tcp_state* ts, ci_addr_t addr, unsigned port){ - ci_ipcache_set_daddr(&ts->s.pkt ,addr); - TS_IPX_TCP(ts)->tcp_dest_be16 = (ci_uint16)port; +ci_inline void 
ci_tcp_set_peer(ci_tcp_state* ts, ci_addr_t addr, unsigned port) +{ + ci_ipcache_set_daddr(&ts->s.pkt, addr); + TS_IPX_TCP(ts)->tcp_dest_be16 = (ci_uint16) port; ts->s.pkt.dport_be16 = port; ts->s.s_flags |= CI_SOCK_FLAG_CONNECTED; } ci_inline int ci_tcp_max_rcv_window(ci_tcp_state* ts) -{ return ts->rcv_window_max; } +{ + return ts->rcv_window_max; +} /* We'll send window updates whenever the window increases by this much. */ ci_inline int ci_tcp_ack_trigger_delta(ci_tcp_state* ts) -{ return ci_tcp_max_rcv_window(ts) >> 3; } +{ + return ci_tcp_max_rcv_window(ts) >> 3; +} #if CI_CFG_TCP_FASTSTART -# define CITP_TCP_FASTSTART(x) do{ x; }while(0) +#define CITP_TCP_FASTSTART(x) \ + do { \ + x; \ + } while( 0 ) -ci_inline void ci_tcp_reduce_faststart(ci_tcp_state* ts, unsigned reduction) { - if(CI_LIKELY( ts->faststart_acks <= reduction )) +ci_inline void ci_tcp_reduce_faststart(ci_tcp_state* ts, unsigned reduction) +{ + if( CI_LIKELY(ts->faststart_acks <= reduction) ) ts->faststart_acks = 0; else ts->faststart_acks -= reduction; } ci_inline int ci_tcp_is_in_faststart(ci_tcp_state* ts) -{ return ts->faststart_acks != 0; } +{ + return ts->faststart_acks != 0; +} #else -# define CITP_TCP_FASTSTART(x) do{}while(0) -# define ci_tcp_reduce_faststart(ts, n) do{}while(0) -# define ci_tcp_is_in_faststart(ts) (0) +#define CITP_TCP_FASTSTART(x) \ + do { \ + } while( 0 ) +#define ci_tcp_reduce_faststart(ts, n) \ + do { \ + } while( 0 ) +#define ci_tcp_is_in_faststart(ts) (0) #endif @@ -4321,7 +4315,8 @@ ci_inline int ci_tcp_is_in_faststart(ci_tcp_state* ts) * * \return Window size. */ -ci_inline unsigned int ci_tcp_wnd_from_hdr(ci_tcp_hdr* tcphdr, unsigned wscl) { +ci_inline unsigned int ci_tcp_wnd_from_hdr(ci_tcp_hdr* tcphdr, unsigned wscl) +{ unsigned tmp = CI_BSWAP_BE16(tcphdr->tcp_window_be16); return (tcphdr->tcp_flags & CI_TCP_FLAG_SYN) ? 
tmp : (tmp << wscl); } @@ -4334,7 +4329,8 @@ ci_inline unsigned int ci_tcp_wnd_from_hdr(ci_tcp_hdr* tcphdr, unsigned wscl) { * * \param ts TCP state */ -ci_inline ci_uint32 ci_tcp_base_dupack_thresh(ci_tcp_state *ts) { +ci_inline ci_uint32 ci_tcp_base_dupack_thresh(ci_tcp_state* ts) +{ #if CI_CFG_PORT_STRIPING if( ts->tcpflags & CI_TCPT_FLAG_STRIPE ) return NI_OPTS(ni).stripe_dupack_threshold; @@ -4344,8 +4340,9 @@ ci_inline ci_uint32 ci_tcp_base_dupack_thresh(ci_tcp_state *ts) { /* congestion control functions */ -/* set the initial congestion window as in rfc3390/rfc2581/rfc2001 */ -ci_inline void ci_tcp_set_initialcwnd(ci_netif* ni, ci_tcp_state* ts) { +/* set the initial congestion window as in rfc3390/rfc2581/rfc2001 */ +ci_inline void ci_tcp_set_initialcwnd(ci_netif* ni, ci_tcp_state* ts) +{ if( NI_OPTS(ni).initial_cwnd == 0 ) { #if CI_CFG_TCP_INITIAL_CWND_RFC == 3390 /* rfc3390: IW = min (4*SMSS, max (2*SMSS, 4380 bytes)) */ @@ -4357,21 +4354,20 @@ ci_inline void ci_tcp_set_initialcwnd(ci_netif* ni, ci_tcp_state* ts) { /* rfc2581: "IW, the initial value of cwnd, MUST be less than or equal to * 2*SMSS bytes and MUST NOT be more than 2 segments." */ - ts->cwnd = tcp_eff_mss(ts) <<1; + ts->cwnd = tcp_eff_mss(ts) << 1; #elif CI_CFG_TCP_INITIAL_CWND_RFC == 2001 /* rfc2001: IW = ts->eff_mss */ ts->cwnd = tcp_eff_mss(ts); #else -# error Bad CI_CFG_TCP_INITIAL_CWND_RFC +#error Bad CI_CFG_TCP_INITIAL_CWND_RFC #endif - } - else { + } else { if( NI_OPTS(ni).initial_cwnd < tcp_eff_mss(ts) ) { /* issue a warning and set initial_cwnd to eff_mss */ ci_log("EF_TCP_INITIAL_CWND=%d is less than MSS value %d. 
Correcting.", - NI_OPTS(ni).initial_cwnd, tcp_eff_mss(ts)); + NI_OPTS(ni).initial_cwnd, tcp_eff_mss(ts)); } - ts->cwnd = CI_MAX((ci_uint32)tcp_eff_mss(ts),NI_OPTS(ni).initial_cwnd); + ts->cwnd = CI_MAX((ci_uint32) tcp_eff_mss(ts), NI_OPTS(ni).initial_cwnd); } ts->cwnd = CI_MAX(ts->cwnd, NI_OPTS(ni).min_cwnd); /* RFC5681 suggests using the maximum possible send window as the initial @@ -4383,12 +4379,15 @@ ci_inline void ci_tcp_set_initialcwnd(ci_netif* ni, ci_tcp_state* ts) { ts->ssthresh = 65535 << ts->snd_wscl; } -/*! ?? \TODO should we use fackets to make things more exact ? */ +/*! ?? \TODO should we use fackets to make things more exact ? */ ci_inline unsigned ci_tcp_inflight(ci_tcp_state* ts) -{ return SEQ_SUB(ts->snd_nxt, ts->snd_una); } +{ + return SEQ_SUB(ts->snd_nxt, ts->snd_una); +} /* New value for [ssthresh] after loss (RFC2581 p5). */ -ci_inline unsigned ci_tcp_losswnd(ci_tcp_state* ts) { +ci_inline unsigned ci_tcp_losswnd(ci_tcp_state* ts) +{ unsigned x = ci_tcp_inflight(ts) >> 1u; unsigned y = tcp_eff_mss(ts) << 1u; return CI_MAX(x, y); @@ -4396,7 +4395,8 @@ ci_inline unsigned ci_tcp_losswnd(ci_tcp_state* ts) { #if CI_CFG_BURST_CONTROL -ci_inline unsigned ci_tcp_burst_exhausted(ci_netif* ni, ci_tcp_state* ts) { +ci_inline unsigned ci_tcp_burst_exhausted(ci_netif* ni, ci_tcp_state* ts) +{ int extra, retrans_data; unsigned fack; ci_tcp_get_fack(ni, ts, &fack, &retrans_data); @@ -4407,8 +4407,9 @@ ci_inline unsigned ci_tcp_burst_exhausted(ci_netif* ni, ci_tcp_state* ts) { #endif -ci_inline int ci_tcp_can_stripe(ci_netif* ni, unsigned laddr_be32, - unsigned raddr_be32) { +ci_inline int ci_tcp_can_stripe( + ci_netif* ni, unsigned laddr_be32, unsigned raddr_be32) +{ #if CI_CFG_PORT_STRIPING unsigned mask = NI_OPTS(ni).stripe_netmask_be32; return (laddr_be32 & mask) == (raddr_be32 & mask); @@ -4422,7 +4423,8 @@ ci_inline int ci_tcp_can_stripe(ci_netif* ni, unsigned laddr_be32, * any of these counters may be updated concurrently wrt this function, so * we 
have to protect against the result going negative. */ -ci_inline int ci_tcp_sendq_n_pkts(ci_tcp_state* ts) { +ci_inline int ci_tcp_sendq_n_pkts(ci_tcp_state* ts) +{ int n = oo_atomic_read(&ts->send_prequeue_in) + ts->send_in - ts->send_out; return n >= 0 ? n : 0; } @@ -4432,20 +4434,20 @@ ci_inline int ci_tcp_sendq_n_pkts(ci_tcp_state* ts) { ** wake a blocking thread, and to decide whether to indicate the socket is ** writable in select() and poll(). */ -ci_inline int ci_tcp_tx_advertise_space(ci_netif* ni, ci_tcp_state* ts) { +ci_inline int ci_tcp_tx_advertise_space(ci_netif* ni, ci_tcp_state* ts) +{ if( NI_OPTS(ni).tcp_sndbuf_mode ) { int pkts_queued = ci_tcp_sendq_n_pkts(ts) #if CI_CFG_TIMESTAMPING - + ci_udp_recv_q_pkts(&ts->timestamp_q) + + ci_udp_recv_q_pkts(&ts->timestamp_q) #endif - + ts->retrans.num; + + ts->retrans.num; return ts->so_sndbuf_pkts - pkts_queued > (pkts_queued >> 1u); - } - else { + } else { int bytes_enqueued = SEQ_SUB(tcp_enq_nxt(ts), tcp_snd_nxt(ts)); - return ( ts->so_sndbuf_pkts > ci_tcp_sendq_n_pkts(ts) ) && - ( (int) (ts->s.so.sndbuf - bytes_enqueued) > - (int) (bytes_enqueued >> 1u) ); + return (ts->so_sndbuf_pkts > ci_tcp_sendq_n_pkts(ts)) && + ((int) (ts->s.so.sndbuf - bytes_enqueued) > + (int) (bytes_enqueued >> 1u)); } } @@ -4455,33 +4457,34 @@ ci_inline int ci_tcp_tx_advertise_space(ci_netif* ni, ci_tcp_state* ts) { ci_inline int ci_tcp_tx_send_space(ci_netif* ni, ci_tcp_state* ts) { if( NI_OPTS(ni).tcp_sndbuf_mode ) { - return ts->so_sndbuf_pkts - - (ci_tcp_sendq_n_pkts(ts) + return ts->so_sndbuf_pkts - (ci_tcp_sendq_n_pkts(ts) #if CI_CFG_TIMESTAMPING - + ci_udp_recv_q_pkts(&ts->timestamp_q) + + ci_udp_recv_q_pkts(&ts->timestamp_q) #endif - + ts->retrans.num); - } - else + + ts->retrans.num); + } else return ts->so_sndbuf_pkts - ci_tcp_sendq_n_pkts(ts); } /* helpers for RTT sampling without TS option */ -ci_inline void ci_tcp_clear_rtt_timing(ci_tcp_state* ts) { +ci_inline void ci_tcp_clear_rtt_timing(ci_tcp_state* ts) +{ 
ts->timed_seq = tcp_snd_una(ts) - 1; } -ci_inline void ci_tcp_set_rtt_timing(ci_netif* netif, - ci_tcp_state* ts, int seq) { +ci_inline void ci_tcp_set_rtt_timing( + ci_netif* netif, ci_tcp_state* ts, int seq) +{ ts->timed_seq = seq; ts->timed_ts = ci_tcp_time_now(netif); } -ci_inline void ci_tcp_tx_pkt_set_end(ci_tcp_state* ts, ci_ip_pkt_fmt* pkt) { - uint8_t* end = (uint8_t*) oo_tx_l3_hdr(pkt) + ts->outgoing_hdrs_len + - tcp_eff_mss(ts); +ci_inline void ci_tcp_tx_pkt_set_end(ci_tcp_state* ts, ci_ip_pkt_fmt* pkt) +{ + uint8_t* end = + (uint8_t*) oo_tx_l3_hdr(pkt) + ts->outgoing_hdrs_len + tcp_eff_mss(ts); ci_assert_nflags(pkt->flags, CI_PKT_FLAG_INDIRECT); oo_offbuf_set_end(&(pkt->buf), end); } @@ -4493,11 +4496,12 @@ ci_inline void ci_tcp_tx_pkt_set_end(ci_tcp_state* ts, ci_ip_pkt_fmt* pkt) { * immediately after converting a packet to zc (or creating a new packet); * doing it again later may change where the zc_header appears to be located * and corrupt the packet. */ -ci_inline void ci_tcp_tx_pkt_set_zc_header_pos(ci_tcp_state* ts, - ci_ip_pkt_fmt* pkt) { - char* end = CI_PTR_ALIGN_FWD((char*)oo_tx_l3_hdr(pkt) + - sizeof(ci_tcp_hdr) + CI_TCP_MAX_OPTS_LEN, - CI_PKT_ZC_PAYLOAD_ALIGN); +ci_inline void ci_tcp_tx_pkt_set_zc_header_pos( + ci_tcp_state* ts, ci_ip_pkt_fmt* pkt) +{ + char* end = CI_PTR_ALIGN_FWD( + (char*) oo_tx_l3_hdr(pkt) + sizeof(ci_tcp_hdr) + CI_TCP_MAX_OPTS_LEN, + CI_PKT_ZC_PAYLOAD_ALIGN); ci_assert_flags(pkt->flags, CI_PKT_FLAG_INDIRECT); oo_offbuf_set_end(&(pkt->buf), end); pkt->buf.end = CI_MAX(pkt->buf.off, pkt->buf.end); @@ -4505,38 +4509,36 @@ ci_inline void ci_tcp_tx_pkt_set_zc_header_pos(ci_tcp_state* ts, ci_inline ci_uint32 ci_tcp_listenq_max(ci_netif* ni) -{ return NI_OPTS(ni).tcp_backlog_max; } +{ + return NI_OPTS(ni).tcp_backlog_max; +} -ci_inline unsigned ci_ipx_tcp_checksum(int af, const ci_ipx_hdr_t* ipx, - const ci_tcp_hdr* tcp, void* payload) +ci_inline unsigned ci_ipx_tcp_checksum( + int af, const ci_ipx_hdr_t* ipx, const 
ci_tcp_hdr* tcp, void* payload) { #if CI_CFG_IPV6 - if( af == AF_INET6 ) - { + if( af == AF_INET6 ) { return ci_ip6_tcp_checksum(&ipx->ip6, tcp, payload); - } - else + } else #endif { return ci_tcp_checksum(&ipx->ip4, tcp, payload); } } -ci_inline unsigned ci_ipx_udp_checksum(int af, const ci_ipx_hdr_t* ipx, - const ci_udp_hdr* udp, void* payload) +ci_inline unsigned ci_ipx_udp_checksum( + int af, const ci_ipx_hdr_t* ipx, const ci_udp_hdr* udp, void* payload) { - ci_iovec iov = {.iov_base = payload}; + ci_iovec iov = { .iov_base = payload }; #if CI_CFG_IPV6 - if( af == AF_INET6 ) - { + if( af == AF_INET6 ) { iov.iov_len = CI_BSWAP_BE16(ipx->ip6.payload_len) - sizeof(ci_udp_hdr); return ci_ip6_udp_checksum(&ipx->ip6, udp, &iov, 1); - } - else + } else #endif { iov.iov_len = CI_BSWAP_BE16(ipx->ip4.ip_tot_len_be16) - - CI_IP4_IHL(&ipx->ip4) - sizeof(ci_udp_hdr); + CI_IP4_IHL(&ipx->ip4) - sizeof(ci_udp_hdr); return ci_udp_checksum(&ipx->ip4, udp, &iov, 1); } } @@ -4549,7 +4551,7 @@ ci_inline int ci_sock_trylock(ci_netif* ni, citp_waitable* w) { ci_uint32 l = w->lock.wl_val; return ! (l & OO_WAITABLE_LK_LOCKED) && - ci_cas32u_succeed(&w->lock.wl_val, l, l | OO_WAITABLE_LK_LOCKED); + ci_cas32u_succeed(&w->lock.wl_val, l, l | OO_WAITABLE_LK_LOCKED); } /* Always returns 0 (success) at userland. Returns -ERESTARTSYS if @@ -4557,11 +4559,11 @@ ci_inline int ci_sock_trylock(ci_netif* ni, citp_waitable* w) * possibly EINTR?). Return value *must* be checked when invoked in * kernel, else risk of proceeding without the lock held. 
*/ -ci_inline int ci_sock_lock(ci_netif*, citp_waitable*) - OO_MUST_CHECK_RET_IN_KERNEL; +ci_inline int ci_sock_lock( + ci_netif*, citp_waitable*) OO_MUST_CHECK_RET_IN_KERNEL; ci_inline int ci_sock_lock(ci_netif* ni, citp_waitable* w) { - if(CI_LIKELY( ci_cas32u_succeed(&w->lock.wl_val, 0, OO_WAITABLE_LK_LOCKED) )) + if( CI_LIKELY(ci_cas32u_succeed(&w->lock.wl_val, 0, OO_WAITABLE_LK_LOCKED)) ) return 0; #ifdef __KERNEL__ return ci_sock_lock_slow(ni, w); @@ -4576,11 +4578,11 @@ ci_inline int ci_sock_lock(ci_netif* ni, citp_waitable* w) ci_inline void ci_sock_unlock(ci_netif* ni, citp_waitable* w) { - if(CI_UNLIKELY( ci_cas32u_fail(&w->lock.wl_val, OO_WAITABLE_LK_LOCKED, 0) )) + if( CI_UNLIKELY(ci_cas32u_fail(&w->lock.wl_val, OO_WAITABLE_LK_LOCKED, 0)) ) ci_sock_unlock_slow(ni, w); } -ci_inline int ci_sock_is_locked(ci_netif* ni, citp_waitable* w) +ci_inline int ci_sock_is_locked(ci_netif* ni, citp_waitable* w) { return w->lock.wl_val & OO_WAITABLE_LK_LOCKED; } @@ -4591,45 +4593,43 @@ ci_inline int ci_sock_is_locked(ci_netif* ni, citp_waitable* w) *********************************************************************/ /* Use this if you don't own the [get] lock. */ -#define ci_tcp_acceptq_n(tls) \ - ((tls)->acceptq_n_in - (tls)->acceptq_n_out) +#define ci_tcp_acceptq_n(tls) ((tls)->acceptq_n_in - (tls)->acceptq_n_out) /* Use this if you do own the [get] lock. 
*/ -#define ci_tcp_acceptq_not_empty(tls) \ +#define ci_tcp_acceptq_not_empty(tls) \ (((tls)->acceptq_put >= 0) | OO_SP_NOT_NULL((tls)->acceptq_get)) -ci_inline void ci_tcp_acceptq_put(ci_netif* ni, - ci_tcp_socket_listen* tls, - citp_waitable* w) { +ci_inline void ci_tcp_acceptq_put( + ci_netif* ni, ci_tcp_socket_listen* tls, citp_waitable* w) +{ ci_assert(OO_SP_IS_NULL(w->wt_next)); ci_assert(ci_netif_is_locked(ni)); do w->wt_next = OO_SP_FROM_INT(ni, tls->acceptq_put); - while( ci_cas32_fail(&tls->acceptq_put, - OO_SP_TO_INT(w->wt_next), W_ID(w)) ); + while( ci_cas32_fail(&tls->acceptq_put, OO_SP_TO_INT(w->wt_next), W_ID(w)) ); ++tls->acceptq_n_in; } -ci_inline void ci_tcp_acceptq_put_back_tail(ci_netif* ni, - ci_tcp_socket_listen* tls, - citp_waitable* w) { +ci_inline void ci_tcp_acceptq_put_back_tail( + ci_netif* ni, ci_tcp_socket_listen* tls, citp_waitable* w) +{ ci_assert(OO_SP_IS_NULL(w->wt_next)); ci_assert(ci_sock_is_locked(ni, &tls->s.b)); ci_assert(w->sb_aflags & CI_SB_AFLAG_TCP_IN_ACCEPTQ); do w->wt_next = OO_SP_FROM_INT(ni, tls->acceptq_put); - while( ci_cas32_fail(&tls->acceptq_put, - OO_SP_TO_INT(w->wt_next), W_ID(w)) ); + while( ci_cas32_fail(&tls->acceptq_put, OO_SP_TO_INT(w->wt_next), W_ID(w)) ); --tls->acceptq_n_out; } /* Should not be called directly, use ci_tcp_acceptq_get() and * ci_tcp_acceptq_peek(). */ -ci_inline void ci_tcp_acceptq_get_swizzle(ci_netif* ni, - ci_tcp_socket_listen* tls) { +ci_inline void ci_tcp_acceptq_get_swizzle( + ci_netif* ni, ci_tcp_socket_listen* tls) +{ ci_int32 from; oo_sp from_sp; ci_tcp_state* ts; @@ -4651,13 +4651,15 @@ ci_inline void ci_tcp_acceptq_get_swizzle(ci_netif* ni, /* Only call this if ci_tcp_acceptq_not_empty() is true. 
*/ -ci_inline citp_waitable* ci_tcp_acceptq_get(ci_netif* ni, - ci_tcp_socket_listen* tls) { +ci_inline citp_waitable* ci_tcp_acceptq_get( + ci_netif* ni, ci_tcp_socket_listen* tls) +{ citp_waitable* w; ci_assert(ci_sock_is_locked(ni, &tls->s.b) || (tls->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN)); ++tls->acceptq_n_out; - if( OO_SP_IS_NULL(tls->acceptq_get) ) ci_tcp_acceptq_get_swizzle(ni, tls); + if( OO_SP_IS_NULL(tls->acceptq_get) ) + ci_tcp_acceptq_get_swizzle(ni, tls); ci_assert(OO_SP_NOT_NULL(tls->acceptq_get)); w = SP_TO_WAITABLE(ni, tls->acceptq_get); tls->acceptq_get = w->wt_next; @@ -4668,10 +4670,12 @@ ci_inline citp_waitable* ci_tcp_acceptq_get(ci_netif* ni, #ifndef __ci_driver__ /* Only call this if ci_tcp_acceptq_not_empty() is true. */ -ci_inline ci_tcp_state* ci_tcp_acceptq_peek(ci_netif* ni, - ci_tcp_socket_listen* tls) { +ci_inline ci_tcp_state* ci_tcp_acceptq_peek( + ci_netif* ni, ci_tcp_socket_listen* tls) +{ ci_assert(ci_sock_is_locked(ni, &tls->s.b)); - if( OO_SP_IS_NULL(tls->acceptq_get) ) ci_tcp_acceptq_get_swizzle(ni, tls); + if( OO_SP_IS_NULL(tls->acceptq_get) ) + ci_tcp_acceptq_get_swizzle(ni, tls); ci_assert(OO_SP_NOT_NULL(tls->acceptq_get)); return SP_TO_TCP(ni, tls->acceptq_get); } @@ -4679,8 +4683,9 @@ ci_inline ci_tcp_state* ci_tcp_acceptq_peek(ci_netif* ni, /* Must hold the sock lock. 
*/ -ci_inline void ci_tcp_acceptq_put_back(ci_netif* ni, ci_tcp_socket_listen* tls, - citp_waitable* w) { +ci_inline void ci_tcp_acceptq_put_back( + ci_netif* ni, ci_tcp_socket_listen* tls, citp_waitable* w) +{ ci_assert(ci_sock_is_locked(ni, &tls->s.b)); ci_assert(w->sb_aflags & CI_SB_AFLAG_TCP_IN_ACCEPTQ); --tls->acceptq_n_out; @@ -4695,19 +4700,23 @@ ci_inline void ci_tcp_acceptq_put_back(ci_netif* ni, ci_tcp_socket_listen* tls, static inline const char* ci_tcp_aux_type2str(int type) { - switch(type) { - case CI_TCP_AUX_TYPE_SYNRECV: return "syn-recv state"; - case CI_TCP_AUX_TYPE_BUCKET: return "syn-recv bucket"; - case CI_TCP_AUX_TYPE_EPOLL: return "epoll3 state"; - default: return "unknown"; + switch( type ) { + case CI_TCP_AUX_TYPE_SYNRECV: + return "syn-recv state"; + case CI_TCP_AUX_TYPE_BUCKET: + return "syn-recv bucket"; + case CI_TCP_AUX_TYPE_EPOLL: + return "epoll3 state"; + default: + return "unknown"; } } /* Does exactly what it says on the tin! */ ci_inline ci_ni_aux_mem* ci_ni_aux_p2aux(ci_netif* ni, oo_p p) { - ci_assert( OO_P_NOT_NULL(p) ); - return (void*)CI_NETIF_PTR(ni, p); + ci_assert(OO_P_NOT_NULL(p)); + return (void*) CI_NETIF_PTR(ni, p); } ci_inline ci_tcp_state_synrecv* ci_ni_aux_p2synrecv(ci_netif* ni, oo_p oop) { @@ -4734,10 +4743,9 @@ ci_inline ci_pmtu_state_t* ci_ni_aux_p2pmtus(ci_netif* ni, oo_p oop) return &aux->u.pmtus; } -ci_inline citp_waitable* -ci_ni_aux2container_w(ci_ni_aux_mem* aux) +ci_inline citp_waitable* ci_ni_aux2container_w(ci_ni_aux_mem* aux) { - return (void *)((ci_uintptr_t)aux &~ (CI_CFG_EP_BUF_SIZE - 1)); + return (void*) ((ci_uintptr_t) aux & ~(CI_CFG_EP_BUF_SIZE - 1)); } ci_inline oo_p ci_ni_aux2p(ci_netif* ni, ci_ni_aux_mem* aux) @@ -4745,42 +4753,45 @@ ci_inline oo_p ci_ni_aux2p(ci_netif* ni, ci_ni_aux_mem* aux) CI_BUILD_ASSERT(CI_IS_POW2(CI_CFG_EP_BUF_SIZE)); citp_waitable* w = ci_ni_aux2container_w(aux); oo_p sp = oo_sockp_to_statep(ni, W_SP(w)); - OO_P_ADD(sp, (ci_uintptr_t)aux & (CI_CFG_EP_BUF_SIZE - 1)); 
+ OO_P_ADD(sp, (ci_uintptr_t) aux & (CI_CFG_EP_BUF_SIZE - 1)); return sp; } -ci_inline struct oo_p_dllink_state -ci_ni_aux2link(ci_netif* ni, ci_ni_aux_mem* aux) +ci_inline struct oo_p_dllink_state ci_ni_aux2link( + ci_netif* ni, ci_ni_aux_mem* aux) { return oo_p_dllink_statep(ni, ci_ni_aux2p(ni, aux)); } ci_inline void ci_ni_aux_free(ci_netif* ni, ci_ni_aux_mem* aux) { - ci_assert( ci_netif_is_locked(ni) ); + ci_assert(ci_netif_is_locked(ni)); ni->state->n_aux_bufs[aux->type]--; oo_p_dllink_add(ni, oo_p_dllink_ptr(ni, &ni->state->free_aux_mem), - ci_ni_aux2link(ni, aux)); + ci_ni_aux2link(ni, aux)); ni->state->n_free_aux_bufs++; } -ci_inline void ci_tcp_synrecv_free(ci_netif* ni, ci_tcp_state_synrecv* tsr) { +ci_inline void ci_tcp_synrecv_free(ci_netif* ni, ci_tcp_state_synrecv* tsr) +{ ci_ni_aux_free(ni, CI_CONTAINER(ci_ni_aux_mem, u.synrecv, tsr)); } -ci_inline void ci_sb_epoll_free(ci_netif* ni, ci_sb_epoll_state* epoll) { +ci_inline void ci_sb_epoll_free(ci_netif* ni, ci_sb_epoll_state* epoll) +{ ci_ni_aux_free(ni, CI_CONTAINER(ci_ni_aux_mem, u.epoll, epoll)); } -ci_inline void ci_pmtu_state_free(ci_netif* ni, ci_pmtu_state_t* pmtus) { +ci_inline void ci_pmtu_state_free(ci_netif* ni, ci_pmtu_state_t* pmtus) +{ ci_ni_aux_free(ni, CI_CONTAINER(ci_ni_aux_mem, u.pmtus, pmtus)); } extern void ci_ni_aux_more_bufs(ci_netif* ni); -ci_inline int/*bool*/ ci_ni_aux_can_alloc(ci_netif* ni, int type) +ci_inline int /*bool*/ ci_ni_aux_can_alloc(ci_netif* ni, int type) { struct oo_p_dllink_state free_aux_mem = - oo_p_dllink_ptr(ni, &ni->state->free_aux_mem); + oo_p_dllink_ptr(ni, &ni->state->free_aux_mem); if( ni->state->n_aux_bufs[type] >= ni->state->max_aux_bufs[type] ) return CI_FALSE; - if( ! oo_p_dllink_is_empty(ni, free_aux_mem) ) + if( ! oo_p_dllink_is_empty(ni, free_aux_mem) ) return CI_TRUE; ci_ni_aux_more_bufs(ni); return ! 
oo_p_dllink_is_empty(ni, free_aux_mem); @@ -4788,12 +4799,12 @@ ci_inline int/*bool*/ ci_ni_aux_can_alloc(ci_netif* ni, int type) ci_inline oo_p ci_ni_aux_alloc(ci_netif* ni, int type) { struct oo_p_dllink_state free_aux_mem = - oo_p_dllink_ptr(ni, &ni->state->free_aux_mem); + oo_p_dllink_ptr(ni, &ni->state->free_aux_mem); struct oo_p_dllink_state link; ci_ni_aux_mem* aux; - ci_assert( ci_netif_is_locked(ni) ); - if( !ci_ni_aux_can_alloc(ni, type) ) { + ci_assert(ci_netif_is_locked(ni)); + if( ! ci_ni_aux_can_alloc(ni, type) ) { CITP_STATS_NETIF(++ni->state->stats.aux_alloc_fails); return OO_P_NULL; } @@ -4828,12 +4839,14 @@ ci_inline oo_p ci_tcp_synrecv2p(ci_netif* ni, ci_tcp_state_synrecv* tsr) { return ci_ni_aux2p(ni, CI_CONTAINER(ci_ni_aux_mem, u.synrecv, tsr)); } -ci_inline struct oo_p_dllink_state -ci_tcp_synrecv2link(ci_netif* ni, ci_tcp_state_synrecv* tsr) { - ci_ni_aux_mem* aux = CI_CONTAINER(ci_ni_aux_mem, u.synrecv, tsr); +ci_inline struct oo_p_dllink_state ci_tcp_synrecv2link( + ci_netif* ni, ci_tcp_state_synrecv* tsr) +{ + ci_ni_aux_mem* aux = CI_CONTAINER(ci_ni_aux_mem, u.synrecv, tsr); return oo_p_dllink_sb(ni, ci_ni_aux2container_w(aux), &aux->link); } -ci_inline ci_tcp_state_synrecv* ci_tcp_link2synrecv(struct oo_p_dllink* link) { +ci_inline ci_tcp_state_synrecv* ci_tcp_link2synrecv(struct oo_p_dllink* link) +{ return &CI_CONTAINER(ci_ni_aux_mem, link, link)->u.synrecv; } @@ -4847,14 +4860,15 @@ ci_inline unsigned ci_tcp_get_pmtu(ci_netif* netif, ci_tcp_state* ts) x = ts->s.pkt.mtu; if( OO_PP_NOT_NULL(ts->pmtus) ) { ci_pmtu_state_t* pmtus = ci_ni_aux_p2pmtus(netif, ts->pmtus); - return CI_MIN(x, (unsigned)pmtus->pmtu); + return CI_MIN(x, (unsigned) pmtus->pmtu); } return x; } /* find effective MSS value based on smss, PMTU and MTU and optional user * value */ -ci_inline void ci_tcp_set_eff_mss(ci_netif* netif, ci_tcp_state* ts) { +ci_inline void ci_tcp_set_eff_mss(ci_netif* netif, ci_tcp_state* ts) +{ unsigned x; #if CI_CFG_IPV6 int af = 
ipcache_af(&ts->s.pkt); @@ -4864,8 +4878,8 @@ ci_inline void ci_tcp_set_eff_mss(ci_netif* netif, ci_tcp_state* ts) { x = ci_tcp_get_pmtu(netif, ts) - sizeof(ci_tcp_hdr) - CI_IPX_HDR_SIZE(af); - x = CI_MIN(x, (unsigned)ts->smss); - ts->eff_mss = CI_MAX(x, (unsigned)CI_CFG_TCP_MINIMUM_MSS) - + x = CI_MIN(x, (unsigned) ts->smss); + ts->eff_mss = CI_MAX(x, (unsigned) CI_CFG_TCP_MINIMUM_MSS) - tcp_ipx_outgoing_opts_len(af, ts); /* Increase ssthresh & cwndif eff_mss has increased */ @@ -4877,37 +4891,35 @@ ci_inline void ci_tcp_set_eff_mss(ci_netif* netif, ci_tcp_state* ts) { } -ci_inline struct oo_p_dllink_state -ci_sb_epoll_ready_link(ci_netif* ni, ci_sb_epoll_state* epoll, int i) +ci_inline struct oo_p_dllink_state ci_sb_epoll_ready_link( + ci_netif* ni, ci_sb_epoll_state* epoll, int i) { return oo_p_dllink_sb(ni, - ci_ni_aux2container_w(CI_CONTAINER(ci_ni_aux_mem, - u.epoll, epoll)), - &epoll->e[i].ready_link); + ci_ni_aux2container_w(CI_CONTAINER(ci_ni_aux_mem, u.epoll, epoll)), + &epoll->e[i].ready_link); } #define CI_READY_LIST_EACH(bitmask, tmp, i) \ ci_assert_lt((bitmask), 1u << CI_CFG_N_READY_LISTS); \ OO_FOR_EACH_BIT(bitmask, tmp, i) -ci_inline void -ci_netif_put_on_post_poll_epoll(ci_netif* ni, citp_waitable* sb) +ci_inline void ci_netif_put_on_post_poll_epoll(ci_netif* ni, citp_waitable* sb) { #if CI_CFG_EPOLL3 ci_sb_epoll_state* epoll = ci_ni_aux_p2epoll(ni, sb->epoll); ci_uint32 tmp, i; - CI_READY_LIST_EACH(sb->ready_lists_in_use, tmp, i) { + CI_READY_LIST_EACH(sb->ready_lists_in_use, tmp, i) + { struct oo_p_dllink_state link = ci_sb_epoll_ready_link(ni, epoll, i); oo_p_dllink_del(ni, link); - oo_p_dllink_add_tail(ni, - oo_p_dllink_ptr(ni, &ni->state->ready_lists[i]), - link); + oo_p_dllink_add_tail( + ni, oo_p_dllink_ptr(ni, &ni->state->ready_lists[i]), link); } #endif } -ci_inline void -citp_waitable_remove_from_epoll(ci_netif* ni, citp_waitable* w, int do_free) +ci_inline void citp_waitable_remove_from_epoll( + ci_netif* ni, citp_waitable* w, int 
do_free) { ci_sb_epoll_state* epoll; ci_uint32 tmp, i; @@ -4920,7 +4932,8 @@ citp_waitable_remove_from_epoll(ci_netif* ni, citp_waitable* w, int do_free) epoll = ci_ni_aux_p2epoll(ni, w->epoll); ci_assert_equal(epoll->sock_id, w->bufid); - CI_READY_LIST_EACH(w->ready_lists_in_use, tmp, i) { + CI_READY_LIST_EACH(w->ready_lists_in_use, tmp, i) + { struct oo_p_dllink_state link = ci_sb_epoll_ready_link(ni, epoll, i); oo_p_dllink_del(ni, link); oo_p_dllink_init(ni, link); @@ -4937,8 +4950,8 @@ ci_inline void ci_netif_put_on_post_poll(ci_netif* ni, citp_waitable* sb) struct oo_p_dllink_state link = oo_p_dllink_sb(ni, sb, &sb->post_poll_link); oo_p_dllink_del(ni, link); - oo_p_dllink_add_tail(ni, oo_p_dllink_ptr(ni, &ni->state->post_poll_list), - link); + oo_p_dllink_add_tail( + ni, oo_p_dllink_ptr(ni, &ni->state->post_poll_list), link); #if CI_CFG_EPOLL3 if( sb->ready_lists_in_use != 0 ) @@ -4947,11 +4960,11 @@ ci_inline void ci_netif_put_on_post_poll(ci_netif* ni, citp_waitable* sb) } -ci_inline void ci_netif_poll_free_pkts(ci_netif* ni, - struct ci_netif_poll_state* ps) +ci_inline void ci_netif_poll_free_pkts( + ci_netif* ni, struct ci_netif_poll_state* ps) { - ci_ip_pkt_fmt* tail = CI_CONTAINER(ci_ip_pkt_fmt, next, - ps->tx_pkt_free_list_insert); + ci_ip_pkt_fmt* tail = + CI_CONTAINER(ci_ip_pkt_fmt, next, ps->tx_pkt_free_list_insert); ci_netif_pkt_free_nonb_list(ni, ps->tx_pkt_free_list, tail); ni->state->n_async_pkts += ps->tx_pkt_free_list_n; CITP_STATS_NETIF_ADD(ni, pkt_nonb, ps->tx_pkt_free_list_n); @@ -4974,18 +4987,18 @@ ci_inline int citp_shutdown_how_is_valid(int how) ********************************* PMTU ******************************** **********************************************************************/ -/*! Manage the discovery timer. If the time is CI_PMTU_STOP_TIMER then +/*! Manage the discovery timer. If the time is CI_PMTU_STOP_TIMER then * the timer will be killed. 
If the timer is pending it will be modified * otherwise it will be set */ -ci_inline void ci_pmtu_discover_timer(ci_netif* ni, ci_pmtu_state_t* pmtus, - ci_iptime_t timeout) { - ci_ip_timer_clear(ni, &pmtus->tid ); +ci_inline void ci_pmtu_discover_timer( + ci_netif* ni, ci_pmtu_state_t* pmtus, ci_iptime_t timeout) +{ + ci_ip_timer_clear(ni, &pmtus->tid); if( timeout != CI_PMTU_STOP_TIMER ) ci_ip_timer_set(ni, &pmtus->tid, ci_tcp_time_now(ni) + timeout); } - /********************************************************************* ************************* IPv4/IPv6 address helpers ****************** *********************************************************************/ @@ -5006,8 +5019,8 @@ union ci_sockaddr_u { */ ci_inline int ci_tcp_ipv6_is_ipv4(const struct sockaddr* sa) { - if (CI_IP6_IS_V4MAPPED(&CI_SIN6(sa)->sin6_addr) || - CI_IP6_IS_ADDR_ANY(&CI_SIN6(sa)->sin6_addr)) { + if( CI_IP6_IS_V4MAPPED(&CI_SIN6(sa)->sin6_addr) || + CI_IP6_IS_ADDR_ANY(&CI_SIN6(sa)->sin6_addr) ) { return 1; } return 0; @@ -5025,11 +5038,11 @@ ci_inline ci_uint32 ci_get_ip4_addr(int family, const struct sockaddr* sa) #endif #if CI_CFG_FAKE_IPV6 - if (family == AF_INET) + if( family == AF_INET ) return CI_SIN(sa)->sin_addr.s_addr; else { /* IPv6 */ ci_assert(ci_tcp_ipv6_is_ipv4(sa)); - return ((ci_uint32 *)(&CI_SIN6(sa)->sin6_addr))[3]; + return ((ci_uint32*) (&CI_SIN6(sa)->sin6_addr))[3]; } #else return CI_SIN(sa)->sin_addr.s_addr; @@ -5044,8 +5057,8 @@ ci_inline int ci_tcp_ipv6_is_addr_any(const struct sockaddr* sa) ci_inline int ci_sock_maybe_ipv6(ci_sock_cmn* s, const struct sockaddr* addr) { - if(s->domain == PF_INET6 && (!ci_tcp_ipv6_is_ipv4(addr) || - ci_tcp_ipv6_is_addr_any(addr))) + if( s->domain == PF_INET6 && + (! 
ci_tcp_ipv6_is_ipv4(addr) || ci_tcp_ipv6_is_addr_any(addr)) ) return 1; return 0; } @@ -5062,10 +5075,9 @@ ci_inline ci_addr_t ci_get_addr(const struct sockaddr* sa) memcpy(addr.ip6, &CI_SIN6(sa)->sin6_addr, sizeof(addr.ip6)); #else ci_assert(ci_tcp_ipv6_is_ipv4(sa)); - addr.ip4 = ((unsigned*)(&CI_SIN6(sa)->sin6_addr))[3]; + addr.ip4 = ((unsigned*) (&CI_SIN6(sa)->sin6_addr))[3]; #endif - } - else { + } else { addr = CI_ADDR_FROM_IP4(CI_SIN(sa)->sin_addr.s_addr); } return addr; @@ -5081,9 +5093,8 @@ ci_inline ci_uint16 ci_get_port(const struct sockaddr* sa) /* Functions to make a sockaddr structure from a given port/ip: */ /* Get an IPv4 address addr_be32 and fill it into sockaddr_in. */ -ci_inline void -ci_make_sockaddr_from_ip4(struct sockaddr_in *sin, - ci_uint16 port_be16, ci_uint32 addr_be32) +ci_inline void ci_make_sockaddr_from_ip4( + struct sockaddr_in* sin, ci_uint16 port_be16, ci_uint32 addr_be32) { sin->sin_family = AF_INET; sin->sin_port = port_be16; @@ -5093,9 +5104,8 @@ ci_make_sockaddr_from_ip4(struct sockaddr_in *sin, #if CI_CFG_FAKE_IPV6 /* Get an IPv4 address addr_be32 and fill an mapped address * into sockaddr_in6. */ -ci_inline void -ci_make_sockaddr_in6_from_ip4(struct sockaddr_in6 *sin, - ci_uint16 port_be16, ci_uint32 addr_be32) +ci_inline void ci_make_sockaddr_in6_from_ip4( + struct sockaddr_in6* sin, ci_uint16 port_be16, ci_uint32 addr_be32) { sin->sin6_family = AF_INET6; sin->sin6_port = port_be16; @@ -5103,9 +5113,8 @@ ci_make_sockaddr_in6_from_ip4(struct sockaddr_in6 *sin, } /* Get an IPv6 address pointed by addr_be32_p and fill it * into sockaddr_in6. 
*/ -ci_inline void -ci_make_sockaddr_in6_from_ip6(struct sockaddr_in6 *sin, ci_uint16 port_be16, - const ci_uint32* addr_be32_p) +ci_inline void ci_make_sockaddr_in6_from_ip6(struct sockaddr_in6* sin, + ci_uint16 port_be16, const ci_uint32* addr_be32_p) { sin->sin6_family = AF_INET6; sin->sin6_port = port_be16; @@ -5113,8 +5122,8 @@ ci_make_sockaddr_in6_from_ip6(struct sockaddr_in6 *sin, ci_uint16 port_be16, } #endif -ci_inline struct sockaddr_storage -ci_make_sockaddr_storage_from_addr(ci_uint16 port_be16, ci_addr_t addr) +ci_inline struct sockaddr_storage ci_make_sockaddr_storage_from_addr( + ci_uint16 port_be16, ci_addr_t addr) { union { struct sockaddr_in in; @@ -5130,8 +5139,7 @@ ci_make_sockaddr_storage_from_addr(ci_uint16 port_be16, ci_addr_t addr) u.in6.sin6_family = AF_INET6; u.in6.sin6_port = port_be16; memcpy(&u.in6.sin6_addr.s6_addr, addr.ip6, sizeof(addr.ip6)); - } - else + } else #endif { u.in.sin_family = AF_INET; @@ -5151,11 +5159,9 @@ ci_make_sockaddr_storage_from_addr(ci_uint16 port_be16, ci_addr_t addr) * - addr_be32_p is the pointer to IPv4 or IPv6 address, depending on * domain_in parameter; * - scope_id defines sin6_scope_id value for IPv6 link-local addresses. */ -ci_inline void -ci_addr_to_user(struct sockaddr *sa, socklen_t *sa_len, - sa_family_t domain_in, sa_family_t domain_out, - ci_uint16 port_be16, const ci_uint32* addr_be32_p, - ci_ifid_t scope_id) +ci_inline void ci_addr_to_user(struct sockaddr* sa, socklen_t* sa_len, + sa_family_t domain_in, sa_family_t domain_out, ci_uint16 port_be16, + const ci_uint32* addr_be32_p, ci_ifid_t scope_id) { socklen_t len = sizeof(struct sockaddr_in); @@ -5163,28 +5169,26 @@ ci_addr_to_user(struct sockaddr *sa, socklen_t *sa_len, ci_assert(domain_in == AF_INET || domain_in == AF_INET6); ci_assert(domain_out == AF_INET || domain_out == AF_INET6); - if (domain_out == AF_INET6) + if( domain_out == AF_INET6 ) /* One might expect to see SIN6_LEN_RFC2133 here, but Linux uses * sizeof() instead. 
*/ len = sizeof(struct sockaddr_in6); else ci_assert_equal(domain_in, AF_INET); -#else +#else ci_assert_equal(domain_in, AF_INET); ci_assert_equal(domain_out, AF_INET); #endif - if (CI_LIKELY(*sa_len >= len)) { + if( CI_LIKELY(*sa_len >= len) ) { *sa_len = CI_MIN(*sa_len, len); memset(sa, 0, *sa_len); #if CI_CFG_FAKE_IPV6 - if (domain_out == AF_INET) { + if( domain_out == AF_INET ) { ci_make_sockaddr_from_ip4(CI_SIN(sa), port_be16, *addr_be32_p); - } - else if( domain_in == AF_INET ) { + } else if( domain_in == AF_INET ) { ci_make_sockaddr_in6_from_ip4(CI_SIN6(sa), port_be16, *addr_be32_p); - } - else { + } else { ci_make_sockaddr_in6_from_ip6(CI_SIN6(sa), port_be16, addr_be32_p); if( CI_IP6_IS_LINKLOCAL(&CI_SIN6(sa)->sin6_addr) ) CI_SIN6(sa)->sin6_scope_id = scope_id; @@ -5192,18 +5196,17 @@ ci_addr_to_user(struct sockaddr *sa, socklen_t *sa_len, #else ci_make_sockaddr_from_ip4(CI_SIN(sa), port_be16, *addr_be32_p); #endif - } - else { + } else { union ci_sockaddr_u ss_u; - if (*sa_len == 0) { + if( *sa_len == 0 ) { *sa_len = len; return; } memset(&ss_u, 0, len); #if CI_CFG_FAKE_IPV6 - if (domain_out == AF_INET) + if( domain_out == AF_INET ) ci_make_sockaddr_from_ip4(&ss_u.sin, port_be16, *addr_be32_p); else if( domain_in == AF_INET ) ci_make_sockaddr_in6_from_ip4(&ss_u.sin6, port_be16, *addr_be32_p); @@ -5226,12 +5229,10 @@ ci_addr_to_user(struct sockaddr *sa, socklen_t *sa_len, * - at_connect [in] set to 1 means function call on connect(), * 0 means - on bind(). * Returns 0 on success or -1 on failure. 
*/ -ci_inline int -ci_sock_set_ip6_scope_id(ci_netif* ni, ci_sock_cmn* s, - const struct sockaddr* addr, socklen_t addrlen, - int/*bool*/ at_connect) +ci_inline int ci_sock_set_ip6_scope_id(ci_netif* ni, ci_sock_cmn* s, + const struct sockaddr* addr, socklen_t addrlen, int /*bool*/ at_connect) { - const struct sockaddr_in6* sin6 = (const struct sockaddr_in6*)addr; + const struct sockaddr_in6* sin6 = (const struct sockaddr_in6*) addr; if( addrlen >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id ) { ci_ifid_t ifindex = sin6->sin6_scope_id; cicp_hwport_mask_t hwports = 0; @@ -5240,7 +5241,8 @@ ci_sock_set_ip6_scope_id(ci_netif* ni, ci_sock_cmn* s, if( at_connect && s->cp.so_bindtodevice && s->cp.so_bindtodevice != ifindex ) return -1; - rc = oo_cp_find_llap(ni->cplane, ifindex, NULL, NULL, &hwports, NULL, NULL); + rc = + oo_cp_find_llap(ni->cplane, ifindex, NULL, NULL, &hwports, NULL, NULL); if( rc != 0 || hwports == 0 ) return -1; s->cp.so_bindtodevice = ifindex; @@ -5259,9 +5261,9 @@ extern ci_ifid_t ci_rx_pkt_ifindex(ci_netif* ni, const ci_ip_pkt_fmt* pkt); * ci_tcp_recvmsg() */ -ci_inline void ci_tcp_recvmsg_args_init(ci_tcp_recvmsg_args* a, - ci_netif* ni, ci_tcp_state* ts, - ci_msghdr* msg, int flags) { +ci_inline void ci_tcp_recvmsg_args_init(ci_tcp_recvmsg_args* a, ci_netif* ni, + ci_tcp_state* ts, ci_msghdr* msg, int flags) +{ a->ni = ni; a->ts = ts; a->msg = msg; @@ -5269,8 +5271,6 @@ ci_inline void ci_tcp_recvmsg_args_init(ci_tcp_recvmsg_args* a, } - - /********************************************************************* ***************************** Tcpdump support ************************ *********************************************************************/ @@ -5282,7 +5282,7 @@ ci_inline ci_uint16 oo_tcpdump_queue_len(ci_netif* ni) } /* Should we dump this packet? 
*/ -ci_inline int oo_tcpdump_check(ci_netif *ni, ci_ip_pkt_fmt *pkt, int intf_i) +ci_inline int oo_tcpdump_check(ci_netif* ni, ci_ip_pkt_fmt* pkt, int intf_i) { if( ni->state->dump_intf[intf_i] == OO_INTF_I_DUMP_ALL ) { if( oo_tcpdump_queue_len(ni) < CI_CFG_DUMPQUEUE_LEN - 1 ) @@ -5294,8 +5294,8 @@ ci_inline int oo_tcpdump_check(ci_netif *ni, ci_ip_pkt_fmt *pkt, int intf_i) } /* Should we dump this no_match */ -ci_inline int oo_tcpdump_check_no_match(ci_netif *ni, ci_ip_pkt_fmt *pkt, - int intf_i) +ci_inline int oo_tcpdump_check_no_match( + ci_netif* ni, ci_ip_pkt_fmt* pkt, int intf_i) { if( ni->state->dump_intf[intf_i] == OO_INTF_I_DUMP_NO_MATCH ) { if( oo_tcpdump_queue_len(ni) < CI_CFG_DUMPQUEUE_LEN - 1 ) @@ -5310,12 +5310,12 @@ ci_inline int oo_tcpdump_check_no_match(ci_netif *ni, ci_ip_pkt_fmt *pkt, extern void oo_tcpdump_free_pkts(ci_netif* ni, ci_uint16 i); /* Dump this packet */ -ci_inline void oo_tcpdump_dump_pkt(ci_netif *ni, ci_ip_pkt_fmt *pkt) +ci_inline void oo_tcpdump_dump_pkt(ci_netif* ni, ci_ip_pkt_fmt* pkt) { ci_uint16 write_i = ni->state->dump_write_i; oo_pkt_p* dq = ni->state->dump_queue; - if(CI_UNLIKELY( pkt->flags & CI_PKT_FLAG_MSG_WARM )) + if( CI_UNLIKELY(pkt->flags & CI_PKT_FLAG_MSG_WARM) ) return; if( dq[write_i % CI_CFG_DUMPQUEUE_LEN] != OO_PP_NULL ) @@ -5339,14 +5339,13 @@ ci_inline void oo_tcpdump_dump_pkt(ci_netif *ni, ci_ip_pkt_fmt *pkt) *********************************************************************/ /* _bit_set() always increment seqno, even if the bit is already set */ -ci_inline void -oo_os_sock_status_bit_set(ci_sock_cmn *s, ci_int32 bits) +ci_inline void oo_os_sock_status_bit_set(ci_sock_cmn* s, ci_int32 bits) { ci_uint32 tmp; do { tmp = s->os_sock_status; - } while( ci_cas32u_fail(&s->os_sock_status, tmp, - (tmp + (1 << OO_OS_STATUS_SEQ_SHIFT)) | bits) ); + } while( ci_cas32u_fail( + &s->os_sock_status, tmp, (tmp + (1 << OO_OS_STATUS_SEQ_SHIFT)) | bits) ); } ci_inline ci_uint32 oo_os_sock_status_from_mask(int mask) @@ 
-5372,7 +5371,7 @@ ci_inline ci_uint32 oo_os_sock_status_from_mask(int mask) */ #ifdef __KERNEL__ -#define KERNEL_CAST_TO_UNSIGNED_LONG(v) ((unsigned long)(v)) +#define KERNEL_CAST_TO_UNSIGNED_LONG(v) ((unsigned long) (v)) #else #define KERNEL_CAST_TO_UNSIGNED_LONG(v) (v) #endif @@ -5389,7 +5388,7 @@ ci_inline unsigned oo_cycles64_to_usec(ci_netif* ni, ci_uint64 cycles) c = cycles * 1000; #ifdef __KERNEL__ /* 32-bit kernel can't divide 64-bit value */ - if( (unsigned long)c != cycles ) + if( (unsigned long) c != cycles ) val = cycles >> IPTIMER_STATE(ni)->ci_ip_time_frc2us; else #endif @@ -5406,11 +5405,11 @@ ci_inline ci_uint64 __oo_usec_to_cycles64(ci_uint32 khz, unsigned usec) return (ci_uint64) -1; if( usec == 0 ) return 0; - val = (ci_uint64)usec * khz; + val = (ci_uint64) usec * khz; #ifdef __KERNEL__ /* 32-bit kernel can't divide 64-bit value */ - if( (ci_uint64)(unsigned long)val != val ) - val = val << 10; + if( (ci_uint64) (unsigned long) val != val ) + val = val << 10; else #endif val = KERNEL_CAST_TO_UNSIGNED_LONG(val) / 1000; @@ -5418,7 +5417,7 @@ ci_inline ci_uint64 __oo_usec_to_cycles64(ci_uint32 khz, unsigned usec) } #undef KERNEL_CAST_TO_UNSIGNED_LONG #define oo_usec_to_cycles64(ni, usec) \ - __oo_usec_to_cycles64(IPTIMER_STATE(ni)->khz, usec) + __oo_usec_to_cycles64(IPTIMER_STATE(ni)->khz, usec) /********************************************************************** @@ -5440,11 +5439,11 @@ typedef struct oo_zc_buf* onload_zc_handle; * they're invalid pointers and users can't avoid calling zc_handle_to_... * in order to read them. If Intel ever do 6-level page tables then this * debugging facility will have to be removed. 
*/ -#define CI_ZC_HANDLE_MAGIC_MASK 0xff00000000000000ull -#define CI_ZC_HANDLE_MAGIC 0xab00000000000000ull +#define CI_ZC_HANDLE_MAGIC_MASK 0xff00000000000000ull +#define CI_ZC_HANDLE_MAGIC 0xab00000000000000ull #else -#define CI_ZC_HANDLE_MAGIC_MASK ((uintptr_t)0) -#define CI_ZC_HANDLE_MAGIC ((uintptr_t)0) +#define CI_ZC_HANDLE_MAGIC_MASK ((uintptr_t) 0) +#define CI_ZC_HANDLE_MAGIC ((uintptr_t) 0) #endif struct ci_zc_usermem { @@ -5459,15 +5458,15 @@ struct ci_zc_usermem { static inline onload_zc_handle zc_pktbuf_to_handle(ci_ip_pkt_fmt* pkt) { - onload_zc_handle h = (onload_zc_handle)pkt; - CI_DEBUG(h = (onload_zc_handle)((uintptr_t)h | CI_ZC_HANDLE_MAGIC)); + onload_zc_handle h = (onload_zc_handle) pkt; + CI_DEBUG(h = (onload_zc_handle) ((uintptr_t) h | CI_ZC_HANDLE_MAGIC)); return h; } static inline onload_zc_handle zc_usermem_to_handle(struct ci_zc_usermem* um) { - onload_zc_handle h = (onload_zc_handle)((uintptr_t)um | 1); - CI_DEBUG(h = (onload_zc_handle)((uintptr_t)h | CI_ZC_HANDLE_MAGIC)); + onload_zc_handle h = (onload_zc_handle) ((uintptr_t) um | 1); + CI_DEBUG(h = (onload_zc_handle) ((uintptr_t) h | CI_ZC_HANDLE_MAGIC)); return h; } @@ -5475,48 +5474,47 @@ static inline void zc_handle_check(onload_zc_handle h) { /* The surprising -2 in the below is because we use the bottom bit to * indicate pktbuf-or-usermem */ - ci_assert_equal((uintptr_t)h & (sizeof(void*) - 2), 0); - ci_assert_equal((uintptr_t)h & CI_ZC_HANDLE_MAGIC_MASK, CI_ZC_HANDLE_MAGIC); + ci_assert_equal((uintptr_t) h & (sizeof(void*) - 2), 0); + ci_assert_equal((uintptr_t) h & CI_ZC_HANDLE_MAGIC_MASK, CI_ZC_HANDLE_MAGIC); } static inline bool zc_is_pktbuf(onload_zc_handle h) { zc_handle_check(h); - return ((uintptr_t)h & 1) == 0; + return ((uintptr_t) h & 1) == 0; } static inline bool zc_is_usermem(onload_zc_handle h) { zc_handle_check(h); - return ((uintptr_t)h & 1) == 1; + return ((uintptr_t) h & 1) == 1; } static inline ci_ip_pkt_fmt* zc_handle_to_pktbuf(onload_zc_handle h) { 
ci_assert(zc_is_pktbuf(h)); - CI_DEBUG(h = (onload_zc_handle)((uintptr_t)h &~ CI_ZC_HANDLE_MAGIC_MASK)); - return (ci_ip_pkt_fmt*)h; + CI_DEBUG(h = (onload_zc_handle) ((uintptr_t) h & ~CI_ZC_HANDLE_MAGIC_MASK)); + return (ci_ip_pkt_fmt*) h; } static inline struct ci_zc_usermem* zc_handle_to_usermem(onload_zc_handle h) { ci_assert(zc_is_usermem(h)); - CI_DEBUG(h = (onload_zc_handle)((uintptr_t)h &~ CI_ZC_HANDLE_MAGIC_MASK)); + CI_DEBUG(h = (onload_zc_handle) ((uintptr_t) h & ~CI_ZC_HANDLE_MAGIC_MASK)); /* -1 rather than &~1 because it allows better codegen */ - return (struct ci_zc_usermem*)((uintptr_t)h - 1); + return (struct ci_zc_usermem*) ((uintptr_t) h - 1); } -static inline ef_addr zc_usermem_dma_addr(struct ci_zc_usermem* um, - uint64_t user_ptr, int intf_i) +static inline ef_addr zc_usermem_dma_addr( + struct ci_zc_usermem* um, uint64_t user_ptr, int intf_i) { if( um->addr_space == EF_ADDRSPACE_LOCAL ) { uint64_t offset = user_ptr - um->base; - uint64_t* hw_addrs = um->hw_addrs + - ((intf_i * um->size) >> EF_VI_NIC_PAGE_SHIFT); + uint64_t* hw_addrs = + um->hw_addrs + ((intf_i * um->size) >> EF_VI_NIC_PAGE_SHIFT); return hw_addrs[offset >> EF_VI_NIC_PAGE_SHIFT] | (offset & (EF_VI_NIC_PAGE_SIZE - 1)); - } - else { + } else { return user_ptr; } } @@ -5526,5 +5524,5 @@ extern void ci_netif_handle_actions(ci_netif* ni); extern void ci_netif_close_pending(ci_netif* ni); #endif -#endif /* __CI_INTERNAL_IP_H__ */ +#endif /* __CI_INTERNAL_IP_H__ */ /*! \cidoxg_end */ diff --git a/src/include/ci/internal/transport_config_opt.h b/src/include/ci/internal/transport_config_opt.h index 3eee1ef32..1286b6375 100644 --- a/src/include/ci/internal/transport_config_opt.h +++ b/src/include/ci/internal/transport_config_opt.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2004-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! 
\file ** ** \author stg ** \brief Configuration options for transport lib @@ -23,35 +23,35 @@ ** TODO: Checking against MD5 has of the file would be better. ** See also include/ci/internal/ip.h where we do the same thing. */ -#define CI_CVS_OPT_HDR_VERSION ("$Revision$") +#define CI_CVS_OPT_HDR_VERSION ("$Revision$") /* Maximum number of network interfaces (ports) per stack. */ -#define CI_CFG_MAX_INTERFACES 30 +#define CI_CFG_MAX_INTERFACES 30 /* Maximim number of hwports in the system */ -#define CI_CFG_MAX_HWPORTS 30 +#define CI_CFG_MAX_HWPORTS 30 /* Maximum number of local IP addresses in the system */ -#define CI_CFG_MAX_LOCAL_IPADDRS 256 +#define CI_CFG_MAX_LOCAL_IPADDRS 256 /* Do we need team/bond support? */ -#define CI_CFG_TEAMING 1 +#define CI_CFG_TEAMING 1 /* Some defaults. These can be overridden at runtime. */ -#define CI_CFG_NETIF_MAX_ENDPOINTS (1<<13) +#define CI_CFG_NETIF_MAX_ENDPOINTS (1 << 13) /* The real max for endpoint order. * Do not forget to change CI_EPLOCK_NETIF_SOCKET_LIST if you increase * this number.*/ -#define CI_CFG_NETIF_MAX_ENDPOINTS_MAX (1<<21) +#define CI_CFG_NETIF_MAX_ENDPOINTS_MAX (1 << 21) /* ANVL assumes the 2MSL time is 60 secs. Set slightly smaller */ -#define CI_CFG_TCP_TCONST_MSL 25 +#define CI_CFG_TCP_TCONST_MSL 25 #define CI_CFG_TCP_FIN_TIMEOUT 60 -#define CI_CFG_BURST_CONTROL 1 +#define CI_CFG_BURST_CONTROL 1 #if CI_CFG_BURST_CONTROL -#define CI_CFG_TCP_BURST_CONTROL_LIMIT 0 +#define CI_CFG_TCP_BURST_CONTROL_LIMIT 0 #endif #define CI_CFG_CONG_AVOID_NOTIFIED 0 @@ -61,84 +61,84 @@ /* Debug aids. Off by default, as some add lots of overhead. */ #ifndef CI_CFG_RANDOM_DROP -#define CI_CFG_RANDOM_DROP 0 +#define CI_CFG_RANDOM_DROP 0 #endif #ifndef CI_CFG_POISON_BUFS -#define CI_CFG_POISON_BUFS 0 +#define CI_CFG_POISON_BUFS 0 #endif #ifndef CI_CFG_DETAILED_CHECKS -#define CI_CFG_DETAILED_CHECKS 0 +#define CI_CFG_DETAILED_CHECKS 0 #endif /* Whether to hook the syscall function from libc. 
Currently supported only * on x86-64 to simplify the implementation. */ #ifdef __x86_64__ -#define CI_CFG_USERSPACE_SYSCALL 1 +#define CI_CFG_USERSPACE_SYSCALL 1 #else -#define CI_CFG_USERSPACE_SYSCALL 0 +#define CI_CFG_USERSPACE_SYSCALL 0 #endif /* Maximum number of onload stacks handled by single epoll object. * See also epoll_max_stacks module parameter. * Socket from other stacks will look just like "regular file descriptor" * for the onload object, without onload-specific acceleration. */ -#define CI_CFG_EPOLL_MAX_STACKS 16 +#define CI_CFG_EPOLL_MAX_STACKS 16 /* Maximum number of postponed epoll_ctl operations, in case of * EF_UL_EPOLL=2 and EF_EPOLL_CTL_FAST=1 */ -#define CI_CFG_EPOLL_MAX_POSTPONED 10 +#define CI_CFG_EPOLL_MAX_POSTPONED 10 /* Arbitrary limit of 1MB following linux kernel in Onload pipe * implementation */ -#define CI_CFG_MAX_PIPE_SIZE (1<<20) +#define CI_CFG_MAX_PIPE_SIZE (1 << 20) /* Enable this to support port striping. */ -#define CI_CFG_PORT_STRIPING 0 +#define CI_CFG_PORT_STRIPING 0 /* Non-RFC1191 recovery time: * when PMTU goes to min (a very small number, poss. a DoS attack) use - * a shorter recovery time than the RFC allows. + * a shorter recovery time than the RFC allows. * Set to 0 to keep ANVL happy */ -#define CI_CFG_FAST_RECOVER_PMTU_AT_MIN 0 +#define CI_CFG_FAST_RECOVER_PMTU_AT_MIN 0 -#define CI_CFG_SUPPORT_STATS_COLLECTION 1 -#define CI_CFG_TCP_SOCK_STATS 0 +#define CI_CFG_SUPPORT_STATS_COLLECTION 1 +#define CI_CFG_TCP_SOCK_STATS 0 /* Enable this to cause buffered stats (from sockopt) to be output * to the log rather than written to a buffer */ -#define CI_CFG_SEND_STATS_TO_LOG 1 +#define CI_CFG_SEND_STATS_TO_LOG 1 -#define CI_CFG_IP_TIMER_DEBUG 0 +#define CI_CFG_IP_TIMER_DEBUG 0 /* Enable this to return ENOTCONN when recv/recvfrom/recvmsg are * called when not bound/connected (UDP) (see udp_recv.c) */ -#define CI_CFG_POSIX_RECV 0 +#define CI_CFG_POSIX_RECV 0 /* Enable this to have recvmsg() on TCP socket fill the [msg_name]. 
Linux * certainly doesn't. */ -#define CI_CFG_TCP_RECVMSG_MSGNAME 0 +#define CI_CFG_TCP_RECVMSG_MSGNAME 0 /*! * Enable this to return EOPNOTSUPP when connect() is called after * listen() on the same socket (see tcp_connect.c). */ -#define CI_CFG_POSIX_CONNECT_AFTER_LISTEN 0 +#define CI_CFG_POSIX_CONNECT_AFTER_LISTEN 0 -/* send reset for connections with invalid options in SYN packets */ -#define CI_CFG_TCP_INVALID_OPT_RST 1 +/* send reset for connections with invalid options in SYN packets */ +#define CI_CFG_TCP_INVALID_OPT_RST 1 /* initial cwnd setting possible according to rfcs: ** 2001, 2581, 3390 */ -#define CI_CFG_TCP_INITIAL_CWND_RFC 2581 +#define CI_CFG_TCP_INITIAL_CWND_RFC 2581 /* check PAWs on fastpath ** Not necessary by rfc1323, but by ANVL tcp_highperf4.17 */ -#define CI_CFG_TCP_PAWS_ON_FASTPATH 1 +#define CI_CFG_TCP_PAWS_ON_FASTPATH 1 -/* strict check of SEG.SEQ <= Last.ACK.sent < SEG.SEQ + SEG.LEN +/* strict check of SEG.SEQ <= Last.ACK.sent < SEG.SEQ + SEG.LEN ** as on rfc1323 p16 or the looser on p35: ** SEG.SEQ <= Last.ACK.sent <= SEG.SEQ + SEG.LEN implied ** Setting this to 1 will cause it to not update the echoed value @@ -146,21 +146,21 @@ ** Setting this to 0 will leave it vulnerable to misdetection of ** failures when zero length packets get reordered. */ -#define CI_CFG_TCP_RFC1323_STRICT_TSO 0 +#define CI_CFG_TCP_RFC1323_STRICT_TSO 0 /* Minimum MSS value */ -/* ANVL requires some pretty small MSS values. +/* ANVL requires some pretty small MSS values. This is chosen to match the ANVL parameter */ -#define CI_CFG_TCP_MINIMUM_MSS 64 +#define CI_CFG_TCP_MINIMUM_MSS 64 /* Default MSS value */ -#define CI_CFG_TCP_DEFAULT_MSS 536 +#define CI_CFG_TCP_DEFAULT_MSS 536 /* How many RX descriptors to push at a time. */ -#define CI_CFG_RX_DESC_BATCH 16 +#define CI_CFG_RX_DESC_BATCH 16 /* How many packets to fill on TX path before pushing them out. */ -#define CI_CFG_TCP_TX_BATCH 8 +#define CI_CFG_TCP_TX_BATCH 8 /* Maximum receive window size. 
This used to be 0x7fff. Here's why: ** @@ -173,69 +173,69 @@ ** there's a rumour that issues with signed arithmetic may become a problem. ** We have done a few development days of testing with 0xffff without this. */ -#define CI_CFG_TCP_MAX_WINDOW 0xffff +#define CI_CFG_TCP_MAX_WINDOW 0xffff /* RFCs specify that if the receiver shrinks the window the sender * should be robust and notice this. We used to, in the name of * efficiency, ignore shrinking windows. Set to zero to get this old * behaviour */ -#define CI_CFG_NOTICE_WINDOW_SHRINKAGE 1 +#define CI_CFG_NOTICE_WINDOW_SHRINKAGE 1 /* ** Base value for dupack threshold. -*/ -#define CI_CFG_TCP_DUPACK_THRESH_BASE 3 +*/ +#define CI_CFG_TCP_DUPACK_THRESH_BASE 3 /* -** Maximum value for dupack threshold. Should be less than typical window +** Maximum value for dupack threshold. Should be less than typical window ** size (in calculated packets, not in bytes). */ -#define CI_CFG_TCP_DUPACK_THRESH_MAX 127 +#define CI_CFG_TCP_DUPACK_THRESH_MAX 127 /* IP TTL settings */ -#define CI_IP_DFLT_TTL 64 -#define CI_IP_MAX_TTL 255 +#define CI_IP_DFLT_TTL 64 +#define CI_IP_MAX_TTL 255 /* IP TOS default */ -#define CI_IP_DFLT_TOS 0 +#define CI_IP_DFLT_TOS 0 /* 8-bit field - but individual bits have (ignored) meaning */ /* IPv6 Traffic Class default */ -#define CI_IPV6_DFLT_TCLASS 0 +#define CI_IPV6_DFLT_TCLASS 0 /* IPv6 hop limit defaults. Both are equal to corresponding Linux ones. */ -#define CI_IPV6_DFLT_HOPLIMIT 64 -#define CI_IPV6_DFLT_MCASTHOPS 1 +#define CI_IPV6_DFLT_HOPLIMIT 64 +#define CI_IPV6_DFLT_MCASTHOPS 1 /* Should we generate code that protects us against invalid shared state? ** By default we want the kernel to be robust to arbitrary shared state, ** but user-level to be fast. 
*/ #ifndef CI_CFG_NETIF_HARDEN -# ifdef __KERNEL__ -# define CI_CFG_NETIF_HARDEN 1 -# else -# define CI_CFG_NETIF_HARDEN 0 -# endif +#ifdef __KERNEL__ +#define CI_CFG_NETIF_HARDEN 1 +#else +#define CI_CFG_NETIF_HARDEN 0 +#endif #endif /* Support H/W timer to give stack a kick when events are left unhandled * for a while. */ -#define CI_CFG_HW_TIMER 1 +#define CI_CFG_HW_TIMER 1 /* Enable invariant checking on entry/exit to library (sockcall intercept) */ -#define CI_CFG_FDTABLE_CHECKS 0 +#define CI_CFG_FDTABLE_CHECKS 0 /* ** Configuration options for TCP/IP striping. ** - we stripe between hosts if we have a common netmask -** - dupack threshold can be rasied to make the stack more +** - dupack threshold can be rasied to make the stack more ** tolerant to reordering ** - default is all 1s - i.e. striping off */ -#define CI_CFG_STRIPE_DEFAULT_NETMASK 0xffffffff -#define CI_CFG_STRIPE_DEFAULT_DUPACK_THRESHOLD 3 +#define CI_CFG_STRIPE_DEFAULT_NETMASK 0xffffffff +#define CI_CFG_STRIPE_DEFAULT_DUPACK_THRESHOLD 3 /* The default TCP header option number used for striping. We'd like a ** proper assignment, but for now this will have to do: @@ -244,92 +244,92 @@ ** pancakes, and Black Jumbo ate Fifty-five but Little Black Sambo ate a ** Hundred and Sixty-nine, because he was so hungry." */ -#define CI_CFG_STRIPE_DEFAULT_TCP_OPT 251 +#define CI_CFG_STRIPE_DEFAULT_TCP_OPT 251 -/* +/* ** Defaults for non-Linux and for broken Linux. -** Normally, we hope to get these values from OS. +** Normally, we hope to get these values from OS. 
*/ -#define CI_CFG_UDP_SNDBUF_DEFAULT 212992 -#define CI_CFG_UDP_RCVBUF_DEFAULT 212992 -#define CI_CFG_UDP_SNDBUF_MAX 212992 -#define CI_CFG_UDP_RCVBUF_MAX 212992 +#define CI_CFG_UDP_SNDBUF_DEFAULT 212992 +#define CI_CFG_UDP_RCVBUF_DEFAULT 212992 +#define CI_CFG_UDP_SNDBUF_MAX 212992 +#define CI_CFG_UDP_RCVBUF_MAX 212992 /* -**These values are chosen to match the Linux definition of +**These values are chosen to match the Linux definition of **SOCK_MIN_SNDBUF and SOCK_MIN_RCVBUF */ #ifndef SOCK_MIN_SNDBUF -# define CI_SOCK_MIN_SNDBUF 2048 +#define CI_SOCK_MIN_SNDBUF 2048 #else -# define CI_SOCK_MIN_SNDBUF SOCK_MIN_SNDBUF +#define CI_SOCK_MIN_SNDBUF SOCK_MIN_SNDBUF #endif #ifndef SOCK_MIN_RCVBUF -# define CI_SOCK_MIN_RCVBUF 256 +#define CI_SOCK_MIN_RCVBUF 256 #else -# define CI_SOCK_MIN_RCVBUF SOCK_MIN_RCVBUF +#define CI_SOCK_MIN_RCVBUF SOCK_MIN_RCVBUF #endif -#define CI_CFG_UDP_SNDBUF_MIN CI_SOCK_MIN_SNDBUF -#define CI_CFG_UDP_RCVBUF_MIN CI_SOCK_MIN_RCVBUF +#define CI_CFG_UDP_SNDBUF_MIN CI_SOCK_MIN_SNDBUF +#define CI_CFG_UDP_RCVBUF_MIN CI_SOCK_MIN_RCVBUF /* TCP sndbuf */ -#define CI_CFG_TCP_SNDBUF_MIN CI_SOCK_MIN_SNDBUF -#define CI_CFG_TCP_SNDBUF_DEFAULT 16384 -#define CI_CFG_TCP_SNDBUF_MAX 4194304 +#define CI_CFG_TCP_SNDBUF_MIN CI_SOCK_MIN_SNDBUF +#define CI_CFG_TCP_SNDBUF_DEFAULT 16384 +#define CI_CFG_TCP_SNDBUF_MAX 4194304 -#define CI_CFG_TCP_RCVBUF_MIN CI_SOCK_MIN_RCVBUF +#define CI_CFG_TCP_RCVBUF_MIN CI_SOCK_MIN_RCVBUF -#define CI_CFG_TCP_RCVBUF_DEFAULT 87380 -#define CI_CFG_TCP_RCVBUF_MAX 6291456 +#define CI_CFG_TCP_RCVBUF_DEFAULT 87380 +#define CI_CFG_TCP_RCVBUF_MAX 6291456 /* These configuration "options" describe whether the host O/S normally * inherits specific socket state when accept() is called. */ -#define CI_CFG_ACCEPT_INHERITS_NONBLOCK 0 +#define CI_CFG_ACCEPT_INHERITS_NONBLOCK 0 /* Maximum possible value for listen queue (backlog). * It is substituted from OS, when possible. 
*/ -#define CI_TCP_LISTENQ_MAX 256 +#define CI_TCP_LISTENQ_MAX 256 /* Assume this number of listening socket per stack when calculating * EF_TCP_SYNRECV_MAX. */ -#define CI_CFG_ASSUME_LISTEN_SOCKS 4 +#define CI_CFG_ASSUME_LISTEN_SOCKS 4 /* TCP window scale maximum and default. * Maximum is taken from RFC1323 and may be overriden by OS settings for * send value. * Default is overriden based on receive buffer. */ -#define CI_TCP_WSCL_MAX 14 /* RFC 1323 max shift */ +#define CI_TCP_WSCL_MAX 14 /* RFC 1323 max shift */ -/* It is supposed that +/* It is supposed that * CI_TCP_RETRANSMIT_THRESHOLD > CI_TCP_RETRANSMIT_THRESHOLD_SYN. * Do not break this! */ -#define CI_TCP_RETRANSMIT_THRESHOLD 15 /* retransmit 15 times */ -#define CI_TCP_RETRANSMIT_THRESHOLD_ORPHAN 8 /* orphaned sock: 8 times */ -#define CI_TCP_RETRANSMIT_THRESHOLD_SYN 4 /* retransmit SYN 4 times */ +#define CI_TCP_RETRANSMIT_THRESHOLD 15 /* retransmit 15 times */ +#define CI_TCP_RETRANSMIT_THRESHOLD_ORPHAN 8 /* orphaned sock: 8 times */ +#define CI_TCP_RETRANSMIT_THRESHOLD_SYN 4 /* retransmit SYN 4 times */ /* Should we send DSACK option in TCP? */ -#define CI_CFG_TCP_DSACK 1 +#define CI_CFG_TCP_DSACK 1 /* Do we assassinate TIME-WAIT TCP connections when needed? * Default value for EF_TCP_TIME_WAIT_ASSASSINATION. */ -#define CI_CFG_TIME_WAIT_ASSASSINATE 1 +#define CI_CFG_TIME_WAIT_ASSASSINATE 1 /* Default challenge ACK limitation (in count per second), * same as of linux-4.19 */ -#define CI_CFG_CHALLENGE_ACK_LIMIT 1000 +#define CI_CFG_CHALLENGE_ACK_LIMIT 1000 /* Default ACK limitation when sending respnse to invalid packet, * in ms, same as of linux-4.19 */ -#define CI_CFG_TCP_OUT_OF_WINDOW_ACK_RATELIMIT 500 +#define CI_CFG_TCP_OUT_OF_WINDOW_ACK_RATELIMIT 500 /* Path to the /proc/sys/ */ -#define CI_CFG_PROC_PATH "/proc/sys/" +#define CI_CFG_PROC_PATH "/proc/sys/" /* The real max is 30, but let's use larger value. 
*/ -#define CI_CFG_PROC_PATH_LEN_MAX 70 +#define CI_CFG_PROC_PATH_LEN_MAX 70 /* Match procfs/sysctl line limits. */ -#define CI_CFG_PROC_LINE_LEN_MAX 1025 +#define CI_CFG_PROC_LINE_LEN_MAX 1025 /* * CI_CFG_CONGESTION_WINDOW_VALIDATION actviates RFC2861 compliance; @@ -341,12 +341,12 @@ * miliseconds, they end with a tiny congestion window which needs to * be opened up. * - * Make sure you read the comment below for - * CI_CFG_CONGESTION_WINDOW_VALIDATION_DELACK_SCALING if you activate this; - * it is recommended that you activate that option as well if you want this + * Make sure you read the comment below for + * CI_CFG_CONGESTION_WINDOW_VALIDATION_DELACK_SCALING if you activate this; + * it is recommended that you activate that option as well if you want this * option. */ -#define CI_CFG_CONGESTION_WINDOW_VALIDATION 0 +#define CI_CFG_CONGESTION_WINDOW_VALIDATION 0 /* * A substantial performance problem with congestion window validation @@ -366,18 +366,19 @@ /* When the netif is wedged, due to userspace dying while the kernel is in an * inconsistent state, rather than go through the full process of closing the - * endpoint (which could fail, due to the inconsistent state), if DESTROY_WEDGED - * is set, we remove the filters and go straight to deleting data structures. + * endpoint (which could fail, due to the inconsistent state), if + * DESTROY_WEDGED is set, we remove the filters and go straight to deleting + * data structures. */ -#define CI_CFG_DESTROY_WEDGED 1 +#define CI_CFG_DESTROY_WEDGED 1 /* Include support for reducing the rate at which the congestion window is * increased during congestion avoidance. */ -#define CI_CFG_CONG_AVOID_SCALE_BACK 1 +#define CI_CFG_CONG_AVOID_SCALE_BACK 1 -/* +/* * Define how aggressive we should be in opening the congestion window * during slow start. 
* 0: RFC3465 behaviour (at most 2MSS increase for each received ACK) @@ -387,38 +388,38 @@ * See Section 2.2 and 2.3 of RFC3465 for discussion of this, and the * implementation of tcp_slow_start() in the kernel */ -#define CI_CFG_CONG_AVOID_SLOW_START_MODE 2 +#define CI_CFG_CONG_AVOID_SLOW_START_MODE 2 -/* +/* * When CI_CFG_CONG_AVOID_SLOW_START_MODE is zero, and so * RFC3465 behaviour is selected, this supplies the value for "L" from * that RFC. It should be between 1 and 2 to comply - */ -#define CI_CFG_CONG_AVOID_RFC3465_L_VALUE 2 + */ +#define CI_CFG_CONG_AVOID_RFC3465_L_VALUE 2 /* Detect cases where delayed acks could be detrimental to performance * (e.g. in slow start, or after data loss) and send ACKs for all * packets. */ -#define CI_CFG_TCP_FASTSTART 1 +#define CI_CFG_TCP_FASTSTART 1 /* If a tail drop is suspected, try to probe it with a retransmission. -*/ -#define CI_CFG_TAIL_DROP_PROBE 1 + */ +#define CI_CFG_TAIL_DROP_PROBE 1 /* Dump users of TCP and UDP sockets to a log file. */ -#define CI_CFG_LOG_SOCKET_USERS 0 +#define CI_CFG_LOG_SOCKET_USERS 0 /* Include fake IPv6 support (0 - off, 1 - on) */ -#define CI_CFG_FAKE_IPV6 1 +#define CI_CFG_FAKE_IPV6 1 /* Include support for caching file descriptors at user-level. */ -#define CI_CFG_FD_CACHING 1 +#define CI_CFG_FD_CACHING 1 /* Active wild support */ -#define CI_CFG_TCP_SHARED_LOCAL_PORTS 1 +#define CI_CFG_TCP_SHARED_LOCAL_PORTS 1 /* Enable endpoint move. * It is used in: @@ -428,19 +429,19 @@ * - clustering (aka SO_REUSEPORT). * You probably want it to be turned on. */ -#define CI_CFG_ENDPOINT_MOVE 1 +#define CI_CFG_ENDPOINT_MOVE 1 /* Maintain statistics for listening sockets. At time of writing these are ** all gathered off the fast path, so there is no significant performance ** penalty for having them on. */ -#define CI_CFG_STATS_TCP_LISTEN 1 +#define CI_CFG_STATS_TCP_LISTEN 1 /* Maintain per-netif statistics for things like event-queue callbacks etc. 
** At time of writing these are all gathered off the fast path, so there is ** no significant performance penalty for having them on. */ -#define CI_CFG_STATS_NETIF 1 +#define CI_CFG_STATS_NETIF 1 /* Per-netif statistics for spin rounds inside each operation. * It depends on CI_CFG_STATS_NETIF being on. */ @@ -453,31 +454,31 @@ /* Size of packet buffers. Must be 2048 or 4096. The larger value reduces * overhead when packets are large, but wastes memory when they aren't. */ -#define CI_CFG_PKT_BUF_SIZE 2048 +#define CI_CFG_PKT_BUF_SIZE 2048 /* Maximum number of retransmit for SYN-ACKs */ #define CI_CFG_TCP_SYNACK_RETRANS_MAX 10 /* Enable inspection of packets before delivery */ -#define CI_CFG_ZC_RECV_FILTER 1 +#define CI_CFG_ZC_RECV_FILTER 1 /* HACK: Limit the advertised MSS for TCP because our TCP path does not * currently cope with frames that don't fit in a single packet buffer. * This define really exists just to make it easy to find and remove this * hack. */ -#define CI_CFG_LIMIT_AMSS 1 -#define CI_CFG_LIMIT_SMSS 1 +#define CI_CFG_LIMIT_AMSS 1 +#define CI_CFG_LIMIT_SMSS 1 /* Max length of "name" of a stack. */ -#define CI_CFG_STACK_NAME_LEN 26 +#define CI_CFG_STACK_NAME_LEN 26 /* Max length of "name" of a cluster. */ -#define CI_CFG_CLUSTER_NAME_LEN (CI_CFG_STACK_NAME_LEN >> 1) +#define CI_CFG_CLUSTER_NAME_LEN (CI_CFG_STACK_NAME_LEN >> 1) /* Onload tcpdump support */ -#define CI_CFG_TCPDUMP 1 +#define CI_CFG_TCPDUMP 1 #if CI_CFG_TCPDUMP /* Dump queue length, should be 2^x, x <= 16 */ @@ -486,7 +487,7 @@ /* Support for reducing ACK rate at high throughput to improve efficiency */ -#define CI_CFG_DYNAMIC_ACK_RATE 1 +#define CI_CFG_DYNAMIC_ACK_RATE 1 /* Allocate packets in huge pages when possible * Ignored unless your kernel has CONFIG_HUGETLB_PAGE turned on (all the @@ -498,62 +499,62 @@ * To use huge pages, we should allocate exactly 2^10 pkts per set. 
* DO NOT CHANGE THIS VALUE if you have CI_CFG_PKTS_AS_HUGE_PAGES=1 */ #if CI_CFG_PKT_BUF_SIZE == 2048 -#define CI_CFG_PKTS_PER_SET_S 10u +#define CI_CFG_PKTS_PER_SET_S 10u #elif CI_CFG_PKT_BUF_SIZE == 4096 -#define CI_CFG_PKTS_PER_SET_S 9u +#define CI_CFG_PKTS_PER_SET_S 9u #else #error "Incorrect CI_CFG_PKT_BUF_SIZE value" #endif -#define PKTS_PER_SET (1u << CI_CFG_PKTS_PER_SET_S) -#define PKTS_PER_SET_M (PKTS_PER_SET - 1u) +#define PKTS_PER_SET (1u << CI_CFG_PKTS_PER_SET_S) +#define PKTS_PER_SET_M (PKTS_PER_SET - 1u) /* When all packet sets have less than this number of packets available to * use, we'll allocate more packet sets */ -#define CI_CFG_PKT_SET_LOW_WATER (PKTS_PER_SET / 2) +#define CI_CFG_PKT_SET_LOW_WATER (PKTS_PER_SET / 2) /* A packet set with this number of available packets is considered as good * as a completely-unused set. It allows for packet set reuse when there * are a few long-living TCP connections which use 1-10 packets from each * set. */ -#define CI_CFG_PKT_SET_HIGH_WATER (PKTS_PER_SET - PKTS_PER_SET / 32) +#define CI_CFG_PKT_SET_HIGH_WATER (PKTS_PER_SET - PKTS_PER_SET / 32) /* Whether to include code to transmit small packets via PIO */ -#define CI_CFG_PIO 1 -#define CI_CFG_MIN_PIO_BLOCK_ORDER 7 +#define CI_CFG_PIO 1 +#define CI_CFG_MIN_PIO_BLOCK_ORDER 7 /* Whether to include code to transmit packets via CTPIO */ -#define CI_CFG_CTPIO 1 +#define CI_CFG_CTPIO 1 /* How many epolls sets will have a ready list maintained by the stack */ #define CI_CFG_EPOLL1_SETS_PER_STACK 4 /* How many ready lists are maintained */ -#define CI_CFG_N_READY_LISTS CI_CFG_EPOLL1_SETS_PER_STACK +#define CI_CFG_N_READY_LISTS CI_CFG_EPOLL1_SETS_PER_STACK /* Do we need SO_TIMESTAMPING, WODA, ...? */ -#define CI_CFG_TIMESTAMPING 1 +#define CI_CFG_TIMESTAMPING 1 /* Enable native kernel BPF program functionality * (subject to kernel support see CI_HAVE_BPF_NATIVE). * Currently aarch64 doesn't support Onload BPF. 
*/ #ifndef __aarch64__ -#define CI_CFG_WANT_BPF_NATIVE 1 +#define CI_CFG_WANT_BPF_NATIVE 1 #else -#define CI_CFG_WANT_BPF_NATIVE 0 +#define CI_CFG_WANT_BPF_NATIVE 0 #endif /* Most users want epoll2 and epoll3 modes */ -#define CI_CFG_EPOLL2 1 -#define CI_CFG_EPOLL3 1 +#define CI_CFG_EPOLL2 1 +#define CI_CFG_EPOLL3 1 /* Inject packets into kernel if they match hardware filters but do not * match software ones. See inject_kernel_gid module parameter. */ -#define CI_CFG_INJECT_PACKETS 1 +#define CI_CFG_INJECT_PACKETS 1 /* NIC reset, suspend and hot-plug support */ -#define CI_CFG_NIC_RESET_SUPPORT 1 +#define CI_CFG_NIC_RESET_SUPPORT 1 /* Handle incoming ICMP for Onloaded sockets */ -#define CI_CFG_HANDLE_ICMP 1 +#define CI_CFG_HANDLE_ICMP 1 /* Enable cooperation with the SmartNIC TCP reordering plugin */ #define CI_CFG_TCP_OFFLOAD_RECYCLER 0 @@ -575,7 +576,7 @@ #endif /* Enable cooperation with the SmartNIC TX CRC-offload plugin */ -#define CI_CFG_TX_CRC_OFFLOAD 0 +#define CI_CFG_TX_CRC_OFFLOAD 0 /* Do not use SmartNIC TX CRC-offload plugin for NVMeoTCP and instead calculate * PDU digests in Onload. Useful for testing of Onload CRC-offload logic. */ #define CI_CFG_NVME_LOCAL_CRC_MODE 0 @@ -593,7 +594,7 @@ * * * temporary disabled for linux-5.10 (ON-12686) */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,18,0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0) #define CI_HAVE_BPF_NATIVE 1 #else #define CI_HAVE_BPF_NATIVE 0 @@ -609,9 +610,9 @@ /* Size of socket shared state buffer. Must be 1024 or 2048. Larger * value is needed if you enable too many CI_CFG_* options, such as * CI_CFG_TCP_SOCK_STATS. */ -#define CI_CFG_EP_BUF_SIZE 1024 +#define CI_CFG_EP_BUF_SIZE 1024 -#if CI_CFG_IPV6 && !CI_CFG_FAKE_IPV6 +#if CI_CFG_IPV6 && ! 
CI_CFG_FAKE_IPV6 #error "CI_CFG_FAKE_IPV6 should be enabled to support IPv6" #endif diff --git a/src/include/etherfabric/ef_vi.h b/src/include/etherfabric/ef_vi.h index 600940080..ed2f65515 100644 --- a/src/include/etherfabric/ef_vi.h +++ b/src/include/etherfabric/ef_vi.h @@ -2,7 +2,7 @@ /* X-SPDX-Copyright-Text: (c) Copyright 2007-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** \author Solarflare Communications, Inc. ** \brief Virtual Interface definitions for EtherFabric Virtual ** Interface HAL. @@ -25,44 +25,44 @@ * for compilers/platforms that don't provide them */ #if defined(__GNUC__) -# ifdef __KERNEL__ -# include -# include -# include -# include -# else -# include -# ifndef __STDC_FORMAT_MACROS -# define __STDC_FORMAT_MACROS -# endif -# include -# include -# include -# include -# include -# endif -# define EF_VI_ALIGN(x) __attribute__ ((aligned (x))) -# define ef_vi_inline static inline -# define ef_vi_pure __attribute__ ((pure)) -# define ef_vi_cold __attribute__ ((cold)) +#ifdef __KERNEL__ +#include +#include +#include +#include +#else +#include +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif +#include +#include +#include +#include +#include +#endif +#define EF_VI_ALIGN(x) __attribute__((aligned(x))) +#define ef_vi_inline static inline +#define ef_vi_pure __attribute__((pure)) +#define ef_vi_cold __attribute__((cold)) /* Expect noinline to be defined in kernel */ -# if defined(__KERNEL__) && defined (noinline) -# define ef_vi_noinline noinline -# else -# define ef_vi_noinline __attribute__ ((noinline)) -# endif +#if defined(__KERNEL__) && defined(noinline) +#define ef_vi_noinline noinline +#else +#define ef_vi_noinline __attribute__((noinline)) +#endif #else -# error Unknown compiler +#error Unknown compiler #endif /*! 
\brief Cache line sizes for alignment purposes */ #if defined(__powerpc64__) || defined(__powerpc__) -# define EF_VI_DMA_ALIGN 128 +#define EF_VI_DMA_ALIGN 128 #else -# define EF_VI_DMA_ALIGN 64 +#define EF_VI_DMA_ALIGN 64 #endif @@ -76,23 +76,23 @@ extern "C" { /*! \brief An ef_driver_handle is needed to allocate resources. */ #ifdef __KERNEL__ -typedef struct efhw_nic* ef_driver_handle; +typedef struct efhw_nic* ef_driver_handle; #else -typedef int ef_driver_handle; +typedef int ef_driver_handle; #endif /*! \brief A pointer to an event queue */ -typedef uint32_t ef_eventq_ptr; +typedef uint32_t ef_eventq_ptr; /*! \brief An address */ -typedef uint64_t ef_addr; +typedef uint64_t ef_addr; /*! \brief An address of an I/O area for a virtual interface */ -typedef char* ef_vi_ioaddr_t; +typedef char* ef_vi_ioaddr_t; /*! \brief Reference to a non-local address space */ typedef uint64_t ef_addrspace; -#define EF_ADDRSPACE_LOCAL ((uint64_t)-1) +#define EF_ADDRSPACE_LOCAL ((uint64_t) -1) struct ef_vi; struct ef_filter_spec; @@ -103,11 +103,11 @@ struct ef_filter_cookie; **********************************************************************/ /*! \brief The maximum number of queues per virtual interface */ -#define EF_VI_MAX_QS 32 +#define EF_VI_MAX_QS 32 /*! \brief The minimum size of array to pass when polling the event queue */ -#define EF_VI_EVENT_POLL_MIN_EVS 2 +#define EF_VI_EVENT_POLL_MIN_EVS 2 /*! \brief The maximum number of efct receive queues per virtual interface */ -#define EF_VI_MAX_EFCT_RXQS 8 +#define EF_VI_MAX_EFCT_RXQS 8 /********************************************************************** @@ -121,11 +121,11 @@ struct ef_filter_cookie; ** events. It is typically used to identify the buffer associated with ** the transfer. */ -typedef int ef_request_id; +typedef int ef_request_id; /*! \brief Mask to use with an ef_request_id. */ -#define EF_REQUEST_ID_MASK 0xffffffff +#define EF_REQUEST_ID_MASK 0xffffffff /*! 
\brief A token that identifies something that has happened. @@ -138,131 +138,131 @@ typedef int ef_request_id; typedef union { /** A generic event, to query the type when it is unknown */ struct { - unsigned type :16; + unsigned type : 16; } generic; /** An event of type EF_EVENT_TYPE_RX */ struct { - unsigned type :16; - unsigned q_id :8; - unsigned __reserved :8; - unsigned rq_id :32; - unsigned len :16; - unsigned flags :16; - unsigned ofs :16; /* AF_XDP specific */ + unsigned type : 16; + unsigned q_id : 8; + unsigned __reserved : 8; + unsigned rq_id : 32; + unsigned len : 16; + unsigned flags : 16; + unsigned ofs : 16; /* AF_XDP specific */ } rx; /** An event of type EF_EVENT_TYPE_RX_DISCARD */ - struct { /* This *must* have same initial layout as [rx]. */ - unsigned type :16; - unsigned q_id :8; - unsigned __reserved :8; - unsigned rq_id :32; - unsigned len :16; - unsigned flags :16; - unsigned subtype :16; + struct { /* This *must* have same initial layout as [rx]. */ + unsigned type : 16; + unsigned q_id : 8; + unsigned __reserved : 8; + unsigned rq_id : 32; + unsigned len : 16; + unsigned flags : 16; + unsigned subtype : 16; } rx_discard; /** An event of type EF_EVENT_TYPE_TX */ struct { - unsigned type :16; - unsigned q_id :8; - unsigned flags :8; - unsigned desc_id :16; + unsigned type : 16; + unsigned q_id : 8; + unsigned flags : 8; + unsigned desc_id : 16; } tx; /** An event of type EF_EVENT_TYPE_TX_ERROR */ - struct { /* This *must* have same layout as [tx]. */ - unsigned type :16; - unsigned q_id :8; - unsigned flags :8; - unsigned desc_id :16; - unsigned subtype :16; + struct { /* This *must* have same layout as [tx]. */ + unsigned type : 16; + unsigned q_id : 8; + unsigned flags : 8; + unsigned desc_id : 16; + unsigned subtype : 16; } tx_error; /** An event of type EF_EVENT_TYPE_TX_WITH_TIMESTAMP */ - struct { /* This *must* have same layout as [tx] up to [flags]. 
*/ - unsigned type :16; - unsigned q_id :8; - unsigned flags :8; - unsigned rq_id :32; - unsigned ts_sec :32; - unsigned ts_nsec :32; + struct { /* This *must* have same layout as [tx] up to [flags]. */ + unsigned type : 16; + unsigned q_id : 8; + unsigned flags : 8; + unsigned rq_id : 32; + unsigned ts_sec : 32; + unsigned ts_nsec : 32; } tx_timestamp; /** An event of type EF_EVENT_TYPE_TX_ALT */ struct { - unsigned type :16; - unsigned q_id :8; - unsigned __reserved :8; - unsigned alt_id :16; + unsigned type : 16; + unsigned q_id : 8; + unsigned __reserved : 8; + unsigned alt_id : 16; } tx_alt; /** An event of type EF_EVENT_TYPE_RX_NO_DESC_TRUNC */ struct { - unsigned type :16; - unsigned q_id :8; + unsigned type : 16; + unsigned q_id : 8; } rx_no_desc_trunc; /** An event of type EF_EVENT_TYPE_RX_PACKED_STREAM */ struct { - unsigned type :16; - unsigned q_id :8; - unsigned __reserved :8; - unsigned flags :16; - unsigned n_pkts :16; - unsigned ps_flags :8; + unsigned type : 16; + unsigned q_id : 8; + unsigned __reserved : 8; + unsigned flags : 16; + unsigned n_pkts : 16; + unsigned ps_flags : 8; } rx_packed_stream; /** An event of type EF_EVENT_TYPE_SW */ struct { - unsigned type :16; - unsigned data; + unsigned type : 16; + unsigned data; } sw; /** An event of type EF_EVENT_TYPE_RX_MULTI */ struct { - unsigned type :16; - unsigned q_id :8; - unsigned __reserved :8; - unsigned n_descs :16; - unsigned flags :16; + unsigned type : 16; + unsigned q_id : 8; + unsigned __reserved : 8; + unsigned n_descs : 16; + unsigned flags : 16; } rx_multi; /** An event of type EF_EVENT_TYPE_RX_MULTI_DISCARD */ - struct { /* Common layout with rx_multi. */ - unsigned type :16; - unsigned q_id :8; - unsigned __reserved :8; - unsigned n_descs :16; - unsigned flags :16; - unsigned subtype :16; + struct { /* Common layout with rx_multi. 
*/ + unsigned type : 16; + unsigned q_id : 8; + unsigned __reserved : 8; + unsigned n_descs : 16; + unsigned flags : 16; + unsigned subtype : 16; } rx_multi_discard; /** An event of type EF_EVENT_TYPE_RX_MULTI_PKTS */ struct { - unsigned type :16; - unsigned q_id :8; - unsigned __reserved :8; - unsigned n_pkts :16; - unsigned flags :16; + unsigned type : 16; + unsigned q_id : 8; + unsigned __reserved : 8; + unsigned n_pkts : 16; + unsigned flags : 16; } rx_multi_pkts; /** An event of type EF_EVENT_TYPE_MEMCPY */ struct { - unsigned type :16; - unsigned __reserved :16; - unsigned dma_id :32; + unsigned type : 16; + unsigned __reserved : 16; + unsigned dma_id : 32; } memcpy; /** An event of type EF_EVENT_TYPE_RX_REF */ struct { - unsigned type :16; - unsigned len :16; - unsigned pkt_id :32; - unsigned q_id :8; - unsigned user :24; + unsigned type : 16; + unsigned len : 16; + unsigned pkt_id : 32; + unsigned q_id : 8; + unsigned user : 24; } rx_ref; /** An event of type EF_EVENT_TYPE_RX_REF_DISCARD */ struct { - unsigned type :16; - unsigned len :16; - unsigned pkt_id :32; - unsigned q_id :8; - unsigned user :24; - unsigned flags :16; + unsigned type : 16; + unsigned len : 16; + unsigned pkt_id : 32; + unsigned q_id : 8; + unsigned user : 24; + unsigned flags : 16; } rx_ref_discard; } ef_event; /*! \brief Type of event in an ef_event e */ -#define EF_EVENT_TYPE(e) ((e).generic.type) +#define EF_EVENT_TYPE(e) ((e).generic.type) /*! \brief Possible types of events */ @@ -307,69 +307,67 @@ enum { /* Macros to look up various information per event */ /*! \brief Get the number of bytes received */ -#define EF_EVENT_RX_BYTES(e) ((e).rx.len) +#define EF_EVENT_RX_BYTES(e) ((e).rx.len) /*! \brief Get the RX descriptor ring ID used for a received packet. */ -#define EF_EVENT_RX_Q_ID(e) ((e).rx.q_id) +#define EF_EVENT_RX_Q_ID(e) ((e).rx.q_id) /*! \brief Get the dma_id used for a received packet. 
*/ -#define EF_EVENT_RX_RQ_ID(e) ((e).rx.rq_id) +#define EF_EVENT_RX_RQ_ID(e) ((e).rx.rq_id) /*! \brief True if the CONTinuation Of Packet flag is set for an RX event */ -#define EF_EVENT_RX_CONT(e) ((e).rx.flags & EF_EVENT_FLAG_CONT) +#define EF_EVENT_RX_CONT(e) ((e).rx.flags & EF_EVENT_FLAG_CONT) /*! \brief True if the Start Of Packet flag is set for an RX event */ -#define EF_EVENT_RX_SOP(e) ((e).rx.flags & EF_EVENT_FLAG_SOP) +#define EF_EVENT_RX_SOP(e) ((e).rx.flags & EF_EVENT_FLAG_SOP) /*! \brief True if the next buffer flag is set for a packed stream event */ -#define EF_EVENT_RX_PS_NEXT_BUFFER(e) ((e).rx_packed_stream.flags & \ - EF_EVENT_FLAG_PS_NEXT_BUFFER) +#define EF_EVENT_RX_PS_NEXT_BUFFER(e) \ + ((e).rx_packed_stream.flags & EF_EVENT_FLAG_PS_NEXT_BUFFER) /*! \brief True if the iSCSIOK flag is set for an RX event */ -#define EF_EVENT_RX_ISCSI_OKAY(e) ((e).rx.flags & EF_EVENT_FLAG_ISCSI_OK) +#define EF_EVENT_RX_ISCSI_OKAY(e) ((e).rx.flags & EF_EVENT_FLAG_ISCSI_OK) /* RX-event flags. */ /*! \brief Start Of Packet flag. */ -#define EF_EVENT_FLAG_SOP 0x1 +#define EF_EVENT_FLAG_SOP 0x1 /*! \brief CONTinuation Of Packet flag. */ -#define EF_EVENT_FLAG_CONT 0x2 +#define EF_EVENT_FLAG_CONT 0x2 /*! \brief iSCSI CRC validated OK flag. */ -#define EF_EVENT_FLAG_ISCSI_OK 0x4 +#define EF_EVENT_FLAG_ISCSI_OK 0x4 /*! \brief Multicast flag. */ -#define EF_EVENT_FLAG_MULTICAST 0x8 +#define EF_EVENT_FLAG_MULTICAST 0x8 /*! \brief Packed Stream Next Buffer flag. */ -#define EF_EVENT_FLAG_PS_NEXT_BUFFER 0x10 +#define EF_EVENT_FLAG_PS_NEXT_BUFFER 0x10 /* TX-event flags. */ /*! \brief Packets were sent successfully with CTPIO. */ -#define EF_EVENT_FLAG_CTPIO 0x1 +#define EF_EVENT_FLAG_CTPIO 0x1 /*! \brief Get the TX descriptor ring ID used for a transmitted packet. */ -#define EF_EVENT_TX_Q_ID(e) ((e).tx.q_id) +#define EF_EVENT_TX_Q_ID(e) ((e).tx.q_id) /*! \brief True if packets were sent successfully with CTPIO. 
*/ -#define EF_EVENT_TX_CTPIO(e) ((e).tx.flags & EF_EVENT_FLAG_CTPIO) +#define EF_EVENT_TX_CTPIO(e) ((e).tx.flags & EF_EVENT_FLAG_CTPIO) /*! \brief Get the RX descriptor ring ID used for a discarded packet. */ -#define EF_EVENT_RX_DISCARD_Q_ID(e) ((e).rx_discard.q_id) +#define EF_EVENT_RX_DISCARD_Q_ID(e) ((e).rx_discard.q_id) /*! \brief Get the dma_id used for a discarded packet. */ -#define EF_EVENT_RX_DISCARD_RQ_ID(e) ((e).rx_discard.rq_id) +#define EF_EVENT_RX_DISCARD_RQ_ID(e) ((e).rx_discard.rq_id) /*! \brief True if the CONTinuation Of Packet flag is set for an RX_DISCARD ** event */ -#define EF_EVENT_RX_DISCARD_CONT(e) ((e).rx_discard.flags&EF_EVENT_FLAG_CONT) +#define EF_EVENT_RX_DISCARD_CONT(e) ((e).rx_discard.flags & EF_EVENT_FLAG_CONT) /*! \brief True if the Start Of Packet flag is set for an RX_DISCARD event */ -#define EF_EVENT_RX_DISCARD_SOP(e) ((e).rx_discard.flags&EF_EVENT_FLAG_SOP) +#define EF_EVENT_RX_DISCARD_SOP(e) ((e).rx_discard.flags & EF_EVENT_FLAG_SOP) /*! \brief Get the reason for an EF_EVENT_TYPE_RX_DISCARD event */ -#define EF_EVENT_RX_DISCARD_TYPE(e) ((e).rx_discard.subtype) +#define EF_EVENT_RX_DISCARD_TYPE(e) ((e).rx_discard.subtype) /*! \brief Get the length of a discarded packet */ -#define EF_EVENT_RX_DISCARD_BYTES(e) ((e).rx_discard.len) +#define EF_EVENT_RX_DISCARD_BYTES(e) ((e).rx_discard.len) /*! \brief Get the RX descriptor ring ID used for a received packet. */ -#define EF_EVENT_RX_MULTI_Q_ID(e) ((e).rx_multi.q_id) +#define EF_EVENT_RX_MULTI_Q_ID(e) ((e).rx_multi.q_id) /*! \brief True if the CONTinuation Of Packet flag is set for an RX HT event */ -#define EF_EVENT_RX_MULTI_CONT(e) ((e).rx_multi.flags & \ - EF_EVENT_FLAG_CONT) +#define EF_EVENT_RX_MULTI_CONT(e) ((e).rx_multi.flags & EF_EVENT_FLAG_CONT) /*! 
\brief True if the Start Of Packet flag is set for an RX HT event */ -#define EF_EVENT_RX_MULTI_SOP(e) ((e).rx_multi.flags & \ - EF_EVENT_FLAG_SOP) +#define EF_EVENT_RX_MULTI_SOP(e) ((e).rx_multi.flags & EF_EVENT_FLAG_SOP) /*! \brief Get the reason for an EF_EVENT_TYPE_RX_MULTI_DISCARD event */ -#define EF_EVENT_RX_MULTI_DISCARD_TYPE(e) ((e).rx_multi_discard.subtype) +#define EF_EVENT_RX_MULTI_DISCARD_TYPE(e) ((e).rx_multi_discard.subtype) /*! \brief The reason for an EF_EVENT_TYPE_RX_DISCARD event */ enum { @@ -394,26 +392,26 @@ enum { }; /*! \brief Get the TX descriptor ring ID used for a transmit error */ -#define EF_EVENT_TX_ERROR_Q_ID(e) ((e).tx_error.q_id) +#define EF_EVENT_TX_ERROR_Q_ID(e) ((e).tx_error.q_id) /*! \brief Get the reason for a TX_ERROR event */ -#define EF_EVENT_TX_ERROR_TYPE(e) ((e).tx_error.subtype) +#define EF_EVENT_TX_ERROR_TYPE(e) ((e).tx_error.subtype) /*! \brief The adapter clock has previously been set in sync with the ** system */ -#define EF_VI_SYNC_FLAG_CLOCK_SET 1 +#define EF_VI_SYNC_FLAG_CLOCK_SET 1 /*! \brief The adapter clock is in sync with the external clock (PTP) */ -#define EF_VI_SYNC_FLAG_CLOCK_IN_SYNC 2 +#define EF_VI_SYNC_FLAG_CLOCK_IN_SYNC 2 /*! \brief Get the TX descriptor ring ID used for a timestamped packet. */ -#define EF_EVENT_TX_WITH_TIMESTAMP_Q_ID(e) ((e).tx_timestamp.q_id) +#define EF_EVENT_TX_WITH_TIMESTAMP_Q_ID(e) ((e).tx_timestamp.q_id) /*! \brief Get the dma_id used for a timestamped packet. */ -#define EF_EVENT_TX_WITH_TIMESTAMP_RQ_ID(e) ((e).tx_timestamp.rq_id) +#define EF_EVENT_TX_WITH_TIMESTAMP_RQ_ID(e) ((e).tx_timestamp.rq_id) /*! \brief Get the number of seconds from the timestamp of a transmitted ** packet */ -#define EF_EVENT_TX_WITH_TIMESTAMP_SEC(e) ((e).tx_timestamp.ts_sec) +#define EF_EVENT_TX_WITH_TIMESTAMP_SEC(e) ((e).tx_timestamp.ts_sec) /*! 
\brief Get the number of nanoseconds from the timestamp of a transmitted ** packet */ -#define EF_EVENT_TX_WITH_TIMESTAMP_NSEC(e) ((e).tx_timestamp.ts_nsec) +#define EF_EVENT_TX_WITH_TIMESTAMP_NSEC(e) ((e).tx_timestamp.ts_nsec) /*! \brief Mask for the sync flags in the timestamp of a transmitted packet */ #define EF_EVENT_TX_WITH_TIMESTAMP_SYNC_MASK \ (EF_VI_SYNC_FLAG_CLOCK_SET | EF_VI_SYNC_FLAG_CLOCK_IN_SYNC) @@ -422,9 +420,9 @@ enum { ((e).tx_timestamp.ts_nsec & EF_EVENT_TX_WITH_TIMESTAMP_SYNC_MASK) /*! \brief Get the TX descriptor ring ID used for a TX alternative packet. */ -#define EF_EVENT_TX_ALT_Q_ID(e) ((e).tx_alt.q_id) +#define EF_EVENT_TX_ALT_Q_ID(e) ((e).tx_alt.q_id) /*! \brief Get the TX alternative ID used for a TX alternative packet. */ -#define EF_EVENT_TX_ALT_ALT_ID(e) ((e).tx_alt.alt_id) +#define EF_EVENT_TX_ALT_ALT_ID(e) ((e).tx_alt.alt_id) /*! \brief The reason for an EF_EVENT_TYPE_TX_ERROR event */ enum { @@ -441,17 +439,17 @@ enum { /*! \brief Get the RX descriptor ring ID used for a received packet that ** was truncated due to a lack of descriptors. */ -#define EF_EVENT_RX_NO_DESC_TRUNC_Q_ID(e) ((e).rx_no_desc_trunc.q_id) +#define EF_EVENT_RX_NO_DESC_TRUNC_Q_ID(e) ((e).rx_no_desc_trunc.q_id) /*! \brief Mask for the data in a software generated event */ -#define EF_EVENT_SW_DATA_MASK 0xffff +#define EF_EVENT_SW_DATA_MASK 0xffff /*! \brief Get the data for an EF_EVENT_TYPE_SW event */ -#define EF_EVENT_SW_DATA(e) ((e).sw.data) +#define EF_EVENT_SW_DATA(e) ((e).sw.data) /*! \brief Output format for an ef_event */ -#define EF_EVENT_FMT "[ev:%x]" +#define EF_EVENT_FMT "[ev:%x]" /*! 
\brief Get the type of an event */ -#define EF_EVENT_PRI_ARG(e) (unsigned) (e).generic.type +#define EF_EVENT_PRI_ARG(e) (unsigned) (e).generic.type /* ***************** */ @@ -462,7 +460,7 @@ enum { */ typedef struct { /** base address of the buffer */ - ef_addr iov_base EF_VI_ALIGN(8); + ef_addr iov_base EF_VI_ALIGN(8); /** length of the buffer */ unsigned iov_len; } ef_iovec; @@ -475,7 +473,7 @@ typedef struct { */ typedef struct { /** base address of the buffer */ - ef_addr iov_base EF_VI_ALIGN(8); + ef_addr iov_base EF_VI_ALIGN(8); /** length of the buffer */ unsigned iov_len; uint32_t flags; /* EF_RIOV_FLAG_* */ @@ -502,75 +500,75 @@ typedef struct { /*! \brief Flags that can be requested when allocating an ef_vi */ enum ef_vi_flags { /** Default setting */ - EF_VI_FLAGS_DEFAULT = 0x0, + EF_VI_FLAGS_DEFAULT = 0x0, /** Receive iSCSI header digest enable: hardware verifies header digest ** (CRC) when packet is iSCSI */ - EF_VI_ISCSI_RX_HDIG = 0x2, + EF_VI_ISCSI_RX_HDIG = 0x2, /** Transmit iSCSI header digest enable: hardware calculates and inserts ** header digest (CRC) when packet is iSCSI */ - EF_VI_ISCSI_TX_HDIG = 0x4, + EF_VI_ISCSI_TX_HDIG = 0x4, /** Receive iSCSI data digest enable: hardware verifies data digest (CRC) ** when packet is iSCSI */ - EF_VI_ISCSI_RX_DDIG = 0x8, + EF_VI_ISCSI_RX_DDIG = 0x8, /** Transmit iSCSI data digest enable: hardware calculates and inserts ** data digest (CRC) when packet is iSCSI */ - EF_VI_ISCSI_TX_DDIG = 0x10, + EF_VI_ISCSI_TX_DDIG = 0x10, /** Use physically addressed TX descriptor ring */ - EF_VI_TX_PHYS_ADDR = 0x20, + EF_VI_TX_PHYS_ADDR = 0x20, /** Use physically addressed RX descriptor ring */ - EF_VI_RX_PHYS_ADDR = 0x40, + EF_VI_RX_PHYS_ADDR = 0x40, /** IP checksum calculation and replacement is disabled */ - EF_VI_TX_IP_CSUM_DIS = 0x80, + EF_VI_TX_IP_CSUM_DIS = 0x80, /** TCP/UDP checksum calculation and replacement is disabled */ - EF_VI_TX_TCPUDP_CSUM_DIS= 0x100, + EF_VI_TX_TCPUDP_CSUM_DIS = 0x100, /** Drop transmit 
packets that are not TCP or UDP */ - EF_VI_TX_TCPUDP_ONLY = 0x200, + EF_VI_TX_TCPUDP_ONLY = 0x200, /** Drop packets with a mismatched IP source address ** (5000 and 6000 series only) */ - EF_VI_TX_FILTER_IP = 0x400, /* Siena only */ + EF_VI_TX_FILTER_IP = 0x400, /* Siena only */ /** Drop packets with a mismatched MAC source address ** (5000 and 6000 series only) */ - EF_VI_TX_FILTER_MAC = 0x800, /* Siena only */ + EF_VI_TX_FILTER_MAC = 0x800, /* Siena only */ /** Set lowest bit of queue ID to 0 when matching within filter block ** (5000 and 6000 series only) */ - EF_VI_TX_FILTER_MASK_1 = 0x1000, /* Siena only */ + EF_VI_TX_FILTER_MASK_1 = 0x1000, /* Siena only */ /** Set lowest 2 bits of queue ID to 0 when matching within filter block ** (5000 and 6000 series only) */ - EF_VI_TX_FILTER_MASK_2 = 0x2000, /* Siena only */ + EF_VI_TX_FILTER_MASK_2 = 0x2000, /* Siena only */ /** Set lowest 3 bits of queue ID to 0 when matching within filter block ** (5000 and 6000 series only) */ - EF_VI_TX_FILTER_MASK_3 = (0x1000 | 0x2000), /* Siena only */ + EF_VI_TX_FILTER_MASK_3 = (0x1000 | 0x2000), /* Siena only */ /** Disable using TX descriptor push, so always use doorbell for transmit */ - EF_VI_TX_PUSH_DISABLE = 0x4000, + EF_VI_TX_PUSH_DISABLE = 0x4000, /** Always use TX descriptor push, so never use doorbell for transmit ** (7000 series and newer) */ - EF_VI_TX_PUSH_ALWAYS = 0x8000, /* ef10 only */ + EF_VI_TX_PUSH_ALWAYS = 0x8000, /* ef10 only */ /** Add timestamp to received packets (7000 series and newer) */ - EF_VI_RX_TIMESTAMPS = 0x10000, /* ef10 only */ + EF_VI_RX_TIMESTAMPS = 0x10000, /* ef10 only */ /** Add timestamp to transmitted packets (7000 series and newer), ** cannot be combined with EF_VI_TX_ALT */ - EF_VI_TX_TIMESTAMPS = 0x20000, /* ef10 only */ + EF_VI_TX_TIMESTAMPS = 0x20000, /* ef10 only */ /* Flag EF_VI_TX_LOOPBACK (0x40000) has been removed. Similar * functionality can now be achieved with protection domain and * EF_PD_MCAST_LOOP flag. 
* Flag value 0x40000 is not to be reused. */ /** Enable packed stream mode for received packets (7000 series and newer) */ - EF_VI_RX_PACKED_STREAM = 0x80000, /* ef10 only */ + EF_VI_RX_PACKED_STREAM = 0x80000, /* ef10 only */ /** Use 64KiB packed stream buffers, instead of the 1024KiB default (7000 * series and newer) */ - EF_VI_RX_PS_BUF_SIZE_64K = 0x100000, /* ef10 only */ + EF_VI_RX_PS_BUF_SIZE_64K = 0x100000, /* ef10 only */ /** Enable RX event merging mode for received packets; ** see ef_vi_receive_unbundle() and ef_vi_receive_get_bytes() for more ** details on using RX event merging mode */ - EF_VI_RX_EVENT_MERGE = 0x200000, /* ef10 only */ + EF_VI_RX_EVENT_MERGE = 0x200000, /* ef10 only */ /** Enable the "TX alternatives" feature (8000 series and newer), ** cannot be combined with EF_VI_TX_TIMESTAMPS */ - EF_VI_TX_ALT = 0x400000, + EF_VI_TX_ALT = 0x400000, /** Controls whether the hardware event timer is enabled (8000 series and ** newer) */ EF_VI_ENABLE_EV_TIMER = 0x800000, /** Enable the "cut-through PIO" feature (X2000 series and newer). */ - EF_VI_TX_CTPIO = 0x1000000, + EF_VI_TX_CTPIO = 0x1000000, /** When using CTPIO, prevent poisoned frames from reaching the wire (X2000 ** series and newer). */ EF_VI_TX_CTPIO_NO_POISON = 0x2000000, @@ -595,7 +593,7 @@ enum ef_vi_flags { /*! 
\brief Flags that can be returned when an ef_vi has been allocated */ enum ef_vi_out_flags { /** Clock sync status */ - EF_VI_OUT_CLOCK_SYNC_STATUS = 0x1, /* ef10 only */ + EF_VI_OUT_CLOCK_SYNC_STATUS = 0x1, /* ef10 only */ }; @@ -605,15 +603,15 @@ enum ef_vi_out_flags { */ enum ef_vi_rx_discard_err_flags { /** TCP or UDP checksum error */ - EF_VI_DISCARD_RX_L4_CSUM_ERR = 0x1, + EF_VI_DISCARD_RX_L4_CSUM_ERR = 0x1, /** IP checksum error */ - EF_VI_DISCARD_RX_L3_CSUM_ERR = 0x2, + EF_VI_DISCARD_RX_L3_CSUM_ERR = 0x2, /** Ethernet FCS error */ - EF_VI_DISCARD_RX_ETH_FCS_ERR = 0x4, + EF_VI_DISCARD_RX_ETH_FCS_ERR = 0x4, /** Ethernet frame length error */ - EF_VI_DISCARD_RX_ETH_LEN_ERR = 0x8, + EF_VI_DISCARD_RX_ETH_LEN_ERR = 0x8, /** To be discard in software (includes frame length error) */ - EF_VI_DISCARD_RX_TOBE_DISC = 0x10, /* Siena only */ + EF_VI_DISCARD_RX_TOBE_DISC = 0x10, /* Siena only */ /* Inner TCP or UDP checksum error */ EF_VI_DISCARD_RX_INNER_L4_CSUM_ERR = 0x20, /* Inner IP checksum error */ @@ -625,8 +623,8 @@ enum ef_vi_rx_discard_err_flags { /*! \brief Timestamp formats supported by various cards. */ enum ef_timestamp_format { - TS_FORMAT_SECONDS_27FRACTION = 0, - TS_FORMAT_SECONDS_QTR_NANOSECONDS = 1 + TS_FORMAT_SECONDS_27FRACTION = 0, + TS_FORMAT_SECONDS_QTR_NANOSECONDS = 1 }; @@ -646,6 +644,8 @@ enum ef_vi_arch { EF_VI_ARCH_EFCT, /** Arbitrary NICs using AF_XDP */ EF_VI_ARCH_AF_XDP, + /** swXtch.io specific implementation */ + EF_VI_ARCH_SWXTCH, }; /*! 
\brief State of TX descriptor ring @@ -654,17 +654,17 @@ enum ef_vi_arch { */ typedef struct { /** Previous slot that has been handled */ - uint32_t previous; + uint32_t previous; /** Descriptors added to the ring */ - uint32_t added; + uint32_t added; /** Descriptors removed from the ring */ - uint32_t removed; + uint32_t removed; /** Bytes added to the cut-through FIFO */ - uint32_t ct_added; + uint32_t ct_added; /** Bytes removed from the cut-through FIFO */ - uint32_t ct_removed; + uint32_t ct_removed; /** Timestamp in nanoseconds */ - uint32_t ts_nsec; + uint32_t ts_nsec; } ef_vi_txq_state; /*! \brief State of efct receive queue @@ -695,19 +695,19 @@ typedef struct { */ typedef struct { /** Descriptors posted to the nic */ - uint32_t posted; + uint32_t posted; /** Descriptors added to the ring */ - uint32_t added; + uint32_t added; /** Descriptors removed from the ring */ - uint32_t removed; + uint32_t removed; /** Packets received as part of a jumbo (7000-series only) */ - uint32_t in_jumbo; /* ef10 only */ + uint32_t in_jumbo; /* ef10 only */ /** Bytes received as part of a jumbo (7000-series only) */ - uint32_t bytes_acc; /* ef10 only */ + uint32_t bytes_acc; /* ef10 only */ /** Last descriptor index completed (7000-series only) */ - uint16_t last_desc_i; /* ef10 only */ + uint16_t last_desc_i; /* ef10 only */ /** Credit for packed stream handling (7000-series only) */ - uint16_t rx_ps_credit_avail; /* ef10 only */ + uint16_t rx_ps_credit_avail; /* ef10 only */ ef_vi_efct_rxq_ptr rxq_ptr[EF_VI_MAX_EFCT_RXQS]; /* efct only */ } ef_vi_rxq_state; @@ -719,19 +719,19 @@ typedef struct { /** Event queue pointer */ ef_eventq_ptr evq_ptr; /** For internal use only */ - int32_t evq_clear_stride; + int32_t evq_clear_stride; /** Timestamp (major part) */ - uint32_t sync_timestamp_major; + uint32_t sync_timestamp_major; /** Timestamp (minor part) */ - uint32_t sync_timestamp_minor; + uint32_t sync_timestamp_minor; /** Smallest possible seconds value for given 
sync_timestamp_major */ - uint32_t sync_timestamp_minimum; + uint32_t sync_timestamp_minimum; /** Timestamp synchronized with adapter */ - uint32_t sync_timestamp_synchronised; /* with adapter */ + uint32_t sync_timestamp_synchronised; /* with adapter */ /** Unsolicited credit sequence */ - uint32_t unsol_credit_seq; + uint32_t unsol_credit_seq; /** Time synchronization flags */ - uint32_t sync_flags; + uint32_t sync_flags; } ef_eventq_state; /*! \brief TX descriptor ring @@ -740,13 +740,13 @@ typedef struct { */ typedef struct { /** Mask for indexes within ring, to wrap around */ - uint32_t mask; + uint32_t mask; /** Maximum space in the cut-through FIFO, reduced to account for header */ - uint32_t ct_fifo_bytes; + uint32_t ct_fifo_bytes; /** Pointer to descriptors */ - void* descriptors; + void* descriptors; /** Pointer to IDs */ - uint32_t* ids; + uint32_t* ids; } ef_vi_txq; /*! \brief RX descriptor ring @@ -755,11 +755,11 @@ typedef struct { */ typedef struct { /** Mask for indexes within ring, to wrap around */ - uint32_t mask; + uint32_t mask; /** Pointer to descriptors */ - void* descriptors; + void* descriptors; /** Pointer to IDs */ - uint32_t* ids; + uint32_t* ids; } ef_vi_rxq; typedef int ef_vi_efct_superbuf_refresh_t(struct ef_vi*, int); @@ -817,13 +817,13 @@ typedef struct { */ struct ef_vi_nic_type { /** Architecture of the NIC */ - unsigned char arch; + unsigned char arch; /** Variant of the NIC */ - char variant; + char variant; /** Revision of the NIC */ - unsigned char revision; + unsigned char revision; /** Flags indicating hardware features */ - unsigned char nic_flags; + unsigned char nic_flags; }; /*! 
\brief Per-packet overhead information @@ -884,153 +884,149 @@ struct ef_vi_tx_extra { */ typedef struct ef_vi { /** True if the virtual interface has been initialized */ - unsigned inited; + unsigned inited; /** The resource ID of the virtual interface */ - unsigned vi_resource_id; + unsigned vi_resource_id; /** The instance ID of the virtual interface */ - unsigned vi_i; + unsigned vi_i; /** NIC-global ID of this virtual interface, or -1 */ - unsigned abs_idx; + unsigned abs_idx; /** fd used for original initialisation */ - ef_driver_handle dh; + ef_driver_handle dh; /** The length of a receive buffer */ - unsigned rx_buffer_len; + unsigned rx_buffer_len; /** The length of the prefix at the start of a received packet */ - unsigned rx_prefix_len; + unsigned rx_prefix_len; /** efct: The last call to transmit_ctpio didn't have space; remember this * for the call to ctpio_fallback */ - uint8_t last_ctpio_failed; + uint8_t last_ctpio_failed; /** The mask to select which errors cause a discard event */ - uint64_t rx_discard_mask; + uint64_t rx_discard_mask; /** The timestamp correction (ticks) for received packets */ - int rx_ts_correction; + int rx_ts_correction; /** The offset to packet length in receive buffer */ - unsigned rx_pkt_len_offset; + unsigned rx_pkt_len_offset; /** The mask of packet length in receive buffer */ - unsigned rx_pkt_len_mask; + unsigned rx_pkt_len_mask; /** The timestamp correction (ns) for transmitted packets */ - int tx_ts_correction_ns; + int tx_ts_correction_ns; /** The timestamp format used by the hardware */ - enum ef_timestamp_format ts_format; + enum ef_timestamp_format ts_format; /** Pointer to virtual interface memory */ - char* vi_mem_mmap_ptr; + char* vi_mem_mmap_ptr; /** Length of virtual interface memory */ - int vi_mem_mmap_bytes; + int vi_mem_mmap_bytes; /** Pointer to virtual interface I/O region */ - char* vi_io_mmap_ptr; + char* vi_io_mmap_ptr; /** Length of virtual interface I/O region */ - int vi_io_mmap_bytes; + int 
vi_io_mmap_bytes; /** Pointer to CTPIO region */ - char* vi_ctpio_mmap_ptr; + char* vi_ctpio_mmap_ptr; /** Controls rate of writes into CTPIO aperture */ - uint32_t vi_ctpio_wb_ticks; + uint32_t vi_ctpio_wb_ticks; /** Length of region allocated at ep_state */ - int ep_state_bytes; + int ep_state_bytes; /** True if the virtual interface is in a cluster */ - int vi_clustered; + int vi_clustered; /** True if packed stream mode is enabled for the virtual interface */ - int vi_is_packed_stream; + int vi_is_packed_stream; /** True if no special mode is enabled for the virtual interface */ - int vi_is_normal; + int vi_is_normal; /** The packed stream buffer size for the virtual interface */ - unsigned vi_ps_buf_size; + unsigned vi_ps_buf_size; /** I/O address for the virtual interface */ - ef_vi_ioaddr_t io; + ef_vi_ioaddr_t io; /** Programmed I/O region linked to the virtual interface */ - struct ef_pio* linked_pio; + struct ef_pio* linked_pio; /** Base of the event queue for the virtual interface */ - char* evq_base; + char* evq_base; /** Mask for offsets within the event queue for the virtual interface */ - unsigned evq_mask; + unsigned evq_mask; /** True if the event queue uses phase bits */ - int evq_phase_bits; + int evq_phase_bits; /** The timer quantum for the virtual interface, in nanoseconds */ - unsigned timer_quantum_ns; + unsigned timer_quantum_ns; /** The threshold at which to switch from using TX descriptor push to ** using a doorbell */ - unsigned tx_push_thresh; + unsigned tx_push_thresh; /** The TX descriptor ring for the virtual interface */ - ef_vi_txq vi_txq; + ef_vi_txq vi_txq; /** The RX descriptor ring for the virtual interface */ - ef_vi_rxq vi_rxq; + ef_vi_rxq vi_rxq; /** The state of the virtual interface */ - ef_vi_state* ep_state; + ef_vi_state* ep_state; /** The flags for the virtual interface */ - enum ef_vi_flags vi_flags; + enum ef_vi_flags vi_flags; /** Flags returned when the virtual interface is allocated */ - enum ef_vi_out_flags 
vi_out_flags; + enum ef_vi_out_flags vi_out_flags; /** Statistics for the virtual interface */ - ef_vi_stats* vi_stats; + ef_vi_stats* vi_stats; /** Virtual queues for the virtual interface */ - struct ef_vi* vi_qs[EF_VI_MAX_QS]; + struct ef_vi* vi_qs[EF_VI_MAX_QS]; /** Number of virtual queues for the virtual interface */ - int vi_qs_n; + int vi_qs_n; /** Id of queue a pending PFTF packet belongs to */ - uint8_t future_qid; + uint8_t future_qid; /** Attached rxqs for efct VIs (NB: not necessarily in rxq order) */ - ef_vi_efct_rxq efct_rxq[EF_VI_MAX_EFCT_RXQS]; + ef_vi_efct_rxq efct_rxq[EF_VI_MAX_EFCT_RXQS]; /** efct kernel/userspace shared queue area. */ struct efab_efct_rxq_uk_shm_base* efct_shm; /** 1 + highest allowed index of a used element in efct_rxq */ - int max_efct_rxq; + int max_efct_rxq; /** Number of TX alternatives for the virtual interface */ - unsigned tx_alt_num; + unsigned tx_alt_num; /** Mapping from end-user TX alternative IDs to hardware IDs */ - unsigned* tx_alt_id2hw; + unsigned* tx_alt_id2hw; /** Mapping from hardware TX alternative IDs to end-user IDs */ - unsigned* tx_alt_hw2id; + unsigned* tx_alt_hw2id; /** The type of NIC hosting the virtual interface */ - struct ef_vi_nic_type nic_type; + struct ef_vi_nic_type nic_type; /** Callback to invoke AF_XDP send operations */ - int (*xdp_kick)(struct ef_vi*); - void* xdp_kick_context; + int (*xdp_kick)(struct ef_vi*); + void* xdp_kick_context; /*! \brief Driver-dependent operations. 
*/ /* Doxygen comment above is the detailed description of ef_vi::ops */ struct ops { /** Transmit a packet from a single packet buffer */ - int (*transmit)(struct ef_vi*, ef_addr base, int len, - ef_request_id); + int (*transmit)(struct ef_vi*, ef_addr base, int len, ef_request_id); /** Transmit a packet from a vector of packet buffers */ - int (*transmitv)(struct ef_vi*, const ef_iovec*, int iov_len, - ef_request_id); + int (*transmitv)( + struct ef_vi*, const ef_iovec*, int iov_len, ef_request_id); /** Initialize TX descriptors on the TX descriptor ring, for a vector ** of packet buffers */ - int (*transmitv_init)(struct ef_vi*, const ef_iovec*, - int iov_len, ef_request_id); + int (*transmitv_init)( + struct ef_vi*, const ef_iovec*, int iov_len, ef_request_id); /** Submit newly initialized TX descriptors to the NIC */ void (*transmit_push)(struct ef_vi*); /** Transmit a packet already resident in Programmed I/O */ - int (*transmit_pio)(struct ef_vi*, int offset, int len, - ef_request_id dma_id); + int (*transmit_pio)( + struct ef_vi*, int offset, int len, ef_request_id dma_id); /** Copy a packet to Programmed I/O region and transmit it */ int (*transmit_copy_pio)(struct ef_vi*, int pio_offset, - const void* src_buf, int len, - ef_request_id dma_id); + const void* src_buf, int len, ef_request_id dma_id); /** Warm Programmed I/O transmit path for subsequent transmit */ void (*transmit_pio_warm)(struct ef_vi*); /** Copy a packet to Programmed I/O region and warm transmit path */ - void (*transmit_copy_pio_warm)(struct ef_vi*, int pio_offset, - const void* src_buf, int len); + void (*transmit_copy_pio_warm)( + struct ef_vi*, int pio_offset, const void* src_buf, int len); /** Transmit a vector of packet buffers using CTPIO */ void (*transmitv_ctpio)(struct ef_vi*, size_t frame_len, - const struct iovec* iov, - int iov_len, unsigned threshold); + const struct iovec* iov, int iov_len, unsigned threshold); /** Transmit a vector of packet buffers using CTPIO and copy to 
fallback */ void (*transmitv_ctpio_copy)(struct ef_vi*, size_t frame_len, - const struct iovec* iov, - int iov_len, unsigned threshold, - void* fallback); + const struct iovec* iov, int iov_len, unsigned threshold, + void* fallback); /** Select a TX alternative as the destination for future sends */ int (*transmit_alt_select)(struct ef_vi*, unsigned alt_id); /** Select the "normal" data path as the destination for future sends */ @@ -1060,17 +1056,20 @@ typedef struct ef_vi { /** Initialize TX descriptors on the TX descriptor ring, using * extra options and (optionally) remote buffers */ int (*transmitv_init_extra)(struct ef_vi*, const struct ef_vi_tx_extra*, - const ef_remote_iovec*, int iov_len, - ef_request_id); + const ef_remote_iovec*, int iov_len, ef_request_id); ssize_t (*transmit_memcpy)(struct ef_vi*, const ef_remote_iovec* dst_iov, - int dst_iov_len, const ef_remote_iovec* src_iov, - int src_iov_len); + int dst_iov_len, const ef_remote_iovec* src_iov, int src_iov_len); int (*transmit_memcpy_sync)(struct ef_vi*, ef_request_id dma_id); - int (*transmit_ctpio_fallback)(struct ef_vi* vi, ef_addr dma_addr, - size_t len, ef_request_id dma_id); + int (*transmit_ctpio_fallback)( + struct ef_vi* vi, ef_addr dma_addr, size_t len, ef_request_id dma_id); int (*transmitv_ctpio_fallback)(struct ef_vi* vi, const ef_iovec* dma_iov, - int dma_iov_len, ef_request_id dma_id); - } ops; /**< Driver-dependent operations. */ + int dma_iov_len, ef_request_id dma_id); + void (*rx_fill_pkt)( + struct ef_vi* vi, char* output_buffer, int event_index); + void (*tx_fill_pkt)( + struct ef_vi* vi, const char* output_buffer, const unsigned len); + int (*refill_rx)(struct ef_vi* vi); + } ops; /**< Driver-dependent operations. */ /* Doxygen comment above is documentation for the ops member of ef_vi */ /*! \brief Driver-dependent operations not corresponding to a public API. 
*/ @@ -1079,7 +1078,7 @@ typedef struct ef_vi { struct internal_ops { /** A filter has just been added to the given VI */ int (*post_filter_add)(struct ef_vi*, const struct ef_filter_spec* fs, - const struct ef_filter_cookie* cookie, int rxq); + const struct ef_filter_cookie* cookie, int rxq); } internal_ops; } ef_vi; @@ -1112,7 +1111,6 @@ ef_vi_inline enum ef_vi_flags ef_vi_flags(ef_vi* vi) } - /*! \brief Return the instance ID of the virtual interface ** ** \param vi The virtual interface to query. @@ -1300,7 +1298,7 @@ ef_vi_inline int ef_vi_receive_capacity(const ef_vi* vi) ** packets. This function only writes a few bytes into host memory, and is ** very fast. */ -#define ef_vi_receive_init(vi, addr, dma_id) \ +#define ef_vi_receive_init(vi, addr, dma_id) \ (vi)->ops.receive_init((vi), (addr), (dma_id)) @@ -1375,11 +1373,12 @@ extern int ef_vi_receive_post(ef_vi* vi, ef_addr addr, ef_request_id dma_id); ** If the virtual interface does not have RX timestamps enabled, the ** behavior of this function is undefined. ** -** Note: ef_eventq_poll(), efct_vi_rx_future_poll() and efct_vi_rx_future_peek() +** Note: ef_eventq_poll(), efct_vi_rx_future_poll() and +** efct_vi_rx_future_peek() ** invalidate timestamps retrieved by previous poll function. */ -extern int ef_vi_receive_get_timestamp(ef_vi* vi, const void* pkt, - ef_timespec* ts_out); +extern int ef_vi_receive_get_timestamp( + ef_vi* vi, const void* pkt, ef_timespec* ts_out); /*! \brief Retrieve the UTC timestamp associated with a received packet, @@ -1429,10 +1428,8 @@ extern int ef_vi_receive_get_timestamp(ef_vi* vi, const void* pkt, ** In case of error the timestamp result (*ts_out) is set to zero, and a ** non-zero error code is returned (see Return value above). 
*/ -extern int -ef_vi_receive_get_timestamp_with_sync_flags(ef_vi* vi, const void* pkt, - ef_timespec* ts_out, - unsigned* flags_out); +extern int ef_vi_receive_get_timestamp_with_sync_flags( + ef_vi* vi, const void* pkt, ef_timespec* ts_out, unsigned* flags_out); /*! \brief Retrieve the number of bytes in a received packet in RX event @@ -1452,8 +1449,8 @@ ef_vi_receive_get_timestamp_with_sync_flags(ef_vi* vi, const void* pkt, ** ** \return 0 on success, or a negative error code */ -extern int -ef_vi_receive_get_bytes(ef_vi* vi, const void* pkt, uint16_t* bytes_out); +extern int ef_vi_receive_get_bytes( + ef_vi* vi, const void* pkt, uint16_t* bytes_out); /*! \brief Retrieve the user_mark and user_flag fields in a received packet @@ -1472,9 +1469,8 @@ ef_vi_receive_get_bytes(ef_vi* vi, const void* pkt, uint16_t* bytes_out); ** ** \return 0 on success, or a negative error code */ -extern int -ef_vi_receive_get_user_data(ef_vi* vi, const void* pkt, uint32_t* user_mark, - uint8_t* user_flag); +extern int ef_vi_receive_get_user_data( + ef_vi* vi, const void* pkt, uint32_t* user_mark, uint8_t* user_flag); /*! \brief Maximum number of receive completions per receive event. */ @@ -1511,8 +1507,8 @@ ef_vi_receive_get_user_data(ef_vi* vi, const void* pkt, uint32_t* user_mark, ** must be called, or the length examined in the packet prefix (see ** ef_vi_receive_query_layout()). 
*/ -extern int ef_vi_receive_unbundle(ef_vi* ep, const ef_event* event, - ef_request_id* ids); +extern int ef_vi_receive_unbundle( + ef_vi* ep, const ef_event* event, ef_request_id* ids); extern ef_request_id ef_vi_rxq_next_desc_id(ef_vi* vi); @@ -1527,8 +1523,7 @@ extern ef_request_id ef_vi_rxq_next_desc_id(ef_vi* vi); ** ** Set which errors cause an EF_EVENT_TYPE_RX_DISCARD event */ -extern int -ef_vi_receive_set_discards(ef_vi* vi, unsigned discard_err_flags); +extern int ef_vi_receive_set_discards(ef_vi* vi, unsigned discard_err_flags); /********************************************************************** @@ -1644,8 +1639,8 @@ ef_vi_inline int ef_vi_transmit_capacity(const ef_vi* vi) ** address) must contain the packet to transmit. This function only writes ** a few bytes into host memory, and is very fast. */ -extern int ef_vi_transmit_init(ef_vi* vi, ef_addr addr, int bytes, - ef_request_id dma_id); +extern int ef_vi_transmit_init( + ef_vi* vi, ef_addr addr, int bytes, ef_request_id dma_id); /*! \brief Initialize TX descriptors on the TX descriptor ring, for a @@ -1678,7 +1673,7 @@ extern int ef_vi_transmit_init(ef_vi* vi, ef_addr addr, int bytes, ** updated, but the buffers containing constant data are re-used ** - this minimizes the amount of data written between transmits. */ -#define ef_vi_transmitv_init(vi, iov, iov_len, dma_id) \ +#define ef_vi_transmitv_init(vi, iov, iov_len, dma_id) \ (vi)->ops.transmitv_init((vi), (iov), (iov_len), (dma_id)) /*! \brief Initialize TX descriptors on the TX descriptor ring, with @@ -1694,9 +1689,8 @@ extern int ef_vi_transmit_init(ef_vi* vi, ef_addr addr, int bytes, ** space (EF_ADDRSPACE_LOCAL) and cannot be translated; this is a ** limitation of the hardware. It can, however, be zero length. 
*/ -#define ef_vi_transmitv_init_extra(vi, extra, iov, iov_len, dma_id) \ - (vi)->ops.transmitv_init_extra((vi), (extra), (iov), \ - (iov_len), (dma_id)) +#define ef_vi_transmitv_init_extra(vi, extra, iov, iov_len, dma_id) \ + (vi)->ops.transmitv_init_extra((vi), (extra), (iov), (iov_len), (dma_id)) /*! \brief Submit newly initialized TX descriptors to the NIC ** @@ -1739,7 +1733,7 @@ extern int ef_vi_transmit_init(ef_vi* vi, ef_addr addr, int bytes, ** functions separately, but unless there is a batch of packets to ** transmit, calling this function is often the right thing to do. */ -#define ef_vi_transmit(vi, base, len, dma_id) \ +#define ef_vi_transmit(vi, base, len, dma_id) \ (vi)->ops.transmit((vi), (base), (len), (dma_id)) @@ -1777,7 +1771,7 @@ extern int ef_vi_transmit_init(ef_vi* vi, ef_addr addr, int bytes, ** updated, but the buffers containing constant data are re-used ** - this minimizes the amount of data written between transmits. */ -#define ef_vi_transmitv(vi, iov, iov_len, dma_id) \ +#define ef_vi_transmitv(vi, iov, iov_len, dma_id) \ (vi)->ops.transmitv((vi), (iov), (iov_len), (dma_id)) @@ -1811,7 +1805,7 @@ extern int ef_vi_transmit_init(ef_vi* vi, ef_addr addr, int bytes, ** - maximum size ** - avoiding reuse until transmission is complete. */ -#define ef_vi_transmit_pio(vi, offset, len, dma_id) \ +#define ef_vi_transmit_pio(vi, offset, len, dma_id) \ (vi)->ops.transmit_pio((vi), (offset), (len), (dma_id)) @@ -1854,9 +1848,8 @@ extern int ef_vi_transmit_init(ef_vi* vi, ef_addr addr, int bytes, ** - maximum size ** - avoiding reuse until transmission is complete. */ -#define ef_vi_transmit_copy_pio(vi, pio_offset, src_buf, len, dma_id) \ - (vi)->ops.transmit_copy_pio((vi), (pio_offset), (src_buf), \ - (len), (dma_id)) +#define ef_vi_transmit_copy_pio(vi, pio_offset, src_buf, len, dma_id) \ + (vi)->ops.transmit_copy_pio((vi), (pio_offset), (src_buf), (len), (dma_id)) /*! 
\brief Warm Programmed I/O transmit path for subsequent transmit @@ -1876,8 +1869,7 @@ extern int ef_vi_transmit_init(ef_vi* vi, ef_addr addr, int bytes, ** it follows a different code path. See ef_vi_transmit_copy_pio_warm() for ** a warming function designed to warm for ef_vi_transmit_copy_pio(). */ -#define ef_vi_transmit_pio_warm(vi) \ - (vi)->ops.transmit_pio_warm((vi)) +#define ef_vi_transmit_pio_warm(vi) (vi)->ops.transmit_pio_warm((vi)) /*! \brief Copy a packet to Programmed I/O region and warm transmit path @@ -1910,7 +1902,7 @@ extern int ef_vi_transmit_init(ef_vi* vi, ef_addr addr, int bytes, ** it follows a different code path. See ef_vi_transmit_pio_warm() for ** a warming function designed to warm for ef_vi_transmit_pio(). */ -#define ef_vi_transmit_copy_pio_warm(vi, pio_offset, src_buf, len) \ +#define ef_vi_transmit_copy_pio_warm(vi, pio_offset, src_buf, len) \ (vi)->ops.transmit_copy_pio_warm((vi), (pio_offset), (src_buf), (len)) @@ -1937,7 +1929,7 @@ extern void ef_vi_transmit_init_undo(ef_vi* vi); /*! \brief Maximum number of transmit completions per transmit event. */ -#define EF_VI_TRANSMIT_BATCH 64 +#define EF_VI_TRANSMIT_BATCH 64 /*! \brief Unbundle an event of type of type EF_EVENT_TYPE_TX or @@ -1966,8 +1958,8 @@ extern void ef_vi_transmit_init_undo(ef_vi* vi); ** re-used (for example as a packet buffer for a descriptor on the TX ring, ** or on the RX ring). */ -extern int ef_vi_transmit_unbundle(ef_vi* ep, const ef_event* event, - ef_request_id* ids); +extern int ef_vi_transmit_unbundle( + ef_vi* ep, const ef_event* event, ef_request_id* ids); /*! \brief Return the number of TX alternatives allocated for a virtual @@ -1982,7 +1974,6 @@ extern int ef_vi_transmit_unbundle(ef_vi* ep, const ef_event* event, extern unsigned ef_vi_transmit_alt_num_ids(ef_vi* vi); - /*! \brief Select a TX alternative as the destination for future sends ** ** \param vi The virtual interface associated with the TX alternative. 
@@ -1998,7 +1989,7 @@ extern unsigned ef_vi_transmit_alt_num_ids(ef_vi* vi); ** - if the TX alternative is in the GO state, the packet is immediately ** transmitted. */ -#define ef_vi_transmit_alt_select(vi, alt_id) \ +#define ef_vi_transmit_alt_select(vi, alt_id) \ (vi)->ops.transmit_alt_select((vi), (alt_id)) @@ -2012,7 +2003,7 @@ extern unsigned ef_vi_transmit_alt_num_ids(ef_vi* vi); ** immediately, in the normal way. This call undoes the effect of ** ef_vi_transmit_alt_select(). */ -#define ef_vi_transmit_alt_select_normal(vi) \ +#define ef_vi_transmit_alt_select_normal(vi) \ (vi)->ops.transmit_alt_select_default((vi)) @@ -2024,7 +2015,7 @@ extern unsigned ef_vi_transmit_alt_num_ids(ef_vi* vi); ** Transitions a TX alternative to the STOP state. Packets that are sent ** to a TX alternative in the STOP state are buffered on the adapter. */ -#define ef_vi_transmit_alt_stop(vi, alt_id) \ +#define ef_vi_transmit_alt_stop(vi, alt_id) \ (vi)->ops.transmit_alt_stop((vi), (alt_id)) @@ -2042,7 +2033,7 @@ extern unsigned ef_vi_transmit_alt_num_ids(ef_vi* vi); ** returned to the application. The application should normally wait until ** all packets have been sent before transitioning to a different state. */ -#define ef_vi_transmit_alt_go(vi, alt_id) \ +#define ef_vi_transmit_alt_go(vi, alt_id) \ (vi)->ops.transmit_alt_go((vi), (alt_id)) @@ -2063,7 +2054,7 @@ extern unsigned ef_vi_transmit_alt_num_ids(ef_vi* vi); ** Memory for the TX alternative remains allocated, and is not freed until ** the virtual interface is freed. */ -#define ef_vi_transmit_alt_discard(vi, alt_id) \ +#define ef_vi_transmit_alt_discard(vi, alt_id) \ (vi)->ops.transmit_alt_discard((vi), (alt_id)) @@ -2078,8 +2069,8 @@ extern unsigned ef_vi_transmit_alt_num_ids(ef_vi* vi); ** This function returns parameters which are needed by the ** ef_vi_transmit_alt_usage() function below. 
*/ -extern int ef_vi_transmit_alt_query_overhead(ef_vi* vi, - struct ef_vi_transmit_alt_overhead* params); +extern int ef_vi_transmit_alt_query_overhead( + ef_vi* vi, struct ef_vi_transmit_alt_overhead* params); /*! \brief Calculate a packet's buffer usage @@ -2111,9 +2102,8 @@ extern int ef_vi_transmit_alt_query_overhead(ef_vi* vi, ** is provided instead to allow applications to calculate their buffer ** usage accurately. */ -ef_vi_inline ef_vi_pure uint32_t -ef_vi_transmit_alt_usage(const struct ef_vi_transmit_alt_overhead* params, - uint32_t pkt_len) +ef_vi_inline ef_vi_pure uint32_t ef_vi_transmit_alt_usage( + const struct ef_vi_transmit_alt_overhead* params, uint32_t pkt_len) { pkt_len += params->pre_round; pkt_len &= params->mask; @@ -2182,10 +2172,10 @@ extern void ef_vi_set_tx_push_threshold(ef_vi* vi, unsigned threshold); ** The buffers referenced by @p frame_iov can be reused as soon as this ** call returns. */ -#define ef_vi_transmitv_ctpio(vi, frame_len, frame_iov, \ - frame_iov_len, ct_threshold) \ - (vi)->ops.transmitv_ctpio((vi), (frame_len), (frame_iov), \ - (frame_iov_len), (ct_threshold)) +#define ef_vi_transmitv_ctpio( \ + vi, frame_len, frame_iov, frame_iov_len, ct_threshold) \ + (vi)->ops.transmitv_ctpio( \ + (vi), (frame_len), (frame_iov), (frame_iov_len), (ct_threshold)) /*! \brief Transmit a packet using CTPIO from an array of buffers, @@ -2204,12 +2194,10 @@ extern void ef_vi_set_tx_push_threshold(ef_vi* vi, unsigned threshold); ** ef_vi_transmit_ctpio_fallback. This is an optimisation to avoid the need ** to copy the data in a separate step. */ -#define ef_vi_transmitv_ctpio_copy(vi, frame_len, frame_iov, \ - frame_iov_len, ct_threshold, \ - fallback) \ +#define ef_vi_transmitv_ctpio_copy( \ + vi, frame_len, frame_iov, frame_iov_len, ct_threshold, fallback) \ (vi)->ops.transmitv_ctpio_copy((vi), (frame_len), (frame_iov), \ - (frame_iov_len), (ct_threshold), \ - (fallback)) + (frame_iov_len), (ct_threshold), (fallback)) /*! 
\brief Transmit a packet using CTPIO ** @@ -2244,9 +2232,8 @@ extern void ef_vi_set_tx_push_threshold(ef_vi* vi, unsigned threshold); ** ** The buffer @p frame_buf can be reused as soon as this call returns. */ -ef_vi_inline void -ef_vi_transmit_ctpio(ef_vi* vi, const void* frame_buf, size_t frame_len, - unsigned ct_threshold) +ef_vi_inline void ef_vi_transmit_ctpio( + ef_vi* vi, const void* frame_buf, size_t frame_len, unsigned ct_threshold) { struct iovec iov = { (void*) frame_buf, frame_len }; ef_vi_transmitv_ctpio(vi, frame_len, &iov, 1, ct_threshold); @@ -2299,7 +2286,7 @@ ef_vi_transmit_ctpio(ef_vi* vi, const void* frame_buf, size_t frame_len, ** for all packet sizes. For use with ef_vi_transmit_ctpio() and ** ef_vi_transmitv_ctpio(). */ -#define EF_VI_CTPIO_CT_THRESHOLD_SNF 0xffff +#define EF_VI_CTPIO_CT_THRESHOLD_SNF 0xffff /*! \brief Request the NIC copy data from one place to another @@ -2331,8 +2318,8 @@ ef_vi_transmit_ctpio(ef_vi* vi, const void* frame_buf, size_t frame_len, ** -EOPNOTSUPP the VI was created without EF_VI_ALLOW_MEMCPY. */ #define ef_vi_transmit_memcpy(vi, dst_iov, dst_iov_len, src_iov, src_iov_len) \ - (vi)->ops.transmit_memcpy((vi), (dst_iov), (dst_iov_len), (src_iov), \ - (src_iov_len)) + (vi)->ops.transmit_memcpy( \ + (vi), (dst_iov), (dst_iov_len), (src_iov), (src_iov_len)) /*! \brief Require a completion event for all preceding ef_vi_transmit_memcpy() @@ -2351,7 +2338,7 @@ ef_vi_transmit_ctpio(ef_vi* vi, const void* frame_buf, size_t frame_len, ** -EAGAIN if the descriptor ring is full. ** -EOPNOTSUPP the VI was created without EF_VI_ALLOW_MEMCPY. 
*/ -#define ef_vi_transmit_memcpy_sync(vi, dma_id) \ +#define ef_vi_transmit_memcpy_sync(vi, dma_id) \ (vi)->ops.transmit_memcpy_sync((vi), (dma_id)) @@ -2384,6 +2371,7 @@ extern int ef_eventq_check_event_phase_bit(const ef_vi* vi, int look_ahead); extern int efxdp_ef_eventq_check_event(const ef_vi* vi, int look_ahead); extern int efct_ef_eventq_check_event(const ef_vi* vi); +extern int efswxtch_ef_eventq_check_event(const ef_vi* vi); /*! \brief Returns true if ef_eventq_poll() will return event(s) @@ -2394,8 +2382,7 @@ extern int efct_ef_eventq_check_event(const ef_vi* vi); ** ** Returns true if ef_eventq_poll() will return event(s). */ -ef_vi_inline int -ef_eventq_has_event(const ef_vi* vi) +ef_vi_inline int ef_eventq_has_event(const ef_vi* vi) { if( ! vi->evq_phase_bits ) return ef_eventq_check_event(vi, 0); @@ -2403,6 +2390,10 @@ ef_eventq_has_event(const ef_vi* vi) switch( vi->nic_type.arch ) { case EF_VI_ARCH_AF_XDP: return efxdp_ef_eventq_check_event(vi, 0); +#ifndef __KERNEL__ + case EF_VI_ARCH_SWXTCH: + return efswxtch_ef_eventq_check_event(vi); +#endif case EF_VI_ARCH_EFCT: return efct_ef_eventq_check_event(vi); default: @@ -2428,8 +2419,7 @@ ef_eventq_has_event(const ef_vi* vi) ** This function returns quickly. It is useful for an application to ** determine whether it is falling behind in its event processing. */ -ef_vi_inline int -ef_eventq_has_many_events(const ef_vi* evq, int n_events) +ef_vi_inline int ef_eventq_has_many_events(const ef_vi* evq, int n_events) { if( evq->evq_phase_bits ) return ef_eventq_check_event_phase_bit(evq, n_events); @@ -2467,10 +2457,19 @@ ef_eventq_has_many_events(const ef_vi* evq, int n_events) ** This function returns immediately, even if there are no outstanding ** events. The array might not be full on return. 
*/ -#define ef_eventq_poll(evq, evs, evs_len) \ +#define ef_eventq_poll(evq, evs, evs_len) \ (evq)->ops.eventq_poll((evq), (evs), (evs_len)) +/* These are hacky functions used to onload packets into mbufs */ +#define ef_fill_rx_data(evq, pkt, index) \ + (evq)->ops.rx_fill_pkt((evq), (pkt), (index)) +#define ef_fill_tx_data(evq, pkt, len) \ + (evq)->ops.tx_fill_pkt((evq), (pkt), (len)) + +/* This will refill the rx fill queue to the level indicated by the netif */ +#define ef_resume_rx(evq) (evq)->ops.refill_rx((evq)) + /*! \brief Returns the capacity of an event queue ** ** \param vi The event queue to query. @@ -2525,11 +2524,11 @@ enum ef_vi_layout_type { /*! \brief Layout of the data that is delivered into receive buffers. */ typedef struct { /** The type of layout */ - enum ef_vi_layout_type evle_type; + enum ef_vi_layout_type evle_type; /** Offset to the data */ - int evle_offset; + int evle_offset; /** Description of the layout */ - const char* evle_description; + const char* evle_description; } ef_vi_layout_entry; @@ -2553,10 +2552,8 @@ typedef struct { ** The first entry is always of type EF_VI_LAYOUT_FRAME, and the offset is ** the same as the value returned by ef_vi_receive_prefix_len(). */ -extern int -ef_vi_receive_query_layout(ef_vi* vi, - const ef_vi_layout_entry**const layout_out, - int* layout_len_out); +extern int ef_vi_receive_query_layout(ef_vi* vi, + const ef_vi_layout_entry** const layout_out, int* layout_len_out); /*! \brief Retrieve the discard flags associated with a received packet. @@ -2574,11 +2571,10 @@ ef_vi_receive_query_layout(ef_vi* vi, ** ** Read CLASS field from the prefix of received packet and return discard flags ** about packet length, CRC or checksum validation errors.
-** +** */ -extern int -ef_vi_receive_get_discard_flags(ef_vi* vi, const void* pkt, - unsigned* discard_flags); +extern int ef_vi_receive_get_discard_flags( + ef_vi* vi, const void* pkt, unsigned* discard_flags); #ifdef __cplusplus } diff --git a/src/lib/ciul/ef_vi_internal.h b/src/lib/ciul/ef_vi_internal.h index 5cd0d59e8..b5c008bc0 100644 --- a/src/lib/ciul/ef_vi_internal.h +++ b/src/lib/ciul/ef_vi_internal.h @@ -28,38 +28,44 @@ * Initialisation state. */ -#define EF_VI_INITED_NIC 0x1 -#define EF_VI_INITED_IO 0x2 -#define EF_VI_INITED_RXQ 0x4 -#define EF_VI_INITED_TXQ 0x8 -#define EF_VI_INITED_EVQ 0x10 -#define EF_VI_INITED_TIMER 0x20 -#define EF_VI_INITED_RX_TIMESTAMPING 0x40 -#define EF_VI_INITED_TX_TIMESTAMPING 0x80 -#define EF_VI_INITED_OUT_FLAGS 0x100 +#define EF_VI_INITED_NIC 0x1 +#define EF_VI_INITED_IO 0x2 +#define EF_VI_INITED_RXQ 0x4 +#define EF_VI_INITED_TXQ 0x8 +#define EF_VI_INITED_EVQ 0x10 +#define EF_VI_INITED_TIMER 0x20 +#define EF_VI_INITED_RX_TIMESTAMPING 0x40 +#define EF_VI_INITED_TX_TIMESTAMPING 0x80 +#define EF_VI_INITED_OUT_FLAGS 0x100 /********************************************************************** * Debugging. */ -#define __EF_VI_BUILD_ASSERT_NAME(_x) __EF_VI_BUILD_ASSERT_ILOATHECPP(_x) -#define __EF_VI_BUILD_ASSERT_ILOATHECPP(_x) __EF_VI_BUILD_ASSERT__##_x +#define __EF_VI_BUILD_ASSERT_NAME(_x) __EF_VI_BUILD_ASSERT_ILOATHECPP(_x) +#define __EF_VI_BUILD_ASSERT_ILOATHECPP(_x) __EF_VI_BUILD_ASSERT__##_x #define EF_VI_BUILD_ASSERT(e) \ - { __attribute__((__unused__)) \ - typedef char __EF_VI_BUILD_ASSERT_NAME(__LINE__)[(e) ? 1 : -1]; } + { \ + __attribute__((__unused__)) typedef char __EF_VI_BUILD_ASSERT_NAME( \ + __LINE__)[(e) ? 
1 : -1]; \ + } #ifdef NDEBUG -# define EF_VI_ASSERT(x) do{}while(0) -# ifdef __KERNEL__ -# define EF_VI_BUG_ON(x) WARN_ON(x) -# else -# define EF_VI_BUG_ON(x) do{}while(0) -# endif +#define EF_VI_ASSERT(x) \ + do { \ + } while( 0 ) +#ifdef __KERNEL__ +#define EF_VI_BUG_ON(x) WARN_ON(x) #else -# define EF_VI_ASSERT(x) BUG_ON(!(x)) -# define EF_VI_BUG_ON(x) BUG_ON(x) +#define EF_VI_BUG_ON(x) \ + do { \ + } while( 0 ) +#endif +#else +#define EF_VI_ASSERT(x) BUG_ON(! (x)) +#define EF_VI_BUG_ON(x) BUG_ON(x) #endif @@ -68,101 +74,100 @@ */ #ifdef NDEBUG -# define EF_VI_DEBUG(x) +#define EF_VI_DEBUG(x) #else -# define EF_VI_DEBUG(x) x +#define EF_VI_DEBUG(x) x #endif -#define EF_VI_ROUND_UP(i, align) (((i)+(align)-1u) & ~((align)-1u)) -#define EF_VI_ALIGN_FWD(p, align) (((p)+(align)-1u) & ~((align)-1u)) -#define EF_VI_ALIGN_BACK(p, align) ((p) & ~((align)-1u)) -#define EF_VI_PTR_ALIGN_BACK(p, align) \ - ((char*)EF_VI_ALIGN_BACK(((intptr_t)(p)), ((intptr_t)(align)))) -#define EF_VI_IS_POW2(x) ((x) && ! ((x) & ((x) - 1))) +#define EF_VI_ROUND_UP(i, align) (((i) + (align) -1u) & ~((align) -1u)) +#define EF_VI_ALIGN_FWD(p, align) (((p) + (align) -1u) & ~((align) -1u)) +#define EF_VI_ALIGN_BACK(p, align) ((p) & ~((align) -1u)) +#define EF_VI_PTR_ALIGN_BACK(p, align) \ + ((char*) EF_VI_ALIGN_BACK(((intptr_t) (p)), ((intptr_t) (align)))) +#define EF_VI_IS_POW2(x) ((x) && ! ((x) & ((x) -1))) /* This macro must be defined to the same value as EFHW_NIC_PAGE_SIZE * in ci/efhw/common.h. 
Only defined numerically so that there be no * dependency on that header here */ -#define EF_VI_PAGE_SIZE 4096 -#define EF_VI_PAGE_SHIFT 12 +#define EF_VI_PAGE_SIZE 4096 +#define EF_VI_PAGE_SHIFT 12 -#define EF_VI_TX_TIMESTAMP_TS_NSEC_INVALID (1u<<30) +#define EF_VI_TX_TIMESTAMP_TS_NSEC_INVALID (1u << 30) -#define EF_VI_EV_SIZE 8 +#define EF_VI_EV_SIZE 8 -#define EF_VI_EVS_PER_CACHE_LINE (EF_VI_CACHE_LINE_SIZE / EF_VI_EV_SIZE) +#define EF_VI_EVS_PER_CACHE_LINE (EF_VI_CACHE_LINE_SIZE / EF_VI_EV_SIZE) /* required for CI_PAGE_SIZE and related things */ #include "ci/compat.h" /*---------------------------------------------------------------------------- * - * Helpers to turn bit shifts into dword shifts and check that the bit fields - * haven't overflown the dword etc. Aim is to preserve consistency with the + * Helpers to turn bit shifts into dword shifts and check that the bit fields + * haven't overflown the dword etc. Aim is to preserve consistency with the * autogenerated headers - once stable we could hard code. 
* *---------------------------------------------------------------------------*/ /* mask constructors */ -#define __EFVI_MASK(WIDTH,T) ((((T)1) << (WIDTH)) - 1) -#define __EFVI_MASK32(WIDTH) __EFVI_MASK((WIDTH),uint32_t) -#define __EFVI_MASK64(WIDTH) __EFVI_MASK((WIDTH),uint64_t) +#define __EFVI_MASK(WIDTH, T) ((((T) 1) << (WIDTH)) - 1) +#define __EFVI_MASK32(WIDTH) __EFVI_MASK((WIDTH), uint32_t) +#define __EFVI_MASK64(WIDTH) __EFVI_MASK((WIDTH), uint64_t) -#define __EFVI_MASKFIELD32(LBN, WIDTH) ((uint32_t) \ - (__EFVI_MASK32(WIDTH) << (LBN))) +#define __EFVI_MASKFIELD32(LBN, WIDTH) \ + ((uint32_t) (__EFVI_MASK32(WIDTH) << (LBN))) /* constructors for fields which span the first and second dwords */ #define __LW(LBN) (32 - LBN) -#define LOW(v, LBN, WIDTH) ((uint32_t) \ - (((v) & __EFVI_MASK64(__LW((LBN)))) << (LBN))) -#define HIGH(v, LBN, WIDTH) ((uint32_t)(((v) >> __LW((LBN))) & \ - __EFVI_MASK64((WIDTH - __LW((LBN)))))) +#define LOW(v, LBN, WIDTH) \ + ((uint32_t) (((v) &__EFVI_MASK64(__LW((LBN)))) << (LBN))) +#define HIGH(v, LBN, WIDTH) \ + ((uint32_t) (((v) >> __LW((LBN))) & __EFVI_MASK64((WIDTH - __LW((LBN)))))) /* constructors for fields within the second dword */ -#define __DW2(LBN) ((LBN) - 32) +#define __DW2(LBN) ((LBN) -32) /* constructors for fields which span the second and third dwords */ #define __LW2(LBN) (64 - LBN) -#define LOW2(v, LBN, WIDTH) ((uint32_t) \ - (((v) & __EFVI_MASK64(__LW2((LBN)))) << ((LBN) - 32))) -#define HIGH2(v, LBN, WIDTH) ((uint32_t) \ - (((v) >> __LW2((LBN))) & __EFVI_MASK64((WIDTH - __LW2((LBN)))))) +#define LOW2(v, LBN, WIDTH) \ + ((uint32_t) (((v) &__EFVI_MASK64(__LW2((LBN)))) << ((LBN) -32))) +#define HIGH2(v, LBN, WIDTH) \ + ((uint32_t) (((v) >> __LW2((LBN))) & __EFVI_MASK64((WIDTH - __LW2((LBN)))))) /* constructors for fields within the third dword */ -#define __DW3(LBN) ((LBN) - 64) +#define __DW3(LBN) ((LBN) -64) + - /* constructors for fields which span the third and fourth dwords */ #define __LW3(LBN) (96 - LBN) 
-#define LOW3(v, LBN, WIDTH) ((uint32_t) \ - (((v) & __EFVI_MASK64(__LW3((LBN)))) << ((LBN) - 64))) -#define HIGH3(v, LBN, WIDTH) ((unit32_t) \ - (((v) >> __LW3((LBN))) & __EFVI_MASK64((WIDTH - __LW3((LBN)))))) +#define LOW3(v, LBN, WIDTH) \ + ((uint32_t) (((v) &__EFVI_MASK64(__LW3((LBN)))) << ((LBN) -64))) +#define HIGH3(v, LBN, WIDTH) \ + ((uint32_t) (((v) >> __LW3((LBN))) & __EFVI_MASK64((WIDTH - __LW3((LBN)))))) /* constructors for fields within the fourth dword */ -#define __DW4(LBN) ((LBN) - 96) +#define __DW4(LBN) ((LBN) -96) /* checks that the autogenerated headers our consistent with our model */ -#define WIDTHCHCK(a, b) EF_VI_BUG_ON((a) != (b)) -#define RANGECHCK(v, WIDTH) \ - EF_VI_BUG_ON(((uint64_t)(v) & ~(__EFVI_MASK64((WIDTH)))) != 0) +#define WIDTHCHCK(a, b) EF_VI_BUG_ON((a) != (b)) +#define RANGECHCK(v, WIDTH) \ + EF_VI_BUG_ON(((uint64_t) (v) & ~(__EFVI_MASK64((WIDTH)))) != 0) /* fields within the first dword */ -#define DWCHCK(LBN, WIDTH) EF_VI_BUG_ON(((LBN) < 0) || (((LBN)+(WIDTH)) > 32)) +#define DWCHCK(LBN, WIDTH) \ + EF_VI_BUG_ON(((LBN) < 0) || (((LBN) + (WIDTH)) > 32)) /* fields which span the first and second dwords */ -#define LWCHK(LBN, WIDTH) EF_VI_BUG_ON(WIDTH < __LW(LBN)) +#define LWCHK(LBN, WIDTH) EF_VI_BUG_ON(WIDTH < __LW(LBN)) /********************************************************************** * Extracting bit fields. */ -#define QWORD_GET_U(field, val) \ - ((unsigned) CI_QWORD_FIELD(val, field)) +#define QWORD_GET_U(field, val) ((unsigned) CI_QWORD_FIELD(val, field)) -#define QWORD_TEST_BIT(field, val) \ - ( !!CI_QWORD_FIELD(val, field) ) +#define QWORD_TEST_BIT(field, val) (! ! CI_QWORD_FIELD(val, field)) /********************************************************************** @@ -170,10 +175,10 @@ */ /* The gap left after each packet in a packed stream buffer. */ -#define EF_VI_PS_PACKET_GAP 64 +#define EF_VI_PS_PACKET_GAP 64 /* Firmware aligns DMAs onto this boundary.
*/ -#define EF_VI_PS_ALIGNMENT 64 +#define EF_VI_PS_ALIGNMENT 64 /* The negative offset from the start of a packet's DMA to where we put the * ef_packed_stream_packet header. @@ -183,8 +188,7 @@ * cache line so that we only have to write into one cache line, and so * that we don't dirty the cache line that holds packet data. */ -#define EF_VI_PS_METADATA_OFFSET \ - (sizeof(ef_packed_stream_packet)) +#define EF_VI_PS_METADATA_OFFSET (sizeof(ef_packed_stream_packet)) /* The amount of space we leave at the start of each buffer before the * first DMA. Needs to be enough space for ef_packed_stream_packet, plus @@ -194,33 +198,33 @@ * Firmware requires this be a multiple of EF_VI_PS_ALIGNMENT, and also * important for it to be a multiple of EF_VI_DMA_ALIGN. */ -#define EF_VI_PS_DMA_START_OFFSET 256 +#define EF_VI_PS_DMA_START_OFFSET 256 /* Doxbox SF-112241-TC: One credit is consumed on crossing a 64KB boundary * in buffer space. */ -#define EF_VI_PS_SPACE_PER_CREDIT 0x10000 +#define EF_VI_PS_SPACE_PER_CREDIT 0x10000 /********************************************************************** * Custom descriptor for ef_vi_transmit_memcpy_sync() (of type EV_DRIVER) */ -#define EF_VI_EV_DRIVER_MEMCPY_SYNC_DMA_ID_LBN 0 -#define EF_VI_EV_DRIVER_MEMCPY_SYNC_DMA_ID_WIDTH 32 -#define EF_VI_EV_DRIVER_SUBTYPE_LBN 55 -#define EF_VI_EV_DRIVER_SUBTYPE_WIDTH 4 -#define EF_VI_EV_DRIVER_SUBTYPE_MEMCPY_SYNC 15 +#define EF_VI_EV_DRIVER_MEMCPY_SYNC_DMA_ID_LBN 0 +#define EF_VI_EV_DRIVER_MEMCPY_SYNC_DMA_ID_WIDTH 32 +#define EF_VI_EV_DRIVER_SUBTYPE_LBN 55 +#define EF_VI_EV_DRIVER_SUBTYPE_WIDTH 4 +#define EF_VI_EV_DRIVER_SUBTYPE_MEMCPY_SYNC 15 -/* ******************************************************************** +/* ******************************************************************** */ extern void ef10_vi_init(ef_vi*) EF_VI_HF; extern void ef10_ef_eventq_prime(ef_vi*); extern void ef10_ef_eventq_prime_bug35388_workaround(ef_vi*); -extern int ef10_ef_eventq_poll(ef_vi*, ef_event*, int evs_len); 
+extern int ef10_ef_eventq_poll(ef_vi*, ef_event*, int evs_len); extern void ef10_ef_eventq_timer_prime(ef_vi*, unsigned v); extern void ef10_ef_eventq_timer_run(ef_vi*, unsigned v); @@ -230,7 +234,7 @@ extern void ef10_ef_eventq_timer_zero(ef_vi*); extern void ef100_vi_init(ef_vi*) EF_VI_HF; extern void ef100_ef_eventq_prime(ef_vi*); -extern int ef100_ef_eventq_poll(ef_vi*, ef_event*, int evs_len); +extern int ef100_ef_eventq_poll(ef_vi*, ef_event*, int evs_len); extern void ef100_ef_eventq_timer_prime(ef_vi*, unsigned v); extern void ef100_ef_eventq_timer_run(ef_vi*, unsigned v); @@ -240,26 +244,25 @@ extern void ef100_ef_eventq_timer_zero(ef_vi*); extern void efxdp_vi_init(ef_vi*) EF_VI_HF; extern long efxdp_vi_mmap_bytes(ef_vi*); +extern void efswxtch_vi_init(ef_vi*) EF_VI_HF; + extern void efct_vi_init(ef_vi*) EF_VI_HF; -extern int efct_vi_mmap_init(ef_vi* vi, int rxq_capacity) EF_VI_HF; +extern int efct_vi_mmap_init(ef_vi* vi, int rxq_capacity) EF_VI_HF; extern void efct_vi_munmap(ef_vi* vi) EF_VI_HF; -extern int ef_pd_cluster_free(ef_pd*, ef_driver_handle); +extern int ef_pd_cluster_free(ef_pd*, ef_driver_handle); extern void ef_vi_packed_stream_update_credit(ef_vi* vi); extern void ef_vi_set_intf_ver(char* intf_ver, size_t len); enum ef_vi_capability; -extern int -__ef_vi_capabilities_get(ef_driver_handle handle, int ifindex, int pd_id, - ef_driver_handle pd_dh, enum ef_vi_capability cap, - unsigned long* value); -extern int -ef_pd_capabilities_get(ef_driver_handle handle, ef_pd* pd, - ef_driver_handle pd_dh, enum ef_vi_capability cap, - unsigned long* value); +extern int __ef_vi_capabilities_get(ef_driver_handle handle, int ifindex, + int pd_id, ef_driver_handle pd_dh, enum ef_vi_capability cap, + unsigned long* value); +extern int ef_pd_capabilities_get(ef_driver_handle handle, ef_pd* pd, + ef_driver_handle pd_dh, enum ef_vi_capability cap, unsigned long* value); extern unsigned ef_vi_evq_clear_stride(void); -#endif /* __CI_EF_VI_INTERNAL_H__ */ +#endif 
/* __CI_EF_VI_INTERNAL_H__ */ diff --git a/src/lib/ciul/efswxtch_vi.c b/src/lib/ciul/efswxtch_vi.c new file mode 100644 index 000000000..b20fa953e --- /dev/null +++ b/src/lib/ciul/efswxtch_vi.c @@ -0,0 +1,572 @@ +#include "ef_vi_internal.h" +#include "logging.h" +#include +#include +#include + +/* + The names of the rings must match what is in the swxtch primary process + */ +static const char *_TX_RING = "TX_RING"; +static const char *_TX_COMP_RING = "TX_COMP_RING"; +static const char *_RX_RING = "RX_RING"; +static const char *_RX_FILL_RING = "RX_FILL_RING"; +static const char *_RX_MBUF_POOL = "RX_MBUF_POOL"; +static const char *_RX_PENDING_RING = "RX_PENDING_RING"; + +typedef struct swxtch_ring_state { + uint32_t old_head; + uint32_t new_head; +} swxtch_ring_state; + +/* This type is a wrapper around DPDK rings that maintains state and allows + * splitting enqueue operations across function calls */ +typedef struct swxtch_ring { + struct rte_ring *r; + swxtch_ring_state states[16]; + uint16_t num_states; +} swxtch_ring; + + +typedef struct swxtch_rings { + struct rte_mempool *mempool; // basic mempool to pull mbufs from + swxtch_ring rx_fill_ring; // rx fill ring to fill indicate to + // that rx packets can be received + struct rte_ring *rx_ring; // ring containing rx packets + struct swxtch_ring tx_ring; // ring contain packets to send + struct rte_ring *tx_comp_ring; // ring containing packets that were sent + // in between polling and reading rx packets live here + struct rte_ring *rx_pending_ring; +} swxtch_rings; + +static swxtch_rings m_rings; + +// drain a ring on startup to free any stuck mbufs +void efswxtch_drain_ring(struct rte_ring *ring, int should_free) +{ + unsigned available = 0; + int count = 0; + int burst_size = 32; + void *bufs[burst_size]; + unsigned drained = 0; + + do { + count = rte_ring_dequeue_burst(ring, &bufs[0], burst_size, &available); + + if( should_free ) { + rte_pktmbuf_free_bulk((struct rte_mbuf **) bufs, count); + } + + drained 
+= count; + } while( available != 0 ); +} + +void efswxtch_drain_rings(void) +{ + efswxtch_drain_ring(m_rings.rx_fill_ring.r, 0); + efswxtch_drain_ring(m_rings.tx_ring.r, 1); + efswxtch_drain_ring(m_rings.rx_ring, 1); + efswxtch_drain_ring(m_rings.tx_comp_ring, 1); + efswxtch_drain_ring(m_rings.rx_pending_ring, 1); +} + +int efswxtch_init_rings(void) +{ + m_rings.mempool = rte_mempool_lookup(_RX_MBUF_POOL); + m_rings.rx_fill_ring.r = rte_ring_lookup(_RX_FILL_RING); + m_rings.rx_ring = rte_ring_lookup(_RX_RING); + m_rings.tx_ring.r = rte_ring_lookup(_TX_RING); + m_rings.tx_comp_ring = rte_ring_lookup(_TX_COMP_RING); + m_rings.rx_pending_ring = rte_ring_lookup(_RX_PENDING_RING); + if( m_rings.mempool == NULL ) { + ef_log("NO MEMPOOL"); + return -1; + } + if( m_rings.tx_ring.r == NULL ) { + ef_log("NO TX RING"); + return -1; + } + + if( m_rings.tx_comp_ring == NULL ) { + ef_log("NO TX COMP RING"); + return -1; + } + if( m_rings.rx_fill_ring.r == NULL ) { + ef_log("NO FILL RING"); + return -1; + } + if( m_rings.rx_ring == NULL ) { + ef_log("NO RX RING"); + return -1; + } + if( m_rings.rx_pending_ring == NULL ) { + ef_log("NO RX Pending RING"); + return -1; + } + + efswxtch_drain_rings(); + + return 0; +} + +// Enqueues as many mbufs as it can. 
Returns false if not all are enqueued and +// will free any mbufs that aren't enqueued +static int efswxtch_ef_vi_safe_enqueue( + struct rte_ring *ring, struct rte_mbuf **mbufs, unsigned count) +{ + unsigned enq = rte_ring_enqueue_burst(ring, (void **) mbufs, count, NULL); + if( unlikely(enq != count) ) { + ef_log("Only enqueued %d:%d on ring: %s", enq, count, ring->name); + rte_pktmbuf_free_bulk((struct rte_mbuf **) &mbufs[enq], count - enq); + return 0; + } + + return 1; +} + +static __rte_always_inline void swxtch_ring_add_state( + swxtch_ring *ring, uint32_t prev, uint32_t new) +{ + if( likely(ring->r->prod.single) ) { + return; + } + + if( unlikely(ring->num_states == 0) ) { + ring->num_states++; + ring->states[0].new_head = new; + ring->states[0].old_head = prev; + return; + } + + swxtch_ring_state *state = &ring->states[ring->num_states - 1]; + if( likely(state->new_head == prev) ) { + // the new state to add is continugous with the previously added one. Just + // extend the range and move on. + state->new_head = new; + } else { + ring->num_states++; + state = &ring->states[ring->num_states - 1]; + state->new_head = new; + state->old_head = prev; + } +} + +static __rte_always_inline int swxtch_ring_enqueue( + swxtch_ring *ring, unsigned n) +{ + const uint32_t capacity = ring->r->capacity; + unsigned int max = n; + int success; + uint32_t free_entries; + uint32_t old_head, new_head; + + do { + /* Reset n to the initial burst count */ + n = max; + + old_head = ring->r->prod.head; + + /* add rmb barrier to avoid load/load reorder in weak + * memory model. It is noop on x86 + */ + rte_smp_rmb(); + + /* + * The subtraction is done between two unsigned 32bits value + * (the result is always modulo 32 bits even if we have + * *old_head > cons_tail). So 'free_entries' is always between 0 + * and capacity (which is < size). 
+     */
+    free_entries = (capacity + ring->r->cons.tail - old_head);
+
+    /* check that we have enough room in ring */
+    if( unlikely(n > free_entries) )
+      n = 0;
+
+    if( n == 0 )
+      return 0;
+
+    new_head = old_head + n;
+    if( likely(ring->r->prod.single) ) {
+      success = 1;
+      ring->r->prod.head = new_head;
+    } else {
+      /* Several producers: only the one whose compare-and-set succeeds owns
+       * the range, the others go round the loop again. */
+      success = rte_atomic32_cmpset(&ring->r->prod.head, old_head, new_head);
+    }
+
+  } while( unlikely(success == 0) );
+
+  swxtch_ring_add_state(ring, old_head, new_head);
+  return n;
+}
+
+/* Store obj_table[0] in the slot just before the current producer head, i.e.
+ * the slot reserved by the most recent swxtch_ring_enqueue(ring, 1).
+ * This function only works for single producers and 1 packet: it must only
+ * be called after a successful one-slot reservation.  The slot array is
+ * taken to start directly behind the ring header (&ring->r[1]). */
+static __rte_always_inline void swxtch_enqueue_ptrs(
+    swxtch_ring *ring, void **obj_table)
+{
+  uint32_t head = ring->r->prod.head - 1;
+  uint32_t idx = head & ring->r->mask;
+  void **start = (void **) &ring->r[1];
+
+  start[idx] = obj_table[0];
+}
+
+/* Publish everything reserved so far by moving the producer tail.
+ *
+ * With prod.single set the tail simply catches up with the head.  Otherwise
+ * each recorded state is published in order, spinning with rte_pause() until
+ * the tail has reached that state's old_head, and the state list is then
+ * emptied. */
+static __rte_always_inline void swxtch_ring_confirm(swxtch_ring *ring)
+{
+  /* Slot contents must be visible before the tail update exposes them. */
+  rte_smp_wmb();
+  if( likely(ring->r->prod.single) ) {
+    ring->r->prod.tail = ring->r->prod.head;
+    return;
+  }
+  for( int i = 0; i < ring->num_states; ++i ) {
+    while( unlikely(ring->r->prod.tail != ring->states[i].old_head) ) {
+      rte_pause();
+    }
+    ring->r->prod.tail = ring->states[i].new_head;
+  }
+  ring->num_states = 0;
+}
+
+
+/* Copy len bytes from pkt into an mbuf taken from m_rings.mempool and store
+ * that mbuf in a freshly reserved slot of the TX ring.  The slot is not
+ * published here; that happens in swxtch_ring_confirm(), reached through
+ * efswxtch_ef_vi_transmit_push().  vi is not used.
+ *
+ * Failures are logged and the packet is dropped:
+ *  - no mbuf available: nothing else happens;
+ *  - no free TX ring slot: the mbuf goes back to the pool and the ring is
+ *    left untouched.  Writing the pointer anyway would overwrite the slot
+ *    before the producer head, which was not reserved for this packet.
+ *
+ * NOTE(review): len is not checked against the mbuf's data room and only
+ * data_len is set (not pkt_len) -- confirm the consumer needs nothing
+ * more. */
+static void efswxtch_ef_vi_tx_fill_pkt(
+    ef_vi *vi, const char *pkt, const unsigned len)
+{
+  struct rte_mbuf *mbufs[1];
+
+  if( rte_mempool_get_bulk(m_rings.mempool, (void **) mbufs, 1) != 0 ) {
+    ef_log("failed to fill tx packet. Out of buffers");
+    return;
+  }
+
+  memcpy(rte_pktmbuf_mtod(mbufs[0], char *), pkt, len);
+  mbufs[0]->data_len = len;
+
+  if( swxtch_ring_enqueue(&m_rings.tx_ring, 1) == 0 ) {
+    ef_log("failed to enqueue tx packet.");
+    /* Undo the rte_mempool_get_bulk() above so the buffer is not leaked. */
+    rte_mempool_put_bulk(m_rings.mempool, (void **) mbufs, 1);
+    return;
+  }
+
+  swxtch_enqueue_ptrs(&m_rings.tx_ring, (void **) mbufs);
+}
+
+/* Account for one TX descriptor.
+ *
+ * Returns -EINVAL unless iov_len is exactly 1, -EAGAIN when
+ * qs->added - qs->removed has reached q->mask, and 0 after storing dma_id in
+ * the next q->ids slot.  The iovec contents are not read here. */
+static int efswxtch_ef_vi_transmitv_init(
+    ef_vi *vi, const ef_iovec *iov, int iov_len, ef_request_id dma_id)
+{
+  ef_vi_txq *q = &vi->vi_txq;
+  ef_vi_txq_state *qs = &vi->ep_state->txq;
+  int i;
+  if( iov_len != 1 )
+    return -EINVAL; /* Multiple buffers per packet not supported */
+
+  if( qs->added - qs->removed >= q->mask )
+    return -EAGAIN;
+
+  i = qs->added++ & q->mask;
+  EF_VI_BUG_ON(q->ids[i] != EF_REQUEST_ID_MASK);
+  q->ids[i] = dma_id;
+  return 0;
+}
+
+/* Publish all TX ring slots reserved so far. */
+static void efswxtch_ef_vi_transmit_push(ef_vi *vi)
+{
+  swxtch_ring_confirm(&m_rings.tx_ring);
+}
+
+/* Single-buffer transmit: record dma_id via transmitv_init and, on success,
+ * push the TX ring.  Returns the transmitv_init result. */
+static int efswxtch_ef_vi_transmit(
+    ef_vi *vi, ef_addr base, int len, ef_request_id dma_id)
+{
+  ef_iovec iov = { base, len };
+  int rc = efswxtch_ef_vi_transmitv_init(vi, &iov, 1, dma_id);
+  if( likely(rc == 0) ) {
+    wmb();
+    efswxtch_ef_vi_transmit_push(vi);
+  }
+  return rc;
+}
+
+/* Vector transmit: same as efswxtch_ef_vi_transmit() but takes the caller's
+ * iovec; transmitv_init rejects anything other than iov_len == 1. */
+static int efswxtch_ef_vi_transmitv(
+    ef_vi *vi, const ef_iovec *iov, int iov_len, ef_request_id dma_id)
+{
+  int rc = efswxtch_ef_vi_transmitv_init(vi, iov, iov_len, dma_id);
+  if( likely(rc == 0) ) {
+    wmb();
+    efswxtch_ef_vi_transmit_push(vi);
+  }
+  return rc;
+}
+
+/* PIO is unsupported: always returns -EOPNOTSUPP. */
+static int efswxtch_ef_vi_transmit_pio(
+    ef_vi *vi, int offset, int len, ef_request_id dma_id)
+{
+  return -EOPNOTSUPP;
+}
+
+/* PIO is unsupported: always returns -EOPNOTSUPP. */
+static int efswxtch_ef_vi_transmit_copy_pio(
+    ef_vi *vi, int offset, const void *src_buf, int len, ef_request_id dma_id)
+{
+  return -EOPNOTSUPP;
+}
+
+static void efswxtch_ef_vi_transmit_pio_warm(ef_vi *vi)
+{
+  /* PIO is unsupported so do nothing */
+}
+
+static void efswxtch_ef_vi_transmit_copy_pio_warm(
+    ef_vi *vi, int pio_offset, const void *src_buf, int len)
+{
+  /* PIO is unsupported so do nothing */
+}
+
+static void efswxtch_ef_vi_transmitv_ctpio(ef_vi *vi, size_t frame_len,
+    const struct iovec *iov, int iovcnt, unsigned threshold)
+{
+  /* CTPIO is unsupported so do nothing. Fallback will send the packet.
*/
+}
+
+static void efswxtch_ef_vi_transmitv_ctpio_copy(ef_vi *vi, size_t frame_len,
+    const struct iovec *iov, int iovcnt, unsigned threshold, void *fallback)
+{
+  // TODO copy to fallback
+}
+
+/* CTPIO fallback for a single buffer: asserts that the VI was created with
+ * EF_VI_TX_CTPIO and then sends through the ordinary transmit path. */
+static int efswxtch_ef_vi_transmit_ctpio_fallback(
+    ef_vi *vi, ef_addr dma_addr, size_t len, ef_request_id dma_id)
+{
+  EF_VI_ASSERT(vi->vi_flags & EF_VI_TX_CTPIO);
+  return efswxtch_ef_vi_transmit(vi, dma_addr, len, dma_id);
+}
+
+/* CTPIO fallback for an iovec: asserts that the VI was created with
+ * EF_VI_TX_CTPIO and then sends through the ordinary transmitv path. */
+static int efswxtch_ef_vi_transmitv_ctpio_fallback(
+    ef_vi *vi, const ef_iovec *dma_iov, int dma_iov_len, ef_request_id dma_id)
+{
+  EF_VI_ASSERT(vi->vi_flags & EF_VI_TX_CTPIO);
+  return efswxtch_ef_vi_transmitv(vi, dma_iov, dma_iov_len, dma_id);
+}
+
+/* Installed when EF_VI_TX_CTPIO is not set: always returns -EOPNOTSUPP. */
+static int efswxtch_ef_vi_transmit_ctpio_fallback_not_supp(
+    ef_vi *vi, ef_addr dma_addr, size_t len, ef_request_id dma_id)
+{
+  return -EOPNOTSUPP;
+}
+
+/* Installed when EF_VI_TX_CTPIO is not set: always returns -EOPNOTSUPP. */
+static int efswxtch_ef_vi_transmitv_ctpio_fallback_not_supp(
+    ef_vi *vi, const ef_iovec *dma_iov, int dma_iov_len, ef_request_id dma_id)
+{
+  return -EOPNOTSUPP;
+}
+
+/* The transmit_alt_* operations below are not implemented for this backend;
+ * each of them always returns -EOPNOTSUPP. */
+static int efswxtch_ef_vi_transmit_alt_select(ef_vi *vi, unsigned alt_id)
+{
+  return -EOPNOTSUPP;
+}
+
+static int efswxtch_ef_vi_transmit_alt_select_normal(ef_vi *vi)
+{
+  return -EOPNOTSUPP;
+}
+
+static int efswxtch_ef_vi_transmit_alt_stop(ef_vi *vi, unsigned alt_id)
+{
+  return -EOPNOTSUPP;
+}
+
+static int efswxtch_ef_vi_transmit_alt_discard(ef_vi *vi, unsigned alt_id)
+{
+  return -EOPNOTSUPP;
+}
+
+static int efswxtch_ef_vi_transmit_alt_go(ef_vi *vi, unsigned alt_id)
+{
+  return -EOPNOTSUPP;
+}
+
+/* Not implemented for this backend: always returns -EOPNOTSUPP. */
+static ssize_t efswxtch_ef_vi_transmit_memcpy(struct ef_vi *vi,
+    const ef_remote_iovec *dst_iov, int dst_iov_len,
+    const ef_remote_iovec *src_iov, int src_iov_len)
+{
+  return -EOPNOTSUPP;
+}
+
+/* Not implemented for this backend: always returns -EOPNOTSUPP. */
+static int efswxtch_ef_vi_transmit_memcpy_sync(
+    struct ef_vi *vi, ef_request_id dma_id)
+{
+  return -EOPNOTSUPP;
+}
+
+
+/* Post one RX buffer: reserve a slot on the RX fill ring and remember dma_id
+ * in the next entry of the RX id table.
+ *
+ * Returns 0 on success.  Returns -EAGAIN, with the RX queue state left
+ * untouched, when the id table is full (qs->added - qs->removed >= q->mask)
+ * or when the fill ring has no free slot, so the caller can retry later.
+ *
+ * The slot is published by efswxtch_ef_vi_receive_push().
+ * NOTE(review): addr is not used and nothing is written into the reserved
+ * fill-ring slot here -- presumably the slot only acts as a credit; confirm
+ * against the consumer of the fill ring. */
+static int efswxtch_ef_vi_receive_init(
+    ef_vi *vi, ef_addr addr, ef_request_id dma_id)
+{
+  ef_vi_rxq *q = &vi->vi_rxq;
+  ef_vi_rxq_state *qs = &vi->ep_state->rxq;
+  int i;
+  if( unlikely(qs->added - qs->removed >= q->mask) )
+    return -EAGAIN;
+
+  /* Reserve the ring slot before touching qs: if this fails, qs->added and
+   * q->ids must not have advanced past what the fill ring holds. */
+  if( unlikely(swxtch_ring_enqueue(&m_rings.rx_fill_ring, 1) == 0) ) {
+    ef_log("Unable to initialize the RX Fill ring");
+    return -EAGAIN;
+  }
+
+  i = qs->added++ & q->mask;
+  q->ids[i] = dma_id;
+
+  return 0;
+}
+
+/* Publish all RX fill ring slots reserved so far. */
+static void efswxtch_ef_vi_receive_push(ef_vi *vi)
+{
+  swxtch_ring_confirm(&m_rings.rx_fill_ring);
+}
+
+
+static void efswxtch_ef_eventq_prime(ef_vi *vi)
+{
+  // TODO
+}
+
+/* Copy the oldest mbuf waiting on the RX pending ring into pkt and free the
+ * mbuf.  Exactly rte_pktmbuf_data_len() bytes are copied.  If the pending
+ * ring is empty this is logged and pkt is left untouched.  vi and index are
+ * not used.
+ * NOTE(review): the copy is not bounded by the size of pkt -- presumably the
+ * caller provides at least vi->rx_buffer_len bytes; confirm. */
+static void efswxtch_rx_fill_pkt(ef_vi *vi, char *pkt, int index)
+{
+  struct rte_mbuf *mbuf[1];
+  if( unlikely(rte_ring_dequeue(m_rings.rx_pending_ring, (void **) &mbuf[0]) !=
+               0) ) {
+    ef_log("Nothing to fill in the rx pkts");
+    return;
+  }
+  memcpy(pkt, rte_pktmbuf_mtod(mbuf[0], unsigned char *),
+      rte_pktmbuf_data_len(mbuf[0]));
+  rte_pktmbuf_free(mbuf[0]);
+}
+
+/* Fill evs with at most evs_len events and return how many were written.
+ * RX events come first, one per mbuf dequeued from the RX ring; TX completion
+ * events taken from the TX completion ring follow if there is room left. */
+static int efswxtch_ef_eventq_poll(ef_vi *vi, ef_event *evs, int evs_len)
+{
+  // n is the index of the current event we're filling in. consists of both RX
+  // events and TX completion events
+  int n = 0, count = 0;
+  unsigned available = 0, page_start = 0;
+
+  ef_vi_rxq *rx_q = &vi->vi_rxq;
+  ef_vi_txq *tx_q = &vi->vi_txq;
+  ef_vi_rxq_state *rx_qs = &vi->ep_state->rxq;
+  ef_vi_txq_state *tx_qs = &vi->ep_state->txq;
+
+  // tx_bufs can be large because we can confirm up EF_VI_TRANSMIT_BATCH per
+  // event. We can have at most 1 RX event per RX packet
+  struct rte_mbuf *rx_bufs[evs_len];
+  struct rte_mbuf *tx_bufs[EF_VI_TRANSMIT_BATCH];
+
+  count = rte_ring_dequeue_burst(
+      m_rings.rx_ring, (void **) rx_bufs, evs_len, NULL);
+  for( int i = 0; i < count; i++ ) {
+    unsigned desc_i = rx_qs->removed & rx_q->mask;
+    evs[n].rx.type = EF_EVENT_TYPE_RX;
+    evs[n].rx.q_id = 0;
+    evs[n].rx.rq_id = rx_q->ids[desc_i]; // dma_id
+    ++rx_qs->removed;
+
+    // mark the descriptor reusable
+    rx_q->ids[desc_i] = EF_REQUEST_ID_MASK; /* Debug only?
*/
+
+    evs[n].rx.flags = EF_EVENT_FLAG_SOP;
+    evs[n].rx.len = rte_pktmbuf_data_len(rx_bufs[i]);
+    evs[n].rx.ofs = 0;
+    ++n;
+  }
+
+  /* Park the received mbufs on the pending ring; efswxtch_rx_fill_pkt()
+   * dequeues them from there one at a time.
+   * NOTE(review): the result is ignored.  On a short enqueue the helper
+   * frees the leftover mbufs, yet their RX events have already been written
+   * to evs above -- confirm this cannot happen or is acceptable. */
+  efswxtch_ef_vi_safe_enqueue(
+      m_rings.rx_pending_ring, (struct rte_mbuf **) rx_bufs, count);
+
+  if( n < evs_len ) {
+    available = 0;
+    do {
+      // TX can acknowledge upto EF_VI_TRANSMIT_BATCH at once using the last id
+      // for a single TYPE_TX event
+      count = rte_ring_dequeue_burst(m_rings.tx_comp_ring, (void **) tx_bufs,
+          EF_VI_TRANSMIT_BATCH, &available);
+      if( likely(count != 0) ) {
+        /* page_start is the number of completions already reported by
+         * earlier iterations of this call; tx_qs->removed is only read here,
+         * never advanced.
+         * NOTE(review): this yields removed + completions-so-far, i.e. one
+         * past the last completed descriptor -- confirm that is what the
+         * consumer of tx.desc_id expects. */
+        int desc_id = (tx_qs->removed + count + page_start) & tx_q->mask;
+        evs[n].tx.type = EF_EVENT_TYPE_TX;
+        evs[n].tx.desc_id = desc_id;
+        evs[n].tx.flags = 0;
+        evs[n].tx.q_id = 0;
+        ++n;
+        page_start += count;
+
+        /* Completed TX mbufs are released here. */
+        rte_pktmbuf_free_bulk(tx_bufs, count);
+      }
+      // don't waste our time confirming anything less than the transmit max?
+    } while( available > EF_VI_TRANSMIT_BATCH && n < evs_len );
+  }
+
+  return n;
+}
+
+static void efswxtch_ef_eventq_timer_prime(ef_vi *vi, unsigned v)
+{
+  // TODO
+}
+
+static void efswxtch_ef_eventq_timer_run(ef_vi *vi, unsigned v)
+{
+  // TODO
+}
+
+static void efswxtch_ef_eventq_timer_clear(ef_vi *vi)
+{
+  // TODO
+}
+
+static void efswxtch_ef_eventq_timer_zero(ef_vi *vi)
+{
+  // TODO
+}
+
+/* Returns non-zero when the RX ring holds at least one entry or the TX
+ * completion ring holds at least EF_VI_TRANSMIT_BATCH entries.  _vi is not
+ * used; the rings are the ones cached in m_rings. */
+int efswxtch_ef_eventq_check_event(const ef_vi *_vi)
+{
+  return rte_ring_count(m_rings.rx_ring) > 0 ||
+         rte_ring_count(m_rings.tx_comp_ring) >= EF_VI_TRANSMIT_BATCH;
+}
+
+/* Set up vi for the swxtch backend: look up the shared rings, install the
+ * efswxtch_* implementations in vi->ops, and set the RX buffer length (2048),
+ * RX prefix length (0), evq_phase_bits and nic_type.arch
+ * (EF_VI_ARCH_SWXTCH).  The CTPIO fallback ops depend on whether
+ * EF_VI_TX_CTPIO is set in vi->vi_flags. */
+void efswxtch_vi_init(ef_vi *vi)
+{
+  /* NOTE(review): the return value is ignored.  If a ring or the mempool is
+   * missing, this function still installs the ops and the only trace is the
+   * ef_log() message from efswxtch_init_rings(); the m_rings handles the ops
+   * use are then NULL. */
+  efswxtch_init_rings();
+  vi->ops.transmit = efswxtch_ef_vi_transmit;
+  vi->ops.transmitv = efswxtch_ef_vi_transmitv;
+  vi->ops.transmitv_init = efswxtch_ef_vi_transmitv_init;
+  vi->ops.transmit_push = efswxtch_ef_vi_transmit_push;
+  vi->ops.transmit_pio = efswxtch_ef_vi_transmit_pio;
+  vi->ops.transmit_copy_pio = efswxtch_ef_vi_transmit_copy_pio;
+  vi->ops.transmit_pio_warm = efswxtch_ef_vi_transmit_pio_warm;
+  vi->ops.transmit_copy_pio_warm = efswxtch_ef_vi_transmit_copy_pio_warm;
+  vi->ops.transmitv_ctpio = efswxtch_ef_vi_transmitv_ctpio;
+  vi->ops.transmitv_ctpio_copy = efswxtch_ef_vi_transmitv_ctpio_copy;
+  vi->ops.transmit_alt_select = efswxtch_ef_vi_transmit_alt_select;
+  vi->ops.transmit_alt_select_default =
+      efswxtch_ef_vi_transmit_alt_select_normal;
+  vi->ops.transmit_alt_stop = efswxtch_ef_vi_transmit_alt_stop;
+  vi->ops.transmit_alt_go = efswxtch_ef_vi_transmit_alt_go;
+  vi->ops.transmit_alt_discard = efswxtch_ef_vi_transmit_alt_discard;
+  vi->ops.receive_init = efswxtch_ef_vi_receive_init;
+  vi->ops.receive_push = efswxtch_ef_vi_receive_push;
+  vi->ops.eventq_poll = efswxtch_ef_eventq_poll;
+  vi->ops.eventq_prime = efswxtch_ef_eventq_prime;
+  vi->ops.eventq_timer_prime = efswxtch_ef_eventq_timer_prime;
+  vi->ops.eventq_timer_run = efswxtch_ef_eventq_timer_run;
+  vi->ops.eventq_timer_clear = efswxtch_ef_eventq_timer_clear;
+  vi->ops.eventq_timer_zero = efswxtch_ef_eventq_timer_zero;
+  vi->ops.transmit_memcpy = efswxtch_ef_vi_transmit_memcpy;
+  vi->ops.transmit_memcpy_sync = efswxtch_ef_vi_transmit_memcpy_sync;
+  vi->ops.rx_fill_pkt = efswxtch_rx_fill_pkt;
+  vi->ops.tx_fill_pkt = efswxtch_ef_vi_tx_fill_pkt;
+  if( vi->vi_flags & EF_VI_TX_CTPIO ) {
+    vi->ops.transmit_ctpio_fallback = efswxtch_ef_vi_transmit_ctpio_fallback;
+    vi->ops.transmitv_ctpio_fallback = efswxtch_ef_vi_transmitv_ctpio_fallback;
+  } else {
+    vi->ops.transmit_ctpio_fallback =
+        efswxtch_ef_vi_transmit_ctpio_fallback_not_supp;
+    vi->ops.transmitv_ctpio_fallback =
+        efswxtch_ef_vi_transmitv_ctpio_fallback_not_supp;
+  }
+
+  vi->rx_buffer_len = 2048;
+  vi->rx_prefix_len = 0;
+  vi->evq_phase_bits = 1; /* We set this flag for ef_eventq_has_event */
+  vi->nic_type.arch = EF_VI_ARCH_SWXTCH;
+}
diff --git a/src/lib/ciul/efxdp_vi.c b/src/lib/ciul/efxdp_vi.c
index 014ba7454..b65be66fc 100644
--- a/src/lib/ciul/efxdp_vi.c
+++ b/src/lib/ciul/efxdp_vi.c
@@ -7,9 +7,9 @@
  * within the macro.
*/ #ifdef __has_include -# if __has_include("linux/if_xdp.h") -# define HAVE_AF_XDP -# endif +#if __has_include("linux/if_xdp.h") +#define HAVE_AF_XDP +#endif #endif #include "ef_vi_internal.h" @@ -48,22 +48,22 @@ static void efxdp_tx_kick(ef_vi* vi) */ static struct efab_af_xdp_offsets* xdp_offsets(ef_vi* vi) { - return (struct efab_af_xdp_offsets*)vi->evq_base; + return (struct efab_af_xdp_offsets*) vi->evq_base; } #define RING_THING(vi, ring, thing) \ - ((void*)(vi->evq_base + xdp_offsets(vi)->rings.ring.thing)) + ((void*) (vi->evq_base + xdp_offsets(vi)->rings.ring.thing)) #define RING_PRODUCER(vi, ring) \ - ((volatile uint32_t*)RING_THING(vi, ring, producer)) + ((volatile uint32_t*) RING_THING(vi, ring, producer)) #define RING_CONSUMER(vi, ring) \ - ((volatile uint32_t*)RING_THING(vi, ring, consumer)) + ((volatile uint32_t*) RING_THING(vi, ring, consumer)) #define RING_DESC(vi, ring) RING_THING(vi, ring, desc) -static int efxdp_ef_vi_transmitv_init(ef_vi* vi, const ef_iovec* iov, - int iov_len, ef_request_id dma_id) +static int efxdp_ef_vi_transmitv_init( + ef_vi* vi, const ef_iovec* iov, int iov_len, ef_request_id dma_id) { ef_vi_txq* q = &vi->vi_txq; ef_vi_txq_state* qs = &vi->ep_state->txq; @@ -98,12 +98,12 @@ static void efxdp_ef_vi_transmit_push(ef_vi* vi) EF_VI_BUG_ON(vi->ep_state->txq.added == vi->ep_state->txq.previous); if( vi->ep_state->txq.added - vi->ep_state->txq.removed < 3 || (vi->ep_state->txq.added ^ vi->ep_state->txq.previous) / - (AF_XDP_TX_BATCH_MAX >> 2) ) + (AF_XDP_TX_BATCH_MAX >> 2) ) efxdp_tx_kick(vi); } -static int efxdp_ef_vi_transmit(ef_vi* vi, ef_addr base, int len, - ef_request_id dma_id) +static int efxdp_ef_vi_transmit( + ef_vi* vi, ef_addr base, int len, ef_request_id dma_id) { ef_iovec iov = { base, len }; int rc = efxdp_ef_vi_transmitv_init(vi, &iov, 1, dma_id); @@ -114,8 +114,8 @@ static int efxdp_ef_vi_transmit(ef_vi* vi, ef_addr base, int len, return rc; } -static int efxdp_ef_vi_transmitv(ef_vi* vi, const ef_iovec* iov, 
int iov_len, - ef_request_id dma_id) +static int efxdp_ef_vi_transmitv( + ef_vi* vi, const ef_iovec* iov, int iov_len, ef_request_id dma_id) { int rc = efxdp_ef_vi_transmitv_init(vi, iov, iov_len, dma_id); if( rc == 0 ) { @@ -125,15 +125,14 @@ static int efxdp_ef_vi_transmitv(ef_vi* vi, const ef_iovec* iov, int iov_len, return rc; } -static int efxdp_ef_vi_transmit_pio(ef_vi* vi, int offset, int len, - ef_request_id dma_id) +static int efxdp_ef_vi_transmit_pio( + ef_vi* vi, int offset, int len, ef_request_id dma_id) { return -EOPNOTSUPP; } -static int efxdp_ef_vi_transmit_copy_pio(ef_vi* vi, int offset, - const void* src_buf, int len, - ef_request_id dma_id) +static int efxdp_ef_vi_transmit_copy_pio( + ef_vi* vi, int offset, const void* src_buf, int len, ef_request_id dma_id) { return -EOPNOTSUPP; } @@ -143,54 +142,46 @@ static void efxdp_ef_vi_transmit_pio_warm(ef_vi* vi) /* PIO is unsupported so do nothing */ } -static void efxdp_ef_vi_transmit_copy_pio_warm(ef_vi* vi, int pio_offset, - const void* src_buf, int len) +static void efxdp_ef_vi_transmit_copy_pio_warm( + ef_vi* vi, int pio_offset, const void* src_buf, int len) { /* PIO is unsupported so do nothing */ } static void efxdp_ef_vi_transmitv_ctpio(ef_vi* vi, size_t frame_len, - const struct iovec* iov, int iovcnt, - unsigned threshold) + const struct iovec* iov, int iovcnt, unsigned threshold) { /* CTPIO is unsupported so do nothing. Fallback will send the packet. 
*/ } static void efxdp_ef_vi_transmitv_ctpio_copy(ef_vi* vi, size_t frame_len, - const struct iovec* iov, int iovcnt, - unsigned threshold, void* fallback) + const struct iovec* iov, int iovcnt, unsigned threshold, void* fallback) { // TODO copy to fallback } -static int efxdp_ef_vi_transmit_ctpio_fallback(ef_vi* vi, ef_addr dma_addr, - size_t len, ef_request_id dma_id) +static int efxdp_ef_vi_transmit_ctpio_fallback( + ef_vi* vi, ef_addr dma_addr, size_t len, ef_request_id dma_id) { - EF_VI_ASSERT( vi->vi_flags & EF_VI_TX_CTPIO ); + EF_VI_ASSERT(vi->vi_flags & EF_VI_TX_CTPIO); return efxdp_ef_vi_transmit(vi, dma_addr, len, dma_id); } -static int efxdp_ef_vi_transmitv_ctpio_fallback(ef_vi* vi, - const ef_iovec* dma_iov, - int dma_iov_len, - ef_request_id dma_id) +static int efxdp_ef_vi_transmitv_ctpio_fallback( + ef_vi* vi, const ef_iovec* dma_iov, int dma_iov_len, ef_request_id dma_id) { - EF_VI_ASSERT( vi->vi_flags & EF_VI_TX_CTPIO ); + EF_VI_ASSERT(vi->vi_flags & EF_VI_TX_CTPIO); return efxdp_ef_vi_transmitv(vi, dma_iov, dma_iov_len, dma_id); } -static int efxdp_ef_vi_transmit_ctpio_fallback_not_supp(ef_vi* vi, - ef_addr dma_addr, - size_t len, - ef_request_id dma_id) +static int efxdp_ef_vi_transmit_ctpio_fallback_not_supp( + ef_vi* vi, ef_addr dma_addr, size_t len, ef_request_id dma_id) { return -EOPNOTSUPP; } -static int efxdp_ef_vi_transmitv_ctpio_fallback_not_supp(ef_vi* vi, - const ef_iovec* dma_iov, - int dma_iov_len, - ef_request_id dma_id) +static int efxdp_ef_vi_transmitv_ctpio_fallback_not_supp( + ef_vi* vi, const ef_iovec* dma_iov, int dma_iov_len, ef_request_id dma_id) { return -EOPNOTSUPP; } @@ -221,23 +212,21 @@ static int efxdp_ef_vi_transmit_alt_go(ef_vi* vi, unsigned alt_id) } static ssize_t efxdp_ef_vi_transmit_memcpy(struct ef_vi* vi, - const ef_remote_iovec* dst_iov, - int dst_iov_len, - const ef_remote_iovec* src_iov, - int src_iov_len) + const ef_remote_iovec* dst_iov, int dst_iov_len, + const ef_remote_iovec* src_iov, int src_iov_len) { 
return -EOPNOTSUPP; } -static int efxdp_ef_vi_transmit_memcpy_sync(struct ef_vi* vi, - ef_request_id dma_id) +static int efxdp_ef_vi_transmit_memcpy_sync( + struct ef_vi* vi, ef_request_id dma_id) { return -EOPNOTSUPP; } /* Note: for AF_XDP devices dma_id is disregarded */ -static int efxdp_ef_vi_receive_init(ef_vi* vi, ef_addr addr, - ef_request_id dma_id) +static int efxdp_ef_vi_receive_init( + ef_vi* vi, ef_addr addr, ef_request_id dma_id) { ef_vi_rxq* q = &vi->vi_rxq; ef_vi_rxq_state* qs = &vi->ep_state->rxq; @@ -265,12 +254,7 @@ static void efxdp_ef_eventq_prime(ef_vi* vi) int efxdp_ef_eventq_check_event(const ef_vi* _vi, int look_ahead) { - ef_vi* vi = (ef_vi*) _vi; /* drop const */ - EF_VI_ASSERT(vi->evq_base); - EF_VI_BUG_ON(look_ahead < 0); - return *RING_CONSUMER(vi, rx) - *RING_PRODUCER(vi, rx) + - *RING_CONSUMER(vi, cr) - *RING_PRODUCER(vi, cr) - > look_ahead; + return 0; } @@ -278,6 +262,10 @@ static int efxdp_ef_eventq_poll(ef_vi* vi, ef_event* evs, int evs_len) { int n = 0; + /* We can't let the control planes version get out of sync with our userspace + * app */ + return 0; + /* rx_buffer_len is power of two */ EF_VI_ASSERT(((vi->rx_buffer_len - 1) & vi->rx_buffer_len) == 0); @@ -306,21 +294,21 @@ static int efxdp_ef_eventq_poll(ef_vi* vi, ef_event* evs, int evs_len) * for the client to resolve themselves. */ evs[n].rx.rq_id = dq[desc_i].addr / vi->rx_buffer_len; - q->ids[desc_i] = EF_REQUEST_ID_MASK; /* Debug only? */ + q->ids[desc_i] = EF_REQUEST_ID_MASK; /* Debug only? */ /* FIXME: handle jumbo, multicast */ evs[n].rx.flags = EF_EVENT_FLAG_SOP; /* In case of AF_XDP offset of the placement of payload from * the beginning of the packet buffer may vary. 
*/ - evs[n].rx.ofs = dq[desc_i].addr & (vi->rx_buffer_len - 1); + evs[n].rx.ofs = dq[desc_i].addr & (vi->rx_buffer_len - 1); evs[n].rx.len = dq[desc_i].len; ++n; ++cons; } while( cons != prod && n != evs_len ); - /* Full memory barrier needed to ensure the descriptors aren't overwritten - * by incoming packets before the read accesses above */ + /* Full memory barrier needed to ensure the descriptors aren't + * overwritten by incoming packets before the read accesses above */ ci_mb(); *RING_CONSUMER(vi, rx) = cons; } @@ -349,7 +337,6 @@ static int efxdp_ef_eventq_poll(ef_vi* vi, ef_event* evs, int evs_len) * We just recorded the value of 'cons` for later use to access `q->ids` * from `ef_vi_transmit_unbundle`. */ *RING_CONSUMER(vi, cr) = cons; - } } if( efxdp_tx_need_kick(vi) ) @@ -382,40 +369,39 @@ void efxdp_vi_init(ef_vi* vi) { EF_VI_BUILD_ASSERT(EFAB_AF_XDP_DESC_BYTES == sizeof(struct xdp_desc)); - vi->ops.transmit = efxdp_ef_vi_transmit; - vi->ops.transmitv = efxdp_ef_vi_transmitv; - vi->ops.transmitv_init = efxdp_ef_vi_transmitv_init; - vi->ops.transmit_push = efxdp_ef_vi_transmit_push; - vi->ops.transmit_pio = efxdp_ef_vi_transmit_pio; - vi->ops.transmit_copy_pio = efxdp_ef_vi_transmit_copy_pio; - vi->ops.transmit_pio_warm = efxdp_ef_vi_transmit_pio_warm; + vi->ops.transmit = efxdp_ef_vi_transmit; + vi->ops.transmitv = efxdp_ef_vi_transmitv; + vi->ops.transmitv_init = efxdp_ef_vi_transmitv_init; + vi->ops.transmit_push = efxdp_ef_vi_transmit_push; + vi->ops.transmit_pio = efxdp_ef_vi_transmit_pio; + vi->ops.transmit_copy_pio = efxdp_ef_vi_transmit_copy_pio; + vi->ops.transmit_pio_warm = efxdp_ef_vi_transmit_pio_warm; vi->ops.transmit_copy_pio_warm = efxdp_ef_vi_transmit_copy_pio_warm; - vi->ops.transmitv_ctpio = efxdp_ef_vi_transmitv_ctpio; - vi->ops.transmitv_ctpio_copy = efxdp_ef_vi_transmitv_ctpio_copy; - vi->ops.transmit_alt_select = efxdp_ef_vi_transmit_alt_select; + vi->ops.transmitv_ctpio = efxdp_ef_vi_transmitv_ctpio; + vi->ops.transmitv_ctpio_copy = 
efxdp_ef_vi_transmitv_ctpio_copy; + vi->ops.transmit_alt_select = efxdp_ef_vi_transmit_alt_select; vi->ops.transmit_alt_select_default = efxdp_ef_vi_transmit_alt_select_normal; - vi->ops.transmit_alt_stop = efxdp_ef_vi_transmit_alt_stop; - vi->ops.transmit_alt_go = efxdp_ef_vi_transmit_alt_go; - vi->ops.transmit_alt_discard = efxdp_ef_vi_transmit_alt_discard; - vi->ops.receive_init = efxdp_ef_vi_receive_init; - vi->ops.receive_push = efxdp_ef_vi_receive_push; - vi->ops.eventq_poll = efxdp_ef_eventq_poll; - vi->ops.eventq_prime = efxdp_ef_eventq_prime; - vi->ops.eventq_timer_prime = efxdp_ef_eventq_timer_prime; - vi->ops.eventq_timer_run = efxdp_ef_eventq_timer_run; - vi->ops.eventq_timer_clear = efxdp_ef_eventq_timer_clear; - vi->ops.eventq_timer_zero = efxdp_ef_eventq_timer_zero; - vi->ops.transmit_memcpy = efxdp_ef_vi_transmit_memcpy; - vi->ops.transmit_memcpy_sync = efxdp_ef_vi_transmit_memcpy_sync; + vi->ops.transmit_alt_stop = efxdp_ef_vi_transmit_alt_stop; + vi->ops.transmit_alt_go = efxdp_ef_vi_transmit_alt_go; + vi->ops.transmit_alt_discard = efxdp_ef_vi_transmit_alt_discard; + vi->ops.receive_init = efxdp_ef_vi_receive_init; + vi->ops.receive_push = efxdp_ef_vi_receive_push; + vi->ops.eventq_poll = efxdp_ef_eventq_poll; + vi->ops.eventq_prime = efxdp_ef_eventq_prime; + vi->ops.eventq_timer_prime = efxdp_ef_eventq_timer_prime; + vi->ops.eventq_timer_run = efxdp_ef_eventq_timer_run; + vi->ops.eventq_timer_clear = efxdp_ef_eventq_timer_clear; + vi->ops.eventq_timer_zero = efxdp_ef_eventq_timer_zero; + vi->ops.transmit_memcpy = efxdp_ef_vi_transmit_memcpy; + vi->ops.transmit_memcpy_sync = efxdp_ef_vi_transmit_memcpy_sync; if( vi->vi_flags & EF_VI_TX_CTPIO ) { vi->ops.transmit_ctpio_fallback = efxdp_ef_vi_transmit_ctpio_fallback; vi->ops.transmitv_ctpio_fallback = efxdp_ef_vi_transmitv_ctpio_fallback; - } - else { + } else { vi->ops.transmit_ctpio_fallback = - efxdp_ef_vi_transmit_ctpio_fallback_not_supp; + efxdp_ef_vi_transmit_ctpio_fallback_not_supp; 
vi->ops.transmitv_ctpio_fallback = - efxdp_ef_vi_transmitv_ctpio_fallback_not_supp; + efxdp_ef_vi_transmitv_ctpio_fallback_not_supp; } vi->rx_buffer_len = 2048; @@ -429,6 +415,12 @@ long efxdp_vi_mmap_bytes(ef_vi* vi) } #else void efxdp_vi_init(ef_vi* vi) {} -long efxdp_vi_mmap_bytes(ef_vi* vi) { return 0; } -int efxdp_ef_eventq_check_event(const ef_vi* _vi, int look_ahead) { return 0; } +long efxdp_vi_mmap_bytes(ef_vi* vi) +{ + return 0; +} +int efxdp_ef_eventq_check_event(const ef_vi* _vi, int look_ahead) +{ + return 0; +} #endif diff --git a/src/lib/ciul/mmake.mk b/src/lib/ciul/mmake.mk index eb0534ac8..22582a19b 100644 --- a/src/lib/ciul/mmake.mk +++ b/src/lib/ciul/mmake.mk @@ -9,6 +9,8 @@ TARGETS := $(CIUL_LIB) endif MMAKE_TYPE := LIB + + # Standalone subset for descriptor munging only. EFVI_SRCS := \ pt_tx.c \ @@ -28,6 +30,11 @@ LIB_SRCS := \ checksum.c ifneq ($(DRIVER),1) +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif +MMAKE_INCLUDE += -I$(RTE_SDK)/build/install/include +MMAKE_DPDK := $(DEFAULT_DPDK) LIB_SRCS += \ open.c \ event_q.c \ @@ -46,6 +53,7 @@ LIB_SRCS += \ vi_discard.c \ capabilities.c \ smartnic_exts.c \ + efswxtch_vi.c \ ctpio.c # librt is needed on old glibc, e.g. 
on RHEL 6 @@ -108,7 +116,7 @@ $(objd)efch_intf_ver.h: $(EFCH_INTF_HDRS) $(objd)$(MMAKE_OBJ_PREFIX)pt_endpoint.o: $(objd)efch_intf_ver.h $(objd)$(MMAKE_OBJ_PREFIX)vi_init.o: $(objd)efch_intf_ver.h - +$(MMAKE_OBJ_PREFIX)efswxtch_vi.o: cwarnings += -Wno-error=deprecated-declarations -Wno-error=implicit-fallthrough ###################################################### # UL library diff --git a/src/lib/ciul/vi_init.c b/src/lib/ciul/vi_init.c index e0ab45626..3b987b857 100644 --- a/src/lib/ciul/vi_init.c +++ b/src/lib/ciul/vi_init.c @@ -20,9 +20,9 @@ #include #endif -#define EF_VI_STATE_BYTES(rxq_sz, txq_sz) \ - (sizeof(ef_vi_state) + (rxq_sz) * sizeof(uint32_t) \ - + (txq_sz) * sizeof(uint32_t)) +#define EF_VI_STATE_BYTES(rxq_sz, txq_sz) \ + (sizeof(ef_vi_state) + (rxq_sz) * sizeof(uint32_t) + \ + (txq_sz) * sizeof(uint32_t)) unsigned ef_vi_evq_clear_stride(void) @@ -33,11 +33,11 @@ unsigned ef_vi_evq_clear_stride(void) return atoi(s); #endif -# ifdef __x86_64__ +#ifdef __x86_64__ return sys_is_numa() ? EF_VI_EVS_PER_CACHE_LINE : 0; -# else +#else return EF_VI_EVS_PER_CACHE_LINE; -# endif +#endif } @@ -79,7 +79,7 @@ void ef_vi_init_state(ef_vi* vi) int ef_vi_add_queue(ef_vi* evq_vi, ef_vi* add_vi) { int q_label; - if (evq_vi->vi_qs_n == EF_VI_MAX_QS) + if( evq_vi->vi_qs_n == EF_VI_MAX_QS ) return -EBUSY; q_label = evq_vi->vi_qs_n++; EF_VI_BUG_ON(evq_vi->vi_qs[q_label] != NULL); @@ -115,11 +115,11 @@ const char* ef_vi_driver_interface_str(void) int ef_vi_rxq_reinit(ef_vi* vi, ef_vi_reinit_callback cb, void* cb_arg) { ef_vi_state* state = vi->ep_state; - int di; - + int di; + /* shared rxqs always claim to have a full rxq because buffer posting is * managed elsewhere, but it's a lie. */ - if( vi->vi_rxq.mask && !vi->max_efct_rxq ) { + if( vi->vi_rxq.mask && ! 
vi->max_efct_rxq ) { while( state->rxq.removed < state->rxq.added ) { di = state->rxq.removed & vi->vi_rxq.mask; BUG_ON(vi->vi_rxq.ids[di] == EF_REQUEST_ID_MASK); @@ -138,7 +138,7 @@ int ef_vi_rxq_reinit(ef_vi* vi, ef_vi_reinit_callback cb, void* cb_arg) int ef_vi_txq_reinit(ef_vi* vi, ef_vi_reinit_callback cb, void* cb_arg) { ef_vi_state* state = vi->ep_state; - int di; + int di; while( state->txq.removed < state->txq.added ) { di = state->txq.removed & vi->vi_txq.mask; @@ -156,7 +156,7 @@ int ef_vi_txq_reinit(ef_vi* vi, ef_vi_reinit_callback cb, void* cb_arg) int ef_vi_evq_reinit(ef_vi* vi) { - memset(vi->evq_base, (char)0xff, vi->evq_mask + 1); + memset(vi->evq_base, (char) 0xff, vi->evq_mask + 1); vi->ep_state->evq.evq_ptr = 0; return 0; } @@ -169,56 +169,57 @@ int ef_vi_evq_reinit(ef_vi* vi) static int ef_vi_calc_rxq_descriptors_bytes(enum ef_vi_arch arch, int qsize) { switch( arch ) { - case EF_VI_ARCH_EF10: - case EF_VI_ARCH_EF100: - return 8 * qsize; - case EF_VI_ARCH_EFCT: - return EFCT_RX_DESCRIPTOR_BYTES * CI_EFCT_MAX_SUPERBUFS * - EF_VI_MAX_EFCT_RXQS; - default: - EF_VI_BUG_ON(1); - return 8 * qsize; + case EF_VI_ARCH_EF10: + case EF_VI_ARCH_EF100: + return 8 * qsize; + case EF_VI_ARCH_EFCT: + return EFCT_RX_DESCRIPTOR_BYTES * CI_EFCT_MAX_SUPERBUFS * + EF_VI_MAX_EFCT_RXQS; + default: + EF_VI_BUG_ON(1); + return 8 * qsize; } } static int tx_desc_bytes(struct ef_vi* vi) { switch( vi->nic_type.arch ) { - case EF_VI_ARCH_EF10: - return 8; - case EF_VI_ARCH_EF100: - return 16; - case EF_VI_ARCH_EFCT: - return EFCT_TX_DESCRIPTOR_BYTES; - default: - EF_VI_BUG_ON(1); - return 8; + case EF_VI_ARCH_EF10: + return 8; + case EF_VI_ARCH_EF100: + return 16; + case EF_VI_ARCH_EFCT: + return EFCT_TX_DESCRIPTOR_BYTES; + default: + EF_VI_BUG_ON(1); + return 8; } } static int tx_fifo_bytes(struct ef_vi* vi) { switch( vi->nic_type.arch ) { - case EF_VI_ARCH_EF10: - case EF_VI_ARCH_EF100: - case EF_VI_ARCH_AF_XDP: - /* No FIFO, so return a large number to indicate no limit 
*/ - return INT_MAX; - case EF_VI_ARCH_EFCT: - /* 32k FIFO, reduced by 8 bytes for the TX header. Hardware reduces this - * by one cache line to make their overflow tracking easier */ - return EFCT_TX_FIFO_BYTES - EFCT_TX_ALIGNMENT - EFCT_TX_HEADER_BYTES; - default: - EF_VI_BUG_ON(1); - return 0; + case EF_VI_ARCH_EF10: + case EF_VI_ARCH_EF100: + case EF_VI_ARCH_AF_XDP: + case EF_VI_ARCH_SWXTCH: + /* No FIFO, so return a large number to indicate no limit */ + return INT_MAX; + case EF_VI_ARCH_EFCT: + /* 32k FIFO, reduced by 8 bytes for the TX header. Hardware reduces this + * by one cache line to make their overflow tracking easier */ + return EFCT_TX_FIFO_BYTES - EFCT_TX_ALIGNMENT - EFCT_TX_HEADER_BYTES; + default: + EF_VI_BUG_ON(1); + return 0; } } int ef_vi_rx_ring_bytes(struct ef_vi* vi) { EF_VI_ASSERT(vi->inited & EF_VI_INITED_RXQ); - return ef_vi_calc_rxq_descriptors_bytes(vi->nic_type.arch, - vi->vi_rxq.mask + 1); + return ef_vi_calc_rxq_descriptors_bytes( + vi->nic_type.arch, vi->vi_rxq.mask + 1); } @@ -230,41 +231,46 @@ int ef_vi_tx_ring_bytes(struct ef_vi* vi) int ef_vi_init(struct ef_vi* vi, int arch, int variant, int revision, - unsigned ef_vi_flags, unsigned char nic_flags, - ef_vi_state* state) + unsigned ef_vi_flags, unsigned char nic_flags, ef_vi_state* state) { memset(vi, 0, sizeof(*vi)); /* vi->vi_qs_n = 0; */ /* vi->inited = 0; */ /* vi->vi_i = 0; */ - vi->nic_type.arch = arch; - vi->nic_type.variant = variant; - vi->nic_type.revision = revision; + vi->nic_type.arch = arch; + vi->nic_type.variant = variant; + vi->nic_type.revision = revision; vi->nic_type.nic_flags = nic_flags; - vi->vi_flags = (enum ef_vi_flags) ef_vi_flags; - vi->ep_state = state; + vi->vi_flags = (enum ef_vi_flags) ef_vi_flags; + vi->ep_state = state; /* vi->vi_stats = NULL; */ /* vi->io = NULL; */ /* vi->linked_pio = NULL; */ /* vi->tx_alt_num = 0; */ /* vi->tx_alt_ids = NULL; */ - vi->vi_is_normal = !(ef_vi_flags & EF_VI_RX_EVENT_MERGE) && - !(ef_vi_flags & 
EF_VI_RX_PACKED_STREAM); + vi->vi_is_normal = ! (ef_vi_flags & EF_VI_RX_EVENT_MERGE) && + ! (ef_vi_flags & EF_VI_RX_PACKED_STREAM); switch( arch ) { - case EF_VI_ARCH_EF10: - ef10_vi_init(vi); - break; - case EF_VI_ARCH_EF100: - ef100_vi_init(vi); - break; - case EF_VI_ARCH_EFCT: - efct_vi_init(vi); - break; - case EF_VI_ARCH_AF_XDP: - efxdp_vi_init(vi); - break; - default: - return -EINVAL; + case EF_VI_ARCH_EF10: + ef10_vi_init(vi); + break; + case EF_VI_ARCH_EF100: + ef100_vi_init(vi); + break; + case EF_VI_ARCH_EFCT: + efct_vi_init(vi); + break; + case EF_VI_ARCH_AF_XDP: +#ifndef __KERNEL__ + /* In the control plane we still need the xdp implementation for + * consistency even though we disable it almost entirely*/ + efswxtch_vi_init(vi); +#else + efxdp_vi_init(vi); +#endif + break; + default: + return -EINVAL; } vi->inited |= EF_VI_INITED_NIC; return 0; @@ -274,34 +280,36 @@ int ef_vi_init(struct ef_vi* vi, int arch, int variant, int revision, void ef_vi_init_io(struct ef_vi* vi, void* io_area) { EF_VI_BUG_ON(vi->inited & EF_VI_INITED_IO); - EF_VI_BUG_ON((vi->nic_type.arch != EF_VI_ARCH_AF_XDP) && io_area == NULL); + EF_VI_BUG_ON(! 
(vi->nic_type.arch == EF_VI_ARCH_AF_XDP || + vi->nic_type.arch == EF_VI_ARCH_SWXTCH) && + io_area == NULL); vi->io = io_area; vi->inited |= EF_VI_INITED_IO; } void ef_vi_init_rxq(struct ef_vi* vi, int ring_size, void* descriptors, - void* ids, int prefix_len) + void* ids, int prefix_len) { EF_VI_BUG_ON(vi->inited & EF_VI_INITED_RXQ); EF_VI_BUG_ON(ring_size & (ring_size - 1)); /* not power-of-2 */ - vi->vi_rxq.mask = ring_size - 1; + vi->vi_rxq.mask = ring_size - 1; vi->vi_rxq.descriptors = descriptors; - vi->vi_rxq.ids = ids; - vi->rx_prefix_len = prefix_len; + vi->vi_rxq.ids = ids; + vi->rx_prefix_len = prefix_len; vi->inited |= EF_VI_INITED_RXQ; } -void ef_vi_init_txq(struct ef_vi* vi, int ring_size, void* descriptors, - void* ids) +void ef_vi_init_txq( + struct ef_vi* vi, int ring_size, void* descriptors, void* ids) { EF_VI_BUG_ON(vi->inited & EF_VI_INITED_TXQ); - vi->vi_txq.mask = ring_size - 1; + vi->vi_txq.mask = ring_size - 1; vi->vi_txq.ct_fifo_bytes = tx_fifo_bytes(vi); - vi->vi_txq.descriptors = descriptors; - vi->vi_txq.ids = ids; - vi->tx_push_thresh = 16; + vi->vi_txq.descriptors = descriptors; + vi->vi_txq.ids = ids; + vi->tx_push_thresh = 16; if( vi->vi_flags & EF_VI_TX_PUSH_DISABLE ) vi->tx_push_thresh = 0; if( vi->vi_flags & EF_VI_TX_PUSH_ALWAYS ) @@ -311,7 +319,7 @@ void ef_vi_init_txq(struct ef_vi* vi, int ring_size, void* descriptors, static char* ef_vi_xdp_init_qs(struct ef_vi* vi, char* q_mem, uint32_t* ids, - int rxq_size, int rx_prefix_len, int txq_size) + int rxq_size, int rx_prefix_len, int txq_size) { /* We need to initialise event queue to access things in the mapped memory */ ef_vi_init_evq(vi, 1, q_mem); @@ -323,8 +331,7 @@ static char* ef_vi_xdp_init_qs(struct ef_vi* vi, char* q_mem, uint32_t* ids, static char* ef_vi_sfc_init_qs(struct ef_vi* vi, char* q_mem, uint32_t* ids, - int evq_size, int rxq_size, int rx_prefix_len, - int txq_size) + int evq_size, int rxq_size, int rx_prefix_len, int txq_size) { if( evq_size ) { 
ef_vi_init_evq(vi, evq_size, q_mem); @@ -332,27 +339,28 @@ static char* ef_vi_sfc_init_qs(struct ef_vi* vi, char* q_mem, uint32_t* ids, } if( rxq_size ) { ef_vi_init_rxq(vi, rxq_size, q_mem, ids, rx_prefix_len); - q_mem += (ef_vi_rx_ring_bytes(vi) + CI_PAGE_SIZE-1) & CI_PAGE_MASK; + q_mem += (ef_vi_rx_ring_bytes(vi) + CI_PAGE_SIZE - 1) & CI_PAGE_MASK; ids += rxq_size; } if( txq_size ) { ef_vi_init_txq(vi, txq_size, q_mem, ids); - q_mem += (ef_vi_tx_ring_bytes(vi) + CI_PAGE_SIZE-1) & CI_PAGE_MASK; + q_mem += (ef_vi_tx_ring_bytes(vi) + CI_PAGE_SIZE - 1) & CI_PAGE_MASK; } return q_mem; } -char* ef_vi_init_qs(struct ef_vi* vi, char* q_mem, uint32_t* ids, - int evq_size, int rxq_size, int rx_prefix_len, - int txq_size) +char* ef_vi_init_qs(struct ef_vi* vi, char* q_mem, uint32_t* ids, int evq_size, + int rxq_size, int rx_prefix_len, int txq_size) { - if( vi->nic_type.arch == EF_VI_ARCH_AF_XDP ) - return ef_vi_xdp_init_qs(vi, q_mem, ids, rxq_size, rx_prefix_len, txq_size); + if( vi->nic_type.arch == EF_VI_ARCH_AF_XDP || + vi->nic_type.arch == EF_VI_ARCH_SWXTCH ) + return ef_vi_xdp_init_qs( + vi, q_mem, ids, rxq_size, rx_prefix_len, txq_size); else - return ef_vi_sfc_init_qs(vi, q_mem, ids, evq_size, rxq_size, - rx_prefix_len, txq_size); + return ef_vi_sfc_init_qs( + vi, q_mem, ids, evq_size, rxq_size, rx_prefix_len, txq_size); } @@ -391,9 +399,10 @@ void ef_vi_init_rx_timestamping(struct ef_vi* vi, int rx_ts_correction) * We should only get here on Medford II or later, so use a * value that we know is appropriate for that hardware. */ - LOG(ef_log("%s: ERROR: NIC returned zero timestamp correction. " - "Firmware update required to get accurate timestamps.", - __FUNCTION__)); + LOG( + ef_log("%s: ERROR: NIC returned zero timestamp correction. " + "Firmware update required to get accurate timestamps.", + __FUNCTION__)); vi->rx_ts_correction = -76; } @@ -417,14 +426,14 @@ void ef_vi_init_tx_timestamping(struct ef_vi* vi, int tx_ts_correction) * medford2 and later. 
*/ if( vi->nic_type.variant >= 'C' ) - tx_ts_correction /= 4; /* convert to ns */ + tx_ts_correction /= 4; /* convert to ns */ /* Bottom two bits of the nsec field contain the sync flags, and we * don't want to affect those when we add in the correction, so - * ensure those bits are zero + * ensure those bits are zero */ vi->tx_ts_correction_ns = - tx_ts_correction &~ EF_EVENT_TX_WITH_TIMESTAMP_SYNC_MASK; + tx_ts_correction & ~EF_EVENT_TX_WITH_TIMESTAMP_SYNC_MASK; vi->inited |= EF_VI_INITED_TX_TIMESTAMPING; } @@ -439,18 +448,18 @@ void ef_vi_init_out_flags(struct ef_vi* vi, unsigned flags) void ef_vi_reset_rxq(struct ef_vi* vi) { ef_vi_rxq_state* qs = &vi->ep_state->rxq; - qs->posted = 0; + qs->posted = 0; /* shared rxqs have their buffer posting managed elsewhere, not by the app, * so let's make it look like the queue is constantly full */ if( vi->max_efct_rxq ) qs->added = vi->vi_rxq.mask + 1; else qs->added = 0; - qs->removed = 0; - qs->in_jumbo = 0; - qs->bytes_acc = 0; + qs->removed = 0; + qs->in_jumbo = 0; + qs->bytes_acc = 0; qs->rx_ps_credit_avail = 1; - qs->last_desc_i = vi->vi_is_packed_stream ? vi->vi_rxq.mask : 0; + qs->last_desc_i = vi->vi_is_packed_stream ? 
vi->vi_rxq.mask : 0; if( vi->vi_rxq.mask ) { int i; for( i = 0; i <= vi->vi_rxq.mask; ++i ) @@ -462,12 +471,12 @@ void ef_vi_reset_rxq(struct ef_vi* vi) void ef_vi_reset_txq(struct ef_vi* vi) { ef_vi_txq_state* qs = &vi->ep_state->txq; - qs->previous = 0; - qs->added = 0; - qs->removed = 0; - qs->ct_added = 0; - qs->ct_removed = 0; - qs->ts_nsec = EF_VI_TX_TIMESTAMP_TS_NSEC_INVALID; + qs->previous = 0; + qs->added = 0; + qs->removed = 0; + qs->ct_added = 0; + qs->ct_removed = 0; + qs->ts_nsec = EF_VI_TX_TIMESTAMP_TS_NSEC_INVALID; if( vi->vi_txq.mask ) { int i; @@ -481,19 +490,21 @@ void ef_vi_reset_evq(struct ef_vi* vi, int clear_ring) { if( clear_ring ) memset(vi->evq_base, (char) 0xff, vi->evq_mask + 1); - vi->ep_state->evq.evq_ptr = 0; + vi->ep_state->evq.evq_ptr = 0; vi->ep_state->evq.evq_clear_stride = -((int) ef_vi_evq_clear_stride()); - EF_VI_BUG_ON( vi->ep_state->evq.evq_clear_stride > 0 ); + EF_VI_BUG_ON(vi->ep_state->evq.evq_clear_stride > 0); vi->ep_state->evq.sync_timestamp_synchronised = 0; - vi->ep_state->evq.sync_timestamp_major = ~0u; - /* Set unsol_seq to default, but leave 1 credit-space in reserve for overflow event. */ + vi->ep_state->evq.sync_timestamp_major = ~0u; + /* Set unsol_seq to default, but leave 1 credit-space in reserve for overflow + * event. 
*/ vi->ep_state->evq.unsol_credit_seq = CI_CFG_TIME_SYNC_EVENT_EVQ_CAPACITY - 1; - vi->ep_state->evq.sync_flags = 0; + vi->ep_state->evq.sync_flags = 0; } int ef_eventq_capacity(ef_vi* vi) { - EF_VI_ASSERT( vi->ep_state->evq.evq_clear_stride <= 0 ); - return vi->evq_mask / EF_VI_EV_SIZE - 1u + vi->ep_state->evq.evq_clear_stride; + EF_VI_ASSERT(vi->ep_state->evq.evq_clear_stride <= 0); + return vi->evq_mask / EF_VI_EV_SIZE - 1u + + vi->ep_state->evq.evq_clear_stride; } diff --git a/src/lib/efhw/af_xdp.c b/src/lib/efhw/af_xdp.c index 6f268aa52..06048881f 100644 --- a/src/lib/efhw/af_xdp.c +++ b/src/lib/efhw/af_xdp.c @@ -30,8 +30,8 @@ int enable_af_xdp_flow_filters = 1; module_param(enable_af_xdp_flow_filters, int, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(enable_af_xdp_flow_filters, - "Enables flow filter use for AF_XDP devices "); +MODULE_PARM_DESC( + enable_af_xdp_flow_filters, "Enables flow filter use for AF_XDP devices "); /* filter id when no actual filter is installed */ #define AF_XDP_NO_FILTER_MAGIC_ID 0x7FFFFF00 @@ -80,12 +80,12 @@ static int __sys_call_area_alloc(struct sys_call_area* area, const char* func) * etc. */ EFHW_ASSERT(current); - EFHW_ASSERT( ! (current->flags & PF_WQ_WORKER) ); - EFHW_ASSERT( ! (current->flags & PF_KTHREAD) ); + EFHW_ASSERT(! (current->flags & PF_WQ_WORKER)); + EFHW_ASSERT(! 
(current->flags & PF_KTHREAD)); area->user_addr = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, 0); + MAP_ANONYMOUS | MAP_PRIVATE, 0); if( area->user_addr == 0 ) { EFHW_ERR("%s: ERROR: failed to allocate a page via vm_mmap()", func); return -ENOMEM; @@ -109,26 +109,24 @@ static void* sys_call_area_ptr(struct sys_call_area* area) return page_address(area->page); } -static unsigned long -sys_call_area_user_addr(struct sys_call_area* area, void* ptr) +static unsigned long sys_call_area_user_addr( + struct sys_call_area* area, void* ptr) { return area->user_addr + - ((uintptr_t)ptr - (uintptr_t)sys_call_area_ptr(area)); + ((uintptr_t) ptr - (uintptr_t) sys_call_area_ptr(area)); } #define UMEM_BLOCK (PAGE_SIZE / sizeof(void*)) -#define MAX_PDS 256 +#define MAX_PDS 256 /* A block of addresses of user memory pages */ -struct umem_block -{ +struct umem_block { void* addrs[UMEM_BLOCK]; }; /* A collection of all the user memory pages for a VI */ -struct umem_pages -{ +struct umem_pages { long page_count; long block_count; long used_page_count; @@ -137,8 +135,7 @@ struct umem_pages }; /* Resources for waiting for and handling events */ -struct event_waiter -{ +struct event_waiter { struct wait_queue_entry wait; struct efhw_nic* nic; @@ -147,8 +144,7 @@ struct event_waiter }; /* Per-VI AF_XDP resources */ -struct efhw_af_xdp_vi -{ +struct efhw_af_xdp_vi { struct socket* sock; int owner_id; int rxq_capacity; @@ -160,16 +156,14 @@ struct efhw_af_xdp_vi struct event_waiter waiter; }; -struct protection_domain -{ +struct protection_domain { struct umem_pages umem; long buffer_table_count; long freed_buffer_table_count; }; /* Per-NIC AF_XDP resources */ -struct efhw_nic_af_xdp -{ +struct efhw_nic_af_xdp { struct file* map; struct efhw_af_xdp_vi* vi; struct protection_domain* pd; @@ -182,7 +176,8 @@ struct efhw_nic_af_xdp * *---------------------------------------------------------------------------*/ -/* Free the collection of page addresses. 
Does not free the pages themselves. */ +/* Free the collection of page addresses. Does not free the pages themselves. + */ static void umem_pages_free(struct umem_pages* pages) { long block; @@ -226,7 +221,8 @@ static void** umem_pages_addr_ptr(struct umem_pages* pages, long index) return &pages->blocks[index / UMEM_BLOCK]->addrs[index % UMEM_BLOCK]; } -static void umem_pages_set_addr(struct umem_pages* pages, long page, void* addr) +static void umem_pages_set_addr( + struct umem_pages* pages, long page, void* addr) { *umem_pages_addr_ptr(pages, page) = addr; if( page > pages->used_page_count ) @@ -245,7 +241,8 @@ static void* umem_pages_get_addr(struct umem_pages* pages, long page) *---------------------------------------------------------------------------*/ /* Get the VI with the given instance number */ -static struct efhw_af_xdp_vi* vi_by_instance(struct efhw_nic* nic, int instance) +static struct efhw_af_xdp_vi* vi_by_instance( + struct efhw_nic* nic, int instance) { struct efhw_nic_af_xdp* xdp = nic->arch_extra; @@ -256,7 +253,8 @@ static struct efhw_af_xdp_vi* vi_by_instance(struct efhw_nic* nic, int instance) } /* Get the VI with the given owner ID */ -static struct protection_domain* pd_by_owner(struct efhw_nic* nic, int owner_id) +static struct protection_domain* pd_by_owner( + struct efhw_nic* nic, int owner_id) { struct efhw_nic_af_xdp* xdp = nic->arch_extra; @@ -274,11 +272,10 @@ static struct protection_domain* pd_by_owner(struct efhw_nic* nic, int owner_id) /* Invoke the bpf() syscall args is assumed to be kernel memory */ -noinline -static int xdp_sys_bpf(int cmd, unsigned long user_addr) +noinline static int xdp_sys_bpf(int cmd, unsigned long user_addr) { - int rc = SYSCALL_DISPATCHn(3, bpf, (int, unsigned long, size_t), - cmd, user_addr, sizeof(union bpf_attr)); + int rc = SYSCALL_DISPATCHn(3, bpf, (int, unsigned long, size_t), cmd, + user_addr, sizeof(union bpf_attr)); if( rc < 0 ) EFHW_ERR("%s: sys_bpf(%d) failed: %d", __func__, cmd, rc); 
return rc; @@ -321,47 +318,7 @@ static int xdp_map_create(struct sys_call_area* area, int max_entries) * See af_xdp_bpf.c for the program's source and compilation guidelines. */ static int xdp_prog_load(struct sys_call_area* area, int map_fd) { - const uint64_t const_prog[] = { - 0x00000002000000b7, 0x0000000000041361, - 0x0000000000001261, 0x00000000000024bf, - 0x0000002600000407, 0x000000000012342d, - 0x0000000000002379, 0xffffffff00000418, - 0x0000ffff00000000, 0x000000000000435f, - 0x00000000000d431d, 0x00000000000c2369, - 0x0000008100020355, 0x0000000000102369, - 0x0000000400000207, 0x0000000800080355, - 0x0000000000172271, 0x0000001100010215, - 0x0000000600050255, 0x0000000000101261, - 0x0000000000000118, /* <-- insert map_fd here */ - 0x0000000000000000, - 0x00000002000003b7, 0x0000003300000085, - 0x0000000000000095, - }; - - uint64_t* prog; - char* license; - union bpf_attr* attr; - - attr = sys_call_area_ptr(area); - memset(attr, 0, sizeof(*attr)); - - license = (void*)(attr + 1); -#define LICENSE "GPL" - strncpy(license, LICENSE, strlen(LICENSE) + 1); - - prog = (void*)(license + strlen(LICENSE) + 1); -#undef LICENSE - memcpy(prog, const_prog, sizeof(const_prog)); - prog[20] |= 0x1000; /* "immediate" flag */ - prog[20] |= (uint64_t) map_fd << 32; /* immediate value */ - - attr->prog_type = BPF_PROG_TYPE_XDP; - attr->insn_cnt = sizeof(const_prog) / sizeof(struct bpf_insn); - attr->insns = sys_call_area_user_addr(area, prog); - attr->license = sys_call_area_user_addr(area, license); - strncpy(attr->prog_name, "xdpsock", strlen("xdpsock")); - - return xdp_sys_bpf(BPF_PROG_LOAD, sys_call_area_user_addr(area, attr)); + return 0; } /* Update an element in the XDP socket map (using fds) */ @@ -379,8 +336,8 @@ static int xdp_map_update_fd(int map_fd, int key, int sock_fd) attr = sys_call_area_ptr(&area); memset(attr, 0, sizeof(*attr)); - key_user = (void*)(attr + 1); - sock_user = (void*)(key_user + 1); + key_user = (void*) (attr + 1); + sock_user = (void*) 
(key_user + 1); *key_user = key; *sock_user = sock_fd; @@ -395,8 +352,8 @@ static int xdp_map_update_fd(int map_fd, int key, int sock_fd) } /* Update an element in the XDP socket map (using file pointers) */ -static int xdp_map_update(struct efhw_nic_af_xdp* af_xdp, int key, - struct file* sock) +static int xdp_map_update( + struct efhw_nic_af_xdp* af_xdp, int key, struct file* sock) { int rc, map_fd, sock_fd; @@ -424,36 +381,21 @@ static int xdp_map_update(struct efhw_nic_af_xdp* af_xdp, int key, } /* Bind an AF_XDP socket to an interface */ -static int xdp_bind(struct socket* sock, int ifindex, unsigned queue, unsigned flags) +static int xdp_bind( + struct socket* sock, int ifindex, unsigned queue, unsigned flags) { - struct sockaddr_xdp sxdp = {}; - - sxdp.sxdp_family = PF_XDP; - sxdp.sxdp_ifindex = ifindex; - sxdp.sxdp_queue_id = queue; - sxdp.sxdp_flags = flags; - - return kernel_bind(sock, (struct sockaddr*)&sxdp, sizeof(sxdp)); + return 0; } /* Link an XDP program to an interface */ static int xdp_set_link(struct net_device* dev, struct bpf_prog* prog) { - struct netdev_bpf bpf = { - .command = XDP_SETUP_PROG, - .prog = prog - }; - - if( !dev->netdev_ops->ndo_bpf ) { - EFHW_ERR("%s: %s does not support XDP", __FUNCTION__, dev->name); - return -ENOSYS; - } - - return dev->netdev_ops->ndo_bpf(dev, &bpf); + return 0; } /* Fault handler to provide buffer memory pages for our user mapping */ -static vm_fault_t xdp_umem_fault(struct vm_fault* vmf) { +static vm_fault_t xdp_umem_fault(struct vm_fault* vmf) +{ struct umem_pages* pages = vmf->vma->vm_private_data; struct page* page; @@ -479,13 +421,11 @@ static vm_fault_t xdp_umem_fault(struct vm_fault* vmf) { return 0; } -static struct vm_operations_struct vm_ops = { - .fault = xdp_umem_fault -}; +static struct vm_operations_struct vm_ops = { .fault = xdp_umem_fault }; /* Register user memory with an XDP socket */ static int xdp_register_umem(struct socket* sock, struct umem_pages* pages, - int chunk_size, int 
headroom) + int chunk_size, int headroom) { struct vm_area_struct* vma; int rc = -EFAULT; @@ -494,11 +434,9 @@ static int xdp_register_umem(struct socket* sock, struct umem_pages* pages, * a flags fields added in 5.4. We don't currently need to set any flags, * so just zero everything we don't use. */ - struct xdp_umem_reg mr = { - .len = pages->used_page_count << PAGE_SHIFT, + struct xdp_umem_reg mr = { .len = pages->used_page_count << PAGE_SHIFT, .chunk_size = chunk_size, - .headroom = headroom - }; + .headroom = headroom }; mr.addr = vm_mmap(NULL, 0, mr.len, PROT_READ | PROT_WRITE, MAP_SHARED, 0); if( offset_in_page(mr.addr) ) @@ -515,20 +453,19 @@ static int xdp_register_umem(struct socket* sock, struct umem_pages* pages, vma->vm_private_data = pages; vma->vm_ops = &vm_ops; - rc = sock_ops_setsockopt(sock, SOL_XDP, XDP_UMEM_REG, - (char*)&mr, sizeof(mr)); + rc = sock_ops_setsockopt( + sock, SOL_XDP, XDP_UMEM_REG, (char*) &mr, sizeof(mr)); vm_munmap(mr.addr, mr.len); return rc; } /* Create the rings for an AF_XDP socket and associated umem */ -static int xdp_create_ring(struct socket* sock, - struct efhw_page_map* page_map, void* kern_mem_base, - int capacity, int desc_size, int sockopt, long pgoff, - const struct xdp_ring_offset* xdp_offset, - struct efab_af_xdp_offsets_ring* kern_offset, - struct efab_af_xdp_offsets_ring* user_offset) +static int xdp_create_ring(struct socket* sock, struct efhw_page_map* page_map, + void* kern_mem_base, int capacity, int desc_size, int sockopt, long pgoff, + const struct xdp_ring_offset* xdp_offset, + struct efab_af_xdp_offsets_ring* kern_offset, + struct efab_af_xdp_offsets_ring* user_offset) { int rc; unsigned long map_size, addr, pfn, pages; @@ -538,24 +475,23 @@ static int xdp_create_ring(struct socket* sock, user_base = page_map->n_pages << PAGE_SHIFT; - rc = sock_ops_setsockopt(sock, SOL_XDP, sockopt, - (char*)&capacity, sizeof(int)); + rc = sock_ops_setsockopt( + sock, SOL_XDP, sockopt, (char*) &capacity, sizeof(int)); 
if( rc < 0 ) return rc; map_size = xdp_offset->desc + (capacity + 1) * desc_size; addr = vm_mmap(sock->file, 0, map_size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, pgoff); + MAP_SHARED | MAP_POPULATE, pgoff); if( IS_ERR_VALUE(addr) ) - return addr; + return addr; mmap_write_lock(current->mm); vma = find_vma(current->mm, addr); if( vma == NULL ) { rc = -EFAULT; - } - else { + } else { rc = follow_pfn(vma, addr, &pfn); pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } @@ -574,20 +510,19 @@ static int xdp_create_ring(struct socket* sock, kern_base = ring_base - kern_mem_base; kern_offset->producer = kern_base + xdp_offset->producer; kern_offset->consumer = kern_base + xdp_offset->consumer; - kern_offset->desc = kern_base + xdp_offset->desc; + kern_offset->desc = kern_base + xdp_offset->desc; user_offset->producer = user_base + xdp_offset->producer; user_offset->consumer = user_base + xdp_offset->consumer; - user_offset->desc = user_base + xdp_offset->desc; + user_offset->desc = user_base + xdp_offset->desc; return 0; } static int xdp_create_rings(struct socket* sock, - struct efhw_page_map* page_map, void* kern_mem_base, - long rxq_capacity, long txq_capacity, - struct efab_af_xdp_offsets_rings* kern_offsets, - struct efab_af_xdp_offsets_rings* user_offsets) + struct efhw_page_map* page_map, void* kern_mem_base, long rxq_capacity, + long txq_capacity, struct efab_af_xdp_offsets_rings* kern_offsets, + struct efab_af_xdp_offsets_rings* user_offsets) { int rc; struct sys_call_area rw_area; @@ -603,16 +538,15 @@ static int xdp_create_rings(struct socket* sock, return rc; mmap_offsets = sys_call_area_ptr(&rw_area); - optlen = (void*)(mmap_offsets + 1); + optlen = (void*) (mmap_offsets + 1); *optlen = sizeof(*mmap_offsets); /* For linux<=5.7 you can use kernel_getsockopt(), * but newer versions does not have this function, so we have all that * sys_call_area_*() calls. 
*/ rc = sock->ops->getsockopt(sock, SOL_XDP, XDP_MMAP_OFFSETS, - (void*)sys_call_area_user_addr(&rw_area, - mmap_offsets), - (void*)sys_call_area_user_addr(&rw_area, optlen)); + (void*) sys_call_area_user_addr(&rw_area, mmap_offsets), + (void*) sys_call_area_user_addr(&rw_area, optlen)); /* Security consideration: mmap_offsets is located in untrusted user * memory. I.e. the process can overwrite all this data. @@ -629,35 +563,32 @@ static int xdp_create_rings(struct socket* sock, } EFHW_ASSERT(*optlen == sizeof(*mmap_offsets)); - rc = xdp_create_ring(sock, page_map, kern_mem_base, - rxq_capacity, sizeof(struct xdp_desc), - XDP_RX_RING, XDP_PGOFF_RX_RING, - &mmap_offsets->rx, &kern_offsets->rx, &user_offsets->rx); + rc = xdp_create_ring(sock, page_map, kern_mem_base, rxq_capacity, + sizeof(struct xdp_desc), XDP_RX_RING, XDP_PGOFF_RX_RING, + &mmap_offsets->rx, &kern_offsets->rx, &user_offsets->rx); if( rc < 0 ) goto out; - rc = xdp_create_ring(sock, page_map, kern_mem_base, - txq_capacity, sizeof(struct xdp_desc), - XDP_TX_RING, XDP_PGOFF_TX_RING, - &mmap_offsets->tx, &kern_offsets->tx, &user_offsets->tx); + rc = xdp_create_ring(sock, page_map, kern_mem_base, txq_capacity, + sizeof(struct xdp_desc), XDP_TX_RING, XDP_PGOFF_TX_RING, + &mmap_offsets->tx, &kern_offsets->tx, &user_offsets->tx); if( rc < 0 ) goto out; - rc = xdp_create_ring(sock, page_map, kern_mem_base, - rxq_capacity, sizeof(uint64_t), - XDP_UMEM_FILL_RING, XDP_UMEM_PGOFF_FILL_RING, - &mmap_offsets->fr, &kern_offsets->fr, &user_offsets->fr); + rc = xdp_create_ring(sock, page_map, kern_mem_base, rxq_capacity, + sizeof(uint64_t), XDP_UMEM_FILL_RING, XDP_UMEM_PGOFF_FILL_RING, + &mmap_offsets->fr, &kern_offsets->fr, &user_offsets->fr); if( rc < 0 ) goto out; - rc = xdp_create_ring(sock, page_map, kern_mem_base, - txq_capacity, sizeof(uint64_t), - XDP_UMEM_COMPLETION_RING, XDP_UMEM_PGOFF_COMPLETION_RING, - &mmap_offsets->cr, &kern_offsets->cr, &user_offsets->cr); + rc = xdp_create_ring(sock, page_map, 
kern_mem_base, txq_capacity, + sizeof(uint64_t), XDP_UMEM_COMPLETION_RING, + XDP_UMEM_PGOFF_COMPLETION_RING, &mmap_offsets->cr, &kern_offsets->cr, + &user_offsets->cr); if( rc < 0 ) goto out; - out: +out: sys_call_area_unpin(&rw_area); return rc; } @@ -677,7 +608,7 @@ static void xdp_release_pd(struct efhw_nic* nic, int owner) static void xdp_release_vi(struct efhw_nic* nic, struct efhw_af_xdp_vi* vi) { - if( !vi->sock ) + if( ! vi->sock ) /* We expect uninitialized vi in cases where af_xdp_init() * has not been called after enabling evq. * This can happen on cleanup from failure of stack allocation */ @@ -717,9 +648,8 @@ static void* af_xdp_mem(struct efhw_nic* nic, int instance) return vi ? &vi->kernel_offsets : NULL; } -static int af_xdp_init(struct efhw_nic* nic, int instance, - int chunk_size, int headroom, - struct efhw_page_map* page_map) +static int af_xdp_init(struct efhw_nic* nic, int instance, int chunk_size, + int headroom, struct efhw_page_map* page_map) { int rc; struct efhw_af_xdp_vi* vi; @@ -729,9 +659,7 @@ static int af_xdp_init(struct efhw_nic* nic, int instance, struct file* file; struct efab_af_xdp_offsets* user_offsets; - if( chunk_size == 0 || - chunk_size < headroom || - chunk_size > PAGE_SIZE || + if( chunk_size == 0 || chunk_size < headroom || chunk_size > PAGE_SIZE || PAGE_SIZE % chunk_size != 0 ) return -EINVAL; @@ -764,7 +692,7 @@ static int af_xdp_init(struct efhw_nic* nic, int instance, rc = efhw_page_alloc_zeroed(&vi->user_offsets_page); if( rc < 0 ) goto out_free_sock; - user_offsets = (void*)efhw_page_ptr(&vi->user_offsets_page); + user_offsets = (void*) efhw_page_ptr(&vi->user_offsets_page); rc = efhw_page_map_add_page(page_map, &vi->user_offsets_page); if( rc < 0 ) @@ -774,9 +702,8 @@ static int af_xdp_init(struct efhw_nic* nic, int instance, if( rc < 0 ) goto out_free_user_offsets; - rc = xdp_create_rings(sock, page_map, &vi->kernel_offsets, - vi->rxq_capacity, vi->txq_capacity, - &vi->kernel_offsets.rings, &user_offsets->rings); 
+ rc = xdp_create_rings(sock, page_map, &vi->kernel_offsets, vi->rxq_capacity, + vi->txq_capacity, &vi->kernel_offsets.rings, &user_offsets->rings); if( rc < 0 ) goto out_free_user_offsets; @@ -807,19 +734,19 @@ static int af_xdp_init(struct efhw_nic* nic, int instance, user_offsets->mmap_bytes = efhw_page_map_bytes(page_map); return 0; - xdp_bind_failed: - out_free_user_offsets: +xdp_bind_failed: +out_free_user_offsets: efhw_page_free(&vi->user_offsets_page); - out_free_sock: +out_free_sock: fput(file); memset(vi, 0, sizeof(*vi)); return rc; } -static int af_xdp_dmaq_kick(struct efhw_nic *nic, int instance) +static int af_xdp_dmaq_kick(struct efhw_nic* nic, int instance) { struct efhw_af_xdp_vi* vi; - struct msghdr msg = {.msg_flags = MSG_DONTWAIT}; + struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; vi = vi_by_instance(nic, instance); if( vi == NULL ) return -ENODEV; @@ -832,146 +759,130 @@ static int af_xdp_dmaq_kick(struct efhw_nic *nic, int instance) * Initialisation and configuration discovery * *---------------------------------------------------------------------------*/ -static int -af_xdp_nic_license_check(struct efhw_nic *nic, const uint32_t feature, - int* licensed) +static int af_xdp_nic_license_check( + struct efhw_nic* nic, const uint32_t feature, int* licensed) { - EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); - return 0; + EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); + return 0; } -static int -af_xdp_nic_v3_license_check(struct efhw_nic *nic, const uint64_t app_id, - int* licensed) +static int af_xdp_nic_v3_license_check( + struct efhw_nic* nic, const uint64_t app_id, int* licensed) { - EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); - return 0; + EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); + return 0; } -static int -af_xdp_nic_license_challenge(struct efhw_nic *nic, - const uint32_t feature, - const uint8_t* challenge, - uint32_t* expiry, - uint8_t* signature) +static int af_xdp_nic_license_challenge(struct efhw_nic* nic, + const uint32_t feature, const 
uint8_t* challenge, uint32_t* expiry, + uint8_t* signature) { - EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); - return 0; + EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); + return 0; } -static int -af_xdp_nic_v3_license_challenge(struct efhw_nic *nic, - const uint64_t app_id, - const uint8_t* challenge, - uint32_t* expiry, - uint32_t* days, - uint8_t* signature, - uint8_t* base_mac, - uint8_t* vadaptor_mac) +static int af_xdp_nic_v3_license_challenge(struct efhw_nic* nic, + const uint64_t app_id, const uint8_t* challenge, uint32_t* expiry, + uint32_t* days, uint8_t* signature, uint8_t* base_mac, + uint8_t* vadaptor_mac) { - EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); - return 0; + EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); + return 0; } -static void -af_xdp_nic_tweak_hardware(struct efhw_nic *nic) +static void af_xdp_nic_tweak_hardware(struct efhw_nic* nic) { - nic->pio_num = 0; - nic->pio_size = 0; - nic->tx_alts_vfifos = 0; - nic->tx_alts_cp_bufs = 0; - nic->tx_alts_cp_buf_size = 0; - nic->rx_variant = 0; - nic->tx_variant = 0; - nic->rx_prefix_len = 0; - nic->flags = NIC_FLAG_RX_ZEROCOPY /* TODO AFXDP: hardcoded for now */ - | NIC_FLAG_RX_FILTER_TYPE_IP_LOCAL /* only wild filters */ - | NIC_FLAG_USERSPACE_PRIME /* no explicit priming needed */ - ; + nic->pio_num = 0; + nic->pio_size = 0; + nic->tx_alts_vfifos = 0; + nic->tx_alts_cp_bufs = 0; + nic->tx_alts_cp_buf_size = 0; + nic->rx_variant = 0; + nic->tx_variant = 0; + nic->rx_prefix_len = 0; + nic->flags = NIC_FLAG_RX_ZEROCOPY /* TODO AFXDP: hardcoded for now */ + | NIC_FLAG_RX_FILTER_TYPE_IP_LOCAL /* only wild filters */ + | NIC_FLAG_USERSPACE_PRIME /* no explicit priming needed */ + ; } - -static int -__af_xdp_nic_init_hardware(struct efhw_nic *nic, - struct efhw_ev_handler *ev_handlers, - const uint8_t *mac_addr, - struct sys_call_area* sys_call_area) +static int __af_xdp_nic_init_hardware(struct efhw_nic* nic, + struct efhw_ev_handler* ev_handlers, const uint8_t* mac_addr, + struct sys_call_area* 
sys_call_area) { - int map_fd, rc; - struct bpf_prog* prog; - struct efhw_nic_af_xdp* xdp; - - xdp = kzalloc(sizeof(*xdp) + - nic->vi_lim * sizeof(struct efhw_af_xdp_vi) + - MAX_PDS * sizeof(struct protection_domain), - GFP_KERNEL); - if( xdp == NULL ) - return -ENOMEM; + int map_fd, rc; + struct bpf_prog* prog; + struct efhw_nic_af_xdp* xdp; - nic->ev_handlers = ev_handlers; - xdp->vi = (struct efhw_af_xdp_vi*) (xdp + 1); - xdp->pd = (struct protection_domain*) (xdp->vi + nic->vi_lim); - - rc = map_fd = xdp_map_create(sys_call_area, nic->vi_lim); - if( rc < 0 ) - goto fail_map; + xdp = kzalloc(sizeof(*xdp) + nic->vi_lim * sizeof(struct efhw_af_xdp_vi) + + MAX_PDS * sizeof(struct protection_domain), + GFP_KERNEL); + if( xdp == NULL ) + return -ENOMEM; - rc = xdp_prog_load(sys_call_area, map_fd); - if( rc < 0 ) - goto fail; + nic->ev_handlers = ev_handlers; + xdp->vi = (struct efhw_af_xdp_vi*) (xdp + 1); + xdp->pd = (struct protection_domain*) (xdp->vi + nic->vi_lim); - prog = bpf_prog_get_type_dev(rc, BPF_PROG_TYPE_XDP, 1); - ci_close_fd(rc); - if( IS_ERR(prog) ) { - rc = PTR_ERR(prog); - goto fail; - } + rc = map_fd = xdp_map_create(sys_call_area, nic->vi_lim); + if( rc < 0 ) + goto fail_map; - rc = xdp_set_link(nic->net_dev, prog); - if( rc < 0 ) - goto fail; + rc = xdp_prog_load(sys_call_area, map_fd); + if( rc < 0 ) + goto fail; + ci_close_fd(rc); + + /* + prog = bpf_prog_get_type_dev(rc, BPF_PROG_TYPE_XDP, 1); + if( IS_ERR(prog) ) { + rc = PTR_ERR(prog); + goto fail; + } + */ + + rc = xdp_set_link(nic->net_dev, prog); + if( rc < 0 ) + goto fail; - xdp->map = fget(map_fd); - ci_close_fd(map_fd); + xdp->map = fget(map_fd); + ci_close_fd(map_fd); - nic->arch_extra = xdp; - memcpy(nic->mac_addr, mac_addr, ETH_ALEN); + nic->arch_extra = xdp; + memcpy(nic->mac_addr, mac_addr, ETH_ALEN); - af_xdp_nic_tweak_hardware(nic); - return 0; + af_xdp_nic_tweak_hardware(nic); + return 0; fail: - ci_close_fd(map_fd); + ci_close_fd(map_fd); fail_map: - kfree(xdp); - return rc; + 
kfree(xdp); + return rc; } -static int -af_xdp_nic_init_hardware(struct efhw_nic *nic, - struct efhw_ev_handler *ev_handlers, - const uint8_t *mac_addr) +static int af_xdp_nic_init_hardware(struct efhw_nic* nic, + struct efhw_ev_handler* ev_handlers, const uint8_t* mac_addr) { - int rc; - struct sys_call_area area; + int rc; + struct sys_call_area area; - rc = sys_call_area_alloc(&area); - if( rc < 0 ) - return rc; + rc = sys_call_area_alloc(&area); + if( rc < 0 ) + return rc; - rc = __af_xdp_nic_init_hardware(nic, ev_handlers, mac_addr, &area); + rc = __af_xdp_nic_init_hardware(nic, ev_handlers, mac_addr, &area); - sys_call_area_free(&area); + sys_call_area_free(&area); - return rc; + return rc; } -static void -af_xdp_nic_release_hardware(struct efhw_nic* nic) +static void af_xdp_nic_release_hardware(struct efhw_nic* nic) { struct efhw_nic_af_xdp* xdp = nic->arch_extra; xdp_set_link(nic->net_dev, NULL); @@ -987,8 +898,8 @@ af_xdp_nic_release_hardware(struct efhw_nic* nic) * *--------------------------------------------------------------------*/ -static int wait_callback(struct wait_queue_entry* wait, unsigned mode, - int flags, void* key) +static int wait_callback( + struct wait_queue_entry* wait, unsigned mode, int flags, void* key) { struct event_waiter* w = container_of(wait, struct event_waiter, wait); efhw_handle_wakeup_event(w->nic, w->evq, w->budget); @@ -998,9 +909,8 @@ static int wait_callback(struct wait_queue_entry* wait, unsigned mode, /* This function will enable the given event queue with the requested * properties. 
*/ -static int -af_xdp_nic_event_queue_enable(struct efhw_nic *nic, uint32_t client_id, - struct efhw_evq_params *params) +static int af_xdp_nic_event_queue_enable( + struct efhw_nic* nic, uint32_t client_id, struct efhw_evq_params* params) { struct efhw_af_xdp_vi* vi = vi_by_instance(nic, params->evq); @@ -1021,24 +931,22 @@ af_xdp_nic_event_queue_enable(struct efhw_nic *nic, uint32_t client_id, return 0; } -static void -af_xdp_nic_event_queue_disable(struct efhw_nic *nic, uint32_t client_id, - uint evq, int time_sync_events_enabled) +static void af_xdp_nic_event_queue_disable(struct efhw_nic* nic, + uint32_t client_id, uint evq, int time_sync_events_enabled) { - struct efhw_af_xdp_vi* vi = vi_by_instance(nic, evq); - if( vi != NULL ) - xdp_release_vi(nic, vi); + struct efhw_af_xdp_vi* vi = vi_by_instance(nic, evq); + if( vi != NULL ) + xdp_release_vi(nic, vi); } -static void -af_xdp_nic_wakeup_request(struct efhw_nic *nic, volatile void __iomem* io_page, - int vi_id, int rptr) +static void af_xdp_nic_wakeup_request( + struct efhw_nic* nic, volatile void __iomem* io_page, int vi_id, int rptr) { } -static void af_xdp_nic_sw_event(struct efhw_nic *nic, int data, int evq) +static void af_xdp_nic_sw_event(struct efhw_nic* nic, int data, int evq) { - EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); + EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); } /*-------------------------------------------------------------------- @@ -1047,12 +955,12 @@ static void af_xdp_nic_sw_event(struct efhw_nic *nic, int data, int evq) * *--------------------------------------------------------------------*/ -static int -af_xdp_handle_event(struct efhw_nic *nic, efhw_event_t *ev, int budget) +static int af_xdp_handle_event( + struct efhw_nic* nic, efhw_event_t* ev, int budget) { - EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); - EFHW_ASSERT(0); - return -EOPNOTSUPP; + EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); + EFHW_ASSERT(0); + return -EOPNOTSUPP; } @@ -1063,21 +971,19 @@ af_xdp_handle_event(struct 
efhw_nic *nic, efhw_event_t *ev, int budget) *---------------------------------------------------------------------------*/ -static int -af_xdp_tx_alt_alloc(struct efhw_nic *nic, int tx_q_id, int num_alt, - int num_32b_words, unsigned *cp_id_out, unsigned *alt_ids_out) +static int af_xdp_tx_alt_alloc(struct efhw_nic* nic, int tx_q_id, int num_alt, + int num_32b_words, unsigned* cp_id_out, unsigned* alt_ids_out) { - EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); - return -EOPNOTSUPP; + EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); + return -EOPNOTSUPP; } -static int -af_xdp_tx_alt_free(struct efhw_nic *nic, int num_alt, unsigned cp_id, - const unsigned *alt_ids) +static int af_xdp_tx_alt_free( + struct efhw_nic* nic, int num_alt, unsigned cp_id, const unsigned* alt_ids) { - EFHW_ASSERT(0); - return -EOPNOTSUPP; + EFHW_ASSERT(0); + return -EOPNOTSUPP; } @@ -1088,9 +994,8 @@ af_xdp_tx_alt_free(struct efhw_nic *nic, int num_alt, unsigned cp_id, *---------------------------------------------------------------------------*/ -static int -af_xdp_dmaq_tx_q_init(struct efhw_nic *nic, uint32_t client_id, - struct efhw_dmaq_params *params) +static int af_xdp_dmaq_tx_q_init( + struct efhw_nic* nic, uint32_t client_id, struct efhw_dmaq_params* params) { struct efhw_af_xdp_vi* vi = vi_by_instance(nic, params->evq); if( vi == NULL ) @@ -1104,9 +1009,8 @@ af_xdp_dmaq_tx_q_init(struct efhw_nic *nic, uint32_t client_id, } -static int -af_xdp_dmaq_rx_q_init(struct efhw_nic *nic, uint32_t client_id, - struct efhw_dmaq_params *params) +static int af_xdp_dmaq_rx_q_init( + struct efhw_nic* nic, uint32_t client_id, struct efhw_dmaq_params* params) { struct efhw_af_xdp_vi* vi = vi_by_instance(nic, params->evq); if( vi == NULL ) @@ -1120,7 +1024,7 @@ af_xdp_dmaq_rx_q_init(struct efhw_nic *nic, uint32_t client_id, } -static size_t af_xdp_max_shared_rxqs(struct efhw_nic *nic) +static size_t af_xdp_max_shared_rxqs(struct efhw_nic* nic) { return 0; } @@ -1132,27 +1036,26 @@ static size_t 
af_xdp_max_shared_rxqs(struct efhw_nic *nic) *--------------------------------------------------------------------*/ -static int af_xdp_flush_tx_dma_channel(struct efhw_nic *nic, - uint32_t client_id, uint dmaq, uint evq) +static int af_xdp_flush_tx_dma_channel( + struct efhw_nic* nic, uint32_t client_id, uint dmaq, uint evq) { - EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); - return -EOPNOTSUPP; + EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); + return -EOPNOTSUPP; } -static int af_xdp_flush_rx_dma_channel(struct efhw_nic *nic, - uint32_t client_id, uint dmaq) +static int af_xdp_flush_rx_dma_channel( + struct efhw_nic* nic, uint32_t client_id, uint dmaq) { - EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); - return -EOPNOTSUPP; + EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); + return -EOPNOTSUPP; } -static int af_xdp_translate_dma_addrs(struct efhw_nic* nic, - const dma_addr_t *src, dma_addr_t *dst, - int n) +static int af_xdp_translate_dma_addrs( + struct efhw_nic* nic, const dma_addr_t* src, dma_addr_t* dst, int n) { - return -EOPNOTSUPP; + return -EOPNOTSUPP; } /*-------------------------------------------------------------------- @@ -1161,13 +1064,12 @@ static int af_xdp_translate_dma_addrs(struct efhw_nic* nic, * *--------------------------------------------------------------------*/ -static const int __af_xdp_nic_buffer_table_get_orders[] = {0,1,2,3,4,5,6,7,8,9,10}; +static const int __af_xdp_nic_buffer_table_get_orders[] = { 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10 }; -static int -af_xdp_nic_buffer_table_alloc(struct efhw_nic *nic, int owner, int order, - struct efhw_buffer_table_block **block_out, - int reset_pending) +static int af_xdp_nic_buffer_table_alloc(struct efhw_nic* nic, int owner, + int order, struct efhw_buffer_table_block** block_out, int reset_pending) { struct efhw_buffer_table_block* block; struct protection_domain* pd = pd_by_owner(nic, owner); @@ -1204,19 +1106,16 @@ af_xdp_nic_buffer_table_alloc(struct efhw_nic *nic, int owner, int order, } -static int 
-af_xdp_nic_buffer_table_realloc(struct efhw_nic *nic, int owner, int order, - struct efhw_buffer_table_block *block) +static int af_xdp_nic_buffer_table_realloc(struct efhw_nic* nic, int owner, + int order, struct efhw_buffer_table_block* block) { - EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); - return -EOPNOTSUPP; + EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); + return -EOPNOTSUPP; } -static void -af_xdp_nic_buffer_table_free(struct efhw_nic *nic, - struct efhw_buffer_table_block *block, - int reset_pending) +static void af_xdp_nic_buffer_table_free(struct efhw_nic* nic, + struct efhw_buffer_table_block* block, int reset_pending) { int owner = block->btb_hw.ef10.handle >> 8; kfree(block); @@ -1224,11 +1123,9 @@ af_xdp_nic_buffer_table_free(struct efhw_nic *nic, } -static int -af_xdp_nic_buffer_table_set(struct efhw_nic *nic, - struct efhw_buffer_table_block *block, - int first_entry, int n_entries, - dma_addr_t *dma_addrs) +static int af_xdp_nic_buffer_table_set(struct efhw_nic* nic, + struct efhw_buffer_table_block* block, int first_entry, int n_entries, + dma_addr_t* dma_addrs) { int i, j, owner, order; long page; @@ -1242,26 +1139,27 @@ af_xdp_nic_buffer_table_set(struct efhw_nic *nic, /* We are mapping between two address types. * - * block->btb_vaddr stores the byte offset within the umem block, suitable for - * use with AF_XDP descriptor queues. This is eventually used to provide the - * "user" addresses returned from efrm_pd_dma_map, which in turn provide the - * packet "dma" addresses posted to ef_vi, which are passed on to AF_XDP. + * block->btb_vaddr stores the byte offset within the umem block, suitable + * for use with AF_XDP descriptor queues. This is eventually used to provide + * the "user" addresses returned from efrm_pd_dma_map, which in turn provide + * the packet "dma" addresses posted to ef_vi, which are passed on to AF_XDP. * (Note: "user" and "dma" don't mean userland and DMA in this context). 
* - * dma_addr is the corresponding kernel address, which we use to calculate the - * addresses to store in vi->addrs, and later map into userland. This comes - * from the "dma" (or "pci") addresses obtained by efrm_pd_dma_map which, for - * a non-PCI device, are copied from the provided kernel addresses. - * (Note: "dma" and "pci" don't mean DMA and PCI in this context either). + * dma_addr is the corresponding kernel address, which we use to calculate + * the addresses to store in vi->addrs, and later map into userland. This + * comes from the "dma" (or "pci") addresses obtained by efrm_pd_dma_map + * which, for a non-PCI device, are copied from the provided kernel + * addresses. (Note: "dma" and "pci" don't mean DMA and PCI in this context + * either). * * We get one umem address giving the start of each buffer table block. The * block might contain several consecutive pages, which might be compound * (but all with the same order). * - * We store one kernel address for each single page in the umem block. This is - * somewhat profligate with memory; we could store one per buffer table block, - * or one per compound page, with a slightly more complicated lookup when - * finding each page during mmap. + * We store one kernel address for each single page in the umem block. This + * is somewhat profligate with memory; we could store one per buffer table + * block, or one per compound page, with a slightly more complicated lookup + * when finding each page during mmap. 
*/ page = (block->btb_vaddr >> PAGE_SHIFT) + (first_entry << order); @@ -1269,7 +1167,7 @@ af_xdp_nic_buffer_table_set(struct efhw_nic *nic, return -EINVAL; for( i = 0; i < n_entries; ++i ) { - char* dma_addr = (char*)dma_addrs[i]; + char* dma_addr = (char*) dma_addrs[i]; for( j = 0; j < (1 << order); ++j, ++page, dma_addr += PAGE_SIZE ) umem_pages_set_addr(&pd->umem, page, dma_addr); } @@ -1278,10 +1176,8 @@ af_xdp_nic_buffer_table_set(struct efhw_nic *nic, } -static void -af_xdp_nic_buffer_table_clear(struct efhw_nic *nic, - struct efhw_buffer_table_block *block, - int first_entry, int n_entries) +static void af_xdp_nic_buffer_table_clear(struct efhw_nic* nic, + struct efhw_buffer_table_block* block, int first_entry, int n_entries) { } @@ -1292,21 +1188,19 @@ af_xdp_nic_buffer_table_clear(struct efhw_nic *nic, * *--------------------------------------------------------------------*/ -static int -af_xdp_nic_set_tx_port_sniff(struct efhw_nic *nic, int instance, int enable, - int rss_context) +static int af_xdp_nic_set_tx_port_sniff( + struct efhw_nic* nic, int instance, int enable, int rss_context) { - EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); - return -EOPNOTSUPP; + EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); + return -EOPNOTSUPP; } -static int -af_xdp_nic_set_port_sniff(struct efhw_nic *nic, int instance, int enable, - int promiscuous, int rss_context) +static int af_xdp_nic_set_port_sniff(struct efhw_nic* nic, int instance, + int enable, int promiscuous, int rss_context) { - EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); - return -EOPNOTSUPP; + EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); + return -EOPNOTSUPP; } /*-------------------------------------------------------------------- @@ -1315,12 +1209,11 @@ af_xdp_nic_set_port_sniff(struct efhw_nic *nic, int instance, int enable, * *--------------------------------------------------------------------*/ -static int -af_xdp_get_rx_error_stats(struct efhw_nic *nic, int instance, - void *data, int data_len, int 
do_reset) +static int af_xdp_get_rx_error_stats( + struct efhw_nic* nic, int instance, void* data, int data_len, int do_reset) { - EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); - return -EOPNOTSUPP; + EFHW_ERR("%s: FIXME AF_XDP", __FUNCTION__); + return -EOPNOTSUPP; } /*-------------------------------------------------------------------- @@ -1329,24 +1222,23 @@ af_xdp_get_rx_error_stats(struct efhw_nic *nic, int instance, * *--------------------------------------------------------------------*/ -static int -af_xdp_client_alloc(struct efhw_nic *nic, uint32_t parent, uint32_t *id) +static int af_xdp_client_alloc( + struct efhw_nic* nic, uint32_t parent, uint32_t* id) { - return -ENOSYS; + return -ENOSYS; } -static int -af_xdp_client_free(struct efhw_nic *nic, uint32_t id) +static int af_xdp_client_free(struct efhw_nic* nic, uint32_t id) { - return -ENOSYS; + return -ENOSYS; } -static int -af_xdp_vi_set_user(struct efhw_nic *nic, uint32_t vi_instance, uint32_t user) +static int af_xdp_vi_set_user( + struct efhw_nic* nic, uint32_t vi_instance, uint32_t user) { - return -ENOSYS; + return -ENOSYS; } /*-------------------------------------------------------------------- @@ -1354,172 +1246,164 @@ af_xdp_vi_set_user(struct efhw_nic *nic, uint32_t vi_instance, uint32_t user) * Filtering * *--------------------------------------------------------------------*/ -static int -af_xdp_rss_alloc(struct efhw_nic *nic, const u32 *indir, const u8 *key, - u32 nic_rss_flags, int num_qs, u32 *rss_context_out) -{ - return -ENOSYS; -} - -static int -af_xdp_rss_update(struct efhw_nic *nic, const u32 *indir, const u8 *key, - u32 nic_rss_flags, u32 rss_context) -{ - return -ENOSYS; -} - -static int -af_xdp_rss_free(struct efhw_nic *nic, u32 rss_context) -{ - return -ENOSYS; -} - -static int -af_xdp_rss_flags(struct efhw_nic *nic, u32 *flags_out) -{ - return -ENOSYS; -} - -static int af_xdp_efx_spec_to_ethtool_flow(struct efx_filter_spec* efx_spec, - struct ethtool_rx_flow_spec* fs) -{ - /* In 
order to support different driver capabilities we need to - * always install the same filter type. This means that we will - * always use a 3-tuple IP filter, even if a 5-tuple was requested. - * Although this can in theory match traffic not destined for us, in - * practice common usage means that it's sufficiently specific. - * - * The ethtool interface does not complain if a duplicate filter is - * inserted, and does not reference count such filters. That causes - * issues for the case where onload tries to replace a wild match - * filter with a full match filter, as it will add the new full match - * before removing the original wild. However, we treat both of these - * as the same 3-tuple and so the net result is that we remove the - * filter entirely. This occurs in two circumstances: - * - closing a listening socket with accepted sockets still open - * - connecting an already bound UDP socket - * We can avoid the first by setting oof_shared_keep_thresh=0 when - * using AF_XDP. - * The second is a rare case, and the failure mode here is to fall - * back to traffic via the kernel, so I'm living with it for now. - */ - - int rc = efx_spec_to_ethtool_flow(efx_spec, fs); - if (rc < 0) - return rc; - - switch (fs->flow_type) { - case UDP_V4_FLOW: - if (fs->m_u.udp_ip4_spec.tos) - return -EOPNOTSUPP; - fs->h_u.udp_ip4_spec.ip4src = 0; - fs->h_u.udp_ip4_spec.psrc = 0; - fs->m_u.udp_ip4_spec.ip4src = 0; - fs->m_u.udp_ip4_spec.psrc = 0; - break; - case TCP_V4_FLOW: - if (fs->m_u.tcp_ip4_spec.tos) - return -EOPNOTSUPP; - fs->h_u.tcp_ip4_spec.ip4src = 0; - fs->h_u.tcp_ip4_spec.psrc = 0; - fs->m_u.tcp_ip4_spec.ip4src = 0; - fs->m_u.tcp_ip4_spec.psrc = 0; - break; - default: - /* FIXME AF_XDP need to check whether we can install both IPv6 - * and IPv4 filters. For now just support IPv4. 
- */ - return -EOPNOTSUPP; - } - - /* TODO AF_XDP: for now assume dmaq_id matches NIC channel - * based on insight into efhw/af_xdp.c */ - fs->ring_cookie = efx_spec->dmaq_id; - - return 0; -} - -static int -af_xdp_filter_insert(struct efhw_nic *nic, struct efx_filter_spec *spec, - int *rxq, const struct cpumask *mask, unsigned flags) -{ - struct net_device *dev = nic->net_dev; - int rc; - struct ethtool_rxnfc info; - const struct ethtool_ops *ops; - struct cmd_context ctx; - - if (!enable_af_xdp_flow_filters) - return AF_XDP_NO_FILTER_MAGIC_ID; /* pretend a filter is installed */ - memset(&info, 0, sizeof(info)); - info.cmd = ETHTOOL_SRXCLSRLINS; - rc = af_xdp_efx_spec_to_ethtool_flow(spec, &info.fs); - if ( rc < 0 ) - return rc; - - rtnl_lock(); - - ops = dev->ethtool_ops; - if (!ops->set_rxnfc) { - rc = -EOPNOTSUPP; - goto unlock_out; - } - - ctx.netdev = dev; - rc = rmgr_set_location(&ctx, &info.fs); - if ( rc < 0 ) - goto unlock_out; - - rc = ops->set_rxnfc(dev, &info); - if ( rc >= 0 ) - rc = info.fs.location; +static int af_xdp_rss_alloc(struct efhw_nic* nic, const u32* indir, + const u8* key, u32 nic_rss_flags, int num_qs, u32* rss_context_out) +{ + return -ENOSYS; +} + +static int af_xdp_rss_update(struct efhw_nic* nic, const u32* indir, + const u8* key, u32 nic_rss_flags, u32 rss_context) +{ + return -ENOSYS; +} + +static int af_xdp_rss_free(struct efhw_nic* nic, u32 rss_context) +{ + return -ENOSYS; +} + +static int af_xdp_rss_flags(struct efhw_nic* nic, u32* flags_out) +{ + return -ENOSYS; +} + +static int af_xdp_efx_spec_to_ethtool_flow( + struct efx_filter_spec* efx_spec, struct ethtool_rx_flow_spec* fs) +{ + /* In order to support different driver capabilities we need to + * always install the same filter type. This means that we will + * always use a 3-tuple IP filter, even if a 5-tuple was requested. + * Although this can in theory match traffic not destined for us, in + * practice common usage means that it's sufficiently specific. 
+ * + * The ethtool interface does not complain if a duplicate filter is + * inserted, and does not reference count such filters. That causes + * issues for the case where onload tries to replace a wild match + * filter with a full match filter, as it will add the new full match + * before removing the original wild. However, we treat both of these + * as the same 3-tuple and so the net result is that we remove the + * filter entirely. This occurs in two circumstances: + * - closing a listening socket with accepted sockets still open + * - connecting an already bound UDP socket + * We can avoid the first by setting oof_shared_keep_thresh=0 when + * using AF_XDP. + * The second is a rare case, and the failure mode here is to fall + * back to traffic via the kernel, so I'm living with it for now. + */ + + int rc = efx_spec_to_ethtool_flow(efx_spec, fs); + if( rc < 0 ) + return rc; + + switch( fs->flow_type ) { + case UDP_V4_FLOW: + if( fs->m_u.udp_ip4_spec.tos ) + return -EOPNOTSUPP; + fs->h_u.udp_ip4_spec.ip4src = 0; + fs->h_u.udp_ip4_spec.psrc = 0; + fs->m_u.udp_ip4_spec.ip4src = 0; + fs->m_u.udp_ip4_spec.psrc = 0; + break; + case TCP_V4_FLOW: + if( fs->m_u.tcp_ip4_spec.tos ) + return -EOPNOTSUPP; + fs->h_u.tcp_ip4_spec.ip4src = 0; + fs->h_u.tcp_ip4_spec.psrc = 0; + fs->m_u.tcp_ip4_spec.ip4src = 0; + fs->m_u.tcp_ip4_spec.psrc = 0; + break; + default: + /* FIXME AF_XDP need to check whether we can install both IPv6 + * and IPv4 filters. For now just support IPv4. + */ + return -EOPNOTSUPP; + } + + /* TODO AF_XDP: for now assume dmaq_id matches NIC channel + * based on insight into efhw/af_xdp.c */ + fs->ring_cookie = efx_spec->dmaq_id; + + return 0; +} + +static int af_xdp_filter_insert(struct efhw_nic* nic, + struct efx_filter_spec* spec, int* rxq, const struct cpumask* mask, + unsigned flags) +{ + struct net_device* dev = nic->net_dev; + int rc; + struct ethtool_rxnfc info; + const struct ethtool_ops* ops; + struct cmd_context ctx; + + if( ! 
enable_af_xdp_flow_filters ) + return AF_XDP_NO_FILTER_MAGIC_ID; /* pretend a filter is installed */ + memset(&info, 0, sizeof(info)); + info.cmd = ETHTOOL_SRXCLSRLINS; + rc = af_xdp_efx_spec_to_ethtool_flow(spec, &info.fs); + if( rc < 0 ) + return rc; + + rtnl_lock(); + + ops = dev->ethtool_ops; + if( ! ops->set_rxnfc ) { + rc = -EOPNOTSUPP; + goto unlock_out; + } + + ctx.netdev = dev; + rc = rmgr_set_location(&ctx, &info.fs); + if( rc < 0 ) + goto unlock_out; + + rc = ops->set_rxnfc(dev, &info); + if( rc >= 0 ) + rc = info.fs.location; unlock_out: - rtnl_unlock(); - return rc; + rtnl_unlock(); + return rc; } -static void -af_xdp_filter_remove(struct efhw_nic *nic, int filter_id) +static void af_xdp_filter_remove(struct efhw_nic* nic, int filter_id) { - struct net_device *dev = nic->net_dev; - struct ethtool_rxnfc info; - const struct ethtool_ops *ops; + struct net_device* dev = nic->net_dev; + struct ethtool_rxnfc info; + const struct ethtool_ops* ops; - if (filter_id == AF_XDP_NO_FILTER_MAGIC_ID) - return; + if( filter_id == AF_XDP_NO_FILTER_MAGIC_ID ) + return; - memset(&info, 0, sizeof(info)); - info.cmd = ETHTOOL_SRXCLSRLDEL; - info.fs.location = filter_id; + memset(&info, 0, sizeof(info)); + info.cmd = ETHTOOL_SRXCLSRLDEL; + info.fs.location = filter_id; - rtnl_lock(); - ops = dev->ethtool_ops; - if (ops->set_rxnfc) - ops->set_rxnfc(dev, &info); - rtnl_unlock(); + rtnl_lock(); + ops = dev->ethtool_ops; + if( ops->set_rxnfc ) + ops->set_rxnfc(dev, &info); + rtnl_unlock(); } -static int -af_xdp_filter_redirect(struct efhw_nic *nic, int filter_id, - struct efx_filter_spec *spec) +static int af_xdp_filter_redirect( + struct efhw_nic* nic, int filter_id, struct efx_filter_spec* spec) { - /* This error code is proxied by efrm_filter_redirect() and goes to - * oo_hw_filter_set_hwport(). Do not change this value without - * looking in there. */ - return -ENODEV; + /* This error code is proxied by efrm_filter_redirect() and goes to + * oo_hw_filter_set_hwport(). 
Do not change this value without + * looking in there. */ + return -ENODEV; } -static int -af_xdp_multicast_block(struct efhw_nic *nic, bool block) +static int af_xdp_multicast_block(struct efhw_nic* nic, bool block) { - return -ENOSYS; + return -ENOSYS; } -static int -af_xdp_unicast_block(struct efhw_nic *nic, bool block) +static int af_xdp_unicast_block(struct efhw_nic* nic, bool block) { - return -ENOSYS; + return -ENOSYS; } /*-------------------------------------------------------------------- @@ -1527,16 +1411,15 @@ af_xdp_unicast_block(struct efhw_nic *nic, bool block) * vports * *--------------------------------------------------------------------*/ -static int -af_xdp_vport_alloc(struct efhw_nic *nic, u16 vlan_id, u16 *vport_handle_out) +static int af_xdp_vport_alloc( + struct efhw_nic* nic, u16 vlan_id, u16* vport_handle_out) { - return -ENOSYS; + return -ENOSYS; } -static int -af_xdp_vport_free(struct efhw_nic *nic, u16 vport_handle) +static int af_xdp_vport_free(struct efhw_nic* nic, u16 vport_handle) { - return -ENOSYS; + return -ENOSYS; } /*-------------------------------------------------------------------- @@ -1544,25 +1427,22 @@ af_xdp_vport_free(struct efhw_nic *nic, u16 vport_handle) * Device * *--------------------------------------------------------------------*/ -static struct pci_dev* -af_xdp_get_pci_dev(struct efhw_nic *nic) +static struct pci_dev* af_xdp_get_pci_dev(struct efhw_nic* nic) { - return NULL; + return NULL; } -static int -af_xdp_vi_io_region(struct efhw_nic *nic, int instance, size_t* size_out, - resource_size_t* addr_out) +static int af_xdp_vi_io_region(struct efhw_nic* nic, int instance, + size_t* size_out, resource_size_t* addr_out) { - *size_out = 0; - return 0; + *size_out = 0; + return 0; } -static int -af_xdp_inject_reset_ev(struct efhw_nic* nic, void* base, unsigned capacity, - const volatile uint32_t* evq_ptr) +static int af_xdp_inject_reset_ev(struct efhw_nic* nic, void* base, + unsigned capacity, const volatile 
uint32_t* evq_ptr) { - return -EOPNOTSUPP; + return -EOPNOTSUPP; } /*-------------------------------------------------------------------- @@ -1570,10 +1450,10 @@ af_xdp_inject_reset_ev(struct efhw_nic* nic, void* base, unsigned capacity, * CTPIO * *--------------------------------------------------------------------*/ -static int -af_xdp_ctpio_addr(struct efhw_nic* nic, int instance, resource_size_t* addr) +static int af_xdp_ctpio_addr( + struct efhw_nic* nic, int instance, resource_size_t* addr) { - return -ENOSYS; + return -ENOSYS; } /*-------------------------------------------------------------------- @@ -1583,58 +1463,58 @@ af_xdp_ctpio_addr(struct efhw_nic* nic, int instance, resource_size_t* addr) *--------------------------------------------------------------------*/ struct efhw_func_ops af_xdp_char_functional_units = { - af_xdp_nic_init_hardware, - af_xdp_nic_tweak_hardware, - af_xdp_nic_release_hardware, - af_xdp_nic_event_queue_enable, - af_xdp_nic_event_queue_disable, - af_xdp_nic_wakeup_request, - af_xdp_nic_sw_event, - af_xdp_handle_event, - af_xdp_dmaq_tx_q_init, - af_xdp_dmaq_rx_q_init, - af_xdp_flush_tx_dma_channel, - af_xdp_flush_rx_dma_channel, - af_xdp_translate_dma_addrs, - __af_xdp_nic_buffer_table_get_orders, - sizeof(__af_xdp_nic_buffer_table_get_orders) / - sizeof(__af_xdp_nic_buffer_table_get_orders[0]), - af_xdp_nic_buffer_table_alloc, - af_xdp_nic_buffer_table_realloc, - af_xdp_nic_buffer_table_free, - af_xdp_nic_buffer_table_set, - af_xdp_nic_buffer_table_clear, - af_xdp_nic_set_port_sniff, - af_xdp_nic_set_tx_port_sniff, - af_xdp_nic_license_challenge, - af_xdp_nic_license_check, - af_xdp_nic_v3_license_challenge, - af_xdp_nic_v3_license_check, - af_xdp_get_rx_error_stats, - af_xdp_tx_alt_alloc, - af_xdp_tx_alt_free, - af_xdp_client_alloc, - af_xdp_client_free, - af_xdp_vi_set_user, - af_xdp_rss_alloc, - af_xdp_rss_update, - af_xdp_rss_free, - af_xdp_rss_flags, - af_xdp_filter_insert, - af_xdp_filter_remove, - af_xdp_filter_redirect, - 
af_xdp_multicast_block, - af_xdp_unicast_block, - af_xdp_vport_alloc, - af_xdp_vport_free, - af_xdp_dmaq_kick, - af_xdp_mem, - af_xdp_init, - af_xdp_get_pci_dev, - af_xdp_vi_io_region, - af_xdp_inject_reset_ev, - af_xdp_ctpio_addr, - af_xdp_max_shared_rxqs, + af_xdp_nic_init_hardware, + af_xdp_nic_tweak_hardware, + af_xdp_nic_release_hardware, + af_xdp_nic_event_queue_enable, + af_xdp_nic_event_queue_disable, + af_xdp_nic_wakeup_request, + af_xdp_nic_sw_event, + af_xdp_handle_event, + af_xdp_dmaq_tx_q_init, + af_xdp_dmaq_rx_q_init, + af_xdp_flush_tx_dma_channel, + af_xdp_flush_rx_dma_channel, + af_xdp_translate_dma_addrs, + __af_xdp_nic_buffer_table_get_orders, + sizeof(__af_xdp_nic_buffer_table_get_orders) / + sizeof(__af_xdp_nic_buffer_table_get_orders[0]), + af_xdp_nic_buffer_table_alloc, + af_xdp_nic_buffer_table_realloc, + af_xdp_nic_buffer_table_free, + af_xdp_nic_buffer_table_set, + af_xdp_nic_buffer_table_clear, + af_xdp_nic_set_port_sniff, + af_xdp_nic_set_tx_port_sniff, + af_xdp_nic_license_challenge, + af_xdp_nic_license_check, + af_xdp_nic_v3_license_challenge, + af_xdp_nic_v3_license_check, + af_xdp_get_rx_error_stats, + af_xdp_tx_alt_alloc, + af_xdp_tx_alt_free, + af_xdp_client_alloc, + af_xdp_client_free, + af_xdp_vi_set_user, + af_xdp_rss_alloc, + af_xdp_rss_update, + af_xdp_rss_free, + af_xdp_rss_flags, + af_xdp_filter_insert, + af_xdp_filter_remove, + af_xdp_filter_redirect, + af_xdp_multicast_block, + af_xdp_unicast_block, + af_xdp_vport_alloc, + af_xdp_vport_free, + af_xdp_dmaq_kick, + af_xdp_mem, + af_xdp_init, + af_xdp_get_pci_dev, + af_xdp_vi_io_region, + af_xdp_inject_reset_ev, + af_xdp_ctpio_addr, + af_xdp_max_shared_rxqs, }; #endif /* EFHW_HAS_AF_XDP */ diff --git a/src/lib/efthrm/tcp_helper_endpoint.c b/src/lib/efthrm/tcp_helper_endpoint.c index 8034db6df..cb38c7362 100644 --- a/src/lib/efthrm/tcp_helper_endpoint.c +++ b/src/lib/efthrm/tcp_helper_endpoint.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: 
(c) Copyright 2004-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author Andrew Rybchenko ** \brief Kernel-private endpoints routines @@ -29,10 +29,8 @@ \**************************************************************************/ /* See description in include/driver/efab/tcp_helper_endpoint.h */ -void -tcp_helper_endpoint_ctor(tcp_helper_endpoint_t *ep, - tcp_helper_resource_t * thr, - int id) +void tcp_helper_endpoint_ctor( + tcp_helper_endpoint_t* ep, tcp_helper_resource_t* thr, int id) { int i; @@ -74,31 +72,31 @@ tcp_helper_endpoint_ctor(tcp_helper_endpoint_t *ep, /*--------------------------------------------------------------------*/ -static void -clear_plugin_state(tcp_helper_endpoint_t * ep) +static void clear_plugin_state(tcp_helper_endpoint_t* ep) { #if CI_CFG_TCP_OFFLOAD_RECYCLER ci_netif* ni = &ep->thr->netif; int intf_i; - ci_assert( ! in_atomic() ); - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + ci_assert(! 
in_atomic()); + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { struct xsn_ceph_destroy_stream param = {}; int rc; ci_uint32 conn_id; /* This function can be called from tcp_helper_endpoint_clear_filters() * without the stack lock */ - conn_id = ci_xchg32(&ep->plugin_stream_id[intf_i], - INVALID_PLUGIN_HANDLE); + conn_id = ci_xchg32(&ep->plugin_stream_id[intf_i], INVALID_PLUGIN_HANDLE); if( conn_id == INVALID_PLUGIN_HANDLE ) continue; - param.in_conn_id = cpu_to_le32(conn_id);; - rc = efrm_ext_msg(ni->nic_hw[intf_i].plugin_rx, - XSN_CEPH_DESTROY_STREAM, &param, sizeof(param)); + param.in_conn_id = cpu_to_le32(conn_id); + ; + rc = efrm_ext_msg(ni->nic_hw[intf_i].plugin_rx, XSN_CEPH_DESTROY_STREAM, + &param, sizeof(param)); if( rc ) - OO_DEBUG_ERR(ci_log("%s: ERROR: Destroy Ceph stream failed (%d)", - __FUNCTION__, rc)); + OO_DEBUG_ERR(ci_log( + "%s: ERROR: Destroy Ceph stream failed (%d)", __FUNCTION__, rc)); } #endif } @@ -112,8 +110,7 @@ clear_plugin_state(tcp_helper_endpoint_t * ep) #endif /* See description in include/onload/tcp_helper_endpoint.h */ -void -tcp_helper_endpoint_dtor(tcp_helper_endpoint_t * ep) +void tcp_helper_endpoint_dtor(tcp_helper_endpoint_t* ep) { unsigned long lock_flags; #ifndef BREAK_SCALABLE_FILTERS @@ -133,20 +130,20 @@ tcp_helper_endpoint_dtor(tcp_helper_endpoint_t * ep) clear_plugin_state(ep); #ifndef BREAK_SCALABLE_FILTERS if( s->s_flags & CI_SOCK_FLAG_STACK_FILTER ) - ci_tcp_sock_clear_stack_filter(&ep->thr->netif, - SP_TO_TCP(&ep->thr->netif, ep->id)); + ci_tcp_sock_clear_stack_filter( + &ep->thr->netif, SP_TO_TCP(&ep->thr->netif, ep->id)); #endif oof_socket_del(oo_filter_ns_to_manager(ep->thr->filter_ns), &ep->oofilter); - oof_socket_mcast_del_all(oo_filter_ns_to_manager(ep->thr->filter_ns), - &ep->oofilter); + oof_socket_mcast_del_all( + oo_filter_ns_to_manager(ep->thr->filter_ns), &ep->oofilter); oof_socket_dtor(&ep->oofilter); spin_lock_irqsave(&ep->lock, lock_flags); if( ep->os_socket != NULL ) { if( ID_TO_WAITABLE_OBJ(&ep->thr->netif,
ep->id)->waitable.state != CI_TCP_STATE_ACTIVE_WILD ) { - OO_DEBUG_ERR(ci_log(FEP_FMT "ERROR: O/S socket still referenced", - FEP_PRI_ARGS(ep))); + OO_DEBUG_ERR(ci_log( + FEP_FMT "ERROR: O/S socket still referenced", FEP_PRI_ARGS(ep))); } files_to_drop[num_files_to_drop++] = ep->os_socket; ep->os_socket = NULL; @@ -161,8 +158,8 @@ tcp_helper_endpoint_dtor(tcp_helper_endpoint_t * ep) fput(files_to_drop[i]); if( ep->alien_ref != NULL ) { - OO_DEBUG_ERR(ci_log(FEP_FMT "ERROR: alien socket still referenced", - FEP_PRI_ARGS(ep))); + OO_DEBUG_ERR(ci_log( + FEP_FMT "ERROR: alien socket still referenced", FEP_PRI_ARGS(ep))); fput(ep->alien_ref->_filp); ep->alien_ref = NULL; } @@ -176,17 +173,16 @@ tcp_helper_endpoint_dtor(tcp_helper_endpoint_t * ep) #if CI_CFG_ENDPOINT_MOVE -static int -tcp_helper_endpoint_reuseaddr_cleanup(ci_netif* ni, ci_sock_cmn* s) +static int tcp_helper_endpoint_reuseaddr_cleanup(ci_netif* ni, ci_sock_cmn* s) { int i; if( (~s->b.state & CI_TCP_STATE_TCP) || s->b.state == CI_TCP_LISTEN ) return 0; - for( i = 0; i < (int)ni->state->n_ep_bufs; ++i ) { + for( i = 0; i < (int) ni->state->n_ep_bufs; ++i ) { citp_waitable_obj* wo = ID_TO_WAITABLE_OBJ(ni, i); - + if( wo->waitable.state != CI_TCP_TIME_WAIT ) continue; @@ -231,8 +227,8 @@ tcp_helper_endpoint_reuseaddr_cleanup(ci_netif* ni, ci_sock_cmn* s) * lIP/lp rIP/rp from_tcp_id=n TCP connection passively opened * (use filter from this TCP ep) * aIP/ap rIP/rp s_flags & TPROXY - * && phys_port=n TCP connection using transparent - * shared filter + * && phys_port=n TCP connection using + *transparent shared filter * * *--------------------------------------------------------------------*/ @@ -242,13 +238,12 @@ tcp_helper_endpoint_reuseaddr_cleanup(ci_netif* ni, ci_sock_cmn* s) * normally scheduled to be done asnychronously by tcp_helper_do_non_atomic() * in workqueue context. * Flushing is required before setting new filter. 
*/ -static int -tcp_helper_flush_clear_filters(tcp_helper_endpoint_t* ep) +static int tcp_helper_flush_clear_filters(tcp_helper_endpoint_t* ep) { #if ! CI_CFG_UL_INTERRUPT_HELPER /* Avoid racing with tcp_helper_do_non_atomic(). */ unsigned ep_aflags; - ci_assert( ci_netif_is_locked(&ep->thr->netif) ); + ci_assert(ci_netif_is_locked(&ep->thr->netif)); again: if( (ep_aflags = ep->ep_aflags) & OO_THR_EP_AFLAG_NON_ATOMIC ) { if( in_atomic() ) @@ -258,29 +253,28 @@ tcp_helper_flush_clear_filters(tcp_helper_endpoint_t* ep) * retransmission. */ return -EAGAIN; /* do not expect this endpoint to be going to be freed */ - ci_assert(!(ep_aflags & OO_THR_EP_AFLAG_NEED_FREE)); + ci_assert(! (ep_aflags & OO_THR_EP_AFLAG_NEED_FREE)); if( (ep_aflags = ep->ep_aflags) & OO_THR_EP_AFLAG_CLEAR_FILTERS ) { /* let us try to steal the flag, so we can do the operation ourselves */ if( ci_cas32_fail(&ep->ep_aflags, ep_aflags, - ep_aflags & ~ OO_THR_EP_AFLAG_CLEAR_FILTERS) ) + ep_aflags & ~OO_THR_EP_AFLAG_CLEAR_FILTERS) ) goto again; /* we have stolen the flag, clearing the filters */ tcp_helper_endpoint_clear_filters(ep, 0); return 0; } - /* Looks we clashed with the tcp_helper_do_non_atomic() while it is running, - * let us wait till it finishes */ + /* Looks we clashed with the tcp_helper_do_non_atomic() while it is + * running, let us wait till it finishes */ flush_work(&ep->thr->non_atomic_work); - ci_assert(!(ep->ep_aflags & OO_THR_EP_AFLAG_NON_ATOMIC)); + ci_assert(! (ep->ep_aflags & OO_THR_EP_AFLAG_NON_ATOMIC)); } #endif return 0; } -static int -ci_tcp_use_mac_filter(ci_netif* ni, ci_sock_cmn* s, ci_ifid_t ifindex, - oo_sp from_tcp_id) +static int ci_tcp_use_mac_filter( + ci_netif* ni, ci_sock_cmn* s, ci_ifid_t ifindex, oo_sp from_tcp_id) { int use_mac_filter = 0; int mode; @@ -300,8 +294,9 @@ ci_tcp_use_mac_filter(ci_netif* ni, ci_sock_cmn* s, ci_ifid_t ifindex, /* Passively opened sockets accepted from a listener using a MAC filter * also use the MAC filter. 
*/ - use_mac_filter |= OO_SP_NOT_NULL(from_tcp_id) && - (SP_TO_SOCK(ni, from_tcp_id)->s_flags & CI_SOCK_FLAG_STACK_FILTER); + use_mac_filter |= + OO_SP_NOT_NULL(from_tcp_id) && + (SP_TO_SOCK(ni, from_tcp_id)->s_flags & CI_SOCK_FLAG_STACK_FILTER); #ifndef BREAK_SCALABLE_FILTERS if( (use_mac_filter == 0) && (s->b.state == CI_TCP_LISTEN) && @@ -319,9 +314,8 @@ ci_tcp_use_mac_filter(ci_netif* ni, ci_sock_cmn* s, ci_ifid_t ifindex, } -int -tcp_helper_endpoint_set_filters(tcp_helper_endpoint_t* ep, - ci_ifid_t bindto_ifindex, oo_sp from_tcp_id) +int tcp_helper_endpoint_set_filters( + tcp_helper_endpoint_t* ep, ci_ifid_t bindto_ifindex, oo_sp from_tcp_id) { struct file* os_sock_ref; ci_netif* ni = &ep->thr->netif; @@ -337,9 +331,9 @@ tcp_helper_endpoint_set_filters(tcp_helper_endpoint_t* ep, ! CI_IPX_ADDR_IS_ANY(sock_raddr(s)); #endif - OO_DEBUG_TCPH(ci_log("%s: [%d:%d] bindto_ifindex=%d from_tcp_id=%d", - __FUNCTION__, ep->thr->id, - OO_SP_FMT(ep->id), bindto_ifindex, from_tcp_id)); + OO_DEBUG_TCPH( + ci_log("%s: [%d:%d] bindto_ifindex=%d from_tcp_id=%d", __FUNCTION__, + ep->thr->id, OO_SP_FMT(ep->id), bindto_ifindex, from_tcp_id)); /* Make sure the endpoint is not subject to pending async filter operations. * @@ -349,12 +343,12 @@ tcp_helper_endpoint_set_filters(tcp_helper_endpoint_t* ep, * Before proceeding with setting the filter a pending filter clear * operation needs to be flushed. */ rc = tcp_helper_flush_clear_filters(ep); - if(CI_UNLIKELY( rc < 0 )) + if( CI_UNLIKELY(rc < 0) ) return rc; /* The lock is needed for assertions with CI_NETIF_FLAG_IN_DL_CONTEXT * flag only. */ - ci_assert( ci_netif_is_locked(&ep->thr->netif) ); + ci_assert(ci_netif_is_locked(&ep->thr->netif)); #if CI_CFG_FD_CACHING /* The special cases that allow active-wild sharers to be cacheable depend on @@ -379,14 +373,11 @@ tcp_helper_endpoint_set_filters(tcp_helper_endpoint_t* ep, */ spin_lock_irqsave(&ep->lock, lock_flags); if( OO_SP_NOT_NULL(from_tcp_id) && - ! 
( use_mac_filter && - NI_OPTS(ni).scalable_listen == - CITP_SCALABLE_LISTEN_ACCELERATED_ONLY ) ) { - + ! (use_mac_filter && NI_OPTS(ni).scalable_listen == + CITP_SCALABLE_LISTEN_ACCELERATED_ONLY) ) { listen_ep = ci_trs_get_valid_ep(ep->thr, from_tcp_id); os_sock_ref = listen_ep->os_socket; - } - else { + } else { os_sock_ref = ep->os_socket; } if( os_sock_ref != NULL ) @@ -406,8 +397,8 @@ tcp_helper_endpoint_set_filters(tcp_helper_endpoint_t* ep, * However, the user can call OO_IOC_EP_FILTER_SET for any endpoint, * and we should not crash (at least in NDEBUG build). */ ci_assert(ep->os_port_keeper); - ci_assert( ! in_atomic() ); - ci_assert( ~ep->thr->netif.flags & CI_NETIF_FLAG_IN_DL_CONTEXT ); + ci_assert(! in_atomic()); + ci_assert(~ep->thr->netif.flags & CI_NETIF_FLAG_IN_DL_CONTEXT); ci_assert_equal(protocol, IPPROTO_UDP); /* Closing a listening socket without being able to get the stack @@ -419,41 +410,40 @@ tcp_helper_endpoint_set_filters(tcp_helper_endpoint_t* ep, if( os_sock_ref != NULL ) { fput(os_sock_ref); os_sock_ref = NULL; - } - else { - OO_DEBUG_ERR(ci_log( - "ERROR: %s is changing the socket [%d:%d] filter to " - "%s " IPX_PORT_FMT " -> " IPX_PORT_FMT ", " - "the filter already exists and there is no backing socket. " - "Something went awry.", - __func__, ep->thr->id, OO_SP_FMT(ep->id), - protocol == IPPROTO_UDP ? "UDP" : "TCP", - IPX_ARG(AF_IP(laddr)), lport, IPX_ARG(AF_IP(raddr)), rport)); + } else { + OO_DEBUG_ERR( + ci_log("ERROR: %s is changing the socket [%d:%d] filter to " + "%s " IPX_PORT_FMT " -> " IPX_PORT_FMT ", " + "the filter already exists and there is no backing socket. " + "Something went awry.", + __func__, ep->thr->id, OO_SP_FMT(ep->id), + protocol == IPPROTO_UDP ? "UDP" : "TCP", IPX_ARG(AF_IP(laddr)), + lport, IPX_ARG(AF_IP(raddr)), rport)); ci_assert(0); } - if( protocol == IPPROTO_UDP && !CI_IPX_ADDR_IS_ANY(raddr) && + if( protocol == IPPROTO_UDP && ! 
CI_IPX_ADDR_IS_ANY(raddr) && CI_IPX_ADDR_IS_ANY(ep->oofilter.sf_raddr) ) { return oof_udp_connect(oo_filter_ns_to_manager(ep->thr->filter_ns), - &ep->oofilter, af_space, laddr, raddr, rport); + &ep->oofilter, af_space, laddr, raddr, rport); } if( protocol != IPPROTO_UDP ) { /* UDP re-connect is OK, but we do not expect anything else. * We've already crashed in DEBUG, but let's complain in NDEBUG. */ - OO_DEBUG_ERR(ci_log( - "ERROR: %s is changing the socket [%d:%d] filter to " - "%s " IPX_PORT_FMT" -> " IPX_PORT_FMT ", " - "but some filter is already installed. Something went awry.", - __func__, ep->thr->id, OO_SP_FMT(ep->id), - protocol == IPPROTO_UDP ? "UDP" : "TCP", - IPX_ARG(AF_IP(laddr)), lport, IPX_ARG(AF_IP(raddr)), rport)); + OO_DEBUG_ERR( + ci_log("ERROR: %s is changing the socket [%d:%d] filter to " + "%s " IPX_PORT_FMT " -> " IPX_PORT_FMT ", " + "but some filter is already installed. Something went awry.", + __func__, ep->thr->id, OO_SP_FMT(ep->id), + protocol == IPPROTO_UDP ? "UDP" : "TCP", IPX_ARG(AF_IP(laddr)), + lport, IPX_ARG(AF_IP(raddr)), rport)); /* Filter is cleared so that endpoint comes back to consistent state: * tcp sockets after failed set filter operations have no filter. * However, as we are afraid that endpoint is compromised we * return error to prevent its use. */ - tcp_helper_endpoint_clear_filters - (ep, - (ni->flags & CI_NETIF_FLAG_IN_DL_CONTEXT) ? - EP_CLEAR_FILTERS_FLAG_SUPRESS_HW : 0); + tcp_helper_endpoint_clear_filters( + ep, (ni->flags & CI_NETIF_FLAG_IN_DL_CONTEXT) + ? 
EP_CLEAR_FILTERS_FLAG_SUPRESS_HW + : 0); return -EALREADY; } oof_socket_del(oo_filter_ns_to_manager(ep->thr->filter_ns), &ep->oofilter); @@ -468,19 +458,20 @@ tcp_helper_endpoint_set_filters(tcp_helper_endpoint_t* ep, rc = ci_tcp_sock_set_stack_filter(ni, SP_TO_SOCK(ni, ep->id)); else #endif - if( OO_SP_NOT_NULL(from_tcp_id) ) + if( OO_SP_NOT_NULL(from_tcp_id) ) rc = oof_socket_share(oo_filter_ns_to_manager(ep->thr->filter_ns), - &ep->oofilter, &listen_ep->oofilter, - af_space, laddr, raddr, lport, rport); + &ep->oofilter, &listen_ep->oofilter, af_space, laddr, raddr, lport, + rport); else { int flags; - ci_assert( ! in_atomic() ); - ci_assert( ~ep->thr->netif.flags & CI_NETIF_FLAG_IN_DL_CONTEXT ); + ci_assert(! in_atomic()); + ci_assert(~ep->thr->netif.flags & CI_NETIF_FLAG_IN_DL_CONTEXT); flags = #if CI_CFG_ENDPOINT_MOVE - (ep->thr->thc != NULL && (s->s_flags & CI_SOCK_FLAG_REUSEPORT) != 0) ? - OOF_SOCKET_ADD_FLAG_CLUSTERED : + (ep->thr->thc != NULL && (s->s_flags & CI_SOCK_FLAG_REUSEPORT) != 0) + ? 
OOF_SOCKET_ADD_FLAG_CLUSTERED + : #endif 0; @@ -500,22 +491,22 @@ tcp_helper_endpoint_set_filters(tcp_helper_endpoint_t* ep, } rc = oof_socket_add(oo_filter_ns_to_manager(ep->thr->filter_ns), - &ep->oofilter, flags, protocol, - af_space, laddr, lport, raddr, rport, NULL); + &ep->oofilter, flags, protocol, af_space, laddr, lport, raddr, rport, + NULL); #if CI_CFG_ENDPOINT_MOVE if( rc != 0 && rc != -EFILTERSSOME && (s->s_flags & CI_SOCK_FLAG_REUSEADDR) && tcp_helper_endpoint_reuseaddr_cleanup(&ep->thr->netif, s) ) { rc = oof_socket_add(oo_filter_ns_to_manager(ep->thr->filter_ns), - &ep->oofilter, flags, protocol, - af_space, laddr, lport, raddr, rport, NULL); + &ep->oofilter, flags, protocol, af_space, laddr, lport, raddr, rport, + NULL); } #endif if( rc == 0 || rc == -EFILTERSSOME ) s->s_flags |= CI_SOCK_FLAG_FILTER; } - set_os_port_keeper_and_out: +set_os_port_keeper_and_out: if( os_sock_ref != NULL && (rc == 0 || rc == -EFILTERSSOME) ) os_sock_ref = oo_file_xchg(&ep->os_port_keeper, os_sock_ref); if( os_sock_ref != NULL ) @@ -525,8 +516,9 @@ tcp_helper_endpoint_set_filters(tcp_helper_endpoint_t* ep, if( rc == 0 && enable_recycler ) { int intf_i; int stream_count = 0; - ci_assert( ! in_atomic() ); - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + ci_assert(! in_atomic()); + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { struct xsn_ceph_create_stream create; ci_netif_state_nic_t* nsn = &ni->state->nic[intf_i]; @@ -537,10 +529,10 @@ tcp_helper_endpoint_set_filters(tcp_helper_endpoint_t* ep, if( ! 
ci_netif_tcp_plugin_uses_p2h(ni, intf_i) && intf_i != s->pkt.intf_i ) continue; - create = (struct xsn_ceph_create_stream){ + create = (struct xsn_ceph_create_stream) { .tcp.in_app_id = cpu_to_le32(ni->nic_hw[intf_i].plugin_rx_app_id), .tcp.in_user_mark = cpu_to_le32(ep->id), - .tcp.in_synchronised = false, /* passive-open not supported */ + .tcp.in_synchronised = false, /* passive-open not supported */ .tcp.in_source_ip = raddr.ip4, .tcp.in_dest_ip = laddr.ip4, .tcp.in_source_port = rport, @@ -548,7 +540,7 @@ tcp_helper_endpoint_set_filters(tcp_helper_endpoint_t* ep, .in_data_buf_capacity = NI_OPTS(ni).ceph_data_buf_bytes, }; rc = efrm_ext_msg(ni->nic_hw[intf_i].plugin_rx, XSN_CEPH_CREATE_STREAM, - &create, sizeof(create)); + &create, sizeof(create)); if( rc ) { OO_DEBUG_ERR(ci_log("ERROR: Can't create Ceph stream state (%d)", rc)); continue; @@ -564,13 +556,12 @@ tcp_helper_endpoint_set_filters(tcp_helper_endpoint_t* ep, } if( stream_count > 0 ) { rc = 0; - } - else { + } else { /* Current policy is to hand over, so choose an error code that will * cause this. We may add alternative options later. */ rc = -EBUSY; - oof_socket_del(oo_filter_ns_to_manager(ep->thr->filter_ns), - &ep->oofilter); + oof_socket_del( + oo_filter_ns_to_manager(ep->thr->filter_ns), &ep->oofilter); } } #endif @@ -589,32 +580,28 @@ tcp_helper_endpoint_set_filters(tcp_helper_endpoint_t* ep, * *--------------------------------------------------------------------*/ -int -tcp_helper_endpoint_clear_filters(tcp_helper_endpoint_t* ep, - int flags) +int tcp_helper_endpoint_clear_filters(tcp_helper_endpoint_t* ep, int flags) { struct file* os_sock_ref; ci_sock_cmn* s = SP_TO_SOCK(&ep->thr->netif, ep->id); int rc = 0; int handled_flags = CI_SOCK_FLAG_FILTER | CI_SOCK_FLAG_STACK_FILTER; - OO_DEBUG_TCPH( - ci_log("%s: [%d:%d] %s%s%s", __FUNCTION__, ep->thr->id, OO_SP_FMT(ep->id), - in_atomic() ? "ATOMIC":"", - flags & EP_CLEAR_FILTERS_FLAG_SUPRESS_HW ? 
" SUPRESS_HW":"", - flags & EP_CLEAR_FILTERS_FLAG_NEED_UPDATE ? " NEED_UPDATE":"") - ); + OO_DEBUG_TCPH(ci_log("%s: [%d:%d] %s%s%s", __FUNCTION__, ep->thr->id, + OO_SP_FMT(ep->id), in_atomic() ? "ATOMIC" : "", + flags & EP_CLEAR_FILTERS_FLAG_SUPRESS_HW ? " SUPRESS_HW" : "", + flags & EP_CLEAR_FILTERS_FLAG_NEED_UPDATE ? " NEED_UPDATE" : "")); /* Sockets have either FILTER or MAC_FILTER with exception of * scalable SO_REUSEPORT listen sockets, which can have both */ ci_assert_impl(! (s->b.state == CI_TCP_LISTEN && - (s->s_flags & CI_SOCK_FLAG_REUSEPORT) != 0), - (s->s_flags & CI_SOCK_FLAG_FILTER) == 0 || - (s->s_flags & CI_SOCK_FLAG_STACK_FILTER) == 0); + (s->s_flags & CI_SOCK_FLAG_REUSEPORT) != 0), + (s->s_flags & CI_SOCK_FLAG_FILTER) == 0 || + (s->s_flags & CI_SOCK_FLAG_STACK_FILTER) == 0); #if CI_CFG_FD_CACHING if( (flags & EP_CLEAR_FILTERS_FLAG_NEED_UPDATE) && - !(s->s_flags & CI_SOCK_FLAGS_SCALABLE) ) + ! (s->s_flags & CI_SOCK_FLAGS_SCALABLE) ) tcp_helper_endpoint_update_filter_details(ep); #endif @@ -622,11 +609,12 @@ tcp_helper_endpoint_clear_filters(tcp_helper_endpoint_t* ep, ci_assert_flags(flags, EP_CLEAR_FILTERS_FLAG_SUPRESS_HW); } - if( (s->s_flags & (CI_SOCK_FLAGS_SCALABLE | CI_SOCK_FLAG_STACK_FILTER)) != 0 ) { + if( (s->s_flags & (CI_SOCK_FLAGS_SCALABLE | CI_SOCK_FLAG_STACK_FILTER)) != + 0 ) { #ifndef BREAK_SCALABLE_FILTERS if( (s->s_flags & CI_SOCK_FLAG_STACK_FILTER) != 0 ) - ci_tcp_sock_clear_stack_filter(&ep->thr->netif, - SP_TO_TCP(&ep->thr->netif,ep->id)); + ci_tcp_sock_clear_stack_filter( + &ep->thr->netif, SP_TO_TCP(&ep->thr->netif, ep->id)); #endif if( (s->s_flags & CI_SOCK_FLAG_FILTER) == 0 ) { @@ -645,8 +633,8 @@ tcp_helper_endpoint_clear_filters(tcp_helper_endpoint_t* ep, * delivered to this endpoint. Defer oof_socket_del() if needed * to non-atomic context. 
*/ - if( oof_socket_del_sw(oo_filter_ns_to_manager(ep->thr->filter_ns), - &ep->oofilter) ) { + if( oof_socket_del_sw( + oo_filter_ns_to_manager(ep->thr->filter_ns), &ep->oofilter) ) { tcp_helper_endpoint_queue_non_atomic(ep, OO_THR_EP_AFLAG_CLEAR_FILTERS); /* If we have been called from atomic context, we sill might actually * have a hw filter. However in such a case there is a non-atomic work @@ -654,20 +642,18 @@ tcp_helper_endpoint_clear_filters(tcp_helper_endpoint_t* ep, * socket filter flags */ rc = -EAGAIN; handled_flags = CI_SOCK_FLAG_STACK_FILTER; - } - else { + } else { os_sock_ref = oo_file_xchg(&ep->os_port_keeper, NULL); if( os_sock_ref != NULL ) fput(os_sock_ref); } - } - else + } else #endif { clear_plugin_state(ep); oof_socket_del(oo_filter_ns_to_manager(ep->thr->filter_ns), &ep->oofilter); - oof_socket_mcast_del_all(oo_filter_ns_to_manager(ep->thr->filter_ns), - &ep->oofilter); + oof_socket_mcast_del_all( + oo_filter_ns_to_manager(ep->thr->filter_ns), &ep->oofilter); os_sock_ref = oo_file_xchg(&ep->os_port_keeper, NULL); if( os_sock_ref != NULL ) fput(os_sock_ref); @@ -697,16 +683,14 @@ tcp_helper_endpoint_clear_filters(tcp_helper_endpoint_t* ep, * This function MUST NOT clear software filters from ep_from, * because there might be handled packets for it in the stack rx queue. */ -int -tcp_helper_endpoint_move_filters_pre(tcp_helper_endpoint_t* ep_from, - tcp_helper_endpoint_t* ep_to, - int drop_filter) +int tcp_helper_endpoint_move_filters_pre(tcp_helper_endpoint_t* ep_from, + tcp_helper_endpoint_t* ep_to, int drop_filter) { struct file* os_sock_ref; int rc; ci_sock_cmn* s = SP_TO_SOCK(&ep_from->thr->netif, ep_from->id); - ci_assert(!in_atomic()); + ci_assert(! 
in_atomic()); if( ep_to->os_port_keeper != NULL ) { ci_log("%s: non-null target port keeper", __func__); @@ -718,21 +702,20 @@ tcp_helper_endpoint_move_filters_pre(tcp_helper_endpoint_t* ep_from, ep_from->oofilter.sf_local_port != NULL ) { if( (s->s_flags & CI_SOCK_FLAG_REUSEPORT) != 0 && (NI_OPTS(&ep_from->thr->netif).cluster_ignore == 0 || - NI_OPTS(&ep_to->thr->netif).cluster_ignore == 0) ) { - LOG_E(ci_log("%s: ERROR: reuseport being set and socket not closed", - __func__)); + NI_OPTS(&ep_to->thr->netif).cluster_ignore == 0) ) { + LOG_E(ci_log( + "%s: ERROR: reuseport being set and socket not closed", __func__)); return -EINVAL; } rc = tcp_helper_endpoint_set_filters(ep_to, CI_IFID_BAD, OO_SP_NULL); if( rc != 0 ) return rc; - } - else { + } else { /* Before further operations we need to ensure no clear filter operations * is pending, typically tcp_helper_endpoint_set_filters() would do that * but we do not call it here */ rc = tcp_helper_flush_clear_filters(ep_to); - if(CI_UNLIKELY( rc < 0 )) + if( CI_UNLIKELY(rc < 0) ) return rc; } @@ -757,9 +740,8 @@ tcp_helper_endpoint_move_filters_pre(tcp_helper_endpoint_t* ep_from, * All ep_from filters should be cleared; * ep_to should have properly-installed filters. */ -void -tcp_helper_endpoint_move_filters_post(tcp_helper_endpoint_t* ep_from, - tcp_helper_endpoint_t* ep_to) +void tcp_helper_endpoint_move_filters_post( + tcp_helper_endpoint_t* ep_from, tcp_helper_endpoint_t* ep_to) { tcp_helper_endpoint_clear_filters(ep_from, 0); } @@ -768,9 +750,8 @@ tcp_helper_endpoint_move_filters_post(tcp_helper_endpoint_t* ep_from, * All ep_to filters should be cleared; * ep_from should have properly-installed filters. 
*/ -void -tcp_helper_endpoint_move_filters_undo(tcp_helper_endpoint_t* ep_from, - tcp_helper_endpoint_t* ep_to) +void tcp_helper_endpoint_move_filters_undo( + tcp_helper_endpoint_t* ep_from, tcp_helper_endpoint_t* ep_to) { struct file* os_sock_ref; @@ -786,42 +767,39 @@ tcp_helper_endpoint_move_filters_undo(tcp_helper_endpoint_t* ep_from, tcp_helper_endpoint_clear_filters(ep_to, 0); } -void -tcp_helper_endpoint_update_filter_details(tcp_helper_endpoint_t* ep) +void tcp_helper_endpoint_update_filter_details(tcp_helper_endpoint_t* ep) { ci_netif* ni = &ep->thr->netif; ci_sock_cmn* s = SP_TO_SOCK(ni, ep->id); struct oof_manager* om = oo_filter_ns_to_manager(ep->thr->filter_ns); - if( !(s->s_flags & (CI_SOCK_FLAG_STACK_FILTER | CI_SOCK_FLAGS_SCALABLE)) ) - oof_socket_update_sharer_details(om, &ep->oofilter, - sock_ipx_raddr(s), sock_rport_be16(s)); + if( ! (s->s_flags & (CI_SOCK_FLAG_STACK_FILTER | CI_SOCK_FLAGS_SCALABLE)) ) + oof_socket_update_sharer_details( + om, &ep->oofilter, sock_ipx_raddr(s), sock_rport_be16(s)); } static void oof_socket_dump_fn(void* arg, oo_dump_log_fn_t log, void* log_arg) { -/* FIXME SCJ OOF */ + /* FIXME SCJ OOF */ oof_onload_socket_dump(&efab_tcp_driver, arg, log, log_arg); } static void oof_manager_dump_fn(void* arg, oo_dump_log_fn_t log, void* log_arg) { -/* FIXME SCJ OOF */ + /* FIXME SCJ OOF */ oof_onload_manager_dump(&efab_tcp_driver, log, log_arg); } -int -tcp_helper_endpoint_filter_dump(tcp_helper_resource_t* thr, oo_sp sockp, - void* user_buf, int user_buf_len) +int tcp_helper_endpoint_filter_dump( + tcp_helper_resource_t* thr, oo_sp sockp, void* user_buf, int user_buf_len) { if( OO_SP_NOT_NULL(sockp) ) { tcp_helper_endpoint_t* ep = ci_trs_get_valid_ep(thr, sockp); - return oo_dump_to_user(oof_socket_dump_fn, &ep->oofilter, - user_buf, user_buf_len); - } - else { + return oo_dump_to_user( + oof_socket_dump_fn, &ep->oofilter, user_buf, user_buf_len); + } else { return oo_dump_to_user(oof_manager_dump_fn, NULL, user_buf, 
user_buf_len); } } @@ -840,11 +818,10 @@ tcp_helper_endpoint_filter_dump(tcp_helper_resource_t* thr, oo_sp sockp, * *--------------------------------------------------------------------*/ -int -tcp_helper_endpoint_shutdown(tcp_helper_resource_t* thr, oo_sp ep_id, - int how, ci_uint32 old_state) +int tcp_helper_endpoint_shutdown( + tcp_helper_resource_t* thr, oo_sp ep_id, int how, ci_uint32 old_state) { - tcp_helper_endpoint_t * ep = ci_trs_get_valid_ep(thr, ep_id); + tcp_helper_endpoint_t* ep = ci_trs_get_valid_ep(thr, ep_id); int rc, supress_hw_ops = thr->netif.flags & CI_NETIF_FLAG_IN_DL_CONTEXT; ci_assert_equal(old_state, CI_TCP_LISTEN); @@ -852,27 +829,27 @@ tcp_helper_endpoint_shutdown(tcp_helper_resource_t* thr, oo_sp ep_id, /* This must be done before we remove filters, as the information must be * correct for sockets sharing our filter when we do the un-share fixup. */ - ci_tcp_listen_update_cached(&thr->netif, - SP_TO_TCP_LISTEN(&thr->netif, ep->id)); + ci_tcp_listen_update_cached( + &thr->netif, SP_TO_TCP_LISTEN(&thr->netif, ep->id)); #endif /* Calling shutdown on the socket unbinds it in most situations. * Since we must never have a filter configured for an unbound * socket, we clear the filters here. */ tcp_helper_endpoint_clear_filters( - ep, supress_hw_ops ? EP_CLEAR_FILTERS_FLAG_SUPRESS_HW : 0); + ep, supress_hw_ops ? EP_CLEAR_FILTERS_FLAG_SUPRESS_HW : 0); /* Filter flags should have been cleared by * tcp_helper_endpoint_clear_filters. */ ci_assert_nflags(SP_TO_SOCK(&thr->netif, ep_id)->s_flags, - (CI_SOCK_FLAG_FILTER | CI_SOCK_FLAG_STACK_FILTER)); + (CI_SOCK_FLAG_FILTER | CI_SOCK_FLAG_STACK_FILTER)); rc = efab_tcp_helper_shutdown_os_sock(ep, how); #if ! 
CI_CFG_UL_INTERRUPT_HELPER ci_assert(ci_netif_is_locked(&thr->netif)); - ci_tcp_listen_shutdown_queues(&thr->netif, - SP_TO_TCP_LISTEN(&thr->netif, ep->id)); + ci_tcp_listen_shutdown_queues( + &thr->netif, SP_TO_TCP_LISTEN(&thr->netif, ep->id)); #endif return rc; } diff --git a/src/lib/efthrm/tcp_helper_resource.c b/src/lib/efthrm/tcp_helper_resource.c index cf25b1c0a..08976c050 100644 --- a/src/lib/efthrm/tcp_helper_resource.c +++ b/src/lib/efthrm/tcp_helper_resource.c @@ -52,20 +52,20 @@ #include #ifdef NDEBUG -# define DEBUG_STR "" +#define DEBUG_STR "" #else -# define DEBUG_STR " debug" +#define DEBUG_STR " debug" #endif #if CI_CFG_PKT_BUF_SIZE == EFHW_NIC_PAGE_SIZE #define HW_PAGES_PER_SET_S CI_CFG_PKTS_PER_SET_S -#define PKTS_PER_HW_PAGE 1 +#define PKTS_PER_HW_PAGE 1 #elif CI_CFG_PKT_BUF_SIZE * 2 == EFHW_NIC_PAGE_SIZE #define HW_PAGES_PER_SET_S (CI_CFG_PKTS_PER_SET_S - 1) -#define PKTS_PER_HW_PAGE 2 +#define PKTS_PER_HW_PAGE 2 #elif CI_CFG_PKT_BUF_SIZE * 4 == EFHW_NIC_PAGE_SIZE #define HW_PAGES_PER_SET_S (CI_CFG_PKTS_PER_SET_S - 2) -#define PKTS_PER_HW_PAGE 4 +#define PKTS_PER_HW_PAGE 4 #else #error "Unknown value for CI_CFG_PKT_BUF_SIZE" #endif @@ -78,25 +78,25 @@ #define EFAB_THR_MAX_NUM_INSTANCES 0x00010000 -static const unsigned EFCT_HUGEPAGES_PER_RXQ = 2; /* EFCT TODO: un-hardcode */ +static const unsigned EFCT_HUGEPAGES_PER_RXQ = 2; /* EFCT TODO: un-hardcode */ /* Provides upper limit to EF_MAX_PACKETS. default is 512K packets, - * which equates to roughly 1GB of memory + * which equates to roughly 1GB of memory */ static unsigned max_packets_per_stack = 0x80000; module_param(max_packets_per_stack, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(max_packets_per_stack, - "Limit the number of packet buffers that each Onload stack " - "can allocate. This module option places an upper limit " - "on the EF_MAX_PACKETS option. 
Changes to this module " - "option are not applied retrospectively to stacks already " - "existing before the change."); + "Limit the number of packet buffers that each Onload stack " + "can allocate. This module option places an upper limit " + "on the EF_MAX_PACKETS option. Changes to this module " + "option are not applied retrospectively to stacks already " + "existing before the change."); static int allow_insecure_setuid_sharing; module_param(allow_insecure_setuid_sharing, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(allow_insecure_setuid_sharing, - "Override default security rules and allow setuid processes " - "to map Onload stacks created by other users."); + "Override default security rules and allow setuid processes " + "to map Onload stacks created by other users."); #ifdef CONFIG_PREEMPT unsigned long oo_avoid_wakeup_under_pressure = 1; @@ -105,74 +105,64 @@ unsigned long oo_avoid_wakeup_under_pressure = 0; #endif module_param(oo_avoid_wakeup_under_pressure, ulong, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(oo_avoid_wakeup_under_pressure, - "Avoid endpoint wakeups for this number of jiffies after " - "NAPI budget limited interrupt handler. This is typically " - "needed on realtime kernels, where you can see " - "\"stall on CPU\" messages when this value is set to 0."); + "Avoid endpoint wakeups for this number of jiffies after " + "NAPI budget limited interrupt handler. This is typically " + "needed on realtime kernels, where you can see " + "\"stall on CPU\" messages when this value is set to 0."); DEFINE_PER_CPU(unsigned long, oo_budget_limit_last_ts); #if HZ < 100 -# error FIXME: Not able to cope with low HZ at the moment. +#error FIXME: Not able to cope with low HZ at the moment. #endif - /* Periodic timer fires roughly 10 times per sec. */ -#define CI_TCP_HELPER_PERIODIC_BASE_T ((unsigned long)(HZ*9/100)) -#define CI_TCP_HELPER_PERIODIC_FLOAT_T ((unsigned long)(HZ*1/100)) +/* Periodic timer fires roughly 10 times per sec. 
*/ +#define CI_TCP_HELPER_PERIODIC_BASE_T ((unsigned long) (HZ * 9 / 100)) +#define CI_TCP_HELPER_PERIODIC_FLOAT_T ((unsigned long) (HZ * 1 / 100)) unsigned long periodic_poll = CI_TCP_HELPER_PERIODIC_BASE_T; unsigned long periodic_poll_skew = CI_TCP_HELPER_PERIODIC_FLOAT_T; module_param(periodic_poll, ulong, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(periodic_poll, - "Number of jiffies between periodic polls of " - "any Onload stack. Defaults to 90ms."); + "Number of jiffies between periodic polls of " + "any Onload stack. Defaults to 90ms."); module_param(periodic_poll_skew, ulong, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(periodic_poll_skew, - "Allowed time skew for periodic polls. " - "Defaults to 10ms."); + "Allowed time skew for periodic polls. " + "Defaults to 10ms."); /* Global structure for onload driver */ efab_tcp_driver_t efab_tcp_driver; -static void -efab_tcp_helper_rm_free_locked(tcp_helper_resource_t*); +static void efab_tcp_helper_rm_free_locked(tcp_helper_resource_t*); #if ! CI_CFG_UL_INTERRUPT_HELPER -static void -efab_tcp_helper_rm_schedule_free(tcp_helper_resource_t*); +static void efab_tcp_helper_rm_schedule_free(tcp_helper_resource_t*); #endif #if ! 
CI_CFG_UL_INTERRUPT_HELPER -static int -oo_handle_wakeup_int_driven(void*, int is_timeout, - struct efhw_nic*, int budget); -static int -oo_handle_wakeup_or_timeout(void*, int is_timeout, - struct efhw_nic*, int budget); -static void -tcp_helper_initialize_and_start_periodic_timer(tcp_helper_resource_t*); -static void -tcp_helper_stop_periodic_work(tcp_helper_resource_t*); +static int oo_handle_wakeup_int_driven( + void*, int is_timeout, struct efhw_nic*, int budget); +static int oo_handle_wakeup_or_timeout( + void*, int is_timeout, struct efhw_nic*, int budget); +static void tcp_helper_initialize_and_start_periodic_timer( + tcp_helper_resource_t*); +static void tcp_helper_stop_periodic_work(tcp_helper_resource_t*); -static void -tcp_helper_close_pending_endpoints(tcp_helper_resource_t*); +static void tcp_helper_close_pending_endpoints(tcp_helper_resource_t*); #endif #if CI_CFG_NIC_RESET_SUPPORT -static void -tcp_helper_purge_txq_work(struct work_struct *data); +static void tcp_helper_purge_txq_work(struct work_struct* data); -static void -tcp_helper_reset_stack_work(struct work_struct *data); +static void tcp_helper_reset_stack_work(struct work_struct* data); #endif #if CI_CFG_EPOLL3 -static void -get_os_ready_list(tcp_helper_resource_t* thr, int ready_list); +static void get_os_ready_list(tcp_helper_resource_t* thr, int ready_list); #endif -static void -efab_tcp_helper_drop_os_socket(tcp_helper_resource_t* trs, - tcp_helper_endpoint_t* ep); +static void efab_tcp_helper_drop_os_socket( + tcp_helper_resource_t* trs, tcp_helper_endpoint_t* ep); /* Allocate a block of IDs from the pool of ID blocks */ static int efab_ipid_alloc(efab_ipid_cb_t* ipid); @@ -188,27 +178,25 @@ static int efab_ipid_free(efab_ipid_cb_t* ipid, int base); * *---------------------------------------------------------------------------*/ -ci_inline int -oo_trusted_lock_is_locked(tcp_helper_resource_t* trs) +ci_inline int oo_trusted_lock_is_locked(tcp_helper_resource_t* trs) { return 
trs->trusted_lock & OO_TRUSTED_LOCK_LOCKED; } -static int -oo_trusted_lock_try_lock(tcp_helper_resource_t* trs) +static int oo_trusted_lock_try_lock(tcp_helper_resource_t* trs) { return trs->trusted_lock == OO_TRUSTED_LOCK_UNLOCKED && - ci_cas32u_succeed(&trs->trusted_lock, OO_TRUSTED_LOCK_UNLOCKED, - OO_TRUSTED_LOCK_LOCKED); + ci_cas32u_succeed(&trs->trusted_lock, OO_TRUSTED_LOCK_UNLOCKED, + OO_TRUSTED_LOCK_LOCKED); } /* Returns true if flags were set, or false if the lock was not locked. * NB. We ignore flags if AWAITING_FREE. */ -static int -oo_trusted_lock_set_flags_if_locked(tcp_helper_resource_t* trs, unsigned flags) +static int oo_trusted_lock_set_flags_if_locked( + tcp_helper_resource_t* trs, unsigned flags) { unsigned l; @@ -229,19 +217,18 @@ oo_trusted_lock_set_flags_if_locked(tcp_helper_resource_t* trs, unsigned flags) * 1 if unlock has been deferred * If has_shared is set both locks would get deferred. */ -static int -oo_trusted_lock_drop(tcp_helper_resource_t* trs, - int in_dl_context, int has_shared) +static int oo_trusted_lock_drop( + tcp_helper_resource_t* trs, int in_dl_context, int has_shared) { unsigned l; ci_uint64 sl_flags; ci_netif* ni = &trs->netif; - again: +again: l = trs->trusted_lock; ci_assert_flags(l, OO_TRUSTED_LOCK_LOCKED); - if(CI_UNLIKELY( l & OO_TRUSTED_LOCK_AWAITING_FREE )) { + if( CI_UNLIKELY(l & OO_TRUSTED_LOCK_AWAITING_FREE) ) { /* We may be called from the stack workqueue, so postpone destruction * to the point where wq may be flushed */ /* rm_free_locked expects trusted lock only @@ -266,34 +253,35 @@ oo_trusted_lock_drop(tcp_helper_resource_t* trs, if( ci_cas32_fail(&trs->trusted_lock, l, new_l) ) goto again; - if( has_shared || - ef_eplock_lock_or_set_flag(&trs->netif.state->lock, - CI_EPLOCK_NETIF_CLOSE_ENDPOINT) ) { + if( has_shared || ef_eplock_lock_or_set_flag(&trs->netif.state->lock, + CI_EPLOCK_NETIF_CLOSE_ENDPOINT) ) { /* let's reset the shared lock flag to avoid flag ping pong */ - 
ef_eplock_clear_flags(&trs->netif.state->lock, CI_EPLOCK_NETIF_CLOSE_ENDPOINT); + ef_eplock_clear_flags( + &trs->netif.state->lock, CI_EPLOCK_NETIF_CLOSE_ENDPOINT); /* We've got both locks. If in non-dl context, do the work, else * defer work and locks to workitem. */ if( in_dl_context ) { - /* the flag needs to be reinstated for atomic work to undertake the work */ - oo_trusted_lock_set_flags_if_locked(trs, OO_TRUSTED_LOCK_CLOSE_ENDPOINT); + /* the flag needs to be reinstated for atomic work to undertake the + * work */ + oo_trusted_lock_set_flags_if_locked( + trs, OO_TRUSTED_LOCK_CLOSE_ENDPOINT); trs->netif.flags |= CI_NETIF_FLAG_IN_DL_CONTEXT; OO_DEBUG_TCPH(ci_log("%s: [%u] defer CLOSE_ENDPOINT to workitem", - __FUNCTION__, trs->id)); + __FUNCTION__, trs->id)); tcp_helper_defer_dl2work(trs, OO_THR_AFLAG_UNLOCK_TRUSTED); return 1; } - OO_DEBUG_TCPH(ci_log("%s: [%u] CLOSE_ENDPOINT now", - __FUNCTION__, trs->id)); + OO_DEBUG_TCPH( + ci_log("%s: [%u] CLOSE_ENDPOINT now", __FUNCTION__, trs->id)); tcp_helper_close_pending_endpoints(trs); if( ! has_shared ) efab_eplock_unlock_and_wake(ni, in_dl_context); - } - else { + } else { /* Untrusted lock holder now responsible for invoking non-atomic work. */ OO_DEBUG_TCPH(ci_log("%s: [%u] defer CLOSE_ENDPOINT to untrusted lock", - __FUNCTION__, trs->id)); + __FUNCTION__, trs->id)); } goto again; } @@ -305,30 +293,28 @@ oo_trusted_lock_drop(tcp_helper_resource_t* trs, unsigned new_l = l & ~OO_TRUSTED_LOCK_OS_READY; if( ci_cas32_fail(&trs->trusted_lock, l, new_l) ) goto again; - if( has_shared || - ef_eplock_lock_or_set_flag(&trs->netif.state->lock, - CI_EPLOCK_NETIF_NEED_WAKE) ) { + if( has_shared || ef_eplock_lock_or_set_flag(&trs->netif.state->lock, + CI_EPLOCK_NETIF_NEED_WAKE) ) { /* We've got both locks, do the work now. 
*/ - OO_DEBUG_TCPH(ci_log("%s: [%u] OS READY now", - __FUNCTION__, trs->id)); + OO_DEBUG_TCPH(ci_log("%s: [%u] OS READY now", __FUNCTION__, trs->id)); #if CI_CFG_EPOLL3 - CI_READY_LIST_EACH(trs->netif.state->ready_lists_in_use, tmp, i) { + CI_READY_LIST_EACH(trs->netif.state->ready_lists_in_use, tmp, i) + { get_os_ready_list(trs, i); - if( ! oo_p_dllink_is_empty(&trs->netif, - oo_p_dllink_ptr(&trs->netif, - &trs->netif.state->ready_lists[i])) ) + if( ! oo_p_dllink_is_empty( + &trs->netif, oo_p_dllink_ptr(&trs->netif, + &trs->netif.state->ready_lists[i])) ) ci_waitable_wakeup_all(&trs->ready_list_waitqs[i]); } #endif if( ! has_shared ) efab_eplock_unlock_and_wake(ni, in_dl_context); - } - else { + } else { /* Untrusted lock holder now responsible for invoking work. */ OO_DEBUG_TCPH(ci_log("%s: [%u] defer OS READY WAKE to trusted lock", - __FUNCTION__, trs->id)); + __FUNCTION__, trs->id)); } goto again; } @@ -353,7 +339,8 @@ oo_trusted_lock_drop(tcp_helper_resource_t* trs, if( ci_cas32_succeed(&trs->trusted_lock, l, OO_TRUSTED_LOCK_LOCKED) ) { if( has_shared ) ef_eplock_holder_set_flags(&trs->netif.state->lock, sl_flags); - else if ( ef_eplock_trylock_and_set_flags(&trs->netif.state->lock, sl_flags) ) + else if( ef_eplock_trylock_and_set_flags( + &trs->netif.state->lock, sl_flags) ) efab_eplock_unlock_and_wake(ni, in_dl_context); } goto again; @@ -364,8 +351,8 @@ oo_trusted_lock_drop(tcp_helper_resource_t* trs, /* Returns true if the lock is obtained, or false otherwise. * The flags will be set (unless AWAITING_FREE). 
*/ -static int -oo_trusted_lock_lock_and_set_flags(tcp_helper_resource_t* trs, unsigned flags) +static int oo_trusted_lock_lock_and_set_flags( + tcp_helper_resource_t* trs, unsigned flags) { unsigned l, new_l; @@ -390,13 +377,13 @@ oo_trusted_lock_lock_and_set_flags(tcp_helper_resource_t* trs, unsigned flags) * *---------------------------------------------------------------------------*/ -int -efab_tcp_helper_netif_try_lock(tcp_helper_resource_t* trs, int in_dl_context) +int efab_tcp_helper_netif_try_lock( + tcp_helper_resource_t* trs, int in_dl_context) { if( oo_trusted_lock_try_lock(trs) ) { ci_netif* ni = &trs->netif; if( ci_netif_trylock(&trs->netif) ) { - ci_assert( ! (ni->flags & CI_NETIF_FLAG_IN_DL_CONTEXT) ); + ci_assert(! (ni->flags & CI_NETIF_FLAG_IN_DL_CONTEXT)); if( in_dl_context ) ni->flags |= CI_NETIF_FLAG_IN_DL_CONTEXT; return 1; @@ -407,10 +394,11 @@ efab_tcp_helper_netif_try_lock(tcp_helper_resource_t* trs, int in_dl_context) } -void -efab_tcp_helper_netif_unlock(tcp_helper_resource_t* trs, int in_dl_context) +void efab_tcp_helper_netif_unlock( + tcp_helper_resource_t* trs, int in_dl_context) { - ci_assert_equiv(in_dl_context, trs->netif.flags & CI_NETIF_FLAG_IN_DL_CONTEXT); + ci_assert_equiv( + in_dl_context, trs->netif.flags & CI_NETIF_FLAG_IN_DL_CONTEXT); #if 0 /* See ON-13698 & ON-13099: we want it to be true, but it fires for now. */ ci_assert_impl(trs->netif.flags & CI_NETIF_FLAGS_AVOID_ATOMIC, !in_dl_context); @@ -428,25 +416,22 @@ efab_tcp_helper_netif_unlock(tcp_helper_resource_t* trs, int in_dl_context) } -/* Returns 1 if the locks are held, or 0 if not and the flags are set. - * +/* Returns 1 if the locks are held, or 0 if not and the flags are set. 
+ * * NB if trusted lock has OO_TRUSTED_LOCK_AWAITING_FREE this function - * will return 0, but the flags will not be set + * will return 0, but the flags will not be set */ -int -efab_tcp_helper_netif_lock_or_set_flags(tcp_helper_resource_t* trs, - unsigned trusted_flags, - ci_uint64 untrusted_flags, - int in_dl_context) +int efab_tcp_helper_netif_lock_or_set_flags(tcp_helper_resource_t* trs, + unsigned trusted_flags, ci_uint64 untrusted_flags, int in_dl_context) { do { if( efab_tcp_helper_netif_try_lock(trs, in_dl_context) ) return 1; - if( ef_eplock_set_flags_if_locked(&trs->netif.state->lock, - untrusted_flags) ) + if( ef_eplock_set_flags_if_locked( + &trs->netif.state->lock, untrusted_flags) ) return 0; if( oo_trusted_lock_set_flags_if_locked(trs, trusted_flags) ) - return 0; + return 0; } while( 1 ); } #endif /* CI_CFG_UL_INTERRUPT_HELPER */ @@ -458,20 +443,20 @@ efab_tcp_helper_netif_lock_or_set_flags(tcp_helper_resource_t* trs, * *---------------------------------------------------------------------------*/ -static int thr_table_ctor(tcp_helpers_table_t *table) +static int thr_table_ctor(tcp_helpers_table_t* table) { ci_dllist_init(&table->all_stacks); ci_dllist_init(&table->started_stacks); table->stack_count = 0; ci_irqlock_ctor(&table->lock); ci_id_pool_ctor(&table->instances, EFAB_THR_MAX_NUM_INSTANCES, - /* initial size */ 8); + /* initial size */ 8); return 0; } #if ! 
CI_CFG_UL_INTERRUPT_HELPER -void tcp_helper_kill_stack(tcp_helper_resource_t *thr) +void tcp_helper_kill_stack(tcp_helper_resource_t* thr) { ci_uint32 n_ep_orphaned; ci_netif* netif = &thr->netif; @@ -516,9 +501,10 @@ void tcp_helper_kill_stack(tcp_helper_resource_t *thr) } while( ci_cas32u_fail(&netif->n_ep_orphaned, n_ep_orphaned, 0) ); if( n_ep_orphaned > 0 ) { - ci_log("%s: ERROR: force-kill stack [%d]: " - "leaking %d OS sockets and filters", - __func__, thr->id, n_ep_orphaned); + ci_log( + "%s: ERROR: force-kill stack [%d]: " + "leaking %d OS sockets and filters", + __func__, thr->id, n_ep_orphaned); #ifndef NDEBUG dump_stack_to_logger(&thr->netif, ci_log_dump_fn, NULL); #endif @@ -528,7 +514,7 @@ void tcp_helper_kill_stack(tcp_helper_resource_t *thr) #endif -static void thr_table_dtor(tcp_helpers_table_t *table) +static void thr_table_dtor(tcp_helpers_table_t* table) { /* Onload is going away, so kill off any remaining stacks. */ @@ -552,8 +538,8 @@ static void thr_table_dtor(tcp_helpers_table_t *table) ci_irqlock_unlock(&table->lock, &lock_flags); if( thr->ref[OO_THR_REF_FILE] != 0 ) - ci_log("%s: ERROR: non-orphaned stack=%u ref "OO_THR_REF_FMT, - __FUNCTION__, thr->id, OO_THR_REF_ARG(thr->ref)); + ci_log("%s: ERROR: non-orphaned stack=%u ref " OO_THR_REF_FMT, + __FUNCTION__, thr->id, OO_THR_REF_ARG(thr->ref)); #if ! CI_CFG_UL_INTERRUPT_HELPER OO_DEBUG_TCPH(ci_log("%s: killing stack %d", __FUNCTION__, thr->id)); @@ -570,17 +556,16 @@ static void thr_table_dtor(tcp_helpers_table_t *table) } - -static -int efab_thr_table_check_name(const char* name, struct net* netns) +static int efab_thr_table_check_name(const char* name, struct net* netns) { /* Check that there is no name collision with already-existing stacks. 
*/ tcp_helpers_table_t* table = &THR_TABLE; - tcp_helper_resource_t *thr2; - ci_dllink *link; + tcp_helper_resource_t* thr2; + ci_dllink* link; - CI_DLLIST_FOR_EACH(link, &table->all_stacks) { + CI_DLLIST_FOR_EACH(link, &table->all_stacks) + { thr2 = CI_CONTAINER(tcp_helper_resource_t, all_stacks_link, link); if( netns == thr2->netif.cplane->cp_netns && strncmp(thr2->netif.state->name, name, CI_CFG_STACK_NAME_LEN) == 0 && @@ -591,17 +576,18 @@ int efab_thr_table_check_name(const char* name, struct net* netns) } -int efab_thr_get_inaccessible_stack_info(unsigned id, uid_t* uid, uid_t* euid, - ci_int32* share_with, char* name) +int efab_thr_get_inaccessible_stack_info( + unsigned id, uid_t* uid, uid_t* euid, ci_int32* share_with, char* name) { tcp_helpers_table_t* table = &THR_TABLE; ci_irqlock_state_t lock_flags; - tcp_helper_resource_t *thr; - ci_dllink *link; + tcp_helper_resource_t* thr; + ci_dllink* link; int match; ci_irqlock_lock(&table->lock, &lock_flags); - CI_DLLIST_FOR_EACH(link, &table->all_stacks) { + CI_DLLIST_FOR_EACH(link, &table->all_stacks) + { thr = CI_CONTAINER(tcp_helper_resource_t, all_stacks_link, link); match = thr->id == id; @@ -611,12 +597,11 @@ int efab_thr_get_inaccessible_stack_info(unsigned id, uid_t* uid, uid_t* euid, /* Translate the share_with uid from the target stack's user_ns * to the kernel space. */ - uid_t kshare_with = ci_make_kuid(tcp_helper_get_user_ns(thr), - NI_OPTS(&thr->netif).share_with); + uid_t kshare_with = ci_make_kuid( + tcp_helper_get_user_ns(thr), NI_OPTS(&thr->netif).share_with); /* Then translate that into the user_ns of the requestor */ *share_with = ci_current_from_kuid_munged(kshare_with); - } - else { + } else { /* Special value indicating either none (0) or all (-1) doesn't need * translation. 
*/ @@ -634,8 +619,8 @@ int efab_thr_get_inaccessible_stack_info(unsigned id, uid_t* uid, uid_t* euid, } -int efab_thr_user_can_access_stack(uid_t uid, uid_t euid, - tcp_helper_resource_t* thr) +int efab_thr_user_can_access_stack( + uid_t uid, uid_t euid, tcp_helper_resource_t* thr) { uid_t kshare_with; @@ -645,8 +630,8 @@ int efab_thr_user_can_access_stack(uid_t uid, uid_t euid, uid == 0 ) return 1; - kshare_with = ci_make_kuid(tcp_helper_get_user_ns(thr), - NI_OPTS(&thr->netif).share_with); + kshare_with = ci_make_kuid( + tcp_helper_get_user_ns(thr), NI_OPTS(&thr->netif).share_with); if( /* Owner does not allow other users to map this stack. */ kshare_with == 0 || /* Stack can be shared with another user, but not this user. */ @@ -667,17 +652,17 @@ int efab_thr_can_access_stack(tcp_helper_resource_t* thr, int check_user) */ if( /* We're not about to give a user access to the stack. */ - ! (check_user & EFAB_THR_TABLE_LOOKUP_CHECK_USER) ) + ! (check_user & EFAB_THR_TABLE_LOOKUP_CHECK_USER) ) return 1; return efab_thr_user_can_access_stack(ci_getuid(), ci_geteuid(), thr); } -/* +/* * If this returns 0 it will have taken a reference either through: * - efab_thr_ref(); or * - efab_tcp_helper_k_ref_count_inc() if it is an orphan; - * + * * It is up to the caller to drop the appropriate reference when safe * to do so. * @@ -688,15 +673,13 @@ int efab_thr_can_access_stack(tcp_helper_resource_t* thr, int check_user) * efab_tcp_helper_k_ref_count_inc() case as you won't see parented * stacks. 
*/ -int efab_thr_table_lookup(const char* name, struct net* netns, - unsigned id, int flags, - enum oo_thr_ref_type ref_type, - tcp_helper_resource_t** thr_p) +int efab_thr_table_lookup(const char* name, struct net* netns, unsigned id, + int flags, enum oo_thr_ref_type ref_type, tcp_helper_resource_t** thr_p) { tcp_helpers_table_t* table = &THR_TABLE; ci_irqlock_state_t lock_flags; - tcp_helper_resource_t *thr; - ci_dllink *link; + tcp_helper_resource_t* thr; + ci_dllink* link; int match, rc = -ENODEV; ci_assert(thr_p != NULL); @@ -706,14 +689,14 @@ int efab_thr_table_lookup(const char* name, struct net* netns, ci_assert_flags(flags, EFAB_THR_TABLE_LOOKUP_CHECK_USER); ci_irqlock_lock(&table->lock, &lock_flags); - CI_DLLIST_FOR_EACH(link, &table->all_stacks) { + CI_DLLIST_FOR_EACH(link, &table->all_stacks) + { thr = CI_CONTAINER(tcp_helper_resource_t, all_stacks_link, link); if( name ) { match = (strcmp(thr->name, name) == 0) && (thr->netif.cplane->cp_netns == netns); - } - else { + } else { match = thr->id == id; } @@ -724,28 +707,26 @@ int efab_thr_table_lookup(const char* name, struct net* netns, * we're in a context where we have a current user namespace * (if user namespaces are supported). */ - uid_t kshare_with = ci_make_kuid(tcp_helper_get_user_ns(thr), - NI_OPTS(&thr->netif).share_with); - - ci_log("User %d:%d can't share stack %d(%s) owned by %d:%d " - "share_with=%d", - ci_current_from_kuid_munged(ci_getuid()), - ci_current_from_kuid_munged(ci_geteuid()), - thr->id, thr->name, - ci_current_from_kuid_munged(thr->netif.kuid), - ci_current_from_kuid_munged(thr->netif.keuid), - NI_OPTS(&thr->netif).share_with > 0 ? 
- ci_current_from_kuid_munged(kshare_with) : - NI_OPTS(&thr->netif).share_with); + uid_t kshare_with = ci_make_kuid( + tcp_helper_get_user_ns(thr), NI_OPTS(&thr->netif).share_with); + + ci_log( + "User %d:%d can't share stack %d(%s) owned by %d:%d " + "share_with=%d", + ci_current_from_kuid_munged(ci_getuid()), + ci_current_from_kuid_munged(ci_geteuid()), thr->id, thr->name, + ci_current_from_kuid_munged(thr->netif.kuid), + ci_current_from_kuid_munged(thr->netif.keuid), + NI_OPTS(&thr->netif).share_with > 0 + ? ci_current_from_kuid_munged(kshare_with) + : NI_OPTS(&thr->netif).share_with); } rc = -EACCES; - } - else if( (thr->ref[OO_THR_REF_FILE] != 0) != - ! (flags & EFAB_THR_TABLE_LOOKUP_NO_UL) ) { + } else if( (thr->ref[OO_THR_REF_FILE] != 0) != + ! (flags & EFAB_THR_TABLE_LOOKUP_NO_UL) ) { /* Orphan stacks flag does not match */ rc = -EBUSY; - } - else { + } else { /* Success */ rc = oo_thr_ref_get(thr->ref, ref_type); *thr_p = thr; @@ -779,10 +760,10 @@ static unsigned rescale(unsigned v, unsigned new_scale, unsigned old_scale) static void tcp_helper_reduce_max_packets(ci_netif* ni, int new_max_packets) { ci_assert_lt(new_max_packets, NI_OPTS(ni).max_packets); - NI_OPTS(ni).max_rx_packets = rescale(NI_OPTS(ni).max_rx_packets, - new_max_packets, NI_OPTS(ni).max_packets); - NI_OPTS(ni).max_tx_packets = rescale(NI_OPTS(ni).max_tx_packets, - new_max_packets, NI_OPTS(ni).max_packets); + NI_OPTS(ni).max_rx_packets = rescale( + NI_OPTS(ni).max_rx_packets, new_max_packets, NI_OPTS(ni).max_packets); + NI_OPTS(ni).max_tx_packets = rescale( + NI_OPTS(ni).max_tx_packets, new_max_packets, NI_OPTS(ni).max_packets); NI_OPTS(ni).max_packets = new_max_packets; if( ni->state != NULL ) { ni->state->opts.max_packets = NI_OPTS(ni).max_packets; @@ -793,17 +774,17 @@ static void tcp_helper_reduce_max_packets(ci_netif* ni, int new_max_packets) #if ! 
CI_CFG_UL_INTERRUPT_HELPER -static int -__tcp_helper_kill_stack_by_id(unsigned id, unsigned ignore_id) +static int __tcp_helper_kill_stack_by_id(unsigned id, unsigned ignore_id) { tcp_helpers_table_t* table = &THR_TABLE; ci_irqlock_state_t lock_flags; - tcp_helper_resource_t *thr = NULL; - ci_dllink *link; + tcp_helper_resource_t* thr = NULL; + ci_dllink* link; int rc = -ENODEV; ci_irqlock_lock(&table->lock, &lock_flags); - CI_DLLIST_FOR_EACH(link, &table->all_stacks) { + CI_DLLIST_FOR_EACH(link, &table->all_stacks) + { thr = CI_CONTAINER(tcp_helper_resource_t, all_stacks_link, link); if( ignore_id || thr->id == id ) { OO_DEBUG_TCPH(ci_log("Stack to release [%d]", thr->id)); @@ -819,11 +800,11 @@ __tcp_helper_kill_stack_by_id(unsigned id, unsigned ignore_id) tcp_helper_kill_stack(thr); if( ignore_id ) - OO_DEBUG_TCPH(ci_log("Orphaned stack %d(%s) owned by %d:%d has been " - "released.", - thr->id, thr->name, - ci_current_from_kuid_munged(thr->netif.kuid), - ci_current_from_kuid_munged(thr->netif.keuid))); + OO_DEBUG_TCPH( + ci_log("Orphaned stack %d(%s) owned by %d:%d has been " + "released.", + thr->id, thr->name, ci_current_from_kuid_munged(thr->netif.kuid), + ci_current_from_kuid_munged(thr->netif.keuid))); /* Remove reference we took in this function */ oo_thr_ref_drop(thr->ref, OO_THR_REF_BASE); @@ -839,20 +820,19 @@ int tcp_helper_kill_stack_by_id(unsigned id) #endif -void -tcp_helper_resource_assert_valid(tcp_helper_resource_t* thr, int no_ul, - const char *file, int line) +void tcp_helper_resource_assert_valid( + tcp_helper_resource_t* thr, int no_ul, const char* file, int line) { _ci_assert(thr, file, line); _ci_assert_nequal(thr->id, CI_ID_POOL_ID_NONE, file, line); _ci_assert_equal(thr->id, thr->netif.state->stack_id, file, line); - if (no_ul >=0) { - if ((no_ul && thr->ref[OO_THR_REF_FILE] > 0) || - (!no_ul && thr->ref[OO_THR_REF_FILE] == 0)) { + if( no_ul >= 0 ) { + if( (no_ul && thr->ref[OO_THR_REF_FILE] > 0) || + (! 
no_ul && thr->ref[OO_THR_REF_FILE] == 0) ) { ci_log("%s %d: %s check %u for %szero ul ref=%d", file, line, - __FUNCTION__, thr->id, no_ul ? "" : "non-", - thr->ref[OO_THR_REF_FILE]); + __FUNCTION__, thr->id, no_ul ? "" : "non-", + thr->ref[OO_THR_REF_FILE]); } _ci_assert(no_ul || thr->ref[OO_THR_REF_FILE], file, line); } @@ -890,8 +870,7 @@ int tcp_helper_vi_hw_stack_id(tcp_helper_resource_t* trs, int hwport) struct efrm_vi* vi = tcp_helper_vi(trs, intf_i); struct efrm_pd* pd = efrm_vi_get_pd(vi); return efrm_pd_stack_id_get(pd); - } - else + } else return -1; } @@ -902,8 +881,7 @@ int tcp_helper_cluster_vi_hw_stack_id(tcp_helper_cluster_t* thc, int hwport) if( thc->thc_vi_set[hwport] != NULL ) { struct efrm_pd* pd = efrm_vi_set_get_pd(thc->thc_vi_set[hwport]); return efrm_pd_stack_id_get(pd); - } - else + } else return -1; } @@ -918,8 +896,8 @@ int tcp_helper_cluster_vi_base(tcp_helper_cluster_t* thc, int hwport) } -int tcp_helper_vi_hw_rx_loopback_supported(tcp_helper_resource_t* trs, - int hwport) +int tcp_helper_vi_hw_rx_loopback_supported( + tcp_helper_resource_t* trs, int hwport) { int intf_i; ci_assert_lt((unsigned) hwport, CI_CFG_MAX_HWPORTS); @@ -930,8 +908,8 @@ int tcp_helper_vi_hw_rx_loopback_supported(tcp_helper_resource_t* trs, } -int tcp_helper_vi_hw_drop_filter_supported(tcp_helper_resource_t* trs, - int hwport) +int tcp_helper_vi_hw_drop_filter_supported( + tcp_helper_resource_t* trs, int hwport) { int intf_i; ci_assert_lt((unsigned) hwport, CI_CFG_MAX_HWPORTS); @@ -943,7 +921,7 @@ int tcp_helper_vi_hw_drop_filter_supported(tcp_helper_resource_t* trs, void tcp_helper_get_filter_params(tcp_helper_resource_t* trs, int hwport, - int* vi_id, int* rxq, unsigned *flags) + int* vi_id, int* rxq, unsigned* flags) { int intf_i; ci_assert_lt((unsigned) hwport, CI_CFG_MAX_HWPORTS); @@ -953,13 +931,11 @@ void tcp_helper_get_filter_params(tcp_helper_resource_t* trs, int hwport, if( NI_OPTS_TRS(trs).shared_rxq_num >= 0 ) { *rxq = NI_OPTS_TRS(trs).shared_rxq_num; 
*flags |= EFHW_FILTER_F_PREF_RXQ; - } - else if( vi->efct_shm ) { + } else if( vi->efct_shm ) { if( vi->efct_shm->q[0].superbuf_pkts ) { *rxq = vi->efct_shm->q[0].qid; *flags |= EFHW_FILTER_F_PREF_RXQ; - } - else { + } else { *flags |= EFHW_FILTER_F_ANY_RXQ; } } @@ -968,13 +944,14 @@ void tcp_helper_get_filter_params(tcp_helper_resource_t* trs, int hwport, int tcp_helper_post_filter_add(tcp_helper_resource_t* trs, int hwport, - const struct efx_filter_spec* spec, int rxq, - bool replace) + const struct efx_filter_spec* spec, int rxq, bool replace) { int intf_i; struct efhw_nic* nic; - int hugepages = max(1, DIV_ROUND_UP(NI_OPTS_TRS(trs).rxq_size * EFCT_PKT_STRIDE, CI_HUGEPAGE_SIZE)); + int hugepages = + max(1, DIV_ROUND_UP(NI_OPTS_TRS(trs).rxq_size * EFCT_PKT_STRIDE, + CI_HUGEPAGE_SIZE)); ci_assert_lt((unsigned) hwport, CI_CFG_MAX_HWPORTS); if( (intf_i = trs->netif.hwport_to_intf_i[hwport]) < 0 ) return 0; @@ -986,7 +963,7 @@ int tcp_helper_post_filter_add(tcp_helper_resource_t* trs, int hwport, int qix; int rc; - if( vi_rs->q[EFHW_RXQ].capacity == 0 ) /* e.g. EF_RXQ_SIZE=0 */ + if( vi_rs->q[EFHW_RXQ].capacity == 0 ) /* e.g. 
EF_RXQ_SIZE=0 */ return 0; ci_assert_ge(rxq, 0); @@ -998,9 +975,8 @@ int tcp_helper_post_filter_add(tcp_helper_resource_t* trs, int hwport, if( qix == -EALREADY ) return 0; - rc = efrm_rxq_alloc(vi_rs, rxq, qix, true, hugepages, - trs->thc_efct_memfd, &trs->thc_efct_memfd_off, - &trs->nic[intf_i].thn_efct_rxq[qix]); + rc = efrm_rxq_alloc(vi_rs, rxq, qix, true, hugepages, trs->thc_efct_memfd, + &trs->thc_efct_memfd_off, &trs->nic[intf_i].thn_efct_rxq[qix]); if( rc < 0 ) { ci_log("%s: ERROR: efrm_rxq_alloc failed (%d)\n", __func__, rc); return rc; @@ -1011,11 +987,10 @@ int tcp_helper_post_filter_add(tcp_helper_resource_t* trs, int hwport, if( NI_OPTS(&trs->netif).int_driven ) { ci_bit_set(&trs->netif.state->evq_prime_deferred, intf_i); if( efab_tcp_helper_netif_lock_or_set_flags(trs, - OO_TRUSTED_LOCK_NEED_POLL | OO_TRUSTED_LOCK_NEED_PRIME, - CI_EPLOCK_NETIF_NEED_POLL | CI_EPLOCK_NETIF_NEED_PRIME, 0) ) { + OO_TRUSTED_LOCK_NEED_POLL | OO_TRUSTED_LOCK_NEED_PRIME, + CI_EPLOCK_NETIF_NEED_POLL | CI_EPLOCK_NETIF_NEED_PRIME, 0) ) { ef_eplock_holder_set_flag(&trs->netif.state->lock, - CI_EPLOCK_NETIF_NEED_POLL | - CI_EPLOCK_NETIF_NEED_PRIME); + CI_EPLOCK_NETIF_NEED_POLL | CI_EPLOCK_NETIF_NEED_PRIME); efab_tcp_helper_netif_unlock(trs, 0); } } @@ -1027,19 +1002,18 @@ int tcp_helper_post_filter_add(tcp_helper_resource_t* trs, int hwport, #if CI_CFG_PIO -static int allocate_pio(tcp_helper_resource_t* trs, int intf_i, - struct efrm_pd *pd, struct efhw_nic* nic, - unsigned *pio_buf_offset) +static int allocate_pio(tcp_helper_resource_t* trs, int intf_i, + struct efrm_pd* pd, struct efhw_nic* nic, unsigned* pio_buf_offset) { ci_netif* ni = &trs->netif; ci_netif_state* ns = ni->state; ci_netif_state_nic_t* nsn = &ns->nic[intf_i]; - ci_netif_nic_t *netif_nic = &trs->netif.nic_hw[intf_i]; + ci_netif_nic_t* netif_nic = &trs->netif.nic_hw[intf_i]; struct tcp_helper_nic* trs_nic = &trs->nic[intf_i]; int rc = 0; if( nic->pio_num == 0 ) - return 0; + return 0; if( trs_nic->thn_pio_rs == 
NULL ) { rc = efrm_pio_alloc(pd, &trs_nic->thn_pio_rs); @@ -1047,27 +1021,25 @@ static int allocate_pio(tcp_helper_resource_t* trs, int intf_i, if( NI_OPTS(ni).pio == 1 ) { if( rc == -ENOSPC ) { NI_LOG(ni, RESOURCE_WARNINGS, - "[%s]: WARNING: all PIO bufs allocated to other stacks. " - "Continuing without PIO. Use EF_PIO to control this.", - ns->pretty_name); + "[%s]: WARNING: all PIO bufs allocated to other stacks. " + "Continuing without PIO. Use EF_PIO to control this.", + ns->pretty_name); return 0; - } - else { - CI_NDEBUG( if( rc != -ENETDOWN && rc != -EPERM ) ) - /* ENETDOWN means absent hardware, so this failure is - * expected, and we should not warn about it in NDEBUG - * builds. EPERM is expected on NICs that don't - * support PIO. - */ - NI_LOG(ni, RESOURCE_WARNINGS, - "[%s]: Unable to alloc PIO (%d), will continue without it", - ns->pretty_name, rc); + } else { + CI_NDEBUG(if( rc != -ENETDOWN && rc != -EPERM )) + /* ENETDOWN means absent hardware, so this failure is + * expected, and we should not warn about it in NDEBUG + * builds. EPERM is expected on NICs that don't + * support PIO. 
+ */ + NI_LOG(ni, RESOURCE_WARNINGS, + "[%s]: Unable to alloc PIO (%d), will continue without it", + ns->pretty_name, rc); return 0; } - } - else { - OO_DEBUG_VM (ci_log ("%s: ERROR: efrm_pio_alloc(%d) failed %d", - __FUNCTION__, intf_i, rc)); + } else { + OO_DEBUG_VM(ci_log("%s: ERROR: efrm_pio_alloc(%d) failed %d", + __FUNCTION__, intf_i, rc)); return rc; } } @@ -1080,41 +1052,39 @@ static int allocate_pio(tcp_helper_resource_t* trs, int intf_i, trs_nic->thn_pio_rs = NULL; if( NI_OPTS(ni).pio == 1 ) { NI_LOG(ni, RESOURCE_WARNINGS, - "[%s]: Unable to link PIO (%d), will continue without it", - ns->pretty_name, rc); + "[%s]: Unable to link PIO (%d), will continue without it", + ns->pretty_name, rc); return 0; - } - else { - OO_DEBUG_VM (ci_log ("%s: ERROR: efrm_pio_link_vi(%d) failed %d", - __FUNCTION__, intf_i, rc)); + } else { + OO_DEBUG_VM(ci_log("%s: ERROR: efrm_pio_link_vi(%d) failed %d", + __FUNCTION__, intf_i, rc)); return rc; } } - + /* efrm_pio_link_vi() success */ - rc = efrm_pio_map_kernel(tcp_helper_vi(trs, intf_i), - (void**) &netif_nic->pio.pio_io); + rc = efrm_pio_map_kernel( + tcp_helper_vi(trs, intf_i), (void**) &netif_nic->pio.pio_io); if( rc < 0 ) { efrm_pio_unlink_vi(trs_nic->thn_pio_rs, tcp_helper_vi(trs, intf_i), NULL); efrm_pio_release(trs_nic->thn_pio_rs, true); trs_nic->thn_pio_rs = NULL; if( NI_OPTS(ni).pio == 1 ) { NI_LOG(ni, RESOURCE_WARNINGS, - "[%s]: Unable to kmap PIO (%d), will continue without it", - ns->pretty_name, rc); + "[%s]: Unable to kmap PIO (%d), will continue without it", + ns->pretty_name, rc); return 0; - } - else { + } else { OO_DEBUG_VM(ci_log("%s: ERROR: efrm_pio_map_kernel(%d) failed %d", - __FUNCTION__, intf_i, rc)); + __FUNCTION__, intf_i, rc)); return rc; } - } + } /* efrm_pio_map_kernel() success */ /* Set up the pio struct so we can call ef_vi_pio_memcpy */ - netif_nic->pio.pio_buffer = - (uint8_t*)ns + ns->pio_bufs_ofs + *pio_buf_offset; + netif_nic->pio.pio_buffer = + (uint8_t*) ns + ns->pio_bufs_ofs + 
*pio_buf_offset; netif_nic->pio.pio_len = efrm_pio_get_size(trs_nic->thn_pio_rs); /* Advertise that PIO can be used on this VI */ nsn->oo_vi_flags |= OO_VI_FLAGS_PIO_EN; @@ -1128,7 +1098,7 @@ static int allocate_pio(tcp_helper_resource_t* trs, int intf_i, netif_nic->vis[CI_Q_ID_NORMAL].linked_pio = &netif_nic->pio; trs->pio_mmap_bytes += CI_PAGE_SIZE; *pio_buf_offset += efrm_pio_get_size(trs_nic->thn_pio_rs); - /* Drop original ref to PIO region as linked VI now holds it */ + /* Drop original ref to PIO region as linked VI now holds it */ efrm_pio_release(trs_nic->thn_pio_rs, true); /* Initialise the buddy allocator for the PIO region. */ ci_pio_buddy_ctor(ni, &nsn->pio_buddy, nsn->pio_io_len); @@ -1151,14 +1121,14 @@ static void get_if_name(ci_netif* ni, int intf_i, char* buf_out) if( ifindex == 0 ) goto no_dev; ndev = dev_get_by_index(&init_net, ifindex); - if( !ndev ) + if( ! ndev ) goto no_dev; memcpy(buf_out, ndev->name, IFNAMSIZ); dev_put(ndev); return; - no_dev: +no_dev: /* cannot identify the device, let's produce a name */ snprintf(buf_out, IFNAMSIZ, "noif/hwp0x%x", hwport); @@ -1167,25 +1137,22 @@ static void get_if_name(ci_netif* ni, int intf_i, char* buf_out) /* Evaluates whether timestamping is to be enabled * based on respective netif options and NIC architecture. */ -static int -check_timestamping_support(const char* stack_name, const char* dir, - int user_val, int device_supports_ts, - const char* if_name, - int* out_try_ts, int* out_retry_without) +static int check_timestamping_support(const char* stack_name, const char* dir, + int user_val, int device_supports_ts, const char* if_name, int* out_try_ts, + int* out_retry_without) { *out_try_ts = (user_val != 0); *out_retry_without = 0; if( ! device_supports_ts && (user_val == 3) ) { - ci_log( - "[%s]: %s timestamping not supported on given interface (%s)", + ci_log("[%s]: %s timestamping not supported on given interface (%s)", stack_name, dir, if_name); return -ENOENT; } if( ! 
device_supports_ts && (user_val == 2) ) { ci_log( - "[%s]: %s timestamping not supported on given interface (%s), " - "continuing with timestamping disabled on this particular interface", - stack_name, dir, if_name); + "[%s]: %s timestamping not supported on given interface (%s), " + "continuing with timestamping disabled on this particular interface", + stack_name, dir, if_name); *out_try_ts = 0; } if( user_val == 1 ) { @@ -1195,40 +1162,41 @@ check_timestamping_support(const char* stack_name, const char* dir, } -static int allocate_pd(ci_netif* ni, struct vi_allocate_info* info, - struct efhw_nic* nic) +static int allocate_pd( + ci_netif* ni, struct vi_allocate_info* info, struct efhw_nic* nic) { int rc = 0; switch( NI_OPTS(ni).packet_buffer_mode ) { - case 0: - break; + case 0: + break; - case CITP_PKTBUF_MODE_PHYS: - info->ef_vi_flags |= EF_VI_RX_PHYS_ADDR | EF_VI_TX_PHYS_ADDR; - break; + case CITP_PKTBUF_MODE_PHYS: + info->ef_vi_flags |= EF_VI_RX_PHYS_ADDR | EF_VI_TX_PHYS_ADDR; + break; - default: - rc = -EINVAL; - return rc; + default: + rc = -EINVAL; + return rc; } if( info->cluster == NULL ) { rc = efrm_pd_alloc(&info->pd, info->client, - ((info->ef_vi_flags & EF_VI_RX_PHYS_ADDR) ? - EFRM_PD_ALLOC_FLAG_PHYS_ADDR_MODE : 0) | - ((info->oo_vi_flags & OO_VI_FLAGS_TX_HW_LOOPBACK_EN) ? - EFRM_PD_ALLOC_FLAG_HW_LOOPBACK : 0) ); + ((info->ef_vi_flags & EF_VI_RX_PHYS_ADDR) + ? EFRM_PD_ALLOC_FLAG_PHYS_ADDR_MODE + : 0) | + ((info->oo_vi_flags & OO_VI_FLAGS_TX_HW_LOOPBACK_EN) + ? 
EFRM_PD_ALLOC_FLAG_HW_LOOPBACK + : 0)); if( rc != 0 ) { - OO_DEBUG_VM (ci_log ("%s: ERROR: efrm_pd_alloc(%d) failed %d", - __FUNCTION__, info->intf_i, rc)); + OO_DEBUG_VM(ci_log("%s: ERROR: efrm_pd_alloc(%d) failed %d", + __FUNCTION__, info->intf_i, rc)); return rc; } ci_assert(info->pd); info->release_pd = 1; info->vi_set = NULL; - } - else { + } else { int hwport = ni->intf_i_to_hwport[info->intf_i]; ci_assert_ge(hwport, 0); info->vi_set = info->cluster->thc_vi_set[hwport]; @@ -1240,32 +1208,30 @@ static int allocate_pd(ci_netif* ni, struct vi_allocate_info* info, } #if CI_CFG_CTPIO -static int /*bool*/ should_try_ctpio(ci_netif* ni, struct efhw_nic* nic, - struct vi_allocate_info* info) +static int /*bool*/ should_try_ctpio( + ci_netif* ni, struct efhw_nic* nic, struct vi_allocate_info* info) { - return - nic->flags & NIC_FLAG_CTPIO_ONLY || - /* Stack configured to use CTPIO. */ - (NI_OPTS(ni).ctpio > 0 && - /* NIC claims support for CTPIO. */ - (nic->flags & NIC_FLAG_TX_CTPIO) != 0 && - /* CTPIO bypasses the NIC's switch. When the switch is enabled, don't use - * CTPIO unless we've been told to do so explicitly. */ - (~nic->flags & NIC_FLAG_MCAST_LOOP_HW || - NI_OPTS(ni).ctpio_switch_bypass)); + return nic->flags & NIC_FLAG_CTPIO_ONLY || + /* Stack configured to use CTPIO. */ + (NI_OPTS(ni).ctpio > 0 && + /* NIC claims support for CTPIO. */ + (nic->flags & NIC_FLAG_TX_CTPIO) != 0 && + /* CTPIO bypasses the NIC's switch. When the switch is enabled, + * don't use CTPIO unless we've been told to do so explicitly. 
*/ + (~nic->flags & NIC_FLAG_MCAST_LOOP_HW || + NI_OPTS(ni).ctpio_switch_bypass)); } #endif -static int -get_vi_settings(ci_netif* ni, struct efhw_nic* nic, - struct vi_allocate_info* info) +static int get_vi_settings( + ci_netif* ni, struct efhw_nic* nic, struct vi_allocate_info* info) { char if_name[IFNAMSIZ]; int rc; info->wakeup_cpu_core = NI_OPTS(ni).irq_core; - info->log_resource_warnings = NI_OPTS(ni).log_category & - (1 << (EF_LOG_RESOURCE_WARNINGS)); + info->log_resource_warnings = + NI_OPTS(ni).log_category & (1 << (EF_LOG_RESOURCE_WARNINGS)); if( NI_OPTS(ni).irq_core < 0 && NI_OPTS(ni).irq_channel < 0 ) { info->wakeup_cpu_core = raw_smp_processor_id(); info->log_resource_warnings = 0; @@ -1277,8 +1243,8 @@ get_vi_settings(ci_netif* ni, struct efhw_nic* nic, * EF100 has different Rx merging mechanism and hasn't RX cut-through. */ if( NI_OPTS(ni).rx_merge_mode || (nic->devtype.arch != EFHW_ARCH_EF100 && - nic->devtype.arch != EFHW_ARCH_EFCT && - ! (nic->flags & NIC_FLAG_RX_CUT_THROUGH)) ) { + nic->devtype.arch != EFHW_ARCH_EFCT && + ! 
(nic->flags & NIC_FLAG_RX_CUT_THROUGH)) ) { info->efhw_flags |= HIGH_THROUGHPUT_EFHW_VI_FLAGS; info->ef_vi_flags |= EF_VI_RX_EVENT_MERGE; } @@ -1286,17 +1252,17 @@ get_vi_settings(ci_netif* ni, struct efhw_nic* nic, if( nic->flags & NIC_FLAG_CTPIO_ONLY ) info->oo_vi_flags |= OO_VI_FLAGS_TX_CTPIO_ONLY; else - info->oo_vi_flags &=~ OO_VI_FLAGS_TX_CTPIO_ONLY; + info->oo_vi_flags &= ~OO_VI_FLAGS_TX_CTPIO_ONLY; if( nic->flags & NIC_FLAG_RX_SHARED ) info->oo_vi_flags |= OO_VI_FLAGS_RX_SHARED; else - info->oo_vi_flags &=~ OO_VI_FLAGS_RX_SHARED; + info->oo_vi_flags &= ~OO_VI_FLAGS_RX_SHARED; if( nic->flags & NIC_FLAG_HW_MULTICAST_REPLICATION ) info->oo_vi_flags |= OO_VI_FLAGS_HW_MULTICAST_REPLICATION; else - info->oo_vi_flags &=~ OO_VI_FLAGS_HW_MULTICAST_REPLICATION; + info->oo_vi_flags &= ~OO_VI_FLAGS_HW_MULTICAST_REPLICATION; if( (nic->flags & NIC_FLAG_MCAST_LOOP_HW) && (NI_OPTS(ni).mcast_recv_hw_loop) ) { @@ -1311,8 +1277,8 @@ get_vi_settings(ci_netif* ni, struct efhw_nic* nic, #if CI_CFG_CTPIO if( should_try_ctpio(ni, nic, info) ) NI_LOG(ni, CONFIG_WARNINGS, - "[%s]: WARNING: Packets sent by CTPIO will not be looped back.", - ni->state->pretty_name); + "[%s]: WARNING: Packets sent by CTPIO will not be looped back.", + ni->state->pretty_name); #endif } else { info->efhw_flags &= ~EFHW_VI_TX_LOOPBACK; @@ -1323,21 +1289,21 @@ get_vi_settings(ci_netif* ni, struct efhw_nic* nic, info->retry_without_ctpio = 0; if( NI_OPTS(ni).af_xdp_zerocopy ) { - if (nic->flags & NIC_FLAG_RX_ZEROCOPY) { + if( nic->flags & NIC_FLAG_RX_ZEROCOPY ) { info->ef_vi_flags |= EF_VI_RX_ZEROCOPY; info->efhw_flags |= EFHW_VI_RX_ZEROCOPY; } else { NI_LOG(ni, CONFIG_WARNINGS, - "[%s]: WARNING: Zerocopy is required but NIC does not support it", - ni->state->pretty_name); + "[%s]: WARNING: Zerocopy is required but NIC does not support it", + ni->state->pretty_name); } } #if CI_CFG_CTPIO if( should_try_ctpio(ni, nic, info) ) { info->try_ctpio = 1; - info->retry_without_ctpio = NI_OPTS(ni).ctpio < 2 && - ! 
(nic->flags & NIC_FLAG_CTPIO_ONLY); + info->retry_without_ctpio = + NI_OPTS(ni).ctpio < 2 && ! (nic->flags & NIC_FLAG_CTPIO_ONLY); if( NI_OPTS(ni).ctpio_mode == EF_CTPIO_MODE_SF_NP ) { info->ef_vi_flags |= EF_VI_TX_CTPIO_NO_POISON; @@ -1347,14 +1313,13 @@ get_vi_settings(ci_netif* ni, struct efhw_nic* nic, info->ctpio_threshold = NI_OPTS(ni).ctpio_ct_thresh; else info->ctpio_threshold = EF_VI_CTPIO_CT_THRESHOLD_SNF; - } - else { + } else { info->vi_ctpio_mmap_bytes = 0; if( NI_OPTS(ni).ctpio == 2 ) { char if_name[IFNAMSIZ]; get_if_name(ni, info->intf_i, if_name); ci_log("[%s]: CTPIO is required, but interface %s does not support it.", - ni->state->pretty_name, if_name); + ni->state->pretty_name, if_name); return -EINVAL; } } @@ -1363,19 +1328,15 @@ get_vi_settings(ci_netif* ni, struct efhw_nic* nic, get_if_name(ni, info->intf_i, if_name); rc = check_timestamping_support(ni->state->pretty_name, "RX", - NI_OPTS(ni).rx_timestamping, - (nic->flags & NIC_FLAG_HW_RX_TIMESTAMPING) != 0, - if_name, - &info->try_rx_ts, - &info->retry_without_rx_ts); + NI_OPTS(ni).rx_timestamping, + (nic->flags & NIC_FLAG_HW_RX_TIMESTAMPING) != 0, if_name, + &info->try_rx_ts, &info->retry_without_rx_ts); if( rc == 0 ) rc = check_timestamping_support(ni->state->pretty_name, "TX", - NI_OPTS(ni).tx_timestamping, - (nic->flags & NIC_FLAG_HW_TX_TIMESTAMPING) != 0, - if_name, - &info->try_tx_ts, - &info->retry_without_tx_ts); + NI_OPTS(ni).tx_timestamping, + (nic->flags & NIC_FLAG_HW_TX_TIMESTAMPING) != 0, if_name, + &info->try_tx_ts, &info->retry_without_tx_ts); return rc; } @@ -1403,9 +1364,9 @@ static int find_and_release_orphaned_stack(void) static int allocate_vi(ci_netif* ni, struct vi_allocate_info* info, - struct efrm_vi *evq_virs, int q_tag) + struct efrm_vi* evq_virs, int q_tag) { - int rc = -EDOM; /* Placate compiler. */ + int rc = -EDOM; /* Placate compiler. 
*/ unsigned evq_min; /* There are various VI flags that can be requested by the caller, and we @@ -1421,28 +1382,28 @@ static int allocate_vi(ci_netif* ni, struct vi_allocate_info* info, int oo_vi_flags; } features[] = { { - .attempt = info->try_rx_ts, - .retry_without = info->retry_without_rx_ts, - .description = "RX timestamping", - .ef_vi_flags = EF_VI_RX_TIMESTAMPS, - .efhw_flags = EFHW_VI_RX_TIMESTAMPS | EFHW_VI_RX_PREFIX, - .oo_vi_flags = OO_VI_FLAGS_RX_HW_TS_EN, + .attempt = info->try_rx_ts, + .retry_without = info->retry_without_rx_ts, + .description = "RX timestamping", + .ef_vi_flags = EF_VI_RX_TIMESTAMPS, + .efhw_flags = EFHW_VI_RX_TIMESTAMPS | EFHW_VI_RX_PREFIX, + .oo_vi_flags = OO_VI_FLAGS_RX_HW_TS_EN, }, { - .attempt = info->try_tx_ts, - .retry_without = info->retry_without_tx_ts, - .description = "TX timestamping", - .ef_vi_flags = EF_VI_TX_TIMESTAMPS, - .efhw_flags = EFHW_VI_TX_TIMESTAMPS, - .oo_vi_flags = OO_VI_FLAGS_TX_HW_TS_EN, + .attempt = info->try_tx_ts, + .retry_without = info->retry_without_tx_ts, + .description = "TX timestamping", + .ef_vi_flags = EF_VI_TX_TIMESTAMPS, + .efhw_flags = EFHW_VI_TX_TIMESTAMPS, + .oo_vi_flags = OO_VI_FLAGS_TX_HW_TS_EN, }, { - .attempt = info->try_ctpio, - .retry_without = info->retry_without_ctpio, - .description = "CTPIO", - .ef_vi_flags = EF_VI_TX_CTPIO, - .efhw_flags = EFHW_VI_TX_CTPIO, - .oo_vi_flags = OO_VI_FLAGS_CTPIO_EN, + .attempt = info->try_ctpio, + .retry_without = info->retry_without_ctpio, + .description = "CTPIO", + .ef_vi_flags = EF_VI_TX_CTPIO, + .efhw_flags = EFHW_VI_TX_CTPIO, + .oo_vi_flags = OO_VI_FLAGS_CTPIO_EN, }, }; const int feature_count = sizeof(features) / sizeof(features[0]); @@ -1486,7 +1447,7 @@ static int allocate_vi(ci_netif* ni, struct vi_allocate_info* info, for( i = 0; i < feature_count; ++i ) if( feature_mask & (1 << i) ) { info->ef_vi_flags |= features[i].ef_vi_flags; - info->efhw_flags |= features[i].efhw_flags; + info->efhw_flags |= features[i].efhw_flags; info->oo_vi_flags 
|= features[i].oo_vi_flags; } @@ -1494,22 +1455,17 @@ static int allocate_vi(ci_netif* ni, struct vi_allocate_info* info, * is made to find and release orphaned stack and try allocation again. */ for( i = 0; i < 2; ++i ) { rc = efrm_vi_resource_alloc(info->client, evq_virs, info->vi_set, -1, - info->pd, info->name, - info->efhw_flags, - info->evq_capacity, info->txq_capacity, - info->rxq_capacity, q_tag, q_tag, - info->wakeup_cpu_core, - info->wakeup_channel, - info->virs, - &info->vi_io_mmap_bytes, - &info->vi_ctpio_mmap_bytes, NULL, NULL, - info->log_resource_warnings); + info->pd, info->name, info->efhw_flags, info->evq_capacity, + info->txq_capacity, info->rxq_capacity, q_tag, q_tag, + info->wakeup_cpu_core, info->wakeup_channel, info->virs, + &info->vi_io_mmap_bytes, &info->vi_ctpio_mmap_bytes, NULL, NULL, + info->log_resource_warnings); /* If we succeeded, there is no need to find and release orphan stack. */ if( rc != -EBUSY ) break; /* If first allocation returned EBUSY there is try to search and release - * orphaned stack. If it succeed another allocation will be attempted. */ + * orphaned stack. If it succeed another allocation will be attempted. */ if( i == 0 && find_and_release_orphaned_stack() != 0 ) break; } @@ -1525,8 +1481,8 @@ static int allocate_vi(ci_netif* ni, struct vi_allocate_info* info, * about the interrupt affinity. In this case, and only in this case, do * we enable the warnings in the innards of the allocation functions. */ info_base.wakeup_cpu_core = NI_OPTS(ni).irq_core; - info_base.log_resource_warnings = NI_OPTS(ni).log_category & - (1 << (EF_LOG_RESOURCE_WARNINGS)); + info_base.log_resource_warnings = + NI_OPTS(ni).log_category & (1 << (EF_LOG_RESOURCE_WARNINGS)); /* Fake out the loop counter to give us one more shot. 
*/ ++feature_mask; } @@ -1536,23 +1492,23 @@ static int allocate_vi(ci_netif* ni, struct vi_allocate_info* info, n_shm_rxqs = efhw_nic_max_shared_rxqs(efrm_client_get_nic(info->client)); info->vi_efct_shm_mmap_bytes = - n_shm_rxqs ? CI_ROUND_UP(CI_EFCT_SHM_BYTES(n_shm_rxqs), PAGE_SIZE) : 0; + n_shm_rxqs ? CI_ROUND_UP(CI_EFCT_SHM_BYTES(n_shm_rxqs), PAGE_SIZE) : 0; if( rc < 0 ) { - OO_DEBUG_VM (ci_log ("%s: ERROR: efrm_vi_resource_alloc(%d) failed %d", - __FUNCTION__, info->intf_i, rc)); + OO_DEBUG_VM(ci_log("%s: ERROR: efrm_vi_resource_alloc(%d) failed %d", + __FUNCTION__, info->intf_i, rc)); if( info->release_pd ) efrm_pd_release(info->pd); - } - else { + } else { /* Warn about any requested features that we didn't get. */ for( i = 0; i < feature_count; ++i ) if( features[i].attempt && ! (feature_mask & (1 << i)) ) { char if_name[IFNAMSIZ]; get_if_name(ni, info->intf_i, if_name); - ci_log("[%s]: enabling %s on interface %s failed, " - "continuing with it disabled on this interface", - ni->state->pretty_name, features[i].description, if_name); + ci_log( + "[%s]: enabling %s on interface %s failed, " + "continuing with it disabled on this interface", + ni->state->pretty_name, features[i].description, if_name); } } @@ -1562,37 +1518,36 @@ static int allocate_vi(ci_netif* ni, struct vi_allocate_info* info, static int tcp_helper_superbuf_config_refresh(ef_vi* vi, int qid) { - return efrm_rxq_refresh_kernel(vi->dh, vi->efct_shm->q[qid].qid, - vi->efct_rxq[qid].superbufs); + return efrm_rxq_refresh_kernel( + vi->dh, vi->efct_shm->q[qid].qid, vi->efct_rxq[qid].superbufs); } static int initialise_vi(ci_netif* ni, struct ef_vi* vi, struct efrm_vi* vi_rs, - struct efrm_vi_mappings* vm, void* vi_state, - int vi_arch, int vi_variant, int vi_revision, - unsigned char vi_nic_flags, - struct vi_allocate_info* alloc_info, - unsigned* vi_out_flags, ef_vi_stats* vi_stats) + struct efrm_vi_mappings* vm, void* vi_state, int vi_arch, int vi_variant, + int vi_revision, unsigned char 
vi_nic_flags, + struct vi_allocate_info* alloc_info, unsigned* vi_out_flags, + ef_vi_stats* vi_stats) { uint32_t* vi_ids = (void*) ((ef_vi_state*) vi_state + 1); efrm_vi_get_mappings(vi_rs, vm); ef_vi_init(vi, vi_arch, vi_variant, vi_revision, alloc_info->ef_vi_flags, - vi_nic_flags, (ef_vi_state*) vi_state); - *vi_out_flags = (vm->out_flags & EFHW_VI_CLOCK_SYNC_STATUS) ? - EF_VI_OUT_CLOCK_SYNC_STATUS : 0; + vi_nic_flags, (ef_vi_state*) vi_state); + *vi_out_flags = (vm->out_flags & EFHW_VI_CLOCK_SYNC_STATUS) + ? EF_VI_OUT_CLOCK_SYNC_STATUS + : 0; - ef_vi_init_out_flags( vi, *vi_out_flags); + ef_vi_init_out_flags(vi, *vi_out_flags); ef_vi_init_io(vi, vm->io_page); ef_vi_init_timer(vi, vm->timer_quantum_ns); ef_vi_init_evq(vi, vm->evq_size, vm->evq_base); if( vm->rxq_size > 0 ) { - ef_vi_init_rxq(vi, vm->rxq_size, vm->rxq_descriptors, vi_ids, - vm->rxq_prefix_len); + ef_vi_init_rxq( + vi, vm->rxq_size, vm->rxq_descriptors, vi_ids, vm->rxq_prefix_len); vi_ids += vm->rxq_size; - } - else { + } else { /* efct_poll_rx() will crash if called when rx hasn't been inited, so * guarantee that it never will be even if somebody corrupts the shared * memory */ @@ -1609,7 +1564,7 @@ static int initialise_vi(ci_netif* ni, struct ef_vi* vi, struct efrm_vi* vi_rs, return rc; for( i = 0; i < vi->max_efct_rxq; ++i ) efct_vi_attach_rxq_internal(vi, i, -1 /* resource ID not needed */, - tcp_helper_superbuf_config_refresh); + tcp_helper_superbuf_config_refresh); } ef_vi_init_state(vi); ef_vi_set_stats_buf(vi, vi_stats); @@ -1631,12 +1586,12 @@ static int af_xdp_kick(ef_vi* vi) } static int allocate_vis(tcp_helper_resource_t* trs, - ci_resource_onload_alloc_t* alloc, - void* vi_state, tcp_helper_cluster_t* thc) + ci_resource_onload_alloc_t* alloc, void* vi_state, + tcp_helper_cluster_t* thc) { /* Format is "onload:pretty_name-intf_i" * Do not use slash in this name! 
*/ - char vf_name[7 + CI_CFG_STACK_NAME_LEN+8 + 3]; + char vf_name[7 + CI_CFG_STACK_NAME_LEN + 8 + 3]; ci_netif* ni = &trs->netif; ci_netif_state* ns = ni->state; int rc, intf_i; @@ -1681,7 +1636,8 @@ static int allocate_vis(tcp_helper_resource_t* trs, if( ! NI_OPTS(ni).tx_push ) base_ef_vi_flags |= EF_VI_TX_PUSH_DISABLE; - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { int vi_i; for( vi_i = 0; vi_i < CI_MAX_VIS_PER_INTF; ++vi_i ) { trs->nic[intf_i].thn_vi_rs[vi_i] = NULL; @@ -1693,18 +1649,19 @@ static int allocate_vis(tcp_helper_resource_t* trs, trs->nic[intf_i].thn_pio_io_mmap_bytes = 0; #endif memset(trs->nic[intf_i].thn_efct_rxq, 0, - sizeof(trs->nic[intf_i].thn_efct_rxq)); + sizeof(trs->nic[intf_i].thn_efct_rxq)); } /* This loop does the work of allocating a vi, using the information built * up in the alloc_info structure. It then updates the nsn structure with * the resultant resource information. */ - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { struct tcp_helper_nic* trs_nic = &trs->nic[intf_i]; ci_netif_state_nic_t* nsn = &ns->nic[intf_i]; struct efhw_nic* nic = - efrm_client_get_nic(trs_nic->thn_oo_nic->efrm_client); + efrm_client_get_nic(trs_nic->thn_oo_nic->efrm_client); struct efrm_vi_mappings* vm = (void*) ni->vi_data; unsigned vi_out_flags = 0; struct device* dev; @@ -1713,11 +1670,11 @@ static int allocate_vis(tcp_helper_resource_t* trs, BUILD_BUG_ON(sizeof(ni->vi_data) < sizeof(struct efrm_vi_mappings)); - alloc_info.hwport_flags = 0; /* Placate compiler. */ + alloc_info.hwport_flags = 0; /* Placate compiler. */ /* Get interface properties. 
*/ rc = oo_cp_get_hwport_properties(ni->cplane, ns->intf_i_to_hwport[intf_i], - &alloc_info.hwport_flags, NULL); + &alloc_info.hwport_flags, NULL); if( rc < 0 ) goto error_out; @@ -1734,8 +1691,8 @@ static int allocate_vis(tcp_helper_resource_t* trs, ci_assert(trs_nic->thn_oo_nic != NULL); ci_assert(alloc_info.client != NULL); - snprintf(vf_name, sizeof(vf_name), "onload:%s-%d", - ns->pretty_name, intf_i); + snprintf( + vf_name, sizeof(vf_name), "onload:%s-%d", ns->pretty_name, intf_i); rc = get_vi_settings(ni, nic, &alloc_info); if( rc != 0 ) @@ -1758,16 +1715,15 @@ static int allocate_vis(tcp_helper_resource_t* trs, trs->thc_rss_instance = efrm_vi_set_get_vi_instance(vi_rs); ns->rss_instance = trs->thc_rss_instance; ns->cluster_size = thc->thc_cluster_size; - } - else { + } else { ns->cluster_size = 1; } vi = ci_netif_vi(ni, intf_i); - rc = initialise_vi(ni, vi, tcp_helper_vi(trs, intf_i), vm, - vi_state, nic->devtype.arch, nic->devtype.variant, - nic->devtype.revision, efhw_vi_nic_flags(nic), - &alloc_info, &vi_out_flags, &ni->state->vi_stats); + rc = initialise_vi(ni, vi, tcp_helper_vi(trs, intf_i), vm, vi_state, + nic->devtype.arch, nic->devtype.variant, nic->devtype.revision, + efhw_vi_nic_flags(nic), &alloc_info, &vi_out_flags, + &ni->state->vi_stats); if( rc < 0 ) goto error_out; @@ -1780,22 +1736,22 @@ static int allocate_vis(tcp_helper_resource_t* trs, #if CI_CFG_CTPIO nsn->ctpio_ct_threshold = alloc_info.ctpio_threshold; nsn->ctpio_max_frame_len = nsn->ctpio_frame_len_check = - nsn->oo_vi_flags & OO_VI_FLAGS_CTPIO_EN ? - NI_OPTS(ni).ctpio_max_frame_len : 0; + nsn->oo_vi_flags & OO_VI_FLAGS_CTPIO_EN + ? NI_OPTS(ni).ctpio_max_frame_len + : 0; #endif dev = efrm_vi_get_dev(vi_rs); strncpy(nsn->dev_name, dev ? 
dev_name(dev) : "?", sizeof(nsn->dev_name)); if( dev ) put_device(dev); nsn->dev_name[sizeof(nsn->dev_name) - 1] = '\0'; - nsn->vi_instance[0] = - (ci_uint16) EFAB_VI_RESOURCE_INSTANCE(vi_rs); + nsn->vi_instance[0] = (ci_uint16) EFAB_VI_RESOURCE_INSTANCE(vi_rs); nsn->vi_abs_idx[0] = efhw_nic_rel_to_abs_idx(nic, nsn->vi_instance[0]); nsn->vi_arch = (ci_uint8) nic->devtype.arch; nsn->vi_variant = (ci_uint8) nic->devtype.variant; nsn->vi_revision = (ci_uint8) nic->devtype.revision; nsn->vi_nic_flags = efhw_vi_nic_flags(nic); - nsn->vi_channel = (ci_uint8)efrm_vi_get_channel(vi_rs); + nsn->vi_channel = (ci_uint8) efrm_vi_get_channel(vi_rs); nsn->vi_flags = alloc_info.ef_vi_flags; nsn->vi_out_flags = vi_out_flags; nsn->vi_evq_bytes = efrm_vi_rm_evq_bytes(vi_rs, -1); @@ -1811,8 +1767,8 @@ static int allocate_vis(tcp_helper_resource_t* trs, trs->io_mmap_bytes += alloc_info.vi_io_mmap_bytes; trs->efct_shm_mmap_bytes += alloc_info.vi_efct_shm_mmap_bytes; - vi_state = (char*) vi_state + - ef_vi_calc_state_bytes(vm->rxq_size, vm->txq_size); + vi_state = + (char*) vi_state + ef_vi_calc_state_bytes(vm->rxq_size, vm->txq_size); #if CI_CFG_CTPIO trs_nic->thn_ctpio_io_mmap_bytes = alloc_info.vi_ctpio_mmap_bytes; @@ -1866,10 +1822,10 @@ static int allocate_vis(tcp_helper_resource_t* trs, * to be the best for that */ alloc_info.try_ctpio = false; - for( vi_i = 1; vi_i < num_vis; ++vi_i) { - struct efrm_vi *vi_rs; - int qid = ef_vi_add_queue(ci_netif_vi(ni, intf_i), - &ni->nic_hw[intf_i].vis[vi_i]); + for( vi_i = 1; vi_i < num_vis; ++vi_i ) { + struct efrm_vi* vi_rs; + int qid = ef_vi_add_queue( + ci_netif_vi(ni, intf_i), &ni->nic_hw[intf_i].vis[vi_i]); ci_assert_equal(qid, vi_i); alloc_info.virs = &trs_nic->thn_vi_rs[vi_i]; rc = allocate_vi(ni, &alloc_info, tcp_helper_vi(trs, intf_i), qid); @@ -1879,27 +1835,24 @@ static int allocate_vis(tcp_helper_resource_t* trs, goto error_out; } vi_rs = trs_nic->thn_vi_rs[vi_i]; - rc = initialise_vi(ni, &ni->nic_hw[intf_i].vis[vi_i], - vi_rs, 
vm, - vi_state, nic->devtype.arch, nic->devtype.variant, - nic->devtype.revision, efhw_vi_nic_flags(nic), - &alloc_info, &vi_out_flags, &ni->state->vi_stats); + rc = initialise_vi(ni, &ni->nic_hw[intf_i].vis[vi_i], vi_rs, vm, + vi_state, nic->devtype.arch, nic->devtype.variant, + nic->devtype.revision, efhw_vi_nic_flags(nic), &alloc_info, + &vi_out_flags, &ni->state->vi_stats); if( rc < 0 ) { if( release_pd ) efrm_pd_release(alloc_info.pd); /* vi keeps a ref to pd */ goto error_out; } - nsn->vi_instance[vi_i] = - (ci_uint16) EFAB_VI_RESOURCE_INSTANCE(vi_rs); + nsn->vi_instance[vi_i] = (ci_uint16) EFAB_VI_RESOURCE_INSTANCE(vi_rs); nsn->vi_abs_idx[vi_i] = - efhw_nic_rel_to_abs_idx(nic, nsn->vi_instance[vi_i]); + efhw_nic_rel_to_abs_idx(nic, nsn->vi_instance[vi_i]); trs->io_mmap_bytes += alloc_info.vi_io_mmap_bytes; vi_state = (char*) vi_state + ef_vi_calc_state_bytes(vm->rxq_size, vm->txq_size); } - } - else + } else #endif ci_assert_equal(ci_netif_num_vis(ni), 1); @@ -1912,7 +1865,8 @@ static int allocate_vis(tcp_helper_resource_t* trs, return 0; error_out: - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { int vi_i; for( vi_i = ci_netif_num_vis(ni) - 1; vi_i >= 0; --vi_i ) { if( trs->nic[intf_i].thn_vi_rs[vi_i] ) { @@ -1937,20 +1891,22 @@ static int deferred_vis(tcp_helper_resource_t* trs) /* hugetlbfs pages are incompatible with AF_XDP */ if( NI_OPTS(ni).huge_pages != 0 ) { NI_LOG(ni, RESOURCE_WARNINGS, - "[%s]: WARNING: huge pages are incompatible with AF_XDP. " - "Disabling hugepage support.", - ni->state->pretty_name); + "[%s]: WARNING: huge pages are incompatible with AF_XDP. " + "Ignoring and not disabling hugepage support.", + ni->state->pretty_name); - NI_OPTS(ni).huge_pages = 0; + NI_OPTS(ni).huge_pages = 1; } /* All buffers need to be allocated before AF_XDP sockets are usable. 
*/ - while( (rc = efab_tcp_helper_more_bufs(trs)) == 0 ); + while( (rc = efab_tcp_helper_more_bufs(trs)) == 0 ) + ; if( rc != -ENOSPC ) return rc; } - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { struct tcp_helper_nic* trs_nic = &trs->nic[intf_i]; int vi_i; @@ -1958,9 +1914,8 @@ static int deferred_vis(tcp_helper_resource_t* trs) uint32_t mmap_bytes; rc = efrm_vi_resource_deferred(trs_nic->thn_vi_rs[vi_i], - CI_CFG_PKT_BUF_SIZE, - offsetof(ci_ip_pkt_fmt, dma_start), - &mmap_bytes); + CI_CFG_PKT_BUF_SIZE, offsetof(ci_ip_pkt_fmt, dma_start), + &mmap_bytes); if( rc < 0 ) return rc; @@ -1971,13 +1926,13 @@ static int deferred_vis(tcp_helper_resource_t* trs) * someone else would get if they checked separately. */ ci_assert_equal(ni->state->nic[intf_i].vi_io_mmap_bytes, - efab_vi_resource_mmap_bytes(trs_nic->thn_vi_rs[vi_i], 0)); + efab_vi_resource_mmap_bytes(trs_nic->thn_vi_rs[vi_i], 0)); ci_assert_equal(trs_nic->thn_vi_mmap_bytes[vi_i], - efab_vi_resource_mmap_bytes(trs_nic->thn_vi_rs[vi_i], 1)); + efab_vi_resource_mmap_bytes(trs_nic->thn_vi_rs[vi_i], 1)); ci_assert_equal( - efab_vi_resource_mmap_bytes(tcp_helper_vi(trs, intf_i), 0), - efab_vi_resource_mmap_bytes(trs_nic->thn_vi_rs[vi_i], 0)); + efab_vi_resource_mmap_bytes(tcp_helper_vi(trs, intf_i), 0), + efab_vi_resource_mmap_bytes(trs_nic->thn_vi_rs[vi_i], 0)); } } @@ -1986,9 +1941,9 @@ static int deferred_vis(tcp_helper_resource_t* trs) } -static void vi_complete(void *completion_void) +static void vi_complete(void* completion_void) { - complete((struct completion *)completion_void); + complete((struct completion*) completion_void); } #if CI_CFG_NIC_RESET_SUPPORT @@ -2006,14 +1961,14 @@ static void release_pkts(tcp_helper_resource_t* trs) int n_free = 0; #endif - for (i = 0; i < ni->pkt_sets_n; i++) { + for( i = 0; i < ni->pkt_sets_n; i++ ) { ci_assert(ni->pkt_bufs[i]); #ifndef NDEBUG n_free += ni->packets->set[i].n_free; #endif OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - 
oo_iobufset_resource_release(ni->nic_hw[intf_i].pkt_rs[i], - intfs_suspended(trs) & (1 << intf_i)); + oo_iobufset_resource_release( + ni->nic_hw[intf_i].pkt_rs[i], intfs_suspended(trs) & (1 << intf_i)); } #ifndef NDEBUG if( ~trs->netif.flags & CI_NETIF_FLAG_WEDGED ) @@ -2022,9 +1977,9 @@ static void release_pkts(tcp_helper_resource_t* trs) /* Now release everything allocated in allocate_netif_hw_resources. */ OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - ci_free(ni->nic_hw[intf_i].pkt_rs); + ci_free(ni->nic_hw[intf_i].pkt_rs); - for (i = 0; i < ni->pkt_sets_n; i++) + for( i = 0; i < ni->pkt_sets_n; i++ ) oo_iobufset_pages_release(ni->pkt_bufs[i]); vfree(ni->pkt_bufs); } @@ -2033,7 +1988,8 @@ static void release_pkts(tcp_helper_resource_t* trs) static void detach_efct_rxqs(tcp_helper_resource_t* trs) { int intf_i; - OO_STACK_FOR_EACH_INTF_I(&trs->netif, intf_i) { + OO_STACK_FOR_EACH_INTF_I(&trs->netif, intf_i) + { int num_vis = ci_netif_num_vis(&trs->netif); int vi_i; struct tcp_helper_nic* trs_nic = &trs->nic[intf_i]; @@ -2055,10 +2011,11 @@ static void release_vi(tcp_helper_resource_t* trs) int intf_i; /* Flush vis first to ensure our bufs won't be used any more */ - OO_STACK_FOR_EACH_INTF_I(&trs->netif, intf_i) { + OO_STACK_FOR_EACH_INTF_I(&trs->netif, intf_i) + { int vi_i; for( vi_i = ci_netif_num_vis(&trs->netif) - 1; vi_i >= 0; --vi_i ) { - struct efrm_vi *vi_rs = trs->nic[intf_i].thn_vi_rs[vi_i]; + struct efrm_vi* vi_rs = trs->nic[intf_i].thn_vi_rs[vi_i]; reinit_completion(&trs->complete); efrm_vi_register_flush_callback(vi_rs, &vi_complete, &trs->complete); efrm_vi_resource_stop_callback(vi_rs); @@ -2067,29 +2024,28 @@ static void release_vi(tcp_helper_resource_t* trs) } /* Now do the rest of vi release */ - OO_STACK_FOR_EACH_INTF_I(&trs->netif, intf_i) { + OO_STACK_FOR_EACH_INTF_I(&trs->netif, intf_i) + { int num_vis = ci_netif_num_vis(&trs->netif); int vi_i; struct tcp_helper_nic* trs_nic = &trs->nic[intf_i]; - ci_netif_nic_t *netif_nic = 
&trs->netif.nic_hw[intf_i]; + ci_netif_nic_t* netif_nic = &trs->netif.nic_hw[intf_i]; #if CI_CFG_PIO || (CI_CFG_WANT_BPF_NATIVE && CI_HAVE_BPF_NATIVE) struct efhw_nic* nic = - efrm_client_get_nic(trs_nic->thn_oo_nic->efrm_client); + efrm_client_get_nic(trs_nic->thn_oo_nic->efrm_client); #endif #if CI_CFG_PIO - if( NI_OPTS(&trs->netif).pio && - (nic->devtype.arch == EFHW_ARCH_EF10) && + if( NI_OPTS(&trs->netif).pio && (nic->devtype.arch == EFHW_ARCH_EF10) && (trs_nic->thn_pio_io_mmap_bytes != 0) ) { - efrm_pio_unmap_kernel(tcp_helper_vi(trs, intf_i), - (void*)netif_nic->pio.pio_io); - ci_pio_buddy_dtor(&trs->netif, - &trs->netif.state->nic[intf_i].pio_buddy); + efrm_pio_unmap_kernel( + tcp_helper_vi(trs, intf_i), (void*) netif_nic->pio.pio_io); + ci_pio_buddy_dtor(&trs->netif, &trs->netif.state->nic[intf_i].pio_buddy); } #endif #if CI_CFG_CTPIO if( trs_nic->thn_ctpio_io_mmap != NULL ) - efrm_ctpio_unmap_kernel(tcp_helper_vi(trs, intf_i), - trs_nic->thn_ctpio_io_mmap); + efrm_ctpio_unmap_kernel( + tcp_helper_vi(trs, intf_i), trs_nic->thn_ctpio_io_mmap); #endif for( vi_i = num_vis - 1; vi_i >= 0; --vi_i ) { ef_vi* vi = &trs->netif.nic_hw[intf_i].vis[vi_i]; @@ -2125,9 +2081,9 @@ static void tcp_helper_leak_check(tcp_helper_resource_t* trs) for( i = 0; i < CI_TCP_AUX_TYPE_NUM; i++ ) { ci_assert_equal(ni->state->n_aux_bufs[i], 0); if( ni->state->n_aux_bufs[i] != 0 ) { - ci_log("%s[%d]: aux_bufs[%s]: leaked %d out of %d", - __func__, NI_ID(ni), ci_tcp_aux_type2str(i), - ni->state->n_aux_bufs[i], ni->state->max_aux_bufs[i]); + ci_log("%s[%d]: aux_bufs[%s]: leaked %d out of %d", __func__, NI_ID(ni), + ci_tcp_aux_type2str(i), ni->state->n_aux_bufs[i], + ni->state->max_aux_bufs[i]); } } @@ -2143,9 +2099,9 @@ static void tcp_helper_leak_check(tcp_helper_resource_t* trs) */ table_n_entries = ni->state->stats.table_n_entries #if CI_CFG_IPV6 - + ni->state->stats.ipv6_table_n_entries + + ni->state->stats.ipv6_table_n_entries #endif - ; + ; #ifndef NDEBUG 
oof_cb_sw_filter_apply(&trs->netif); @@ -2157,7 +2113,7 @@ static void tcp_helper_leak_check(tcp_helper_resource_t* trs) ! (trs->trusted_lock & OO_TRUSTED_LOCK_SWF_UPDATE) && ! (trs->netif.state->lock.lock & CI_EPLOCK_NETIF_SWF_UPDATE) ) ci_log("%s[%d]: leaked %d software filters", __func__, NI_ID(ni), - table_n_entries); + table_n_entries); ci_assert_equal(ni->state->reserved_pktbufs, 0); } @@ -2173,15 +2129,15 @@ static void ci_ipid_ctor(ci_netif* ni) ci_assert(ni); ipid = NI_IPID(ni); - memset( ipid, 0, sizeof(*ipid)); + memset(ipid, 0, sizeof(*ipid)); if( (range = efab_ipid_alloc(&efab_tcp_driver.ipid)) < 0 ) { - ipid->no_free = 1; /* skip range release in dtor */ + ipid->no_free = 1; /* skip range release in dtor */ range = CI_IPID_MIN; /* just use range 0 anyway */ } - ci_assert( (range + CI_IPID_BLOCK_MASK) < 0x10000); - ipid->base = (ci_uint16)range; + ci_assert((range + CI_IPID_BLOCK_MASK) < 0x10000); + ipid->base = (ci_uint16) range; ipid->next = 0; #if CI_CFG_IPV6 ipid->ip6_base = range << 16; @@ -2201,15 +2157,14 @@ static void ci_ipid_dtor(ci_netif* ni) /* Skip range relese if allocation failed in ctor */ if( ipid->no_free ) return; - ci_assert( ipid->base <= (ci_uint16)(0x10000 - CI_IPID_BLOCK_LENGTH) ); - ci_assert( !(ipid->base & CI_IPID_BLOCK_MASK)); - efab_ipid_free(&efab_tcp_driver.ipid, (ci_int32)ipid->base); + ci_assert(ipid->base <= (ci_uint16) (0x10000 - CI_IPID_BLOCK_LENGTH)); + ci_assert(! (ipid->base & CI_IPID_BLOCK_MASK)); + efab_ipid_free(&efab_tcp_driver.ipid, (ci_int32) ipid->base); } -static int -allocate_netif_resources(ci_resource_onload_alloc_t* alloc, - tcp_helper_resource_t* trs, int cluster_size) +static int allocate_netif_resources(ci_resource_onload_alloc_t* alloc, + tcp_helper_resource_t* trs, int cluster_size) { ci_netif* ni = &trs->netif; ci_netif_state* ns; @@ -2252,59 +2207,59 @@ allocate_netif_resources(ci_resource_onload_alloc_t* alloc, /* FIXME: Reconsider the size of this table. 
*/ entries = NI_OPTS(ni).max_ep_bufs * 2; no_seq_table_entries = 1u << ci_log2_ge(entries, 1); - } - else { + } else { no_seq_table_entries = 0; } /* pkt_sets_n should be zeroed before possible NIC reset */ if( NI_OPTS(ni).max_packets > max_packets_per_stack ) { - OO_DEBUG_ERR(ci_log("WARNING: EF_MAX_PACKETS reduced from %d to %d due to " - "max_packets_per_stack module option", - NI_OPTS(ni).max_packets, max_packets_per_stack)); + OO_DEBUG_ERR( + ci_log("WARNING: EF_MAX_PACKETS reduced from %d to %d due to " + "max_packets_per_stack module option", + NI_OPTS(ni).max_packets, max_packets_per_stack)); ni->state = NULL; tcp_helper_reduce_max_packets(ni, max_packets_per_stack); } ni->pkt_sets_n = 0; ni->pkt_sets_max = - (NI_OPTS(ni).max_packets + PKTS_PER_SET - 1) >> CI_CFG_PKTS_PER_SET_S; + (NI_OPTS(ni).max_packets + PKTS_PER_SET - 1) >> CI_CFG_PKTS_PER_SET_S; /* Find size of netif state to allocate. */ - vi_state_bytes = ef_vi_calc_state_bytes(NI_OPTS(ni).rxq_size, - NI_OPTS(ni).txq_size); + vi_state_bytes = + ef_vi_calc_state_bytes(NI_OPTS(ni).rxq_size, NI_OPTS(ni).txq_size); vi_state_bytes *= ci_netif_num_vis(ni); #if CI_CFG_TCP_SHARED_LOCAL_PORTS if( ci_netif_should_allocate_tcp_shared_local_ports(ni) ) { - no_active_wild_pools = is_power_of_2(cluster_size) ? cluster_size : - RSS_HASH_SIZE; + no_active_wild_pools = + is_power_of_2(cluster_size) ? cluster_size : RSS_HASH_SIZE; /* Max active wilds is bounded by the number of local IPs and * shared local ports. */ - no_active_wild_table_entries = - CI_MIN((ci_uint32)CI_CFG_MAX_LOCAL_IPADDRS, - CI_MAX(NI_OPTS(ni).tcp_shared_local_ports, - NI_OPTS(ni).tcp_shared_local_ports_max)); + no_active_wild_table_entries = CI_MIN((ci_uint32) CI_CFG_MAX_LOCAL_IPADDRS, + CI_MAX(NI_OPTS(ni).tcp_shared_local_ports, + NI_OPTS(ni).tcp_shared_local_ports_max)); /* Quadruple the size to ensure the hash table does not get too full. */ no_active_wild_table_entries <<= 2; /* Round up to a power of two. 
*/ if( no_active_wild_table_entries > 1 ) no_active_wild_table_entries = - 1u << (__fls(no_active_wild_table_entries - 1) + 1); - } - else { + 1u << (__fls(no_active_wild_table_entries - 1) + 1); + } else { no_active_wild_pools = 0; no_active_wild_table_entries = 0; } #endif - filter_table_size = sizeof(ci_netif_filter_table) + - sizeof(ci_netif_filter_table_entry_fast) * (no_table_entries - 1); - filter_table_ext_size = sizeof(ci_netif_filter_table_entry_ext) * - no_table_entries; + filter_table_size = + sizeof(ci_netif_filter_table) + + sizeof(ci_netif_filter_table_entry_fast) * (no_table_entries - 1); + filter_table_ext_size = + sizeof(ci_netif_filter_table_entry_ext) * no_table_entries; #if CI_CFG_IPV6 - ip6_filter_table_size = sizeof(ci_ip6_netif_filter_table) + - sizeof(ci_ip6_netif_filter_table_entry) * (no_table_entries - 1); + ip6_filter_table_size = + sizeof(ci_ip6_netif_filter_table) + + sizeof(ci_ip6_netif_filter_table_entry) * (no_table_entries - 1); #endif /* Allocate shmbuf for netif state. When calculating the size, it's @@ -2323,9 +2278,8 @@ allocate_netif_resources(ci_resource_onload_alloc_t* alloc, /* This is the worst-case amount of space needed for DMA addrs, i.e. when * we're never able to find any higher-order pages. It's extremely likely * that only a fraction of this space will ever be used: */ - dma_addrs_bytes = ni->pkt_sets_max * CI_CFG_MAX_INTERFACES * - PKTS_PER_SET / (PAGE_SIZE / CI_CFG_PKT_BUF_SIZE) * - sizeof(ef_addr); + dma_addrs_bytes = ni->pkt_sets_max * CI_CFG_MAX_INTERFACES * PKTS_PER_SET / + (PAGE_SIZE / CI_CFG_PKT_BUF_SIZE) * sizeof(ef_addr); sz += dma_addrs_bytes; sz = CI_ROUND_UP(sz, __alignof__(struct oo_p_dllink)); #if CI_CFG_TCP_SHARED_LOCAL_PORTS @@ -2372,7 +2326,7 @@ allocate_netif_resources(ci_resource_onload_alloc_t* alloc, /* [shmbuf] backs the shared stack state and the socket buffers. First, * count the pages required for the latter. 
*/ i = (NI_OPTS(ni).max_ep_bufs / EP_BUF_PER_PAGE) >> - OO_SHARED_BUFFER_CHUNK_ORDER; + OO_SHARED_BUFFER_CHUNK_ORDER; /* Now add in the pages for the shared state. */ i += sz / OO_SHARED_BUFFER_CHUNK_SIZE; @@ -2380,10 +2334,12 @@ allocate_netif_resources(ci_resource_onload_alloc_t* alloc, * for the sockets). These pages get zeroed, so all fields in the shared * state can be assumed to have been zero-initialised. */ rc = oo_shmbuf_alloc(&ni->shmbuf, OO_SHARED_BUFFER_CHUNK_ORDER, i, - sz / OO_SHARED_BUFFER_CHUNK_SIZE); + sz / OO_SHARED_BUFFER_CHUNK_SIZE); if( rc < 0 ) { - OO_DEBUG_ERR(ci_log("%s: failed to alloc shmbuf for shared state and " - "socket buffers (%d)", __FUNCTION__, rc)); + OO_DEBUG_ERR( + ci_log("%s: failed to alloc shmbuf for shared state and " + "socket buffers (%d)", + __FUNCTION__, rc)); goto fail1; } @@ -2413,7 +2369,7 @@ allocate_netif_resources(ci_resource_onload_alloc_t* alloc, ns->hwport_mask = ni->hwport_mask; memset(ns->intf_i_to_hwport, 0, sizeof(ns->intf_i_to_hwport)); memcpy(ns->hwport_to_intf_i, ni->hwport_to_intf_i, - sizeof(ns->hwport_to_intf_i)); + sizeof(ns->hwport_to_intf_i)); for( i = 0; i < CI_CFG_MAX_HWPORTS; ++i ) if( ns->hwport_to_intf_i[i] >= 0 ) ns->intf_i_to_hwport[(int) ns->hwport_to_intf_i[i]] = i; @@ -2439,7 +2395,7 @@ allocate_netif_resources(ci_resource_onload_alloc_t* alloc, ns->active_wild_pools_n = no_active_wild_pools; ns_ofs += (sizeof(struct oo_p_dllink) * ns->active_wild_table_entries_n * - ns->active_wild_pools_n); + ns->active_wild_pools_n); #endif ns_ofs = CI_ROUND_UP(ns_ofs, __alignof__(ci_tcp_prev_seq_t)); @@ -2476,7 +2432,7 @@ allocate_netif_resources(ci_resource_onload_alloc_t* alloc, #endif /* The last addition to ns_ofs is not really used */ - (void)ns_ofs; + (void) ns_ofs; ns->vi_state_bytes = vi_state_bytes; @@ -2496,15 +2452,15 @@ allocate_netif_resources(ci_resource_onload_alloc_t* alloc, #endif #if CI_CFG_UL_INTERRUPT_HELPER - oo_ringbuffer_state_init(&ns->closed_eps, OO_CLOSED_EPS_RING_SIZE, - 
sizeof(oo_sp)); + oo_ringbuffer_state_init( + &ns->closed_eps, OO_CLOSED_EPS_RING_SIZE, sizeof(oo_sp)); oo_ringbuffer_init(&ni->closed_eps, &ns->closed_eps, - (void*)((char*) ns + ns->closed_eps_ofs)); + (void*) ((char*) ns + ns->closed_eps_ofs)); oo_ringbuffer_state_init(&ns->sw_filter_ops, OO_SW_FILTER_OPS_SIZE, - sizeof(struct oo_sw_filter_op)); + sizeof(struct oo_sw_filter_op)); oo_ringbuffer_init(&ni->sw_filter_ops, &ns->sw_filter_ops, - (void*)((char*) ns + ns->sw_filter_ofs)); + (void*) ((char*) ns + ns->sw_filter_ofs)); #endif ni->packets->sets_max = ni->pkt_sets_max; @@ -2522,21 +2478,23 @@ allocate_netif_resources(ci_resource_onload_alloc_t* alloc, /* The shared netif-state buffer and EP buffers are part of the mem mmap */ trs->mem_mmap_bytes += ns->netif_mmap_bytes; - OO_DEBUG_MEMSIZE(ci_log( - "added %d (0x%x) bytes for shared netif state and ep buffers, " - "reached %d (0x%x)", ns->netif_mmap_bytes, ns->netif_mmap_bytes, - trs->mem_mmap_bytes, trs->mem_mmap_bytes)); + OO_DEBUG_MEMSIZE( + ci_log("added %d (0x%x) bytes for shared netif state and ep buffers, " + "reached %d (0x%x)", + ns->netif_mmap_bytes, ns->netif_mmap_bytes, trs->mem_mmap_bytes, + trs->mem_mmap_bytes)); if( trs->name[0] == '\0' ) snprintf(ns->pretty_name, sizeof(ns->pretty_name), "%d", ns->stack_id); else - snprintf(ns->pretty_name, sizeof(ns->pretty_name), "%d,%s", - ns->stack_id, trs->name); + snprintf(ns->pretty_name, sizeof(ns->pretty_name), "%d,%s", ns->stack_id, + trs->name); /* Allocate an eplock resource. 
*/ rc = eplock_ctor(ni); if( rc < 0 ) { - OO_DEBUG_ERR(ci_log("tcp_helper_alloc: failed to allocate EPLOCK (%d)", rc)); + OO_DEBUG_ERR( + ci_log("tcp_helper_alloc: failed to allocate EPLOCK (%d)", rc)); goto fail2; } ni->state->lock.lock = CI_EPLOCK_LOCKED; @@ -2549,11 +2507,13 @@ allocate_netif_resources(ci_resource_onload_alloc_t* alloc, init_waitqueue_head(&trs->ulh_waitq); trs->ulh_flags = 0; - { CI_BUILD_ASSERT(CI_CFG_MAX_INTERFACES<=32); } + { + CI_BUILD_ASSERT(CI_CFG_MAX_INTERFACES <= 32); + } ci_atomic_set(&trs->intr_intfs, 0); if( NI_OPTS(&trs->netif).int_driven ) - ci_atomic_set(&trs->wake_intfs, - (1ull << oo_stack_intf_max(&trs->netif)) - 1); + ci_atomic_set( + &trs->wake_intfs, (1ull << oo_stack_intf_max(&trs->netif)) - 1); else ci_atomic_set(&trs->wake_intfs, 0); #endif @@ -2566,9 +2526,9 @@ allocate_netif_resources(ci_resource_onload_alloc_t* alloc, return 0; - fail2: +fail2: oo_shmbuf_free(&ni->shmbuf); - fail1: +fail1: LOG_NC(ci_log("failed to allocate tcp_helper resources (%d)", rc)); return rc; } @@ -2576,7 +2536,7 @@ allocate_netif_resources(ci_resource_onload_alloc_t* alloc, #if CI_CFG_TCP_OFFLOAD_RECYCLER static void destroy_ceph_app(struct efrm_ext* plugin, uint32_t app_id) { - struct xsn_ceph_destroy_app param = {.in_app_id = cpu_to_le32(app_id)}; + struct xsn_ceph_destroy_app param = { .in_app_id = cpu_to_le32(app_id) }; int rc = efrm_ext_msg(plugin, XSN_CEPH_DESTROY_APP, ¶m, sizeof(param)); if( rc ) { OO_DEBUG_ERR(ci_log("%s: Destroy Ceph app failed (%d)", __FUNCTION__, rc)); @@ -2585,8 +2545,7 @@ static void destroy_ceph_app(struct efrm_ext* plugin, uint32_t app_id) } #endif -static int -create_plugin_rx_app(tcp_helper_resource_t* trs) +static int create_plugin_rx_app(tcp_helper_resource_t* trs) { #if CI_CFG_TCP_OFFLOAD_RECYCLER ci_netif* ni = &trs->netif; @@ -2595,12 +2554,13 @@ create_plugin_rx_app(tcp_helper_resource_t* trs) ni->state->plugin_mmap_bytes = 0; OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - ni->nic_hw[intf_i].plugin_rx = NULL; + 
ni->nic_hw[intf_i].plugin_rx = NULL; if( NI_OPTS(ni).tcp_offload_plugin != CITP_TCP_OFFLOAD_CEPH ) return 0; - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { struct efrm_pd* pd = efrm_vi_get_pd(tcp_helper_vi(trs, intf_i)); struct xsn_ceph_create_app create; struct ef_vi* tcp_vi; @@ -2625,13 +2585,13 @@ create_plugin_rx_app(tcp_helper_resource_t* trs) rc = efrm_ext_get_meta_global(plugin, &meta); if( rc ) { OO_DEBUG_ERR(ci_log("%s: Failed to get plugin metadata (%d, %d)", - __FUNCTION__, intf_i, rc)); + __FUNCTION__, intf_i, rc)); goto fail_plugin_1; } tcp_vi = &ni->nic_hw[intf_i].vis[CI_Q_ID_TCP_RECYCLER]; ceph_vi = &ni->nic_hw[intf_i].vis[CI_Q_ID_TCP_APP]; - create = (struct xsn_ceph_create_app){ + create = (struct xsn_ceph_create_app) { .tcp.in_vi_id = cpu_to_le16(ef_vi_instance(tcp_vi)), .in_meta_vi_id = cpu_to_le16(ef_vi_instance(ceph_vi)), .in_meta_buflen = cpu_to_le16(ef_vi_receive_buffer_len(ceph_vi) - @@ -2639,8 +2599,8 @@ create_plugin_rx_app(tcp_helper_resource_t* trs) }; rc = efrm_ext_msg(plugin, XSN_CEPH_CREATE_APP, &create, sizeof(create)); if( rc ) { - OO_DEBUG_ERR(ci_log("%s: CEPH_CREATE_APP failed (%d, %d)", - __FUNCTION__, intf_i, rc)); + OO_DEBUG_ERR(ci_log( + "%s: CEPH_CREATE_APP failed (%d, %d)", __FUNCTION__, intf_i, rc)); goto fail_plugin_1; } app_id = le32_to_cpu(create.tcp.out_app_id); @@ -2648,15 +2608,13 @@ create_plugin_rx_app(tcp_helper_resource_t* trs) ni->nic_hw[intf_i].plugin_io = NULL; if( meta.mapped_csr_size ) { size_t bar_off = ef10_tx_dma_page_base(nic->vi_stride, - trs->nic[intf_i].thn_vi_rs[CI_Q_ID_TCP_APP]-> - rs.rs_instance); + trs->nic[intf_i].thn_vi_rs[CI_Q_ID_TCP_APP]->rs.rs_instance); bar_off += meta.mapped_csr_offset; - ni->nic_hw[intf_i].plugin_io = ci_ioremap(nic->ctr_ap_addr + - (bar_off & PAGE_MASK), - meta.mapped_csr_size); + ni->nic_hw[intf_i].plugin_io = ci_ioremap( + nic->ctr_ap_addr + (bar_off & PAGE_MASK), meta.mapped_csr_size); if( ! 
ni->nic_hw[intf_i].plugin_io ) { OO_DEBUG_ERR(ci_log("%s: Ceph app failed to map VI window (%d)", - __FUNCTION__, intf_i)); + __FUNCTION__, intf_i)); destroy_ceph_app(plugin, app_id); fail_plugin_1: efrm_ext_release(plugin); @@ -2671,8 +2629,8 @@ create_plugin_rx_app(tcp_helper_resource_t* trs) got_nic = true; } if( ! got_nic ) { - OO_DEBUG_ERR(ci_log("%s: no EF100 NICs have the requested plugin", - __FUNCTION__)); + OO_DEBUG_ERR( + ci_log("%s: no EF100 NICs have the requested plugin", __FUNCTION__)); return -ENOTSUPP; } #endif @@ -2680,13 +2638,13 @@ create_plugin_rx_app(tcp_helper_resource_t* trs) } -static void -destroy_plugin_rx_app(tcp_helper_resource_t* trs) +static void destroy_plugin_rx_app(tcp_helper_resource_t* trs) { #if CI_CFG_TCP_OFFLOAD_RECYCLER ci_netif* ni = &trs->netif; int intf_i; - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { struct efrm_ext* plugin = ni->nic_hw[intf_i].plugin_rx; if( ! plugin ) continue; @@ -2699,8 +2657,7 @@ destroy_plugin_rx_app(tcp_helper_resource_t* trs) #if CI_CFG_TX_CRC_OFFLOAD -static ci_uint32 -get_plugin_tx_crc_table_region(struct oo_nic* nic) +static ci_uint32 get_plugin_tx_crc_table_region(struct oo_nic* nic) { ci_uint32 region_id; ci_uint8 first_set_bit; @@ -2721,8 +2678,8 @@ get_plugin_tx_crc_table_region(struct oo_nic* nic) return region_id; } -static void -release_plugin_tx_crc_table_region(struct oo_nic* nic, ci_uint32 region_id) +static void release_plugin_tx_crc_table_region( + struct oo_nic* nic, ci_uint32 region_id) { ci_irqlock_state_t lock_flags; @@ -2732,8 +2689,7 @@ release_plugin_tx_crc_table_region(struct oo_nic* nic, ci_uint32 region_id) } #endif -static int -create_plugin_tx_app(tcp_helper_resource_t* trs) +static int create_plugin_tx_app(tcp_helper_resource_t* trs) { #if CI_CFG_TX_CRC_OFFLOAD ci_netif* ni = &trs->netif; @@ -2741,12 +2697,13 @@ create_plugin_tx_app(tcp_helper_resource_t* trs) bool got_nic = false; OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - 
ni->nic_hw[intf_i].plugin_tx = NULL; + ni->nic_hw[intf_i].plugin_tx = NULL; if( NI_OPTS(ni).tcp_offload_plugin != CITP_TCP_OFFLOAD_NVME ) return 0; - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { ci_uint32 region_id; #if ! CI_CFG_NVME_LOCAL_CRC_MODE struct efhw_nic* nic; @@ -2766,13 +2723,13 @@ create_plugin_tx_app(tcp_helper_resource_t* trs) continue; tx_vi = &ni->nic_hw[intf_i].vis[CI_Q_ID_NORMAL]; - create = (struct xsn_storage_tx_create_app) { - .vi_id = cpu_to_le16(ef_vi_instance(tx_vi)) - }; - rc = efrm_ext_msg(plugin, XSN_STORAGE_TX_CREATE_APP, &create, sizeof(create)); + create = (struct xsn_storage_tx_create_app) { .vi_id = cpu_to_le16( + ef_vi_instance(tx_vi)) }; + rc = efrm_ext_msg( + plugin, XSN_STORAGE_TX_CREATE_APP, &create, sizeof(create)); if( rc ) { - OO_DEBUG_ERR(ci_log("%s: TX_CREATE_APP failed (%d, %d)", - __FUNCTION__, intf_i, rc)); + OO_DEBUG_ERR(ci_log( + "%s: TX_CREATE_APP failed (%d, %d)", __FUNCTION__, intf_i, rc)); efrm_ext_release(plugin); continue; } @@ -2785,17 +2742,16 @@ create_plugin_tx_app(tcp_helper_resource_t* trs) got_nic = true; } if( ! got_nic ) { - OO_DEBUG_ERR(ci_log("%s: no EF100 NICs have the requested plugin", - __FUNCTION__)); + OO_DEBUG_ERR( + ci_log("%s: no EF100 NICs have the requested plugin", __FUNCTION__)); return -ENOTSUPP; } #endif - return 0; + return 0; } -static void -destroy_plugin_tx_app(tcp_helper_resource_t* trs) +static void destroy_plugin_tx_app(tcp_helper_resource_t* trs) { #if CI_CFG_TX_CRC_OFFLOAD ci_netif* ni = &trs->netif; @@ -2804,7 +2760,8 @@ destroy_plugin_tx_app(tcp_helper_resource_t* trs) if( NI_OPTS(ni).tcp_offload_plugin != CITP_TCP_OFFLOAD_NVME ) return; - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { #if ! CI_CFG_NVME_LOCAL_CRC_MODE struct efrm_ext* plugin = ni->nic_hw[intf_i].plugin_tx; if( ! 
plugin ) @@ -2812,17 +2769,15 @@ destroy_plugin_tx_app(tcp_helper_resource_t* trs) efrm_ext_release(plugin); ni->nic_hw[intf_i].plugin_tx = NULL; #endif - release_plugin_tx_crc_table_region(trs->nic[intf_i].thn_oo_nic, - ni->nic_hw[intf_i].plugin_tx_region_id); + release_plugin_tx_crc_table_region( + trs->nic[intf_i].thn_oo_nic, ni->nic_hw[intf_i].plugin_tx_region_id); } #endif } -static int -allocate_netif_hw_resources(ci_resource_onload_alloc_t* alloc, - tcp_helper_cluster_t* thc, - tcp_helper_resource_t* trs) +static int allocate_netif_hw_resources(ci_resource_onload_alloc_t* alloc, + tcp_helper_cluster_t* thc, tcp_helper_resource_t* trs) { ci_netif* ni = &trs->netif; ci_netif_state* ns = ni->state; @@ -2833,9 +2788,10 @@ allocate_netif_hw_resources(ci_resource_onload_alloc_t* alloc, if( NI_OPTS(ni).prealloc_packets && trs->thc_efct_memfd ) { int n_rxqs = 0; - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { struct efhw_nic* nic = - efrm_client_get_nic(trs->nic[intf_i].thn_oo_nic->efrm_client); + efrm_client_get_nic(trs->nic[intf_i].thn_oo_nic->efrm_client); n_rxqs += efhw_nic_max_shared_rxqs(nic); } if( n_rxqs ) { @@ -2843,37 +2799,42 @@ allocate_netif_hw_resources(ci_resource_onload_alloc_t* alloc, rc = vfs_fallocate(trs->thc_efct_memfd, 0, 0, bytes); if( rc < 0 ) { if( rc == -ENOSPC ) - OO_DEBUG_ERR(ci_log("tcp_helper_alloc: fallocate hugepage memory " - "for EF_PREALLOC_PACKETS failed (%d queues): " - "ENOSPC. Check /proc/sys/vm/nr_hugepages", - n_rxqs)); + OO_DEBUG_ERR( + ci_log("tcp_helper_alloc: fallocate hugepage memory " + "for EF_PREALLOC_PACKETS failed (%d queues): " + "ENOSPC. 
Check /proc/sys/vm/nr_hugepages", + n_rxqs)); else - OO_DEBUG_ERR(ci_log("tcp_helper_alloc: fallocate hugepage memory " - "for EF_PREALLOC_PACKETS failed (%d queues): %d", - n_rxqs, rc)); + OO_DEBUG_ERR( + ci_log("tcp_helper_alloc: fallocate hugepage memory " + "for EF_PREALLOC_PACKETS failed (%d queues): %d", + n_rxqs, rc)); goto fail0; } } } rc = allocate_vis(trs, alloc, ns + 1, thc); - if( rc < 0 ) goto fail1; + if( rc < 0 ) + goto fail1; sz = sizeof(ci_pkt_bufs) * ni->pkt_sets_max; if( (ni->pkt_bufs = vmalloc(sz)) == NULL ) { - OO_DEBUG_ERR(ci_log("tcp_helper_alloc: failed to allocate iobufset table")); + OO_DEBUG_ERR( + ci_log("tcp_helper_alloc: failed to allocate iobufset table")); rc = -ENOMEM; goto fail4; } memset(ni->pkt_bufs, 0, sz); OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - ni->nic_hw[intf_i].pkt_rs = NULL; + ni->nic_hw[intf_i].pkt_rs = NULL; - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { if( (ni->nic_hw[intf_i].pkt_rs = ci_alloc(sz)) == NULL ) { - OO_DEBUG_ERR(ci_log("%s: failed to allocate iobufset tables", - __FUNCTION__)); + OO_DEBUG_ERR( + ci_log("%s: failed to allocate iobufset tables", __FUNCTION__)); rc = -ENOMEM; goto fail5; } @@ -2893,14 +2854,14 @@ allocate_netif_hw_resources(ci_resource_onload_alloc_t* alloc, ns->efct_shm_mmap_bytes = trs->efct_shm_mmap_bytes; ns->timesync_bytes = PAGE_SIZE; - OO_DEBUG_MEMSIZE(ci_log("helper=%u map_bytes=%u (0x%x)", - trs->id, - trs->mem_mmap_bytes, trs->mem_mmap_bytes)); - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_DEBUG_MEMSIZE(ci_log("helper=%u map_bytes=%u (0x%x)", trs->id, + trs->mem_mmap_bytes, trs->mem_mmap_bytes)); + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { int i; for( i = 0; i < ci_netif_num_vis(ni); ++i ) - LOG_NC(ci_log("VI[%d]=%d", i, - ef_vi_instance(&ni->nic_hw[intf_i].vis[i]))); + LOG_NC( + ci_log("VI[%d]=%d", i, ef_vi_instance(&ni->nic_hw[intf_i].vis[i]))); } rc = create_plugin_rx_app(trs); @@ -2917,23 +2878,22 @@ 
allocate_netif_hw_resources(ci_resource_onload_alloc_t* alloc, return 0; - fail6: - destroy_plugin_rx_app(trs); - fail5: +fail6: + destroy_plugin_rx_app(trs); +fail5: OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - if( ni->nic_hw[intf_i].pkt_rs ) - ci_free(ni->nic_hw[intf_i].pkt_rs); + if( ni->nic_hw[intf_i].pkt_rs ) + ci_free(ni->nic_hw[intf_i].pkt_rs); vfree(ni->pkt_bufs); - fail4: +fail4: release_vi(trs); - fail1: - fail0: +fail1: +fail0: return rc; } -static void -release_ep_tbl(tcp_helper_resource_t* trs) +static void release_ep_tbl(tcp_helper_resource_t* trs) { ci_netif* ni = &trs->netif; int i; @@ -2956,11 +2916,9 @@ release_ep_tbl(tcp_helper_resource_t* trs) ci_vfree(ni->ep_tbl); ni->ep_tbl = NULL; } - } -static void -release_netif_resources(tcp_helper_resource_t* trs) +static void release_netif_resources(tcp_helper_resource_t* trs) { ci_netif* ni = &trs->netif; int i; @@ -2975,10 +2933,8 @@ release_netif_resources(tcp_helper_resource_t* trs) oo_shmbuf_free(&ni->shmbuf); } -static void -release_netif_hw_resources(tcp_helper_resource_t* trs) +static void release_netif_hw_resources(tcp_helper_resource_t* trs) { - OO_DEBUG_SHM(ci_log("%s:", __func__)); destroy_plugin_rx_app(trs); @@ -2990,11 +2946,11 @@ release_netif_hw_resources(tcp_helper_resource_t* trs) } -int -oo_version_check(const char* version, const char* uk_intf_ver, int debug_lib) +int oo_version_check( + const char* version, const char* uk_intf_ver, int debug_lib) { - return oo_version_check_impl(version, uk_intf_ver, debug_lib, - oo_uk_intf_ver); + return oo_version_check_impl( + version, uk_intf_ver, debug_lib, oo_uk_intf_ver); } @@ -3007,7 +2963,7 @@ static int /* bool */ oo_nic_is_vf(const struct oo_nic* onic) ci_inline int oo_dev_get_by_name(tcp_helper_resource_t* trs, const char* name) { - struct net_device *nd; + struct net_device* nd; int ifindex; #ifdef EFRM_DEV_GET_BY_NAME_TAKES_NS nd = dev_get_by_name(trs->netif.cplane->cp_netns, name); @@ -3021,9 +2977,10 @@ ci_inline int 
oo_dev_get_by_name(tcp_helper_resource_t* trs, const char* name) return ifindex; } -static const char IFACELIST_DELIM[] = " \t\n\v\f\r"; /* inspired by isspace() */ +static const char IFACELIST_DELIM[] = + " \t\n\v\f\r"; /* inspired by isspace() */ static int oo_get_listed_hwports(tcp_helper_resource_t* trs, const char* list, - cicp_hwport_mask_t* hwports_out, const char* tag) + cicp_hwport_mask_t* hwports_out, const char* tag) { ci_netif* ni = &trs->netif; cicp_hwport_mask_t listed_hwports = 0; @@ -3035,13 +2992,13 @@ static int oo_get_listed_hwports(tcp_helper_resource_t* trs, const char* list, running = dup = kstrdup(list, GFP_KERNEL); if( dup == NULL ) { ci_log("%s: WARNING no memory to parse interface %s, assuming empty\n", - __FUNCTION__, tag); + __FUNCTION__, tag); return 1; } while( 1 ) { int ifindex; - + token = strsep(&running, IFACELIST_DELIM); if( token == NULL ) break; @@ -3053,20 +3010,20 @@ static int oo_get_listed_hwports(tcp_helper_resource_t* trs, const char* list, cicp_hwport_mask_t hwport_mask = 0; int rc; rc = oo_cp_find_llap(ni->cplane, ifindex, NULL, NULL, - &hwport_mask /* rx_hwports */, NULL, NULL); + &hwport_mask /* rx_hwports */, NULL, NULL); if( rc == 0 && hwport_mask != 0 ) { listed_hwports |= hwport_mask; + } else { + ci_log( + "%s: WARNING interface %s constains %s, which " + " is not identified as Solarflare interface", + __FUNCTION__, tag, token); } - else { - ci_log("%s: WARNING interface %s constains %s, which " - " is not identified as Solarflare interface", - __FUNCTION__, tag, token); - } - } - else { - ci_log("%s: WARNING interface %s contains %s, which " - "is not known an interface", - __FUNCTION__, tag, token); + } else { + ci_log( + "%s: WARNING interface %s contains %s, which " + "is not known an interface", + __FUNCTION__, tag, token); } } *hwports_out = listed_hwports; @@ -3103,30 +3060,28 @@ static int oo_get_nics(tcp_helper_resource_t* trs, int ifindices_len) for( i = 0; i < CI_CFG_MAX_HWPORTS; ++i ) 
ni->hwport_to_intf_i[i] = (ci_int8) -1; - + for( i = 0; i < CI_CFG_MAX_INTERFACES; ++i ) ni->intf_i_to_hwport[i] = (ci_int8) -1; hwport_mask = oo_cp_get_hwports(ni->cplane); - if( oo_get_listed_hwports(trs, NI_OPTS(ni).iface_whitelist, - &whitelist_mask, "whitelist") == 0 ) - { + if( oo_get_listed_hwports(trs, NI_OPTS(ni).iface_whitelist, &whitelist_mask, + "whitelist") == 0 ) { if( (whitelist_mask & ~hwport_mask) != 0 ) { ci_log("%s: WARNING: interface whitelist specifies unlicensed NICs", - __FUNCTION__); + __FUNCTION__); } /* We only allow whitelist to specify subset of licensed hwports * present in current namespace. */ hwport_mask &= whitelist_mask; } - if( oo_get_listed_hwports(trs, NI_OPTS(ni).iface_blacklist, - &whitelist_mask, "blacklist") == 0 ) - { + if( oo_get_listed_hwports(trs, NI_OPTS(ni).iface_blacklist, &whitelist_mask, + "blacklist") == 0 ) { if( (whitelist_mask & ~hwport_mask) != 0 ) { ci_log("%s: WARNING: interface blacklist specifies unlicensed NICs", - __FUNCTION__); + __FUNCTION__); } hwport_mask &= ~whitelist_mask; } @@ -3161,11 +3116,9 @@ static int oo_get_nics(tcp_helper_resource_t* trs, int ifindices_len) } rtnl_unlock(); - } - else if( ifindices_len == 0 ) { + } else if( ifindices_len == 0 ) { ci_assert_equal(trs->netif.nic_n, 0); - } - else { + } else { /* This code path is not used yet, but this error message will make it * obvious what needs doing if we decide to use it in future... */ @@ -3175,21 +3128,22 @@ static int oo_get_nics(tcp_helper_resource_t* trs, int ifindices_len) } if( trs->netif.nic_n == 0 && ifindices_len != 0 ) { - ci_log("%s: ERROR: No Solarflare network interfaces are active/UP,\n" - "or they are configured with packed stream firmware, disabled,\n" - "or unlicensed for Onload. Please check your configuration.", - __FUNCTION__); + ci_log( + "%s: ERROR: No Solarflare network interfaces are active/UP,\n" + "or they are configured with packed stream firmware, disabled,\n" + "or unlicensed for Onload. 
Please check your configuration.", + __FUNCTION__); return -ENODEV; } ni->hwport_mask = hwport_mask; return 0; - fail: +fail: return rc; } -ci_inline void efab_notify_stacklist_change(tcp_helper_resource_t *thr) +ci_inline void efab_notify_stacklist_change(tcp_helper_resource_t* thr) { /* here we should notify tcpdump process that the stack list have * changed */ @@ -3204,7 +3158,7 @@ ci_inline void efab_notify_stacklist_change(tcp_helper_resource_t *thr) #if ! CI_CFG_UL_INTERRUPT_HELPER static int tcp_helper_reprime_is_needed(ci_netif* ni) { - ci_assert_equal( NI_OPTS(ni).int_driven, 0); + ci_assert_equal(NI_OPTS(ni).int_driven, 0); if( ci_netif_is_spinner(ni) ) /* Don't reprime if someone is spinning -- let them poll the stack. */ @@ -3227,14 +3181,13 @@ static int tcp_helper_reprime_is_needed(ci_netif* ni) * this function is called, because non-atomic work item might be already * running and using the locks. */ -void -tcp_helper_defer_dl2work(tcp_helper_resource_t* trs, ci_uint32 flag) +void tcp_helper_defer_dl2work(tcp_helper_resource_t* trs, ci_uint32 flag) { - OO_DEBUG_TCPH(ci_log("%s: [%u] defer locks with flag=%x", - __FUNCTION__, trs->id, flag)); + OO_DEBUG_TCPH(ci_log( + "%s: [%u] defer locks with flag=%x", __FUNCTION__, trs->id, flag)); ci_assert(ci_netif_is_locked(&trs->netif)); CITP_STATS_NETIF_INC(&trs->netif, stack_locks_deferred); - trs->netif.flags &=~ CI_NETIF_FLAG_IN_DL_CONTEXT; + trs->netif.flags &= ~CI_NETIF_FLAG_IN_DL_CONTEXT; /* We need write memory barrier here. However, both x86 and ppc * implementations of ci_atomic32_or() include a sort of write memory * barrier at the beginning. 
@@ -3249,24 +3202,22 @@ tcp_helper_defer_dl2work(tcp_helper_resource_t* trs, ci_uint32 flag) queue_work(trs->wq, &trs->non_atomic_work); } -static void -oo_inject_packets_kernel_force(ci_netif* ni) +static void oo_inject_packets_kernel_force(ci_netif* ni) { ci_assert(ci_netif_is_locked(ni)); if( kernel_packets_pending(ni->state) == 0 ) return; - ef_eplock_holder_set_flag(&ni->state->lock, - CI_EPLOCK_NETIF_KERNEL_PACKETS); + ef_eplock_holder_set_flag(&ni->state->lock, CI_EPLOCK_NETIF_KERNEL_PACKETS); } -static void tcp_helper_do_non_atomic(struct work_struct *data) +static void tcp_helper_do_non_atomic(struct work_struct* data) { - tcp_helper_resource_t* trs = container_of(data, tcp_helper_resource_t, - non_atomic_work); - const unsigned handled_aflags = (OO_THR_EP_AFLAG_CLEAR_FILTERS | - OO_THR_EP_AFLAG_NEED_FREE | - OO_THR_EP_AFLAG_TCP_OFFLOAD_ISN); + tcp_helper_resource_t* trs = + container_of(data, tcp_helper_resource_t, non_atomic_work); + const unsigned handled_aflags = + (OO_THR_EP_AFLAG_CLEAR_FILTERS | OO_THR_EP_AFLAG_NEED_FREE | + OO_THR_EP_AFLAG_TCP_OFFLOAD_ISN); ci_irqlock_state_t lock_flags; tcp_helper_endpoint_t* ep; unsigned ep_aflags, new_aflags; @@ -3283,21 +3234,21 @@ static void tcp_helper_do_non_atomic(struct work_struct *data) ci_sllist_init(&trs->non_atomic_list); ci_irqlock_unlock(&trs->lock, &lock_flags); while( (link = ci_sllist_try_pop(&list)) != NULL ) { - ep = CI_CONTAINER(tcp_helper_endpoint_t, non_atomic_link , link); + ep = CI_CONTAINER(tcp_helper_endpoint_t, non_atomic_link, link); again: - do { /* grab and clear flags telling us what to do */ + do { /* grab and clear flags telling us what to do */ ep_aflags = ep->ep_aflags; new_aflags = ep_aflags & ~handled_aflags; } while( ci_cas32_fail(&ep->ep_aflags, ep_aflags, new_aflags) ); OO_DEBUG_TCPH(ci_log("%s: [%u:%d] aflags=%x", __FUNCTION__, trs->id, - OO_SP_FMT(ep->id), ep_aflags)); + OO_SP_FMT(ep->id), ep_aflags)); if( ep_aflags & OO_THR_EP_AFLAG_CLEAR_FILTERS ) 
tcp_helper_endpoint_clear_filters(ep, 0); if( ep_aflags & OO_THR_EP_AFLAG_NEED_FREE ) { /* make sure that the filters are released: */ tcp_helper_endpoint_clear_filters(ep, 0); - citp_waitable_obj_free_nnl(&trs->netif, - SP_TO_WAITABLE(&trs->netif, ep->id)); + citp_waitable_obj_free_nnl( + &trs->netif, SP_TO_WAITABLE(&trs->netif, ep->id)); } #if CI_CFG_TCP_OFFLOAD_RECYCLER if( ep_aflags & OO_THR_EP_AFLAG_TCP_OFFLOAD_ISN ) { @@ -3327,8 +3278,8 @@ static void tcp_helper_do_non_atomic(struct work_struct *data) * lock. */ if( trs->trs_aflags & OO_THR_AFLAG_DEFERRED_TRUSTED ) { ci_uint32 trs_aflags; - OO_DEBUG_TCPH(ci_log("%s: [%u] deferred locks trs_aflags=%d", - __FUNCTION__, trs->id, trs->trs_aflags)); + OO_DEBUG_TCPH(ci_log("%s: [%u] deferred locks trs_aflags=%d", __FUNCTION__, + trs->id, trs->trs_aflags)); ci_assert(ci_netif_is_locked(&trs->netif)); ci_assert(oo_trusted_lock_is_locked(trs)); @@ -3346,32 +3297,30 @@ static void tcp_helper_do_non_atomic(struct work_struct *data) if( trs_aflags & OO_THR_AFLAG_POLL_AND_PRIME ) { int intf_i; - OO_DEBUG_TCPH(ci_log("%s: [%u] deferred POLL_AND_PRIME", - __FUNCTION__, trs->id)); + OO_DEBUG_TCPH( + ci_log("%s: [%u] deferred POLL_AND_PRIME", __FUNCTION__, trs->id)); trs->netif.state->poll_did_wake = 0; ci_netif_poll(&trs->netif); if( NI_OPTS(&trs->netif).int_driven ) { ci_netif* ni = &trs->netif; OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - if( ci_bit_test_and_clear(&ni->state->evq_prime_deferred, intf_i) ) - tcp_helper_request_wakeup_nic(trs, intf_i); - } - else if( ! trs->netif.state->poll_did_wake && - tcp_helper_reprime_is_needed(&trs->netif) ) { + if( ci_bit_test_and_clear(&ni->state->evq_prime_deferred, intf_i) ) + tcp_helper_request_wakeup_nic(trs, intf_i); + } else if( ! 
trs->netif.state->poll_did_wake && + tcp_helper_reprime_is_needed(&trs->netif) ) { tcp_helper_request_wakeup(trs); CITP_STATS_NETIF_INC(&trs->netif, interrupt_primes); } } efab_tcp_helper_netif_unlock(trs, 0); - } - else if( need_unlock_shared ) + } else if( need_unlock_shared ) efab_eplock_unlock_and_wake(&trs->netif, 0); } -void tcp_helper_endpoint_queue_non_atomic(tcp_helper_endpoint_t* ep, - unsigned why_aflag) +void tcp_helper_endpoint_queue_non_atomic( + tcp_helper_endpoint_t* ep, unsigned why_aflag) { ci_irqlock_state_t lock_flags; unsigned prev_aflags; @@ -3389,11 +3338,10 @@ void tcp_helper_endpoint_queue_non_atomic(tcp_helper_endpoint_t* ep, /* Woritem routine to handle postponed stack destruction. * Should be run in global workqueue only, not in the stack workqueue * because these functions flush and destroy the stack workqueue. */ -static void -tcp_helper_destroy_work(struct work_struct *data) +static void tcp_helper_destroy_work(struct work_struct* data) { - tcp_helper_resource_t* trs = container_of(data, tcp_helper_resource_t, - work_item_dtor); + tcp_helper_resource_t* trs = + container_of(data, tcp_helper_resource_t, work_item_dtor); if( trs->ref[OO_THR_REF_BASE] == 0 ) { tcp_helper_dtor(trs); @@ -3411,16 +3359,14 @@ ci_inline void tcp_helper_init_max_mss(tcp_helper_resource_t* rs) const int max_prefix = 22; ci_netif* ni = &rs->netif; - ni->state->max_mss = CI_CFG_PKT_BUF_SIZE - 256 - max_prefix - ETH_HLEN - - ETH_VLAN_HLEN - sizeof(ci_ip4_hdr) - sizeof(ci_tcp_hdr); + ni->state->max_mss = CI_CFG_PKT_BUF_SIZE - 256 - max_prefix - ETH_HLEN - + ETH_VLAN_HLEN - sizeof(ci_ip4_hdr) - sizeof(ci_tcp_hdr); } -static int -tcp_helper_rm_alloc_proxy(ci_resource_onload_alloc_t* alloc, - const ci_netif_config_opts* opts, - int ifindices_len, - tcp_helper_resource_t** rs_out) +static int tcp_helper_rm_alloc_proxy(ci_resource_onload_alloc_t* alloc, + const ci_netif_config_opts* opts, int ifindices_len, + tcp_helper_resource_t** rs_out) { int rc; @@ -3442,14 
+3388,10 @@ tcp_helper_rm_alloc_proxy(ci_resource_onload_alloc_t* alloc, * Cluster will be created if needed. */ ci_uint16 in_flags = - alloc->in_flags & ~CI_NETIF_FLAG_DO_ALLOCATE_SCALABLE_FILTERS_RSS; - rc = tcp_helper_cluster_alloc_thr(alloc->in_name, - alloc->in_cluster_size, - alloc->in_cluster_restart, - in_flags, - opts, - &rs); - if ( rc != 0 ) + alloc->in_flags & ~CI_NETIF_FLAG_DO_ALLOCATE_SCALABLE_FILTERS_RSS; + rc = tcp_helper_cluster_alloc_thr(alloc->in_name, alloc->in_cluster_size, + alloc->in_cluster_restart, in_flags, opts, &rs); + if( rc != 0 ) return rc; ni = &rs->netif; ci_assert_equal(rs->id, rs->netif.state->stack_id); @@ -3457,19 +3399,16 @@ tcp_helper_rm_alloc_proxy(ci_resource_onload_alloc_t* alloc, alloc->out_nic_set = ni->nic_set; *rs_out = rs; return 0; - } - else + } else #endif { - return tcp_helper_rm_alloc(alloc, opts, ifindices_len, - NULL, rs_out); + return tcp_helper_rm_alloc(alloc, opts, ifindices_len, NULL, rs_out); } } -void -tcp_helper_free_ephemeral_ports(struct efab_ephemeral_port_head* table, - ci_uint32 entries) +void tcp_helper_free_ephemeral_ports( + struct efab_ephemeral_port_head* table, ci_uint32 entries) { ci_uint32 i; for( i = 0; i < entries; ++i ) { @@ -3480,7 +3419,7 @@ tcp_helper_free_ephemeral_ports(struct efab_ephemeral_port_head* table, * These entries are also kept at the locations for their respective IP * addresses, so to avoid double-freeing, we detect the global list by * checking for an IP-address mismatch. */ - if( keeper != NULL && !CI_IPX_ADDR_EQ(keeper->laddr, table[i].laddr) ) { + if( keeper != NULL && ! CI_IPX_ADDR_EQ(keeper->laddr, table[i].laddr) ) { ci_assert(CI_IPX_ADDR_IS_ANY(table[i].laddr)); continue; } @@ -3503,10 +3442,9 @@ tcp_helper_free_ephemeral_ports(struct efab_ephemeral_port_head* table, * into * list_head -> LIST -> new_head -> ... -> new_tail * in a thread-safe manner. 
*/ -static void -donate_ephemeral_ports(struct efab_ephemeral_port_head* list_head, - struct efab_ephemeral_port_keeper* new_head, - struct efab_ephemeral_port_keeper** new_tail_link) +static void donate_ephemeral_ports(struct efab_ephemeral_port_head* list_head, + struct efab_ephemeral_port_keeper* new_head, + struct efab_ephemeral_port_keeper** new_tail_link) { ci_uintptr_t ptr = (ci_uintptr_t) &list_head->head; ci_uintptr_t new = (ci_uintptr_t) new_head; @@ -3536,11 +3474,10 @@ donate_ephemeral_ports(struct efab_ephemeral_port_head* list_head, } -void -tcp_helper_donate_ephemeral_ports(struct efab_ephemeral_port_head* list_head, - struct efab_ephemeral_port_keeper* new_head, - struct efab_ephemeral_port_keeper* new_tail, - int count) +void tcp_helper_donate_ephemeral_ports( + struct efab_ephemeral_port_head* list_head, + struct efab_ephemeral_port_keeper* new_head, + struct efab_ephemeral_port_keeper* new_tail, int count) { /* The addresses of all ports on the list should be equal. */ ci_assert(CI_IPX_ADDR_EQ(list_head->laddr, new_head->laddr)); @@ -3554,11 +3491,10 @@ tcp_helper_donate_ephemeral_ports(struct efab_ephemeral_port_head* list_head, /* Add some ephemeral ports to the list of all ephemeral ports (as opposed to * an IP-specific list). 
*/ -static void -tcp_helper_donate_global_ephemeral_ports( - struct efab_ephemeral_port_head* list_head, - struct efab_ephemeral_port_keeper* new_head, - struct efab_ephemeral_port_keeper* new_tail) +static void tcp_helper_donate_global_ephemeral_ports( + struct efab_ephemeral_port_head* list_head, + struct efab_ephemeral_port_keeper* new_head, + struct efab_ephemeral_port_keeper* new_tail) { /* This is the global list, which is headed at the INADDR_ANY entry in the * table, as that entry is otherwise unused when shared local ports are @@ -3569,10 +3505,9 @@ tcp_helper_donate_global_ephemeral_ports( } -int -tcp_helper_alloc_ephemeral_ports(struct efab_ephemeral_port_head* list_head, - struct efab_ephemeral_port_head* global_head, - ci_addr_t laddr, int count) +int tcp_helper_alloc_ephemeral_ports( + struct efab_ephemeral_port_head* list_head, + struct efab_ephemeral_port_head* global_head, ci_addr_t laddr, int count) { struct efab_ephemeral_port_keeper* new_head = NULL; struct efab_ephemeral_port_keeper* new_global_head = NULL; @@ -3603,11 +3538,12 @@ tcp_helper_alloc_ephemeral_ports(struct efab_ephemeral_port_head* list_head, existing_prev = existing; rc = efab_alloc_ephemeral_port(laddr, existing->port_be16, &keeper); if( rc != 0 && rc != -EADDRINUSE ) - OO_DEBUG_ERR(CI_RLLOG(10, "%s: unexpected failure reusing " - CI_IP_PRINTF_FORMAT":%u for %d-th ephemeral " - "port: rc=%d", __FUNCTION__, - CI_IP_PRINTF_ARGS(&existing->laddr), - CI_BSWAP_BE16(existing->port_be16), i, rc)); + OO_DEBUG_ERR(CI_RLLOG(10, + "%s: unexpected failure reusing " CI_IP_PRINTF_FORMAT + ":%u for %d-th ephemeral " + "port: rc=%d", + __FUNCTION__, CI_IP_PRINTF_ARGS(&existing->laddr), + CI_BSWAP_BE16(existing->port_be16), i, rc)); } /* Remember where we got to in the global list. We can race against other @@ -3623,8 +3559,10 @@ tcp_helper_alloc_ephemeral_ports(struct efab_ephemeral_port_head* list_head, * this IP address. 
*/ if( keeper == NULL ) { if( (rc = efab_alloc_ephemeral_port(laddr, 0, &keeper)) != 0 ) { - OO_DEBUG_ERR(ci_log("%s: failed to allocate %d-th ephemeral port: " - "rc=%d", __FUNCTION__, i, rc)); + OO_DEBUG_ERR( + ci_log("%s: failed to allocate %d-th ephemeral port: " + "rc=%d", + __FUNCTION__, i, rc)); break; } @@ -3661,22 +3599,19 @@ tcp_helper_alloc_ephemeral_ports(struct efab_ephemeral_port_head* list_head, if( new_global_head != NULL ) { ci_assert(global_head); ci_assert(new_global_tail); - tcp_helper_donate_global_ephemeral_ports(global_head, new_global_head, - new_global_tail); + tcp_helper_donate_global_ephemeral_ports( + global_head, new_global_head, new_global_tail); } return i; } -static int -__efab_create_os_socket(tcp_helper_resource_t* trs, tcp_helper_endpoint_t* ep, - struct file* os_file, ci_int32 domain); +static int __efab_create_os_socket(tcp_helper_resource_t* trs, + tcp_helper_endpoint_t* ep, struct file* os_file, ci_int32 domain); #if CI_CFG_TCP_SHARED_LOCAL_PORTS -static -ci_active_wild* tcp_helper_alloc_active_wild( - tcp_helper_resource_t* rs, - struct efab_ephemeral_port_keeper* keeper) +static ci_active_wild* tcp_helper_alloc_active_wild( + tcp_helper_resource_t* rs, struct efab_ephemeral_port_keeper* keeper) { ci_active_wild* aw; ci_netif* netif = &rs->netif; @@ -3686,7 +3621,7 @@ ci_active_wild* tcp_helper_alloc_active_wild( /* Get a sock buf */ aw = ci_active_wild_get_state_buf(netif); - if( !aw ) + if( ! aw ) goto fail; /* Give it an OS backing socket */ @@ -3712,7 +3647,7 @@ ci_active_wild* tcp_helper_alloc_active_wild( return aw; - fail_ep: +fail_ep: /* We are already under stack lock, this ensures that we can * immediately and safely close the endpoint in * efab_tcp_helper_close_endpoint() function. @@ -3724,16 +3659,14 @@ ci_active_wild* tcp_helper_alloc_active_wild( * out-of-resources. */ efab_tcp_helper_close_endpoint(rs, ep->id, 1); - fail: +fail: return NULL; } /* Allocate active wild for the port [port]. 
*/ -static int -tcp_helper_alloc_to_aw_pool(tcp_helper_resource_t* rs, - ci_addr_t laddr, - struct efab_ephemeral_port_keeper* port) +static int tcp_helper_alloc_to_aw_pool(tcp_helper_resource_t* rs, + ci_addr_t laddr, struct efab_ephemeral_port_keeper* port) { ci_netif* ni = &rs->netif; int idx; @@ -3758,9 +3691,10 @@ tcp_helper_alloc_to_aw_pool(tcp_helper_resource_t* rs, * use any of the pools that give us a match for the resulting 4-tuple. */ - idx = ni->state->active_wild_pools_n > 1 ? - ci_netif_active_wild_nic_hash(ni, addr_any, port->port_be16, - addr_any, 0) : 0; + idx = ni->state->active_wild_pools_n > 1 + ? ci_netif_active_wild_nic_hash( + ni, addr_any, port->port_be16, addr_any, 0) + : 0; idx &= ni->state->active_wild_pools_n - 1; list = ci_netif_get_active_wild_list(ni, idx, laddr); @@ -3779,10 +3713,8 @@ tcp_helper_alloc_to_aw_pool(tcp_helper_resource_t* rs, /* Allocate active wilds for all ports in the list [ports]. */ -static int -tcp_helper_alloc_list_to_aw_pool(tcp_helper_resource_t* rs, - ci_addr_t laddr, - struct efab_ephemeral_port_head* ports) +static int tcp_helper_alloc_list_to_aw_pool(tcp_helper_resource_t* rs, + ci_addr_t laddr, struct efab_ephemeral_port_head* ports) { ci_netif* ni = &rs->netif; int i = 0; @@ -3796,10 +3728,8 @@ tcp_helper_alloc_list_to_aw_pool(tcp_helper_resource_t* rs, * need to know that we've consumed these ports. We mark them as consumed * even if we fail to allocate the active wilds: in that case, we have * bigger problems. */ - rc = tcp_helper_get_ephemeral_port_list(rs->trs_ephem_table_consumed, - laddr, - rs->trs_ephem_table_entries, - &consumed); + rc = tcp_helper_get_ephemeral_port_list(rs->trs_ephem_table_consumed, laddr, + rs->trs_ephem_table_entries, &consumed); /* If we're calling this function, we must have a list of ephemeral ports for * the local address. This means that we should also succeed in finding * (storage for) the "consumed" pointer for that address. 
*/ @@ -3813,8 +3743,10 @@ tcp_helper_alloc_list_to_aw_pool(tcp_helper_resource_t* rs, /* Treat the active wild pool as best effort - we can carry on * without it. */ - NI_LOG(&rs->netif, RESOURCE_WARNINGS, "%s: Only alloced %d of active" - " shared", __FUNCTION__, CI_MAX(0, i - 1)); + NI_LOG(&rs->netif, RESOURCE_WARNINGS, + "%s: Only alloced %d of active" + " shared", + __FUNCTION__, CI_MAX(0, i - 1)); return -ENOBUFS; } consumed->head = port; @@ -3824,11 +3756,13 @@ tcp_helper_alloc_list_to_aw_pool(tcp_helper_resource_t* rs, if( ! NI_OPTS(ni).tcp_shared_local_ports_per_ip ) for( i = 0; i < ni->state->active_wild_pools_n; i++ ) { struct oo_p_dllink_state list = - ci_netif_get_active_wild_list(ni, i, addr_any); + ci_netif_get_active_wild_list(ni, i, addr_any); if( list.p != OO_P_NULL && oo_p_dllink_is_empty(ni, list) ) { - NI_LOG(&rs->netif, RESOURCE_WARNINGS, "%s: Current shared local ports " - "don't provide coverage of all possible connections. Allocate " - "more to improve coverage.", __FUNCTION__); + NI_LOG(&rs->netif, RESOURCE_WARNINGS, + "%s: Current shared local ports " + "don't provide coverage of all possible connections. Allocate " + "more to improve coverage.", + __FUNCTION__); break; } } @@ -3852,10 +3786,9 @@ tcp_helper_alloc_list_to_aw_pool(tcp_helper_resource_t* rs, * This function requires no locks and uses atomic operations to ensure * thread-safety. */ -int -tcp_helper_get_ephemeral_port_list(struct efab_ephemeral_port_head* table, - ci_addr_t laddr, ci_uint32 table_entries, - struct efab_ephemeral_port_head** list_out) +int tcp_helper_get_ephemeral_port_list(struct efab_ephemeral_port_head* table, + ci_addr_t laddr, ci_uint32 table_entries, + struct efab_ephemeral_port_head** list_out) { uint32_t bucket, hash1, hash2; @@ -3874,8 +3807,8 @@ tcp_helper_get_ephemeral_port_list(struct efab_ephemeral_port_head* table, * list, but we have to guard against another concurrent instance of this * function trying to do the same thing. 
*/ if( (*list_out)->port_count == EPHEMERAL_PORT_LIST_NO_PORT && - ci_cas32_succeed(&(*list_out)->port_count, - EPHEMERAL_PORT_LIST_NO_PORT, 0) ) { + ci_cas32_succeed( + &(*list_out)->port_count, EPHEMERAL_PORT_LIST_NO_PORT, 0) ) { (*list_out)->laddr = laddr; return 0; } @@ -3894,8 +3827,9 @@ tcp_helper_get_ephemeral_port_list(struct efab_ephemeral_port_head* table, bucket = (bucket + hash2) & (table_entries - 1); } while( bucket != hash1 ); - CI_RLLOG(1, "%s: No space in ephemeral port table for local address " - IPX_FMT, __FUNCTION__, IPX_ARG(AF_IP(laddr))); + CI_RLLOG(1, + "%s: No space in ephemeral port table for local address " IPX_FMT, + __FUNCTION__, IPX_ARG(AF_IP(laddr))); return -ENOSPC; } @@ -3905,10 +3839,8 @@ tcp_helper_get_ephemeral_port_list(struct efab_ephemeral_port_head* table, /* Allocates active wilds for ephemeral ports on the list [list_head]. The * traversal of the list will terminate when a port is reached for which we * have previously allocated an active wild. */ -static int -tcp_helper_alloc_aw_for_ephem_ports(tcp_helper_resource_t* rs, - struct efab_ephemeral_port_head* list_head, - ci_addr_t laddr) +static int tcp_helper_alloc_aw_for_ephem_ports(tcp_helper_resource_t* rs, + struct efab_ephemeral_port_head* list_head, ci_addr_t laddr) { int rc; int count = 0; @@ -3917,10 +3849,8 @@ tcp_helper_alloc_aw_for_ephem_ports(tcp_helper_resource_t* rs, ci_assert(ci_netif_is_locked(&rs->netif)); - rc = tcp_helper_get_ephemeral_port_list(rs->trs_ephem_table_consumed, - laddr, - rs->trs_ephem_table_entries, - &consumed); + rc = tcp_helper_get_ephemeral_port_list(rs->trs_ephem_table_consumed, laddr, + rs->trs_ephem_table_entries, &consumed); /* If we're calling this function, we must have a list of ephemeral ports * for the local address. This means that we should also succeed in * finding (storage for) the "consumed" pointer for that address. 
*/ @@ -3946,8 +3876,8 @@ tcp_helper_alloc_aw_for_ephem_ports(tcp_helper_resource_t* rs, } -int tcp_helper_increase_active_wild_pool(tcp_helper_resource_t* rs, - ci_addr_t laddr) +int tcp_helper_increase_active_wild_pool( + tcp_helper_resource_t* rs, ci_addr_t laddr) { struct efab_ephemeral_port_head* list_head; struct efab_ephemeral_port_head* global_head = NULL; @@ -3964,18 +3894,16 @@ int tcp_helper_increase_active_wild_pool(tcp_helper_resource_t* rs, if( rs->trs_ephem_table == NULL ) return -EINVAL; - rc = tcp_helper_get_ephemeral_port_list(rs->trs_ephem_table, laddr, - rs->trs_ephem_table_entries, - &list_head); + rc = tcp_helper_get_ephemeral_port_list( + rs->trs_ephem_table, laddr, rs->trs_ephem_table_entries, &list_head); if( rc < 0 ) return rc; /* If we're IP-specific, we also need to get the global list. */ - if( !CI_IPX_ADDR_IS_ANY(laddr) ) { + if( ! CI_IPX_ADDR_IS_ANY(laddr) ) { rc = tcp_helper_get_ephemeral_port_list(rs->trs_ephem_table, addr_any, - rs->trs_ephem_table_entries, - &global_head); + rs->trs_ephem_table_entries, &global_head); if( rc < 0 ) return rc; @@ -3983,7 +3911,7 @@ int tcp_helper_increase_active_wild_pool(tcp_helper_resource_t* rs, if( NI_OPTS(&rs->netif).tcp_shared_local_ports_per_ip_max && list_head->port_count >= - NI_OPTS(&rs->netif).tcp_shared_local_ports_per_ip_max ) + NI_OPTS(&rs->netif).tcp_shared_local_ports_per_ip_max ) return -ENOBUFS; /* Consume any ephemeral ports that were allocated since the last time we @@ -3994,8 +3922,8 @@ int tcp_helper_increase_active_wild_pool(tcp_helper_resource_t* rs, while( to_alloc > 0 ) { /* Allocate a few more ephemeral ports. 
*/ - rc = tcp_helper_alloc_ephemeral_ports(list_head, global_head, laddr, - CI_MIN(to_alloc, BATCH_SIZE)); + rc = tcp_helper_alloc_ephemeral_ports( + list_head, global_head, laddr, CI_MIN(to_alloc, BATCH_SIZE)); if( rc < 0 ) goto out; @@ -4006,7 +3934,7 @@ int tcp_helper_increase_active_wild_pool(tcp_helper_resource_t* rs, to_alloc -= rc; } - out: +out: /* We succeeded as long as we increased the pool at all. */ if( to_alloc < ni_opts->tcp_shared_local_ports_step ) return 0; @@ -4015,13 +3943,12 @@ int tcp_helper_increase_active_wild_pool(tcp_helper_resource_t* rs, #endif -static int -thr_install_tproxy(tcp_helper_resource_t* thr) +static int thr_install_tproxy(tcp_helper_resource_t* thr) { int ifindex_buf_size; const ci_netif_config_opts* ni_opts = &NI_OPTS(&thr->netif); thr->tproxy_ifindex_count = - ci_netif_requested_scalable_intf_count(thr->netif.cplane, ni_opts); + ci_netif_requested_scalable_intf_count(thr->netif.cplane, ni_opts); ifindex_buf_size = sizeof(*thr->tproxy_ifindex) * thr->tproxy_ifindex_count; ci_assert_equal(thr->tproxy_ifindex, NULL); @@ -4029,26 +3956,24 @@ thr_install_tproxy(tcp_helper_resource_t* thr) if( thr->tproxy_ifindex == NULL ) return -ENOMEM; memset(thr->tproxy_ifindex, 0, ifindex_buf_size); - return tcp_helper_install_tproxy(1, thr, NULL, ni_opts, thr->tproxy_ifindex, - thr->tproxy_ifindex_count); + return tcp_helper_install_tproxy( + 1, thr, NULL, ni_opts, thr->tproxy_ifindex, thr->tproxy_ifindex_count); } -static void -thr_uninstall_tproxy(tcp_helper_resource_t* thr) +static void thr_uninstall_tproxy(tcp_helper_resource_t* thr) { if( thr->tproxy_ifindex != NULL ) { - tcp_helper_install_tproxy(0, thr, NULL, NULL, thr->tproxy_ifindex, - thr->tproxy_ifindex_count); + tcp_helper_install_tproxy( + 0, thr, NULL, NULL, thr->tproxy_ifindex, thr->tproxy_ifindex_count); kfree(thr->tproxy_ifindex); thr->tproxy_ifindex = NULL; } } - -static inline void netns_get_identifiers(ci_netif_state* state, - const struct net* ns) +static inline void 
netns_get_identifiers( + ci_netif_state* state, const struct net* ns) { struct oo_cplane_handle* cp = cp_acquire_from_netns_if_exists(ns); @@ -4057,23 +3982,22 @@ static inline void netns_get_identifiers(ci_netif_state* state, if( cp != NULL ) { state->cplane_pid = oo_cp_get_server_pid(cp); cp_release(cp); - } - else { + } else { state->cplane_pid = 0; } } -void tcp_helper_put_ns_components(struct oo_cplane_handle* cplane, - struct oo_filter_ns* filter_ns) +void tcp_helper_put_ns_components( + struct oo_cplane_handle* cplane, struct oo_filter_ns* filter_ns) { oo_filter_ns_put(&efab_tcp_driver, filter_ns); cp_release(cplane); } -int tcp_helper_get_ns_components(struct oo_cplane_handle** cplane, - struct oo_filter_ns** filter_ns) +int tcp_helper_get_ns_components( + struct oo_cplane_handle** cplane, struct oo_filter_ns** filter_ns) { int oof_preexisted; int rc; @@ -4090,8 +4014,8 @@ int tcp_helper_get_ns_components(struct oo_cplane_handle** cplane, * Although cplane does not hold reference to oof the fact that the stack * does allocates and frees in appropriate order is expected to ensure * the condition is met */ - *filter_ns = oo_filter_ns_get(&efab_tcp_driver, - current->nsproxy->net_ns, &oof_preexisted); + *filter_ns = oo_filter_ns_get( + &efab_tcp_driver, current->nsproxy->net_ns, &oof_preexisted); if( *filter_ns == NULL ) { OO_DEBUG_ERR(ci_log("%s: failed to allocated filter_ns", __func__)); cp_release(*cplane); @@ -4108,7 +4032,7 @@ int tcp_helper_get_ns_components(struct oo_cplane_handle** cplane, return 0; - error: +error: tcp_helper_put_ns_components(*cplane, *filter_ns); return rc; } @@ -4124,8 +4048,8 @@ struct user_namespace* tcp_helper_get_user_ns(tcp_helper_resource_t* trs) } -struct efab_ephemeral_port_head* -tcp_helper_alloc_ephem_table(ci_uint32 min_entries, ci_uint32* entries_out) +struct efab_ephemeral_port_head* tcp_helper_alloc_ephem_table( + ci_uint32 min_entries, ci_uint32* entries_out) { struct efab_ephemeral_port_head* table; ci_uint32 i; @@ 
-4163,10 +4087,9 @@ tcp_helper_alloc_ephem_table(ci_uint32 min_entries, ci_uint32* entries_out) #if CI_CFG_UL_INTERRUPT_HELPER -static bool -stack_has_events(ci_netif* ni, ci_uint32 intfs) +static bool stack_has_events(ci_netif* ni, ci_uint32 intfs) { - for( ; intfs != 0 ; intfs &= (intfs - 1) ) { + for( ; intfs != 0; intfs &= (intfs - 1) ) { int intf = ffs(intfs) - 1; if( ci_netif_intf_has_event(ni, intf) ) return true; @@ -4174,8 +4097,8 @@ stack_has_events(ci_netif* ni, ci_uint32 intfs) return false; } -static bool -stack_has_ul_job(tcp_helper_resource_t* trs, ci_uint32* intfs, ci_uint32* flags) +static bool stack_has_ul_job( + tcp_helper_resource_t* trs, ci_uint32* intfs, ci_uint32* flags) { *intfs = ci_atomic_xchg(&trs->intr_intfs, 0); do { @@ -4183,12 +4106,11 @@ stack_has_ul_job(tcp_helper_resource_t* trs, ci_uint32* intfs, ci_uint32* flags) if( *flags == 0 ) break; } while( ci_cas32u_fail(&trs->ulh_flags, *flags, 0) ); - + return *intfs != 0 || *flags != 0; } -static void -request_pending_wakeups(tcp_helper_resource_t* trs) +static void request_pending_wakeups(tcp_helper_resource_t* trs) { ci_uint32 intfs = ci_atomic_xchg(&trs->wake_intfs, 0); for( ; intfs != 0; intfs &= intfs - 1 ) { @@ -4211,15 +4133,13 @@ int oo_wait_for_interrupt(ci_private_t* priv, void* argp) timeout = periodic_poll; /* Wait for the next interrupt or timeout */ - while(1) { + while( 1 ) { /* Request wakeups before sleeping. */ request_pending_wakeups(trs); ci_assert_ge(timeout, 1); rc = wait_event_interruptible_timeout( - trs->ulh_waitq, - stack_has_ul_job(trs, &intfs, &arg->flags), - timeout); + trs->ulh_waitq, stack_has_ul_job(trs, &intfs, &arg->flags), timeout); if( NI_OPTS(&trs->netif).int_driven && intfs != 0 ) ci_atomic_or(&trs->wake_intfs, intfs); if( rc < 0 ) @@ -4234,9 +4154,8 @@ int oo_wait_for_interrupt(ci_private_t* priv, void* argp) /* No interrupts, but the stack was not touched for too long. 
* Fixme: Ideally, we'd like to ensure there are no pending ip timers, * see stack_next_timer_ms(). */ - if( intfs == 0 && - ci_frc64_get() - trs->netif.state->evq_last_prime > - trs->netif.state->timer_prime_cycles * 5 ) + if( intfs == 0 && ci_frc64_get() - trs->netif.state->evq_last_prime > + trs->netif.state->timer_prime_cycles * 5 ) break; if( first ) { @@ -4248,21 +4167,21 @@ int oo_wait_for_interrupt(ci_private_t* priv, void* argp) return 0; } -static int oo_handle_wakeup_in_ul(void* context, int is_timeout, - struct efhw_nic* nic, int budget) +static int oo_handle_wakeup_in_ul( + void* context, int is_timeout, struct efhw_nic* nic, int budget) { struct tcp_helper_nic* tcph_nic = context; tcp_helper_resource_t* trs; ci_uint64 l; - trs = CI_CONTAINER(tcp_helper_resource_t, nic[tcph_nic->thn_intf_i], - tcph_nic); + trs = + CI_CONTAINER(tcp_helper_resource_t, nic[tcph_nic->thn_intf_i], tcph_nic); ci_atomic_or(&trs->intr_intfs, 1 << tcph_nic->thn_intf_i); /* Request prime if no spinner is not spinning now. */ if( ! NI_OPTS(&trs->netif).int_driven && is_timeout && - ! ci_netif_is_spinner(&trs->netif)) { + ! 
ci_netif_is_spinner(&trs->netif) ) { ci_atomic_or(&trs->wake_intfs, 1 << tcph_nic->thn_intf_i); } @@ -4275,8 +4194,8 @@ static int oo_handle_wakeup_in_ul(void* context, int is_timeout, l = trs->netif.state->lock.lock; if( l & CI_EPLOCK_NETIF_NEED_POLL ) break; - } while( ci_cas64u_fail(&trs->netif.state->lock.lock, - l, l | CI_EPLOCK_NETIF_NEED_POLL) ); + } while( ci_cas64u_fail( + &trs->netif.state->lock.lock, l, l | CI_EPLOCK_NETIF_NEED_POLL) ); return 1; } @@ -4285,7 +4204,7 @@ static int oo_handle_wakeup_in_ul(void* context, int is_timeout, #if defined(EFRM_DO_NAMESPACES) && defined(EFRM_HAVE_NEW_KALLSYMS) #include /* put_ipc_ns() is not exported */ -static void (*my_put_ipc_ns)(struct ipc_namespace *ns); +static void (*my_put_ipc_ns)(struct ipc_namespace* ns); #endif static void put_namespaces(tcp_helper_resource_t* rs) { @@ -4308,7 +4227,7 @@ static void generate_efct_filter_irqmask(cpumask_t* result) * app and IRQs. */ int cpu; #ifdef EFRM_TASK_HAS_CPUMASK -/* >= 5.3, backported to RHEL8 */ + /* >= 5.3, backported to RHEL8 */ cpumask_t* current_cpus = ¤t->cpus_mask; #else cpumask_t* current_cpus = ¤t->cpus_allowed; @@ -4316,7 +4235,7 @@ static void generate_efct_filter_irqmask(cpumask_t* result) cpumask_clear(result); for_each_cpu(cpu, current_cpus) - cpumask_or(result, result, cpumask_of_node(cpu_to_node(cpu))); + cpumask_or(result, result, cpumask_of_node(cpu_to_node(cpu))); /* If the app is spanned across every CPU on its node(s) then it's better * to have potential contention than to do cross-NUMA stuff. Also covers the * degenerate case of an app being unaffinitised. 
*/ @@ -4325,13 +4244,12 @@ static void generate_efct_filter_irqmask(cpumask_t* result) } int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, - const ci_netif_config_opts* opts, - int ifindices_len, tcp_helper_cluster_t* thc, - tcp_helper_resource_t** rs_out) + const ci_netif_config_opts* opts, int ifindices_len, + tcp_helper_cluster_t* thc, tcp_helper_resource_t** rs_out) { tcp_helper_resource_t* rs; ci_irqlock_state_t lock_flags; - struct efhw_nic *nic; + struct efhw_nic* nic; int rc, intf_i; ci_netif* ni; int hw_resources_allocated = 0; @@ -4346,41 +4264,46 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, alloc->in_name[CI_CFG_STACK_NAME_LEN] = '\0'; if( (opts->packet_buffer_mode & CITP_PKTBUF_MODE_PHYS) && - !ci_in_egroup(phys_mode_gid) ) { - OO_DEBUG_ERR(ci_log("%s: ERROR: EF_PACKET_BUFFER_MODE=%d not permitted " - "(phys_mode_gid=%d egid=%d pid=%d)", __FUNCTION__, - opts->packet_buffer_mode, phys_mode_gid, ci_getegid(), - current->tgid); - ci_log("%s: HINT: See the phys_mode_gid onload module " - "option.", __FUNCTION__)); + ! ci_in_egroup(phys_mode_gid) ) { + OO_DEBUG_ERR( + ci_log("%s: ERROR: EF_PACKET_BUFFER_MODE=%d not permitted " + "(phys_mode_gid=%d egid=%d pid=%d)", + __FUNCTION__, opts->packet_buffer_mode, phys_mode_gid, + ci_getegid(), current->tgid); + ci_log("%s: HINT: See the phys_mode_gid onload module " + "option.", + __FUNCTION__)); rc = -EPERM; goto fail1; } if( opts->packet_buffer_mode & ~CITP_PKTBUF_MODE_PHYS ) { - OO_DEBUG_ERR(ci_log("%s: ERROR: EF_PACKET_BUFFER_MODE=%d not supported. It " - "was used on 6000 series NICs only.", __FUNCTION__, - opts->packet_buffer_mode); - ci_log("%s: HINT: Use EF_PACKET_BUFFER_MODE=0/2 instead.", - __FUNCTION__)); + OO_DEBUG_ERR( + ci_log("%s: ERROR: EF_PACKET_BUFFER_MODE=%d not supported. 
It " + "was used on 6000 series NICs only.", + __FUNCTION__, opts->packet_buffer_mode); + ci_log( + "%s: HINT: Use EF_PACKET_BUFFER_MODE=0/2 instead.", __FUNCTION__)); rc = -ENOTSUPP; goto fail1; } #if CI_CFG_WANT_BPF_NATIVE && CI_HAVE_BPF_NATIVE if( opts->xdp_mode == EF_XDP_MODE_COMPATIBLE && ! cplane_track_xdp ) { - OO_DEBUG_ERR(ci_log("%s: ERROR: EF_XDP_MODE=compatible but " - "cplane_track_xdp module parameter is off.", - __func__); - ci_log("%s: HINT: enable cplane_track_xdp module " - "parameter.", __func__)); + OO_DEBUG_ERR( + ci_log("%s: ERROR: EF_XDP_MODE=compatible but " + "cplane_track_xdp module parameter is off.", + __func__); + ci_log("%s: HINT: enable cplane_track_xdp module " + "parameter.", + __func__)); rc = -ENOTSUPP; goto fail1; } #endif rs = CI_ALLOC_OBJ(tcp_helper_resource_t); - if( !rs ) { + if( ! rs ) { rc = -ENOMEM; goto fail1; } @@ -4417,13 +4340,12 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, if( ni->cplane_init_net == NULL ) { /* We can tolerate failure to speak to init_net's control plane. Compare * the equivalent case at UL in ci_netif_init(). 
*/ - OO_DEBUG_ERR(ci_log("%s: failed to get init_net control plane handle", - __FUNCTION__)); - OO_DEBUG_ERR(ci_log("%s: support for containers will be limited", - __FUNCTION__)); + OO_DEBUG_ERR(ci_log( + "%s: failed to get init_net control plane handle", __FUNCTION__)); + OO_DEBUG_ERR( + ci_log("%s: support for containers will be limited", __FUNCTION__)); } - } - else { + } else { ni->cplane_init_net = NULL; } @@ -4441,7 +4363,7 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, ci_irqlock_lock(&THR_TABLE.lock, &lock_flags); rs->id = ci_id_pool_alloc(&THR_TABLE.instances); ci_irqlock_unlock(&THR_TABLE.lock, &lock_flags); - if (rs->id == CI_ID_POOL_ID_NONE) { + if( rs->id == CI_ID_POOL_ID_NONE ) { OO_DEBUG_ERR(ci_log("%s: out of instances", __FUNCTION__)); rc = -EBUSY; goto fail3; @@ -4466,14 +4388,15 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, rs->usermem = NULL; spin_lock_init(&ni->swf_update_lock); - ni->swf_update_last = ni->swf_update_first = NULL; + ni->swf_update_last = ni->swf_update_first = NULL; /* Allocate buffers for shared state, etc. */ rc = allocate_netif_resources(alloc, rs, thc ? thc->thc_cluster_size : 1); - if( rc < 0 ) goto fail4; + if( rc < 0 ) + goto fail4; - /* Initialise work items. - * Some of them are used in reset handler and in error path. */ + /* Initialise work items. + * Some of them are used in reset handler and in error path. 
*/ #if CI_CFG_NIC_RESET_SUPPORT INIT_DELAYED_WORK(&rs->purge_txq_work, tcp_helper_purge_txq_work); INIT_WORK(&rs->reset_work, tcp_helper_reset_stack_work); @@ -4517,7 +4440,7 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, /* "onload-wq:pretty_name workqueue for non-atomic works */ snprintf(rs->wq_name, sizeof(rs->wq_name), ONLOAD_WQ_NAME, - ni->state->pretty_name); + ni->state->pretty_name); /* This workqueue is used to poll NIC => WQ_CPU_INTENSIVE * This workqueue is used to postpone IRQ hanlder when we are out of NAPI * budget => WQ_HIGHPRI @@ -4526,12 +4449,13 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, * Long running CPU intensive workloads which can be better * managed by the system scheduler => WQ_UNBOUND */ - rs->wq = alloc_workqueue(rs->wq_name, - WQ_UNBOUND | WQ_CPU_INTENSIVE | - WQ_HIGHPRI | WQ_SYSFS, 0); + rs->wq = alloc_workqueue( + rs->wq_name, WQ_UNBOUND | WQ_CPU_INTENSIVE | WQ_HIGHPRI | WQ_SYSFS, 0); if( rs->wq == NULL ) { - OO_DEBUG_ERR(ci_log("%s: [%d] Failed to allocate stack due to workqueue " - "allocation failure", __func__, NI_ID(ni))); + OO_DEBUG_ERR( + ci_log("%s: [%d] Failed to allocate stack due to workqueue " + "allocation failure", + __func__, NI_ID(ni))); rc = -ENOMEM; goto fail5; } @@ -4540,16 +4464,18 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, #if CI_CFG_NIC_RESET_SUPPORT /* "onload-wq-reset:pretty_name workqueue for handling resets */ snprintf(rs->reset_wq_name, sizeof(rs->reset_wq_name), ONLOAD_RESET_WQ_NAME, - ni->state->pretty_name); + ni->state->pretty_name); /* Until we've handled a reset, other activities are pointless => WQ_HIGHPRI * Users may want to set cpu affinity => WQ_SYSFS */ - rs->reset_wq = alloc_workqueue(rs->reset_wq_name, WQ_UNBOUND | - WQ_HIGHPRI | WQ_SYSFS, 0); + rs->reset_wq = alloc_workqueue( + rs->reset_wq_name, WQ_UNBOUND | WQ_HIGHPRI | WQ_SYSFS, 0); if( rs->reset_wq == NULL ) { - OO_DEBUG_ERR(ci_log("%s: [%d] Failed to allocate stack due to reset workqueue 
" - "allocation failure", __func__, NI_ID(ni))); - rc = - ENOMEM; + OO_DEBUG_ERR( + ci_log("%s: [%d] Failed to allocate stack due to reset workqueue " + "allocation failure", + __func__, NI_ID(ni))); + rc = -ENOMEM; goto fail5a; } #endif @@ -4566,8 +4492,8 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, rs->thc_efct_memfd = fget(alloc->in_memfd); if( ! rs->thc_efct_memfd ) { rc = -EBADF; - OO_DEBUG_ERR(ci_log("%s: [%d] Bad fd for efct (%d).", - __func__, NI_ID(ni), alloc->in_memfd)); + OO_DEBUG_ERR(ci_log("%s: [%d] Bad fd for efct (%d).", __func__, + NI_ID(ni), alloc->in_memfd)); goto fail_memfd; } } @@ -4575,9 +4501,10 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, /* Allocate hardware resources */ ni->ep_tbl = NULL; ni->flags = alloc->in_flags; - ci_assert( ! (alloc->in_flags & CI_NETIF_FLAG_IN_DL_CONTEXT) ); + ci_assert(! (alloc->in_flags & CI_NETIF_FLAG_IN_DL_CONTEXT)); rc = allocate_netif_hw_resources(alloc, thc, rs); - if( rc < 0 ) goto fail6; + if( rc < 0 ) + goto fail6; if( ci_in_egroup(inject_kernel_gid) ) { ni->flags |= CI_NETIF_FLAG_MAY_INJECT_TO_KERNEL; @@ -4600,7 +4527,8 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, ni->keuid = ci_geteuid(); ni->error_flags = 0; ci_netif_state_init(&rs->netif, oo_timesync_cpu_khz, alloc->in_name); - OO_STACK_FOR_EACH_INTF_I(&rs->netif, intf_i) { + OO_STACK_FOR_EACH_INTF_I(&rs->netif, intf_i) + { nic = efrm_client_get_nic(rs->nic[intf_i].thn_oo_nic->efrm_client); if( nic->devtype.arch == EFHW_ARCH_AF_XDP ) ni->flags |= CI_NETIF_FLAG_AF_XDP; @@ -4637,7 +4565,7 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, rc = thr_install_tproxy(rs); if( rc != 0 ) { OO_DEBUG_ERR(ci_log("%s: [%d] Failed to set scalable filters rc=%d.", - __func__, NI_ID(ni), rc)); + __func__, NI_ID(ni), rc)); goto fail10; } } @@ -4648,21 +4576,18 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, * otherwise, we create it ourselves. 
*/ if( thc != NULL ) { rs->trs_ephem_table = thc->thc_ephem_table; - rs->trs_ephem_table_entries = - thc->thc_ephem_table_entries; - } - else if( ci_netif_should_allocate_tcp_shared_local_ports(ni) ) { + rs->trs_ephem_table_entries = thc->thc_ephem_table_entries; + } else if( ci_netif_should_allocate_tcp_shared_local_ports(ni) ) { ci_uint32 entries = CI_MAX(NI_OPTS(ni).tcp_shared_local_ports, - NI_OPTS(ni).tcp_shared_local_ports_max); + NI_OPTS(ni).tcp_shared_local_ports_max); rs->trs_ephem_table = - tcp_helper_alloc_ephem_table(entries, &rs->trs_ephem_table_entries); + tcp_helper_alloc_ephem_table(entries, &rs->trs_ephem_table_entries); if( rs->trs_ephem_table == NULL ) { OO_DEBUG_ERR(ci_log("%s: [%d] Failed to allocate ephemeral port table.", - __func__, NI_ID(ni))); + __func__, NI_ID(ni))); goto fail11; } - } - else { + } else { rs->trs_ephem_table = NULL; } @@ -4671,14 +4596,15 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, * has already been consumed. This is always particular to the current * stack, even if the table of ephemeral ports is shared. */ rs->trs_ephem_table_consumed = - tcp_helper_alloc_ephem_table(rs->trs_ephem_table_entries, NULL); + tcp_helper_alloc_ephem_table(rs->trs_ephem_table_entries, NULL); if( rs->trs_ephem_table_consumed == NULL ) { - OO_DEBUG_ERR(ci_log("%s: [%d] Failed to allocate table of consumed " - "ephemeral ports.", __func__, NI_ID(ni))); + OO_DEBUG_ERR( + ci_log("%s: [%d] Failed to allocate table of consumed " + "ephemeral ports.", + __func__, NI_ID(ni))); goto fail12; } - } - else { + } else { rs->trs_ephem_table_consumed = NULL; } #endif @@ -4723,18 +4649,15 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, * a separate global table. 
*/ if( thc == NULL ) { tcp_helper_get_ephemeral_port_list(rs->trs_ephem_table, addr_any, - rs->trs_ephem_table_entries, - &ephemeral_ports); - tcp_helper_alloc_ephemeral_ports(ephemeral_ports, NULL, addr_any, - NI_OPTS(ni).tcp_shared_local_ports); + rs->trs_ephem_table_entries, &ephemeral_ports); + tcp_helper_alloc_ephemeral_ports( + ephemeral_ports, NULL, addr_any, NI_OPTS(ni).tcp_shared_local_ports); /* In the event that tcp_helper_alloc_ephemeral_ports() returns an error, * there's nothing to do here: a warning will have been printed, and we * can continue with an empty list of ephemeral ports. */ - } - else { + } else { tcp_helper_get_ephemeral_port_list(thc->thc_ephem_table, addr_any, - thc->thc_ephem_table_entries, - &ephemeral_ports); + thc->thc_ephem_table_entries, &ephemeral_ports); } tcp_helper_alloc_list_to_aw_pool(rs, addr_any, ephemeral_ports); } @@ -4747,20 +4670,18 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, * it that other users can start to do things that require these to have * been initialised. */ - OO_STACK_FOR_EACH_INTF_I(&rs->netif, intf_i) { + OO_STACK_FOR_EACH_INTF_I(&rs->netif, intf_i) + { #if CI_CFG_UL_INTERRUPT_HELPER - efrm_eventq_register_callback(tcp_helper_vi(rs, intf_i), - &oo_handle_wakeup_in_ul, - &rs->nic[intf_i]); + efrm_eventq_register_callback( + tcp_helper_vi(rs, intf_i), &oo_handle_wakeup_in_ul, &rs->nic[intf_i]); #else if( NI_OPTS(ni).int_driven ) efrm_eventq_register_callback(tcp_helper_vi(rs, intf_i), - &oo_handle_wakeup_int_driven, - &rs->nic[intf_i]); + &oo_handle_wakeup_int_driven, &rs->nic[intf_i]); else efrm_eventq_register_callback(tcp_helper_vi(rs, intf_i), - &oo_handle_wakeup_or_timeout, - &rs->nic[intf_i]); + &oo_handle_wakeup_or_timeout, &rs->nic[intf_i]); #endif } #if ! 
CI_CFG_UL_INTERRUPT_HELPER @@ -4781,23 +4702,23 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, OO_DEBUG_RES(ci_log("tcp_helper_rm_alloc: allocated %u", rs->id)); return 0; - fail13: +fail13: #if CI_CFG_TCP_SHARED_LOCAL_PORTS vfree(rs->trs_ephem_table_consumed); - fail12: +fail12: /* Free the table of ephemeral ports unless we share it with the cluster. */ if( thc == NULL ) vfree(rs->trs_ephem_table); - fail11: +fail11: #endif - fail10: - fail9: +fail10: +fail9: release_ep_tbl(rs); - fail7: +fail7: /* Do not call release_netif_hw_resources() now - do it later, after * we're out of THR_TABLE. */ hw_resources_allocated = 1; - fail6: +fail6: /* Remove from the THR_TABLE and handle possible reset * before trying to remove VIs. */ @@ -4828,21 +4749,21 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, if( rs->thc_efct_memfd ) fput(rs->thc_efct_memfd); - fail_memfd: +fail_memfd: #if CI_CFG_NIC_RESET_SUPPORT destroy_workqueue(rs->reset_wq); - fail5a: +fail5a: #endif #if ! CI_CFG_UL_INTERRUPT_HELPER destroy_workqueue(rs->wq); - fail5: +fail5: #endif put_namespaces(rs); release_netif_resources(rs); - fail4: +fail4: ci_id_pool_free(&THR_TABLE.instances, rs->id, &THR_TABLE.lock); - fail3: - fail2: +fail3: +fail2: ci_irqlock_lock(&THR_TABLE.lock, &lock_flags); ci_assert(THR_TABLE.stack_count > 0); --THR_TABLE.stack_count; @@ -4854,8 +4775,8 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, * to report EEXIST rather than the other arbitrary error so that UL can * recover. 
*/ if( rc != -EEXIST && alloc->in_name[0] ) { - int rc1 = efab_thr_table_check_name(alloc->in_name, - rs->netif.cplane->cp_netns); + int rc1 = + efab_thr_table_check_name(alloc->in_name, rs->netif.cplane->cp_netns); if( rc1 == -EEXIST ) rc = rc1; } @@ -4863,14 +4784,14 @@ int tcp_helper_rm_alloc(ci_resource_onload_alloc_t* alloc, if( ni->cplane_init_net != NULL ) cp_release(ni->cplane_init_net); tcp_helper_put_ns_components(ni->cplane, rs->filter_ns); - fail1a: +fail1a: CI_FREE_OBJ(rs); - fail1: +fail1: return rc; } -int tcp_helper_alloc_ul(ci_resource_onload_alloc_t* alloc, - int ifindices_len, tcp_helper_resource_t** rs_out) +int tcp_helper_alloc_ul(ci_resource_onload_alloc_t* alloc, int ifindices_len, + tcp_helper_resource_t** rs_out) { ci_netif_config_opts* opts; int rc; @@ -4882,15 +4803,15 @@ int tcp_helper_alloc_ul(ci_resource_onload_alloc_t* alloc, goto out; rc = tcp_helper_rm_alloc_proxy(alloc, opts, ifindices_len, rs_out); - out: +out: kfree(opts); return rc; } int tcp_helper_alloc_kernel(ci_resource_onload_alloc_t* alloc, - const ci_netif_config_opts* opts, - int ifindices_len, tcp_helper_resource_t** rs_out) + const ci_netif_config_opts* opts, int ifindices_len, + tcp_helper_resource_t** rs_out) { return tcp_helper_rm_alloc_proxy(alloc, opts, ifindices_len, rs_out); } @@ -4899,7 +4820,7 @@ int tcp_helper_alloc_kernel(ci_resource_onload_alloc_t* alloc, #if CI_CFG_NIC_RESET_SUPPORT static void thr_reset_stack_rx_cb(ef_request_id id, void* arg) { - tcp_helper_resource_t* thr = (tcp_helper_resource_t*)arg; + tcp_helper_resource_t* thr = (tcp_helper_resource_t*) arg; ci_netif* ni = &thr->netif; oo_pkt_p pp; ci_ip_pkt_fmt* pkt; @@ -4915,9 +4836,9 @@ struct thr_reset_stack_tx_cb_state { tcp_helper_resource_t* thr; }; -static void -thr_reset_stack_tx_cb_state_init(struct thr_reset_stack_tx_cb_state* cb_state, - tcp_helper_resource_t* thr, int intf_i) +static void thr_reset_stack_tx_cb_state_init( + struct thr_reset_stack_tx_cb_state* cb_state, 
tcp_helper_resource_t* thr, + int intf_i) { cb_state->intf_i = intf_i; cb_state->thr = thr; @@ -4928,7 +4849,7 @@ thr_reset_stack_tx_cb_state_init(struct thr_reset_stack_tx_cb_state* cb_state, static void thr_reset_stack_tx_cb(ef_request_id id, void* arg) { struct thr_reset_stack_tx_cb_state* cb_state = - (struct thr_reset_stack_tx_cb_state*)arg; + (struct thr_reset_stack_tx_cb_state*) arg; ci_netif* ni = &cb_state->thr->netif; oo_pkt_p pp; ci_ip_pkt_fmt* pkt; @@ -4943,11 +4864,9 @@ static void thr_reset_stack_tx_cb(ef_request_id id, void* arg) #if ! CI_CFG_UL_INTERRUPT_HELPER /* All delayed work is now run on the periodic workqueue. */ static inline int thr_queue_delayed_work(tcp_helper_resource_t* thr, - struct delayed_work *dwork, - unsigned long delay) + struct delayed_work* dwork, unsigned long delay) { - return queue_delayed_work_on(thr->periodic_timer_cpu, thr->wq, - dwork, delay); + return queue_delayed_work_on(thr->periodic_timer_cpu, thr->wq, dwork, delay); } #endif @@ -4967,7 +4886,7 @@ static void tcp_helper_purge_txq_locked(tcp_helper_resource_t* thr) int reschedule = 0; OO_DEBUG_VERB(ci_log("%s: [%d] Purging TXQs for suspended interfaces %08x", - __FUNCTION__, thr->id, intfs_suspended)); + __FUNCTION__, thr->id, intfs_suspended)); ci_assert(ci_netif_is_locked(ni)); @@ -5001,13 +4920,12 @@ static void tcp_helper_purge_txq_locked(tcp_helper_resource_t* thr) #endif -static void set_pkt_bufset_hwaddrs(ci_netif* ni, int bufset_id, int intf_i, - const ci_uint64* hw_addrs) +static void set_pkt_bufset_hwaddrs( + ci_netif* ni, int bufset_id, int intf_i, const ci_uint64* hw_addrs) { int i; int page_order = ni->packets->set[bufset_id].page_order; - ef_addr* addr = ni->dma_addrs + - ni->packets->set[bufset_id].dma_addr_base + + ef_addr* addr = ni->dma_addrs + ni->packets->set[bufset_id].dma_addr_base + intf_i * (PKTS_PER_SET >> page_order); for( i = 0; i < PKTS_PER_SET >> page_order; i++ ) { @@ -5027,7 +4945,7 @@ static void 
tcp_helper_reset_stack_locked(tcp_helper_resource_t* thr) ci_netif* ni = &thr->netif; ef_vi* vi; struct thr_reset_stack_tx_cb_state cb_state; - uint64_t *hw_addrs; + uint64_t* hw_addrs; #if CI_CFG_PIO struct efhw_nic* nic; int rc; @@ -5039,7 +4957,7 @@ static void tcp_helper_reset_stack_locked(tcp_helper_resource_t* thr) * it might have been invalidated while we were waiting for the lock. */ if( ci_dllink_is_free(&thr->all_stacks_link) ) { OO_DEBUG_TCPH(ci_log("%s: [%d] Stack is being destroyed; not resetting", - __FUNCTION__, thr->id)); + __FUNCTION__, thr->id)); return; } @@ -5048,15 +4966,16 @@ static void tcp_helper_reset_stack_locked(tcp_helper_resource_t* thr) thr->intfs_to_reset = 0; /* Prevent any further periodic TXQ-purges, since we're about to bring the * TXQ back up. */ - thr->intfs_suspended &=~ intfs_to_reset; + thr->intfs_suspended &= ~intfs_to_reset; ci_irqlock_unlock(&thr->lock, &lock_flags); #if CI_CFG_ENDPOINT_MOVE if( thr->thc != NULL ) { /* This warning can be removed once Bug43452 is properly addressed */ - ci_log("Stack %s:%d in cluster %s can't restore filters post-NIC-reset.\n" - "This stack will no longer receive packets", - thr->name, thr->id, thr->thc->thc_name); + ci_log( + "Stack %s:%d in cluster %s can't restore filters post-NIC-reset.\n" + "This stack will no longer receive packets", + thr->name, thr->id, thr->thc->thc_name); } #endif @@ -5074,30 +4993,30 @@ static void tcp_helper_reset_stack_locked(tcp_helper_resource_t* thr) ci_uint32 old_errors = nsn->nic_error_flags; vi = ci_netif_vi(ni, intf_i); - EFRM_WARN_LIMITED("%s: reset stack %d intf %d (0x%x)", - __FUNCTION__, thr->id, intf_i, intfs_to_reset); + EFRM_WARN_LIMITED("%s: reset stack %d intf %d (0x%x)", __FUNCTION__, + thr->id, intf_i, intfs_to_reset); #if CI_CFG_PIO nic = efrm_client_get_nic(thr->nic[intf_i].thn_oo_nic->efrm_client); - if( NI_OPTS(ni).pio && - (nic->devtype.arch == EFHW_ARCH_EF10) && + if( NI_OPTS(ni).pio && (nic->devtype.arch == EFHW_ARCH_EF10) && 
(thr->nic[intf_i].thn_pio_io_mmap_bytes != 0) ) { - struct efrm_pd *pd = efrm_vi_get_pd(tcp_helper_vi(thr, intf_i)); + struct efrm_pd* pd = efrm_vi_get_pd(tcp_helper_vi(thr, intf_i)); OO_DEBUG_TCPH(ci_log("%s: realloc PIO", __FUNCTION__)); /* Now try to recreate and link the PIO region */ - rc = efrm_pio_realloc(pd, thr->nic[intf_i].thn_pio_rs, - tcp_helper_vi(thr, intf_i)); + rc = efrm_pio_realloc( + pd, thr->nic[intf_i].thn_pio_rs, tcp_helper_vi(thr, intf_i)); if( rc < 0 ) { - OO_DEBUG_TCPH(ci_log("%s: [%d:%d] pio_realloc failed %d, " - "removing PIO capability", - __FUNCTION__, thr->id, intf_i, rc)); + OO_DEBUG_TCPH( + ci_log("%s: [%d:%d] pio_realloc failed %d, " + "removing PIO capability", + __FUNCTION__, thr->id, intf_i, rc)); thr->pio_mmap_bytes -= thr->nic[intf_i].thn_pio_io_mmap_bytes; /* Expose failure to user-level */ ni->state->pio_mmap_bytes -= thr->nic[intf_i].thn_pio_io_mmap_bytes; thr->nic[intf_i].thn_pio_io_mmap_bytes = 0; thr->netif.nic_hw[intf_i].pio.pio_buffer = NULL; thr->netif.nic_hw[intf_i].pio.pio_len = 0; - nsn->oo_vi_flags &=~ OO_VI_FLAGS_PIO_EN; + nsn->oo_vi_flags &= ~OO_VI_FLAGS_PIO_EN; nsn->pio_io_mmap_bytes = 0; nsn->pio_io_len = 0; ci_pio_buddy_dtor(ni, &nsn->pio_buddy); @@ -5110,12 +5029,12 @@ static void tcp_helper_reset_stack_locked(tcp_helper_resource_t* thr) #endif /* Remap packets before using them in RX q */ - nsn->nic_error_flags &=~ CI_NETIF_NIC_ERROR_REMAP; + nsn->nic_error_flags &= ~CI_NETIF_NIC_ERROR_REMAP; for( i = 0; i < pkt_sets_n; ++i ) { int rc; - rc = oo_iobufset_resource_remap_bt(ni->nic_hw[intf_i].pkt_rs[i], - hw_addrs); + rc = oo_iobufset_resource_remap_bt( + ni->nic_hw[intf_i].pkt_rs[i], hw_addrs); if( rc == -ENOSYS ) { /* This PD does not use buffer table; do not update anything and * go away. */ @@ -5129,7 +5048,7 @@ static void tcp_helper_reset_stack_locked(tcp_helper_resource_t* thr) * wait for user to kill the app, or for another reset to attempt * to recover things. 
*/ ci_log("ERROR [%d]: failed to remap packet set %d after NIC reset", - thr->id, i); + thr->id, i); memset(hw_addrs, 0, sizeof(uint64_t) * (1 << HW_PAGES_PER_SET_S)); nsn->nic_error_flags |= CI_NETIF_NIC_ERROR_REMAP; } @@ -5139,7 +5058,7 @@ static void tcp_helper_reset_stack_locked(tcp_helper_resource_t* thr) if( ~nsn->nic_error_flags & CI_NETIF_NIC_ERROR_REMAP ) { if( old_errors & CI_NETIF_NIC_ERROR_REMAP ) ci_log("[%d]: packets remapped successfully on intf %d", thr->id, - intf_i); + intf_i); } /* Reset sw queues */ @@ -5167,7 +5086,7 @@ static void tcp_helper_reset_stack_locked(tcp_helper_resource_t* thr) ci_netif_pkt_release(ni, pkt); } - if( NI_OPTS(ni).timer_usec != 0 ) + if( NI_OPTS(ni).timer_usec != 0 ) ef_eventq_timer_prime(vi, NI_OPTS(ni).timer_usec); ci_bit_test_and_set(&ni->state->evq_primed, intf_i); @@ -5230,7 +5149,7 @@ void tcp_helper_reset_stack(ci_netif* ni, int intf_i) #if CI_CFG_NIC_RESET_SUPPORT -static void tcp_helper_purge_txq_work(struct work_struct *data) +static void tcp_helper_purge_txq_work(struct work_struct* data) { tcp_helper_resource_t* trs; @@ -5242,17 +5161,17 @@ static void tcp_helper_purge_txq_work(struct work_struct *data) /* If we can get the lock, we purge the queues now; if not, we defer to the * lock-holder. */ - if( efab_tcp_helper_netif_lock_or_set_flags(trs, OO_TRUSTED_LOCK_PURGE_TXQS, - CI_EPLOCK_NETIF_PURGE_TXQS, 0) ) { + if( efab_tcp_helper_netif_lock_or_set_flags( + trs, OO_TRUSTED_LOCK_PURGE_TXQS, CI_EPLOCK_NETIF_PURGE_TXQS, 0) ) { tcp_helper_purge_txq_locked(trs); efab_tcp_helper_netif_unlock(trs, 0); } } -static void tcp_helper_reset_stack_work(struct work_struct *data) +static void tcp_helper_reset_stack_work(struct work_struct* data) { - tcp_helper_resource_t* trs = container_of(data, tcp_helper_resource_t, - reset_work); + tcp_helper_resource_t* trs = + container_of(data, tcp_helper_resource_t, reset_work); /* Before tcp_helper_reset_stack_locked is called, * any stack activity is just silly. 
So we agressively wait for @@ -5262,7 +5181,7 @@ static void tcp_helper_reset_stack_work(struct work_struct *data) * for a wedged lock. If we know the lock may already be wedged we can * just trylock here. Otherwise block, but allow interruption by a wakeup. */ - while (! (trs->trs_aflags & OO_THR_AFLAG_DONT_BLOCK_SHARED) ) { + while( ! (trs->trs_aflags & OO_THR_AFLAG_DONT_BLOCK_SHARED) ) { int rc = ci_netif_lock_maybe_wedged(&trs->netif); if( rc == 0 ) { goto locked; @@ -5271,8 +5190,7 @@ static void tcp_helper_reset_stack_work(struct work_struct *data) /* In the wedged case, we'll get OO_THR_AFLAG_DONT_BLOCK_SHARED flag * sooner or later. Otherwise it does not harm to try again. */ continue; - } - else { + } else { /* Workqueue was interrupted by a signal - give up */ break; } @@ -5284,11 +5202,11 @@ static void tcp_helper_reset_stack_work(struct work_struct *data) * option, because it is set too late. */ if( ! ci_netif_trylock(&trs->netif) ) { /* It is probably OK, but let's print a warning. */ - ci_log("[%d]: unable to process NIC reset before destroying stack", - trs->id); + ci_log( + "[%d]: unable to process NIC reset before destroying stack", trs->id); return; } - locked: +locked: ci_assert(ci_netif_is_locked(&trs->netif)); tcp_helper_reset_stack_locked(trs); ci_netif_unlock(&trs->netif); @@ -5297,11 +5215,10 @@ static void tcp_helper_reset_stack_work(struct work_struct *data) #if ! CI_CFG_UL_INTERRUPT_HELPER || ! defined(NDEBUG) -static bool -current_is_proper_ul_context(void) +static bool current_is_proper_ul_context(void) { return ! in_interrupt() && - (current->flags & (PF_KTHREAD | PF_WQ_WORKER)) == 0; + (current->flags & (PF_KTHREAD | PF_WQ_WORKER)) == 0; } #endif @@ -5322,8 +5239,7 @@ current_is_proper_ul_context(void) * *--------------------------------------------------------------------*/ -static void -tcp_helper_rm_free(tcp_helper_resource_t* trs) +static void tcp_helper_rm_free(tcp_helper_resource_t* trs) { #if ! 
CI_CFG_UL_INTERRUPT_HELPER unsigned l, new_l; @@ -5356,7 +5272,7 @@ tcp_helper_rm_free(tcp_helper_resource_t* trs) efab_tcp_helper_rm_schedule_free(trs); else #endif - efab_tcp_helper_rm_free_locked(trs); + efab_tcp_helper_rm_free_locked(trs); OO_DEBUG_TCPH(ci_log("%s: [%u] done", __FUNCTION__, trs->id)); } @@ -5370,11 +5286,10 @@ tcp_helper_rm_free(tcp_helper_resource_t* trs) * *--------------------------------------------------------------------*/ -static void -tcp_helper_dtor_schedule(tcp_helper_resource_t * trs) +static void tcp_helper_dtor_schedule(tcp_helper_resource_t* trs) { OO_DEBUG_TCPH(ci_log("%s [%u]: starting", __FUNCTION__, trs->id)); - ci_verify( queue_work(CI_GLOBAL_WORKQUEUE, &trs->work_item_dtor) != 0); + ci_verify(queue_work(CI_GLOBAL_WORKQUEUE, &trs->work_item_dtor) != 0); } #endif @@ -5386,8 +5301,7 @@ tcp_helper_dtor_schedule(tcp_helper_resource_t * trs) * \param trs TCP helper resource *--------------------------------------------------------------------*/ -static void -efab_tcp_helper_k_ref_count_is_zero(tcp_helper_resource_t* trs) +static void efab_tcp_helper_k_ref_count_is_zero(tcp_helper_resource_t* trs) { /* although we have atomically got to zero we still have to contend * with a possible race from the resource manager destruction @@ -5401,15 +5315,14 @@ efab_tcp_helper_k_ref_count_is_zero(tcp_helper_resource_t* trs) ci_assert_equal(trs->ref[OO_THR_REF_FILE], 0); ci_assert_equal(trs->ref[OO_THR_REF_APP], 0); - OO_DEBUG_TCPH(ci_log("%s: [%u] ref "OO_THR_REF_FMT, - __FUNCTION__, trs->id, OO_THR_REF_ARG(trs->ref))); + OO_DEBUG_TCPH(ci_log("%s: [%u] ref " OO_THR_REF_FMT, __FUNCTION__, trs->id, + OO_THR_REF_ARG(trs->ref))); ci_irqlock_lock(&THR_TABLE.lock, &lock_flags); - if( !ci_dllink_is_free(&trs->all_stacks_link) ) { + if( ! 
ci_dllink_is_free(&trs->all_stacks_link) ) { ci_dllist_remove(&trs->all_stacks_link); ci_dllink_mark_free(&trs->all_stacks_link); - } - else + } else trs = 0; ci_irqlock_unlock(&THR_TABLE.lock, &lock_flags); @@ -5421,29 +5334,27 @@ efab_tcp_helper_k_ref_count_is_zero(tcp_helper_resource_t* trs) tcp_helper_dtor_schedule(trs); else #endif - tcp_helper_dtor(trs); + tcp_helper_dtor(trs); } OO_DEBUG_TCPH(ci_log("%s: finished", __FUNCTION__)); } -static void -tcp_helper_close_cleanup_ep(tcp_helper_resource_t* trs, - tcp_helper_endpoint_t* ep) +static void tcp_helper_close_cleanup_ep( + tcp_helper_resource_t* trs, tcp_helper_endpoint_t* ep) { - tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_PEER_CLOSED); - if( ep->alien_ref != NULL ) { - fput(ep->alien_ref->_filp); - ep->alien_ref = NULL; - } + tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_PEER_CLOSED); + if( ep->alien_ref != NULL ) { + fput(ep->alien_ref->_filp); + ep->alien_ref = NULL; + } } #if ! CI_CFG_UL_INTERRUPT_HELPER /*! Close sockets. Called with netif lock held. Kernel netif lock may or * may not be held. */ -static void -tcp_helper_close_pending_endpoints(tcp_helper_resource_t* trs) +static void tcp_helper_close_pending_endpoints(tcp_helper_resource_t* trs) { ci_irqlock_state_t lock_flags; tcp_helper_endpoint_t* ep; @@ -5453,7 +5364,7 @@ tcp_helper_close_pending_endpoints(tcp_helper_resource_t* trs) ci_assert(ci_netif_is_locked(&trs->netif)); ci_assert(! in_atomic()); - ci_assert( ~trs->netif.flags & CI_NETIF_FLAG_IN_DL_CONTEXT ); + ci_assert(~trs->netif.flags & CI_NETIF_FLAG_IN_DL_CONTEXT); /* Ensure we're up-to-date so we get an ordered response to all packets. ** (eg. ANVL tcp_core 9.18). Do it once here rather than per-socket. @@ -5469,7 +5380,7 @@ tcp_helper_close_pending_endpoints(tcp_helper_resource_t* trs) * NDEBUG build: we are not protected by kernel netif lock, so * we should re-check that ep_tobe_closed is non-empty for security * reasons. 
*/ - ci_assert( ci_sllist_not_empty(&trs->ep_tobe_closed) ); + ci_assert(ci_sllist_not_empty(&trs->ep_tobe_closed)); if( ci_sllist_is_empty(&trs->ep_tobe_closed) ) { ci_irqlock_unlock(&trs->lock, &lock_flags); ci_log("%s: [%d] ERROR: stack lock corrupted", __func__, trs->id); @@ -5478,9 +5389,9 @@ tcp_helper_close_pending_endpoints(tcp_helper_resource_t* trs) link = ci_sllist_pop(&trs->ep_tobe_closed); ci_irqlock_unlock(&trs->lock, &lock_flags); - ep = CI_CONTAINER(tcp_helper_endpoint_t, tobe_closed , link); - OO_DEBUG_TCPH(ci_log("%s: [%u:%d] closing", - __FUNCTION__, trs->id, OO_SP_FMT(ep->id))); + ep = CI_CONTAINER(tcp_helper_endpoint_t, tobe_closed, link); + OO_DEBUG_TCPH(ci_log( + "%s: [%u:%d] closing", __FUNCTION__, trs->id, OO_SP_FMT(ep->id))); tcp_helper_close_cleanup_ep(trs, ep); citp_waitable_all_fds_gone(&trs->netif, ep->id); } @@ -5492,14 +5403,14 @@ int oo_wakeup_waiters(ci_private_t* priv, void* arg) struct oo_wakeup_eps* op = arg; tcp_helper_resource_t* thr = priv->thr; oo_sp ep_id[16]; - oo_sp __user * user_ptr = CI_USER_PTR_GET(op->eps); + oo_sp __user* user_ptr = CI_USER_PTR_GET(op->eps); int n, i; if( thr == NULL ) return -EINVAL; while( op->eps_num > 0 ) { - n = CI_MIN(op->eps_num, (ci_uint32)(sizeof(ep_id) / sizeof(ep_id[0]))); + n = CI_MIN(op->eps_num, (ci_uint32) (sizeof(ep_id) / sizeof(ep_id[0]))); if( copy_from_user(ep_id, user_ptr, n * sizeof(ep_id[0])) ) return -EFAULT; user_ptr += n; @@ -5518,13 +5429,12 @@ int oo_wakeup_waiters(ci_private_t* priv, void* arg) #if ! CI_CFG_UL_INTERRUPT_HELPER -static void -efab_tcp_helper_rm_reset_untrusted(tcp_helper_resource_t* trs) +static void efab_tcp_helper_rm_reset_untrusted(tcp_helper_resource_t* trs) { /* Called when closing a stack and the lock is wedged. Assume that * shared state is borked. 
*/ - ci_netif *netif = &trs->netif; + ci_netif* netif = &trs->netif; int i; for( i = 0; i < netif->ep_tbl_n; ++i ) { @@ -5548,8 +5458,7 @@ efab_tcp_helper_rm_reset_untrusted(tcp_helper_resource_t* trs) } } -static void -efab_tcp_helper_rm_schedule_free(tcp_helper_resource_t* trs) +static void efab_tcp_helper_rm_schedule_free(tcp_helper_resource_t* trs) { OO_DEBUG_TCPH(ci_log("%s [%u]: defer", __FUNCTION__, trs->id)); queue_work(CI_GLOBAL_WORKQUEUE, &trs->work_item_dtor); @@ -5573,8 +5482,7 @@ efab_tcp_helper_rm_schedule_free(tcp_helper_resource_t* trs) #if CI_CFG_NIC_RESET_SUPPORT -static void -efab_tcp_helper_flush_reset_wq(tcp_helper_resource_t* trs) +static void efab_tcp_helper_flush_reset_wq(tcp_helper_resource_t* trs) { /* There may be both work running and work pending. We can wake up the * work currently blocked on the lock, but we don't want a queued @@ -5594,8 +5502,7 @@ void tcp_helper_flush_resets(ci_netif* ni) #endif -static void -efab_tcp_helper_rm_free_locked(tcp_helper_resource_t* trs) +static void efab_tcp_helper_rm_free_locked(tcp_helper_resource_t* trs) { ci_netif* netif; #if ! CI_CFG_UL_INTERRUPT_HELPER @@ -5604,13 +5511,13 @@ efab_tcp_helper_rm_free_locked(tcp_helper_resource_t* trs) ci_assert(NULL != trs); #if ! CI_CFG_UL_INTERRUPT_HELPER - ci_assert(trs->trusted_lock == (OO_TRUSTED_LOCK_LOCKED | - OO_TRUSTED_LOCK_AWAITING_FREE)); + ci_assert(trs->trusted_lock == + (OO_TRUSTED_LOCK_LOCKED | OO_TRUSTED_LOCK_AWAITING_FREE)); #endif netif = &trs->netif; #if ! CI_CFG_UL_INTERRUPT_HELPER - ci_assert(!in_atomic()); + ci_assert(! in_atomic()); /* Make sure all postponed actions are done and endpoints freed */ flush_workqueue(trs->wq); #endif @@ -5660,11 +5567,10 @@ efab_tcp_helper_rm_free_locked(tcp_helper_resource_t* trs) * and we'll wait for the work to complete as we flush the workqueue * again on tcp_helper_stop. */ - if( !ci_netif_trylock(&trs->netif) ) { + if( ! 
ci_netif_trylock(&trs->netif) ) { /* Cases 1 and 2 */ OO_DEBUG_ERR(ci_log("Stack [%d] released with lock stuck (0x%llx)", - trs->id, - (unsigned long long)trs->netif.state->lock.lock)); + trs->id, (unsigned long long) trs->netif.state->lock.lock)); netif->flags |= CI_NETIF_FLAG_WEDGED; } } @@ -5705,11 +5611,12 @@ efab_tcp_helper_rm_free_locked(tcp_helper_resource_t* trs) /* Who sets n_ep_orphaned to zero, the same code should drop the * associated BASE refcount. */ - if( ci_cas32u_succeed(&netif->n_ep_orphaned, OO_N_EP_ORPHANED_INIT, - n_orphaned) && n_orphaned == 0 ) + if( ci_cas32u_succeed( + &netif->n_ep_orphaned, OO_N_EP_ORPHANED_INIT, n_orphaned) && + n_orphaned == 0 ) oo_thr_ref_drop(trs->ref, OO_THR_REF_BASE); - /* Drop lock so that sockets can proceed towards close. */ + /* Drop lock so that sockets can proceed towards close. */ #if CI_CFG_DESTROY_WEDGED if( ! (netif->flags & CI_NETIF_FLAG_WEDGED) ) #endif @@ -5719,8 +5626,8 @@ efab_tcp_helper_rm_free_locked(tcp_helper_resource_t* trs) /* Don't need atomics here, because only we are permitted to touch * [trusted_lock] when AWAITING_FREE is set. 
*/ - ci_assert(trs->trusted_lock == (OO_TRUSTED_LOCK_LOCKED | - OO_TRUSTED_LOCK_AWAITING_FREE)); + ci_assert(trs->trusted_lock == + (OO_TRUSTED_LOCK_LOCKED | OO_TRUSTED_LOCK_AWAITING_FREE)); trs->trusted_lock = OO_TRUSTED_LOCK_UNLOCKED; #endif /* CI_CFG_UL_INTERRUPT_HELPER*/ oo_thr_ref_drop(trs->ref, OO_THR_REF_BASE); @@ -5740,8 +5647,7 @@ efab_tcp_helper_rm_free_locked(tcp_helper_resource_t* trs) * *--------------------------------------------------------------------*/ -ci_inline void -tcp_helper_stop(tcp_helper_resource_t* trs) +ci_inline void tcp_helper_stop(tcp_helper_resource_t* trs) { int intf_i; @@ -5754,7 +5660,8 @@ tcp_helper_stop(tcp_helper_resource_t* trs) /* stop callbacks from the event queue - wait for any running callback to complete */ - OO_STACK_FOR_EACH_INTF_I(&trs->netif, intf_i) { + OO_STACK_FOR_EACH_INTF_I(&trs->netif, intf_i) + { ef_eventq_timer_clear(ci_netif_vi(&trs->netif, intf_i)); efrm_eventq_kill_callback(tcp_helper_vi(trs, intf_i)); } @@ -5769,7 +5676,7 @@ tcp_helper_stop(tcp_helper_resource_t* trs) #endif OO_DEBUG_TCPH(ci_log("%s [%d]: finished --- all async processes finished", - __FUNCTION__, trs->id)); + __FUNCTION__, trs->id)); } @@ -5798,8 +5705,7 @@ void tcp_helper_dtor(tcp_helper_resource_t* trs) TCP_HELPER_RESOURCE_ASSERT_VALID(trs, 1); OO_DEBUG_TCPH(ci_log("%s [%u]: starting %s", __FUNCTION__, trs->id, - trs->netif.flags & CI_NETIF_FLAG_WEDGED ? - "wedged" : "gracious")); + trs->netif.flags & CI_NETIF_FLAG_WEDGED ? "wedged" : "gracious")); #if ! 
CI_CFG_UL_INTERRUPT_HELPER if( trs->netif.flags & CI_NETIF_FLAG_WEDGED ) { @@ -5827,13 +5733,11 @@ void tcp_helper_dtor(tcp_helper_resource_t* trs) if( ~trs->netif.flags & CI_NETIF_FLAG_WEDGED ) { if( efab_tcp_helper_netif_try_lock(trs, 0) ) { oo_netif_dtor_pkts(&trs->netif); - } - else { + } else { /* Pretend to be wedged and do not check for leaks */ trs->netif.flags |= CI_NETIF_FLAG_WEDGED; OO_DEBUG_ERR(ci_log("Stack [%d] destroyed with lock stuck (0x%llx)", - trs->id, - (unsigned long long)trs->netif.state->lock.lock)); + trs->id, (unsigned long long) trs->netif.state->lock.lock)); /* I believe that after tcp_helper_stop() there are no legitimate * lock holders, so assert() in debug build. See bug 67856 comment 2 * for details why it was not so in the past. */ @@ -5867,8 +5771,8 @@ void tcp_helper_dtor(tcp_helper_resource_t* trs) trs->thc == NULL && #endif trs->trs_ephem_table != NULL ) - tcp_helper_free_ephemeral_ports(trs->trs_ephem_table, - trs->trs_ephem_table_entries); + tcp_helper_free_ephemeral_ports( + trs->trs_ephem_table, trs->trs_ephem_table_entries); /* We just vfree() the consumed table, as the pointers that it contains point * straight into the the table of ephemeral ports itself. */ @@ -5904,21 +5808,19 @@ void tcp_helper_dtor(tcp_helper_resource_t* trs) put_namespaces(trs); rc = ci_id_pool_free(&THR_TABLE.instances, trs->id, &THR_TABLE.lock); - OO_DEBUG_ERR(if (rc) - ci_log("%s [%u]: failed to free instance number", - __FUNCTION__, trs->id)); + OO_DEBUG_ERR(if( rc ) ci_log( + "%s [%u]: failed to free instance number", __FUNCTION__, trs->id)); ci_irqlock_lock(&THR_TABLE.lock, &lock_flags); ci_assert(THR_TABLE.stack_count > 0); --THR_TABLE.stack_count; ci_irqlock_unlock(&THR_TABLE.lock, &lock_flags); - OO_DEBUG_TCPH(ci_log("%s [%u]: finished", __FUNCTION__, trs->id)); + OO_DEBUG_TCPH(ci_log("%s [%u]: finished", __FUNCTION__, trs->id)); CI_FREE_OBJ(trs); } - /*-------------------------------------------------------------------- *! 
* TCP driver management -- here for now while it needs a NIC to be around @@ -5946,8 +5848,7 @@ static int efab_is_onloaded(void* ctx, struct net* netns, ci_ifid_t ifindex) #endif -int -efab_tcp_driver_ctor() +int efab_tcp_driver_ctor() { int rc = 0; @@ -5968,13 +5869,13 @@ efab_tcp_driver_ctor() * So, we use bound work queue without any additional flags. */ CI_GLOBAL_WORKQUEUE = alloc_workqueue("onload-wqueue", WQ_SYSFS, 0); - if (CI_GLOBAL_WORKQUEUE == NULL) { + if( CI_GLOBAL_WORKQUEUE == NULL ) { rc = -ENOMEM; goto fail_wq; } - + /* Create TCP helpers table */ - if ((rc = thr_table_ctor(&efab_tcp_driver.thr_table)) < 0) + if( (rc = thr_table_ctor(&efab_tcp_driver.thr_table)) < 0 ) goto fail_thr_table; if( (rc = oo_filter_ns_manager_ctor(&efab_tcp_driver)) < 0 ) @@ -5994,8 +5895,8 @@ efab_tcp_driver_ctor() if( efab_tcp_driver.timesync_page == NULL ) goto fail_timesync_alloc; - efab_tcp_driver.timesync = vmap(&efab_tcp_driver.timesync_page, 1, - VM_USERMAP, PAGE_KERNEL); + efab_tcp_driver.timesync = + vmap(&efab_tcp_driver.timesync_page, 1, VM_USERMAP, PAGE_KERNEL); if( (rc = oo_timesync_ctor(efab_tcp_driver.timesync)) < 0 ) goto fail_timesync; @@ -6036,8 +5937,7 @@ efab_tcp_driver_ctor() } /* Destroy all existing stacks. 
*/ -void -efab_tcp_driver_stop(void) +void efab_tcp_driver_stop(void) { OO_DEBUG_TCPH(ci_log("%s: kill stacks", __FUNCTION__)); @@ -6046,8 +5946,7 @@ efab_tcp_driver_stop(void) flush_workqueue(CI_GLOBAL_WORKQUEUE); } -void -efab_tcp_driver_dtor(void) +void efab_tcp_driver_dtor(void) { OO_DEBUG_TCPH(ci_log("%s: free resources", __FUNCTION__)); @@ -6056,9 +5955,9 @@ efab_tcp_driver_dtor(void) ci_id_pool_dtor(&efab_tcp_driver.thr_table.instances); #ifndef NDEBUG - if (ci_atomic_read(&efab_tcp_driver.sendpage_pinpages_n) != 0) { - ci_log("%s: ERROR: sendpage_pinpages_n is %d at destruction", - __FUNCTION__, ci_atomic_read(&efab_tcp_driver.sendpage_pinpages_n)); + if( ci_atomic_read(&efab_tcp_driver.sendpage_pinpages_n) != 0 ) { + ci_log("%s: ERROR: sendpage_pinpages_n is %d at destruction", __FUNCTION__, + ci_atomic_read(&efab_tcp_driver.sendpage_pinpages_n)); } #endif @@ -6078,13 +5977,14 @@ efab_tcp_driver_dtor(void) } -static int -add_ep(tcp_helper_resource_t* trs, unsigned id, tcp_helper_endpoint_t* ep) +static int add_ep( + tcp_helper_resource_t* trs, unsigned id, tcp_helper_endpoint_t* ep) { ci_netif* ni = &trs->netif; citp_waitable_obj* wo; - if( id < ni->ep_tbl_n ) return -1; + if( id < ni->ep_tbl_n ) + return -1; ci_assert_equal(id, ni->ep_tbl_n); tcp_helper_endpoint_ctor(ep, trs, id); @@ -6095,15 +5995,14 @@ add_ep(tcp_helper_resource_t* trs, unsigned id, tcp_helper_endpoint_t* ep) ni->state->n_ep_bufs = ++ni->ep_tbl_n; wo = SP_TO_WAITABLE_OBJ(ni, ep->id); - CI_ZERO(wo); /* ??fixme */ + CI_ZERO(wo); /* ??fixme */ citp_waitable_init(ni, &wo->waitable, id); wo->waitable.state = CI_TCP_STATE_FREE; citp_waitable_add_free_list(ni, &wo->waitable); return 0; } -static int -install_socks(tcp_helper_resource_t* trs, unsigned id, int num) +static int install_socks(tcp_helper_resource_t* trs, unsigned id, int num) { tcp_helper_endpoint_t** eps; ci_irqlock_state_t lock_flags; @@ -6118,14 +6017,15 @@ install_socks(tcp_helper_resource_t* trs, unsigned id, int num) eps[i] = 
CI_ALLOC_OBJ(tcp_helper_endpoint_t); if( ! eps[i] ) { OO_DEBUG_ERR(ci_log("%s: allocation failed", __FUNCTION__)); - while( i-- ) ci_free(eps[i]); + while( i-- ) + ci_free(eps[i]); vfree(eps); return -ENOMEM; } } ci_irqlock_lock(&THR_TABLE.lock, &lock_flags); - for( i = 0; i < num; ++i, ++id ){ + for( i = 0; i < num; ++i, ++id ) { OO_DEBUG_SHM(ci_log("%s: add ep %d", __FUNCTION__, id)); if( add_ep(trs, id, eps[i]) == 0 ) eps[i] = NULL; @@ -6148,11 +6048,12 @@ int efab_tcp_helper_more_socks(tcp_helper_resource_t* trs) ci_netif* ni = &trs->netif; int rc; - if( ni->ep_tbl_n >= ni->ep_tbl_max ) return -ENOSPC; + if( ni->ep_tbl_n >= ni->ep_tbl_max ) + return -ENOSPC; if( ni->flags & CI_NETIF_FLAG_IN_DL_CONTEXT ) { - ef_eplock_holder_set_flag(&ni->state->lock, - CI_EPLOCK_NETIF_NEED_SOCK_BUFS); + ef_eplock_holder_set_flag( + &ni->state->lock, CI_EPLOCK_NETIF_NEED_SOCK_BUFS); return -EBUSY; } @@ -6162,8 +6063,8 @@ int efab_tcp_helper_more_socks(tcp_helper_resource_t* trs) return rc; } - return install_socks(trs, ni->ep_tbl_n, - EP_BUF_PER_PAGE << OO_SHARED_BUFFER_CHUNK_ORDER); + return install_socks( + trs, ni->ep_tbl_n, EP_BUF_PER_PAGE << OO_SHARED_BUFFER_CHUNK_ORDER); } @@ -6176,8 +6077,7 @@ int efab_tcp_helper_clear_epcache(tcp_helper_resource_t* trs) #endif -static void -efab_tcp_helper_no_more_bufs(tcp_helper_resource_t* trs) +static void efab_tcp_helper_no_more_bufs(tcp_helper_resource_t* trs) { /* We've failed to allocate more packet buffers -- we're out of resources * (probably buffer table). We don't want to keep trying to allocate and @@ -6192,35 +6092,35 @@ efab_tcp_helper_no_more_bufs(tcp_helper_resource_t* trs) ci_netif_set_rxq_limit(ni); if( ++ni->state->stats.bufset_alloc_nospace == 1 ) - OO_DEBUG_ERR(ci_log(FN_FMT "Failed to allocate packet buffers: " - "no more buffer table entries. 
", - FN_PRI_ARGS(&trs->netif)); - ci_log(FN_FMT "New limits: max_packets=%d rx=%d tx=%d " - "rxq_limit=%d", FN_PRI_ARGS(ni), - NI_OPTS(ni).max_packets, NI_OPTS(ni).max_rx_packets, - NI_OPTS(ni).max_tx_packets, NI_OPTS(ni).rxq_limit)); + OO_DEBUG_ERR( + ci_log(FN_FMT "Failed to allocate packet buffers: " + "no more buffer table entries. ", + FN_PRI_ARGS(&trs->netif)); + ci_log(FN_FMT "New limits: max_packets=%d rx=%d tx=%d " + "rxq_limit=%d", + FN_PRI_ARGS(ni), NI_OPTS(ni).max_packets, + NI_OPTS(ni).max_rx_packets, NI_OPTS(ni).max_tx_packets, + NI_OPTS(ni).rxq_limit)); } /* hw_addrs below is structured as following: * hw_addrs[n_page + intf_i * n_hw_pages] */ -static int -efab_tcp_helper_iobufset_map(tcp_helper_resource_t* trs, - struct oo_buffer_pages* pages, - int n_hw_pages, - struct oo_iobufset** all_out, - uint64_t* hw_addrs, int* page_order) +static int efab_tcp_helper_iobufset_map(tcp_helper_resource_t* trs, + struct oo_buffer_pages* pages, int n_hw_pages, + struct oo_iobufset** all_out, uint64_t* hw_addrs, int* page_order) { ci_netif* ni = &trs->netif; int rc, intf_i; - struct efrm_pd *first_pd = NULL; - struct oo_iobufset *first_iobuf = NULL; + struct efrm_pd* first_pd = NULL; + struct oo_iobufset* first_iobuf = NULL; int map_order = INT_MAX; - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { - struct efrm_pd *pd = efrm_vi_get_pd(tcp_helper_vi(trs, intf_i)); - struct oo_iobufset *iobuf; + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { + struct efrm_pd* pd = efrm_vi_get_pd(tcp_helper_vi(trs, intf_i)); + struct oo_iobufset* iobuf; int cur_map_order; if( first_pd != NULL && efrm_pd_share_dma_mapping(first_pd, pd) ) { @@ -6228,14 +6128,13 @@ efab_tcp_helper_iobufset_map(tcp_helper_resource_t* trs, all_out[intf_i] = first_iobuf; o_iobufset_resource_ref(first_iobuf); memcpy(&hw_addrs[intf_i * n_hw_pages], hw_addrs, - sizeof(hw_addrs[0]) * n_hw_pages); + sizeof(hw_addrs[0]) * n_hw_pages); continue; } rc = oo_iobufset_resource_alloc(pages, pd, &iobuf, - &hw_addrs[intf_i * 
n_hw_pages], - intfs_suspended(trs) & (1 << intf_i), - &cur_map_order); + &hw_addrs[intf_i * n_hw_pages], intfs_suspended(trs) & (1 << intf_i), + &cur_map_order); if( rc < 0 ) { while( --intf_i >= 0 ) oo_iobufset_resource_release(all_out[intf_i], 0); @@ -6254,25 +6153,25 @@ efab_tcp_helper_iobufset_map(tcp_helper_resource_t* trs, } -static int -efab_tcp_helper_min_nics_order(tcp_helper_resource_t* trs) +static int efab_tcp_helper_min_nics_order(tcp_helper_resource_t* trs) { int i, intf_i; int min_nic_order; int min_nics_order = 0; - OO_STACK_FOR_EACH_INTF_I(&trs->netif, intf_i) { - struct efhw_nic *nic = - efrm_client_get_nic(trs->nic[intf_i].thn_oo_nic->efrm_client); + OO_STACK_FOR_EACH_INTF_I(&trs->netif, intf_i) + { + struct efhw_nic* nic = + efrm_client_get_nic(trs->nic[intf_i].thn_oo_nic->efrm_client); /* If we allocate pages in buffer mode with order less than minimum NIC * page order then we cannot find acceptable NIC buffer table. * Minimum NIC page order in EF10 and production Riverhead is 0. 
*/ min_nic_order = HW_PAGES_PER_SET_S; if( NI_OPTS(&trs->netif).packet_buffer_mode == 0 ) { - for (i = 0; i < efhw_nic_buffer_table_orders_num(nic); i++) - min_nic_order = min(min_nic_order, efhw_nic_buffer_table_orders(nic)[i]); - } - else { + for( i = 0; i < efhw_nic_buffer_table_orders_num(nic); i++ ) + min_nic_order = + min(min_nic_order, efhw_nic_buffer_table_orders(nic)[i]); + } else { min_nic_order = 0; } @@ -6284,21 +6183,18 @@ efab_tcp_helper_min_nics_order(tcp_helper_resource_t* trs) } -static int -efab_tcp_helper_iobufset_alloc(tcp_helper_resource_t* trs, - struct oo_iobufset** all_out, - struct oo_buffer_pages** pages_out, - uint64_t* hw_addrs, - int* page_order) +static int efab_tcp_helper_iobufset_alloc(tcp_helper_resource_t* trs, + struct oo_iobufset** all_out, struct oo_buffer_pages** pages_out, + uint64_t* hw_addrs, int* page_order) { ci_netif* ni = &trs->netif; int rc, intf_i; - struct oo_buffer_pages *pages; + struct oo_buffer_pages* pages; int flags; int min_nics_order = efab_tcp_helper_min_nics_order(trs); OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - all_out[intf_i] = NULL; + all_out[intf_i] = NULL; *pages_out = NULL; #ifdef OO_DO_HUGE_PAGES @@ -6311,9 +6207,9 @@ efab_tcp_helper_iobufset_alloc(tcp_helper_resource_t* trs, * Otherwise, we'll avoid them. */ if( ni->flags & CI_NETIF_FLAG_HUGE_PAGES_FAILED ) flags |= OO_IOBUFSET_FLAG_HUGE_PAGE_FAILED; - if( !in_atomic() && current->mm != NULL ) { + if( ! in_atomic() && current->mm != NULL ) { #ifdef EFRM_DO_NAMESPACES - struct nsproxy *ns; + struct nsproxy* ns; ns = task_nsproxy_start(current); /* Use huge pages if we are in the same namespace only. 
* ipc_ns has a pointer to user_ns, so we may compare uids @@ -6334,22 +6230,22 @@ efab_tcp_helper_iobufset_alloc(tcp_helper_resource_t* trs, #endif } #endif - rc = oo_iobufset_pages_alloc(HW_PAGES_PER_SET_S, min_nics_order, &flags, - &pages); + rc = oo_iobufset_pages_alloc( + HW_PAGES_PER_SET_S, min_nics_order, &flags, &pages); if( rc != 0 ) return rc; #if CI_CFG_PKTS_AS_HUGE_PAGES - if( (flags & OO_IOBUFSET_FLAG_HUGE_PAGE_FAILED) && - !(ni->flags & CI_NETIF_FLAG_HUGE_PAGES_FAILED) ) { - NI_LOG(ni, RESOURCE_WARNINGS, - "[%s]: unable to allocate huge page, using standard pages instead", - ni->state->pretty_name); - ni->flags |= CI_NETIF_FLAG_HUGE_PAGES_FAILED; - } + if( (flags & OO_IOBUFSET_FLAG_HUGE_PAGE_FAILED) && + ! (ni->flags & CI_NETIF_FLAG_HUGE_PAGES_FAILED) ) { + NI_LOG(ni, RESOURCE_WARNINGS, + "[%s]: unable to allocate huge page, using standard pages instead", + ni->state->pretty_name); + ni->flags |= CI_NETIF_FLAG_HUGE_PAGES_FAILED; + } #endif - rc = efab_tcp_helper_iobufset_map(trs, pages, 1 << HW_PAGES_PER_SET_S, - all_out, hw_addrs, page_order); + rc = efab_tcp_helper_iobufset_map( + trs, pages, 1 << HW_PAGES_PER_SET_S, all_out, hw_addrs, page_order); if( rc < 0 ) { oo_iobufset_pages_release(pages); return rc; @@ -6369,16 +6265,16 @@ static void efab_put_pages(struct page** pages, size_t n) static long efab_get_all_user_pages(unsigned long base, long max_pages, - struct page** pages, unsigned gup_flags) + struct page** pages, unsigned gup_flags) { long n; long rc; mmap_read_lock(current->mm); for( n = 0; n < max_pages; n += rc ) { - rc = pin_user_pages(base + n * PAGE_SIZE, max_pages - n, gup_flags, - pages + n, NULL); - if (rc <= 0) { + rc = pin_user_pages( + base + n * PAGE_SIZE, max_pages - n, gup_flags, pages + n, NULL); + if( rc <= 0 ) { efab_put_pages(pages, n); mmap_read_unlock(current->mm); return rc ? 
rc : -EFAULT; @@ -6404,8 +6300,8 @@ static long efab_get_all_user_pages(unsigned long base, long max_pages, * users, and if (as they normally would) users pass in correctly mmapped * and aligned hugepages then no splitting happens and they never notice * this fixup code. */ -static long efab_get_unstraddled_user_pages(unsigned long base, long max_pages, - struct page** pages) +static long efab_get_unstraddled_user_pages( + unsigned long base, long max_pages, struct page** pages) { long rc; #ifdef FOLL_SPLIT @@ -6421,7 +6317,8 @@ static long efab_get_unstraddled_user_pages(unsigned long base, long max_pages, /* * From experiments: * It is not needed for linux<=4.9, and it is not needed for linux>=5.4. - * It is definitely needed for linux-4.18 (RHEL 8) and linux-4.19 (Debain 10). + * It is definitely needed for linux-4.18 (RHEL 8) and linux-4.19 (Debain + * 10). * * However let's be on the safe side: FOLL_SPLIT is defined for * 2.6.38 <= linux <= 5.12. @@ -6429,7 +6326,7 @@ static long efab_get_unstraddled_user_pages(unsigned long base, long max_pages, if( PageTail(pages[0]) ) { /* Beginning of the region straddles a hugepage. Put those pages and * re-get them with FOLL_SPLIT */ - for( count = 1; count < max_pages; ++count) + for( count = 1; count < max_pages; ++count ) if( ! PageTail(pages[count]) ) break; efab_put_pages(pages, count); @@ -6450,11 +6347,11 @@ static long efab_get_unstraddled_user_pages(unsigned long base, long max_pages, count = max_pages; while( --count && PageTail(pages[count]) ) ; - ci_assert(!PageTail(pages[count])); + ci_assert(! 
PageTail(pages[count])); if( count + (1L << compound_order(pages[count])) != max_pages ) { efab_put_pages(pages + count, max_pages - count); rc = efab_get_all_user_pages(base + count * PAGE_SIZE, max_pages - count, - pages + count, FOLL_WRITE | FOLL_SPLIT); + pages + count, FOLL_WRITE | FOLL_SPLIT); if( rc < 0 ) { efab_put_pages(pages, count); return rc; @@ -6472,9 +6369,8 @@ static long efab_get_unstraddled_user_pages(unsigned long base, long max_pages, int efab_tcp_helper_map_usermem(tcp_helper_resource_t* trs, - struct oo_iobufs_usermem* ioum, - unsigned long user_base, int n_pages, - uint64_t** hw_addrs_out) + struct oo_iobufs_usermem* ioum, unsigned long user_base, int n_pages, + uint64_t** hw_addrs_out) { #if PAGE_SIZE != EFHW_NIC_PAGE_SIZE /* Not implemented, solely for simplicity */ @@ -6502,7 +6398,7 @@ int efab_tcp_helper_map_usermem(tcp_helper_resource_t* trs, mmap_read_unlock(current->mm); if( rc < 0 ) { NI_LOG(ni, RESOURCE_WARNINGS, "[%s]: pin_user_pages(%d) returned %ld", - __FUNCTION__, n_pages, rc); + __FUNCTION__, n_pages, rc); goto fail1; } @@ -6512,7 +6408,7 @@ int efab_tcp_helper_map_usermem(tcp_helper_resource_t* trs, ioum->n_groups = 0; for( i = 0; i < n_pages; ) { int order = compound_order(pages[i]); - ci_assert(!PageTail(pages[i])); + ci_assert(! 
PageTail(pages[i])); if( order != last_order ) ++ioum->n_groups; last_order = order; @@ -6524,14 +6420,13 @@ int efab_tcp_helper_map_usermem(tcp_helper_resource_t* trs, ci_assert_ge(i, n_pages); #endif - ioum->groups = kmalloc_array(ioum->n_groups, sizeof(*ioum->groups), - GFP_KERNEL); - hw_addrs = kmalloc(sizeof(hw_addrs[0]) * n_pages * oo_stack_intf_max(ni), - GFP_KERNEL); + ioum->groups = + kmalloc_array(ioum->n_groups, sizeof(*ioum->groups), GFP_KERNEL); + hw_addrs = kmalloc( + sizeof(hw_addrs[0]) * n_pages * oo_stack_intf_max(ni), GFP_KERNEL); if( ioum->n_groups > 1 ) - tmp_hw_addrs = kmalloc(sizeof(hw_addrs[0]) * n_pages * - oo_stack_intf_max(ni), - GFP_KERNEL); + tmp_hw_addrs = kmalloc( + sizeof(hw_addrs[0]) * n_pages * oo_stack_intf_max(ni), GFP_KERNEL); else tmp_hw_addrs = hw_addrs; if( ioum->groups == NULL || hw_addrs == NULL || tmp_hw_addrs == NULL ) { @@ -6547,7 +6442,7 @@ int efab_tcp_helper_map_usermem(tcp_helper_resource_t* trs, int intf_i; if( EFHW_GFP_ORDER_TO_NIC_ORDER(order) < min_nics_order ) { - rc = -EMSGSIZE; /* Same as oo_bufpage_alloc() */ + rc = -EMSGSIZE; /* Same as oo_bufpage_alloc() */ goto fail4; } @@ -6562,18 +6457,19 @@ int efab_tcp_helper_map_usermem(tcp_helper_resource_t* trs, for( j = 0; j < group_pages >> order; ++j ) g->pages->pages[j] = pages[i + (j << order)]; - rc = efab_tcp_helper_iobufset_map(trs, g->pages, group_pages, - g->all, tmp_hw_addrs, NULL); + rc = efab_tcp_helper_iobufset_map( + trs, g->pages, group_pages, g->all, tmp_hw_addrs, NULL); if( rc < 0 ) { oo_iobufset_kfree(g->pages); goto fail4; } if( tmp_hw_addrs != hw_addrs ) { - OO_STACK_FOR_EACH_INTF_I(&trs->netif, intf_i) { + OO_STACK_FOR_EACH_INTF_I(&trs->netif, intf_i) + { memcpy(hw_addrs + i + intf_i * n_pages, - tmp_hw_addrs + intf_i * group_pages, - sizeof(hw_addrs[0]) * CI_MIN(group_pages, n_pages - i)); + tmp_hw_addrs + intf_i * group_pages, + sizeof(hw_addrs[0]) * CI_MIN(group_pages, n_pages - i)); } } @@ -6597,17 +6493,15 @@ int 
efab_tcp_helper_map_usermem(tcp_helper_resource_t* trs, int order = compound_order(g->pages->pages[0]); int group_pages = g->pages->n_bufs << order; - if (order) { - /* The 2 CI_MIN()s below are needed for ! FOLL_SPLIT case only, - * i.e. for linux >= 5.13. */ + if( order ) { + /* The 2 CI_MIN()s below are needed for ! FOLL_SPLIT case only, + * i.e. for linux >= 5.13. */ int i_max = CI_MIN(n_pages, i + group_pages); while( i < i_max ) { - efab_put_pages(pages + i + 1, - CI_MIN(1 << order, i_max - i) - 1); + efab_put_pages(pages + i + 1, CI_MIN(1 << order, i_max - i) - 1); i += 1 << order; } - } - else { + } else { i += group_pages; } } @@ -6623,30 +6517,30 @@ int efab_tcp_helper_map_usermem(tcp_helper_resource_t* trs, *hw_addrs_out = hw_addrs; return 0; - fail4: +fail4: while( --group_i >= 0 ) { struct oo_iobufs_usermem_group* g = &ioum->groups[group_i]; OO_STACK_FOR_EACH_INTF_I(ni, i) - if( g->all[i] ) - oo_iobufset_resource_release(g->all[i], 0); + if( g->all[i] ) + oo_iobufset_resource_release(g->all[i], 0); oo_iobufset_kfree(g->pages); } - fail3: +fail3: /* kfree() handles NULL, so we do not care to create fail2 */ kfree(hw_addrs); if( tmp_hw_addrs != hw_addrs ) kfree(tmp_hw_addrs); kfree(ioum->groups); efab_put_pages(pages, n_pages); - fail1: +fail1: kfree(pages); return rc; #endif } -void efab_tcp_helper_unmap_usermem(tcp_helper_resource_t* trs, - struct oo_iobufs_usermem* ioum) +void efab_tcp_helper_unmap_usermem( + tcp_helper_resource_t* trs, struct oo_iobufs_usermem* ioum) { ci_netif* ni = &trs->netif; int group_i; @@ -6655,8 +6549,8 @@ void efab_tcp_helper_unmap_usermem(tcp_helper_resource_t* trs, for( group_i = 0; group_i < ioum->n_groups; ++group_i ) { struct oo_iobufs_usermem_group* g = &ioum->groups[group_i]; OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - if( g->all[intf_i] ) - oo_iobufset_resource_release(g->all[intf_i], 0); + if( g->all[intf_i] ) + oo_iobufset_resource_release(g->all[intf_i], 0); efab_put_pages(g->pages->pages, g->pages->n_bufs); 
oo_iobufset_kfree(g->pages); } @@ -6664,12 +6558,11 @@ void efab_tcp_helper_unmap_usermem(tcp_helper_resource_t* trs, } -int -efab_tcp_helper_more_bufs(tcp_helper_resource_t* trs) +int efab_tcp_helper_more_bufs(tcp_helper_resource_t* trs) { struct oo_iobufset* iobrs[CI_CFG_MAX_INTERFACES]; struct oo_buffer_pages* pages; - uint64_t *hw_addrs; + uint64_t* hw_addrs; ci_irqlock_state_t lock_flags; ci_netif* ni = &trs->netif; int i, rc, bufset_id, intf_i, page_order; @@ -6683,21 +6576,20 @@ efab_tcp_helper_more_bufs(tcp_helper_resource_t* trs) return -ENOSPC; if( ni->flags & CI_NETIF_FLAG_IN_DL_CONTEXT ) { - ef_eplock_holder_set_flag(&ni->state->lock, - CI_EPLOCK_NETIF_NEED_PKT_SET); + ef_eplock_holder_set_flag(&ni->state->lock, CI_EPLOCK_NETIF_NEED_PKT_SET); return -EBUSY; } - hw_addrs = ci_vmalloc(sizeof(uint64_t) * (1 << HW_PAGES_PER_SET_S) * - CI_CFG_MAX_INTERFACES); + hw_addrs = ci_vmalloc( + sizeof(uint64_t) * (1 << HW_PAGES_PER_SET_S) * CI_CFG_MAX_INTERFACES); if( hw_addrs == NULL ) { ci_log("%s: [%d] out of memory", __func__, trs->id); return -ENOMEM; } - rc = efab_tcp_helper_iobufset_alloc(trs, iobrs, &pages, hw_addrs, - &page_order); - if(CI_UNLIKELY( rc < 0 )) { + rc = efab_tcp_helper_iobufset_alloc( + trs, iobrs, &pages, hw_addrs, &page_order); + if( CI_UNLIKELY(rc < 0) ) { /* With highly fragmented memory, iobufset_alloc may fail in * atomic context but succeed later in non-atomic context. 
* We should somehow differentiate temporary failures (atomic @@ -6709,15 +6601,15 @@ efab_tcp_helper_more_bufs(tcp_helper_resource_t* trs) else { ++ni->state->stats.bufset_alloc_fails; NI_LOG(ni, RESOURCE_WARNINGS, - FN_FMT "Failed to allocate packet buffers (%d)", - FN_PRI_ARGS(&trs->netif), rc); + FN_FMT "Failed to allocate packet buffers (%d)", + FN_PRI_ARGS(&trs->netif), rc); } ci_vfree(hw_addrs); return rc; } /* check we get the size we are expecting */ OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - ci_assert(iobrs[intf_i] != NULL); + ci_assert(iobrs[intf_i] != NULL); ci_assert(pages != NULL); /* Install the new buffer allocation, protecting against multi-threads. */ @@ -6727,7 +6619,7 @@ efab_tcp_helper_more_bufs(tcp_helper_resource_t* trs) if( ni->pkt_sets_n == ni->pkt_sets_max ) { ci_irqlock_unlock(&THR_TABLE.lock, &lock_flags); OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - oo_iobufset_resource_release(iobrs[intf_i], 0); + oo_iobufset_resource_release(iobrs[intf_i], 0); oo_iobufset_pages_release(pages); ci_vfree(hw_addrs); return -ENOSPC; @@ -6742,13 +6634,12 @@ efab_tcp_helper_more_bufs(tcp_helper_resource_t* trs) CITP_STATS_NETIF_INC(ni, pkt_huge_pages); #endif OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - ni->nic_hw[intf_i].pkt_rs[bufset_id] = iobrs[intf_i]; + ni->nic_hw[intf_i].pkt_rs[bufset_id] = iobrs[intf_i]; ci_irqlock_unlock(&THR_TABLE.lock, &lock_flags); OO_DEBUG_SHM({ int i; ci_log("[%d] allocated new bufset id %d, current=%d n_freepkts=%d", - NI_ID(ni), bufset_id, ni->packets->id, - ni->packets->n_free); + NI_ID(ni), bufset_id, ni->packets->id, ni->packets->n_free); for( i = 0; i < bufset_id; i++ ) ci_log("\tpkt_set[%i]: n_free=%d", i, ni->packets->set[i].n_free); }); @@ -6765,12 +6656,11 @@ efab_tcp_helper_more_bufs(tcp_helper_resource_t* trs) #endif ni->packets->set[bufset_id].dma_addr_base = ni->dma_addr_next; if( page_order > CI_CFG_PKTS_PER_SET_S ) { - /* page_order=INT_MAX means that there are no hardware interfaces associated - * with this stack. 
*/ + /* page_order=INT_MAX means that there are no hardware interfaces + * associated with this stack. */ ci_assert_equal(page_order, INT_MAX); page_order = CI_CFG_PKTS_PER_SET_S; - } - else + } else page_order += ci_log2_ge(PAGE_SIZE / CI_CFG_PKT_BUF_SIZE, 0); ni->packets->set[bufset_id].page_order = page_order; ni->dma_addr_next += (PKTS_PER_SET >> page_order) * CI_CFG_MAX_INTERFACES; @@ -6794,9 +6684,10 @@ efab_tcp_helper_more_bufs(tcp_helper_resource_t* trs) pkt->next = ni->packets->set[bufset_id].free; ni->packets->set[bufset_id].free = OO_PKT_P(pkt); } - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { - set_pkt_bufset_hwaddrs(ni, bufset_id, intf_i, - hw_addrs + intf_i * (1 << HW_PAGES_PER_SET_S)); + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { + set_pkt_bufset_hwaddrs( + ni, bufset_id, intf_i, hw_addrs + intf_i * (1 << HW_PAGES_PER_SET_S)); } ci_vfree(hw_addrs); @@ -6807,27 +6698,27 @@ efab_tcp_helper_more_bufs(tcp_helper_resource_t* trs) #if ! CI_CFG_UL_INTERRUPT_HELPER -void -tcp_helper_rm_dump(oo_fd_flags fd_flags, oo_sp sock_id, - tcp_helper_resource_t* trs, const char *line_prefix) +void tcp_helper_rm_dump(oo_fd_flags fd_flags, oo_sp sock_id, + tcp_helper_resource_t* trs, const char* line_prefix) { ci_netif* ni; int intf_i; unsigned i; if( trs == NULL ) { - ci_dllink *link; - CI_DLLIST_FOR_EACH(link, &THR_TABLE.all_stacks) { + ci_dllink* link; + CI_DLLIST_FOR_EACH(link, &THR_TABLE.all_stacks) + { trs = CI_CONTAINER(tcp_helper_resource_t, all_stacks_link, link); tcp_helper_rm_dump(OO_FDFLAG_STACK, OO_SP_NULL, trs, line_prefix); for( i = 0; i < trs->netif.ep_tbl_n; ++i ) - if (trs->netif.ep_tbl[i]) { - ci_sock_cmn *s = ID_TO_SOCK(&trs->netif, i); - if (s->b.state == CI_TCP_STATE_FREE || s->b.state == CI_TCP_CLOSED) + if( trs->netif.ep_tbl[i] ) { + ci_sock_cmn* s = ID_TO_SOCK(&trs->netif, i); + if( s->b.state == CI_TCP_STATE_FREE || s->b.state == CI_TCP_CLOSED ) continue; - tcp_helper_rm_dump(s->b.state == CI_TCP_STATE_UDP ? 
- OO_FDFLAG_EP_UDP : OO_FDFLAG_EP_TCP, - OO_SP_FROM_INT(&trs->netif, i), trs, line_prefix); + tcp_helper_rm_dump(s->b.state == CI_TCP_STATE_UDP ? OO_FDFLAG_EP_UDP + : OO_FDFLAG_EP_TCP, + OO_SP_FROM_INT(&trs->netif, i), trs, line_prefix); } } return; @@ -6835,20 +6726,19 @@ tcp_helper_rm_dump(oo_fd_flags fd_flags, oo_sp sock_id, ni = &trs->netif; - ci_log("%sfd "OO_FDFLAG_FMT, line_prefix, OO_FDFLAG_ARG(fd_flags)); + ci_log("%sfd " OO_FDFLAG_FMT, line_prefix, OO_FDFLAG_ARG(fd_flags)); if( fd_flags & OO_FDFLAG_STACK ) { ci_log("%smmap_bytes=%x", line_prefix, trs->mem_mmap_bytes); - } - else if( fd_flags & OO_FDFLAG_EP_MASK ) { + } else if( fd_flags & OO_FDFLAG_EP_MASK ) { ci_log("%sendpoint with id=%u", line_prefix, OO_SP_FMT(sock_id)); citp_waitable_dump(ni, SP_TO_WAITABLE(ni, sock_id), line_prefix); } - ci_log("%sref "OO_THR_REF_FMT, line_prefix, OO_THR_REF_ARG(trs->ref)); + ci_log("%sref " OO_THR_REF_FMT, line_prefix, OO_THR_REF_ARG(trs->ref)); OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - ci_log("%svi[%d]: %d", line_prefix, intf_i, - ef_vi_instance(ci_netif_vi(ni, intf_i))); + ci_log("%svi[%d]: %d", line_prefix, intf_i, + ef_vi_instance(ci_netif_vi(ni, intf_i))); } #endif @@ -6869,13 +6759,14 @@ tcp_helper_rm_dump(oo_fd_flags fd_flags, oo_sp sock_id, static void defer_poll_and_prime(tcp_helper_resource_t* trs) { if( efab_tcp_helper_netif_lock_or_set_flags(trs, - OO_TRUSTED_LOCK_NEED_POLL | OO_TRUSTED_LOCK_PRIME_IF_IDLE, - CI_EPLOCK_NETIF_NEED_POLL | CI_EPLOCK_NETIF_PRIME_IF_IDLE, 1) ) - tcp_helper_defer_dl2work(trs, OO_THR_AFLAG_POLL_AND_PRIME); + OO_TRUSTED_LOCK_NEED_POLL | OO_TRUSTED_LOCK_PRIME_IF_IDLE, + CI_EPLOCK_NETIF_NEED_POLL | CI_EPLOCK_NETIF_PRIME_IF_IDLE, 1) ) + tcp_helper_defer_dl2work(trs, OO_THR_AFLAG_POLL_AND_PRIME); } -static int tcp_helper_wakeup(tcp_helper_resource_t* trs, int intf_i, int budget) +static int tcp_helper_wakeup( + tcp_helper_resource_t* trs, int intf_i, int budget) { ci_netif* ni = &trs->netif; int n = 0, prime_async; @@ -6926,14 +6817,12 
@@ static int tcp_helper_wakeup(tcp_helper_resource_t* trs, int intf_i, int budget) if( ni->state->poll_did_wake ) { prime_async = 0; CITP_STATS_NETIF_INC(ni, interrupt_wakes); - } - else { + } else { oo_inject_packets_kernel_force(ni); } efab_tcp_helper_netif_unlock(trs, 1); - } - else { + } else { /* Couldn't get the lock. We take this as evidence that another thread * is alive and doing stuff, so no need to re-enable interrupts. The * EF_INT_REPRIME option overrides. @@ -6965,8 +6854,7 @@ static int tcp_helper_wakeup(tcp_helper_resource_t* trs, int intf_i, int budget) ! (ni->state->nic[intf_i].oo_vi_flags & OO_VI_FLAGS_RX_SHARED) ) prime_async = 0; } - } - else { + } else { CITP_STATS_NETIF_INC(ni, interrupt_no_events); } @@ -6983,7 +6871,8 @@ static int tcp_helper_wakeup(tcp_helper_resource_t* trs, int intf_i, int budget) } -static int tcp_helper_timeout(tcp_helper_resource_t* trs, int intf_i, int budget) +static int tcp_helper_timeout( + tcp_helper_resource_t* trs, int intf_i, int budget) { int n = 0; #if CI_CFG_HW_TIMER @@ -7008,7 +6897,7 @@ static int tcp_helper_timeout(tcp_helper_resource_t* trs, int intf_i, int budget ci_frc64(&ni->state->evq_last_prime); if( NI_OPTS(ni).timer_usec != 0 ) OO_STACK_FOR_EACH_INTF_I(ni, i) - ef_eventq_timer_prime(ci_netif_vi(ni, i), NI_OPTS(ni).timer_usec); + ef_eventq_timer_prime(ci_netif_vi(ni, i), NI_OPTS(ni).timer_usec); if( ci_netif_intf_has_event(ni, intf_i) ) { if( efab_tcp_helper_netif_try_lock(trs, 1) ) { @@ -7037,25 +6926,23 @@ static int tcp_helper_timeout(tcp_helper_resource_t* trs, int intf_i, int budget } oo_inject_packets_kernel_force(ni); efab_tcp_helper_netif_unlock(trs, 1); - } - else { + } else { CITP_STATS_NETIF_INC(ni, timeout_interrupt_lock_contends); } - } - else { + } else { CITP_STATS_NETIF_INC(ni, timeout_interrupt_no_events); } #endif return n; } -static int oo_handle_wakeup_or_timeout(void* context, int is_timeout, - struct efhw_nic* nic, int budget) +static int oo_handle_wakeup_or_timeout( + void* 
context, int is_timeout, struct efhw_nic* nic, int budget) { struct tcp_helper_nic* tcph_nic = context; tcp_helper_resource_t* trs; - trs = CI_CONTAINER(tcp_helper_resource_t, nic[tcph_nic->thn_intf_i], - tcph_nic); + trs = + CI_CONTAINER(tcp_helper_resource_t, nic[tcph_nic->thn_intf_i], tcph_nic); if( trs->trs_aflags & OO_THR_AFLAG_POLL_AND_PRIME ) { /* OO_THR_AFLAG_POLL_AND_PRIME is set - i.e. in some sense the * previous interrupt handler is already running. @@ -7071,16 +6958,16 @@ static int oo_handle_wakeup_or_timeout(void* context, int is_timeout, } -static int oo_handle_wakeup_int_driven(void* context, int is_timeout, - struct efhw_nic* nic_, int budget) +static int oo_handle_wakeup_int_driven( + void* context, int is_timeout, struct efhw_nic* nic_, int budget) { struct tcp_helper_nic* tcph_nic = context; tcp_helper_resource_t* trs; ci_netif* ni; int n = 0; - trs = CI_CONTAINER(tcp_helper_resource_t, nic[tcph_nic->thn_intf_i], - tcph_nic); + trs = + CI_CONTAINER(tcp_helper_resource_t, nic[tcph_nic->thn_intf_i], tcph_nic); ni = &trs->netif; if( trs->trs_aflags & OO_THR_AFLAG_POLL_AND_PRIME ) { @@ -7089,13 +6976,13 @@ static int oo_handle_wakeup_int_driven(void* context, int is_timeout, * Workqueue will handle new events if any and will prime if needed. */ ci_bit_set(&ni->state->evq_prime_deferred, tcph_nic->thn_intf_i); if( trs->trs_aflags & OO_THR_AFLAG_POLL_AND_PRIME || - ! ci_bit_test_and_clear(&ni->state->evq_prime_deferred, - tcph_nic->thn_intf_i) ) + ! ci_bit_test_and_clear( + &ni->state->evq_prime_deferred, tcph_nic->thn_intf_i) ) return 0; /* otherwise continue as though POLL_AND_PRIME wasn't initially set */ } - ci_assert( ! is_timeout ); + ci_assert(! 
is_timeout); TCP_HELPER_RESOURCE_ASSERT_VALID(trs, -1); CITP_STATS_NETIF_INC(ni, interrupts); @@ -7106,10 +6993,11 @@ static int oo_handle_wakeup_int_driven(void* context, int is_timeout, if( ci_netif_intf_has_event(ni, tcph_nic->thn_intf_i) ) { if( efab_tcp_helper_netif_try_lock(trs, 1) ) { CITP_STATS_NETIF(++ni->state->stats.interrupt_polls); - ci_assert( ni->flags & CI_NETIF_FLAG_IN_DL_CONTEXT); + ci_assert(ni->flags & CI_NETIF_FLAG_IN_DL_CONTEXT); if( ni->flags & CI_NETIF_FLAGS_AVOID_ATOMIC || budget <= 0 ) { - /* Steal the locks and exit: don't attempt AF_XDP in atomic context */ + /* Steal the locks and exit: don't attempt AF_XDP in atomic context + */ ci_bit_set(&ni->state->evq_prime_deferred, tcph_nic->thn_intf_i); tcp_helper_defer_dl2work(trs, OO_THR_AFLAG_POLL_AND_PRIME); return 0; @@ -7144,20 +7032,18 @@ static int oo_handle_wakeup_int_driven(void* context, int is_timeout, tcp_helper_request_wakeup_nic(trs, tcph_nic->thn_intf_i); efab_tcp_helper_netif_unlock(trs, 1); break; - } - else { + } else { CITP_STATS_NETIF_INC(ni, interrupt_lock_contends); /* Drop through to set lock flags or try again... */ } - } - else { + } else { CITP_STATS_NETIF_INC(ni, interrupt_no_events); /* Requesting wakeup is tricky here. Don't want to take the * lock if avoidable as results in user-level seeing lock * contention, but need an accurate value of the evq_ptr to * write to request the wakeup. - * + * * First attempt to set the flags to request lock holder request * wakeup. If this fails, then lock is not held, so evq_ptr is * likely consistent. In case where we get it wrong it will @@ -7165,31 +7051,27 @@ static int oo_handle_wakeup_int_driven(void* context, int is_timeout, * into the feedback loop of repeated wakeups seen in bug42745. 
*/ ci_bit_set(&ni->state->evq_prime_deferred, tcph_nic->thn_intf_i); - if( ef_eplock_set_flag_if_locked(&ni->state->lock, - CI_EPLOCK_NETIF_NEED_PRIME) ) { + if( ef_eplock_set_flag_if_locked( + &ni->state->lock, CI_EPLOCK_NETIF_NEED_PRIME) ) { break; - } - else if( oo_trusted_lock_set_flags_if_locked - (trs, OO_TRUSTED_LOCK_NEED_PRIME) ) { + } else if( oo_trusted_lock_set_flags_if_locked( + trs, OO_TRUSTED_LOCK_NEED_PRIME) ) { break; } - if( ! ci_bit_test_and_clear(&ni->state->evq_prime_deferred, - tcph_nic->thn_intf_i) ) + if( ! ci_bit_test_and_clear( + &ni->state->evq_prime_deferred, tcph_nic->thn_intf_i) ) break; - if( tcp_helper_request_wakeup_nic_from_wakeup(trs, - tcph_nic->thn_intf_i) == 0 ) + if( tcp_helper_request_wakeup_nic_from_wakeup( + trs, tcph_nic->thn_intf_i) == 0 ) break; } ci_bit_set(&ni->state->evq_prime_deferred, tcph_nic->thn_intf_i); if( ef_eplock_set_flags_if_locked(&ni->state->lock, - CI_EPLOCK_NETIF_NEED_POLL | - CI_EPLOCK_NETIF_NEED_PRIME) ) { + CI_EPLOCK_NETIF_NEED_POLL | CI_EPLOCK_NETIF_NEED_PRIME) ) { break; - } - else if( oo_trusted_lock_set_flags_if_locked(trs, - OO_TRUSTED_LOCK_NEED_POLL | - OO_TRUSTED_LOCK_NEED_PRIME) ) { + } else if( oo_trusted_lock_set_flags_if_locked(trs, + OO_TRUSTED_LOCK_NEED_POLL | OO_TRUSTED_LOCK_NEED_PRIME) ) { break; } } @@ -7200,16 +7082,15 @@ static int oo_handle_wakeup_int_driven(void* context, int is_timeout, /*-------------------------------------------------------------------- *! - * TCP helper timer implementation + * TCP helper timer implementation * *--------------------------------------------------------------------*/ #if ! 
CI_CFG_UL_INTERRUPT_HELPER -static void -linux_set_periodic_timer_restart(tcp_helper_resource_t* rs, - unsigned long timeout) +static void linux_set_periodic_timer_restart( + tcp_helper_resource_t* rs, unsigned long timeout) { - if (atomic_read(&rs->timer_running) == 0) + if( atomic_read(&rs->timer_running) == 0 ) return; /* The timeout is calculated from IP ticks, which are ususally smaller @@ -7226,8 +7107,7 @@ linux_set_periodic_timer_restart(tcp_helper_resource_t* rs, /* Find the delay before the next IP timer will fire, in jiffies. * Can be unreliable if the stack is not locked. */ -static unsigned long -tcp_helper_next_ip_timer(ci_netif* ni) +static unsigned long tcp_helper_next_ip_timer(ci_netif* ni) { unsigned long delay = periodic_poll; ci_ip_timer_state* ipts = IPTIMER_STATE(ni); @@ -7236,14 +7116,14 @@ tcp_helper_next_ip_timer(ci_netif* ni) /* 1 tick is roughly equal to 1ms; we do not care about delay > 1s. * Non-positive delta probably means that something is going on under * our feet, so we ignore it. */ - if( ticks_delay > 1000 || ticks_delay < 1) + if( ticks_delay > 1000 || ticks_delay < 1 ) return delay; /* We have the next IP timer closer than 1s. Let's find the time * in jiffies. */ delay = usecs_to_jiffies( - (ci_uint64)ticks_delay << - (ipts->ci_ip_time_frc2tick - ipts->ci_ip_time_frc2us)); + (ci_uint64) ticks_delay + << (ipts->ci_ip_time_frc2tick - ipts->ci_ip_time_frc2us)); /* The calculations above are imprecise. 
Imprecision is OK, as long * as the periodic timer fires **after** the IP timer should be * run, otherwise the IP timer subsystem refuses to run the IP @@ -7252,8 +7132,7 @@ tcp_helper_next_ip_timer(ci_netif* ni) return delay + 1; } -void -tcp_helper_request_timer(tcp_helper_resource_t* trs) +void tcp_helper_request_timer(tcp_helper_resource_t* trs) { ci_netif* ni = &trs->netif; unsigned long timer_delay; @@ -7262,7 +7141,7 @@ tcp_helper_request_timer(tcp_helper_resource_t* trs) /* If the current periodic timer expiration is too far from * jiffies + timer_delay, then we want to re-schedule the timer. */ if( TIME_GT(trs->timer.timer.expires, - jiffies + timer_delay + periodic_poll_skew) ) { + jiffies + timer_delay + periodic_poll_skew) ) { /* re-schedule the periodic timer to match the delay */ /* RHEL6 fixme: * We'd better run mod_delayed_work(), but it exists for @@ -7273,14 +7152,13 @@ tcp_helper_request_timer(tcp_helper_resource_t* trs) } } -static void -ci_netif_collect_periodic_metrics(ci_netif* ni) +static void ci_netif_collect_periodic_metrics(ci_netif* ni) { /* We have no lock here however, we are the only modifier * of lowest_free_pkts count */ uint32_t free_pkts = - ((ni->pkt_sets_max - ni->pkt_sets_n) << CI_CFG_PKTS_PER_SET_S) + - ni->packets->n_free; + ((ni->pkt_sets_max - ni->pkt_sets_n) << CI_CFG_PKTS_PER_SET_S) + + ni->packets->n_free; if( free_pkts <= 0 ) free_pkts = 1; /* cannot allow it to be set to 0 */ @@ -7290,8 +7168,8 @@ ci_netif_collect_periodic_metrics(ci_netif* ni) ni->state->stats.lowest_free_pkts = free_pkts; } -static void -linux_tcp_timer_do(tcp_helper_resource_t* rs, unsigned long* next_timer) +static void linux_tcp_timer_do( + tcp_helper_resource_t* rs, unsigned long* next_timer) { ci_netif* ni = &rs->netif; ci_uint64 now_frc; @@ -7310,7 +7188,6 @@ linux_tcp_timer_do(tcp_helper_resource_t* rs, unsigned long* next_timer) if( now_frc - ni->state->evq_last_prime > ni->state->timer_prime_cycles * 5 ) { if( 
efab_tcp_helper_netif_try_lock(rs, 0) ) { - rc = ci_netif_poll(ni); oo_inject_packets_kernel_force(ni); *next_timer = tcp_helper_next_ip_timer(ni); @@ -7318,23 +7195,21 @@ linux_tcp_timer_do(tcp_helper_resource_t* rs, unsigned long* next_timer) CITP_STATS_NETIF_INC(ni, periodic_polls); if( rc > 0 ) CITP_STATS_NETIF_ADD(ni, periodic_evs, rc); - } - else { + } else { CITP_STATS_NETIF_INC(ni, periodic_lock_contends); } ci_netif_collect_periodic_metrics(ni); } } -static void -linux_tcp_helper_periodic_timer(struct work_struct *work) +static void linux_tcp_helper_periodic_timer(struct work_struct* work) { tcp_helper_resource_t* rs = container_of(work, tcp_helper_resource_t, - timer + timer #ifndef EFX_NEED_WORK_API_WRAPPERS - .work + .work #endif - ); + ); unsigned long next_timer = periodic_poll; ci_assert(NULL != rs); @@ -7345,8 +7220,8 @@ linux_tcp_helper_periodic_timer(struct work_struct *work) linux_set_periodic_timer_restart(rs, next_timer); } -static void -tcp_helper_initialize_and_start_periodic_timer(tcp_helper_resource_t* rs) +static void tcp_helper_initialize_and_start_periodic_timer( + tcp_helper_resource_t* rs) { atomic_set(&rs->timer_running, 1); @@ -7359,8 +7234,7 @@ tcp_helper_initialize_and_start_periodic_timer(tcp_helper_resource_t* rs) /* This function is used when stopping a stack, and also on error paths when * creating a stack fails. The workqueue and the purge_txq_work work item * must be initialised, but the periodic timer need not be initialised. 
*/ -static void -tcp_helper_stop_periodic_work(tcp_helper_resource_t* rs) +static void tcp_helper_stop_periodic_work(tcp_helper_resource_t* rs) { ci_irqlock_state_t lock_flags; int timer_was_running = atomic_read(&rs->timer_running); @@ -7398,9 +7272,8 @@ tcp_helper_stop_periodic_work(tcp_helper_resource_t* rs) * *--------------------------------------------------------------------*/ -static void -efab_tcp_helper_drop_os_socket(tcp_helper_resource_t* trs, - tcp_helper_endpoint_t* ep) +static void efab_tcp_helper_drop_os_socket( + tcp_helper_resource_t* trs, tcp_helper_endpoint_t* ep) { unsigned long lock_flags; struct file* os_socket; @@ -7421,9 +7294,8 @@ efab_tcp_helper_drop_os_socket(tcp_helper_resource_t* trs, * (2) tcp_helper_endpoint_clear_filters() will postpone hw filter removal. * See (1) for the result. */ -void -efab_tcp_helper_close_endpoint(tcp_helper_resource_t* trs, oo_sp ep_id, - int already_locked) +void efab_tcp_helper_close_endpoint( + tcp_helper_resource_t* trs, oo_sp ep_id, int already_locked) { ci_netif* ni; tcp_helper_endpoint_t* tep_p; @@ -7437,12 +7309,12 @@ efab_tcp_helper_close_endpoint(tcp_helper_resource_t* trs, oo_sp ep_id, w = SP_TO_WAITABLE(ni, ep_id); wo = SP_TO_WAITABLE_OBJ(&trs->netif, tep_p->id); - OO_DEBUG_TCPH(ci_log("%s: [%d:%d] ref "OO_THR_REF_FMT" %s", __FUNCTION__, - trs->id, OO_SP_FMT(ep_id), OO_THR_REF_ARG(trs->ref), - ci_tcp_state_str(wo->waitable.state))); + OO_DEBUG_TCPH(ci_log("%s: [%d:%d] ref " OO_THR_REF_FMT " %s", __FUNCTION__, + trs->id, OO_SP_FMT(ep_id), OO_THR_REF_ARG(trs->ref), + ci_tcp_state_str(wo->waitable.state))); ci_assert_impl(already_locked, ci_netif_is_locked(ni)); - ci_assert(!(w->sb_aflags & CI_SB_AFLAG_ORPHAN)); + ci_assert(! (w->sb_aflags & CI_SB_AFLAG_ORPHAN)); ci_assert(! in_atomic()); /* Drop ref to the OS socket. Won't necessarily be the last reference to it; @@ -7450,7 +7322,7 @@ efab_tcp_helper_close_endpoint(tcp_helper_resource_t* trs, oo_sp ep_id, * processes. 
This needs to be done here rather since fput can block. */ if( tep_p->os_socket != NULL ) { - ci_assert( !(w->sb_flags & CI_SB_FLAG_MOVED) ); + ci_assert(! (w->sb_flags & CI_SB_FLAG_MOVED)); /* Shutdown() the os_socket. This needs to be done in a blocking * context. @@ -7459,7 +7331,6 @@ efab_tcp_helper_close_endpoint(tcp_helper_resource_t* trs, oo_sp ep_id, efab_tcp_helper_shutdown_os_sock(tep_p, SHUT_RDWR); efab_tcp_helper_drop_os_socket(trs, tep_p); - } #if ! CI_CFG_UL_INTERRUPT_HELPER @@ -7473,12 +7344,11 @@ efab_tcp_helper_close_endpoint(tcp_helper_resource_t* trs, oo_sp ep_id, * (B) When handing socket over, we get here with the stack locked from * the UL, so ci_netif_lock() results in deadlock. */ - if( ! (current->flags & PF_EXITING) && - (w->state & CI_TCP_STATE_TCP) && - w->state != CI_TCP_LISTEN && w->state != CI_TCP_CLOSED && + if( ! (current->flags & PF_EXITING) && (w->state & CI_TCP_STATE_TCP) && + w->state != CI_TCP_LISTEN && w->state != CI_TCP_CLOSED && (wo->sock.s_flags & CI_SOCK_FLAG_LINGER) && wo->sock.so.linger != 0 && ci_netif_lock(&trs->netif) == 0 ) { - ci_assert( !already_locked ); + ci_assert(! already_locked); __ci_tcp_shutdown(&trs->netif, &wo->tcp, SHUT_WR); ci_tcp_linger(&trs->netif, &wo->tcp); /* ci_tcp_linger exits unlocked */ @@ -7491,17 +7361,17 @@ efab_tcp_helper_close_endpoint(tcp_helper_resource_t* trs, oo_sp ep_id, #endif /*! Add ep to the list in tcp_helper_resource_t for closing - * - we don't increment the ref count - as we need it to reach 0 when - * the application exits i.e. crashes (even if its holding the netif lock) - */ + * - we don't increment the ref count - as we need it to reach 0 when + * the application exits i.e. crashes (even if its holding the netif lock) + */ ci_irqlock_lock(&trs->lock, &lock_flags); #if ! CI_CFG_UL_INTERRUPT_HELPER if( ! 
ci_sllink_busy(&tep_p->tobe_closed) ) ci_sllist_push(&trs->ep_tobe_closed, &tep_p->tobe_closed); else { ci_irqlock_unlock(&trs->lock, &lock_flags); - ci_log("%s: [%d:%d] is already closing", __FUNCTION__, - trs->id, OO_SP_FMT(ep_id)); + ci_log("%s: [%d:%d] is already closing", __FUNCTION__, trs->id, + OO_SP_FMT(ep_id)); return; } #else @@ -7523,18 +7393,16 @@ efab_tcp_helper_close_endpoint(tcp_helper_resource_t* trs, oo_sp ep_id, if( (already_locked && (~trs->netif.flags & CI_NETIF_FLAG_IN_DL_CONTEXT)) || efab_tcp_helper_netif_lock_or_set_flags(trs, - OO_TRUSTED_LOCK_CLOSE_ENDPOINT, - CI_EPLOCK_NETIF_CLOSE_ENDPOINT, - 0) ) { - OO_DEBUG_TCPH(ci_log("%s: [%d:%d] closing now", - __FUNCTION__, trs->id, OO_SP_FMT(ep_id))); + OO_TRUSTED_LOCK_CLOSE_ENDPOINT, CI_EPLOCK_NETIF_CLOSE_ENDPOINT, + 0) ) { + OO_DEBUG_TCPH(ci_log( + "%s: [%d:%d] closing now", __FUNCTION__, trs->id, OO_SP_FMT(ep_id))); tcp_helper_close_pending_endpoints(trs); - if( !already_locked ) + if( ! already_locked ) efab_tcp_helper_netif_unlock(trs, 0); - } - else { + } else { OO_DEBUG_TCPH(ci_log("%s: [%d:%d] closing deferred to lock holder", - __FUNCTION__, trs->id, OO_SP_FMT(ep_id))); + __FUNCTION__, trs->id, OO_SP_FMT(ep_id))); } #else ci_atomic_or(&trs->netif.state->action_flags, OO_ACTION_CLOSE_EP); @@ -7562,12 +7430,12 @@ void generic_tcp_helper_close(ci_private_t* priv) wo = SP_TO_WAITABLE_OBJ(&trs->netif, ep->id); #endif - if (ep->fasync_queue) { + if( ep->fasync_queue ) { OO_DEBUG_SHM(ci_log("generic_tcp_helper_close removing fasync helper")); linux_tcp_helper_fop_fasync(-1, priv->_filp, 0); } - if( priv->fd_flags & OO_FDFLAG_EP_ALIEN ) + if( priv->fd_flags & OO_FDFLAG_EP_ALIEN ) fput(priv->_filp); #if CI_CFG_FD_CACHING @@ -7599,16 +7467,15 @@ void generic_tcp_helper_close(ci_private_t* priv) * cope properly with this, just logging that it occurred. 
*/ if( (priv->fd_flags & OO_FDFLAG_EP_TCP) && - (wo->waitable.sb_aflags & CI_SB_AFLAG_IN_CACHE) ) { + (wo->waitable.sb_aflags & CI_SB_AFLAG_IN_CACHE) ) { /* Clear file_ptr before NO_FD flag to ensure correct behaviour of * efab_tcp_helper_detach_file */ ep->file_ptr = NULL; ci_wmb(); ci_atomic32_or(&wo->waitable.sb_aflags, CI_SB_AFLAG_IN_CACHE_NO_FD); LOG_EP(ci_log("%s: %d:%d fd close while cached - not freeing endpoint", - __FUNCTION__, ep->thr->id, OO_SP_FMT(ep->id))); - } - else + __FUNCTION__, ep->thr->id, OO_SP_FMT(ep->id))); + } else #endif { ep->file_ptr = NULL; @@ -7621,8 +7488,7 @@ void generic_tcp_helper_close(ci_private_t* priv) * CI_RESOURCE_OPs. */ -int -efab_attach_os_socket(tcp_helper_endpoint_t* ep, struct file* os_file) +int efab_attach_os_socket(tcp_helper_endpoint_t* ep, struct file* os_file) { struct file* old_os_socket; struct file* new_os_socket; @@ -7635,15 +7501,14 @@ efab_attach_os_socket(tcp_helper_endpoint_t* ep, struct file* os_file) new_os_socket = os_file; /* Check that this os_socket is really a socket. */ - if( !S_ISSOCK(os_file->f_path.dentry->d_inode->i_mode) || - SOCKET_I(os_file->f_path.dentry->d_inode)->file != os_file) { + if( ! 
S_ISSOCK(os_file->f_path.dentry->d_inode->i_mode) || + SOCKET_I(os_file->f_path.dentry->d_inode)->file != os_file ) { fput(new_os_socket); - OO_DEBUG_ERR(ci_log("%s: %d:%d os_file=%p is not a socket", - __FUNCTION__, ep->thr->id, OO_SP_FMT(ep->id), - os_file)); + OO_DEBUG_ERR(ci_log("%s: %d:%d os_file=%p is not a socket", __FUNCTION__, + ep->thr->id, OO_SP_FMT(ep->id), os_file)); return -EBUSY; } - + spin_lock_irqsave(&ep->lock, lock_flags); old_os_socket = oo_file_xchg(&ep->os_socket, new_os_socket); new_os_socket = get_file(new_os_socket); @@ -7662,17 +7527,16 @@ efab_attach_os_socket(tcp_helper_endpoint_t* ep, struct file* os_file) } -int -__efab_create_os_socket(tcp_helper_resource_t* trs, tcp_helper_endpoint_t* ep, - struct file* os_file, ci_int32 domain) +int __efab_create_os_socket(tcp_helper_resource_t* trs, + tcp_helper_endpoint_t* ep, struct file* os_file, ci_int32 domain) { int rc; citp_waitable_obj* wo; rc = efab_attach_os_socket(ep, os_file); if( rc < 0 ) { - LOG_E(ci_log("%s: ERROR: efab_attach_os_socket failed (%d)", - __FUNCTION__, rc)); + LOG_E(ci_log( + "%s: ERROR: efab_attach_os_socket failed (%d)", __FUNCTION__, rc)); /* NB. efab_attach_os_socket() consumes [os_file] even on error. */ return rc; } @@ -7680,7 +7544,7 @@ __efab_create_os_socket(tcp_helper_resource_t* trs, tcp_helper_endpoint_t* ep, wo = SP_TO_WAITABLE_OBJ(&trs->netif, ep->id); wo->sock.domain = domain; wo->sock.uuid = ci_from_kuid_munged(tcp_helper_get_user_ns(trs), - __kuid_val(ep->os_socket->f_path.dentry->d_inode->i_uid)); + __kuid_val(ep->os_socket->f_path.dentry->d_inode->i_uid)); /* Advertise the existence of the backing socket to user-level. 
*/ ci_atomic32_or(&wo->waitable.sb_aflags, CI_SB_AFLAG_OS_BACKED); @@ -7689,24 +7553,23 @@ __efab_create_os_socket(tcp_helper_resource_t* trs, tcp_helper_endpoint_t* ep, } -int -efab_create_os_socket(tcp_helper_resource_t* trs, tcp_helper_endpoint_t* ep, - ci_int32 domain, ci_int32 type, int flags) +int efab_create_os_socket(tcp_helper_resource_t* trs, + tcp_helper_endpoint_t* ep, ci_int32 domain, ci_int32 type, int flags) { int rc; - struct socket *sock; - struct file *os_file; + struct socket* sock; + struct file* os_file; rc = sock_create(domain, type, 0, &sock); if( rc < 0 ) { LOG_E(ci_log("%s: ERROR: pid %d: sock_create(%d, %d, 0) failed (%d)", - __FUNCTION__, current->pid, domain, type, rc)); + __FUNCTION__, current->pid, domain, type, rc)); return rc; } os_file = sock_alloc_file(sock, flags, NULL); if( IS_ERR(os_file) ) { - LOG_E(ci_log("%s: ERROR: sock_alloc_file failed (%ld)", - __FUNCTION__, PTR_ERR(os_file))); + LOG_E(ci_log("%s: ERROR: sock_alloc_file failed (%ld)", __FUNCTION__, + PTR_ERR(os_file))); /* sock_alloc_file() releases the socket in case of failure */ return PTR_ERR(os_file); } @@ -7721,8 +7584,8 @@ efab_create_os_socket(tcp_helper_resource_t* trs, tcp_helper_endpoint_t* ep, ***************** Wakeups, callbacks, signals, events. 
**************** **********************************************************************/ -void tcp_helper_endpoint_wakeup(tcp_helper_resource_t* thr, - tcp_helper_endpoint_t* ep) +void tcp_helper_endpoint_wakeup( + tcp_helper_resource_t* thr, tcp_helper_endpoint_t* ep) { citp_waitable* w = SP_TO_WAITABLE(&thr->netif, ep->id); int wq_active; @@ -7740,7 +7603,7 @@ void tcp_helper_endpoint_wakeup(tcp_helper_resource_t* thr, /* Check to see if application has requested ASYNC notification */ if( ep->fasync_queue ) { LOG_TV(ci_log(NWS_FMT "async notification sigown=%d", - NWS_PRI_ARGS(&thr->netif, w), w->sigown)); + NWS_PRI_ARGS(&thr->netif, w), w->sigown)); kill_fasync(&ep->fasync_queue, SIGIO, POLL_IN); CITP_STATS_NETIF_INC(&thr->netif, sock_wakes_signal); if( w->sigown ) @@ -7751,8 +7614,7 @@ void tcp_helper_endpoint_wakeup(tcp_helper_resource_t* thr, #if CI_CFG_EPOLL3 -static void -get_os_ready_list(tcp_helper_resource_t* thr, int ready_list) +static void get_os_ready_list(tcp_helper_resource_t* thr, int ready_list) { ci_netif* ni = &thr->netif; tcp_helper_endpoint_t* ep; @@ -7766,8 +7628,8 @@ get_os_ready_list(tcp_helper_resource_t* thr, int ready_list) ci_sb_epoll_state* epoll; lnk = ci_dllist_head(&thr->os_ready_lists[ready_list]); - ep = CI_CONTAINER(tcp_helper_endpoint_t, - epoll[ready_list].os_ready_link, lnk); + ep = CI_CONTAINER( + tcp_helper_endpoint_t, epoll[ready_list].os_ready_link, lnk); ci_dllist_remove_safe(&ep->epoll[ready_list].os_ready_link); w = SP_TO_WAITABLE(ni, ep->id); @@ -7781,8 +7643,7 @@ get_os_ready_list(tcp_helper_resource_t* thr, int ready_list) ready_link = ci_sb_epoll_ready_link(ni, epoll, ready_list); oo_p_dllink_del(ni, ready_link); oo_p_dllink_add_tail(ni, - oo_p_dllink_ptr(ni, &ni->state->ready_lists[ready_list]), - ready_link); + oo_p_dllink_ptr(ni, &ni->state->ready_lists[ready_list]), ready_link); } spin_unlock_irqrestore(&thr->os_ready_list_lock, lock_flags); } @@ -7790,28 +7651,27 @@ get_os_ready_list(tcp_helper_resource_t* 
thr, int ready_list) #if ! CI_CFG_UL_INTERRUPT_HELPER -static void -wakeup_post_poll_list(tcp_helper_resource_t* thr) +static void wakeup_post_poll_list(tcp_helper_resource_t* thr) { ci_netif* ni = &thr->netif; tcp_helper_endpoint_t* ep; int n = ni->ep_tbl_n; struct oo_p_dllink_state post_poll_list = - oo_p_dllink_ptr(ni, &ni->state->post_poll_list); + oo_p_dllink_ptr(ni, &ni->state->post_poll_list); citp_waitable* w; #if CI_CFG_EPOLL3 int tmp; #endif LOG_TV(if( oo_p_dllink_is_empty(ni, post_poll_list) ) - ci_log("netif_lock_callback: need_wake but empty")); + ci_log("netif_lock_callback: need_wake but empty")); /* [n] ensures the loop will terminate in reasonable time no matter how ** badly u/l behaves. */ while( n-- > 0 && ! oo_p_dllink_is_empty(ni, post_poll_list) ) { struct oo_p_dllink_state lnk = - oo_p_dllink_statep(ni, post_poll_list.l->next); + oo_p_dllink_statep(ni, post_poll_list.l->next); oo_p_dllink_del_init(ni, lnk); w = CI_CONTAINER(citp_waitable, post_poll_link, lnk.l); @@ -7820,29 +7680,28 @@ wakeup_post_poll_list(tcp_helper_resource_t* thr) } #if CI_CFG_EPOLL3 - CI_READY_LIST_EACH(ni->state->ready_lists_in_use, tmp, n) { + CI_READY_LIST_EACH(ni->state->ready_lists_in_use, tmp, n) + { get_os_ready_list(thr, n); - if( ! oo_p_dllink_is_empty(ni, oo_p_dllink_ptr(ni, - &ni->state->ready_lists[n])) ) + if( ! 
oo_p_dllink_is_empty( + ni, oo_p_dllink_ptr(ni, &ni->state->ready_lists[n])) ) efab_tcp_helper_ready_list_wakeup(thr, n); } #endif } #endif -static inline void -tcp_helper_unlock_prime(tcp_helper_resource_t* thr) +static inline void tcp_helper_unlock_prime(tcp_helper_resource_t* thr) { - ci_netif *ni = &thr->netif; + ci_netif* ni = &thr->netif; int intf_i; CITP_STATS_NETIF_INC(ni, unlock_slow_need_prime); if( NI_OPTS(ni).int_driven ) { OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - if( ci_bit_test_and_clear(&ni->state->evq_prime_deferred, intf_i) ) - tcp_helper_request_wakeup_nic(thr, intf_i); - } - else { + if( ci_bit_test_and_clear(&ni->state->evq_prime_deferred, intf_i) ) + tcp_helper_request_wakeup_nic(thr, intf_i); + } else { tcp_helper_request_wakeup(thr); } } @@ -7868,43 +7727,43 @@ tcp_helper_unlock_prime(tcp_helper_resource_t* thr) * *--------------------------------------------------------------------*/ -ci_uint64 -efab_tcp_helper_netif_lock_callback(eplock_helper_t* epl, ci_uint64 lock_val, - int in_dl_context) +ci_uint64 efab_tcp_helper_netif_lock_callback( + eplock_helper_t* epl, ci_uint64 lock_val, int in_dl_context) { - tcp_helper_resource_t* thr = CI_CONTAINER(tcp_helper_resource_t, - netif.eplock_helper, epl); - const ci_uint64 all_after_unlock_flags = (CI_EPLOCK_NETIF_NEED_PRIME | - CI_EPLOCK_NETIF_PKT_WAKE); + tcp_helper_resource_t* thr = + CI_CONTAINER(tcp_helper_resource_t, netif.eplock_helper, epl); + const ci_uint64 all_after_unlock_flags = + (CI_EPLOCK_NETIF_NEED_PRIME | CI_EPLOCK_NETIF_PKT_WAKE); ci_uint64 all_handled_flags = - CI_EPLOCK_NETIF_UNLOCK_FLAGS | - CI_EPLOCK_NETIF_SOCKET_LIST | + CI_EPLOCK_NETIF_UNLOCK_FLAGS | CI_EPLOCK_NETIF_SOCKET_LIST | CI_EPLOCK_NETIF_UL_COMMON_MASK; /* adds PKT_WAITER | DEFERRED_PKTS */ ci_netif* ni = &thr->netif; ci_uint64 flags_set; ci_uint64 after_unlock_flags = 0; ci_uint64 defer_flags = 0; - int/*bool*/ pkt_waiter_retried = 0; + int /*bool*/ pkt_waiter_retried = 0; bool orphaned; 
ci_assert(ci_netif_is_locked(ni)); - /* from dl context we can only run a subset of work, for other work we need to defer. - * defer_flags contains all work items that will cause us to defer to non-atomic */ + /* from dl context we can only run a subset of work, for other work we need + * to defer. defer_flags contains all work items that will cause us to defer + * to non-atomic */ if( in_dl_context ) { defer_flags = CI_EPLOCK_NETIF_DL_CONTEXT_DEFER_MASK; - if(! oo_avoid_wakeup_from_dl() ) - defer_flags &=~ CI_EPLOCK_NETIF_NEED_WAKE; + if( ! oo_avoid_wakeup_from_dl() ) + defer_flags &= ~CI_EPLOCK_NETIF_NEED_WAKE; } do { if( in_dl_context ) ni->flags |= CI_NETIF_FLAG_IN_DL_CONTEXT; - again: + again: /* We expect this to sort out CI_EPLOCK_NETIF_UL_COMMON flags and clear - * all_handled_flags. Note the flags might have re-emerged. */ - lock_val = ci_netif_unlock_slow_common(ni, lock_val, all_handled_flags & ~defer_flags); + * all_handled_flags. Note the flags might have re-emerged. */ + lock_val = ci_netif_unlock_slow_common( + ni, lock_val, all_handled_flags & ~defer_flags); orphaned = (thr->ref[OO_THR_REF_APP] == 0); @@ -7912,7 +7771,8 @@ efab_tcp_helper_netif_lock_callback(eplock_helper_t* epl, ci_uint64 lock_val, ** e.g. we tried to unlock the eplock (bottom of loop) but found ** someone had tried to lock it and therefore set the "need wake" bit. 
*/ - flags_set = lock_val & CI_EPLOCK_NETIF_UNLOCK_FLAGS &~ CI_EPLOCK_NETIF_UL_COMMON_MASK; + flags_set = lock_val & CI_EPLOCK_NETIF_UNLOCK_FLAGS & + ~CI_EPLOCK_NETIF_UL_COMMON_MASK; after_unlock_flags |= flags_set & all_after_unlock_flags; /* All code between here and the bottom of the loop should use @@ -7928,14 +7788,14 @@ efab_tcp_helper_netif_lock_callback(eplock_helper_t* epl, ci_uint64 lock_val, if( flags_set & CI_EPLOCK_NETIF_CLOSE_ENDPOINT ) { /* prevent flag ping pong */ ef_eplock_clear_flags(&ni->state->lock, CI_EPLOCK_NETIF_CLOSE_ENDPOINT); - flags_set &=~ CI_EPLOCK_NETIF_CLOSE_ENDPOINT; - if( oo_trusted_lock_lock_and_set_flags(thr, - OO_TRUSTED_LOCK_CLOSE_ENDPOINT) ) { + flags_set &= ~CI_EPLOCK_NETIF_CLOSE_ENDPOINT; + if( oo_trusted_lock_lock_and_set_flags( + thr, OO_TRUSTED_LOCK_CLOSE_ENDPOINT) ) { /* We've got both locks. If in non-atomic context, do the work, * else defer work and locks to workitem. */ - OO_DEBUG_TCPH(ci_log("%s: [%u] CLOSE_ENDPOINT now", - __FUNCTION__, thr->id)); + OO_DEBUG_TCPH( + ci_log("%s: [%u] CLOSE_ENDPOINT now", __FUNCTION__, thr->id)); /* set flags in case we get deferred */ ef_eplock_holder_set_flags(&ni->state->lock, flags_set); @@ -7944,16 +7804,15 @@ efab_tcp_helper_netif_lock_callback(eplock_helper_t* epl, ci_uint64 lock_val, if( in_dl_context ) ni->flags &= ~CI_NETIF_FLAG_IN_DL_CONTEXT; /* unlock will take care of CLOSE_ENDPOINT work */ - if( oo_trusted_lock_drop(thr, in_dl_context, 1/*has_shared=1*/) ) + if( oo_trusted_lock_drop(thr, in_dl_context, 1 /*has_shared=1*/) ) return 0; /* unlock has been deferred with both locks */ CITP_STATS_NETIF(++ni->state->stats.unlock_slow_close); /* best to go around to refresh flags_set */ continue; - } - else { + } else { /* Trusted lock holder now responsible for non-atomic work. 
*/ OO_DEBUG_TCPH(ci_log("%s: [%u] defer CLOSE_ENDPOINT to trusted lock", - __FUNCTION__, thr->id)); + __FUNCTION__, thr->id)); } } #endif @@ -7967,7 +7826,7 @@ efab_tcp_helper_netif_lock_callback(eplock_helper_t* epl, ci_uint64 lock_val, /* We cannot finish this work in DL context, let's defer to work item * immediately without any partial work */ OO_DEBUG_TCPH(ci_log("%s: [%u] defer to workitem, flags %llx", - __FUNCTION__, thr->id, lock_val)); + __FUNCTION__, thr->id, lock_val)); /* set flags so work item can pick up the work */ ef_eplock_holder_set_flags(&ni->state->lock, flags_set); tcp_helper_defer_dl2work(thr, OO_THR_AFLAG_UNLOCK_UNTRUSTED); @@ -7978,15 +7837,15 @@ efab_tcp_helper_netif_lock_callback(eplock_helper_t* epl, ci_uint64 lock_val, if( flags_set & CI_EPLOCK_NETIF_SWF_UPDATE ) { oof_cb_sw_filter_apply(ni); CITP_STATS_NETIF(++ni->state->stats.unlock_slow_swf_update); - flags_set &=~ CI_EPLOCK_NETIF_SWF_UPDATE; + flags_set &= ~CI_EPLOCK_NETIF_SWF_UPDATE; } if( flags_set & CI_EPLOCK_NETIF_NEED_WAKE ) { - OO_DEBUG_TCPH(ci_log("%s: [%u] wake up endpoints", - __FUNCTION__, thr->id)); + OO_DEBUG_TCPH( + ci_log("%s: [%u] wake up endpoints", __FUNCTION__, thr->id)); wakeup_post_poll_list(thr); CITP_STATS_NETIF(++ni->state->stats.unlock_slow_wake); - flags_set &=~ CI_EPLOCK_NETIF_NEED_WAKE; + flags_set &= ~CI_EPLOCK_NETIF_NEED_WAKE; } /* Monitor the number of free packets: pretend that @@ -7994,22 +7853,21 @@ efab_tcp_helper_netif_lock_callback(eplock_helper_t* epl, ci_uint64 lock_val, ** short of packets. */ if( (flags_set & CI_EPLOCK_NETIF_NEED_PKT_SET) || - (!orphaned && oo_want_proactive_packet_allocation(ni)) ) { - OO_DEBUG_TCPH(ci_log("%s: [%u] NEED_PKT_SET now", - __FUNCTION__, thr->id)); + (! 
orphaned && oo_want_proactive_packet_allocation(ni)) ) { + OO_DEBUG_TCPH( + ci_log("%s: [%u] NEED_PKT_SET now", __FUNCTION__, thr->id)); efab_tcp_helper_more_bufs(thr); - flags_set &=~ CI_EPLOCK_NETIF_NEED_PKT_SET; + flags_set &= ~CI_EPLOCK_NETIF_NEED_PKT_SET; } /* Monitor the number of socket buffers. */ if( (flags_set & CI_EPLOCK_NETIF_NEED_SOCK_BUFS) || - (!orphaned && oo_want_proactive_socket_allocation(ni)) ) { - OO_DEBUG_TCPH(ci_log("%s: [%u] NEED_SOCK_BUFS now", - __FUNCTION__, thr->id)); + (! orphaned && oo_want_proactive_socket_allocation(ni)) ) { + OO_DEBUG_TCPH( + ci_log("%s: [%u] NEED_SOCK_BUFS now", __FUNCTION__, thr->id)); efab_tcp_helper_more_socks(thr); - flags_set &=~ CI_EPLOCK_NETIF_NEED_SOCK_BUFS; - + flags_set &= ~CI_EPLOCK_NETIF_NEED_SOCK_BUFS; } #endif @@ -8022,8 +7880,7 @@ efab_tcp_helper_netif_lock_callback(eplock_helper_t* epl, ci_uint64 lock_val, if( flags_set & CI_EPLOCK_NETIF_KERNEL_PACKETS ) { OO_DEBUG_TCPH(ci_log("%s: [%u] forward %u packets to kernel", - __FUNCTION__, thr->id, - kernel_packets_pending(ni->state))); + __FUNCTION__, thr->id, kernel_packets_pending(ni->state))); oo_inject_packets_kernel(thr, 0); flags_set &= ~CI_EPLOCK_NETIF_KERNEL_PACKETS; } @@ -8064,9 +7921,8 @@ efab_tcp_helper_netif_lock_callback(eplock_helper_t* epl, ci_uint64 lock_val, if( in_dl_context ) ni->flags &= ~CI_NETIF_FLAG_IN_DL_CONTEXT; - } while ( !ef_eplock_try_unlock(&ni->state->lock, &lock_val, - CI_EPLOCK_NETIF_UNLOCK_FLAGS | - CI_EPLOCK_NETIF_SOCKET_LIST) ); + } while( ! 
ef_eplock_try_unlock(&ni->state->lock, &lock_val, + CI_EPLOCK_NETIF_UNLOCK_FLAGS | CI_EPLOCK_NETIF_SOCKET_LIST) ); if( after_unlock_flags & CI_EPLOCK_NETIF_NEED_PRIME ) tcp_helper_unlock_prime(thr); @@ -8083,9 +7939,8 @@ efab_tcp_helper_netif_lock_callback(eplock_helper_t* epl, ci_uint64 lock_val, return lock_val; } -#else /* CI_CFG_UL_INTERRUPT_HELPER */ -int -efab_eplock_wake_and_do(ci_netif *ni, ci_uint64 l) +#else /* CI_CFG_UL_INTERRUPT_HELPER */ +int efab_eplock_wake_and_do(ci_netif* ni, ci_uint64 l) { tcp_helper_resource_t* thr = netif2tcp_helper_resource(ni); @@ -8142,45 +7997,44 @@ efab_eplock_wake_and_do(ci_netif *ni, ci_uint64 l) * *--------------------------------------------------------------------*/ -extern int -iterate_netifs_unlocked(ci_netif **p_ni, enum oo_thr_ref_type ref_type, - enum oo_thr_ref_type ref_zero) +extern int iterate_netifs_unlocked(ci_netif** p_ni, + enum oo_thr_ref_type ref_type, enum oo_thr_ref_type ref_zero) { - ci_netif *ni_prev = *p_ni; + ci_netif* ni_prev = *p_ni; ci_irqlock_state_t lock_flags; - tcp_helper_resource_t * thr_prev = NULL; - ci_dllink *link = NULL; + tcp_helper_resource_t* thr_prev = NULL; + ci_dllink* link = NULL; int rc = -ENOENT; ci_assert_lt(ref_type, ref_zero); - if (ni_prev) { + if( ni_prev ) { thr_prev = CI_CONTAINER(tcp_helper_resource_t, netif, ni_prev); TCP_HELPER_RESOURCE_ASSERT_VALID(thr_prev, -1); } - /* We need a lock to protect the link and thr from removing + /* We need a lock to protect the link and thr from removing * after we've got the link and before taking refcount */ ci_irqlock_lock(&THR_TABLE.lock, &lock_flags); - if (ni_prev != NULL) { + if( ni_prev != NULL ) { link = thr_prev->all_stacks_link.next; - if (ci_dllist_end(&THR_TABLE.all_stacks) == link) + if( ci_dllist_end(&THR_TABLE.all_stacks) == link ) link = NULL; - } else if (ci_dllist_not_empty(&THR_TABLE.all_stacks)) + } else if( ci_dllist_not_empty(&THR_TABLE.all_stacks) ) link = ci_dllist_start(&THR_TABLE.all_stacks); - if (link) { - 
tcp_helper_resource_t * thr; + if( link ) { + tcp_helper_resource_t* thr; /* Skip dead thr's */ -again: + again: thr = CI_CONTAINER(tcp_helper_resource_t, all_stacks_link, link); if( ! oo_thr_ref_is_zero(thr->ref, ref_zero) || oo_thr_ref_get(thr->ref, ref_type) != 0 ) { link = link->next; - if (ci_dllist_end(&THR_TABLE.all_stacks) == link) { + if( ci_dllist_end(&THR_TABLE.all_stacks) == link ) { *p_ni = NULL; goto out; } @@ -8193,21 +8047,20 @@ iterate_netifs_unlocked(ci_netif **p_ni, enum oo_thr_ref_type ref_type, out: ci_irqlock_unlock(&THR_TABLE.lock, &lock_flags); - if (ni_prev != NULL) + if( ni_prev != NULL ) iterate_netifs_unlocked_dropref(&thr_prev->netif, ref_type); return rc; } - static int efab_ipid_alloc(efab_ipid_cb_t* ipid) { int i; int rv; ci_irqlock_state_t lock_flags; - ci_assert( ipid->init == EFAB_IPID_INIT ); - ci_irqlock_lock( &ipid->lock, &lock_flags ); + ci_assert(ipid->init == EFAB_IPID_INIT); + ci_irqlock_lock(&ipid->lock, &lock_flags); /* go find an unused block */ i = ipid->last_block_used; @@ -8215,22 +8068,22 @@ static int efab_ipid_alloc(efab_ipid_cb_t* ipid) i = (i + 1) % CI_IPID_BLOCK_COUNT; if( i == ipid->last_block_used ) break; - if( !ipid->range[i] ) { + if( ! ipid->range[i] ) { ipid->range[i]++; rv = CI_IPID_MIN + (i << CI_IPID_BLOCK_SHIFT); - ci_assert((rv >= CI_IPID_MIN) && + ci_assert((rv >= CI_IPID_MIN) && (rv <= CI_IPID_MAX - CI_IPID_BLOCK_LENGTH + 1)); ipid->last_block_used = i; goto alloc_exit; } else { - ci_assert( ipid->range[i] == 1 ); + ci_assert(ipid->range[i] == 1); } - } while(1); + } while( 1 ); /* !!Out of blocks!! 
*/ rv = -ENOMEM; - alloc_exit: - ci_irqlock_unlock( &ipid->lock, &lock_flags ); +alloc_exit: + ci_irqlock_unlock(&ipid->lock, &lock_flags); return rv; } @@ -8240,26 +8093,25 @@ static int efab_ipid_free(efab_ipid_cb_t* ipid, int base) int i; ci_irqlock_state_t lock_flags; - ci_assert( ipid->init == EFAB_IPID_INIT ); + ci_assert(ipid->init == EFAB_IPID_INIT); - if( (base & CI_IPID_BLOCK_MASK) != 0 ) - return -EINVAL; /* not actually on a block boundary */ + if( (base & CI_IPID_BLOCK_MASK) != 0 ) + return -EINVAL; /* not actually on a block boundary */ - ci_assert((base >= CI_IPID_MIN) && + ci_assert((base >= CI_IPID_MIN) && (base <= CI_IPID_MAX - CI_IPID_BLOCK_LENGTH + 1)); - ci_irqlock_lock( &ipid->lock, &lock_flags ); + ci_irqlock_lock(&ipid->lock, &lock_flags); i = (base - CI_IPID_MIN) >> CI_IPID_BLOCK_SHIFT; - ci_assert( ipid->range[i] == 1 ); + ci_assert(ipid->range[i] == 1); ipid->range[i] = 0; - ci_irqlock_unlock( &ipid->lock, &lock_flags ); + ci_irqlock_unlock(&ipid->lock, &lock_flags); return 0; } -int -efab_tcp_helper_vi_stats_query(tcp_helper_resource_t* trs, unsigned int intf_i, - void* data, size_t data_len, int do_reset) +int efab_tcp_helper_vi_stats_query(tcp_helper_resource_t* trs, + unsigned int intf_i, void* data, size_t data_len, int do_reset) { struct efrm_vi* virs; @@ -8320,9 +8172,8 @@ static int oo_inject_packet_kernel(ci_netif* ni, ci_ip_pkt_fmt* pkt) offbuf_end + offsetof(typeof(*frag), buf) > CI_CFG_PKT_BUF_SIZE ) goto corrupted; - memcpy(skb->data + len, - (void*)((uintptr_t)(&frag->buf) + offbuf_off), - offbuf_end - offbuf_off); + memcpy(skb->data + len, (void*) ((uintptr_t) (&frag->buf) + offbuf_off), + offbuf_end - offbuf_off); len += offbuf_end - offbuf_off; if( OO_PP_IS_NULL(frag->frag_next) ) @@ -8361,13 +8212,13 @@ struct oo_inject_packets_work_data { static void oo_inject_packets_work(struct work_struct* work) { struct oo_inject_packets_work_data* data = - container_of(work, struct oo_inject_packets_work_data, work); + 
container_of(work, struct oo_inject_packets_work_data, work); ci_netif* ni = &data->trs->netif; ci_ip_pkt_fmt* pkt; int netif_is_locked; /* Part one: inject all packets to the kernel */ - for( pkt = PKT_CHK(ni, data->pkt_head); ; pkt = PKT_CHK(ni, pkt->next) ) { + for( pkt = PKT_CHK(ni, data->pkt_head);; pkt = PKT_CHK(ni, pkt->next) ) { /* No need to check the return value here. If the function fails, the * packet is dropped, and a counter is incremented. */ oo_inject_packet_kernel(ni, pkt); @@ -8378,7 +8229,8 @@ static void oo_inject_packets_work(struct work_struct* work) /* Part two: free Onload packets */ netif_is_locked = 0; - for( pkt = PKT_CHK(ni, data->pkt_head); ; pkt = PKT_CHK(ni, data->pkt_head)) { + for( pkt = PKT_CHK(ni, data->pkt_head);; + pkt = PKT_CHK(ni, data->pkt_head) ) { data->pkt_head = pkt->next; ci_netif_pkt_release_mnl(ni, pkt, &netif_is_locked); if( OO_PP_IS_NULL(data->pkt_head) ) @@ -8407,7 +8259,7 @@ void oo_inject_packets_kernel(tcp_helper_resource_t* trs, int sync) if( ni->state->kernel_packets_pending == 0 ) return; - ci_assert( ! OO_PP_IS_NULL(ni->state->kernel_packets_head) ); + ci_assert(! OO_PP_IS_NULL(ni->state->kernel_packets_head)); /* Are we allowed to inject any packets? */ if( ! 
(ni->flags & CI_NETIF_FLAG_MAY_INJECT_TO_KERNEL) ) { @@ -8433,8 +8285,7 @@ void oo_inject_packets_kernel(tcp_helper_resource_t* trs, int sync) if( sync ) { oo_inject_packets_work(&data->work); - } - else { + } else { /* Push data to kernel without holding the stack lock */ queue_work(trs->wq, &data->work); } @@ -8455,16 +8306,15 @@ void oo_inject_packets_kernel(tcp_helper_resource_t* trs, int sync) #include -ci_inline int oo_xdp_rx_pkt_locked(ci_netif* ni, - struct net_device* dev, - ci_ip_pkt_fmt* pkt) +ci_inline int oo_xdp_rx_pkt_locked( + ci_netif* ni, struct net_device* dev, ci_ip_pkt_fmt* pkt) { /* TODO: ensure that packet: * * is linear * * has enough headroom (256 bytes) * see netif_receive_generic_xdp in kernel */ - struct bpf_prog* xdp_prog = rcu_dereference( - oo_nics[ni->intf_i_to_hwport[pkt->intf_i] ].prog); + struct bpf_prog* xdp_prog = + rcu_dereference(oo_nics[ni->intf_i_to_hwport[pkt->intf_i]].prog); struct xdp_buff _xdp; struct xdp_buff* xdp = &_xdp; void *orig_data, *orig_data_end; @@ -8474,13 +8324,14 @@ ci_inline int oo_xdp_rx_pkt_locked(ci_netif* ni, int act; if( xdp_prog == NULL ) - return XDP_PASS; + return XDP_PASS; /* The XDP program wants to see the packet starting at the MAC * header. 
*/ xdp->data = oo_ether_hdr(pkt); - xdp->data_meta = xdp->data; /* note: netdriver does not support metadata at all, we could do the same */ + xdp->data_meta = xdp->data; /* note: netdriver does not support metadata at + all, we could do the same */ /* There are two commonly-discussed behaviours for jumbograms: * 1) Drop the packet here (it would be bad to bypass XDP without dropping @@ -8491,7 +8342,8 @@ ci_inline int oo_xdp_rx_pkt_locked(ci_netif* ni, * https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org */ xdp->data_end = xdp->data + oo_offbuf_left(&pkt->buf); - xdp->data_hard_start = xdp->data; /* no headroom, should be 256 bytes at least */ + xdp->data_hard_start = + xdp->data; /* no headroom, should be 256 bytes at least */ orig_data_end = xdp->data_end; orig_data = xdp->data; @@ -8503,8 +8355,8 @@ ci_inline int oo_xdp_rx_pkt_locked(ci_netif* ni, } -/* bool */ int -efab_tcp_helper_xdp_rx_pkt(tcp_helper_resource_t* trs, ci_ip_pkt_fmt* pkt) +/* bool */ int efab_tcp_helper_xdp_rx_pkt( + tcp_helper_resource_t* trs, ci_ip_pkt_fmt* pkt) { int ret = XDP_PASS; struct tcp_helper_nic* trs_nic = &trs->nic[pkt->intf_i]; @@ -8561,8 +8413,8 @@ efab_tcp_helper_xdp_rx_pkt(tcp_helper_resource_t* trs, ci_ip_pkt_fmt* pkt) } #endif -int efab_tcp_helper_tcp_offload_set_isn(tcp_helper_resource_t* trs, - oo_sp ep_id, ci_uint32 isn) +int efab_tcp_helper_tcp_offload_set_isn( + tcp_helper_resource_t* trs, oo_sp ep_id, ci_uint32 isn) { #if CI_CFG_TCP_OFFLOAD_RECYCLER ci_netif* ni = &trs->netif; @@ -8571,15 +8423,16 @@ int efab_tcp_helper_tcp_offload_set_isn(tcp_helper_resource_t* trs, ci_assert(! 
in_atomic()); - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { if( ni->nic_hw[intf_i].plugin_rx && tep_p->plugin_stream_id[intf_i] != INVALID_PLUGIN_HANDLE ) { struct xsn_tcp_sync_stream sync = { .in_conn_id = tep_p->plugin_stream_id[intf_i], .in_seq = isn, }; - int rc = efrm_ext_msg(ni->nic_hw[intf_i].plugin_rx, - XSN_CEPH_SYNC_STREAM, &sync, sizeof(sync)); + int rc = efrm_ext_msg(ni->nic_hw[intf_i].plugin_rx, XSN_CEPH_SYNC_STREAM, + &sync, sizeof(sync)); if( rc < 0 ) return rc; } @@ -8591,10 +8444,8 @@ int efab_tcp_helper_tcp_offload_set_isn(tcp_helper_resource_t* trs, } int efab_tcp_helper_tcp_offload_get_stream_id(tcp_helper_resource_t* trs, - oo_sp ep_id, ci_int32 intf_i, - ci_uint32* stream_id, - ci_uint64* ddr_base, - ci_uint64* ddr_size) + oo_sp ep_id, ci_int32 intf_i, ci_uint32* stream_id, ci_uint64* ddr_base, + ci_uint64* ddr_size) { #if CI_CFG_TCP_OFFLOAD_RECYCLER ci_netif* ni = &trs->netif; @@ -8623,8 +8474,7 @@ int efab_tcp_helper_tcp_offload_get_stream_id(tcp_helper_resource_t* trs, } int efab_tcp_helper_efct_superbuf_config_refresh( - tcp_helper_resource_t* trs, - oo_efct_superbuf_config_refresh_t* op) + tcp_helper_resource_t* trs, oo_efct_superbuf_config_refresh_t* op) { struct tcp_helper_nic* nic; if( op->intf_i >= oo_stack_intf_max(&trs->netif) ) @@ -8634,13 +8484,11 @@ int efab_tcp_helper_efct_superbuf_config_refresh( ! 
nic->thn_efct_rxq[op->qid] ) return -EINVAL; return efrm_rxq_refresh(nic->thn_efct_rxq[op->qid], - (unsigned long)CI_USER_PTR_GET(op->superbufs), - CI_USER_PTR_GET(op->current_mappings), - op->max_superbufs); + (unsigned long) CI_USER_PTR_GET(op->superbufs), + CI_USER_PTR_GET(op->current_mappings), op->max_superbufs); } -static tcp_helper_resource_t* -thr_ref2thr(oo_thr_ref_t ref) +static tcp_helper_resource_t* thr_ref2thr(oo_thr_ref_t ref) { return CI_CONTAINER(tcp_helper_resource_t, ref[0], &ref[0]); } @@ -8648,24 +8496,24 @@ thr_ref2thr(oo_thr_ref_t ref) static void thr_release_base(oo_thr_ref_t ref) { tcp_helper_resource_t* thr = thr_ref2thr(ref); - OO_DEBUG_TCPH(ci_log("%s [%d] "OO_THR_REF_FMT, __func__, thr->id, - OO_THR_REF_ARG(ref))); + OO_DEBUG_TCPH(ci_log( + "%s [%d] " OO_THR_REF_FMT, __func__, thr->id, OO_THR_REF_ARG(ref))); efab_tcp_helper_k_ref_count_is_zero(thr); } static void thr_release_file(oo_thr_ref_t ref) { tcp_helper_resource_t* thr = thr_ref2thr(ref); - OO_DEBUG_TCPH(ci_log("%s [%d] "OO_THR_REF_FMT, __func__, thr->id, - OO_THR_REF_ARG(ref))); + OO_DEBUG_TCPH(ci_log( + "%s [%d] " OO_THR_REF_FMT, __func__, thr->id, OO_THR_REF_ARG(ref))); tcp_helper_rm_free(thr); } static void thr_release_app(oo_thr_ref_t ref) { tcp_helper_resource_t* thr = thr_ref2thr(ref); - OO_DEBUG_TCPH(ci_log("%s [%d] "OO_THR_REF_FMT, __func__, thr->id, - OO_THR_REF_ARG(ref))); + OO_DEBUG_TCPH(ci_log( + "%s [%d] " OO_THR_REF_FMT, __func__, thr->id, OO_THR_REF_ARG(ref))); efab_notify_stacklist_change(thr); /* We could run the most part of efab_tcp_helper_rm_free_locked() here. 
* But as we don't trust the "services" running in UL, we'll have to @@ -8678,11 +8526,8 @@ static void thr_release_app(oo_thr_ref_t ref) oo_thr_ref_drop(ref, OO_THR_REF_FILE); } -oo_thr_ref_release_fn oo_thr_ref_release[OO_THR_REF_INFTY] = -{ - thr_release_base, - thr_release_file, - thr_release_app +oo_thr_ref_release_fn oo_thr_ref_release[OO_THR_REF_INFTY] = { + thr_release_base, thr_release_file, thr_release_app }; /*! \cidoxg_end */ diff --git a/src/lib/transport/ip/ip_internal.h b/src/lib/transport/ip/ip_internal.h index 4808b64b7..07972fad5 100644 --- a/src/lib/transport/ip/ip_internal.h +++ b/src/lib/transport/ip/ip_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2004-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author ctk ** \brief Decls & defs for IP library internal to our libraries. @@ -22,10 +22,10 @@ #include #include #ifdef __KERNEL__ -# include -# include +#include +#include #else -# include +#include #endif #include @@ -36,7 +36,7 @@ * the driver. However, the driver compiles code that uses this * definition so just define it to 0. */ -#define ONLOAD_MSG_WARM 0 +#define ONLOAD_MSG_WARM 0 /* ONLOAD_MSG_ONEPKT is only used in user space receive calls, so use * the same trick as for ONLOAD_MSG_WARM above. @@ -67,27 +67,32 @@ extern unsigned ci_tp_log CI_HV; extern unsigned ci_tp_max_dump CI_HV; -#define log ci_log +#define log ci_log -ci_inline unsigned raw_pkt_dump_len(unsigned len) { +ci_inline unsigned raw_pkt_dump_len(unsigned len) +{ #if defined(__ci_driver__) - if( len > 80 ) len = 80; + if( len > 80 ) + len = 80; #else - if( len > ci_tp_max_dump ) len = ci_tp_max_dump; + if( len > ci_tp_max_dump ) + len = ci_tp_max_dump; #endif return len; } -ci_inline unsigned ip_pkt_dump_len(unsigned len) { +ci_inline unsigned ip_pkt_dump_len(unsigned len) +{ len += ETH_HLEN; /* ?? Cout VLAN tag as well ?? 
*/ - if( len > ETH_FRAME_LEN ) len = 80; + if( len > ETH_FRAME_LEN ) + len = 80; return raw_pkt_dump_len(len); } #ifdef __ci_driver__ /* definitions for installing/removing IP filters */ -# include -# include +#include +#include #endif @@ -106,8 +111,8 @@ ci_inline void ci_tcp_update_rtt(ci_netif* netif, ci_tcp_state* ts, int m) /* It's possible to get here if the timestamp has been corrupted. If so * it's probably best not to use it to update the rtt. */ - LOG_TL(ci_log("TCP RX %d:%d ditching bad timestamp echo", - LNT_PRI_ARGS(netif, ts))); + LOG_TL(ci_log( + "TCP RX %d:%d ditching bad timestamp echo", LNT_PRI_ARGS(netif, ts))); return; } m = CI_MAX(1, m); @@ -117,13 +122,13 @@ ci_inline void ci_tcp_update_rtt(ci_netif* netif, ci_tcp_state* ts, int m) ** RFC2988 */ m -= (ts->sa >> 3u); - ts->sa += m; /* SRTT <- SRTT + 0.125*(M-SRTT) */ - if( m < 0 ) m = -m; + ts->sa += m; /* SRTT <- SRTT + 0.125*(M-SRTT) */ + if( m < 0 ) + m = -m; m -= (ts->sv >> 2u); - ts->sv += m; /* RTTVAR <- 0.75*RTTVAR + 0.25*|M-SRTT| */ - ts->rto = tcp_srtt(ts) + ts->sv; /* RTO <- SRTT + 4*RTTVAR */ - } - else { + ts->sv += m; /* RTTVAR <- 0.75*RTTVAR + 0.25*|M-SRTT| */ + ts->rto = tcp_srtt(ts) + ts->sv; /* RTO <- SRTT + 4*RTTVAR */ + } else { /* first rtt estimate so follow (2.2) of RFC2988 */ ts->sa = (m << 3u); ts->sv = (m << 1u); @@ -132,21 +137,20 @@ ci_inline void ci_tcp_update_rtt(ci_netif* netif, ci_tcp_state* ts, int m) ci_tcp_rto_bound(netif, ts); - CI_IP_SOCK_STATS_VAL_RTT_SRTT_RTO( ts, ts->sv >> 2, ts->sa >> 3, ts->rto ); + CI_IP_SOCK_STATS_VAL_RTT_SRTT_RTO(ts, ts->sv >> 2, ts->sa >> 3, ts->rto); LOG_TR(ci_log("TCP RX %d UPDATE RTT sa=%u sv=%u SRTT=%u RTTVAR=%u RTO=%u", - S_FMT(ts), ts->sa, ts->sv, - tcp_srtt(ts), tcp_rttvar(ts), ts->rto)); + S_FMT(ts), ts->sa, ts->sv, tcp_srtt(ts), tcp_rttvar(ts), ts->rto)); } /* ** Turn timestamps into cmsg entries. 
*/ -void ip_cmsg_recv_timestamp(ci_netif *ni, ci_uint64 timestamp, - struct cmsg_state *cmsg_state); -void ip_cmsg_recv_timestampns(ci_netif *ni, ci_uint64 timestamp, - struct cmsg_state *cmsg_state); -void ip_cmsg_recv_timestamping(ci_netif *ni, const ci_ip_pkt_fmt *pkt, - int flags, struct cmsg_state *cmsg_state); +void ip_cmsg_recv_timestamp( + ci_netif* ni, ci_uint64 timestamp, struct cmsg_state* cmsg_state); +void ip_cmsg_recv_timestampns( + ci_netif* ni, ci_uint64 timestamp, struct cmsg_state* cmsg_state); +void ip_cmsg_recv_timestamping(ci_netif* ni, const ci_ip_pkt_fmt* pkt, + int flags, struct cmsg_state* cmsg_state); /********************************************************************** @@ -155,28 +159,28 @@ void ip_cmsg_recv_timestamping(ci_netif *ni, const ci_ip_pkt_fmt *pkt, /* Macro for sleeping until [cond] is not true (or timeout, or error). */ /* TODO timeout should be re-calculated when looping */ -#define CITP_WAITABLE_SLEEP_WHILE(ni, w, why, timeout, cond, prc) \ - do { \ - ci_uint64 __sleep_seq; \ - ci_uint32 t = (timeout); \ - *(prc) = 0; \ - while( 1 ) { \ - __sleep_seq = (w)->sleep_seq.all; \ - ci_rmb(); \ - if( !(cond) ) break; \ - (*prc) = ci_sock_sleep((ni), (w), (why), \ - CI_SLEEP_NETIF_LOCKED | \ - CI_SLEEP_NETIF_RQ, \ - __sleep_seq, &t); \ - /* TODO (Bug24547) handle case where netif lock fails */ \ - CI_TEST(ci_netif_lock(ni) == 0); \ - if( *(prc) ) break; \ - ci_netif_poll(ni); \ - } \ - } while(0) - - -#define CI_TCP_SLEEP_WHILE(ni, ts, why, timeout, cond, prc) \ +#define CITP_WAITABLE_SLEEP_WHILE(ni, w, why, timeout, cond, prc) \ + do { \ + ci_uint64 __sleep_seq; \ + ci_uint32 t = (timeout); \ + *(prc) = 0; \ + while( 1 ) { \ + __sleep_seq = (w)->sleep_seq.all; \ + ci_rmb(); \ + if( ! 
(cond) ) \ + break; \ + (*prc) = ci_sock_sleep((ni), (w), (why), \ + CI_SLEEP_NETIF_LOCKED | CI_SLEEP_NETIF_RQ, __sleep_seq, &t); \ + /* TODO (Bug24547) handle case where netif lock fails */ \ + CI_TEST(ci_netif_lock(ni) == 0); \ + if( *(prc) ) \ + break; \ + ci_netif_poll(ni); \ + } \ + } while( 0 ) + + +#define CI_TCP_SLEEP_WHILE(ni, ts, why, timeout, cond, prc) \ CITP_WAITABLE_SLEEP_WHILE((ni), &(ts)->s.b, (why), (timeout), (cond), (prc)) @@ -200,63 +204,55 @@ void ip_cmsg_recv_timestamping(ci_netif *ni, const ci_ip_pkt_fmt *pkt, * *--------------------------------------------------------------------*/ -ci_inline int -ci_tcp_ep_set_filters(ci_netif * ni, - oo_sp sock_id, - ci_ifid_t bindto_ifindex, - oo_sp from_tcp_id) +ci_inline int ci_tcp_ep_set_filters( + ci_netif* ni, oo_sp sock_id, ci_ifid_t bindto_ifindex, oo_sp from_tcp_id) { int rc; ci_assert(ni); - LOG_TC(ci_log("%s: %d:%d bindto_ifindex=%d port_sock=%d", - __FUNCTION__, NI_ID(ni), OO_SP_FMT(sock_id), - (int) bindto_ifindex, OO_SP_FMT(from_tcp_id))); + LOG_TC(ci_log("%s: %d:%d bindto_ifindex=%d port_sock=%d", __FUNCTION__, + NI_ID(ni), OO_SP_FMT(sock_id), (int) bindto_ifindex, + OO_SP_FMT(from_tcp_id))); #ifdef __ci_driver__ - rc = tcp_helper_endpoint_set_filters(ci_netif_get_valid_ep(ni, sock_id), - bindto_ifindex, from_tcp_id); + rc = tcp_helper_endpoint_set_filters( + ci_netif_get_valid_ep(ni, sock_id), bindto_ifindex, from_tcp_id); #else - if( ci_tcp_can_set_filter_in_ul(ni, SP_TO_SOCK(ni, sock_id)) ) + if( ci_tcp_can_set_filter_in_ul(ni, SP_TO_SOCK(ni, sock_id)) ) { rc = ci_tcp_sock_set_stack_filter(ni, SP_TO_SOCK(ni, sock_id)); - else - rc = ci_tcp_helper_ep_set_filters(ci_netif_get_driver_handle(ni), sock_id, - bindto_ifindex, from_tcp_id); + } else + rc = ci_tcp_helper_ep_set_filters( + ci_netif_get_driver_handle(ni), sock_id, bindto_ifindex, from_tcp_id); #endif - LOG_TC( if(rc < 0) - ci_log(" ---> %s (rc=%d)", __FUNCTION__, rc) ); + LOG_TC(if( rc < 0 ) ci_log(" ---> %s (rc=%d)", __FUNCTION__, 
rc)); return rc; } -#if !defined(__KERNEL__) && CI_CFG_ENDPOINT_MOVE -ci_inline int -ci_tcp_ep_reuseport_bind(ci_fd_t fd, const char* cluster_name, - ci_int32 cluster_size, ci_uint32 cluster_restart_opt, - ci_uint32 cluster_hot_restart_opt, - ci_addr_t addr, ci_uint16 port_be16) +#if ! defined(__KERNEL__) && CI_CFG_ENDPOINT_MOVE +ci_inline int ci_tcp_ep_reuseport_bind(ci_fd_t fd, const char* cluster_name, + ci_int32 cluster_size, ci_uint32 cluster_restart_opt, + ci_uint32 cluster_hot_restart_opt, ci_addr_t addr, ci_uint16 port_be16) { int rc; if( port_be16 == 0 ) { - /* There should be a non-zero port value to perform reuseport bind properly. - * This condition will be violated when socket deferred bind happens, e.g. - * when combining SO_REUSEPORT with EF_TCP_SHARED_LOCAL_PORTS option. */ + /* There should be a non-zero port value to perform reuseport bind + * properly. This condition will be violated when socket deferred bind + * happens, e.g. when combining SO_REUSEPORT with EF_TCP_SHARED_LOCAL_PORTS + * option. 
*/ LOG_TC(ci_log("%s: Trying to perform reuseport bind with 0 port value", - __FUNCTION__)); + __FUNCTION__)); return EINVAL; } LOG_TC(ci_log("%s: %d addr: " IPX_FMT " port: %d", __FUNCTION__, fd, - IPX_ARG(AF_IP_L3(addr)), port_be16)); + IPX_ARG(AF_IP_L3(addr)), port_be16)); rc = ci_tcp_helper_ep_reuseport_bind(fd, cluster_name, cluster_size, - cluster_restart_opt, - cluster_hot_restart_opt, - addr, port_be16); - LOG_TC( if(rc < 0) - ci_log(" ---> %s (rc=%d)", __FUNCTION__, rc) ); + cluster_restart_opt, cluster_hot_restart_opt, addr, port_be16); + LOG_TC(if( rc < 0 ) ci_log(" ---> %s (rc=%d)", __FUNCTION__, rc)); return rc; } #endif @@ -274,10 +270,8 @@ ci_tcp_ep_reuseport_bind(ci_fd_t fd, const char* cluster_name, * *--------------------------------------------------------------------*/ -ci_inline int -ci_tcp_ep_clear_filters(ci_netif* ni, - oo_sp sock_id, - int need_update) +ci_inline int ci_tcp_ep_clear_filters( + ci_netif* ni, oo_sp sock_id, int need_update) { int rc; #ifdef __ci_driver__ @@ -285,34 +279,32 @@ ci_tcp_ep_clear_filters(ci_netif* ni, #endif ci_assert(ni); - LOG_TC(ci_log("%s: %d:%d (%d)", __FUNCTION__, - ni->state->stack_id, OO_SP_FMT(sock_id), need_update)); + LOG_TC(ci_log("%s: %d:%d (%d)", __FUNCTION__, ni->state->stack_id, + OO_SP_FMT(sock_id), need_update)); ci_assert(ci_netif_is_locked(ni)); #ifdef __ci_driver__ - rc = tcp_helper_endpoint_clear_filters( - ci_netif_get_valid_ep(ni, sock_id), - (supress_hw_ops ? EP_CLEAR_FILTERS_FLAG_SUPRESS_HW : 0) | - (need_update ? EP_CLEAR_FILTERS_FLAG_NEED_UPDATE : 0)); + rc = tcp_helper_endpoint_clear_filters(ci_netif_get_valid_ep(ni, sock_id), + (supress_hw_ops ? EP_CLEAR_FILTERS_FLAG_SUPRESS_HW : 0) | + (need_update ? 
EP_CLEAR_FILTERS_FLAG_NEED_UPDATE : 0)); #else if( (SP_TO_SOCK(ni, sock_id)->s_flags & CI_SOCK_FLAG_STACK_FILTER) && ci_tcp_can_set_filter_in_ul(ni, SP_TO_SOCK(ni, sock_id)) ) { ci_tcp_sock_clear_stack_filter(ni, SP_TO_TCP(ni, sock_id)); rc = 0; - } - else - rc = ci_tcp_helper_ep_clear_filters(ci_netif_get_driver_handle(ni), sock_id, - need_update); + } else + rc = ci_tcp_helper_ep_clear_filters( + ci_netif_get_driver_handle(ni), sock_id, need_update); #if CI_CFG_UL_INTERRUPT_HELPER - /* When called from stack poll, it is important to remove sw filters - * immediately, before receiving next packets. */ - if( ni->state->in_poll ) - ci_netif_handle_actions(ni); + /* When called from stack poll, it is important to remove sw filters + * immediately, before receiving next packets. */ + if( ni->state->in_poll ) + ci_netif_handle_actions(ni); #endif #endif - LOG_TC( if (rc < 0 && rc != -EAGAIN) - ci_log(" ---> %s (rc=%d)", __FUNCTION__, rc) ); + LOG_TC(if( rc < 0 && rc != -EAGAIN ) + ci_log(" ---> %s (rc=%d)", __FUNCTION__, rc)); return rc; } @@ -324,7 +316,7 @@ ci_tcp_ep_clear_filters(ci_netif* ni, * address. If the socket is not bound, the function just add multicast * address to the list, and bind() should install the filter. * or - * Delete a multicast address from a socket list of multicast addresses. + * Delete a multicast address from a socket list of multicast addresses. * If the socket is already bound, this function removes filters for this * address. If the socket is not bound, the function just deletes multicast * address from the list. 
@@ -342,26 +334,20 @@ ci_tcp_ep_clear_filters(ci_netif* ni, *--------------------------------------------------------------------*/ #ifndef __ci_driver__ -ci_inline int -ci_tcp_ep_mcast_add_del(ci_netif* ni, - oo_sp sock_id, - ci_ifid_t ifindex, - ci_uint32 mcast_addr, - int add) +ci_inline int ci_tcp_ep_mcast_add_del(ci_netif* ni, oo_sp sock_id, + ci_ifid_t ifindex, ci_uint32 mcast_addr, int add) { int rc; ci_assert(ni); - LOG_TC(ci_log("%s: id=%d (ifid=%d, maddr=%s)", - __FUNCTION__, OO_SP_FMT(sock_id), ifindex, - ip_addr_str(mcast_addr))); + LOG_TC(ci_log("%s: id=%d (ifid=%d, maddr=%s)", __FUNCTION__, + OO_SP_FMT(sock_id), ifindex, ip_addr_str(mcast_addr))); - rc = ci_tcp_helper_ep_mcast_add_del(ci_netif_get_driver_handle(ni), - sock_id, mcast_addr, ifindex, add); + rc = ci_tcp_helper_ep_mcast_add_del( + ci_netif_get_driver_handle(ni), sock_id, mcast_addr, ifindex, add); - LOG_TC( if(rc < 0) - ci_log(" ---> %s (rc=%d)", __FUNCTION__, rc) ); + LOG_TC(if( rc < 0 ) ci_log(" ---> %s (rc=%d)", __FUNCTION__, rc)); return rc; } #endif @@ -372,22 +358,22 @@ ci_tcp_ep_mcast_add_del(ci_netif* ni, *********************************************************************/ #ifdef __KERNEL__ -# define verify_fail() return +#define verify_fail() return #else -# define verify_fail() ci_fail(("STOP.")) +#define verify_fail() ci_fail(("STOP.")) #endif -#define verify(exp) \ - do{ \ - if( CI_UNLIKELY(!(exp)) ) { \ +#define verify(exp) \ + do { \ + if( CI_UNLIKELY(! (exp)) ) { \ ci_log("********** verify(%s) at %s:%d", #exp, __FILE__, __LINE__); \ - ci_log("********** from %s:%d", file?file:"", line); \ - verify_fail(); \ - } \ - }while(0) + ci_log("********** from %s:%d", file ? 
file : "", line); \ + verify_fail(); \ + } \ + } while( 0 ) #undef verify -#define verify(exp) ci_assert(exp) +#define verify(exp) ci_assert(exp) /********************************************************************* @@ -395,18 +381,18 @@ ci_tcp_ep_mcast_add_del(ci_netif* ni, *********************************************************************/ #ifndef SO_TIMESTAMPNS -# define SO_TIMESTAMPNS 35 +#define SO_TIMESTAMPNS 35 #endif #ifndef SO_REUSEPORT -# define SO_REUSEPORT 15 +#define SO_REUSEPORT 15 #endif #if CI_CFG_TIMESTAMPING /* The following value needs to match its counterpart * in kernel headers. */ -#define ONLOAD_SO_TIMESTAMPING 37 +#define ONLOAD_SO_TIMESTAMPING 37 #define ONLOAD_SCM_TIMESTAMPING ONLOAD_SO_TIMESTAMPING #endif @@ -414,10 +400,10 @@ ci_tcp_ep_mcast_add_del(ci_netif* ni, * the headers in use. */ struct oo_sock_extended_err { ci_uint32 ee_errno; - ci_uint8 ee_origin; - ci_uint8 ee_type; - ci_uint8 ee_code; - ci_uint8 ee_pad; + ci_uint8 ee_origin; + ci_uint8 ee_type; + ci_uint8 ee_code; + ci_uint8 ee_pad; ci_uint32 ee_info; ci_uint32 ee_data; }; @@ -434,48 +420,44 @@ struct oo_sock_extended_err { /* check [ov] is a non-NULL ptr & [ol] indicates the right space for * type [ty] */ -#define opt_ok(ov,ol,ty) ((ov) && (ol) >= sizeof(ty)) -#define opt_not_ok(ov,ol,ty) \ - ((ol) < sizeof(ty) ? -EINVAL : (ov) ? 0 : -EFAULT) +#define opt_ok(ov, ol, ty) ((ov) && (ol) >= sizeof(ty)) +#define opt_not_ok(ov, ol, ty) \ + ((ol) < sizeof(ty) ? -EINVAL : (ov) ? 0 : -EFAULT) -ci_inline unsigned -ci_get_optval(const void *optval, socklen_t optlen) +ci_inline unsigned ci_get_optval(const void* optval, socklen_t optlen) { - if (optlen >= sizeof(unsigned)) - return (*(unsigned*)optval); - else return (unsigned)(*(unsigned char*)optval); + if( optlen >= sizeof(unsigned) ) + return (*(unsigned*) optval); + else + return (unsigned) (*(unsigned char*) optval); } /*! Do not call it, use ci_getsockopt_final(). 
*/ -ci_inline int -ci_getsockopt_final_pre(void *optval, socklen_t *optlen, int level, - void *val, socklen_t val_size) +ci_inline int ci_getsockopt_final_pre( + void* optval, socklen_t* optlen, int level, void* val, socklen_t val_size) { if( *optlen > 0 ) memcpy(optval, val, CI_MIN(*optlen, val_size)); if( *optlen > val_size ) *optlen = val_size; /* TODO AFAIK, Solaris returns error if *optlen < val_size. */ - return 0; + return 0; } /*! Common getsockopt() part - push value to the user according to the * particular OS expectations. Return -1 with errno being set or 0. */ -ci_inline int -ci_getsockopt_final(void *optval, socklen_t *optlen, int level, - void *val, size_t val_size) +ci_inline int ci_getsockopt_final( + void* optval, socklen_t* optlen, int level, void* val, size_t val_size) { - if( (level == SOL_SOCKET || level == SOL_IP) && - val_size == sizeof(int) && + if( (level == SOL_SOCKET || level == SOL_IP) && val_size == sizeof(int) && *optlen >= sizeof(char) && *optlen < sizeof(int) ) { - int ival = *((int *)val); - unsigned char ucval = (unsigned char)ival; - if( ival >=0 && ival <= 255) - return ci_getsockopt_final_pre(optval, optlen, level, - &ucval, sizeof(ucval)); + int ival = *((int*) val); + unsigned char ucval = (unsigned char) ival; + if( ival >= 0 && ival <= 255 ) + return ci_getsockopt_final_pre( + optval, optlen, level, &ucval, sizeof(ucval)); } - return ci_getsockopt_final_pre(optval, optlen, level, - val, val_size); + return ci_getsockopt_final_pre(optval, optlen, level, val, val_size); } @@ -487,9 +469,8 @@ ci_getsockopt_final(void *optval, socklen_t *optlen, int level, * \param optlen [in/out] Length of buffer ref'd by [optval] * \return As for getsockopt() */ -extern int ci_get_sol_tcp(ci_netif* netif, ci_sock_cmn* s, - int optname, void *optval, - socklen_t *optlen) CI_HF; +extern int ci_get_sol_tcp(ci_netif* netif, ci_sock_cmn* s, int optname, + void* optval, socklen_t* optlen) CI_HF; #ifdef __KERNEL__ extern int 
ci_ip_mtu_discover_from_sflags(int s_flags, int af) CI_HF; @@ -504,9 +485,8 @@ extern int ci_ip_mtu_discover_from_sflags(int s_flags, int af) CI_HF; * \param optlen [in/out] Length of buffer ref'd by [optval] * \return As for getsockopt() */ -extern int ci_get_sol_ip( ci_netif* netif, ci_sock_cmn* s, ci_fd_t fd, - int optname, void *optval, - socklen_t *optlen ) CI_HF; +extern int ci_get_sol_ip(ci_netif* netif, ci_sock_cmn* s, ci_fd_t fd, + int optname, void* optval, socklen_t* optlen) CI_HF; #endif #if CI_CFG_FAKE_IPV6 @@ -519,9 +499,8 @@ extern int ci_get_sol_ip( ci_netif* netif, ci_sock_cmn* s, ci_fd_t fd, * \param optlen [in/out] Length of buffer ref'd by [optval] * \return As for getsockopt() */ -extern int ci_get_sol_ip6( ci_netif* netif, ci_sock_cmn* s, ci_fd_t fd, - int optname, void *optval, - socklen_t *optlen ) CI_HF; +extern int ci_get_sol_ip6(ci_netif* netif, ci_sock_cmn* s, ci_fd_t fd, + int optname, void* optval, socklen_t* optlen) CI_HF; #endif #if defined(__KERNEL__) && ! defined(EFRM_HAS_STRUCT_TIMEVAL) @@ -538,9 +517,8 @@ extern int ci_get_sol_ip6( ci_netif* netif, ci_sock_cmn* s, ci_fd_t fd, * \param optlen [in/out] Length of buffer ref'd by [optval] * \return As for getsockopt() */ -extern int ci_get_sol_socket( ci_netif* netif, ci_sock_cmn* s, - int optname, void *optval, - socklen_t *optlen ) CI_HF; +extern int ci_get_sol_socket(ci_netif* netif, ci_sock_cmn* s, int optname, + void* optval, socklen_t* optlen) CI_HF; /*! Handler for common setsockopt:SOL_IP handlers. * \param netif [in] Netif context @@ -550,9 +528,8 @@ extern int ci_get_sol_socket( ci_netif* netif, ci_sock_cmn* s, * \param optlen [in] Length of buffer ref'd by [optval] * \return As for setsockopt() */ -extern int -ci_set_sol_ip( ci_netif* netif, ci_sock_cmn* s, - int optname, const void *optval, socklen_t optlen) CI_HF; +extern int ci_set_sol_ip(ci_netif* netif, ci_sock_cmn* s, int optname, + const void* optval, socklen_t optlen) CI_HF; #if CI_CFG_FAKE_IPV6 /*! 
Handler for common setsockopt:SOL_IPV6 handlers. @@ -563,9 +540,8 @@ ci_set_sol_ip( ci_netif* netif, ci_sock_cmn* s, * \param optlen [in] Length of buffer ref'd by [optval] * \return As for setsockopt() */ -extern int -ci_set_sol_ip6( ci_netif* netif, ci_sock_cmn* s, - int optname, const void *optval, socklen_t optlen) CI_HF; +extern int ci_set_sol_ip6(ci_netif* netif, ci_sock_cmn* s, int optname, + const void* optval, socklen_t optlen) CI_HF; #endif /*! Handler for common setsockopt:SOL_SOCKET handlers. @@ -576,52 +552,71 @@ ci_set_sol_ip6( ci_netif* netif, ci_sock_cmn* s, * \param optlen [in] Length of buffer ref'd by [optval] * \return As for setsockopt() */ -extern int -ci_set_sol_socket( ci_netif* netif, ci_sock_cmn* s, - int optname, const void *optval, socklen_t optlen) CI_HF; +extern int ci_set_sol_socket(ci_netif* netif, ci_sock_cmn* s, int optname, + const void* optval, socklen_t optlen) CI_HF; /*! Handles socket options that don't require the netif lock. */ -extern int -ci_set_sol_socket_nolock(ci_netif*, ci_sock_cmn* s, int optname, - const void *optval, socklen_t optlen) CI_HF; +extern int ci_set_sol_socket_nolock(ci_netif*, ci_sock_cmn* s, int optname, + const void* optval, socklen_t optlen) CI_HF; /********************************************************************* ******************************* Ioctls ****************************** -*********************************************************************/ + *********************************************************************/ #ifdef __KERNEL__ -#define CI_IOCTL_ARG_OK(t,a) ({t _v; int _rc = get_user(_v, (t*)(a)); (void)_v; _rc==0;}) -#define CI_IOCTL_SETARG(a,v) do { put_user(v,a); } while(0) -#define CI_IOCTL_GETARG(t,a) ({t _v; get_user(_v, (t*)(a)); _v; }) +#define CI_IOCTL_ARG_OK(t, a) \ + ({ \ + t _v; \ + int _rc = get_user(_v, (t*) (a)); \ + (void) _v; \ + _rc == 0; \ + }) +#define CI_IOCTL_SETARG(a, v) \ + do { \ + put_user(v, a); \ + } while( 0 ) +#define CI_IOCTL_GETARG(t, a) \ + ({ \ + t 
_v; \ + get_user(_v, (t*) (a)); \ + _v; \ + }) #else -#define CI_IOCTL_ARG_OK(t,a) ((a) != 0) -#define CI_IOCTL_SETARG(a,v) do { *(a)=(v); } while(0) -#define CI_IOCTL_GETARG(t,v) (*(t*)(v)) +#define CI_IOCTL_ARG_OK(t, a) ((a) != 0) +#define CI_IOCTL_SETARG(a, v) \ + do { \ + *(a) = (v); \ + } while( 0 ) +#define CI_IOCTL_GETARG(t, v) (*(t*) (v)) #endif #if defined(__KERNEL) /* Bug 18959: should be __KERNEL__ */ -/* Common handler for FIONBIO - called in per-protocol handler to +/* Common handler for FIONBIO - called in per-protocol handler to * keep the request efficient */ -#define CI_CMN_IOCTL_FIONBIO(s, arg) do { \ - int v, _rc = get_user(v, arg); \ - if( v ) { \ - LOG_SV( ci_log("%s: set non-blocking mode", __FUNCTION__ ) ); \ - ci_bit_set(&(s)->b.sb_aflags, CI_SB_AFLAG_O_NONBLOCK_BIT); \ - } else { \ - LOG_SV( ci_log("%s: clear non-blocking mode", __FUNCTION__ ) ); \ - ci_bit_clear(&(s)->b.sb_aflags, CI_SB_AFLAG_O_NONBLOCK_BIT); \ - } } while (0) +#define CI_CMN_IOCTL_FIONBIO(s, arg) \ + do { \ + int v, _rc = get_user(v, arg); \ + if( v ) { \ + LOG_SV(ci_log("%s: set non-blocking mode", __FUNCTION__)); \ + ci_bit_set(&(s)->b.sb_aflags, CI_SB_AFLAG_O_NONBLOCK_BIT); \ + } else { \ + LOG_SV(ci_log("%s: clear non-blocking mode", __FUNCTION__)); \ + ci_bit_clear(&(s)->b.sb_aflags, CI_SB_AFLAG_O_NONBLOCK_BIT); \ + } \ + } while( 0 ) #else -/* Common handler for FIONBIO - called in per-protocol handler to +/* Common handler for FIONBIO - called in per-protocol handler to * keep the request efficient */ -#define CI_CMN_IOCTL_FIONBIO(s, arg) do { \ - if( *(int*)(arg) ) { \ - LOG_SV( ci_log("%s: set non-blocking mode", __FUNCTION__ ) ); \ - ci_bit_set(&(s)->b.sb_aflags, CI_SB_AFLAG_O_NONBLOCK_BIT); \ - } else { \ - LOG_SV( ci_log("%s: clear non-blocking mode", __FUNCTION__ ) ); \ - ci_bit_clear(&(s)->b.sb_aflags, CI_SB_AFLAG_O_NONBLOCK_BIT); \ - } } while (0) +#define CI_CMN_IOCTL_FIONBIO(s, arg) \ + do { \ + if( *(int*) (arg) ) { \ + LOG_SV(ci_log("%s: set 
non-blocking mode", __FUNCTION__)); \ + ci_bit_set(&(s)->b.sb_aflags, CI_SB_AFLAG_O_NONBLOCK_BIT); \ + } else { \ + LOG_SV(ci_log("%s: clear non-blocking mode", __FUNCTION__)); \ + ci_bit_clear(&(s)->b.sb_aflags, CI_SB_AFLAG_O_NONBLOCK_BIT); \ + } \ + } while( 0 ) #endif /*! Common handler for IOCTL calls. @@ -633,22 +628,22 @@ ci_set_sol_socket_nolock(ci_netif*, ci_sock_cmn* s, int optname, * \param os_socket_exists Non-zero if OS socket extsts * \return As for ioctl() */ -extern int ci_cmn_ioctl(ci_netif* netif, ci_sock_cmn* s, int request, - void* arg, int os_rc, int os_socket_exists); +extern int ci_cmn_ioctl(ci_netif* netif, ci_sock_cmn* s, int request, + void* arg, int os_rc, int os_socket_exists); /*! Compute the time stamp delta for the given packet time stamp and * return in in ts */ -extern void ci_udp_compute_stamp(ci_netif *netif, ci_uint64 stamp, - struct timespec *ts); +extern void ci_udp_compute_stamp( + ci_netif* netif, ci_uint64 stamp, struct timespec* ts); /* Return from getsockopt(level=SOL_INVALID) with appropriate errno */ -# define SOCKOPT_RET_INVALID_LEVEL(s) \ - if ((s)->domain == AF_INET6 ) \ - RET_WITH_ERRNO(ENOPROTOOPT); \ - else \ - RET_WITH_ERRNO(EOPNOTSUPP) +#define SOCKOPT_RET_INVALID_LEVEL(s) \ + if( (s)->domain == AF_INET6 ) \ + RET_WITH_ERRNO(ENOPROTOOPT); \ + else \ + RET_WITH_ERRNO(EOPNOTSUPP) /********************************************************************* ***************************** Async IO ****************************** @@ -661,14 +656,13 @@ extern void ci_udp_compute_stamp(ci_netif *netif, ci_uint64 stamp, #if defined(__KERNEL__) -extern void ci_ip_queue_enqueue_nnl(ci_netif* netif, ci_ip_pkt_queue*qu, - ci_ip_pkt_fmt* pkt) CI_HF; +extern void ci_ip_queue_enqueue_nnl( + ci_netif* netif, ci_ip_pkt_queue* qu, ci_ip_pkt_fmt* pkt) CI_HF; #endif extern ci_ip_pkt_fmt* ci_pkt_alloc_n(ci_netif* ni, int n) CI_HF; extern ci_ip_pkt_fmt* ci_pkt_alloc_n_nnl(ci_netif* ni, int n) CI_HF; - 
/********************************************************************* ******************************** UDP ******************************** *********************************************************************/ @@ -677,20 +671,21 @@ extern ci_ip_pkt_fmt* ci_pkt_alloc_n_nnl(ci_netif* ni, int n) CI_HF; * boundary requirements (multiple of 64 bits) */ /* How much payload space in a first fragment packet */ -#define UDP_PAYLOAD1_SPACE_PMTU(af, pmtu) \ - (((pmtu) - CI_IPX_HDR_SIZE(af) - CI_IPX_FRAG_HDR_SIZE(af) - \ - sizeof(ci_udp_hdr)) & 0xfff8) +#define UDP_PAYLOAD1_SPACE_PMTU(af, pmtu) \ + (((pmtu) -CI_IPX_HDR_SIZE(af) - CI_IPX_FRAG_HDR_SIZE(af) - \ + sizeof(ci_udp_hdr)) & \ + 0xfff8) /* How much space in a second fragment packet */ #define UDP_PAYLOAD2_SPACE_PMTU(af, pmtu) \ - (((pmtu) - CI_IPX_HDR_SIZE(af) + CI_IPX_FRAG_HDR_SIZE(af)) & 0xfff8) + (((pmtu) -CI_IPX_HDR_SIZE(af) + CI_IPX_FRAG_HDR_SIZE(af)) & 0xfff8) -#define UDP_HAS_SENDQ_SPACE(us,l) \ - ((us)->s.so.sndbuf >= (int)((us)->tx_count + (l))) +#define UDP_HAS_SENDQ_SPACE(us, l) \ + ((us)->s.so.sndbuf >= (int) ((us)->tx_count + (l))) /* Linux sets twice the buffer size that the application requests. */ -#define oo_adjust_SO_XBUF(v) ((v) * 2) +#define oo_adjust_SO_XBUF(v) ((v) *2) /********************************************************************** @@ -699,49 +694,45 @@ extern ci_ip_pkt_fmt* ci_pkt_alloc_n_nnl(ci_netif* ni, int n) CI_HF; #ifdef __KERNEL__ -ci_inline int -oo_spinloop_pause_check_signals(ci_netif* ni, ci_uint64 now_frc, - ci_uint64* schedule_frc, int have_timeout) +ci_inline int oo_spinloop_pause_check_signals( + ci_netif* ni, ci_uint64 now_frc, ci_uint64* schedule_frc, int have_timeout) { - if(CI_UNLIKELY( signal_pending(current) )) + if( CI_UNLIKELY(signal_pending(current)) ) return have_timeout ? 
-EINTR : -ERESTARTSYS; if( now_frc - *schedule_frc > IPTIMER_STATE(ni)->khz ) { - schedule(); /* schedule() every 1ms */ + schedule(); /* schedule() every 1ms */ *schedule_frc = now_frc; } return 0; } -#define OO_SPINLOOP_PAUSE_CHECK_SIGNALS(ni, now_frc, schedule_frc, \ - have_timeout, w, si) \ +#define OO_SPINLOOP_PAUSE_CHECK_SIGNALS( \ + ni, now_frc, schedule_frc, have_timeout, w, si) \ oo_spinloop_pause_check_signals(ni, now_frc, schedule_frc, have_timeout) #else #include "ci/internal/ip_signal.h" -extern int oo_spinloop_run_pending_sigs(ci_netif*, citp_waitable*, - citp_signal_info*, int) CI_HF; - -ci_inline int -oo_spinloop_pause_check_signals(ci_netif* ni, - ci_uint64 now_frc /*unused*/, - ci_uint64* schedule_frc /*unused*/, - int have_timeout, - citp_waitable* w, citp_signal_info* si) +extern int oo_spinloop_run_pending_sigs( + ci_netif*, citp_waitable*, citp_signal_info*, int) CI_HF; + +ci_inline int oo_spinloop_pause_check_signals(ci_netif* ni, + ci_uint64 now_frc /*unused*/, ci_uint64* schedule_frc /*unused*/, + int have_timeout, citp_waitable* w, citp_signal_info* si) { ci_assert_gt(si->c.inside_lib, 0); ci_assert(~si->c.aflags & OO_SIGNAL_FLAG_FDTABLE_LOCKED); - if(CI_LIKELY( ! (si->c.aflags & OO_SIGNAL_FLAG_HAVE_PENDING) )) + if( CI_LIKELY(! (si->c.aflags & OO_SIGNAL_FLAG_HAVE_PENDING)) ) return 0; else return oo_spinloop_run_pending_sigs(ni, w, si, have_timeout); } -#define OO_SPINLOOP_PAUSE_CHECK_SIGNALS(ni, now_frc, schedule_frc, \ - have_timeout, w, si) \ - oo_spinloop_pause_check_signals(ni, now_frc, schedule_frc, \ - have_timeout, w, si) +#define OO_SPINLOOP_PAUSE_CHECK_SIGNALS( \ + ni, now_frc, schedule_frc, have_timeout, w, si) \ + oo_spinloop_pause_check_signals( \ + ni, now_frc, schedule_frc, have_timeout, w, si) #endif @@ -779,9 +770,9 @@ static inline int ci_intf_i_to_ifindex(ci_netif* ni, int intf_i) *********************************************************************/ /* Returns true if the packet is freed. 
*/ -ci_inline int/*bool*/ -ci_netif_pkt_release_in_poll(ci_netif* netif, ci_ip_pkt_fmt* pkt, - struct ci_netif_poll_state* ps) +ci_inline int /*bool*/ +ci_netif_pkt_release_in_poll( + ci_netif* netif, ci_ip_pkt_fmt* pkt, struct ci_netif_poll_state* ps) { if( pkt->refcount == 1 ) { /* We are going to free the packet, so it is not in use @@ -794,57 +785,55 @@ ci_netif_pkt_release_in_poll(ci_netif* netif, ci_ip_pkt_fmt* pkt, __ci_netif_pkt_clean(pkt); if( ! (pkt->flags & CI_PKT_FLAG_NONB_POOL) ) { ci_netif_pkt_put(netif, pkt); - } - else if( ps != NULL ) { + } else if( ps != NULL ) { *ps->tx_pkt_free_list_insert = OO_PKT_P(pkt); ps->tx_pkt_free_list_insert = &pkt->next; ++ps->tx_pkt_free_list_n; - } - else { + } else { ci_netif_pkt_free_nonb_list(netif, OO_PKT_P(pkt), pkt); - netif->state->n_async_pkts ++; + netif->state->n_async_pkts++; } return CI_TRUE; - } - else { + } else { ci_assert_gt(pkt->refcount, 1); --pkt->refcount; return CI_FALSE; } } - + #ifdef __KERNEL__ extern void ci_netif_set_merge_atomic_flag(ci_netif* ni); -#define CI_NETIF_STATE_MOD(ni, is_locked, field, mod) \ - do { \ - if( is_locked ) { \ - mod##mod ni->state->field; \ - } \ - else { \ - ci_int32 val; \ - do { \ - val = ni->state->atomic_##field; \ - } while( ci_cas32u_fail(&ni->state->atomic_##field, val, val mod 1) );\ - } \ - } while(0) +#define CI_NETIF_STATE_MOD(ni, is_locked, field, mod) \ + do { \ + if( is_locked ) { \ + mod##mod ni->state->field; \ + } else { \ + ci_int32 val; \ + do { \ + val = ni->state->atomic_##field; \ + } while( ci_cas32u_fail(&ni->state->atomic_##field, val, val mod 1) ); \ + } \ + } while( 0 ) #else #define CI_NETIF_STATE_MOD(ni, is_locked, field, mod) \ - do { mod##mod ni->state->field; } while(0) + do { \ + mod##mod ni->state->field; \ + } while( 0 ) #endif -void oo_pkt_calc_checksums(ci_netif* ni, ci_ip_pkt_fmt* pkt, - struct iovec* host_iov); +void oo_pkt_calc_checksums( + ci_netif* ni, ci_ip_pkt_fmt* pkt, struct iovec* host_iov); #ifndef __KERNEL__ 
-ci_inline void ci_pkt_zc_free_clean(ci_ip_pkt_fmt* pkt, - enum onload_zc_callback_rc cb_rc) +ci_inline void ci_pkt_zc_free_clean( + ci_ip_pkt_fmt* pkt, enum onload_zc_callback_rc cb_rc) { if( ! (cb_rc & ONLOAD_ZC_KEEP) ) { /* Remove the ref we added earlier iff the user didn't retain it */ - pkt->rx_flags &=~ CI_PKT_RX_FLAG_KEEP; - pkt->pio_addr = -1; /* Reset to normal after user_refcount overwrote it */ + pkt->rx_flags &= ~CI_PKT_RX_FLAG_KEEP; + pkt->pio_addr = -1; /* Reset to normal after user_refcount overwrote it */ } } #endif @@ -854,14 +843,10 @@ ci_inline void ci_pkt_zc_free_clean(ci_ip_pkt_fmt* pkt, ****************************** ZC send offloads ********************* *********************************************************************/ -ci_int8 -ci_tcp_offload_zc_send_accum_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, - struct ci_pkt_zc_payload* zcp, - unsigned payload_offset, void* prefix); -ci_uint8 -ci_tcp_offload_zc_send_insert_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, - struct ci_pkt_zc_payload* zcp, - unsigned payload_offset, void* prefix); +ci_int8 ci_tcp_offload_zc_send_accum_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, + struct ci_pkt_zc_payload* zcp, unsigned payload_offset, void* prefix); +ci_uint8 ci_tcp_offload_zc_send_insert_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, + struct ci_pkt_zc_payload* zcp, unsigned payload_offset, void* prefix); #endif /* __CI_LIB_IP_INTERNAL_H__ */ /*! 
\cidoxg_end */ diff --git a/src/lib/transport/ip/netif.c b/src/lib/transport/ip/netif.c index 9fcddcfbd..23679bbe6 100644 --- a/src/lib/transport/ip/netif.c +++ b/src/lib/transport/ip/netif.c @@ -42,14 +42,14 @@ ci_inline void ci_netif_timeout_set_timer(ci_netif* ni, ci_iptime_t prev_time) for( i = 0; i < OO_TIMEOUT_Q_MAX; i++ ) { ci_tcp_state* ts; struct oo_p_dllink_state timeout_q = - oo_p_dllink_ptr(ni, &ni->state->timeout_q[i]); + oo_p_dllink_ptr(ni, &ni->state->timeout_q[i]); if( oo_p_dllink_is_empty(ni, timeout_q) ) continue; ts = TCP_STATE_FROM_LINK(oo_p_dllink_statep(ni, timeout_q.l->next).l); if( TIME_LE(ts->t_last_sent, prev_time) ) return; - if( !found || TIME_LT(ts->t_last_sent, time) ) { + if( ! found || TIME_LT(ts->t_last_sent, time) ) { found = 1; time = ts->t_last_sent; } @@ -71,11 +71,11 @@ ci_inline void ci_netif_timeout_add(ci_netif* ni, ci_tcp_state* ts, int idx) { int is_first; struct oo_p_dllink_state my_list = - oo_p_dllink_ptr(ni, &ni->state->timeout_q[idx]); + oo_p_dllink_ptr(ni, &ni->state->timeout_q[idx]); struct oo_p_dllink_state other_list = - oo_p_dllink_ptr(ni, &ni->state->timeout_q[1-idx]); + oo_p_dllink_ptr(ni, &ni->state->timeout_q[1 - idx]); struct oo_p_dllink_state link = - oo_p_dllink_sb(ni, &ts->s.b, &ts->timeout_q_link); + oo_p_dllink_sb(ni, &ts->s.b, &ts->timeout_q_link); ci_tcp_state* other_ts; OO_P_DLLINK_ASSERT_EMPTY(ni, link); @@ -104,11 +104,11 @@ void ci_netif_timeout_remove(ci_netif* ni, ci_tcp_state* ts) { int is_first, idx; struct oo_p_dllink_state link = - oo_p_dllink_sb(ni, &ts->s.b, &ts->timeout_q_link); + oo_p_dllink_sb(ni, &ts->s.b, &ts->timeout_q_link); - ci_assert( (ts->s.b.state == CI_TCP_TIME_WAIT) || - ci_tcp_is_timeout_orphan(ts)); - ci_assert( !oo_p_dllink_is_empty(ni, link) ); + ci_assert( + (ts->s.b.state == CI_TCP_TIME_WAIT) || ci_tcp_is_timeout_orphan(ts)); + ci_assert(! 
oo_p_dllink_is_empty(ni, link)); if( ts->s.b.state == CI_TCP_TIME_WAIT ) idx = OO_TIMEOUT_Q_TIMEWAIT; @@ -131,14 +131,14 @@ void ci_netif_timeout_leave(ci_netif* netif, ci_tcp_state* ts) { ci_assert(netif); ci_assert(ts); - ci_assert( (ts->s.b.state == CI_TCP_TIME_WAIT) || - ci_tcp_is_timeout_orphan(ts) ); + ci_assert( + (ts->s.b.state == CI_TCP_TIME_WAIT) || ci_tcp_is_timeout_orphan(ts)); #ifndef NDEBUG - if (ts->s.b.state == CI_TCP_TIME_WAIT) - LOG_TC(log(LPF "%d TIME_WAIT->CLOSED (2MSL expired)", S_FMT(ts))); + if( ts->s.b.state == CI_TCP_TIME_WAIT ) + LOG_TC(log(LPF "%d TIME_WAIT->CLOSED (2MSL expired)", S_FMT(ts))); else - LOG_TC(log(LPF "%d Droping ORPHANed %s", S_FMT(ts), state_str(ts))); + LOG_TC(log(LPF "%d Droping ORPHANed %s", S_FMT(ts), state_str(ts))); #endif /* drop will call ci_netif_timeout_remove; @@ -161,10 +161,11 @@ void ci_netif_timeout_reap(ci_netif* ni) for( i = 0; i < OO_TIMEOUT_Q_MAX; i++ ) { struct oo_p_dllink_state list = - oo_p_dllink_ptr(ni, &ni->state->timeout_q[i]); + oo_p_dllink_ptr(ni, &ni->state->timeout_q[i]); struct oo_p_dllink_state l, tmp; - oo_p_dllink_for_each_safe(ni, l, tmp, list) { + oo_p_dllink_for_each_safe(ni, l, tmp, list) + { ci_tcp_state* ts = TCP_STATE_FROM_LINK(l.l); #if CI_CFG_FD_CACHING @@ -190,8 +191,7 @@ void ci_netif_timeout_reap(ci_netif* ni) } /*! this is the timeout timer callback function */ -void -ci_netif_timeout_state(ci_netif* ni) +void ci_netif_timeout_state(ci_netif* ni) { int i; @@ -202,12 +202,12 @@ ci_netif_timeout_state(ci_netif* ni) for( i = 0; i < OO_TIMEOUT_Q_MAX; i++ ) { ci_tcp_state* ts; struct oo_p_dllink_state list = - oo_p_dllink_ptr(ni, &ni->state->timeout_q[i]); + oo_p_dllink_ptr(ni, &ni->state->timeout_q[i]); while( ! 
oo_p_dllink_is_empty(ni, list) ) { ts = TCP_STATE_FROM_LINK(oo_p_dllink_statep(ni, list.l->next).l); - ci_assert( (ts->s.b.state == CI_TCP_TIME_WAIT) || - ci_tcp_is_timeout_orphan(ts) ); + ci_assert( + (ts->s.b.state == CI_TCP_TIME_WAIT) || ci_tcp_is_timeout_orphan(ts)); if( TIME_GT(ts->t_last_sent, ci_ip_time_now(ni)) ) break; /* break from the inner loop */ @@ -231,22 +231,21 @@ ci_netif_timeout_state(ci_netif* ni) * - add back onto timeout list */ -void ci_netif_timeout_restart(ci_netif *ni, ci_tcp_state *ts) +void ci_netif_timeout_restart(ci_netif* ni, ci_tcp_state* ts) { int is_tw = (ts->s.b.state == CI_TCP_TIME_WAIT); ci_assert(ts); - ci_assert( is_tw || ci_tcp_is_timeout_orphan(ts)); + ci_assert(is_tw || ci_tcp_is_timeout_orphan(ts)); /* take it off the list */ ci_netif_timeout_remove(ni, ts); /* store time to leave TIMEWAIT state */ - ts->t_last_sent = ci_ip_time_now(ni) + - ( is_tw ? - NI_CONF(ni).tconst_2msl_time : NI_CONF(ni).tconst_fin_timeout ); + ts->t_last_sent = + ci_ip_time_now(ni) + + (is_tw ? NI_CONF(ni).tconst_2msl_time : NI_CONF(ni).tconst_fin_timeout); /* add to list */ ci_netif_timeout_add( - ni, ts, - is_tw ? OO_TIMEOUT_Q_TIMEWAIT : OO_TIMEOUT_Q_FINWAIT); + ni, ts, is_tw ? 
OO_TIMEOUT_Q_TIMEWAIT : OO_TIMEOUT_Q_FINWAIT); } @@ -270,7 +269,7 @@ void ci_netif_timewait_enter(ci_netif* ni, ci_tcp_state* ts) /* called before the state is changed to TIME_WAIT */ ci_assert(ts->s.b.state != CI_TCP_TIME_WAIT); /* if already in the timeout list */ - if ( ci_tcp_is_timeout_orphan(ts) ) { + if( ci_tcp_is_timeout_orphan(ts) ) { ci_netif_timeout_remove(ni, ts); } OO_P_DLLINK_ASSERT_EMPTY_SB(ni, &ts->s.b, &ts->timeout_q_link); @@ -292,17 +291,19 @@ int ci_netif_timewait_try_to_free_filter(ci_netif* ni) ci_assert(ci_netif_is_locked(ni)); for( i = 0; i < OO_TIMEOUT_Q_MAX; i++ ) { - struct oo_p_dllink_state list = oo_p_dllink_ptr(ni, &ni->state->timeout_q[i]); + struct oo_p_dllink_state list = + oo_p_dllink_ptr(ni, &ni->state->timeout_q[i]); struct oo_p_dllink_state l, tmp; - oo_p_dllink_for_each_safe(ni, l, tmp, list) { + oo_p_dllink_for_each_safe(ni, l, tmp, list) + { ci_tcp_state* ts = TCP_STATE_FROM_LINK(l.l); if( ts->s.s_flags & CI_SOCK_FLAG_FILTER ) { /* No cached sockets here: orphaned or timewait only. * They really free the hw filter when we drop them. 
*/ - ci_assert( (ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN) || - ts->s.b.state == CI_TCP_TIME_WAIT ); + ci_assert((ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN) || + ts->s.b.state == CI_TCP_TIME_WAIT); ci_netif_timeout_leave(ni, ts); CITP_STATS_NETIF(++ni->state->stats.timewait_reap_filter); @@ -335,11 +336,11 @@ int ci_netif_timewait_try_to_free_filter(ci_netif* ni) void ci_netif_fin_timeout_enter(ci_netif* ni, ci_tcp_state* ts) { struct oo_p_dllink_state link = - oo_p_dllink_sb(ni, &ts->s.b, &ts->timeout_q_link); + oo_p_dllink_sb(ni, &ts->s.b, &ts->timeout_q_link); /* check endpoint is an orphan */ #if CI_CFG_FD_CACHING - ci_assert(ts->s.b.sb_aflags & (CI_SB_AFLAG_ORPHAN|CI_SB_AFLAG_IN_CACHE)); + ci_assert(ts->s.b.sb_aflags & (CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_IN_CACHE)); #else ci_assert(ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN); #endif @@ -368,9 +369,8 @@ void ci_netif_fin_timeout_enter(ci_netif* ni, ci_tcp_state* ts) } -static int ci_netif_try_to_reap_udp_recv_q(ci_netif* ni, - ci_udp_recv_q* recv_q, - int* add_to_reap_list) +static int ci_netif_try_to_reap_udp_recv_q( + ci_netif* ni, ci_udp_recv_q* recv_q, int* add_to_reap_list) { int freed_n; ci_uint32 reaped_b4 = recv_q->pkts_reaped; @@ -387,14 +387,14 @@ void ci_netif_try_to_reap(ci_netif* ni, int stop_once_freed_n) /* Look for packet buffers that can be reaped. 
*/ struct oo_p_dllink_state reap_list = - oo_p_dllink_ptr(ni, &ni->state->reap_list); + oo_p_dllink_ptr(ni, &ni->state->reap_list); struct oo_p_dllink_state lnk, tmp; oo_p lnk_to_stop = OO_P_NULL; citp_waitable_obj* wo; int freed_n = 0; int add_to_reap_list; - int reap_harder = ni->packets->sets_n == ni->packets->sets_max - || ni->state->mem_pressure; + int reap_harder = + ni->packets->sets_n == ni->packets->sets_max || ni->state->mem_pressure; if( oo_p_dllink_is_empty(ni, reap_list) ) return; @@ -405,7 +405,8 @@ void ci_netif_try_to_reap(ci_netif* ni, int stop_once_freed_n) */ stop_once_freed_n <<= 1u; - oo_p_dllink_for_each_safe(ni, lnk, tmp, reap_list) { + oo_p_dllink_for_each_safe(ni, lnk, tmp, reap_list) + { add_to_reap_list = 0; if( lnk.p == lnk_to_stop ) @@ -422,8 +423,8 @@ void ci_netif_try_to_reap(ci_netif* ni, int stop_once_freed_n) freed_n += q_num_b4 - ts->recv1.num; #if CI_CFG_TIMESTAMPING - freed_n += ci_netif_try_to_reap_udp_recv_q(ni, &ts->timestamp_q, - &add_to_reap_list); + freed_n += ci_netif_try_to_reap_udp_recv_q( + ni, &ts->timestamp_q, &add_to_reap_list); #endif /* Try to reap the last packet */ @@ -436,18 +437,17 @@ void ci_netif_try_to_reap(ci_netif* ni, int stop_once_freed_n) } if( ts->recv1.num > 1 ) add_to_reap_list = 1; - } - else if( wo->waitable.state == CI_TCP_STATE_UDP ) { + } else if( wo->waitable.state == CI_TCP_STATE_UDP ) { ci_udp_state* us = &wo->udp; - freed_n += ci_netif_try_to_reap_udp_recv_q(ni, &us->recv_q, - &add_to_reap_list); + freed_n += + ci_netif_try_to_reap_udp_recv_q(ni, &us->recv_q, &add_to_reap_list); #if CI_CFG_TIMESTAMPING - freed_n += ci_netif_try_to_reap_udp_recv_q(ni, &us->timestamp_q, - &add_to_reap_list); + freed_n += ci_netif_try_to_reap_udp_recv_q( + ni, &us->timestamp_q, &add_to_reap_list); #endif } - if( add_to_reap_list) { + if( add_to_reap_list ) { oo_p_dllink_add_tail(ni, reap_list, lnk); if( OO_P_IS_NULL(lnk_to_stop) ) lnk_to_stop = lnk.p; @@ -461,8 +461,7 @@ void ci_netif_try_to_reap(ci_netif* ni, 
int stop_once_freed_n) * because it uses stop_once_freed_n=1. */ freed_n += ci_netif_pkt_try_to_free(ni, 0, stop_once_freed_n - freed_n); if( freed_n < (stop_once_freed_n >> 1) && reap_harder ) { - freed_n += ci_netif_pkt_try_to_free(ni, 1, - stop_once_freed_n - freed_n); + freed_n += ci_netif_pkt_try_to_free(ni, 1, stop_once_freed_n - freed_n); } } @@ -470,8 +469,7 @@ void ci_netif_try_to_reap(ci_netif* ni, int stop_once_freed_n) } -void ci_netif_rxq_low_on_recv(ci_netif* ni, ci_sock_cmn* s, - int bytes_freed) +void ci_netif_rxq_low_on_recv(ci_netif* ni, ci_sock_cmn* s, int bytes_freed) { /* Called by the recv() paths when [ni->state->rxq_low] is non-zero. It * is moderately hard to track exactly how many packet buffers were freed @@ -505,8 +503,7 @@ void ci_netif_rxq_low_on_recv(ci_netif* ni, ci_sock_cmn* s, #if CI_CFG_TIMESTAMPING ci_udp_recv_q_reap(ni, &SOCK_TO_UDP(s)->timestamp_q); #endif - } - else if( s->b.state & CI_TCP_STATE_TCP_CONN ) { + } else if( s->b.state & CI_TCP_STATE_TCP_CONN ) { ci_tcp_rx_reap_rxq_bufs(ni, SOCK_TO_TCP(s)); #if CI_CFG_TIMESTAMPING ci_udp_recv_q_reap(ni, &SOCK_TO_TCP(s)->timestamp_q); @@ -521,7 +518,7 @@ void ci_netif_rxq_low_on_recv(ci_netif* ni, ci_sock_cmn* s, CITP_STATS_NETIF_INC(ni, memory_pressure_exit_recv); OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - ci_netif_rx_post_all_batch(ni, intf_i); + ci_netif_rx_post_all_batch(ni, intf_i); CITP_STATS_NETIF_INC(ni, rx_refill_recv); ci_netif_unlock(ni); } @@ -533,7 +530,7 @@ void ci_netif_mem_pressure_pkt_pool_fill(ci_netif* ni) ci_ip_pkt_fmt* pkt; int intf_i, n = 0; OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - n += (2*CI_CFG_RX_DESC_BATCH); + n += (2 * CI_CFG_RX_DESC_BATCH); while( ni->state->mem_pressure_pkt_pool_n < n && (pkt = ci_netif_pkt_alloc(ni, 0)) != NULL ) { pkt->flags |= CI_PKT_FLAG_RX; @@ -572,7 +569,7 @@ static void ci_netif_mem_pressure_enter_critical(ci_netif* ni, int intf_i) CITP_STATS_NETIF_INC(ni, memory_pressure_enter); ni->state->mem_pressure |= OO_MEM_PRESSURE_CRITICAL; - 
ni->state->rxq_limit = 2*CI_CFG_RX_DESC_BATCH; + ni->state->rxq_limit = 2 * CI_CFG_RX_DESC_BATCH; ci_netif_mem_pressure_pkt_pool_use(ni); ci_netif_rx_post_all_batch(ni, intf_i); } @@ -597,7 +594,8 @@ int ci_netif_mem_pressure_try_exit(ci_netif* ni) int intf_i, pkts_needed = 0; ci_ip_pkt_fmt* pkt; - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { ef_vi* vi = ci_netif_vi(ni, intf_i); pkts_needed += NI_OPTS(ni).rxq_limit - ef_vi_receive_fill_level(vi); } @@ -639,8 +637,8 @@ int ci_netif_mem_pressure_try_exit(ci_netif* ni) * *--------------------------------------------------------------------*/ -static int __ci_netif_rx_post(ci_netif* ni, ef_vi* vi, int intf_i, - int bufset_id, int max) +static int __ci_netif_rx_post( + ci_netif* ni, ef_vi* vi, int intf_i, int bufset_id, int max) { ci_ip_pkt_fmt* pkt; int i; @@ -665,8 +663,8 @@ static int __ci_netif_rx_post(ci_netif* ni, ef_vi* vi, int intf_i, pkt->intf_i = intf_i; pkt->pkt_start_off = ef_vi_receive_prefix_len(vi); ci_netif_poison_rx_pkt(pkt); - ef_vi_receive_init(vi, pkt_dma_addr_bufset(ni, pkt, intf_i, bufset), - OO_PKT_ID(pkt)); + ef_vi_receive_init( + vi, pkt_dma_addr_bufset(ni, pkt, intf_i, bufset), OO_PKT_ID(pkt)); #ifdef __powerpc__ { /* Flush RX buffer from cache. 
This saves significant latency when @@ -692,7 +690,7 @@ static int __ci_netif_rx_post(ci_netif* ni, ef_vi* vi, int intf_i, } ni->packets->set[bufset_id].n_free -= CI_CFG_RX_DESC_BATCH; ni->packets->n_free -= CI_CFG_RX_DESC_BATCH; - ni->state->n_rx_pkts += CI_CFG_RX_DESC_BATCH; + ni->state->n_rx_pkts += CI_CFG_RX_DESC_BATCH; ef_vi_receive_push(vi); posted += CI_CFG_RX_DESC_BATCH; } while( max - posted >= CI_CFG_RX_DESC_BATCH ); @@ -701,7 +699,7 @@ static int __ci_netif_rx_post(ci_netif* ni, ef_vi* vi, int intf_i, } -#define low_thresh(ni) ((ni)->state->rxq_limit / 2) +#define low_thresh(ni) ((ni)->state->rxq_limit / 2) int ci_netif_rx_post(ci_netif* netif, int intf_i, ef_vi* vi) @@ -729,7 +727,7 @@ int ci_netif_rx_post(ci_netif* netif, int intf_i, ef_vi* vi) rx_allowed = NI_OPTS(netif).max_rx_packets - netif->state->n_rx_pkts; if( max_n_to_post > rx_allowed ) goto rx_limited; - not_rx_limited: +not_rx_limited: ci_assert_ge(max_n_to_post, CI_CFG_RX_DESC_BATCH); /* We could have enough packets in all sets together, but we need them @@ -737,7 +735,7 @@ int ci_netif_rx_post(ci_netif* netif, int intf_i, ef_vi* vi) if( netif->packets->set[bufset_id].n_free < CI_CFG_RX_DESC_BATCH ) goto find_new_bufset; - good_bufset: +good_bufset: do { int n; n_to_post = CI_MIN(max_n_to_post, netif->packets->set[bufset_id].n_free); @@ -748,25 +746,23 @@ int ci_netif_rx_post(ci_netif* netif, int intf_i, ef_vi* vi) if( max_n_to_post < CI_CFG_RX_DESC_BATCH ) { if( bufset_id != netif->packets->id ) { - ci_netif_pkt_set_change(netif, bufset_id, - ask_for_more_packets); + ci_netif_pkt_set_change(netif, bufset_id, ask_for_more_packets); } CHECK_FREEPKTS(netif); return n_posted; } - find_new_bufset: + find_new_bufset: bufset_id = ci_netif_pktset_best(netif); if( bufset_id == -1 || netif->packets->set[bufset_id].n_free < CI_CFG_RX_DESC_BATCH ) goto not_enough_pkts; - ask_for_more_packets = ci_netif_pkt_set_is_underfilled(netif, - bufset_id); + ask_for_more_packets = 
ci_netif_pkt_set_is_underfilled(netif, bufset_id); } while( 1 ); /* unreachable */ - rx_limited: +rx_limited: /* [rx_allowed] can go negative. */ if( rx_allowed < 0 ) rx_allowed = 0; @@ -799,7 +795,7 @@ int ci_netif_rx_post(ci_netif* netif, int intf_i, ef_vi* vi) } #endif max_n_to_post = CI_MIN(max_n_to_post, rx_allowed); - if(CI_LIKELY( max_n_to_post >= CI_CFG_RX_DESC_BATCH )) + if( CI_LIKELY(max_n_to_post >= CI_CFG_RX_DESC_BATCH) ) goto not_rx_limited; CITP_STATS_NETIF_INC(netif, refill_rx_limited); #if OO_DO_STACK_POLL @@ -808,7 +804,7 @@ int ci_netif_rx_post(ci_netif* netif, int intf_i, ef_vi* vi) #endif return n_posted; - not_enough_pkts: +not_enough_pkts: /* The best packet set has less than CI_CFG_RX_DESC_BATCH packets. * We should free some packets or allocate a new set. */ @@ -831,8 +827,8 @@ int ci_netif_rx_post(ci_netif* netif, int intf_i, ef_vi* vi) if( netif->packets->sets_n < netif->packets->sets_max && ci_tcp_helper_more_bufs(netif) == 0 ) { bufset_id = netif->packets->sets_n - 1; - ci_assert_equal(netif->packets->set[bufset_id].n_free, - 1 << CI_CFG_PKTS_PER_SET_S); + ci_assert_equal( + netif->packets->set[bufset_id].n_free, 1 << CI_CFG_PKTS_PER_SET_S); ask_for_more_packets = 0; goto good_bufset; } @@ -871,24 +867,25 @@ static void citp_waitable_deferred_work(ci_netif* ni, citp_waitable* w) /* This happens when we move socket and continue to use it from another * thread or signal handler */ ci_log("%s: unexpected status %s for socket [%d:%d]", __func__, - ci_tcp_state_str(wo->waitable.state), NI_ID(ni), w->bufid); + ci_tcp_state_str(wo->waitable.state), NI_ID(ni), w->bufid); } } int ci_netif_lock_or_defer_work(ci_netif* ni, citp_waitable* w) { -#if CI_CFG_FD_CACHING && !defined(NDEBUG) - /* Cached sockets should not be deferring work - there are no user references +#if CI_CFG_FD_CACHING && ! 
defined(NDEBUG) + /* Cached sockets should not be deferring work - there are no user + * references */ - if( (w->state & CI_TCP_STATE_TCP) && !(w->state == CI_TCP_LISTEN) ) - ci_assert(!ci_tcp_is_cached(&CI_CONTAINER(citp_waitable_obj, - waitable, w)->tcp)); + if( (w->state & CI_TCP_STATE_TCP) && ! (w->state == CI_TCP_LISTEN) ) + ci_assert(! ci_tcp_is_cached( + &CI_CONTAINER(citp_waitable_obj, waitable, w)->tcp)); #endif - /* Orphaned sockets should not be deferring work - no-one has a reference to - * them, and the queue link can be used for other things. + /* Orphaned sockets should not be deferring work - no-one has a reference + * to them, and the queue link can be used for other things. */ - ci_assert(!(w->sb_aflags & CI_SB_AFLAG_ORPHAN)); + ci_assert(! (w->sb_aflags & CI_SB_AFLAG_ORPHAN)); if( ni->state->defer_work_count >= NI_OPTS(ni).defer_work_limit ) { int rc = ci_netif_lock(ni); @@ -945,8 +942,7 @@ int ci_netif_lock_or_defer_work(ci_netif* ni, citp_waitable* w) citp_waitable_deferred_work(ni, w); return 1; } - } - else { + } else { w->next_id = v & CI_EPLOCK_NETIF_SOCKET_LIST; new_v = (v & ~CI_EPLOCK_NETIF_SOCKET_LIST) | (W_ID(w) + 1); if( ci_cas64u_succeed(&ni->state->lock.lock, v, new_v) ) { @@ -958,8 +954,8 @@ int ci_netif_lock_or_defer_work(ci_netif* ni, citp_waitable* w) } -static void ci_netif_perform_deferred_socket_work(ci_netif* ni, - unsigned sock_id) +static void ci_netif_perform_deferred_socket_work( + ci_netif* ni, unsigned sock_id) { citp_waitable* w; oo_sp sockp; @@ -976,8 +972,7 @@ static void ci_netif_perform_deferred_socket_work(ci_netif* ni, CITP_STATS_NETIF(++ni->state->stats.deferred_work); citp_waitable_deferred_work(ni, w); - } - while( sock_id > 0 ); + } while( sock_id > 0 ); } @@ -988,10 +983,10 @@ ci_uint64 ci_netif_purge_deferred_socket_list(ci_netif* ni) ci_assert(ci_netif_is_locked(ni)); while( (l = ni->state->lock.lock) & CI_EPLOCK_NETIF_SOCKET_LIST ) { - if( ci_cas64u_succeed(&ni->state->lock.lock, l, - l &~ 
CI_EPLOCK_NETIF_SOCKET_LIST) ) - ci_netif_perform_deferred_socket_work(ni, - l & CI_EPLOCK_NETIF_SOCKET_LIST); + if( ci_cas64u_succeed( + &ni->state->lock.lock, l, l & ~CI_EPLOCK_NETIF_SOCKET_LIST) ) + ci_netif_perform_deferred_socket_work( + ni, l & CI_EPLOCK_NETIF_SOCKET_LIST); /* It is not possible to clear defer_work_count atomically together * with NETIF_SOCKET_LIST. We can do it before or after. @@ -1007,10 +1002,10 @@ ci_uint64 ci_netif_purge_deferred_socket_list(ci_netif* ni) void ci_netif_merge_atomic_counters(ci_netif* ni) { ci_int32 val; -#define merge(ni, field) \ +#define merge(ni, field) \ do { \ val = ni->state->atomic_##field; \ - } while( ci_cas32_fail(&ni->state->atomic_##field, val, 0) );\ + } while( ci_cas32_fail(&ni->state->atomic_##field, val, 0) ); \ ni->state->field += val; merge(ni, n_rx_pkts); @@ -1022,7 +1017,8 @@ void ci_netif_merge_atomic_counters(ci_netif* ni) #if CI_CFG_UL_INTERRUPT_HELPER static #endif -int oo_want_proactive_packet_allocation(ci_netif* ni) + int + oo_want_proactive_packet_allocation(ci_netif* ni) { ci_uint32 current_free; @@ -1063,11 +1059,12 @@ int oo_want_proactive_packet_allocation(ci_netif* ni) } CITP_STATS_NETIF_INC(ni, proactive_packet_allocation); - LOG_NC(ci_log("%s: [%d] proactive packet allocation: " - "%d sets n_freepkts=%d free_packets_low=%d " - "current_set.n_free=%d", __func__, NI_ID(ni), - pkt_sets_n(ni), ni->packets->n_free, - NI_OPTS(ni).free_packets_low, current_free)); + LOG_NC( + ci_log("%s: [%d] proactive packet allocation: " + "%d sets n_freepkts=%d free_packets_low=%d " + "current_set.n_free=%d", + __func__, NI_ID(ni), pkt_sets_n(ni), ni->packets->n_free, + NI_OPTS(ni).free_packets_low, current_free)); return 1; } @@ -1077,8 +1074,8 @@ int oo_want_proactive_packet_allocation(ci_netif* ni) * flags_to_handle will be cleared from lock and return value * unless work failed/need redoing. 
*/ -ci_uint64 ci_netif_unlock_slow_common(ci_netif* ni, ci_uint64 lock_val, - ci_uint64 flags_to_handle) +ci_uint64 ci_netif_unlock_slow_common( + ci_netif* ni, ci_uint64 lock_val, ci_uint64 flags_to_handle) { ci_uint64 set_flags = 0; ci_uint64 test_val; @@ -1095,8 +1092,8 @@ ci_uint64 ci_netif_unlock_slow_common(ci_netif* ni, ci_uint64 lock_val, /* Clear all flags before we handle them, to avoid racing against other * threads that set those flags. (note: SOCKET_LIST got handled above) */ - lock_val = ef_eplock_clear_flags(&ni->state->lock, - flags_to_handle & ~CI_EPLOCK_NETIF_SOCKET_LIST); + lock_val = ef_eplock_clear_flags( + &ni->state->lock, flags_to_handle & ~CI_EPLOCK_NETIF_SOCKET_LIST); /* Restrict work below to what has been requested */ test_val = lock_val & flags_to_handle; @@ -1105,8 +1102,7 @@ ci_uint64 ci_netif_unlock_slow_common(ci_netif* ni, ci_uint64 lock_val, if( ci_netif_pkt_tx_can_alloc_now(ni) ) { set_flags |= CI_EPLOCK_NETIF_PKT_WAKE; CITP_STATS_NETIF_INC(ni, unlock_slow_pkt_waiter); - } - else { + } else { set_flags |= CI_EPLOCK_NETIF_IS_PKT_WAITER; } } @@ -1125,7 +1121,7 @@ ci_uint64 ci_netif_unlock_slow_common(ci_netif* ni, ci_uint64 lock_val, set_flags |= CI_EPLOCK_NETIF_NEED_PRIME; } -#if CI_CFG_UL_INTERRUPT_HELPER && ! defined (__KERNEL__) +#if CI_CFG_UL_INTERRUPT_HELPER && ! 
defined(__KERNEL__) if( test_val & CI_EPLOCK_NETIF_CLOSE_ENDPOINT ) { ci_netif_close_pending(ni); } @@ -1133,7 +1129,7 @@ ci_uint64 ci_netif_unlock_slow_common(ci_netif* ni, ci_uint64 lock_val, if( test_val & CI_EPLOCK_NETIF_NEED_WAKE ) { /* Tell kernel to wake up endpoints */ struct oo_p_dllink_state post_poll_list = - oo_p_dllink_ptr(ni, &ni->state->post_poll_list); + oo_p_dllink_ptr(ni, &ni->state->post_poll_list); struct oo_p_dllink_state lnk, tmp_lnk; citp_waitable* w; struct oo_wakeup_eps op; @@ -1142,7 +1138,8 @@ ci_uint64 ci_netif_unlock_slow_common(ci_netif* ni, ci_uint64 lock_val, op.eps_num = 0; CI_USER_PTR_SET(op.eps, eps); - oo_p_dllink_for_each_safe(ni, lnk, tmp_lnk, post_poll_list) { + oo_p_dllink_for_each_safe(ni, lnk, tmp_lnk, post_poll_list) + { oo_p_dllink_del_init(ni, lnk); w = CI_CONTAINER(citp_waitable, post_poll_link, lnk.l); @@ -1153,17 +1150,15 @@ ci_uint64 ci_netif_unlock_slow_common(ci_netif* ni, ci_uint64 lock_val, * one go. */ if( op.eps_num == sizeof(eps) / sizeof(eps[0]) ) { - oo_resource_op(ci_netif_get_driver_handle(ni), - OO_IOC_WAKEUP_WAITERS, &op); + oo_resource_op( + ci_netif_get_driver_handle(ni), OO_IOC_WAKEUP_WAITERS, &op); op.eps_num = 0; - } } if( op.eps_num != 0 ) - oo_resource_op(ci_netif_get_driver_handle(ni), - OO_IOC_WAKEUP_WAITERS, &op); - + oo_resource_op( + ci_netif_get_driver_handle(ni), OO_IOC_WAKEUP_WAITERS, &op); } if( test_val & CI_EPLOCK_NETIF_NEED_PKT_SET || @@ -1212,12 +1207,12 @@ static void ci_netif_unlock_slow(ci_netif* ni) int intf_i; int rc = 0; ci_uint64 all_handled_flags = - CI_EPLOCK_NETIF_UL_MASK | CI_EPLOCK_NETIF_SOCKET_LIST; + CI_EPLOCK_NETIF_UL_MASK | CI_EPLOCK_NETIF_SOCKET_LIST; if( ~ni->state->flags & CI_NETIF_FLAG_EVQ_KERNEL_PRIME_ONLY ) all_handled_flags |= CI_EPLOCK_NETIF_NEED_PRIME; - ci_assert(ci_netif_is_locked(ni)); /* double unlock? */ + ci_assert(ci_netif_is_locked(ni)); /* double unlock? 
*/ CITP_STATS_NETIF_INC(ni, unlock_slow); @@ -1231,31 +1226,30 @@ static void ci_netif_unlock_slow(ci_netif* ni) CITP_STATS_NETIF_INC(ni, unlock_slow_prime_ul); ci_assert(NI_OPTS(ni).int_driven); /* TODO: When interrupt driven, evq_primed is never cleared, so we - * don't know here which subset of interfaces needs to be primed. - * Would be more efficient if we did. - */ + * don't know here which subset of interfaces needs to be primed. + * Would be more efficient if we did. + */ OO_STACK_FOR_EACH_INTF_I(ni, intf_i) - ef_eventq_prime(ci_netif_vi(ni, intf_i)); + ef_eventq_prime(ci_netif_vi(ni, intf_i)); } /* If some flags should be handled in kernel, then there is no point in * looping here. Dive! */ - k_flags |= l & ((CI_EPLOCK_NETIF_UNLOCK_FLAGS & ~all_handled_flags) | CI_EPLOCK_FL_NEED_WAKE); + k_flags |= l & ((CI_EPLOCK_NETIF_UNLOCK_FLAGS & ~all_handled_flags) | + CI_EPLOCK_FL_NEED_WAKE); #if ! CI_CFG_UL_INTERRUPT_HELPER if( k_flags != 0 ) break; #else /* In kernel we can handle following flags only: */ - ci_assert_nflags(k_flags, - ~(CI_EPLOCK_NETIF_PKT_WAKE | - CI_EPLOCK_NETIF_NEED_PRIME | - CI_EPLOCK_FL_NEED_WAKE)); + ci_assert_nflags( + k_flags, ~(CI_EPLOCK_NETIF_PKT_WAKE | CI_EPLOCK_NETIF_NEED_PRIME | + CI_EPLOCK_FL_NEED_WAKE)); l = ef_eplock_clear_flags(&ni->state->lock, k_flags); #endif - } while ( !ef_eplock_try_unlock(&ni->state->lock, &l, - CI_EPLOCK_NETIF_UNLOCK_FLAGS | - CI_EPLOCK_NETIF_SOCKET_LIST | - CI_EPLOCK_FL_NEED_WAKE) ); + } while( ! ef_eplock_try_unlock(&ni->state->lock, &l, + CI_EPLOCK_NETIF_UNLOCK_FLAGS | CI_EPLOCK_NETIF_SOCKET_LIST | + CI_EPLOCK_FL_NEED_WAKE) ); /* We've handled everything we needed to, so can return without * dropping to the kernel. @@ -1265,14 +1259,15 @@ static void ci_netif_unlock_slow(ci_netif* ni) CITP_STATS_NETIF_INC(ni, unlock_slow_syscall); #if ! 
CI_CFG_UL_INTERRUPT_HELPER - rc = oo_resource_op(ci_netif_get_driver_handle(ni), - OO_IOC_EPLOCK_WAKE, NULL); + rc = + oo_resource_op(ci_netif_get_driver_handle(ni), OO_IOC_EPLOCK_WAKE, NULL); #else - rc = oo_resource_op(ci_netif_get_driver_handle(ni), - OO_IOC_EPLOCK_WAKE_AND_DO, &k_flags); + rc = oo_resource_op( + ci_netif_get_driver_handle(ni), OO_IOC_EPLOCK_WAKE_AND_DO, &k_flags); #endif - if( rc < 0 ) LOG_NV(ci_log("%s: rc=%d", __FUNCTION__, rc)); + if( rc < 0 ) + LOG_NV(ci_log("%s: rc=%d", __FUNCTION__, rc)); } #endif /* __KERNEL__ */ @@ -1287,9 +1282,9 @@ void ci_netif_unlock(ci_netif* ni) ci_assert_nflags(ni->state->flags, CI_NETIF_FLAG_PKT_ACCOUNT_PENDING); ci_assert_equal(ni->state->in_poll, 0); - if(CI_LIKELY( ni->state->lock.lock == CI_EPLOCK_LOCKED && - ci_cas64u_succeed(&ni->state->lock.lock, - CI_EPLOCK_LOCKED, 0) )) + if( CI_LIKELY( + ni->state->lock.lock == CI_EPLOCK_LOCKED && + ci_cas64u_succeed(&ni->state->lock.lock, CI_EPLOCK_LOCKED, 0)) ) return; ci_netif_unlock_slow(ni); @@ -1298,7 +1293,7 @@ void ci_netif_unlock(ci_netif* ni) ci_assert_equal(saved_errno, errno); #endif } -#else /* OO_DO_STACK_POLL */ +#else /* OO_DO_STACK_POLL */ /* FIXME Sort it out somehow. 
* This call is used from: * (1) efab_tcp_helper_sock_sleep() when CI_SLEEP_NETIF_LOCKED is set; @@ -1317,16 +1312,16 @@ void ci_netif_unlock(ci_netif* ni) #endif /* OO_DO_STACK_POLL */ -void ci_netif_error_detected(ci_netif* ni, unsigned error_flag, - const char* caller) +void ci_netif_error_detected( + ci_netif* ni, unsigned error_flag, const char* caller) { if( ni->error_flags & error_flag ) return; - ci_log("%s: ERROR: [%d] runtime error %x detected in %s()", - __FUNCTION__, NI_ID(ni), error_flag, caller); - ci_log("%s: ERROR: [%d] errors detected: %x %x "CI_NETIF_ERRORS_FMT, - __FUNCTION__, NI_ID(ni), ni->error_flags, ni->state->error_flags, - CI_NETIF_ERRORS_PRI_ARG(ni->error_flags | ni->state->error_flags)); + ci_log("%s: ERROR: [%d] runtime error %x detected in %s()", __FUNCTION__, + NI_ID(ni), error_flag, caller); + ci_log("%s: ERROR: [%d] errors detected: %x %x " CI_NETIF_ERRORS_FMT, + __FUNCTION__, NI_ID(ni), ni->error_flags, ni->state->error_flags, + CI_NETIF_ERRORS_PRI_ARG(ni->error_flags | ni->state->error_flags)); ni->error_flags |= error_flag; ni->state->error_flags |= ni->error_flags; } @@ -1341,7 +1336,7 @@ int ci_netif_get_ready_list(ci_netif* ni) ci_netif_lock(ni); do { - if( !((ni->state->ready_lists_in_use >> i) & 1) ) { + if( ! ((ni->state->ready_lists_in_use >> i) & 1) ) { ni->state->ready_list_pid[i] = getpid(); ni->state->ready_lists_in_use |= 1 << i; break; @@ -1353,27 +1348,26 @@ int ci_netif_get_ready_list(ci_netif* ni) } #endif -static inline void -ci_netif_put_ready_list_one(ci_netif* ni, struct oo_p_dllink_state list, - int id) +static inline void ci_netif_put_ready_list_one( + ci_netif* ni, struct oo_p_dllink_state list, int id) { while( ! 
oo_p_dllink_is_empty(ni, list) ) { struct oo_p_dllink_state lnk = oo_p_dllink_statep(ni, list.l->next); - ci_sb_epoll_state* epoll = CI_CONTAINER(ci_sb_epoll_state, - e[id].ready_link, lnk.l); + ci_sb_epoll_state* epoll = + CI_CONTAINER(ci_sb_epoll_state, e[id].ready_link, lnk.l); oo_p_dllink_del(ni, lnk); oo_p_dllink_init(ni, lnk); - SP_TO_WAITABLE(ni, epoll->sock_id)->ready_lists_in_use &=~ (1 << id); + SP_TO_WAITABLE(ni, epoll->sock_id)->ready_lists_in_use &= ~(1 << id); } } static void ci_netif_put_ready_list_locked(ci_netif* ni, int id) { - ci_netif_put_ready_list_one(ni, oo_p_dllink_ptr(ni, - &ni->state->ready_lists[id]), id); - ci_netif_put_ready_list_one(ni, oo_p_dllink_ptr(ni, - &ni->state->unready_lists[id]), id); + ci_netif_put_ready_list_one( + ni, oo_p_dllink_ptr(ni, &ni->state->ready_lists[id]), id); + ci_netif_put_ready_list_one( + ni, oo_p_dllink_ptr(ni, &ni->state->unready_lists[id]), id); ni->state->ready_lists_in_use &= ~(1 << id); ni->state->ready_list_pid[id] = 0; } @@ -1382,9 +1376,10 @@ void ci_netif_free_ready_lists(ci_netif* ni) { int i; for( i = 0; i < CI_CFG_N_READY_LISTS; i++ ) { - if( (ni->state->ready_list_flags[i] & CI_NI_READY_LIST_FLAG_PENDING_FREE) ) { + if( (ni->state->ready_list_flags[i] & + CI_NI_READY_LIST_FLAG_PENDING_FREE) ) { ci_atomic32_and(&ni->state->ready_list_flags[i], - ~CI_NI_READY_LIST_FLAG_PENDING_FREE); + ~CI_NI_READY_LIST_FLAG_PENDING_FREE); ci_netif_put_ready_list_locked(ni, i); } } @@ -1392,22 +1387,21 @@ void ci_netif_free_ready_lists(ci_netif* ni) void ci_netif_put_ready_list(ci_netif* ni, int id) { - ci_assert(ni->state->ready_lists_in_use & (1 << id)); #ifdef __KERNEL__ ci_assert(current); - if( current->flags & PF_EXITING ? ! ci_netif_trylock(ni) : - ci_netif_lock(ni) ) { - ci_atomic32_or(&ni->state->ready_list_flags[id], - CI_NI_READY_LIST_FLAG_PENDING_FREE); - if(! ef_eplock_lock_or_set_flag(&ni->state->lock, - CI_EPLOCK_NETIF_FREE_READY_LIST) ) { + if( current->flags & PF_EXITING ? ! 
ci_netif_trylock(ni) + : ci_netif_lock(ni) ) { + ci_atomic32_or( + &ni->state->ready_list_flags[id], CI_NI_READY_LIST_FLAG_PENDING_FREE); + if( ! ef_eplock_lock_or_set_flag( + &ni->state->lock, CI_EPLOCK_NETIF_FREE_READY_LIST) ) { /* lock holder will release the ready list */ return; } - ci_atomic32_and(&ni->state->ready_list_flags[id], - ~CI_NI_READY_LIST_FLAG_PENDING_FREE); + ci_atomic32_and( + &ni->state->ready_list_flags[id], ~CI_NI_READY_LIST_FLAG_PENDING_FREE); } #else ci_netif_lock(ni); @@ -1419,8 +1413,8 @@ void ci_netif_put_ready_list(ci_netif* ni, int id) #ifndef __KERNEL__ -int ci_netif_raw_send(ci_netif* ni, int intf_i, - const ci_iovec *iov, int iovlen) +int ci_netif_raw_send( + ci_netif* ni, int intf_i, const ci_iovec* iov, int iovlen) { ci_ip_pkt_fmt* pkt; ci_uint8* p; @@ -1457,7 +1451,7 @@ int ci_netif_raw_send(ci_netif* ni, int intf_i, pkt->pkt_eth_payload_off = pkt->pkt_start_off + ETH_HLEN + ETH_VLAN_HLEN; #if CI_CFG_IPV6 if( oo_pkt_ether_type(pkt) == CI_ETHERTYPE_IP ) - pkt->flags &=~ CI_PKT_FLAG_IS_IP6; + pkt->flags &= ~CI_PKT_FLAG_IS_IP6; else pkt->flags |= CI_PKT_FLAG_IS_IP6; #endif @@ -1475,29 +1469,98 @@ int ci_netif_raw_send(ci_netif* ni, int intf_i, #if CI_CFG_TCP_SHARED_LOCAL_PORTS -static ci_uint32 __ci_netif_active_wild_hash(ci_netif *ni, - ci_addr_t laddr, ci_uint16 lport, - ci_addr_t raddr, ci_uint16 rport) +static ci_uint32 __ci_netif_active_wild_hash(ci_netif* ni, ci_addr_t laddr, + ci_uint16 lport, ci_addr_t raddr, ci_uint16 rport) { /* FIXME lots of insights into efrm */ /* FIXME this is copy of hash in efrm_vi_set.c */ static const uint8_t rx_hash_key[40] = { - 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, - 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, - 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, - 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, - 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 
0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, + 0x6d, + 0x5a, }; #ifndef __KERNEL__ /* We use a transformed key for our optimised Toeplitz hash. */ - __attribute__((aligned(sizeof(ci_uint32)))) - static const uint8_t rx_hash_key_sse[40] = { - 0xb5, 0x6c, 0xb5, 0x6c, 0xb5, 0x6c, 0xb5, 0x6c, - 0xb5, 0x6c, 0xb5, 0x6c, 0xb5, 0x6c, 0xb5, 0x6c, - 0xb5, 0x6c, 0xb5, 0x6c, 0xb5, 0x6c, 0xb5, 0x6c, - 0xb5, 0x6c, 0xb5, 0x6c, 0xb5, 0x6c, 0xb5, 0x6c, - 0xb5, 0x6c, 0xb5, 0x6c, 0xb5, 0x6c, 0xb5, 0x6c, + __attribute__(( + aligned(sizeof(ci_uint32)))) static const uint8_t rx_hash_key_sse[40] = { + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, + 0xb5, + 0x6c, }; #endif @@ -1516,8 +1579,8 @@ static ci_uint32 __ci_netif_active_wild_hash(ci_netif *ni, data.lport_be16 = lport; #ifndef __KERNEL__ - return ci_toeplitz_hash_ul(rx_hash_key, rx_hash_key_sse, (ci_uint8*) &data, - data_size); + return ci_toeplitz_hash_ul( + rx_hash_key, rx_hash_key_sse, (ci_uint8*) &data, data_size); #endif return ci_toeplitz_hash(rx_hash_key, (ci_uint8*) &data, data_size); } @@ -1528,15 +1591,14 @@ static ci_uint32 __ci_netif_active_wild_hash(ci_netif *ni, ci_uint32 laddr_be32; ci_uint16 rport_be16; ci_uint16 lport_be16; - } __attribute__((packed)) data = { - raddr.ip4, laddr.ip4, rport, lport }; + } __attribute__((packed)) data = { raddr.ip4, laddr.ip4, rport, lport }; int data_size = sizeof(data); #ifndef __KERNEL__ - /* N.B.: Only the lower byte is guaranteed to be accurate here, but this is - * good enough for our purposes. 
*/ - return ci_toeplitz_hash_ul(rx_hash_key, rx_hash_key_sse, (ci_uint8*) &data, - data_size); + /* N.B.: Only the lower byte is guaranteed to be accurate here, but this + * is good enough for our purposes. */ + return ci_toeplitz_hash_ul( + rx_hash_key, rx_hash_key_sse, (ci_uint8*) &data, data_size); #endif return ci_toeplitz_hash(rx_hash_key, (ci_uint8*) &data, data_size); } @@ -1545,9 +1607,8 @@ static ci_uint32 __ci_netif_active_wild_hash(ci_netif *ni, /* Returns the index in the NIC's RSS indirection table to which the supplied * four-tuple would be hashed. */ -int ci_netif_active_wild_nic_hash(ci_netif *ni, - ci_addr_t laddr, ci_uint16 lport, - ci_addr_t raddr, ci_uint16 rport) +int ci_netif_active_wild_nic_hash(ci_netif* ni, ci_addr_t laddr, + ci_uint16 lport, ci_addr_t raddr, ci_uint16 rport) { return __ci_netif_active_wild_hash(ni, laddr, lport, raddr, rport) & RSS_HASH_MASK; @@ -1555,8 +1616,8 @@ int ci_netif_active_wild_nic_hash(ci_netif *ni, /* Returns the hash table of active wilds for the specified pool. */ -ci_inline struct oo_p_dllink* -ci_netif_active_wild_pool_table(ci_netif* ni, int aw_pool) +ci_inline struct oo_p_dllink* ci_netif_active_wild_pool_table( + ci_netif* ni, int aw_pool) { ci_assert(ci_netif_is_locked(ni)); ci_assert_lt(aw_pool, ni->state->active_wild_pools_n); @@ -1566,16 +1627,16 @@ ci_netif_active_wild_pool_table(ci_netif* ni, int aw_pool) } -/* Returns the list of active wilds in a specified pool for the specified local - * address. If such a list does not exist, an empty list is returned, into - * which new active wilds for that IP address may be inserted. The list does - * not become 'owned' by that address until it becomes non-empty. This means - * that the stack lock must not be dropped between retrieving the address of a - * list using this function and ceasing to use the returned pointer to that - * list. - * Returns link state with .p==OO_P_NULL in case of failure. 
*/ -struct oo_p_dllink_state -ci_netif_get_active_wild_list(ci_netif* ni, int aw_pool, ci_addr_t laddr) +/* Returns the list of active wilds in a specified pool for the specified + * local address. If such a list does not exist, an empty list is returned, + * into which new active wilds for that IP address may be inserted. The list + * does not become 'owned' by that address until it becomes non-empty. This + * means that the stack lock must not be dropped between retrieving the + * address of a list using this function and ceasing to use the returned + * pointer to that list. Returns link state with .p==OO_P_NULL in case of + * failure. */ +struct oo_p_dllink_state ci_netif_get_active_wild_list( + ci_netif* ni, int aw_pool, ci_addr_t laddr) { struct oo_p_dllink* table = ci_netif_active_wild_pool_table(ni, aw_pool); struct oo_p_dllink_state list; @@ -1583,8 +1644,8 @@ ci_netif_get_active_wild_list(ci_netif* ni, int aw_pool, ci_addr_t laddr) ci_assert(ci_netif_is_locked(ni)); - ci_addr_simple_hash(laddr, ni->state->active_wild_table_entries_n, - &hash1, &hash2); + ci_addr_simple_hash( + laddr, ni->state->active_wild_table_entries_n, &hash1, &hash2); bucket = hash1; list.p = OO_P_NULL; @@ -1594,10 +1655,10 @@ ci_netif_get_active_wild_list(ci_netif* ni, int aw_pool, ci_addr_t laddr) list = oo_p_dllink_ptr(ni, &table[bucket]); - /* If we've found an empty list, it means there's no entry in the table for - * the specified IP address. This empty list is also at the correct - * location for the insertion of a new list for that IP address, and so we - * return it. */ + /* If we've found an empty list, it means there's no entry in the table + * for the specified IP address. This empty list is also at the correct + * location for the insertion of a new list for that IP address, and so + * we return it. 
*/ if( oo_p_dllink_is_empty(ni, list) ) return list; @@ -1608,13 +1669,14 @@ ci_netif_get_active_wild_list(ci_netif* ni, int aw_pool, ci_addr_t laddr) if( CI_IPX_ADDR_EQ(sock_ipx_laddr(&aw->s), laddr) ) return list; - /* This list is for the wrong IP address, so advance to the next bucket. */ + /* This list is for the wrong IP address, so advance to the next bucket. + */ bucket = (bucket + hash2) & (ni->state->active_wild_table_entries_n - 1); } while( bucket != hash1 ); NI_LOG_ONCE(ni, RESOURCE_WARNINGS, - "No space in active wild table %d for local address " - IPX_FMT, aw_pool, IPX_ARG(AF_IP_L3(laddr))); + "No space in active wild table %d for local address " IPX_FMT, aw_pool, + IPX_ARG(AF_IP_L3(laddr))); return list; } @@ -1622,9 +1684,8 @@ ci_netif_get_active_wild_list(ci_netif* ni, int aw_pool, ci_addr_t laddr) #ifndef __KERNEL__ #ifndef NDEBUG -static int __ci_netif_active_wild_rss_ok(ci_netif* ni, - ci_addr_t laddr, ci_uint16 lport, - ci_addr_t raddr, ci_uint16 rport) +static int __ci_netif_active_wild_rss_ok(ci_netif* ni, ci_addr_t laddr, + ci_uint16 lport, ci_addr_t raddr, ci_uint16 rport) { /* This function checks the compatability of a 4-tuple with this stack. * To do so implies we have a destination address and port, have selected @@ -1633,26 +1694,25 @@ static int __ci_netif_active_wild_rss_ok(ci_netif* ni, */ ci_assert_nequal(lport, 0); ci_assert_nequal(rport, 0); - ci_assert(!CI_IPX_ADDR_IS_ANY(laddr)); - ci_assert(!CI_IPX_ADDR_IS_ANY(raddr)); + ci_assert(! CI_IPX_ADDR_IS_ANY(laddr)); + ci_assert(! 
CI_IPX_ADDR_IS_ANY(raddr)); /* It's always ok if we don't have a multi-instance cluster */ if( ni->state->cluster_size < 2 ) return 1; - if( ci_netif_active_wild_nic_hash(ni, laddr, lport, raddr, rport) - % ni->state->cluster_size == ni->state->rss_instance ) + if( ci_netif_active_wild_nic_hash(ni, laddr, lport, raddr, rport) % + ni->state->cluster_size == + ni->state->rss_instance ) return 1; else return 0; - } #endif static int __ci_netif_active_wild_pool_select(ci_netif* ni, ci_addr_t laddr, - ci_addr_t raddr, ci_uint16 rport, - int offset) + ci_addr_t raddr, ci_uint16 rport, int offset) { ci_uint32 pool_index = 0; ci_uint32 select_hash; @@ -1676,10 +1736,10 @@ static int __ci_netif_active_wild_pool_select(ci_netif* ni, ci_addr_t laddr, * we need to ensure that we don't cause the peer to think we're reopening * that connection. * - * To do that we record the details of the last closed connection on this port - * in a way that would leave the peer in TIME-WAIT (if we're in TIME-WAIT we - * won't re-use the port, as we still have a sw filter for the 4-tuple, if - * the connection is reset then we're ok). + * To do that we record the details of the last closed connection on this + * port in a way that would leave the peer in TIME-WAIT (if we're in + * TIME-WAIT we won't re-use the port, as we still have a sw filter for the + * 4-tuple, if the connection is reset then we're ok). * * When we assign a new port we check if we expect the peer to be out of * TIME-WAIT by now (assuming they're using the same length of timer as us). @@ -1687,29 +1747,26 @@ static int __ci_netif_active_wild_pool_select(ci_netif* ni, ci_addr_t laddr, * keep looking (potentially increasing the pool). 
*/ static int __ci_netif_active_wild_allow_reuse(ci_netif* ni, ci_active_wild* aw, - ci_addr_t laddr, ci_addr_t raddr, - unsigned rport) + ci_addr_t laddr, ci_addr_t raddr, unsigned rport) { if( ci_ip_time_now(ni) > aw->expiry || NI_OPTS(ni).tcp_shared_local_ports_reuse_fast ) return 1; else - return !CI_IPX_ADDR_EQ(aw->last_laddr, laddr) || - !CI_IPX_ADDR_EQ(aw->last_raddr, raddr) || + return ! CI_IPX_ADDR_EQ(aw->last_laddr, laddr) || + ! CI_IPX_ADDR_EQ(aw->last_raddr, raddr) || (aw->last_rport != rport); } static oo_sp __ci_netif_active_wild_pool_get(ci_netif* ni, int aw_pool, - ci_addr_t laddr, ci_addr_t raddr, - unsigned rport, - ci_uint16* port_out, - ci_uint32* prev_seq_out) + ci_addr_t laddr, ci_addr_t raddr, unsigned rport, ci_uint16* port_out, + ci_uint32* prev_seq_out) { ci_active_wild* aw; ci_uint16 lport; ci_addr_t laddr_aw = - NI_OPTS(ni).tcp_shared_local_ports_per_ip ? laddr : addr_any; + NI_OPTS(ni).tcp_shared_local_ports_per_ip ? laddr : addr_any; int af_space = AF_SPACE_FLAG_IP4; oo_sp sp; struct oo_p_dllink_state list; @@ -1731,15 +1788,17 @@ static oo_sp __ci_netif_active_wild_pool_get(ci_netif* ni, int aw_pool, if( list.p == OO_P_NULL ) return OO_SP_NULL; - /* This can happen if active wilds are configured, but we failed to allocate - * any at stack creation time, for example because there were no filters - * available, or if none of them give a valid hash for this 4-tuple. + /* This can happen if active wilds are configured, but we failed to + * allocate any at stack creation time, for example because there were no + * filters available, or if none of them give a valid hash for this + * 4-tuple. 
*/ if( oo_p_dllink_is_empty(ni, list) ) return OO_SP_NULL; last = oo_p_dllink_statep(ni, list.l->prev); - oo_p_dllink_for_each_safe(ni, link, tmp, list) { + oo_p_dllink_for_each_safe(ni, link, tmp, list) + { oo_p_dllink_del(ni, link); oo_p_dllink_add_tail(ni, list, link); @@ -1753,8 +1812,8 @@ static oo_sp __ci_netif_active_wild_pool_get(ci_netif* ni, int aw_pool, */ ci_assert(__ci_netif_active_wild_rss_ok(ni, laddr, lport, raddr, rport)); - sp = ci_netif_filter_lookup(ni, af_space, laddr, lport, raddr, rport, - sock_protocol(&aw->s)); + sp = ci_netif_filter_lookup( + ni, af_space, laddr, lport, raddr, rport, sock_protocol(&aw->s)); if( OO_SP_NOT_NULL(sp) ) { ci_sock_cmn* s = ID_TO_SOCK(ni, sp); @@ -1766,12 +1825,13 @@ static oo_sp __ci_netif_active_wild_pool_get(ci_netif* ni, int aw_pool, */ ci_tcp_state* ts = SOCK_TO_TCP(s); CITP_STATS_NETIF_INC(ni, tcp_shared_local_ports_reused_tw); - /* Setting *prev_seq_out to zero indicates to the caller that it should - * fall back to the clock-driven ISN. However, sometimes we really do - * want to report a previous sequence number of zero. To work around - * this, report a value of 1 in such cases. This is valid in practice, - * as the purpose of this is to allow the selection of an ISN for the - * next connection that is greater in sequence space than the old one. + /* Setting *prev_seq_out to zero indicates to the caller that it + * should fall back to the clock-driven ISN. However, sometimes we + * really do want to report a previous sequence number of zero. To + * work around this, report a value of 1 in such cases. This is + * valid in practice, as the purpose of this is to allow the + * selection of an ISN for the next connection that is greater in + * sequence space than the old one. 
*/ seq = ts->snd_nxt + NI_OPTS(ni).tcp_isn_offset; if( seq == 0 ) @@ -1797,9 +1857,8 @@ static oo_sp __ci_netif_active_wild_pool_get(ci_netif* ni, int aw_pool, } CITP_STATS_NETIF_INC(ni, tcp_shared_local_ports_skipped_in_use); - } - else if( __ci_netif_active_wild_allow_reuse(ni, aw, laddr, - raddr, rport) ) { + } else if( __ci_netif_active_wild_allow_reuse( + ni, aw, laddr, raddr, rport) ) { /* If no-one's using this 4-tuple we can let the caller share this * active wild. */ @@ -1820,9 +1879,8 @@ static oo_sp __ci_netif_active_wild_pool_get(ci_netif* ni, int aw_pool, static oo_sp __ci_netif_active_wild_get(ci_netif* ni, ci_addr_t laddr, - ci_addr_t raddr, unsigned rport, - ci_uint16* port_out, - ci_uint32* prev_seq_out) + ci_addr_t raddr, unsigned rport, ci_uint16* port_out, + ci_uint32* prev_seq_out) { int aw_pool; int offset; @@ -1833,10 +1891,10 @@ static oo_sp __ci_netif_active_wild_get(ci_netif* ni, ci_addr_t laddr, for( offset = ni->state->rss_instance; offset < ni->state->active_wild_pools_n; offset += ni->state->cluster_size ) { - aw_pool = __ci_netif_active_wild_pool_select(ni, laddr, raddr, rport, - offset); - aw = __ci_netif_active_wild_pool_get(ni, aw_pool, laddr, raddr, rport, - port_out, prev_seq_out); + aw_pool = + __ci_netif_active_wild_pool_select(ni, laddr, raddr, rport, offset); + aw = __ci_netif_active_wild_pool_get( + ni, aw_pool, laddr, raddr, rport, port_out, prev_seq_out); if( aw != OO_SP_NULL ) break; } @@ -1845,9 +1903,8 @@ static oo_sp __ci_netif_active_wild_get(ci_netif* ni, ci_addr_t laddr, } -oo_sp ci_netif_active_wild_get(ci_netif* ni, ci_addr_t laddr, - ci_addr_t raddr, unsigned rport, - ci_uint16* port_out, ci_uint32* prev_seq_out) +oo_sp ci_netif_active_wild_get(ci_netif* ni, ci_addr_t laddr, ci_addr_t raddr, + unsigned rport, ci_uint16* port_out, ci_uint32* prev_seq_out) { oo_sp active_wild; @@ -1856,31 +1913,29 @@ oo_sp ci_netif_active_wild_get(ci_netif* ni, ci_addr_t laddr, if( ! 
ci_netif_should_allocate_tcp_shared_local_ports(ni) ) return OO_SP_NULL; - active_wild = __ci_netif_active_wild_get(ni, laddr, raddr, rport, - port_out, prev_seq_out); + active_wild = __ci_netif_active_wild_get( + ni, laddr, raddr, rport, port_out, prev_seq_out); /* If we failed to get an active wild try and grow the pool */ while( active_wild == OO_SP_NULL && ni->state->active_wild_n < NI_OPTS(ni).tcp_shared_local_ports_max ) { int rc; ci_addr_t laddr_aw = - NI_OPTS(ni).tcp_shared_local_ports_per_ip ? laddr : addr_any; - LOG_TC(ci_log(FN_FMT "Didn't get active wild, getting more", - FN_PRI_ARGS(ni))); + NI_OPTS(ni).tcp_shared_local_ports_per_ip ? laddr : addr_any; + LOG_TC(ci_log( + FN_FMT "Didn't get active wild, getting more", FN_PRI_ARGS(ni))); rc = ci_tcp_helper_alloc_active_wild(ni, laddr_aw); if( rc >= 0 ) { CITP_STATS_NETIF_INC(ni, tcp_shared_local_ports_grow); - active_wild = __ci_netif_active_wild_get(ni, laddr, raddr, rport, - port_out, prev_seq_out); - } - else if( rc == -ENOBUFS ) { + active_wild = __ci_netif_active_wild_get( + ni, laddr, raddr, rport, port_out, prev_seq_out); + } else if( rc == -ENOBUFS ) { break; - } - else { - LOG_TC(ci_log(FN_FMT "Alloc active wild for "IPX_FMT":0 " - IPX_FMT":%u FAILED - rc %d", - FN_PRI_ARGS(ni), IPX_ARG(AF_IP(laddr)), - IPX_ARG(AF_IP(raddr)), htons(rport), rc)); + } else { + LOG_TC(ci_log(FN_FMT "Alloc active wild for " IPX_FMT ":0 " IPX_FMT + ":%u FAILED - rc %d", + FN_PRI_ARGS(ni), IPX_ARG(AF_IP(laddr)), IPX_ARG(AF_IP(raddr)), + htons(rport), rc)); CITP_STATS_NETIF_INC(ni, tcp_shared_local_ports_grow_failed); break; } @@ -1888,18 +1943,16 @@ oo_sp ci_netif_active_wild_get(ci_netif* ni, ci_addr_t laddr, if( active_wild != OO_SP_NULL ) { CITP_STATS_NETIF_INC(ni, tcp_shared_local_ports_used); - LOG_TC(ci_log(FN_FMT "Lookup active wild for "IPX_FMT":0 " - IPX_FMT":%u FOUND - lport %u", - FN_PRI_ARGS(ni), IPX_ARG(AF_IP(laddr)), - IPX_ARG(AF_IP(raddr)), - htons(rport), htons(*port_out))); - } - else { + 
LOG_TC(ci_log(FN_FMT "Lookup active wild for " IPX_FMT ":0 " IPX_FMT + ":%u FOUND - lport %u", + FN_PRI_ARGS(ni), IPX_ARG(AF_IP(laddr)), IPX_ARG(AF_IP(raddr)), + htons(rport), htons(*port_out))); + } else { CITP_STATS_NETIF_INC(ni, tcp_shared_local_ports_exhausted); - LOG_TC(ci_log(FN_FMT "Lookup active wild for "IPX_FMT":0 " - IPX_FMT":%u NOT AVAILABLE", - FN_PRI_ARGS(ni), IPX_ARG(AF_IP(laddr)), - IPX_ARG(AF_IP(raddr)), htons(rport))); + LOG_TC(ci_log(FN_FMT "Lookup active wild for " IPX_FMT ":0 " IPX_FMT + ":%u NOT AVAILABLE", + FN_PRI_ARGS(ni), IPX_ARG(AF_IP(laddr)), IPX_ARG(AF_IP(raddr)), + htons(rport))); } return active_wild; } @@ -1913,9 +1966,8 @@ void ci_netif_active_wild_sharer_closed(ci_netif* ni, ci_sock_cmn* s) oo_sp id; ci_active_wild* aw; - id = ci_netif_filter_lookup(ni, sock_af_space(s), - sock_ipx_laddr(s), sock_lport_be16(s), - addr_any, 0, sock_protocol(s)); + id = ci_netif_filter_lookup(ni, sock_af_space(s), sock_ipx_laddr(s), + sock_lport_be16(s), addr_any, 0, sock_protocol(s)); if( OO_SP_NOT_NULL(id) ) { aw = SP_TO_ACTIVE_WILD(ni, id); @@ -1978,8 +2030,7 @@ void ci_netif_handle_actions(ci_netif* ni) /* Poll to process incoming FIN, and close endpoint from unlock hook. */ if( val & OO_ACTION_CLOSE_EP ) ef_eplock_holder_set_flags(&ni->state->lock, - CI_EPLOCK_NETIF_CLOSE_ENDPOINT | - CI_EPLOCK_NETIF_NEED_POLL); + CI_EPLOCK_NETIF_CLOSE_ENDPOINT | CI_EPLOCK_NETIF_NEED_POLL); if( val & OO_ACTION_SWF_UPDATE ) oo_ringbuffer_iterate(&ni->sw_filter_ops, sw_update_cb, ni); diff --git a/src/lib/transport/ip/netif_event.c b/src/lib/transport/ip/netif_event.c index 722d2311f..3284e146e 100644 --- a/src/lib/transport/ip/netif_event.c +++ b/src/lib/transport/ip/netif_event.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2003-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! 
\file ** ** \author djr ** \brief Event handling @@ -41,13 +41,13 @@ #define SAMPLE(n) (n) -#define LPF "netif: " +#define LPF "netif: " #ifndef __KERNEL__ enum { FUTURE_DROP = 0x01, - FUTURE_IP4 = 0x02, - FUTURE_TCP = 0x04, /* else UDP */ + FUTURE_IP4 = 0x02, + FUTURE_TCP = 0x04, /* else UDP */ FUTURE_NONE = 0, FUTURE_UDP4 = FUTURE_IP4, @@ -73,7 +73,7 @@ struct oo_rx_state { /* Without RX Merge: A running total of bytes received for this packet * With RX Merge: The full length of this packet */ - int frag_bytes; + int frag_bytes; }; @@ -122,8 +122,7 @@ static void ci_parse_rx_vlan(ci_ip_pkt_fmt* pkt) if( *p_ether_type != CI_ETHERTYPE_8021Q ) { pkt->pkt_eth_payload_off = pkt->pkt_start_off + ETH_HLEN; pkt->vlan = 0; - } - else { + } else { pkt->pkt_eth_payload_off = pkt->pkt_start_off + ETH_HLEN + ETH_VLAN_HLEN; pkt->vlan = CI_BSWAP_BE16(p_ether_type[1]) & 0xfff; } @@ -138,68 +137,71 @@ int ci_ip_options_parse(ci_netif* netif, ci_ip4_hdr* ip, const int hdr_size) char* opt_end = (char*) ip + hdr_size; while( *options != IPOPT_EOL && options < opt_end && ! 
error ) { switch( (ci_uint8) *options ) { - case IPOPT_NOP: - ++options; - break; - case IPOPT_RR: /* Record Packet Route */ - case IPOPT_TS: /* Time-stamp */ - case IPOPT_SEC: /* Security */ - case IPOPT_SID: /* Stream ID */ - if( options[1] < IPOPT_MINOFF || options[1] > opt_end - options ) { - LOG_U( log(LPF "[%d] IP Option invalid offset; type=%u(op:%u), " - "offset=%u", netif->state->stack_id, (ci_uint8) *options, - (ci_uint8) (0x1f & *options), (ci_uint8) options[1]) ); + case IPOPT_NOP: + ++options; + break; + case IPOPT_RR: /* Record Packet Route */ + case IPOPT_TS: /* Time-stamp */ + case IPOPT_SEC: /* Security */ + case IPOPT_SID: /* Stream ID */ + if( options[1] < IPOPT_MINOFF || options[1] > opt_end - options ) { + LOG_U( + log(LPF "[%d] IP Option invalid offset; type=%u(op:%u), " + "offset=%u", + netif->state->stack_id, (ci_uint8) *options, + (ci_uint8) (0x1f & *options), (ci_uint8) options[1])); + error = 1; + } else { + options += options[1]; + } + break; + case IPOPT_SSRR: /* Strict Source Routing */ + case IPOPT_LSRR: /* Loose Source Routing */ + LOG_U( + log(LPF "[%d] IP Options: Source Routing unsupported; " + "type=%u(op:%u)", + netif->state->stack_id, (ci_uint8) *options, + (ci_uint8) (0x1f & *options))); error = 1; - } - else { - options += options[1]; - } - break; - case IPOPT_SSRR: /* Strict Source Routing */ - case IPOPT_LSRR: /* Loose Source Routing */ - LOG_U( log(LPF "[%d] IP Options: Source Routing unsupported; " - "type=%u(op:%u)", netif->state->stack_id, (ci_uint8) *options, - (ci_uint8) (0x1f & *options)) ); - error = 1; - break; - default: - LOG_U( log(LPF "[%d] IP Option unsupported; type=%u(op:%u)", - netif->state->stack_id, (ci_uint8) *options, - (ci_uint8) (0x1f & *options)) ); - error = 1; - break; + break; + default: + LOG_U(log(LPF "[%d] IP Option unsupported; type=%u(op:%u)", + netif->state->stack_id, (ci_uint8) *options, + (ci_uint8) (0x1f & *options))); + error = 1; + break; } } if( error ) { CITP_STATS_NETIF_INC(netif, 
rx_discard_ip_options_bad); CI_IPV4_STATS_INC_IN_HDR_ERRS(netif); - } - else { + } else { CITP_STATS_NETIF_INC(netif, ip_options); } return error; } +#if CI_CFG_TIMESTAMPING static void record_rx_timestamp(ci_netif* netif, ci_netif_state_nic_t* nsn, - ci_ip_pkt_fmt* pkt, - ef_timespec stamp, unsigned sync_flags) + ci_ip_pkt_fmt* pkt, ef_timespec stamp, unsigned sync_flags) { int tsf = (NI_OPTS(netif).timestamping_reporting & - CITP_TIMESTAMPING_RECORDING_FLAG_CHECK_SYNC) ? - EF_VI_SYNC_FLAG_CLOCK_IN_SYNC : - EF_VI_SYNC_FLAG_CLOCK_SET; + CITP_TIMESTAMPING_RECORDING_FLAG_CHECK_SYNC) + ? EF_VI_SYNC_FLAG_CLOCK_IN_SYNC + : EF_VI_SYNC_FLAG_CLOCK_SET; pkt->hw_stamp.tv_sec = stamp.tv_sec; pkt->hw_stamp.tv_nsec = stamp.tv_nsec = - (stamp.tv_nsec & ~CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC) | - ((sync_flags & tsf) ? CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC : 0); + (stamp.tv_nsec & ~CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC) | + ((sync_flags & tsf) ? CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC : 0); nsn->last_rx_timestamp = pkt->hw_stamp; nsn->last_sync_flags = sync_flags; - LOG_NR(log(LPF "RX id=%d timestamp: %lu.%09lu sync %d", - OO_PKT_FMT(pkt), (long)stamp.tv_sec, stamp.tv_nsec, sync_flags)); + LOG_NR(log(LPF "RX id=%d timestamp: %lu.%09lu sync %d", OO_PKT_FMT(pkt), + (long) stamp.tv_sec, stamp.tv_nsec, sync_flags)); } +#endif static void get_rx_timestamp(ci_netif* netif, ci_ip_pkt_fmt* pkt) { @@ -212,7 +214,7 @@ static void get_rx_timestamp(ci_netif* netif, ci_ip_pkt_fmt* pkt) unsigned sync_flags; ef_timespec stamp; int rc = ef_vi_receive_get_timestamp_with_sync_flags( - vi, PKT_START(pkt) - nsn->rx_prefix_len, &stamp, &sync_flags); + vi, PKT_START(pkt) - nsn->rx_prefix_len, &stamp, &sync_flags); if( rc == 0 ) record_rx_timestamp(netif, nsn, pkt, stamp, sync_flags); @@ -220,8 +222,8 @@ static void get_rx_timestamp(ci_netif* netif, ci_ip_pkt_fmt* pkt) LOG_NR(log(LPF "RX id=%d missing timestamp", OO_PKT_FMT(pkt))); } #else - (void)netif; - (void)pkt; + (void) netif; + (void) pkt; #endif } @@ -229,15 +231,14 @@ 
static void get_rx_timestamp(ci_netif* netif, ci_ip_pkt_fmt* pkt) #if CI_CFG_TCP_OFFLOAD_RECYCLER /* Process a packet which has been received on a 'secondary' VI about which * we know nothing - they may not even have Ethernet headers */ -static void handle_rx_plugin_data(ci_netif* netif, - struct ci_netif_poll_state* ps, - ci_ip_pkt_fmt* pkt) +static void handle_rx_plugin_data( + ci_netif* netif, struct ci_netif_poll_state* ps, ci_ip_pkt_fmt* pkt) { /* Since we don't know anything about this packet, we can't use the * ip_pkt_dump_len function (which adds ETH_HLEN) to calculate length to * dump.*/ LOG_DR(ci_hex_dump(ci_log_fn, PKT_START(pkt), - raw_pkt_dump_len(oo_offbuf_left(&pkt->buf)), 0)); + raw_pkt_dump_len(oo_offbuf_left(&pkt->buf)), 0)); /* Writing these things to the pcap will confuse Wireshark, but it's an * important debugging feature so let's do it anyway */ @@ -250,12 +251,12 @@ static void handle_rx_plugin_data(ci_netif* netif, static inline unsigned unexpected_rx_log_flag(ci_ip_pkt_fmt* pkt) { - return (pkt->rx_flags & CI_PKT_RX_FLAG_RX_SHARED) ? - CI_TP_LOG_NR : CI_TP_LOG_U; + return (pkt->rx_flags & CI_PKT_RX_FLAG_RX_SHARED) ? CI_TP_LOG_NR + : CI_TP_LOG_U; } -static void handle_rx_pkt(ci_netif* netif, struct ci_netif_poll_state* ps, - ci_ip_pkt_fmt* pkt) +static void handle_rx_pkt( + ci_netif* netif, struct ci_netif_poll_state* ps, ci_ip_pkt_fmt* pkt) { /* On entry: [pkt] may be a whole packet, or a linked list of scatter * fragments linked by [pkt->frag_next]. [pkt->pay_len] contains the @@ -264,55 +265,56 @@ static void handle_rx_pkt(ci_netif* netif, struct ci_netif_poll_state* ps, */ int not_fast, ip_paylen, hdr_size; - ci_uint16 ether_type = *((ci_uint16*)oo_l3_hdr(pkt) - 1); + ci_uint16 ether_type = *((ci_uint16*) oo_l3_hdr(pkt) - 1); ci_assert_nequal(pkt->pkt_eth_payload_off, PKT_START_OFF_BAD); -#if CI_CFG_RANDOM_DROP && !defined(__KERNEL__) - if( CI_UNLIKELY(rand() < NI_OPTS(netif).rx_drop_rate) ) goto drop; +#if CI_CFG_RANDOM_DROP && ! 
defined(__KERNEL__) + if( CI_UNLIKELY(rand() < NI_OPTS(netif).rx_drop_rate) ) + goto drop; #endif pkt->tstamp_frc = IPTIMER_STATE(netif)->frc; /* Is this an IP packet? */ - if(CI_LIKELY( ether_type == CI_ETHERTYPE_IP )) { + if( CI_LIKELY(ether_type == CI_ETHERTYPE_IP) ) { int ip_tot_len; - ci_ip4_hdr *ip = oo_ip_hdr(pkt); + ci_ip4_hdr* ip = oo_ip_hdr(pkt); #if CI_CFG_IPV6 - pkt->flags &=~ CI_PKT_FLAG_IS_IP6; + pkt->flags &= ~CI_PKT_FLAG_IS_IP6; #endif LOG_NR(log(LPF "RX id=%d ip_proto=0x%x", OO_PKT_FMT(pkt), - (unsigned) ip->ip_protocol)); + (unsigned) ip->ip_protocol)); LOG_AR(ci_analyse_pkt(PKT_START(pkt), pkt->pay_len)); - CI_IPV4_STATS_INC_IN_RECVS( netif ); + CI_IPV4_STATS_INC_IN_RECVS(netif); /* Do the byte-swap just once! */ ip_tot_len = CI_BSWAP_BE16(ip->ip_tot_len_be16); - LOG_DR(ci_hex_dump(ci_log_fn, PKT_START(pkt), - ip_pkt_dump_len(ip_tot_len), 0)); + LOG_DR(ci_hex_dump( + ci_log_fn, PKT_START(pkt), ip_pkt_dump_len(ip_tot_len), 0)); /* Hardware should not deliver us fragments when using scalable * filters, but it happens in some corner cases. We can't handle them. * Also check for valid IP length for non-fragmented packets.*/ - not_fast = (ip->ip_frag_off_be16 & - (CI_IP4_OFFSET_MASK | CI_IP4_FRAG_MORE)) | - (ip_tot_len > pkt->pay_len - oo_pre_l3_len(pkt)) + not_fast = + (ip->ip_frag_off_be16 & (CI_IP4_OFFSET_MASK | CI_IP4_FRAG_MORE)) | + (ip_tot_len > pkt->pay_len - oo_pre_l3_len(pkt)) #if CI_CFG_TCP_OFFLOAD_RECYCLER - /* All plugins are assumed to have done sufficient checks that - * their mangled packets are fast-path */ - && pkt->q_id == CI_Q_ID_NORMAL; + /* All plugins are assumed to have done sufficient checks that + * their mangled packets are fast-path */ + && pkt->q_id == CI_Q_ID_NORMAL; #endif - ; + ; hdr_size = CI_IP4_IHL(ip); /* Accepting but ignoring IP options. ** Quick parse to check there is no badness - */ - if(CI_UNLIKELY( hdr_size > sizeof(ci_ip4_hdr) && ! not_fast )) + */ + if( CI_UNLIKELY(hdr_size > sizeof(ci_ip4_hdr) && ! 
not_fast) ) not_fast = ci_ip_options_parse(netif, ip, hdr_size); /* We are not checking for certain other illegalities here (invalid @@ -337,39 +339,37 @@ static void handle_rx_pkt(ci_netif* netif, struct ci_netif_poll_state* ps, /* Demux to appropriate protocol. */ if( ip->ip_protocol == IPPROTO_TCP ) { ci_tcp_handle_rx(netif, ps, pkt, (ci_tcp_hdr*) payload, ip_paylen); - CI_IPV4_STATS_INC_IN_DELIVERS( netif ); + CI_IPV4_STATS_INC_IN_DELIVERS(netif); return; - } - else if(CI_LIKELY( ip->ip_protocol == IPPROTO_UDP )) { + } else if( CI_LIKELY(ip->ip_protocol == IPPROTO_UDP) ) { ci_udp_handle_rx(netif, pkt, (ci_udp_hdr*) payload, ip_paylen); - CI_IPV4_STATS_INC_IN_DELIVERS( netif ); + CI_IPV4_STATS_INC_IN_DELIVERS(netif); return; } LOG_FL(unexpected_rx_log_flag(pkt), - CI_RLLOG(10, LPF "IGNORE IP protocol=%d", (int) ip->ip_protocol)); - } - else if( ~pkt->rx_flags & CI_PKT_RX_FLAG_RX_SHARED ) { + CI_RLLOG(10, LPF "IGNORE IP protocol=%d", (int) ip->ip_protocol)); + } else if( ~pkt->rx_flags & CI_PKT_RX_FLAG_RX_SHARED ) { /*! \todo IP slow path. Don't want to deal with this yet. - * + * * It is probably bad idea to print all IP fragments, but we should * not receive them in the first place. 
*/ - LOG_U(CI_RLLOG(10, LPF "[%d] IP HARD " - "(ihl_ver=%x ihl=%d frag=%x ip_len=%d frame_len=%d)" - PKT_DBG_FMT, - netif->state->stack_id, - (int) ip->ip_ihl_version, (int) CI_IP4_IHL(ip), - (unsigned) ip->ip_frag_off_be16, - ip_tot_len, pkt->pay_len, PKT_DBG_ARGS(pkt))); + LOG_U(CI_RLLOG(10, + LPF + "[%d] IP HARD " + "(ihl_ver=%x ihl=%d frag=%x ip_len=%d frame_len=%d)" PKT_DBG_FMT, + netif->state->stack_id, (int) ip->ip_ihl_version, + (int) CI_IP4_IHL(ip), (unsigned) ip->ip_frag_off_be16, ip_tot_len, + pkt->pay_len, PKT_DBG_ARGS(pkt))); LOG_DU(ci_hex_dump(ci_log_fn, PKT_START(pkt), 64, 0)); } - CI_IPV4_STATS_INC_IN_DISCARDS( netif ); + CI_IPV4_STATS_INC_IN_DISCARDS(netif); - /* On architectures with RX_SHARED (EFCT), we expect unexpected packets to show up - * as the queue is shared with kernel stack and potentially other onload/ef_vi stacks, - * we need to ignore those packets. */ + /* On architectures with RX_SHARED (EFCT), we expect unexpected packets to + * show up as the queue is shared with kernel stack and potentially other + * onload/ef_vi stacks, we need to ignore those packets. 
*/ if( pkt->rx_flags & CI_PKT_RX_FLAG_RX_SHARED ) { CITP_STATS_NETIF_INC(netif, no_match_pass_to_kernel_ip_other); ci_netif_pkt_release_rx_1ref(netif, pkt); @@ -386,15 +386,15 @@ static void handle_rx_pkt(ci_netif* netif, struct ci_netif_poll_state* ps, return; } #if CI_CFG_IPV6 - else if(CI_LIKELY( ether_type == CI_ETHERTYPE_IP6 )) { - ci_ip6_hdr *ip6_hdr = oo_ip6_hdr(pkt); - void *payload = ip6_hdr + 1; + else if( CI_LIKELY(ether_type == CI_ETHERTYPE_IP6) ) { + ci_ip6_hdr* ip6_hdr = oo_ip6_hdr(pkt); + void* payload = ip6_hdr + 1; - LOG_NR(log(LPF "RX id=%d ip6_proto=0x%x", OO_PKT_FMT(pkt), - ip6_hdr->next_hdr)); + LOG_NR(log( + LPF "RX id=%d ip6_proto=0x%x", OO_PKT_FMT(pkt), ip6_hdr->next_hdr)); pkt->flags |= CI_PKT_FLAG_IS_IP6; - CI_IP_STATS_INC_IN6_RECVS( netif ); + CI_IP_STATS_INC_IN6_RECVS(netif); get_rx_timestamp(netif, pkt); @@ -403,22 +403,21 @@ static void handle_rx_pkt(ci_netif* netif, struct ci_netif_poll_state* ps, if( ip6_hdr->next_hdr == IPPROTO_TCP ) { ci_tcp_handle_rx(netif, ps, pkt, (ci_tcp_hdr*) payload, - CI_BSWAP_BE16(ip6_hdr->payload_len)); - CI_IP_STATS_INC_IN6_DELIVERS( netif ); + CI_BSWAP_BE16(ip6_hdr->payload_len)); + CI_IP_STATS_INC_IN6_DELIVERS(netif); return; - } - else if( ip6_hdr->next_hdr == IPPROTO_UDP ) { + } else if( ip6_hdr->next_hdr == IPPROTO_UDP ) { ci_udp_handle_rx(netif, pkt, (ci_udp_hdr*) payload, - CI_BSWAP_BE16(ip6_hdr->payload_len)); - CI_IP_STATS_INC_IN6_DELIVERS( netif ); + CI_BSWAP_BE16(ip6_hdr->payload_len)); + CI_IP_STATS_INC_IN6_DELIVERS(netif); return; } - CI_IP_STATS_INC_IN6_DISCARDS( netif ); + CI_IP_STATS_INC_IN6_DISCARDS(netif); - /* On architectures with RX_SHARED (EFCT), we expect unexpected packets to show up - * as the queue is shared with kernel stack and potentially other onload/ef_vi stacks, - * we need to ignore those packets. 
*/ + /* On architectures with RX_SHARED (EFCT), we expect unexpected packets to + * show up as the queue is shared with kernel stack and potentially other + * onload/ef_vi stacks, we need to ignore those packets. */ if( pkt->rx_flags & CI_PKT_RX_FLAG_RX_SHARED ) { CITP_STATS_NETIF_INC(netif, no_match_pass_to_kernel_ip6_other); ci_netif_pkt_release_rx_1ref(netif, pkt); @@ -433,9 +432,9 @@ static void handle_rx_pkt(ci_netif* netif, struct ci_netif_poll_state* ps, } #endif - /* On architectures with RX_SHARED (EFCT), we expect unexpected packets to show up - * as the queue is shared with kernel stack and potentially other onload/ef_vi stacks, - * we need to ignore those packets. */ + /* On architectures with RX_SHARED (EFCT), we expect unexpected packets to + * show up as the queue is shared with kernel stack and potentially other + * onload/ef_vi stacks, we need to ignore those packets. */ if( pkt->rx_flags & CI_PKT_RX_FLAG_RX_SHARED ) { CITP_STATS_NETIF_INC(netif, no_match_pass_to_kernel_non_ip); ci_netif_pkt_release_rx_1ref(netif, pkt); @@ -447,18 +446,16 @@ static void handle_rx_pkt(ci_netif* netif, struct ci_netif_poll_state* ps, ci_assert_equal(pkt->q_id, CI_Q_ID_NORMAL); if( ci_netif_pkt_pass_to_kernel(netif, pkt) ) { CITP_STATS_NETIF_INC(netif, no_match_pass_to_kernel_non_ip); - } - else - { - LOG_U(CI_RLLOG(10, LPF "UNEXPECTED ether_type "PKT_DBG_FMT, - PKT_DBG_ARGS(pkt))); + } else { + LOG_U(CI_RLLOG( + 10, LPF "UNEXPECTED ether_type " PKT_DBG_FMT, PKT_DBG_ARGS(pkt))); LOG_DU(ci_hex_dump(ci_log_fn, PKT_START(pkt), 64, 0)); ci_netif_pkt_release_rx_1ref(netif, pkt); } return; -#if CI_CFG_RANDOM_DROP && !defined(__ci_driver__) - drop: +#if CI_CFG_RANDOM_DROP && ! 
defined(__ci_driver__) +drop: LOG_NR(log(LPF "DROP")); LOG_DR(ci_hex_dump(ci_log_fn, pkt, 40, 0)); ci_netif_pkt_release_rx_1ref(netif, pkt); @@ -470,7 +467,7 @@ static void handle_rx_pkt(ci_netif* netif, struct ci_netif_poll_state* ps, static ci_ip_pkt_fmt* alloc_rx_efct_pkt(ci_netif* ni, int intf_i, int pay_len) { ci_ip_pkt_fmt* pkt = ci_netif_pkt_alloc(ni, 0); - if(CI_UNLIKELY( ! pkt )) + if( CI_UNLIKELY(! pkt) ) return NULL; pkt->pkt_start_off = 0; pkt->intf_i = intf_i; @@ -484,8 +481,8 @@ static ci_ip_pkt_fmt* alloc_rx_efct_pkt(ci_netif* ni, int intf_i, int pay_len) return pkt; } -static void get_efct_timestamp(ci_netif* netif, ef_vi* vi, - uint32_t pkt_id, ci_ip_pkt_fmt* pkt) +static void get_efct_timestamp( + ci_netif* netif, ef_vi* vi, uint32_t pkt_id, ci_ip_pkt_fmt* pkt) { #if CI_CFG_TIMESTAMPING ci_netif_state_nic_t* nsn = &netif->state->nic[pkt->intf_i]; @@ -499,18 +496,18 @@ static void get_efct_timestamp(ci_netif* netif, ef_vi* vi, record_rx_timestamp(netif, nsn, pkt, stamp, sync_flags); else LOG_NR(log(LPF "RX pkt=%d efct_id=%08x missing timestamp", - OO_PKT_FMT(pkt), pkt_id)); + OO_PKT_FMT(pkt), pkt_id)); } #else - (void)netif; - (void)vi; - (void)pkt_id; - (void)pkt; + (void) netif; + (void) vi; + (void) pkt_id; + (void) pkt; #endif } -static void copy_efct_to_pkt(ci_netif* netif, ef_vi* vi, - uint32_t pkt_id, ci_ip_pkt_fmt* pkt) +static void copy_efct_to_pkt( + ci_netif* netif, ef_vi* vi, uint32_t pkt_id, ci_ip_pkt_fmt* pkt) { const void* payload = efct_vi_rxpkt_get(vi, pkt_id); memcpy(pkt->dma_start, payload, pkt->pay_len); @@ -538,8 +535,8 @@ static unsigned convert_discard_flags_efct_ef10(unsigned flags) return 0; } -static int convert_efct_to_pkts(ci_netif* ni, int intf_i, ef_event* evs, - int n_evs) +static int convert_efct_to_pkts( + ci_netif* ni, int intf_i, ef_event* evs, int n_evs) { int i; ef_vi* evq = ci_netif_vi(ni, intf_i); @@ -550,13 +547,11 @@ static int convert_efct_to_pkts(ci_netif* ni, int intf_i, ef_event* evs, if( 
EF_EVENT_TYPE(evs[i]) == EF_EVENT_TYPE_RX_REF ) { new_ev.rx.type = EF_EVENT_TYPE_RX; - } - else if( EF_EVENT_TYPE(evs[i]) == EF_EVENT_TYPE_RX_REF_DISCARD ) { + } else if( EF_EVENT_TYPE(evs[i]) == EF_EVENT_TYPE_RX_REF_DISCARD ) { new_ev.rx_discard.type = EF_EVENT_TYPE_RX_DISCARD; new_ev.rx_discard.subtype = - convert_discard_flags_efct_ef10(evs[i].rx_ref_discard.flags); - } - else { + convert_discard_flags_efct_ef10(evs[i].rx_ref_discard.flags); + } else { continue; } @@ -588,15 +583,16 @@ int ci_netif_evq_poll(ci_netif* ni, int intf_i) { ef_vi* evq = ci_netif_vi(ni, intf_i); int n_evs; - size_t evs_per_poll = ef_vi_flags(evq) & EF_VI_RX_EVENT_MERGE ? - NI_OPTS(ni).evs_per_poll / 4 : NI_OPTS(ni).evs_per_poll; + size_t evs_per_poll = ef_vi_flags(evq) & EF_VI_RX_EVENT_MERGE + ? NI_OPTS(ni).evs_per_poll / 4 + : NI_OPTS(ni).evs_per_poll; #if CI_CFG_WANT_BPF_NATIVE && CI_HAVE_BPF_NATIVE - ef_event *ev = ni->state->events; + ef_event* ev = ni->state->events; #endif ci_assert_lt(intf_i, CI_CFG_MAX_INTERFACES); if( intf_i >= oo_stack_intf_max(ni) ) - return 0; /* for simplicity no error reported */ + return 0; /* for simplicity no error reported */ /* The 4 below is empirical: with rx merging we generally see 8ish packets * per rx_multi; we assume that another half are tx events, hence on average * a VI with merging is 4 times more efficient than one without. We don't @@ -606,8 +602,8 @@ int ci_netif_evq_poll(ci_netif* ni, int intf_i) * mode, so by default we tune evs_per_poll to be notably larger than the * normal default. 
*/ n_evs = ef_eventq_poll(evq, ni->state->events, - CI_MIN(sizeof(ni->state->events) / sizeof(ni->state->events[0]), - evs_per_poll)); + CI_MIN(sizeof(ni->state->events) / sizeof(ni->state->events[0]), + evs_per_poll)); /* Converting EVENT_TYPE_RX_REF to EVENT_TYPE_RX is a dirty trick, but we're * faced with two problems with X3: @@ -651,12 +647,11 @@ int ci_netif_evq_poll(ci_netif* ni, int intf_i) uint16_t pay_len; ef_vi_receive_get_bytes(evq, pkt->dma_start, &pay_len); pkt->pay_len = pay_len; - } - else + } else pkt->pay_len = len - evq->rx_prefix_len; oo_offbuf_init(&pkt->buf, PKT_START(pkt), pkt->pay_len); ci_parse_rx_vlan(pkt); - if( !efab_tcp_helper_xdp_rx_pkt(netif2tcp_helper_resource(ni), pkt) ) + if( ! efab_tcp_helper_xdp_rx_pkt(netif2tcp_helper_resource(ni), pkt) ) pkt->flags |= CI_PKT_FLAG_XDP_DROP; /* schedule drop */ /* We called ci_parse_rx_vlan() above, which initialised * pkt_eth_payload_off. However, the main RX loop will call that @@ -667,7 +662,7 @@ int ci_netif_evq_poll(ci_netif* ni, int intf_i) } #endif - return n_evs; + return n_evs; } #endif @@ -693,8 +688,7 @@ ci_inline int oo_xdp_check_pkt(ci_netif* ni, ci_ip_pkt_fmt** pkt) #if ! 
defined(__KERNEL__) && CI_CFG_WANT_BPF_NATIVE ci_inline int oo_xdp_check_pkt(ci_netif* ni, ci_ip_pkt_fmt** pkt) { - if( NI_OPTS(ni).xdp_mode != 0 && - ((*pkt)->flags & CI_PKT_FLAG_XDP_DROP) ) { + if( NI_OPTS(ni).xdp_mode != 0 && ((*pkt)->flags & CI_PKT_FLAG_XDP_DROP) ) { /* just drop */ (*pkt)->flags &= ~CI_PKT_FLAG_XDP_DROP; ci_netif_pkt_release_rx_1ref(ni, *pkt); @@ -712,8 +706,8 @@ ci_inline int oo_xdp_check_pkt(ci_netif* ni, ci_ip_pkt_fmt** pkt) #endif -ci_inline void __handle_rx_pkt(ci_netif* ni, struct ci_netif_poll_state* ps, - ci_ip_pkt_fmt** pkt) +ci_inline void __handle_rx_pkt( + ci_netif* ni, struct ci_netif_poll_state* ps, ci_ip_pkt_fmt** pkt) { if( *pkt ) { #if CI_CFG_TCP_OFFLOAD_RECYCLER @@ -734,8 +728,8 @@ ci_inline void __handle_rx_pkt(ci_netif* ni, struct ci_netif_poll_state* ps, /* Partially handle an incoming packet before its completion event. * As much work as possible should be done here, before waiting for the packet * to arrive, to minimise work done on the critical path after arrival. */ -ci_inline int handle_rx_pre_future(ci_netif* ni, ci_ip_pkt_fmt* pkt, - struct oo_rx_future* future) +ci_inline int handle_rx_pre_future( + ci_netif* ni, ci_ip_pkt_fmt* pkt, struct oo_rx_future* future) { /* On entry: [pkt] contains the first cache line of an incoming packet. * [pkt->frag_next] and [pkt->pay_len] may be invalid. @@ -743,8 +737,8 @@ ci_inline int handle_rx_pre_future(ci_netif* ni, ci_ip_pkt_fmt* pkt, ci_uint16 ether_type; int valid_bytes = CI_CACHE_LINE_SIZE - pkt->pkt_start_off; -#if CI_CFG_RANDOM_DROP && !defined(__KERNEL__) - if(CI_UNLIKELY( rand() < NI_OPTS(ni).rx_drop_rate )) { +#if CI_CFG_RANDOM_DROP && ! 
defined(__KERNEL__) + if( CI_UNLIKELY(rand() < NI_OPTS(ni).rx_drop_rate) ) { LOG_NR(log(LPF "DROP")); LOG_DR(ci_hex_dump(ci_log_fn, pkt, 40, 0)); return FUTURE_DROP; @@ -755,42 +749,42 @@ ci_inline int handle_rx_pre_future(ci_netif* ni, ci_ip_pkt_fmt* pkt, ci_parse_rx_vlan(pkt); ci_assert_le(pkt->pkt_eth_payload_off, valid_bytes); - ether_type = *((ci_uint16*)oo_l3_hdr(pkt) - 1); + ether_type = *((ci_uint16*) oo_l3_hdr(pkt) - 1); pkt->tstamp_frc = IPTIMER_STATE(ni)->frc; if( ether_type == CI_ETHERTYPE_IP ) { - ci_ip4_hdr *ip = oo_ip_hdr(pkt); + ci_ip4_hdr* ip = oo_ip_hdr(pkt); int hdr_size = CI_IP4_IHL(ip); int ip_tot_len = CI_BSWAP_BE16(ip->ip_tot_len_be16); int ip_paylen = ip_tot_len - hdr_size; int ip_payload_offset = pkt->pkt_eth_payload_off + hdr_size; - void* payload = (char*)ip + hdr_size; + void* payload = (char*) ip + hdr_size; if( ip_payload_offset > valid_bytes || (hdr_size > sizeof(ci_ip4_hdr) && - ci_ip_options_parse(ni, ip, hdr_size)) ) + ci_ip_options_parse(ni, ip, hdr_size)) ) goto no_future; - CI_IPV4_STATS_INC_IN_RECVS( ni ); + CI_IPV4_STATS_INC_IN_RECVS(ni); #if CI_CFG_IPV6 - pkt->flags &=~ CI_PKT_FLAG_IS_IP6; + pkt->flags &= ~CI_PKT_FLAG_IS_IP6; #endif get_rx_timestamp(ni, pkt); if( ip->ip_protocol == IPPROTO_TCP ) { - CI_IPV4_STATS_INC_IN_DELIVERS( ni ); + CI_IPV4_STATS_INC_IN_DELIVERS(ni); if( ip_payload_offset + sizeof(ci_tcp_hdr) <= valid_bytes ) ci_tcp_handle_rx_pre_future(ni, pkt, payload, ip_paylen, &future->tcp); else future->tcp.socket = NULL; return FUTURE_TCP4; } - if(CI_LIKELY( ip->ip_protocol == IPPROTO_UDP )) { - CI_IPV4_STATS_INC_IN_DELIVERS( ni ); + if( CI_LIKELY(ip->ip_protocol == IPPROTO_UDP) ) { + CI_IPV4_STATS_INC_IN_DELIVERS(ni); if( ip_payload_offset + sizeof(ci_udp_hdr) <= valid_bytes ) - ci_udp_handle_rx_pre_future(ni, pkt, payload, ip_paylen, - CI_ETHERTYPE_IP, &future->udp); + ci_udp_handle_rx_pre_future( + ni, pkt, payload, ip_paylen, CI_ETHERTYPE_IP, &future->udp); else future->udp.socket = NULL; return FUTURE_UDP4; 
@@ -805,8 +799,8 @@ ci_inline int handle_rx_pre_future(ci_netif* ni, ci_ip_pkt_fmt* pkt, /* Undo partial handling of a packet which did not complete successfully. */ -ci_inline void rollback_rx_future(ci_netif* ni, ci_ip_pkt_fmt* pkt, int status, - struct oo_rx_future* future) +ci_inline void rollback_rx_future( + ci_netif* ni, ci_ip_pkt_fmt* pkt, int status, struct oo_rx_future* future) { CITP_STATS_NETIF_INC(ni, rx_future_rollback); @@ -831,26 +825,25 @@ ci_inline void rollback_rx_future(ci_netif* ni, ci_ip_pkt_fmt* pkt, int status, * here. Any work which doesn't require the complete packet should be done * in handle_rx_pre_future if possible. */ ci_inline void handle_rx_post_future(ci_netif* ni, - struct ci_netif_poll_state* ps, - ci_ip_pkt_fmt* pkt, int status, - struct oo_rx_future* future) + struct ci_netif_poll_state* ps, ci_ip_pkt_fmt* pkt, int status, + struct oo_rx_future* future) { /* On entry: see handle_rx_pkt */ ci_assert_nequal(status, FUTURE_NONE); - if(CI_LIKELY( status & FUTURE_IP4 )) { + if( CI_LIKELY(status & FUTURE_IP4) ) { int ip_tot_len; - ci_ip4_hdr *ip = oo_ip_hdr(pkt); + ci_ip4_hdr* ip = oo_ip_hdr(pkt); LOG_NR(log(LPF "RX id=%d ip_proto=0x%x", OO_PKT_FMT(pkt), - (unsigned) ip->ip_protocol)); + (unsigned) ip->ip_protocol)); LOG_AR(ci_analyse_pkt(PKT_START(pkt), pkt->pay_len)); /* Do the byte-swap just once! */ ip_tot_len = CI_BSWAP_BE16(ip->ip_tot_len_be16); - LOG_DR(ci_hex_dump(ci_log_fn, PKT_START(pkt), - ip_pkt_dump_len(ip_tot_len), 0)); + LOG_DR(ci_hex_dump( + ci_log_fn, PKT_START(pkt), ip_pkt_dump_len(ip_tot_len), 0)); if( oo_tcpdump_check(ni, pkt, pkt->intf_i) ) oo_tcpdump_dump_pkt(ni, pkt); @@ -865,7 +858,7 @@ ci_inline void handle_rx_post_future(ci_netif* ni, ** they can be checked for free in the transport. It is the ** transport's responsibility to check these as necessary. 
*/ - if(CI_LIKELY( ip_tot_len <= pkt->pay_len - oo_pre_l3_len(pkt) )) { + if( CI_LIKELY(ip_tot_len <= pkt->pay_len - oo_pre_l3_len(pkt)) ) { int hdr_size = CI_IP4_IHL(ip); void* payload = (char*) ip + hdr_size; int len = ip_tot_len - hdr_size; @@ -874,23 +867,21 @@ ci_inline void handle_rx_post_future(ci_netif* ni, */ /* Demux to appropriate protocol. */ - if(CI_LIKELY( status & FUTURE_TCP )) + if( CI_LIKELY(status & FUTURE_TCP) ) ci_tcp_handle_rx_post_future(ni, ps, pkt, payload, len, &future->tcp); else ci_udp_handle_rx_post_future(ni, pkt, payload, len, &future->udp); - } - else { + } else { CITP_STATS_NETIF_INC(ni, rx_future_rollback_pkt); rollback_rx_future(ni, pkt, status, future); - LOG_U(log(LPF "[%d] IP HARD " - "(ihl_ver=%x ihl=%d frag=%x ip_len=%d frame_len=%d)" - PKT_DBG_FMT, - ni->state->stack_id, - (int) ip->ip_ihl_version, (int) CI_IP4_IHL(ip), - (unsigned) ip->ip_frag_off_be16, - ip_tot_len, pkt->pay_len, PKT_DBG_ARGS(pkt))); + LOG_U(log(LPF + "[%d] IP HARD " + "(ihl_ver=%x ihl=%d frag=%x ip_len=%d frame_len=%d)" PKT_DBG_FMT, + ni->state->stack_id, (int) ip->ip_ihl_version, (int) CI_IP4_IHL(ip), + (unsigned) ip->ip_frag_off_be16, ip_tot_len, pkt->pay_len, + PKT_DBG_ARGS(pkt))); LOG_DU(ci_hex_dump(ci_log_fn, PKT_START(pkt), 64, 0)); - CI_IPV4_STATS_INC_IN_DISCARDS( ni ); + CI_IPV4_STATS_INC_IN_DISCARDS(ni); if( pkt->rx_flags & CI_PKT_RX_FLAG_RX_SHARED ) { CITP_STATS_NETIF_INC(ni, no_match_pass_to_kernel_ip_other); @@ -902,8 +893,7 @@ ci_inline void handle_rx_post_future(ci_netif* ni, else ci_netif_pkt_release_rx_1ref(ni, pkt); } - } - else { + } else { ci_assert_equal(status, FUTURE_DROP); ci_netif_pkt_release_rx_1ref(ni, pkt); } @@ -918,8 +908,8 @@ ci_inline void handle_rx_post_future(ci_netif* ni, * This function takes the accumulated state, together with the final fragment, * and sorts that out. 
*/ -static void handle_rx_scatter_last_frag(ci_netif* ni, struct oo_rx_state* s, - ci_ip_pkt_fmt* pkt) +static void handle_rx_scatter_last_frag( + ci_netif* ni, struct oo_rx_state* s, ci_ip_pkt_fmt* pkt) { oo_pkt_p next_p; @@ -928,7 +918,7 @@ static void handle_rx_scatter_last_frag(ci_netif* ni, struct oo_rx_state* s, ci_assert(OO_PP_IS_NULL(pkt->frag_next)); pkt->n_buffers = 1; - while( 1 ) { /* reverse the chain of fragments */ + while( 1 ) { /* reverse the chain of fragments */ next_p = s->frag_pkt->frag_next; s->frag_pkt->frag_next = OO_PKT_P(pkt); s->frag_pkt->n_buffers = pkt->n_buffers + 1; @@ -947,25 +937,24 @@ static void handle_rx_scatter_last_frag(ci_netif* ni, struct oo_rx_state* s, /* When not using RX event merging we get a running total of bytes accumulated * in the jumbo. * - * In this case s->frag_bytes tracks the accumulated length from received frags. + * In this case s->frag_bytes tracks the accumulated length from received + * frags. */ static void handle_rx_scatter(ci_netif* ni, struct oo_rx_state* s, - ci_ip_pkt_fmt* pkt, int frame_bytes, - unsigned flags) + ci_ip_pkt_fmt* pkt, int frame_bytes, unsigned flags) { s->rx_pkt = NULL; if( flags & EF_EVENT_FLAG_SOP ) { /* First fragment. */ ci_assert(s->frag_pkt == NULL); - ci_assert_le(frame_bytes, - (int) (CI_CFG_PKT_BUF_SIZE - - CI_MEMBER_OFFSET(ci_ip_pkt_fmt, dma_start))); + ci_assert_le( + frame_bytes, (int) (CI_CFG_PKT_BUF_SIZE - + CI_MEMBER_OFFSET(ci_ip_pkt_fmt, dma_start))); s->frag_pkt = pkt; pkt->buf_len = s->frag_bytes = frame_bytes; oo_offbuf_init(&pkt->buf, PKT_START(pkt), s->frag_bytes); - } - else { + } else { ci_assert(s->frag_pkt != NULL); ci_assert_gt(s->frag_bytes, 0); ci_assert_gt(frame_bytes, s->frag_bytes); @@ -977,8 +966,7 @@ static void handle_rx_scatter(ci_netif* ni, struct oo_rx_state* s, /* Middle fragment. */ pkt->frag_next = OO_PKT_P(s->frag_pkt); s->frag_pkt = pkt; - } - else { + } else { /* Last fragment. 
*/ handle_rx_scatter_last_frag(ni, s, pkt); } @@ -994,8 +982,7 @@ static void handle_rx_scatter(ci_netif* ni, struct oo_rx_state* s, * when we receive the SOP. */ static void handle_rx_scatter_merge(ci_netif* ni, struct oo_rx_state* s, - ci_ip_pkt_fmt* pkt, int prefix_bytes, - ef_vi* vi, unsigned flags) + ci_ip_pkt_fmt* pkt, int prefix_bytes, ef_vi* vi, unsigned flags) { int full_buffer = ef_vi_receive_buffer_len(vi); uint16_t pkt_bytes; @@ -1013,8 +1000,7 @@ static void handle_rx_scatter_merge(ci_netif* ni, struct oo_rx_state* s, oo_offbuf_init(&pkt->buf, PKT_START(pkt), pkt->buf_len); s->frag_pkt = pkt; s->frag_bytes = pkt_bytes; - } - else { + } else { ci_assert(s->frag_pkt != NULL); ci_assert_gt(s->frag_bytes, full_buffer - prefix_bytes); @@ -1027,8 +1013,7 @@ static void handle_rx_scatter_merge(ci_netif* ni, struct oo_rx_state* s, pkt->frag_next = OO_PKT_P(s->frag_pkt); s->frag_pkt = pkt; - } - else { + } else { /* Last fragment. */ /* The first buffer contains a prefix, but all intervening buffers are * are filled, so this contains whatever's leftover. @@ -1043,9 +1028,8 @@ static void handle_rx_scatter_merge(ci_netif* ni, struct oo_rx_state* s, } - static int handle_rx_csum_bad(ci_netif* ni, struct ci_netif_poll_state* ps, - ci_ip_pkt_fmt* pkt, int frame_len) + ci_ip_pkt_fmt* pkt, int frame_len) { int ip_paylen; int ip_proto; @@ -1062,25 +1046,24 @@ static int handle_rx_csum_bad(ci_netif* ni, struct ci_netif_poll_state* ps, * start touching it. 
*/ if( pkt->pay_len < oo_pre_l3_len(pkt) + sizeof(ci_ip4_hdr) ) { CI_IPV4_STATS_INC_IN_HDR_ERRS(ni); - LOG_U(log(FN_FMT "BAD frame_len=%d", - FN_PRI_ARGS(ni), pkt->pay_len)); + LOG_U(log(FN_FMT "BAD frame_len=%d", FN_PRI_ARGS(ni), pkt->pay_len)); goto drop; } - ether_type = *((ci_uint16*)oo_l3_hdr(pkt) - 1); + ether_type = *((ci_uint16*) oo_l3_hdr(pkt) - 1); - if(CI_LIKELY( ether_type == CI_ETHERTYPE_IP )) { - ci_ip4_hdr *ip = oo_ip_hdr(pkt); + if( CI_LIKELY(ether_type == CI_ETHERTYPE_IP) ) { + ci_ip4_hdr* ip = oo_ip_hdr(pkt); int ip_len = CI_BSWAP_BE16(ip->ip_tot_len_be16); ip_paylen = ip_len - CI_IP4_IHL(ip); ip_proto = ip->ip_protocol; #if CI_CFG_IPV6 - pkt->flags &=~ CI_PKT_FLAG_IS_IP6; + pkt->flags &= ~CI_PKT_FLAG_IS_IP6; #endif - if( ip_paylen <= 0 || pkt->pay_len < oo_pre_l3_len(pkt) + ip_len ){ + if( ip_paylen <= 0 || pkt->pay_len < oo_pre_l3_len(pkt) + ip_len ) { CI_IPV4_STATS_INC_IN_HDR_ERRS(ni); - LOG_U(log(FN_FMT "BAD ip_len=%d frame_len=%d", - FN_PRI_ARGS(ni), ip_len, pkt->pay_len)); + LOG_U(log(FN_FMT "BAD ip_len=%d frame_len=%d", FN_PRI_ARGS(ni), ip_len, + pkt->pay_len)); goto drop; } @@ -1092,7 +1075,7 @@ static int handle_rx_csum_bad(ci_netif* ni, struct ci_netif_poll_state* ps, } #if CI_CFG_IPV6 else if( ether_type == CI_ETHERTYPE_IP6 ) { - ci_ip6_hdr *ip = oo_ip6_hdr(pkt); + ci_ip6_hdr* ip = oo_ip6_hdr(pkt); ip_paylen = CI_BSWAP_BE16(ip->payload_len); ip_proto = ip->next_hdr; pkt->flags |= CI_PKT_FLAG_IS_IP6; @@ -1100,8 +1083,8 @@ static int handle_rx_csum_bad(ci_netif* ni, struct ci_netif_poll_state* ps, if( ip_paylen <= 0 || pkt->pay_len < oo_pre_l3_len(pkt) + sizeof(ci_ip6_hdr) + ip_paylen ) { CI_IP_STATS_INC_IN6_HDR_ERRS(ni); - LOG_U(log(FN_FMT "BAD frame_len=%d or IPv6 paylen=%d", - FN_PRI_ARGS(ni), pkt->pay_len, ip_paylen)); + LOG_U(log(FN_FMT "BAD frame_len=%d or IPv6 paylen=%d", FN_PRI_ARGS(ni), + pkt->pay_len, ip_paylen)); goto drop; } @@ -1110,7 +1093,7 @@ static int handle_rx_csum_bad(ci_netif* ni, struct ci_netif_poll_state* ps, 
#endif else { LOG_FL(unexpected_rx_log_flag(pkt), - log(FN_FMT "BAD frame ether_type=%d", FN_PRI_ARGS(ni), ether_type)); + log(FN_FMT "BAD frame ether_type=%d", FN_PRI_ARGS(ni), ether_type)); goto drop; } @@ -1120,35 +1103,31 @@ static int handle_rx_csum_bad(ci_netif* ni, struct ci_netif_poll_state* ps, if( ip_paylen < sizeof(ci_tcp_hdr) ) { LOG_U(log(FN_FMT "BAD TCP ip_paylen=%d", FN_PRI_ARGS(ni), ip_paylen)); goto drop; - } - else if( ci_tcp_csum_correct(pkt, ip_paylen) ) { + } else if( ci_tcp_csum_correct(pkt, ip_paylen) ) { CI_DEBUG(pkt->pkt_eth_payload_off = PKT_START_OFF_BAD); __handle_rx_pkt(ni, ps, &pkt); return 1; - } - else { - LOG_U(log(FN_FMT "BAD TCP CHECKSUM %04x "PKT_DBG_FMT, FN_PRI_ARGS(ni), - (unsigned) PKT_IPX_TCP_HDR(oo_pkt_af(pkt), pkt)->tcp_check_be16, - PKT_DBG_ARGS(pkt))); + } else { + LOG_U(log(FN_FMT "BAD TCP CHECKSUM %04x " PKT_DBG_FMT, FN_PRI_ARGS(ni), + (unsigned) PKT_IPX_TCP_HDR(oo_pkt_af(pkt), pkt)->tcp_check_be16, + PKT_DBG_ARGS(pkt))); goto drop; } - } - else if( ip_proto == IPPROTO_UDP ) { + } else if( ip_proto == IPPROTO_UDP ) { ci_udp_hdr* udp = PKT_IPX_UDP_HDR(oo_pkt_af(pkt), pkt); - pkt->pf.udp.pay_len = CI_BSWAP_BE16(udp->udp_len_be16) - sizeof(ci_udp_hdr); + pkt->pf.udp.pay_len = + CI_BSWAP_BE16(udp->udp_len_be16) - sizeof(ci_udp_hdr); if( ip_paylen < sizeof(ci_udp_hdr) ) { LOG_U(log(FN_FMT "BAD UDP ip_paylen=%d", FN_PRI_ARGS(ni), ip_paylen)); goto drop; - } - else if( ci_udp_csum_correct(pkt, udp) ) { + } else if( ci_udp_csum_correct(pkt, udp) ) { CI_DEBUG(pkt->pkt_eth_payload_off = PKT_START_OFF_BAD); __handle_rx_pkt(ni, ps, &pkt); return 1; - } - else { + } else { CI_UDP_STATS_INC_IN_ERRS(ni); LOG_U(log(FN_FMT "BAD UDP CHECKSUM %04x", FN_PRI_ARGS(ni), - (unsigned) udp->udp_check_be16)); + (unsigned) udp->udp_check_be16)); goto drop; } } @@ -1162,16 +1141,15 @@ static int handle_rx_csum_bad(ci_netif* ni, struct ci_netif_poll_state* ps, static void discard_rx_multi_pkts(ci_netif* ni, struct ci_netif_poll_state* ps, - int 
intf_i, struct oo_rx_state* s, - int frame_len, unsigned discard_flags, - ci_ip_pkt_fmt* pkt) + int intf_i, struct oo_rx_state* s, int frame_len, unsigned discard_flags, + ci_ip_pkt_fmt* pkt) { int is_frag = OO_PP_NOT_NULL(pkt->frag_next); int handled = 0; LOG_FL(unexpected_rx_log_flag(pkt), - log(LPF "[%d] intf %d discard RX_MULTI_PKTS 0x%x", - NI_ID(ni), intf_i, discard_flags)); + log(LPF "[%d] intf %d discard RX_MULTI_PKTS 0x%x", NI_ID(ni), intf_i, + discard_flags)); /* Previous packet is already handled, s->rx_pkt can contain only current * packet. Fragmented packet must be processed and linked, i.e. it is in @@ -1182,23 +1160,23 @@ static void discard_rx_multi_pkts(ci_netif* ni, struct ci_netif_poll_state* ps, /* Fragmented packets cannot be processed by handle_rx_csum_bad(). * See also comment in __handle_rx_discard(). */ - if( (discard_flags & (EF_VI_DISCARD_RX_L3_CSUM_ERR | - EF_VI_DISCARD_RX_L4_CSUM_ERR)) && - !is_frag ) + if( (discard_flags & + (EF_VI_DISCARD_RX_L3_CSUM_ERR | EF_VI_DISCARD_RX_L4_CSUM_ERR)) && + ! is_frag ) handled = handle_rx_csum_bad(ni, ps, pkt, frame_len); if( discard_flags & EF_VI_DISCARD_RX_ETH_LEN_ERR ) CITP_STATS_NETIF_INC(ni, rx_discard_len_err); else if( discard_flags & EF_VI_DISCARD_RX_ETH_FCS_ERR ) CITP_STATS_NETIF_INC(ni, rx_discard_crc_bad); - else if( discard_flags & (EF_VI_DISCARD_RX_L3_CSUM_ERR | - EF_VI_DISCARD_RX_L4_CSUM_ERR) ) + else if( discard_flags & + (EF_VI_DISCARD_RX_L3_CSUM_ERR | EF_VI_DISCARD_RX_L4_CSUM_ERR) ) CITP_STATS_NETIF_INC(ni, rx_discard_csum_bad); - if( !handled ) { + if( ! 
handled ) { if( oo_tcpdump_check(ni, pkt, pkt->intf_i) ) { - pkt->pay_len = frame_len; - oo_tcpdump_dump_pkt(ni, pkt); + pkt->pay_len = frame_len; + oo_tcpdump_dump_pkt(ni, pkt); } ci_netif_pkt_release_rx_1ref(ni, pkt); @@ -1206,7 +1184,8 @@ static void discard_rx_multi_pkts(ci_netif* ni, struct ci_netif_poll_state* ps, } -static ci_ip_pkt_fmt* rx_multi_get_next_desc(ci_netif* ni, ef_vi* vi, int intf_i) +static ci_ip_pkt_fmt* rx_multi_get_next_desc( + ci_netif* ni, ef_vi* vi, int intf_i) { ef_request_id di; oo_pkt_p pp; @@ -1221,9 +1200,8 @@ static ci_ip_pkt_fmt* rx_multi_get_next_desc(ci_netif* ni, ef_vi* vi, int intf_i static void handle_rx_multi_pkts(ci_netif* ni, struct oo_rx_state* s, - int prefix_bytes, - ef_vi* vi, int intf_i, - struct ci_netif_poll_state* ps, int q_id) + int prefix_bytes, ef_vi* vi, int intf_i, struct ci_netif_poll_state* ps, + int q_id) { int full_buffer = ef_vi_receive_buffer_len(vi); uint16_t pkt_bytes, total_bytes, cur_bytes; @@ -1250,7 +1228,8 @@ static void handle_rx_multi_pkts(ci_netif* ni, struct oo_rx_state* s, * this is OK, because the plugin itself verified the checksum before * handling the packet, and the path from the plugin to Onload is assumed * to be error-free. */ - if( ci_tcp_plugin_elided_payload(pkt) || ci_tcp_plugin_tcp_app_packet(pkt) ) + if( ci_tcp_plugin_elided_payload(pkt) || + ci_tcp_plugin_tcp_app_packet(pkt) ) discard_flags = 0; } #endif @@ -1258,7 +1237,7 @@ static void handle_rx_multi_pkts(ci_netif* ni, struct oo_rx_state* s, /* if 1 pkt = 1 desc */ if( total_bytes <= full_buffer ) { /* Whole packet in a single buffer. 
*/ - if(CI_UNLIKELY( discard_flags != 0 )) { + if( CI_UNLIKELY(discard_flags != 0) ) { discard_rx_multi_pkts(ni, ps, intf_i, s, pkt_bytes, discard_flags, pkt); return; } @@ -1271,7 +1250,7 @@ static void handle_rx_multi_pkts(ci_netif* ni, struct oo_rx_state* s, s->rx_pkt = NULL; /* - First fragment of packet - */ ci_assert(s->frag_pkt == NULL); - ci_assert_gt(total_bytes, full_buffer ); + ci_assert_gt(total_bytes, full_buffer); /* The packet prefix is present in the first buffer */ pkt->buf_len = full_buffer - prefix_bytes; @@ -1304,25 +1283,26 @@ static void handle_rx_multi_pkts(ci_netif* ni, struct oo_rx_state* s, handle_rx_scatter_last_frag(ni, s, pkt); - if(CI_UNLIKELY( discard_flags != 0 )) { - /* Discard the fragmented packet in the end of processing to unbundle RxQ */ - discard_rx_multi_pkts(ni, ps, intf_i, s, pkt_bytes, discard_flags, s->rx_pkt); + if( CI_UNLIKELY(discard_flags != 0) ) { + /* Discard the fragmented packet in the end of processing to unbundle RxQ + */ + discard_rx_multi_pkts( + ni, ps, intf_i, s, pkt_bytes, discard_flags, s->rx_pkt); } } static void handle_rx_no_desc_trunc(ci_netif* ni, - struct ci_netif_poll_state* ps, - int intf_i, - struct oo_rx_state* s, ef_event ev) + struct ci_netif_poll_state* ps, int intf_i, struct oo_rx_state* s, + ef_event ev) { - LOG_U(log(LPF "[%d] intf %d RX_NO_DESC_TRUNC "EF_EVENT_FMT, - NI_ID(ni), intf_i, EF_EVENT_PRI_ARG(ev))); + LOG_U(log(LPF "[%d] intf %d RX_NO_DESC_TRUNC " EF_EVENT_FMT, NI_ID(ni), + intf_i, EF_EVENT_PRI_ARG(ev))); __handle_rx_pkt(ni, ps, &s->rx_pkt); s->rx_pkt = NULL; ci_assert(s->frag_pkt != NULL); - if( s->frag_pkt != NULL ) { /* belt and braces! */ + if( s->frag_pkt != NULL ) { /* belt and braces! 
*/ ci_netif_pkt_release_rx_1ref(ni, s->frag_pkt); s->frag_pkt = NULL; } @@ -1330,16 +1310,15 @@ static void handle_rx_no_desc_trunc(ci_netif* ni, static void __handle_rx_discard(ci_netif* ni, struct ci_netif_poll_state* ps, - int intf_i, struct oo_rx_state* s, ef_event ev, - int frame_len, int discard_type, oo_pkt_p pp) + int intf_i, struct oo_rx_state* s, ef_event ev, int frame_len, + int discard_type, oo_pkt_p pp) { int is_frag; ci_ip_pkt_fmt* pkt; int handled = 0; - LOG_U(log(LPF "[%d] intf %d RX_DISCARD %d "EF_EVENT_FMT, - NI_ID(ni), intf_i, - (int) discard_type, EF_EVENT_PRI_ARG(ev))); + LOG_U(log(LPF "[%d] intf %d RX_DISCARD %d " EF_EVENT_FMT, NI_ID(ni), intf_i, + (int) discard_type, EF_EVENT_PRI_ARG(ev))); __handle_rx_pkt(ni, ps, &s->rx_pkt); s->rx_pkt = NULL; @@ -1360,43 +1339,43 @@ static void __handle_rx_discard(ci_netif* ni, struct ci_netif_poll_state* ps, pkt = PKT_CHK(ni, pp); - if( discard_type == EF_EVENT_RX_DISCARD_CSUM_BAD && !is_frag ) + if( discard_type == EF_EVENT_RX_DISCARD_CSUM_BAD && ! 
is_frag ) handled = handle_rx_csum_bad(ni, ps, pkt, frame_len); - + switch( discard_type ) { - case EF_EVENT_RX_DISCARD_CSUM_BAD: - CITP_STATS_NETIF_INC(ni, rx_discard_csum_bad); - break; - case EF_EVENT_RX_DISCARD_INNER_CSUM_BAD: - CITP_STATS_NETIF_INC(ni, rx_discard_inner_csum_bad); - break; - case EF_EVENT_RX_DISCARD_MCAST_MISMATCH: - CITP_STATS_NETIF_INC(ni, rx_discard_mcast_mismatch); - break; - case EF_EVENT_RX_DISCARD_CRC_BAD: - CITP_STATS_NETIF_INC(ni, rx_discard_crc_bad); - break; - case EF_EVENT_RX_DISCARD_TRUNC: - CITP_STATS_NETIF_INC(ni, rx_discard_trunc); - break; - case EF_EVENT_RX_DISCARD_RIGHTS: - CITP_STATS_NETIF_INC(ni, rx_discard_rights); - break; - case EF_EVENT_RX_DISCARD_OTHER: - CITP_STATS_NETIF_INC(ni, rx_discard_other); - break; + case EF_EVENT_RX_DISCARD_CSUM_BAD: + CITP_STATS_NETIF_INC(ni, rx_discard_csum_bad); + break; + case EF_EVENT_RX_DISCARD_INNER_CSUM_BAD: + CITP_STATS_NETIF_INC(ni, rx_discard_inner_csum_bad); + break; + case EF_EVENT_RX_DISCARD_MCAST_MISMATCH: + CITP_STATS_NETIF_INC(ni, rx_discard_mcast_mismatch); + break; + case EF_EVENT_RX_DISCARD_CRC_BAD: + CITP_STATS_NETIF_INC(ni, rx_discard_crc_bad); + break; + case EF_EVENT_RX_DISCARD_TRUNC: + CITP_STATS_NETIF_INC(ni, rx_discard_trunc); + break; + case EF_EVENT_RX_DISCARD_RIGHTS: + CITP_STATS_NETIF_INC(ni, rx_discard_rights); + break; + case EF_EVENT_RX_DISCARD_OTHER: + CITP_STATS_NETIF_INC(ni, rx_discard_other); + break; } - if( !handled ) { + if( ! 
handled ) { /* Only dump the packet if the NIC actually delivered it */ if( (discard_type == EF_EVENT_RX_DISCARD_CSUM_BAD || - discard_type == EF_EVENT_RX_DISCARD_MCAST_MISMATCH || - discard_type == EF_EVENT_RX_DISCARD_CRC_BAD || - discard_type == EF_EVENT_RX_DISCARD_TRUNC || - discard_type == EF_EVENT_RX_DISCARD_OTHER) && + discard_type == EF_EVENT_RX_DISCARD_MCAST_MISMATCH || + discard_type == EF_EVENT_RX_DISCARD_CRC_BAD || + discard_type == EF_EVENT_RX_DISCARD_TRUNC || + discard_type == EF_EVENT_RX_DISCARD_OTHER) && oo_tcpdump_check(ni, pkt, pkt->intf_i) ) { - pkt->pay_len = frame_len; - oo_tcpdump_dump_pkt(ni, pkt); + pkt->pay_len = frame_len; + oo_tcpdump_dump_pkt(ni, pkt); } ci_netif_pkt_release_rx_1ref(ni, pkt); @@ -1405,11 +1384,11 @@ static void __handle_rx_discard(ci_netif* ni, struct ci_netif_poll_state* ps, static void handle_rx_discard(ci_netif* ni, struct ci_netif_poll_state* ps, - int intf_i, struct oo_rx_state* s, ef_event ev) + int intf_i, struct oo_rx_state* s, ef_event ev) { int discard_type = EF_EVENT_RX_DISCARD_TYPE(ev); - int frame_len = EF_EVENT_RX_DISCARD_BYTES(ev) - - ci_netif_vi(ni, intf_i)->rx_prefix_len; + int frame_len = + EF_EVENT_RX_DISCARD_BYTES(ev) - ci_netif_vi(ni, intf_i)->rx_prefix_len; oo_pkt_p pp; OO_PP_INIT(ni, pp, EF_EVENT_RX_DISCARD_RQ_ID(ev)); @@ -1418,9 +1397,8 @@ static void handle_rx_discard(ci_netif* ni, struct ci_netif_poll_state* ps, static void handle_rx_multi_discard(ci_netif* ni, - struct ci_netif_poll_state* ps, int intf_i, - struct oo_rx_state* s, ef_event ev, - ef_request_id id, ef_vi* vi) + struct ci_netif_poll_state* ps, int intf_i, struct oo_rx_state* s, + ef_event ev, ef_request_id id, ef_vi* vi) { int discard_type = EF_EVENT_RX_MULTI_DISCARD_TYPE(ev); uint16_t frame_len; @@ -1449,7 +1427,7 @@ static void process_post_poll_list(ci_netif* ni) struct oo_p_dllink_state lnk; struct oo_p_dllink_state tmp_lnk; struct oo_p_dllink_state post_poll_list = - oo_p_dllink_ptr(ni, &ni->state->post_poll_list); + 
oo_p_dllink_ptr(ni, &ni->state->post_poll_list); int need_wake = 0; citp_waitable* sb; #if CI_CFG_EPOLL3 @@ -1459,9 +1437,10 @@ static void process_post_poll_list(ci_netif* ni) int i = 0; #endif - oo_p_dllink_for_each_safe(ni, lnk, tmp_lnk, post_poll_list) { + oo_p_dllink_for_each_safe(ni, lnk, tmp_lnk, post_poll_list) + { #ifdef __KERNEL__ - if(CI_UNLIKELY( i++ > ni->ep_tbl_n )) { + if( CI_UNLIKELY(i++ > ni->ep_tbl_n) ) { ci_netif_error_detected(ni, CI_NETIF_ERROR_POST_POLL_LIST, __FUNCTION__); return; } @@ -1487,16 +1466,14 @@ static void process_post_poll_list(ci_netif* ni) if( ! (sb->sb_flags & sb->wake_request) ) { sb->sb_flags = 0; - } - else { + } else { #ifdef __KERNEL__ /* In realtime kernel, citp_waitable_wakeup() from NAPI context is * harmful */ - if( !((ni->flags & CI_NETIF_FLAG_IN_DL_CONTEXT) && - oo_avoid_wakeup_from_dl()) ) { + if( ! ((ni->flags & CI_NETIF_FLAG_IN_DL_CONTEXT) && + oo_avoid_wakeup_from_dl()) ) { citp_waitable_wakeup(ni, sb); - } - else + } else #endif { /* Leave endpoints that need waking on the post-poll list so they can @@ -1524,14 +1501,15 @@ static void process_post_poll_list(ci_netif* ni) #if CI_CFG_EPOLL3 /* Shouldn't have had a wake for a list we don't think exists */ - ci_assert_equal(lists_need_wake & ~((1 << CI_CFG_N_READY_LISTS)-1), 0); + ci_assert_equal(lists_need_wake & ~((1 << CI_CFG_N_READY_LISTS) - 1), 0); #ifndef __KERNEL__ /* See if any of the ready lists need a wake. We only bother checking if * we're not going to do a wake anyway. */ if( need_wake == 0 && lists_need_wake != 0 ) { - CI_READY_LIST_EACH(lists_need_wake, lists_need_wake, i) { + CI_READY_LIST_EACH(lists_need_wake, lists_need_wake, i) + { if( ni->state->ready_list_flags[i] & CI_NI_READY_LIST_FLAG_WAKE ) { need_wake = 1; break; @@ -1548,7 +1526,8 @@ static void process_post_poll_list(ci_netif* ni) #ifdef __KERNEL__ /* Check whether any ready lists associated with a set need to be woken. 
*/ - CI_READY_LIST_EACH(lists_need_wake, lists_need_wake, i) { + CI_READY_LIST_EACH(lists_need_wake, lists_need_wake, i) + { if( (lists_need_wake & (1 << i)) && (ni->state->ready_list_flags[i] & CI_NI_READY_LIST_FLAG_WAKE) ) efab_tcp_helper_ready_list_wakeup(netif2tcp_helper_resource(ni), i); @@ -1558,7 +1537,7 @@ static void process_post_poll_list(ci_netif* ni) } -#define UDP_CAN_FREE(us) ((us)->tx_count == 0) +#define UDP_CAN_FREE(us) ((us)->tx_count == 0) #if CI_CFG_TCP_OFFLOAD_RECYCLER #define CI_NETIF_RX_VI(ni, nic_i, label) (&(ni)->nic_hw[(nic_i)].vis[(label)]) @@ -1567,12 +1546,11 @@ static void process_post_poll_list(ci_netif* ni) * vi index known to be a constant so it's more optimisable */ #define CI_NETIF_RX_VI(ni, nic_i, label) (&(ni)->nic_hw[(nic_i)].vis[0]) #endif -#define CI_NETIF_TX_VI CI_NETIF_RX_VI +#define CI_NETIF_TX_VI CI_NETIF_RX_VI -static void ci_netif_tx_pkt_complete_udp(ci_netif* netif, - struct ci_netif_poll_state* ps, - ci_ip_pkt_fmt* pkt) +static void ci_netif_tx_pkt_complete_udp( + ci_netif* netif, struct ci_netif_poll_state* ps, ci_ip_pkt_fmt* pkt) { ci_udp_state* us; oo_pkt_p frag_next; @@ -1598,10 +1576,9 @@ static void ci_netif_tx_pkt_complete_udp(ci_netif* netif, * and we do the same. 
*/ ci_udp_wake_possibly_not_in_poll(netif, us, CI_SB_FLAG_WAKE_TX); ci_netif_put_on_post_poll(netif, &us->s.b); - } - else if( UDP_CAN_FREE(us) ) { - oo_p_dllink_del_init(netif, oo_p_dllink_sb(netif, &us->s.b, - &us->s.b.post_poll_link)); + } else if( UDP_CAN_FREE(us) ) { + oo_p_dllink_del_init( + netif, oo_p_dllink_sb(netif, &us->s.b, &us->s.b.post_poll_link)); ci_udp_state_free(netif, us); } } @@ -1633,9 +1610,8 @@ static void ci_netif_tx_pkt_complete_udp(ci_netif* netif, } -static void ci_netif_rx_pkt_complete_tcp(ci_netif* ni, - struct ci_netif_poll_state* ps, - ci_ip_pkt_fmt* pkt) +static void ci_netif_rx_pkt_complete_tcp( + ci_netif* ni, struct ci_netif_poll_state* ps, ci_ip_pkt_fmt* pkt) { #if CI_CFG_TIMESTAMPING if( pkt->flags & (CI_PKT_FLAG_TX_TIMESTAMPED | CI_PKT_FLAG_INDIRECT) ) { @@ -1676,14 +1652,13 @@ static void ci_netif_rx_pkt_complete_tcp(ci_netif* ni, ci_inline void __ci_netif_tx_pkt_complete(ci_netif* ni, - struct ci_netif_poll_state* ps, - ci_ip_pkt_fmt* pkt, ef_event* ev) + struct ci_netif_poll_state* ps, ci_ip_pkt_fmt* pkt, ef_event* ev) { ci_netif_state_nic_t* nic = &ni->state->nic[pkt->intf_i]; /* debug check - take back ownership of buffer from NIC */ ci_assert(pkt->flags & CI_PKT_FLAG_TX_PENDING); nic->tx_bytes_removed += TX_PKT_LEN(pkt); - ci_assert((int) (nic->tx_bytes_added - nic->tx_bytes_removed) >=0); + ci_assert((int) (nic->tx_bytes_added - nic->tx_bytes_removed) >= 0); #if CI_CFG_PIO if( pkt->pio_addr >= 0 ) { ci_pio_buddy_free(ni, &nic->pio_buddy, pkt->pio_addr, pkt->pio_order); @@ -1694,31 +1669,29 @@ ci_inline void __ci_netif_tx_pkt_complete(ci_netif* ni, if( pkt->flags & CI_PKT_FLAG_TX_TIMESTAMPED ) { if( ev != NULL && EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_TX_WITH_TIMESTAMP ) { int opt_tsf = ((NI_OPTS(ni).timestamping_reporting) & - CITP_TIMESTAMPING_RECORDING_FLAG_CHECK_SYNC) ? - EF_VI_SYNC_FLAG_CLOCK_IN_SYNC : - EF_VI_SYNC_FLAG_CLOCK_SET; + CITP_TIMESTAMPING_RECORDING_FLAG_CHECK_SYNC) + ? 
EF_VI_SYNC_FLAG_CLOCK_IN_SYNC + : EF_VI_SYNC_FLAG_CLOCK_SET; int pkt_tsf = EF_EVENT_TX_WITH_TIMESTAMP_SYNC_FLAGS(*ev); pkt->hw_stamp.tv_sec = EF_EVENT_TX_WITH_TIMESTAMP_SEC(*ev); pkt->hw_stamp.tv_nsec = - (EF_EVENT_TX_WITH_TIMESTAMP_NSEC(*ev) & - (~CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC)) | - ((pkt_tsf & opt_tsf) ? - CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC : 0); - } - else if( ev == NULL ) { + (EF_EVENT_TX_WITH_TIMESTAMP_NSEC(*ev) & + (~CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC)) | + ((pkt_tsf & opt_tsf) ? CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC : 0); + } else if( ev == NULL ) { /* This is NIC reset. The TIMESTAMPED flag needs to stay * to ensure client is notified of missing timestamp - * important to keep TCP timestamps in sync with * TCP stream */ pkt->hw_stamp.tv_sec = 0; pkt->hw_stamp.tv_nsec = 0; - } - else { + } else { if( CI_NETIF_TX_VI(ni, pkt->intf_i, ev->tx_timestamp.q_id)->vi_flags & EF_VI_TX_TIMESTAMPS ) { - ci_log("ERROR: TX timestamp requested, but non-timestamped " - "TX complete event received."); + ci_log( + "ERROR: TX timestamp requested, but non-timestamped " + "TX complete event received."); } pkt->flags &= ~CI_PKT_FLAG_TX_TIMESTAMPED; } @@ -1741,29 +1714,28 @@ ci_inline void __ci_netif_tx_pkt_complete(ci_netif* ni, } #endif - pkt->flags &=~ CI_PKT_FLAG_TX_PENDING; + pkt->flags &= ~CI_PKT_FLAG_TX_PENDING; if( pkt->flags & CI_PKT_FLAG_UDP ) ci_netif_tx_pkt_complete_udp(ni, ps, pkt); else ci_netif_rx_pkt_complete_tcp(ni, ps, pkt); - } -void ci_netif_tx_pkt_complete(ci_netif* ni, struct ci_netif_poll_state* ps, - ci_ip_pkt_fmt* pkt) +void ci_netif_tx_pkt_complete( + ci_netif* ni, struct ci_netif_poll_state* ps, ci_ip_pkt_fmt* pkt) { __ci_netif_tx_pkt_complete(ni, ps, pkt, NULL); } -static int ci_netif_poll_evq(ci_netif* ni, struct ci_netif_poll_state* ps, - int intf_i, int n_evs) +static int ci_netif_poll_evq( + ci_netif* ni, struct ci_netif_poll_state* ps, int intf_i, int n_evs) { struct oo_rx_state s; ef_vi* evq = ci_netif_vi(ni, intf_i); unsigned total_evs = 0; ci_ip_pkt_fmt* 
pkt; - ef_event *ev = ni->state->events; + ef_event* ev = ni->state->events; int i; oo_pkt_p pp; int completed_tx = 0; @@ -1771,7 +1743,7 @@ static int ci_netif_poll_evq(ci_netif* ni, struct ci_netif_poll_state* ps, int poll_in_kernel; #endif s.frag_pkt = NULL; - s.frag_bytes = 0; /*??*/ + s.frag_bytes = 0; /*??*/ if( OO_PP_NOT_NULL(ni->state->nic[intf_i].rx_frags) ) { pkt = PKT_CHK(ni, ni->state->nic[intf_i].rx_frags); @@ -1794,8 +1766,7 @@ static int ci_netif_poll_evq(ci_netif* ni, struct ci_netif_poll_state* ps, n_evs = 0; if( ci_netif_intf_has_event(ni, intf_i) ) n_evs = ci_netif_evq_poll_k(ni, intf_i); - } - else + } else #endif n_evs = ef_eventq_poll(evq, ev, 16); /* The 16 above is a heuristic. We want a big number for efficiency, but @@ -1808,7 +1779,7 @@ static int ci_netif_poll_evq(ci_netif* ni, struct ci_netif_poll_state* ps, if( n_evs == 0 ) break; -have_events: + have_events: /* This loop is implemented with a 1 packet lag on processing (i.e. * __handle_rx_pkt() is called for the packet from the previous loop * iteration just as the next packet is being picked up, due to a @@ -1821,26 +1792,29 @@ static int ci_netif_poll_evq(ci_netif* ni, struct ci_netif_poll_state* ps, CITP_STATS_NETIF_INC(ni, rx_evs); OO_PP_INIT(ni, pp, EF_EVENT_RX_RQ_ID(ev[i])); pkt = PKT_CHK(ni, pp); - /* AF_XDP has potentially variable offset and this is taken it into account here, - * but we shouldn't touch pkt_start_off for ef10 case as it is used to calculate - * pkt_eth_payload_off properly. */ + /* AF_XDP has potentially variable offset and this is taken it into + * account here, but we shouldn't touch pkt_start_off for ef10 case as + * it is used to calculate pkt_eth_payload_off properly. 
*/ if( evq->nic_type.arch == EF_VI_ARCH_AF_XDP ) { - pkt->pkt_start_off = ev[i].rx.ofs - - CI_MEMBER_OFFSET(ci_ip_pkt_fmt, dma_start); + pkt->pkt_start_off = + ev[i].rx.ofs - CI_MEMBER_OFFSET(ci_ip_pkt_fmt, dma_start); } ci_assert_equal(pkt->intf_i, intf_i); __handle_rx_pkt(ni, ps, &s.rx_pkt); - if( (ev[i].rx.flags & (EF_EVENT_FLAG_SOP | EF_EVENT_FLAG_CONT)) - == EF_EVENT_FLAG_SOP ) { + if( (ev[i].rx.flags & (EF_EVENT_FLAG_SOP | EF_EVENT_FLAG_CONT)) == + EF_EVENT_FLAG_SOP ) { /* Whole packet in a single buffer. */ pkt->pay_len = EF_EVENT_RX_BYTES(ev[i]) - evq->rx_prefix_len; oo_offbuf_init(&pkt->buf, PKT_START(pkt), pkt->pay_len); s.rx_pkt = pkt; - } - else { +#ifndef __KERNEL__ + /* This is a temporary hack to convert dpdk mbufs into the + * descriptors that onload expects. */ + ef_fill_rx_data(evq, PKT_START(pkt), i); +#endif + } else { handle_rx_scatter(ni, &s, pkt, - EF_EVENT_RX_BYTES(ev[i]) - evq->rx_prefix_len, - ev[i].rx.flags); + EF_EVENT_RX_BYTES(ev[i]) - evq->rx_prefix_len, ev[i].rx.flags); } } @@ -1857,8 +1831,8 @@ static int ci_netif_poll_evq(ci_netif* ni, struct ci_netif_poll_state* ps, efct_vi_rxpkt_release(evq, ev[i].rx_ref.pkt_id); } - else if(CI_LIKELY( EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_TX )) { - ef_request_id *ids = ni->tx_events; + else if( CI_LIKELY(EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_TX) ) { + ef_request_id* ids = ni->tx_events; int n_ids, j; ef_vi* vi = CI_NETIF_TX_VI(ni, intf_i, ev[i].tx.q_id); CITP_STATS_NETIF_INC(ni, tx_evs); @@ -1875,7 +1849,7 @@ static int ci_netif_poll_evq(ci_netif* ni, struct ci_netif_poll_state* ps, } else if( EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX_MULTI ) { - ef_request_id *ids = ni->rx_events; + ef_request_id* ids = ni->rx_events; int n_ids, j; ef_vi* vi = CI_NETIF_RX_VI(ni, intf_i, ev[i].rx.q_id); CITP_STATS_NETIF_INC(ni, rx_evs); @@ -1890,18 +1864,18 @@ static int ci_netif_poll_evq(ci_netif* ni, struct ci_netif_poll_state* ps, ci_prefetch_ppc(pkt); ci_assert_equal(pkt->intf_i, intf_i); __handle_rx_pkt(ni, 
ps, &s.rx_pkt); - if( (ev[i].rx_multi.flags & (EF_EVENT_FLAG_SOP | EF_EVENT_FLAG_CONT)) - == EF_EVENT_FLAG_SOP ) { + if( (ev[i].rx_multi.flags & + (EF_EVENT_FLAG_SOP | EF_EVENT_FLAG_CONT)) == + EF_EVENT_FLAG_SOP ) { /* Whole packet in a single buffer. */ uint16_t len; ef_vi_receive_get_bytes(vi, pkt->dma_start, &len); pkt->pay_len = len; oo_offbuf_init(&pkt->buf, PKT_START(pkt), pkt->pay_len); s.rx_pkt = pkt; - } - else { - handle_rx_scatter_merge(ni, &s, pkt, evq->rx_prefix_len, vi, - ev[i].rx_multi.flags); + } else { + handle_rx_scatter_merge( + ni, &s, pkt, evq->rx_prefix_len, vi, ev[i].rx_multi.flags); } } } @@ -1914,8 +1888,8 @@ static int ci_netif_poll_evq(ci_netif* ni, struct ci_netif_poll_state* ps, n_pkts = ev[i].rx_multi_pkts.n_pkts; for( j = 0; j < n_pkts; ++j ) { __handle_rx_pkt(ni, ps, &s.rx_pkt); - handle_rx_multi_pkts(ni, &s, evq->rx_prefix_len, vi, intf_i, ps, - q_id); + handle_rx_multi_pkts( + ni, &s, evq->rx_prefix_len, vi, intf_i, ps, q_id); } } @@ -1937,7 +1911,7 @@ static int ci_netif_poll_evq(ci_netif* ni, struct ci_netif_poll_state* ps, } else if( EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX_MULTI_DISCARD ) { - ef_request_id *ids = ni->rx_events; + ef_request_id* ids = ni->rx_events; int n_ids, j; ef_vi* vi = CI_NETIF_RX_VI(ni, intf_i, ev[i].rx.q_id); n_ids = ef_vi_receive_unbundle(vi, &ev[i], ids); @@ -1956,17 +1930,16 @@ static int ci_netif_poll_evq(ci_netif* ni, struct ci_netif_poll_state* ps, __handle_rx_pkt(ni, ps, &s.rx_pkt); copy_efct_to_pkt(ni, evq, ev[i].rx_ref.pkt_id, pkt); oo_offbuf_init(&pkt->buf, pkt->dma_start, pay_len); - discard_rx_multi_pkts(ni, ps, intf_i, &s, pay_len, - ev[i].rx_ref_discard.flags, pkt); + discard_rx_multi_pkts( + ni, ps, intf_i, &s, pay_len, ev[i].rx_ref_discard.flags, pkt); } efct_vi_rxpkt_release(evq, ev[i].rx_ref.pkt_id); } else if( EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_TX_ERROR ) { - LOG_U(log(LPF "[%d] intf %d TX_ERROR %d "EF_EVENT_FMT, - NI_ID(ni), intf_i, - (int) EF_EVENT_TX_ERROR_TYPE(ev[i]), - 
EF_EVENT_PRI_ARG(ev[i]))); + LOG_U(log(LPF "[%d] intf %d TX_ERROR %d " EF_EVENT_FMT, NI_ID(ni), + intf_i, (int) EF_EVENT_TX_ERROR_TYPE(ev[i]), + EF_EVENT_PRI_ARG(ev[i]))); CITP_STATS_NETIF_INC(ni, tx_error_events); } @@ -1978,23 +1951,26 @@ static int ci_netif_poll_evq(ci_netif* ni, struct ci_netif_poll_state* ps, else { /* NB. If you see this for an RX event, then perhaps some code * which I thought was obsolete is needed. */ - ci_assert( EF_EVENT_TYPE(ev[i]) != EF_EVENT_TYPE_RX ); - LOG_E(log(LPF "***** UNKNOWN EVENT "EF_EVENT_FMT" (abstracted type:%d)" - " *****", - EF_EVENT_PRI_ARG(ev[i]), EF_EVENT_TYPE(ev[i]))); + ci_assert(EF_EVENT_TYPE(ev[i]) != EF_EVENT_TYPE_RX); + LOG_E( + log(LPF "***** UNKNOWN EVENT " EF_EVENT_FMT " (abstracted type:%d)" + " *****", + EF_EVENT_PRI_ARG(ev[i]), EF_EVENT_TYPE(ev[i]))); } } #ifndef NDEBUG - if( CI_NETIF_TX_VI(ni, intf_i, 0)->nic_type.arch != EF_VI_ARCH_AF_XDP ) { + if( ! (CI_NETIF_TX_VI(ni, intf_i, 0)->nic_type.arch == EF_VI_ARCH_AF_XDP || + CI_NETIF_TX_VI(ni, intf_i, 0)->nic_type.arch == + EF_VI_ARCH_SWXTCH) ) { int vi_i; int txq_level = 0; - for( vi_i = 0; vi_i < ci_netif_num_vis(ni); ++vi_i) + for( vi_i = 0; vi_i < ci_netif_num_vis(ni); ++vi_i ) txq_level += ef_vi_transmit_fill_level(&ni->nic_hw[intf_i].vis[vi_i]) + ni->state->nic[intf_i].dmaq[vi_i].num; - ci_assert_equiv(txq_level == 0, - (ni->state->nic[intf_i].tx_dmaq_insert_seq == - ni->state->nic[intf_i].tx_dmaq_done_seq)); + ci_assert_equiv( + txq_level == 0, (ni->state->nic[intf_i].tx_dmaq_insert_seq == + ni->state->nic[intf_i].tx_dmaq_done_seq)); } #endif @@ -2004,8 +1980,7 @@ static int ci_netif_poll_evq(ci_netif* ni, struct ci_netif_poll_state* ps, } while( total_evs < NI_OPTS(ni).evs_per_poll ); /* If we've drained the TXQ, we can start trying CTPIO again. 
*/ - if( completed_tx && - ef_vi_transmit_fill_level(ci_netif_vi(ni, intf_i)) == 0 ) + if( completed_tx && ef_vi_transmit_fill_level(ci_netif_vi(ni, intf_i)) == 0 ) ci_netif_ctpio_resume(ni, intf_i); if( s.frag_pkt != NULL ) { @@ -2037,8 +2012,7 @@ static int ci_netif_poll_intf(ci_netif* ni, int intf_i, int max_evs) if( rc > 0 ) { total_evs += rc; process_post_poll_list(ni); - } - else + } else break; } while( total_evs < max_evs ); @@ -2077,11 +2051,13 @@ int ci_netif_poll_intf_future(ci_netif* ni, int intf_i, ci_uint64 start_frc) struct ci_netif_poll_state ps; ci_ip_pkt_fmt* pkt; const uint8_t* dma; - int (*future_poll)(ef_vi* vi, ef_event* evs, int evs_len) = evq->ops.eventq_poll; + int (*future_poll)(ef_vi * vi, ef_event * evs, int evs_len) = + evq->ops.eventq_poll; /* Number of data bytes in the first cache line of efct packets */ - static const size_t efct_begin_len = CI_CACHE_LINE_SIZE - - (EFCT_RX_HEADER_NEXT_FRAME_LOC_1 & (CI_CACHE_LINE_SIZE - 1)); + static const size_t efct_begin_len = + CI_CACHE_LINE_SIZE - + (EFCT_RX_HEADER_NEXT_FRAME_LOC_1 & (CI_CACHE_LINE_SIZE - 1)); ci_assert(ci_netif_is_locked(ni)); ci_assert(ni->state->in_poll == 0); @@ -2098,8 +2074,7 @@ int ci_netif_poll_intf_future(ci_netif* ni, int intf_i, ci_uint64 start_frc) return 0; memcpy(pkt->dma_start, dma, efct_begin_len); future_poll = efct_vi_rx_future_poll; - } - else { + } else { pkt = ci_netif_intf_next_rx_pkt(ni, evq); dma = pkt->dma_start; if( pkt == NULL ) @@ -2159,13 +2134,12 @@ int ci_netif_poll_intf_future(ci_netif* ni, int intf_i, ci_uint64 start_frc) ++ni->state->in_poll; if( EF_EVENT_TYPE(ev[0]) == EF_EVENT_TYPE_RX ) { ci_assert_equal(OO_PP_ID(OO_PKT_P(pkt)), EF_EVENT_RX_RQ_ID(ev[0])); - if( (ev[0].rx.flags & (EF_EVENT_FLAG_SOP | EF_EVENT_FLAG_CONT)) - == EF_EVENT_FLAG_SOP ) { + if( (ev[0].rx.flags & (EF_EVENT_FLAG_SOP | EF_EVENT_FLAG_CONT)) == + EF_EVENT_FLAG_SOP ) { pkt->pay_len = EF_EVENT_RX_BYTES(ev[0]) - evq->rx_prefix_len; handle_future = true; } - } - else if( 
EF_EVENT_TYPE(ev[0]) == EF_EVENT_TYPE_RX_REF ) { + } else if( EF_EVENT_TYPE(ev[0]) == EF_EVENT_TYPE_RX_REF ) { #ifndef NDEBUG { const void* pkt_start = efct_vi_rxpkt_get(evq, ev[0].rx_ref.pkt_id); @@ -2175,7 +2149,7 @@ int ci_netif_poll_intf_future(ci_netif* ni, int intf_i, ci_uint64 start_frc) pkt->pay_len = ev[0].rx_ref.len; if( pkt->pay_len > efct_begin_len ) memcpy(pkt->dma_start + efct_begin_len, dma + efct_begin_len, - pkt->pay_len - efct_begin_len); + pkt->pay_len - efct_begin_len); get_efct_timestamp(ni, evq, ev[0].rx_ref.pkt_id, pkt); efct_vi_rxpkt_release(evq, ev[0].rx_ref.pkt_id); handle_future = true; @@ -2186,7 +2160,7 @@ int ci_netif_poll_intf_future(ci_netif* ni, int intf_i, ci_uint64 start_frc) oo_offbuf_init(&pkt->buf, PKT_START(pkt), pkt->pay_len); handle_rx_post_future(ni, &ps, pkt, status, &future); - if(CI_UNLIKELY( rc > 1 )) { + if( CI_UNLIKELY(rc > 1) ) { /* We have handled the first event, so remove it from the array and * handle the rest normally. Add one to the returned count to include * the one handled here. 
@@ -2195,8 +2169,7 @@ int ci_netif_poll_intf_future(ci_netif* ni, int intf_i, ci_uint64 start_frc) ev[i - 1] = ev[i]; rc = 1 + ci_netif_poll_evq(ni, &ps, intf_i, rc - 1); } - } - else { + } else { CITP_STATS_NETIF_INC(ni, rx_future_rollback_event); rollback_rx_future(ni, pkt, status, &future); if( evq->nic_type.arch == EF_VI_ARCH_EFCT ) @@ -2231,16 +2204,14 @@ void ci_netif_loopback_pkts_send(ci_netif* ni) int i = 0; #endif - CI_BUILD_ASSERT( - CI_MEMBER_OFFSET(ci_ip_pkt_fmt_prefix, tcp_tx.lo.rx_sock) == - CI_MEMBER_OFFSET(ci_ip_pkt_fmt_prefix, tcp_rx.lo.rx_sock) ); - CI_BUILD_ASSERT( - CI_MEMBER_OFFSET(ci_ip_pkt_fmt_prefix, tcp_tx.lo.tx_sock) == - CI_MEMBER_OFFSET(ci_ip_pkt_fmt_prefix, tcp_rx.lo.tx_sock) ); + CI_BUILD_ASSERT(CI_MEMBER_OFFSET(ci_ip_pkt_fmt_prefix, tcp_tx.lo.rx_sock) == + CI_MEMBER_OFFSET(ci_ip_pkt_fmt_prefix, tcp_rx.lo.rx_sock)); + CI_BUILD_ASSERT(CI_MEMBER_OFFSET(ci_ip_pkt_fmt_prefix, tcp_tx.lo.tx_sock) == + CI_MEMBER_OFFSET(ci_ip_pkt_fmt_prefix, tcp_rx.lo.tx_sock)); while( OO_PP_NOT_NULL(ni->state->looppkts) ) { #ifdef __KERNEL__ - if(CI_UNLIKELY( i++ > ni->pkt_sets_n * PKTS_PER_SET )) { + if( CI_UNLIKELY(i++ > ni->pkt_sets_n * PKTS_PER_SET) ) { ci_netif_error_detected(ni, CI_NETIF_ERROR_LOOP_PKTS_LIST, __FUNCTION__); return; } @@ -2257,9 +2228,8 @@ void ci_netif_loopback_pkts_send(ci_netif* ni) ni->state->n_looppkts--; LOG_NR(ci_log(N_FMT "loopback RX pkt %d: %d->%d", N_PRI_ARGS(ni), - OO_PKT_FMT(pkt), - OO_SP_FMT(pkt->pf.tcp_tx.lo.tx_sock), - OO_SP_FMT(pkt->pf.tcp_tx.lo.rx_sock))); + OO_PKT_FMT(pkt), OO_SP_FMT(pkt->pf.tcp_tx.lo.tx_sock), + OO_SP_FMT(pkt->pf.tcp_tx.lo.rx_sock))); oo_offbuf_init(&pkt->buf, PKT_START(pkt), pkt->buf_len); pkt->intf_i = OO_INTF_I_LOOPBACK; @@ -2272,16 +2242,16 @@ void ci_netif_loopback_pkts_send(ci_netif* ni) oo_tcpdump_dump_pkt(ni, pkt); pkt->next = OO_PP_NULL; #if CI_CFG_IPV6 - if( oo_pkt_ether_type(pkt) == CI_ETHERTYPE_IP6 ) - pkt->flags |= CI_PKT_FLAG_IS_IP6; - else - pkt->flags &=~ CI_PKT_FLAG_IS_IP6; + if( 
oo_pkt_ether_type(pkt) == CI_ETHERTYPE_IP6 ) + pkt->flags |= CI_PKT_FLAG_IS_IP6; + else + pkt->flags &= ~CI_PKT_FLAG_IS_IP6; #endif ip = oo_ipx_hdr(pkt); af = oo_pkt_af(pkt); ci_tcp_handle_rx(ni, NULL, pkt, PKT_IPX_TCP_HDR(af, pkt), - ipx_hdr_tot_len(af, ip) - CI_IPX_IHL(af, ip)); + ipx_hdr_tot_len(af, ip) - CI_IPX_IHL(af, ip)); } } @@ -2313,15 +2283,16 @@ int ci_netif_poll_n(ci_netif* netif, int max_evs) if( ci_netif_need_timer_prime(netif, IPTIMER_STATE(netif)->frc) ) { if( NI_OPTS(netif).timer_usec != 0 ) OO_STACK_FOR_EACH_INTF_I(netif, intf_i) - ef_eventq_timer_prime(ci_netif_vi(netif, intf_i), - NI_OPTS(netif).timer_usec); + ef_eventq_timer_prime( + ci_netif_vi(netif, intf_i), NI_OPTS(netif).timer_usec); netif->state->evq_last_prime = IPTIMER_STATE(netif)->frc; } #endif ci_assert(netif->state->in_poll == 0); ++netif->state->in_poll; - OO_STACK_FOR_EACH_INTF_I(netif, intf_i) { + OO_STACK_FOR_EACH_INTF_I(netif, intf_i) + { int n = ci_netif_poll_intf(netif, intf_i, max_evs); ci_assert(n >= 0); n_evs_handled += n; @@ -2345,11 +2316,11 @@ int ci_netif_poll_n(ci_netif* netif, int max_evs) ci_assert_gt(netif->state->kernel_packets_pending, 0); if( netif->state->kernel_packets_pending >= - NI_OPTS(netif).kernel_packets_batch_size || + NI_OPTS(netif).kernel_packets_batch_size || frc - netif->state->kernel_packets_last_forwarded >= - netif->state->kernel_packets_cycles ) - ef_eplock_holder_set_flag(&netif->state->lock, - CI_EPLOCK_NETIF_KERNEL_PACKETS); + netif->state->kernel_packets_cycles ) + ef_eplock_holder_set_flag( + &netif->state->lock, CI_EPLOCK_NETIF_KERNEL_PACKETS); } #endif @@ -2365,16 +2336,16 @@ int ci_netif_poll_n(ci_netif* netif, int max_evs) * from the unlock hook only. 
*/ if( oo_want_proactive_socket_allocation(netif) ) - ef_eplock_holder_set_flag(&netif->state->lock, - CI_EPLOCK_NETIF_NEED_SOCK_BUFS); + ef_eplock_holder_set_flag( + &netif->state->lock, CI_EPLOCK_NETIF_NEED_SOCK_BUFS); - if(CI_LIKELY( netif->state->rxq_low <= 1 )) + if( CI_LIKELY(netif->state->rxq_low <= 1) ) netif->state->mem_pressure &= ~OO_MEM_PRESSURE_LOW; else netif->state->mem_pressure |= OO_MEM_PRESSURE_LOW; /* ?? TODO: move this into an unlock flag. */ - if(CI_UNLIKELY( netif->state->mem_pressure & OO_MEM_PRESSURE_CRITICAL )) + if( CI_UNLIKELY(netif->state->mem_pressure & OO_MEM_PRESSURE_CRITICAL) ) if( ci_netif_mem_pressure_try_exit(netif) ) CITP_STATS_NETIF_INC(netif, memory_pressure_exit_poll); diff --git a/src/lib/transport/ip/netif_init.c b/src/lib/transport/ip/netif_init.c index dddda7ce1..4b2a57249 100644 --- a/src/lib/transport/ip/netif_init.c +++ b/src/lib/transport/ip/netif_init.c @@ -45,11 +45,9 @@ const char* oo_uk_intf_ver = OO_UK_INTF_VER; *****************************************************************************/ - - #ifdef __KERNEL__ -#define assert_zero(x) ci_assert_equal((x), 0) +#define assert_zero(x) ci_assert_equal((x), 0) void ci_netif_state_init(ci_netif* ni, int cpu_khz, const char* name) { @@ -62,7 +60,8 @@ void ci_netif_state_init(ci_netif* ni, int cpu_khz, const char* name) nis->opts = ni->opts; /* TX DMA overflow queue and id allocator init for nvme plugin */ - OO_STACK_FOR_EACH_INTF_I(ni, nic_i) { + OO_STACK_FOR_EACH_INTF_I(ni, nic_i) + { nn = &nis->nic[nic_i]; for( i = 0; i < sizeof(nn->dmaq) / sizeof(nn->dmaq[0]); ++i ) oo_pktq_init(&nn->dmaq[i]); @@ -74,10 +73,10 @@ void ci_netif_state_init(ci_netif* ni, int cpu_khz, const char* name) nn->rx_frags = OO_PP_NULL; #if CI_CFG_TX_CRC_OFFLOAD - if( NI_OPTS(ni).tcp_offload_plugin == CITP_TCP_OFFLOAD_NVME ) - ci_nvme_plugin_crc_id_init(&nis->nvme_crc_plugin_idp[nic_i], - ni->nic_hw[nic_i].plugin_tx_region_id, - NI_OPTS(ni).nvme_crc_table_cap); + if( 
NI_OPTS(ni).tcp_offload_plugin == CITP_TCP_OFFLOAD_NVME ) + ci_nvme_plugin_crc_id_init(&nis->nvme_crc_plugin_idp[nic_i], + ni->nic_hw[nic_i].plugin_tx_region_id, + NI_OPTS(ni).nvme_crc_table_cap); #endif } @@ -102,40 +101,37 @@ void ci_netif_state_init(ci_netif* ni, int cpu_khz, const char* name) oo_p_dllink_init(ni, list); for( i = 0; i < NI_OPTS(ni).defer_arp_pkts; i++ ) { struct oo_p_dllink_state link = - oo_p_dllink_ptr(ni, &ni->deferred_pkts[i].link); + oo_p_dllink_ptr(ni, &ni->deferred_pkts[i].link); oo_p_dllink_init(ni, link); oo_p_dllink_add(ni, list, link); } ci_netif_filter_init(ni, ci_log2_le(ci_netif_filter_table_size(ni))); #if CI_CFG_IPV6 - ci_ip6_netif_filter_init(ni->ip6_filter_table, - ci_log2_le(NI_OPTS(ni).max_ep_bufs) + 1); + ci_ip6_netif_filter_init( + ni->ip6_filter_table, ci_log2_le(NI_OPTS(ni).max_ep_bufs) + 1); #endif - oo_p_dllink_init(ni, oo_p_dllink_ptr(ni, - &nis->timeout_q[OO_TIMEOUT_Q_TIMEWAIT])); - oo_p_dllink_init(ni, oo_p_dllink_ptr(ni, - &nis->timeout_q[OO_TIMEOUT_Q_FINWAIT])); - ci_ip_timer_init(ni, &nis->timeout_tid, - oo_ptr_to_statep(ni, &nis->timeout_tid), - "ttid"); + oo_p_dllink_init( + ni, oo_p_dllink_ptr(ni, &nis->timeout_q[OO_TIMEOUT_Q_TIMEWAIT])); + oo_p_dllink_init( + ni, oo_p_dllink_ptr(ni, &nis->timeout_q[OO_TIMEOUT_Q_FINWAIT])); + ci_ip_timer_init( + ni, &nis->timeout_tid, oo_ptr_to_statep(ni, &nis->timeout_tid), "ttid"); nis->timeout_tid.fn = CI_IP_TIMER_NETIF_TIMEOUT; #if CI_CFG_TCP_OFFLOAD_RECYCLER - ci_ip_timer_init(ni, &nis->recycle_tid, - oo_ptr_to_statep(ni, &nis->recycle_tid), - "rctq"); + ci_ip_timer_init( + ni, &nis->recycle_tid, oo_ptr_to_statep(ni, &nis->recycle_tid), "rctq"); nis->recycle_tid.fn = CI_IP_TIMER_NETIF_TCP_RECYCLE; oo_p_dllink_init(ni, oo_p_dllink_ptr(ni, &nis->recycle_retry_q)); #endif #if CI_CFG_SUPPORT_STATS_COLLECTION - ci_ip_timer_init(ni, &nis->stats_tid, - oo_ptr_to_statep(ni, &nis->stats_tid), - "stat"); + ci_ip_timer_init( + ni, &nis->stats_tid, oo_ptr_to_statep(ni, 
&nis->stats_tid), "stat"); nis->stats_tid.fn = CI_IP_TIMER_NETIF_STATS; ci_ip_stats_clear(&nis->stats_snapshot); @@ -166,21 +162,19 @@ void ci_netif_state_init(ci_netif* ni, int cpu_khz, const char* name) oo_p_dllink_init(ni, oo_p_dllink_ptr(ni, &nis->post_poll_list)); nis->sock_spin_cycles = - __oo_usec_to_cycles64(cpu_khz, NI_OPTS(ni).spin_usec); - nis->buzz_cycles = - __oo_usec_to_cycles64(cpu_khz, NI_OPTS(ni).buzz_usec); + __oo_usec_to_cycles64(cpu_khz, NI_OPTS(ni).spin_usec); + nis->buzz_cycles = __oo_usec_to_cycles64(cpu_khz, NI_OPTS(ni).buzz_usec); nis->timer_prime_cycles = - __oo_usec_to_cycles64(cpu_khz, NI_OPTS(ni).timer_prime_usec); + __oo_usec_to_cycles64(cpu_khz, NI_OPTS(ni).timer_prime_usec); #if CI_CFG_INJECT_PACKETS nis->kernel_packets_cycles = - __oo_usec_to_cycles64(cpu_khz, - NI_OPTS(ni).kernel_packets_timer_usec); + __oo_usec_to_cycles64(cpu_khz, NI_OPTS(ni).kernel_packets_timer_usec); #endif ci_ip_timer_state_init(ni, cpu_khz); nis->last_spin_poll_frc = IPTIMER_STATE(ni)->frc; nis->last_sleep_frc = IPTIMER_STATE(ni)->frc; - + oo_timesync_update(efab_tcp_driver.timesync); assert_zero(nis->defer_work_count); @@ -219,8 +213,7 @@ void ci_netif_state_init(ci_netif* ni, int cpu_khz, const char* name) #endif #if CI_CFG_TCP_SHARED_LOCAL_PORTS - for( i = 0; - i < nis->active_wild_table_entries_n * nis->active_wild_pools_n; + for( i = 0; i < nis->active_wild_table_entries_n * nis->active_wild_pools_n; ++i ) { oo_p_dllink_init(ni, oo_p_dllink_ptr(ni, &ni->active_wild_table[i])); } @@ -244,8 +237,8 @@ void ci_netif_state_init(ci_netif* ni, int cpu_khz, const char* name) list = oo_p_dllink_ptr(ni, &nis->active_cache.fd_states); oo_p_dllink_init(ni, list); - nis->active_cache.avail_stack = oo_ptr_to_statep(ni, - &nis->active_cache_avail_stack); + nis->active_cache.avail_stack = + oo_ptr_to_statep(ni, &nis->active_cache_avail_stack); nis->active_cache_avail_stack = nis->opts.sock_cache_max; list = oo_p_dllink_ptr(ni, &nis->passive_scalable_cache.cache); @@ 
-255,8 +248,8 @@ void ci_netif_state_init(ci_netif* ni, int cpu_khz, const char* name) list = oo_p_dllink_ptr(ni, &nis->passive_scalable_cache.fd_states); oo_p_dllink_init(ni, list); - nis->passive_scalable_cache.avail_stack = oo_ptr_to_statep - (ni, &ni->state->passive_cache_avail_stack); + nis->passive_scalable_cache.avail_stack = + oo_ptr_to_statep(ni, &ni->state->passive_cache_avail_stack); #endif #if CI_CFG_INJECT_PACKETS @@ -264,7 +257,6 @@ void ci_netif_state_init(ci_netif* ni, int cpu_khz, const char* name) assert_zero(nis->kernel_packets_last_forwarded); assert_zero(nis->kernel_packets_pending); #endif - } #endif @@ -286,21 +278,21 @@ static ci_uint32 citp_tcp_adv_win_scale_max = CI_TCP_WSCL_MAX; static ci_uint32 citp_fin_timeout = CI_CFG_TCP_FIN_TIMEOUT; static ci_uint32 citp_retransmit_threshold = CI_TCP_RETRANSMIT_THRESHOLD; static ci_uint32 citp_retransmit_threshold_orphan = - CI_TCP_RETRANSMIT_THRESHOLD_ORPHAN; + CI_TCP_RETRANSMIT_THRESHOLD_ORPHAN; static ci_uint32 citp_retransmit_threshold_syn = - CI_TCP_RETRANSMIT_THRESHOLD_SYN; + CI_TCP_RETRANSMIT_THRESHOLD_SYN; static ci_uint32 citp_retransmit_threshold_synack = - CI_TCP_RETRANSMIT_THRESHOLD_SYN; + CI_TCP_RETRANSMIT_THRESHOLD_SYN; static ci_uint32 citp_keepalive_probes = CI_TCP_KEEPALIVE_PROBES; static ci_uint32 citp_keepalive_time = CI_TCP_TCONST_KEEPALIVE_TIME; static ci_uint32 citp_keepalive_intvl = CI_TCP_TCONST_KEEPALIVE_INTVL; static ci_uint32 citp_syn_opts = CI_TCPT_SYN_FLAGS; static ci_uint32 citp_tcp_dsack = CI_CFG_TCP_DSACK; static ci_uint32 citp_tcp_time_wait_assassinate = CI_CFG_TIME_WAIT_ASSASSINATE; -static ci_uint32 citp_tcp_early_retransmit = 3; /* default as of 3.10 */ +static ci_uint32 citp_tcp_early_retransmit = 3; /* default as of 3.10 */ static ci_uint32 citp_challenge_ack_limit = CI_CFG_CHALLENGE_ACK_LIMIT; static ci_uint32 citp_tcp_invalid_ratelimit = - CI_CFG_TCP_OUT_OF_WINDOW_ACK_RATELIMIT; + CI_CFG_TCP_OUT_OF_WINDOW_ACK_RATELIMIT; #if CI_CFG_IPV6 static ci_uint32 
citp_auto_flowlabels = CI_AUTO_FLOWLABELS_DEFAULT; @@ -308,19 +300,19 @@ static ci_uint32 citp_auto_flowlabels = CI_AUTO_FLOWLABELS_DEFAULT; #ifndef __KERNEL__ /* Interface for sysctl. */ -ci_inline int ci_sysctl_get_values(char *path, ci_uint32 *ret, int n) +ci_inline int ci_sysctl_get_values(char* path, ci_uint32* ret, int n) { char name[CI_CFG_PROC_PATH_LEN_MAX + strlen(CI_CFG_PROC_PATH)]; char buf[CI_CFG_PROC_LINE_LEN_MAX]; int buflen; - char *p = buf; + char* p = buf; int fd; int i = 0; strcpy(name, CI_CFG_PROC_PATH); strncpy(name + strlen(CI_CFG_PROC_PATH), path, CI_CFG_PROC_PATH_LEN_MAX); fd = ci_sys_open(name, O_RDONLY); - if (fd < 0) { + if( fd < 0 ) { /* There are a lot of reasons to fail: * - too old kernel does not know this parameter; * - we are in chroot, and/or /proc is not mounted; @@ -349,8 +341,7 @@ ci_inline int ci_sysctl_get_values(char *path, ci_uint32 *ret, int n) * passive mode, when they call listen(), accept(), chroot() and listen(). */ -int -ci_setup_ipstack_params(void) +int ci_setup_ipstack_params(void) { ci_uint32 opt[3]; @@ -358,13 +349,13 @@ ci_setup_ipstack_params(void) * - we have 2 netifs in one application; * - chroot() was called after another intercepted call. 
*/ - if (citp_ipstack_params_inited) + if( citp_ipstack_params_inited ) return 0; { - int fd = ci_sys_open(CI_CFG_PROC_PATH"net/ipv4", O_RDONLY | O_DIRECTORY); + int fd = ci_sys_open(CI_CFG_PROC_PATH "net/ipv4", O_RDONLY | O_DIRECTORY); if( fd < 0 ) { - ci_log("ERROR: failed to open "CI_CFG_PROC_PATH"net/ipv4"); + ci_log("ERROR: failed to open " CI_CFG_PROC_PATH "net/ipv4"); return -1; } ci_sys_close(fd); @@ -390,75 +381,75 @@ ci_setup_ipstack_params(void) if( ci_sysctl_get_values("net/core/rmem_default", opt, 1) == 0 ) citp_udp_rcvbuf_def = opt[0]; - if (ci_sysctl_get_values("net/ipv4/tcp_max_syn_backlog", opt, 1) == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_max_syn_backlog", opt, 1) == 0 ) citp_tcp_backlog_max = opt[0]; /* We should not use non-zero winscale if tcp_window_scaling == 0 */ - if (ci_sysctl_get_values("net/ipv4/tcp_window_scaling", opt, 1) == 0 && - opt[0] == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_window_scaling", opt, 1) == 0 && + opt[0] == 0 ) citp_tcp_adv_win_scale_max = 0; /* Get fin_timeout value from Linux if it is possible */ - if (ci_sysctl_get_values("net/ipv4/tcp_fin_timeout", opt, 1) == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_fin_timeout", opt, 1) == 0 ) citp_fin_timeout = opt[0]; /* Number of retransmits */ - if (ci_sysctl_get_values("net/ipv4/tcp_retries2", opt, 1) == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_retries2", opt, 1) == 0 ) citp_retransmit_threshold = opt[0]; /* tcp_orphan_retries is usually 0, but Linux uses value 8 internally in * such a case. See linux/net/ipv4/tcp_timer.c: tcp_orphan_retries() * for details. 
*/ - if (ci_sysctl_get_values("net/ipv4/tcp_orphan_retries", opt, 1) == 0 && + if( ci_sysctl_get_values("net/ipv4/tcp_orphan_retries", opt, 1) == 0 && opt[0] > 0 ) { citp_retransmit_threshold_orphan = opt[0]; } - if (ci_sysctl_get_values("net/ipv4/tcp_syn_retries", opt, 1) == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_syn_retries", opt, 1) == 0 ) citp_retransmit_threshold_syn = opt[0]; - if (ci_sysctl_get_values("net/ipv4/tcp_synack_retries", opt, 1) == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_synack_retries", opt, 1) == 0 ) citp_retransmit_threshold_synack = opt[0]; /* Keepalive parameters */ - if (ci_sysctl_get_values("net/ipv4/tcp_keepalive_probes", opt, 1) == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_keepalive_probes", opt, 1) == 0 ) citp_keepalive_probes = opt[0]; /* These values are stored in secs, we scale to ms here */ - if (ci_sysctl_get_values("net/ipv4/tcp_keepalive_time", opt, 1) == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_keepalive_time", opt, 1) == 0 ) citp_keepalive_time = opt[0] * 1000; - if (ci_sysctl_get_values("net/ipv4/tcp_keepalive_intvl", opt, 1) == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_keepalive_intvl", opt, 1) == 0 ) citp_keepalive_intvl = opt[0] * 1000; /* SYN options */ - if (ci_sysctl_get_values("net/ipv4/tcp_sack", opt, 1) == 0) { + if( ci_sysctl_get_values("net/ipv4/tcp_sack", opt, 1) == 0 ) { if( opt[0] ) citp_syn_opts |= CI_TCPT_FLAG_SACK; else - citp_syn_opts &=~ CI_TCPT_FLAG_SACK; + citp_syn_opts &= ~CI_TCPT_FLAG_SACK; } - if (ci_sysctl_get_values("net/ipv4/tcp_timestamps", opt, 1) == 0) { + if( ci_sysctl_get_values("net/ipv4/tcp_timestamps", opt, 1) == 0 ) { if( opt[0] ) citp_syn_opts |= CI_TCPT_FLAG_TSO; else - citp_syn_opts &=~ CI_TCPT_FLAG_TSO; + citp_syn_opts &= ~CI_TCPT_FLAG_TSO; } - if (ci_sysctl_get_values("net/ipv4/tcp_window_scaling", opt, 1) == 0) { + if( ci_sysctl_get_values("net/ipv4/tcp_window_scaling", opt, 1) == 0 ) { if( opt[0] ) citp_syn_opts |= CI_TCPT_FLAG_WSCL; else - citp_syn_opts &=~ 
CI_TCPT_FLAG_WSCL; + citp_syn_opts &= ~CI_TCPT_FLAG_WSCL; } - if (ci_sysctl_get_values("net/ipv4/tcp_dsack", opt, 1) == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_dsack", opt, 1) == 0 ) citp_tcp_dsack = opt[0]; - if (ci_sysctl_get_values("net/ipv4/tcp_rfc1337", opt, 1) == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_rfc1337", opt, 1) == 0 ) citp_tcp_time_wait_assassinate = ! opt[0]; - if (ci_sysctl_get_values("net/ipv4/tcp_early_retrans", opt, 1) == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_early_retrans", opt, 1) == 0 ) citp_tcp_early_retransmit = opt[0]; - if (ci_sysctl_get_values("net/ipv4/tcp_challenge_ack_limit", opt, 1) == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_challenge_ack_limit", opt, 1) == 0 ) citp_challenge_ack_limit = opt[0]; - if (ci_sysctl_get_values("net/ipv4/tcp_invalid_ratelimit", opt, 1) == 0) + if( ci_sysctl_get_values("net/ipv4/tcp_invalid_ratelimit", opt, 1) == 0 ) citp_tcp_invalid_ratelimit = opt[0]; #if CI_CFG_IPV6 @@ -472,8 +463,7 @@ ci_setup_ipstack_params(void) #else -int -ci_setup_ipstack_params(void) +int ci_setup_ipstack_params(void) { citp_ipstack_params_inited = 0; return 0; @@ -483,24 +473,24 @@ ci_setup_ipstack_params(void) void ci_netif_config_opts_defaults(ci_netif_config_opts* opts) { -# undef CI_CFG_OPTFILE_VERSION -# undef CI_CFG_OPTGROUP -# undef CI_CFG_OPT -# undef CI_CFG_STR_OPT -# define CI_CFG_OPT(env, name, type, doc, type_modifider, group, \ - default, minimum, maximum, presentation) \ +#undef CI_CFG_OPTFILE_VERSION +#undef CI_CFG_OPTGROUP +#undef CI_CFG_OPT +#undef CI_CFG_STR_OPT +#define CI_CFG_OPT(env, name, type, doc, type_modifider, group, default, \ + minimum, maximum, presentation) \ opts->name = default; -# define CI_CFG_STR_OPT(env, name, type, doc, type_modifider, group, \ - default, minimum, maximum, presentation) \ - strncpy(opts->name, default, sizeof(opts->name)); \ +#define CI_CFG_STR_OPT(env, name, type, doc, type_modifider, group, default, \ + minimum, maximum, presentation) \ + 
strncpy(opts->name, default, sizeof(opts->name)); \ opts->name[sizeof(opts->name) - 1] = 0; -# include +#include /* now modify defaults with information from the operating system */ ci_setup_ipstack_params(); - if (citp_ipstack_params_inited) { + if( citp_ipstack_params_inited ) { opts->tcp_sndbuf_min = citp_tcp_sndbuf_min; opts->tcp_sndbuf_def = citp_tcp_sndbuf_def; opts->tcp_sndbuf_max = citp_tcp_sndbuf_max; @@ -514,8 +504,7 @@ void ci_netif_config_opts_defaults(ci_netif_config_opts* opts) opts->udp_rcvbuf_def = citp_udp_rcvbuf_def; opts->tcp_backlog_max = citp_tcp_backlog_max; - opts->tcp_synrecv_max = citp_tcp_backlog_max * - CI_CFG_ASSUME_LISTEN_SOCKS; + opts->tcp_synrecv_max = citp_tcp_backlog_max * CI_CFG_ASSUME_LISTEN_SOCKS; opts->tcp_adv_win_scale_max = citp_tcp_adv_win_scale_max; opts->fin_timeout = citp_fin_timeout; @@ -533,8 +522,8 @@ void ci_netif_config_opts_defaults(ci_netif_config_opts* opts) opts->time_wait_assassinate = citp_tcp_time_wait_assassinate; /* Early retransmit itself has gone from modern kernels, so look in an * old kernel's ip-sysctl.txt for the meaning of these values. 
*/ - opts->tcp_early_retransmit = citp_tcp_early_retransmit > 0 && - citp_tcp_early_retransmit < 4; + opts->tcp_early_retransmit = + citp_tcp_early_retransmit > 0 && citp_tcp_early_retransmit < 4; opts->tail_drop_probe = citp_tcp_early_retransmit >= 3; opts->challenge_ack_limit = citp_challenge_ack_limit; opts->oow_ack_ratelimit = citp_tcp_invalid_ratelimit; @@ -549,19 +538,23 @@ void ci_netif_config_opts_rangecheck(ci_netif_config_opts* opts) { ci_uint64 MIN; ci_uint64 MAX; - ci_int64 SMIN; - ci_int64 SMAX; + ci_int64 SMIN; + ci_int64 SMAX; int _optbits; int _bitwidth; /* stop compiler complaining if these values are not used */ - (void)MIN; (void)MAX; (void)SMIN; (void)SMAX; - (void)_optbits; (void)_bitwidth; - -#undef CI_CFG_OPTFILE_VERSION -#undef CI_CFG_OPTGROUP -#undef CI_CFG_OPT -#undef CI_CFG_STR_OPT + (void) MIN; + (void) MAX; + (void) SMIN; + (void) SMAX; + (void) _optbits; + (void) _bitwidth; + +#undef CI_CFG_OPTFILE_VERSION +#undef CI_CFG_OPTGROUP +#undef CI_CFG_OPT +#undef CI_CFG_STR_OPT #define _CI_CFG_BITVAL _optbits #define _CI_CFG_BITVAL1 1 @@ -577,55 +570,58 @@ void ci_netif_config_opts_rangecheck(ci_netif_config_opts* opts) #undef MAX #undef SMIN #undef SMAX - + #define CI_CFG_REDRESS(opt, val) opt = val; -#define CI_CFG_MSG "ERROR" +#define CI_CFG_MSG "ERROR" #define CI_CFG_STR_OPT(...) 
-#define CI_CFG_OPT(env, name, type, doc, bits, group, default, minimum, maximum, pres) \ -{ type _val = opts->name; \ - type _max; \ - type _min; \ - _optbits=sizeof(type)*8; \ - _bitwidth=_CI_CFG_BITVAL##bits; \ - MIN = 0; \ - MAX = ((1ull<<(_bitwidth-1))<<1) - 1ull; \ - SMAX = MAX >> 1; SMIN = -SMAX-1; \ - _max = (type)(maximum); /* try to stop the compiler warning */ \ - _min = (type)(minimum); /* about silly comparisons */ \ - if (_val > _max) { \ - ci_log("config: "CI_CFG_MSG" - option " #name \ - " (%"CI_PRIu64") larger than maximum " #maximum" (%"CI_PRIu64")", \ - (ci_uint64)_val, (ci_uint64) _max); \ - CI_CFG_REDRESS(opts->name, _max); \ - } \ - if (_val < _min) { \ - ci_log("config: "CI_CFG_MSG" - option " #name \ - " (%"CI_PRIu64") smaller than minimum " #minimum, \ - (ci_uint64)_val); \ - CI_CFG_REDRESS(opts->name, _min); \ - } \ -} - -# include +#define CI_CFG_OPT( \ + env, name, type, doc, bits, group, default, minimum, maximum, pres) \ + { \ + type _val = opts->name; \ + type _max; \ + type _min; \ + _optbits = sizeof(type) * 8; \ + _bitwidth = _CI_CFG_BITVAL##bits; \ + MIN = 0; \ + MAX = ((1ull << (_bitwidth - 1)) << 1) - 1ull; \ + SMAX = MAX >> 1; \ + SMIN = -SMAX - 1; \ + _max = (type) (maximum); /* try to stop the compiler warning */ \ + _min = (type) (minimum); /* about silly comparisons */ \ + if( _val > _max ) { \ + ci_log("config: " CI_CFG_MSG " - option " #name " (%" CI_PRIu64 \ + ") larger than maximum " #maximum " (%" CI_PRIu64 ")", \ + (ci_uint64) _val, (ci_uint64) _max); \ + CI_CFG_REDRESS(opts->name, _max); \ + } \ + if( _val < _min ) { \ + ci_log("config: " CI_CFG_MSG " - option " #name " (%" CI_PRIu64 \ + ") smaller than minimum " #minimum, \ + (ci_uint64) _val); \ + CI_CFG_REDRESS(opts->name, _min); \ + } \ + } + +#include /* EF_MAX_ENDPOINTS should must be divisible by 2048 */ if( opts->max_ep_bufs % EP_BUF_PER_CHUNK != 0 ) { unsigned new_max = opts->max_ep_bufs; new_max = CI_ROUND_UP(new_max, EP_BUF_PER_CHUNK); - ci_log("config: 
EF_MAX_ENDPOINTS is rounded up from %u to %u", opts->max_ep_bufs, new_max); + ci_log("config: EF_MAX_ENDPOINTS is rounded up from %u to %u", + opts->max_ep_bufs, new_max); opts->max_ep_bufs = new_max; } } - #ifndef __KERNEL__ struct string_to_bitmask { - int stb_index; - const char*const stb_str; + int stb_index; + const char* const stb_str; }; @@ -635,8 +631,7 @@ struct string_to_bitmask { * in str turns it off by using '-' or some option in str enables it. */ static void convert_string_to_bitmask(const char* str, - const struct string_to_bitmask* opts, - int opts_len, ci_uint32* bitmask_out) + const struct string_to_bitmask* opts, int opts_len, ci_uint32* bitmask_out) { int len, i, opt_found, negate; @@ -656,8 +651,7 @@ static void convert_string_to_bitmask(const char* str, negate = 1; ++str; --len; - } - else { + } else { negate = 0; } @@ -686,29 +680,26 @@ static void convert_string_to_bitmask(const char* str, static void ci_netif_config_opts_getenv_ef_log(ci_netif_config_opts* opts) { struct string_to_bitmask options[EF_LOG_MAX] = { - {EF_LOG_BANNER, "banner"}, - {EF_LOG_RESOURCE_WARNINGS, "resource_warnings"}, - {EF_LOG_CONN_DROP, "conn_drop"}, - {EF_LOG_CONFIG_WARNINGS, "config_warnings"}, - {EF_LOG_USAGE_WARNINGS, "usage_warnings"}, + { EF_LOG_BANNER, "banner" }, + { EF_LOG_RESOURCE_WARNINGS, "resource_warnings" }, + { EF_LOG_CONN_DROP, "conn_drop" }, + { EF_LOG_CONFIG_WARNINGS, "config_warnings" }, + { EF_LOG_USAGE_WARNINGS, "usage_warnings" }, }; - convert_string_to_bitmask(getenv("EF_LOG"), options, EF_LOG_MAX, - &opts->log_category); + convert_string_to_bitmask( + getenv("EF_LOG"), options, EF_LOG_MAX, &opts->log_category); } -static void -ci_netif_config_opts_getenv_ef_scalable_filters(ci_netif_config_opts* opts); +static void ci_netif_config_opts_getenv_ef_scalable_filters( + ci_netif_config_opts* opts); -static int -handle_str_opt(ci_netif_config_opts* opts, - const char* optname, char* optval_buf, size_t optval_buflen); +static int 
handle_str_opt(ci_netif_config_opts* opts, const char* optname, + char* optval_buf, size_t optval_buflen); -static int -parse_enum(ci_netif_config_opts* opts, - const char* name, const char* const* options, - const char* default_val); +static int parse_enum(ci_netif_config_opts* opts, const char* name, + const char* const* options, const char* default_val); void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) @@ -751,7 +742,8 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) static const char* const xdp_mode_opts[] = { "disabled", "compatible", 0 }; opts->xdp_mode = parse_enum(opts, "EF_XDP_MODE", xdp_mode_opts, "disabled"); if( opts->xdp_mode ) { - /* for now only in-kernel XDP is supported - enabling in-kernel mode implicitly */ + /* for now only in-kernel XDP is supported - enabling in-kernel mode + * implicitly */ opts->poll_in_kernel = 1; } #endif @@ -776,12 +768,14 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) */ #if CI_CFG_POISON_BUFS - if( (s = getenv("EF_POISON")) ) opts->poison_rx_buf = atoi(s); + if( (s = getenv("EF_POISON")) ) + opts->poison_rx_buf = atoi(s); #endif #if CI_CFG_RANDOM_DROP if( (s = getenv("EF_RX_DROP_RATE")) ) { int r = atoi(s); - if( r ) opts->rx_drop_rate = RAND_MAX / r; + if( r ) + opts->rx_drop_rate = RAND_MAX / r; } #endif if( (s = getenv("EF_URG_RFC")) ) @@ -813,7 +807,7 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) opts->evs_per_poll = atoi(s); #if CI_CFG_WANT_BPF_NATIVE else if( opts->poll_in_kernel ) - opts->evs_per_poll = 192; /* See EF_EVS_PER_POLL documentation */ + opts->evs_per_poll = 192; /* See EF_EVS_PER_POLL documentation */ #endif if( (s = getenv("EF_TCP_TCONST_MSL")) ) opts->msl_seconds = atoi(s); @@ -828,68 +822,69 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) opts->syn_opts = citp_syn_opts = v; } - if ( (s = getenv("EF_MAX_PACKETS")) ) { + if( (s = getenv("EF_MAX_PACKETS")) ) { int max_packets_rq = atoi(s); - opts->max_packets = 
(max_packets_rq + PKTS_PER_SET - 1) & - ~(PKTS_PER_SET - 1); + opts->max_packets = + (max_packets_rq + PKTS_PER_SET - 1) & ~(PKTS_PER_SET - 1); if( opts->max_packets != max_packets_rq ) /* ?? TODO: log message */ ; opts->max_rx_packets = opts->max_packets * 3 / 4; opts->max_tx_packets = opts->max_packets * 3 / 4; } - if ( (s = getenv("EF_MAX_RX_PACKETS")) ) { + if( (s = getenv("EF_MAX_RX_PACKETS")) ) { opts->max_rx_packets = atoi(s); if( opts->max_rx_packets > opts->max_packets ) opts->max_rx_packets = opts->max_packets; } - if ( (s = getenv("EF_MAX_TX_PACKETS")) ) { + if( (s = getenv("EF_MAX_TX_PACKETS")) ) { opts->max_tx_packets = atoi(s); if( opts->max_tx_packets > opts->max_packets ) opts->max_tx_packets = opts->max_packets; } - if ( (s = getenv("EF_PREALLOC_PACKETS")) ) + if( (s = getenv("EF_PREALLOC_PACKETS")) ) opts->prealloc_packets = atoi(s); - if ( (s = getenv("EF_RXQ_MIN")) ) + if( (s = getenv("EF_RXQ_MIN")) ) opts->rxq_min = atoi(s); - if ( (s = getenv("EF_MIN_FREE_PACKETS")) ) + if( (s = getenv("EF_MIN_FREE_PACKETS")) ) opts->min_free_packets = atoi(s); if( (s = getenv("EF_PREFAULT_PACKETS")) ) opts->prefault_packets = atoi(s); - if ( (s = getenv("EF_MAX_ENDPOINTS")) ) + if( (s = getenv("EF_MAX_ENDPOINTS")) ) opts->max_ep_bufs = atoi(s); - if ( (s = getenv("EF_ENDPOINT_PACKET_RESERVE")) ) + if( (s = getenv("EF_ENDPOINT_PACKET_RESERVE")) ) opts->endpoint_packet_reserve = atoi(s); - if ( (s = getenv("EF_DEFER_ARP_MAX")) ) + if( (s = getenv("EF_DEFER_ARP_MAX")) ) opts->defer_arp_pkts = atoi(s); - if ( (s = getenv("EF_DEFER_ARP_TIMEOUT")) ) + if( (s = getenv("EF_DEFER_ARP_TIMEOUT")) ) opts->defer_arp_timeout = atoi(s); - if ( (s = getenv("EF_SHARE_WITH")) ) + if( (s = getenv("EF_SHARE_WITH")) ) opts->share_with = atoi(s); #if CI_CFG_PKTS_AS_HUGE_PAGES if( (s = getenv("EF_USE_HUGE_PAGES")) ) opts->huge_pages = atoi(s); if( opts->huge_pages != 0 && opts->share_with != 0 ) { - CONFIG_LOG(opts, CONFIG_WARNINGS, "Turning huge pages off because the " - "stack is 
going to be used by multiple users"); + CONFIG_LOG(opts, CONFIG_WARNINGS, + "Turning huge pages off because the " + "stack is going to be used by multiple users"); opts->huge_pages = 0; } #endif - if ( (s = getenv("EF_COMPOUND_PAGES_MODE")) ) + if( (s = getenv("EF_COMPOUND_PAGES_MODE")) ) opts->compound_pages = atoi(s); - if ( (s = getenv("EF_RXQ_SIZE")) ) + if( (s = getenv("EF_RXQ_SIZE")) ) opts->rxq_size = atoi(s); - if ( (s = getenv("EF_RXQ_LIMIT")) ) + if( (s = getenv("EF_RXQ_LIMIT")) ) opts->rxq_limit = atoi(s); - if ( (s = getenv("EF_SHARED_RXQ_NUM")) ) + if( (s = getenv("EF_SHARED_RXQ_NUM")) ) opts->shared_rxq_num = atoi(s); - if ( (s = getenv("EF_TXQ_SIZE")) ) + if( (s = getenv("EF_TXQ_SIZE")) ) opts->txq_size = atoi(s); - if ( (s = getenv("EF_SEND_POLL_THRESH")) ) + if( (s = getenv("EF_SEND_POLL_THRESH")) ) opts->send_poll_thresh = atoi(s); - if ( (s = getenv("EF_SEND_POLL_MAX_EVS")) ) + if( (s = getenv("EF_SEND_POLL_MAX_EVS")) ) opts->send_poll_max_events = atoi(s); - if ( (s = getenv("EF_DEFER_WORK_LIMIT")) ) + if( (s = getenv("EF_DEFER_WORK_LIMIT")) ) opts->defer_work_limit = atoi(s); if( (s = getenv("EF_UDP_SEND_UNLOCK_THRESH")) ) opts->udp_send_unlock_thresh = atoi(s); @@ -905,25 +900,25 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) opts->udp_port_handover3_min = atoi(s); if( (s = getenv("EF_UDP_PORT_HANDOVER3_MAX")) ) opts->udp_port_handover3_max = atoi(s); - if ( (s = getenv("EF_DELACK_THRESH")) ) + if( (s = getenv("EF_DELACK_THRESH")) ) opts->delack_thresh = atoi(s); #if CI_CFG_DYNAMIC_ACK_RATE - if ( (s = getenv("EF_DYNAMIC_ACK_THRESH")) ) + if( (s = getenv("EF_DYNAMIC_ACK_THRESH")) ) opts->dynack_thresh = atoi(s); /* Always want this value to be >= delack_thresh to simplify code - * that uses it + * that uses it */ opts->dynack_thresh = CI_MAX(opts->dynack_thresh, opts->delack_thresh); #endif - if ( (s = getenv("EF_CHALLENGE_ACK_LIMIT")) ) + if( (s = getenv("EF_CHALLENGE_ACK_LIMIT")) ) opts->challenge_ack_limit = atoi(s); - if ( 
(s = getenv("EF_INVALID_ACK_RATELIMIT")) ) + if( (s = getenv("EF_INVALID_ACK_RATELIMIT")) ) opts->oow_ack_ratelimit = atoi(s); #if CI_CFG_FD_CACHING - if ( (s = getenv("EF_SOCKET_CACHE_MAX")) ) + if( (s = getenv("EF_SOCKET_CACHE_MAX")) ) opts->sock_cache_max = atoi(s); - if ( (s = getenv("EF_PER_SOCKET_CACHE_MAX")) ) + if( (s = getenv("EF_PER_SOCKET_CACHE_MAX")) ) opts->per_sock_cache_max = atoi(s); if( opts->per_sock_cache_max < 0 ) opts->per_sock_cache_max = opts->sock_cache_max; @@ -931,18 +926,18 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) #if CI_CFG_PORT_STRIPING /* configuration opttions for striping */ - if ( (s = getenv("EF_STRIPE_NETMASK")) ) { + if( (s = getenv("EF_STRIPE_NETMASK")) ) { int a1, a2, a3, a4; sscanf(s, "%d.%d.%d.%d", &a1, &a2, &a3, &a4); opts->stripe_netmask_be32 = (a1 << 24) | (a2 << 16) | (a3 << 8) | a4; opts->stripe_netmask_be32 = CI_BSWAP_BE32(opts->stripe_netmask_be32); } - if ( (s = getenv("EF_STRIPE_DUPACK_THRESH")) ) { + if( (s = getenv("EF_STRIPE_DUPACK_THRESH")) ) { opts->stripe_dupack_threshold = atoi(s); opts->stripe_dupack_threshold = - CI_MAX(opts->stripe_dupack_threshold, CI_CFG_TCP_DUPACK_THRESH_BASE); - opts->stripe_dupack_threshold = - CI_MIN(opts->stripe_dupack_threshold, CI_CFG_TCP_DUPACK_THRESH_MAX); + CI_MAX(opts->stripe_dupack_threshold, CI_CFG_TCP_DUPACK_THRESH_BASE); + opts->stripe_dupack_threshold = + CI_MIN(opts->stripe_dupack_threshold, CI_CFG_TCP_DUPACK_THRESH_MAX); } if( (s = getenv("EF_STRIPE_TCP_OPT")) ) opts->stripe_tcp_opt = atoi(s); @@ -1020,13 +1015,13 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) if( (s = getenv("EF_ACCEPTQ_MIN_BACKLOG")) ) opts->acceptq_min_backlog = atoi(s); - if ( (s = getenv("EF_TCP_SNDBUF")) ) + if( (s = getenv("EF_TCP_SNDBUF")) ) opts->tcp_sndbuf_user = atoi(s); - if ( (s = getenv("EF_TCP_RCVBUF")) ) + if( (s = getenv("EF_TCP_RCVBUF")) ) opts->tcp_rcvbuf_user = atoi(s); - if ( (s = getenv("EF_UDP_SNDBUF")) ) + if( (s = getenv("EF_UDP_SNDBUF")) ) 
opts->udp_sndbuf_user = atoi(s); - if ( (s = getenv("EF_UDP_RCVBUF")) ) + if( (s = getenv("EF_UDP_RCVBUF")) ) opts->udp_rcvbuf_user = atoi(s); if( (s = getenv("EF_TCP_SNDBUF_ESTABLISHED_DEFAULT")) ) @@ -1051,23 +1046,23 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) opts->udp_rcvbuf_def = oo_adjust_SO_XBUF(opts->udp_rcvbuf_user); } - if ( (s = getenv("EF_RETRANSMIT_THRESHOLD_SYNACK")) ) + if( (s = getenv("EF_RETRANSMIT_THRESHOLD_SYNACK")) ) opts->retransmit_threshold_synack = atoi(s); - if ( (s = getenv("EF_RETRANSMIT_THRESHOLD_SYN")) ) + if( (s = getenv("EF_RETRANSMIT_THRESHOLD_SYN")) ) opts->retransmit_threshold_syn = atoi(s); - if ( (s = getenv("EF_RETRANSMIT_THRESHOLD")) ) + if( (s = getenv("EF_RETRANSMIT_THRESHOLD")) ) opts->retransmit_threshold = atoi(s); - if ( (s = getenv("EF_TCP_BACKLOG_MAX")) ) { + if( (s = getenv("EF_TCP_BACKLOG_MAX")) ) { opts->tcp_backlog_max = atoi(s); - if ( getenv("EF_TCP_SYNRECV_MAX") == NULL ) { - opts->tcp_synrecv_max = opts->tcp_backlog_max * - CI_CFG_ASSUME_LISTEN_SOCKS; + if( getenv("EF_TCP_SYNRECV_MAX") == NULL ) { + opts->tcp_synrecv_max = + opts->tcp_backlog_max * CI_CFG_ASSUME_LISTEN_SOCKS; } } - if ( (s = getenv("EF_TCP_SYNRECV_MAX")) ) { + if( (s = getenv("EF_TCP_SYNRECV_MAX")) ) { opts->tcp_synrecv_max = atoi(s); } /* Number of aux buffers is tcp_synrecv_max * 2. @@ -1075,94 +1070,97 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) * tcp_synrecv_max * 2 / 7. * And we need some space for real endpoints. */ if( opts->tcp_synrecv_max * 4 > opts->max_ep_bufs * 7 ) { - CONFIG_LOG(opts, CONFIG_WARNINGS, "%s: EF_TCP_SYNRECV_MAX=%d and " - "EF_MAX_ENDPOINTS=%d are inconsistent.", - opts->tcp_synrecv_max * 2 > opts->max_ep_bufs * 7 ? - "ERROR" : "WARNING", - opts->tcp_synrecv_max, opts->max_ep_bufs); + CONFIG_LOG(opts, CONFIG_WARNINGS, + "%s: EF_TCP_SYNRECV_MAX=%d and " + "EF_MAX_ENDPOINTS=%d are inconsistent.", + opts->tcp_synrecv_max * 2 > opts->max_ep_bufs * 7 ? 
"ERROR" + : "WARNING", + opts->tcp_synrecv_max, opts->max_ep_bufs); if( getenv("EF_TCP_SYNRECV_MAX") == NULL ) { - CONFIG_LOG(opts, CONFIG_WARNINGS, "EF_TCP_SYNRECV_MAX is set to %d " - "based on %s value and assuming up to %d listening " - "sockets in the Onload stack", - opts->tcp_synrecv_max, - getenv("EF_TCP_BACKLOG_MAX") == NULL ? - "/proc/sys/net/ipv4/tcp_max_syn_backlog" : - "EF_TCP_BACKLOG_MAX", - CI_CFG_ASSUME_LISTEN_SOCKS); + CONFIG_LOG(opts, CONFIG_WARNINGS, + "EF_TCP_SYNRECV_MAX is set to %d " + "based on %s value and assuming up to %d listening " + "sockets in the Onload stack", + opts->tcp_synrecv_max, + getenv("EF_TCP_BACKLOG_MAX") == NULL + ? "/proc/sys/net/ipv4/tcp_max_syn_backlog" + : "EF_TCP_BACKLOG_MAX", + CI_CFG_ASSUME_LISTEN_SOCKS); } - CONFIG_LOG(opts, CONFIG_WARNINGS, "Too few endpoints requested: ~4 " - "syn-receive states consume one endpoint. "); + CONFIG_LOG(opts, CONFIG_WARNINGS, + "Too few endpoints requested: ~4 " + "syn-receive states consume one endpoint. 
"); } - if ( (s = getenv("EF_TCP_INITIAL_CWND")) ) + if( (s = getenv("EF_TCP_INITIAL_CWND")) ) opts->initial_cwnd = atoi(s); - if ( (s = getenv("EF_TCP_LOSS_MIN_CWND")) ) + if( (s = getenv("EF_TCP_LOSS_MIN_CWND")) ) opts->loss_min_cwnd = atoi(s); - if ( (s = getenv("EF_TCP_MIN_CWND")) ) + if( (s = getenv("EF_TCP_MIN_CWND")) ) opts->min_cwnd = atoi(s); #if CI_CFG_TCP_FASTSTART - if ( (s = getenv("EF_TCP_FASTSTART_INIT")) ) + if( (s = getenv("EF_TCP_FASTSTART_INIT")) ) opts->tcp_faststart_init = atoi(s); - if ( (s = getenv("EF_TCP_FASTSTART_IDLE")) ) + if( (s = getenv("EF_TCP_FASTSTART_IDLE")) ) opts->tcp_faststart_idle = atoi(s); - if ( (s = getenv("EF_TCP_FASTSTART_LOSS")) ) + if( (s = getenv("EF_TCP_FASTSTART_LOSS")) ) opts->tcp_faststart_loss = atoi(s); #endif - if ( (s = getenv("EF_RFC_RTO_INITIAL"))) + if( (s = getenv("EF_RFC_RTO_INITIAL")) ) opts->rto_initial = atoi(s); - if ( (s = getenv("EF_RFC_RTO_MIN"))) + if( (s = getenv("EF_RFC_RTO_MIN")) ) opts->rto_min = atoi(s); - if ( (s = getenv("EF_RFC_RTO_MAX"))) + if( (s = getenv("EF_RFC_RTO_MAX")) ) opts->rto_max = atoi(s); - if ( (s = getenv("EF_KEEPALIVE_TIME"))) + if( (s = getenv("EF_KEEPALIVE_TIME")) ) opts->keepalive_time = atoi(s); - if ( (s = getenv("EF_KEEPALIVE_INTVL"))) + if( (s = getenv("EF_KEEPALIVE_INTVL")) ) opts->keepalive_intvl = atoi(s); - if ( (s = getenv("EF_KEEPALIVE_PROBES"))) + if( (s = getenv("EF_KEEPALIVE_PROBES")) ) opts->keepalive_probes = atoi(s); #ifndef NDEBUG - if( (s = getenv("EF_TCP_MAX_SEQERR_MSGS"))) + if( (s = getenv("EF_TCP_MAX_SEQERR_MSGS")) ) opts->tcp_max_seqerr_msg = atoi(s); #endif #if CI_CFG_BURST_CONTROL - if ( (s = getenv("EF_BURST_CONTROL_LIMIT"))) + if( (s = getenv("EF_BURST_CONTROL_LIMIT")) ) opts->burst_control_limit = atoi(s); #endif #if CI_CFG_CONG_AVOID_NOTIFIED - if ( (s = getenv("EF_CONG_NOTIFY_THRESH"))) + if( (s = getenv("EF_CONG_NOTIFY_THRESH")) ) opts->cong_notify_thresh = atoi(s); #endif #if CI_CFG_TAIL_DROP_PROBE - if ( (s = 
getenv("EF_TAIL_DROP_PROBE"))) + if( (s = getenv("EF_TAIL_DROP_PROBE")) ) opts->tail_drop_probe = atoi(s); #endif #if CI_CFG_CONG_AVOID_SCALE_BACK - if ( (s = getenv("EF_CONG_AVOID_SCALE_BACK"))) + if( (s = getenv("EF_CONG_AVOID_SCALE_BACK")) ) opts->cong_avoid_scale_back = atoi(s); #endif - if ( (s = getenv("EF_TCP_TIME_WAIT_ASSASSINATION"))) + if( (s = getenv("EF_TCP_TIME_WAIT_ASSASSINATION")) ) opts->time_wait_assassinate = atoi(s); /* Get our netifs to inherit flags if the O/S is being forced to */ - if (CITP_OPTS.accept_force_inherit_nonblock) + if( CITP_OPTS.accept_force_inherit_nonblock ) opts->accept_inherit_nonblock = 1; - if ( (s = getenv("EF_FREE_PACKETS_LOW_WATERMARK")) ) + if( (s = getenv("EF_FREE_PACKETS_LOW_WATERMARK")) ) opts->free_packets_low = atoi(s); if( opts->free_packets_low == 0 ) opts->free_packets_low = opts->rxq_size / 2; #if CI_CFG_PIO - if ( (s = getenv("EF_PIO")) ) + if( (s = getenv("EF_PIO")) ) opts->pio = atoi(s); if( opts->pio == 0 ) /* Makes for more efficient checking on fast data path */ opts->pio_thresh = 0; - else if ( (s = getenv("EF_PIO_THRESHOLD")) ) + else if( (s = getenv("EF_PIO_THRESHOLD")) ) opts->pio_thresh = atoi(s); #endif @@ -1170,8 +1168,8 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) opts->rx_timestamping = atoi(s); static const char* const timestamping_opts[] = { "nic", "cpacket", 0 }; - opts->rx_timestamping_ordering = - parse_enum(opts, "EF_RX_TIMESTAMPING_ORDERING", timestamping_opts, "nic"); + opts->rx_timestamping_ordering = parse_enum( + opts, "EF_RX_TIMESTAMPING_ORDERING", timestamping_opts, "nic"); if( (s = getenv("EF_TX_TIMESTAMPING")) ) opts->tx_timestamping = atoi(s); @@ -1181,8 +1179,8 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) if( (s = getenv("EF_TCP_TSOPT_MODE")) ) { opts->tcp_tsopt_mode = atoi(s); - if( !(opts->tcp_tsopt_mode == 2) ) { - citp_syn_opts &=~ CI_TCPT_FLAG_TSO; + if( ! 
(opts->tcp_tsopt_mode == 2) ) { + citp_syn_opts &= ~CI_TCPT_FLAG_TSO; citp_syn_opts |= (opts->tcp_tsopt_mode ? CI_TCPT_FLAG_TSO : 0); opts->syn_opts = citp_syn_opts; } @@ -1194,8 +1192,9 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) if( (s = getenv("EF_PERIODIC_TIMER_CPU")) ) { int cpu = atoi(s); if( cpu >= sysconf(_SC_NPROCESSORS_ONLN) ) { - CONFIG_LOG(opts, CONFIG_WARNINGS, "Value of EF_PERIODIC_TIMER_CPU is " - "invalid. Periodic work will not be affinitised."); + CONFIG_LOG(opts, CONFIG_WARNINGS, + "Value of EF_PERIODIC_TIMER_CPU is " + "invalid. Periodic work will not be affinitised."); cpu = -1; } opts->periodic_timer_cpu = cpu; @@ -1207,11 +1206,9 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) if( (s = getenv("EF_CLUSTER_IGNORE")) ) { ci_log("EF_CLUSTER_IGNORE is deprecated use EF_CLUSTER_SIZE instead"); opts->cluster_ignore = atoi(s); - } - else if( (s = getenv("EF_CLUSTER_SIZE")) ) { + } else if( (s = getenv("EF_CLUSTER_SIZE")) ) { opts->cluster_ignore = (atoi(s) == 0); - } - else + } else opts->cluster_ignore = 1; #if CI_CFG_TCP_SHARED_LOCAL_PORTS @@ -1222,8 +1219,8 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) if( (s = getenv("EF_TCP_SHARED_LOCAL_PORTS_MAX")) ) opts->tcp_shared_local_ports_max = atoi(s); if( (s = getenv("EF_TCP_SHARED_LOCAL_PORTS_NO_FALLBACK")) ) - opts->tcp_shared_local_no_fallback = atoi(s) && - opts->tcp_shared_local_ports > 0; + opts->tcp_shared_local_no_fallback = + atoi(s) && opts->tcp_shared_local_ports > 0; if( (s = getenv("EF_TCP_SHARED_LOCAL_PORTS_PER_IP")) ) opts->tcp_shared_local_ports_per_ip = atoi(s); if( (s = getenv("EF_TCP_SHARED_LOCAL_PORTS_PER_IP_MAX")) ) @@ -1236,9 +1233,9 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) opts->rx_merge_mode = atoi(s); handle_str_opt(opts, "EF_INTERFACE_WHITELIST", opts->iface_whitelist, - sizeof(opts->iface_whitelist)); + sizeof(opts->iface_whitelist)); handle_str_opt(opts, "EF_INTERFACE_BLACKLIST", 
opts->iface_blacklist, - sizeof(opts->iface_blacklist)); + sizeof(opts->iface_blacklist)); if( (s = getenv("EF_KERNEL_PACKETS_BATCH_SIZE")) ) opts->kernel_packets_batch_size = atoi(s); @@ -1248,7 +1245,7 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) static const char* const tcp_isn_opts[] = { "clocked", "clocked+cache", 0 }; opts->tcp_isn_mode = - parse_enum(opts, "EF_TCP_ISN_MODE", tcp_isn_opts, "clocked+cache"); + parse_enum(opts, "EF_TCP_ISN_MODE", tcp_isn_opts, "clocked+cache"); if( (s = getenv("EF_TCP_ISN_2MSL")) ) opts->tcp_isn_2msl = atoi(s); if( (s = getenv("EF_TCP_ISN_CACHE_SIZE")) ) @@ -1294,9 +1291,10 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) opts->icmp_msg_max = atoi(s); #if CI_CFG_TCP_OFFLOAD_RECYCLER || CI_CFG_TX_CRC_OFFLOAD - static const char* const tcp_offload_opts[] = { "off", "tcp", "ceph", "nvme", 0 }; - opts->tcp_offload_plugin = parse_enum(opts, "EF_TCP_OFFLOAD", - tcp_offload_opts, "off"); + static const char* const tcp_offload_opts[] = { "off", "tcp", "ceph", "nvme", + 0 }; + opts->tcp_offload_plugin = + parse_enum(opts, "EF_TCP_OFFLOAD", tcp_offload_opts, "off"); if( (s = getenv("EF_CEPH_DATA_BUF_BYTES")) ) opts->ceph_data_buf_bytes = atoi(s); @@ -1306,30 +1304,28 @@ void ci_netif_config_opts_getenv(ci_netif_config_opts* opts) #endif } -static int -handle_str_opt(ci_netif_config_opts* opts, - const char* optname, char* optval_buf, size_t optval_buflen) +static int handle_str_opt(ci_netif_config_opts* opts, const char* optname, + char* optval_buf, size_t optval_buflen) { - char* s; + char* s; if( (s = getenv(optname)) ) { if( strlen(s) >= optval_buflen ) { - CONFIG_LOG(opts, CONFIG_WARNINGS, "Value of %s" - "too long - truncating. ", optname); + CONFIG_LOG(opts, CONFIG_WARNINGS, + "Value of %s" + "too long - truncating. 
", + optname); } strncpy(optval_buf, s, optval_buflen); optval_buf[optval_buflen - 1] = 0; return 1; - } - else { + } else { return 0; } } -static int -parse_enum(ci_netif_config_opts* opts, - const char* name, const char* const* options, - const char* default_val) +static int parse_enum(ci_netif_config_opts* opts, const char* name, + const char* const* options, const char* default_val) { const char* value; int i; @@ -1343,20 +1339,20 @@ parse_enum(ci_netif_config_opts* opts, return i; CONFIG_LOG(opts, CONFIG_WARNINGS, - "%s='%s' not recognised, defaulting to '%s'", - name, value, default_val); + "%s='%s' not recognised, defaulting to '%s'", name, value, + default_val); value = default_val; } } -static const char* strmchrnul(const char *s, const char* delims) +static const char* strmchrnul(const char* s, const char* delims) { const char* r = NULL; while( *delims ) { - const char * t = strchrnul(s, *delims); - if( !r || t < r ) - r = t; - ++delims; + const char* t = strchrnul(s, *delims); + if( ! r || t < r ) + r = t; + ++delims; } return r; } @@ -1365,9 +1361,7 @@ static const char* strmchrnul(const char *s, const char* delims) /* Note that all ifindices in this function must be signed to allow for the * extra magic values such as CITP_SCALABLE_FILTERS_ALL. 
*/ static int ci_opts_parse_scalable_filters_nic(ci_netif_config_opts* opts, - const char** spec_in_out, - int* mode_out, - ci_int32* ifindex_out) + const char** spec_in_out, int* mode_out, ci_int32* ifindex_out) { char ifname[IFNAMSIZ] = {}; const char* s = *spec_in_out; @@ -1380,8 +1374,8 @@ static int ci_opts_parse_scalable_filters_nic(ci_netif_config_opts* opts, strncpy(ifname, s, CI_MIN(modestr - s, sizeof(ifname) - 1)); ifindex = if_nametoindex(ifname); - if( ifindex == CI_IFID_BAD && (strcmp(ifname, "any") == 0 || - strcmp(ifname, ".") == 0) ) + if( ifindex == CI_IFID_BAD && + (strcmp(ifname, "any") == 0 || strcmp(ifname, ".") == 0) ) ifindex = CITP_SCALABLE_FILTERS_ALL; /* If we've got a valid ifindex then we need to determine the mode */ @@ -1398,27 +1392,32 @@ static int ci_opts_parse_scalable_filters_nic(ci_netif_config_opts* opts, if( modestr && modestr != strmchrnul(modestr, ",") ) { int mode_value = CITP_SCALABLE_MODE_NONE; int mode_set = 0; - struct {const char* name; int mode;} modes[] = { - {"transparent_active", CITP_SCALABLE_MODE_TPROXY_ACTIVE}, - {"passive", CITP_SCALABLE_MODE_PASSIVE}, - {"active", CITP_SCALABLE_MODE_ACTIVE}, - {"rss", CITP_SCALABLE_MODE_RSS}, + struct { + const char* name; + int mode; + } modes[] = { + { "transparent_active", CITP_SCALABLE_MODE_TPROXY_ACTIVE }, + { "passive", CITP_SCALABLE_MODE_PASSIVE }, + { "active", CITP_SCALABLE_MODE_ACTIVE }, + { "rss", CITP_SCALABLE_MODE_RSS }, }; - while ( modestr != strmchrnul(modestr, ",") ) { + while( modestr != strmchrnul(modestr, ",") ) { const char* mode_end = strmchrnul(modestr, ":,"); - int len = mode_end - modestr; + int len = mode_end - modestr; int i; for( i = 0; i < sizeof(modes) / sizeof(*modes); ++i ) if( strncmp(modes[i].name, modestr, len) == 0 && modes[i].name[len] == 0 ) { - mode_value |= modes[i].mode; + mode_value |= modes[i].mode; mode_set |= 3; break; - } + } if( ! 
(mode_set & 1) ) { - CONFIG_LOG(opts, CONFIG_WARNINGS, "config: Error parsing " - "EF_SCALABLE_FILTERS, token '%s', disabling scalable " - "filter mode", modestr); + CONFIG_LOG(opts, CONFIG_WARNINGS, + "config: Error parsing " + "EF_SCALABLE_FILTERS, token '%s', disabling scalable " + "filter mode", + modestr); mode = CITP_SCALABLE_MODE_NONE; mode_set = 0; rc = -EINVAL; @@ -1431,41 +1430,40 @@ static int ci_opts_parse_scalable_filters_nic(ci_netif_config_opts* opts, } if( mode_set ) { - int modes_supported[] = { - CITP_SCALABLE_MODE_TPROXY_ACTIVE, - CITP_SCALABLE_MODE_PASSIVE, - CITP_SCALABLE_MODE_ACTIVE, + int modes_supported[] = { CITP_SCALABLE_MODE_TPROXY_ACTIVE, + CITP_SCALABLE_MODE_PASSIVE, CITP_SCALABLE_MODE_ACTIVE, CITP_SCALABLE_MODE_TPROXY_ACTIVE | CITP_SCALABLE_MODE_PASSIVE, CITP_SCALABLE_MODE_TPROXY_ACTIVE | CITP_SCALABLE_MODE_RSS, CITP_SCALABLE_MODE_ACTIVE | CITP_SCALABLE_MODE_RSS, CITP_SCALABLE_MODE_PASSIVE | CITP_SCALABLE_MODE_RSS, CITP_SCALABLE_MODE_ACTIVE | CITP_SCALABLE_MODE_PASSIVE | - CITP_SCALABLE_MODE_RSS - }; - int n_modes = sizeof(modes_supported)/sizeof(*modes_supported); + CITP_SCALABLE_MODE_RSS }; + int n_modes = sizeof(modes_supported) / sizeof(*modes_supported); int fail = 1; int i; mode = mode_value; - for( i = 0; i < n_modes; ++i) { + for( i = 0; i < n_modes; ++i ) { if( mode == modes_supported[i] ) { fail = 0; break; } } if( fail ) { - CONFIG_LOG(opts, CONFIG_WARNINGS, "config: Unsupported scalable " - "mode selected, disabling scalable filter mode."); + CONFIG_LOG(opts, CONFIG_WARNINGS, + "config: Unsupported scalable " + "mode selected, disabling scalable filter mode."); mode = CITP_SCALABLE_MODE_NONE; rc = -EINVAL; } } } - } - else { - CONFIG_LOG(opts, CONFIG_WARNINGS, "config: Could not determine ifindex " - "from name '%s', disabling scalable filter mode.", ifname); + } else { + CONFIG_LOG(opts, CONFIG_WARNINGS, + "config: Could not determine ifindex " + "from name '%s', disabling scalable filter mode.", + ifname); mode = 
CITP_SCALABLE_MODE_NONE; rc = -EINVAL; } @@ -1480,10 +1478,15 @@ static int ci_opts_parse_scalable_filters_nic(ci_netif_config_opts* opts, } -#define swap(x,y) ({ typeof(x) t = (x); (x) = (y); (y) = (t); }) +#define swap(x, y) \ + ({ \ + typeof(x) t = (x); \ + (x) = (y); \ + (y) = (t); \ + }) -static void -ci_netif_config_opts_getenv_ef_scalable_filters(ci_netif_config_opts* opts) +static void ci_netif_config_opts_getenv_ef_scalable_filters( + ci_netif_config_opts* opts) { const char* s; int enable = 0; @@ -1501,78 +1504,81 @@ ci_netif_config_opts_getenv_ef_scalable_filters(ci_netif_config_opts* opts) int cluster_name_len; int i; - strncpy(opts->scalable_filter_string, s, - sizeof(opts->scalable_filter_string)); + strncpy( + opts->scalable_filter_string, s, sizeof(opts->scalable_filter_string)); opts->scalable_filter_string[sizeof(opts->scalable_filter_string) - 1] = 0; /* parse interfaces in EF_SCALABLE_FILTERS until: got two of them, * run out of the string or hit parsing error */ - for( i = 0; - i < 2 && s && *s && - 0 == (rc = ci_opts_parse_scalable_filters_nic(opts, &s, &modes[i], - &ifindexes[i])); - ++i); + for( i = 0; i < 2 && s && *s && + 0 == (rc = ci_opts_parse_scalable_filters_nic( + opts, &s, &modes[i], &ifindexes[i])); + ++i ) + ; if( rc != 0 ) { /* message has already been printed */ goto invalid_mode; - } - else if( i == 0 ) { + } else if( i == 0 ) { mode = CITP_SCALABLE_MODE_NONE; - } - else if( i == 1 ) { + } else if( i == 1 ) { /* If the mode was not set explicitly then default to non-rss mode, * otherwise check the mode is supported */ if( modes[0] < 0 ) - modes[0] = CITP_SCALABLE_MODE_TPROXY_ACTIVE | - CITP_SCALABLE_MODE_PASSIVE; + modes[0] = + CITP_SCALABLE_MODE_TPROXY_ACTIVE | CITP_SCALABLE_MODE_PASSIVE; ifindexes[1] = ifindexes[0]; - } - else { + } else { /* Multiple modes specified. 
*/ if( ifindexes[0] == CITP_SCALABLE_FILTERS_ALL || ifindexes[1] == CITP_SCALABLE_FILTERS_ALL ) { CONFIG_LOG(opts, CONFIG_WARNINGS, - "config: Multiple scalable interfaces specified when " - "requesting scalable filters on all interfaces."); + "config: Multiple scalable interfaces specified when " + "requesting scalable filters on all interfaces."); goto invalid_mode; } if( modes[0] < 0 && modes[1] < 0 ) { modes[0] = CITP_SCALABLE_MODE_PASSIVE; modes[1] = CITP_SCALABLE_MODE_TPROXY_ACTIVE; - } - else { + } else { if( modes[1] < 0 ) { swap(modes[0], modes[1]); swap(ifindexes[0], ifindexes[1]); } if( modes[0] < 0 ) { - if( modes[1] & (CITP_SCALABLE_MODE_ACTIVE | CITP_SCALABLE_MODE_TPROXY_ACTIVE) ) { + if( modes[1] & (CITP_SCALABLE_MODE_ACTIVE | + CITP_SCALABLE_MODE_TPROXY_ACTIVE) ) { if( modes[1] & CITP_SCALABLE_MODE_PASSIVE ) { - CONFIG_LOG(opts, CONFIG_WARNINGS, "config: With two scalable interfaces " - "one needs to be exclusively active while other exclusively passive."); + CONFIG_LOG(opts, CONFIG_WARNINGS, + "config: With two scalable interfaces " + "one needs to be exclusively active while other exclusively " + "passive."); goto invalid_mode; } - modes[0] = CITP_SCALABLE_MODE_PASSIVE | (modes[1] & CITP_SCALABLE_MODE_RSS); - } - else if( modes[1] & CITP_SCALABLE_MODE_PASSIVE ) { - modes[0] = CITP_SCALABLE_MODE_TPROXY_ACTIVE | (modes[1] & CITP_SCALABLE_MODE_RSS); + modes[0] = CITP_SCALABLE_MODE_PASSIVE | + (modes[1] & CITP_SCALABLE_MODE_RSS); + } else if( modes[1] & CITP_SCALABLE_MODE_PASSIVE ) { + modes[0] = CITP_SCALABLE_MODE_TPROXY_ACTIVE | + (modes[1] & CITP_SCALABLE_MODE_RSS); swap(modes[0], modes[1]); swap(ifindexes[0], ifindexes[1]); } } /* now we have both modes resolved, passive at index 1 */ - ci_assert_nflags(modes[1], CITP_SCALABLE_MODE_ACTIVE | CITP_SCALABLE_MODE_TPROXY_ACTIVE); - ci_assert(modes[0] & (CITP_SCALABLE_MODE_ACTIVE | CITP_SCALABLE_MODE_TPROXY_ACTIVE)); + ci_assert_nflags(modes[1], + CITP_SCALABLE_MODE_ACTIVE | 
CITP_SCALABLE_MODE_TPROXY_ACTIVE); + ci_assert(modes[0] & (CITP_SCALABLE_MODE_ACTIVE | + CITP_SCALABLE_MODE_TPROXY_ACTIVE)); ci_assert_flags(modes[1], CITP_SCALABLE_MODE_PASSIVE); ci_assert_nflags(modes[0], CITP_SCALABLE_MODE_PASSIVE); } if( (modes[0] ^ modes[1]) & CITP_SCALABLE_MODE_RSS ) { - CONFIG_LOG(opts, CONFIG_WARNINGS, "config: When specifying two scalable " - "modes RSS setting needs to be identical."); + CONFIG_LOG(opts, CONFIG_WARNINGS, + "config: When specifying two scalable " + "modes RSS setting needs to be identical."); goto invalid_mode; } } @@ -1593,43 +1599,44 @@ ci_netif_config_opts_getenv_ef_scalable_filters(ci_netif_config_opts* opts) if( (s = getenv("EF_SCALABLE_ACTIVE_WILDS_NEED_FILTER")) ) active_wilds_need_filter = atoi(s); #endif - } - else { + } else { enable = CITP_SCALABLE_FILTERS_DISABLE; } /* Stacks cannot be named by EF_NAME in clustered scalable modes. */ if( enable == CITP_SCALABLE_FILTERS_ENABLE && - mode & CITP_SCALABLE_MODE_RSS && - (s = getenv("EF_NAME")) && s[0] != '\0' ) + mode & CITP_SCALABLE_MODE_RSS && (s = getenv("EF_NAME")) && + s[0] != '\0' ) CONFIG_LOG(opts, CONFIG_WARNINGS, - "config: Stacks cannot be named by EF_NAME while in a " - "clustered scalable mode.") + "config: Stacks cannot be named by EF_NAME while in a " + "clustered scalable mode.") /* In scalable mode, cluster name has a max length of 5. See bug78935. 
*/ cluster_name_len = 5 - (CITP_OPTS.cluster_size > 9); if( strlen(CITP_OPTS.cluster_name) > cluster_name_len ) { CITP_OPTS.cluster_name[cluster_name_len] = '\0'; CONFIG_LOG(opts, CONFIG_WARNINGS, - "config: The supplied EF_CLUSTER_NAME is too long and is " - "being truncated to: %s.", CITP_OPTS.cluster_name); + "config: The supplied EF_CLUSTER_NAME is too long and is " + "being truncated to: %s.", + CITP_OPTS.cluster_name); } - } - else { + } else { if( (s = getenv("EF_SCALABLE_FILTERS_ENABLE")) ) - CONFIG_LOG(opts, CONFIG_WARNINGS, "config: EF_SCALABLE_FILTERS_ENABLE " - "ignored as no valid config for EF_SCALABLE_FILTERS found."); + CONFIG_LOG(opts, CONFIG_WARNINGS, + "config: EF_SCALABLE_FILTERS_ENABLE " + "ignored as no valid config for EF_SCALABLE_FILTERS found."); enable = CITP_SCALABLE_FILTERS_DISABLE; } if( enable == CITP_SCALABLE_FILTERS_DISABLE ) { if( (s = getenv("EF_SCALABLE_LISTEN_MODE")) ) - CONFIG_LOG(opts, CONFIG_WARNINGS, "config: EF_SCALABLE_LISTEN_MODE " - "ignored as no valid config for EF_SCALABLE_FILTERS found."); + CONFIG_LOG(opts, CONFIG_WARNINGS, + "config: EF_SCALABLE_LISTEN_MODE " + "ignored as no valid config for EF_SCALABLE_FILTERS found."); if( (s = getenv("EF_SCALABLE_ACTIVE_WILDS_NEED_FILTER")) ) CONFIG_LOG(opts, CONFIG_WARNINGS, - "config: EF_SCALABLE_ACTIVE_WILDS_NEED_FILTER " - "ignored as no valid config for EF_SCALABLE_FILTERS found."); + "config: EF_SCALABLE_ACTIVE_WILDS_NEED_FILTER " + "ignored as no valid config for EF_SCALABLE_FILTERS found."); } opts->scalable_filter_ifindex_passive = ifindexes[1]; opts->scalable_filter_ifindex_active = ifindexes[0]; @@ -1659,44 +1666,43 @@ static void netif_tcp_helper_build2(ci_netif* ni) { #if CI_CFG_TCP_SHARED_LOCAL_PORTS ni->active_wild_table = - (struct oo_p_dllink*) ((char*) ni->state + ni->state->active_wild_ofs); + (struct oo_p_dllink*) ((char*) ni->state + ni->state->active_wild_ofs); #endif ni->seq_table = - (ci_tcp_prev_seq_t*) ((char*) ni->state + ni->state->seq_table_ofs); - 
ni->deferred_pkts = - (struct oo_deferred_pkt*) ((char*) ni->state + - ni->state->deferred_pkts_ofs); + (ci_tcp_prev_seq_t*) ((char*) ni->state + ni->state->seq_table_ofs); + ni->deferred_pkts = (struct oo_deferred_pkt*) ((char*) ni->state + + ni->state->deferred_pkts_ofs); ni->filter_table = - (ci_netif_filter_table*) ((char*) ni->state + ni->state->table_ofs); + (ci_netif_filter_table*) ((char*) ni->state + ni->state->table_ofs); ni->filter_table_ext = - (ci_netif_filter_table_entry_ext*) ((char*) ni->state + - ni->state->table_ext_ofs); + (ci_netif_filter_table_entry_ext*) ((char*) ni->state + + ni->state->table_ext_ofs); #if CI_CFG_IPV6 ni->ip6_filter_table = - (ci_ip6_netif_filter_table*) ((char*) ni->state + ni->state->ip6_table_ofs); + (ci_ip6_netif_filter_table*) ((char*) ni->state + + ni->state->ip6_table_ofs); #endif ni->packets = (oo_pktbuf_manager*) ((char*) ni->state + ni->state->buf_ofs); ni->dma_addrs = (ef_addr*) ((char*) ni->state + ni->state->dma_ofs); #if CI_CFG_UL_INTERRUPT_HELPER oo_ringbuffer_init(&ni->closed_eps, &ni->state->closed_eps, "closed_eps", - (void*)((char*) ni->state + ni->state->closed_eps_ofs)); + (void*) ((char*) ni->state + ni->state->closed_eps_ofs)); oo_ringbuffer_init(&ni->sw_filter_ops, &ni->state->sw_filter_ops, - "sw_filters", - (void*)((char*) ni->state + ni->state->sw_filter_ofs)); + "sw_filters", (void*) ((char*) ni->state + ni->state->sw_filter_ofs)); #endif } - static void netif_tcp_helper_munmap(ci_netif* ni) { int rc; if( ni->timesync != NULL ) { - rc = oo_resource_munmap(ci_netif_get_driver_handle(ni), - ni->timesync, ni->state->timesync_bytes); - if( rc < 0 ) LOG_NV(ci_log("%s: munmap timesync %d", __FUNCTION__, rc)); + rc = oo_resource_munmap(ci_netif_get_driver_handle(ni), ni->timesync, + ni->state->timesync_bytes); + if( rc < 0 ) + LOG_NV(ci_log("%s: munmap timesync %d", __FUNCTION__, rc)); } /* Buffer mapping. 
*/ @@ -1713,8 +1719,7 @@ static void netif_tcp_helper_munmap(ci_netif* ni) #endif { rc = oo_resource_munmap(ci_netif_get_driver_handle(ni), - ni->pkt_bufs[id], - CI_CFG_PKT_BUF_SIZE * PKTS_PER_SET); + ni->pkt_bufs[id], CI_CFG_PKT_BUF_SIZE * PKTS_PER_SET); } if( rc < 0 ) LOG_NV(ci_log("%s: munmap packets %d", __FUNCTION__, rc)); @@ -1723,51 +1728,58 @@ static void netif_tcp_helper_munmap(ci_netif* ni) } if( ni->efct_shm_ptr != NULL ) { - rc = oo_resource_munmap(ci_netif_get_driver_handle(ni), - ni->efct_shm_ptr, ni->state->efct_shm_mmap_bytes); - if( rc < 0 ) LOG_NV(ci_log("%s: munmap efct shm %d", __FUNCTION__, rc)); + rc = oo_resource_munmap(ci_netif_get_driver_handle(ni), ni->efct_shm_ptr, + ni->state->efct_shm_mmap_bytes); + if( rc < 0 ) + LOG_NV(ci_log("%s: munmap efct shm %d", __FUNCTION__, rc)); } if( ni->buf_ptr != NULL ) { - rc = oo_resource_munmap(ci_netif_get_driver_handle(ni), - ni->buf_ptr, ni->state->buf_mmap_bytes); - if( rc < 0 ) LOG_NV(ci_log("%s: munmap bufs %d", __FUNCTION__, rc)); + rc = oo_resource_munmap(ci_netif_get_driver_handle(ni), ni->buf_ptr, + ni->state->buf_mmap_bytes); + if( rc < 0 ) + LOG_NV(ci_log("%s: munmap bufs %d", __FUNCTION__, rc)); } #if CI_CFG_PIO if( ni->pio_bytes_mapped != 0 && ni->pio_ptr != NULL ) { - rc = oo_resource_munmap(ci_netif_get_driver_handle(ni), - ni->pio_ptr, ni->pio_bytes_mapped); - if( rc < 0 ) LOG_NV(ci_log("%s: munmap pio %d", __FUNCTION__, rc)); + rc = oo_resource_munmap( + ci_netif_get_driver_handle(ni), ni->pio_ptr, ni->pio_bytes_mapped); + if( rc < 0 ) + LOG_NV(ci_log("%s: munmap pio %d", __FUNCTION__, rc)); } #endif #if CI_CFG_CTPIO if( ni->ctpio_bytes_mapped != 0 && ni->ctpio_ptr != NULL ) { - rc = oo_resource_munmap(ci_netif_get_driver_handle(ni), - ni->ctpio_ptr, ni->ctpio_bytes_mapped); - if( rc < 0 ) LOG_NV(ci_log("%s: munmap pio %d", __FUNCTION__, rc)); + rc = oo_resource_munmap( + ci_netif_get_driver_handle(ni), ni->ctpio_ptr, ni->ctpio_bytes_mapped); + if( rc < 0 ) + LOG_NV(ci_log("%s: munmap 
pio %d", __FUNCTION__, rc)); } #endif #if CI_CFG_TCP_OFFLOAD_RECYCLER if( ni->plugin_ptr != NULL ) { - rc = oo_resource_munmap(ci_netif_get_driver_handle(ni), - ni->plugin_ptr, CI_PAGE_SIZE); - if( rc < 0 ) LOG_NV(ci_log("%s: munmap plugin %d", __FUNCTION__, rc)); + rc = oo_resource_munmap( + ci_netif_get_driver_handle(ni), ni->plugin_ptr, CI_PAGE_SIZE); + if( rc < 0 ) + LOG_NV(ci_log("%s: munmap plugin %d", __FUNCTION__, rc)); } #endif if( ni->io_ptr != NULL ) { - rc = oo_resource_munmap(ci_netif_get_driver_handle(ni), - ni->io_ptr, ni->state->io_mmap_bytes); - if( rc < 0 ) LOG_NV(ci_log("%s: munmap io %d", __FUNCTION__, rc)); + rc = oo_resource_munmap( + ci_netif_get_driver_handle(ni), ni->io_ptr, ni->state->io_mmap_bytes); + if( rc < 0 ) + LOG_NV(ci_log("%s: munmap io %d", __FUNCTION__, rc)); } - rc = oo_resource_munmap(ci_netif_get_driver_handle(ni), - ni->state, ni->mmap_bytes); + rc = oo_resource_munmap( + ci_netif_get_driver_handle(ni), ni->state, ni->mmap_bytes); ni->state = NULL; - if( rc < 0 ) LOG_NV(ci_log("%s: munmap shared state %d", __FUNCTION__, rc)); + if( rc < 0 ) + LOG_NV(ci_log("%s: munmap shared state %d", __FUNCTION__, rc)); } @@ -1799,10 +1811,9 @@ static int netif_tcp_helper_mmap(ci_netif* ni) * Create timesync mapping. */ if( ns->timesync_bytes != 0 ) { - rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), - OO_MMAP_TYPE_NETIF, - CI_NETIF_MMAP_ID_TIMESYNC, ns->timesync_bytes, - OO_MMAP_FLAG_READONLY | OO_MMAP_FLAG_POPULATE, &p); + rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), OO_MMAP_TYPE_NETIF, + CI_NETIF_MMAP_ID_TIMESYNC, ns->timesync_bytes, + OO_MMAP_FLAG_READONLY | OO_MMAP_FLAG_POPULATE, &p); if( rc < 0 ) { LOG_NV(ci_log("%s: oo_resource_mmap timesync %d", __FUNCTION__, rc)); goto fail1; @@ -1815,10 +1826,8 @@ static int netif_tcp_helper_mmap(ci_netif* ni) * Create the I/O mapping. 
*/ if( ns->io_mmap_bytes != 0 ) { - rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), - OO_MMAP_TYPE_NETIF, - CI_NETIF_MMAP_ID_IO, ns->io_mmap_bytes, - OO_MMAP_FLAG_POPULATE, &p); + rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), OO_MMAP_TYPE_NETIF, + CI_NETIF_MMAP_ID_IO, ns->io_mmap_bytes, OO_MMAP_FLAG_POPULATE, &p); if( rc < 0 ) { LOG_NV(ci_log("%s: oo_resource_mmap io %d", __FUNCTION__, rc)); goto fail1; @@ -1831,10 +1840,8 @@ static int netif_tcp_helper_mmap(ci_netif* ni) * Create the PIO mapping. */ if( ns->pio_mmap_bytes != 0 ) { - rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), - OO_MMAP_TYPE_NETIF, - CI_NETIF_MMAP_ID_PIO, ns->pio_mmap_bytes, - OO_MMAP_FLAG_POPULATE, &p); + rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), OO_MMAP_TYPE_NETIF, + CI_NETIF_MMAP_ID_PIO, ns->pio_mmap_bytes, OO_MMAP_FLAG_POPULATE, &p); if( rc < 0 ) { LOG_NV(ci_log("%s: oo_resource_mmap pio %d", __FUNCTION__, rc)); goto fail2; @@ -1851,10 +1858,9 @@ static int netif_tcp_helper_mmap(ci_netif* ni) * Create the CTPIO mapping. 
*/ if( ns->ctpio_mmap_bytes != 0 ) { - rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), - OO_MMAP_TYPE_NETIF, - CI_NETIF_MMAP_ID_CTPIO, ns->ctpio_mmap_bytes, - OO_MMAP_FLAG_POPULATE, &p); + rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), OO_MMAP_TYPE_NETIF, + CI_NETIF_MMAP_ID_CTPIO, ns->ctpio_mmap_bytes, OO_MMAP_FLAG_POPULATE, + &p); if( rc < 0 ) { LOG_NV(ci_log("%s: oo_resource_mmap ctpio %d", __FUNCTION__, rc)); goto fail2; @@ -1875,14 +1881,12 @@ static int netif_tcp_helper_mmap(ci_netif* ni) int nic_i; OO_STACK_FOR_EACH_INTF_I(ni, nic_i) - if( ns->nic[nic_i].oo_vi_flags & OO_VI_FLAGS_PLUGIN_IO_EN ) - have_plugin_io = true; + if( ns->nic[nic_i].oo_vi_flags & OO_VI_FLAGS_PLUGIN_IO_EN ) + have_plugin_io = true; if( have_plugin_io ) { - rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), - OO_MMAP_TYPE_NETIF, - CI_NETIF_MMAP_ID_PLUGIN, CI_PAGE_SIZE, - OO_MMAP_FLAG_POPULATE, &p); + rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), OO_MMAP_TYPE_NETIF, + CI_NETIF_MMAP_ID_PLUGIN, CI_PAGE_SIZE, OO_MMAP_FLAG_POPULATE, &p); if( rc < 0 ) { LOG_NV(ci_log("%s: oo_resource_mmap plugin %d", __FUNCTION__, rc)); goto fail2; @@ -1896,10 +1900,9 @@ static int netif_tcp_helper_mmap(ci_netif* ni) * Create the I/O buffer mapping. */ if( ns->buf_mmap_bytes != 0 ) { - rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), - OO_MMAP_TYPE_NETIF, - CI_NETIF_MMAP_ID_IOBUFS, ns->buf_mmap_bytes, - OO_MMAP_FLAG_POPULATE, &p); + rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), OO_MMAP_TYPE_NETIF, + CI_NETIF_MMAP_ID_IOBUFS, ns->buf_mmap_bytes, OO_MMAP_FLAG_POPULATE, + &p); if( rc < 0 ) { LOG_NV(ci_log("%s: oo_resource_mmap iobufs %d", __FUNCTION__, rc)); goto fail2; @@ -1911,10 +1914,9 @@ static int netif_tcp_helper_mmap(ci_netif* ni) * Create the efct rxq shm mapping. 
*/ if( ns->efct_shm_mmap_bytes != 0 ) { - rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), - OO_MMAP_TYPE_NETIF, - CI_NETIF_MMAP_ID_EFCT_SHM, ns->efct_shm_mmap_bytes, - OO_MMAP_FLAG_POPULATE, &p); + rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), OO_MMAP_TYPE_NETIF, + CI_NETIF_MMAP_ID_EFCT_SHM, ns->efct_shm_mmap_bytes, + OO_MMAP_FLAG_POPULATE, &p); if( rc < 0 ) { LOG_NV(ci_log("%s: oo_resource_mmap rxq shm %d", __FUNCTION__, rc)); goto fail2; @@ -1924,9 +1926,9 @@ static int netif_tcp_helper_mmap(ci_netif* ni) return 0; - fail2: +fail2: netif_tcp_helper_munmap(ni); - fail1: +fail1: return rc; } @@ -1938,17 +1940,16 @@ static int oo_efct_superbuf_config_refresh(ef_vi* vi, int qid) op.intf_i = rxq->resource_id; op.qid = qid; op.max_superbufs = CI_EFCT_MAX_SUPERBUFS; - CI_USER_PTR_SET(op.superbufs,rxq->superbuf); + CI_USER_PTR_SET(op.superbufs, rxq->superbuf); CI_USER_PTR_SET(op.current_mappings, rxq->current_mappings); return oo_resource_op(vi->dh, OO_IOC_EFCT_SUPERBUF_CONFIG_REFRESH, &op); } static int init_ef_vi(ci_netif* ni, int nic_i, int vi_state_offset, - int vi_io_offset, int vi_efct_shm_offset, - char** vi_mem_ptr, - ef_vi* vi, unsigned vi_instance, unsigned abs_idx, - int evq_bytes, int txq_size, ef_vi_stats* vi_stats) + int vi_io_offset, int vi_efct_shm_offset, char** vi_mem_ptr, ef_vi* vi, + unsigned vi_instance, unsigned abs_idx, int evq_bytes, int txq_size, + ef_vi_stats* vi_stats) { ef_vi_state* state = (void*) ((char*) ni->state + vi_state_offset); ci_netif_state_nic_t* nsn = &(ni->state->nic[nic_i]); @@ -1956,7 +1957,7 @@ static int init_ef_vi(ci_netif* ni, int nic_i, int vi_state_offset, unsigned vi_bar_off = vi_instance * 8192; ef_vi_init(vi, ef_vi_arch_from_efhw_arch(nsn->vi_arch), nsn->vi_variant, - nsn->vi_revision, nsn->vi_flags, nsn->vi_nic_flags, state); + nsn->vi_revision, nsn->vi_flags, nsn->vi_nic_flags, state); ef_vi_init_out_flags(vi, nsn->vi_out_flags); vi_io_offset += vi_bar_off & (CI_PAGE_SIZE - 1); ef_vi_init_io(vi, 
ni->io_ptr + vi_io_offset); @@ -1965,16 +1966,19 @@ static int init_ef_vi(ci_netif* ni, int nic_i, int vi_state_offset, vi->abs_idx = abs_idx; vi->dh = ci_netif_get_driver_handle(ni); *vi_mem_ptr = ef_vi_init_qs(vi, *vi_mem_ptr, ids, evq_bytes / 8, - nsn->vi_rxq_size, nsn->rx_prefix_len, txq_size); + nsn->vi_rxq_size, nsn->rx_prefix_len, txq_size); if( vi->max_efct_rxq ) { int i; - int rc = efct_vi_mmap_init_internal(vi, - (void*)((char*)ni->efct_shm_ptr + vi_efct_shm_offset)); + int rc = efct_vi_mmap_init_internal( + vi, (void*) ((char*) ni->efct_shm_ptr + vi_efct_shm_offset)); if( rc < 0 ) return rc; for( i = 0; i < vi->max_efct_rxq; ++i ) - efct_vi_attach_rxq_internal(vi, i, nic_i, - oo_efct_superbuf_config_refresh); + efct_vi_attach_rxq_internal( + vi, i, nic_i, oo_efct_superbuf_config_refresh); + } + if( vi->nic_type.arch == EF_VI_ARCH_SWXTCH ) { + ni->nic_hw[nic_i].poll_in_kernel = 0; } ef_vi_set_ts_format(vi, nsn->ts_format); ef_vi_init_rx_timestamping(vi, nsn->rx_ts_correction); @@ -1995,7 +1999,8 @@ static void cleanup_ef_vi(ef_vi* vi) static void cleanup_all_vis(ci_netif* ni, unsigned vis_inited) { int nic_i; - OO_STACK_FOR_EACH_INTF_I(ni, nic_i) { + OO_STACK_FOR_EACH_INTF_I(ni, nic_i) + { int i; int num_vis = ci_netif_num_vis(ni); @@ -2014,7 +2019,8 @@ unsigned ci_netif_build_future_intf_mask(ci_netif* ni) int nic_i; unsigned mask = 0; - OO_STACK_FOR_EACH_INTF_I(ni, nic_i) { + OO_STACK_FOR_EACH_INTF_I(ni, nic_i) + { /* Disable future when there's an XDP prog attached because that prog may * alter the destination socket, in which case the future code would be * wrong. XDP-attachment implies poll_in_kernel, which is what we actually @@ -2022,12 +2028,13 @@ unsigned ci_netif_build_future_intf_mask(ci_netif* ni) ef_vi* vi = ci_netif_vi(ni, nic_i); if( #ifdef OO_HAS_POLL_IN_KERNEL - ! ni->nic_hw[nic_i].poll_in_kernel && + ! 
ni->nic_hw[nic_i].poll_in_kernel && #endif ~ef_vi_flags(vi) & EF_VI_RX_EVENT_MERGE && vi->nic_type.arch != EF_VI_ARCH_EF100 && /* TODO AF_XDP future detection is not currently supported */ - vi->nic_type.arch != EF_VI_ARCH_AF_XDP ) + ! (vi->nic_type.arch == EF_VI_ARCH_AF_XDP || + vi->nic_type.arch == EF_VI_ARCH_SWXTCH) ) mask |= 1u << nic_i; } return mask; @@ -2043,6 +2050,33 @@ static int af_xdp_kick(ef_vi* vi) return oo_resource_op(fd, OO_IOC_AF_XDP_KICK, &intf_i); } +static int ci_netif_swxtch_rx_init(ci_netif* ni) +{ /* Calls ci_netif_rx_post() on every EF_VI_ARCH_SWXTCH VI of the stack while holding the netif lock. */ + int nic_i, i, rc, out = 1; /* out: stays 1 until a post fails, then 0 */ + ef_vi* vi; + + ci_netif_lock(ni); + OO_STACK_FOR_EACH_INTF_I(ni, nic_i) + { + int num_vis = ci_netif_num_vis(ni); + for( i = 0; i < num_vis; ++i ) { + vi = &ni->nic_hw[nic_i].vis[i]; + if( vi->nic_type.arch == EF_VI_ARCH_SWXTCH ) { + /* Zero the rxq added/removed counters before posting. */ + vi->ep_state->rxq.added = 0; + vi->ep_state->rxq.removed = 0; + if( (rc = ci_netif_rx_post(ni, nic_i, vi)) == 0 ) { /* 0 is treated as failure here -- TODO confirm against ci_netif_rx_post() */ + LOG_S(ci_log("Unable to initialize swxtch VI with RX Buffers")); + out = rc; /* rc == 0: record the failure; the loops carry on with the remaining VIs */ + } + } + } + } + ci_netif_unlock(ni); + return out; /* NOTE: 1 = success, 0 = failure (not the usual 0 / -errno convention) */ +} + + static int netif_tcp_helper_build(ci_netif* ni) { /* On entry we require the following to be initialised: @@ -2088,21 +2122,23 @@ static int netif_tcp_helper_build(ci_netif* ni) ni->future_intf_mask = 0; - OO_STACK_FOR_EACH_INTF_I(ni, nic_i) { + OO_STACK_FOR_EACH_INTF_I(ni, nic_i) + { ci_netif_state_nic_t* nsn = &ns->nic[nic_i]; ef_vi* vi; int i; int num_vis = ci_netif_num_vis(ni); /* Get interface properties.
*/ - rc = oo_cp_get_hwport_properties(ni->cplane, ns->intf_i_to_hwport[nic_i], - NULL, NULL); + rc = oo_cp_get_hwport_properties( + ni->cplane, ns->intf_i_to_hwport[nic_i], NULL, NULL); if( rc < 0 ) goto fail1; - LOG_NV(ci_log("%s: ni->io_ptr=%p io_offset=%d mem_ptr=%p " - "state_offset=%d", __FUNCTION__, ni->io_ptr, - vi_io_offset, vi_mem_ptr, vi_state_offset)); + LOG_NV(ci_log( + "%s: ni->io_ptr=%p io_offset=%d mem_ptr=%p " + "state_offset=%d", + __FUNCTION__, ni->io_ptr, vi_io_offset, vi_mem_ptr, vi_state_offset)); ci_assert(((vi_mem_ptr - ni->buf_ptr) & (CI_PAGE_SIZE - 1)) == 0); @@ -2112,11 +2148,10 @@ static int netif_tcp_helper_build(ci_netif* ni) vi_state_bytes = 0; for( i = 0; i < num_vis; ++i ) { vi = &ni->nic_hw[nic_i].vis[i]; - rc = init_ef_vi(ni, nic_i, vi_state_offset + vi_state_bytes, vi_io_offset, - vi_efct_shm_offset, - &vi_mem_ptr, vi, nsn->vi_instance[i], nsn->vi_abs_idx[i], - i ? 0 : nsn->vi_evq_bytes, nsn->vi_txq_size, - &ni->state->vi_stats); + rc = init_ef_vi(ni, nic_i, vi_state_offset + vi_state_bytes, + vi_io_offset, vi_efct_shm_offset, &vi_mem_ptr, vi, + nsn->vi_instance[i], nsn->vi_abs_idx[i], i ? 
0 : nsn->vi_evq_bytes, + nsn->vi_txq_size, &ni->state->vi_stats); if( rc ) goto fail2; ++vis_inited; @@ -2125,8 +2160,8 @@ static int netif_tcp_helper_build(ci_netif* ni) if( NI_OPTS(ni).tx_push ) ef_vi_set_tx_push_threshold(vi, NI_OPTS(ni).tx_push_thresh); - vi_state_bytes += ef_vi_calc_state_bytes(nsn->vi_rxq_size, - nsn->vi_txq_size); + vi_state_bytes += + ef_vi_calc_state_bytes(nsn->vi_rxq_size, nsn->vi_txq_size); vi_io_offset += nsn->vi_io_mmap_bytes; vi_efct_shm_offset += nsn->vi_efct_shm_mmap_bytes; } @@ -2150,8 +2185,8 @@ static int netif_tcp_helper_build(ci_netif* ni) /* Although the PIO regions are each in their own page, we have a * dense mapping for the host memory copy, starting at pio_bufs_ofs */ - ni->nic_hw[nic_i].pio.pio_buffer = (uint8_t*)ns + ns->pio_bufs_ofs + - pio_buf_offset; + ni->nic_hw[nic_i].pio.pio_buffer = + (uint8_t*) ns + ns->pio_bufs_ofs + pio_buf_offset; pio_buf_offset += nsn->pio_io_len; /* And set up rest of PIO struct so we can call ef_vi_pio_memcpy */ vi_bar_off = nsn->vi_instance[0] * 8192; @@ -2205,10 +2240,9 @@ static int netif_tcp_helper_build(ci_netif* ni) expected_buf_ofs = sizeof(ci_netif_state); expected_buf_ofs = CI_ROUND_UP(expected_buf_ofs, __alignof__(ef_vi_state)); expected_buf_ofs += ns->vi_state_bytes * oo_stack_intf_max(ni); - expected_buf_ofs = CI_ROUND_UP(expected_buf_ofs, - __alignof__(oo_pktbuf_manager)); - if( ns->buf_ofs != expected_buf_ofs || - ni->packets->sets_max < 1 || + expected_buf_ofs = + CI_ROUND_UP(expected_buf_ofs, __alignof__(oo_pktbuf_manager)); + if( ns->buf_ofs != expected_buf_ofs || ni->packets->sets_max < 1 || size / sizeof(oo_pktbuf_set) < ni->packets->sets_max ) { /* This typically happens if someone puts a variable width type such as * long in ci_netif_state_s, and a 32 bit user level library is used @@ -2216,24 +2250,26 @@ static int netif_tcp_helper_build(ci_netif* ni) */ /* Omitted check that size % sizeof(oo_pktbuf_set) == 0 because the * padding to nearest cache line makes it not 
necessarily true */ - ci_log("%d %d %d", ns->buf_ofs != sizeof(ci_netif_state) + - ns->vi_state_bytes * oo_stack_intf_max(ni), - ni->packets->sets_max < 1, - size / sizeof(oo_pktbuf_set) < ni->packets->sets_max); - ci_log("ERROR: data structure layout mismatch between kernel and " - "user level detected!"); + ci_log("%d %d %d", + ns->buf_ofs != sizeof(ci_netif_state) + + ns->vi_state_bytes * oo_stack_intf_max(ni), + ni->packets->sets_max < 1, + size / sizeof(oo_pktbuf_set) < ni->packets->sets_max); + ci_log( + "ERROR: data structure layout mismatch between kernel and " + "user level detected!"); ci_log("ns->buf_ofs=%d (expected %d)", ns->buf_ofs, expected_buf_ofs); ci_log(" sizeof(ci_netif_state) = %zd", sizeof(ci_netif_state)); ci_log(" alignof(ef_vi_state) = %zd", __alignof__(ef_vi_state)); ci_log(" vi_state_bytes = %d", ns->vi_state_bytes); ci_log(" stack_intf_max = %d", oo_stack_intf_max(ni)); - ci_log(" alignof(oo_pktbuf_manager) = %zd", - __alignof__(oo_pktbuf_manager)); - ci_log("oo_pktbuf_set=%zd, size=%d, sets_max=%d", - sizeof(oo_pktbuf_set), size, ni->packets->sets_max); + ci_log( + " alignof(oo_pktbuf_manager) = %zd", __alignof__(oo_pktbuf_manager)); + ci_log("oo_pktbuf_set=%zd, size=%d, sets_max=%d", sizeof(oo_pktbuf_set), + size, ni->packets->sets_max); ci_log("a: %zd != 0", size % sizeof(oo_pktbuf_set)); - ci_log("b: 1 <= %zd <= %d ", size / sizeof(oo_pktbuf_set), - ni->packets->sets_max); + ci_log("b: 1 <= %zd <= %d ", size / sizeof(oo_pktbuf_set), + ni->packets->sets_max); rc = -EINVAL; goto fail3; } @@ -2246,7 +2282,7 @@ static int netif_tcp_helper_build(ci_netif* ni) { int i; struct ci_extra_ep ref = { CI_FD_BAD }; - for( i = 0; i < ni->state->max_ep_bufs; ++ i ) + for( i = 0; i < ni->state->max_ep_bufs; ++i ) ni->eps[i] = ref; } @@ -2258,6 +2294,14 @@ static int netif_tcp_helper_build(ci_netif* ni) if( ni->cplane_init_net == NULL ) ns->flags |= CI_NETIF_FLAG_NO_INIT_NET_CPLANE; + /* ci_netif_swxtch_rx_init() returns 0 on failure. Leaving rc at 0 would + * make this error path look like success to callers that test rc < 0, + * so report a negative errno like the other fail paths do. */ + if( ci_netif_swxtch_rx_init(ni) == 0 ) { + rc = -ENOMEM; + goto fail3; + }
+ return 0; fail3: @@ -2271,19 +2311,15 @@ static int netif_tcp_helper_build(ci_netif* ni) #endif - #ifndef __KERNEL__ -static int -netif_tcp_helper_restore(ci_netif* ni, unsigned netif_mmap_bytes) +static int netif_tcp_helper_restore(ci_netif* ni, unsigned netif_mmap_bytes) { void* p; int rc; - rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), - OO_MMAP_TYPE_NETIF, - CI_NETIF_MMAP_ID_STATE, netif_mmap_bytes, - OO_MMAP_FLAG_DEFAULT, &p); + rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), OO_MMAP_TYPE_NETIF, + CI_NETIF_MMAP_ID_STATE, netif_mmap_bytes, OO_MMAP_FLAG_DEFAULT, &p); if( rc < 0 ) { LOG_NV(ci_log("netif_tcp_helper_restore: oo_resource_mmap %d", rc)); return rc; @@ -2294,8 +2330,8 @@ netif_tcp_helper_restore(ci_netif* ni, unsigned netif_mmap_bytes) rc = netif_tcp_helper_build(ni); if( rc < 0 ) { ci_log("%s: netif_tcp_helper_build %d", __FUNCTION__, rc); - oo_resource_munmap(ci_netif_get_driver_handle(ni), - ni->state, netif_mmap_bytes); + oo_resource_munmap( + ci_netif_get_driver_handle(ni), ni->state, netif_mmap_bytes); return rc; } @@ -2320,8 +2356,7 @@ ci_inline void netif_tcp_helper_free(ci_netif* ni) static void init_resource_alloc(ci_resource_onload_alloc_t* ra, - const ci_netif_config_opts* opts, - unsigned flags, const char* name) + const ci_netif_config_opts* opts, unsigned flags, const char* name) { memset(ra, 0, sizeof(*ra)); CI_USER_PTR_SET(ra->in_opts, opts); @@ -2335,9 +2370,7 @@ static void init_resource_alloc(ci_resource_onload_alloc_t* ra, ra->in_cluster_size = CITP_OPTS.cluster_size; ra->in_cluster_restart = CITP_OPTS.cluster_restart_opt; strncpy(ra->in_name, CITP_OPTS.cluster_name, CI_CFG_STACK_NAME_LEN); - } - else - if( name != NULL ) + } else if( name != NULL ) strncpy(ra->in_name, name, CI_CFG_STACK_NAME_LEN); ra->in_memfd = -1; @@ -2355,10 +2388,8 @@ static void init_resource_alloc(ci_resource_onload_alloc_t* ra, } -static int -netif_tcp_helper_alloc_u(ef_driver_handle fd, ci_netif* ni, - const ci_netif_config_opts* opts, 
unsigned flags, - const char* stack_name) +static int netif_tcp_helper_alloc_u(ef_driver_handle fd, ci_netif* ni, + const ci_netif_config_opts* opts, unsigned flags, const char* stack_name) { ci_resource_onload_alloc_t ra; int rc; @@ -2373,46 +2404,51 @@ netif_tcp_helper_alloc_u(ef_driver_handle fd, ci_netif* ni, /* oo_resource_alloc's ioctl does an interruptible sleep while waiting for * the cplane. If a non-fatal signal is received while we're asleep, * we get an EINTR and want to try again. */ - while( (rc = oo_resource_alloc(fd, &ra)) == -EINTR ); + while( (rc = oo_resource_alloc(fd, &ra)) == -EINTR ) + ; if( ra.in_memfd >= 0 ) ci_sys_close(ra.in_memfd); if( rc < 0 ) { switch( rc ) { - case -ELIBACC: { - static int once; - if( ! once ) { - once = 1; - ci_log("ERROR: Driver/Library version mismatch detected."); - ci_log("This application will not be accelerated."); - ci_log("HINT: Most likely you need to reload the sfc and onload " - "drivers"); + case -ELIBACC: { + static int once; + if( ! once ) { + once = 1; + ci_log("ERROR: Driver/Library version mismatch detected."); + ci_log("This application will not be accelerated."); + ci_log( + "HINT: Most likely you need to reload the sfc and onload " + "drivers"); + } + break; } - break; - } - case -EEXIST: - /* This is not really an error. It means we "raced" with another thread - * to create a stack with this name, and the other guy won the race. We - * return the error code and further up the call-chain we'll retry to - * attach to the stack with the given name. 
- */ - break; - case -ENODEV: - LOG_E(ci_log("%s: ENODEV.\n" -"This error can occur if:\n" -" - no Solarflare network interfaces are active/UP, or they are running\n" -" packed stream firmware or are disabled, and\n" -" - there are no AF_XDP interfaces registered with sfc_resource\n" -"Please check your configuration.", - __FUNCTION__)); - break; - default: - LOG_E(ci_log("%s: ERROR: Failed to allocate stack (rc=%d)\n" + case -EEXIST: + /* This is not really an error. It means we "raced" with another + * thread to create a stack with this name, and the other guy won the + * race. We return the error code and further up the call-chain we'll + * retry to attach to the stack with the given name. + */ + break; + case -ENODEV: + LOG_E(ci_log( + "%s: ENODEV.\n" + "This error can occur if:\n" + " - no Solarflare network interfaces are active/UP, or they are " + "running\n" + " packed stream firmware or are disabled, and\n" + " - there are no AF_XDP interfaces registered with sfc_resource\n" + "Please check your configuration.", + __FUNCTION__)); + break; + default: + LOG_E( + ci_log("%s: ERROR: Failed to allocate stack (rc=%d)\n" "See kernel messages in dmesg or /var/log/syslog " "for more details of this failure", - __FUNCTION__, rc)); - break; + __FUNCTION__, rc)); + break; } return rc; } @@ -2422,14 +2458,14 @@ netif_tcp_helper_alloc_u(ef_driver_handle fd, ci_netif* ni, */ ni->nic_set = ra.out_nic_set; LOG_NC(ci_log("%s: nic set %" EFRM_NIC_SET_FMT, __FUNCTION__, - efrm_nic_set_pri_arg(&ni->nic_set))); + efrm_nic_set_pri_arg(&ni->nic_set))); ni->mmap_bytes = ra.out_netif_mmap_bytes; /**************************************************************************** * Set up the mem mmaping. 
*/ rc = oo_resource_mmap(fd, OO_MMAP_TYPE_NETIF, CI_NETIF_MMAP_ID_STATE, - ra.out_netif_mmap_bytes, OO_MMAP_FLAG_DEFAULT, &p); + ra.out_netif_mmap_bytes, OO_MMAP_FLAG_DEFAULT, &p); if( rc < 0 ) { LOG_E(ci_log("%s: oo_resource_mmap %d", __FUNCTION__, rc)); return rc; @@ -2441,16 +2477,16 @@ netif_tcp_helper_alloc_u(ef_driver_handle fd, ci_netif* ni, /**************************************************************************** * Final Debug consistency check */ - if( !!(ns->flags & CI_NETIF_FLAG_DEBUG) != CI_DEBUG(1+)0 ) { + if( ! ! (ns->flags & CI_NETIF_FLAG_DEBUG) != CI_DEBUG(1 +) 0 ) { ci_log("ERROR: Driver/Library debug build mismatch detected (%d,%d)", - !!(ns->flags & CI_NETIF_FLAG_DEBUG), CI_DEBUG(1+)0 ); + ! ! (ns->flags & CI_NETIF_FLAG_DEBUG), CI_DEBUG(1 +) 0); rc = -ELIBACC; goto fail; } if( ns->flags & CI_NETIF_FLAG_ONLOAD_UNSUPPORTED ) { - ci_log("*** Warning: use of %s with this adapter is likely", - onload_product); + ci_log( + "*** Warning: use of %s with this adapter is likely", onload_product); ci_log("*** to show suboptimal performance for all cases other than the"); ci_log("*** most trivial benchmarks. Please see your Solarflare"); ci_log("*** representative/reseller to obtain an Onload-capable"); @@ -2488,23 +2524,23 @@ netif_tcp_helper_alloc_u(ef_driver_handle fd, ci_netif* ni, static void ci_netif_sanity_checks(void) { /* These had better be true, or there'll be trouble! 
*/ - CI_BUILD_ASSERT( sizeof(citp_waitable_obj) <= CI_PAGE_SIZE ); - CI_BUILD_ASSERT( sizeof(citp_waitable_obj) <= EP_BUF_SIZE ); - CI_BUILD_ASSERT( EP_BUF_SIZE * EP_BUF_PER_PAGE == CI_PAGE_SIZE ); - CI_BUILD_ASSERT( (1u << CI_SB_FLAG_WAKE_RX_B) == CI_SB_FLAG_WAKE_RX ); - CI_BUILD_ASSERT( (1u << CI_SB_FLAG_WAKE_TX_B) == CI_SB_FLAG_WAKE_TX ); - CI_BUILD_ASSERT( sizeof(ci_ni_aux_mem) == CI_AUX_MEM_SIZE ); + CI_BUILD_ASSERT(sizeof(citp_waitable_obj) <= CI_PAGE_SIZE); + CI_BUILD_ASSERT(sizeof(citp_waitable_obj) <= EP_BUF_SIZE); + CI_BUILD_ASSERT(EP_BUF_SIZE * EP_BUF_PER_PAGE == CI_PAGE_SIZE); + CI_BUILD_ASSERT((1u << CI_SB_FLAG_WAKE_RX_B) == CI_SB_FLAG_WAKE_RX); + CI_BUILD_ASSERT((1u << CI_SB_FLAG_WAKE_TX_B) == CI_SB_FLAG_WAKE_TX); + CI_BUILD_ASSERT(sizeof(ci_ni_aux_mem) == CI_AUX_MEM_SIZE); /* AUX_PER_BUF aux buffers + header = ep buffer, where header is * oo_ep_header and fits in exactly one cache line. */ - CI_BUILD_ASSERT( sizeof(struct oo_ep_header) <= CI_AUX_HEADER_SIZE ); - CI_BUILD_ASSERT( CI_AUX_MEM_SIZE * AUX_PER_BUF + CI_AUX_HEADER_SIZE - <= EP_BUF_SIZE ); + CI_BUILD_ASSERT(sizeof(struct oo_ep_header) <= CI_AUX_HEADER_SIZE); + CI_BUILD_ASSERT( + CI_AUX_MEM_SIZE * AUX_PER_BUF + CI_AUX_HEADER_SIZE <= EP_BUF_SIZE); /* This constraint isn't strictly necessary for functionality, but it makes * debugging/dumping saner */ - CI_BUILD_ASSERT( offsetof(citp_waitable, sb_aflags) + - sizeof(((citp_waitable*)0)->sb_aflags) - <= CI_AUX_HEADER_SIZE ); + CI_BUILD_ASSERT(offsetof(citp_waitable, sb_aflags) + + sizeof(((citp_waitable*) 0)->sb_aflags) <= + CI_AUX_HEADER_SIZE); #ifndef NDEBUG { @@ -2519,7 +2555,7 @@ static void ci_netif_sanity_checks(void) /* Warn if we're wasting memory. 
*/ if( sizeof(citp_waitable_obj) * 2 <= EP_BUF_SIZE ) ci_log("%s: EP_BUF_SIZE=%d larger than necessary (citp_waitable_obj=%zu)", - __FUNCTION__, EP_BUF_SIZE, sizeof(citp_waitable_obj)); + __FUNCTION__, EP_BUF_SIZE, sizeof(citp_waitable_obj)); } #endif @@ -2557,8 +2593,7 @@ static void ci_netif_pkt_reserve_free(ci_netif* ni, oo_pkt_p pkt_list, int n) #if CI_CFG_TCP_OFFLOAD_RECYCLER void ci_netif_send_plugin_app_ctrl(ci_netif* ni, int nic_index, - ci_ip_pkt_fmt* pkt, - const void* payload, size_t paylen) + ci_ip_pkt_fmt* pkt, const void* payload, size_t paylen) { pkt->intf_i = nic_index; pkt->q_id = CI_Q_ID_TCP_APP; @@ -2622,7 +2657,7 @@ static int ci_netif_pkt_prefault(ci_netif* ni) n = ni->packets->n_pkts_allocated; for( i = 0; i < n; ++i ) { pkt = PKT(ni, i); - rc += *(volatile ci_int32*)(&pkt->refcount); + rc += *(volatile ci_int32*) (&pkt->refcount); } } return rc; @@ -2639,9 +2674,8 @@ static void ci_netif_pkt_prefault_reserve(ci_netif* ni) * configured EF_MAX_PACKETS - ensures we will always max out the buffers * when EF_PREFAULT_PACKETS is bigger than both. */ - int target_allocated = CI_MIN( NI_OPTS(ni).prefault_packets, - CI_MAX(NI_OPTS(ni).max_packets, - actual_max_packets) ); + int target_allocated = CI_MIN(NI_OPTS(ni).prefault_packets, + CI_MAX(NI_OPTS(ni).max_packets, actual_max_packets)); int already_reserved = (ni->packets->n_pkts_allocated - ni->packets->n_free); if( ! 
NI_OPTS(ni).prefault_packets ) @@ -2652,10 +2686,7 @@ static void ci_netif_pkt_prefault_reserve(ci_netif* ni) n = ci_netif_pkt_reserve(ni, target_allocated - already_reserved, &pkt_list); if( ni->packets->n_pkts_allocated < target_allocated ) LOG_E(ci_log("%s: Prefaulting only allocated %d of %d (reserved +%d)", - __FUNCTION__, - ni->packets->n_pkts_allocated, - target_allocated, - n)); + __FUNCTION__, ni->packets->n_pkts_allocated, target_allocated, n)); ci_netif_pkt_reserve_free(ni, pkt_list, n); ci_netif_unlock(ni); } @@ -2705,13 +2736,12 @@ static int ci_netif_init(ci_netif* ni, ef_driver_handle fd) rc = ef_onload_driver_open(&init_net_fd, OO_STACK_DEV, 1); if( rc != 0 ) { ci_log("%s: failed to open driver handle: %d", __func__, rc); - } - else { + } else { rc = oo_cp_create(init_net_fd, ni->cplane_init_net, - CITP_OPTS.sync_cplane, CP_CREATE_FLAGS_INIT_NET); + CITP_OPTS.sync_cplane, CP_CREATE_FLAGS_INIT_NET); if( rc != 0 ) { ci_log("%s: failed to get init_net control plane handle: %d", __func__, - rc); + rc); ef_onload_driver_close(init_net_fd); } } @@ -2726,7 +2756,7 @@ static int ci_netif_init(ci_netif* ni, ef_driver_handle fd) return 0; - fail: +fail: free(ni->cplane); return rc; } @@ -2771,9 +2801,9 @@ static void ci_netif_start_helper2(ci_netif* ni) rc = ci_sys_execvpe(ONLOAD_HELPER_NAME, argv, NULL); ci_assert_lt(rc, 0); - (void)rc; /* appease gcc in NDEBUG build */ - ci_log("spawning "ONLOAD_HELPER_NAME" for [%s]: execve() failed: %s", - ni->state->pretty_name, strerror(errno)); + (void) rc; /* appease gcc in NDEBUG build */ + ci_log("spawning " ONLOAD_HELPER_NAME " for [%s]: execve() failed: %s", + ni->state->pretty_name, strerror(errno)); _exit(4); } @@ -2800,16 +2830,16 @@ static void ci_netif_start_helper1(ci_netif* ni) /* Get a new session. 
*/ rc = setsid(); if( rc == -1 ) { - ci_log("spawning "ONLOAD_HELPER_NAME" for [%s]: setsid() failed: %s", - ni->state->pretty_name, strerror(errno)); + ci_log("spawning " ONLOAD_HELPER_NAME " for [%s]: setsid() failed: %s", + ni->state->pretty_name, strerror(errno)); _exit(1); } umask(0); rc = chdir("/"); if( rc == -1 ) { - ci_log("spawning "ONLOAD_HELPER_NAME" for [%s]: chdir(/) failed: %s", - ni->state->pretty_name, strerror(errno)); + ci_log("spawning " ONLOAD_HELPER_NAME " for [%s]: chdir(/) failed: %s", + ni->state->pretty_name, strerror(errno)); _exit(1); } /* The second part of "man 7 daemon" is in onload_helper itself. */ @@ -2822,15 +2852,15 @@ static void ci_netif_start_helper1(ci_netif* ni) * and CLONE_VM is really scary. All the danderous things in man vfork * come from CLONE_VM. */ - rc = my_do_syscall3(__NR_clone, CLONE_FILES | CLONE_VFORK | SIGCHLD, - 0, 0); + rc = my_do_syscall3(__NR_clone, CLONE_FILES | CLONE_VFORK | SIGCHLD, 0, 0); if( rc == 0 ) ci_netif_start_helper2(ni); if( rc < 0 ) { - ci_log("spawning "ONLOAD_HELPER_NAME" for [%s]: " + ci_log("spawning " ONLOAD_HELPER_NAME + " for [%s]: " "second clone() failed %s", - ni->state->pretty_name, strerror(errno)); + ni->state->pretty_name, strerror(errno)); _exit(2); } @@ -2857,9 +2887,10 @@ static int ci_netif_start_helper(ci_netif* ni) ci_netif_start_helper1(ni); if( rc < 0 ) { - ci_log("spawning "ONLOAD_HELPER_NAME" for [%s]: " + ci_log("spawning " ONLOAD_HELPER_NAME + " for [%s]: " "first clone() failed %s", - ni->state->pretty_name, strerror(errno)); + ni->state->pretty_name, strerror(errno)); _exit(1); } @@ -2867,23 +2898,23 @@ static int ci_netif_start_helper(ci_netif* ni) if( WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0 ) return 0; - LOG_S(ci_log("%s: spawning "ONLOAD_HELPER_NAME" for [%s]: exit status=%d", - __func__, ni->state->pretty_name, wstatus)); + LOG_S(ci_log("%s: spawning " ONLOAD_HELPER_NAME " for [%s]: exit status=%d", + __func__, ni->state->pretty_name, wstatus)); return 
-1; } #endif -int ci_netif_ctor(ci_netif* ni, ef_driver_handle fd, const char* stack_name, - unsigned flags) +int ci_netif_ctor( + ci_netif* ni, ef_driver_handle fd, const char* stack_name, unsigned flags) { ci_netif_config_opts* opts; struct oo_per_thread* per_thread; int rc; per_thread = oo_per_thread_get(); - opts = per_thread->thread_local_netif_opts != NULL? - per_thread->thread_local_netif_opts: - &ci_cfg_opts.netif_opts; + opts = per_thread->thread_local_netif_opts != NULL + ? per_thread->thread_local_netif_opts + : &ci_cfg_opts.netif_opts; ci_assert(ni); ci_netif_sanity_checks(); @@ -2893,8 +2924,8 @@ int ci_netif_ctor(ci_netif* ni, ef_driver_handle fd, const char* stack_name, return rc; /*************************************** - * Allocate kernel helper and link into netif - */ + * Allocate kernel helper and link into netif + */ if( (rc = netif_tcp_helper_alloc_u(fd, ni, opts, flags, stack_name)) < 0 ) { ci_netif_deinit(ni); return rc; @@ -2919,7 +2950,7 @@ int ci_netif_ctor(ci_netif* ni, ef_driver_handle fd, const char* stack_name, return 0; } -#else /* __KERNEL__ */ +#else /* __KERNEL__ */ int ci_netif_set_rxq_limit(ci_netif* ni) { @@ -2931,7 +2962,8 @@ int ci_netif_set_rxq_limit(ci_netif* ni) * RX ring at the expense of the last. 
*/ n_intf = 0; - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { ef_vi* vi = ci_netif_vi(ni, intf_i); rxq_cap = ef_vi_receive_capacity(vi); ++n_intf; @@ -2947,23 +2979,23 @@ int ci_netif_set_rxq_limit(ci_netif* ni) fill_limit = max_ring_pkts / n_intf; if( fill_limit < NI_OPTS(ni).rxq_limit ) { if( fill_limit < rxq_cap ) - LOG_W(ci_log("WARNING: "N_FMT "RX ring fill level reduced from %d to %d " - "max_ring_pkts=%d rxq_cap=%d n_intf=%d", - N_PRI_ARGS(ni), NI_OPTS(ni).rxq_limit, fill_limit, - max_ring_pkts, rxq_cap, n_intf)); + LOG_W( + ci_log("WARNING: " N_FMT "RX ring fill level reduced from %d to %d " + "max_ring_pkts=%d rxq_cap=%d n_intf=%d", + N_PRI_ARGS(ni), NI_OPTS(ni).rxq_limit, fill_limit, max_ring_pkts, + rxq_cap, n_intf)); ni->opts.rxq_limit = fill_limit; ni->state->opts.rxq_limit = fill_limit; } if( ni->nic_n == 0 ) { /* we do not use .rxq_limit, but let's make all checkers happy */ - NI_OPTS(ni).rxq_limit = CI_CFG_RX_DESC_BATCH; - } - else if( NI_OPTS(ni).rxq_limit < NI_OPTS(ni).rxq_min ) { + NI_OPTS(ni).rxq_limit = CI_CFG_RX_DESC_BATCH; + } else if( NI_OPTS(ni).rxq_limit < NI_OPTS(ni).rxq_min ) { /* Do not allow user to create a stack that is too severely * constrained. 
*/ - LOG_E(ci_log("ERROR: "N_FMT "rxq_limit=%d is too small (rxq_min=%d)", - N_PRI_ARGS(ni), NI_OPTS(ni).rxq_limit, NI_OPTS(ni).rxq_min); + LOG_E(ci_log("ERROR: " N_FMT "rxq_limit=%d is too small (rxq_min=%d)", + N_PRI_ARGS(ni), NI_OPTS(ni).rxq_limit, NI_OPTS(ni).rxq_min); ci_log("HINT: Use a larger value for EF_RXQ_LIMIT or " "EF_MAX_RX_PACKETS or EF_MAX_PACKETS")); rc = -ENOMEM; @@ -2979,7 +3011,8 @@ int ci_netif_set_rxq_limit(ci_netif* ni) static void ci_netif_af_xdp_post_fill(ci_netif* ni) { - /* some ZC UMEM implementation can take a jiffy to schedule HW rx ring refill */ + /* some ZC UMEM implementation can take a jiffy to schedule HW rx ring refill + */ /* FIXME AF_XDP: fill umem rings before binding to umem */ if( ni->flags & CI_NETIF_FLAG_AF_XDP ) usleep_range(TICK_USEC * 2, TICK_USEC * 3); @@ -2991,7 +3024,8 @@ static int __ci_netif_init_fill_rx_rings(ci_netif* ni) * ci_netif_rx_post(). */ int intf_i, rxq_limit = ni->state->rxq_limit; - OO_STACK_FOR_EACH_INTF_I(ni, intf_i) { + OO_STACK_FOR_EACH_INTF_I(ni, intf_i) + { int vi_i; int n_posted = 0; for( vi_i = 0; vi_i < ci_netif_num_vis(ni); ++vi_i ) { @@ -3000,7 +3034,7 @@ static int __ci_netif_init_fill_rx_rings(ci_netif* ni) if( ef_vi_receive_fill_level(vi) < rxq_limit ) return -ENOMEM; } - (void)n_posted; + (void) n_posted; #if CI_CFG_TCP_OFFLOAD_RECYCLER /* See ci_netif_rx_post_all_batch() for the description of what's going on * here */ @@ -3019,7 +3053,7 @@ int ci_netif_init_fill_rx_rings(ci_netif* ni) rc = ci_tcp_helper_more_bufs(ni); if( ni->packets->n_free == 0 ) { LOG_E(ci_log("%s: [%d] ERROR: failed to allocate initial packet set: %d", - __func__, NI_ID(ni), rc)); + __func__, NI_ID(ni), rc)); return -ENOMEM; } ni->packets->id = 0; @@ -3038,12 +3072,15 @@ int ci_netif_init_fill_rx_rings(ci_netif* ni) n_accounted += ni->state->mem_pressure_pkt_pool_n; if( n_accounted < n_requested ) { if( NI_OPTS(ni).prealloc_packets ) - LOG_E(ci_log("%s: ERROR: Insufficient packet buffers available for " - 
"EF_PREALLOC_PACKETS=1 EF_MAX_PACKETS=%d got %d", - __FUNCTION__, n_requested, n_accounted)); + LOG_E( + ci_log("%s: ERROR: Insufficient packet buffers available for " + "EF_PREALLOC_PACKETS=1 EF_MAX_PACKETS=%d got %d", + __FUNCTION__, n_requested, n_accounted)); else - LOG_E(ci_log("%s: ERROR: Insufficient packet buffers available for " - "EF_MIN_FREE_PACKETS=%d", __FUNCTION__, n_requested)); + LOG_E( + ci_log("%s: ERROR: Insufficient packet buffers available for " + "EF_MIN_FREE_PACKETS=%d", + __FUNCTION__, n_requested)); return -ENOMEM; } @@ -3062,14 +3099,17 @@ int ci_netif_init_fill_rx_rings(ci_netif* ni) if( (rc = __ci_netif_init_fill_rx_rings(ni)) < 0 || ni->state->rxq_low ) { rc = -ENOMEM; if( lim < NI_OPTS(ni).rxq_min ) - LOG_E(ci_log("%s: ERROR: Insufficient packet buffers to fill RX rings " - "(rxq_limit=%d rxq_low=%d rxq_min=%d)", __FUNCTION__, - NI_OPTS(ni).rxq_limit, ni->state->rxq_low, - NI_OPTS(ni).rxq_min)); + LOG_E( + ci_log("%s: ERROR: Insufficient packet buffers to fill RX rings " + "(rxq_limit=%d rxq_low=%d rxq_min=%d)", + __FUNCTION__, NI_OPTS(ni).rxq_limit, ni->state->rxq_low, + NI_OPTS(ni).rxq_min)); #if CI_CFG_PKTS_AS_HUGE_PAGES else if( NI_OPTS(ni).huge_pages == OO_IOBUFSET_FLAG_HUGE_PAGE_FORCE ) - LOG_E(ci_log("%s: ERROR: Failed to allocate huge pages to fill RX " - "rings", __FUNCTION__)); + LOG_E( + ci_log("%s: ERROR: Failed to allocate huge pages to fill RX " + "rings", + __FUNCTION__)); else #endif rc = 0; @@ -3084,7 +3124,7 @@ int ci_netif_init_fill_rx_rings(ci_netif* ni) * packets as indicated by EF_MIN_FREE_PACKETS */ ci_netif_pkt_reserve_free(ni, pkt_list, n_reserved); } - ni->state->rxq_limit = NI_OPTS(ni).rxq_limit; + ni->state->rxq_limit = NI_OPTS(ni).rxq_limit; #if CI_CFG_PKTS_AS_HUGE_PAGES /* Initial packets allocated: allow other packets to be in non-huge pages @@ -3097,7 +3137,6 @@ int ci_netif_init_fill_rx_rings(ci_netif* ni) } - #endif #ifndef __KERNEL__ @@ -3114,7 +3153,6 @@ int ci_netif_dtor(ci_netif* ni) } - static int 
install_stack_by_id(ci_fd_t fp, unsigned id, bool is_service) { oo_stack_lookup_and_attach_t op; @@ -3181,8 +3219,8 @@ int ci_netif_restore_name(ci_netif* ni, const char* name) goto fail1; if( (rc = install_stack_by_name(fd2, name)) < 0 ) goto fail2; - if( (rc = fd = ci_tcp_helper_stack_attach(fd2, - &ni->nic_set, &map_size)) < 0 ) + if( (rc = fd = ci_tcp_helper_stack_attach(fd2, &ni->nic_set, &map_size)) < + 0 ) goto fail3; if( (rc = ci_netif_restore(ni, fd, map_size)) < 0 ) goto fail4; @@ -3192,12 +3230,12 @@ int ci_netif_restore_name(ci_netif* ni, const char* name) return 0; - fail4: +fail4: ef_onload_driver_close(fd); - fail3: - fail2: +fail3: +fail2: ef_onload_driver_close(fd2); - fail1: +fail1: return rc; } @@ -3206,19 +3244,20 @@ int ci_netif_restore_name(ci_netif* ni, const char* name) * a netif to a new process (e.g. if the fd is used after a fork/exec). For * now we still need the handle but this parameter may be removed one day. */ -int ci_netif_restore(ci_netif* ni, ef_driver_handle fd, - unsigned netif_mmap_bytes) +int ci_netif_restore( + ci_netif* ni, ef_driver_handle fd, unsigned netif_mmap_bytes) { int rc = 0; ci_assert(ni); - + LOG_NV(ci_log("%s: fd=%d", __FUNCTION__, fd)); CI_TRY_RET(ci_netif_init(ni, fd)); - if( (rc = netif_tcp_helper_restore(ni, netif_mmap_bytes)) != 0) { + if( (rc = netif_tcp_helper_restore(ni, netif_mmap_bytes)) != 0 ) { ci_netif_deinit(ni); - ci_log("netif_tcp_helper_restore returned %d at %s:%d", rc, __FILE__, __LINE__); \ + ci_log("netif_tcp_helper_restore returned %d at %s:%d", rc, __FILE__, + __LINE__); return rc; } diff --git a/src/lib/transport/ip/netif_pkt.c b/src/lib/transport/ip/netif_pkt.c index b42c134ad..29ee20260 100644 --- a/src/lib/transport/ip/netif_pkt.c +++ b/src/lib/transport/ip/netif_pkt.c @@ -12,7 +12,7 @@ /*! \cidoxg_lib_transport_ip */ #include "ip_internal.h" -#if !defined(__KERNEL__) +#if ! 
defined(__KERNEL__) #include #include @@ -23,9 +23,9 @@ ci_ip_pkt_fmt* __ci_netif_pkt(ci_netif* ni, unsigned id) int rc; ci_ip_pkt_fmt* pkt = 0; unsigned setid = id >> CI_CFG_PKTS_PER_SET_S; - void *p; + void* p; - ci_assert(id != (unsigned)(-1)); + ci_assert(id != (unsigned) (-1)); pthread_mutex_lock(&citp_pkt_map_lock); /* Recheck the condition now we have the lock */ @@ -36,44 +36,40 @@ ci_ip_pkt_fmt* __ci_netif_pkt(ci_netif* ni, unsigned id) #if CI_CFG_PKTS_AS_HUGE_PAGES if( ni->packets->set[setid].shm_id >= 0 ) { p = shmat(ni->packets->set[setid].shm_id, NULL, 0); - if( p == (void *)-1) { + if( p == (void*) -1 ) { if( errno == EACCES ) { - ci_log("Failed to mmap packet buffer for [%s] with errno=EACCES.\n" - "Probably, you are using this stack from processes with " - "different UIDs.\n" - "Try either allowing user stack sharing: EF_SHARE_WITH=-1\n" - "or turn off huge pages: EF_USE_HUGE_PAGES=0\n", - ni->state->pretty_name); - } - else { + ci_log( + "Failed to mmap packet buffer for [%s] with errno=EACCES.\n" + "Probably, you are using this stack from processes with " + "different UIDs.\n" + "Try either allowing user stack sharing: EF_SHARE_WITH=-1\n" + "or turn off huge pages: EF_USE_HUGE_PAGES=0\n", + ni->state->pretty_name); + } else { ci_log("%s: shmat(0x%x) failed for pkt set %d (%d)", __FUNCTION__, - ni->packets->set[setid].shm_id, setid, -errno); + ni->packets->set[setid].shm_id, setid, -errno); } goto out; } - } - else + } else #endif { - rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), - OO_MMAP_TYPE_NETIF, - CI_NETIF_MMAP_ID_PKTSET(setid), - CI_CFG_PKT_BUF_SIZE * PKTS_PER_SET, - OO_MMAP_FLAG_POPULATE, - &p); + rc = oo_resource_mmap(ci_netif_get_driver_handle(ni), OO_MMAP_TYPE_NETIF, + CI_NETIF_MMAP_ID_PKTSET(setid), CI_CFG_PKT_BUF_SIZE * PKTS_PER_SET, + OO_MMAP_FLAG_POPULATE, &p); if( rc < 0 ) { - ci_log("%s: oo_resource_mmap for pkt set %d failed (%d)", - __FUNCTION__, setid, rc); + ci_log("%s: oo_resource_mmap for pkt set %d failed (%d)", 
__FUNCTION__, + setid, rc); goto out; } } ci_assert(p); ni->pkt_bufs[setid] = p; - got_pkt_out: +got_pkt_out: pkt = (ci_ip_pkt_fmt*) __PKT_BUF(ni, id); - out: +out: pthread_mutex_unlock(&citp_pkt_map_lock); if( CI_UNLIKELY(pkt == NULL) ) { ci_log("Failed to map packets!"); @@ -89,8 +85,8 @@ ci_ip_pkt_fmt* __ci_netif_pkt(ci_netif* ni, unsigned id) int ci_netif_pktset_best(ci_netif* ni) { int i, ret = -1, n_free = 0; - - for( i = 0; i < ni->packets->sets_n; i ++ ) { + + for( i = 0; i < ni->packets->sets_n; i++ ) { if( ni->packets->set[i].n_free > n_free ) { n_free = ni->packets->set[i].n_free; ret = i; @@ -119,7 +115,7 @@ ci_ip_pkt_fmt* ci_netif_pkt_alloc_slow(ci_netif* ni, int flags) if( (flags & CI_PKT_ALLOC_USE_NONB) || (ni->packets->n_free == 0 && - ni->packets->sets_n == ni->packets->sets_max) ) + ni->packets->sets_n == ni->packets->sets_max) ) if( (pkt = ci_netif_pkt_alloc_nonb(ni)) != NULL ) { --ni->state->n_async_pkts; CITP_STATS_NETIF_INC(ni, pkt_nonb_steal); @@ -128,19 +124,19 @@ ci_ip_pkt_fmt* ci_netif_pkt_alloc_slow(ci_netif* ni, int flags) } if( flags & CI_PKT_ALLOC_FOR_TCP_TX ) - if(CI_UNLIKELY( ! ci_netif_pkt_tx_may_alloc(ni) )) + if( CI_UNLIKELY(! 
ci_netif_pkt_tx_may_alloc(ni)) ) return NULL; ci_assert_equal(ni->packets->id, NI_PKT_SET(ni)); ci_assert_equal(ni->packets->set[NI_PKT_SET(ni)].n_free, 0); ci_assert(OO_PP_IS_NULL(ni->packets->set[NI_PKT_SET(ni)].free)); #if OO_DO_STACK_POLL - again: +again: #endif bufset_id = ci_netif_pktset_best(ni); if( bufset_id != -1 ) { - ci_netif_pkt_set_change(ni, bufset_id, - ci_netif_pkt_set_is_underfilled(ni, bufset_id)); + ci_netif_pkt_set_change( + ni, bufset_id, ci_netif_pkt_set_is_underfilled(ni, bufset_id)); return ci_netif_pkt_get(ni, bufset_id); } @@ -169,14 +165,15 @@ ci_ip_pkt_fmt* ci_netif_pkt_alloc_slow(ci_netif* ni, int flags) } -ci_inline void __ci_dbg_poison_header(ci_ip_pkt_fmt* pkt, ci_uint32 pattern) +ci_inline void __ci_dbg_poison_header(ci_ip_pkt_fmt* pkt, ci_uint32 pattern) { unsigned i; - ci_uint32* pkt_u32 = (ci_uint32 *)oo_ether_hdr(pkt); + ci_uint32* pkt_u32 = (ci_uint32*) oo_ether_hdr(pkt); ci_uint32 patn_u32 = CI_BSWAP_BE32(pattern); - ci_uint32 len = (ETH_HLEN + ETH_VLAN_HLEN + 2) + sizeof(ci_ip4_hdr) + - sizeof(ci_tcp_hdr); - for( i = 0; i < len/4; i++ ) pkt_u32[i] = patn_u32; + ci_uint32 len = + (ETH_HLEN + ETH_VLAN_HLEN + 2) + sizeof(ci_ip4_hdr) + sizeof(ci_tcp_hdr); + for( i = 0; i < len / 4; i++ ) + pkt_u32[i] = patn_u32; } @@ -190,33 +187,33 @@ void ci_netif_set_merge_atomic_flag(ci_netif* ni) if( val & CI_EPLOCK_NETIF_MERGE_ATOMIC_COUNTERS ) break; else if( ef_eplock_set_flags_if_locked( - &ni->state->lock, - CI_EPLOCK_NETIF_MERGE_ATOMIC_COUNTERS) ) { + &ni->state->lock, CI_EPLOCK_NETIF_MERGE_ATOMIC_COUNTERS) ) { break; - } - else if( ci_netif_trylock(ni) ) { - ef_eplock_holder_set_flag(&ni->state->lock, - CI_EPLOCK_NETIF_MERGE_ATOMIC_COUNTERS); + } else if( ci_netif_trylock(ni) ) { + ef_eplock_holder_set_flag( + &ni->state->lock, CI_EPLOCK_NETIF_MERGE_ATOMIC_COUNTERS); ci_netif_unlock(ni); break; } if( iter-- == 0 ) { - ci_log("%s: [%d] failed to set MERGE_ATOMIC_COUNTERS flag, " - "something nasty is going on with the shared state " - 
"of the Onload stack", __func__, NI_ID(ni)); + ci_log( + "%s: [%d] failed to set MERGE_ATOMIC_COUNTERS flag, " + "something nasty is going on with the shared state " + "of the Onload stack", + __func__, NI_ID(ni)); break; } } } #endif -void ci_netif_pkt_free(ci_netif* ni, ci_ip_pkt_fmt* pkt - CI_KERNEL_ARG(int* p_netif_is_locked)) +void ci_netif_pkt_free( + ci_netif* ni, ci_ip_pkt_fmt* pkt CI_KERNEL_ARG(int* p_netif_is_locked)) { ci_assert(pkt->refcount == 0); #ifdef __KERNEL__ ci_assert(p_netif_is_locked); - ci_assert(!*p_netif_is_locked || ci_netif_is_locked(ni)); + ci_assert(! *p_netif_is_locked || ci_netif_is_locked(ni)); #else ci_assert(ci_netif_is_locked(ni)); #endif @@ -224,8 +221,8 @@ void ci_netif_pkt_free(ci_netif* ni, ci_ip_pkt_fmt* pkt if( OO_PP_NOT_NULL(pkt->frag_next) ) { #ifdef __KERNEL__ - ci_netif_pkt_release_mnl(ni, PKT_CHK(ni, pkt->frag_next), - p_netif_is_locked); + ci_netif_pkt_release_mnl( + ni, PKT_CHK(ni, pkt->frag_next), p_netif_is_locked); #else ci_netif_pkt_release(ni, PKT_CHK(ni, pkt->frag_next)); #endif @@ -233,15 +230,14 @@ void ci_netif_pkt_free(ci_netif* ni, ci_ip_pkt_fmt* pkt } #if defined(__KERNEL__) && OO_DO_STACK_POLL - if( CI_UNLIKELY( (! *p_netif_is_locked) && - (~pkt->flags & CI_PKT_FLAG_NONB_POOL)) ) { + if( CI_UNLIKELY( + (! *p_netif_is_locked) && (~pkt->flags & CI_PKT_FLAG_NONB_POOL)) ) { /* It is useless to call ci_netif_lock(), because we can get here only * if previous call to ci_netif_lock() have failed with -ERESTARTSYS. * But we can try trylock(). */ if( ci_netif_trylock(ni) ) { *p_netif_is_locked = 1; - } - else { + } else { /* We've failed to get the lock. Release the packet to non-blocking * pool - it is better than nothing. 
*/ pkt->flags |= CI_PKT_FLAG_NONB_POOL; @@ -257,17 +253,16 @@ void ci_netif_pkt_free(ci_netif* ni, ci_ip_pkt_fmt* pkt __ci_dbg_poison_header(pkt, 0xDECEA5ED); #endif - if( pkt->flags & CI_PKT_FLAG_NONB_POOL ) { + if( pkt->flags & CI_PKT_FLAG_NONB_POOL ) { ci_netif_pkt_free_nonb_list(ni, OO_PKT_P(pkt), pkt); CI_NETIF_STATE_MOD(ni, *p_netif_is_locked, n_async_pkts, +); - } - else { + } else { ci_assert(ci_netif_is_locked(ni)); ci_netif_pkt_put(ni, pkt); } #if defined(__KERNEL__) && OO_DO_STACK_POLL - if( CI_UNLIKELY( ! *p_netif_is_locked ) ) { + if( CI_UNLIKELY(! *p_netif_is_locked) ) { ci_netif_set_merge_atomic_flag(ni); } #endif @@ -275,7 +270,8 @@ void ci_netif_pkt_free(ci_netif* ni, ci_ip_pkt_fmt* pkt #if OO_DO_STACK_POLL -int ci_netif_pkt_try_to_free(ci_netif* ni, int desperation, int stop_once_freed_n) +int ci_netif_pkt_try_to_free( + ci_netif* ni, int desperation, int stop_once_freed_n) { unsigned id; int freed = 0; @@ -288,8 +284,9 @@ int ci_netif_pkt_try_to_free(ci_netif* ni, int desperation, int stop_once_freed_ * that we have an array. This assertion should give some protection * against changes that break our assumption. 
*/ - ci_assert(&ni->state->stats.pkt_scramble2 - &ni->state->stats.pkt_scramble0 - == CI_NETIF_PKT_TRY_TO_FREE_MAX_DESP); + ci_assert( + &ni->state->stats.pkt_scramble2 - &ni->state->stats.pkt_scramble0 == + CI_NETIF_PKT_TRY_TO_FREE_MAX_DESP); CITP_STATS_NETIF(++(&ni->state->stats.pkt_scramble0)[desperation]); for( id = 0; id < ni->state->n_ep_bufs; ++id ) { @@ -305,17 +302,15 @@ int ci_netif_pkt_try_to_free(ci_netif* ni, int desperation, int stop_once_freed_ } -int ci_netif_pkt_alloc_block(ci_netif* ni, ci_sock_cmn* s, - int* p_netif_locked, - int can_block, - ci_ip_pkt_fmt** p_pkt) +int ci_netif_pkt_alloc_block(ci_netif* ni, ci_sock_cmn* s, int* p_netif_locked, + int can_block, ci_ip_pkt_fmt** p_pkt) { int was_locked = *p_netif_locked; ci_ip_pkt_fmt* pkt; int rc; ci_tcp_state* ts = NULL; - again: +again: if( *p_netif_locked == 0 ) { if( (pkt = ci_netif_pkt_alloc_nonb(ni)) ) { *p_pkt = pkt; @@ -323,13 +318,12 @@ int ci_netif_pkt_alloc_block(ci_netif* ni, ci_sock_cmn* s, } if( ! ci_netif_trylock(ni) ) { rc = ci_netif_lock(ni); - if(CI_UNLIKELY( ci_netif_lock_was_interrupted(rc) )) + if( CI_UNLIKELY(ci_netif_lock_was_interrupted(rc)) ) return rc; - CITP_STATS_NETIF_INC(ni, udp_send_ni_lock_contends/*??*/); + CITP_STATS_NETIF_INC(ni, udp_send_ni_lock_contends /*??*/); } *p_netif_locked = 1; } - if( s->b.state & CI_TCP_STATE_TCP_CONN ) ts = SOCK_TO_TCP(s); @@ -345,9 +339,9 @@ int ci_netif_pkt_alloc_block(ci_netif* ni, ci_sock_cmn* s, return 0; } - if( !can_block ) + if( ! 
can_block ) return -ENOBUFS; - + *p_netif_locked = 0; rc = ci_netif_pkt_wait(ni, s, CI_SLEEP_NETIF_LOCKED); if( ci_netif_pkt_wait_was_interrupted(rc) ) @@ -383,8 +377,7 @@ int ci_netif_pkt_pass_to_kernel(ci_netif* ni, ci_ip_pkt_fmt* pkt) ci_assert(OO_PP_IS_NULL(ni->state->kernel_packets_tail)); ci_assert_equal(ni->state->kernel_packets_pending, 0); ni->state->kernel_packets_head = OO_PKT_P(pkt); - } - else { + } else { PKT_CHK(ni, ni->state->kernel_packets_tail)->next = OO_PKT_P(pkt); } ++ni->state->kernel_packets_pending; diff --git a/src/lib/transport/ip/netif_table.c b/src/lib/transport/ip/netif_table.c index d7f071b9d..a150c6a0f 100644 --- a/src/lib/transport/ip/netif_table.c +++ b/src/lib/transport/ip/netif_table.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2003-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author djr ** \brief Software implemented endpoint lookup. @@ -10,7 +10,7 @@ ** *//* \**************************************************************************/ - + /*! 
\cidoxg_lib_transport_ip */ #include @@ -36,9 +36,9 @@ #define FILTER_TABLE_STATE_MASK (~FILTER_TABLE_ID_MASK) enum { OCCUPIED_PREFERRED = 0, - OCCUPIED_REHASHED = (1u << FILTER_TABLE_ID_BITS), - EMPTY = (2u << FILTER_TABLE_ID_BITS), - TOMBSTONE = (3u << FILTER_TABLE_ID_BITS), + OCCUPIED_REHASHED = (1u << FILTER_TABLE_ID_BITS), + EMPTY = (2u << FILTER_TABLE_ID_BITS), + TOMBSTONE = (3u << FILTER_TABLE_ID_BITS), }; ci_inline ci_uint32 STATE(ci_netif_filter_table_entry_fast* entry) @@ -59,33 +59,32 @@ ci_inline ci_uint32 ID(ci_netif_filter_table_entry_fast* entry) return __ID(entry); } -ci_inline void -set_entry_state(ci_netif_filter_table_entry_fast* entry, ci_uint32 state) +ci_inline void set_entry_state( + ci_netif_filter_table_entry_fast* entry, ci_uint32 state) { entry->__id_and_state = __ID(entry) | state; } #if OO_DO_STACK_POLL -ci_inline void -set_entry_id(ci_netif_filter_table_entry_fast* entry, ci_uint32 id) +ci_inline void set_entry_id( + ci_netif_filter_table_entry_fast* entry, ci_uint32 id) { ci_assert_nflags(id, FILTER_TABLE_STATE_MASK); entry->__id_and_state = STATE(entry) | id; } -#define CI_NETIF_FILTER_ID_TO_SOCK_ID(ni, filter_id) \ +#define CI_NETIF_FILTER_ID_TO_SOCK_ID(ni, filter_id) \ OO_SP_FROM_INT((ni), ID(&(ni)->filter_table->table[filter_id])) #if CI_CFG_IPV6 -#define CI_NETIF_IP6_FILTER_ID_TO_SOCK_ID(ni, filter_id) \ +#define CI_NETIF_IP6_FILTER_ID_TO_SOCK_ID(ni, filter_id) \ OO_SP_FROM_INT((ni), (ni)->ip6_filter_table->table[filter_id].id) #endif /* Returns table entry index, or -1 if lookup failed. 
*/ -static int -ci_ip4_netif_filter_lookup(ci_netif* netif, unsigned laddr, unsigned lport, - unsigned raddr, unsigned rport, unsigned protocol) +static int ci_ip4_netif_filter_lookup(ci_netif* netif, unsigned laddr, + unsigned lport, unsigned raddr, unsigned rport, unsigned protocol) { unsigned hash1, hash2 = 0; ci_netif_filter_table* tbl; @@ -96,16 +95,15 @@ ci_ip4_netif_filter_lookup(ci_netif* netif, unsigned laddr, unsigned lport, ci_assert(netif->filter_table); tbl = netif->filter_table; - hash1 = __onload_hash1(tbl->table_size_mask, laddr, lport, - raddr, rport, protocol); + hash1 = __onload_hash1( + tbl->table_size_mask, laddr, lport, raddr, rport, protocol); first = hash1; - LOG_NV(log("tbl_lookup: %s %s:%u->%s:%u hash=%u:%u at=%u", - CI_IP_PROTOCOL_STR(protocol), - ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport), - ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport), - first, __onload_hash2(laddr, lport, raddr, rport, protocol), - hash1)); + LOG_NT(log("tbl_lookup: %s %s:%u->%s:%u hash=%u:%u at=%u", + CI_IP_PROTOCOL_STR(protocol), ip_addr_str(laddr), + (unsigned) CI_BSWAP_BE16(lport), ip_addr_str(raddr), + (unsigned) CI_BSWAP_BE16(rport), first, + __onload_hash2(laddr, lport, raddr, rport, protocol), hash1)); while( 1 ) { ci_netif_filter_table_entry_fast* entry = &tbl->table[hash1]; @@ -117,14 +115,13 @@ ci_ip4_netif_filter_lookup(ci_netif* netif, unsigned laddr, unsigned lport, if( CI_LIKELY(OCCUPIED(entry)) ) { ci_sock_cmn* s = ID_TO_SOCK(netif, ID(entry)); - if( ((laddr - entry->laddr ) | - (lport - entry_ext->lport ) | - (raddr - sock_raddr_be32(s)) | - (rport - sock_rport_be16(s)) | - (protocol - sock_protocol(s) )) == 0 ) - return hash1; + if( ((laddr - entry->laddr) | (lport - entry_ext->lport) | + (raddr - sock_raddr_be32(s)) | (rport - sock_rport_be16(s)) | + (protocol - sock_protocol(s))) == 0 ) + return hash1; } - if( STATE(entry) == EMPTY ) break; + if( STATE(entry) == EMPTY ) + break; /* We defer calculating hash2 until it's needed, just 
to make the fast * case that little bit faster. */ if( hash1 == first ) @@ -132,8 +129,8 @@ ci_ip4_netif_filter_lookup(ci_netif* netif, unsigned laddr, unsigned lport, hash1 = (hash1 + hash2) & tbl->table_size_mask; if( hash1 == first ) { LOG_E(ci_log(FN_FMT "ERROR: LOOP %s:%u->%s:%u hash=%u:%u", - FN_PRI_ARGS(netif), ip_addr_str(laddr), lport, - ip_addr_str(raddr), rport, hash1, hash2)); + FN_PRI_ARGS(netif), ip_addr_str(laddr), lport, ip_addr_str(raddr), + rport, hash1, hash2)); return -ELOOP; } } @@ -144,46 +141,42 @@ ci_ip4_netif_filter_lookup(ci_netif* netif, unsigned laddr, unsigned lport, /* Sometimes user is not interested in particular entry id; they may be * interested in yes/no. This functions looks up in both IPv4 and IPv6 * tables and returns the answer. */ -oo_sp -ci_netif_filter_lookup(ci_netif* netif, int af_space, - ci_addr_t laddr, unsigned lport, - ci_addr_t raddr, unsigned rport, - unsigned protocol) +oo_sp ci_netif_filter_lookup(ci_netif* netif, int af_space, ci_addr_t laddr, + unsigned lport, ci_addr_t raddr, unsigned rport, unsigned protocol) { int rc = -ENOENT; #if CI_CFG_IPV6 if( IS_AF_SPACE_IP6(af_space) ) { - rc = ci_ip6_netif_filter_lookup(netif, laddr, lport, - raddr, rport, protocol); + rc = ci_ip6_netif_filter_lookup( + netif, laddr, lport, raddr, rport, protocol); if( rc >= 0 ) return CI_NETIF_IP6_FILTER_ID_TO_SOCK_ID(netif, rc); } if( IS_AF_SPACE_IP4(af_space) ) #endif - rc = ci_ip4_netif_filter_lookup(netif, laddr.ip4, lport, - raddr.ip4, rport, protocol); + rc = ci_ip4_netif_filter_lookup( + netif, laddr.ip4, lport, raddr.ip4, rport, protocol); if( rc >= 0 ) return CI_NETIF_FILTER_ID_TO_SOCK_ID(netif, rc); return OO_SP_NULL; } -int ci_netif_listener_lookup(ci_netif* netif, int af_space, - ci_addr_t laddr, unsigned lport) +int ci_netif_listener_lookup( + ci_netif* netif, int af_space, ci_addr_t laddr, unsigned lport) { - oo_sp sock = ci_netif_filter_lookup(netif, af_space, laddr, lport, - addr_any, 0, IPPROTO_TCP); + oo_sp sock = 
ci_netif_filter_lookup( + netif, af_space, laddr, lport, addr_any, 0, IPPROTO_TCP); if( OO_SP_IS_NULL(sock) ) - sock = ci_netif_filter_lookup(netif, af_space, addr_any, lport, - addr_any, 0, IPPROTO_TCP); + sock = ci_netif_filter_lookup( + netif, af_space, addr_any, lport, addr_any, 0, IPPROTO_TCP); return sock; } -ci_uint32 -ci_netif_filter_hash(ci_netif* ni, ci_addr_t laddr, unsigned lport, - ci_addr_t raddr, unsigned rport, unsigned protocol) +ci_uint32 ci_netif_filter_hash(ci_netif* ni, ci_addr_t laddr, unsigned lport, + ci_addr_t raddr, unsigned rport, unsigned protocol) { return onload_hash3(laddr, lport, raddr, rport, protocol); } @@ -191,11 +184,10 @@ ci_netif_filter_hash(ci_netif* ni, ci_addr_t laddr, unsigned lport, ci_inline int /*bool*/ handle_entry(ci_netif* ni, ci_netif_filter_table_entry_fast* entry, - ci_netif_filter_table_entry_ext* entry_ext, - unsigned laddr, unsigned lport, unsigned raddr, unsigned rport, - unsigned protocol, int intf_i, int vlan, - int (*callback)(ci_sock_cmn*, void*), void* callback_arg, - int /*bool*/ check_lport) + ci_netif_filter_table_entry_ext* entry_ext, unsigned laddr, unsigned lport, + unsigned raddr, unsigned rport, unsigned protocol, int intf_i, int vlan, + int (*callback)(ci_sock_cmn*, void*), void* callback_arg, + int /*bool*/ check_lport) { ci_sock_cmn* s = ID_TO_SOCK(ni, ID(entry)); int is_match = 0; @@ -204,23 +196,22 @@ handle_entry(ci_netif* ni, ci_netif_filter_table_entry_fast* entry, * packets, but it has IPv4 ipcache, so its sock_raddr_be32() is 0 and * can be used without checking for CI_SOCK_FLAG_CONNECTED, in contrast * to the equivlalent test in ci_netif_filter_for_each_match_ip6(). */ - if( ((laddr - entry->laddr ) | - /* check_lport is expected to be a compile-time constant, so when - * inlining this function the compiler should either generate sensible - * code here. */ - (lport - entry_ext->lport) * !! 
check_lport | - (raddr - sock_raddr_be32(s)) | - (rport - sock_rport_be16(s)) | - (protocol - sock_protocol(s) )) == 0 ) + if( ((laddr - entry->laddr) | + /* check_lport is expected to be a compile-time constant, so when + * inlining this function the compiler should either generate + * sensible code here. */ + (lport - entry_ext->lport) * ! ! check_lport | + (raddr - sock_raddr_be32(s)) | (rport - sock_rport_be16(s)) | + (protocol - sock_protocol(s))) == 0 ) is_match = 1; LOG_NV(ci_log("%s match=%d: %s %s:%u->%s:%u hash=%u:%u at=%u check_lport=%d", - __FUNCTION__, is_match, CI_IP_PROTOCOL_STR(protocol), - ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport), - ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport), - __onload_hash1(ni->filter_table->table_size_mask, laddr, lport, - raddr, rport, protocol), - __onload_hash2(laddr, lport, raddr, rport, protocol), - (unsigned) (entry - ni->filter_table->table), check_lport)); + __FUNCTION__, is_match, CI_IP_PROTOCOL_STR(protocol), ip_addr_str(laddr), + (unsigned) CI_BSWAP_BE16(lport), ip_addr_str(raddr), + (unsigned) CI_BSWAP_BE16(rport), + __onload_hash1(ni->filter_table->table_size_mask, laddr, lport, raddr, + rport, protocol), + __onload_hash2(laddr, lport, raddr, rport, protocol), + (unsigned) (entry - ni->filter_table->table), check_lport)); if( is_match && CI_LIKELY((s->rx_bind2dev_ifindex == CI_IFID_BAD || @@ -231,13 +222,10 @@ handle_entry(ci_netif* ni, ci_netif_filter_table_entry_fast* entry, } -int -ci_netif_filter_for_each_match(ci_netif* ni, - unsigned laddr, unsigned lport, - unsigned raddr, unsigned rport, - unsigned protocol, int intf_i, int vlan, - int (*callback)(ci_sock_cmn*, void*), - void* callback_arg, ci_uint32* hash_out) +int ci_netif_filter_for_each_match(ci_netif* ni, unsigned laddr, + unsigned lport, unsigned raddr, unsigned rport, unsigned protocol, + int intf_i, int vlan, int (*callback)(ci_sock_cmn*, void*), + void* callback_arg, ci_uint32* hash_out) { ci_netif_filter_table* tbl = NULL; 
unsigned hash1, hash2 = 0; @@ -249,16 +237,15 @@ ci_netif_filter_for_each_match(ci_netif* ni, if( hash_out != NULL ) *hash_out = __onload_hash3(laddr, lport, raddr, rport, protocol); - hash1 = __onload_hash1(table_size_mask, laddr, lport, raddr, rport, - protocol); + hash1 = + __onload_hash1(table_size_mask, laddr, lport, raddr, rport, protocol); first = hash1; - LOG_NV(log("%s: %s %s:%u->%s:%u hash=%u:%u at=%u", - __FUNCTION__, CI_IP_PROTOCOL_STR(protocol), - ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport), - ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport), - first, __onload_hash2(laddr, lport, raddr, rport, protocol), - hash1)); + LOG_NV(log("%s: %s %s:%u->%s:%u hash=%u:%u at=%u", __FUNCTION__, + CI_IP_PROTOCOL_STR(protocol), ip_addr_str(laddr), + (unsigned) CI_BSWAP_BE16(lport), ip_addr_str(raddr), + (unsigned) CI_BSWAP_BE16(rport), first, + __onload_hash2(laddr, lport, raddr, rport, protocol), hash1)); /* The loop a little way below iterates over the hash table looking for * matches. The test of the first entry in our walk through the table is @@ -283,8 +270,8 @@ ci_netif_filter_for_each_match(ci_netif* ni, /* We pass the entry in filter_table_ext here, but as check_lport is false * it won't be used, and moreover the inlining will drop it entirely. 
*/ if( handle_entry(ni, entry, &ni->filter_table_ext[hash1], laddr, lport, - raddr, rport, protocol, intf_i, vlan, callback, - callback_arg, 0 /*check_lport*/) ) + raddr, rport, protocol, intf_i, vlan, callback, callback_arg, + 0 /*check_lport*/) ) return 1; } /* If the state of that first entry was OCCUPIED_REHASHED, it's a guaranteed @@ -302,16 +289,16 @@ ci_netif_filter_for_each_match(ci_netif* ni, hash1 = (hash1 + hash2) & table_size_mask; if( hash1 == first ) { LOG_NV(ci_log(FN_FMT "ITERATE FULL %s:%u->%s:%u hash=%u:%u", - FN_PRI_ARGS(ni), ip_addr_str(laddr), CI_BSWAP_BE16(lport), - ip_addr_str(raddr), CI_BSWAP_BE16(rport), hash1, hash2)); + FN_PRI_ARGS(ni), ip_addr_str(laddr), CI_BSWAP_BE16(lport), + ip_addr_str(raddr), CI_BSWAP_BE16(rport), hash1, hash2)); break; } entry = &tbl->table[hash1]; entry_ext = &ni->filter_table_ext[hash1]; if( OCCUPIED(entry) ) { if( handle_entry(ni, entry, entry_ext, laddr, lport, raddr, rport, - protocol, intf_i, vlan, callback, callback_arg, - 1 /*check_lport*/) ) + protocol, intf_i, vlan, callback, callback_arg, + 1 /*check_lport*/) ) return 1; } } @@ -320,23 +307,20 @@ ci_netif_filter_for_each_match(ci_netif* ni, /* Insert for either TCP or UDP */ -static int -ci_ip4_netif_filter_insert(ci_netif_filter_table* tbl, - ci_netif* netif, oo_sp tcp_id, - unsigned laddr, unsigned lport, - unsigned raddr, unsigned rport, - unsigned protocol) +static int ci_ip4_netif_filter_insert(ci_netif_filter_table* tbl, + ci_netif* netif, oo_sp tcp_id, unsigned laddr, unsigned lport, + unsigned raddr, unsigned rport, unsigned protocol) { ci_netif_filter_table_entry_fast* entry; ci_netif_filter_table_entry_ext* entry_ext; unsigned hash1, hash2; -#if !defined(NDEBUG) || CI_CFG_STATS_NETIF +#if ! 
defined(NDEBUG) || CI_CFG_STATS_NETIF unsigned hops = 1; #endif unsigned first; - hash1 = __onload_hash1(tbl->table_size_mask, laddr, lport, - raddr, rport, protocol); + hash1 = __onload_hash1( + tbl->table_size_mask, laddr, lport, raddr, rport, protocol); hash2 = __onload_hash2(laddr, lport, raddr, rport, protocol); first = hash1; @@ -344,10 +328,11 @@ ci_ip4_netif_filter_insert(ci_netif_filter_table* tbl, while( 1 ) { entry = &tbl->table[hash1]; entry_ext = &netif->filter_table_ext[hash1]; - if( ! OCCUPIED(entry) ) break; + if( ! OCCUPIED(entry) ) + break; ++entry_ext->route_count; -#if !defined(NDEBUG) || CI_CFG_STATS_NETIF +#if ! defined(NDEBUG) || CI_CFG_STATS_NETIF ++hops; #endif @@ -355,19 +340,18 @@ ci_ip4_netif_filter_insert(ci_netif_filter_table* tbl, * entry has a different [laddr]. */ ci_assert( - !((ID(entry) == OO_SP_TO_INT(tcp_id)) && (laddr == entry->laddr)) ); + ! ((ID(entry) == OO_SP_TO_INT(tcp_id)) && (laddr == entry->laddr))); hash1 = (hash1 + hash2) & tbl->table_size_mask; if( hash1 == first ) { - ci_sock_cmn *s = SP_TO_SOCK_CMN(netif, tcp_id); + ci_sock_cmn* s = SP_TO_SOCK_CMN(netif, tcp_id); if( ! (s->s_flags & CI_SOCK_FLAG_SW_FILTER_FULL) ) { LOG_E(ci_log(FN_FMT "%d FULL %s %s:%u->%s:%u hops=%u", - FN_PRI_ARGS(netif), - OO_SP_FMT(tcp_id), CI_IP_PROTOCOL_STR(protocol), - ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport), - ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport), - hops)); + FN_PRI_ARGS(netif), OO_SP_FMT(tcp_id), + CI_IP_PROTOCOL_STR(protocol), ip_addr_str(laddr), + (unsigned) CI_BSWAP_BE16(lport), ip_addr_str(raddr), + (unsigned) CI_BSWAP_BE16(rport), hops)); s->s_flags |= CI_SOCK_FLAG_SW_FILTER_FULL; } @@ -377,12 +361,13 @@ ci_ip4_netif_filter_insert(ci_netif_filter_table* tbl, } /* Now insert the new entry. 
*/ - LOG_TC(ci_log(FN_FMT "%d INSERT %s %s:%u->%s:%u hash=%u:%u at=%u " - "over=%u:%u hops=%u", FN_PRI_ARGS(netif), OO_SP_FMT(tcp_id), - CI_IP_PROTOCOL_STR(protocol), - ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport), - ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport), - first, hash2, hash1, STATE(entry), __ID(entry), hops)); + LOG_TC( + ci_log(FN_FMT "%d INSERT %s %s:%u->%s:%u hash=%u:%u at=%u " + "over=%u:%u hops=%u", + FN_PRI_ARGS(netif), OO_SP_FMT(tcp_id), CI_IP_PROTOCOL_STR(protocol), + ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport), + ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport), first, hash2, + hash1, STATE(entry), __ID(entry), hops)); #if CI_CFG_STATS_NETIF if( hops > netif->state->stats.table_max_hops ) @@ -391,15 +376,15 @@ ci_ip4_netif_filter_insert(ci_netif_filter_table* tbl, if( netif->state->stats.table_mean_hops == 0 ) netif->state->stats.table_mean_hops = 1; netif->state->stats.table_mean_hops = - (netif->state->stats.table_mean_hops * 9 + hops) / 10; + (netif->state->stats.table_mean_hops * 9 + hops) / 10; if( STATE(entry) == EMPTY ) ++netif->state->stats.table_n_slots; ++netif->state->stats.table_n_entries; #endif - set_entry_state(entry, - hash1 == first ? OCCUPIED_PREFERRED : OCCUPIED_REHASHED); + set_entry_state( + entry, hash1 == first ? 
OCCUPIED_PREFERRED : OCCUPIED_REHASHED); set_entry_id(entry, OO_SP_TO_INT(tcp_id)); entry->laddr = laddr; entry_ext->lport = lport; @@ -407,10 +392,9 @@ ci_ip4_netif_filter_insert(ci_netif_filter_table* tbl, } -static void -__ci_ip4_netif_filter_remove(ci_netif_filter_table* tbl, ci_netif* ni, - unsigned hash1, unsigned hash2, - int hops, unsigned last_tbl_i) +static void __ci_ip4_netif_filter_remove(ci_netif_filter_table* tbl, + ci_netif* ni, unsigned hash1, unsigned hash2, int hops, + unsigned last_tbl_i) { ci_netif_filter_table_entry_fast* entry; ci_netif_filter_table_entry_ext* entry_ext; @@ -437,19 +421,15 @@ __ci_ip4_netif_filter_remove(ci_netif_filter_table* tbl, ci_netif* ni, if( entry_ext->route_count == 0 ) { CITP_STATS_NETIF(--ni->state->stats.table_n_slots); set_entry_state(entry, EMPTY); - } - else { + } else { set_entry_state(entry, TOMBSTONE); } } -static void -ci_ip4_netif_filter_remove(ci_netif_filter_table* tbl, - ci_netif* netif, oo_sp sock_p, - unsigned laddr, unsigned lport, - unsigned raddr, unsigned rport, - unsigned protocol) +static void ci_ip4_netif_filter_remove(ci_netif_filter_table* tbl, + ci_netif* netif, oo_sp sock_p, unsigned laddr, unsigned lport, + unsigned raddr, unsigned rport, unsigned protocol) { ci_netif_filter_table_entry_fast* entry; unsigned hash1, hash2, tbl_i; @@ -462,19 +442,17 @@ ci_ip4_netif_filter_remove(ci_netif_filter_table* tbl, * Do not complain about this. 
*/ || netif2tcp_helper_resource(netif)->ref[OO_THR_REF_BASE] == 0 #endif - ); + ); - hash1 = __onload_hash1(tbl->table_size_mask, laddr, lport, - raddr, rport, protocol); + hash1 = __onload_hash1( + tbl->table_size_mask, laddr, lport, raddr, rport, protocol); hash2 = __onload_hash2(laddr, lport, raddr, rport, protocol); first = hash1; - LOG_TC(ci_log("%s: [%d:%d] REMOVE %s %s:%u->%s:%u hash=%u:%u", - __FUNCTION__, NI_ID(netif), OO_SP_FMT(sock_p), - CI_IP_PROTOCOL_STR(protocol), - ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport), - ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport), - hash1, hash2)); + LOG_NV(ci_log("%s: [%d:%d] REMOVE %s %s:%u->%s:%u hash=%u:%u", __FUNCTION__, + NI_ID(netif), OO_SP_FMT(sock_p), CI_IP_PROTOCOL_STR(protocol), + ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport), ip_addr_str(raddr), + (unsigned) CI_BSWAP_BE16(rport), hash1, hash2)); tbl_i = hash1; while( 1 ) { @@ -482,8 +460,7 @@ ci_ip4_netif_filter_remove(ci_netif_filter_table* tbl, if( OCCUPIED(entry) && ID(entry) == OO_SP_TO_INT(sock_p) ) { if( laddr == entry->laddr ) break; - } - else if( STATE(entry) == EMPTY ) { + } else if( STATE(entry) == EMPTY ) { /* We allow multiple removes of the same filter -- helps avoid some * complexity in the filter module. 
*/ @@ -493,10 +470,9 @@ ci_ip4_netif_filter_remove(ci_netif_filter_table* tbl, ++hops; if( tbl_i == first ) { LOG_E(ci_log(FN_FMT "ERROR: LOOP [%d] %s %s:%u->%s:%u", - FN_PRI_ARGS(netif), OO_SP_FMT(sock_p), - CI_IP_PROTOCOL_STR(protocol), - ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport), - ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport))); + FN_PRI_ARGS(netif), OO_SP_FMT(sock_p), CI_IP_PROTOCOL_STR(protocol), + ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport), + ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport))); return; } } @@ -504,11 +480,9 @@ ci_ip4_netif_filter_remove(ci_netif_filter_table* tbl, __ci_ip4_netif_filter_remove(tbl, netif, hash1, hash2, hops, tbl_i); } -int -ci_netif_filter_insert(ci_netif* netif, oo_sp tcp_id, int af_space, - const ci_addr_t laddr, unsigned lport, - const ci_addr_t raddr, unsigned rport, - unsigned protocol) +int ci_netif_filter_insert(ci_netif* netif, oo_sp tcp_id, int af_space, + const ci_addr_t laddr, unsigned lport, const ci_addr_t raddr, + unsigned rport, unsigned protocol) { ci_netif_filter_table* ip4_tbl; int rc = 0; @@ -525,8 +499,7 @@ ci_netif_filter_insert(ci_netif* netif, oo_sp tcp_id, int af_space, ip6_tbl = netif->ip6_filter_table; rc = ci_ip6_netif_filter_insert(ip6_tbl, netif, tcp_id, laddr, lport, - CI_IPX_ADDR_IS_ANY(raddr) ? addr_any : raddr, - rport, protocol); + CI_IPX_ADDR_IS_ANY(raddr) ? addr_any : raddr, rport, protocol); if( rc < 0 ) return rc; } @@ -537,8 +510,8 @@ ci_netif_filter_insert(ci_netif* netif, oo_sp tcp_id, int af_space, ci_assert(netif->filter_table); ip4_tbl = netif->filter_table; - rc = ci_ip4_netif_filter_insert(ip4_tbl, netif, tcp_id, laddr.ip4, lport, - raddr.ip4, rport, protocol); + rc = ci_ip4_netif_filter_insert( + ip4_tbl, netif, tcp_id, laddr.ip4, lport, raddr.ip4, rport, protocol); /* Fixme: should we roll back the IPv6 insertion when trying to listen * in the both worlds, and IPv4 fails? 
*/ if( rc < 0 ) @@ -548,11 +521,9 @@ ci_netif_filter_insert(ci_netif* netif, oo_sp tcp_id, int af_space, return 0; } -void -ci_netif_filter_remove(ci_netif* netif, oo_sp sock_p, int af_space, - const ci_addr_t laddr, unsigned lport, - const ci_addr_t raddr, unsigned rport, - unsigned protocol) +void ci_netif_filter_remove(ci_netif* netif, oo_sp sock_p, int af_space, + const ci_addr_t laddr, unsigned lport, const ci_addr_t raddr, + unsigned rport, unsigned protocol) { ci_netif_filter_table* ip4_tbl; #if CI_CFG_IPV6 @@ -567,8 +538,7 @@ ci_netif_filter_remove(ci_netif* netif, oo_sp sock_p, int af_space, ip6_tbl = netif->ip6_filter_table; ci_ip6_netif_filter_remove(ip6_tbl, netif, sock_p, laddr, lport, - CI_IPX_ADDR_IS_ANY(raddr) ? addr_any : raddr, - rport, protocol); + CI_IPX_ADDR_IS_ANY(raddr) ? addr_any : raddr, rport, protocol); } if( IS_AF_SPACE_IP4(af_space) ) @@ -577,8 +547,8 @@ ci_netif_filter_remove(ci_netif* netif, oo_sp sock_p, int af_space, ci_assert(netif->filter_table); ip4_tbl = netif->filter_table; - ci_ip4_netif_filter_remove(ip4_tbl, netif, sock_p, laddr.ip4, lport, - raddr.ip4, rport, protocol); + ci_ip4_netif_filter_remove( + ip4_tbl, netif, sock_p, laddr.ip4, lport, raddr.ip4, rport, protocol); } } #endif @@ -597,7 +567,7 @@ void ci_netif_filter_init(ci_netif* ni, int size_lg2) ci_assert(ni); ci_assert(ni->filter_table); ci_assert(ni->filter_table_ext); - ci_assert_ge(size_lg2, 16); /* For ci_netif_filter_for_each_match(). */ + ci_assert_ge(size_lg2, 16); /* For ci_netif_filter_for_each_match(). 
*/ ci_assert_le(size_lg2, 32); ni->filter_table->table_size_mask = size - 1; @@ -613,60 +583,53 @@ void ci_netif_filter_init(ci_netif* ni, int size_lg2) #endif #if OO_DO_STACK_POLL -int -__ci_ip4_netif_filter_lookup(ci_netif* netif, - unsigned laddr, unsigned lport, - unsigned raddr, unsigned rport, - unsigned protocol) +int __ci_ip4_netif_filter_lookup(ci_netif* netif, unsigned laddr, + unsigned lport, unsigned raddr, unsigned rport, unsigned protocol) { int rc; /* try full lookup */ - rc = ci_ip4_netif_filter_lookup(netif, laddr, lport, raddr, rport, protocol); - LOG_NV(log(LPF "FULL LOOKUP %s:%u->%s:%u rc=%d", - ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport), - ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport), - rc)); + rc = ci_ip4_netif_filter_lookup(netif, laddr, lport, raddr, rport, protocol); + LOG_NV(log(LPF "FULL LOOKUP %s:%u->%s:%u rc=%d", ip_addr_str(laddr), + (unsigned) CI_BSWAP_BE16(lport), ip_addr_str(raddr), + (unsigned) CI_BSWAP_BE16(rport), rc)); - if(CI_LIKELY( rc >= 0 )) + if( CI_LIKELY(rc >= 0) ) return rc; /* try wildcard lookup */ raddr = rport = 0; rc = ci_ip4_netif_filter_lookup(netif, laddr, lport, raddr, rport, protocol); - LOG_NV(log(LPF "WILD LOOKUP %s:%u->%s:%u rc=%d", - ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport), - ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport), - rc)); + LOG_NV(log(LPF "WILD LOOKUP %s:%u->%s:%u rc=%d", ip_addr_str(laddr), + (unsigned) CI_BSWAP_BE16(lport), ip_addr_str(raddr), + (unsigned) CI_BSWAP_BE16(rport), rc)); - if(CI_LIKELY( rc >= 0 )) + if( CI_LIKELY(rc >= 0) ) return rc; return -ENOENT; } -ci_sock_cmn* -__ci_netif_filter_lookup(ci_netif* netif, int af_space, - ci_addr_t laddr, unsigned lport, - ci_addr_t raddr, unsigned rport, - unsigned protocol) +ci_sock_cmn* __ci_netif_filter_lookup(ci_netif* netif, int af_space, + ci_addr_t laddr, unsigned lport, ci_addr_t raddr, unsigned rport, + unsigned protocol) { int rc; #if CI_CFG_IPV6 if( IS_AF_SPACE_IP6(af_space) ) { - rc = 
__ci_ip6_netif_filter_lookup(netif, laddr, lport, raddr, rport, - protocol); - if(CI_LIKELY( rc >= 0 )) + rc = __ci_ip6_netif_filter_lookup( + netif, laddr, lport, raddr, rport, protocol); + if( CI_LIKELY(rc >= 0) ) return ID_TO_SOCK(netif, netif->ip6_filter_table->table[rc].id); } if( IS_AF_SPACE_IP4(af_space) ) #endif { - rc = __ci_ip4_netif_filter_lookup(netif, laddr.ip4, lport, raddr.ip4, rport, - protocol); - if(CI_LIKELY( rc >= 0 )) + rc = __ci_ip4_netif_filter_lookup( + netif, laddr.ip4, lport, raddr.ip4, rport, protocol); + if( CI_LIKELY(rc >= 0) ) return ID_TO_SOCK(netif, ID(&netif->filter_table->table[rc])); } @@ -704,15 +667,15 @@ void ci_netif_filter_dump(ci_netif* ni) unsigned raddr = sock_raddr_be32(s); int rport = sock_rport_be16(s); int protocol = sock_protocol(s); - unsigned hash1 = __onload_hash1(tbl->table_size_mask, laddr, lport, - raddr, rport, protocol); + unsigned hash1 = __onload_hash1( + tbl->table_size_mask, laddr, lport, raddr, rport, protocol); unsigned hash2 = __onload_hash2(laddr, lport, raddr, rport, protocol); - log("%010d state=%u id=%-10d rt_ct=%d %s "CI_IP_PRINTF_FORMAT":%d " - CI_IP_PRINTF_FORMAT":%d %010d:%010d", + log("%010d state=%u id=%-10d rt_ct=%d %s " CI_IP_PRINTF_FORMAT + ":%d " CI_IP_PRINTF_FORMAT ":%d %010d:%010d", i, STATE(entry) >> FILTER_TABLE_ID_BITS, ID(entry), entry_ext->route_count, CI_IP_PROTOCOL_STR(protocol), CI_IP_PRINTF_ARGS(&laddr), CI_BSWAP_BE16(lport), - CI_IP_PRINTF_ARGS(&raddr), CI_BSWAP_BE16(rport), hash1, hash2); + CI_IP_PRINTF_ARGS(&raddr), CI_BSWAP_BE16(rport), hash1, hash2); } } #if CI_CFG_IPV6 diff --git a/src/lib/transport/ip/netif_tx.c b/src/lib/transport/ip/netif_tx.c index de7004dc5..4d7b4578c 100644 --- a/src/lib/transport/ip/netif_tx.c +++ b/src/lib/transport/ip/netif_tx.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2003-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! 
\file ** ** \author djr ** \brief Raw packet transmit. @@ -40,20 +40,21 @@ static inline int pkt_q_id(ci_ip_pkt_fmt* pkt) } -static inline void calc_csum_if_needed(ci_netif* ni, ef_vi* vi, - ci_ip_pkt_fmt* pkt) +static inline void calc_csum_if_needed( + ci_netif* ni, ef_vi* vi, ci_ip_pkt_fmt* pkt) { /* Calculate packet checksum in case of AF_XDP */ - if( CI_UNLIKELY(vi->nic_type.arch == EF_VI_ARCH_AF_XDP && - is_to_primary_vi(pkt)) ) { + if( CI_LIKELY((vi->nic_type.arch == EF_VI_ARCH_AF_XDP || + vi->nic_type.arch == EF_VI_ARCH_SWXTCH) && + is_to_primary_vi(pkt)) ) { struct iovec my_iov[CI_IP_PKT_SEGMENTS_MAX]; ci_uint8 protocol; - ci_netif_pkt_to_host_iovec(ni, pkt, my_iov, - sizeof(my_iov) / sizeof(my_iov[0])); + ci_netif_pkt_to_host_iovec( + ni, pkt, my_iov, sizeof(my_iov) / sizeof(my_iov[0])); - protocol = ipx_hdr_protocol(ci_ethertype2af(oo_tx_ether_type_get(pkt)), - oo_ipx_hdr(pkt)); + protocol = ipx_hdr_protocol( + ci_ethertype2af(oo_tx_ether_type_get(pkt)), oo_ipx_hdr(pkt)); if( protocol == IPPROTO_TCP || protocol == IPPROTO_UDP ) oo_pkt_calc_checksums(ni, pkt, my_iov); } @@ -62,27 +63,25 @@ static inline void calc_csum_if_needed(ci_netif* ni, ef_vi* vi, #if CI_CFG_CTPIO static inline int tx_ctpio(ci_netif* ni, int intf_i, ef_vi* vi, - ci_ip_pkt_fmt* pkt, const ef_iovec *iov, - int iov_len) + ci_ip_pkt_fmt* pkt, const ef_iovec* iov, int iov_len) { ci_netif_state_nic_t* nsn = &ni->state->nic[intf_i]; struct iovec host_iov[CI_IP_PKT_SEGMENTS_MAX]; int total_length; int rc; - total_length = ci_netif_pkt_to_host_iovec(ni, pkt, host_iov, - sizeof(host_iov) / sizeof(host_iov[0])); + total_length = ci_netif_pkt_to_host_iovec( + ni, pkt, host_iov, sizeof(host_iov) / sizeof(host_iov[0])); if( (nsn->oo_vi_flags & OO_VI_FLAGS_TX_CTPIO_ONLY) && - ef_vi_transmit_space_bytes(vi) < total_length) + ef_vi_transmit_space_bytes(vi) < total_length ) return -ENOSPC; oo_pkt_calc_checksums(ni, pkt, host_iov); - ef_vi_transmitv_ctpio(vi, total_length, host_iov, - iov_len, 
nsn->ctpio_ct_threshold); + ef_vi_transmitv_ctpio( + vi, total_length, host_iov, iov_len, nsn->ctpio_ct_threshold); CITP_STATS_NETIF_INC(ni, ctpio_pkts); - rc = ef_vi_transmitv_ctpio_fallback(vi, iov, iov_len, - OO_PKT_ID(pkt)); + rc = ef_vi_transmitv_ctpio_fallback(vi, iov, iov_len, OO_PKT_ID(pkt)); ci_assert_equal(rc, 0); return rc; } @@ -91,17 +90,17 @@ static inline int tx_ctpio(ci_netif* ni, int intf_i, ef_vi* vi, /* [is_fresh] is a hint indicating that the requested TXs are latency- * sensitive. */ -static void __ci_netif_dmaq_shove(ci_netif* ni, oo_pktq* dmaq, ef_vi* vi, - int intf_i, int is_fresh) +static void __ci_netif_dmaq_shove( + ci_netif* ni, oo_pktq* dmaq, ef_vi* vi, int intf_i, int is_fresh) { ci_ip_pkt_fmt* pkt = PKT_CHK(ni, dmaq->head); int rc; #if CI_CFG_CTPIO - #ifdef __KERNEL__ +#ifdef __KERNEL__ int ctpio = 0; - #else +#else int ctpio = is_fresh; - #endif +#endif /* In a non-CTPIO world, we don't need to track whether we've posted any DMA * descriptors because the caller has checked that we have available TXQ @@ -132,17 +131,19 @@ static void __ci_netif_dmaq_shove(ci_netif* ni, oo_pktq* dmaq, ef_vi* vi, * we could. */ ef_remote_iovec remote_iov_storage[CI_IP_PKT_SEGMENTS_MAX + 1]; ef_remote_iovec* remote_iov = remote_iov_storage; - struct ef_vi_tx_extra extra = { .flags = EF_VI_TX_EXTRA_MARK, .mark = 0 }; + struct ef_vi_tx_extra extra = { .flags = EF_VI_TX_EXTRA_MARK, + .mark = 0 }; ci_tcp_state* ts = SP_TO_TCP(ni, pkt->pf.tcp_tx.sock_id); ci_uint32 prev_crc_id = ts->current_crc_id; - iov_len = ci_netif_pkt_to_remote_iovec(ni, pkt, &remote_iov, &extra.mark, - sizeof(remote_iov_storage) / sizeof(remote_iov_storage[0])); + iov_len = + ci_netif_pkt_to_remote_iovec(ni, pkt, &remote_iov, &extra.mark, + sizeof(remote_iov_storage) / sizeof(remote_iov_storage[0])); if( CI_UNLIKELY(iov_len < 0) ) { rc = iov_len; - } - else { - rc = ef_vi_transmitv_init_extra(vi, extra.mark ? 
&extra : NULL, remote_iov, iov_len, OO_PKT_ID(pkt)); + } else { + rc = ef_vi_transmitv_init_extra(vi, extra.mark ? &extra : NULL, + remote_iov, iov_len, OO_PKT_ID(pkt)); #if CI_CFG_CTPIO if( rc >= 0 ) posted_dma = 1; @@ -153,25 +154,28 @@ static void __ci_netif_dmaq_shove(ci_netif* ni, oo_pktq* dmaq, ef_vi* vi, ts->current_crc_id = prev_crc_id; ci_nvme_plugin_crc_packet_cleanup(ni, ts, pkt); } - } - else { - iov_len = ci_netif_pkt_to_iovec(ni, pkt, iov, - sizeof(iov) / sizeof(iov[0])); + } else { + iov_len = + ci_netif_pkt_to_iovec(ni, pkt, iov, sizeof(iov) / sizeof(iov[0])); if( CI_UNLIKELY(iov_len < 0) ) break; #if CI_CFG_CTPIO if( ctpio && (iov_len < 1 || iov_len > CI_IP_PKT_SEGMENTS_MAX || - ! ci_netif_may_ctpio(ni, intf_i, pkt->pay_len) || - pkt->flags & CI_PKT_FLAG_INDIRECT) ) + ! ci_netif_may_ctpio(ni, intf_i, pkt->pay_len) || + pkt->flags & CI_PKT_FLAG_INDIRECT) ) ctpio = 0; - ctpio |= !! (ni->state->nic[pkt->intf_i].oo_vi_flags & OO_VI_FLAGS_TX_CTPIO_ONLY); + ctpio |= ! ! (ni->state->nic[pkt->intf_i].oo_vi_flags & + OO_VI_FLAGS_TX_CTPIO_ONLY); if( ctpio ) { ci_assert(! posted_dma); rc = tx_ctpio(ni, intf_i, vi, pkt, iov, iov_len); - } - else + } else #endif { +#ifndef __KERNEL__ + /* This is a hack to convert onload packets into dpdk mbufs*/ + ef_fill_tx_data(vi, PKT_START(pkt), pkt->buf_len); +#endif rc = ef_vi_transmitv_init(vi, iov, iov_len, OO_PKT_ID(pkt)); #if CI_CFG_CTPIO if( rc >= 0 ) @@ -182,8 +186,7 @@ static void __ci_netif_dmaq_shove(ci_netif* ni, oo_pktq* dmaq, ef_vi* vi, if( rc >= 0 ) { __oo_pktq_next(ni, dmaq, pkt, netif.tx.dmaq_next); CI_DEBUG(pkt->netif.tx.dmaq_next = OO_PP_NULL); - } - else { + } else { /* Descriptor ring or plugin id pool is full. 
*/ #if CI_CFG_STATS_NETIF if( (ci_uint32) dmaq->num > ni->state->stats.tx_dma_max ) @@ -192,8 +195,7 @@ static void __ci_netif_dmaq_shove(ci_netif* ni, oo_pktq* dmaq, ef_vi* vi, break; } } - } - while( oo_pktq_not_empty(dmaq) ); + } while( oo_pktq_not_empty(dmaq) ); #if CI_CFG_CTPIO /* If everything went out by CTPIO, there will be no outstanding DMA @@ -216,8 +218,8 @@ void ci_netif_dmaq_shove1(ci_netif* ni, int intf_i) { ef_vi* vi = ci_netif_vi(ni, intf_i); if( ef_vi_transmit_space(vi) >= (ef_vi_transmit_capacity(vi) >> 1) ) - __ci_netif_dmaq_shove(ni, ci_netif_dmaq(ni, intf_i), vi, intf_i, - 0 /*is_fresh*/); + __ci_netif_dmaq_shove( + ni, ci_netif_dmaq(ni, intf_i), vi, intf_i, 0 /*is_fresh*/); } @@ -235,8 +237,8 @@ void ci_netif_dmaq_shove_plugin(ci_netif* ni, int intf_i, int q_id) ef_vi* vi = &ni->nic_hw[intf_i].vis[q_id]; ci_assert_ge(q_id, 1); if( ef_vi_transmit_space(vi) > CI_IP_PKT_SEGMENTS_MAX ) - __ci_netif_dmaq_shove(ni, &ni->state->nic[intf_i].dmaq[q_id], vi, intf_i, - 0 /*is_fresh*/); + __ci_netif_dmaq_shove( + ni, &ni->state->nic[intf_i].dmaq[q_id], vi, intf_i, 0 /*is_fresh*/); } #endif @@ -262,17 +264,17 @@ void __ci_netif_send(ci_netif* netif, ci_ip_pkt_fmt* pkt) ___ci_netif_dmaq_insert_prep_pkt(netif, pkt); - LOG_NT(log("%s: [%d] id=%d nseg=%d 0:["EF_ADDR_FMT":%d] dhost=" - CI_MAC_PRINTF_FORMAT, __FUNCTION__, NI_ID(netif), - OO_PKT_FMT(pkt), pkt->n_buffers, - pkt_dma_addr(netif, pkt, pkt->intf_i), - pkt->buf_len, CI_MAC_PRINTF_ARGS(oo_ether_dhost(pkt)))); + LOG_NT(log("%s: [%d] id=%d nseg=%d 0:[" EF_ADDR_FMT + ":%d] dhost=" CI_MAC_PRINTF_FORMAT, + __FUNCTION__, NI_ID(netif), OO_PKT_FMT(pkt), pkt->n_buffers, + pkt_dma_addr(netif, pkt, pkt->intf_i), pkt->buf_len, + CI_MAC_PRINTF_ARGS(oo_ether_dhost(pkt)))); /* Packets to non-primary VIs could be control messages to a plugin, so * there's no requirement that they be Ethernet (or any other recognisable * protocol). */ - ci_check( ! is_to_primary_vi(pkt) || - ! 
ci_eth_addr_is_zero((ci_uint8 *)oo_ether_dhost(pkt))); + ci_check(! is_to_primary_vi(pkt) || + ! ci_eth_addr_is_zero((ci_uint8*) oo_ether_dhost(pkt))); /* * Packets can be now be n fragments long. If the packet at the head of the @@ -299,41 +301,38 @@ void __ci_netif_send(ci_netif* netif, ci_ip_pkt_fmt* pkt) is_to_primary_vi(pkt) ) { if( pkt->pay_len <= NI_OPTS(netif).pio_thresh && pkt->n_buffers == 1 ) { if( (offset = ci_pio_buddy_alloc(netif, buddy, order)) >= 0 ) { - rc = ef_vi_transmit_copy_pio(vi, - offset, PKT_START(pkt), pkt->buf_len, - OO_PKT_ID(pkt)); + rc = ef_vi_transmit_copy_pio( + vi, offset, PKT_START(pkt), pkt->buf_len, OO_PKT_ID(pkt)); if( rc == 0 ) { CITP_STATS_NETIF_INC(netif, pio_pkts); ci_assert(pkt->pio_addr == -1); pkt->pio_addr = offset; pkt->pio_order = order; goto done; - } - else { + } else { CITP_STATS_NETIF_INC(netif, no_pio_err); ci_pio_buddy_free(netif, buddy, offset, order); /* Continue and do normal send. */ } - } - else { + } else { CI_DEBUG(CITP_STATS_NETIF_INC(netif, no_pio_busy)); } - } - else { + } else { CI_DEBUG(CITP_STATS_NETIF_INC(netif, no_pio_too_long)); } } #endif calc_csum_if_needed(netif, vi, pkt); - iov_len = ci_netif_pkt_to_iovec(netif, pkt, iov, - sizeof(iov) / sizeof(iov[0])); + iov_len = + ci_netif_pkt_to_iovec(netif, pkt, iov, sizeof(iov) / sizeof(iov[0])); /* CTPIO only NICs always claim to be able to do CTPIO, so the only * things that might stop them are packets that are split over multiple * buffers, which should be prevented by the declared MTU and indirect * packets, which aren't used with this NIC type. 
*/ - if( netif->state->nic[pkt->intf_i].oo_vi_flags & OO_VI_FLAGS_TX_CTPIO_ONLY ) { + if( netif->state->nic[pkt->intf_i].oo_vi_flags & + OO_VI_FLAGS_TX_CTPIO_ONLY ) { ci_assert_gt(iov_len, 0); ci_assert_le(iov_len, CI_IP_PKT_SEGMENTS_MAX); ci_assert(is_to_primary_vi(pkt)); @@ -344,14 +343,19 @@ void __ci_netif_send(ci_netif* netif, ci_ip_pkt_fmt* pkt) ci_netif_may_ctpio(netif, intf_i, pkt->pay_len) && is_to_primary_vi(pkt) ) { rc = tx_ctpio(netif, intf_i, vi, pkt, iov, iov_len); - } - else + } else { +#else + { /* this is some real garbage */ #endif - if( (rc = ef_vi_transmitv(vi, iov, iov_len, OO_PKT_ID(pkt))) == 0 ) { - /* After a DMA send, stop attempting CTPIO sends until the TXQ has - * drained. */ - ci_netif_ctpio_desist(netif, intf_i); - CITP_STATS_NETIF_INC(netif, tx_dma_doorbells); +#ifndef __KERNEL__ + ef_fill_tx_data(vi, PKT_START(pkt), pkt->buf_len); +#endif + if( (rc = ef_vi_transmitv(vi, iov, iov_len, OO_PKT_ID(pkt))) == 0 ) { + /* After a DMA send, stop attempting CTPIO sends until the TXQ has + * drained. */ + ci_netif_ctpio_desist(netif, intf_i); + CITP_STATS_NETIF_INC(netif, tx_dma_doorbells); + } } if( rc == 0 ) { LOG_AT(ci_analyse_pkt(oo_ether_hdr(pkt), pkt->buf_len)); @@ -366,7 +370,7 @@ void __ci_netif_send(ci_netif* netif, ci_ip_pkt_fmt* pkt) LOG_NT(log("%s: ENQ id=%d", __FUNCTION__, OO_PKT_FMT(pkt))); __ci_netif_dmaq_put(netif, dmaq, pkt); - done: +done: /* Poll every now and then to ensure we keep up with completions. If we * don't do this then we can ignore completions for so long that we start @@ -395,8 +399,8 @@ void __ci_netif_send(ci_netif* netif, ci_ip_pkt_fmt* pkt) * low-level function used by VIs which are used for communicating with * plugins, where the caller typically has their own reliability policy and * hence they don't want automatic handling of it behind the scenes. 
*/ -bool ci_netif_send_immediate(ci_netif* netif, ci_ip_pkt_fmt* pkt, - const struct ef_vi_tx_extra* extra) +bool ci_netif_send_immediate( + ci_netif* netif, ci_ip_pkt_fmt* pkt, const struct ef_vi_tx_extra* extra) { int intf_i; ef_vi* vi; @@ -410,36 +414,38 @@ bool ci_netif_send_immediate(ci_netif* netif, ci_ip_pkt_fmt* pkt, ci_assert_flags(pkt->flags, CI_PKT_FLAG_TX_PENDING); ci_assert_nflags(pkt->flags, CI_PKT_FLAG_INDIRECT); - LOG_NT(log("%s: [%d] id=%d nseg=%d 0:["EF_ADDR_FMT":%d] dhost=" - CI_MAC_PRINTF_FORMAT, __FUNCTION__, NI_ID(netif), - OO_PKT_FMT(pkt), pkt->n_buffers, - pkt_dma_addr(netif, pkt, pkt->intf_i), - pkt->buf_len, CI_MAC_PRINTF_ARGS(oo_ether_dhost(pkt)))); + LOG_NT(log("%s: [%d] id=%d nseg=%d 0:[" EF_ADDR_FMT + ":%d] dhost=" CI_MAC_PRINTF_FORMAT, + __FUNCTION__, NI_ID(netif), OO_PKT_FMT(pkt), pkt->n_buffers, + pkt_dma_addr(netif, pkt, pkt->intf_i), pkt->buf_len, + CI_MAC_PRINTF_ARGS(oo_ether_dhost(pkt)))); intf_i = pkt->intf_i; ci_assert_lt(pkt->q_id, CI_MAX_VIS_PER_INTF); vi = &netif->nic_hw[intf_i].vis[pkt_q_id(pkt)]; - iov_len = ci_netif_pkt_to_iovec(netif, pkt, iov, - sizeof(iov) / sizeof(iov[0])); + iov_len = + ci_netif_pkt_to_iovec(netif, pkt, iov, sizeof(iov) / sizeof(iov[0])); if( extra ) { int i; ef_remote_iovec riov[CI_IP_PKT_SEGMENTS_MAX]; - for( i = 0; i < iov_len; ++i) { - riov[i] = (ef_remote_iovec){ + for( i = 0; i < iov_len; ++i ) { + riov[i] = (ef_remote_iovec) { .iov_base = iov[i].iov_base, .iov_len = iov[i].iov_len, .flags = 0, .addrspace = EF_ADDRSPACE_LOCAL, }; } - if( ef_vi_transmitv_init_extra(vi, extra, riov, iov_len, - OO_PKT_ID(pkt)) != 0 ) + if( ef_vi_transmitv_init_extra(vi, extra, riov, iov_len, OO_PKT_ID(pkt)) != + 0 ) return false; ef_vi_transmit_push(vi); - } - else { + } else { +#ifndef __KERNEL__ + ef_fill_tx_data(vi, PKT_START(pkt), pkt->buf_len); +#endif if( ef_vi_transmitv(vi, iov, iov_len, OO_PKT_ID(pkt)) != 0 ) return false; } diff --git a/src/lib/transport/ip/tcp_close.c b/src/lib/transport/ip/tcp_close.c 
index b2c82d0e4..ad45d7717 100644 --- a/src/lib/transport/ip/tcp_close.c +++ b/src/lib/transport/ip/tcp_close.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2004-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author djr/ctk ** \brief TCP connection routines: @@ -16,7 +16,7 @@ #include "ip_internal.h" #ifdef __KERNEL__ -# include +#include #endif #include #include @@ -30,7 +30,7 @@ void __ci_tcp_listen_to_normal(ci_netif* netif, ci_tcp_socket_listen* tls) { citp_waitable_obj* wo = SOCK_TO_WAITABLE_OBJ(&tls->s); - ci_tcp_state* ts = &wo->tcp; + ci_tcp_state* ts = &wo->tcp; #if CI_CFG_IPV6 int af = ipcache_af(&ts->s.pkt); #endif @@ -41,8 +41,8 @@ void __ci_tcp_listen_to_normal(ci_netif* netif, ci_tcp_socket_listen* tls) ci_ip_timer_clear(netif, &tls->listenq_tid); - oo_p_dllink_del_init(netif, oo_p_dllink_sb(netif, &tls->s.b, - &tls->s.b.post_poll_link)); + oo_p_dllink_del_init( + netif, oo_p_dllink_sb(netif, &tls->s.b, &tls->s.b.post_poll_link)); #if CI_CFG_IPV6 ts->s.cp.laddr = ip4_addr_any; @@ -66,26 +66,89 @@ void __ci_tcp_listen_to_normal(ci_netif* netif, ci_tcp_socket_listen* tls) #endif } +#ifndef __KERNEL__ +/* This function is a temporary thing to at least attempt to make sure + * conncected sockets receive a last ACK */ +int ci_tcp_fin_added(ci_tcp_state* ts, ci_netif* netif) +{ + ci_assert(ci_netif_is_locked(netif)); + + if( ts->s.b.state == CI_TCP_FIN_WAIT1 ) { + ci_uint64 start_frc, now_frc, schedule_frc; + ci_uint32 timeout = ts->s.so.sndtimeo_msec; + ci_uint64 max_spin = ts->s.b.spin_cycles; + int rc = 0; + citp_signal_info* si = citp_signal_get_specific_inited(); + + if( ts->s.so.sndtimeo_msec ) { + ci_uint64 max_so_spin = + (ci_uint64) ts->s.so.sndtimeo_msec * IPTIMER_STATE(netif)->khz; + if( max_so_spin <= max_spin ) { + max_spin = max_so_spin; + } + } + + ci_frc64(&start_frc); + schedule_frc = start_frc; + now_frc = 
start_frc; + do { + if( ci_netif_may_poll(netif) ) { + if( ci_netif_need_poll_spinning(netif, now_frc) ) { + ci_netif_poll(netif); + } else if( ! netif->state->is_spinner ) + netif->state->is_spinner = 1; + } + + if( ts->s.b.state != CI_TCP_FIN_WAIT1 ) { + netif->state->is_spinner = 0; + return 0; + } + + ci_frc64(&now_frc); + rc = OO_SPINLOOP_PAUSE_CHECK_SIGNALS( + netif, now_frc, &schedule_frc, ts->s.so.sndtimeo_msec, NULL, si); + if( rc != 0 ) { + netif->state->is_spinner = 0; + } + } while( now_frc - start_frc < max_spin ); + + netif->state->is_spinner = 0; + + if( timeout ) { + ci_uint32 spin_ms = (now_frc - start_frc) / IPTIMER_STATE(netif)->khz; + if( spin_ms < timeout ) + timeout -= spin_ms; + else { + if( ts->s.b.state == CI_TCP_FIN_WAIT1 ) + return -EAGAIN; + } + } + } + + return 0; +} +#endif + int ci_tcp_add_fin(ci_tcp_state* ts, ci_netif* netif) { ci_ip_pkt_queue* sendq = &ts->send; - ci_ip_pkt_fmt* pkt; - ci_tcp_hdr *tcp_hdr; + ci_ip_pkt_fmt* pkt; + ci_tcp_hdr* tcp_hdr; ci_assert(ci_netif_is_locked(netif)); LOG_TC(log(FNTS_FMT "sendq_num=%d cork=%d", FNTS_PRI_ARGS(netif, ts), - sendq->num, !!(ts->s.s_aflags & CI_SOCK_AFLAG_CORK))); + sendq->num, ! ! (ts->s.s_aflags & CI_SOCK_AFLAG_CORK))); if( sendq->num ) { /* Bang the fin on the end of the send queue. 
*/ - pkt = PKT_CHK(netif, sendq->tail); + pkt = PKT_CHK(netif, sendq->tail); tcp_hdr = TX_PKT_IPX_TCP(ipcache_af(&ts->s.pkt), pkt); tcp_hdr->tcp_flags |= CI_TCP_FLAG_FIN | CI_TCP_FLAG_PSH; tcp_enq_nxt(ts) += 1; pkt->pf.tcp_tx.end_seq = tcp_enq_nxt(ts); - pkt->flags &=~ CI_PKT_FLAG_TX_MORE; + pkt->flags &= ~CI_PKT_FLAG_TX_MORE; ci_tcp_tx_advance(ts, netif); return 0; } @@ -98,10 +161,9 @@ int ci_tcp_add_fin(ci_tcp_state* ts, ci_netif* netif) if( pkt ) { ci_tcp_enqueue_no_data(ts, netif, pkt); - } - else { - LOG_U(log(LNTS_FMT "%s: out of pkt bufs", - LNTS_PRI_ARGS(netif, ts), __FUNCTION__)); + } else { + LOG_U(log(LNTS_FMT "%s: out of pkt bufs", LNTS_PRI_ARGS(netif, ts), + __FUNCTION__)); CITP_STATS_NETIF_INC(netif, tcp_cant_fin); return -ENOBUFS; } @@ -135,18 +197,18 @@ int __ci_tcp_shutdown(ci_netif* netif, ci_tcp_state* ts, int how) */ if( ts->s.b.state & CI_TCP_STATE_NOT_CONNECTED ) { switch( ts->s.b.state ) { - case CI_TCP_SYN_SENT: - ci_tcp_drop(netif, ts, ECONNRESET); - ts->s.rx_errno = ENOTCONN; - return 0; - - case CI_TCP_CLOSING: - case CI_TCP_LAST_ACK: - /* already shut down */ - return 0; - - default: - return -ENOTCONN; + case CI_TCP_SYN_SENT: + ci_tcp_drop(netif, ts, ECONNRESET); + ts->s.rx_errno = ENOTCONN; + return 0; + + case CI_TCP_CLOSING: + case CI_TCP_LAST_ACK: + /* already shut down */ + return 0; + + default: + return -ENOTCONN; } } @@ -175,8 +237,7 @@ int __ci_tcp_shutdown(ci_netif* netif, ci_tcp_state* ts, int how) /* peer is going to TIME_WAIT and * ISN for next connection needs to be recorded */ ci_tcp_prev_seq_remember(netif, ts); - } - else { + } else { ci_tcp_set_slow_state(netif, ts, CI_TCP_FIN_WAIT1); } /* if not tied to an fd, make sure we leave this state at some point */ @@ -203,9 +264,14 @@ int __ci_tcp_shutdown(ci_netif* netif, ci_tcp_state* ts, int how) ci_tcp_rto_set(netif, ts); } - ci_tcp_wake_not_in_poll(netif, ts, - CI_SB_FLAG_WAKE_TX | - (how == SHUT_RDWR ? 
CI_SB_FLAG_WAKE_RX : 0)); +#ifndef __KERNEL__ + if( rc == 0 ) { + if( (rc = ci_tcp_fin_added(ts, netif)) != 0 ) { + LOG_TC(ci_log("Unable to poll for final FIN ACK while shutting down")); + } + } +#endif + return 0; } @@ -218,15 +284,15 @@ int __ci_tcp_shutdown(ci_netif* netif, ci_tcp_state* ts, int how) /* Closes a cached fd. In the typical case, this boils down to sys_close. */ static void uncache_fd(ci_netif* ni, ci_tcp_state* ts) { - int fd = ts->cached_on_fd; - int pid = ts->cached_on_pid; + int fd = ts->cached_on_fd; + int pid = ts->cached_on_pid; int cur_tgid = task_tgid_vnr(current); - LOG_EP(ci_log("Uncaching fd %d on pid %d running pid %d:%s", fd, - pid, cur_tgid, current->comm)); + LOG_EP(ci_log("Uncaching fd %d on pid %d running pid %d:%s", fd, pid, + cur_tgid, current->comm)); /* No tasklets or other bottom-halves - we always have "current" */ ci_assert(current); - if( !(ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE_NO_FD) && - (~current->flags & PF_EXITING) ) { + if( ! (ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE_NO_FD) && + (~current->flags & PF_EXITING) ) { /* If the process is exiting, there is nothing to do. * Otherwise, we try to close fd. */ /* There is peril here. 
We don't currently have the NO_FD flag set, so @@ -249,39 +315,34 @@ static void uncache_fd(ci_netif* ni, ci_tcp_state* ts) if( current->files != NULL ) { if( pid != cur_tgid ) { NI_LOG(ni, RESOURCE_WARNINGS, - "%s: pid mismatch: cached_on_pid=%d current=%d:%s", __func__, - pid, cur_tgid, current->comm); - } - else if( (filp = fget(fd)) == NULL ) { + "%s: pid mismatch: cached_on_pid=%d current=%d:%s", __func__, pid, + cur_tgid, current->comm); + } else if( (filp = fget(fd)) == NULL ) { NI_LOG(ni, RESOURCE_WARNINGS, - "%s: pid %d does not has cached file under fd=%d", - __func__, fd, pid); - } - else if( filp->f_op != &linux_tcp_helper_fops_tcp ) { + "%s: pid %d does not has cached file under fd=%d", __func__, fd, + pid); + } else if( filp->f_op != &linux_tcp_helper_fops_tcp ) { NI_LOG(ni, RESOURCE_WARNINGS, - "%s: pid %d has unexpected file under fd=%d", - __func__, fd, pid); + "%s: pid %d has unexpected file under fd=%d", __func__, fd, pid); fput(filp); - } - else { + } else { fput(filp); efab_linux_sys_close(fd); } - } - else { + } else { /* This should not happen, as uncache_fd() must not be deferred. */ ci_log("%s: called from workqueue - cannot close file descriptor %d.", - __func__, fd); + __func__, fd); ci_assert(0); } } } -ci_inline void clear_cached_state(ci_tcp_state *ts) +ci_inline void clear_cached_state(ci_tcp_state* ts) { ci_atomic32_and(&ts->s.b.sb_aflags, - ~(CI_SB_AFLAG_IN_CACHE|CI_SB_AFLAG_IN_PASSIVE_CACHE)); - ts->cached_on_fd = -1; + ~(CI_SB_AFLAG_IN_CACHE | CI_SB_AFLAG_IN_PASSIVE_CACHE)); + ts->cached_on_fd = -1; ts->cached_on_pid = -1; } @@ -301,14 +362,14 @@ ci_inline void clear_cached_state(ci_tcp_state *ts) * sockets on the listening socket. Else removes the cached sockets * on the stack a.k.a. active caching. 
*/ -static void uncache_ep(ci_netif *netif, ci_tcp_socket_listen* tls, - ci_tcp_state *ts) +static void uncache_ep( + ci_netif* netif, ci_tcp_socket_listen* tls, ci_tcp_state* ts) { LOG_EP(ci_log("Uncaching ep %d", S_FMT(ts))); - ci_assert( ci_tcp_is_cached(ts) ); + ci_assert(ci_tcp_is_cached(ts)); - oo_p_dllink_del_init(netif, oo_p_dllink_sb(netif, &ts->s.b, - &ts->epcache_link)); + oo_p_dllink_del_init( + netif, oo_p_dllink_sb(netif, &ts->s.b, &ts->epcache_link)); /* EPs on the cached list have hw filters present, even though notionally * they are 'freed'. So we clear filters here. Note that we leave the @@ -318,13 +379,13 @@ static void uncache_ep(ci_netif *netif, ci_tcp_socket_listen* tls, * pending or accept-q, because it will be in the closed state if and only * if it is on the cache list. */ - if (ts->s.b.state == CI_TCP_CLOSED) + if( ts->s.b.state == CI_TCP_CLOSED ) ci_tcp_ep_clear_filters(netif, S_SP(ts), 0); /* After we clear CI_SB_AFLAG_IN_CACHE flag, we are not * ci_tcp_is_timeout_orphan() any more. Do not confuse other * parts of code: get out from timewait list. */ - if ( ci_tcp_is_timeout_orphan(ts) ) + if( ci_tcp_is_timeout_orphan(ts) ) ci_netif_timeout_remove(netif, ts); clear_cached_state(ts); @@ -339,27 +400,26 @@ static void uncache_ep(ci_netif *netif, ci_tcp_socket_listen* tls, * control this using [CI_SB_AFLAG_IN_CACHE_NO_FD_BIT]. */ /* Fixme: what about timewait? 
*/ if( ci_bit_test_and_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_IN_CACHE_NO_FD_BIT) ) - efab_tcp_helper_close_endpoint(netif2tcp_helper_resource(netif), S_SP(ts), 0); + efab_tcp_helper_close_endpoint( + netif2tcp_helper_resource(netif), S_SP(ts), 0); if( tls ) { /* increase per socket counter even if passive cache is shared */ - ci_atomic32_inc((volatile ci_uint32*) - CI_NETIF_PTR(netif, tls->epcache.avail_stack)); + ci_atomic32_inc( + (volatile ci_uint32*) CI_NETIF_PTR(netif, tls->epcache.avail_stack)); ci_atomic32_inc(&tls->cache_avail_sock); ci_assert_le(netif->state->passive_cache_avail_stack, - netif->state->opts.sock_cache_max); + netif->state->opts.sock_cache_max); if( ~NI_OPTS(netif).scalable_filter_mode & CITP_SCALABLE_MODE_PASSIVE ) - ci_assert_le(tls->cache_avail_sock, - netif->state->opts.per_sock_cache_max); - } - else if( ts->s.s_flags & CI_SOCK_FLAG_SCALPASSIVE ) { + ci_assert_le( + tls->cache_avail_sock, netif->state->opts.per_sock_cache_max); + } else if( ts->s.s_flags & CI_SOCK_FLAG_SCALPASSIVE ) { ci_atomic32_inc(&netif->state->passive_cache_avail_stack); /* we do not know tls, so per-socket statistic cannot be updated */ ci_assert_le(netif->state->passive_cache_avail_stack, - netif->state->opts.sock_cache_max); - } - else { + netif->state->opts.sock_cache_max); + } else { ci_netif_state* ns = netif->state; ci_atomic32_inc(&ns->active_cache_avail_stack); ci_assert_le(ns->active_cache_avail_stack, ns->opts.sock_cache_max); @@ -371,13 +431,13 @@ static void uncache_ep(ci_netif *netif, ci_tcp_socket_listen* tls, * sockets on the listening socket. Else removes the cached sockets * on the stack a.k.a. active caching. 
*/ -static void -uncache_ep_list(ci_netif *netif, ci_tcp_socket_listen* tls, - struct oo_p_dllink_state thelist) +static void uncache_ep_list(ci_netif* netif, ci_tcp_socket_listen* tls, + struct oo_p_dllink_state thelist) { struct oo_p_dllink_state l, tmp; ci_assert(ci_netif_is_locked(netif)); - oo_p_dllink_for_each_safe(netif, l, tmp, thelist) { + oo_p_dllink_for_each_safe(netif, l, tmp, thelist) + { ci_tcp_state* cached_state = CI_CONTAINER(ci_tcp_state, epcache_link, l.l); ci_assert(cached_state); ci_assert(ci_tcp_is_cached(cached_state)); @@ -396,8 +456,8 @@ uncache_ep_list(ci_netif *netif, ci_tcp_socket_listen* tls, void ci_tcp_listen_uncache_fds(ci_netif* netif, ci_tcp_socket_listen* tls) { /* For scalable passive there will be nothing to do here */ - struct oo_p_dllink_state fd_states = oo_p_dllink_sb(netif, &tls->s.b, - &tls->epcache.fd_states); + struct oo_p_dllink_state fd_states = + oo_p_dllink_sb(netif, &tls->s.b, &tls->epcache.fd_states); struct oo_p_dllink_state l; l = oo_p_dllink_statep(netif, ci_xchg32(&fd_states.l->next, OO_P_NULL)); @@ -405,8 +465,8 @@ void ci_tcp_listen_uncache_fds(ci_netif* netif, ci_tcp_socket_listen* tls) if( tls->s.s_flags & CI_SOCK_FLAG_SCALPASSIVE ) return; while( l.p != fd_states.p ) { - ci_tcp_state* cached_state = CI_CONTAINER(ci_tcp_state, - epcache_fd_link, l.l); + ci_tcp_state* cached_state = + CI_CONTAINER(ci_tcp_state, epcache_fd_link, l.l); l = oo_p_dllink_statep(netif, l.l->next); /* We don't free up cached state directly. 
We call uncache_fd(), which @@ -431,14 +491,14 @@ void ci_tcp_epcache_drop_cache(ci_netif* ni) for( id = 0; id < ni->state->n_ep_bufs; ++id ) { citp_waitable_obj* wo = ID_TO_WAITABLE_OBJ(ni, id); if( wo->waitable.state == CI_TCP_LISTEN ) { - citp_waitable* w = &wo->waitable; - ci_sock_cmn* s = CI_CONTAINER(ci_sock_cmn, b, w); + citp_waitable* w = &wo->waitable; + ci_sock_cmn* s = CI_CONTAINER(ci_sock_cmn, b, w); ci_tcp_socket_listen* tls = SOCK_TO_TCP_LISTEN(s); ci_tcp_listen_uncache_fds(ni, tls); - uncache_ep_list(ni, tls, - oo_p_dllink_sb(ni, &tls->s.b, &tls->epcache.pending)); - uncache_ep_list(ni, tls, - oo_p_dllink_sb(ni, &tls->s.b, &tls->epcache.cache)); + uncache_ep_list( + ni, tls, oo_p_dllink_sb(ni, &tls->s.b, &tls->epcache.pending)); + uncache_ep_list( + ni, tls, oo_p_dllink_sb(ni, &tls->s.b, &tls->epcache.cache)); } } } @@ -448,10 +508,8 @@ void ci_tcp_active_cache_drop_cache(ci_netif* ni) { ci_netif_state* ns = ni->state; ci_assert(ci_netif_is_locked(ni)); - uncache_ep_list(ni, NULL, - oo_p_dllink_ptr(ni, &ns->active_cache.pending)); - uncache_ep_list(ni, NULL, - oo_p_dllink_ptr(ni, &ns->active_cache.cache)); + uncache_ep_list(ni, NULL, oo_p_dllink_ptr(ni, &ns->active_cache.pending)); + uncache_ep_list(ni, NULL, oo_p_dllink_ptr(ni, &ns->active_cache.cache)); } @@ -459,10 +517,10 @@ void ci_tcp_passive_scalable_cache_drop_cache(ci_netif* ni) { ci_netif_state* ns = ni->state; ci_assert(ci_netif_is_locked(ni)); - uncache_ep_list(ni, NULL, - oo_p_dllink_ptr(ni, &ns->passive_scalable_cache.pending)); - uncache_ep_list(ni, NULL, - oo_p_dllink_ptr(ni, &ns->passive_scalable_cache.cache)); + uncache_ep_list( + ni, NULL, oo_p_dllink_ptr(ni, &ns->passive_scalable_cache.pending)); + uncache_ep_list( + ni, NULL, oo_p_dllink_ptr(ni, &ns->passive_scalable_cache.cache)); } #endif @@ -474,9 +532,10 @@ void ci_tcp_passive_scalable_cache_drop_cache(ci_netif* ni) #ifndef __KERNEL__ static #endif -/* Wait for SO_LINGER timeout (or ACKed send queue). 
- * Starts with the stack locked, exits with the stack unlocked. */ -void ci_tcp_linger(ci_netif* ni, ci_tcp_state* ts) + /* Wait for SO_LINGER timeout (or ACKed send queue). + * Starts with the stack locked, exits with the stack unlocked. */ + void + ci_tcp_linger(ci_netif* ni, ci_tcp_state* ts) { /* This is called at user-level when a socket is closed if linger is ** enabled and has a timeout, and there is TX data outstanding. @@ -485,11 +544,11 @@ void ci_tcp_linger(ci_netif* ni, ci_tcp_state* ts) ** until timeout. */ ci_uint64 sleep_seq; - int rc = 0; + int rc = 0; ci_uint32 timeout = ts->s.so.linger * 1000; - int flags = CI_SLEEP_NETIF_LOCKED; + int flags = CI_SLEEP_NETIF_LOCKED; - LOG_TC(log("%s: "NTS_FMT, __FUNCTION__, NTS_PRI_ARGS(ni, ts))); + LOG_TC(log("%s: " NTS_FMT, __FUNCTION__, NTS_PRI_ARGS(ni, ts))); ci_assert(ci_netif_is_locked(ni)); #ifndef __KERNEL__ @@ -504,8 +563,8 @@ void ci_tcp_linger(ci_netif* ni, ci_tcp_state* ts) if( SEQ_EQ(tcp_enq_nxt(ts), tcp_snd_una(ts)) ) { break; } - rc = ci_sock_sleep(ni, &ts->s.b, CI_SB_FLAG_WAKE_TX, flags, - sleep_seq, &timeout); + rc = ci_sock_sleep( + ni, &ts->s.b, CI_SB_FLAG_WAKE_TX, flags, sleep_seq, &timeout); flags = 0; if( rc ) break; @@ -519,7 +578,8 @@ void ci_tcp_linger(ci_netif* ni, ci_tcp_state* ts) #if defined(__KERNEL__) static #endif -int ci_tcp_close(ci_netif* netif, ci_tcp_state* ts) + int + ci_tcp_close(ci_netif* netif, ci_tcp_state* ts) { ci_assert(netif); ci_assert(ts); @@ -539,8 +599,8 @@ int ci_tcp_close(ci_netif* netif, ci_tcp_state* ts) goto drop; #if CI_CFG_TCP_OFFLOAD_RECYCLER - oo_p_dllink_del_init(netif, oo_p_dllink_sb(netif, &ts->s.b, - &ts->recycle_link)); + oo_p_dllink_del_init( + netif, oo_p_dllink_sb(netif, &ts->s.b, &ts->recycle_link)); #endif if( tcp_rcv_usr(ts) != 0 ) { @@ -548,9 +608,10 @@ int ci_tcp_close(ci_netif* netif, ci_tcp_state* ts) * connection if all rx data not read. 
*/ CI_TCP_EXT_STATS_INC_TCP_ABORT_ON_CLOSE(netif); - LOG_TV(log(LPF "%d CLOSE sent RST, as rx data present added %u " - "delivered %u tcp_rcv_usr=%u", S_FMT(ts), ts->rcv_added, - ts->rcv_delivered, tcp_rcv_usr(ts))); + LOG_TV( + log(LPF "%d CLOSE sent RST, as rx data present added %u " + "delivered %u tcp_rcv_usr=%u", + S_FMT(ts), ts->rcv_added, ts->rcv_delivered, tcp_rcv_usr(ts))); ci_tcp_send_rst(netif, ts); goto drop; } @@ -576,12 +637,11 @@ int ci_tcp_close(ci_netif* netif, ci_tcp_state* ts) goto drop; } - if( CI_UNLIKELY( ts->tcpflags & CI_TCPT_FLAG_FIN_PENDING ) ) + if( CI_UNLIKELY(ts->tcpflags & CI_TCPT_FLAG_FIN_PENDING) ) ci_tcp_resend_fin(ts, netif); if( (ts->s.b.state == CI_TCP_TIME_WAIT) || - (ts->s.b.state == CI_TCP_CLOSING) || - (ts->s.b.state == CI_TCP_LAST_ACK) ) + (ts->s.b.state == CI_TCP_CLOSING) || (ts->s.b.state == CI_TCP_LAST_ACK) ) return 0; if( ! (ts->s.b.state & CI_TCP_STATE_NOT_CONNECTED) ) { @@ -610,28 +670,27 @@ int ci_tcp_close(ci_netif* netif, ci_tcp_state* ts) #ifndef __KERNEL__ /* Socket caching + SO_LINGER. In-kernel case is handled in * efab_tcp_helper_close_endpoint() */ - else if( - (ts->s.s_flags & CI_SOCK_FLAG_LINGER) && + else if( (ts->s.s_flags & CI_SOCK_FLAG_LINGER) && ! SEQ_EQ(tcp_enq_nxt(ts), tcp_snd_una(ts)) #if ! CI_CFG_UL_INTERRUPT_HELPER - && (ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE) + && (ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE) #else - /* Fixme: we should guarantee that this is running the right - * process. - */ + /* Fixme: we should guarantee that this is running the right + * process. 
+ */ #endif - ) { - ci_assert(ts->s.so.linger != 0); - ci_tcp_linger(netif, ts); - ci_netif_lock(netif); + ) { + ci_assert(ts->s.so.linger != 0); + ci_tcp_linger(netif, ts); + ci_netif_lock(netif); } #endif return rc; } - drop: - LOG_TC(log(LPF "%d drop connection in %s state", S_FMT(ts), - ci_tcp_state_str(ts->s.b.state))); +drop: + LOG_TC(log(LPF "%d drop connection in %s state", S_FMT(ts), + ci_tcp_state_str(ts->s.b.state))); /* ci_tcp_drop should really drop connection instead of leaking it, * because we can get here only when asyncronyously closing alien * non-accepted connection from listen socket closure. */ @@ -642,7 +701,8 @@ int ci_tcp_close(ci_netif* netif, ci_tcp_state* ts) #endif /* OO_DO_STACK_POLL */ -#if (defined(__KERNEL__) && ! CI_CFG_UL_INTERRUPT_HELPER) || (! defined(__KERNEL__) && CI_CFG_UL_INTERRUPT_HELPER) +#if( defined(__KERNEL__) && ! CI_CFG_UL_INTERRUPT_HELPER ) || \ + (! defined(__KERNEL__) && CI_CFG_UL_INTERRUPT_HELPER) void ci_tcp_listen_shutdown_queues(ci_netif* netif, ci_tcp_socket_listen* tls) { int synrecvs; @@ -652,8 +712,7 @@ void ci_tcp_listen_shutdown_queues(ci_netif* netif, ci_tcp_socket_listen* tls) ci_assert(ci_netif_is_locked(netif)); /* clear up synrecv queue */ - LOG_TV(ci_log("%s: %d clear out synrecv queue", __FUNCTION__, - S_FMT(tls))); + LOG_TV(ci_log("%s: %d clear out synrecv queue", __FUNCTION__, S_FMT(tls))); if( tls->n_listenq != 0 ) ci_ip_timer_clear(netif, &tls->listenq_tid); synrecvs = ci_tcp_listenq_drop_all(netif, tls); @@ -667,20 +726,20 @@ void ci_tcp_listen_shutdown_queues(ci_netif* netif, ci_tcp_socket_listen* tls) ** and free resources associated with sockets on acceptq */ LOG_TV(log("%s: %d clear out accept queue (%d entries)", __FUNCTION__, - S_FMT(tls), ci_tcp_acceptq_n(tls))); + S_FMT(tls), ci_tcp_acceptq_n(tls))); while( ci_tcp_acceptq_not_empty(tls) ) { citp_waitable* w; - ci_tcp_state* ats; /* accepted ts */ + ci_tcp_state* ats; /* accepted ts */ w = ci_tcp_acceptq_get(netif, tls); #if 
defined(__KERNEL__) && CI_CFG_ENDPOINT_MOVE if( w->sb_aflags & CI_SB_AFLAG_MOVED_AWAY ) { - tcp_helper_resource_t *thr = NULL; - oo_sp sp; - ci_uint32 stack_id; - ci_netif *ani; /* netif of the accepted socket */ + tcp_helper_resource_t* thr = NULL; + oo_sp sp; + ci_uint32 stack_id; + ci_netif* ani; /* netif of the accepted socket */ #ifdef NDEBUG if( in_interrupt() ) { @@ -689,32 +748,35 @@ void ci_tcp_listen_shutdown_queues(ci_netif* netif, ci_tcp_socket_listen* tls) continue; } #else - ci_assert(!in_interrupt()); + ci_assert(! in_interrupt()); #endif - sp = w->moved_to_sock_id; + sp = w->moved_to_sock_id; stack_id = w->moved_to_stack_id; citp_waitable_obj_free(netif, w); /* do not use w any more */ LOG_TV(log("%s: alien socket %d:%d in accept queue %d:%d", __FUNCTION__, - stack_id, OO_SP_FMT(sp), NI_ID(netif), S_FMT(tls))); + stack_id, OO_SP_FMT(sp), NI_ID(netif), S_FMT(tls))); if( efab_thr_table_lookup(NULL, NULL, stack_id, - EFAB_THR_TABLE_LOOKUP_CHECK_USER, - OO_THR_REF_BASE, &thr) != 0 ) { - LOG_U(log("%s: listening socket %d:%d can't find " - "acceptq memeber %d:%d", __FUNCTION__, - netif->state->stack_id, tls->s.b.bufid, stack_id, sp)); + EFAB_THR_TABLE_LOOKUP_CHECK_USER, OO_THR_REF_BASE, &thr) != 0 ) { + LOG_U( + log("%s: listening socket %d:%d can't find " + "acceptq memeber %d:%d", + __FUNCTION__, netif->state->stack_id, tls->s.b.bufid, stack_id, + sp)); continue; } ani = &thr->netif; - if( !(SP_TO_WAITABLE(ani, sp)->state & CI_TCP_STATE_TCP) || + if( ! 
(SP_TO_WAITABLE(ani, sp)->state & CI_TCP_STATE_TCP) || SP_TO_WAITABLE(ani, sp)->state == CI_TCP_LISTEN ) { - LOG_U(log("%s: listening socket %d:%d has non-TCP " - "acceptq memeber %d:%d", __FUNCTION__, - netif->state->stack_id, tls->s.b.bufid, stack_id, sp)); + LOG_U( + log("%s: listening socket %d:%d has non-TCP " + "acceptq memeber %d:%d", + __FUNCTION__, netif->state->stack_id, tls->s.b.bufid, stack_id, + sp)); oo_thr_ref_drop(thr->ref, OO_THR_REF_BASE); continue; } @@ -731,18 +793,18 @@ void ci_tcp_listen_shutdown_queues(ci_netif* netif, ci_tcp_socket_listen* tls) ats = &CI_CONTAINER(citp_waitable_obj, waitable, w)->tcp; - ci_assert(ci_tcp_is_cached(ats) || - (ats->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN)); + ci_assert( + ci_tcp_is_cached(ats) || (ats->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN)); ci_assert(ats->s.b.sb_aflags & CI_SB_AFLAG_TCP_IN_ACCEPTQ); #if CI_CFG_FD_CACHING /* We leave the acceptq flag for cached eps - the state free will be * triggered from the close once we've closed the fd. */ - if( !ci_tcp_is_cached(ats) ) + if( ! 
ci_tcp_is_cached(ats) ) #endif /* Remove acceptq flag to allow state free on drop */ - ci_bit_clear(&ats->s.b.sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ_BIT); + ci_bit_clear(&ats->s.b.sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ_BIT); if( ats->s.b.state != CI_TCP_CLOSED && ats->s.b.state != CI_TCP_TIME_WAIT ) { @@ -754,7 +816,7 @@ void ci_tcp_listen_shutdown_queues(ci_netif* netif, ci_tcp_socket_listen* tls) #if CI_CFG_FD_CACHING if( ci_tcp_is_cached(ats) ) { - LOG_EP(ci_log ("listen_shutdown - uncache from acceptq")); + LOG_EP(ci_log("listen_shutdown - uncache from acceptq")); uncache_ep(netif, tls, ats); /* Remove acceptq flag to allow state free on drop */ @@ -773,11 +835,11 @@ void ci_tcp_listen_shutdown_queues(ci_netif* netif, ci_tcp_socket_listen* tls) */ /* There will be nothing to do here for scalable passive */ LOG_EP(ci_log("listen_shutdown - uncache all on cache list")); - uncache_ep_list(netif, tls, - oo_p_dllink_sb(netif, &tls->s.b, &tls->epcache.cache)); + uncache_ep_list( + netif, tls, oo_p_dllink_sb(netif, &tls->s.b, &tls->epcache.cache)); LOG_EP(ci_log("listen_shutdown - uncache all on pending list")); - uncache_ep_list(netif, tls, - oo_p_dllink_sb(netif, &tls->s.b, &tls->epcache.pending)); + uncache_ep_list( + netif, tls, oo_p_dllink_sb(netif, &tls->s.b, &tls->epcache.pending)); #endif } #endif @@ -785,9 +847,9 @@ void ci_tcp_listen_shutdown_queues(ci_netif* netif, ci_tcp_socket_listen* tls) #if defined(__KERNEL__) && CI_CFG_FD_CACHING void ci_tcp_listen_update_cached(ci_netif* netif, ci_tcp_socket_listen* tls) { - tcp_helper_endpoint_t * cached_ep; + tcp_helper_endpoint_t* cached_ep; struct oo_p_dllink_state list, link, tmp; - ci_tcp_state *cached_state; + ci_tcp_state* cached_state; /* Before we clear our filters we must update the filters for any connected * sockets which were accepted from our cache. 
We do not update the filter @@ -798,7 +860,8 @@ void ci_tcp_listen_update_cached(ci_netif* netif, ci_tcp_socket_listen* tls) */ list = oo_p_dllink_sb(netif, &tls->s.b, &tls->epcache_connected); - oo_p_dllink_for_each_safe(netif, link, tmp, list) { + oo_p_dllink_for_each_safe(netif, link, tmp, list) + { cached_state = CI_CONTAINER(ci_tcp_state, epcache_link, link.l); oo_p_dllink_del_init(netif, link); @@ -816,9 +879,10 @@ void ci_tcp_listen_update_cached(ci_netif* netif, ci_tcp_socket_listen* tls) * shutdown cleanly. */ list = oo_p_dllink_sb(netif, &tls->s.b, &tls->epcache.pending); - oo_p_dllink_for_each(netif, link, list) { + oo_p_dllink_for_each(netif, link, list) + { cached_state = CI_CONTAINER(ci_tcp_state, epcache_link, link.l); - cached_ep = ci_netif_ep_get(netif, cached_state->s.b.bufid); + cached_ep = ci_netif_ep_get(netif, cached_state->s.b.bufid); tcp_helper_endpoint_update_filter_details(cached_ep); } @@ -847,31 +911,31 @@ void __ci_tcp_listen_shutdown(ci_netif* netif, ci_tcp_socket_listen* tls) * OS. */ if( ! (tls->s.s_flags & CI_SOCK_FLAG_PORT_BOUND) ) tls->s.s_flags &= ~CI_SOCK_FLAG_BOUND; - /* Shutdown the OS socket and clear out the filters. */ -# ifdef __KERNEL__ - rc = tcp_helper_endpoint_shutdown(netif2tcp_helper_resource(netif), - S_SP(tls), SHUT_RDWR, CI_TCP_LISTEN); + /* Shutdown the OS socket and clear out the filters. */ +#ifdef __KERNEL__ + rc = tcp_helper_endpoint_shutdown( + netif2tcp_helper_resource(netif), S_SP(tls), SHUT_RDWR, CI_TCP_LISTEN); if( rc == -EINVAL ) /* This means there is no O/S socket. This is expected when socket has * been closed, as the O/S socket has already been shutdown and * released. 
*/ rc = 0; -# else - rc = ci_tcp_helper_endpoint_shutdown(netif, S_SP(tls), - SHUT_RDWR, CI_TCP_LISTEN); -# endif +#else + rc = ci_tcp_helper_endpoint_shutdown( + netif, S_SP(tls), SHUT_RDWR, CI_TCP_LISTEN); +#endif #if CI_CFG_UL_INTERRUPT_HELPER if( rc == -1 && errno == EINVAL ) { /* See the comment above - the socket is already closed */ - rc = 0; + rc = 0; errno = saved_errno; } ci_tcp_listen_shutdown_queues(netif, tls); #endif if( rc < 0 ) - LOG_E(ci_log("%s: [%d:%d] shutdown(os_sock) failed %d", - __FUNCTION__, NI_ID(netif), S_FMT(tls), rc)); + LOG_E(ci_log("%s: [%d:%d] shutdown(os_sock) failed %d", __FUNCTION__, + NI_ID(netif), S_FMT(tls), rc)); } @@ -889,19 +953,19 @@ void ci_tcp_all_fds_gone_common(ci_netif* ni, ci_tcp_state* ts) /* If we are in a state where we time out orphaned connections: */ if( (ts->s.b.state & CI_TCP_STATE_TIMEOUT_ORPHAN) && - !(ts->s.b.sb_flags & CI_SB_FLAG_MOVED) ) + ! (ts->s.b.sb_flags & CI_SB_FLAG_MOVED) ) ci_netif_fin_timeout_enter(ni, ts); /* Orphaned sockets do not need keepalive */ if( ts->s.s_flags & CI_SOCK_FLAG_KALIVE ) { - ts->s.s_flags &=~ CI_SOCK_FLAG_KALIVE; + ts->s.s_flags &= ~CI_SOCK_FLAG_KALIVE; ci_tcp_kalive_check_and_clear(ni, ts); ts->ka_probes = 0; } } -void ci_tcp_listen_all_fds_gone(ci_netif* ni, ci_tcp_socket_listen* tls, - int do_free) +void ci_tcp_listen_all_fds_gone( + ci_netif* ni, ci_tcp_socket_listen* tls, int do_free) { /* All process references to this socket have gone. So we should * shutdown() if necessary, and arrange for all resources to eventually diff --git a/src/lib/transport/ip/tcp_connect.c b/src/lib/transport/ip/tcp_connect.c index 8c962edfc..688975a9a 100644 --- a/src/lib/transport/ip/tcp_connect.c +++ b/src/lib/transport/ip/tcp_connect.c @@ -2,7 +2,7 @@ /* X-SPDX-Copyright-Text: (c) Copyright 2003-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! 
\file ** ** \author djr/ctk ** \brief TCP connection routines: @@ -12,9 +12,9 @@ ** *//* \**************************************************************************/ - + /*! \cidoxg_lib_transport_ip */ - + #include "ip_internal.h" #include #include @@ -29,56 +29,56 @@ #define LPF "tcp_connect: " - - -#if !defined(__KERNEL__) || CI_CFG_ENDPOINT_MOVE +#if ! defined(__KERNEL__) || CI_CFG_ENDPOINT_MOVE /* TCP connect() implementation is not needed in-kernel except for loopback */ #ifndef __KERNEL__ /*! * Tests for valid sockaddr & sockaddr length & AF_INET or AF_INET6. */ -static int ci_tcp_validate_sa( sa_family_t domain, - const struct sockaddr* sa, socklen_t sa_len ) +static int ci_tcp_validate_sa( + sa_family_t domain, const struct sockaddr* sa, socklen_t sa_len) { /* * Linux deviates from documented behaviour here; * On Linux we return EINVAL if sa and sa_len are NULL and 0 respectively, * and we return EFAULT if sa is NULL and sa_len != 0.... */ - if( !sa ) { - LOG_U(ci_log(LPF "invalid sockaddr : sa = %lx, sa_len = %d", - (long) sa, sa_len)); + if( ! sa ) { + LOG_U(ci_log( + LPF "invalid sockaddr : sa = %lx, sa_len = %d", (long) sa, sa_len)); if( sa_len == 0 ) - RET_WITH_ERRNO( EINVAL ); + RET_WITH_ERRNO(EINVAL); else - RET_WITH_ERRNO( EFAULT ); + RET_WITH_ERRNO(EFAULT); } - if( sa_len < sizeof(struct sockaddr_in) + if( sa_len < sizeof(struct sockaddr_in) #if CI_CFG_FAKE_IPV6 || (domain == AF_INET6 && sa_len < SIN6_LEN_RFC2133) #endif - ) { - LOG_U( ci_log(LPF "struct too short to be sockaddr_in(6)" )); - RET_WITH_ERRNO( EINVAL ); + ) { + LOG_U(ci_log(LPF "struct too short to be sockaddr_in(6)")); + RET_WITH_ERRNO(EINVAL); } /* It should be sa->sa_family, but MS wdm does not understand it, * so let's use CI_SIN(sa)->sin_family. 
*/ - if (CI_SIN(sa)->sin_family != domain && - CI_SIN(sa)->sin_family != AF_UNSPEC) { - LOG_U(ci_log(LPF "address family %d does not match " - "with socket domain %d", CI_SIN(sa)->sin_family, domain)); + if( CI_SIN(sa)->sin_family != domain && + CI_SIN(sa)->sin_family != AF_UNSPEC ) { + LOG_U( + ci_log(LPF "address family %d does not match " + "with socket domain %d", + CI_SIN(sa)->sin_family, domain)); RET_WITH_ERRNO(EAFNOSUPPORT); } -#if CI_CFG_FAKE_IPV6 && !CI_CFG_IPV6 - if (sa->sa_family == AF_INET6 && !ci_tcp_ipv6_is_ipv4(sa)) { +#if CI_CFG_FAKE_IPV6 && ! CI_CFG_IPV6 + if( sa->sa_family == AF_INET6 && ! ci_tcp_ipv6_is_ipv4(sa) ) { LOG_TC(ci_log(LPF "Pure IPv6 address is not supported")); RET_WITH_ERRNO(EAFNOSUPPORT); } -#endif +#endif return 0; } #endif @@ -91,19 +91,19 @@ ci_inline void ci_tcp_bind_flags_assert_valid(ci_sock_cmn* s) { if( s->s_flags & CI_SOCK_FLAG_DEFERRED_BIND ) { /* If we deferred the bind we need to know that we should bind later */ - ci_assert( s->s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND ); + ci_assert(s->s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND); /* We can only defer bind in cases where the application doesn't bind to * a specific port. 
*/ - ci_assert( s->s_flags & ~CI_SOCK_FLAG_PORT_BOUND ); + ci_assert(s->s_flags & ~CI_SOCK_FLAG_PORT_BOUND); } } #ifndef __ci_driver__ /* Set CI_SOCK_FLAG_BOUND_ALIEN if needed */ -static void ci_tcp_bind_check_laddr(ci_netif *ni, ci_sock_cmn *s, - ci_addr_t addr) +static void ci_tcp_bind_check_laddr( + ci_netif* ni, ci_sock_cmn* s, ci_addr_t addr) { if( (s->s_flags & CI_SOCK_FLAG_TPROXY) || CI_IPX_ADDR_IS_ANY(addr) || cicp_user_addr_is_local_efab(ni, addr) ) @@ -124,9 +124,8 @@ static void ci_tcp_bind_check_laddr(ci_netif *ni, ci_sock_cmn *s, * CI_SOCKET_HANDOVER, Pass to OS, OS bound ok, (no error) * CI_SOCKET_ERROR & errno set */ -static int -__ci_tcp_bind(ci_netif *ni, ci_sock_cmn *s, ci_fd_t fd, - ci_addr_t addr, ci_uint16* port_be16, int may_defer) +static int __ci_tcp_bind(ci_netif* ni, ci_sock_cmn* s, ci_fd_t fd, + ci_addr_t addr, ci_uint16* port_be16, int may_defer) { int rc = 0; ci_uint16 user_port; /* Port number specified by user, not by OS. @@ -134,28 +133,29 @@ __ci_tcp_bind(ci_netif *ni, ci_sock_cmn *s, ci_fd_t fd, union ci_sockaddr_u sa_u; ci_assert(s->domain == AF_INET || s->domain == AF_INET6); - ci_assert( port_be16 ); - ci_assert(s->b.state & CI_TCP_STATE_TCP || - s->b.state == CI_TCP_STATE_ACTIVE_WILD); + ci_assert(port_be16); + ci_assert( + s->b.state & CI_TCP_STATE_TCP || s->b.state == CI_TCP_STATE_ACTIVE_WILD); ci_tcp_bind_flags_assert_valid(s); user_port = *port_be16; - if( !(s->s_flags & CI_SOCK_FLAG_TPROXY) ) { + if( ! (s->s_flags & CI_SOCK_FLAG_TPROXY) ) { #if CI_CFG_TCP_SHARED_LOCAL_PORTS /* In active-wild mode we might not want to bind yet. */ - if( !may_defer || !NI_OPTS(ni).tcp_shared_local_ports || user_port != 0 ) + if( ! may_defer || ! NI_OPTS(ni).tcp_shared_local_ports || user_port != 0 ) #endif { #if CI_CFG_FAKE_IPV6 ci_assert(s->domain == AF_INET || s->domain == AF_INET6); if( s->domain == AF_INET ) ci_make_sockaddr_from_ip4(&sa_u.sin, user_port, addr.ip4); - else if( !CI_IS_ADDR_IP6(addr) ) + else if( ! 
CI_IS_ADDR_IP6(addr) ) ci_make_sockaddr_in6_from_ip4(&sa_u.sin6, user_port, addr.ip4); #if CI_CFG_IPV6 else { - ci_make_sockaddr_in6_from_ip6(&sa_u.sin6, user_port, (ci_uint32*)addr.ip6); + ci_make_sockaddr_in6_from_ip6( + &sa_u.sin6, user_port, (ci_uint32*) addr.ip6); /* Bind to link-local address requires an interface */ sa_u.sin6.sin6_scope_id = s->cp.so_bindtodevice; } @@ -167,14 +167,13 @@ __ci_tcp_bind(ci_netif *ni, ci_sock_cmn *s, ci_fd_t fd, #ifdef __ci_driver__ rc = efab_tcp_helper_bind_os_sock_kernel(netif2tcp_helper_resource(ni), - SC_SP(s), &sa_u.sa, - sizeof(sa_u), port_be16); + SC_SP(s), &sa_u.sa, sizeof(sa_u), port_be16); #else rc = ci_tcp_helper_bind_os_sock(fd, &sa_u.sa, sizeof(sa_u), port_be16); #endif if( rc == 0 ) - s->s_flags &= ~(CI_SOCK_FLAG_CONNECT_MUST_BIND | - CI_SOCK_FLAG_DEFERRED_BIND); + s->s_flags &= + ~(CI_SOCK_FLAG_CONNECT_MUST_BIND | CI_SOCK_FLAG_DEFERRED_BIND); } #if CI_CFG_TCP_SHARED_LOCAL_PORTS /* We can defer this bind. We need to make an extra check for the socket @@ -184,8 +183,7 @@ __ci_tcp_bind(ci_netif *ni, ci_sock_cmn *s, ci_fd_t fd, ! (s->s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND) ) { /* Already bound. */ CI_SET_ERROR(rc, EINVAL); - } - else { + } else { /* CI_SOCK_FLAG_DEFERRED_BIND is clear, so either we never set it * (meaning nobody called bind()) or we've since cleared it (meaning that * the deferred bind has been performed). Only in the former case are we @@ -200,8 +198,7 @@ __ci_tcp_bind(ci_netif *ni, ci_sock_cmn *s, ci_fd_t fd, rc = 0; } #endif - } - else { + } else { /* CI_SOCK_FLAG_TPROXY is set. We don't use OS backing sockets for these, * and we don't support deferred binds either. */ @@ -209,13 +206,13 @@ __ci_tcp_bind(ci_netif *ni, ci_sock_cmn *s, ci_fd_t fd, s->s_flags &= ~CI_SOCK_FLAG_CONNECT_MUST_BIND; } - /* bug1781: only do this if the earlier bind succeeded. + /* bug1781: only do this if the earlier bind succeeded. 
* check if we can handle this socket */ if( rc != 0 ) return rc; if( user_port != 0 ) s->s_flags |= CI_SOCK_FLAG_PORT_BOUND; - if( !CI_IPX_ADDR_IS_ANY(addr) ) + if( ! CI_IPX_ADDR_IS_ANY(addr) ) s->cp.sock_cp_flags |= OO_SCP_BOUND_ADDR; ci_tcp_bind_flags_assert_valid(s); @@ -224,17 +221,17 @@ __ci_tcp_bind(ci_netif *ni, ci_sock_cmn *s, ci_fd_t fd, static int /*bool*/ -ci_tcp_connect_check_local_dst_addr(ci_tcp_socket_listen* tls, - ci_addr_t dst_addr) +ci_tcp_connect_check_local_dst_addr( + ci_tcp_socket_listen* tls, ci_addr_t dst_addr) { - if( !CI_IPX_ADDR_IS_ANY(tls->s.laddr) ) + if( ! CI_IPX_ADDR_IS_ANY(tls->s.laddr) ) return CI_IPX_ADDR_EQ(tls->s.laddr, dst_addr); #if CI_CFG_IPV6 else { if( CI_IS_ADDR_IP6(dst_addr) ) return CI_IS_ADDR_IP6(tls->s.laddr); else - return !(tls->s.s_flags & CI_SOCK_FLAG_V6ONLY); + return ! (tls->s.s_flags & CI_SOCK_FLAG_V6ONLY); } #endif @@ -242,8 +239,8 @@ ci_tcp_connect_check_local_dst_addr(ci_tcp_socket_listen* tls, } -oo_sp ci_tcp_connect_find_local_peer(ci_netif *ni, int locked, - ci_addr_t dst_addr, int dport_be16) +oo_sp ci_tcp_connect_find_local_peer( + ci_netif* ni, int locked, ci_addr_t dst_addr, int dport_be16) { ci_tcp_socket_listen* tls; int i; @@ -255,15 +252,15 @@ oo_sp ci_tcp_connect_find_local_peer(ci_netif *ni, int locked, * socket */ /* FIXME: make ci_netif_listener_lookup() work for unlocked stacks */ /* FIXME: enable IPv6, bug 84048 */ - sock = ci_netif_listener_lookup(ni, CI_IS_ADDR_IP6(dst_addr) ? - AF_SPACE_FLAG_IP6 : AF_SPACE_FLAG_IP4, - dst_addr, dport_be16); + sock = ci_netif_listener_lookup(ni, + CI_IS_ADDR_IP6(dst_addr) ? 
AF_SPACE_FLAG_IP6 : AF_SPACE_FLAG_IP4, + dst_addr, dport_be16); } if( OO_SP_NOT_NULL(sock) ) { tls = ID_TO_TCP_LISTEN(ni, sock); - if( ( ~tls->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN ) && - ( tls->s.cp.so_bindtodevice == CI_IFID_BAD ) ) + if( (~tls->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN) && + (tls->s.cp.so_bindtodevice == CI_IFID_BAD) ) goto found; } @@ -271,21 +268,26 @@ oo_sp ci_tcp_connect_find_local_peer(ci_netif *ni, int locked, * Perform full search to cover the case when destination address * does not belong to SF interface. */ - for( i = 0; i < (int)ni->state->n_ep_bufs; ++i ) { + for( i = 0; i < (int) ni->state->n_ep_bufs; ++i ) { citp_waitable_obj* wo = ID_TO_WAITABLE_OBJ(ni, i); - if( wo->waitable.state != CI_TCP_LISTEN ) continue; - if( wo->waitable.sb_aflags & CI_SB_AFLAG_ORPHAN ) continue; + if( wo->waitable.state != CI_TCP_LISTEN ) + continue; + if( wo->waitable.sb_aflags & CI_SB_AFLAG_ORPHAN ) + continue; tls = SOCK_TO_TCP_LISTEN(&wo->sock); - if( tls->s.cp.lport_be16 != dport_be16 ) continue; - if( !ci_tcp_connect_check_local_dst_addr(tls, dst_addr) ) continue; - if( tls->s.cp.so_bindtodevice != CI_IFID_BAD ) continue; + if( tls->s.cp.lport_be16 != dport_be16 ) + continue; + if( ! ci_tcp_connect_check_local_dst_addr(tls, dst_addr) ) + continue; + if( tls->s.cp.so_bindtodevice != CI_IFID_BAD ) + continue; goto found; } return OO_SP_NULL; found: /* this is our tls - connect to it! 
*/ - if( (int)ci_tcp_acceptq_n(tls) < tls->acceptq_max ) + if( (int) ci_tcp_acceptq_n(tls) < tls->acceptq_max ) return tls->s.b.bufid; else return OO_SP_INVALID; @@ -297,7 +299,8 @@ static void ci_tcp_init_ipcache_ip4_hdr(ci_tcp_state* ts) { ci_init_ipcache_ip4_hdr(&ts->s); memmove(&ts->s.pkt.ipx.ip4 + 1, &ts->s.pkt.ipx.ip6 + 1, sizeof(ci_tcp_hdr)); - ts->outgoing_hdrs_len -= CI_IPX_HDR_SIZE(AF_INET6) - CI_IPX_HDR_SIZE(AF_INET); + ts->outgoing_hdrs_len -= + CI_IPX_HDR_SIZE(AF_INET6) - CI_IPX_HDR_SIZE(AF_INET); if( CI_IS_ADDR_IP6(ts->s.cp.laddr) ) { ci_assert(CI_IPX_ADDR_IS_ANY(ts->s.cp.laddr)); @@ -309,7 +312,8 @@ static void ci_tcp_init_ipcache_ip6_hdr(ci_tcp_state* ts) { memmove(&ts->s.pkt.ipx.ip6 + 1, &ts->s.pkt.ipx.ip4 + 1, sizeof(ci_tcp_hdr)); ci_init_ipcache_ip6_hdr(&ts->s); - ts->outgoing_hdrs_len += CI_IPX_HDR_SIZE(AF_INET6) - CI_IPX_HDR_SIZE(AF_INET); + ts->outgoing_hdrs_len += + CI_IPX_HDR_SIZE(AF_INET6) - CI_IPX_HDR_SIZE(AF_INET); if( ! CI_IS_ADDR_IP6(ts->s.cp.laddr) ) { ci_assert(CI_IPX_ADDR_IS_ANY(ts->s.cp.laddr)); @@ -325,10 +329,9 @@ static void ci_tcp_init_ipcache_ip6_hdr(ci_tcp_state* ts) void ci_tcp_ipcache_convert(int af, ci_tcp_state* ts) { if( IS_AF_INET6(af) ) { - if( !ipcache_is_ipv6(&ts->s.pkt) ) + if( ! ipcache_is_ipv6(&ts->s.pkt) ) ci_tcp_init_ipcache_ip6_hdr(ts); - } - else if( ipcache_is_ipv6(&ts->s.pkt) ) { + } else if( ipcache_is_ipv6(&ts->s.pkt) ) { ci_tcp_init_ipcache_ip4_hdr(ts); } } @@ -337,15 +340,16 @@ void ci_tcp_ipcache_convert(int af, ci_tcp_state* ts) #ifndef __KERNEL__ /* check that we can handle this destination */ -static int ci_tcp_connect_check_dest(citp_socket* ep, ci_addr_t dst, - int dport_be16) +static int ci_tcp_connect_check_dest( + citp_socket* ep, ci_addr_t dst, int dport_be16) { ci_ip_cached_hdrs* ipcache = &ep->s->pkt; ci_addr_t src = sock_ipx_laddr(ep->s); #if CI_CFG_IPV6 /* - * Socket was bound to IPv4 and connecting to IPv6 or vice versa - hand it over. 
+ * Socket was bound to IPv4 and connecting to IPv6 or vice versa - hand it + * over. */ if( (ep->s->cp.sock_cp_flags & OO_SCP_BOUND_ADDR) && CI_ADDR_AF(dst) != CI_ADDR_AF(ep->s->laddr) ) { @@ -367,15 +371,13 @@ static int ci_tcp_connect_check_dest(citp_socket* ep, ci_addr_t dst, /* Control plane has selected a source address for us -- remember it. */ if( ipcache->status != retrrc_noroute && - ipcache->status != retrrc_alienroute && - CI_IPX_ADDR_IS_ANY(src) ) { + ipcache->status != retrrc_alienroute && CI_IPX_ADDR_IS_ANY(src) ) { ci_sock_set_laddr(ep->s, ipcache_laddr(ipcache)); ep->s->cp.laddr = ep->s->laddr; } - if(CI_LIKELY( ipcache->status == retrrc_success || - ipcache->status == retrrc_nomac || - ipcache->status < 0 )) { + if( CI_LIKELY(ipcache->status == retrrc_success || + ipcache->status == retrrc_nomac || ipcache->status < 0) ) { /* Onloadable. */ if( ipcache->encap.type & CICP_LLAP_TYPE_XMIT_HASH_LAYER4 ) /* We don't yet have a local port number, so the result of that @@ -388,19 +390,19 @@ static int ci_tcp_connect_check_dest(citp_socket* ep, ci_addr_t dst, else if( ipcache->status == retrrc_localroute ) { ci_tcp_state* ts = SOCK_TO_TCP(ep->s); - if( NI_OPTS(ep->netif).tcp_client_loopback == CITP_TCP_LOOPBACK_OFF) + if( NI_OPTS(ep->netif).tcp_client_loopback == CITP_TCP_LOOPBACK_OFF ) return CI_SOCKET_HANDOVER; ep->s->s_flags |= CI_SOCK_FLAG_BOUND_ALIEN; if( NI_OPTS(ep->netif).tcp_server_loopback != CITP_TCP_LOOPBACK_OFF ) - ts->local_peer = ci_tcp_connect_find_local_peer(ep->netif, 1 /* locked */, - dst, dport_be16); + ts->local_peer = ci_tcp_connect_find_local_peer( + ep->netif, 1 /* locked */, dst, dport_be16); else ts->local_peer = OO_SP_NULL; if( OO_SP_NOT_NULL(ts->local_peer) || NI_OPTS(ep->netif).tcp_client_loopback != - CITP_TCP_LOOPBACK_SAMESTACK ) { + CITP_TCP_LOOPBACK_SAMESTACK ) { ipcache->flags |= CI_IP_CACHE_IS_LOCALROUTE; ipcache->ether_offset = 4; /* lo is non-VLAN */ ipcache->dport_be16 = dport_be16; @@ -416,16 +418,16 @@ static int 
ci_tcp_connect_check_dest(citp_socket* ep, ci_addr_t dst, #endif -static int/*bool*/ -cicp_check_ipif_ifindex(struct oo_cplane_handle* cp, - ci_ifid_t ifindex, void* data) +static int /*bool*/ +cicp_check_ipif_ifindex( + struct oo_cplane_handle* cp, ci_ifid_t ifindex, void* data) { - return ifindex == *(ci_ifid_t*)data; + return ifindex == *(ci_ifid_t*) data; } -int -ci_tcp_use_mac_filter_listen(ci_netif* ni, ci_sock_cmn* s, ci_ifid_t ifindex) +int ci_tcp_use_mac_filter_listen( + ci_netif* ni, ci_sock_cmn* s, ci_ifid_t ifindex) { int mode; @@ -439,9 +441,9 @@ ci_tcp_use_mac_filter_listen(ci_netif* ni, ci_sock_cmn* s, ci_ifid_t ifindex) /* Listening sockets bound to an IP address on an interface that we have * a MAC filter for share that MAC filter. Clustering setting of listening * socket needs to match scalable mode rss-wise. */ - if( ((NI_OPTS(ni).cluster_ignore == 1 ) || - ! (s->s_flags & CI_SOCK_FLAG_REUSEPORT)) == - !(mode & CITP_SCALABLE_MODE_RSS) ) { + if( ((NI_OPTS(ni).cluster_ignore == 1) || + ! (s->s_flags & CI_SOCK_FLAG_REUSEPORT)) == + ! (mode & CITP_SCALABLE_MODE_RSS) ) { /* If we've been configured to use scalable filters on all interfaces, then * we can do so without further ado. */ if( NI_OPTS(ni).scalable_filter_ifindex_passive == @@ -451,9 +453,9 @@ ci_tcp_use_mac_filter_listen(ci_netif* ni, ci_sock_cmn* s, ci_ifid_t ifindex) /* based on bind to device we might be using scalable iface */ if( ifindex <= 0 ) { /* Determine which ifindex the IP address being bound to is on. 
*/ - ifindex = NI_OPTS(ni).scalable_filter_ifindex_passive; - return cicp_find_ifindex_by_ip(ni->cplane, sock_laddr(s), - cicp_check_ipif_ifindex, &ifindex); + ifindex = NI_OPTS(ni).scalable_filter_ifindex_passive; + return cicp_find_ifindex_by_ip( + ni->cplane, sock_laddr(s), cicp_check_ipif_ifindex, &ifindex); } return (NI_OPTS(ni).scalable_filter_ifindex_passive == ifindex); } @@ -461,9 +463,13 @@ ci_tcp_use_mac_filter_listen(ci_netif* ni, ci_sock_cmn* s, ci_ifid_t ifindex) } -#if !defined(__KERNEL__) || CI_CFG_ENDPOINT_MOVE -int ci_tcp_can_set_filter_in_ul(ci_netif *ni, ci_sock_cmn* s) +#if ! defined(__KERNEL__) || CI_CFG_ENDPOINT_MOVE +int ci_tcp_can_set_filter_in_ul(ci_netif* ni, ci_sock_cmn* s) { + /* swxtch architectures ONLY work in UL */ + if( ni->nic_hw->vis[0].nic_type.arch == EF_VI_ARCH_SWXTCH ) { + return 1; + } if( (s->s_flags & CI_SOCK_FLAGS_SCALABLE) == 0 ) return 0; if( s->b.state == CI_TCP_LISTEN ) @@ -481,33 +487,33 @@ int ci_tcp_can_set_filter_in_ul(ci_netif *ni, ci_sock_cmn* s) return 0; ci_assert_nequal(s->b.state, CI_TCP_LISTEN); - ci_assert(!CI_IPX_ADDR_IS_ANY(sock_ipx_laddr(s))); + ci_assert(! 
CI_IPX_ADDR_IS_ANY(sock_ipx_laddr(s))); ci_assert_nequal(sock_lport_be16(s), 0); return 1; } #endif -int ci_tcp_sock_set_stack_filter(ci_netif *ni, ci_sock_cmn* s) +int ci_tcp_sock_set_stack_filter(ci_netif* ni, ci_sock_cmn* s) { int rc; oo_sp sock; - LOG_TC(log( NSS_FMT " %s", NSS_PRI_ARGS(ni, s), __FUNCTION__)); - ci_assert((s->s_flags & CI_SOCK_FLAG_STACK_FILTER) == 0); + LOG_TC(log(NSS_FMT " %s %d", NSS_PRI_ARGS(ni, s), __FUNCTION__, s->s_flags)); + ci_assert_equal(s->s_flags & CI_SOCK_FLAG_STACK_FILTER, 0); + + // todo figure out something here - sock = ci_netif_filter_lookup(ni, sock_af_space(s), - sock_ipx_laddr(s), sock_lport_be16(s), - sock_ipx_raddr(s), sock_rport_be16(s), - sock_protocol(s)); + sock = ci_netif_filter_lookup(ni, sock_af_space(s), sock_ipx_laddr(s), + sock_lport_be16(s), sock_ipx_raddr(s), sock_rport_be16(s), + sock_protocol(s)); if( OO_SP_NOT_NULL(sock) ) return -EADDRINUSE; rc = ci_netif_filter_insert(ni, SC_ID(s), sock_af_space(s), - sock_ipx_laddr(s), sock_lport_be16(s), - sock_ipx_raddr(s), sock_rport_be16(s), - sock_protocol(s)); + sock_ipx_laddr(s), sock_lport_be16(s), sock_ipx_raddr(s), + sock_rport_be16(s), sock_protocol(s)); if( rc == 0 ) { s->s_flags |= CI_SOCK_FLAG_STACK_FILTER; if( (s->s_flags & CI_SOCK_FLAGS_SCALABLE) != 0 ) @@ -517,13 +523,12 @@ int ci_tcp_sock_set_stack_filter(ci_netif *ni, ci_sock_cmn* s) } -void ci_tcp_sock_clear_stack_filter(ci_netif *ni, ci_tcp_state* ts) +void ci_tcp_sock_clear_stack_filter(ci_netif* ni, ci_tcp_state* ts) { - LOG_TC(log( LNT_FMT " %s", LNT_PRI_ARGS(ni, ts), __FUNCTION__)); ci_assert((ts->s.s_flags & CI_SOCK_FLAG_STACK_FILTER) != 0); - ci_netif_filter_remove(ni, S_ID(ts), sock_af_space(&ts->s), tcp_ipx_laddr(ts), - tcp_lport_be16(ts), tcp_ipx_raddr(ts), - tcp_rport_be16(ts), tcp_protocol(ts)); + ci_netif_filter_remove(ni, S_ID(ts), sock_af_space(&ts->s), + tcp_ipx_laddr(ts), tcp_lport_be16(ts), tcp_ipx_raddr(ts), + tcp_rport_be16(ts), tcp_protocol(ts)); ts->s.s_flags &= 
~CI_SOCK_FLAG_STACK_FILTER; } @@ -536,39 +541,35 @@ ci_tcp_prev_seq_older(const ci_tcp_prev_seq_t* a, const ci_tcp_prev_seq_t* b) } -ci_inline ci_uint32 -ci_tcp_prev_seq_initial_seqno(ci_netif* ni, const ci_tcp_prev_seq_t* prev_seq) +ci_inline ci_uint32 ci_tcp_prev_seq_initial_seqno( + ci_netif* ni, const ci_tcp_prev_seq_t* prev_seq) { - return ci_tcp_initial_seqno(ni, prev_seq->laddr, prev_seq->lport, - prev_seq->raddr, prev_seq->rport); + return ci_tcp_initial_seqno( + ni, prev_seq->laddr, prev_seq->lport, prev_seq->raddr, prev_seq->rport); } -ci_inline ci_uint32 -ci_tcp_prev_seq_future_isn(ci_netif* ni, const ci_tcp_prev_seq_t* prev_seq, - ci_iptime_t ticks) +ci_inline ci_uint32 ci_tcp_prev_seq_future_isn( + ci_netif* ni, const ci_tcp_prev_seq_t* prev_seq, ci_iptime_t ticks) { return ci_tcp_future_isn(ni, prev_seq->laddr, prev_seq->lport, - prev_seq->raddr, prev_seq->rport, ticks); + prev_seq->raddr, prev_seq->rport, ticks); } -ci_inline ci_uint32 -ci_tcp_prev_seq_hash1(ci_netif* ni, const ci_tcp_prev_seq_t* prev_seq) +ci_inline ci_uint32 ci_tcp_prev_seq_hash1( + ci_netif* ni, const ci_tcp_prev_seq_t* prev_seq) { - return onload_hash1(ni->state->seq_table_entries_n - 1, - prev_seq->laddr, prev_seq->lport, - prev_seq->raddr, prev_seq->rport, - IPPROTO_TCP); + return onload_hash1(ni->state->seq_table_entries_n - 1, prev_seq->laddr, + prev_seq->lport, prev_seq->raddr, prev_seq->rport, IPPROTO_TCP); } -ci_inline ci_uint32 -ci_tcp_prev_seq_hash2(ci_netif* ni, const ci_tcp_prev_seq_t* prev_seq) +ci_inline ci_uint32 ci_tcp_prev_seq_hash2( + ci_netif* ni, const ci_tcp_prev_seq_t* prev_seq) { - return onload_hash2(prev_seq->laddr, prev_seq->lport, - prev_seq->raddr, prev_seq->rport, - IPPROTO_TCP); + return onload_hash2(prev_seq->laddr, prev_seq->lport, prev_seq->raddr, + prev_seq->rport, IPPROTO_TCP); } @@ -583,15 +584,14 @@ ci_tcp_clock_isn_safe(ci_netif* ni, const ci_tcp_prev_seq_t* prev_seq) ci_uint32 prev_seq_no = prev_seq->seq_no; /* We assume that all peers have 2 
MSL <= 240 s. The argument to this * function is in ticks, but a tick is between one and two milliseconds. */ - ci_uint32 isn_after_2msl = ci_tcp_prev_seq_future_isn - (ni, prev_seq, NI_CONF(ni).tconst_peer2msl_time); + ci_uint32 isn_after_2msl = ci_tcp_prev_seq_future_isn( + ni, prev_seq, NI_CONF(ni).tconst_peer2msl_time); return SEQ_GT(isn_now, prev_seq_no) && SEQ_GT(isn_after_2msl, prev_seq_no); } - ci_inline void ci_tcp_prev_seq_from_ts(ci_netif* ni, const ci_tcp_state* ts, - ci_tcp_prev_seq_t* prev_seq /* out */) + ci_tcp_prev_seq_t* prev_seq /* out */) { prev_seq->laddr = tcp_ipx_laddr(ts); prev_seq->raddr = tcp_ipx_raddr(ts); @@ -606,8 +606,7 @@ ci_inline void ci_tcp_prev_seq_from_ts(ci_netif* ni, const ci_tcp_state* ts, /* Insert [prev_seq_from] (copy 4-tuple and seq_no) * into the table at location [prev_seq]. */ ci_inline void ci_tcp_prev_seq_remember_at(ci_netif* ni, - const ci_tcp_prev_seq_t* prev_seq_from, - ci_tcp_prev_seq_t* prev_seq) + const ci_tcp_prev_seq_t* prev_seq_from, ci_tcp_prev_seq_t* prev_seq) { ci_uint32 isn_now; prev_seq->laddr = prev_seq_from->laddr; @@ -616,8 +615,7 @@ ci_inline void ci_tcp_prev_seq_remember_at(ci_netif* ni, prev_seq->rport = prev_seq_from->rport; prev_seq->seq_no = prev_seq_from->seq_no; - prev_seq->expiry = ci_tcp_time_now(ni) + - NI_CONF(ni).tconst_peer2msl_time; + prev_seq->expiry = ci_tcp_time_now(ni) + NI_CONF(ni).tconst_peer2msl_time; /* In many cases clock based ISN catches with entry's seq_no sooner * then nominal expiry time. 
Once this happen clock based ISN would be @@ -641,19 +639,18 @@ ci_inline void ci_tcp_prev_seq_remember_at(ci_netif* ni, but can expire at after_2msl XXXXXXXXXXXXXXXXXXXXXXXXXXXX| bad: same as case 2 */ - isn_now = ci_tcp_initial_seqno(ni, prev_seq->laddr, prev_seq->lport, - prev_seq->raddr, prev_seq->rport); + isn_now = ci_tcp_initial_seqno( + ni, prev_seq->laddr, prev_seq->lport, prev_seq->raddr, prev_seq->rport); if( SEQ_GT(prev_seq->seq_no, isn_now) ) { - ci_uint32 isn_after_2msl = ci_tcp_future_isn - (ni, prev_seq->laddr, prev_seq->lport, - prev_seq->raddr, prev_seq->rport, - NI_CONF(ni).tconst_peer2msl_time); + ci_uint32 isn_after_2msl = ci_tcp_future_isn(ni, prev_seq->laddr, + prev_seq->lport, prev_seq->raddr, prev_seq->rport, + NI_CONF(ni).tconst_peer2msl_time); if( SEQ_GT(isn_after_2msl, prev_seq->seq_no) ) { /* The clock based ISN will catch up with entry between 0 and 2msl. * Let's calculate exactly how much earlier this will happen * and adjust expiry time accordingly */ - ci_iptime_t expiry_reduce = ci_tcp_isn2tick - (ni, isn_after_2msl - prev_seq->seq_no); + ci_iptime_t expiry_reduce = + ci_tcp_isn2tick(ni, isn_after_2msl - prev_seq->seq_no); ci_assert_ge(NI_CONF(ni).tconst_peer2msl_time, expiry_reduce); prev_seq->expiry -= expiry_reduce; @@ -672,9 +669,9 @@ ci_inline void ci_tcp_prev_seq_remember_at(ci_netif* ni, * [prev_seq_val] is used to generate hashes, * [prev_seq_entry] is pointer to the hash table entry, we'd got match with * [prev_seq_val] 4 tuple. 
*/ -static void -__ci_tcp_prev_seq_free(ci_netif* ni, const ci_tcp_prev_seq_t* prev_seq_val, - const ci_tcp_prev_seq_t* prev_seq_entry) +static void __ci_tcp_prev_seq_free(ci_netif* ni, + const ci_tcp_prev_seq_t* prev_seq_val, + const ci_tcp_prev_seq_t* prev_seq_entry) { unsigned hash; unsigned hash2 = 0; @@ -696,7 +693,7 @@ __ci_tcp_prev_seq_free(ci_netif* ni, const ci_tcp_prev_seq_t* prev_seq_val, hash = (hash + hash2) & (ni->state->seq_table_entries_n - 1); depth++; ci_assert_le(depth, TCP_PREV_SEQ_DEPTH_LIMIT); - if(CI_UNLIKELY( depth > TCP_PREV_SEQ_DEPTH_LIMIT )) { + if( CI_UNLIKELY(depth > TCP_PREV_SEQ_DEPTH_LIMIT) ) { LOG_U(ci_log("%s: reached search depth", __FUNCTION__)); break; } @@ -704,8 +701,8 @@ __ci_tcp_prev_seq_free(ci_netif* ni, const ci_tcp_prev_seq_t* prev_seq_val, } -static void -ci_tcp_prev_seq_free(ci_netif* ni, ci_tcp_prev_seq_t* prev_seq) { +static void ci_tcp_prev_seq_free(ci_netif* ni, ci_tcp_prev_seq_t* prev_seq) +{ __ci_tcp_prev_seq_free(ni, prev_seq, prev_seq); prev_seq->laddr = addr_any; } @@ -725,7 +722,7 @@ __ci_tcp_prev_seq_remember(ci_netif* ni, const ci_tcp_prev_seq_t* ts_prev_seq) /* Oldest amongst the entries that we've traversed. */ ci_tcp_prev_seq_t* oldest_seq = NULL; ci_tcp_prev_seq_t* prev_seq; - + int depth; /* If the clock ISN is safe, no need to remember the sequence number. */ @@ -741,7 +738,7 @@ __ci_tcp_prev_seq_remember(ci_netif* ni, const ci_tcp_prev_seq_t* ts_prev_seq) ci_assert_lt(hash, ni->state->seq_table_entries_n); ci_assert_impl(CI_TCP_PREV_SEQ_IS_TERMINAL(*prev_seq), - CI_TCP_PREV_SEQ_IS_FREE(*prev_seq)); + CI_TCP_PREV_SEQ_IS_FREE(*prev_seq)); ++prev_seq->route_count; ni->state->stats.tcp_seq_table_steps++; @@ -749,15 +746,13 @@ __ci_tcp_prev_seq_remember(ci_netif* ni, const ci_tcp_prev_seq_t* ts_prev_seq) /* Free entry. Use it. 
*/ ci_tcp_prev_seq_remember_at(ni, ts_prev_seq, prev_seq); break; - } - else if( ci_ip_time_before(prev_seq->expiry, ci_ip_time_now(ni)) ) { + } else if( ci_ip_time_before(prev_seq->expiry, ci_ip_time_now(ni)) ) { /* Expired entry. Free it and reuse it. */ ci_tcp_prev_seq_free(ni, prev_seq); ci_tcp_prev_seq_remember_at(ni, ts_prev_seq, prev_seq); ni->state->stats.tcp_seq_table_expiries++; break; - } - else if( depth == 0 || ci_tcp_prev_seq_older(prev_seq, oldest_seq) ) { + } else if( depth == 0 || ci_tcp_prev_seq_older(prev_seq, oldest_seq) ) { /* Entry is live and in use, and the oldest that we've seen so far. * Remember it so that we can purge the oldest if we don't find any free * or expired entries. */ @@ -787,8 +782,8 @@ __ci_tcp_prev_seq_remember(ci_netif* ni, const ci_tcp_prev_seq_t* ts_prev_seq) } -static ci_tcp_prev_seq_t* -__ci_tcp_prev_seq_lookup(ci_netif* ni, const ci_tcp_prev_seq_t* ts_prev_seq) +static ci_tcp_prev_seq_t* __ci_tcp_prev_seq_lookup( + ci_netif* ni, const ci_tcp_prev_seq_t* ts_prev_seq) { unsigned hash = ci_tcp_prev_seq_hash1(ni, ts_prev_seq); unsigned hash2 = 0; @@ -853,36 +848,34 @@ void ci_tcp_prev_seq_remember(ci_netif* ni, ci_tcp_state* ts) if( __ci_tcp_prev_seq_remember(ni, &ts_prev_seq) ) ts->tcpflags |= CI_TCPT_FLAG_SEQNO_REMEMBERED; - } -#if !defined(__KERNEL__) || CI_CFG_ENDPOINT_MOVE +#if ! defined(__KERNEL__) || CI_CFG_ENDPOINT_MOVE /* Linux clears implicit address on connect failure */ -ci_inline void ci_tcp_connect_drop_implicit_address(ci_tcp_state *ts) +ci_inline void ci_tcp_connect_drop_implicit_address(ci_tcp_state* ts) { - if( ! (ts->s.s_flags & CI_SOCK_FLAG_BOUND) ) { + if( ! 
(ts->s.s_flags & CI_SOCK_FLAG_BOUND) ) { #if CI_CFG_IPV6 - if( ts->s.domain == AF_INET6 ) - ts->s.cp.laddr = ts->s.laddr = addr_any; - else + if( ts->s.domain == AF_INET6 ) + ts->s.cp.laddr = ts->s.laddr = addr_any; + else #endif - ts->s.cp.laddr = ts->s.laddr = ip4_addr_any; - } + ts->s.cp.laddr = ts->s.laddr = ip4_addr_any; + } } /* Return codes from ci_tcp_connect_ul_start(). */ -#define CI_CONNECT_UL_OK 0 -#define CI_CONNECT_UL_FAIL -1 -#define CI_CONNECT_UL_START_AGAIN -2 -#define CI_CONNECT_UL_LOCK_DROPPED -3 -#define CI_CONNECT_UL_ALIEN_BOUND -4 +#define CI_CONNECT_UL_OK 0 +#define CI_CONNECT_UL_FAIL -1 +#define CI_CONNECT_UL_START_AGAIN -2 +#define CI_CONNECT_UL_LOCK_DROPPED -3 +#define CI_CONNECT_UL_ALIEN_BOUND -4 /* The fd parameter is ignored when this is called in the kernel */ -static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts, ci_fd_t fd, - ci_addr_t dst, unsigned dport_be16, - int* fail_rc) +static int ci_tcp_connect_ul_start(ci_netif* ni, ci_tcp_state* ts, ci_fd_t fd, + ci_addr_t dst, unsigned dport_be16, int* fail_rc) { ci_ip_pkt_fmt* pkt; int rc = 0; @@ -894,7 +887,7 @@ static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts, ci_fd_t fd, ci_assert(ts->s.pkt.mtu); /* Recover from previous connection via the same socket: */ - ts->tcpflags &=~ CI_TCPT_FLAG_FIN_RECEIVED; + ts->tcpflags &= ~CI_TCPT_FLAG_FIN_RECEIVED; /* send_prequeue may be set to OO_PP_ID_INVALID by a previous connection * attempt. However there is no need to reinit send_prequeue_in and * other counters, because no real send was possible. */ @@ -923,16 +916,16 @@ static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts, ci_fd_t fd, */ ts->ka_probes = 0; - /* + /* * 3. State and address are OK. It's address routed through our NIC. * Do connect(). */ - ci_assert(!CI_IPX_ADDR_IS_ANY(ipcache_laddr(&ts->s.pkt))); + ci_assert(! 
CI_IPX_ADDR_IS_ANY(ipcache_laddr(&ts->s.pkt))); /* socket can only could have gotten scalative on prior * implicit bind */ ci_assert_impl(ts->s.s_flags & CI_SOCK_FLAG_SCALACTIVE, - ~ts->s.s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND); + ~ts->s.s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND); if( ts->s.s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND ) { ci_uint16 source_be16 = 0; @@ -941,26 +934,24 @@ static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts, ci_fd_t fd, saddr = sock_ipx_laddr(s); #ifndef __KERNEL__ - /* In the normal case, we only install filters for IP addresses configured on - * acceleratable interfaces, and so if the socket is bound to an alien - * address, we can't accelerate it. Using a MAC filter overcomes this - * limitation, however. */ - if( ! (ts->s.s_flags & CI_SOCK_FLAG_BOUND) ) - ci_tcp_bind_check_laddr(ni, &ts->s, saddr); - if( ~ni->state->flags & CI_NETIF_FLAG_USE_ALIEN_LADDRS && - (ts->s.s_flags & CI_SOCK_FLAG_BOUND_ALIEN) && - ! (ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE || - ts->s.s_flags & CI_SOCK_FLAG_SCALACTIVE) ) { - return CI_CONNECT_UL_ALIEN_BOUND; - } + /* In the normal case, we only install filters for IP addresses configured + * on acceleratable interfaces, and so if the socket is bound to an alien + * address, we can't accelerate it. Using a MAC filter overcomes this + * limitation, however. */ + if( ! (ts->s.s_flags & CI_SOCK_FLAG_BOUND) ) + ci_tcp_bind_check_laddr(ni, &ts->s, saddr); + if( ~ni->state->flags & CI_NETIF_FLAG_USE_ALIEN_LADDRS && + (ts->s.s_flags & CI_SOCK_FLAG_BOUND_ALIEN) && + ! (ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE || + ts->s.s_flags & CI_SOCK_FLAG_SCALACTIVE) ) { + return CI_CONNECT_UL_ALIEN_BOUND; + } #endif -#if !defined(__KERNEL__) && CI_CFG_TCP_SHARED_LOCAL_PORTS +#if ! defined(__KERNEL__) && CI_CFG_TCP_SHARED_LOCAL_PORTS if( ! 
(ts->s.s_flags & CI_SOCK_FLAG_FILTER) ) active_wild = ci_netif_active_wild_get(ni, sock_ipx_laddr(&ts->s), - sock_ipx_raddr(&ts->s), - dport_be16, &source_be16, - &prev_seq); + sock_ipx_raddr(&ts->s), dport_be16, &source_be16, &prev_seq); #endif /* Defer active_wild related state update to after potential lock drops @@ -972,29 +963,29 @@ static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts, ci_fd_t fd, CI_SET_ERROR(rc, EADDRNOTAVAIL); else #endif - rc = __ci_tcp_bind(ni, &ts->s, fd, - s->cp.sock_cp_flags & OO_SCP_BOUND_ADDR ? saddr : addr_any, - &source_be16, 0); + rc = __ci_tcp_bind(ni, &ts->s, fd, + s->cp.sock_cp_flags & OO_SCP_BOUND_ADDR ? saddr : addr_any, + &source_be16, 0); - if(CI_UNLIKELY( rc != 0 )) { - LOG_U(ci_log("__ci_tcp_bind returned %d at %s:%d", rc, - __FILE__, __LINE__)); + if( CI_UNLIKELY(rc != 0) ) { + LOG_U(ci_log( + "__ci_tcp_bind returned %d at %s:%d", rc, __FILE__, __LINE__)); *fail_rc = rc; return CI_CONNECT_UL_FAIL; } - if(CI_UNLIKELY( CI_IPX_ADDR_IS_ANY(saddr) )) { + if( CI_UNLIKELY(CI_IPX_ADDR_IS_ANY(saddr)) ) { /* FIXME is this an impossible branch? */ CI_SET_ERROR(*fail_rc, EINVAL); return CI_CONNECT_UL_FAIL; } } - /* Commit source port now. In case of failure down the lane, an implicit port - * might be overwritten by following attempt */ + /* Commit source port now. In case of failure down the lane, an implicit + * port might be overwritten by following attempt */ TS_IPX_TCP(ts)->tcp_source_be16 = source_be16; ts->s.cp.lport_be16 = source_be16; LOG_TC(log(LNT_FMT "connect: our bind returned " IPX_PORT_FMT, - LNT_PRI_ARGS(ni, ts), IPX_ARG(AF_IP(saddr)), - (unsigned) CI_BSWAP_BE16(TS_IPX_TCP(ts)->tcp_source_be16))); + LNT_PRI_ARGS(ni, ts), IPX_ARG(AF_IP(saddr)), + (unsigned) CI_BSWAP_BE16(TS_IPX_TCP(ts)->tcp_source_be16))); } /* Commit peer now - these are OK to be overwritten by following attempt */ @@ -1005,13 +996,15 @@ static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts, ci_fd_t fd, if( CI_UNLIKELY(! 
pkt) ) { /* Should we block or return error? */ if( NI_OPTS(ni).tcp_nonblock_no_pkts_mode && - (ts->s.b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY)) ) { + (ts->s.b.sb_aflags & + (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY)) ) { CI_SET_ERROR(*fail_rc, ENOBUFS); rc = CI_CONNECT_UL_FAIL; goto fail; } /* NB. We've already done a poll above. */ - rc = ci_netif_pkt_wait(ni, &ts->s, CI_SLEEP_NETIF_LOCKED|CI_SLEEP_NETIF_RQ); + rc = ci_netif_pkt_wait( + ni, &ts->s, CI_SLEEP_NETIF_LOCKED | CI_SLEEP_NETIF_RQ); if( ci_netif_pkt_wait_was_interrupted(rc) ) { CI_SET_ERROR(*fail_rc, -rc); rc = CI_CONNECT_UL_LOCK_DROPPED; @@ -1028,28 +1021,28 @@ static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts, ci_fd_t fd, if( active_wild != OO_SP_NULL && (ts->s.s_flags & CI_SOCK_FLAG_TPROXY) == 0 ) { - /* Need to set the flag now for consumption by ci_tcp_ep_set_filters */ - ts->s.s_flags |= CI_SOCK_FLAG_SCALACTIVE; - added_scalable = 1; + /* Need to set the flag now for consumption by ci_tcp_ep_set_filters */ + ts->s.s_flags |= CI_SOCK_FLAG_SCALACTIVE; + added_scalable = 1; } - rc = ci_tcp_ep_set_filters(ni, S_SP(ts), ts->s.cp.so_bindtodevice, - active_wild); + rc = ci_tcp_ep_set_filters( + ni, S_SP(ts), ts->s.cp.so_bindtodevice, active_wild); if( rc < 0 ) { /* Perhaps we've run out of filters? See if we can push a socket out * of timewait and steal its filter. */ ci_assert_nequal(rc, -EFILTERSSOME); if( rc != -EBUSY || ! ci_netif_timewait_try_to_free_filter(ni) || - (rc = ci_tcp_ep_set_filters(ni, S_SP(ts), - ts->s.cp.so_bindtodevice, - active_wild)) < 0 ) { + (rc = ci_tcp_ep_set_filters( + ni, S_SP(ts), ts->s.cp.so_bindtodevice, active_wild)) < 0 ) { ci_assert_nequal(rc, -EFILTERSSOME); /* Either a different error, or our efforts to free a filter did not * work. 
*/ if( added_scalable ) - ts->s.s_flags &= ~CI_SOCK_FLAG_SCALACTIVE; /* rollback scalactive flag */ + ts->s.s_flags &= + ~CI_SOCK_FLAG_SCALACTIVE; /* rollback scalactive flag */ ci_netif_pkt_release(ni, pkt); CI_SET_ERROR(*fail_rc, -rc); rc = CI_CONNECT_UL_FAIL; @@ -1062,16 +1055,15 @@ static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts, ci_fd_t fd, /* Commit active_wild related flags */ if( active_wild != OO_SP_NULL ) { ts->tcpflags |= CI_TCPT_FLAG_ACTIVE_WILD; - ts->s.s_flags &= ~(CI_SOCK_FLAG_DEFERRED_BIND | - CI_SOCK_FLAG_CONNECT_MUST_BIND); + ts->s.s_flags &= + ~(CI_SOCK_FLAG_DEFERRED_BIND | CI_SOCK_FLAG_CONNECT_MUST_BIND); } LOG_TC(log(LNT_FMT "CONNECT " IPX_PORT_FMT "->" IPX_PORT_FMT, - LNT_PRI_ARGS(ni, ts), - IPX_ARG(AF_IP(ipcache_laddr(&ts->s.pkt))), - (unsigned) CI_BSWAP_BE16(TS_IPX_TCP(ts)->tcp_source_be16), - IPX_ARG(AF_IP(ipcache_raddr(&ts->s.pkt))), - (unsigned) CI_BSWAP_BE16(TS_IPX_TCP(ts)->tcp_dest_be16))); + LNT_PRI_ARGS(ni, ts), IPX_ARG(AF_IP(ipcache_laddr(&ts->s.pkt))), + (unsigned) CI_BSWAP_BE16(TS_IPX_TCP(ts)->tcp_source_be16), + IPX_ARG(AF_IP(ipcache_raddr(&ts->s.pkt))), + (unsigned) CI_BSWAP_BE16(TS_IPX_TCP(ts)->tcp_dest_be16))); /* We are going to send the SYN - set states appropriately */ @@ -1082,8 +1074,7 @@ static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts, ci_fd_t fd, if( NI_OPTS(ni).tcp_isn_mode == 1 ) { if( prev_seq == 0 ) { prev_seq = ci_tcp_prev_seq_lookup(ni, ts); - } - else { + } else { #ifndef NDEBUG /* If we got a sequence number from TIME_WAIT-reuse, the table should not * have an entry for this four-tuple, as any such entry would now @@ -1101,49 +1092,44 @@ static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts, ci_fd_t fd, */ tcp_snd_nxt(ts) = prev_seq; else - tcp_snd_nxt(ts) = ci_tcp_initial_seqno(ni, - tcp_ipx_laddr(ts), - TS_IPX_TCP(ts)->tcp_source_be16, - tcp_ipx_laddr(ts), - TS_IPX_TCP(ts)->tcp_dest_be16); + tcp_snd_nxt(ts) = ci_tcp_initial_seqno(ni, tcp_ipx_laddr(ts), + 
TS_IPX_TCP(ts)->tcp_source_be16, tcp_ipx_laddr(ts), + TS_IPX_TCP(ts)->tcp_dest_be16); tcp_snd_una(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) = tcp_snd_nxt(ts); ts->snd_max = tcp_snd_nxt(ts) + 1; /* Must be after initialising snd_una. */ ci_tcp_clear_rtt_timing(ts); ci_tcp_set_flags(ts, CI_TCP_FLAG_SYN); - ts->tcpflags &=~ CI_TCPT_FLAG_OPT_MASK; + ts->tcpflags &= ~CI_TCPT_FLAG_OPT_MASK; ts->tcpflags |= NI_OPTS(ni).syn_opts; if( (ts->tcpflags & CI_TCPT_FLAG_WSCL) ) { if( NI_OPTS(ni).tcp_rcvbuf_mode == 1 ) - ts->rcv_wscl = - ci_tcp_wscl_by_buff(ni, ci_tcp_max_rcvbuf(ni, ts->amss)); + ts->rcv_wscl = ci_tcp_wscl_by_buff(ni, ci_tcp_max_rcvbuf(ni, ts->amss)); else ts->rcv_wscl = - ci_tcp_wscl_by_buff(ni, ci_tcp_rcvbuf_established(ni, &ts->s)); + ci_tcp_wscl_by_buff(ni, ci_tcp_rcvbuf_established(ni, &ts->s)); CI_IP_SOCK_STATS_VAL_RXWSCL(ts, ts->rcv_wscl); - } - else { + } else { ts->rcv_wscl = 0; CI_IP_SOCK_STATS_VAL_RXWSCL(ts, 0); } ci_tcp_set_rcvbuf(ni, ts); - TS_IPX_TCP(ts)->tcp_window_be16 = ci_tcp_calc_rcv_wnd_syn(ts->s.so.rcvbuf, - ts->amss, - ts->rcv_wscl); - tcp_rcv_wnd_right_edge_sent(ts) = tcp_rcv_nxt(ts) + - TS_IPX_TCP(ts)->tcp_window_be16; + TS_IPX_TCP(ts)->tcp_window_be16 = + ci_tcp_calc_rcv_wnd_syn(ts->s.so.rcvbuf, ts->amss, ts->rcv_wscl); + tcp_rcv_wnd_right_edge_sent(ts) = + tcp_rcv_nxt(ts) + TS_IPX_TCP(ts)->tcp_window_be16; ts->rcv_wnd_advertised = TS_IPX_TCP(ts)->tcp_window_be16; TS_IPX_TCP(ts)->tcp_window_be16 = CI_BSWAP_BE16(TS_IPX_TCP(ts)->tcp_window_be16); /* outgoing_hdrs_len is initialised to include timestamp option. */ if( ! 
(ts->tcpflags & CI_TCPT_FLAG_TSO) ) - ts->outgoing_hdrs_len = CI_IPX_HDR_SIZE(ipcache_af(&ts->s.pkt)) + - sizeof(ci_tcp_hdr); + ts->outgoing_hdrs_len = + CI_IPX_HDR_SIZE(ipcache_af(&ts->s.pkt)) + sizeof(ci_tcp_hdr); if( ci_tcp_can_stripe(ni, ts->s.pkt.ipx.ip4.ip_saddr_be32, - ts->s.pkt.ipx.ip4.ip_daddr_be32) ) + ts->s.pkt.ipx.ip4.ip_daddr_be32) ) ts->tcpflags |= CI_TCPT_FLAG_STRIPE; ci_tcp_set_slow_state(ni, ts, CI_TCP_SYN_SENT); @@ -1156,21 +1142,21 @@ static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts, ci_fd_t fd, ** ** Clearing tx_errno and rx_errno acheive this. The transmit window ** is set to 1 byte which ensures that only the SYN packet gets - ** sent until the ACK is received with more window. + ** sent until the ACK is received with more window. */ ci_assert(ts->snd_max == tcp_snd_nxt(ts) + 1); ts->s.rx_errno = 0; - ts->s.tx_errno = 0; + ts->s.tx_errno = 0; /* If ARP resolution fails, we have to drop the connection, so we store * the socket id in the SYN packet. */ pkt->pf.tcp_tx.sock_id = ts->s.b.bufid; ci_tcp_enqueue_no_data(ts, ni, pkt); - ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK); + ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK); if( ts->s.b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) ) { ts->tcpflags |= CI_TCPT_FLAG_NONBLOCK_CONNECT; - LOG_TC(log( LNT_FMT "Non-blocking connect - return EINPROGRESS", - LNT_PRI_ARGS(ni, ts))); + LOG_TC(log(LNT_FMT "Non-blocking connect - return EINPROGRESS", + LNT_PRI_ARGS(ni, ts))); CI_SET_ERROR(*fail_rc, EINPROGRESS); /* We don't jump to the "fail" label here, as this is a failure only from * the point of view of the connect() API, and we don't want to tear down @@ -1180,12 +1166,12 @@ static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts, ci_fd_t fd, return CI_CONNECT_UL_OK; - fail: +fail: ci_tcp_connect_drop_implicit_address(ts); return rc; } -ci_inline int ci_tcp_connect_handle_so_error(ci_sock_cmn *s) +ci_inline int ci_tcp_connect_handle_so_error(ci_sock_cmn* s) { ci_int32 rc = 
ci_get_so_error(s); if( rc == 0 ) @@ -1194,7 +1180,7 @@ ci_inline int ci_tcp_connect_handle_so_error(ci_sock_cmn *s) return rc; } -static int ci_tcp_connect_ul_syn_sent(ci_netif *ni, ci_tcp_state *ts) +static int ci_tcp_connect_ul_syn_sent(ci_netif* ni, ci_tcp_state* ts) { int rc = 0; @@ -1219,8 +1205,8 @@ static int ci_tcp_connect_ul_syn_sent(ci_netif *ni, ci_tcp_state *ts) int stack_locked = 1; if( ts->s.so.sndtimeo_msec ) { - ci_uint64 max_so_spin = (ci_uint64)ts->s.so.sndtimeo_msec * - IPTIMER_STATE(ni)->khz; + ci_uint64 max_so_spin = + (ci_uint64) ts->s.so.sndtimeo_msec * IPTIMER_STATE(ni)->khz; if( max_so_spin <= max_spin ) max_spin = max_so_spin; } @@ -1237,8 +1223,7 @@ static int ci_tcp_connect_ul_syn_sent(ci_netif *ni, ci_tcp_state *ts) ci_netif_unlock(ni); stack_locked = 0; } - } - else if( ! ni->state->is_spinner ) + } else if( ! ni->state->is_spinner ) ni->state->is_spinner = 1; } if( ts->s.b.state != CI_TCP_SYN_SENT ) { @@ -1256,8 +1241,8 @@ static int ci_tcp_connect_ul_syn_sent(ci_netif *ni, ci_tcp_state *ts) } ci_frc64(&now_frc); - rc = OO_SPINLOOP_PAUSE_CHECK_SIGNALS(ni, now_frc, &schedule_frc, - ts->s.so.sndtimeo_msec, NULL, si); + rc = OO_SPINLOOP_PAUSE_CHECK_SIGNALS( + ni, now_frc, &schedule_frc, ts->s.so.sndtimeo_msec, NULL, si); if( rc != 0 ) { ni->state->is_spinner = 0; if( ! stack_locked ) @@ -1286,28 +1271,24 @@ static int ci_tcp_connect_ul_syn_sent(ci_netif *ni, ci_tcp_state *ts) } #endif - CI_TCP_SLEEP_WHILE(ni, ts, CI_SB_FLAG_WAKE_RX, - timeout, - ts->s.b.state == CI_TCP_SYN_SENT, &rc); + CI_TCP_SLEEP_WHILE(ni, ts, CI_SB_FLAG_WAKE_RX, timeout, + ts->s.b.state == CI_TCP_SYN_SENT, &rc); } #ifndef __KERNEL__ - out: +out: #endif if( rc == -EAGAIN ) { - LOG_TC(log( LNT_FMT "timeout on sleep: %d", - LNT_PRI_ARGS(ni, ts), -rc)); + LOG_TC(log(LNT_FMT "timeout on sleep: %d", LNT_PRI_ARGS(ni, ts), -rc)); if( ! 
(ts->tcpflags & CI_TCPT_FLAG_NONBLOCK_CONNECT) ) { ts->tcpflags |= CI_TCPT_FLAG_NONBLOCK_CONNECT; CI_SET_ERROR(rc, EINPROGRESS); - } - else + } else CI_SET_ERROR(rc, EALREADY); return rc; - } - else if( rc == -EINTR ) { - LOG_TC(log(LNT_FMT "connect() was interrupted by a signal", - LNT_PRI_ARGS(ni, ts))); + } else if( rc == -EINTR ) { + LOG_TC(log(LNT_FMT "connect() was interrupted by a signal", + LNT_PRI_ARGS(ni, ts))); ts->tcpflags |= CI_TCPT_FLAG_NONBLOCK_CONNECT; CI_SET_ERROR(rc, EINTR); return rc; @@ -1316,14 +1297,13 @@ static int ci_tcp_connect_ul_syn_sent(ci_netif *ni, ci_tcp_state *ts) /*! \TODO propagate the correct error code: CONNREFUSED, NOROUTE, etc. */ if( ts->s.b.state == CI_TCP_CLOSED ) { - /* Bug 3558: + /* Bug 3558: * Set OS socket state to allow/disallow next bind(). * It is Linux hack. */ if( ts->s.b.sb_aflags & CI_SB_AFLAG_OS_BACKED ) { #ifdef __ci_driver__ CI_TRY(efab_tcp_helper_set_tcp_close_os_sock( - netif2tcp_helper_resource(ni), - S_SP(ts))); + netif2tcp_helper_resource(ni), S_SP(ts))); #else CI_TRY(ci_tcp_helper_set_tcp_close_os_sock(ni, S_SP(ts))); #endif @@ -1338,8 +1318,8 @@ static int ci_tcp_connect_ul_syn_sent(ci_netif *ni, ci_tcp_state *ts) /* - if SO_ERROR is set, handle it and return this value; * - else if rx_errno is set, return it; * - else (TCP_RX_ERRNO==0, socket is CI_SHUT_RD) return ECONNABORTED */ - if( (rc = ci_tcp_connect_handle_so_error(&ts->s)) == 0) - rc = TCP_RX_ERRNO(ts) ? TCP_RX_ERRNO(ts) : ECONNABORTED; + if( (rc = ci_tcp_connect_handle_so_error(&ts->s)) == 0 ) + rc = TCP_RX_ERRNO(ts) ? 
TCP_RX_ERRNO(ts) : ECONNABORTED; CI_SET_ERROR(rc, rc); ci_tcp_connect_drop_implicit_address(ts); @@ -1351,8 +1331,7 @@ static int ci_tcp_connect_ul_syn_sent(ci_netif *ni, ci_tcp_state *ts) #ifndef __KERNEL__ -static int -complete_deferred_bind(ci_netif* netif, ci_sock_cmn* s, ci_fd_t fd) +static int complete_deferred_bind(ci_netif* netif, ci_sock_cmn* s, ci_fd_t fd) { ci_uint16 source_be16 = 0; int rc; @@ -1360,29 +1339,27 @@ complete_deferred_bind(ci_netif* netif, ci_sock_cmn* s, ci_fd_t fd) ci_assert_flags(s->s_flags, CI_SOCK_FLAG_DEFERRED_BIND); rc = __ci_tcp_bind(netif, s, fd, - s->cp.sock_cp_flags & OO_SCP_BOUND_ADDR ? s->laddr : addr_any, - &source_be16, 0); + s->cp.sock_cp_flags & OO_SCP_BOUND_ADDR ? s->laddr : addr_any, + &source_be16, 0); - if(CI_LIKELY( rc == 0 )) { - s->s_flags &= ~(CI_SOCK_FLAG_DEFERRED_BIND | - CI_SOCK_FLAG_CONNECT_MUST_BIND); + if( CI_LIKELY(rc == 0) ) { + s->s_flags &= + ~(CI_SOCK_FLAG_DEFERRED_BIND | CI_SOCK_FLAG_CONNECT_MUST_BIND); sock_lport_be16(s) = source_be16; s->cp.lport_be16 = source_be16; LOG_TC(log(NSS_FMT "Deferred bind returned " IPX_FMT " :%u", - NSS_PRI_ARGS(netif, s), - IPX_ARG(AF_IP(addr_any)), ntohs(sock_lport_be16(s)))); - } - else { + NSS_PRI_ARGS(netif, s), IPX_ARG(AF_IP(addr_any)), + ntohs(sock_lport_be16(s)))); + } else { LOG_U(ci_log("__ci_tcp_bind returned %d at %s:%d", CI_GET_ERROR(rc), - __FILE__, __LINE__)); + __FILE__, __LINE__)); } return rc; } -static int -ci_tcp_retrieve_addr(ci_netif* netif, const struct sockaddr* serv_addr, - ci_addr_t* dst_addr, ci_uint16* dst_port) +static int ci_tcp_retrieve_addr(ci_netif* netif, + const struct sockaddr* serv_addr, ci_addr_t* dst_addr, ci_uint16* dst_port) { /* Address family is validated to be AF_INET or AF_INET6 earlier. 
*/ const struct sockaddr_in* inaddr = (struct sockaddr_in*) serv_addr; @@ -1402,7 +1379,7 @@ ci_tcp_retrieve_addr(ci_netif* netif, const struct sockaddr* serv_addr, /* Returns: * 0 on success - * + * * CI_SOCKET_ERROR (and errno set) * this is a normal error that is returned to the * the application @@ -1411,7 +1388,7 @@ ci_tcp_retrieve_addr(ci_netif* netif, const struct sockaddr* serv_addr, * to set errno since it isn't a real error */ int ci_tcp_connect(citp_socket* ep, const struct sockaddr* serv_addr, - socklen_t addrlen, ci_fd_t fd, int *p_moved) + socklen_t addrlen, ci_fd_t fd, int* p_moved) { ci_sock_cmn* s = ep->s; ci_tcp_state* ts = &SOCK_TO_WAITABLE_OBJ(s)->tcp; @@ -1432,9 +1409,9 @@ int ci_tcp_connect(citp_socket* ep, const struct sockaddr* serv_addr, * 1. Check if state of the socket is OK for connect operation. */ - start_again: +start_again: - if( (rc = ci_tcp_connect_handle_so_error(s)) != 0) { + if( (rc = ci_tcp_connect_handle_so_error(s)) != 0 ) { CI_SET_ERROR(rc, rc); goto unlock_out; } @@ -1445,36 +1422,35 @@ int ci_tcp_connect(citp_socket* ep, const struct sockaddr* serv_addr, if( s->b.state & CI_TCP_STATE_SYNCHRONISED ) { if( ts->tcpflags & CI_TCPT_FLAG_NONBLOCK_CONNECT ) { ts->tcpflags &= ~CI_TCPT_FLAG_NONBLOCK_CONNECT; - rc = 0; - goto unlock_out; + rc = 0; + goto unlock_out; } if( serv_addr->sa_family == AF_UNSPEC ) - LOG_E(ci_log("Onload does not support TCP disconnect via " + LOG_E( + ci_log("Onload does not support TCP disconnect via " - "connect(addr->sa_family==AF_UNSPEC)")); + "connect(addr->sa_family==AF_UNSPEC)")); CI_SET_ERROR(rc, EISCONN); - } - else if( s->b.state == CI_TCP_LISTEN ) { + } else if( s->b.state == CI_TCP_LISTEN ) { #if CI_CFG_POSIX_CONNECT_AFTER_LISTEN CI_SET_ERROR(rc, EOPNOTSUPP); #else if( ci_tcp_validate_sa(s->domain, serv_addr, addrlen) ) { /* Request should be forwarded to OS */ rc = CI_SOCKET_HANDOVER; - goto unlock_out; + goto unlock_out; } if( serv_addr->sa_family == AF_UNSPEC ) { /* Linux does listen 
shutdown on disconnect (AF_UNSPEC) */ ci_netif_unlock(ep->netif); rc = ci_tcp_shutdown(ep, SHUT_RD, fd); - goto out; + goto out; } else { /* Linux has curious error reporting in this case */ CI_SET_ERROR(rc, EISCONN); } #endif - } - else { + } else { /* Socket is in SYN-SENT state. Let's block for receiving SYN-ACK */ ci_assert_equal(s->b.state, CI_TCP_SYN_SENT); if( s->b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) ) @@ -1491,24 +1467,23 @@ int ci_tcp_connect(citp_socket* ep, const struct sockaddr* serv_addr, goto unlock_out; } - /* + /* * 2. Check address parameter, if it's inappropriate for handover * decision or handover should be done, try to to call OS and * do handover on success. */ - if ( - /* Af first, check that address family and length is OK. */ - ci_tcp_validate_sa(s->domain, serv_addr, addrlen) - /* Check for NAT. */ - || (dnat = ci_tcp_retrieve_addr(ep->netif, serv_addr, &dst_addr, - &dst_port)) < 0 - /* rfc793 p54 if the foreign socket is unspecified return */ - /* "error: foreign socket unspecified" (EINVAL), but keep it to OS */ - || CI_IPX_ADDR_IS_ANY(dst_addr) - /* Zero destination port is tricky as well, keep it to OS */ - || dst_port == 0 ) - { + if( + /* Af first, check that address family and length is OK. */ + ci_tcp_validate_sa(s->domain, serv_addr, addrlen) + /* Check for NAT. */ + || (dnat = ci_tcp_retrieve_addr( + ep->netif, serv_addr, &dst_addr, &dst_port)) < 0 + /* rfc793 p54 if the foreign socket is unspecified return */ + /* "error: foreign socket unspecified" (EINVAL), but keep it to OS */ + || CI_IPX_ADDR_IS_ANY(dst_addr) + /* Zero destination port is tricky as well, keep it to OS */ + || dst_port == 0 ) { rc = CI_SOCKET_HANDOVER; goto unlock_out; } @@ -1523,7 +1498,8 @@ int ci_tcp_connect(citp_socket* ep, const struct sockaddr* serv_addr, /* is this a socket that we can handle? 
*/ rc = ci_tcp_connect_check_dest(ep, dst_addr, dst_port); - if( rc ) goto unlock_out; + if( rc ) + goto unlock_out; #if CI_CFG_ENDPOINT_MOVE if( (ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE) && @@ -1534,12 +1510,12 @@ int ci_tcp_connect(citp_socket* ep, const struct sockaddr* serv_addr, op.dst_addr = dst_addr; /* this operation unlocks netif */ rc = oo_resource_op(fd, OO_IOC_TCP_LOOPBACK_CONNECT, &op); - if( rc < 0) + if( rc < 0 ) return CI_SOCKET_HANDOVER; if( op.out_moved ) *p_moved = 1; if( op.out_rc == -EINPROGRESS ) - RET_WITH_ERRNO( EINPROGRESS ); + RET_WITH_ERRNO(EINPROGRESS); else if( op.out_rc == -EAGAIN ) RET_WITH_ERRNO(EAGAIN); else if( op.out_rc != 0 ) @@ -1554,33 +1530,32 @@ int ci_tcp_connect(citp_socket* ep, const struct sockaddr* serv_addr, ts->pre_nat.dport_be16 = ((struct sockaddr_in*) serv_addr)->sin_port; } - crc = ci_tcp_connect_ul_start(ep->netif, ts, fd, dst_addr, dst_port, - &rc); + crc = ci_tcp_connect_ul_start(ep->netif, ts, fd, dst_addr, dst_port, &rc); if( crc != CI_CONNECT_UL_OK ) { switch( crc ) { - case CI_CONNECT_UL_ALIEN_BOUND: - rc = CI_SOCKET_HANDOVER; - ci_fallthrough; - case CI_CONNECT_UL_FAIL: - /* Check non-blocking */ - if( errno == EINPROGRESS ) { - CI_TCP_STATS_INC_ACTIVE_OPENS( ep->netif ); - } - goto unlock_out; - case CI_CONNECT_UL_LOCK_DROPPED: - goto out; - case CI_CONNECT_UL_START_AGAIN: - goto start_again; + case CI_CONNECT_UL_ALIEN_BOUND: + rc = CI_SOCKET_HANDOVER; + ci_fallthrough; + case CI_CONNECT_UL_FAIL: + /* Check non-blocking */ + if( errno == EINPROGRESS ) { + CI_TCP_STATS_INC_ACTIVE_OPENS(ep->netif); + } + goto unlock_out; + case CI_CONNECT_UL_LOCK_DROPPED: + goto out; + case CI_CONNECT_UL_START_AGAIN: + goto start_again; } } - CI_TCP_STATS_INC_ACTIVE_OPENS( ep->netif ); + CI_TCP_STATS_INC_ACTIVE_OPENS(ep->netif); - syn_sent: +syn_sent: rc = ci_tcp_connect_ul_syn_sent(ep->netif, ts); - unlock_out: +unlock_out: ci_netif_unlock(ep->netif); - out: +out: if( rc == CI_SOCKET_HANDOVER && (s->s_flags & 
CI_SOCK_FLAG_DEFERRED_BIND) ) { int rc1 = complete_deferred_bind(ep->netif, &ts->s, fd); if( rc1 < 0 ) @@ -1590,7 +1565,7 @@ int ci_tcp_connect(citp_socket* ep, const struct sockaddr* serv_addr, } #endif -int ci_tcp_listen_init(ci_netif *ni, ci_tcp_socket_listen *tls) +int ci_tcp_listen_init(ci_netif* ni, ci_tcp_socket_listen* tls) { int i; oo_p sp; @@ -1622,8 +1597,7 @@ int ci_tcp_listen_init(ci_netif *ni, ci_tcp_socket_listen *tls) /* Initialize the cache and pending lists for the EP-cache. * See comment at definition for details */ - LOG_EP (log ("Initialise cache and pending list for id %d", - S_FMT(tls))); + LOG_EP(log("Initialise cache and pending list for id %d", S_FMT(tls))); #if CI_CFG_FD_CACHING oo_p_dllink_init(ni, oo_p_dllink_sb(ni, &tls->s.b, &tls->epcache.cache)); @@ -1631,8 +1605,8 @@ int ci_tcp_listen_init(ci_netif *ni, ci_tcp_socket_listen *tls) oo_p_dllink_init(ni, oo_p_dllink_sb(ni, &tls->s.b, &tls->epcache_connected)); oo_p_dllink_init(ni, oo_p_dllink_sb(ni, &tls->s.b, &tls->epcache.fd_states)); - tls->epcache.avail_stack = oo_ptr_to_statep - (ni, &ni->state->passive_cache_avail_stack); + tls->epcache.avail_stack = + oo_ptr_to_statep(ni, &ni->state->passive_cache_avail_stack); tls->cache_avail_sock = ni->state->opts.per_sock_cache_max; #endif @@ -1642,8 +1616,8 @@ int ci_tcp_listen_init(ci_netif *ni, ci_tcp_socket_listen *tls) #ifdef __KERNEL__ -int ci_tcp_connect_lo_samestack(ci_netif *ni, ci_tcp_state *ts, oo_sp tls_id, - int *stack_locked) +int ci_tcp_connect_lo_samestack( + ci_netif* ni, ci_tcp_state* ts, oo_sp tls_id, int* stack_locked) { int crc, rc = 0; @@ -1651,8 +1625,8 @@ int ci_tcp_connect_lo_samestack(ci_netif *ni, ci_tcp_state *ts, oo_sp tls_id, *stack_locked = 1; ts->local_peer = tls_id; - crc = ci_tcp_connect_ul_start(ni, ts, CI_FD_BAD, sock_ipx_raddr(&ts->s), - ts->s.pkt.dport_be16, &rc); + crc = ci_tcp_connect_ul_start( + ni, ts, CI_FD_BAD, sock_ipx_raddr(&ts->s), ts->s.pkt.dport_be16, &rc); /* The connect is really finished, 
but we should return EINPROGRESS * for non-blocking connect and 0 for normal. */ @@ -1665,13 +1639,13 @@ int ci_tcp_connect_lo_samestack(ci_netif *ni, ci_tcp_state *ts, oo_sp tls_id, /* c_ni is assumed to be locked on enterance and is always unlocked on * exit. */ -int ci_tcp_connect_lo_toconn(ci_netif *c_ni, oo_sp c_id, ci_addr_t dst, - ci_netif *l_ni, oo_sp l_id) +int ci_tcp_connect_lo_toconn( + ci_netif* c_ni, oo_sp c_id, ci_addr_t dst, ci_netif* l_ni, oo_sp l_id) { - ci_tcp_state *ts; + ci_tcp_state* ts; ci_tcp_socket_listen *tls, *alien_tls; - citp_waitable_obj *wo; - citp_waitable *w; + citp_waitable_obj* wo; + citp_waitable* w; int rc; int stack_locked; @@ -1679,12 +1653,11 @@ int ci_tcp_connect_lo_toconn(ci_netif *c_ni, oo_sp c_id, ci_addr_t dst, ci_assert(OO_SP_NOT_NULL(c_id)); ci_assert(OO_SP_NOT_NULL(l_id)); - LOG_TC(log("%s: connect %d:%d to %d:%d", __FUNCTION__, - c_ni->state->stack_id, OO_SP_TO_INT(c_id), - l_ni->state->stack_id, OO_SP_TO_INT(l_id))); + LOG_TC(log("%s: connect %d:%d to %d:%d", __FUNCTION__, c_ni->state->stack_id, + OO_SP_TO_INT(c_id), l_ni->state->stack_id, OO_SP_TO_INT(l_id))); alien_tls = SP_TO_TCP_LISTEN(l_ni, l_id); - if( (int)ci_tcp_acceptq_n(alien_tls) >= alien_tls->acceptq_max ) { + if( (int) ci_tcp_acceptq_n(alien_tls) >= alien_tls->acceptq_max ) { ci_netif_unlock(c_ni); return -EBUSY; } @@ -1736,18 +1709,19 @@ int ci_tcp_connect_lo_toconn(ci_netif *c_ni, oo_sp c_id, ci_addr_t dst, int rc1 = ci_netif_lock(c_ni); if( rc1 != 0 ) { /* we leak the shadow listener and a synrecv state, but so be it */ - ci_log("%s([%d:%d] to [%d:%d]): leaking the shadow listener " - "[%d:%d] rc=%d", - __func__, c_ni->state->stack_id, OO_SP_TO_INT(c_id), - l_ni->state->stack_id, OO_SP_TO_INT(l_id), - c_ni->state->stack_id, tls->s.b.bufid, rc); + ci_log( + "%s([%d:%d] to [%d:%d]): leaking the shadow listener " + "[%d:%d] rc=%d", + __func__, c_ni->state->stack_id, OO_SP_TO_INT(c_id), + l_ni->state->stack_id, OO_SP_TO_INT(l_id), c_ni->state->stack_id, + 
tls->s.b.bufid, rc); /* rc is usually -ERESTARTSYS, and it does not help user */ return -ENOBUFS; } } /* Accept as from tls */ - if( !ci_tcp_acceptq_not_empty(tls) ) { + if( ! ci_tcp_acceptq_not_empty(tls) ) { /* it is possible, for example, if ci_tcp_listenq_try_promote() failed * because there are no endpoints */ ci_tcp_listenq_drop_all(c_ni, tls); @@ -1757,12 +1731,10 @@ int ci_tcp_connect_lo_toconn(ci_netif *c_ni, oo_sp c_id, ci_addr_t dst, } w = ci_tcp_acceptq_get(c_ni, tls); ci_assert(w); - LOG_TV(ci_log("%s: %d:%d to %d:%d shadow %d:%d accepted %d:%d", - __FUNCTION__, - c_ni->state->stack_id, OO_SP_TO_INT(c_id), - l_ni->state->stack_id, OO_SP_TO_INT(l_id), - c_ni->state->stack_id, tls->s.b.bufid, - c_ni->state->stack_id, w->bufid)); + LOG_TV(ci_log("%s: %d:%d to %d:%d shadow %d:%d accepted %d:%d", __FUNCTION__, + c_ni->state->stack_id, OO_SP_TO_INT(c_id), l_ni->state->stack_id, + OO_SP_TO_INT(l_id), c_ni->state->stack_id, tls->s.b.bufid, + c_ni->state->stack_id, w->bufid)); ci_assert(w->state & CI_TCP_STATE_TCP); ci_assert(w->state != CI_TCP_LISTEN); @@ -1793,8 +1765,7 @@ int ci_tcp_connect_lo_toconn(ci_netif *c_ni, oo_sp c_id, ci_addr_t dst, spin_unlock_irqrestore(&l_ep->lock, lock_flags); if( os_sock_ref != NULL ) fput(os_sock_ref); - } - else { + } else { spin_unlock_irqrestore(&l_ep->lock, lock_flags); goto cleanup; } @@ -1807,15 +1778,15 @@ int ci_tcp_connect_lo_toconn(ci_netif *c_ni, oo_sp c_id, ci_addr_t dst, if( alien_tls->s.b.state != CI_TCP_LISTEN || (alien_tls->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN) || S_IPX_TCP_HDR(&alien_tls->s)->tcp_source_be16 != - TS_IPX_TCP(ts)->tcp_dest_be16 || - (!CI_IPX_ADDR_IS_ANY(alien_tls->s.laddr) && - !CI_IPX_ADDR_EQ(alien_tls->s.laddr, sock_ipx_raddr(&ts->s))) ) { + TS_IPX_TCP(ts)->tcp_dest_be16 || + (! CI_IPX_ADDR_IS_ANY(alien_tls->s.laddr) && + ! 
CI_IPX_ADDR_EQ(alien_tls->s.laddr, sock_ipx_raddr(&ts->s))) ) { ci_netif_unlock(l_ni); goto cleanup; } - ci_bit_mask_set(&w->sb_aflags, - CI_SB_AFLAG_TCP_IN_ACCEPTQ | CI_SB_AFLAG_ORPHAN); + ci_bit_mask_set( + &w->sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ | CI_SB_AFLAG_ORPHAN); wo = citp_waitable_obj_alloc(l_ni); if( wo == NULL ) { @@ -1827,8 +1798,8 @@ int ci_tcp_connect_lo_toconn(ci_netif *c_ni, oo_sp c_id, ci_addr_t dst, wo->waitable.moved_to_stack_id = c_ni->state->stack_id; wo->waitable.moved_to_sock_id = W_SP(w); LOG_TC(log("%s: put to acceptq %d:%d referencing %d:%d", __func__, - l_ni->state->stack_id, OO_SP_TO_INT(W_SP(&wo->waitable)), - c_ni->state->stack_id, OO_SP_TO_INT(W_SP(w)))); + l_ni->state->stack_id, OO_SP_TO_INT(W_SP(&wo->waitable)), + c_ni->state->stack_id, OO_SP_TO_INT(W_SP(w)))); ci_tcp_acceptq_put(l_ni, alien_tls, &wo->waitable); citp_waitable_wake_not_in_poll(l_ni, &alien_tls->s.b, CI_SB_FLAG_WAKE_RX); @@ -1838,8 +1809,8 @@ int ci_tcp_connect_lo_toconn(ci_netif *c_ni, oo_sp c_id, ci_addr_t dst, cleanup: ci_assert(w->sb_aflags & CI_SB_AFLAG_ORPHAN); - ci_bit_mask_clear(&w->sb_aflags, - CI_SB_AFLAG_TCP_IN_ACCEPTQ | CI_SB_AFLAG_ORPHAN); + ci_bit_mask_clear( + &w->sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ | CI_SB_AFLAG_ORPHAN); efab_tcp_helper_close_endpoint(netif2tcp_helper_resource(c_ni), w->bufid, 0); /* we can not guarantee c_ni lock, so we can' call * ci_tcp_drop(c_ni, ts). 
So, we return error; UL will handover @@ -1849,7 +1820,6 @@ int ci_tcp_connect_lo_toconn(ci_netif *c_ni, oo_sp c_id, ci_addr_t dst, #endif - #ifndef __KERNEL__ #if CI_CFG_ENDPOINT_MOVE @@ -1860,12 +1830,10 @@ int ci_tcp_reuseport_bind(ci_sock_cmn* sock, ci_fd_t fd) int rc; ci_assert_nequal(sock->s_flags & CI_SOCK_FLAG_REUSEPORT, 0); - if ( (rc = ci_tcp_ep_reuseport_bind(fd, CITP_OPTS.cluster_name, - CITP_OPTS.cluster_size, - CITP_OPTS.cluster_restart_opt, - CITP_OPTS.cluster_hot_restart_opt, - sock_ipx_laddr(sock), - sock_lport_be16(sock))) != 0 ) { + if( (rc = ci_tcp_ep_reuseport_bind(fd, CITP_OPTS.cluster_name, + CITP_OPTS.cluster_size, CITP_OPTS.cluster_restart_opt, + CITP_OPTS.cluster_hot_restart_opt, sock_ipx_laddr(sock), + sock_lport_be16(sock))) != 0 ) { errno = -rc; return -1; } @@ -1874,10 +1842,10 @@ int ci_tcp_reuseport_bind(ci_sock_cmn* sock, ci_fd_t fd) #endif /* In this bind handler we just check that the address to which - * are binding is either "any" or one of ours. + * are binding is either "any" or one of ours. */ int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr, - socklen_t addrlen, ci_fd_t fd ) + socklen_t addrlen, ci_fd_t fd) { ci_uint16 new_port; ci_addr_t addr; @@ -1891,34 +1859,34 @@ int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr, /* Check if state of the socket is OK for bind operation. */ /* \todo Earlier (TS_TCP( epi->tcpep.state )->tcp_source_be16) is used. * What is better? */ - if (my_addr == NULL) - RET_WITH_ERRNO( EINVAL ); + if( my_addr == NULL ) + RET_WITH_ERRNO(EINVAL); - if (s->b.state != CI_TCP_CLOSED) - RET_WITH_ERRNO( EINVAL ); + if( s->b.state != CI_TCP_CLOSED ) + RET_WITH_ERRNO(EINVAL); - if (c->tcpflags & CI_TCPT_FLAG_WAS_ESTAB) - RET_WITH_ERRNO( EINVAL ); + if( c->tcpflags & CI_TCPT_FLAG_WAS_ESTAB ) + RET_WITH_ERRNO(EINVAL); /* There should be address length check before address family validation to * match Linux errno value set in inet6_bind(). 
*/ - if (s->domain == PF_INET6 && addrlen < SIN6_LEN_RFC2133) - RET_WITH_ERRNO( EINVAL ); + if( s->domain == PF_INET6 && addrlen < SIN6_LEN_RFC2133 ) + RET_WITH_ERRNO(EINVAL); if( my_addr->sa_family != s->domain ) - RET_WITH_ERRNO( EAFNOSUPPORT ); + RET_WITH_ERRNO(EAFNOSUPPORT); /* sin_port and sin6_port share tha same place in the sockaddr */ - new_port = ((struct sockaddr_in*)my_addr)->sin_port; + new_port = ((struct sockaddr_in*) my_addr)->sin_port; - /* Bug 4884: Windows regularly uses addrlen > sizeof(struct sockaddr_in) + /* Bug 4884: Windows regularly uses addrlen > sizeof(struct sockaddr_in) * Linux is also relaxed about overlength data areas. */ - if (s->domain == PF_INET && addrlen < sizeof(struct sockaddr_in)) - RET_WITH_ERRNO( EINVAL ); + if( s->domain == PF_INET && addrlen < sizeof(struct sockaddr_in) ) + RET_WITH_ERRNO(EINVAL); #if CI_CFG_FAKE_IPV6 #if ! CI_CFG_IPV6 - if( s->domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(my_addr) ) + if( s->domain == PF_INET6 && ! ci_tcp_ipv6_is_ipv4(my_addr) ) goto handover; #else if( s->domain == PF_INET6 && (s->s_flags & CI_SOCK_FLAG_V6ONLY) && @@ -1927,10 +1895,10 @@ int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr, #endif #endif - if( ((s->s_flags & CI_SOCK_FLAG_TPROXY) != 0) && - (new_port == 0) ) { - NI_LOG(ep->netif, USAGE_WARNINGS, "Sockets with IP_TRANSPARENT set must " - "be explicitly bound to a port to be accelerated"); + if( ((s->s_flags & CI_SOCK_FLAG_TPROXY) != 0) && (new_port == 0) ) { + NI_LOG(ep->netif, USAGE_WARNINGS, + "Sockets with IP_TRANSPARENT set must " + "be explicitly bound to a port to be accelerated"); goto handover; } @@ -1938,49 +1906,51 @@ int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr, /* In scalable RSS mode accelerated 127.* sockets cause issues: * * with SO_REUSEPORT they would fail at listen - * * without SO_REUSEPORT they would end up in non-rss stack degrading performance - * with lock contention, epoll3 and accelerated loopback */ + * * without 
SO_REUSEPORT they would end up in non-rss stack degrading + * performance with lock contention, epoll3 and accelerated loopback */ if( CI_IPX_IS_LOOPBACK(addr) && - NI_OPTS(ep->netif).scalable_filter_enable != CITP_SCALABLE_FILTERS_DISABLE && + NI_OPTS(ep->netif).scalable_filter_enable != + CITP_SCALABLE_FILTERS_DISABLE && ((NI_OPTS(ep->netif).scalable_filter_mode & - (CITP_SCALABLE_MODE_PASSIVE | CITP_SCALABLE_MODE_RSS)) == - (CITP_SCALABLE_MODE_PASSIVE | CITP_SCALABLE_MODE_RSS)) ) + (CITP_SCALABLE_MODE_PASSIVE | CITP_SCALABLE_MODE_RSS)) == + (CITP_SCALABLE_MODE_PASSIVE | CITP_SCALABLE_MODE_RSS)) ) goto handover; - if( ((s->s_flags & CI_SOCK_FLAG_TPROXY) != 0) && - CI_IPX_ADDR_IS_ANY(addr) ) { - NI_LOG(ep->netif, USAGE_WARNINGS, "Sockets with IP_TRANSPARENT set must " - "be explicitly bound to an address to be accelerated"); + if( ((s->s_flags & CI_SOCK_FLAG_TPROXY) != 0) && CI_IPX_ADDR_IS_ANY(addr) ) { + NI_LOG(ep->netif, USAGE_WARNINGS, + "Sockets with IP_TRANSPARENT set must " + "be explicitly bound to an address to be accelerated"); goto handover; } /* Using the port number provided, see if we can do this bind */ if( CITP_OPTS.tcp_reuseports != 0 && new_port != 0 ) { - struct ci_port_list *force_reuseport; + struct ci_port_list* force_reuseport; CI_DLLIST_FOR_EACH2(struct ci_port_list, force_reuseport, link, - (ci_dllist*)(ci_uintptr_t)CITP_OPTS.tcp_reuseports) { + (ci_dllist*) (ci_uintptr_t) CITP_OPTS.tcp_reuseports) + { if( force_reuseport->port == new_port ) { int one = 1; if( ep->s->b.sb_aflags & CI_SB_AFLAG_OS_BACKED ) { ci_fd_t os_sock = ci_get_os_sock_fd(fd); ci_assert(CI_IS_VALID_SOCKET(os_sock)); - rc = ci_sys_setsockopt(os_sock, SOL_SOCKET, SO_REUSEPORT, &one, - sizeof(one)); + rc = ci_sys_setsockopt( + os_sock, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); ci_rel_os_sock_fd(os_sock); /* Fixme: shouldn't we handle errors? 
*/ - } - else if( (s->s_flags & CI_SOCK_FLAG_TPROXY) == 0 ) { + } else if( (s->s_flags & CI_SOCK_FLAG_TPROXY) == 0 ) { rc = ci_tcp_helper_os_sock_create_and_set(ep->netif, fd, s, - SOL_SOCKET, SO_REUSEPORT, - (char*)&one, sizeof(one)); + SOL_SOCKET, SO_REUSEPORT, (char*) &one, sizeof(one)); } if( rc != 0 ) { log("%s: failed to set SO_REUSEPORT on OS socket: " - "rc=%d errno=%d", __func__, rc, errno); + "rc=%d errno=%d", + __func__, rc, errno); } ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT; - LOG_TC(log("%s "SF_FMT", applied legacy SO_REUSEPORT flag for port %u", - __FUNCTION__, SF_PRI_ARGS(ep, fd), new_port)); + LOG_TC( + log("%s " SF_FMT ", applied legacy SO_REUSEPORT flag for port %u", + __FUNCTION__, SF_PRI_ARGS(ep, fd), new_port)); } } } @@ -1991,9 +1961,8 @@ int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr, goto handover; #endif - CI_LOGLEVEL_TRY_RET(LOG_TV, - __ci_tcp_bind(ep->netif, ep->s, fd, addr, - &new_port, 1)); + CI_LOGLEVEL_TRY_RET( + LOG_TV, __ci_tcp_bind(ep->netif, ep->s, fd, addr, &new_port, 1)); ci_tcp_bind_check_laddr(ep->netif, ep->s, addr); ep->s->s_flags |= CI_SOCK_FLAG_BOUND; #if CI_CFG_IPV6 @@ -2002,16 +1971,16 @@ int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr, ci_sock_cmn_set_laddr(ep->s, addr, new_port); ci_sock_set_raddr_port(s, addr_any, 0); - LOG_TC(log(LPF "bind to "IPX_FMT":%u n_p:%u lp:%u", IPX_ARG(AF_IP(addr)), - CI_BSWAP_BE16(((struct sockaddr_in*)my_addr)->sin_port), - CI_BSWAP_BE16(new_port), CI_BSWAP_BE16(sock_lport_be16(s)))); + LOG_TC(log(LPF "bind to " IPX_FMT ":%u n_p:%u lp:%u", IPX_ARG(AF_IP(addr)), + CI_BSWAP_BE16(((struct sockaddr_in*) my_addr)->sin_port), + CI_BSWAP_BE16(new_port), CI_BSWAP_BE16(sock_lport_be16(s)))); return 0; - handover: - if( !(ep->s->b.sb_aflags & CI_SB_AFLAG_OS_BACKED) ) { - rc = ci_tcp_helper_os_sock_create_and_set(ep->netif, fd, s, -1, 0, NULL, - 0); +handover: + if( ! 
(ep->s->b.sb_aflags & CI_SB_AFLAG_OS_BACKED) ) { + rc = + ci_tcp_helper_os_sock_create_and_set(ep->netif, fd, s, -1, 0, NULL, 0); if( rc < 0 ) RET_WITH_ERRNO(errno); } @@ -2028,7 +1997,7 @@ int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr, int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog) { - /* + /* ** ?? error handling on possible fails not handled robustly... ** ?? Need to check port number is valid TODO */ @@ -2045,8 +2014,8 @@ int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog) int scalable; int will_accelerate = 1; - LOG_TC(log("%s "SK_FMT" listen backlog=%d", __FUNCTION__, SK_PRI_ARGS(ep), - backlog)); + LOG_TC(log("%s " SK_FMT " listen backlog=%d", __FUNCTION__, SK_PRI_ARGS(ep), + backlog)); CHECK_TEP(ep); scalable = ci_tcp_use_mac_filter_listen(netif, s, s->cp.so_bindtodevice); @@ -2054,22 +2023,27 @@ int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog) if( s->s_flags & CI_SOCK_FLAG_DEFERRED_BIND ) complete_deferred_bind(netif, s, fd); - if( NI_OPTS(netif).tcp_listen_handover ) + if( NI_OPTS(netif).tcp_listen_handover ) { return CI_SOCKET_HANDOVER; + } /* We should handover if the socket is bound to alien address. */ if( s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) { + ci_log("I am alien"); /* We MUST NOT install filters for a loopback address */ - will_accelerate = (netif->state->flags & CI_NETIF_FLAG_USE_ALIEN_LADDRS || - scalable) && !CI_IPX_IS_LOOPBACK(s->laddr); + will_accelerate = + (netif->state->flags & CI_NETIF_FLAG_USE_ALIEN_LADDRS || scalable) && + ! CI_IPX_IS_LOOPBACK(s->laddr); } if( #if CI_CFG_ENDPOINT_MOVE - !NI_OPTS(netif).tcp_server_loopback && + ! NI_OPTS(netif).tcp_server_loopback && #endif - ! will_accelerate ) + ! 
will_accelerate ) { + ci_log("I am handing over"); return CI_SOCKET_HANDOVER; + } if( ul_backlog < 0 ) ul_backlog = NI_OPTS(netif).max_ep_bufs; @@ -2081,7 +2055,8 @@ int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog) ci_netif_lock(ep->netif); tls->acceptq_max = ul_backlog; if( (s->s_flags & CI_SOCK_FLAG_SCALPASSIVE) == 0 || - NI_OPTS(netif).scalable_listen != CITP_SCALABLE_LISTEN_ACCELERATED_ONLY ) + NI_OPTS(netif).scalable_listen != + CITP_SCALABLE_LISTEN_ACCELERATED_ONLY ) ci_tcp_helper_listen_os_sock(fd, ul_backlog); ci_netif_unlock(ep->netif); return 0; @@ -2096,7 +2071,7 @@ int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog) /* Bug 3376: if socket used for a previous, failed, connect then the error * numbers will not be as expected. Only seen when not using listening - * netifs (as moving the EP to the new netif resets them). + * netifs (as moving the EP to the new netif resets them). */ ts->s.tx_errno = EPIPE; @@ -2108,7 +2083,7 @@ int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog) if( ts->s.s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND ) { ci_uint16 source_be16; - /* They haven't previously done a bind, so we need to choose + /* They haven't previously done a bind, so we need to choose * a port. As we haven't been given a hint we let the OS choose. 
* * NB We don't need to call ci_tcp_bind_check_laddr() here, @@ -2117,21 +2092,21 @@ int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog) source_be16 = 0; rc = __ci_tcp_bind(ep->netif, ep->s, fd, ts->s.laddr, &source_be16, 0); - if (CI_LIKELY( rc==0 )) { + if( CI_LIKELY(rc == 0) ) { TS_IPX_TCP(ts)->tcp_source_be16 = source_be16; ts->s.cp.lport_be16 = source_be16; - LOG_TC(log(LNT_FMT "listen: our bind returned "IPX_FMT":%u", - LNT_PRI_ARGS(ep->netif, ts), IPX_ARG(AF_IP(ts->s.laddr)), - (unsigned) CI_BSWAP_BE16(TS_IPX_TCP(ts)->tcp_source_be16))); + LOG_TC(log(LNT_FMT "listen: our bind returned " IPX_FMT ":%u", + LNT_PRI_ARGS(ep->netif, ts), IPX_ARG(AF_IP(ts->s.laddr)), + (unsigned) CI_BSWAP_BE16(TS_IPX_TCP(ts)->tcp_source_be16))); } else { LOG_U(ci_log("__ci_tcp_bind returned %d at %s:%d", CI_GET_ERROR(rc), - __FILE__, __LINE__)); + __FILE__, __LINE__)); ci_netif_unlock(ep->netif); ci_sock_unlock(netif, &ts->s.b); return rc; } - } + } ci_tcp_set_slow_state(netif, ts, CI_TCP_LISTEN); tls = SOCK_TO_TCP_LISTEN(&ts->s); @@ -2159,10 +2134,10 @@ int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog) CITP_STATS_TCP_LISTEN(CI_ZERO(&tls->stats)); - /* install all the filters needed for this connection + /* install all the filters needed for this connection * - tcp_laddr_be32(ts) = 0 for IPADDR_ANY * - * TODO: handle BINDTODEVICE by setting phys_port paramter to correct + * TODO: handle BINDTODEVICE by setting phys_port paramter to correct * physical L5 port index * TODO: handle REUSEADDR by setting last paramter to TRUE */ @@ -2170,8 +2145,8 @@ int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog) if( scalable ) tls->s.s_flags |= CI_SOCK_FLAG_SCALPASSIVE; - rc = ci_tcp_ep_set_filters(netif, S_SP(tls), tls->s.cp.so_bindtodevice, - OO_SP_NULL); + rc = ci_tcp_ep_set_filters( + netif, S_SP(tls), tls->s.cp.so_bindtodevice, OO_SP_NULL); if( rc == -EFILTERSSOME ) { if( CITP_OPTS.no_fail ) rc = 0; @@ -2182,19 +2157,18 @@ int ci_tcp_listen(citp_socket* ep, ci_fd_t 
fd, int backlog) } ci_assert_nequal(rc, -EFILTERSSOME); VERB(ci_log("%s: set_filters returned %d", __FUNCTION__, rc)); - if (rc < 0) { + if( rc < 0 ) { if( s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN #if CI_CFG_ENDPOINT_MOVE && NI_OPTS(netif).tcp_server_loopback #endif - ) { + ) { /* That alien address can't be served by filters despite * CI_NETIF_FLAG_USE_ALIEN_LADDRS. We'll accelerate loopback in * any case. */ rc = 0; - } - else { + } else { CI_SET_ERROR(rc, -rc); goto post_listen_fail; } @@ -2203,22 +2177,23 @@ int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog) ci_assert_equal(rc, 0); - /* + /* * Call of system listen() is required for listen any, local host * communications server and multi-homed server (to accept connections * to L5 assigned address(es), but incoming from other interfaces). * The exception is scalable passive mode where we avoid listen on * OS socket to avoid kernel LHTABLE related performance degradation. */ if( (s->s_flags & CI_SOCK_FLAG_SCALPASSIVE) == 0 || - NI_OPTS(netif).scalable_listen != CITP_SCALABLE_LISTEN_ACCELERATED_ONLY ) { + NI_OPTS(netif).scalable_listen != + CITP_SCALABLE_LISTEN_ACCELERATED_ONLY ) { #ifdef __ci_driver__ - rc = efab_tcp_helper_listen_os_sock( netif2tcp_helper_resource(netif), - S_SP(tls), backlog); + rc = efab_tcp_helper_listen_os_sock( + netif2tcp_helper_resource(netif), S_SP(tls), backlog); #else rc = ci_tcp_helper_listen_os_sock(fd, backlog); #endif } - if ( rc < 0 ) { + if( rc < 0 ) { /* clear the filter we've just set */ ci_tcp_ep_clear_filters(netif, S_SP(tls), 0); goto post_listen_fail; @@ -2226,9 +2201,9 @@ int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog) ci_netif_unlock(ep->netif); return 0; - post_listen_fail: +post_listen_fail: ci_tcp_listenq_drop_all(netif, tls); - listen_fail: +listen_fail: /* revert TCP state to a non-listening socket format */ __ci_tcp_listen_to_normal(netif, tls); /* Above function sets orphan flag but we are attached to an FD. 
*/ @@ -2251,7 +2226,7 @@ static int ci_tcp_shutdown_listen(citp_socket* ep, int how, ci_fd_t fd) ci_sock_lock(ep->netif, &tls->s.b); ci_netif_lock(ep->netif); - LOG_TC(ci_log(SK_FMT" shutdown(SHUT_RD)", SK_PRI_ARGS(ep))); + LOG_TC(ci_log(SK_FMT " shutdown(SHUT_RD)", SK_PRI_ARGS(ep))); __ci_tcp_listen_shutdown(ep->netif, tls); __ci_tcp_listen_to_normal(ep->netif, tls); { @@ -2261,8 +2236,8 @@ static int ci_tcp_shutdown_listen(citp_socket* ep, int how, ci_fd_t fd) CI_TRY(ci_sys_fcntl(os_sock, F_SETFL, flags)); ci_rel_os_sock_fd(os_sock); } - citp_waitable_wake_not_in_poll(ep->netif, &tls->s.b, - CI_SB_FLAG_WAKE_RX | CI_SB_FLAG_WAKE_TX); + citp_waitable_wake_not_in_poll( + ep->netif, &tls->s.b, CI_SB_FLAG_WAKE_RX | CI_SB_FLAG_WAKE_TX); ci_netif_unlock(ep->netif); ci_sock_unlock(ep->netif, &tls->s.b); return 0; @@ -2288,10 +2263,10 @@ int ci_tcp_shutdown(citp_socket* ep, int how, ci_fd_t fd) /* Can't get lock, so try to defer shutdown to the lock holder. */ unsigned flags = 0; switch( s->b.state ) { - case CI_TCP_CLOSED: - case CI_TCP_TIME_WAIT: - CI_SET_ERROR(rc, ENOTCONN); - return rc; + case CI_TCP_CLOSED: + case CI_TCP_TIME_WAIT: + CI_SET_ERROR(rc, ENOTCONN); + return rc; } if( how == SHUT_RD || how == SHUT_RDWR ) flags |= CI_SOCK_AFLAG_NEED_SHUT_RD; @@ -2311,20 +2286,20 @@ int ci_tcp_shutdown(citp_socket* ep, int how, ci_fd_t fd) } -void ci_tcp_get_peer_addr(ci_tcp_state* ts, struct sockaddr* name, - socklen_t* namelen) +void ci_tcp_get_peer_addr( + ci_tcp_state* ts, struct sockaddr* name, socklen_t* namelen) { int af = ipcache_af(&ts->s.pkt); int /*bool*/ dnat = ts->s.s_flags & CI_SOCK_FLAG_DNAT; ci_addr_t raddr = dnat ? ts->pre_nat.daddr_be32 : tcp_ipx_raddr(ts); - ci_uint16 port = dnat ? ts->pre_nat.dport_be16 : - TS_IPX_TCP(ts)->tcp_dest_be16; + ci_uint16 port = + dnat ? 
ts->pre_nat.dport_be16 : TS_IPX_TCP(ts)->tcp_dest_be16; ci_addr_to_user(name, namelen, af, ts->s.domain, port, - CI_IPX_ADDR_PTR(af, raddr), ts->s.cp.so_bindtodevice); + CI_IPX_ADDR_PTR(af, raddr), ts->s.cp.so_bindtodevice); } -int ci_tcp_getpeername(citp_socket* ep, struct sockaddr* name, - socklen_t* namelen) +int ci_tcp_getpeername( + citp_socket* ep, struct sockaddr* name, socklen_t* namelen) { ci_sock_cmn* s = ep->s; int rc; @@ -2347,8 +2322,9 @@ int ci_tcp_getpeername(citp_socket* ep, struct sockaddr* name, } -int ci_tcp_getsockname(citp_socket* ep, ci_fd_t fd, struct sockaddr* sa, - socklen_t* p_sa_len) { +int ci_tcp_getsockname( + citp_socket* ep, ci_fd_t fd, struct sockaddr* sa, socklen_t* p_sa_len) +{ ci_sock_cmn* s = ep->s; int rc = 0; @@ -2365,4 +2341,3 @@ int ci_tcp_getsockname(citp_socket* ep, ci_fd_t fd, struct sockaddr* sa, #endif #endif - diff --git a/src/lib/transport/ip/tcp_helper.c b/src/lib/transport/ip/tcp_helper.c index 0213c7cba..7e49469d7 100644 --- a/src/lib/transport/ip/tcp_helper.c +++ b/src/lib/transport/ip/tcp_helper.c @@ -9,8 +9,8 @@ ** \**************************************************************************/ -/* - * stg 2006/11/06 : Moved to transport/ip from transport/ciul +/* + * stg 2006/11/06 : Moved to transport/ip from transport/ciul */ #include "ip_internal.h" @@ -29,21 +29,21 @@ int ci_tcp_helper_more_bufs(ci_netif* ni) { - return oo_resource_op(ci_netif_get_driver_handle(ni), - OO_IOC_TCP_MORE_BUFS, NULL); + return oo_resource_op( + ci_netif_get_driver_handle(ni), OO_IOC_TCP_MORE_BUFS, NULL); } int ci_tcp_helper_more_socks(ci_netif* ni) { - return oo_resource_op(ci_netif_get_driver_handle(ni), - OO_IOC_TCP_MORE_SOCKS, NULL); + return oo_resource_op( + ci_netif_get_driver_handle(ni), OO_IOC_TCP_MORE_SOCKS, NULL); } #if CI_CFG_FD_CACHING int ci_tcp_helper_clear_epcache(ci_netif* ni) { - return oo_resource_op(ci_netif_get_driver_handle(ni), - OO_IOC_TCP_CLEAR_EPCACHE, NULL); + return oo_resource_op( + 
ci_netif_get_driver_handle(ni), OO_IOC_TCP_CLEAR_EPCACHE, NULL); } #endif @@ -64,24 +64,22 @@ int ci_tcp_helper_clear_epcache(ci_netif* ni) * \return standard error codes * *--------------------------------------------------------------------*/ -int ci_tcp_helper_ep_set_filters(ci_fd_t fd, - oo_sp ep, - ci_ifid_t bindto_ifindex, - oo_sp from_tcp_id) +int ci_tcp_helper_ep_set_filters( + ci_fd_t fd, oo_sp ep, ci_ifid_t bindto_ifindex, oo_sp from_tcp_id) { oo_tcp_filter_set_t op; int rc; - op.tcp_id = ep; + op.tcp_id = ep; op.bindto_ifindex = bindto_ifindex; - op.from_tcp_id = from_tcp_id; + op.from_tcp_id = from_tcp_id; VERB(ci_log("%s: id=%d", __FUNCTION__, ep)); rc = oo_resource_op(fd, OO_IOC_EP_FILTER_SET, &op); if( rc < 0 ) - LOG_SV(ci_log("%s: failed for %d (rc=%d)", __FUNCTION__, - OO_SP_FMT(ep), rc)); + LOG_SV( + ci_log("%s: failed for %d (rc=%d)", __FUNCTION__, OO_SP_FMT(ep), rc)); return rc; } @@ -92,13 +90,9 @@ int ci_tcp_helper_ep_set_filters(ci_fd_t fd, * TODO * *--------------------------------------------------------------------*/ -int ci_tcp_helper_ep_reuseport_bind(ci_fd_t fd, - const char* cluster_name, - ci_int32 cluster_size, - ci_uint32 cluster_restart_opt, - ci_uint32 cluster_hot_restart_opt, - ci_addr_t addr, - ci_uint16 port_be16) +int ci_tcp_helper_ep_reuseport_bind(ci_fd_t fd, const char* cluster_name, + ci_int32 cluster_size, ci_uint32 cluster_restart_opt, + ci_uint32 cluster_hot_restart_opt, ci_addr_t addr, ci_uint16 port_be16) { oo_tcp_reuseport_bind_t op; int rc; @@ -107,8 +101,7 @@ int ci_tcp_helper_ep_reuseport_bind(ci_fd_t fd, op.cluster_name[CI_CFG_CLUSTER_NAME_LEN] = '\0'; op.cluster_size = cluster_size; op.cluster_restart_opt = cluster_restart_opt; - op.cluster_hot_restart_opt = cluster_hot_restart_opt, - op.addr = addr; + op.cluster_hot_restart_opt = cluster_hot_restart_opt, op.addr = addr; op.port_be16 = port_be16; VERB(ci_log("%s: id=%d", __FUNCTION__, fd)); rc = oo_resource_op(fd, OO_IOC_EP_REUSEPORT_BIND, &op); @@ -151,15 
+144,15 @@ int ci_tcp_helper_ep_clear_filters(ci_fd_t fd, oo_sp ep, int need_update) oo_tcp_filter_clear_t op; int rc; - op.tcp_id = ep; - op.need_update = !!need_update; + op.tcp_id = ep; + op.need_update = ! ! need_update; VERB(ci_log("%s: id=%d", __FUNCTION__, ep)); rc = oo_resource_op(fd, OO_IOC_EP_FILTER_CLEAR, &op); if( rc < 0 ) - LOG_SV(ci_log("%s: failed for %d (rc=%d)", __FUNCTION__, - OO_SP_FMT(ep), rc)); + LOG_SV( + ci_log("%s: failed for %d (rc=%d)", __FUNCTION__, OO_SP_FMT(ep), rc)); return rc; } @@ -189,7 +182,7 @@ int ci_tcp_helper_ep_filter_dump(void* opaque, void* buf, int buf_len) /*-------------------------------------------------------------------- *! * Adds or deletes multicast address to/from socket list. - * + * * \param fd File descriptor of tcp_helper * \param ep TCP control block id * \param phys_port L5 physcial port index to support SO_BINDTODEVICE @@ -200,37 +193,30 @@ int ci_tcp_helper_ep_filter_dump(void* opaque, void* buf, int buf_len) * \return standard error codes * *--------------------------------------------------------------------*/ -int ci_tcp_helper_ep_mcast_add_del(ci_fd_t fd, - oo_sp ep, - ci_uint32 mcast_addr, - ci_ifid_t ifindex, - int add) +int ci_tcp_helper_ep_mcast_add_del( + ci_fd_t fd, oo_sp ep, ci_uint32 mcast_addr, ci_ifid_t ifindex, int add) { oo_tcp_filter_mcast_t op; int rc; - op.tcp_id = ep; - op.ifindex = ifindex; - op.addr = mcast_addr; + op.tcp_id = ep; + op.ifindex = ifindex; + op.addr = mcast_addr; - VERB(ci_log("%s: id=%d %s", __FUNCTION__, OO_SP_FMT(ep), - add ? "add" : "del")); - rc = oo_resource_op(fd, - add ? OO_IOC_EP_FILTER_MCAST_ADD : - OO_IOC_EP_FILTER_MCAST_DEL, - &op); + VERB(ci_log( + "%s: id=%d %s", __FUNCTION__, OO_SP_FMT(ep), add ? "add" : "del")); + rc = oo_resource_op( + fd, add ? OO_IOC_EP_FILTER_MCAST_ADD : OO_IOC_EP_FILTER_MCAST_DEL, &op); if( rc < 0 ) - LOG_SV(ci_log("%s: %s failed for %d (rc=%d)", - __FUNCTION__, add ? 
"add" : "del", OO_SP_FMT(ep), rc)); + LOG_SV(ci_log("%s: %s failed for %d (rc=%d)", __FUNCTION__, + add ? "add" : "del", OO_SP_FMT(ep), rc)); return rc; } int __ci_tcp_helper_stack_attach(ci_fd_t from_fd, - efrm_nic_set_t *out_ptr_nic_set, - ci_uint32 *out_map_size, - bool is_service) + efrm_nic_set_t* out_ptr_nic_set, ci_uint32* out_map_size, bool is_service) { int rc; oo_stack_attach_t op; @@ -246,16 +232,15 @@ int __ci_tcp_helper_stack_attach(ci_fd_t from_fd, return op.fd; } -int ci_tcp_helper_stack_attach(ci_fd_t from_fd, - efrm_nic_set_t *out_ptr_nic_set, - ci_uint32 *out_map_size) +int ci_tcp_helper_stack_attach( + ci_fd_t from_fd, efrm_nic_set_t* out_ptr_nic_set, ci_uint32* out_map_size) { - return __ci_tcp_helper_stack_attach(from_fd, out_ptr_nic_set, - out_map_size, false); + return __ci_tcp_helper_stack_attach( + from_fd, out_ptr_nic_set, out_map_size, false); } -int ci_tcp_helper_sock_attach(ci_fd_t stack_fd, oo_sp ep_id, - int domain, int type) +int ci_tcp_helper_sock_attach( + ci_fd_t stack_fd, oo_sp ep_id, int domain, int type) { int rc; oo_sock_attach_t op; @@ -265,7 +250,7 @@ int ci_tcp_helper_sock_attach(ci_fd_t stack_fd, oo_sp ep_id, op.domain = domain; oo_rwlock_lock_read(&citp_dup2_lock); rc = oo_resource_op(stack_fd, OO_IOC_SOCK_ATTACH, &op); - oo_rwlock_unlock_read (&citp_dup2_lock); + oo_rwlock_unlock_read(&citp_dup2_lock); if( rc < 0 ) return rc; return op.fd; @@ -280,7 +265,7 @@ int ci_tcp_helper_sock_attach_to_existing_file(ci_fd_t stack_fd, oo_sp ep_id) op.ep_id = ep_id; oo_rwlock_lock_read(&citp_dup2_lock); rc = oo_resource_op(stack_fd, OO_IOC_SOCK_ATTACH_TO_EXISTING, &op); - oo_rwlock_unlock_read (&citp_dup2_lock); + oo_rwlock_unlock_read(&citp_dup2_lock); if( rc < 0 ) return rc; return op.fd; @@ -289,8 +274,8 @@ int ci_tcp_helper_sock_attach_to_existing_file(ci_fd_t stack_fd, oo_sp ep_id) #endif -int ci_tcp_helper_tcp_accept_sock_attach(ci_fd_t stack_fd, oo_sp ep_id, - int type) +int ci_tcp_helper_tcp_accept_sock_attach( + ci_fd_t 
stack_fd, oo_sp ep_id, int type) { int rc; oo_tcp_accept_sock_attach_t op; @@ -299,14 +284,14 @@ int ci_tcp_helper_tcp_accept_sock_attach(ci_fd_t stack_fd, oo_sp ep_id, op.type = type; oo_rwlock_lock_read(&citp_dup2_lock); rc = oo_resource_op(stack_fd, OO_IOC_TCP_ACCEPT_SOCK_ATTACH, &op); - oo_rwlock_unlock_read (&citp_dup2_lock); + oo_rwlock_unlock_read(&citp_dup2_lock); if( rc < 0 ) return rc; return op.fd; } -int ci_tcp_helper_pipe_attach(ci_fd_t stack_fd, oo_sp ep_id, - int flags, int fds[2]) +int ci_tcp_helper_pipe_attach( + ci_fd_t stack_fd, oo_sp ep_id, int flags, int fds[2]) { int rc; oo_pipe_attach_t op; @@ -336,7 +321,7 @@ ci_fd_t ci_tcp_helper_get_sock_fd(ci_fd_t fd) rc = oo_resource_op(fd, OO_IOC_OS_SOCK_FD_GET, &op); if( rc == 0 ) return op.fd_out; - oo_rwlock_unlock_read (&citp_dup2_lock); + oo_rwlock_unlock_read(&citp_dup2_lock); return (ci_fd_t) rc; /*! \TODO FIXME: remove cast */ } @@ -344,14 +329,14 @@ ci_fd_t ci_tcp_helper_get_sock_fd(ci_fd_t fd) int ci_tcp_helper_rel_sock_fd(ci_fd_t fd) { int rc = 0; - rc = ci_sys_close (fd); - oo_rwlock_unlock_read (&citp_dup2_lock); + rc = ci_sys_close(fd); + oo_rwlock_unlock_read(&citp_dup2_lock); return rc; } int ci_tcp_helper_bind_os_sock(ci_fd_t fd, const struct sockaddr* address, - size_t addrlen, ci_uint16* out_port) + size_t addrlen, ci_uint16* out_port) { int rc; oo_tcp_bind_os_sock_t op; @@ -361,15 +346,16 @@ int ci_tcp_helper_bind_os_sock(ci_fd_t fd, const struct sockaddr* address, rc = oo_resource_op(fd, OO_IOC_TCP_BIND_OS_SOCK, &op); - if (rc < 0) { + if( rc < 0 ) { errno = -rc; return -1; } ci_assert(rc == 0); /* Bug 646: only write back source port if bind succeeds! 
*/ - if (out_port) + if( out_port ) *out_port = op.addrlen; + return rc; } @@ -379,17 +365,17 @@ int ci_tcp_helper_listen_os_sock(ci_fd_t fd, int backlog) int rc; rc = oo_resource_op(fd, OO_IOC_TCP_LISTEN_OS_SOCK, &backlog); - if (rc < 0) { + if( rc < 0 ) { errno = -rc; return -1; } - ci_assert (rc == 0); + ci_assert(rc == 0); return rc; } -int ci_tcp_helper_endpoint_shutdown(ci_netif *ni, oo_sp sock_id, - int how, ci_uint32 old_state) +int ci_tcp_helper_endpoint_shutdown( + ci_netif* ni, oo_sp sock_id, int how, ci_uint32 old_state) { oo_tcp_endpoint_shutdown_t op; int rc; @@ -397,21 +383,21 @@ int ci_tcp_helper_endpoint_shutdown(ci_netif *ni, oo_sp sock_id, op.sock_id = sock_id; op.how = how; op.old_state = old_state; - rc = oo_resource_op(ci_netif_get_driver_handle(ni), - OO_IOC_TCP_ENDPOINT_SHUTDOWN, &op); - if (rc < 0) { + rc = oo_resource_op( + ci_netif_get_driver_handle(ni), OO_IOC_TCP_ENDPOINT_SHUTDOWN, &op); + if( rc < 0 ) { errno = -rc; return -1; } - ci_assert (rc == 0); + ci_assert(rc == 0); return rc; } -int ci_tcp_helper_set_tcp_close_os_sock(ci_netif *ni, oo_sp sock_id) +int ci_tcp_helper_set_tcp_close_os_sock(ci_netif* ni, oo_sp sock_id) { - return oo_resource_op(ci_netif_get_driver_handle(ni), - OO_IOC_TCP_CLOSE_OS_SOCK, &sock_id); + return oo_resource_op( + ci_netif_get_driver_handle(ni), OO_IOC_TCP_CLOSE_OS_SOCK, &sock_id); } @@ -419,10 +405,8 @@ int ci_tcp_helper_set_tcp_close_os_sock(ci_netif *ni, oo_sp sock_id) * option is passed with level >= 0 then that option is synced to the OS * socket after creation. 
*/ -int ci_tcp_helper_os_sock_create_and_set(ci_netif *ni, ci_fd_t fd, - ci_sock_cmn *s, int level, - int optname, const void* optval, - int optlen) +int ci_tcp_helper_os_sock_create_and_set(ci_netif* ni, ci_fd_t fd, + ci_sock_cmn* s, int level, int optname, const void* optval, int optlen) { int rc; oo_tcp_create_set_t op; @@ -435,7 +419,8 @@ int ci_tcp_helper_os_sock_create_and_set(ci_netif *ni, ci_fd_t fd, * we need to have created the OS socket (if needed) before installing a * filter. */ - ci_assert_nflags(s->s_flags, CI_SOCK_FLAG_FILTER | CI_SOCK_FLAG_STACK_FILTER); + ci_assert_nflags( + s->s_flags, CI_SOCK_FLAG_FILTER | CI_SOCK_FLAG_STACK_FILTER); /* This must be called before we turn into a listening socket. If F_SETFL * is used after a socket enters the listening state onload filters the * request to ensure that the OS socket remains non-blocking. @@ -457,13 +442,13 @@ int ci_tcp_helper_os_sock_create_and_set(ci_netif *ni, ci_fd_t fd, #if CI_CFG_TCP_SHARED_LOCAL_PORTS -int ci_tcp_helper_alloc_active_wild(ci_netif *ni, ci_addr_t laddr) +int ci_tcp_helper_alloc_active_wild(ci_netif* ni, ci_addr_t laddr) { oo_alloc_active_wild_t aaw = { .laddr = laddr, }; - return oo_resource_op(ci_netif_get_driver_handle(ni), - OO_IOC_ALLOC_ACTIVE_WILD, &aaw); + return oo_resource_op( + ci_netif_get_driver_handle(ni), OO_IOC_ALLOC_ACTIVE_WILD, &aaw); } #endif @@ -474,22 +459,22 @@ int ci_netif_evq_poll_k(ci_netif* ni, int _n) ci_uint32 intf_i = _n; CITP_STATS_NETIF_INC(ni, ioctl_evq_polls); - return oo_resource_op(ci_netif_get_driver_handle(ni), OO_IOC_EVQ_POLL, - &intf_i); + return oo_resource_op( + ci_netif_get_driver_handle(ni), OO_IOC_EVQ_POLL, &intf_i); } #endif -int ci_tcp_helper_zc_register_buffers(ci_netif* ni, void* base, int num_pages, - uint64_t* hw_addrs, uint64_t* id) +int ci_tcp_helper_zc_register_buffers( + ci_netif* ni, void* base, int num_pages, uint64_t* hw_addrs, uint64_t* id) { oo_zc_register_buffers_t arg = { - .base_ptr = (uintptr_t)base, + .base_ptr = 
(uintptr_t) base, .num_pages = num_pages, - .hw_addrs_ptr = (uintptr_t)hw_addrs, + .hw_addrs_ptr = (uintptr_t) hw_addrs, }; - int rc = oo_resource_op(ci_netif_get_driver_handle(ni), - OO_IOC_ZC_REGISTER_BUFFERS, &arg); + int rc = oo_resource_op( + ci_netif_get_driver_handle(ni), OO_IOC_ZC_REGISTER_BUFFERS, &arg); if( rc < 0 ) return rc; *id = arg.id; @@ -498,6 +483,6 @@ int ci_tcp_helper_zc_register_buffers(ci_netif* ni, void* base, int num_pages, int ci_tcp_helper_zc_unregister_buffers(ci_netif* ni, uint64_t id) { - return oo_resource_op(ci_netif_get_driver_handle(ni), - OO_IOC_ZC_UNREGISTER_BUFFERS, &id); + return oo_resource_op( + ci_netif_get_driver_handle(ni), OO_IOC_ZC_UNREGISTER_BUFFERS, &id); } diff --git a/src/lib/transport/ip/tcp_recv.c b/src/lib/transport/ip/tcp_recv.c index 2f549a4be..dba27ad9e 100644 --- a/src/lib/transport/ip/tcp_recv.c +++ b/src/lib/transport/ip/tcp_recv.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2003-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author djr ** \brief TCP recvmsg() etc. 
@@ -29,24 +29,24 @@ struct tcp_recv_info; typedef int (*pkt_copy_t)(ci_netif* netif, struct tcp_recv_info* rinf, - ci_ip_pkt_fmt* pkt, int peek_off, int* rc); + ci_ip_pkt_fmt* pkt, int peek_off, int* rc); struct tcp_recv_info { - int rc; - int stack_locked; - ci_iovec_ptr piov; - const ci_tcp_recvmsg_args* a; - pkt_copy_t copier; - int msg_flags; + int rc; + int stack_locked; + ci_iovec_ptr piov; + const ci_tcp_recvmsg_args* a; + pkt_copy_t copier; + int msg_flags; struct onload_zc_recv_args* zc_args; - size_t controllen; + size_t controllen; }; #ifndef __KERNEL__ -static int ci_tcp_recvmsg_urg(struct tcp_recv_info *rinf); +static int ci_tcp_recvmsg_urg(struct tcp_recv_info* rinf); #endif -static int ci_tcp_recvmsg_recv2(struct tcp_recv_info *rinf); +static int ci_tcp_recvmsg_recv2(struct tcp_recv_info* rinf); static bool iovec_roll_over(ci_iovec_ptr* piov) @@ -64,9 +64,8 @@ static bool iovec_roll_over(ci_iovec_ptr* piov) /* * \todo It looks like it's common with getpeername(). */ -ci_inline void -ci_tcp_recv_fill_msgname(ci_tcp_state* ts, struct sockaddr *name, - socklen_t *namelen) +ci_inline void ci_tcp_recv_fill_msgname( + ci_tcp_state* ts, struct sockaddr* name, socklen_t* namelen) { #if CI_CFG_TCP_RECVMSG_MSGNAME if( name ) { @@ -77,21 +76,20 @@ ci_tcp_recv_fill_msgname(ci_tcp_state* ts, struct sockaddr *name, ci_assert(namelen); if( CI_LIKELY(*namelen >= sizeof(struct sockaddr_in)) ) { - sinp = (struct sockaddr_in *)name; - sinp->sin_family = AF_INET; - sinp->sin_port = TS_IPX_TCP(ts)->tcp_dest_be16; + sinp = (struct sockaddr_in*) name; + sinp->sin_family = AF_INET; + sinp->sin_port = TS_IPX_TCP(ts)->tcp_dest_be16; sinp->sin_addr.s_addr = ts->s.pkt.ip.ip_daddr_be32; - *namelen = sizeof(struct sockaddr_in); - } - else { - sin_buf.sin_family = AF_INET; - sin_buf.sin_port = TS_IPX_TCP(ts)->tcp_dest_be16; + *namelen = sizeof(struct sockaddr_in); + } else { + sin_buf.sin_family = AF_INET; + sin_buf.sin_port = TS_IPX_TCP(ts)->tcp_dest_be16; sin_buf.sin_addr.s_addr 
= ts->s.pkt.ip.ip_daddr_be32; memcpy(name, &sin_buf, *namelen); } } #else - *namelen = 0; + *namelen = 0; #endif } @@ -110,14 +108,15 @@ static void ci_tcp_recvmsg_send_wnd_update(ci_netif* ni, ci_tcp_state* ts) CHECK_TS(ni, ts); - LOG_TR(log(LNTS_FMT "ack_trigger=%x c/w rcv_delivered=%x " - "rcv_added=%u buff=%u wnd_rhs=%x current=%u", - LNTS_PRI_ARGS(ni, ts), ts->ack_trigger, ts->rcv_delivered, - ts->rcv_added, ts->rcv_window_max, - tcp_rcv_wnd_right_edge_sent(ts), - tcp_rcv_wnd_current(ts))); + LOG_TR( + log(LNTS_FMT "ack_trigger=%x c/w rcv_delivered=%x " + "rcv_added=%u buff=%u wnd_rhs=%x current=%u", + LNTS_PRI_ARGS(ni, ts), ts->ack_trigger, ts->rcv_delivered, + ts->rcv_added, ts->rcv_window_max, tcp_rcv_wnd_right_edge_sent(ts), + tcp_rcv_wnd_current(ts))); - if( ts->s.b.state & CI_TCP_STATE_NOT_CONNECTED ) goto out; + if( ts->s.b.state & CI_TCP_STATE_NOT_CONNECTED ) + goto out; /* Free-up some receive buffers now we have the netif lock. */ ci_tcp_rx_reap_rxq_bufs(ni, ts); @@ -136,12 +135,11 @@ static void ci_tcp_recvmsg_send_wnd_update(ci_netif* ni, ci_tcp_state* ts) /* Reset [ack_trigger] so it'll fire when we would advertise a window ** which is at least tcp_rcv_wnd_advertised() + delta. */ - ts->ack_trigger = ts->rcv_delivered - + ci_tcp_ack_trigger_delta(ts) - - SEQ_SUB(ts->rcv_delivered + ts->rcv_window_max, - tcp_rcv_wnd_right_edge_sent(ts)); + ts->ack_trigger = ts->rcv_delivered + ci_tcp_ack_trigger_delta(ts) - + SEQ_SUB(ts->rcv_delivered + ts->rcv_window_max, + tcp_rcv_wnd_right_edge_sent(ts)); - out: +out: CHECK_TS(ni, ts); ci_netif_unlock(ni); @@ -154,20 +152,20 @@ static void ci_tcp_recvmsg_send_wnd_update(ci_netif* ni, ci_tcp_state* ts) void ci_tcp_rcvbuf_drs(ci_netif* netif, ci_tcp_state* ts) { ci_iptime_t time; - ci_uint32 rcv_bytes; + ci_uint32 rcv_bytes; /* Set an upper ceiling on rcvbuf for a single socket. * In general, DRS will pick a smaller size than this, based on how much * data the socket is transfering each RTT. 
- * Useful to make this fairly big so that a stack with a single socket can + * Useful to make this fairly big so that a stack with a single socket can * achieve good throughput. If we find that resource contention with many * sockets is a problem can adjust via EF_TCP_SOCKBUF_MAX_FRACTION. * If neceesary, could implement a fairness algorithm to control access to * the buffers (similar to flow WFQ). But general advice would be to make * sufficient packet buffers available (e.g. at least sum of Bandwidth Delay * Products * 4) */ - ci_uint64 max_rcvbuf_packets = - NI_OPTS(netif).max_rx_packets >> NI_OPTS(netif).tcp_sockbuf_max_fraction; + ci_uint64 max_rcvbuf_packets = + NI_OPTS(netif).max_rx_packets >> NI_OPTS(netif).tcp_sockbuf_max_fraction; time = ci_tcp_time_now(netif) - ts->rcvbuf_drs.time; if( time < (ts->sa >> 3) || ts->sa == 0 ) @@ -192,15 +190,15 @@ void ci_tcp_rcvbuf_drs(ci_netif* netif, ci_tcp_state* ts) if( rcv_bytes >= ts->rcvbuf_drs.bytes + (ts->rcvbuf_drs.bytes >> 2) ) { /* traffic grew, but by how much ? 
*/ - if (rcv_bytes >= ts->rcvbuf_drs.bytes + (ts->rcvbuf_drs.bytes >> 1)) - /* looks like 2x growth per RTT so we need rcv_win > 4 * rcv_bytes */ - rcv_wnd <<= 1; + if( rcv_bytes >= ts->rcvbuf_drs.bytes + (ts->rcvbuf_drs.bytes >> 1) ) + /* looks like 2x growth per RTT so we need rcv_win > 4 * rcv_bytes */ + rcv_wnd <<= 1; else - /* looks like slow start, so want rcv_win > 3 * rcv_bytes */ - rcv_wnd += (rcv_wnd >> 1); + /* looks like slow start, so want rcv_win > 3 * rcv_bytes */ + rcv_wnd += (rcv_wnd >> 1); } - rcvbuf = CI_MIN((ci_uint64)rcv_wnd, max_rcvbuf_packets * ts->amss); + rcvbuf = CI_MIN((ci_uint64) rcv_wnd, max_rcvbuf_packets * ts->amss); if( rcvbuf > ts->s.so.rcvbuf ) { ts->s.so.rcvbuf = rcvbuf; @@ -210,15 +208,15 @@ void ci_tcp_rcvbuf_drs(ci_netif* netif, ci_tcp_state* ts) } ts->rcvbuf_drs.bytes = rcv_bytes; - new_period: - ts->rcvbuf_drs.seq = ts->rcv_delivered; +new_period: + ts->rcvbuf_drs.seq = ts->rcv_delivered; ts->rcvbuf_drs.time = ci_tcp_time_now(netif); } static inline int /* bool */ -ci_tcp_recvmsg_get_nopeek(int peek_off, ci_tcp_state *ts, ci_netif *netif, - ci_ip_pkt_fmt **pkt, int total, int n, int max_bytes) +ci_tcp_recvmsg_get_nopeek(int peek_off, ci_tcp_state* ts, ci_netif* netif, + ci_ip_pkt_fmt** pkt, int total, int n, int max_bytes) { ci_assert(peek_off == 0); ts->rcv_delivered += n; @@ -236,7 +234,7 @@ ci_tcp_recvmsg_get_nopeek(int peek_off, ci_tcp_state *ts, ci_netif *netif, ci_wmb(); ci_assert(OO_PP_EQ(ts->recv1_extract, OO_PKT_P(*pkt))); ts->recv1_extract = (*pkt)->next; - *pkt = PKT_CHK_NNL(netif, ts->recv1_extract); + *pkt = PKT_CHK_NNL(netif, ts->recv1_extract); ci_assert(oo_offbuf_not_empty(&(*pkt)->buf)); } return 0; @@ -250,35 +248,35 @@ ci_tcp_recvmsg_get_nopeek(int peek_off, ci_tcp_state *ts, ci_netif *netif, */ /* Turn timestamps into the requested cmsg structure(s). 
*/ -ci_inline void -ci_tcp_fill_recv_timestamp(struct tcp_recv_info* rinf, ci_ip_pkt_fmt* pkt) +ci_inline void ci_tcp_fill_recv_timestamp( + struct tcp_recv_info* rinf, ci_ip_pkt_fmt* pkt) { - ci_netif* ni = rinf->a->ni; - ci_tcp_state* ts = rinf->a->ts; - ci_msghdr* msg = rinf->a->msg; + ci_netif* ni = rinf->a->ni; + ci_tcp_state* ts = rinf->a->ts; + ci_msghdr* msg = rinf->a->msg; if( msg != NULL ) { struct cmsg_state cmsg_state; - if( CI_UNLIKELY( ts->s.cmsg_flags & CI_IP_CMSG_TIMESTAMP_ANY ) ) { - msg->msg_controllen = rinf->controllen; - cmsg_state.msg = msg; + if( CI_UNLIKELY(ts->s.cmsg_flags & CI_IP_CMSG_TIMESTAMP_ANY) ) { + msg->msg_controllen = rinf->controllen; + cmsg_state.msg = msg; cmsg_state.cmsg_bytes_used = 0; - cmsg_state.cm = CMSG_FIRSTHDR(msg); - cmsg_state.p_msg_flags = &rinf->msg_flags; + cmsg_state.cm = CMSG_FIRSTHDR(msg); + cmsg_state.p_msg_flags = &rinf->msg_flags; - if ( ts->s.cmsg_flags & CI_IP_CMSG_TIMESTAMPNS ) + if( ts->s.cmsg_flags & CI_IP_CMSG_TIMESTAMPNS ) ip_cmsg_recv_timestampns(ni, pkt->tstamp_frc, &cmsg_state); - else /* CI_IP_CMSG_TIMESTAMP flag gets ignored if NS counterpart is set */ + else /* CI_IP_CMSG_TIMESTAMP flag gets ignored if NS counterpart is set + */ if( ts->s.cmsg_flags & CI_IP_CMSG_TIMESTAMP ) ip_cmsg_recv_timestamp(ni, pkt->tstamp_frc, &cmsg_state); if( ts->s.cmsg_flags & CI_IP_CMSG_TIMESTAMPING ) - ip_cmsg_recv_timestamping(ni, pkt, ts->s.timestamping_flags, - &cmsg_state); + ip_cmsg_recv_timestamping( + ni, pkt, ts->s.timestamping_flags, &cmsg_state); msg->msg_controllen = cmsg_state.cmsg_bytes_used; - } - else + } else msg->msg_controllen = 0; } } @@ -287,11 +285,11 @@ ci_tcp_fill_recv_timestamp(struct tcp_recv_info* rinf, ci_ip_pkt_fmt* pkt) #if CI_CFG_TCP_OFFLOAD_RECYCLER && CI_CFG_TCP_PLUGIN_RECV_NONZC -static int offloaded_copy_block(ci_iovec* iov, const void* src, size_t max, - int flags, int* rc) +static int offloaded_copy_block( + ci_iovec* iov, const void* src, size_t max, int flags, int* rc) { int n = 
CI_MIN(max, CI_IOVEC_LEN(iov)); - if(CI_LIKELY( ! (flags & MSG_TRUNC) )) { + if( CI_LIKELY(! (flags & MSG_TRUNC)) ) { #ifdef __KERNEL__ if( copy_to_user(CI_IOVEC_BASE(iov), src, n) ) return -EFAULT; @@ -299,22 +297,22 @@ static int offloaded_copy_block(ci_iovec* iov, const void* src, size_t max, memcpy(CI_IOVEC_BASE(iov), src, n); #endif } - CI_IOVEC_BASE(iov) = (char*)CI_IOVEC_BASE(iov) + n; + CI_IOVEC_BASE(iov) = (char*) CI_IOVEC_BASE(iov) + n; CI_IOVEC_LEN(iov) -= n; *rc += n; return n; } static int copy_ceph_pkt(ci_netif* netif, struct tcp_recv_info* rinf, - ci_ip_pkt_fmt* pkt, int peek_off, int* ndata) + ci_ip_pkt_fmt* pkt, int peek_off, int* ndata) { /* This function is essentially entirely bogus, most prominently in the fact * that it'll emit zeros for all 'remote' data. It exists primarily so that * test tools (e.g. packetdrill) can work on pluginized streams. */ - int total = oo_offbuf_left(&pkt->buf); - int ofs = 0; - char* p = oo_offbuf_ptr(&pkt->buf); - int out_rc = 0; + int total = oo_offbuf_left(&pkt->buf); + int ofs = 0; + char* p = oo_offbuf_ptr(&pkt->buf); + int out_rc = 0; static const char zeros[64]; /* Not currently required, and a little tricky to get right: */ @@ -322,13 +320,13 @@ static int copy_ceph_pkt(ci_netif* netif, struct tcp_recv_info* rinf, return -EOPNOTSUPP; while( ofs != total && CI_IOVEC_LEN(&rinf->piov.io) != 0 ) { - const int hdr_len = offsetof(struct ceph_data_pkt, data); + const int hdr_len = offsetof(struct ceph_data_pkt, data); struct ceph_data_pkt data; - int n; + int n; if( total - ofs < hdr_len ) { - LOG_TR(log(LNTS_FMT "bogus plugin metastream ofs=%d total=%d", - LNTS_PRI_ARGS(netif, rinf->a->ts), ofs, total)); + LOG_TR(log(LNTS_FMT "bogus plugin metastream ofs=%d total=%d", + LNTS_PRI_ARGS(netif, rinf->a->ts), ofs, total)); goto unrecoverable; } @@ -337,86 +335,81 @@ static int copy_ceph_pkt(ci_netif* netif, struct tcp_recv_info* rinf, /* NB: if adding a new msg_type here, don't forget that zc_ceph_callback() * has a 
similar switch statement */ switch( data.msg_type ) { - case XSN_CEPH_DATA_INLINE: - if( total - ofs < data.msg_len ) { - LOG_TR(log(LNTS_FMT "bogus plugin inline len %d-%d<%u", - LNTS_PRI_ARGS(netif, rinf->a->ts), total, ofs, - data.msg_len)); - goto unrecoverable; - } - n = offloaded_copy_block(&rinf->piov.io, p + ofs, data.msg_len, - rinf->a->flags, &out_rc); - if( n < 0 ) - return -EFAULT; - if( n != data.msg_len ) { - /* Stopped in the middle: hack the packet so that we can resume next - * time. NB: this can potentially make onload_tcpdump output a little - * odd */ - data.msg_len -= n; - memcpy(p + ofs + n - hdr_len, &data, hdr_len); - ofs += n - hdr_len; - goto out; - } - break; - - case XSN_CEPH_DATA_REMOTE: - if( total - ofs < sizeof(data.remote) || - data.msg_len != sizeof(data.remote) ) { - LOG_TR(log(LNTS_FMT "bogus plugin remote block %d-%d/%u", - LNTS_PRI_ARGS(netif, rinf->a->ts), total, ofs, - data.msg_len)); - goto unrecoverable; - } - memcpy(&data.remote, p + ofs, sizeof(data.remote)); - while( data.remote.data_len ) { - n = offloaded_copy_block(&rinf->piov.io, zeros, - CI_MIN((uint16_t)sizeof(zeros), - data.remote.data_len), - rinf->a->flags, &out_rc); + case XSN_CEPH_DATA_INLINE: + if( total - ofs < data.msg_len ) { + LOG_TR(log(LNTS_FMT "bogus plugin inline len %d-%d<%u", + LNTS_PRI_ARGS(netif, rinf->a->ts), total, ofs, data.msg_len)); + goto unrecoverable; + } + n = offloaded_copy_block( + &rinf->piov.io, p + ofs, data.msg_len, rinf->a->flags, &out_rc); if( n < 0 ) return -EFAULT; - data.remote.data_len -= n; - data.remote.start_ptr += n; - if( n != sizeof(zeros) ) { - memcpy(p + ofs, &data.remote, sizeof(data.remote)); - ofs -= hdr_len; + if( n != data.msg_len ) { + /* Stopped in the middle: hack the packet so that we can resume next + * time. 
NB: this can potentially make onload_tcpdump output a little + * odd */ + data.msg_len -= n; + memcpy(p + ofs + n - hdr_len, &data, hdr_len); + ofs += n - hdr_len; goto out; } - } - break; + break; - case XSN_CEPH_DATA_LOST_SYNC: - if( total - ofs < sizeof(data.lost_sync) || - data.msg_len != sizeof(data.lost_sync) ) { - LOG_TR(log(LNTS_FMT "bogus plugin lost-sync block %d-%d/%u", - LNTS_PRI_ARGS(netif, rinf->a->ts), total, ofs, - data.msg_len)); - goto unrecoverable; - } - memcpy(&data.lost_sync, p, sizeof(data.lost_sync)); - log(LNTS_FMT "plugin lost sync: %u/%u", - LNTS_PRI_ARGS(netif, rinf->a->ts), data.lost_sync.reason, - data.lost_sync.subreason); - /* Set the return value so that we'll keep hitting this same lost-sync - * message on every receive, and hence block the socket from making - * further progress */ - ofs -= hdr_len; - goto out; + case XSN_CEPH_DATA_REMOTE: + if( total - ofs < sizeof(data.remote) || + data.msg_len != sizeof(data.remote) ) { + LOG_TR(log(LNTS_FMT "bogus plugin remote block %d-%d/%u", + LNTS_PRI_ARGS(netif, rinf->a->ts), total, ofs, data.msg_len)); + goto unrecoverable; + } + memcpy(&data.remote, p + ofs, sizeof(data.remote)); + while( data.remote.data_len ) { + n = offloaded_copy_block(&rinf->piov.io, zeros, + CI_MIN((uint16_t) sizeof(zeros), data.remote.data_len), + rinf->a->flags, &out_rc); + if( n < 0 ) + return -EFAULT; + data.remote.data_len -= n; + data.remote.start_ptr += n; + if( n != sizeof(zeros) ) { + memcpy(p + ofs, &data.remote, sizeof(data.remote)); + ofs -= hdr_len; + goto out; + } + } + break; - default: - LOG_TR(log(LNTS_FMT "bogus plugin metastream header %u/%u", - LNTS_PRI_ARGS(netif, rinf->a->ts), data.msg_type, - data.msg_len)); - goto unrecoverable; + case XSN_CEPH_DATA_LOST_SYNC: + if( total - ofs < sizeof(data.lost_sync) || + data.msg_len != sizeof(data.lost_sync) ) { + LOG_TR(log(LNTS_FMT "bogus plugin lost-sync block %d-%d/%u", + LNTS_PRI_ARGS(netif, rinf->a->ts), total, ofs, data.msg_len)); + goto 
unrecoverable; + } + memcpy(&data.lost_sync, p, sizeof(data.lost_sync)); + log(LNTS_FMT "plugin lost sync: %u/%u", + LNTS_PRI_ARGS(netif, rinf->a->ts), data.lost_sync.reason, + data.lost_sync.subreason); + /* Set the return value so that we'll keep hitting this same lost-sync + * message on every receive, and hence block the socket from making + * further progress */ + ofs -= hdr_len; + goto out; + + default: + LOG_TR(log(LNTS_FMT "bogus plugin metastream header %u/%u", + LNTS_PRI_ARGS(netif, rinf->a->ts), data.msg_type, data.msg_len)); + goto unrecoverable; } ofs += data.msg_len; } - out: +out: *ndata = out_rc; return ofs; - unrecoverable: +unrecoverable: /* Return the number of bytes successfully consumed, so that if the user * tries again then we'll log the same error again. This is a different * decision to the one we made at the identical label in zc_ceph_callback() @@ -429,7 +422,7 @@ static int copy_ceph_pkt(ci_netif* netif, struct tcp_recv_info* rinf, static int copy_one_pkt(ci_netif* netif, struct tcp_recv_info* rinf, - ci_ip_pkt_fmt* pkt, int peek_off, int* ndata) + ci_ip_pkt_fmt* pkt, int peek_off, int* ndata) { int n; @@ -443,14 +436,14 @@ static int copy_one_pkt(ci_netif* netif, struct tcp_recv_info* rinf, } #endif - if(CI_LIKELY( ! (rinf->a->flags & MSG_TRUNC) )) + if( CI_LIKELY(! (rinf->a->flags & MSG_TRUNC)) ) n = ci_ip_copy_pkt_to_user(netif, &rinf->piov.io, pkt, peek_off); else { /* Very strange kernel behaviour: MSG_TRUNC will consume the number * of bytes requested, but will not write to the user's pointer in any * circumstances. This code does the same. */ - n = CI_MIN((size_t)oo_offbuf_left(&pkt->buf) - peek_off, - rinf->piov.io.iov_len); + n = CI_MIN( + (size_t) oo_offbuf_left(&pkt->buf) - peek_off, rinf->piov.io.iov_len); CI_IOVEC_LEN(&rinf->piov.io) -= n; } /* NB: on failure of this function (i.e. 
n<0) the caller doesn't make any @@ -471,15 +464,14 @@ static int copy_one_pkt(ci_netif* netif, struct tcp_recv_info* rinf, ** arg and the CI_MSG_*_LOCKED constants to specify which locks are ** already held. */ -__attribute__((always_inline)) -static inline int -ci_tcp_recvmsg_get_impl(struct tcp_recv_info *rinf) +__attribute__((always_inline)) static inline int ci_tcp_recvmsg_get_impl( + struct tcp_recv_info* rinf) { - ci_netif* netif = rinf->a->ni; - ci_tcp_state* ts = rinf->a->ts; - int n, ndata, peek_off, total, rc; + ci_netif* netif = rinf->a->ni; + ci_tcp_state* ts = rinf->a->ts; + int n, ndata, peek_off, total, rc; ci_ip_pkt_fmt* pkt; - int max_bytes; + int max_bytes; #if CI_CFG_TIMESTAMPING && ! defined(__KERNEL__) int fill_tstamp; #endif @@ -491,27 +483,28 @@ ci_tcp_recvmsg_get_impl(struct tcp_recv_info *rinf) /* The socket must be locked. */ ci_assert(ci_sock_is_locked(netif, &ts->s.b)); - peek_off = 0; - total = 0; - rc = 0; + peek_off = 0; + total = 0; + rc = 0; /* Maximum number of bytes we have in both recv1 and recv2. * In this function, we get data from recv1 only, so the actual amount * of received data may be less than max_bytes. */ max_bytes = tcp_rcv_usr(ts); - if( max_bytes <= 0 || OO_PP_IS_NULL(ts->recv1_extract)) - return rc; /* Receive queue is empty. */ + if( max_bytes <= 0 || OO_PP_IS_NULL(ts->recv1_extract) ) + return rc; /* Receive queue is empty. */ ci_assert(OO_PP_NOT_NULL(ts->recv1.head)); pkt = PKT_CHK_NNL(netif, ts->recv1_extract); if( oo_offbuf_is_empty(&pkt->buf) ) { - if( OO_PP_IS_NULL(pkt->next) ) return rc; /* recv1 is empty. */ + if( OO_PP_IS_NULL(pkt->next) ) + return rc; /* recv1 is empty. */ /* See ci_tcp_recvmsg_get_nopeek() for barrier discussion. 
*/ ci_wmb(); ts->recv1_extract = pkt->next; - pkt = PKT_CHK_NNL(netif, ts->recv1_extract); + pkt = PKT_CHK_NNL(netif, ts->recv1_extract); ci_assert(oo_offbuf_not_empty(&pkt->buf)); } initial_recv1_extract = ts->recv1_extract; @@ -539,16 +532,17 @@ ci_tcp_recvmsg_get_impl(struct tcp_recv_info *rinf) ci_assert(oo_offbuf_left(&pkt->buf) > peek_off); #if CI_CFG_TIMESTAMPING && ! defined(__KERNEL__) - if( fill_tstamp ) { - ci_tcp_fill_recv_timestamp(rinf, pkt); - if( ! rinf->zc_args ) - fill_tstamp = 0; - } + if( fill_tstamp ) { + ci_tcp_fill_recv_timestamp(rinf, pkt); + if( ! rinf->zc_args ) + fill_tstamp = 0; + } #endif n = rinf->copier(netif, rinf, pkt, peek_off, &ndata); -#ifdef __KERNEL__ - if( n < 0 ) break; +#ifdef __KERNEL__ + if( n < 0 ) + break; #endif rc += ndata; oo_offbuf_advance(&pkt->buf, n); @@ -556,12 +550,11 @@ ci_tcp_recvmsg_get_impl(struct tcp_recv_info *rinf) total += ndata; ci_assert_le(total, max_bytes); - if(CI_LIKELY( ! (rinf->a->flags & (MSG_PEEK | ONLOAD_MSG_ONEPKT)) )) { - if( ci_tcp_recvmsg_get_nopeek(peek_off, ts, netif, &pkt, total, ndata, - max_bytes) != 0 ) + if( CI_LIKELY(! (rinf->a->flags & (MSG_PEEK | ONLOAD_MSG_ONEPKT))) ) { + if( ci_tcp_recvmsg_get_nopeek( + peek_off, ts, netif, &pkt, total, ndata, max_bytes) != 0 ) break; - } - else { + } else { if( rinf->a->flags & MSG_PEEK ) { /* copy did an implicit advance of the offbuf which we do not want */ oo_offbuf_retard(&pkt->buf, n); @@ -572,14 +565,13 @@ ci_tcp_recvmsg_get_impl(struct tcp_recv_info *rinf) if( total == max_bytes || OO_PP_IS_NULL(pkt->next) ) /* We've emptied the receive queue. 
*/ return rc; - pkt = PKT_CHK_NNL(netif, pkt->next); + pkt = PKT_CHK_NNL(netif, pkt->next); peek_off = 0; ci_assert(oo_offbuf_not_empty(&pkt->buf)); } - } - else { - if( ci_tcp_recvmsg_get_nopeek(peek_off, ts, netif, &pkt, total, ndata, - max_bytes) != 0 ) + } else { + if( ci_tcp_recvmsg_get_nopeek( + peek_off, ts, netif, &pkt, total, ndata, max_bytes) != 0 ) break; } @@ -597,8 +589,8 @@ ci_tcp_recvmsg_get_impl(struct tcp_recv_info *rinf) ** comment; darn. */ } - /* we do this here as the last thing to avoid sending many small window updates - * in cases with small recv window and small segments */ + /* we do this here as the last thing to avoid sending many small window + * updates in cases with small recv window and small segments */ if( initial_recv1_extract != ts->recv1_extract && CI_UNLIKELY(SEQ_LE(ts->ack_trigger, ts->rcv_delivered)) ) { ci_tcp_recvmsg_send_wnd_update(netif, ts); @@ -607,16 +599,14 @@ ci_tcp_recvmsg_get_impl(struct tcp_recv_info *rinf) } -__attribute__((always_inline)) -static inline int -ci_tcp_recvmsg_get_inline(struct tcp_recv_info *rinf) +__attribute__((always_inline)) static inline int ci_tcp_recvmsg_get_inline( + struct tcp_recv_info* rinf) { return ci_tcp_recvmsg_get_impl(rinf); } -static int -ci_tcp_recvmsg_get_outofline(struct tcp_recv_info *rinf) +static int ci_tcp_recvmsg_get_outofline(struct tcp_recv_info* rinf) { return ci_tcp_recvmsg_get_impl(rinf); } @@ -626,14 +616,14 @@ ci_tcp_recvmsg_get_outofline(struct tcp_recv_info *rinf) /* Returns >0 if socket is readable. Returns 0 if spin times-out. Returns * -ve error code otherwise. 
*/ -static int ci_tcp_recvmsg_spin(ci_netif* ni, ci_tcp_state* ts, - ci_uint64 start_frc) +static int ci_tcp_recvmsg_spin( + ci_netif* ni, ci_tcp_state* ts, ci_uint64 start_frc) { - ci_uint64 now_frc; - ci_uint64 schedule_frc = start_frc; - citp_signal_info* si = citp_signal_get_specific_inited(); - ci_uint64 max_spin = ts->s.b.spin_cycles; - int rc, spin_limit_by_so = 0; + ci_uint64 now_frc; + ci_uint64 schedule_frc = start_frc; + citp_signal_info* si = citp_signal_get_specific_inited(); + ci_uint64 max_spin = ts->s.b.spin_cycles; + int rc, spin_limit_by_so = 0; /* Cache the next expected packet buffer to save work within the loop. * We need to update this after polling. If someone else polls, then this @@ -644,15 +634,16 @@ static int ci_tcp_recvmsg_spin(ci_netif* ni, ci_tcp_state* ts, * If there is no future packet to poll, then we point to a local location * which always contains the "poison" value. */ - int intf_i = ts->s.pkt.intf_i; - const uint32_t poison = CI_PKT_RX_POISON; - const volatile uint32_t* future = ci_netif_intf_rx_future(ni, intf_i, &poison); + int intf_i = ts->s.pkt.intf_i; + const uint32_t poison = CI_PKT_RX_POISON; + const volatile uint32_t* future = + ci_netif_intf_rx_future(ni, intf_i, &poison); if( ts->s.so.rcvtimeo_msec ) { - ci_uint64 max_so_spin = (ci_uint64)ts->s.so.rcvtimeo_msec * - IPTIMER_STATE(ni)->khz; + ci_uint64 max_so_spin = + (ci_uint64) ts->s.so.rcvtimeo_msec * IPTIMER_STATE(ni)->khz; if( max_so_spin <= max_spin ) { - max_spin = max_so_spin; + max_spin = max_so_spin; spin_limit_by_so = 1; } } @@ -679,16 +670,15 @@ static int ci_tcp_recvmsg_spin(ci_netif* ni, ci_tcp_state* ts, if( tcp_rcv_usr(ts) ) goto out; future = ci_netif_intf_rx_future(ni, intf_i, &poison); - } - else if( ! ni->state->is_spinner ) + } else if( ! 
ni->state->is_spinner ) ni->state->is_spinner = 1; } if( tcp_rcv_usr(ts) || TCP_RX_DONE(ts) ) goto out; ci_frc64(&now_frc); - rc = OO_SPINLOOP_PAUSE_CHECK_SIGNALS(ni, now_frc, &schedule_frc, - ts->s.so.rcvtimeo_msec, &ts->s.b, si); + rc = OO_SPINLOOP_PAUSE_CHECK_SIGNALS( + ni, now_frc, &schedule_frc, ts->s.so.rcvtimeo_msec, &ts->s.b, si); if( rc != 0 ) goto out; #if CI_CFG_SPIN_STATS @@ -699,7 +689,7 @@ static int ci_tcp_recvmsg_spin(ci_netif* ni, ci_tcp_state* ts, } while( now_frc - start_frc < max_spin ); rc = spin_limit_by_so ? -EAGAIN : 0; - out: +out: ni->state->is_spinner = 0; return rc; } @@ -715,9 +705,9 @@ static int ci_tcp_recvmsg_spin(ci_netif* ni, ci_tcp_state* ts, ** MSG_DONTWAIT or MSG_PEEK. (On linux at least: MSG_PEEK cancels ** MSG_WAITALL, and MSG_DONTWAIT overrides MSG_WAITALL). */ -#define FLAGS_AND_LOWAT_PERMIT_FAST_RET_WITH_DATA(ts, bytes, flags) \ - ((flags & (MSG_DONTWAIT | MSG_PEEK)) || \ - ((~flags & MSG_WAITALL) && (bytes) >= (ts)->s.so.rcvlowat)) +#define FLAGS_AND_LOWAT_PERMIT_FAST_RET_WITH_DATA(ts, bytes, flags) \ + ((flags & (MSG_DONTWAIT | MSG_PEEK)) || \ + ((~flags & MSG_WAITALL) && (bytes) >= (ts)->s.so.rcvlowat)) static inline void ci_tcp_recvmsg_init_piov(struct tcp_recv_info* rinf) @@ -728,34 +718,31 @@ static inline void ci_tcp_recvmsg_init_piov(struct tcp_recv_info* rinf) * always see that there's 'infinite' space left, but let's set the * pointers to NULL as well, to catch anywhere that might actually try to * write anything. */ - rinf->piov.iov = NULL; - rinf->piov.iovlen = 1; - rinf->piov.io.iov_len = ~(size_t)0; + rinf->piov.iov = NULL; + rinf->piov.iovlen = 1; + rinf->piov.io.iov_len = ~(size_t) 0; rinf->piov.io.iov_base = NULL; - } - else { + } else { /* [piov] gives keeps track of our position in the apps buffer(s). 
*/ - ci_iovec_ptr_init_nz(&rinf->piov, - rinf->a->msg->msg_iov,rinf-> a->msg->msg_iovlen); + ci_iovec_ptr_init_nz( + &rinf->piov, rinf->a->msg->msg_iov, rinf->a->msg->msg_iovlen); } } -__attribute__((always_inline)) -static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, - pkt_copy_t copier, - struct onload_zc_recv_args* zc_args) +__attribute__((always_inline)) static inline int ci_tcp_recvmsg_impl( + const ci_tcp_recvmsg_args* a, pkt_copy_t copier, + struct onload_zc_recv_args* zc_args) { - int have_polled; - ci_uint64 sleep_seq; - ci_tcp_state* ts = a->ts; - ci_netif* ni = a->ni; - int flags = a->flags; - ci_uint64 start_frc = 0; /* suppress compiler warning */ + ci_uint64 sleep_seq; + ci_tcp_state* ts = a->ts; + ci_netif* ni = a->ni; + int flags = a->flags; + ci_uint64 start_frc = 0; /* suppress compiler warning */ #ifndef __KERNEL__ - unsigned tcp_recv_spin = 0; + unsigned tcp_recv_spin = 0; #endif - ci_uint32 timeout = ts->s.so.rcvtimeo_msec; - struct tcp_recv_info rinf; + ci_uint32 timeout = ts->s.so.rcvtimeo_msec; + struct tcp_recv_info rinf; ci_assert(a); ci_assert(ni); @@ -763,26 +750,26 @@ static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, ci_assert(a->msg); rinf.stack_locked = 0; - rinf.a = a; - rinf.rc = 0; - rinf.msg_flags = 0; - rinf.copier = copier; - rinf.zc_args = zc_args; + rinf.a = a; + rinf.rc = 0; + rinf.msg_flags = 0; + rinf.copier = copier; + rinf.zc_args = zc_args; #ifdef __KERNEL__ rinf.controllen = 0; #else - rinf.controllen = a->msg->msg_controllen; + rinf.controllen = a->msg->msg_controllen; a->msg->msg_controllen = 0; #endif /* Grab the per-socket lock so we can access the receive queue. 
*/ rinf.rc = ci_sock_lock(ni, &ts->s.b); - if(CI_UNLIKELY( rinf.rc != 0 )) + if( CI_UNLIKELY(rinf.rc != 0) ) return rinf.rc; - if( ts->s.b.state == CI_TCP_LISTEN ) goto check_errno; + if( ts->s.b.state == CI_TCP_LISTEN ) + goto check_errno; - have_polled = 0; ci_assert_equal(rinf.rc, 0); #ifndef __KERNEL__ @@ -794,26 +781,26 @@ static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, ci_tcp_recvmsg_init_piov(&rinf); - LOG_TR(log(LNTS_FMT "recvmsg len=%d flags=%x bytes_in_rxq=%d", - LNTS_PRI_ARGS(ni, ts), - zc_args ? -1 : ci_iovec_ptr_bytes_count(&rinf.piov), - flags, tcp_rcv_usr(ts))); + LOG_TR(log(LNTS_FMT "recvmsg len=%d flags=%x bytes_in_rxq=%d", + LNTS_PRI_ARGS(ni, ts), + zc_args ? -1 : ci_iovec_ptr_bytes_count(&rinf.piov), flags, + tcp_rcv_usr(ts))); #ifndef __KERNEL__ - tcp_recv_spin = - oo_per_thread_get()->spinstate & (1 << ONLOAD_SPIN_TCP_RECV); + tcp_recv_spin = oo_per_thread_get()->spinstate & (1 << ONLOAD_SPIN_TCP_RECV); #endif ci_frc64(&start_frc); - poll_recv_queue: +poll_recv_queue: rinf.rc += ci_tcp_recvmsg_get_inline(&rinf); /* Return immediately if we've filled the app's buffer(s). * In case of empty buffer, we should wait for socket to be readable. - */ + */ if( ci_iovec_ptr_is_empty_proper(&rinf.piov) && - ( rinf.rc != 0 || TCP_RX_DONE(ts) || tcp_rcv_usr(ts) ) ) { - if( CI_UNLIKELY(rinf.rc == 0) ) goto check_errno; + (rinf.rc != 0 || TCP_RX_DONE(ts) || tcp_rcv_usr(ts)) ) { + if( CI_UNLIKELY(rinf.rc == 0) ) + goto check_errno; goto success_unlock_out; } @@ -823,41 +810,30 @@ static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, if( (rinf.a->flags & ONLOAD_MSG_ONEPKT) && (rinf.rc > 0) ) goto success_unlock_out; - if( ! have_polled ) { - /* We've not yet filled the app's buffer. But the receive queue may - ** not be up-to-date, so we need to check that it is, or bring it - ** up-to-date ourselves. 
- */ - have_polled = 1; - - if( ci_netif_may_poll(ni) && ci_netif_need_poll_spinning(ni, start_frc) ) { - if( ci_netif_trylock(ni) ) { - ci_uint32 rcv_added_before = ts->rcv_added; - int any_evs = ci_netif_poll(ni); - if( ts->rcv_added != rcv_added_before ) { - /* We've handled some events, but possibly not all. So if the - * events we've handled do not satisfy the request, we need to - * ensure we come back and poll some more. - */ - have_polled = 0; + /* We've not yet filled the app's buffer. But the receive queue may + ** not be up-to-date, so we need to check that it is, or bring it + ** up-to-date ourselves. + */ + + if( ci_netif_may_poll(ni) && ci_netif_need_poll_spinning(ni, start_frc) ) { + if( ci_netif_trylock(ni) ) { + ci_uint32 rcv_added_before = ts->rcv_added; + int any_evs = ci_netif_poll(ni); + if( any_evs ) + ci_netif_poll(ni); + ci_netif_unlock(ni); + if( ts->rcv_added != rcv_added_before ) { + if( (flags & MSG_PEEK) ) { + ci_tcp_recvmsg_init_piov(&rinf); + rinf.rc = 0; } - else if( any_evs ) - ci_netif_poll(ni); - ci_netif_unlock(ni); - if( ts->rcv_added != rcv_added_before ) { - if( (flags & MSG_PEEK) ) { - ci_tcp_recvmsg_init_piov(&rinf); - rinf.rc = 0; - } - goto poll_recv_queue; - } - } - else { - /* The netif lock is contended, so the chances are we're up-to-date. - ** Even if we're not, at least we will be soon. So we pretend we are - ** up-to-date, and continue... - */ + goto poll_recv_queue; } + } else { + /* The netif lock is contended, so the chances are we're up-to-date. + ** Even if we're not, at least we will be soon. So we pretend we are + ** up-to-date, and continue... + */ } } @@ -866,7 +842,7 @@ static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, */ /* \todo For MSG_PEEK, we always will re-copy all data if we did not * filled user buffer. 
*/ - if(CI_UNLIKELY( OO_PP_NOT_NULL(ts->recv2.head) )) + if( CI_UNLIKELY(OO_PP_NOT_NULL(ts->recv2.head)) ) if( ci_tcp_recvmsg_recv2(&rinf) ) goto success_unlock_out; @@ -874,10 +850,12 @@ static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, ** haven't filled the app's buffer. */ - if( rinf.rc && FLAGS_AND_LOWAT_PERMIT_FAST_RET_WITH_DATA(ts, rinf.rc, flags) ) + if( rinf.rc && + FLAGS_AND_LOWAT_PERMIT_FAST_RET_WITH_DATA(ts, rinf.rc, flags) ) goto success_unlock_out; - if( TCP_RX_DONE(ts) ) goto rx_done; + if( TCP_RX_DONE(ts) ) + goto rx_done; if( rinf.rc == 0 && (flags & MSG_DONTWAIT) ) { rinf.rc = -EAGAIN; @@ -885,7 +863,7 @@ static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, } /* Must not delay return if we have any data and are peeking. */ - ci_assert(!(flags & MSG_PEEK) || rinf.rc == 0); + ci_assert(! (flags & MSG_PEEK) || rinf.rc == 0); #ifndef __KERNEL__ /* Spin (if enabled) until timeout, or something happens, or we get @@ -920,8 +898,10 @@ static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, sleep_seq = ts->s.b.sleep_seq.all; ci_rmb(); - if( tcp_rcv_usr(ts) ) goto poll_recv_queue; - if( TCP_RX_DONE(ts) ) goto rx_done; + if( tcp_rcv_usr(ts) ) + goto poll_recv_queue; + if( TCP_RX_DONE(ts) ) + goto rx_done; /* ?? TODO: lock recv queue so other thread can't get in in middle of our ** receive. NB. Need to check what happens on Linux if one thread blocks @@ -933,30 +913,28 @@ static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, int rc2; /* This function drops the socket lock, and returns unlocked. */ - ci_assert(!rinf.stack_locked); + ci_assert(! rinf.stack_locked); rc2 = ci_sock_sleep(ni, &ts->s.b, CI_SB_FLAG_WAKE_RX, - CI_SLEEP_SOCK_LOCKED | CI_SLEEP_SOCK_RQ, - sleep_seq, &timeout); + CI_SLEEP_SOCK_LOCKED | CI_SLEEP_SOCK_RQ, sleep_seq, &timeout); if( rc2 == 0 ) rc2 = ci_sock_lock(ni, &ts->s.b); if( rc2 < 0 ) { /* If we've received anything at all, we must say how much. 
*/ if( rinf.rc ) { #ifndef __KERNEL__ - ci_tcp_recv_fill_msgname(ts, (struct sockaddr*) a->msg->msg_name, - &a->msg->msg_namelen); + ci_tcp_recv_fill_msgname( + ts, (struct sockaddr*) a->msg->msg_name, &a->msg->msg_namelen); #endif } else rinf.rc = rc2; goto out; } } - ci_assert(have_polled); goto poll_recv_queue; #ifndef __KERNEL__ - slow_path: +slow_path: if( flags & MSG_ERRQUEUE ) { #if CI_CFG_TIMESTAMPING @@ -979,7 +957,7 @@ static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, ci_rmb(); if( ! (pkt->flags & - (CI_PKT_FLAG_TX_TIMESTAMPED | CI_PKT_FLAG_INDIRECT)) ) { + (CI_PKT_FLAG_TX_TIMESTAMPED | CI_PKT_FLAG_INDIRECT)) ) { if( ! rinf.stack_locked ) { ci_netif_lock(ni); rinf.stack_locked = 1; @@ -989,75 +967,75 @@ static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, goto slow_path; } - a->msg->msg_controllen = rinf.controllen; - cmsg_state.msg = a->msg; - cmsg_state.cm = a->msg->msg_control; + a->msg->msg_controllen = rinf.controllen; + cmsg_state.msg = a->msg; + cmsg_state.cm = a->msg->msg_control; cmsg_state.cmsg_bytes_used = 0; - cmsg_state.p_msg_flags = &rinf.msg_flags; + cmsg_state.p_msg_flags = &rinf.msg_flags; if( pkt->flags & CI_PKT_FLAG_TX_TIMESTAMPED ) { if( ts->s.timestamping_flags & ONLOAD_SOF_TIMESTAMPING_ONLOAD ) { if( pkt->flags & ~CI_PKT_FLAG_RTQ_RETRANS ) { - struct onload_timestamp ts = {pkt->hw_stamp.tv_sec, - pkt->hw_stamp.tv_nsec}; + struct onload_timestamp ts = { pkt->hw_stamp.tv_sec, + pkt->hw_stamp.tv_nsec }; ci_put_cmsg(&cmsg_state, SOL_SOCKET, ONLOAD_SCM_TIMESTAMPING, - sizeof(ts), &ts); - } - else { + sizeof(ts), &ts); + } else { /* Ignore retransmit timestamps. 
We might want something like - * ONLOAD_SCM_TIMESTAMPING_STREAM to report them along with the - * original transmission time */ + * ONLOAD_SCM_TIMESTAMPING_STREAM to report them along with the + * original transmission time */ goto timestamp_q_check; } - } - else { + } else { struct onload_scm_timestamping_stream stamps; - int tx_hw_stamp_in_sync; + int tx_hw_stamp_in_sync; memset(&stamps, 0, sizeof(stamps)); - tx_hw_stamp_in_sync = pkt->hw_stamp.tv_nsec & - CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC; + tx_hw_stamp_in_sync = + pkt->hw_stamp.tv_nsec & CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC; if( pkt->flags & CI_PKT_FLAG_RTQ_RETRANS ) { if( pkt->pf.tcp_tx.first_tx_hw_stamp.tv_nsec & CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC ) { - stamps.first_sent.tv_sec = pkt->pf.tcp_tx.first_tx_hw_stamp.tv_sec; - stamps.first_sent.tv_nsec = pkt->pf.tcp_tx.first_tx_hw_stamp.tv_nsec; + stamps.first_sent.tv_sec = + pkt->pf.tcp_tx.first_tx_hw_stamp.tv_sec; + stamps.first_sent.tv_nsec = + pkt->pf.tcp_tx.first_tx_hw_stamp.tv_nsec; } if( tx_hw_stamp_in_sync ) { - stamps.last_sent.tv_sec = pkt->hw_stamp.tv_sec; + stamps.last_sent.tv_sec = pkt->hw_stamp.tv_sec; stamps.last_sent.tv_nsec = pkt->hw_stamp.tv_nsec; } - } - else if( tx_hw_stamp_in_sync ) { - stamps.first_sent.tv_sec = pkt->hw_stamp.tv_sec; + } else if( tx_hw_stamp_in_sync ) { + stamps.first_sent.tv_sec = pkt->hw_stamp.tv_sec; stamps.first_sent.tv_nsec = pkt->hw_stamp.tv_nsec; } stamps.len = pkt->pf.tcp_tx.end_seq - pkt->pf.tcp_tx.start_seq; - /* FIN and SYN eat seq space, but the user is not interested in them */ + /* FIN and SYN eat seq space, but the user is not interested in them + */ if( TX_PKT_IPX_TCP(ipcache_af(&ts->s.pkt), pkt)->tcp_flags & - (CI_TCP_FLAG_SYN|CI_TCP_FLAG_FIN) ) + (CI_TCP_FLAG_SYN | CI_TCP_FLAG_FIN) ) stamps.len--; ci_put_cmsg(&cmsg_state, SOL_SOCKET, ONLOAD_SCM_TIMESTAMPING_STREAM, - sizeof(stamps), &stamps); + sizeof(stamps), &stamps); } } if( pkt->flags & CI_PKT_FLAG_INDIRECT ) { - struct ci_pkt_zc_header* zch = 
oo_tx_zc_header(pkt); + struct ci_pkt_zc_header* zch = oo_tx_zc_header(pkt); struct ci_pkt_zc_payload* zcp; - OO_TX_FOR_EACH_ZC_PAYLOAD(ni, zch, zcp) { + OO_TX_FOR_EACH_ZC_PAYLOAD(ni, zch, zcp) + { if( zcp->is_remote && zcp->use_remote_cookie ) { ci_put_cmsg(&cmsg_state, SOL_IP, ONLOAD_SO_ONLOADZC_COMPLETE, - sizeof(zcp->remote.app_cookie), - &zcp->remote.app_cookie); + sizeof(zcp->remote.app_cookie), &zcp->remote.app_cookie); } } } ci_ip_cmsg_finish(&cmsg_state); rinf.msg_flags |= MSG_ERRQUEUE; - + /* Wake up TX if necessary as a result of delivering from timestamp_q */ if( NI_OPTS(ni).tcp_sndbuf_mode >= 1 && ci_tcp_tx_advertise_space(ni, ts) ) { @@ -1070,8 +1048,7 @@ static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, rinf.rc = 0; goto unlock_out; - } - else { + } else { /* Try polling to see if there is a TX timestamp event available * to satisfy this request */ @@ -1083,9 +1060,8 @@ static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, if( pkt != NULL ) { if( ! (pkt->flags & CI_PKT_FLAG_TX_PENDING) ) goto timestamp_q_nonempty; - } - else if( (pkt = ci_udp_recv_q_get(ni, &ts->timestamp_q)) != NULL - && ! (pkt->flags & CI_PKT_FLAG_TX_PENDING) ) { + } else if( (pkt = ci_udp_recv_q_get(ni, &ts->timestamp_q)) != NULL && + ! (pkt->flags & CI_PKT_FLAG_TX_PENDING) ) { goto timestamp_q_nonempty; } } @@ -1098,50 +1074,53 @@ static inline int ci_tcp_recvmsg_impl(const ci_tcp_recvmsg_args* a, ci_assert(flags & MSG_OOB); rinf.rc = ci_tcp_recvmsg_urg(&rinf); - if( rinf.rc >= 0 ) goto success_unlock_out; + if( rinf.rc >= 0 ) + goto success_unlock_out; goto unlock_out; #endif - rx_done: - if( tcp_rcv_usr(ts) && !ci_iovec_ptr_is_empty_proper(&rinf.piov) ) +rx_done: + if( tcp_rcv_usr(ts) && ! ci_iovec_ptr_is_empty_proper(&rinf.piov) ) /* Race breaker: rx_errno can get updated asynchronously just after ** we've looked at the receive queue. We need to go back and get that ** data. 
*/ goto poll_recv_queue; - if( rinf.rc ) goto success_unlock_out; - check_errno: + if( rinf.rc ) + goto success_unlock_out; +check_errno: /* tcp recv() does not set errno if the connection was properly shut down */ if( ts->tcpflags & CI_TCPT_FLAG_FIN_RECEIVED ) goto unlock_out; - if (ts->s.so_error) { + if( ts->s.so_error ) { ci_int32 rc1 = ci_get_so_error(&ts->s); - if (rc1 != 0) + if( rc1 != 0 ) rinf.rc = -rc1; } else if( TCP_RX_ERRNO(ts) ) { rinf.rc = -TCP_RX_ERRNO(ts); } goto unlock_out; - success_unlock_out: +success_unlock_out: #ifndef __KERNEL__ ci_tcp_recv_fill_msgname(ts, (struct sockaddr*) a->msg->msg_name, - &a->msg->msg_namelen); /*!\TODO fixme remove cast*/ + &a->msg->msg_namelen); /*!\TODO fixme remove cast*/ #endif - unlock_out: +unlock_out: /* If we've received FIN and RXQ is empty, let's reap it. * See the counterpart in ci_tcp_rx_process_fin(), if FIN arrives with * the empty receive queue. */ - if( ( ( (ts->s.b.state & CI_TCP_STATE_RECVD_FIN) && tcp_rcv_usr(ts) == 0 ) - || ni->state->mem_pressure ) && ci_netif_trylock(ni) ) { + if( (((ts->s.b.state & CI_TCP_STATE_RECVD_FIN) && tcp_rcv_usr(ts) == 0) || + ni->state->mem_pressure) && + ci_netif_trylock(ni) ) { ci_tcp_rx_reap_rxq_bufs_socklocked(ni, ts); ci_netif_unlock(ni); } ci_sock_unlock(ni, &ts->s.b); - out: - if(CI_UNLIKELY( ni->state->rxq_low )) +out: + if( CI_UNLIKELY(ni->state->rxq_low) ) ci_netif_rxq_low_on_recv(ni, &ts->s, rinf.rc); #ifndef __KERNEL__ if( rinf.rc >= 0 ) @@ -1161,8 +1140,7 @@ int ci_tcp_recvmsg(const ci_tcp_recvmsg_args* a) static void move_from_recv2_to_recv1(ci_netif* ni, ci_tcp_state* ts, - ci_ip_pkt_fmt* head, - ci_ip_pkt_fmt* tail, int n) + ci_ip_pkt_fmt* head, ci_ip_pkt_fmt* tail, int n) { /* Move the [n] packets from [head] to [tail] inclusive from the ** beginning of [recv2] to [recv1]. 
If [recv2] is emptied, switch back @@ -1180,8 +1158,9 @@ static void move_from_recv2_to_recv1(ci_netif* ni, ci_tcp_state* ts, if( n ) { LOG_URG(log(NTS_FMT "recvmsg: moving %d pkts from recv2 to recv1", - NTS_PRI_ARGS(ni, ts), n)); - /* as this is move between recv queues - no pkt receive adjustment needed */ + NTS_PRI_ARGS(ni, ts), n)); + /* as this is move between recv queues - no pkt receive adjustment needed + */ ci_ip_queue_move(ni, recv2, recv1, tail, n); /* The extract pointer can only be made -ve when the receive queues are ** emptied (and both locks are held). It can only be -ve here if after @@ -1190,43 +1169,42 @@ static void move_from_recv2_to_recv1(ci_netif* ni, ci_tcp_state* ts, */ if( OO_PP_IS_NULL(ts->recv1_extract) ) { ts->recv1_extract = recv1->head; - } - else { + } else { /* * must point to an emptied packet * - pull up to the first packet moved from recv2 */ ci_assert(oo_offbuf_is_empty(&(PKT_CHK(ni, ts->recv1_extract)->buf))); ts->recv1_extract = OO_PKT_P(head); - ci_assert_impl(OO_PP_IS_NULL(recv1->head), - OO_PP_IS_NULL(ts->recv1_extract)); + ci_assert_impl( + OO_PP_IS_NULL(recv1->head), OO_PP_IS_NULL(ts->recv1_extract)); } - } /* If we've managed to empty recv2, and we're not still waiting for the * urgent data to arrive, then we can switch back to recv1. */ - if( OO_PP_IS_NULL(recv2->head) && !(tcp_urg_data(ts) & CI_TCP_URG_COMING) ) { + if( OO_PP_IS_NULL(recv2->head) && + ! (tcp_urg_data(ts) & CI_TCP_URG_COMING) ) { LOG_URG(log(NTS_FMT "recvmsg: switch to recv1", NTS_PRI_ARGS(ni, ts))); TS_QUEUE_RX_SET(ts, recv1); - ci_assert(!(tcp_urg_data(ts) & CI_TCP_URG_PTR_VALID)); + ci_assert(! 
(tcp_urg_data(ts) & CI_TCP_URG_PTR_VALID)); } } #ifndef __KERNEL__ -static int ci_tcp_recvmsg_urg(struct tcp_recv_info *rinf) +static int ci_tcp_recvmsg_urg(struct tcp_recv_info* rinf) { - ci_netif* ni = rinf->a->ni; - ci_tcp_state* ts = rinf->a->ts; + ci_netif* ni = rinf->a->ni; + ci_tcp_state* ts = rinf->a->ts; struct msghdr* msg = rinf->a->msg; - ci_iovec_ptr piov; - ci_uint8 oob; - int can_write; - int rc = 0; + ci_iovec_ptr piov; + ci_uint8 oob; + int can_write; + int rc = 0; - if( !rinf->stack_locked ) { + if( ! rinf->stack_locked ) { rc = ci_netif_lock(ni); if( rc != 0 ) return rc; @@ -1238,7 +1216,7 @@ static int ci_tcp_recvmsg_urg(struct tcp_recv_info *rinf) ci_assert(msg->msg_iovlen > 0); ci_iovec_ptr_init_nz(&piov, msg->msg_iov, msg->msg_iovlen); - can_write = !ci_iovec_ptr_is_empty_proper(&piov); + can_write = ! ci_iovec_ptr_is_empty_proper(&piov); if( ts->s.s_flags & CI_SOCK_FLAG_OOBINLINE ) { LOG_URG(ci_log("%s: OOBINLINE is set, rc=-EINVAL", __FUNCTION__)); @@ -1260,7 +1238,7 @@ static int ci_tcp_recvmsg_urg(struct tcp_recv_info *rinf) goto out; } - if (ts->s.b.state == CI_TCP_CLOSED) { + if( ts->s.b.state == CI_TCP_CLOSED ) { LOG_URG(ci_log("%s: tcp state is CLOSED, rc=0", __FUNCTION__)); goto out; } @@ -1271,11 +1249,12 @@ static int ci_tcp_recvmsg_urg(struct tcp_recv_info *rinf) oob = tcp_urg_data(ts) & CI_TCP_URG_DATA_MASK; rinf->msg_flags |= MSG_OOB; - LOG_URG(ci_log("Reading OOB byte, oob=0x%X, flags=0x%X", oob, rinf->a->flags)); + LOG_URG( + ci_log("Reading OOB byte, oob=0x%X, flags=0x%X", oob, rinf->a->flags)); /* if we are not in peek mode, mark the oob state as read */ - if (~rinf->a->flags & MSG_PEEK) - tcp_urg_data(ts) &=~ (CI_TCP_URG_IS_HERE | CI_TCP_URG_DATA_MASK); + if( ~rinf->a->flags & MSG_PEEK ) + tcp_urg_data(ts) &= ~(CI_TCP_URG_IS_HERE | CI_TCP_URG_DATA_MASK); /*! Linux appears to treat the MSG_TRUNC flag, in TCP, as a * "PEEK and clear data" flag. 
@@ -1295,10 +1274,10 @@ static int ci_tcp_recvmsg_urg(struct tcp_recv_info *rinf) ** ci_iovec_ptr_is_empty_proper() above has moved us to a non-zero-length ** buffer, so we can just copy the byte here. */ - *(char*)CI_IOVEC_BASE(&piov.io) = oob; - rc = 1; + *(char*) CI_IOVEC_BASE(&piov.io) = oob; + rc = 1; - out: +out: CHECK_TS(ni, ts); ci_netif_unlock(ni); rinf->stack_locked = 0; @@ -1307,32 +1286,32 @@ static int ci_tcp_recvmsg_urg(struct tcp_recv_info *rinf) #endif -static void ci_tcp_recvmsg_recv2_peek2(struct tcp_recv_info *rinfo, - int start_skip, int stop_at_mark, - unsigned rd_nxt_seq) +static void ci_tcp_recvmsg_recv2_peek2(struct tcp_recv_info* rinfo, + int start_skip, int stop_at_mark, unsigned rd_nxt_seq) { - /* + /* * This function is used to peek at data on recv2. Either to look a data ** before the mark, or at data after the OOB byte. - * + * * Windows: unlike normal reads, peeks will not read past any OOBB */ - ci_tcp_state* ts = rinfo->a->ts; - ci_netif* ni = rinfo->a->ni; + ci_tcp_state* ts = rinfo->a->ts; + ci_netif* ni = rinfo->a->ni; ci_ip_pkt_queue* recv2 = &ts->recv2; - ci_ip_pkt_fmt* pkt = PKT_CHK(ni, recv2->head); - oo_offbuf* buf = &pkt->buf; - int rc, n, peek_off = start_skip; - int orig_buf_end; + ci_ip_pkt_fmt* pkt = PKT_CHK(ni, recv2->head); + oo_offbuf* buf = &pkt->buf; + int rc, n, peek_off = start_skip; + int orig_buf_end; ci_assert(oo_offbuf_left(buf) >= start_skip); ci_assert(tcp_urg_data(ts) & CI_TCP_URG_PTR_VALID); - ci_assert(!stop_at_mark || SEQ_LE(rd_nxt_seq, tcp_rcv_up(ts))); + ci_assert(! 
stop_at_mark || SEQ_LE(rd_nxt_seq, tcp_rcv_up(ts))); - LOG_URG(log(LNTS_FMT "recv2_peek: so_far=%d skip=%d stop@mark=%d " - "rd_nxt_seq=%08x rcv_up=%08x", LNTS_PRI_ARGS(ni, ts), - rinfo->rc, start_skip, stop_at_mark, - rd_nxt_seq, tcp_rcv_up(ts))); + LOG_URG( + log(LNTS_FMT "recv2_peek: so_far=%d skip=%d stop@mark=%d " + "rd_nxt_seq=%08x rcv_up=%08x", + LNTS_PRI_ARGS(ni, ts), rinfo->rc, start_skip, stop_at_mark, + rd_nxt_seq, tcp_rcv_up(ts))); rd_nxt_seq += start_skip; @@ -1351,13 +1330,13 @@ static void ci_tcp_recvmsg_recv2_peek2(struct tcp_recv_info *rinfo, } rc = 0; - n = rinfo->copier(ni, rinfo, pkt, peek_off, &rc); + n = rinfo->copier(ni, rinfo, pkt, peek_off, &rc); ci_assert_equal(n, rc); /* zc shenanigans not supported with urgent data */ pkt->buf.end = orig_buf_end; #ifdef __KERNEL__ if( n < 0 ) { - LOG_URG(log(LNTS_FMT "%s: copy_to_user returned %d", - LNTS_PRI_ARGS(ni, ts), __FUNCTION__, n)); + LOG_URG(log(LNTS_FMT "%s: copy_to_user returned %d", + LNTS_PRI_ARGS(ni, ts), __FUNCTION__, n)); if( rinfo->rc == 0 ) rinfo->rc = n; break; @@ -1370,34 +1349,34 @@ static void ci_tcp_recvmsg_recv2_peek2(struct tcp_recv_info *rinfo, if( ! iovec_roll_over(&rinfo->piov) ) break; if( oo_offbuf_left(buf) - peek_off == 0 ) { - if( OO_PP_IS_NULL(pkt->next) ) - break; - pkt = PKT_CHK(ni, pkt->next); - buf = &pkt->buf; + if( OO_PP_IS_NULL(pkt->next) ) + break; + pkt = PKT_CHK(ni, pkt->next); + buf = &pkt->buf; peek_off = 0; } } } -static int ci_tcp_recvmsg_recv2_peek(struct tcp_recv_info *rinf) +static int ci_tcp_recvmsg_recv2_peek(struct tcp_recv_info* rinf) { - ci_tcp_state* ts = rinf->a->ts; - ci_netif* ni = rinf->a->ni; + ci_tcp_state* ts = rinf->a->ts; + ci_netif* ni = rinf->a->ni; ci_ip_pkt_queue* recv2 = &ts->recv2; - ci_ip_pkt_fmt* pkt; - int skip, stop_at_mark; - unsigned rd_nxt_seq; - int af = ipcache_af(&ts->s.pkt); + ci_ip_pkt_fmt* pkt; + int skip, stop_at_mark; + unsigned rd_nxt_seq; + int af = ipcache_af(&ts->s.pkt); - if( !rinf->stack_locked ) { + if( ! 
rinf->stack_locked ) { int rc = ci_netif_lock(ni); if( rc != 0 ) return rc; rinf->stack_locked = 1; } - pkt = PKT_CHK(ni, recv2->head); + pkt = PKT_CHK(ni, recv2->head); rd_nxt_seq = PKT_IPX_RX_BUF_SEQ(af, pkt); /* Double-check for packets added to recv1 after we finished sucking data @@ -1405,7 +1384,7 @@ static int ci_tcp_recvmsg_recv2_peek(struct tcp_recv_info *rinf) */ if( OO_PP_NOT_NULL(ts->recv1_extract) ) { ci_ip_pkt_fmt* r1pkt = PKT_CHK(ni, ts->recv1_extract); - unsigned seq = PKT_IPX_RX_BUF_SEQ(af, r1pkt) + rinf->rc; + unsigned seq = PKT_IPX_RX_BUF_SEQ(af, r1pkt) + rinf->rc; /* We think we've read everything in recv1, and [seq] points just ** beyond that. So it ought to match the beginning of recv2. If it ** doesn't, then something else has been added to recv1. @@ -1423,23 +1402,22 @@ static int ci_tcp_recvmsg_recv2_peek(struct tcp_recv_info *rinf) ** it. Otherwise peek the data up to the mark. */ if( tcp_rcv_up(ts) == rd_nxt_seq ) { - skip = !(ts->s.s_flags & CI_SOCK_FLAG_OOBINLINE); + skip = ! (ts->s.s_flags & CI_SOCK_FLAG_OOBINLINE); stop_at_mark = 0; - } - else { - skip = 0; + } else { + skip = 0; stop_at_mark = 1; } ci_tcp_recvmsg_recv2_peek2(rinf, skip, stop_at_mark, rd_nxt_seq); - out: +out: ci_netif_unlock(ni); rinf->stack_locked = 0; return rinf->rc; } -static int ci_tcp_recvmsg_handle_race(struct tcp_recv_info *rinf) +static int ci_tcp_recvmsg_handle_race(struct tcp_recv_info* rinf) { int rc; @@ -1468,32 +1446,33 @@ ci_inline int ci_tcp_recv1_is_empty(ci_netif* ni, ci_tcp_state* ts) /* NB. The first buffer pointed to by the extract pointer may be empty, ** but any subsequent ones must not be. 
*/ - ci_ip_pkt_fmt *pkt; - if( OO_PP_IS_NULL(ts->recv1_extract) ) return 1; + ci_ip_pkt_fmt* pkt; + if( OO_PP_IS_NULL(ts->recv1_extract) ) + return 1; pkt = PKT_CHK_NNL(ni, ts->recv1_extract); return oo_offbuf_is_empty(&pkt->buf) && OO_PP_IS_NULL(pkt->next); } -static int ci_tcp_recvmsg_recv2(struct tcp_recv_info *rinf) +static int ci_tcp_recvmsg_recv2(struct tcp_recv_info* rinf) { - ci_tcp_state* ts = rinf->a->ts; - ci_netif* ni = rinf->a->ni; + ci_tcp_state* ts = rinf->a->ts; + ci_netif* ni = rinf->a->ni; ci_ip_pkt_queue* recv2 = &ts->recv2; - ci_ip_pkt_fmt* pkt, *head_pkt, *tail_pkt; - oo_offbuf* buf; - unsigned rd_nxt_seq, n; - int must_return_from_recv = 0; - int af = ipcache_af(&ts->s.pkt); + ci_ip_pkt_fmt * pkt, *head_pkt, *tail_pkt; + oo_offbuf* buf; + unsigned rd_nxt_seq, n; + int must_return_from_recv = 0; + int af = ipcache_af(&ts->s.pkt); if( rinf->a->flags & MSG_PEEK ) return ci_tcp_recvmsg_recv2_peek(rinf); - again: +again: LOG_URG(ci_log("%s: again rc=%d", __FUNCTION__, rinf->rc)); - + ci_assert(ci_sock_is_locked(ni, &ts->s.b)); - if( !rinf->stack_locked ) { + if( ! rinf->stack_locked ) { int rc = ci_netif_lock(ni); if( rc != 0 ) return rc; @@ -1504,7 +1483,8 @@ static int ci_tcp_recvmsg_recv2(struct tcp_recv_info *rinf) /* Double-check for packets added to recv1. */ if( ! ci_tcp_recv1_is_empty(ni, ts) ) { must_return_from_recv = ci_tcp_recvmsg_handle_race(rinf); - if( must_return_from_recv ) goto unlock_out; + if( must_return_from_recv ) + goto unlock_out; } ci_assert(ci_tcp_recv1_is_empty(ni, ts)); @@ -1516,10 +1496,10 @@ static int ci_tcp_recvmsg_recv2(struct tcp_recv_info *rinf) /* Calculate the sequence number of the first un-read byte in this pkt. 
*/ rd_nxt_seq = PKT_IPX_RX_BUF_SEQ(af, pkt); - LOG_URG(log("%s: "NTS_FMT "so_far=%d flags=%x nxt_seq=%08x rcv_up=%08x " - "urg_data=%03x", __FUNCTION__, NTS_PRI_ARGS(ni, ts), - rinf->rc, rinf->a->flags, rd_nxt_seq, tcp_rcv_up(ts), - tcp_urg_data(ts))); + LOG_URG(log("%s: " NTS_FMT "so_far=%d flags=%x nxt_seq=%08x rcv_up=%08x " + "urg_data=%03x", + __FUNCTION__, NTS_PRI_ARGS(ni, ts), rinf->rc, rinf->a->flags, rd_nxt_seq, + tcp_rcv_up(ts), tcp_urg_data(ts))); ci_assert(tcp_urg_data(ts) & CI_TCP_URG_PTR_VALID); @@ -1541,8 +1521,8 @@ static int ci_tcp_recvmsg_recv2(struct tcp_recv_info *rinf) **/ if( tcp_rcv_up(ts) == rd_nxt_seq || rinf->zc_args ) { /* We are staring at the urgent byte. */ - LOG_URG(ci_log("%s: We're staring at the oob byte and rc=%d", - __FUNCTION__, rinf->rc)); + LOG_URG(ci_log("%s: We're staring at the oob byte and rc=%d", __FUNCTION__, + rinf->rc)); /* * windows allows in-band reads to pass the mark - so don't quit here @@ -1550,11 +1530,11 @@ static int ci_tcp_recvmsg_recv2(struct tcp_recv_info *rinf) if( rinf->rc && ! rinf->zc_args ) { /* We've consumed some data, so stop at the mark. */ LOG_URG(ci_log("%s: We're staring at the oob byte and rc=%d", - __FUNCTION__, rinf->rc)); + __FUNCTION__, rinf->rc)); must_return_from_recv = 1; goto unlock_out; } - + if( ! (ts->s.s_flags & CI_SOCK_FLAG_OOBINLINE) && ! oo_offbuf_is_empty(buf) ) { @@ -1575,7 +1555,8 @@ static int ci_tcp_recvmsg_recv2(struct tcp_recv_info *rinf) * - so leave as valid */ tcp_urg_data_invalidate(ts); - move_from_recv2_to_recv1(ni, ts, pkt, PKT_CHK(ni,recv2->tail), recv2->num); + move_from_recv2_to_recv1( + ni, ts, pkt, PKT_CHK(ni, recv2->tail), recv2->num); ci_assert(OO_PP_IS_NULL(recv2->head)); ci_assert(TS_QUEUE_RX(ts) == &ts->recv1); ci_netif_unlock(ni); @@ -1588,12 +1569,13 @@ static int ci_tcp_recvmsg_recv2(struct tcp_recv_info *rinf) ** packets that come before the mark. 
*/ head_pkt = pkt; - n = 0; + n = 0; tail_pkt = 0; /* just to suppress compiler warning */ while( SEQ_GE(tcp_rcv_up(ts), pkt->pf.tcp_rx.end_seq) ) { tail_pkt = pkt; ++n; - if( OO_PP_IS_NULL(pkt->next) ) break; + if( OO_PP_IS_NULL(pkt->next) ) + break; pkt = PKT_CHK(ni, pkt->next); } if( n ) { @@ -1607,26 +1589,29 @@ static int ci_tcp_recvmsg_recv2(struct tcp_recv_info *rinf) rinf->stack_locked = 0; /* Pull data out of recv1 and return if we fill app's buffer. */ rinf->rc += ci_tcp_recvmsg_get_outofline(rinf); - must_return_from_recv = ci_iovec_ptr_is_empty_proper(&rinf->piov) || - ((rinf->a->flags & ONLOAD_MSG_ONEPKT) && (rinf->rc > 0)); - if( must_return_from_recv ) goto out; + must_return_from_recv = + ci_iovec_ptr_is_empty_proper(&rinf->piov) || + ((rinf->a->flags & ONLOAD_MSG_ONEPKT) && (rinf->rc > 0)); + if( must_return_from_recv ) + goto out; /* May need to pull some more from recv2 before the mark. NB. Can't ** just fall through to the code below, because the mark may have moved ** forward because we dropped the netif lock. */ - if( OO_PP_NOT_NULL(recv2->head) ) goto again; + if( OO_PP_NOT_NULL(recv2->head) ) + goto again; goto out; - } - else { + } else { /* The packet at the head of recv2 (if any) contains normal data ** followed by urgent data. So read the normal data. */ int n; ci_assert(! rinf->zc_args); - if( OO_PP_IS_NULL(recv2->head) ) goto unlock_out; - n = tcp_rcv_up(ts) - rd_nxt_seq; /* number of normal bytes */ - LOG_URG(ci_log("%s: reading %d bytes from urg segment before OOBB", - __FUNCTION__, n)); + if( OO_PP_IS_NULL(recv2->head) ) + goto unlock_out; + n = tcp_rcv_up(ts) - rd_nxt_seq; /* number of normal bytes */ + LOG_URG(ci_log( + "%s: reading %d bytes from urg segment before OOBB", __FUNCTION__, n)); ci_assert(n > 0); ci_assert_lt(n, oo_offbuf_left(buf)); n = ci_copy_to_iovec(&rinf->piov, oo_offbuf_ptr(buf), n); @@ -1638,29 +1623,29 @@ static int ci_tcp_recvmsg_recv2(struct tcp_recv_info *rinf) ** recvmsg() can return now. 
*/ must_return_from_recv = 1; - } - unlock_out: +unlock_out: CHECK_TS(ni, ts); if( rinf->stack_locked ) { ci_netif_unlock(ni); rinf->stack_locked = 0; } - out: +out: if( NI_OPTS(ni).tcp_rcvbuf_mode == 1 ) ci_tcp_rcvbuf_drs(ni, ts); /* Must return if we've filled the app buffer. */ - must_return_from_recv |= ci_iovec_ptr_is_empty_proper(&rinf->piov) || - ((rinf->a->flags & ONLOAD_MSG_ONEPKT) && (rinf->rc > 0)); - - LOG_URG(ci_log("%s: returning %d rc=%d " - "ci_iovec_ptr_is_empty_proper=%d", - __FUNCTION__, must_return_from_recv, - rinf->rc, - ci_iovec_ptr_is_empty_proper(&rinf->piov))); - + must_return_from_recv |= + ci_iovec_ptr_is_empty_proper(&rinf->piov) || + ((rinf->a->flags & ONLOAD_MSG_ONEPKT) && (rinf->rc > 0)); + + LOG_URG( + ci_log("%s: returning %d rc=%d " + "ci_iovec_ptr_is_empty_proper=%d", + __FUNCTION__, must_return_from_recv, rinf->rc, + ci_iovec_ptr_is_empty_proper(&rinf->piov))); + return must_return_from_recv; } @@ -1668,36 +1653,36 @@ static int ci_tcp_recvmsg_recv2(struct tcp_recv_info *rinf) #ifndef __KERNEL__ #if CI_CFG_TCP_OFFLOAD_RECYCLER -#define CI_ZC_IOV_STATIC_MAX 32 +#define CI_ZC_IOV_STATIC_MAX 32 CI_BUILD_ASSERT((ONLOAD_ZC_RECV_FLAG_OFFLOAD_OOB & - ~ONLOAD_ZC_RECV_FLAG_OFFLOAD_RESERVED) == 0); + ~ONLOAD_ZC_RECV_FLAG_OFFLOAD_RESERVED) == 0); static int zc_ceph_callback(ci_netif* netif, struct tcp_recv_info* rinf, - ci_ip_pkt_fmt* pkt, int peek_off, int* ndata) + ci_ip_pkt_fmt* pkt, int peek_off, int* ndata) { - int total = oo_offbuf_left(&pkt->buf); - int n = total; - char* p = oo_offbuf_ptr(&pkt->buf); - struct onload_zc_iovec static_iov[CI_ZC_IOV_STATIC_MAX]; - struct onload_zc_iovec* iov = static_iov; - int iovlen = 0; - int iov_max = CI_ZC_IOV_STATIC_MAX; - int out_rc = 0; + int total = oo_offbuf_left(&pkt->buf); + int n = total; + char* p = oo_offbuf_ptr(&pkt->buf); + struct onload_zc_iovec static_iov[CI_ZC_IOV_STATIC_MAX]; + struct onload_zc_iovec* iov = static_iov; + int iovlen = 0; + int iov_max = CI_ZC_IOV_STATIC_MAX; + int 
out_rc = 0; enum onload_zc_callback_rc cb_rc; - ssize_t overrun; + ssize_t overrun; /* Not currently a required feature, and a little tricky to get right: */ if( rinf->msg_flags & MSG_PEEK ) return -EOPNOTSUPP; while( n ) { - const int hdr_len = offsetof(struct ceph_data_pkt, data); + const int hdr_len = offsetof(struct ceph_data_pkt, data); struct ceph_data_pkt data; if( n < hdr_len ) { LOG_TR(log(LNTS_FMT "bogus plugin metastream len=%d", - LNTS_PRI_ARGS(netif, rinf->a->ts), n)); + LNTS_PRI_ARGS(netif, rinf->a->ts), n)); goto unrecoverable; } @@ -1712,15 +1697,15 @@ static int zc_ceph_callback(ci_netif* netif, struct tcp_recv_info* rinf, * the DDR ring buffer. A message will never require more than two * iovs: the plugin will not deliver a message longer than ddr_size. */ struct onload_zc_iovec* iov_new; - LOG_TR(log(LNTS_FMT "large number of iovs in metapkt (%d @ %d/%d)", - LNTS_PRI_ARGS(netif, rinf->a->ts), iovlen, - (int)(p - PKT_START(pkt)), - (int)(oo_offbuf_end(&pkt->buf) - PKT_START(pkt)))); + LOG_TR(log(LNTS_FMT "large number of iovs in metapkt (%d @ %d/%d)", + LNTS_PRI_ARGS(netif, rinf->a->ts), iovlen, + (int) (p - PKT_START(pkt)), + (int) (oo_offbuf_end(&pkt->buf) - PKT_START(pkt)))); iov_max = iov_max + (iov_max >> 1); - iov_new = realloc(iov == static_iov ? NULL : iov, - iov_max * sizeof(*iov)); + iov_new = + realloc(iov == static_iov ? NULL : iov, iov_max * sizeof(*iov)); if( ! 
iov_new ) { - log(LNTS_FMT "OOM growing iov array (%d)", + log(LNTS_FMT "OOM growing iov array (%d)", LNTS_PRI_ARGS(netif, rinf->a->ts), iov_max); goto unrecoverable; } @@ -1733,89 +1718,92 @@ static int zc_ceph_callback(ci_netif* netif, struct tcp_recv_info* rinf, /* NB: if adding a new msg_type here, don't forget that copy_ceph_pkt() * has a similar switch statement */ switch( data.msg_type ) { - case XSN_CEPH_DATA_INLINE: - if( n < data.msg_len ) { - LOG_TR(log(LNTS_FMT "bogus plugin inline len %d<%u", - LNTS_PRI_ARGS(netif, rinf->a->ts), n, data.msg_len)); - goto unrecoverable; - } - iov[iovlen].iov_base = p; - iov[iovlen].iov_len = data.msg_len; - iov[iovlen].addr_space = EF_ADDRSPACE_LOCAL; - iov[iovlen].buf = ONLOAD_ZC_HANDLE_NONZC; - iov[iovlen].iov_flags = 0; - out_rc += data.msg_len; - break; - - case XSN_CEPH_DATA_REMOTE: - if( n < sizeof(data.remote) || data.msg_len != sizeof(data.remote) ) { - LOG_TR(log(LNTS_FMT "bogus plugin remote block %d/%u", - LNTS_PRI_ARGS(netif, rinf->a->ts), n, data.msg_len)); - goto unrecoverable; - } - memcpy(&data.remote, p, sizeof(data.remote)); - iov[iovlen].iov_ptr = data.remote.start_ptr + rinf->a->ts->plugin_ddr_base; - iov[iovlen].iov_len = data.remote.data_len; - iov[iovlen].iov_flags = 0; - iov[iovlen].addr_space = netif->state->nic[pkt->intf_i].plugin_addr_space; - out_rc += data.remote.data_len; - - overrun = data.remote.start_ptr + data.remote.data_len - rinf->a->ts->plugin_ddr_size; - if( overrun > 0 ) { - /* This data wraps round the DDR ring buffer; the end of the data - * will be found at the start of the ring. 
*/ - iov[iovlen].iov_len -= overrun; - iov[iovlen].buf = ONLOAD_ZC_HANDLE_NONZC; + case XSN_CEPH_DATA_INLINE: + if( n < data.msg_len ) { + LOG_TR(log(LNTS_FMT "bogus plugin inline len %d<%u", + LNTS_PRI_ARGS(netif, rinf->a->ts), n, data.msg_len)); + goto unrecoverable; + } + iov[iovlen].iov_base = p; + iov[iovlen].iov_len = data.msg_len; + iov[iovlen].addr_space = EF_ADDRSPACE_LOCAL; + iov[iovlen].buf = ONLOAD_ZC_HANDLE_NONZC; + iov[iovlen].iov_flags = 0; + out_rc += data.msg_len; + break; - ++iovlen; - iov[iovlen].iov_ptr = rinf->a->ts->plugin_ddr_base; - iov[iovlen].iov_len = overrun; - iov[iovlen].addr_space = netif->state->nic[pkt->intf_i].plugin_addr_space; - iov[iovlen].buf = ONLOAD_ZC_HANDLE_NONZC; + case XSN_CEPH_DATA_REMOTE: + if( n < sizeof(data.remote) || data.msg_len != sizeof(data.remote) ) { + LOG_TR(log(LNTS_FMT "bogus plugin remote block %d/%u", + LNTS_PRI_ARGS(netif, rinf->a->ts), n, data.msg_len)); + goto unrecoverable; + } + memcpy(&data.remote, p, sizeof(data.remote)); + iov[iovlen].iov_ptr = + data.remote.start_ptr + rinf->a->ts->plugin_ddr_base; + iov[iovlen].iov_len = data.remote.data_len; iov[iovlen].iov_flags = 0; - } + iov[iovlen].addr_space = + netif->state->nic[pkt->intf_i].plugin_addr_space; + out_rc += data.remote.data_len; + + overrun = data.remote.start_ptr + data.remote.data_len - + rinf->a->ts->plugin_ddr_size; + if( overrun > 0 ) { + /* This data wraps round the DDR ring buffer; the end of the data + * will be found at the start of the ring. */ + iov[iovlen].iov_len -= overrun; + iov[iovlen].buf = ONLOAD_ZC_HANDLE_NONZC; + + ++iovlen; + iov[iovlen].iov_ptr = rinf->a->ts->plugin_ddr_base; + iov[iovlen].iov_len = overrun; + iov[iovlen].addr_space = + netif->state->nic[pkt->intf_i].plugin_addr_space; + iov[iovlen].buf = ONLOAD_ZC_HANDLE_NONZC; + iov[iovlen].iov_flags = 0; + } - /* CRC iov; we are currently wasting a new iov, since we can't distinguish - * whether the CRC is enabled or not. 
*/ - ++iovlen; - iov[iovlen].iov_flags = ONLOAD_ZC_RECV_FLAG_OFFLOAD_OOB; + /* CRC iov; we are currently wasting a new iov, since we can't + * distinguish whether the CRC is enabled or not. */ + ++iovlen; + iov[iovlen].iov_flags = ONLOAD_ZC_RECV_FLAG_OFFLOAD_OOB; - /* Pass the address of the CRC within the packet buffer. This is not - * necessarily naturally aligned. */ - iov[iovlen].iov_len = sizeof(data.remote.data_crc); - iov[iovlen].iov_base = p + CI_MEMBER_OFFSET(typeof(data.remote), - data_crc); - ci_assert_le((uintptr_t) iov[iovlen].iov_base + iov[iovlen].iov_len, - (uintptr_t) p + sizeof(data.remote)); + /* Pass the address of the CRC within the packet buffer. This is not + * necessarily naturally aligned. */ + iov[iovlen].iov_len = sizeof(data.remote.data_crc); + iov[iovlen].iov_base = + p + CI_MEMBER_OFFSET(typeof(data.remote), data_crc); + ci_assert_le((uintptr_t) iov[iovlen].iov_base + iov[iovlen].iov_len, + (uintptr_t) p + sizeof(data.remote)); - iov[iovlen].addr_space = EF_ADDRSPACE_LOCAL; - iov[iovlen].buf = ONLOAD_ZC_HANDLE_NONZC; + iov[iovlen].addr_space = EF_ADDRSPACE_LOCAL; + iov[iovlen].buf = ONLOAD_ZC_HANDLE_NONZC; - break; + break; - case XSN_CEPH_DATA_LOST_SYNC: - if( n < sizeof(data.lost_sync) || - data.msg_len != sizeof(data.lost_sync) ) { - LOG_TR(log(LNTS_FMT "bogus plugin lost-sync block %d/%u", - LNTS_PRI_ARGS(netif, rinf->a->ts), n, data.msg_len)); + case XSN_CEPH_DATA_LOST_SYNC: + if( n < sizeof(data.lost_sync) || + data.msg_len != sizeof(data.lost_sync) ) { + LOG_TR(log(LNTS_FMT "bogus plugin lost-sync block %d/%u", + LNTS_PRI_ARGS(netif, rinf->a->ts), n, data.msg_len)); + goto unrecoverable; + } + memcpy(&data.lost_sync, p, sizeof(data.lost_sync)); + log(LNTS_FMT "plugin lost sync: %u/%u", + LNTS_PRI_ARGS(netif, rinf->a->ts), data.lost_sync.reason, + data.lost_sync.subreason); + rinf->a->msg->msg_controllen = 0; + *ndata = out_rc; + /* Set the return value so that we'll keep hitting this same lost-sync + * message on every receive, 
and hence block the socket from making + * further progress */ + return total - n - hdr_len; + + default: + LOG_TR(log(LNTS_FMT "bogus plugin metastream header %u/%u", + LNTS_PRI_ARGS(netif, rinf->a->ts), data.msg_type, data.msg_len)); goto unrecoverable; - } - memcpy(&data.lost_sync, p, sizeof(data.lost_sync)); - log(LNTS_FMT "plugin lost sync: %u/%u", - LNTS_PRI_ARGS(netif, rinf->a->ts), data.lost_sync.reason, - data.lost_sync.subreason); - rinf->a->msg->msg_controllen = 0; - *ndata = out_rc; - /* Set the return value so that we'll keep hitting this same lost-sync - * message on every receive, and hence block the socket from making - * further progress */ - return total - n - hdr_len; - - default: - LOG_TR(log(LNTS_FMT "bogus plugin metastream header %u/%u", - LNTS_PRI_ARGS(netif, rinf->a->ts), data.msg_type, - data.msg_len)); - goto unrecoverable; } ++iovlen; @@ -1833,22 +1821,22 @@ static int zc_ceph_callback(ci_netif* netif, struct tcp_recv_info* rinf, * this case, is the first iov of the last batch, to match how UDP uses the * zc callback. */ pkt->rx_flags |= CI_PKT_RX_FLAG_KEEP; - pkt->user_refcount = CI_ZC_USER_REFCOUNT_ONE; - iov[0].buf = zc_pktbuf_to_handle(pkt); - rinf->zc_args->msg.iov = iov; + pkt->user_refcount = CI_ZC_USER_REFCOUNT_ONE; + iov[0].buf = zc_pktbuf_to_handle(pkt); + rinf->zc_args->msg.iov = iov; rinf->zc_args->msg.msghdr.msg_iovlen = iovlen; - rinf->zc_args->msg.msghdr.msg_flags = rinf->msg_flags; - cb_rc = rinf->zc_args->cb(rinf->zc_args, 0); + rinf->zc_args->msg.msghdr.msg_flags = rinf->msg_flags; + cb_rc = rinf->zc_args->cb(rinf->zc_args, 0); if( cb_rc & ONLOAD_ZC_TERMINATE ) { /* Make it look like the non-zc buffer is full */ rinf->piov.io.iov_len = 0; - rinf->piov.iovlen = 0; + rinf->piov.iovlen = 0; } ci_pkt_zc_free_clean(pkt, cb_rc); *ndata = out_rc; - unrecoverable: +unrecoverable: /* The correct thing to do with bad framing is debatable. 
This code throws * away the remainder of the packet and continues on without telling the * app. An easy other option would be to put the app in a continuous loop of @@ -1863,11 +1851,11 @@ static int zc_ceph_callback(ci_netif* netif, struct tcp_recv_info* rinf, static int zc_call_callback(ci_netif* netif, struct tcp_recv_info* rinf, - ci_ip_pkt_fmt* pkt, int peek_off, int* ndata) + ci_ip_pkt_fmt* pkt, int peek_off, int* ndata) { - int n = oo_offbuf_left(&pkt->buf); + int n = oo_offbuf_left(&pkt->buf); enum onload_zc_callback_rc cb_rc; - struct onload_zc_iovec iov; + struct onload_zc_iovec iov; #if CI_CFG_TCP_OFFLOAD_RECYCLER if( ci_tcp_is_pluginized(rinf->a->ts) ) @@ -1884,17 +1872,17 @@ static int zc_call_callback(ci_netif* netif, struct tcp_recv_info* rinf, */ pkt->rx_flags |= CI_PKT_RX_FLAG_KEEP; - rinf->zc_args->msg.iov = &iov; + rinf->zc_args->msg.iov = &iov; rinf->zc_args->msg.msghdr.msg_iovlen = 1; - rinf->zc_args->msg.msghdr.msg_flags = rinf->msg_flags; - iov.buf = zc_pktbuf_to_handle(pkt); - iov.iov_base = oo_offbuf_ptr(&pkt->buf) + peek_off; - iov.iov_len = oo_offbuf_left(&pkt->buf) - peek_off; - iov.iov_flags = 0; + rinf->zc_args->msg.msghdr.msg_flags = rinf->msg_flags; + iov.buf = zc_pktbuf_to_handle(pkt); + iov.iov_base = oo_offbuf_ptr(&pkt->buf) + peek_off; + iov.iov_len = oo_offbuf_left(&pkt->buf) - peek_off; + iov.iov_flags = 0; - iov.addr_space = EF_ADDRSPACE_LOCAL; - pkt->user_refcount = CI_ZC_USER_REFCOUNT_ONE; - cb_rc = rinf->zc_args->cb(rinf->zc_args, 0); + iov.addr_space = EF_ADDRSPACE_LOCAL; + pkt->user_refcount = CI_ZC_USER_REFCOUNT_ONE; + cb_rc = rinf->zc_args->cb(rinf->zc_args, 0); ci_pkt_zc_free_clean(pkt, cb_rc); if( cb_rc & ONLOAD_ZC_KEEP ) { @@ -1906,25 +1894,25 @@ static int zc_call_callback(ci_netif* netif, struct tcp_recv_info* rinf, if( cb_rc & ONLOAD_ZC_TERMINATE ) { /* Make it look like the non-zc buffer is full */ rinf->piov.io.iov_len = 0; - rinf->piov.iovlen = 0; + rinf->piov.iovlen = 0; } rinf->a->msg->msg_controllen = 0; - 
*ndata = n; + *ndata = n; return n; } -int ci_tcp_zc_recvmsg(const ci_tcp_recvmsg_args* a, - struct onload_zc_recv_args* args) +int ci_tcp_zc_recvmsg( + const ci_tcp_recvmsg_args* a, struct onload_zc_recv_args* args) { /* This fill_msgname is duplicated at the end of ci_tcp_recvmsg_impl, but we * want to get the value filled in before the callback is called. The * potential for inefficiency is basically irrelevant since the function * does very little in all standard build configurations */ - ci_tcp_recv_fill_msgname(a->ts, (struct sockaddr*) a->msg->msg_name, - &a->msg->msg_namelen); + ci_tcp_recv_fill_msgname( + a->ts, (struct sockaddr*) a->msg->msg_name, &a->msg->msg_namelen); return ci_tcp_recvmsg_impl(a, zc_call_callback, args); } #endif diff --git a/src/lib/transport/ip/tcp_send.c b/src/lib/transport/ip/tcp_send.c index a26214296..b9a5e05d2 100644 --- a/src/lib/transport/ip/tcp_send.c +++ b/src/lib/transport/ip/tcp_send.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2003-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author djr ** \brief TCP sendmsg() etc. @@ -22,7 +22,7 @@ #include "crc32c.h" #endif -#if !defined(__KERNEL__) +#if ! defined(__KERNEL__) #include #include #include @@ -39,9 +39,9 @@ #ifdef __KERNEL__ -# define OO_EINTR ERESTARTSYS +#define OO_EINTR ERESTARTSYS #else -# define OO_EINTR EINTR +#define OO_EINTR EINTR #endif /* If not locked then trylock, and if successful set locked flag and (in @@ -49,10 +49,9 @@ * false. si_ variants take a [struct udp_send_info*]. 
*/ -#define trylock(ni, locked) \ +#define trylock(ni, locked) \ ((locked) || (ci_netif_trylock(ni) && ((locked) = 1))) -#define si_trylock(ni, sinf) \ - trylock((ni), (sinf)->stack_locked) +#define si_trylock(ni, sinf) trylock((ni), (sinf)->stack_locked) struct tcp_send_info { int rc; @@ -90,12 +89,12 @@ static void ci_tcp_tx_advance_nagle(ci_netif* ni, ci_tcp_state* ts) ci_assert(! ci_ip_queue_is_empty(sendq)); if( (sendq->num != 1) | (ci_tcp_inflight(ts) == 0) | - OO_SP_NOT_NULL(ts->local_peer)) { + OO_SP_NOT_NULL(ts->local_peer) ) { advance_now: /* NB. We call advance() before poll() to get best latency. */ ci_ip_time_resync(IPTIMER_STATE(ni)); ci_tcp_tx_advance(ts, ni); - if(CI_UNLIKELY( ts->tcpflags & CI_TCPT_FLAG_MSG_WARM )) + if( CI_UNLIKELY(ts->tcpflags & CI_TCPT_FLAG_MSG_WARM) ) return; goto poll_and_out; } @@ -107,11 +106,11 @@ static void ci_tcp_tx_advance_nagle(ci_netif* ni, ci_tcp_state* ts) ** enqueue data. */ pkt = PKT_CHK(ni, sendq->head); - ci_assert(!(TX_PKT_IPX_TCP(ipcache_af(&ts->s.pkt), pkt)->tcp_flags & - (CI_TCP_FLAG_SYN|CI_TCP_FLAG_FIN))); + ci_assert(! 
(TX_PKT_IPX_TCP(ipcache_af(&ts->s.pkt), pkt)->tcp_flags & + (CI_TCP_FLAG_SYN | CI_TCP_FLAG_FIN))); if( (PKT_TCP_TX_SEQ_SPACE(pkt) >= tcp_eff_mss(ts)) | - (SEQ_LT(tcp_snd_una(ts), tcp_snd_up(ts)) ) ) + (SEQ_LT(tcp_snd_una(ts), tcp_snd_up(ts))) ) goto advance_now; if( ts->s.s_aflags & CI_SOCK_AFLAG_NODELAY ) { @@ -127,26 +126,28 @@ static void ci_tcp_tx_advance_nagle(ci_netif* ni, ci_tcp_state* ts) goto advance_now; } - LOG_TV(log(LPF "%d Nagle snd=%08x-%08x-%08x enq=%08x pkt=%x-%x", - S_FMT(ts), tcp_snd_una(ts), tcp_snd_nxt(ts), - ts->snd_max, tcp_enq_nxt(ts), - pkt->pf.tcp_tx.start_seq, pkt->pf.tcp_tx.end_seq)); + LOG_TV(log(LPF "%d Nagle snd=%08x-%08x-%08x enq=%08x pkt=%x-%x", S_FMT(ts), + tcp_snd_una(ts), tcp_snd_nxt(ts), ts->snd_max, tcp_enq_nxt(ts), + pkt->pf.tcp_tx.start_seq, pkt->pf.tcp_tx.end_seq)); ++ts->stats.tx_stop_nagle; - poll_and_out: +poll_and_out: if( ci_netif_may_poll(ni) && ci_netif_has_event(ni) ) ci_netif_poll(ni); } -ci_inline int ci_tcp_tx_n_pkts_needed(int eff_mss, int maxbytes, - int maxbufs, int sendq_credit) { +ci_inline int ci_tcp_tx_n_pkts_needed( + int eff_mss, int maxbytes, int maxbufs, int sendq_credit) +{ /* Calculate how many packet buffers we need to accommodate , ** assuming each will hold bytes, but do not exceed . 
*/ int n = (maxbytes + eff_mss - 1) / eff_mss; - if( n > sendq_credit ) n = sendq_credit; - if( n > maxbufs ) n = maxbufs; + if( n > sendq_credit ) + n = sendq_credit; + if( n > maxbufs ) + n = maxbufs; return n; } @@ -167,26 +168,23 @@ ci_inline void ci_tcp_tx_pkt_init(ci_ip_pkt_fmt* pkt, int hdrlen, int mss) } -static -int ci_tcp_sendmsg_fill_pkt(ci_netif* ni, ci_tcp_state* ts, - struct tcp_send_info* sinf, - ci_iovec_ptr* piov, int hdrlen, - int maxlen - CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) +static int ci_tcp_sendmsg_fill_pkt(ci_netif* ni, ci_tcp_state* ts, + struct tcp_send_info* sinf, ci_iovec_ptr* piov, int hdrlen, + int maxlen CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) { /* Initialise and fill a packet buffer from an iovec. */ int n; - ci_ip_pkt_fmt* pkt = oo_pkt_filler_next_pkt(ni, &sinf->pf, sinf->stack_locked); + ci_ip_pkt_fmt* pkt = + oo_pkt_filler_next_pkt(ni, &sinf->pf, sinf->stack_locked); ci_assert(pkt); ci_assert(! ci_iovec_ptr_is_empty_proper(piov)); ci_tcp_tx_pkt_init(pkt, hdrlen, maxlen); - oo_pkt_filler_init(&sinf->pf, pkt, - (uint8_t*) oo_tx_l3_hdr(pkt) + hdrlen); + oo_pkt_filler_init(&sinf->pf, pkt, (uint8_t*) oo_tx_l3_hdr(pkt) + hdrlen); #if CI_CFG_IPV6 if( ipcache_af(&ts->s.pkt) == AF_INET ) - pkt->flags &=~ CI_PKT_FLAG_IS_IP6; + pkt->flags &= ~CI_PKT_FLAG_IS_IP6; else pkt->flags |= CI_PKT_FLAG_IS_IP6; #endif @@ -198,13 +196,12 @@ int ci_tcp_sendmsg_fill_pkt(ci_netif* ni, ci_tcp_state* ts, n = sinf->total_unsent - sinf->fill_list_bytes; n = CI_MIN(maxlen, n); - sinf->rc = oo_pkt_fill(ni, &ts->s, NULL/*p_netif_locked*/, - CI_FALSE/*can_block*/, &sinf->pf, piov, - n CI_KERNEL_ARG(addr_spc)); + sinf->rc = oo_pkt_fill(ni, &ts->s, NULL /*p_netif_locked*/, + CI_FALSE /*can_block*/, &sinf->pf, piov, n CI_KERNEL_ARG(addr_spc)); /* oo_pkt_fill does not allocate packets. So, it can fail with * -EFAULT only, in kernel mode only, because of oo_pkt_fill_copy(). 
*/ #ifdef __KERNEL__ - if( CI_UNLIKELY( sinf->rc < 0 ) ) { + if( CI_UNLIKELY(sinf->rc < 0) ) { ci_assert_equal(sinf->rc, -EFAULT); goto fill_failed; } @@ -213,15 +210,15 @@ int ci_tcp_sendmsg_fill_pkt(ci_netif* ni, ci_tcp_state* ts, #endif /* This assumes that packet filler only used a single buffer. - * offbuf use on the TCP send path needs to go long term + * offbuf use on the TCP send path needs to go long term */ ci_assert_ge(oo_offbuf_left(&pkt->buf), n); oo_offbuf_advance(&pkt->buf, n); /* We should have either filled the segment, or run out of data. */ LOG_TV(log("%s: iov.len=%d iovlen=%d n=%d pkt=%d left=%d", __FUNCTION__, - (int) CI_IOVEC_LEN(&piov->io), piov->iovlen, n, - OO_PKT_FMT(pkt), oo_offbuf_left(&pkt->buf))); + (int) CI_IOVEC_LEN(&piov->io), piov->iovlen, n, OO_PKT_FMT(pkt), + oo_offbuf_left(&pkt->buf))); #ifndef __KERNEL__ /* This can fail in the kernel due to bad user-level pointer, so can't assert this */ @@ -229,13 +226,13 @@ int ci_tcp_sendmsg_fill_pkt(ci_netif* ni, ci_tcp_state* ts, oo_offbuf_left(&pkt->buf) == 0 || pkt->n_buffers == CI_IP_PKT_SEGMENTS_MAX); #else -# ifndef NDEBUG - if(!(ci_iovec_ptr_is_empty_proper(piov) || - oo_offbuf_left(&pkt->buf) == 0 || - pkt->n_buffers == CI_IP_PKT_SEGMENTS_MAX)) +#ifndef NDEBUG + if( ! 
(ci_iovec_ptr_is_empty_proper(piov) || + oo_offbuf_left(&pkt->buf) == 0 || + pkt->n_buffers == CI_IP_PKT_SEGMENTS_MAX) ) LOG_U(ci_log("%s: couldn't copy data, probably bad user-level pointer", - __FUNCTION__)); -# endif + __FUNCTION__)); +#endif #endif /* We must remember the header length the packet was initialised with, and @@ -245,12 +242,11 @@ int ci_tcp_sendmsg_fill_pkt(ci_netif* ni, ci_tcp_state* ts, */ pkt->pf.tcp_tx.end_seq = n; - ci_assert_equal(TX_PKT_LEN(pkt), - oo_offbuf_ptr(&pkt->buf) - PKT_START(pkt)); + ci_assert_equal(TX_PKT_LEN(pkt), oo_offbuf_ptr(&pkt->buf) - PKT_START(pkt)); return n; #ifdef __KERNEL__ - fill_failed: +fill_failed: LOG_U(ci_log("%s: fill failed: %d\n", __FUNCTION__, sinf->rc)); ci_assert(0); return 0; @@ -259,8 +255,7 @@ int ci_tcp_sendmsg_fill_pkt(ci_netif* ni, ci_tcp_state* ts, static int ci_tcp_fill_stolen_buffer(ci_netif* ni, ci_ip_pkt_fmt* pkt, - ci_iovec_ptr* piov - CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) + ci_iovec_ptr* piov CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) { /* Fill a single packet, which must be initialised already (and may ** contain data), from an iovec. Used for the "stolen packet" case. @@ -271,22 +266,20 @@ static int ci_tcp_fill_stolen_buffer(ci_netif* ni, ci_ip_pkt_fmt* pkt, /* We should have either filled the segment, or run out of data. 
*/ LOG_TV(log("%s: iov.len=%d iovlen=%d n=%d pkt=%d left=%d", __FUNCTION__, - (int) CI_IOVEC_LEN(&piov->io), piov->iovlen, n, - OO_PKT_FMT(pkt), oo_offbuf_left(&pkt->buf))); -#ifndef __KERNEL__ + (int) CI_IOVEC_LEN(&piov->io), piov->iovlen, n, OO_PKT_FMT(pkt), + oo_offbuf_left(&pkt->buf))); +#ifndef __KERNEL__ /* This can fail in the kernel due to bad user-level pointer, so can't assert this */ - ci_assert(ci_iovec_ptr_is_empty(piov) || - oo_offbuf_left(&pkt->buf) == 0 || + ci_assert(ci_iovec_ptr_is_empty(piov) || oo_offbuf_left(&pkt->buf) == 0 || pkt->n_buffers == CI_IP_PKT_SEGMENTS_MAX); #else -# ifndef NDEBUG - if(!(ci_iovec_ptr_is_empty(piov) || - oo_offbuf_left(&pkt->buf) == 0 || - pkt->n_buffers == CI_IP_PKT_SEGMENTS_MAX)) +#ifndef NDEBUG + if( ! (ci_iovec_ptr_is_empty(piov) || oo_offbuf_left(&pkt->buf) == 0 || + pkt->n_buffers == CI_IP_PKT_SEGMENTS_MAX) ) LOG_U(ci_log("%s: couldn't copy data, probably bad user-level pointer", - __FUNCTION__)); -# endif + __FUNCTION__)); +#endif #endif /* Fixup the packet meta-data. 
*/ pkt->pf.tcp_tx.end_seq += n; @@ -307,11 +300,9 @@ static int tcp_payload_len(ci_ip_pkt_fmt* pkt) #endif -static -void ci_tcp_tx_fill_sendq_tail(ci_netif* ni, ci_tcp_state* ts, - ci_iovec_ptr* piov, - struct tcp_send_info* sinf - CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) +static void ci_tcp_tx_fill_sendq_tail(ci_netif* ni, ci_tcp_state* ts, + ci_iovec_ptr* piov, + struct tcp_send_info* sinf CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) { ci_ip_pkt_queue* sendq = &ts->send; ci_ip_pkt_fmt* pkt; @@ -321,10 +312,9 @@ void ci_tcp_tx_fill_sendq_tail(ci_netif* ni, ci_tcp_state* ts, ci_assert(ci_ip_queue_not_empty(sendq)); pkt = PKT_CHK(ni, sendq->tail); - if( ts->s.tx_errno == 0 && - (NI_OPTS(ni).tcp_combine_sends_mode == 0 || - pkt->flags & CI_PKT_FLAG_TX_MORE) ) { - if(CI_UNLIKELY( pkt->flags & CI_PKT_FLAG_INDIRECT )) { + if( ts->s.tx_errno == 0 && (NI_OPTS(ni).tcp_combine_sends_mode == 0 || + pkt->flags & CI_PKT_FLAG_TX_MORE) ) { + if( CI_UNLIKELY(pkt->flags & CI_PKT_FLAG_INDIRECT) ) { /* Making this work in kernelspace is not particularly difficult, but * so rarely used that it's not worth the effort. The only thing which * is needed is a version of ci_copy_iovec which calls copy_from_user. */ @@ -339,18 +329,19 @@ void ci_tcp_tx_fill_sendq_tail(ci_netif* ni, ci_tcp_state* ts, int mss = tcp_eff_mss(ts); int last_seg; int space; - if(CI_LIKELY( zch->segs < CI_IP_PKT_SEGMENTS_MAX - 1 && - (space = oo_tx_zc_left(pkt) - - CI_MEMBER_OFFSET(struct ci_pkt_zc_payload, local)) > 0 && - /* NB: annoyingly unavoidable division: */ - (last_seg = tcp_payload_len(pkt) % mss) != 0 )) { + if( CI_LIKELY(zch->segs < CI_IP_PKT_SEGMENTS_MAX - 1 && + (space = oo_tx_zc_left(pkt) - + CI_MEMBER_OFFSET( + struct ci_pkt_zc_payload, local)) > 0 && + /* NB: annoyingly unavoidable division: */ + (last_seg = tcp_payload_len(pkt) % mss) != 0) ) { /* Missed optimisation: the last zc_payload could have already been * a non-remote one so we could append there. 
Figuring that out, * though, would require walking the existing list of payloads. It's * better to be network-inefficient in an extremely rare case than to * be CPU-inefficient always. */ - struct ci_pkt_zc_payload* zcp = (struct ci_pkt_zc_payload*) - ((char*)zch + zch->end); + struct ci_pkt_zc_payload* zcp = + (struct ci_pkt_zc_payload*) ((char*) zch + zch->end); int n = ci_copy_iovec(zcp->local, CI_MIN(space, mss - last_seg), piov); zcp->len = n; zcp->prefix_space = 0; @@ -369,14 +360,13 @@ void ci_tcp_tx_fill_sendq_tail(ci_netif* ni, ci_tcp_state* ts, sinf->total_unsent -= n; } #endif - } - else if( oo_offbuf_left(&pkt->buf) > 0 ) { - n = ci_tcp_fill_stolen_buffer(ni, pkt, piov CI_KERNEL_ARG(addr_spc)); - LOG_TV(ci_log("%s: "NT_FMT "sq=%d if=%d bytes=%d piov.left=%d " - "pkt.left=%d", __FUNCTION__, NT_PRI_ARGS(ni, ts), - SEQ_SUB(tcp_enq_nxt(ts), tcp_snd_nxt(ts)), - ci_tcp_inflight(ts), n, ci_iovec_ptr_bytes_count(piov), - oo_offbuf_left(&pkt->buf))); + } else if( oo_offbuf_left(&pkt->buf) > 0 ) { + n = ci_tcp_fill_stolen_buffer(ni, pkt, piov CI_KERNEL_ARG(addr_spc)); + LOG_TV(ci_log("%s: " NT_FMT "sq=%d if=%d bytes=%d piov.left=%d " + "pkt.left=%d", + __FUNCTION__, NT_PRI_ARGS(ni, ts), + SEQ_SUB(tcp_enq_nxt(ts), tcp_snd_nxt(ts)), ci_tcp_inflight(ts), n, + ci_iovec_ptr_bytes_count(piov), oo_offbuf_left(&pkt->buf))); tcp_enq_nxt(ts) += n; sinf->total_sent += n; sinf->total_unsent -= n; @@ -393,8 +383,8 @@ void ci_tcp_tx_fill_sendq_tail(ci_netif* ni, ci_tcp_state* ts, } -ci_inline void ci_tcp_sendmsg_prep_pkt(ci_netif* ni, ci_tcp_state* ts, - ci_ip_pkt_fmt* pkt, unsigned seq) +ci_inline void ci_tcp_sendmsg_prep_pkt( + ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_fmt* pkt, unsigned seq) { int orig_hdrlen, extra_opts; #ifndef NDEBUG @@ -410,7 +400,7 @@ ci_inline void ci_tcp_sendmsg_prep_pkt(ci_netif* ni, ci_tcp_state* ts, ** before we correct the sequence numbers (we stashed it away in [start_seq] ** when the buffer was filled). 
*/ - orig_hdrlen = (int)pkt->pf.tcp_tx.start_seq; + orig_hdrlen = (int) pkt->pf.tcp_tx.start_seq; /* Sequence numbers in packet are 0...n, so we need to fix them up. ** (Note that, in the stolen packet case, the sequence numbers are OK and @@ -422,7 +412,7 @@ ci_inline void ci_tcp_sendmsg_prep_pkt(ci_netif* ni, ci_tcp_state* ts, pkt->pf.tcp_tx.block_end = OO_PP_NULL; LOG_TV(log(LPF "%s: %d: %x-%x", __FUNCTION__, OO_PKT_FMT(pkt), - pkt->pf.tcp_tx.start_seq, pkt->pf.tcp_tx.end_seq)); + pkt->pf.tcp_tx.start_seq, pkt->pf.tcp_tx.end_seq)); /* It's possible that we thought we didn't need space for TCP options when ** the buffer was initialised, but now it turns out that we do. (The dup @@ -435,18 +425,17 @@ ci_inline void ci_tcp_sendmsg_prep_pkt(ci_netif* ni, ci_tcp_state* ts, */ extra_opts = ts->outgoing_hdrs_len - orig_hdrlen; if( extra_opts ) - ci_tcp_tx_insert_option_space(ni, ts, pkt, - orig_hdrlen + oo_tx_ether_hdr_size(pkt), - extra_opts); + ci_tcp_tx_insert_option_space( + ni, ts, pkt, orig_hdrlen + oo_tx_ether_hdr_size(pkt), extra_opts); /* The sequence space consumed should match the bytes in the buffer. */ ci_assert_equal(oo_tx_l3_len(pkt), - CI_IPX_HDR_SIZE(af) + sizeof(ci_tcp_hdr) - + CI_TCP_HDR_OPT_LEN(TX_PKT_IPX_TCP(af, pkt)) - + SEQ_SUB(pkt->pf.tcp_tx.end_seq, pkt->pf.tcp_tx.start_seq)); + CI_IPX_HDR_SIZE(af) + sizeof(ci_tcp_hdr) + + CI_TCP_HDR_OPT_LEN(TX_PKT_IPX_TCP(af, pkt)) + + SEQ_SUB(pkt->pf.tcp_tx.end_seq, pkt->pf.tcp_tx.start_seq)); /* Correct offbuf end as might have been constructed with diff eff_mss */ - if(CI_LIKELY( ! (pkt->flags & CI_PKT_FLAG_INDIRECT) )) + if( CI_LIKELY(! 
(pkt->flags & CI_PKT_FLAG_INDIRECT)) ) ci_tcp_tx_pkt_set_end(ts, pkt); } @@ -456,7 +445,7 @@ ci_inline void ci_tcp_sendmsg_prep_pkt(ci_netif* ni, ci_tcp_state* ts, static int ci_tcp_tmpl_offset(void) { return CI_CFG_PKT_BUF_SIZE - sizeof(struct tcp_send_info) - - sizeof(struct oo_msg_template); + sizeof(struct oo_msg_template); } @@ -474,12 +463,10 @@ static void __ci_tcp_tmpl_handle_nic_reset(ci_netif* ni, ci_tcp_state* ts) if( tmpl->pio_addr >= 0 ) { if( ni->state->nic[tmpl->intf_i].oo_vi_flags & OO_VI_FLAGS_PIO_EN ) { CI_DEBUG_TRY(ef_pio_memcpy(ci_netif_vi(ni, tmpl->intf_i), - PKT_START(tmpl), - tmpl->pio_addr, tmpl->buf_len)); - } - else { + PKT_START(tmpl), tmpl->pio_addr, tmpl->buf_len)); + } else { ci_pio_buddy_free(ni, &ni->state->nic[tmpl->intf_i].pio_buddy, - tmpl->pio_addr, tmpl->pio_order); + tmpl->pio_addr, tmpl->pio_order); tmpl->pio_addr = -1; } } @@ -509,8 +496,8 @@ void ci_tcp_tmpl_handle_nic_reset(ci_netif* ni) /* Remove this template from the socket's template list. */ -static void ci_tcp_tmpl_remove(ci_netif* ni, ci_tcp_state* ts, - ci_ip_pkt_fmt* tmpl) +static void ci_tcp_tmpl_remove( + ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_fmt* tmpl) { struct oo_msg_template* omt = ci_tcp_tmpl_pkt_to_omt(tmpl); oo_pkt_p* pp; @@ -519,7 +506,7 @@ static void ci_tcp_tmpl_remove(ci_netif* ni, ci_tcp_state* ts, pp = &(PKT_CHK(ni, *pp)->next); *pp = tmpl->next; --(ts->stats.tx_tmpl_active); - omt->oomt_sock_id = OO_SP_NULL; /* TODO: debug only? */ + omt->oomt_sock_id = OO_SP_NULL; /* TODO: debug only? */ } @@ -528,8 +515,8 @@ static void ci_tcp_tmpl_remove(ci_netif* ni, ci_tcp_state* ts, * * Must be called with the stack lock held. 
*/ -static void ci_tcp_tmpl_free(ci_netif* ni, ci_tcp_state* ts, - ci_ip_pkt_fmt* tmpl, int in_list) +static void ci_tcp_tmpl_free( + ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_fmt* tmpl, int in_list) { ci_assert(ni); ci_assert(ts); @@ -537,7 +524,7 @@ static void ci_tcp_tmpl_free(ci_netif* ni, ci_tcp_state* ts, if( tmpl->pio_addr >= 0 ) { ci_pio_buddy_free(ni, &ni->state->nic[tmpl->intf_i].pio_buddy, - tmpl->pio_addr, tmpl->pio_order); + tmpl->pio_addr, tmpl->pio_order); tmpl->pio_addr = -1; } if( in_list ) @@ -570,8 +557,8 @@ static ci_ip_pkt_fmt* ci_tcp_tmpl_omt_to_pkt(struct oo_msg_template* omt) } -static struct tcp_send_info* - ci_tcp_tmpl_omt_to_sinf(struct oo_msg_template* omt) +static struct tcp_send_info* ci_tcp_tmpl_omt_to_sinf( + struct oo_msg_template* omt) { return (void*) (omt + 1); } @@ -587,16 +574,16 @@ static struct tcp_send_info* * blocks. It returns the errno returned by ci_tcp_sendmsg(). */ static int __ci_tcp_tmpl_normal_send(ci_netif* ni, ci_tcp_state* ts, - ci_ip_pkt_fmt* tmpl, - struct tcp_send_info* sinf, unsigned flags) + ci_ip_pkt_fmt* tmpl, struct tcp_send_info* sinf, unsigned flags) { -#define CI_NOT_NULL ((void *)-1) +#define CI_NOT_NULL ((void*) -1) struct iovec iov[1]; int rc; ci_assert(ci_netif_is_locked(ni)); - iov[0].iov_base = CI_TCP_PAYLOAD(PKT_IPX_TCP_HDR(ipcache_af(&ts->s.pkt), tmpl)); + iov[0].iov_base = + CI_TCP_PAYLOAD(PKT_IPX_TCP_HDR(ipcache_af(&ts->s.pkt), tmpl)); iov[0].iov_len = sinf->total_unsent; if( ts->s.b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) ) @@ -609,12 +596,10 @@ static int __ci_tcp_tmpl_normal_send(ci_netif* ni, ci_tcp_state* ts, rc = ci_tcp_sendmsg(ni, ts, iov, 1, flags & ~ONLOAD_TEMPLATE_FLAGS_SEND_NOW); if( rc < 0 ) { rc = -errno; - } - else if( rc < sinf->total_unsent ) { + } else if( rc < sinf->total_unsent ) { /* We sent less than we wanted to. Connection probably closed. 
*/ rc = -ts->s.tx_errno; - } - else { + } else { ci_assert_equal(rc, sinf->total_unsent); rc = 0; } @@ -628,8 +613,8 @@ static int __ci_tcp_tmpl_normal_send(ci_netif* ni, ci_tcp_state* ts, int ci_tcp_tmpl_alloc(ci_netif* ni, ci_tcp_state* ts, - struct oo_msg_template** omt_pp, - const struct iovec* initial_msg, int mlen, unsigned flags) + struct oo_msg_template** omt_pp, const struct iovec* initial_msg, int mlen, + unsigned flags) { int i, max_payload; int rc = 0; @@ -651,19 +636,19 @@ int ci_tcp_tmpl_alloc(ci_netif* ni, ci_tcp_state* ts, /* Templated sends currently require two data structures both of * which are stored on the packet buffer to avoid memory * allocations. They are placed at the end of the packet buffer. - */ + */ /* This is needed to ensure that an app written to a later version of the * API gets an error if they try to use a flag we don't understand. */ - if(CI_UNLIKELY( flags & ~ONLOAD_TEMPLATE_FLAGS_PIO_RETRY )) { + if( CI_UNLIKELY(flags & ~ONLOAD_TEMPLATE_FLAGS_PIO_RETRY) ) { LOG_E(ci_log("%s: called with unsupported flags=%x", __FUNCTION__, flags)); return -EINVAL; } ci_netif_lock(ni); - if(CI_UNLIKELY( (~ts->s.b.state & CI_TCP_STATE_SYNCHRONISED) )) { + if( CI_UNLIKELY((~ts->s.b.state & CI_TCP_STATE_SYNCHRONISED)) ) { /* Only handling connected connections. */ LOG_U(ci_log("ci_tcp_tmpl_alloc: not synchronized\n")); @@ -674,40 +659,40 @@ int ci_tcp_tmpl_alloc(ci_netif* ni, ci_tcp_state* ts, /* Check for valid cplane information. */ - if(CI_UNLIKELY( ! oo_cp_ipcache_is_valid(ni, ipcache) )) { + if( CI_UNLIKELY(! oo_cp_ipcache_is_valid(ni, ipcache)) ) { oo_tcp_ipcache_update(ni, ts); switch( ipcache->status ) { - case retrrc_success: - /* Successfully validated cplane info on the socket. We will copy - * it into the packet later in this function. - */ - break; - - case retrrc_nomac: - /* We could not validate cplane info on the socket. We will - * copy incorrect MAC info to the packet later in this function. 
- * But it doesn't matter as we will do additional testing in - * tmpl_update() to ensure that we only send with valid cplane - * info. - * - * TODO: Maybe we want to request an arp at this point - */ - break; + case retrrc_success: + /* Successfully validated cplane info on the socket. We will copy + * it into the packet later in this function. + */ + break; + + case retrrc_nomac: + /* We could not validate cplane info on the socket. We will + * copy incorrect MAC info to the packet later in this function. + * But it doesn't matter as we will do additional testing in + * tmpl_update() to ensure that we only send with valid cplane + * info. + * + * TODO: Maybe we want to request an arp at this point + */ + break; - case retrrc_localroute: - goto local_route; + case retrrc_localroute: + goto local_route; - default: - LOG_U(ci_log("%s: cplane status=%d", __FUNCTION__, ipcache->status)); - rc = -EHOSTUNREACH; - goto out; + default: + LOG_U(ci_log("%s: cplane status=%d", __FUNCTION__, ipcache->status)); + rc = -EHOSTUNREACH; + goto out; } } if( ipcache->flags & CI_IP_CACHE_IS_LOCALROUTE ) { - local_route: + local_route: LOG_U(ci_log("%s: templated sends not supported on loopback connections", - __FUNCTION__)); + __FUNCTION__)); rc = -EOPNOTSUPP; goto out; } @@ -740,7 +725,7 @@ int ci_tcp_tmpl_alloc(ci_netif* ni, ci_tcp_state* ts, int max_pio_pkt, max_buf_pkt; max_pio_pkt = nsn->pio_io_len - ETH_VLAN_HLEN; max_buf_pkt = - CI_CFG_PKT_BUF_SIZE - CI_MEMBER_OFFSET(ci_ip_pkt_fmt, dma_start); + CI_CFG_PKT_BUF_SIZE - CI_MEMBER_OFFSET(ci_ip_pkt_fmt, dma_start); max_payload = CI_MIN(max_buf_pkt, max_pio_pkt); max_payload -= ts->outgoing_hdrs_len + ETH_HLEN; max_payload -= sizeof(struct tcp_send_info); @@ -766,8 +751,9 @@ int ci_tcp_tmpl_alloc(ci_netif* ni, ci_tcp_state* ts, */ ci_assert_equal(pkt->pio_addr, -1); pkt->intf_i = intf_i; - pkt->pio_order = ci_log2_ge(ts->outgoing_hdrs_len + ETH_HLEN + ETH_VLAN_HLEN - + total_unsent, CI_CFG_MIN_PIO_BLOCK_ORDER); + pkt->pio_order = 
ci_log2_ge( + ts->outgoing_hdrs_len + ETH_HLEN + ETH_VLAN_HLEN + total_unsent, + CI_CFG_MIN_PIO_BLOCK_ORDER); pkt->pio_addr = ci_pio_buddy_alloc(ni, &nsn->pio_buddy, pkt->pio_order); if( pkt->pio_addr < 0 ) { pkt->pio_addr = -1; @@ -780,7 +766,7 @@ int ci_tcp_tmpl_alloc(ci_netif* ni, ci_tcp_state* ts, } #if CI_CFG_IPV6 if( af == AF_INET ) - pkt->flags &=~ CI_PKT_FLAG_IS_IP6; + pkt->flags &= ~CI_PKT_FLAG_IS_IP6; else pkt->flags |= CI_PKT_FLAG_IS_IP6; #endif @@ -813,16 +799,15 @@ int ci_tcp_tmpl_alloc(ci_netif* ni, ci_tcp_state* ts, /* XXX: Do I have to worry about MSG_CORK? */ /* TODO: look at this sinf stuff */ ci_iovec_ptr_init_nz(&piov, initial_msg, mlen); - sinf->fill_list_bytes += - ci_tcp_sendmsg_fill_pkt(ni, ts, sinf, &piov, ts->outgoing_hdrs_len, - tcp_eff_mss(ts)); + sinf->fill_list_bytes += ci_tcp_sendmsg_fill_pkt( + ni, ts, sinf, &piov, ts->outgoing_hdrs_len, tcp_eff_mss(ts)); ++sinf->n_filled; CI_USER_PTR_SET(sinf->pf.pkt->pf.tcp_tx.next, sinf->fill_list); sinf->fill_list = sinf->pf.pkt; ci_tcp_sendmsg_prep_pkt(ni, ts, pkt, tcp_enq_nxt(ts)); TX_PKT_IPX_TCP(af, sinf->fill_list)->tcp_flags = - CI_TCP_FLAG_PSH | CI_TCP_FLAG_ACK; + CI_TCP_FLAG_PSH | CI_TCP_FLAG_ACK; /* Initialise the protocol headers. We don't set those parts that will * always be rewritten when we do the actual send. 
*/ @@ -838,24 +823,23 @@ int ci_tcp_tmpl_alloc(ci_netif* ni, ci_tcp_state* ts, ci_ip_set_mac_and_port(ni, ipcache, pkt); if( pkt->pio_addr >= 0 ) { - rc = ef_pio_memcpy(ci_netif_vi(ni, intf_i), PKT_START(pkt), - pkt->pio_addr, pkt->buf_len); + rc = ef_pio_memcpy( + ci_netif_vi(ni, intf_i), PKT_START(pkt), pkt->pio_addr, pkt->buf_len); ci_assert_equal(rc, 0); } ++ts->stats.tx_tmpl_active; - out: +out: ci_netif_unlock(ni); return rc; } -int -ci_tcp_tmpl_update(ci_netif* ni, ci_tcp_state* ts, - struct oo_msg_template* omt, - const struct onload_template_msg_update_iovec* updates, - int ulen, unsigned flags) +int ci_tcp_tmpl_update(ci_netif* ni, ci_tcp_state* ts, + struct oo_msg_template* omt, + const struct onload_template_msg_update_iovec* updates, int ulen, + unsigned flags) { /* XXX: In fast path, check if need to update ack. If send next is * what we expect it to be, we are in fast path. We should save @@ -876,8 +860,8 @@ ci_tcp_tmpl_update(ci_netif* ni, ci_tcp_state* ts, /* This is needed to ensure that an app written to a later version of the * API gets an error if they try to use a flag we don't understand. 
*/ - if(CI_UNLIKELY( flags & ~(ONLOAD_TEMPLATE_FLAGS_SEND_NOW | - ONLOAD_TEMPLATE_FLAGS_DONTWAIT) )) { + if( CI_UNLIKELY(flags & ~(ONLOAD_TEMPLATE_FLAGS_SEND_NOW | + ONLOAD_TEMPLATE_FLAGS_DONTWAIT)) ) { LOG_E(ci_log("%s: called with unsupported flags=%x", __FUNCTION__, flags)); return -EINVAL; } @@ -886,40 +870,38 @@ ci_tcp_tmpl_update(ci_netif* ni, ci_tcp_state* ts, ipcache = &ts->s.pkt; pkt = ci_tcp_tmpl_omt_to_pkt(omt); - tcp = TX_PKT_IPX_TCP(af, pkt);; + tcp = TX_PKT_IPX_TCP(af, pkt); + ; vi = ci_netif_vi(ni, pkt->intf_i); tcp_opts = CI_TCP_HDR_OPTS(tcp); sinf = ci_tcp_tmpl_omt_to_sinf(omt); - if(CI_UNLIKELY( omt->oomt_sock_id != S_SP(ts) )) { + if( CI_UNLIKELY(omt->oomt_sock_id != S_SP(ts)) ) { rc = -EINVAL; ci_tcp_tmpl_free(ni, ts, pkt, 1); goto out; } - if(CI_UNLIKELY( ts->s.so_error )) { + if( CI_UNLIKELY(ts->s.so_error) ) { rc = -ci_get_so_error(&ts->s); if( rc < 0 ) { ci_tcp_tmpl_free(ni, ts, pkt, 1); goto out; } } - if(CI_UNLIKELY( ts->s.tx_errno )) { + if( CI_UNLIKELY(ts->s.tx_errno) ) { rc = -ts->s.tx_errno; ci_tcp_tmpl_free(ni, ts, pkt, 1); goto out; } - if(CI_UNLIKELY( pkt->pio_addr == -1 && - ! (flags & ONLOAD_TEMPLATE_FLAGS_SEND_NOW) )) { - pkt->pio_addr = - ci_pio_buddy_alloc(ni, &ni->state->nic[pkt->intf_i].pio_buddy, - pkt->pio_order); + if( CI_UNLIKELY(pkt->pio_addr == -1 && + ! (flags & ONLOAD_TEMPLATE_FLAGS_SEND_NOW)) ) { + pkt->pio_addr = ci_pio_buddy_alloc( + ni, &ni->state->nic[pkt->intf_i].pio_buddy, pkt->pio_order); if( pkt->pio_addr >= 0 ) { - rc = ef_pio_memcpy(vi, PKT_START(pkt), - pkt->pio_addr, pkt->buf_len); + rc = ef_pio_memcpy(vi, PKT_START(pkt), pkt->pio_addr, pkt->buf_len); ci_assert(rc == 0); - } - else { + } else { pkt->pio_addr = -1; } } @@ -928,8 +910,7 @@ ci_tcp_tmpl_update(ci_netif* ni, ci_tcp_state* ts, */ for( i = 0; i < ulen; ++i ) { /* TODO: Think about what checks we want at runtime. 
*/ - if( updates[i].otmu_len == 0 || - updates[i].otmu_offset < 0 || + if( updates[i].otmu_len == 0 || updates[i].otmu_offset < 0 || #ifndef NDEBUG updates[i].otmu_base == NULL || #endif @@ -938,18 +919,21 @@ ci_tcp_tmpl_update(ci_netif* ni, ci_tcp_state* ts, goto out; } ci_assert((CI_TCP_PAYLOAD(PKT_IPX_TCP_HDR(af, pkt)) - PKT_START(pkt)) + - updates[i].otmu_offset >= 0); + updates[i].otmu_offset >= + 0); - if(CI_UNLIKELY( pkt->pio_addr != -1 )) { + if( CI_UNLIKELY(pkt->pio_addr != -1) ) { rc = ef_pio_memcpy(vi, updates[i].otmu_base, - pkt->pio_addr + (ci_uint32) - (CI_TCP_PAYLOAD(PKT_IPX_TCP_HDR(af, pkt)) - PKT_START(pkt)) + - updates[i].otmu_offset, - updates[i].otmu_len); + pkt->pio_addr + + (ci_uint32) (CI_TCP_PAYLOAD(PKT_IPX_TCP_HDR(af, pkt)) - + PKT_START(pkt)) + + updates[i].otmu_offset, + updates[i].otmu_len); ci_assert_equal(rc, 0); } - memcpy((char*)CI_TCP_PAYLOAD(PKT_IPX_TCP_HDR(af, pkt)) + updates[i].otmu_offset, - updates[i].otmu_base, updates[i].otmu_len); + memcpy((char*) CI_TCP_PAYLOAD(PKT_IPX_TCP_HDR(af, pkt)) + + updates[i].otmu_offset, + updates[i].otmu_base, updates[i].otmu_len); } if( ! (flags & ONLOAD_TEMPLATE_FLAGS_SEND_NOW) ) { @@ -965,21 +949,19 @@ ci_tcp_tmpl_update(ci_netif* ni, ci_tcp_state* ts, cplane_is_valid = oo_cp_ipcache_is_valid(ni, ipcache); if( cplane_is_valid && ! memcmp(oo_tx_ether_hdr(pkt), ci_ip_cache_ether_hdr(ipcache), - oo_tx_ether_hdr_size(pkt)) && + oo_tx_ether_hdr_size(pkt)) && pkt->pio_addr != -1 ) { /* Socket has valid cplane info, the same info is on the pkt, and * it has a pio region allocated so we can send using pio. */ - } - else if( pkt->pio_addr == -1 ) { + } else if( pkt->pio_addr == -1 ) { /* We didn't get a PIO region. This can happen due to various * reasons including a NIC reset while the template was allocated * or we never had one to start with so use normal send. * __ci_tcp_tmpl_normal_send() releases the lock. 
*/ return __ci_tcp_tmpl_normal_send(ni, ts, pkt, sinf, flags); - } - else if( cplane_is_valid ) { + } else if( cplane_is_valid ) { /* The pkt doesn't have the right cplane info but the socket does. * So update the pkt with the latest information. This can cause * the pkt size to change if the route changed from one with vlan @@ -989,16 +971,15 @@ ci_tcp_tmpl_update(ci_netif* ni, ci_tcp_state* ts, */ ci_assert_ge(pkt->pio_addr, 0); ci_ip_set_mac_and_port(ni, ipcache, pkt); - if( oo_tx_ether_hdr_size(pkt) == - (char*)&ipcache->ipx.ip4 - (char*)ci_ip_cache_ether_hdr(ipcache) ) + if( oo_tx_ether_hdr_size(pkt) == + (char*) &ipcache->ipx.ip4 - (char*) ci_ip_cache_ether_hdr(ipcache) ) /* TODO: we need to copy just the ethernet header here. */ rc = ef_pio_memcpy(vi, PKT_START(pkt), pkt->pio_addr, - (char*)PKT_IPX_TCP_HDR(af, pkt) - PKT_START(pkt)); + (char*) PKT_IPX_TCP_HDR(af, pkt) - PKT_START(pkt)); else rc = ef_pio_memcpy(vi, PKT_START(pkt), pkt->pio_addr, pkt->buf_len); ci_assert_equal(rc, 0); - } - else { + } else { /* We could not get mac info, do a normal send. * __ci_tcp_tmpl_normal_send() releases the lock. */ return __ci_tcp_tmpl_normal_send(ni, ts, pkt, sinf, flags); @@ -1040,9 +1021,9 @@ ci_tcp_tmpl_update(ci_netif* ni, ci_tcp_state* ts, /* XXX: Currently, updating the entire TCP header. 
Should only * update the affected portion and only if necessary */ rc = ef_pio_memcpy(vi, TX_PKT_IPX_TCP(af, pkt), - pkt->pio_addr + (char*) TX_PKT_IPX_TCP(af, pkt) - - PKT_START(pkt), CI_TCP_PAYLOAD(PKT_IPX_TCP_HDR(af, pkt)) - - (char*)TX_PKT_IPX_TCP(af, pkt)); + pkt->pio_addr + (char*) TX_PKT_IPX_TCP(af, pkt) - PKT_START(pkt), + CI_TCP_PAYLOAD(PKT_IPX_TCP_HDR(af, pkt)) - + (char*) TX_PKT_IPX_TCP(af, pkt)); ci_assert_equal(rc, 0); /* This cannot fail as we already checked that there is space in @@ -1059,8 +1040,7 @@ ci_tcp_tmpl_update(ci_netif* ni, ci_tcp_state* ts, --ni->state->n_async_pkts; ++ts->stats.tx_tmpl_send_fast; CITP_STATS_NETIF_INC(ni, pio_pkts); - } - else { + } else { /* Unable to send via pio due to tcp state machinery or full TXQ. * So do a normal send. __ci_tcp_tmpl_normal_send() releases the * lock. @@ -1068,14 +1048,14 @@ ci_tcp_tmpl_update(ci_netif* ni, ci_tcp_state* ts, return __ci_tcp_tmpl_normal_send(ni, ts, pkt, sinf, flags); } - out: +out: ci_netif_unlock(ni); return rc; } -int ci_tcp_tmpl_abort(ci_netif* ni, ci_tcp_state* ts, - struct oo_msg_template* omt) +int ci_tcp_tmpl_abort( + ci_netif* ni, ci_tcp_state* ts, struct oo_msg_template* omt) { ci_ip_pkt_fmt* tmpl = ci_tcp_tmpl_omt_to_pkt(omt); int rc = 0; @@ -1085,7 +1065,7 @@ int ci_tcp_tmpl_abort(ci_netif* ni, ci_tcp_state* ts, goto out; } ci_tcp_tmpl_free(ni, ts, tmpl, 1); - out: +out: ci_netif_unlock(ni); return rc; } @@ -1095,9 +1075,7 @@ int ci_tcp_tmpl_abort(ci_netif* ni, ci_tcp_state* ts, static int ci_tcp_sendmsg_enqueue(ci_netif* ni, ci_tcp_state* ts, - ci_ip_pkt_fmt* reverse_list, - int total_bytes, - ci_ip_pkt_queue* sendq) + ci_ip_pkt_fmt* reverse_list, int total_bytes, ci_ip_pkt_queue* sendq) { unsigned seq = tcp_enq_nxt(ts) + total_bytes; oo_pkt_p tail_pkt_id = OO_PKT_P(reverse_list); @@ -1110,7 +1088,7 @@ static int ci_tcp_sendmsg_enqueue(ci_netif* ni, ci_tcp_state* ts, do { pkt = reverse_list; - reverse_list = (ci_ip_pkt_fmt *)CI_USER_PTR_GET(pkt->pf.tcp_tx.next); + 
reverse_list = (ci_ip_pkt_fmt*) CI_USER_PTR_GET(pkt->pf.tcp_tx.next); seq -= pkt->pf.tcp_tx.end_seq; ci_tcp_sendmsg_prep_pkt(ni, ts, pkt, seq); @@ -1118,8 +1096,7 @@ static int ci_tcp_sendmsg_enqueue(ci_netif* ni, ci_tcp_state* ts, pkt->next = send_list; send_list = OO_PKT_P(pkt); ++n_pkts; - } - while( reverse_list ); + } while( reverse_list ); ci_assert_equal(tcp_enq_nxt(ts), seq); tcp_enq_nxt(ts) += total_bytes; @@ -1133,16 +1110,15 @@ static int ci_tcp_sendmsg_enqueue(ci_netif* ni, ci_tcp_state* ts, PKT_CHK(ni, sendq->tail)->next = send_list; sendq->tail = tail_pkt_id; - LOG_TV(ci_log("%s: "NT_FMT "sendq.num=%d enq_nxt=%x", - __FUNCTION__, NT_PRI_ARGS(ni, ts), - sendq->num, tcp_enq_nxt(ts))); + LOG_TV(ci_log("%s: " NT_FMT "sendq.num=%d enq_nxt=%x", __FUNCTION__, + NT_PRI_ARGS(ni, ts), sendq->num, tcp_enq_nxt(ts))); CHECK_TS(ni, ts); return n_pkts; } -static int/*bool*/ +static int /*bool*/ ci_tcp_tx_prequeue(ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_fmt* fill_list) { ci_ip_pkt_fmt* next; @@ -1153,7 +1129,8 @@ ci_tcp_tx_prequeue(ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_fmt* fill_list) pkt = fill_list; while( 1 ) { ++n_pkts; - if( ! (next = CI_USER_PTR_GET(pkt->pf.tcp_tx.next)) ) break; + if( ! 
(next = CI_USER_PTR_GET(pkt->pf.tcp_tx.next)) ) + break; pkt->next = OO_PKT_P(next); pkt = next; } @@ -1164,9 +1141,8 @@ ci_tcp_tx_prequeue(ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_fmt* fill_list) if( next == OO_PP_ID_INVALID ) return 0; OO_PP_INIT(ni, pkt->next, next); - } - while( ci_cas32_fail(&ts->send_prequeue, - OO_PP_ID(pkt->next), OO_PKT_ID(fill_list)) ); + } while( ci_cas32_fail( + &ts->send_prequeue, OO_PP_ID(pkt->next), OO_PKT_ID(fill_list)) ); oo_atomic_add(&ts->send_prequeue_in, n_pkts); ++ts->stats.tx_defer; @@ -1175,8 +1151,8 @@ ci_tcp_tx_prequeue(ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_fmt* fill_list) } -void ci_tcp_sendmsg_enqueue_prequeue(ci_netif* ni, ci_tcp_state* ts, - int/*bool*/ shutdown) +void ci_tcp_sendmsg_enqueue_prequeue( + ci_netif* ni, ci_tcp_state* ts, int /*bool*/ shutdown) { ci_ip_pkt_queue* sendq = &ts->send; ci_ip_pkt_fmt* pkt; @@ -1192,10 +1168,10 @@ void ci_tcp_sendmsg_enqueue_prequeue(ci_netif* ni, ci_tcp_state* ts, /* Grab the contents of the prequeue atomically. */ do { OO_PP_INIT(ni, id, ts->send_prequeue); - if( OO_PP_IS_NULL(id) && ! shutdown) + if( OO_PP_IS_NULL(id) && ! shutdown ) return; } while( ci_cas32_fail(&ts->send_prequeue, OO_PP_ID(id), - shutdown ? OO_PP_ID_INVALID : OO_PP_ID_NULL) ); + shutdown ? OO_PP_ID_INVALID : OO_PP_ID_NULL) ); /* Exit if nothing to send */ if( OO_PP_IS_NULL(id) ) { @@ -1211,18 +1187,19 @@ void ci_tcp_sendmsg_enqueue_prequeue(ci_netif* ni, ci_tcp_state* ts, pkt->next = send_list; send_list = OO_PKT_P(pkt); ++n_pkts; - } - while( OO_PP_NOT_NULL(id) ); + } while( OO_PP_NOT_NULL(id) ); /* Prep each packet. 
*/ while( 1 ) { bytes = pkt->pf.tcp_tx.end_seq; ci_tcp_sendmsg_prep_pkt(ni, ts, pkt, tcp_enq_nxt(ts)); if( pkt->flags & CI_PKT_FLAG_TX_PSH ) - TX_PKT_IPX_TCP(ipcache_af(&ts->s.pkt), pkt)->tcp_flags |= CI_TCP_FLAG_PSH; + TX_PKT_IPX_TCP(ipcache_af(&ts->s.pkt), pkt)->tcp_flags |= + CI_TCP_FLAG_PSH; tcp_enq_nxt(ts) += bytes; - if( OO_PP_IS_NULL(pkt->next) ) break; + if( OO_PP_IS_NULL(pkt->next) ) + break; pkt = PKT_CHK(ni, pkt->next); } @@ -1236,8 +1213,7 @@ void ci_tcp_sendmsg_enqueue_prequeue(ci_netif* ni, ci_tcp_state* ts, if( OO_PP_IS_NULL(sendq->head) ) { sendq->head = send_list; pkt = PKT_CHK(ni, send_list); - } - else { + } else { pkt = PKT_CHK(ni, sendq->tail); pkt->next = send_list; } @@ -1246,8 +1222,7 @@ void ci_tcp_sendmsg_enqueue_prequeue(ci_netif* ni, ci_tcp_state* ts, static int ci_tcp_sendmsg_free_pkt_list(ci_netif* ni, ci_tcp_state* ts, - oo_pkt_p pkt_list, int netif_locked, - int check_aop) + oo_pkt_p pkt_list, int netif_locked, int check_aop) { /* NB. Packets must be "asynchronous". That is, accounted for in * [n_async_pkts]. @@ -1256,7 +1231,7 @@ static int ci_tcp_sendmsg_free_pkt_list(ci_netif* ni, ci_tcp_state* ts, int n_pkts = 0; ci_assert(OO_PP_NOT_NULL(pkt_list)); - ci_assert( ! netif_locked || ci_netif_is_locked(ni)); + ci_assert(! netif_locked || ci_netif_is_locked(ni)); if( ! netif_locked && ! ci_netif_trylock(ni) ) { do { @@ -1264,14 +1239,13 @@ static int ci_tcp_sendmsg_free_pkt_list(ci_netif* ni, ci_tcp_state* ts, pkt_list = pkt->next; /* ?? TODO: cope with these cases */ ci_assert_equal(pkt->refcount, 1); - ci_assert(!(pkt->flags & CI_PKT_FLAG_RX)); + ci_assert(! 
(pkt->flags & CI_PKT_FLAG_RX)); pkt->refcount = 0; __ci_netif_pkt_clean(pkt); ci_netif_pkt_free_nonb_list(ni, OO_PKT_P(pkt), pkt); ++n_pkts; } while( OO_PP_NOT_NULL(pkt_list) ); - } - else { + } else { do { pkt = PKT_CHK(ni, pkt_list); pkt_list = pkt->next; @@ -1279,7 +1253,8 @@ static int ci_tcp_sendmsg_free_pkt_list(ci_netif* ni, ci_tcp_state* ts, ++n_pkts; } while( OO_PP_NOT_NULL(pkt_list) ); ni->state->n_async_pkts -= n_pkts; - if( ! netif_locked ) ci_netif_unlock(ni); + if( ! netif_locked ) + ci_netif_unlock(ni); } return n_pkts; @@ -1301,19 +1276,20 @@ static void ci_netif_pkt_convert_ptr_list(ci_netif* ni, ci_ip_pkt_fmt* list) } -static void -ci_tcp_tx_free_prequeue(ci_netif* ni, ci_tcp_state* ts, int netif_locked) +static void ci_tcp_tx_free_prequeue( + ci_netif* ni, ci_tcp_state* ts, int netif_locked) { int n_pkts; oo_pkt_p id; - ci_assert( ! netif_locked || ci_netif_is_locked(ni)); + ci_assert(! netif_locked || ci_netif_is_locked(ni)); /* Grab contents of prequeue atomically. We might not be the only thread ** trying to free it! */ do { OO_PP_INIT(ni, id, ts->send_prequeue); - if( OO_PP_IS_NULL(id) ) return; + if( OO_PP_IS_NULL(id) ) + return; } while( ci_cas32_fail(&ts->send_prequeue, OO_PP_ID(id), OO_PP_ID_NULL) ); n_pkts = ci_tcp_sendmsg_free_pkt_list(ni, ts, id, netif_locked, 1); @@ -1344,22 +1320,22 @@ void ci_tcp_sendmsg_enqueue_prequeue_deferred(ci_netif* ni, ci_tcp_state* ts) } -ci_inline void ci_tcp_sendmsg_free_unused_pkts(ci_netif* ni, - struct tcp_send_info* sinf) +ci_inline void ci_tcp_sendmsg_free_unused_pkts( + ci_netif* ni, struct tcp_send_info* sinf) { oo_pkt_filler_free_unused_pkts(ni, &sinf->stack_locked, &sinf->pf); } -static int ci_tcp_sendmsg_notsynchronised(ci_netif* ni, ci_tcp_state* ts, - int flags, struct tcp_send_info* sinf) +static int ci_tcp_sendmsg_notsynchronised( + ci_netif* ni, ci_tcp_state* ts, int flags, struct tcp_send_info* sinf) { sinf->rc = 1; /* The same sanity check is done in intercept. 
This one here is to make ** sure (whether needed or not) that internal calls are checked. */ if( ts->s.b.state == CI_TCP_CLOSED ) - sinf->rc = 0; /* use tx_errno */ + sinf->rc = 0; /* use tx_errno */ /* State must be SYN-SENT, but can change under our feet as we don't have ** the netif lock. If non-blocking, return EAGAIN. */ @@ -1369,16 +1345,16 @@ static int ci_tcp_sendmsg_notsynchronised(ci_netif* ni, ci_tcp_state* ts, if( sinf->rc <= 0 ) return -1; -#define CONNECT_IN_PROGRESS ((ts->s.b.state == CI_TCP_SYN_SENT) && \ - ts->s.tx_errno == 0) +#define CONNECT_IN_PROGRESS \ + ((ts->s.b.state == CI_TCP_SYN_SENT) && ts->s.tx_errno == 0) - if( !sinf->stack_locked ) { + if( ! sinf->stack_locked ) { if( (sinf->rc = ci_netif_lock(ni)) ) return -1; sinf->stack_locked = 1; } - CI_TCP_SLEEP_WHILE(ni, ts, CI_SB_FLAG_WAKE_RX, ts->s.so.rcvtimeo_msec, - CONNECT_IN_PROGRESS, &sinf->rc); + CI_TCP_SLEEP_WHILE(ni, ts, CI_SB_FLAG_WAKE_RX, ts->s.so.rcvtimeo_msec, + CONNECT_IN_PROGRESS, &sinf->rc); if( sinf->rc != 0 || ts->s.tx_errno != 0 ) return -1; @@ -1386,10 +1362,8 @@ static int ci_tcp_sendmsg_notsynchronised(ci_netif* ni, ci_tcp_state* ts, } -static void ci_tcp_sendmsg_handle_rc_or_tx_errno(ci_netif* ni, - ci_tcp_state* ts, - int flags, - struct tcp_send_info* sinf) +static void ci_tcp_sendmsg_handle_rc_or_tx_errno( + ci_netif* ni, ci_tcp_state* ts, int flags, struct tcp_send_info* sinf) { sinf->set_errno = 0; @@ -1401,8 +1375,7 @@ static void ci_tcp_sendmsg_handle_rc_or_tx_errno(ci_netif* ni, if( sinf->total_sent ) { sinf->rc = sinf->total_sent; sinf->set_errno = 0; - } - else { + } else { if( ts->s.so_error ) { ci_int32 rc1 = ci_get_so_error(&ts->s); if( rc1 != 0 ) { @@ -1413,7 +1386,7 @@ static void ci_tcp_sendmsg_handle_rc_or_tx_errno(ci_netif* ni, if( sinf->rc == 0 && ts->s.tx_errno ) { LOG_TC(log(LNT_FMT "tx_errno=%d flags=%x total_sent=%d", - LNT_PRI_ARGS(ni, ts), ts->s.tx_errno, flags, sinf->total_sent)); + LNT_PRI_ARGS(ni, ts), ts->s.tx_errno, flags, 
sinf->total_sent)); sinf->rc = ts->s.tx_errno; sinf->set_errno = 1; } @@ -1426,31 +1399,27 @@ static void ci_tcp_sendmsg_handle_rc_or_tx_errno(ci_netif* ni, } -static void ci_tcp_sendmsg_handle_zero_or_tx_errno(ci_netif* ni, - ci_tcp_state* ts, - int flags, - struct tcp_send_info* sinf) +static void ci_tcp_sendmsg_handle_zero_or_tx_errno( + ci_netif* ni, ci_tcp_state* ts, int flags, struct tcp_send_info* sinf) { sinf->rc = 0; return ci_tcp_sendmsg_handle_rc_or_tx_errno(ni, ts, flags, sinf); } -static void ci_tcp_sendmsg_free_fill_list(ci_netif* ni, ci_tcp_state* ts, - int flags, - struct tcp_send_info* sinf) +static void ci_tcp_sendmsg_free_fill_list( + ci_netif* ni, ci_tcp_state* ts, int flags, struct tcp_send_info* sinf) { if( sinf->fill_list ) { ci_netif_pkt_convert_ptr_list(ni, sinf->fill_list); - ci_tcp_sendmsg_free_pkt_list(ni, ts, OO_PKT_P(sinf->fill_list), - sinf->stack_locked, 0); + ci_tcp_sendmsg_free_pkt_list( + ni, ts, OO_PKT_P(sinf->fill_list), sinf->stack_locked, 0); } } -static void ci_tcp_sendmsg_handle_tx_errno(ci_netif* ni, ci_tcp_state* ts, - int flags, - struct tcp_send_info* sinf) +static void ci_tcp_sendmsg_handle_tx_errno( + ci_netif* ni, ci_tcp_state* ts, int flags, struct tcp_send_info* sinf) { ci_tcp_sendmsg_free_fill_list(ni, ts, flags, sinf); ci_tcp_sendmsg_free_unused_pkts(ni, sinf); @@ -1459,9 +1428,8 @@ static void ci_tcp_sendmsg_handle_tx_errno(ci_netif* ni, ci_tcp_state* ts, } -static void ci_tcp_sendmsg_handle_sent_or_rc(ci_netif* ni, ci_tcp_state* ts, - int flags, - struct tcp_send_info* sinf) +static void ci_tcp_sendmsg_handle_sent_or_rc( + ci_netif* ni, ci_tcp_state* ts, int flags, struct tcp_send_info* sinf) { ci_tcp_sendmsg_free_fill_list(ni, ts, flags, sinf); ci_tcp_sendmsg_free_unused_pkts(ni, sinf); @@ -1472,21 +1440,20 @@ static void ci_tcp_sendmsg_handle_sent_or_rc(ci_netif* ni, ci_tcp_state* ts, if( sinf->total_sent ) { sinf->rc = sinf->total_sent; sinf->set_errno = 0; - } - else { + } else { sinf->rc = -sinf->rc; 
sinf->set_errno = 1; } } -static int ci_tcp_sendmsg_no_pkt_buf(ci_netif* ni, ci_tcp_state* ts, - int flags, struct tcp_send_info* sinf) +static int ci_tcp_sendmsg_no_pkt_buf( + ci_netif* ni, ci_tcp_state* ts, int flags, struct tcp_send_info* sinf) { ci_ip_pkt_fmt* pkt; do { pkt = ci_netif_pkt_alloc_nonb(ni); - if( pkt ) + if( pkt ) oo_pkt_filler_add_pkt(&sinf->pf, pkt); else break; @@ -1496,7 +1463,7 @@ static int ci_tcp_sendmsg_no_pkt_buf(ci_netif* ni, ci_tcp_state* ts, return 0; else { CITP_STATS_NETIF_INC(ni, tcp_send_nonb_pool_empty); - if( !si_trylock(ni, sinf) ) { + if( ! si_trylock(ni, sinf) ) { if( sinf->n_filled ) return 1; if( (sinf->rc = ci_netif_lock(ni)) != 0 ) { @@ -1514,7 +1481,7 @@ static int ci_tcp_sendmsg_no_pkt_buf(ci_netif* ni, ci_tcp_state* ts, * it might be provoked to allocate more memory when none is needed. */ ci_netif_poll(ni); - + while( 1 ) { ci_assert(ci_netif_is_locked(ni)); do { @@ -1526,12 +1493,10 @@ static int ci_tcp_sendmsg_no_pkt_buf(ci_netif* ni, ci_tcp_state* ts, pkt->flags = CI_PKT_FLAG_NONB_POOL; ++ni->state->n_async_pkts; oo_pkt_filler_add_pkt(&sinf->pf, pkt); - } - else if( sinf->n_filled ) { + } else if( sinf->n_filled ) { /* If we've filled any packets, push them out before blocking. */ return 1; - } - else + } else break; } while( --sinf->n_needed > 0 ); @@ -1541,7 +1506,7 @@ static int ci_tcp_sendmsg_no_pkt_buf(ci_netif* ni, ci_tcp_state* ts, ci_assert(sinf->fill_list == 0); /* Do not block on pkt allocation if this is non-blocking send */ - if( (flags & MSG_DONTWAIT) && + if( (flags & MSG_DONTWAIT) && (NI_OPTS(ni).tcp_nonblock_no_pkts_mode == 1) ) { /* errno based on reading of __ip_append_data() and * udp_sendmsg() when skb allocation fails in kernel 3.16. @@ -1551,8 +1516,8 @@ static int ci_tcp_sendmsg_no_pkt_buf(ci_netif* ni, ci_tcp_state* ts, return -1; } - sinf->rc = ci_netif_pkt_wait(ni, &ts->s, sinf->stack_locked ? - CI_SLEEP_NETIF_LOCKED : 0); + sinf->rc = ci_netif_pkt_wait( + ni, &ts->s, sinf->stack_locked ? 
CI_SLEEP_NETIF_LOCKED : 0); sinf->stack_locked = 0; if( ci_netif_pkt_wait_was_interrupted(sinf->rc) ) { ci_tcp_sendmsg_handle_sent_or_rc(ni, ts, flags, sinf); @@ -1560,7 +1525,7 @@ static int ci_tcp_sendmsg_no_pkt_buf(ci_netif* ni, ci_tcp_state* ts, } do { pkt = ci_netif_pkt_alloc_nonb(ni); - if( pkt ) + if( pkt ) oo_pkt_filler_add_pkt(&sinf->pf, pkt); else break; @@ -1576,7 +1541,7 @@ static int ci_tcp_sendmsg_no_pkt_buf(ci_netif* ni, ci_tcp_state* ts, /* Start of loop expects lock to be held */ ci_assert(sinf->stack_locked == 0); - if( !si_trylock(ni, sinf) ) { + if( ! si_trylock(ni, sinf) ) { if( (sinf->rc = ci_netif_lock(ni)) != 0 ) { ci_tcp_sendmsg_handle_sent_or_rc(ni, ts, flags, sinf); return -1; @@ -1592,8 +1557,8 @@ static int ci_tcp_sendmsg_no_pkt_buf(ci_netif* ni, ci_tcp_state* ts, } -ci_inline int ci_tcp_sendmsg_spin(ci_netif* ni, ci_tcp_state* ts, - int flags, struct tcp_send_info* sinf) +ci_inline int ci_tcp_sendmsg_spin( + ci_netif* ni, ci_tcp_state* ts, int flags, struct tcp_send_info* sinf) { ci_uint64 now_frc; ci_uint64 schedule_frc; @@ -1607,8 +1572,8 @@ ci_inline int ci_tcp_sendmsg_spin(ci_netif* ni, ci_tcp_state* ts, schedule_frc = now_frc; if( ts->s.so.sndtimeo_msec ) { - ci_uint64 max_so_spin = (ci_uint64)ts->s.so.sndtimeo_msec * - IPTIMER_STATE(ni)->khz; + ci_uint64 max_so_spin = + (ci_uint64) ts->s.so.sndtimeo_msec * IPTIMER_STATE(ni)->khz; if( max_so_spin <= max_spin ) { max_spin = max_so_spin; spin_limit_by_so = 1; @@ -1629,8 +1594,7 @@ ci_inline int ci_tcp_sendmsg_spin(ci_netif* ni, ci_tcp_state* ts, ci_tcp_sendmsg_handle_tx_errno(ni, ts, flags, sinf); return -1; } - } - else if( ! ni->state->is_spinner ) + } else if( ! 
ni->state->is_spinner ) ni->state->is_spinner = 1; } if( sinf->stack_locked ) { @@ -1638,9 +1602,8 @@ ci_inline int ci_tcp_sendmsg_spin(ci_netif* ni, ci_tcp_state* ts, sinf->stack_locked = 0; } ci_frc64(&now_frc); - sinf->rc = OO_SPINLOOP_PAUSE_CHECK_SIGNALS(ni, now_frc, &schedule_frc, - ts->s.so.sndtimeo_msec, - NULL, si); + sinf->rc = OO_SPINLOOP_PAUSE_CHECK_SIGNALS( + ni, now_frc, &schedule_frc, ts->s.so.sndtimeo_msec, NULL, si); if( sinf->rc != 0 ) { ni->state->is_spinner = 0; ci_tcp_sendmsg_handle_sent_or_rc(ni, ts, flags, sinf); @@ -1669,22 +1632,21 @@ ci_inline int ci_tcp_sendmsg_spin(ci_netif* ni, ci_tcp_state* ts, } return 1; } - -static int ci_tcp_sendmsg_block(ci_netif* ni, ci_tcp_state* ts, - int flags, struct tcp_send_info* sinf) +static int ci_tcp_sendmsg_block( + ci_netif* ni, ci_tcp_state* ts, int flags, struct tcp_send_info* sinf) { ci_uint64 sleep_seq; - CI_IP_SOCK_STATS_INC_TXSTUCK( ts ); + CI_IP_SOCK_STATS_INC_TXSTUCK(ts); do { if( ts->s.tx_errno ) { ci_tcp_sendmsg_handle_tx_errno(ni, ts, flags, sinf); return -1; } - + /* Record the current [sleep_seq] and check again to ensure we do a * race-free block. */ @@ -1693,41 +1655,39 @@ static int ci_tcp_sendmsg_block(ci_netif* ni, ci_tcp_state* ts, sinf->sendq_credit = ci_tcp_tx_send_space(ni, ts); if( sinf->sendq_credit > 0 ) return 0; - - CI_IP_SOCK_STATS_INC_TXSLEEP( ts ); - - sinf->rc = - ci_sock_sleep(ni, &ts->s.b, CI_SB_FLAG_WAKE_TX, - sinf->stack_locked ? CI_SLEEP_NETIF_LOCKED : 0, - sleep_seq, &sinf->timeout); + + CI_IP_SOCK_STATS_INC_TXSLEEP(ts); + + sinf->rc = ci_sock_sleep(ni, &ts->s.b, CI_SB_FLAG_WAKE_TX, + sinf->stack_locked ? 
CI_SLEEP_NETIF_LOCKED : 0, sleep_seq, + &sinf->timeout); /* ci_sock_sleep drops lock */ sinf->stack_locked = 0; - + if( sinf->rc < 0 ) { ci_tcp_sendmsg_handle_sent_or_rc(ni, ts, flags, sinf); return -1; } - } while(1); + } while( 1 ); } -static int ci_tcp_sendmsg_slowpath(ci_netif* ni, ci_tcp_state* ts, - const ci_iovec* iov, unsigned long iovlen, - int flags, struct tcp_send_info* sinf - CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) +static int ci_tcp_sendmsg_slowpath(ci_netif* ni, ci_tcp_state* ts, + const ci_iovec* iov, unsigned long iovlen, int flags, + struct tcp_send_info* sinf CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) { /* Set NO_TX_ADVANCE flag out here in order to ensure that * ci_tcp_sendmsg can't really push any packets out; all it can do * is enqueue packets. Then we set [snd_up] to the correct value - * before unsetting the flag. + * before unsetting the flag. * * The whole point is that ci_tcp_sendmsg() can proceed without giving a * damn about urgent data. */ int rc; unsigned enq_nxt_before; - - if( !sinf->total_unsent ) { + + if( ! sinf->total_unsent ) { sinf->rc = 0; return -1; } @@ -1739,7 +1699,7 @@ static int ci_tcp_sendmsg_slowpath(ci_netif* ni, ci_tcp_state* ts, sinf->rc = rc; return -1; } - + /* Poll first, so we have an accurate view of space in the send queue. 
*/ if( ci_netif_may_poll(ni) && ci_netif_need_poll(ni) ) ci_netif_poll(ni); @@ -1751,14 +1711,14 @@ static int ci_tcp_sendmsg_slowpath(ci_netif* ni, ci_tcp_state* ts, */ tcp_snd_up(ts) = tcp_enq_nxt(ts) + sinf->total_unsent; enq_nxt_before = tcp_enq_nxt(ts); - + ts->tcpflags |= CI_TCPT_FLAG_NO_TX_ADVANCE; ci_netif_unlock(ni); - sinf->rc = ci_tcp_sendmsg(ni, ts, iov, iovlen, (flags &~ MSG_OOB) - CI_KERNEL_ARG(addr_spc)); - + sinf->rc = ci_tcp_sendmsg( + ni, ts, iov, iovlen, (flags & ~MSG_OOB) CI_KERNEL_ARG(addr_spc)); + rc = ci_netif_lock(ni); if( rc != 0 ) { /* If this happens (should only be from the kernel, which can't @@ -1787,8 +1747,7 @@ static int ci_tcp_sendmsg_slowpath(ci_netif* ni, ci_tcp_state* ts, static int can_do_msg_warm(ci_netif* ni, ci_tcp_state* ts, - struct tcp_send_info* sinf, int total_unsent, - int flags) + struct tcp_send_info* sinf, int total_unsent, int flags) { /* Check all conditions that put us on the slow path for a normal * sends or unsupported conditions for ONLOAD_MSG_WARM. @@ -1800,25 +1759,20 @@ static int can_do_msg_warm(ci_netif* ni, ci_tcp_state* ts, * consider doing that in the future if we suspect that msg_warm can * help with them. */ - return si_trylock(ni, sinf) && - ci_ip_queue_is_empty(&ts->send) && - ci_ip_queue_is_empty(&ts->retrans) && - ! (flags & MSG_MORE) && - total_unsent < tcp_eff_mss(ts) && - total_unsent > 0 && - ! (ts->s.s_aflags & CI_SOCK_AFLAG_CORK) && - ! ts->s.tx_errno && - SEQ_LE(tcp_enq_nxt(ts) + total_unsent, ts->snd_max) && + return si_trylock(ni, sinf) && ci_ip_queue_is_empty(&ts->send) && + ci_ip_queue_is_empty(&ts->retrans) && ! (flags & MSG_MORE) && + total_unsent < tcp_eff_mss(ts) && total_unsent > 0 && + ! (ts->s.s_aflags & CI_SOCK_AFLAG_CORK) && ! ts->s.tx_errno && + SEQ_LE(tcp_enq_nxt(ts) + total_unsent, ts->snd_max) && #if CI_CFG_PORT_STRIPING - ! (ts->tcpflags & CI_TCPT_FLAG_STRIPE) && + ! (ts->tcpflags & CI_TCPT_FLAG_STRIPE) && #endif - ! (ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE); + ! 
(ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE); } -static __attribute__ ((__noinline__)) void -unroll_msg_warm(ci_netif* ni, ci_tcp_state* ts, struct tcp_send_info* sinf, - int is_zc_send) +static __attribute__((__noinline__)) void unroll_msg_warm( + ci_netif* ni, ci_tcp_state* ts, struct tcp_send_info* sinf, int is_zc_send) { ci_ip_pkt_fmt* pkt; ++ts->stats.tx_msg_warm; @@ -1843,8 +1797,7 @@ unroll_msg_warm(ci_netif* ni, ci_tcp_state* ts, struct tcp_send_info* sinf, if( ! is_zc_send ) { pkt = PKT_CHK(ni, ts->send.tail); ci_netif_pkt_release_1ref(ni, pkt); - } - else { + } else { /* ci_tcp_sendmsg_enqueue() decrements n_async_pkts. It is normally * rolled back in some way by pkt_release(), but in case of zc_send * we should fix this number. */ @@ -1854,9 +1807,8 @@ unroll_msg_warm(ci_netif* ni, ci_tcp_state* ts, struct tcp_send_info* sinf, /* Grab packet buffers. */ -static int -ci_tcp_send_alloc_pkts(ci_netif* ni, ci_tcp_state* ts, - struct tcp_send_info* sinf, int got) +static int ci_tcp_send_alloc_pkts( + ci_netif* ni, ci_tcp_state* ts, struct tcp_send_info* sinf, int got) { ci_ip_pkt_fmt* pkt; int rc; @@ -1864,9 +1816,8 @@ ci_tcp_send_alloc_pkts(ci_netif* ni, ci_tcp_state* ts, ci_assert_gt(sinf->total_unsent, 0); ci_assert_gt(sinf->sendq_credit, 0); - sinf->n_needed = ci_tcp_tx_n_pkts_needed(ts->eff_mss, sinf->total_unsent, - CI_CFG_TCP_TX_BATCH, - sinf->sendq_credit); + sinf->n_needed = ci_tcp_tx_n_pkts_needed(ts->eff_mss, sinf->total_unsent, + CI_CFG_TCP_TX_BATCH, sinf->sendq_credit); rc = sinf->n_needed; sinf->fill_list = 0; sinf->fill_list_bytes = 0; @@ -1879,10 +1830,10 @@ ci_tcp_send_alloc_pkts(ci_netif* ni, ci_tcp_state* ts, if( (pkt = ci_netif_pkt_tx_tcp_alloc(ni, ts)) ) { ++ni->state->n_async_pkts; oo_pkt_filler_add_pkt(&sinf->pf, pkt); - } - else - return rc;; - } else + } else + return rc; + ; + } else return rc; sinf->n_needed--; } @@ -1890,30 +1841,26 @@ ci_tcp_send_alloc_pkts(ci_netif* ni, ci_tcp_state* ts, return rc; } -static void 
-ci_tcp_send_fill_pkts(ci_netif* ni, ci_tcp_state* ts, - struct tcp_send_info* sinf, ci_iovec_ptr* piov, - int n_pkts - CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) +static void ci_tcp_send_fill_pkts(ci_netif* ni, ci_tcp_state* ts, + struct tcp_send_info* sinf, ci_iovec_ptr* piov, + int n_pkts CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) { ci_assert(! ci_iovec_ptr_is_empty_proper(piov)); ci_assert_equal(sinf->n_needed, 0); do { - sinf->fill_list_bytes += - ci_tcp_sendmsg_fill_pkt(ni, ts, sinf, piov, ts->outgoing_hdrs_len, - ts->eff_mss CI_KERNEL_ARG(addr_spc)); + sinf->fill_list_bytes += ci_tcp_sendmsg_fill_pkt(ni, ts, sinf, piov, + ts->outgoing_hdrs_len, ts->eff_mss CI_KERNEL_ARG(addr_spc)); ++sinf->n_filled; CI_USER_PTR_SET(sinf->pf.pkt->pf.tcp_tx.next, sinf->fill_list); sinf->fill_list = sinf->pf.pkt; - } - while( --n_pkts > 0 ); + } while( --n_pkts > 0 ); } /* returns 1 if data sent, 0 otherwise */ -static int ci_tcp_send_via_prequeue(ci_netif* ni, ci_tcp_state* ts, - struct tcp_send_info* sinf) +static int ci_tcp_send_via_prequeue( + ci_netif* ni, ci_tcp_state* ts, struct tcp_send_info* sinf) { int queued = ci_tcp_tx_prequeue(ni, ts, sinf->fill_list); @@ -1935,10 +1882,8 @@ static int ci_tcp_send_via_prequeue(ci_netif* ni, ci_tcp_state* ts, /* It is not safe to call this function while holding the netif lock */ /*! 
\todo Confirm */ -int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, - const ci_iovec* iov, unsigned long iovlen, - int flags - CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) +int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, const ci_iovec* iov, + unsigned long iovlen, int flags CI_KERNEL_ARG(ci_addr_spc_t addr_spc)) { ci_ip_pkt_queue* sendq = &ts->send; ci_ip_pkt_fmt* pkt; @@ -1968,8 +1913,8 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, sinf.timeout = ts->s.so.sndtimeo_msec; sinf.sendq_credit = 0; #ifndef __KERNEL__ - sinf.tcp_send_spin = - oo_per_thread_get()->spinstate & (1 << ONLOAD_SPIN_TCP_SEND); + sinf.tcp_send_spin = + oo_per_thread_get()->spinstate & (1 << ONLOAD_SPIN_TCP_SEND); if( sinf.tcp_send_spin ) ci_frc64(&sinf.start_frc); #else @@ -1977,10 +1922,10 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, #endif - if(CI_UNLIKELY( (~ts->s.b.state & CI_TCP_STATE_SYNCHRONISED) )) + if( CI_UNLIKELY((~ts->s.b.state & CI_TCP_STATE_SYNCHRONISED)) ) goto not_synchronised; - is_sync: +is_sync: /* We want (int)(2 * MAX_SEND_CHUNK) > 0 * sinf.total_unsent is `int` and must be positive, otherwise our code @@ -1989,13 +1934,14 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, * MAX_SEND_CHUNK. */ #define MAX_SEND_CHUNK 0x3fffffff - for( m = 0; m < (int)iovlen; ++m ) { + for( m = 0; m < (int) iovlen; ++m ) { sinf.total_unsent += CI_IOVEC_LEN(&iov[m]); - if(CI_UNLIKELY( CI_IOVEC_BASE(&iov[m]) == NULL && - CI_IOVEC_LEN(&iov[m]) > 0 )) { + if( CI_UNLIKELY( + CI_IOVEC_BASE(&iov[m]) == NULL && CI_IOVEC_LEN(&iov[m]) > 0) ) { sinf.rc = -EFAULT; ci_tcp_sendmsg_handle_rc_or_tx_errno(ni, ts, flags, &sinf); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } if( CI_IOVEC_LEN(&iov[m]) > MAX_SEND_CHUNK || @@ -2006,15 +1952,15 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, } #undef MAX_SEND_CHUNK - if(CI_UNLIKELY( ! 
sinf.total_unsent || - (flags & (MSG_OOB | ONLOAD_MSG_WARM)) )) + if( CI_UNLIKELY( + ! sinf.total_unsent || (flags & (MSG_OOB | ONLOAD_MSG_WARM))) ) goto slow_path; - fast_path: +fast_path: ci_iovec_ptr_init_nz(&piov, iov, iovlen); ci_assert_le(tcp_eff_mss(ts), - CI_MAX_ETH_DATA_LEN - sizeof(ci_tcp_hdr) - sizeof(ci_ip4_hdr)); + CI_MAX_ETH_DATA_LEN - sizeof(ci_tcp_hdr) - sizeof(ci_ip4_hdr)); if( si_trylock(ni, &sinf) && ci_ip_queue_not_empty(sendq) ) { ci_assert(! (flags & ONLOAD_MSG_WARM)); @@ -2026,19 +1972,18 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, /* If we have more data to send, do it. */ if( sinf.total_unsent > 0 ) goto non_fast; - + /* This is last packet. Set PUSH flag and MORE flag. * Send it if possible. */ pkt = PKT_CHK(ni, sendq->tail); if( (flags & MSG_MORE) || (ts->s.s_aflags & CI_SOCK_AFLAG_CORK) ) { pkt->flags |= CI_PKT_FLAG_TX_MORE; - pkt->flags &=~ CI_PKT_FLAG_TX_PSH_ON_ACK; - } - else { + pkt->flags &= ~CI_PKT_FLAG_TX_PSH_ON_ACK; + } else { pkt->flags &= ~CI_PKT_FLAG_TX_MORE; TX_PKT_IPX_TCP(af, pkt)->tcp_flags |= CI_TCP_FLAG_PSH; } - + /* We should somehow push the packet. However, it was not pushed * before. It means: * - we have no window, and zero window timer will wake us up; @@ -2049,16 +1994,16 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, * just call it. */ #ifdef MSG_SENDPAGE_NOTLAST - if( ~flags & MSG_SENDPAGE_NOTLAST || - ci_tcp_tx_send_space(ni, ts) <= 0 ) + if( ~flags & MSG_SENDPAGE_NOTLAST || ci_tcp_tx_send_space(ni, ts) <= 0 ) #endif - ci_tcp_tx_advance_nagle(ni, ts); + ci_tcp_tx_advance_nagle(ni, ts); - if( sinf.stack_locked ) ci_netif_unlock(ni); + if( sinf.stack_locked ) + ci_netif_unlock(ni); return sinf.total_sent; } - non_fast: +non_fast: ci_assert(sinf.total_unsent > 0); ci_assert(! ci_iovec_ptr_is_empty_proper(&piov)); @@ -2072,13 +2017,14 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, * ci_tcp_tx_send_space() constrains. 
*/ if( sinf.sendq_credit <= 0 && NI_OPTS(ni).tcp_sndbuf_mode && sinf.total_sent && - ( ts->congstate == CI_TCP_CONG_OPEN || - ts->congstate == CI_TCP_CONG_FAST_RECOV ) ) + (ts->congstate == CI_TCP_CONG_OPEN || + ts->congstate == CI_TCP_CONG_FAST_RECOV) ) sinf.sendq_credit += ts->retrans.num >> 1; - if( sinf.sendq_credit <= 0 ) goto send_q_full; + if( sinf.sendq_credit <= 0 ) + goto send_q_full; - try_again: +try_again: while( 1 ) { /* Grab packet buffers and fill them with data. */ m = ci_tcp_send_alloc_pkts(ni, ts, &sinf, 0); @@ -2091,7 +2037,7 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, /* Look on MSG_MORE: do not send the last packet if it is not full */ if( (flags & MSG_MORE) || (ts->s.s_aflags & CI_SOCK_AFLAG_CORK) ) { sinf.pf.pkt->flags |= CI_PKT_FLAG_TX_MORE; - sinf.pf.pkt->flags &=~ CI_PKT_FLAG_TX_PSH_ON_ACK; + sinf.pf.pkt->flags &= ~CI_PKT_FLAG_TX_PSH_ON_ACK; } filled_some_pkts: @@ -2102,15 +2048,14 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, if( ts->s.tx_errno ) { ci_assert(! 
(flags & ONLOAD_MSG_WARM)); ci_tcp_sendmsg_handle_tx_errno(ni, ts, flags, &sinf); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } /* eff_mss may now be != ts->eff_mss */ - ts->send_in += ci_tcp_sendmsg_enqueue(ni, ts, - sinf.fill_list, - sinf.fill_list_bytes, - &ts->send); + ts->send_in += ci_tcp_sendmsg_enqueue( + ni, ts, sinf.fill_list, sinf.fill_list_bytes, &ts->send); sinf.total_sent += sinf.fill_list_bytes; sinf.total_unsent -= sinf.fill_list_bytes; @@ -2129,13 +2074,14 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, ci_tcp_tx_send_space(ni, ts) <= 0 ) #endif { - ci_tcp_tx_advance_nagle(ni, ts); - if(CI_UNLIKELY( flags & ONLOAD_MSG_WARM )) - unroll_msg_warm(ni, ts, &sinf, 0); + ci_tcp_tx_advance_nagle(ni, ts); + if( CI_UNLIKELY(flags & ONLOAD_MSG_WARM) ) + unroll_msg_warm(ni, ts, &sinf, 0); } /* Assert that there's no need to free unused packets */ ci_assert_equal(sinf.pf.alloc_pkt, NULL); - if( sinf.stack_locked ) ci_netif_unlock(ni); + if( sinf.stack_locked ) + ci_netif_unlock(ni); return sinf.total_sent; } @@ -2144,30 +2090,31 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, ci_tcp_tx_send_space(ni, ts) <= 0 ) #endif { - /* Stuff left to do -- push out what we've got first. */ - ci_assert(! (flags & ONLOAD_MSG_WARM)); - if( ci_netif_may_poll(ni) && ci_netif_need_poll(ni) ) - ci_netif_poll(ni); - sinf.fill_list = 0; - if( ts->s.tx_errno ) { - ci_tcp_sendmsg_handle_tx_errno(ni, ts, flags, &sinf); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); - return sinf.rc; - } - if(CI_LIKELY( ! ci_ip_queue_is_empty(sendq) )) - ci_tcp_tx_advance(ts, ni); + /* Stuff left to do -- push out what we've got first. */ + ci_assert(! 
(flags & ONLOAD_MSG_WARM)); + if( ci_netif_may_poll(ni) && ci_netif_need_poll(ni) ) + ci_netif_poll(ni); + sinf.fill_list = 0; + if( ts->s.tx_errno ) { + ci_tcp_sendmsg_handle_tx_errno(ni, ts, flags, &sinf); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); + return sinf.rc; + } + if( CI_LIKELY(! ci_ip_queue_is_empty(sendq)) ) + ci_tcp_tx_advance(ts, ni); } - } - else { + } else { if( sinf.total_unsent == sinf.fill_list_bytes ) /* The last segment needs to have the PSH flag set. */ - if ( ! (sinf.fill_list->flags & CI_PKT_FLAG_TX_MORE) ) + if( ! (sinf.fill_list->flags & CI_PKT_FLAG_TX_MORE) ) sinf.fill_list->flags |= CI_PKT_FLAG_TX_PSH; /* Couldn't get the netif lock, so enqueue packets on the prequeue. */ if( ! ci_tcp_send_via_prequeue(ni, ts, &sinf) ) { ci_tcp_sendmsg_handle_tx_errno(ni, ts, flags, &sinf); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } sinf.total_sent += sinf.fill_list_bytes; @@ -2175,7 +2122,8 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, if( sinf.total_unsent == 0 ) { /* Assert that there's no need to free unused packets */ ci_assert_equal(sinf.pf.alloc_pkt, NULL); - if( sinf.stack_locked ) ci_netif_unlock(ni); + if( sinf.stack_locked ) + ci_netif_unlock(ni); return sinf.total_sent; } /* We've more to send, so keep filling buffers. */ @@ -2186,11 +2134,12 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, /* It looks like we don't have any credit in the send queue; * let's check for sure. */ sinf.sendq_credit = ci_tcp_tx_send_space(ni, ts); - if( sinf.sendq_credit <= 0 ) goto send_q_full; + if( sinf.sendq_credit <= 0 ) + goto send_q_full; } } - send_q_full: +send_q_full: /* We jump into here when the send queue (including prequeue) is full. */ ci_assert(! 
(flags & ONLOAD_MSG_WARM)); ci_assert(sinf.total_unsent > 0); @@ -2201,11 +2150,13 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, ci_netif_poll(ni); if( ts->s.tx_errno ) { ci_tcp_sendmsg_handle_tx_errno(ni, ts, flags, &sinf); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } sinf.sendq_credit = ci_tcp_tx_send_space(ni, ts); - if( sinf.sendq_credit > 0 ) goto try_again; + if( sinf.sendq_credit > 0 ) + goto try_again; } /* The send queue is full, the prequeue is empty, and the netif has been @@ -2220,7 +2171,8 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, */ sinf.rc = -EAGAIN; ci_tcp_sendmsg_handle_sent_or_rc(ni, ts, flags, &sinf); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } @@ -2230,7 +2182,8 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, if( rc == 0 ) goto try_again; else if( rc == -1 ) { - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } sinf.tcp_send_spin = 0; @@ -2239,38 +2192,39 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, if( ci_tcp_sendmsg_block(ni, ts, flags, &sinf) == 0 ) goto try_again; else { - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } - no_pkt_buf: - { - int rc; - if(CI_UNLIKELY( flags & ONLOAD_MSG_WARM )) { - /* ONLOAD_MSG_WARM should only try to allocate 1 buffer and if - * that failed, then the buffer list should be empty. As we are - * not hitting the fast path, just return. 
- */ - ++ts->stats.tx_msg_warm_abort; - ci_assert_equal(sinf.pf.alloc_pkt, NULL); - if( sinf.stack_locked ) - ci_netif_unlock(ni); - return 0; - } - rc = ci_tcp_sendmsg_no_pkt_buf(ni, ts, flags, &sinf); - if( rc == 0 ) - goto got_pkt_buf; - else if( rc == 1 ) - goto filled_some_pkts; - else { - ci_assert(rc == -1); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); - return sinf.rc; - } +no_pkt_buf : { + int rc; + if( CI_UNLIKELY(flags & ONLOAD_MSG_WARM) ) { + /* ONLOAD_MSG_WARM should only try to allocate 1 buffer and if + * that failed, then the buffer list should be empty. As we are + * not hitting the fast path, just return. + */ + ++ts->stats.tx_msg_warm_abort; + ci_assert_equal(sinf.pf.alloc_pkt, NULL); + if( sinf.stack_locked ) + ci_netif_unlock(ni); + return 0; + } + rc = ci_tcp_sendmsg_no_pkt_buf(ni, ts, flags, &sinf); + if( rc == 0 ) + goto got_pkt_buf; + else if( rc == 1 ) + goto filled_some_pkts; + else { + ci_assert(rc == -1); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); + return sinf.rc; } +} - not_synchronised: - if(CI_UNLIKELY( flags & ONLOAD_MSG_WARM )) { +not_synchronised: + if( CI_UNLIKELY(flags & ONLOAD_MSG_WARM) ) { ++ts->stats.tx_msg_warm_abort; if( sinf.stack_locked ) ci_netif_unlock(ni); @@ -2279,13 +2233,14 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, if( ci_tcp_sendmsg_notsynchronised(ni, ts, flags, &sinf) == -1 ) { ci_tcp_sendmsg_handle_rc_or_tx_errno(ni, ts, flags, &sinf); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } goto is_sync; - slow_path: - if(CI_UNLIKELY( flags & ONLOAD_MSG_WARM )) { +slow_path: + if( CI_UNLIKELY(flags & ONLOAD_MSG_WARM) ) { if( can_do_msg_warm(ni, ts, &sinf, sinf.total_unsent, flags) ) { ts->tcpflags |= CI_TCPT_FLAG_MSG_WARM; #if CI_CFG_BURST_CONTROL @@ -2301,10 +2256,11 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, RET_WITH_ERRNO(EINVAL); return 0; } - if( ci_tcp_sendmsg_slowpath(ni, ts, iov, 
iovlen, flags, &sinf - CI_KERNEL_ARG(addr_spc)) == -1 ) { + if( ci_tcp_sendmsg_slowpath( + ni, ts, iov, iovlen, flags, &sinf CI_KERNEL_ARG(addr_spc)) == -1 ) { ci_tcp_sendmsg_handle_rc_or_tx_errno(ni, ts, flags, &sinf); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } return sinf.rc; @@ -2312,10 +2268,8 @@ int ci_tcp_sendmsg(ci_netif* ni, ci_tcp_state* ts, #if CI_CFG_TX_CRC_OFFLOAD -ci_int8 -ci_tcp_offload_zc_send_accum_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, - struct ci_pkt_zc_payload* zcp, - unsigned payload_offset, void* prefix) +ci_int8 ci_tcp_offload_zc_send_accum_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, + struct ci_pkt_zc_payload* zcp, unsigned payload_offset, void* prefix) { struct ci_tcp_offload_zc_send_prefix* crc_prefix = prefix; ci_tcp_state* ts = SP_TO_TCP(ni, pkt->pf.tcp_tx.sock_id); @@ -2336,8 +2290,8 @@ ci_tcp_offload_zc_send_accum_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, crc_prefix->accum_crc.reset = (id == ZC_NVME_CRC_ID_INVALID); if( crc_prefix->accum_crc.reset ) { - rc = ci_nvme_plugin_crc_id_alloc(&ni->state->nvme_crc_plugin_idp[intf_i], - &id); + rc = ci_nvme_plugin_crc_id_alloc( + &ni->state->nvme_crc_plugin_idp[intf_i], &id); if( rc < 0 ) return rc; } @@ -2347,15 +2301,19 @@ ci_tcp_offload_zc_send_accum_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, #if CI_CFG_NVME_LOCAL_CRC_MODE #ifdef __KERNEL__ - ci_log("WARNING: Unable to compute CRC in kernel context; " - "emitted CRC will be invalid"); + ci_log( + "WARNING: Unable to compute CRC in kernel context; " + "emitted CRC will be invalid"); #else if( zcp->local_addr == NULL ) { ci_log("ERROR: %s: buffer is non-local\n", __func__); abort(); } - ci_uint32 crc = crc_prefix->accum_crc.reset ? 0 : ni->state->nvme_crc_plugin_idp[intf_i].crcs[id]; - ni->state->nvme_crc_plugin_idp[intf_i].crcs[id] = crc32c(crc ^ 0xffffffff, zcp->local_addr, zcp->len); + ci_uint32 crc = crc_prefix->accum_crc.reset + ? 
0 + : ni->state->nvme_crc_plugin_idp[intf_i].crcs[id]; + ni->state->nvme_crc_plugin_idp[intf_i].crcs[id] = + crc32c(crc ^ 0xffffffff, zcp->local_addr, zcp->len); #endif #endif @@ -2363,10 +2321,8 @@ ci_tcp_offload_zc_send_accum_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, } -ci_uint8 -ci_tcp_offload_zc_send_insert_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, - struct ci_pkt_zc_payload* zcp, - unsigned payload_offset, void* prefix) +ci_uint8 ci_tcp_offload_zc_send_insert_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, + struct ci_pkt_zc_payload* zcp, unsigned payload_offset, void* prefix) { struct ci_tcp_offload_zc_send_prefix* crc_prefix = prefix; ci_tcp_state* ts = SP_TO_TCP(ni, pkt->pf.tcp_tx.sock_id); @@ -2374,7 +2330,8 @@ ci_tcp_offload_zc_send_insert_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, ci_assert_equal(NI_OPTS(ni).tcp_offload_plugin, CITP_TCP_OFFLOAD_NVME); crc_prefix->type = CI_TCP_OFFLOAD_ZC_SEND_PREFIX_TYPE_INSERT; - crc_prefix->data_offset = payload_offset + zcp->len - zcp->crc_insert_n_bytes; + crc_prefix->data_offset = + payload_offset + zcp->len - zcp->crc_insert_n_bytes; crc_prefix->insert_crc.first_byte = zcp->crc_insert_first_byte; crc_prefix->insert_crc.n_bytes = zcp->crc_insert_n_bytes; if( zcp->crc_id == ZC_NVME_CRC_ID_INVALID ) { @@ -2388,15 +2345,18 @@ ci_tcp_offload_zc_send_insert_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, #if CI_CFG_NVME_LOCAL_CRC_MODE #ifdef __KERNEL__ - ci_log("WARNING: Unable to insert CRC in kernel context; " - "emitted CRC will be invalid"); + ci_log( + "WARNING: Unable to insert CRC in kernel context; " + "emitted CRC will be invalid"); #else if( zcp->local_addr == NULL ) { ci_log("ERROR: %s: buffer is non-local\n", __func__); abort(); } - char* src = (char*)&ni->state->nvme_crc_plugin_idp[pkt->intf_i].crcs[zcp->crc_id] + zcp->crc_insert_first_byte; - char* dst = (char*)zcp->local_addr + zcp->len - zcp->crc_insert_n_bytes; + char* src = + (char*) &ni->state->nvme_crc_plugin_idp[pkt->intf_i].crcs[zcp->crc_id] + + zcp->crc_insert_first_byte; + char* 
dst = (char*) zcp->local_addr + zcp->len - zcp->crc_insert_n_bytes; memcpy(dst, src, zcp->crc_insert_n_bytes); #endif #endif @@ -2409,9 +2369,8 @@ ci_tcp_offload_zc_send_insert_crc(ci_netif* ni, ci_ip_pkt_fmt* pkt, #ifndef __KERNEL__ -ci_uint8 -ci_tcp_offload_zc_send_get_prefix_len(ci_netif* ni, - const struct onload_zc_iovec* iov) +ci_uint8 ci_tcp_offload_zc_send_get_prefix_len( + ci_netif* ni, const struct onload_zc_iovec* iov) { ci_uint8 prefix_len = 0; if( iov->iov_flags & ONLOAD_ZC_SEND_FLAG_ACCUM_CRC ) @@ -2425,16 +2384,16 @@ ci_tcp_offload_zc_send_get_prefix_len(ci_netif* ni, static inline ci_uint32 zc_iov_flags_to_zcp_flags(unsigned iov_flags) { ci_uint32 zcp_flags = 0; - if( iov_flags & ONLOAD_ZC_SEND_FLAG_ACCUM_CRC) + if( iov_flags & ONLOAD_ZC_SEND_FLAG_ACCUM_CRC ) zcp_flags |= ZC_PAYLOAD_FLAG_ACCUM_CRC; - if( iov_flags & ONLOAD_ZC_SEND_FLAG_INSERT_CRC) + if( iov_flags & ONLOAD_ZC_SEND_FLAG_INSERT_CRC ) zcp_flags |= ZC_PAYLOAD_FLAG_INSERT_CRC; return zcp_flags; } -static inline ci_uint8 -zc_prefix_reservation(ci_netif* ni, const struct onload_zc_iovec* iov) +static inline ci_uint8 zc_prefix_reservation( + ci_netif* ni, const struct onload_zc_iovec* iov) { if( iov->iov_flags == 0 ) return 0; @@ -2442,7 +2401,7 @@ zc_prefix_reservation(ci_netif* ni, const struct onload_zc_iovec* iov) } -/* +/* * TODO: * - improve TCP send path (in general) to handle fragmented buffers, then: * o append a small buffer to the existing send queue (via frag @@ -2451,33 +2410,33 @@ zc_prefix_reservation(ci_netif* ni, const struct onload_zc_iovec* iov) * packet; */ -static inline bool ci_tcp_tx_has_room_for_zc(ci_netif* ni, ci_ip_pkt_fmt* pkt, - ci_uint8 prefix_resv) +static inline bool ci_tcp_tx_has_room_for_zc( + ci_netif* ni, ci_ip_pkt_fmt* pkt, ci_uint8 prefix_resv) { if( pkt->flags & CI_PKT_FLAG_INDIRECT ) { return oo_tx_zc_left(pkt) >= oo_tx_zc_payload_size(ni) + prefix_resv && /* We need one additional segment for the header: */ oo_tx_zc_header(pkt)->segs < 
CI_IP_PKT_SEGMENTS_MAX - 1; - } - else { - return CI_PTR_ALIGN_FWD(oo_offbuf_end(&pkt->buf), CI_PKT_ZC_PAYLOAD_ALIGN) - + sizeof(struct ci_pkt_zc_header) + oo_tx_zc_payload_size(ni) + - prefix_resv - <= (char*)pkt + CI_CFG_PKT_BUF_SIZE; + } else { + return CI_PTR_ALIGN_FWD( + oo_offbuf_end(&pkt->buf), CI_PKT_ZC_PAYLOAD_ALIGN) + + sizeof(struct ci_pkt_zc_header) + oo_tx_zc_payload_size(ni) + + prefix_resv <= + (char*) pkt + CI_CFG_PKT_BUF_SIZE; } } -int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, - int flags) +int ci_tcp_zc_send( + ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, int flags) { struct tcp_send_info sinf; ci_ip_pkt_fmt* pkt; int j; unsigned eff_mss; - uint32_t tcp_pay_len = 0; int af = ipcache_af(&ts->s.pkt); #if CI_CFG_TIMESTAMPING + uint32_t tcp_pay_len = 0; bool reusing_prev_pkt; #endif @@ -2495,24 +2454,24 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, sinf.pf.alloc_pkt = NULL; sinf.timeout = ts->s.so.sndtimeo_msec; #ifndef __KERNEL__ - sinf.tcp_send_spin = - oo_per_thread_get()->spinstate & (1 << ONLOAD_SPIN_TCP_SEND); + sinf.tcp_send_spin = + oo_per_thread_get()->spinstate & (1 << ONLOAD_SPIN_TCP_SEND); if( sinf.tcp_send_spin ) ci_frc64(&sinf.start_frc); #else sinf.tcp_send_spin = 0; #endif - if( !(ts->s.b.state & CI_TCP_STATE_SYNCHRONISED) && - ci_tcp_sendmsg_notsynchronised(ni, ts, flags, &sinf) == -1) { + if( ! (ts->s.b.state & CI_TCP_STATE_SYNCHRONISED) && + ci_tcp_sendmsg_notsynchronised(ni, ts, flags, &sinf) == -1 ) { ci_tcp_sendmsg_handle_rc_or_tx_errno(ni, ts, flags, &sinf); msg->rc = sinf.set_errno ? 
-sinf.rc : sinf.rc; return 1; } eff_mss = tcp_eff_mss(ts); - ci_assert_le(eff_mss, - CI_MAX_ETH_DATA_LEN - sizeof(ci_tcp_hdr) - sizeof(ci_ip4_hdr)); + ci_assert_le( + eff_mss, CI_MAX_ETH_DATA_LEN - sizeof(ci_tcp_hdr) - sizeof(ci_ip4_hdr)); j = 0; @@ -2521,7 +2480,7 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, * branches in fast path. */ if( sinf.sendq_credit <= 0 || flags & ONLOAD_MSG_WARM ) { - if(CI_UNLIKELY( flags & ONLOAD_MSG_WARM )) { + if( CI_UNLIKELY(flags & ONLOAD_MSG_WARM) ) { if( ! can_do_msg_warm(ni, ts, &sinf, msg->msg.iov[0].iov_len, flags) || msg->msg.msghdr.msg_iovlen > 1 ) { ++ts->stats.tx_msg_warm_abort; @@ -2538,13 +2497,12 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, sinf.old_burst_window = ts->burst_window; #endif sinf.old_tcp_snd_nxt = tcp_snd_nxt(ts); - } - else { + } else { goto send_q_full; } } - - send_q_not_full: + +send_q_not_full: pkt = NULL; #if CI_CFG_TIMESTAMPING reusing_prev_pkt = false; @@ -2552,47 +2510,47 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, while( j < msg->msg.msghdr.msg_iovlen ) { if( msg->msg.iov[j].buf == ONLOAD_ZC_HANDLE_NONZC ) { goto bad_buffer; - } - else if( zc_is_pktbuf(msg->msg.iov[j].buf) ) { + } else if( zc_is_pktbuf(msg->msg.iov[j].buf) ) { pkt = zc_handle_to_pktbuf(msg->msg.iov[j].buf); ci_assert_equal(pkt->stack_id, ni->state->stack_id); ci_assert(msg->msg.iov[j].iov_base != NULL); ci_assert_gt(msg->msg.iov[j].iov_len, 0); ci_assert_le(msg->msg.iov[j].iov_len, eff_mss); - ci_assert_gt((char*)msg->msg.iov[j].iov_base, - PKT_START(pkt) + ts->outgoing_hdrs_len); - ci_assert_lt((char*)msg->msg.iov[j].iov_base + - msg->msg.iov[j].iov_len, - ((char*)pkt) + CI_CFG_PKT_BUF_SIZE); + ci_assert_gt((char*) msg->msg.iov[j].iov_base, + PKT_START(pkt) + ts->outgoing_hdrs_len); + ci_assert_lt((char*) msg->msg.iov[j].iov_base + msg->msg.iov[j].iov_len, + ((char*) pkt) + CI_CFG_PKT_BUF_SIZE); if( pkt->stack_id != 
ni->state->stack_id || - msg->msg.iov[j].iov_len <= 0 || - msg->msg.iov[j].iov_len > eff_mss || - (char*)msg->msg.iov[j].iov_base < - PKT_START(pkt) + ts->outgoing_hdrs_len || - (char*)msg->msg.iov[j].iov_base + msg->msg.iov[j].iov_len > - ((char*)pkt) + CI_CFG_PKT_BUF_SIZE ) + msg->msg.iov[j].iov_len <= 0 || msg->msg.iov[j].iov_len > eff_mss || + (char*) msg->msg.iov[j].iov_base < + PKT_START(pkt) + ts->outgoing_hdrs_len || + (char*) msg->msg.iov[j].iov_base + msg->msg.iov[j].iov_len > + ((char*) pkt) + CI_CFG_PKT_BUF_SIZE ) goto bad_buffer; pkt->pio_addr = -1; oo_pkt_af_set(pkt, af); - __ci_tcp_tx_pkt_init(pkt, ((uint8_t*) msg->msg.iov[j].iov_base - - (uint8_t*) oo_tx_l3_hdr(pkt)), eff_mss); + __ci_tcp_tx_pkt_init(pkt, + ((uint8_t*) msg->msg.iov[j].iov_base - (uint8_t*) oo_tx_l3_hdr(pkt)), + eff_mss); pkt->n_buffers = 1; pkt->buf_len += msg->msg.iov[j].iov_len; pkt->pay_len += msg->msg.iov[j].iov_len; oo_offbuf_advance(&pkt->buf, msg->msg.iov[j].iov_len); pkt->pf.tcp_tx.end_seq = msg->msg.iov[j].iov_len; +#if CI_CFG_TIMESTAMPING tcp_pay_len = msg->msg.iov[j].iov_len; +#endif - ci_assert_equal(TX_PKT_LEN(pkt), oo_offbuf_ptr(&pkt->buf) - PKT_START(pkt)); + ci_assert_equal( + TX_PKT_LEN(pkt), oo_offbuf_ptr(&pkt->buf) - PKT_START(pkt)); CI_USER_PTR_SET(pkt->pf.tcp_tx.next, sinf.fill_list); sinf.fill_list = pkt; --sinf.sendq_credit; sinf.fill_list_bytes += msg->msg.iov[j].iov_len; - } - else { + } else { #if ! 
CI_CFG_TIMESTAMPING /* We use all the TX timestamping delivery machinery to handle * completions, therefore this feature is removed when timestamps are @@ -2638,10 +2596,10 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, ci_assert_gt(msg->msg.iov[j].iov_len, 0); ci_assert_le(msg->msg.iov[j].iov_len, um->size); ci_assert_le(msg->msg.iov[j].iov_ptr + msg->msg.iov[j].iov_len, - um->base + um->size); + um->base + um->size); - if( !pkt && ci_ip_queue_not_empty(&ts->send) ) { - ci_ip_pkt_fmt *tail_pkt = PKT_CHK(ni, ts->send.tail); + if( ! pkt && ci_ip_queue_not_empty(&ts->send) ) { + ci_ip_pkt_fmt* tail_pkt = PKT_CHK(ni, ts->send.tail); if( NI_OPTS(ni).tcp_combine_sends_mode == 0 || tail_pkt->flags & CI_PKT_FLAG_TX_MORE ) { pkt = tail_pkt; @@ -2651,18 +2609,18 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, } /* Chop up the user's buffer in to contiguous DMA regions */ - iov_base = (uintptr_t)msg->msg.iov[j].iov_base; + iov_base = (uintptr_t) msg->msg.iov[j].iov_base; iov_len = msg->msg.iov[j].iov_len; while( iov_len ) { - const uint64_t NIC_PAGE_MASK = ~(uint64_t)(EF_VI_NIC_PAGE_SIZE - 1); + const uint64_t NIC_PAGE_MASK = ~(uint64_t) (EF_VI_NIC_PAGE_SIZE - 1); /* Max size is bounded by ci_ip_pkt_fmt::pay_len, minus slack to make * the boundary case in the loop below easier */ const uint32_t MAX_CONTIG_LEN = INT_MAX - EF_VI_NIC_PAGE_SIZE; uint32_t contig_len, room_len, remaining_len; /* Up to the end of the current page is guaranteed to be contiguous */ - contig_len = ((iov_base + EF_VI_NIC_PAGE_SIZE) & NIC_PAGE_MASK) - - iov_base; + contig_len = + ((iov_base + EF_VI_NIC_PAGE_SIZE) & NIC_PAGE_MASK) - iov_base; if( NI_OPTS(ni).packet_buffer_mode == CITP_PKTBUF_MODE_PHYS ) { /* Keep going until we find any noncontiguity. It'll be common that * there isn't any, so this optimisation is worth it. 
@@ -2671,18 +2629,19 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, * so it's a good idea to ensure that we don't try to enqueue such a * large single block that it overflows the whole txq on its own. */ while( contig_len < CI_MIN(iov_len, MAX_CONTIG_LEN) ) { - uint64_t ix = (iov_base + contig_len - (uintptr_t)um->base) >> + uint64_t ix = (iov_base + contig_len - (uintptr_t) um->base) >> EF_VI_NIC_PAGE_SHIFT; uint64_t size = um->size >> EF_VI_NIC_PAGE_SHIFT; bool noncontig = false; - OO_STACK_FOR_EACH_INTF_I(ni, i) { + OO_STACK_FOR_EACH_INTF_I(ni, i) + { if( um->hw_addrs[i * size + ix] != um->hw_addrs[i * size + ix - 1] + EF_VI_NIC_PAGE_SIZE ) { noncontig = true; break; } } - if (noncontig) + if( noncontig ) break; contig_len += EF_VI_NIC_PAGE_SIZE; } @@ -2730,23 +2689,23 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, zch->segs = 0; zch->prefix_spc = 0; zch->end = sizeof(*zch); - + CI_USER_PTR_SET(pkt->pf.tcp_tx.next, sinf.fill_list); sinf.fill_list = pkt; --sinf.sendq_credit; - } - else { + } else { room_len = CI_MIN(eff_mss - tcp_pay_len, contig_len); - + if( ! 
(pkt->flags & CI_PKT_FLAG_INDIRECT) ) { /* ci_tcp_tx_has_room_for_zc() has already decided that we can do * this */ pkt->flags |= CI_PKT_FLAG_INDIRECT; pkt->pf.tcp_tx.sock_id = ts->s.b.bufid; - oo_offbuf_set_end(&pkt->buf, CI_PTR_ALIGN_FWD( - CI_MIN(oo_offbuf_end(&pkt->buf), - oo_offbuf_ptr(&pkt->buf) + CI_TCP_MAX_OPTS_LEN), - CI_PKT_ZC_PAYLOAD_ALIGN)); + oo_offbuf_set_end(&pkt->buf, + CI_PTR_ALIGN_FWD( + CI_MIN(oo_offbuf_end(&pkt->buf), + oo_offbuf_ptr(&pkt->buf) + CI_TCP_MAX_OPTS_LEN), + CI_PKT_ZC_PAYLOAD_ALIGN)); zch = oo_tx_zc_header(pkt); zch->segs = 0; zch->prefix_spc = 0; @@ -2754,10 +2713,10 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, } } zch = oo_tx_zc_header(pkt); - zcp = (struct ci_pkt_zc_payload*)((char*)zch + zch->end); + zcp = (struct ci_pkt_zc_payload*) ((char*) zch + zch->end); zch->end += oo_tx_zc_payload_size(ni); ++zch->segs; - + pkt->pf.tcp_tx.end_seq += room_len; tcp_pay_len += room_len; pkt->pay_len += room_len; @@ -2765,47 +2724,46 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, zcp->crc_id = ZC_NVME_CRC_ID_INVALID; zcp->use_remote_cookie = iov_len == room_len; zcp->len = room_len; - zcp->remote.app_cookie = (uintptr_t)msg->msg.iov[j].app_cookie; + zcp->remote.app_cookie = (uintptr_t) msg->msg.iov[j].app_cookie; zcp->remote.addr_space = um->addr_space; #if CI_CFG_NVME_LOCAL_CRC_MODE if( zcp->remote.addr_space == EF_ADDRSPACE_LOCAL ) - zcp->local_addr = (void*)iov_base; + zcp->local_addr = (void*) iov_base; else zcp->local_addr = NULL; #endif OO_STACK_FOR_EACH_INTF_I(ni, i) - zcp->remote.dma_addr[i] = zc_usermem_dma_addr(um, iov_base, i); + zcp->remote.dma_addr[i] = zc_usermem_dma_addr(um, iov_base, i); zcp->prefix_space = prefix_resv; zch->prefix_spc += prefix_resv; - zcp->zcp_flags = zc_iov_flags_to_zcp_flags(msg->msg.iov[j].iov_flags); + zcp->zcp_flags = + zc_iov_flags_to_zcp_flags(msg->msg.iov[j].iov_flags); remaining_len = iov_len - room_len; if( zcp->zcp_flags & 
ZC_PAYLOAD_FLAG_INSERT_CRC ) { /* The INSERT_CRC flag should only be attached to the final 4 bytes * of the iov */ if( remaining_len >= 4 ) { zcp->zcp_flags &= ~ZC_PAYLOAD_FLAG_INSERT_CRC; - } - else { + } else { zcp->crc_insert_first_byte = (iov_len < 4) ? 4 - iov_len : 0; - zcp->crc_insert_n_bytes = (4 - remaining_len - - zcp->crc_insert_first_byte); + zcp->crc_insert_n_bytes = + (4 - remaining_len - zcp->crc_insert_first_byte); ci_assert_lt(remaining_len + zcp->crc_insert_first_byte, 4); } } ASSERT_VALID_PKT(ni, pkt); - + iov_base += room_len; iov_len -= room_len; - + if( reusing_prev_pkt ) { sinf.total_sent += room_len; tcp_enq_nxt(ts) += room_len; - } - else + } else sinf.fill_list_bytes += room_len; - + contig_len -= room_len; if( contig_len > 0 ) @@ -2828,7 +2786,7 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, if( ((flags & MSG_MORE) || (ts->s.s_aflags & CI_SOCK_AFLAG_CORK)) ) { pkt->flags |= CI_PKT_FLAG_TX_MORE; - pkt->flags &=~ CI_PKT_FLAG_TX_PSH_ON_ACK; + pkt->flags &= ~CI_PKT_FLAG_TX_PSH_ON_ACK; } /* If we can grab the lock now, setup the meta-data and get sending. 
@@ -2838,27 +2796,24 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, if( ts->s.tx_errno ) goto tx_errno; if( sinf.fill_list ) { - ts->send_in += ci_tcp_sendmsg_enqueue(ni, ts, - sinf.fill_list, - sinf.fill_list_bytes, - &ts->send); + ts->send_in += ci_tcp_sendmsg_enqueue( + ni, ts, sinf.fill_list, sinf.fill_list_bytes, &ts->send); sinf.total_sent += sinf.fill_list_bytes; } if( pkt->flags & CI_PKT_FLAG_TX_MORE ) TX_PKT_IPX_TCP(af, pkt)->tcp_flags = CI_TCP_FLAG_ACK; else - TX_PKT_IPX_TCP(af, pkt)->tcp_flags = CI_TCP_FLAG_PSH|CI_TCP_FLAG_ACK; + TX_PKT_IPX_TCP(af, pkt)->tcp_flags = CI_TCP_FLAG_PSH | CI_TCP_FLAG_ACK; ci_tcp_tx_advance_nagle(ni, ts); - if(CI_UNLIKELY( flags & ONLOAD_MSG_WARM )) { + if( CI_UNLIKELY(flags & ONLOAD_MSG_WARM) ) { unroll_msg_warm(ni, ts, &sinf, 1); } - } - else { + } else { if( ts->s.tx_errno ) goto tx_errno; - if( !(pkt->flags & CI_PKT_FLAG_TX_MORE) ) + if( ! (pkt->flags & CI_PKT_FLAG_TX_MORE) ) pkt->flags |= CI_PKT_FLAG_TX_PSH; if( ! 
ci_tcp_send_via_prequeue(ni, ts, &sinf) ) @@ -2875,12 +2830,12 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, else goto send_q_full; } - if( sinf.stack_locked ) + if( sinf.stack_locked ) ci_netif_unlock(ni); msg->rc = sinf.total_sent; return 1; - send_q_full: +send_q_full: if( ci_netif_may_poll(ni) && ci_netif_need_poll(ni) && si_trylock(ni, &sinf) ) { ci_netif_poll(ni); @@ -2906,11 +2861,11 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, if( rc == 0 ) goto send_q_not_full; else if( rc == -1 ) { - if( sinf.stack_locked ) + if( sinf.stack_locked ) ci_netif_unlock(ni); if( j == 0 ) /* Must invert error sign as functions shared with sendmsg store - * error as positive + * error as positive */ msg->rc = -sinf.rc; return 1; @@ -2920,19 +2875,19 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, if( ci_tcp_sendmsg_block(ni, ts, flags, &sinf) == 0 ) goto send_q_not_full; else { - if( sinf.stack_locked ) + if( sinf.stack_locked ) ci_netif_unlock(ni); if( j == 0 ) /* Must invert error sign as functions shared with sendmsg store - * error as positive + * error as positive */ msg->rc = -sinf.rc; return 1; } - bad_buffer: - if(CI_UNLIKELY( ts->tcpflags & CI_TCPT_FLAG_MSG_WARM )) { +bad_buffer: + if( CI_UNLIKELY(ts->tcpflags & CI_TCPT_FLAG_MSG_WARM) ) { ++ts->stats.tx_msg_warm_abort; if( sinf.stack_locked ) ci_netif_unlock(ni); @@ -2944,14 +2899,11 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, if( si_trylock(ni, &sinf) ) { if( ts->s.tx_errno ) goto tx_errno; - ts->send_in += ci_tcp_sendmsg_enqueue(ni, ts, - sinf.fill_list, - sinf.fill_list_bytes, - &ts->send); + ts->send_in += ci_tcp_sendmsg_enqueue( + ni, ts, sinf.fill_list, sinf.fill_list_bytes, &ts->send); sinf.total_sent += sinf.fill_list_bytes; sinf.fill_list = 0; - } - else { + } else { if( ! 
ci_tcp_send_via_prequeue(ni, ts, &sinf) ) goto tx_errno; sinf.total_sent += sinf.fill_list_bytes; @@ -2966,7 +2918,7 @@ int ci_tcp_zc_send(ci_netif* ni, ci_tcp_state* ts, struct onload_zc_mmsg* msg, ci_netif_unlock(ni); return 1; - tx_errno: +tx_errno: /* Similar to ci_tcp_sendmsg_handle_tx_errno(), but * - no need to free the fill_list: user owns the packets in case of * error; @@ -3018,15 +2970,12 @@ static int ci_tcp_ds_get_arp(ci_netif* ni, ci_tcp_state* ts) return 0; } -#define MAX_HEADERS_LEN \ - ( ETH_HLEN + ETH_VLAN_HLEN + sizeof(ci_ip4_hdr) + \ - 0xf * sizeof(ci_uint32) ) +#define MAX_HEADERS_LEN \ + (ETH_HLEN + ETH_VLAN_HLEN + sizeof(ci_ip4_hdr) + 0xf * sizeof(ci_uint32)) -enum onload_delegated_send_rc -ci_tcp_ds_fill_headers(ci_netif* ni, ci_tcp_state* ts, unsigned flags, - void* headers, int* headers_len_inout, - int* ip_tcp_hdr_len_out, - int* tcp_seq_offset_out, int* ip_len_offset_out) +enum onload_delegated_send_rc ci_tcp_ds_fill_headers(ci_netif* ni, + ci_tcp_state* ts, unsigned flags, void* headers, int* headers_len_inout, + int* ip_tcp_hdr_len_out, int* tcp_seq_offset_out, int* ip_len_offset_out) { int headers_len; int ether_header_len; @@ -3039,7 +2988,7 @@ ci_tcp_ds_fill_headers(ci_netif* ni, ci_tcp_state* ts, unsigned flags, /* Try to get valid cache */ if( ! oo_cp_ipcache_is_valid(ni, &ts->s.pkt) && - (~flags & ONLOAD_DELEGATED_SEND_FLAG_IGNORE_ARP ) && + (~flags & ONLOAD_DELEGATED_SEND_FLAG_IGNORE_ARP) && ! ci_tcp_ds_get_arp(ni, ts) ) { return ONLOAD_DELEGATED_SEND_RC_NOARP; } @@ -3058,8 +3007,9 @@ ci_tcp_ds_fill_headers(ci_netif* ni, ci_tcp_state* ts, unsigned flags, /* Create a "packet" which we are pretending to transmit. 
*/ memcpy(headers, ci_ip_cache_ether_hdr(&ts->s.pkt), headers_len); - ip = (void*)((ci_uintptr_t)headers + ether_header_len); - tcp = (void*)((ci_uintptr_t)headers + ether_header_len + sizeof(ci_ip4_hdr)); + ip = (void*) ((ci_uintptr_t) headers + ether_header_len); + tcp = + (void*) ((ci_uintptr_t) headers + ether_header_len + sizeof(ci_ip4_hdr)); /* tcp_snd_nxt, tcp_rcv_nxt, tsrecent, eff_mss could change after we've * passed our header to the user, so there is nothing to do with it. */ @@ -3072,11 +3022,10 @@ ci_tcp_ds_fill_headers(ci_netif* ni, ci_tcp_state* ts, unsigned flags, ci_uint8* opt = CI_TCP_HDR_OPTS(tcp); ci_tcp_tx_opt_tso(&opt, ci_tcp_time_now(ni), ts->tsrecent); } - ip->ip_tot_len_be16 = - CI_BSWAP_BE16(ts->outgoing_hdrs_len + ts->eff_mss); + ip->ip_tot_len_be16 = CI_BSWAP_BE16(ts->outgoing_hdrs_len + ts->eff_mss); ip->ip_id_be16 = 0; - ci_assert_equal(CI_TCP_HDR_LEN(tcp), - ts->outgoing_hdrs_len - sizeof(ci_tcp_hdr)); + ci_assert_equal( + CI_TCP_HDR_LEN(tcp), ts->outgoing_hdrs_len - sizeof(ci_tcp_hdr)); ci_assert_equal(ip->ip_check_be16, 0); ci_assert_equal(tcp->tcp_check_be16, 0); ci_assert_equal(tcp->tcp_urg_ptr_be16, 0); @@ -3085,15 +3034,14 @@ ci_tcp_ds_fill_headers(ci_netif* ni, ci_tcp_state* ts, unsigned flags, *ip_tcp_hdr_len_out = ts->outgoing_hdrs_len; *tcp_seq_offset_out = ether_header_len + sizeof(ci_ip4_hdr) + CI_MEMBER_OFFSET(ci_tcp_hdr, tcp_seq_be32); - *ip_len_offset_out = ether_header_len + - CI_MEMBER_OFFSET(ci_ip4_hdr, ip_tot_len_be16); + *ip_len_offset_out = + ether_header_len + CI_MEMBER_OFFSET(ci_ip4_hdr, ip_tot_len_be16); return ONLOAD_DELEGATED_SEND_RC_OK; } -int -ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, - const ci_iovec *iov, int iovlen, int flags) +int ci_tcp_ds_done( + ci_netif* ni, ci_tcp_state* ts, const ci_iovec* iov, int iovlen, int flags) { int already_acked, i; ci_iovec_ptr piov; @@ -3109,7 +3057,7 @@ ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, sinf.pf.alloc_pkt = NULL; sinf.timeout = 0; /* ignore 
ts->s.so.sndtimeo_msec */ sinf.tcp_send_spin = - oo_per_thread_get()->spinstate & (1 << ONLOAD_SPIN_TCP_SEND); + oo_per_thread_get()->spinstate & (1 << ONLOAD_SPIN_TCP_SEND); sinf.fill_list = 0; for( i = 0; i < iovlen; ++i ) @@ -3120,14 +3068,14 @@ ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, if( sinf.total_unsent > ts->snd_delegated ) RET_WITH_ERRNO(EMSGSIZE); - try_again: +try_again: while( 1 ) { if( ! si_trylock(ni, &sinf) ) { ci_netif_lock(ni); sinf.stack_locked = 1; } - already_acked = SEQ_SUB(ts->snd_una, ts->snd_nxt); + already_acked = SEQ_SUB(ts->snd_una, ts->snd_nxt); /* already_acked > 0 => some of our data is already ACKed; * already_acked == 0 => retransmit queue is empty, but our data is not * acked; @@ -3158,7 +3106,7 @@ ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, iov_offset = 0; } } - if( sinf.total_unsent == 0) + if( sinf.total_unsent == 0 ) goto out; /* copy data from iov to retransmit queue */ @@ -3174,7 +3122,8 @@ ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, ci_iovec_ptr_advance(&piov, already_acked + iov_offset); ci_assert(! ci_iovec_ptr_is_empty_proper(&piov)); - if( sinf.sendq_credit <= 0 ) goto send_q_full; + if( sinf.sendq_credit <= 0 ) + goto send_q_full; /* Either: * - we got all the buffers we needed and then used them, so got is 0; @@ -3211,12 +3160,13 @@ ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, if( ts->s.tx_errno ) { ci_assert(! 
(flags & ONLOAD_MSG_WARM)); ci_tcp_sendmsg_handle_tx_errno(ni, ts, flags, &sinf); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } /* add to retrans q */ - ci_tcp_sendmsg_enqueue(ni, ts, sinf.fill_list, sinf.fill_list_bytes, - &ts->retrans); + ci_tcp_sendmsg_enqueue( + ni, ts, sinf.fill_list, sinf.fill_list_bytes, &ts->retrans); sinf.total_sent += sinf.fill_list_bytes; sinf.total_unsent -= sinf.fill_list_bytes; ts->snd_nxt += sinf.fill_list_bytes; @@ -3242,7 +3192,8 @@ ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, if( ts->s.tx_errno ) { ci_tcp_sendmsg_handle_tx_errno(ni, ts, flags, &sinf); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } @@ -3251,18 +3202,19 @@ ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, /* It looks like we don't have any credit in the send queue; * let's check for sure. */ sinf.sendq_credit = ci_tcp_tx_send_space(ni, ts); - if( sinf.sendq_credit <= 0 ) goto send_q_full; + if( sinf.sendq_credit <= 0 ) + goto send_q_full; } } - out: +out: ci_assert(sinf.stack_locked); /* Set up the retransmit timer if: * (1) we've added something to the retrans queue; * (2) it was not acked in ci_netif_poll() we call above. */ - if( sinf.total_sent > already_acked && !ci_ip_queue_is_empty(&ts->retrans)) + if( sinf.total_sent > already_acked && ! ci_ip_queue_is_empty(&ts->retrans) ) ci_tcp_rto_check_and_set(ni, ts); /* We may have allocated some packets and then found they weren't neeeded. 
@@ -3286,11 +3238,13 @@ ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, ci_netif_poll(ni); if( ts->s.tx_errno ) { ci_tcp_sendmsg_handle_tx_errno(ni, ts, flags, &sinf); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } sinf.sendq_credit = ci_tcp_tx_send_space(ni, ts); - if( sinf.sendq_credit > 0 ) goto try_again; + if( sinf.sendq_credit > 0 ) + goto try_again; /* We are pushing our data to retransmit queue; send queue is empty; * tx timestamp queue is guaranteed to be disabled. So, the only @@ -3301,7 +3255,8 @@ ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, if( flags & MSG_DONTWAIT ) { sinf.rc = -EAGAIN; ci_tcp_sendmsg_handle_sent_or_rc(ni, ts, flags, &sinf); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } @@ -3311,7 +3266,8 @@ ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, if( rc == 0 ) goto try_again; else if( rc == -1 ) { - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } sinf.tcp_send_spin = 0; @@ -3320,29 +3276,28 @@ ci_tcp_ds_done(ci_netif* ni, ci_tcp_state* ts, if( ci_tcp_sendmsg_block(ni, ts, flags, &sinf) == 0 ) goto try_again; else { - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); return sinf.rc; } - no_pkt_buf: - { - int rc; - rc = ci_tcp_sendmsg_no_pkt_buf(ni, ts, flags, &sinf); - if( rc == 0 ) { - got = last_needed - sinf.n_needed; - goto try_again; - } - else { - /* Once we've filled some packets we're guaranteed to queue them, so - * we should never be calling ci_tcp_sendmsg_no_pkt_buf with some - * packets filled. 
- */ - ci_assert(rc == -1); - if( sinf.set_errno ) CI_SET_ERROR(sinf.rc, sinf.rc); - return sinf.rc; - } +no_pkt_buf : { + int rc; + rc = ci_tcp_sendmsg_no_pkt_buf(ni, ts, flags, &sinf); + if( rc == 0 ) { + got = last_needed - sinf.n_needed; + goto try_again; + } else { + /* Once we've filled some packets we're guaranteed to queue them, so + * we should never be calling ci_tcp_sendmsg_no_pkt_buf with some + * packets filled. + */ + ci_assert(rc == -1); + if( sinf.set_errno ) + CI_SET_ERROR(sinf.rc, sinf.rc); + return sinf.rc; } - +} } #endif diff --git a/src/lib/transport/ip/udp_connect.c b/src/lib/transport/ip/udp_connect.c index 25093f3b8..e7d1f89b5 100644 --- a/src/lib/transport/ip/udp_connect.c +++ b/src/lib/transport/ip/udp_connect.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2004-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author djr/ctk/stg ** \brief UDP connection routines: @@ -11,7 +11,7 @@ ** *//* \**************************************************************************/ - + /*! 
\cidoxg_lib_transport_ip */ #include "ip_internal.h" @@ -21,50 +21,44 @@ #include #endif -#define LPF "ci_udp_" -#define LPFIN "-> " LPF -#define LPFOUT "<- " LPF +#define LPF "ci_udp_" +#define LPFIN "-> " LPF +#define LPFOUT "<- " LPF #define INADDR_ANY_BE32 (CI_BSWAPC_BE32(INADDR_ANY)) #ifndef __ci_driver__ #ifndef NDEBUG -static char * ci_udp_addr_str( ci_udp_state* us ) +static char* ci_udp_addr_str(ci_udp_state* us) { static char buf[128]; ci_assert(us); - snprintf( buf, sizeof(buf), "L[" IPX_PORT_FMT "] R[" IPX_PORT_FMT "]", - IPX_ARG(AF_IP(udp_ipx_laddr(us))), - CI_BSWAP_BE16(udp_lport_be16(us)), - IPX_ARG(AF_IP(udp_ipx_raddr(us))), - CI_BSWAP_BE16(udp_rport_be16(us)) ); + snprintf(buf, sizeof(buf), "L[" IPX_PORT_FMT "] R[" IPX_PORT_FMT "]", + IPX_ARG(AF_IP(udp_ipx_laddr(us))), CI_BSWAP_BE16(udp_lport_be16(us)), + IPX_ARG(AF_IP(udp_ipx_raddr(us))), CI_BSWAP_BE16(udp_rport_be16(us))); return buf; } -#define CI_UDP_EP_ADDR_STR(ep) \ - ci_udp_addr_str((ep)->state) +#define CI_UDP_EP_ADDR_STR(ep) ci_udp_addr_str((ep)->state) -# define CI_UDPSTATE_SHOW(us) \ - LOG_UV(log( "%s: %d UDP %s Fl[%s]", \ - __FUNCTION__, S_FMT(us), \ - ci_udp_addr_str((us)), \ - UDP_GET_FLAG((us), CI_UDPF_FILTERED) ? "Flt " : "" \ - )) +#define CI_UDPSTATE_SHOW(us) \ + LOG_UV(log("%s: %d UDP %s Fl[%s]", __FUNCTION__, S_FMT(us), \ + ci_udp_addr_str((us)), \ + UDP_GET_FLAG((us), CI_UDPF_FILTERED) ? 
"Flt " : "")) -#define CI_UDPSTATE_SHOW_EP(ep) \ - CI_UDPSTATE_SHOW( SOCK_TO_UDP((ep)->s) ) +#define CI_UDPSTATE_SHOW_EP(ep) CI_UDPSTATE_SHOW(SOCK_TO_UDP((ep)->s)) #else -# define CI_UDPSTATE_SHOW(us) -# define CI_UDPSTATE_SHOW_EP(ep) +#define CI_UDPSTATE_SHOW(us) +#define CI_UDPSTATE_SHOW_EP(ep) #endif /* Encapsulation of sys_getsockname for UDP EPs */ -static int ci_udp_sys_getsockname( ci_fd_t sock, citp_socket* ep ) +static int ci_udp_sys_getsockname(ci_fd_t sock, citp_socket* ep) { socklen_t salen; int rc; @@ -79,21 +73,21 @@ static int ci_udp_sys_getsockname( ci_fd_t sock, citp_socket* ep ) salen = sizeof(sa_u); - rc = ci_sys_getsockname( sock, &sa_u.sa, &salen ); + rc = ci_sys_getsockname(sock, &sa_u.sa, &salen); if( rc ) return rc; if( sa_u.sa.sa_family != ep->s->domain || salen < sizeof(struct sockaddr_in) #if CI_CFG_FAKE_IPV6 - || (ep->s->domain == AF_INET6 && salen < sizeof(struct sockaddr_in6) ) + || (ep->s->domain == AF_INET6 && salen < sizeof(struct sockaddr_in6)) #endif - ) { - LOG_UV(log("%s: OS sock domain %d != expected domain %d or " - "sys_getsockname struct small (%d exp %d)", - __FUNCTION__, sa_u.sa.sa_family, ep->s->domain, - salen, - (int)(ep->s->domain == AF_INET ? sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6)))); + ) { + LOG_UV( + log("%s: OS sock domain %d != expected domain %d or " + "sys_getsockname struct small (%d exp %d)", + __FUNCTION__, sa_u.sa.sa_family, ep->s->domain, salen, + (int) (ep->s->domain == AF_INET ? sizeof(struct sockaddr_in) + : sizeof(struct sockaddr_in6)))); return -1; } @@ -103,12 +97,12 @@ static int ci_udp_sys_getsockname( ci_fd_t sock, citp_socket* ep ) } /* Wrapper for call down to OS disconnect. 
*/ -ci_inline int ci_udp_sys_disconnect( ci_fd_t sock, citp_socket* ep ) +ci_inline int ci_udp_sys_disconnect(ci_fd_t sock, citp_socket* ep) { struct sockaddr_in sin; - + sin.sin_family = AF_UNSPEC; - return ci_sys_connect( sock, (struct sockaddr*)&sin, sizeof(sin) ); + return ci_sys_connect(sock, (struct sockaddr*) &sin, sizeof(sin)); } @@ -129,11 +123,14 @@ static int ci_udp_set_filters(citp_socket* ep, ci_udp_state* us) ci_assert(ep); ci_assert(us); + if( udp_lport_be16(us) == 0 ) return 0; - rc = ci_tcp_ep_set_filters(ep->netif, S_SP(us), us->s.cp.so_bindtodevice, - OO_SP_NULL); + LOG_UC(log(FNS_FMT "Setting UDP filters", FNS_PRI_ARGS(ep->netif, ep->s))); + + rc = ci_tcp_ep_set_filters( + ep->netif, S_SP(us), us->s.cp.so_bindtodevice, OO_SP_NULL); if( rc == -EFILTERSSOME ) { if( CITP_OPTS.no_fail ) rc = 0; @@ -143,8 +140,6 @@ static int ci_udp_set_filters(citp_socket* ep, ci_udp_state* us) } } if( rc < 0 ) { - LOG_UC(log(FNS_FMT "ci_tcp_ep_set_filters failed (%d)", - FNS_PRI_ARGS(ep->netif, ep->s), -rc)); CI_SET_ERROR(rc, -rc); return rc; } @@ -157,36 +152,36 @@ static int ci_udp_set_filters(citp_socket* ep, ci_udp_state* us) * Interface */ -static int ci_udp_should_handover(citp_socket* ep, ci_addr_t laddr, - ci_uint16 lport) +static int ci_udp_should_handover( + citp_socket* ep, ci_addr_t laddr, ci_uint16 lport) { if( (CI_BSWAP_BE16(lport) >= NI_OPTS(ep->netif).udp_port_handover_min && - CI_BSWAP_BE16(lport) <= NI_OPTS(ep->netif).udp_port_handover_max) || + CI_BSWAP_BE16(lport) <= NI_OPTS(ep->netif).udp_port_handover_max) || (CI_BSWAP_BE16(lport) >= NI_OPTS(ep->netif).udp_port_handover2_min && - CI_BSWAP_BE16(lport) <= NI_OPTS(ep->netif).udp_port_handover2_max) || + CI_BSWAP_BE16(lport) <= NI_OPTS(ep->netif).udp_port_handover2_max) || (CI_BSWAP_BE16(lport) >= NI_OPTS(ep->netif).udp_port_handover3_min && - CI_BSWAP_BE16(lport) <= NI_OPTS(ep->netif).udp_port_handover3_max) ) { + CI_BSWAP_BE16(lport) <= + NI_OPTS(ep->netif).udp_port_handover3_max) ) { 
LOG_UC(log(FNS_FMT "HANDOVER (%d <= %d <= %d)", - FNS_PRI_ARGS(ep->netif, ep->s), - NI_OPTS(ep->netif).udp_port_handover_min, - CI_BSWAP_BE16(lport), - NI_OPTS(ep->netif).udp_port_handover_max)); + FNS_PRI_ARGS(ep->netif, ep->s), + NI_OPTS(ep->netif).udp_port_handover_min, CI_BSWAP_BE16(lport), + NI_OPTS(ep->netif).udp_port_handover_max)); goto handover; } if( ~ep->netif->state->flags & CI_NETIF_FLAG_USE_ALIEN_LADDRS && ! CI_IPX_ADDR_IS_ANY(laddr) && - ! cicp_user_addr_is_local_efab(ep->netif, laddr) && + ! cicp_user_addr_is_local_efab(ep->netif, laddr) && ! CI_IPX_IS_MULTICAST(laddr) ) { /* Either the bind/getsockname indicated that we need to let the OS - * take this or the local address is not one of ours - so we can safely - * hand-over as bind to a non-ANY addr cannot be revoked. - * The filters (if any) have already been removed, so we just get out. */ + * take this or the local address is not one of ours - so we can safely + * hand-over as bind to a non-ANY addr cannot be revoked. + * The filters (if any) have already been removed, so we just get out. 
*/ goto handover; } return 0; - handover: +handover: return 1; } @@ -194,7 +189,8 @@ static int ci_udp_should_handover(citp_socket* ep, ci_addr_t laddr, static void ci_udp_init_ipcache_ip4_hdr(ci_udp_state* us) { /* Move source and destination ports */ - memmove(&us->s.pkt.ipx.ip4 + 1, &us->s.pkt.ipx.ip6 + 1, sizeof(ci_uint16) * 2); + memmove( + &us->s.pkt.ipx.ip4 + 1, &us->s.pkt.ipx.ip6 + 1, sizeof(ci_uint16) * 2); ci_init_ipcache_ip4_hdr(&us->s); us->ephemeral_pkt.ether_type = CI_ETHERTYPE_IP; memset(&us->ephemeral_pkt.ipx.ip4, 0, sizeof(us->ephemeral_pkt.ipx.ip4)); @@ -208,7 +204,8 @@ static void ci_udp_init_ipcache_ip4_hdr(ci_udp_state* us) static void ci_udp_init_ipcache_ip6_hdr(ci_udp_state* us) { /* Move source and destination ports */ - memmove(&us->s.pkt.ipx.ip6 + 1, &us->s.pkt.ipx.ip4 + 1, sizeof(ci_uint16) * 2); + memmove( + &us->s.pkt.ipx.ip6 + 1, &us->s.pkt.ipx.ip4 + 1, sizeof(ci_uint16) * 2); ci_init_ipcache_ip6_hdr(&us->s); us->ephemeral_pkt.ether_type = CI_ETHERTYPE_IP6; memset(&us->ephemeral_pkt.ipx.ip6, 0, sizeof(us->ephemeral_pkt.ipx.ip6)); @@ -222,10 +219,9 @@ static void ci_udp_init_ipcache_ip6_hdr(ci_udp_state* us) void ci_udp_ipcache_convert(int af, ci_udp_state* us) { if( IS_AF_INET6(af) ) { - if( !ipcache_is_ipv6(&us->s.pkt) ) + if( ! ipcache_is_ipv6(&us->s.pkt) ) ci_udp_init_ipcache_ip6_hdr(us); - } - else if( ipcache_is_ipv6(&us->s.pkt) ) { + } else if( ipcache_is_ipv6(&us->s.pkt) ) { ci_udp_init_ipcache_ip4_hdr(us); } } @@ -236,7 +232,7 @@ void ci_udp_ipcache_convert(int af, ci_udp_state* us) * called on an OS socket. [lport] and CI_SIN(addr)->sin_port do not * have to be the same value. 
*/ int ci_udp_bind_conclude(citp_socket* ep, const struct sockaddr* addr, - socklen_t addrlen, ci_uint16 lport) + socklen_t addrlen, ci_uint16 lport) { ci_udp_state* us; ci_addr_t laddr; @@ -271,8 +267,8 @@ int ci_udp_bind_conclude(citp_socket* ep, const struct sockaddr* addr, ci_sock_cmn_set_laddr(ep->s, laddr, lport); - if( !CI_IPX_ADDR_IS_ANY(laddr) && !CI_IPX_IS_MULTICAST(laddr) ) { - us->s.cp.sock_cp_flags &=~ OO_SCP_UDP_WILD; + if( ! CI_IPX_ADDR_IS_ANY(laddr) && ! CI_IPX_IS_MULTICAST(laddr) ) { + us->s.cp.sock_cp_flags &= ~OO_SCP_UDP_WILD; us->s.cp.sock_cp_flags |= OO_SCP_BOUND_ADDR; } /* reset any rx/tx that have taken place already */ @@ -281,15 +277,15 @@ int ci_udp_bind_conclude(citp_socket* ep, const struct sockaddr* addr, /* OS source addrs have already been handed-over, so this must be one of * our src addresses. */ - rc = ci_udp_set_filters( ep, us); - ci_assert( !UDP_GET_FLAG(us, CI_UDPF_EF_BIND) ); + rc = ci_udp_set_filters(ep, us); + ci_assert(! UDP_GET_FLAG(us, CI_UDPF_EF_BIND)); /*! \todo FIXME isn't the port the thing to be testing here? */ - if( !CI_IPX_ADDR_IS_ANY(udp_ipx_laddr(us)) ) + if( ! CI_IPX_ADDR_IS_ANY(udp_ipx_laddr(us)) ) UDP_SET_FLAG(us, CI_UDPF_EF_BIND); - CI_UDPSTATE_SHOW_EP( ep ); - if( rc == CI_SOCKET_ERROR && CITP_OPTS.no_fail) { + CI_UDPSTATE_SHOW_EP(ep); + if( rc == CI_SOCKET_ERROR && CITP_OPTS.no_fail ) { CITP_STATS_NETIF(++ep->netif->state->stats.udp_bind_no_filter); - goto handover; + goto filter_fail; } /* If we don't want unicast filters installed, and we've now got a unicast @@ -297,44 +293,50 @@ int ci_udp_bind_conclude(citp_socket* ep, const struct sockaddr* addr, * just handover now. */ if( UDP_GET_FLAG(us, CI_UDPF_NO_UCAST_FILTER) && - !CI_IPX_ADDR_IS_ANY(udp_ipx_laddr(us)) && - !CI_IPX_IS_MULTICAST(udp_ipx_laddr(us)) ) + ! CI_IPX_ADDR_IS_ANY(udp_ipx_laddr(us)) && + ! 
CI_IPX_IS_MULTICAST(udp_ipx_laddr(us)) ) goto handover; return rc; - handover: - LOG_UV(log("%s: "SK_FMT" HANDOVER", __FUNCTION__, SK_PRI_ARGS(ep))); +handover: + LOG_U(log("%s: " SK_FMT " HANDOVER", __FUNCTION__, SK_PRI_ARGS(ep))); return CI_SOCKET_HANDOVER; + +filter_fail: + return rc; } #if CI_CFG_ENDPOINT_MOVE -void ci_udp_handle_force_reuseport(ci_fd_t fd, citp_socket* ep, - const struct sockaddr* sa, socklen_t sa_len) +void ci_udp_handle_force_reuseport( + ci_fd_t fd, citp_socket* ep, const struct sockaddr* sa, socklen_t sa_len) { int rc; if( CITP_OPTS.udp_reuseports != 0 && - ((struct sockaddr_in*)sa)->sin_port != 0 ) { - struct ci_port_list *force_reuseport; + ((struct sockaddr_in*) sa)->sin_port != 0 ) { + struct ci_port_list* force_reuseport; CI_DLLIST_FOR_EACH2(struct ci_port_list, force_reuseport, link, - (ci_dllist*)(ci_uintptr_t)CITP_OPTS.udp_reuseports) { - if( force_reuseport->port == ((struct sockaddr_in*)sa)->sin_port ) { + (ci_dllist*) (ci_uintptr_t) CITP_OPTS.udp_reuseports) + { + if( force_reuseport->port == ((struct sockaddr_in*) sa)->sin_port ) { int one = 1; ci_fd_t os_sock = ci_get_os_sock_fd(fd); ci_assert(CI_IS_VALID_SOCKET(os_sock)); - rc = ci_sys_setsockopt(os_sock, SOL_SOCKET, SO_REUSEPORT, &one, - sizeof(one)); + rc = ci_sys_setsockopt( + os_sock, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); ci_rel_os_sock_fd(os_sock); /* Fixme: shouldn't we handle errors? 
*/ if( rc != 0 ) { log("%s: failed to set SO_REUSEPORT on OS socket: " - "rc=%d errno=%d", __func__, rc, errno); + "rc=%d errno=%d", + __func__, rc, errno); } ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT; - LOG_UC(log("%s "SF_FMT", applied legacy SO_REUSEPORT flag for port %u", - __FUNCTION__, SF_PRI_ARGS(ep, fd), force_reuseport->port)); + LOG_UC( + log("%s " SF_FMT ", applied legacy SO_REUSEPORT flag for port %u", + __FUNCTION__, SF_PRI_ARGS(ep, fd), force_reuseport->port)); } } } @@ -344,18 +346,14 @@ void ci_udp_handle_force_reuseport(ci_fd_t fd, citp_socket* ep, /* Set a reuseport bind on a socket. */ int ci_udp_reuseport_bind(citp_socket* ep, ci_fd_t fd, - const struct sockaddr* sa, socklen_t sa_len, - ci_uint16 lport) + const struct sockaddr* sa, socklen_t sa_len, ci_uint16 lport) { int rc; ci_assert_nequal(ep->s->s_flags & CI_SOCK_FLAG_REUSEPORT, 0); if( (rc = ci_tcp_ep_reuseport_bind(fd, CITP_OPTS.cluster_name, - CITP_OPTS.cluster_size, - CITP_OPTS.cluster_restart_opt, - CITP_OPTS.cluster_hot_restart_opt, - ci_get_addr(sa), - lport)) != 0 ) { + CITP_OPTS.cluster_size, CITP_OPTS.cluster_restart_opt, + CITP_OPTS.cluster_hot_restart_opt, ci_get_addr(sa), lport)) != 0 ) { errno = -rc; return -1; } @@ -367,42 +365,41 @@ int ci_udp_reuseport_bind(citp_socket* ep, ci_fd_t fd, /* To handle bind we just let the underlying OS socket make all * of the decisions for us. If The bind leaves things such that * the source address is not one of ours then we hand it over to the - * OS (by returning CI_SOCKET_HANDOVER) - in which case the OS socket + * OS (by returning CI_SOCKET_HANDOVER) - in which case the OS socket * will be bound as expected. 
*/ int ci_udp_bind_start(citp_socket* ep, ci_fd_t fd, const struct sockaddr* addr, - socklen_t addrlen, ci_uint16* lport) + socklen_t addrlen, ci_uint16* lport) { CHECK_UEP(ep); - LOG_UC(log("%s("SF_FMT", addrlen=%d)", __FUNCTION__, - SF_PRI_ARGS(ep,fd), addrlen)); + LOG_UC(log("%s(" SF_FMT ", addrlen=%d)", __FUNCTION__, SF_PRI_ARGS(ep, fd), + addrlen)); return ci_tcp_helper_bind_os_sock(fd, addr, addrlen, lport); } -static void ci_udp_set_raddr(ci_udp_state* us, ci_addr_t addr, - int rport_be16) +static void ci_udp_set_raddr(ci_udp_state* us, ci_addr_t addr, int rport_be16) { ci_ip_cache_invalidate(&us->s.pkt); ci_sock_set_raddr_port(&us->s, addr, rport_be16); } -#define IS_DISCONNECTING(sin) ( (sin)->sin_family == AF_UNSPEC ) +#define IS_DISCONNECTING(sin) ((sin)->sin_family == AF_UNSPEC) -static int -ci_udp_disconnect(citp_socket* ep, ci_udp_state* us, ci_fd_t os_sock) +static int ci_udp_disconnect( + citp_socket* ep, ci_udp_state* us, ci_fd_t os_sock) { int rc; if( (rc = ci_udp_sys_getsockname(os_sock, ep)) != 0 ) { LOG_E(log(FNS_FMT "ERROR: sys_getsockname failed (%d)", - FNS_PRI_ARGS(ep->netif, ep->s), errno)); + FNS_PRI_ARGS(ep->netif, ep->s), errno)); return rc; } ci_udp_set_raddr(us, addr_any, 0); - us->s.s_flags &=~ CI_SOCK_FLAG_CONNECTED; + us->s.s_flags &= ~CI_SOCK_FLAG_CONNECTED; /* TODO: We shouldn't really clear then set here; instead we should * insert wildcard filters before removing the full-match ones. ie. The @@ -413,8 +410,8 @@ ci_udp_disconnect(citp_socket* ep, ci_udp_state* us, ci_fd_t os_sock) if( (rc = ci_udp_set_filters(ep, us)) != 0 ) /* Not too bad -- should still get packets via OS socket. */ - LOG_U(log(FNS_FMT "ERROR: ci_udp_set_filters failed (%d)", - FNS_PRI_ARGS(ep->netif, ep->s), errno)); + LOG_UC(log(FNS_FMT "ERROR: ci_udp_set_filters failed (%d)", + FNS_PRI_ARGS(ep->netif, ep->s), errno)); if( ! 
(us->s.cp.sock_cp_flags & OO_SCP_BOUND_ADDR) ) us->s.cp.sock_cp_flags |= OO_SCP_UDP_WILD; return 0; @@ -426,8 +423,7 @@ ci_udp_disconnect(citp_socket* ep, ci_udp_state* us, ci_fd_t os_sock) * in here, then it can be consider an internal error or failing of onload. */ int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd, - const struct sockaddr* serv_addr, - socklen_t addrlen, ci_fd_t os_sock) + const struct sockaddr* serv_addr, socklen_t addrlen, ci_fd_t os_sock) { const struct sockaddr_in* serv_sin = (const struct sockaddr_in*) serv_addr; ci_addr_t dst; @@ -437,9 +433,11 @@ int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd, CHECK_UEP(ep); + LOG_UC(log(FNT_FMT, FNT_PRI_ARGS(ep->netif, us))); + UDP_CLR_FLAG(us, CI_UDPF_EF_SEND); us->s.rx_errno = 0; - us->s.tx_errno = 0; + us->s.tx_errno = 0; if( IS_DISCONNECTING(serv_sin) ) { rc = ci_udp_disconnect(ep, us, os_sock); @@ -448,13 +446,13 @@ int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd, if( NI_OPTS(ep->netif).udp_connect_handover == 2 ) { LOG_UC(log(FNT_FMT "HANDOVER (udp_connect_handover == 2)" IPX_PORT_FMT, - FNT_PRI_ARGS(ep->netif, us), IPX_ARG(AF_IP(dst)), - CI_BSWAP_BE16(serv_sin->sin_port))); + FNT_PRI_ARGS(ep->netif, us), IPX_ARG(AF_IP(dst)), + CI_BSWAP_BE16(serv_sin->sin_port))); goto handover; } -#if CI_CFG_FAKE_IPV6 && !CI_CFG_IPV6 - if( us->s.domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(serv_addr) ) { +#if CI_CFG_FAKE_IPV6 && ! CI_CFG_IPV6 + if( us->s.domain == PF_INET6 && ! 
ci_tcp_ipv6_is_ipv4(serv_addr) ) { LOG_UC(log(FNT_FMT "HANDOVER not IPv4", FNT_PRI_ARGS(ep->netif, us))); goto handover; } @@ -486,38 +484,41 @@ int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd, if( (rc = ci_udp_sys_getsockname(os_sock, ep)) != 0 ) { LOG_E(log(FNT_FMT "ERROR: (" IPX_PORT_FMT ") sys_getsockname failed (%d)", - FNT_PRI_ARGS(ep->netif, us), IPX_ARG(AF_IP(dst)), - CI_BSWAP_BE16(serv_sin->sin_port), errno)); + FNT_PRI_ARGS(ep->netif, us), IPX_ARG(AF_IP(dst)), + CI_BSWAP_BE16(serv_sin->sin_port), errno)); goto out; } - us->s.cp.sock_cp_flags &=~ OO_SCP_UDP_WILD; + us->s.cp.sock_cp_flags &= ~OO_SCP_UDP_WILD; + + ci_udp_clr_filters(ep); ci_udp_set_raddr(us, dst, serv_sin->sin_port); us->s.s_flags |= CI_SOCK_FLAG_CONNECTED; cicp_user_retrieve(ep->netif, &us->s.pkt, &us->s.cp); switch( us->s.pkt.status ) { - case retrrc_success: - case retrrc_nomac: - onloadable = 1; - break; - default: - onloadable = 0; - if( NI_OPTS(ep->netif).udp_connect_handover ) { - LOG_UC(log(FNT_FMT "HANDOVER " IPX_PORT_FMT, FNT_PRI_ARGS(ep->netif, us), - IPX_ARG(AF_IP(dst)), CI_BSWAP_BE16(serv_sin->sin_port))); - goto handover; - } - break; + case retrrc_success: + case retrrc_nomac: + onloadable = 1; + break; + default: + onloadable = 0; + if( NI_OPTS(ep->netif).udp_connect_handover ) { + LOG_UC( + log(FNT_FMT "HANDOVER " IPX_PORT_FMT, FNT_PRI_ARGS(ep->netif, us), + IPX_ARG(AF_IP(dst)), CI_BSWAP_BE16(serv_sin->sin_port))); + goto handover; + } + break; } ci_ipcache_update_flowlabel(ep->netif, &us->s); if( CI_IPX_ADDR_IS_ANY(dst) || serv_sin->sin_port == 0 ) { LOG_UC(log(FNT_FMT IPX_PORT_FMT " - route via OS socket", - FNT_PRI_ARGS(ep->netif, us), IPX_ARG(AF_IP(dst)), - CI_BSWAP_BE16(serv_sin->sin_port))); + FNT_PRI_ARGS(ep->netif, us), IPX_ARG(AF_IP(dst)), + CI_BSWAP_BE16(serv_sin->sin_port))); ci_udp_clr_filters(ep); return 0; } @@ -526,7 +527,7 @@ int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd, * else. 
*/ LOG_UC(log(FNT_FMT "HANDOVER " IPX_PORT_FMT, FNT_PRI_ARGS(ep->netif, us), - IPX_ARG(AF_IP(dst)), CI_BSWAP_BE16(serv_sin->sin_port))); + IPX_ARG(AF_IP(dst)), CI_BSWAP_BE16(serv_sin->sin_port))); goto handover; } @@ -535,7 +536,7 @@ int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd, * just handover now. */ if( UDP_GET_FLAG(us, CI_UDPF_NO_UCAST_FILTER) && - !CI_IPX_IS_MULTICAST(udp_ipx_laddr(us)) ) + ! CI_IPX_IS_MULTICAST(udp_ipx_laddr(us)) ) goto handover; if( onloadable ) { @@ -543,40 +544,38 @@ int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd, /* Failed to set filters. Most likely we've run out of h/w filters. */ if( NI_OPTS(ep->netif).udp_connect_handover ) { - LOG_U(log(FNT_FMT - "ERROR: (" IPX_PORT_FMT ") ci_udp_set_filters failed (%d)", - FNT_PRI_ARGS(ep->netif, us), IPX_ARG(AF_IP(dst)), - CI_BSWAP_BE16(serv_sin->sin_port), rc)); + LOG_U(log(FNT_FMT "ERROR: (" IPX_PORT_FMT + ") ci_udp_set_filters failed (%d)", + FNT_PRI_ARGS(ep->netif, us), IPX_ARG(AF_IP(dst)), + CI_BSWAP_BE16(serv_sin->sin_port), rc)); CITP_STATS_NETIF(++ep->netif->state->stats.udp_connect_no_filter); goto out; - } - else { + } else { /* We aren't classing this as a failure. The app will be able to send * via the accelerated path, but will receive packets via the kernel. */ rc = 0; } } - } - else { + } else { ci_udp_clr_filters(ep); } - LOG_UC(log(LPF "connect: "SF_FMT" %sCONNECTED L:" IPX_PORT_FMT - " R:" IPX_PORT_FMT " (err:%d)", - SF_PRI_ARGS(ep,fd), udp_raddr_be32(us) ? "" : "DIS", - IPX_ARG(AF_IP(udp_ipx_laddr(us))), - (unsigned) CI_BSWAP_BE16(udp_lport_be16(us)), - IPX_ARG(AF_IP(udp_ipx_raddr(us))), - (unsigned) CI_BSWAP_BE16(udp_rport_be16(us)), errno)); + LOG_UC(log(LPF "connect: " SF_FMT " %sCONNECTED L:" IPX_PORT_FMT + " R:" IPX_PORT_FMT " (err:%d)", + SF_PRI_ARGS(ep, fd), udp_raddr_be32(us) ? 
"" : "DIS", + IPX_ARG(AF_IP(udp_ipx_laddr(us))), + (unsigned) CI_BSWAP_BE16(udp_lport_be16(us)), + IPX_ARG(AF_IP(udp_ipx_raddr(us))), + (unsigned) CI_BSWAP_BE16(udp_rport_be16(us)), errno)); return 0; - out: +out: if( rc < 0 && CITP_OPTS.no_fail ) goto handover; return rc; - handover: +handover: ci_udp_clr_filters(ep); return CI_SOCKET_HANDOVER; } @@ -586,23 +585,23 @@ int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd, * This uses the OS to do all the work so that we don't have to emulate * some of the more unpleasant "tricks" of Linux. * - * When we're either handing-over OS-dest connects or when we're "no + * When we're either handing-over OS-dest connects or when we're "no * failing" connects we may return -2 (unhandled). In this case the * OS socket _has_ been connected & we therefore are handing-over to * a socket in the right state. */ int ci_udp_connect(citp_socket* ep, ci_fd_t fd, - const struct sockaddr* serv_addr, socklen_t addrlen ) + const struct sockaddr* serv_addr, socklen_t addrlen) { int rc; - ci_fd_t os_sock; + ci_fd_t os_sock; CHECK_UEP(ep); - LOG_UC(log("%s("SF_FMT", addrlen=%d)", __FUNCTION__, - SF_PRI_ARGS(ep,fd), addrlen)); + LOG_UC(log("%s(" SF_FMT ", addrlen=%d)", __FUNCTION__, SF_PRI_ARGS(ep, fd), + addrlen)); os_sock = ci_get_os_sock_fd(fd); - if( !CI_IS_VALID_SOCKET( os_sock ) ) { + if( ! 
CI_IS_VALID_SOCKET(os_sock) ) { LOG_U(ci_log("%s: no backing socket", __FUNCTION__)); return -1; } @@ -624,7 +623,7 @@ int ci_udp_connect(citp_socket* ep, ci_fd_t fd, return -1; } - rc = ci_udp_connect_conclude( ep, fd, serv_addr, addrlen, os_sock); + rc = ci_udp_connect_conclude(ep, fd, serv_addr, addrlen, os_sock); ci_rel_os_sock_fd(os_sock); return rc; } @@ -634,29 +633,29 @@ int __ci_udp_shutdown(ci_netif* netif, ci_udp_state* us, int how) { ci_assert(netif); ci_assert(us); - + if( CI_IPX_ADDR_IS_ANY(udp_ipx_raddr(us)) ) return -ENOTCONN; /* Maybe ESHUTDOWN is suitable, but Linux returns EPIPE */ switch( how ) { - case SHUT_RD: - us->s.rx_errno |= CI_SHUT_RD; - break; - case SHUT_WR: - us->s.rx_errno |= CI_SHUT_WR; - us->s.tx_errno = EPIPE; - break; - case SHUT_RDWR: - us->s.rx_errno |= (CI_SHUT_RD | CI_SHUT_WR); - us->s.tx_errno = EPIPE; - ci_assert(UDP_IS_SHUT_RDWR(us)); - break; - default: - ci_fail(("'how' parameter of shutdown() must be verified earlier")); - return -EINVAL; + case SHUT_RD: + us->s.rx_errno |= CI_SHUT_RD; + break; + case SHUT_WR: + us->s.rx_errno |= CI_SHUT_WR; + us->s.tx_errno = EPIPE; + break; + case SHUT_RDWR: + us->s.rx_errno |= (CI_SHUT_RD | CI_SHUT_WR); + us->s.tx_errno = EPIPE; + ci_assert(UDP_IS_SHUT_RDWR(us)); + break; + default: + ci_fail(("'how' parameter of shutdown() must be verified earlier")); + return -EINVAL; } /* shutdown() must not disconnect */ - return 0; + return 0; } #endif /* !__ci_driver__ */ @@ -666,23 +665,23 @@ int __ci_udp_shutdown(ci_netif* netif, ci_udp_state* us, int how) int ci_udp_shutdown(citp_socket* ep, ci_fd_t fd, int how) { - ci_fd_t os_sock; + ci_fd_t os_sock; int rc; CHECK_UEP(ep); - LOG_UV(log(LPF "shutdown("SF_FMT", %d)", SF_PRI_ARGS(ep,fd), how)); + LOG_UV(log(LPF "shutdown(" SF_FMT ", %d)", SF_PRI_ARGS(ep, fd), how)); os_sock = ci_get_os_sock_fd(fd); - if( CI_IS_VALID_SOCKET( os_sock ) ) { + if( CI_IS_VALID_SOCKET(os_sock) ) { rc = ci_sys_shutdown(os_sock, how); - ci_rel_os_sock_fd( os_sock ); + 
ci_rel_os_sock_fd(os_sock); if( rc < 0 ) return CI_SOCKET_ERROR; } rc = __ci_udp_shutdown(ep->netif, SOCK_TO_UDP(ep->s), how); - + if( rc < 0 ) { CI_SET_ERROR(rc, -rc); return rc; @@ -692,14 +691,15 @@ int ci_udp_shutdown(citp_socket* ep, ci_fd_t fd, int how) /*! \todo we can simplify this a lot by letting the kernel have it! */ -int ci_udp_getpeername(citp_socket*ep, struct sockaddr* name, socklen_t* namelen) +int ci_udp_getpeername( + citp_socket* ep, struct sockaddr* name, socklen_t* namelen) { ci_udp_state* us; int af; ci_addr_t addr; - + CHECK_UEP(ep); - + us = SOCK_TO_UDP(ep->s); af = ipcache_af(&us->s.pkt); addr = sock_ipx_raddr(&us->s); @@ -714,9 +714,8 @@ int ci_udp_getpeername(citp_socket*ep, struct sockaddr* name, socklen_t* namelen } else if( name == NULL || namelen == NULL ) { RET_WITH_ERRNO(EFAULT); } else { - ci_addr_to_user(name, namelen, af, ep->s->domain, - udp_rport_be16(us), CI_IPX_ADDR_PTR(af, addr), - us->s.cp.so_bindtodevice); + ci_addr_to_user(name, namelen, af, ep->s->domain, udp_rport_be16(us), + CI_IPX_ADDR_PTR(af, addr), us->s.cp.so_bindtodevice); return 0; } } @@ -747,16 +746,15 @@ void ci_udp_all_fds_gone(ci_netif* netif, oo_sp sock_id, int do_free) ci_assert(ci_netif_is_locked(netif)); ci_assert(us->s.b.state == CI_TCP_STATE_UDP); - LOG_UC(ci_log("ci_udp_all_fds_gone: "NTS_FMT, - NTS_PRI_ARGS(netif, us))); + LOG_UC(ci_log("ci_udp_all_fds_gone: " NTS_FMT, NTS_PRI_ARGS(netif, us))); if( UDP_GET_FLAG(us, CI_UDPF_FILTERED) ) { UDP_CLR_FLAG(us, CI_UDPF_FILTERED); ci_tcp_ep_clear_filters(netif, S_SP(us), 0); } #ifdef __KERNEL__ - ci_assert_equal(ci_netif_get_valid_ep(netif, sock_id)-> - oofilter.sf_local_port, NULL); + ci_assert_equal( + ci_netif_get_valid_ep(netif, sock_id)->oofilter.sf_local_port, NULL); #endif ci_udp_recv_q_drop(netif, &us->recv_q); oo_p_dllink_del(netif, oo_p_dllink_sb(netif, &us->s.b, &us->s.reap_link)); diff --git a/src/lib/transport/ip/udp_recv.c b/src/lib/transport/ip/udp_recv.c index 752f5a979..2b811af95 100644 --- 
a/src/lib/transport/ip/udp_recv.c +++ b/src/lib/transport/ip/udp_recv.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2003-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author djr ** \brief UDP recvmsg() etc. @@ -10,18 +10,18 @@ ** *//* \**************************************************************************/ - + /*! \cidoxg_lib_transport_ip */ -#define _GNU_SOURCE /* for recvmmsg */ +#define _GNU_SOURCE /* for recvmmsg */ #include "ip_internal.h" #include #ifndef __KERNEL__ -# include +#include #endif -#if !defined(__KERNEL__) +#if ! defined(__KERNEL__) #include #include #endif @@ -29,13 +29,13 @@ #if OO_DO_STACK_POLL #define VERB(x) -#define LPF "ci_udp_" -#define LPFIN LPF -#define LPFOUT LPF +#define LPF "ci_udp_" +#define LPFIN LPF +#define LPFOUT LPF /* Special return codes from ci_udp_recvmsg_socklocked_slowpath() */ -#define SLOWPATH_RET_IOVLEN_INITED (1<<30) -#define SLOWPATH_RET_ZERO (SLOWPATH_RET_IOVLEN_INITED + 1) +#define SLOWPATH_RET_IOVLEN_INITED (1 << 30) +#define SLOWPATH_RET_ZERO (SLOWPATH_RET_IOVLEN_INITED + 1) /* Implementation: ** MSG_PEEK supported @@ -54,23 +54,23 @@ ** ** On Linux, MSG_OOB is ignored. 
*/ -#define MSG_OOB_CHK 0 +#define MSG_OOB_CHK 0 #ifdef MSG_ERRQUEUE -# define MSG_ERRQUEUE_CHK MSG_ERRQUEUE +#define MSG_ERRQUEUE_CHK MSG_ERRQUEUE #else -# define MSG_ERRQUEUE_CHK 0 +#define MSG_ERRQUEUE_CHK 0 #endif #ifndef __KERNEL__ -# define HAVE_MSG_FLAGS 1 +#define HAVE_MSG_FLAGS 1 #else -# define HAVE_MSG_FLAGS 0 +#define HAVE_MSG_FLAGS 0 #endif -#define LOCAL_MSG_TRUNC MSG_TRUNC +#define LOCAL_MSG_TRUNC MSG_TRUNC typedef struct { - ci_udp_iomsg_args *a; + ci_udp_iomsg_args* a; ci_msghdr* msg; int sock_locked; int flags; @@ -80,9 +80,8 @@ typedef struct { } ci_udp_recv_info; -ci_inline void ci_udp_recvmsg_fill_msghdr(ci_netif* ni, ci_msghdr* msg, - const ci_ip_pkt_fmt* pkt, - ci_sock_cmn* s) +ci_inline void ci_udp_recvmsg_fill_msghdr( + ci_netif* ni, ci_msghdr* msg, const ci_ip_pkt_fmt* pkt, ci_sock_cmn* s) { #ifndef __KERNEL__ if( msg != NULL ) { @@ -94,8 +93,8 @@ ci_inline void ci_udp_recvmsg_fill_msghdr(ci_netif* ni, ci_msghdr* msg, if( pkt->flags & CI_PKT_FLAG_INDIRECT ) pkt = PKT_CHK_NNL(ni, pkt->frag_next); af = oo_pkt_af(pkt); - udp = oo_ipx_data(af, (ci_ip_pkt_fmt*)pkt); - saddr = RX_PKT_SADDR((ci_ip_pkt_fmt*)pkt); + udp = oo_ipx_data(af, (ci_ip_pkt_fmt*) pkt); + saddr = RX_PKT_SADDR((ci_ip_pkt_fmt*) pkt); #if CI_CFG_IPV6 if( CI_IPX_IS_LINKLOCAL(saddr) ) @@ -103,8 +102,8 @@ ci_inline void ci_udp_recvmsg_fill_msghdr(ci_netif* ni, ci_msghdr* msg, #endif ci_addr_to_user(CI_SA(msg->msg_name), &msg->msg_namelen, af, s->domain, - udp->udp_source_be16, CI_IPX_ADDR_PTR(af, saddr), - s->cp.so_bindtodevice); + udp->udp_source_be16, CI_IPX_ADDR_PTR(af, saddr), + s->cp.so_bindtodevice); } } #endif @@ -127,28 +126,26 @@ struct oo_copy_state { int pkt_off; int bytes_copied; int bytes_to_copy; - const char *from; + const char* from; const ci_ip_pkt_fmt* pkt; }; -ci_inline int -__oo_copy_frag_to_iovec_no_adv(ci_netif* ni, - ci_iovec_ptr* piov, - struct oo_copy_state *ocs) +ci_inline int __oo_copy_frag_to_iovec_no_adv( + ci_netif* ni, ci_iovec_ptr* piov, struct 
oo_copy_state* ocs) { int n; - n = CI_MIN((size_t)ocs->pkt_left, CI_IOVEC_LEN(&piov->io)); + n = CI_MIN((size_t) ocs->pkt_left, CI_IOVEC_LEN(&piov->io)); n = CI_MIN(n, ocs->bytes_to_copy); - if(CI_UNLIKELY( do_copy(CI_IOVEC_BASE(&piov->io), - ocs->from + ocs->pkt_off, n) != 0 )) + if( CI_UNLIKELY(do_copy(CI_IOVEC_BASE(&piov->io), ocs->from + ocs->pkt_off, + n) != 0) ) return -EFAULT; - + ocs->bytes_copied += n; ocs->pkt_off += n; if( n == ocs->bytes_to_copy ) return 0; - + ocs->bytes_to_copy -= n; if( n == ocs->pkt_left ) { /* Caller guarantees that packet contains at least [bytes_to_copy]. */ @@ -161,7 +158,7 @@ __oo_copy_frag_to_iovec_no_adv(ci_netif* ni, */ return 1; } - + ci_assert_equal(n, CI_IOVEC_LEN(&piov->io)); if( piov->iovlen == 0 ) return 0; @@ -172,9 +169,8 @@ __oo_copy_frag_to_iovec_no_adv(ci_netif* ni, } -static int -oo_copy_pkt_to_iovec_no_adv(ci_netif* ni, const ci_ip_pkt_fmt* pkt, - ci_iovec_ptr* piov, int bytes_to_copy) +static int oo_copy_pkt_to_iovec_no_adv(ci_netif* ni, const ci_ip_pkt_fmt* pkt, + ci_iovec_ptr* piov, int bytes_to_copy) { /* Copy data from [pkt] to [piov], following [pkt->frag_next] as * necessary. Does not modify [pkt]. May or may not advance [piov]. @@ -209,9 +205,8 @@ oo_copy_pkt_to_iovec_no_adv(ci_netif* ni, const ci_ip_pkt_fmt* pkt, #ifndef __KERNEL__ #if CI_CFG_TIMESTAMPING /* Very similar to oo_copy_pkt_to_iovec_no_adv() but doesn't use pkt->buf */ -static int -ci_udp_timestamp_q_pkt_to_iovec(ci_netif* ni, const ci_ip_pkt_fmt* pkt, - ci_iovec_ptr* piov) +static int ci_udp_timestamp_q_pkt_to_iovec( + ci_netif* ni, const ci_ip_pkt_fmt* pkt, ci_iovec_ptr* piov) { int rc; struct oo_copy_state ocs; @@ -219,7 +214,7 @@ ci_udp_timestamp_q_pkt_to_iovec(ci_netif* ni, const ci_ip_pkt_fmt* pkt, /* We have to copy all chunks of jumbo frame, so pkt->buf_len is wrong * here. 
*/ ocs.bytes_to_copy = CI_BSWAP_BE16(oo_ip_hdr_const(pkt)->ip_tot_len_be16) + - oo_tx_pre_l3_len(pkt); + oo_tx_pre_l3_len(pkt); ocs.pkt_off = 0; ocs.pkt = pkt; while( 1 ) { @@ -227,7 +222,7 @@ ci_udp_timestamp_q_pkt_to_iovec(ci_netif* ni, const ci_ip_pkt_fmt* pkt, * need different offsets to include the delivery of the headers */ ocs.pkt_left = ocs.pkt->buf_len - ocs.pkt_off; - ocs.from = (char *)oo_ether_hdr_const(ocs.pkt); + ocs.from = (char*) oo_ether_hdr_const(ocs.pkt); rc = __oo_copy_frag_to_iovec_no_adv(ni, piov, &ocs); if( rc == 0 ) return ocs.bytes_copied; @@ -246,13 +241,13 @@ ci_udp_timestamp_q_pkt_to_iovec(ci_netif* ni, const ci_ip_pkt_fmt* pkt, #ifndef __KERNEL__ /* Max number of iovecs needed: * = max_datagram / (min_mtu - udp_header) - * = 65536 / (576 - 28) + * = 65536 / (576 - 28) * = 120 */ #define CI_UDP_ZC_IOVEC_MAX 120 -static void ci_udp_pkt_to_zc_msg(ci_netif* ni, ci_ip_pkt_fmt* pkt, - struct onload_zc_msg* zc_msg) +static void ci_udp_pkt_to_zc_msg( + ci_netif* ni, ci_ip_pkt_fmt* pkt, struct onload_zc_msg* zc_msg) { int i, bytes_left = pkt->pf.udp.pay_len; ci_ip_pkt_fmt* frag; @@ -271,16 +266,14 @@ static void ci_udp_pkt_to_zc_msg(ci_netif* ni, ci_ip_pkt_fmt* pkt, handle_frag->user_refcount = CI_ZC_USER_REFCOUNT_ONE; do { - zc_msg->iov[i].iov_len = CI_MIN(oo_offbuf_left(&frag->buf), - bytes_left); + zc_msg->iov[i].iov_len = CI_MIN(oo_offbuf_left(&frag->buf), bytes_left); zc_msg->iov[i].iov_base = oo_offbuf_ptr(&frag->buf); zc_msg->iov[i].buf = zc_pktbuf_to_handle(handle_frag); zc_msg->iov[i].iov_flags = 0; zc_msg->iov[i].addr_space = EF_ADDRSPACE_LOCAL; bytes_left -= zc_msg->iov[i].iov_len; ++i; - if( OO_PP_IS_NULL(frag->frag_next) || - (i == CI_UDP_ZC_IOVEC_MAX) || + if( OO_PP_IS_NULL(frag->frag_next) || (i == CI_UDP_ZC_IOVEC_MAX) || (bytes_left == 0) ) break; frag = PKT_CHK_NNL(ni, frag->frag_next); @@ -289,9 +282,9 @@ static void ci_udp_pkt_to_zc_msg(ci_netif* ni, ci_ip_pkt_fmt* pkt, zc_msg->msghdr.msg_iovlen = i; } -# if 
CI_CFG_ZC_RECV_FILTER -static void ci_udp_filter_kernel_pkt(ci_netif* ni, ci_udp_state* us, - struct msghdr* msg, int *bytes) +#if CI_CFG_ZC_RECV_FILTER +static void ci_udp_filter_kernel_pkt( + ci_netif* ni, ci_udp_state* us, struct msghdr* msg, int* bytes) { enum onload_zc_callback_rc rc; struct onload_zc_msg zc_msg; @@ -301,7 +294,7 @@ static void ci_udp_filter_kernel_pkt(ci_netif* ni, ci_udp_state* us, if( msg->msg_iovlen > CI_UDP_ZC_IOVEC_MAX ) { LOG_U(log("%s: too many fragments (%d), passing packet unfiltered", - __FUNCTION__, (int)msg->msg_iovlen)); + __FUNCTION__, (int) msg->msg_iovlen)); return; } @@ -313,22 +306,23 @@ static void ci_udp_filter_kernel_pkt(ci_netif* ni, ci_udp_state* us, do { zc_msg.iov[i].iov_base = msg->msg_iov[i].iov_base; - zc_msg.iov[i].iov_len = msg->msg_iov[i].iov_len > bytes_remaining ? - bytes_remaining : msg->msg_iov[i].iov_len; + zc_msg.iov[i].iov_len = msg->msg_iov[i].iov_len > bytes_remaining + ? bytes_remaining + : msg->msg_iov[i].iov_len; zc_msg.iov[i].buf = ONLOAD_ZC_HANDLE_NONZC; zc_msg.iov[i].iov_flags = 0; bytes_remaining -= zc_msg.iov[i].iov_len; - } while(++i < msg->msg_iovlen && bytes_remaining); + } while( ++i < msg->msg_iovlen && bytes_remaining ); zc_msg.msghdr.msg_iovlen = i; - rc = (*(onload_zc_recv_filter_callback)((ci_uintptr_t)us->recv_q_filter)) - (&zc_msg, (void *)((ci_uintptr_t)us->recv_q_filter_arg), cb_flags); + rc = (*(onload_zc_recv_filter_callback) ((ci_uintptr_t) us->recv_q_filter))( + &zc_msg, (void*) ((ci_uintptr_t) us->recv_q_filter_arg), cb_flags); ci_assert_equal(rc, ONLOAD_ZC_CONTINUE); - (void)rc; + (void) rc; } -# endif +#endif #endif /* __KERNEL__ */ @@ -347,7 +341,7 @@ static int ci_udp_recvmsg_get(ci_udp_recv_info* rinf, ci_iovec_ptr* piov) #ifndef __KERNEL__ if( msg != NULL ) { - if( CI_UNLIKELY(us->s.cmsg_flags != 0 ) ) + if( CI_UNLIKELY(us->s.cmsg_flags != 0) ) ci_ip_cmsg_recv(ni, us, pkt, msg, 0, &rinf->msg_flags); else msg->msg_controllen = 0; @@ -358,15 +352,15 @@ static int 
ci_udp_recvmsg_get(ci_udp_recv_info* rinf, ci_iovec_ptr* piov) rc = oo_copy_pkt_to_iovec_no_adv(ni, pkt, piov, pkt->pf.udp.pay_len); - if(CI_LIKELY( rc >= 0 )) { + if( CI_LIKELY(rc >= 0) ) { #if HAVE_MSG_FLAGS - if(CI_UNLIKELY( rc < pkt->pf.udp.pay_len && msg != NULL )) + if( CI_UNLIKELY(rc < pkt->pf.udp.pay_len && msg != NULL) ) rinf->msg_flags |= LOCAL_MSG_TRUNC; #endif ci_udp_recvmsg_fill_msghdr(ni, msg, pkt, &us->s); if( ! (rinf->flags & MSG_PEEK) ) { #ifndef __KERNEL__ -# if CI_CFG_ZC_RECV_FILTER +#if CI_CFG_ZC_RECV_FILTER if( us->recv_q_filter ) { struct onload_zc_msg zc_msg; struct onload_zc_iovec zc_iovec[CI_UDP_ZC_IOVEC_MAX]; @@ -379,17 +373,18 @@ static int ci_udp_recvmsg_get(ci_udp_recv_info* rinf, ci_iovec_ptr* piov) ci_udp_pkt_to_zc_msg(ni, pkt, &zc_msg); - cb_flags = CI_IP_IS_MULTICAST(oo_ip_hdr(pkt)->ip_daddr_be32) ? - ONLOAD_ZC_MSG_SHARED : 0; - filterrc = - (*(onload_zc_recv_filter_callback)((ci_uintptr_t)us->recv_q_filter)) - (&zc_msg, (void *)((ci_uintptr_t)us->recv_q_filter_arg), cb_flags); + cb_flags = CI_IP_IS_MULTICAST(oo_ip_hdr(pkt)->ip_daddr_be32) + ? 
ONLOAD_ZC_MSG_SHARED + : 0; + filterrc = (*(onload_zc_recv_filter_callback) (( + ci_uintptr_t) us->recv_q_filter))( + &zc_msg, (void*) ((ci_uintptr_t) us->recv_q_filter_arg), cb_flags); ci_assert_equal(filterrc, ONLOAD_ZC_CONTINUE); - (void)filterrc; + (void) filterrc; pkt->pio_addr = -1; } -# endif +#endif #endif ci_udp_recv_q_deliver(ni, &us->recv_q, pkt); @@ -399,15 +394,15 @@ static int ci_udp_recvmsg_get(ci_udp_recv_info* rinf, ci_iovec_ptr* piov) return rc; - recv_q_is_empty: +recv_q_is_empty: return -EAGAIN; } #ifndef __KERNEL__ -static int __ci_udp_recvmsg_try_os(ci_netif *ni, ci_udp_state *us, - struct msghdr* msg, int flags, int* prc) +static int __ci_udp_recvmsg_try_os( + ci_netif* ni, ci_udp_state* us, struct msghdr* msg, int flags, int* prc) { int rc; @@ -417,11 +412,10 @@ static int __ci_udp_recvmsg_try_os(ci_netif *ni, ci_udp_state *us, ++us->stats.n_rx_os; us->udpflags &= ~CI_UDPF_LAST_RECV_ON; if( ! (flags & MSG_PEEK) ) - us->udpflags &=~ CI_UDPF_PEEK_FROM_OS; + us->udpflags &= ~CI_UDPF_PEEK_FROM_OS; else - us->udpflags |= CI_UDPF_PEEK_FROM_OS; - } - else { + us->udpflags |= CI_UDPF_PEEK_FROM_OS; + } else { if( rc == -EAGAIN ) return 0; CI_SET_ERROR(rc, -rc); @@ -432,14 +426,14 @@ static int __ci_udp_recvmsg_try_os(ci_netif *ni, ci_udp_state *us, return 1; } -#else /* __KERNEL__ */ +#else /* __KERNEL__ */ -static int __ci_udp_recvmsg_try_os(ci_netif *ni, ci_udp_state *us, - ci_msghdr* msg, int flags, int* prc) +static int __ci_udp_recvmsg_try_os( + ci_netif* ni, ci_udp_state* us, ci_msghdr* msg, int flags, int* prc) { int rc, total_bytes, i; - tcp_helper_endpoint_t *ep = ci_netif_ep_get(ni, us->s.b.bufid); - struct socket *sock; + tcp_helper_endpoint_t* ep = ci_netif_ep_get(ni, us->s.b.bufid); + struct socket* sock; oo_os_file os_sock; struct msghdr kmsg; @@ -471,8 +465,7 @@ static int __ci_udp_recvmsg_try_os(ci_netif *ni, ci_udp_state *us, if( rc >= 0 ) { ++us->stats.n_rx_os; - } - else { + } else { if( rc == -EAGAIN ) return 0; 
++us->stats.n_rx_os_error; @@ -481,22 +474,22 @@ static int __ci_udp_recvmsg_try_os(ci_netif *ni, ci_udp_state *us, if( rc >= 0 ) { us->udpflags &= ~CI_UDPF_LAST_RECV_ON; if( ! (flags & MSG_PEEK) ) - us->udpflags &=~ CI_UDPF_PEEK_FROM_OS; + us->udpflags &= ~CI_UDPF_PEEK_FROM_OS; else - us->udpflags |= CI_UDPF_PEEK_FROM_OS; + us->udpflags |= CI_UDPF_PEEK_FROM_OS; } *prc = rc; return 1; } -#endif /* __KERNEL__ */ +#endif /* __KERNEL__ */ -static int ci_udp_recvmsg_try_os(ci_udp_recv_info *rinf, int* prc) +static int ci_udp_recvmsg_try_os(ci_udp_recv_info* rinf, int* prc) { - ci_udp_state *us = rinf->a->us; + ci_udp_state* us = rinf->a->us; int rc; - if( !(us->s.os_sock_status & OO_OS_STATUS_RX) ) + if( ! (us->s.os_sock_status & OO_OS_STATUS_RX) ) return 0; rc = __ci_udp_recvmsg_try_os(rinf->a->ni, us, rinf->msg, rinf->flags, prc); #if HAVE_MSG_FLAGS @@ -508,28 +501,28 @@ static int ci_udp_recvmsg_try_os(ci_udp_recv_info *rinf, int* prc) #endif #ifndef __KERNEL__ -# if CI_CFG_ZC_RECV_FILTER - if( us->recv_q_filter && rc == 1 && *prc >= 0) +#if CI_CFG_ZC_RECV_FILTER + if( us->recv_q_filter && rc == 1 && *prc >= 0 ) ci_udp_filter_kernel_pkt(rinf->a->ni, us, rinf->msg, prc); -# endif +#endif #endif return rc; } -static int ci_udp_recvmsg_socklocked_slowpath(ci_udp_recv_info* rinf, - ci_iovec_ptr *piov) +static int ci_udp_recvmsg_socklocked_slowpath( + ci_udp_recv_info* rinf, ci_iovec_ptr* piov) { int rc = 0; ci_netif* ni = rinf->a->ni; ci_udp_state* us = rinf->a->us; - if(CI_UNLIKELY( ni->state->rxq_low )) - ci_netif_rxq_low_on_recv(ni, &us->s, - 1 /* assume at least one pkt freed */); - /* In the kernel recv() with flags is not called. - * only read(). So flags may only contain MSG_DONTWAIT */ + if( CI_UNLIKELY(ni->state->rxq_low) ) + ci_netif_rxq_low_on_recv( + ni, &us->s, 1 /* assume at least one pkt freed */); + /* In the kernel recv() with flags is not called. + * only read(). 
So flags may only contain MSG_DONTWAIT */ #ifdef __KERNEL__ ci_assert_equal(rinf->flags, 0); #endif @@ -549,13 +542,13 @@ static int ci_udp_recvmsg_socklocked_slowpath(ci_udp_recv_info* rinf, struct { struct oo_sock_extended_err ee; union { - struct sockaddr_in offender; + struct sockaddr_in offender; #if CI_CFG_IPV6 - struct sockaddr_in6 offender6; + struct sockaddr_in6 offender6; #endif }; } __attribute__((packed, aligned(sizeof(ci_uint32)))) errhdr; - int do_data = ( rinf->msg->msg_iovlen > 0 ); + int do_data = (rinf->msg->msg_iovlen > 0); cmsg_state.msg = rinf->msg; cmsg_state.cm = rinf->msg->msg_control; @@ -565,12 +558,11 @@ static int ci_udp_recvmsg_socklocked_slowpath(ci_udp_recv_info* rinf, ci_iovec_ptr_init_nz(piov, rinf->msg->msg_iov, rinf->msg->msg_iovlen); if( us->s.timestamping_flags & ONLOAD_SOF_TIMESTAMPING_ONLOAD ) { - struct onload_timestamp ts = {pkt->hw_stamp.tv_sec, - pkt->hw_stamp.tv_nsec}; - ci_put_cmsg(&cmsg_state, SOL_SOCKET, ONLOAD_SCM_TIMESTAMPING, - sizeof(ts), &ts); - } - else { + struct onload_timestamp ts = { pkt->hw_stamp.tv_sec, + pkt->hw_stamp.tv_nsec }; + ci_put_cmsg( + &cmsg_state, SOL_SOCKET, ONLOAD_SCM_TIMESTAMPING, sizeof(ts), &ts); + } else { struct timespec ts[3]; memset(ts, 0, sizeof(ts)); @@ -578,24 +570,23 @@ static int ci_udp_recvmsg_socklocked_slowpath(ci_udp_recv_info* rinf, ts[2].tv_sec = pkt->hw_stamp.tv_sec; ts[2].tv_nsec = pkt->hw_stamp.tv_nsec; } - if( (us->s.timestamping_flags & ONLOAD_SOF_TIMESTAMPING_SYS_HARDWARE) && + if( (us->s.timestamping_flags & + ONLOAD_SOF_TIMESTAMPING_SYS_HARDWARE) && (pkt->hw_stamp.tv_nsec & CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC) ) { ts[1].tv_sec = pkt->hw_stamp.tv_sec; ts[1].tv_nsec = pkt->hw_stamp.tv_nsec; } - ci_put_cmsg(&cmsg_state, SOL_SOCKET, ONLOAD_SCM_TIMESTAMPING, - sizeof(ts), &ts); + ci_put_cmsg( + &cmsg_state, SOL_SOCKET, ONLOAD_SCM_TIMESTAMPING, sizeof(ts), &ts); } if( us->s.timestamping_flags & ONLOAD_SOF_TIMESTAMPING_OPT_TSONLY ) { rc = SLOWPATH_RET_ZERO; - } - else if( 
do_data ) { + } else if( do_data ) { rc = ci_udp_timestamp_q_pkt_to_iovec(ni, pkt, piov); if( rc < pkt->buf_len ) rinf->msg_flags |= LOCAL_MSG_TRUNC; - } - else { + } else { rinf->msg_flags |= LOCAL_MSG_TRUNC; rc = SLOWPATH_RET_ZERO; } @@ -609,8 +600,8 @@ static int ci_udp_recvmsg_socklocked_slowpath(ci_udp_recv_info* rinf, ci_addr_t saddr = ipx_hdr_saddr(oo_pkt_af(pkt), oo_ipx_hdr(pkt)); #if CI_CFG_IPV6 if( IS_AF_INET6(us->s.domain) ) - ci_make_sockaddr_in6_from_ip6(&errhdr.offender6, 0, - (ci_uint32*)saddr.ip6); + ci_make_sockaddr_in6_from_ip6( + &errhdr.offender6, 0, (ci_uint32*) saddr.ip6); else #endif ci_make_sockaddr_from_ip4(&errhdr.offender, 0, saddr.ip4); @@ -622,11 +613,11 @@ static int ci_udp_recvmsg_socklocked_slowpath(ci_udp_recv_info* rinf, #if CI_CFG_IPV6 if( IS_AF_INET6(us->s.domain) ) ci_put_cmsg(&cmsg_state, SOL_IPV6, IPV6_RECVERR, - sizeof(errhdr.ee) + sizeof(errhdr.offender6), &errhdr); + sizeof(errhdr.ee) + sizeof(errhdr.offender6), &errhdr); else #endif ci_put_cmsg(&cmsg_state, SOL_IP, IP_RECVERR, - sizeof(errhdr.ee) + sizeof(errhdr.offender), &errhdr); + sizeof(errhdr.ee) + sizeof(errhdr.offender), &errhdr); ci_ip_cmsg_finish(&cmsg_state); rinf->msg_flags |= MSG_ERRQUEUE_CHK; @@ -637,8 +628,7 @@ static int ci_udp_recvmsg_socklocked_slowpath(ci_udp_recv_info* rinf, rc = oo_os_sock_recvmsg(ni, SC_SP(&us->s), rinf->msg, rinf->flags); if( rc < 0 ) { RET_WITH_ERRNO(-rc); - } - else { + } else { rinf->msg_flags = rinf->msg->msg_flags; return rc == 0 ? SLOWPATH_RET_ZERO : rc; } @@ -654,8 +644,8 @@ static int ci_udp_recvmsg_socklocked_slowpath(ci_udp_recv_info* rinf, return rc; } #endif -#if CI_CFG_POSIX_RECV - if( ! udp_lport_be16(us)) { +#if CI_CFG_POSIX_RECV + if( ! 
udp_lport_be16(us) ) { LOG_UV(log("%s: -1 (ENOTCONN)", __FUNCTION__)); CI_SET_ERROR(rc, ENOTCONN); return rc; @@ -686,9 +676,8 @@ struct recvmsg_spinstate { }; -static int -ci_udp_recvmsg_block(ci_udp_iomsg_args* a, ci_netif* ni, ci_udp_state* us, - int timeout) +static int ci_udp_recvmsg_block( + ci_udp_iomsg_args* a, ci_netif* ni, ci_udp_state* us, int timeout) { int rc; @@ -720,7 +709,7 @@ ci_udp_recvmsg_block(ci_udp_iomsg_args* a, ci_netif* ni, ci_udp_state* us, timeout == -1 ) { /* Blocking recv() should only be restarted if there is no timeout. */ goto continue_to_block; - } else + } else rc = -errno; return rc; @@ -740,11 +729,9 @@ ci_udp_recvmsg_block(ci_udp_iomsg_args* a, ci_netif* ni, ci_udp_state* us, if( rc == 0 ) { if( mask ) { return 0; - } - else + } else rc = -EAGAIN; - } - else if( rc == -ERESTARTSYS && us->s.so.rcvtimeo_msec ) + } else if( rc == -ERESTARTSYS && us->s.so.rcvtimeo_msec ) rc = -EINTR; } return rc; @@ -752,9 +739,8 @@ ci_udp_recvmsg_block(ci_udp_iomsg_args* a, ci_netif* ni, ci_udp_state* us, } -ci_inline int -ci_udp_recvmsg_socklocked_spin(ci_netif* ni, ci_udp_state* us, - struct recvmsg_spinstate* spin_state) +ci_inline int ci_udp_recvmsg_socklocked_spin( + ci_netif* ni, ci_udp_state* us, struct recvmsg_spinstate* spin_state) { ci_uint64 now_frc; @@ -766,8 +752,8 @@ ci_udp_recvmsg_socklocked_spin(ci_netif* ni, ci_udp_state* us, if( ci_netif_may_poll(ni) ) { #ifndef __KERNEL__ if( spin_state->future == &spin_state->poison ) - spin_state->future = ci_netif_intf_rx_future(ni, us->future_intf_i, - &spin_state->poison); + spin_state->future = ci_netif_intf_rx_future( + ni, us->future_intf_i, &spin_state->poison); if( *spin_state->future != CI_PKT_RX_POISON && ci_netif_trylock(ni) ) { if( ! ci_netif_poll_intf_future(ni, us->future_intf_i, now_frc) ) { @@ -793,12 +779,10 @@ ci_udp_recvmsg_socklocked_spin(ci_netif* ni, ci_udp_state* us, if( ! 
ni->state->is_spinner ) ni->state->is_spinner = 1; } - return OO_SPINLOOP_PAUSE_CHECK_SIGNALS(ni, now_frc, - &spin_state->schedule_frc, - us->s.so.rcvtimeo_msec, - &us->s.b, spin_state->si); - } - else { + return OO_SPINLOOP_PAUSE_CHECK_SIGNALS(ni, now_frc, + &spin_state->schedule_frc, us->s.so.rcvtimeo_msec, &us->s.b, + spin_state->si); + } else { if( spin_state->spin_limit_by_so ) { ++us->stats.n_rx_eagain; return -EAGAIN; @@ -821,15 +805,13 @@ ci_udp_recvmsg_socklocked_spin(ci_netif* ni, ci_udp_state* us, } -static int -ci_udp_recvmsg_common(ci_udp_recv_info *rinf) +static int ci_udp_recvmsg_common(ci_udp_recv_info* rinf) { ci_netif* ni = rinf->a->ni; ci_udp_state* us = rinf->a->us; - int have_polled = 0; - ci_iovec_ptr piov = {NULL,0, {NULL, 0}}; + ci_iovec_ptr piov = { NULL, 0, { NULL, 0 } }; int rc = 0, slow; - struct recvmsg_spinstate spin_state = {0}; + struct recvmsg_spinstate spin_state = { 0 }; #ifndef __KERNEL__ spin_state.do_spin = -1; @@ -838,9 +820,9 @@ ci_udp_recvmsg_common(ci_udp_recv_info *rinf) spin_state.timeout = us->s.so.rcvtimeo_msec; /* Grab the per-socket lock so we can access the receive queue. */ - if( !rinf->sock_locked ) { + if( ! 
rinf->sock_locked ) { rc = ci_sock_lock(ni, &us->s.b); - if(CI_UNLIKELY( rc != 0 )) { + if( CI_UNLIKELY(rc != 0) ) { CI_SET_ERROR(rc, -rc); return rc; } @@ -852,52 +834,48 @@ ci_udp_recvmsg_common(ci_udp_recv_info *rinf) #endif slow = ((rinf->flags & (MSG_OOB_CHK | MSG_ERRQUEUE_CHK)) | - (rinf->msg->msg_iovlen == 0 ) | - (rinf->msg->msg_iov == NULL ) | - (ni->state->rxq_low ) | -#if CI_CFG_POSIX_RECV - (udp_lport_be16(us) == 0 ) | + (rinf->msg->msg_iovlen == 0) | (rinf->msg->msg_iov == NULL) | + (ni->state->rxq_low) | +#if CI_CFG_POSIX_RECV + (udp_lport_be16(us) == 0) | #endif - (us->s.so_error )); + (us->s.so_error)); if( slow ) goto slow_path; - back_to_fast_path: +back_to_fast_path: ci_iovec_ptr_init_nz(&piov, rinf->msg->msg_iov, rinf->msg->msg_iovlen); - - piov_inited: - if(CI_UNLIKELY( us->udpflags & CI_UDPF_PEEK_FROM_OS )) + +piov_inited: + if( CI_UNLIKELY(us->udpflags & CI_UDPF_PEEK_FROM_OS) ) goto peek_from_os; - check_ul_recv_q: +check_ul_recv_q: rc = ci_udp_recvmsg_get(rinf, &piov); if( rc >= 0 ) goto out; /* User-level receive queue is empty. */ - if( ! 
have_polled ) { - have_polled = 1; - ci_frc64(&spin_state.start_frc); + ci_frc64(&spin_state.start_frc); - if( ci_netif_may_poll(ni) && - ci_netif_need_poll_spinning(ni, spin_state.start_frc) && - ci_netif_trylock(ni) ) { - int any_evs = ci_netif_poll(ni); - if( ci_udp_recv_q_is_empty(&us->recv_q) && any_evs ) - ci_netif_poll(ni); - ci_netif_unlock(ni); - if( ci_udp_recv_q_not_empty(&us->recv_q) ) - goto check_ul_recv_q; - } + if( ci_netif_may_poll(ni) && + ci_netif_need_poll_spinning(ni, spin_state.start_frc) && + ci_netif_trylock(ni) ) { + int any_evs = ci_netif_poll(ni); + if( ci_udp_recv_q_is_empty(&us->recv_q) && any_evs ) + ci_netif_poll(ni); + ci_netif_unlock(ni); + if( ci_udp_recv_q_not_empty(&us->recv_q) ) + goto check_ul_recv_q; } - if(CI_UNLIKELY( (rc = UDP_RX_ERRNO(us)) )) { + if( CI_UNLIKELY((rc = UDP_RX_ERRNO(us))) ) { CI_SET_ERROR(rc, rc); us->s.rx_errno = us->s.rx_errno & 0xf0000000; goto out; } - if(CI_UNLIKELY( us->s.so_error )) { + if( CI_UNLIKELY(us->s.so_error) ) { int rc1 = ci_get_so_error(&us->s); if( rc1 != 0 ) { CI_SET_ERROR(rc, rc1); @@ -909,13 +887,12 @@ ci_udp_recvmsg_common(ci_udp_recv_info *rinf) if( ci_udp_recvmsg_try_os(rinf, &rc) ) goto out; - if( ((rinf->flags | us->s.b.sb_aflags) & MSG_DONTWAIT)) { + if( ((rinf->flags | us->s.b.sb_aflags) & MSG_DONTWAIT) ) { /* UDP returns EAGAIN when non-blocking even when shutdown. */ CI_SET_ERROR(rc, EAGAIN); ++us->stats.n_rx_eagain; goto out; - } - else if (UDP_IS_SHUT_RD(us)) { + } else if( UDP_IS_SHUT_RD(us) ) { /* Blocking and shutdowned */ rc = 0; goto out; @@ -923,11 +900,11 @@ ci_udp_recvmsg_common(ci_udp_recv_info *rinf) /* We need to block (optionally spinning first). 
*/ -#ifndef __KERNEL__ +#ifndef __KERNEL__ /* -1 is special value for uninitialised */ if( spin_state.do_spin == -1 ) { - spin_state.do_spin = - oo_per_thread_get()->spinstate & (1 << ONLOAD_SPIN_UDP_RECV); + spin_state.do_spin = + oo_per_thread_get()->spinstate & (1 << ONLOAD_SPIN_UDP_RECV); if( spin_state.do_spin ) { spin_state.poison = CI_PKT_RX_POISON; @@ -935,8 +912,8 @@ ci_udp_recvmsg_common(ci_udp_recv_info *rinf) spin_state.schedule_frc = spin_state.start_frc; spin_state.max_spin = us->s.b.spin_cycles; if( us->s.so.rcvtimeo_msec ) { - ci_uint64 max_so_spin = (ci_uint64)us->s.so.rcvtimeo_msec * - IPTIMER_STATE(ni)->khz; + ci_uint64 max_so_spin = + (ci_uint64) us->s.so.rcvtimeo_msec * IPTIMER_STATE(ni)->khz; if( max_so_spin <= spin_state.max_spin ) { spin_state.max_spin = max_so_spin; spin_state.spin_limit_by_so = 1; @@ -960,7 +937,7 @@ ci_udp_recvmsg_common(ci_udp_recv_info *rinf) rinf->sock_locked = 0; rc = ci_udp_recvmsg_block(rinf->a, ni, us, spin_state.timeout); if( rc == 0 ) { - if( !rinf->sock_locked ) + if( ! 
rinf->sock_locked ) rc = ci_sock_lock(ni, &us->s.b); } if( rc == 0 ) { @@ -969,32 +946,31 @@ ci_udp_recvmsg_common(ci_udp_recv_info *rinf) } CI_SET_ERROR(rc, -rc); - out: +out: ni->state->is_spinner = 0; return rc; - slow_path: +slow_path: rc = ci_udp_recvmsg_socklocked_slowpath(rinf, &piov); - if( rc == 0 ) + if( rc == 0 ) goto back_to_fast_path; else if( rc == SLOWPATH_RET_IOVLEN_INITED ) goto piov_inited; else if( rc == SLOWPATH_RET_ZERO ) { rc = 0; goto out; - } - else + } else goto out; - peek_from_os: +peek_from_os: if( ci_udp_recvmsg_try_os(rinf, &rc) ) goto out; - + goto check_ul_recv_q; } -int ci_udp_recvmsg(ci_udp_iomsg_args *a, ci_msghdr* msg, int flags) +int ci_udp_recvmsg(ci_udp_iomsg_args* a, ci_msghdr* msg, int flags) { ci_netif* ni = a->ni; ci_udp_state* us = a->us; @@ -1019,9 +995,8 @@ int ci_udp_recvmsg(ci_udp_iomsg_args *a, ci_msghdr* msg, int flags) #ifndef __KERNEL__ -int ci_udp_recvmmsg(ci_udp_iomsg_args *a, struct mmsghdr* mmsg, - unsigned int vlen, int flags, - const struct timespec* timeout) +int ci_udp_recvmmsg(ci_udp_iomsg_args* a, struct mmsghdr* mmsg, + unsigned int vlen, int flags, const struct timespec* timeout) { ci_netif* ni = a->ni; ci_udp_state* us = a->us; @@ -1048,8 +1023,7 @@ int ci_udp_recvmmsg(ci_udp_iomsg_args *a, struct mmsghdr* mmsg, #if HAVE_MSG_FLAGS mmsg[i].msg_hdr.msg_flags = rinf.msg_flags; #endif - } - else { + } else { if( i != 0 && errno != EAGAIN ) us->s.so_error = errno; if( rinf.sock_locked ) @@ -1060,7 +1034,7 @@ int ci_udp_recvmmsg(ci_udp_iomsg_args *a, struct mmsghdr* mmsg, return rc; } - if( ( rinf.flags & MSG_DONTWAIT ) && rc == 0 ) + if( (rinf.flags & MSG_DONTWAIT) && rc == 0 ) break; if( rinf.flags & MSG_WAITFORONE ) @@ -1084,7 +1058,7 @@ int ci_udp_recvmmsg(ci_udp_iomsg_args *a, struct mmsghdr* mmsg, if( rinf.sock_locked ) ci_sock_unlock(ni, &us->s.b); - + return i; } #endif @@ -1093,12 +1067,12 @@ int ci_udp_recvmmsg(ci_udp_iomsg_args *a, struct mmsghdr* mmsg, #ifndef __KERNEL__ static int 
ci_udp_zc_recv_from_os(ci_netif* ni, ci_udp_state* us, - struct onload_zc_recv_args* args, - enum onload_zc_callback_rc* cb_rc) + struct onload_zc_recv_args* args, enum onload_zc_callback_rc* cb_rc) { -#define ZC_BUFFERS_FOR_64K_DATAGRAM \ - ((0x10000 / (CI_CFG_PKT_BUF_SIZE - \ - CI_MEMBER_OFFSET(ci_ip_pkt_fmt, dma_start))) + 1) +#define ZC_BUFFERS_FOR_64K_DATAGRAM \ + ((0x10000 / \ + (CI_CFG_PKT_BUF_SIZE - CI_MEMBER_OFFSET(ci_ip_pkt_fmt, dma_start))) + \ + 1) int rc, i, cb_flags; struct msghdr msg; @@ -1109,7 +1083,7 @@ static int ci_udp_zc_recv_from_os(ci_netif* ni, ci_udp_state* us, ci_assert_le(us->zc_kernel_datagram_count, ZC_BUFFERS_FOR_64K_DATAGRAM); - if( us->zc_kernel_datagram_count < ZC_BUFFERS_FOR_64K_DATAGRAM) { + if( us->zc_kernel_datagram_count < ZC_BUFFERS_FOR_64K_DATAGRAM ) { if( us->zc_kernel_datagram_count == 0 ) ci_assert_equal(us->zc_kernel_datagram, OO_PP_NULL); @@ -1120,7 +1094,7 @@ static int ci_udp_zc_recv_from_os(ci_netif* ni, ci_udp_state* us, ci_netif_lock(ni); while( us->zc_kernel_datagram_count < ZC_BUFFERS_FOR_64K_DATAGRAM ) { pkt = ci_netif_pkt_alloc(ni, 0); - if( !pkt ) { + if( ! 
pkt ) { ci_netif_unlock(ni); return -ENOBUFS; } @@ -1141,8 +1115,8 @@ static int ci_udp_zc_recv_from_os(ci_netif* ni, ci_udp_state* us, #endif pkt = PKT_CHK_NNL(ni, pkt_p); iov[i].iov_base = pkt->dma_start; - iov[i].iov_len = (CI_CFG_PKT_BUF_SIZE - - ((char *)pkt->dma_start - (char*)pkt)); + iov[i].iov_len = + (CI_CFG_PKT_BUF_SIZE - ((char*) pkt->dma_start - (char*) pkt)); ++i; pkt_p = pkt->frag_next; } @@ -1156,17 +1130,17 @@ static int ci_udp_zc_recv_from_os(ci_netif* ni, ci_udp_state* us, msg.msg_flags = 0; ci_assert(us->s.os_sock_status & OO_OS_STATUS_RX); - i = __ci_udp_recvmsg_try_os(ni, us, &msg, - args->flags & ONLOAD_ZC_RECV_FLAGS_PTHRU_MASK, - &rc); + i = __ci_udp_recvmsg_try_os( + ni, us, &msg, args->flags & ONLOAD_ZC_RECV_FLAGS_PTHRU_MASK, &rc); ci_assert_equal(i, 1); /* should be data on the OS socket */ - if(CI_UNLIKELY( rc < 0 )) return rc; + if( CI_UNLIKELY(rc < 0) ) + return rc; /* We now have to translate the result from OS recvmsg - stored as * an iovec - into something we can pass to the callback, stored in - * the caller's onload_zc_iovec + * the caller's onload_zc_iovec */ - + i = 0; pkt_p = us->zc_kernel_datagram; do { @@ -1182,10 +1156,10 @@ static int ci_udp_zc_recv_from_os(ci_netif* ni, ci_udp_state* us, rc -= zc_iov[i].iov_len; ++i; pkt_p = pkt->frag_next; - } while (rc > 0); + } while( rc > 0 ); /* Clear last packet's frag_next in chain we're passing to callback. 
- * We'll restore it later if they don't keep the buffers + * We'll restore it later if they don't keep the buffers */ pkt->frag_next = OO_PP_NULL; /* pkt_p handily points to the buffer after the last one used for @@ -1209,20 +1183,20 @@ static int ci_udp_zc_recv_from_os(ci_netif* ni, ci_udp_state* us, args->msg.msghdr.msg_flags = msg.msg_flags; cb_flags = 0; - if( (ci_udp_recv_q_pkts(&us->recv_q) == 0) && + if( (ci_udp_recv_q_pkts(&us->recv_q) == 0) && (us->s.os_sock_status & OO_OS_STATUS_RX) == 0 ) cb_flags |= ONLOAD_ZC_END_OF_BURST; - /* Beware - as soon as we provide the pkts to the callback we can't + /* Beware - as soon as we provide the pkts to the callback we can't * touch them anymore as we don't know what the app might be doing with * them, such as releasing them. */ *cb_rc = (*args->cb)(args, cb_flags); - if( !((*cb_rc) & ONLOAD_ZC_KEEP) ) { + if( ! ((*cb_rc) & ONLOAD_ZC_KEEP) ) { #ifndef NDEBUG - /* Check the integrity of the list structure on the packets that we passed to - * the application. */ + /* Check the integrity of the list structure on the packets that we passed + * to the application. */ int app_packet_count = 0; pkt_p = first_pkt_p; while( OO_PP_NOT_NULL(pkt_p) ) { @@ -1264,7 +1238,7 @@ int ci_udp_zc_recv(ci_udp_iomsg_args* a, struct onload_zc_recv_args* args) ci_netif* ni = a->ni; ci_udp_state* us = a->us; enum onload_zc_callback_rc cb_rc = ONLOAD_ZC_CONTINUE; - struct recvmsg_spinstate spin_state = {0}; + struct recvmsg_spinstate spin_state = { 0 }; size_t supplied_controllen = args->msg.msghdr.msg_controllen; void* supplied_control = args->msg.msghdr.msg_control; socklen_t supplied_namelen = args->msg.msghdr.msg_namelen; @@ -1277,11 +1251,11 @@ int ci_udp_zc_recv(ci_udp_iomsg_args* a, struct onload_zc_recv_args* args) spin_state.timeout = us->s.so.rcvtimeo_msec; rc = ci_sock_lock(ni, &us->s.b); - if(CI_UNLIKELY( rc != 0 )) + if( CI_UNLIKELY(rc != 0) ) return rc; #if CI_CFG_ZC_RECV_FILTER - ci_assert(!us->recv_q_filter); + ci_assert(! 
us->recv_q_filter); #endif if( CI_UNLIKELY(us->s.so_error) ) { @@ -1305,25 +1279,24 @@ int ci_udp_zc_recv(ci_udp_iomsg_args* a, struct onload_zc_recv_args* args) args->msg.msghdr.msg_namelen = supplied_namelen; args->msg.msghdr.msg_flags = 0; - if( CI_UNLIKELY(us->s.cmsg_flags != 0 ) ) { + if( CI_UNLIKELY(us->s.cmsg_flags != 0) ) { args->msg.msghdr.msg_controllen = supplied_controllen; args->msg.msghdr.msg_control = supplied_control; - ci_ip_cmsg_recv(ni, us, pkt, &args->msg.msghdr, 0, - &args->msg.msghdr.msg_flags); - } - else + ci_ip_cmsg_recv( + ni, us, pkt, &args->msg.msghdr, 0, &args->msg.msghdr.msg_flags); + } else args->msg.msghdr.msg_controllen = 0; - ci_udp_recvmsg_fill_msghdr(ni, &args->msg.msghdr, pkt, - &us->s); + ci_udp_recvmsg_fill_msghdr(ni, &args->msg.msghdr, pkt, &us->s); ci_udp_pkt_to_zc_msg(ni, pkt, &args->msg); us->stamp = pkt->tstamp_frc; us->udpflags |= CI_UDPF_LAST_RECV_ON; - - cb_flags = CI_IP_IS_MULTICAST(oo_ip_hdr(pkt)->ip_daddr_be32) ? - ONLOAD_ZC_MSG_SHARED : 0; + + cb_flags = CI_IP_IS_MULTICAST(oo_ip_hdr(pkt)->ip_daddr_be32) + ? 
ONLOAD_ZC_MSG_SHARED + : 0; if( (ci_udp_recv_q_pkts(&us->recv_q) == 1) && ((us->s.os_sock_status & OO_OS_STATUS_RX) == 0) ) cb_flags |= ONLOAD_ZC_END_OF_BURST; @@ -1346,33 +1319,32 @@ int ci_udp_zc_recv(ci_udp_iomsg_args* a, struct onload_zc_recv_args* args) goto out; } - if( done_big_poll && done_kernel_poll && + if( done_big_poll && done_kernel_poll && (cb_flags & ONLOAD_ZC_END_OF_BURST) ) goto out; goto empty; } - out: +out: ni->state->is_spinner = 0; ci_sock_unlock(ni, &us->s.b); - + return rc; - empty: +empty: if( spin_state.start_frc == 0 ) ci_frc64(&spin_state.start_frc); if( ci_netif_may_poll(ni) && - ci_netif_need_poll_spinning(ni, spin_state.start_frc) && + ci_netif_need_poll_spinning(ni, spin_state.start_frc) && ci_netif_trylock(ni) ) { /* If only a few events, we don't need to bother with the full poll */ - if( ci_netif_poll(ni) < - NI_OPTS(ni).evs_per_poll ) + if( ci_netif_poll(ni) < NI_OPTS(ni).evs_per_poll ) done_big_poll = 1; /* If polling a few events didn't get us anything, do a full poll */ - if( !done_big_poll && ci_udp_recv_q_is_empty(&us->recv_q) ) { + if( ! 
done_big_poll && ci_udp_recv_q_is_empty(&us->recv_q) ) { done_big_poll = 1; ci_netif_poll(ni); } @@ -1382,16 +1354,16 @@ int ci_udp_zc_recv(ci_udp_iomsg_args* a, struct onload_zc_recv_args* args) if( ci_udp_recv_q_not_empty(&us->recv_q) ) goto not_empty; - } else + } else done_big_poll = 1; /* pretend we did if we can't poll */ - spin_loop: - if(CI_UNLIKELY( (rc = UDP_RX_ERRNO(us)) )) { +spin_loop: + if( CI_UNLIKELY((rc = UDP_RX_ERRNO(us))) ) { rc = -rc; us->s.rx_errno = us->s.rx_errno & 0xf0000000; goto out; } - if(CI_UNLIKELY( us->s.so_error )) { + if( CI_UNLIKELY(us->s.so_error) ) { int rc1 = ci_get_so_error(&us->s); if( rc1 != 0 ) { rc = -rc1; @@ -1417,8 +1389,7 @@ int ci_udp_zc_recv(ci_udp_iomsg_args* a, struct onload_zc_recv_args* args) if( ci_udp_recv_q_not_empty(&us->recv_q) ) goto not_empty; } while( us->s.os_sock_status & OO_OS_STATUS_RX ); - } - else { + } else { /* Return error */ rc = -ENOTEMPTY; goto out; @@ -1436,13 +1407,12 @@ int ci_udp_zc_recv(ci_udp_iomsg_args* a, struct onload_zc_recv_args* args) goto out; } - if( ((args->flags | us->s.b.sb_aflags) & MSG_DONTWAIT)) { + if( ((args->flags | us->s.b.sb_aflags) & MSG_DONTWAIT) ) { /* UDP returns EAGAIN when non-blocking even when shutdown. 
*/ rc = -EAGAIN; ++us->stats.n_rx_eagain; goto out; - } - else if (UDP_IS_SHUT_RD(us)) { + } else if( UDP_IS_SHUT_RD(us) ) { /* Blocking and shutdowned */ rc = 0; goto out; @@ -1452,9 +1422,9 @@ int ci_udp_zc_recv(ci_udp_iomsg_args* a, struct onload_zc_recv_args* args) /* -1 is special value that means uninitialised */ if( spin_state.do_spin == -1 ) { - spin_state.do_spin = - oo_per_thread_get()->spinstate & (1 << ONLOAD_SPIN_UDP_RECV); - + spin_state.do_spin = + oo_per_thread_get()->spinstate & (1 << ONLOAD_SPIN_UDP_RECV); + if( spin_state.do_spin ) { spin_state.si = citp_signal_get_specific_inited(); spin_state.max_spin = us->s.b.spin_cycles; @@ -1462,8 +1432,8 @@ int ci_udp_zc_recv(ci_udp_iomsg_args* a, struct onload_zc_recv_args* args) spin_state.future = &spin_state.poison; if( us->s.so.rcvtimeo_msec ) { - ci_uint64 max_so_spin = (ci_uint64)us->s.so.rcvtimeo_msec * - IPTIMER_STATE(ni)->khz; + ci_uint64 max_so_spin = + (ci_uint64) us->s.so.rcvtimeo_msec * IPTIMER_STATE(ni)->khz; if( max_so_spin <= spin_state.max_spin ) { spin_state.max_spin = max_so_spin; spin_state.spin_limit_by_so = 1; @@ -1474,16 +1444,15 @@ int ci_udp_zc_recv(ci_udp_iomsg_args* a, struct onload_zc_recv_args* args) if( spin_state.do_spin ) { rc = ci_udp_recvmsg_socklocked_spin(ni, us, &spin_state); - /* 0 => ul maybe readable - * 1 => spin complete - * -ve => error + /* 0 => ul maybe readable + * 1 => spin complete + * -ve => error */ if( rc == 0 ) { if( ci_udp_recv_q_not_empty(&us->recv_q) ) goto not_empty; goto spin_loop; - } - else if( rc < 0 ) + } else if( rc < 0 ) goto out; } @@ -1495,14 +1464,13 @@ int ci_udp_zc_recv(ci_udp_iomsg_args* a, struct onload_zc_recv_args* args) goto not_empty; else goto empty; - } - else + } else goto out; } -int ci_udp_recvmsg_kernel(int fd, ci_netif* ni, ci_udp_state* us, - struct msghdr* msg, int flags) +int ci_udp_recvmsg_kernel( + int fd, ci_netif* ni, ci_udp_state* us, struct msghdr* msg, int flags) { int rc = 0; int rc1; @@ -1515,8 +1483,7 @@ int 
ci_udp_recvmsg_kernel(int fd, ci_netif* ni, ci_udp_state* us, else rc = rc1; } - } - else { + } else { rc = -EAGAIN; } diff --git a/src/lib/transport/ip/udp_send.c b/src/lib/transport/ip/udp_send.c index 7aec1fcb3..7d87d6637 100644 --- a/src/lib/transport/ip/udp_send.c +++ b/src/lib/transport/ip/udp_send.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2004-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author djr/stg ** \brief UDP sendmsg() etc. @@ -10,9 +10,9 @@ ** *//* \**************************************************************************/ - + /*! \cidoxg_lib_transport_ip */ - + #include "ip_internal.h" #include "udp_internal.h" #include "ip_tx.h" @@ -29,53 +29,51 @@ #if OO_DO_STACK_POLL #define VERB(x) -#define LPF "ci_udp_" -#define LPFIN "-> " LPF +#define LPF "ci_udp_" +#define LPFIN "-> " LPF #define LPFOUT "<- " LPF /* This just avoids some ugly #ifdef. This val is not used at userlevel. */ #ifndef __KERNEL__ -# define ERESTARTSYS 0 +#define ERESTARTSYS 0 #endif -#define TXQ_LEVEL(us) \ +#define TXQ_LEVEL(us) \ ((us)->tx_count + oo_atomic_read(&(us)->tx_async_q_level)) /* If not locked then trylock, and if successful set locked flag and (in * some cases) increment the counter. Return true if lock held, else * false. si_ variants take a [struct udp_send_info*]. 
*/ -#define trylock(ni, locked) \ +#define trylock(ni, locked) \ ((locked) || (ci_netif_trylock(ni) && ((locked) = 1))) -#define si_trylock(ni, sinf) \ - trylock((ni), (sinf)->stack_locked) -#define trylock_and_inc(ni, locked, cntr) \ +#define si_trylock(ni, sinf) trylock((ni), (sinf)->stack_locked) +#define trylock_and_inc(ni, locked, cntr) \ ((locked) || (ci_netif_trylock(ni) && (++(cntr), (locked) = 1))) -#define si_trylock_and_inc(ni, sinf, cntr) \ +#define si_trylock_and_inc(ni, sinf, cntr) \ trylock_and_inc((ni), (sinf)->stack_locked, (cntr)) #if CI_CFG_IPV6 -#define msg_namelen_ok(af, namelen) ((af) == AF_INET6 ? \ - (namelen) >= sizeof(struct sockaddr_in6) : \ - (namelen) >= sizeof(struct sockaddr_in)) +#define msg_namelen_ok(af, namelen) \ + ((af) == AF_INET6 ? (namelen) >= sizeof(struct sockaddr_in6) \ + : (namelen) >= sizeof(struct sockaddr_in)) #else -#define msg_namelen_ok(af, namelen) \ - ((namelen) >= sizeof(struct sockaddr_in)) +#define msg_namelen_ok(af, namelen) ((namelen) >= sizeof(struct sockaddr_in)) #endif -#define oo_tx_udp_hdr(pkt) ((ci_udp_hdr*) oo_tx_ip_data(pkt)) +#define oo_tx_udp_hdr(pkt) ((ci_udp_hdr*) oo_tx_ip_data(pkt)) #define oo_tx_ipx_udp_hdr(af, pkt) ((ci_udp_hdr*) oo_tx_ipx_data(af, pkt)) struct udp_send_info { - int rc; - ci_ip_cached_hdrs ipcache; - int used_ipcache; - int stack_locked; - ci_uint32 timeout; - int old_ipcache_updated; + int rc; + ci_ip_cached_hdrs ipcache; + int used_ipcache; + int stack_locked; + ci_uint32 timeout; + int old_ipcache_updated; }; static bool ci_ipx_is_first_frag(int af, ci_ipx_hdr_t* ipx) @@ -88,15 +86,14 @@ static bool ci_ipx_is_first_frag(int af, ci_ipx_hdr_t* ipx) frag_hdr = ipx_hdr_data(af, ipx); if( (frag_hdr->frag_off & CI_BSWAPC_BE16(CI_IP6_OFFSET)) == 0 ) return true; - } - else + } else #endif { if( (ipx->ip4.ip_frag_off_be16 & CI_IP4_OFFSET_MASK) == 0 ) return true; } return false; - } +} /* Check if More Fragments flag is set for IPv4 or IPv6 header */ static bool ci_ipx_is_mf_set(int 
af, ci_ipx_hdr_t* ipx) @@ -109,8 +106,7 @@ static bool ci_ipx_is_mf_set(int af, ci_ipx_hdr_t* ipx) frag_hdr = ipx_hdr_data(af, ipx); if( frag_hdr->frag_off & CI_BSWAPC_BE16(CI_IP6_MF) ) return true; - } - else + } else #endif { if( ipx->ip4.ip_frag_off_be16 & CI_IP4_FRAG_MORE ) @@ -119,8 +115,8 @@ static bool ci_ipx_is_mf_set(int af, ci_ipx_hdr_t* ipx) return false; } -ci_noinline void ci_udp_sendmsg_chksum(ci_netif* ni, ci_ip_pkt_fmt* pkt, - int af, ci_ipx_hdr_t* first_hdr) +ci_noinline void ci_udp_sendmsg_chksum( + ci_netif* ni, ci_ip_pkt_fmt* pkt, int af, ci_ipx_hdr_t* first_hdr) { /* 1400*50 = 70000, i.e. in normal situation there are <50 fragments */ #define MAX_IP_FRAGMENTS 50 @@ -133,7 +129,7 @@ ci_noinline void ci_udp_sendmsg_chksum(ci_netif* ni, ci_ip_pkt_fmt* pkt, /* iterate all IP fragments */ while( OO_PP_NOT_NULL(p->next) ) { int frag_len; - char *frag_start; + char* frag_start; int max_sg_len; /* When too many fragments, let's send it without checksum */ @@ -141,27 +137,32 @@ ci_noinline void ci_udp_sendmsg_chksum(ci_netif* ni, ci_ip_pkt_fmt* pkt, return; if( first_frag ) { - frag_start = (char*)(udp + 1); - frag_len = ( WITH_CI_CFG_IPV6( IS_AF_INET6(af) ? - CI_BSWAP_BE16(first_hdr->ip6.payload_len) - sizeof(ci_ip6_frag_hdr) : ) - CI_BSWAP_BE16(first_hdr->ip4.ip_tot_len_be16) - - CI_IP4_IHL(&first_hdr->ip4)) - sizeof(ci_udp_hdr); + frag_start = (char*) (udp + 1); + frag_len = (WITH_CI_CFG_IPV6( + IS_AF_INET6(af) + ? CI_BSWAP_BE16(first_hdr->ip6.payload_len) - + sizeof(ci_ip6_frag_hdr) + :) CI_BSWAP_BE16(first_hdr->ip4.ip_tot_len_be16) - + CI_IP4_IHL(&first_hdr->ip4)) - + sizeof(ci_udp_hdr); first_frag = 0; - } - else { + } else { ci_ipx_hdr_t* p_ipx; p = PKT_CHK(ni, p->next); p_ipx = TX_PKT_IPX_HDR(af, p); - frag_len = WITH_CI_CFG_IPV6( IS_AF_INET6(af) ? 
- CI_BSWAP_BE16(p_ipx->ip6.payload_len) - sizeof(ci_ip6_frag_hdr) : ) - CI_BSWAP_BE16(p_ipx->ip4.ip_tot_len_be16) - CI_IP4_IHL(&p_ipx->ip4); - frag_start = (char*)ipx_hdr_data(af, p_ipx) + CI_IPX_FRAG_HDR_SIZE(af); + frag_len = WITH_CI_CFG_IPV6(IS_AF_INET6(af) + ? CI_BSWAP_BE16(p_ipx->ip6.payload_len) - + sizeof(ci_ip6_frag_hdr) + :) + CI_BSWAP_BE16(p_ipx->ip4.ip_tot_len_be16) - + CI_IP4_IHL(&p_ipx->ip4); + frag_start = (char*) ipx_hdr_data(af, p_ipx) + CI_IPX_FRAG_HDR_SIZE(af); } iov[n].iov_base = frag_start; iov[n].iov_len = frag_len; - max_sg_len = CI_PTR_ALIGN_FWD(PKT_START(p), CI_CFG_PKT_BUF_SIZE) - - frag_start; + max_sg_len = + CI_PTR_ALIGN_FWD(PKT_START(p), CI_CFG_PKT_BUF_SIZE) - frag_start; if( frag_len > max_sg_len ) { iov[n].iov_len = max_sg_len; frag_len -= max_sg_len; @@ -178,29 +179,26 @@ ci_noinline void ci_udp_sendmsg_chksum(ci_netif* ni, ci_ip_pkt_fmt* pkt, iov[n].iov_base = PKT_START(sg_pkt); iov[n].iov_len = frag_len; - max_sg_len = CI_PTR_ALIGN_FWD(PKT_START(sg_pkt), - CI_CFG_PKT_BUF_SIZE) - + max_sg_len = CI_PTR_ALIGN_FWD(PKT_START(sg_pkt), CI_CFG_PKT_BUF_SIZE) - PKT_START(sg_pkt); if( frag_len > max_sg_len ) { iov[n].iov_len = max_sg_len; frag_len -= max_sg_len; - } - else + } else frag_len = 0; } ci_assert_equal(frag_len, 0); } } - - udp->udp_check_be16 = WITH_CI_CFG_IPV6( IS_AF_INET6(af) ? - ci_ip6_udp_checksum(&first_hdr->ip6, udp, iov, n+1) : ) - ci_udp_checksum(&first_hdr->ip4, udp, iov, n+1); + + udp->udp_check_be16 = WITH_CI_CFG_IPV6( + IS_AF_INET6(af) ? 
ci_ip6_udp_checksum(&first_hdr->ip6, udp, iov, n + 1) + :) ci_udp_checksum(&first_hdr->ip4, udp, iov, n + 1); } -static void -ci_ip_send_udp_slow(ci_netif* ni, struct oo_sock_cplane* sock_cp, - ci_ip_pkt_fmt* pkt, ci_ip_cached_hdrs* ipcache) +static void ci_ip_send_udp_slow(ci_netif* ni, struct oo_sock_cplane* sock_cp, + ci_ip_pkt_fmt* pkt, ci_ip_cached_hdrs* ipcache) { int os_rc = 0; @@ -231,13 +229,14 @@ static int ci_udp_sendmsg_loop(ci_sock_cmn* s, void* opaque_arg) */ frag_head = state->pkt; udp = (ci_udp_hdr*) (oo_ip_hdr(frag_head) + 1); - frag_head->tstamp_frc = IPTIMER_STATE(state->ni)->frc; - frag_head->pf.udp.pay_len = CI_BSWAP_BE16(udp->udp_len_be16) - sizeof(*udp); + frag_head->tstamp_frc = IPTIMER_STATE(state->ni)->frc; + frag_head->pf.udp.pay_len = + CI_BSWAP_BE16(udp->udp_len_be16) - sizeof(*udp); buf_pkt = frag_head; seg_i = 0; while( 1 ) { ++state->ni->state->n_rx_pkts; - ci_assert(!(buf_pkt->flags & CI_PKT_FLAG_RX)); + ci_assert(! (buf_pkt->flags & CI_PKT_FLAG_RX)); buf_pkt->flags |= CI_PKT_FLAG_RX; if( buf_pkt == state->pkt ) /* First IP fragment, move past IP+UDP header */ @@ -246,7 +245,7 @@ static int ci_udp_sendmsg_loop(ci_sock_cmn* s, void* opaque_arg) /* Subsequent IP fragment, move past IP header */ buf_start = oo_ip_hdr(buf_pkt) + 1; else - /* Internal (jumbo) fragment, no header to move past */ + /* Internal (jumbo) fragment, no header to move past */ buf_start = PKT_START(buf_pkt); buf_len = buf_pkt->buf_len; buf_len -= (char*) buf_start - PKT_START(buf_pkt); @@ -265,13 +264,12 @@ static int ci_udp_sendmsg_loop(ci_sock_cmn* s, void* opaque_arg) ci_udp_rx_deliver(s, opaque_arg); citp_waitable_wake_not_in_poll(state->ni, &s->b, CI_SB_FLAG_WAKE_RX); - return 0; /* continue delivering to other sockets */ + return 0; /* continue delivering to other sockets */ } static void ci_udp_sendmsg_mcast(ci_netif* ni, ci_udp_state* us, - ci_ip_cached_hdrs* ipcache, - ci_ip_pkt_fmt* pkt) + ci_ip_cached_hdrs* ipcache, ci_ip_pkt_fmt* pkt) { /* NB. 
We don't deliver multicast packets directly to local sockets if * sending via the control plane (below) as they'll get there via the @@ -288,7 +286,7 @@ static void ci_udp_sendmsg_mcast(ci_netif* ni, ci_udp_state* us, if( ! (us->udpflags & CI_UDPF_MCAST_LOOP) || ! (NI_OPTS(ni).mcast_send & CITP_MCAST_SEND_FLAG_LOCAL) ) return; - if(CI_UNLIKELY( ni->state->n_rx_pkts >= NI_OPTS(ni).max_rx_packets )) { + if( CI_UNLIKELY(ni->state->n_rx_pkts >= NI_OPTS(ni).max_rx_packets) ) { ci_netif_try_to_reap(ni, 100); if( ni->state->n_rx_pkts >= NI_OPTS(ni).max_rx_packets ) { CITP_STATS_NETIF_INC(ni, udp_send_mcast_loop_drop); @@ -310,26 +308,19 @@ static void ci_udp_sendmsg_mcast(ci_netif* ni, ci_udp_state* us, */ ci_ip_time_resync(IPTIMER_STATE(ni)); - ci_netif_filter_for_each_match(ni, - oo_ip_hdr(pkt)->ip_daddr_be32, - udp->udp_dest_be16, - oo_ip_hdr(pkt)->ip_saddr_be32, - udp->udp_source_be16, - IPPROTO_UDP, ipcache->intf_i, - ipcache->encap.vlan_id, - ci_udp_sendmsg_loop, &state, NULL); - ci_netif_filter_for_each_match(ni, - oo_ip_hdr(pkt)->ip_daddr_be32, - udp->udp_dest_be16, - 0, 0, IPPROTO_UDP, ipcache->intf_i, - ipcache->encap.vlan_id, - ci_udp_sendmsg_loop, &state, NULL); + ci_netif_filter_for_each_match(ni, oo_ip_hdr(pkt)->ip_daddr_be32, + udp->udp_dest_be16, oo_ip_hdr(pkt)->ip_saddr_be32, udp->udp_source_be16, + IPPROTO_UDP, ipcache->intf_i, ipcache->encap.vlan_id, + ci_udp_sendmsg_loop, &state, NULL); + ci_netif_filter_for_each_match(ni, oo_ip_hdr(pkt)->ip_daddr_be32, + udp->udp_dest_be16, 0, 0, IPPROTO_UDP, ipcache->intf_i, + ipcache->encap.vlan_id, ci_udp_sendmsg_loop, &state, NULL); } /* Pass prepared packet to ip_send(), release our ref & and update stats */ ci_inline void prep_send_pkt(ci_netif* ni, ci_udp_state* us, - ci_ip_pkt_fmt* pkt, ci_ip_cached_hdrs* ipcache) + ci_ip_pkt_fmt* pkt, ci_ip_cached_hdrs* ipcache) { int af = ipcache_af(&us->s.pkt); ci_ipx_hdr_t* ipx = oo_tx_ipx_hdr(af, pkt); @@ -342,7 +333,7 @@ ci_inline void prep_send_pkt(ci_netif* ni, 
ci_udp_state* us, us->tx_count += pkt->pf.udp.tx_length; pkt->flags |= CI_PKT_FLAG_UDP; pkt->pf.udp.tx_sock_id = S_SP(us); - CI_UDP_STATS_INC_OUT_DGRAMS( ni ); + CI_UDP_STATS_INC_OUT_DGRAMS(ni); #if CI_CFG_IPV6 if( IS_AF_INET6(af) ) { @@ -350,9 +341,8 @@ ci_inline void prep_send_pkt(ci_netif* ni, ci_udp_state* us, TX_PKT_SET_FLOWLABEL(af, pkt, ci_ip6_flowlabel_be32(&ipcache->ipx.ip6)); } pkt->flags |= CI_PKT_FLAG_IS_IP6; - } - else { - pkt->flags &=~ CI_PKT_FLAG_IS_IP6; + } else { + pkt->flags &= ~CI_PKT_FLAG_IS_IP6; } #endif @@ -372,9 +362,8 @@ ci_inline void prep_send_pkt(ci_netif* ni, ci_udp_state* us, #ifdef __KERNEL__ -static int do_sys_sendmsg(tcp_helper_endpoint_t *ep, oo_os_file os_sock, - const ci_msghdr* msg, - int flags, int user_buffers, int atomic) +static int do_sys_sendmsg(tcp_helper_endpoint_t* ep, oo_os_file os_sock, + const ci_msghdr* msg, int flags, int user_buffers, int atomic) { struct socket* sock; int i, bytes; @@ -385,8 +374,8 @@ static int do_sys_sendmsg(tcp_helper_endpoint_t *ep, oo_os_file os_sock, ci_assert(! user_buffers || ! 
atomic); LOG_NT(ci_log("%s: user_buffers=%d atomic=%d sk_allocation=%x ATOMIC=%x", - __FUNCTION__, user_buffers, atomic, - sock->sk->sk_allocation, GFP_ATOMIC)); + __FUNCTION__, user_buffers, atomic, sock->sk->sk_allocation, + GFP_ATOMIC)); if( atomic && sock->sk->sk_allocation != GFP_ATOMIC ) { ci_log("%s: cannot proceed", __FUNCTION__); @@ -400,11 +389,9 @@ static int do_sys_sendmsg(tcp_helper_endpoint_t *ep, oo_os_file os_sock, if( user_buffers ) { oo_msg_iov_init(&kmsg, WRITE, msg->msg_iov, msg->msg_iovlen, bytes); bytes = sock_sendmsg(sock, &kmsg); - } - else { - bytes = kernel_sendmsg(sock, &kmsg, - (struct kvec*) msg->msg_iov, msg->msg_iovlen, - bytes); + } else { + bytes = kernel_sendmsg( + sock, &kmsg, (struct kvec*) msg->msg_iov, msg->msg_iovlen, bytes); } /* Clear OS TX flag if necessary */ @@ -413,11 +400,10 @@ static int do_sys_sendmsg(tcp_helper_endpoint_t *ep, oo_os_file os_sock, } static int ci_udp_sendmsg_os(ci_netif* ni, ci_udp_state* us, - const ci_msghdr* msg, int flags, - int user_buffers, int atomic) + const ci_msghdr* msg, int flags, int user_buffers, int atomic) { int rc; - tcp_helper_endpoint_t *ep = ci_netif_ep_get(ni, us->s.b.bufid); + tcp_helper_endpoint_t* ep = ci_netif_ep_get(ni, us->s.b.bufid); oo_os_file os_sock; ++us->stats.n_tx_os; @@ -434,8 +420,7 @@ static int ci_udp_sendmsg_os(ci_netif* ni, ci_udp_state* us, #else ci_inline int ci_udp_sendmsg_os(ci_netif* ni, ci_udp_state* us, - const struct msghdr* msg, int flags, - int user_buffers, int atomic) + const struct msghdr* msg, int flags, int user_buffers, int atomic) { ++us->stats.n_tx_os; return oo_os_sock_sendmsg(ni, S_SP(us), msg, flags); @@ -450,24 +435,24 @@ ci_inline int ci_udp_sendmsg_os(ci_netif* ni, ci_udp_state* us, * * TODO: wrap it into ioctl: bind+getsockname. 
* */ -static int ci_udp_sendmsg_os_get_binding(citp_socket *ep, ci_fd_t fd, - const struct msghdr * msg, int flags) +static int ci_udp_sendmsg_os_get_binding( + citp_socket* ep, ci_fd_t fd, const struct msghdr* msg, int flags) { ci_netif* ni = ep->netif; ci_udp_state* us = SOCK_TO_UDP(ep->s); int rc; union ci_sockaddr_u sa_u = {}; socklen_t salen = sizeof(sa_u); - ci_fd_t os_sock = (ci_fd_t)ci_get_os_sock_fd(fd); + ci_fd_t os_sock = (ci_fd_t) ci_get_os_sock_fd(fd); ci_addr_t laddr; ci_uint16 lport; - ci_assert( !udp_lport_be16(us)); + ci_assert(! udp_lport_be16(us)); - if ( !CI_IS_VALID_SOCKET(os_sock) ) { - LOG_U( log("%s: "NT_FMT" can't get OS socket (%d)", __FUNCTION__, - NT_PRI_ARGS(ni,us), os_sock)); - RET_WITH_ERRNO((int)os_sock); /*! \todo FIXME remvoce cast */ + if( ! CI_IS_VALID_SOCKET(os_sock) ) { + LOG_U(log("%s: " NT_FMT " can't get OS socket (%d)", __FUNCTION__, + NT_PRI_ARGS(ni, us), os_sock)); + RET_WITH_ERRNO((int) os_sock); /*! \todo FIXME remvoce cast */ } /* Not bound. Probably not connected & sending for the first time, @@ -482,24 +467,25 @@ static int ci_udp_sendmsg_os_get_binding(citp_socket *ep, ci_fd_t fd, /* see what the kernel did - we'll do just the same */ if( rc == 0 ) - rc = ci_sys_getsockname( os_sock, &sa_u.sa, &salen); + rc = ci_sys_getsockname(os_sock, &sa_u.sa, &salen); /* Must release the os_sock fd before we can take the stack lock, as the * citp_dup2_lock is held until we do so, and lock ordering does not allow * us to take the stack lock with the dup2 lock held. */ - ci_rel_os_sock_fd( os_sock ); + ci_rel_os_sock_fd(os_sock); /* get out if getsockname fails or returns a non INET family - * or a sockaddr struct that's too darned small */ - if( CI_UNLIKELY( rc || (!rc && - ( sa_u.sa.sa_family != us->s.domain || - /* FIXME case when sa_family is AF_INET and us->s.domain is AF_INET6 */ - salen < IPX_SOCKADDR_SIZE(sa_u.sa.sa_family))))) { - LOG_UV(log("%s: "NT_FMT" sys_getsockname prob. 
(rc:%d err:%d, fam:%d, " - "len:%d - exp %u)", - __FUNCTION__, NT_PRI_ARGS(ni,us), rc, errno, sa_u.sa.sa_family, - salen, (unsigned)IPX_SOCKADDR_SIZE(sa_u.sa.sa_family))); + * or a sockaddr struct that's too darned small */ + if( CI_UNLIKELY(rc || (! rc && (sa_u.sa.sa_family != us->s.domain || + /* FIXME case when sa_family is AF_INET + and us->s.domain is AF_INET6 */ + salen < IPX_SOCKADDR_SIZE( + sa_u.sa.sa_family)))) ) { + LOG_UV(log("%s: " NT_FMT " sys_getsockname prob. (rc:%d err:%d, fam:%d, " + "len:%d - exp %u)", + __FUNCTION__, NT_PRI_ARGS(ni, us), rc, errno, sa_u.sa.sa_family, salen, + (unsigned) IPX_SOCKADDR_SIZE(sa_u.sa.sa_family))); return rc; } @@ -510,30 +496,30 @@ static int ci_udp_sendmsg_os_get_binding(citp_socket *ep, ci_fd_t fd, ci_sock_cmn_set_laddr(ep->s, laddr, lport); /* Add a filter if the local addressing is appropriate. */ - if( ~ni->state->flags & CI_NETIF_FLAG_USE_ALIEN_LADDRS && - lport != 0 && (CI_IPX_ADDR_IS_ANY(laddr) || - cicp_user_addr_is_local_efab(ni, laddr)) ) { - ci_assert( ! (us->udpflags & CI_UDPF_FILTERED) ); + if( ~ni->state->flags & CI_NETIF_FLAG_USE_ALIEN_LADDRS && lport != 0 && + (CI_IPX_ADDR_IS_ANY(laddr) || + cicp_user_addr_is_local_efab(ni, laddr)) ) { + ci_assert(! 
(us->udpflags & CI_UDPF_FILTERED)); - rc = ci_tcp_ep_set_filters(ni, S_SP(us), us->s.cp.so_bindtodevice, - OO_SP_NULL); + rc = ci_tcp_ep_set_filters( + ni, S_SP(us), us->s.cp.so_bindtodevice, OO_SP_NULL); if( rc ) { LOG_U(log("%s: FILTER ADD FAIL %d", __FUNCTION__, -rc)); if( rc == -EFILTERSSOME ) UDP_SET_FLAG(us, CI_UDPF_FILTERED); if( CITP_OPTS.no_fail ) rc = 0; - } - else { + } else { UDP_SET_FLAG(us, CI_UDPF_FILTERED); } } ci_netif_unlock(ni); laddr = sock_laddr(&us->s); - LOG_UV(ci_log("%s: "NT_FMT"Unbound: first send via OS got L:[" IPX_PORT_FMT "]", - __FUNCTION__, NT_PRI_ARGS(ni,us), - IPX_ARG(AF_IP(laddr)), udp_lport_be16(us))); + LOG_UV(ci_log("%s: " NT_FMT "Unbound: first send via OS got L:[" IPX_PORT_FMT + "]", + __FUNCTION__, NT_PRI_ARGS(ni, us), IPX_ARG(AF_IP(laddr)), + udp_lport_be16(us))); return rc; } @@ -541,8 +527,7 @@ static int ci_udp_sendmsg_os_get_binding(citp_socket *ep, ci_fd_t fd, static int ci_udp_sendmsg_send_pkt_via_os(ci_netif* ni, ci_udp_state* us, - ci_ip_pkt_fmt* pkt, int flags, - struct udp_send_info* sinf) + ci_ip_pkt_fmt* pkt, int flags, struct udp_send_info* sinf) { int seg_i, buf_len, iov_i; ci_ip_pkt_fmt* frag_head; @@ -566,12 +551,11 @@ static int ci_udp_sendmsg_send_pkt_via_os(ci_netif* ni, ci_udp_state* us, { ci_addr_t daddr = TX_PKT_DADDR(af, pkt); if( ! 
CI_IPX_ADDR_IS_ANY(daddr) ) { - ss = ci_make_sockaddr_storage_from_addr(TX_PKT_UDP(pkt)->udp_dest_be16, - daddr); + ss = ci_make_sockaddr_storage_from_addr( + TX_PKT_UDP(pkt)->udp_dest_be16, daddr); m.msg_name = &ss; m.msg_namelen = IPX_SOCKADDR_SIZE(af); - } - else { + } else { m.msg_name = NULL; m.msg_namelen = 0; } @@ -592,7 +576,7 @@ static int ci_udp_sendmsg_send_pkt_via_os(ci_netif* ni, ci_udp_state* us, /* Subsequent IP fragment, move past IP header */ buf_start = oo_tx_ipx_data(af, buf_pkt); else - /* Internal (jumbo) fragment, no header to move past */ + /* Internal (jumbo) fragment, no header to move past */ buf_start = PKT_START(buf_pkt); buf_len = buf_pkt->buf_len; buf_len -= (char*) buf_start - PKT_START(buf_pkt); @@ -628,7 +612,7 @@ static int ci_udp_sendmsg_send_pkt_via_os(ci_netif* ni, ci_udp_state* us, } -static void fixup_pkt_not_transmitted(ci_netif *ni, ci_ip_pkt_fmt* pkt) +static void fixup_pkt_not_transmitted(ci_netif* ni, ci_ip_pkt_fmt* pkt) { ci_assert(ci_netif_is_locked(ni)); while( 1 ) { @@ -649,8 +633,7 @@ static void fixup_pkt_not_transmitted(ci_netif *ni, ci_ip_pkt_fmt* pkt) static void ci_udp_sendmsg_send(ci_netif* ni, ci_udp_state* us, - ci_ip_pkt_fmt* pkt, int flags, - struct udp_send_info* sinf) + ci_ip_pkt_fmt* pkt, int flags, struct udp_send_info* sinf) { ci_ip_pkt_fmt* first_pkt = pkt; ci_ip_cached_hdrs* ipcache; @@ -680,9 +663,8 @@ static void ci_udp_sendmsg_send(ci_netif* ni, ci_udp_state* us, if( oo_cp_ipcache_is_valid(ni, ipcache) ) goto done_hdr_update; old_ipcache_updated = 1; - } - else { - us->udpflags &=~ CI_UDPF_LAST_SEND_NOMAC; + } else { + us->udpflags &= ~CI_UDPF_LAST_SEND_NOMAC; ci_ipcache_set_daddr(ipcache, pkt_daddr); ipcache->dport_be16 = TX_PKT_IPX_DPORT(af, pkt); if( sinf != NULL && sinf->used_ipcache && @@ -702,8 +684,7 @@ static void ci_udp_sendmsg_send(ci_netif* ni, ci_udp_state* us, * footing cicp_user_retrieve(). 
*/ ci_ip_cache_invalidate(ipcache); cicp_user_retrieve(ni, ipcache, &us->s.cp); - } - else { + } else { /********************************************************************** * Connected send. */ @@ -714,7 +695,7 @@ static void ci_udp_sendmsg_send(ci_netif* ni, ci_udp_state* us, if( CI_IPX_ADDR_IS_ANY(udp_raddr) ) goto no_longer_connected; ipcache = &us->s.pkt; - if(CI_UNLIKELY( ! oo_cp_ipcache_is_valid(ni, ipcache) )) { + if( CI_UNLIKELY(! oo_cp_ipcache_is_valid(ni, ipcache)) ) { ++us->stats.n_tx_cp_c_lookup; cicp_user_retrieve(ni, ipcache, &us->s.cp); old_ipcache_updated = 1; @@ -725,22 +706,22 @@ static void ci_udp_sendmsg_send(ci_netif* ni, ci_udp_state* us, TX_PKT_IPX_UDP(af, pkt, is_frag)->udp_dest_be16 = udp_rport_be16(us); } - done_hdr_update: +done_hdr_update: switch( ipcache->status ) { - case retrrc_success: - ipcache_onloadable = 1; + case retrrc_success: + ipcache_onloadable = 1; - /* Try to avoid reordering of the packets: send all nomac packets */ - if( old_ipcache_updated && (us->udpflags & CI_UDPF_LAST_SEND_NOMAC) ) { - oo_deferred_send(ni); - us->udpflags &=~ CI_UDPF_LAST_SEND_NOMAC; - } - break; - case retrrc_nomac: - ipcache_onloadable = 0; - break; - default: - goto send_pkt_via_os; + /* Try to avoid reordering of the packets: send all nomac packets */ + if( old_ipcache_updated && (us->udpflags & CI_UDPF_LAST_SEND_NOMAC) ) { + oo_deferred_send(ni); + us->udpflags &= ~CI_UDPF_LAST_SEND_NOMAC; + } + break; + case retrrc_nomac: + ipcache_onloadable = 0; + break; + default: + goto send_pkt_via_os; } #if CI_CFG_IPV6 @@ -755,7 +736,7 @@ static void ci_udp_sendmsg_send(ci_netif* ni, ci_udp_state* us, tot_len = ipx_hdr_tot_len(af, oo_tx_ipx_hdr(af, pkt)); - if(CI_UNLIKELY( tot_len > ipcache->mtu )) + if( CI_UNLIKELY(tot_len > ipcache->mtu) ) /* Oh dear -- we've fragmented the packet with too large an MTU. * Either the MTU has recently changed, or we are unconnected and * sampled the MTU from the cached value at a bad time. 
@@ -765,15 +746,15 @@ static void ci_udp_sendmsg_send(ci_netif* ni, ci_udp_state* us, * * For now just carry on regardless... */ - ci_log("%s: pkt mtu=%d exceeds path mtu=%d", __FUNCTION__, - tot_len, ipcache->mtu); + ci_log("%s: pkt mtu=%d exceeds path mtu=%d", __FUNCTION__, tot_len, + ipcache->mtu); ci_assert_equal(ni->state->send_may_poll, 0); ni->state->send_may_poll = ci_netif_may_poll(ni); /* Linux allows sending IPv6 packets with zero Hop Limit field */ if( ipcache_ttl(ipcache) || ipcache_is_ipv6(ipcache) ) { - if(CI_LIKELY( ipcache_onloadable )) { + if( CI_LIKELY(ipcache_onloadable) ) { /* TODO: Hit the doorbell just once. */ while( 1 ) { oo_pkt_p next = pkt->next; @@ -784,21 +765,20 @@ static void ci_udp_sendmsg_send(ci_netif* ni, ci_udp_state* us, break; pkt = PKT_CHK(ni, next); #ifdef __KERNEL__ - if(CI_UNLIKELY( i++ > ni->pkt_sets_n << CI_CFG_PKTS_PER_SET_S )) { - ci_netif_error_detected(ni, CI_NETIF_ERROR_UDP_SEND_PKTS_LIST, - __FUNCTION__); + if( CI_UNLIKELY(i++ > ni->pkt_sets_n << CI_CFG_PKTS_PER_SET_S) ) { + ci_netif_error_detected( + ni, CI_NETIF_ERROR_UDP_SEND_PKTS_LIST, __FUNCTION__); } #endif } if( flags & MSG_CONFIRM ) - oo_cp_arp_confirm(ni->cplane, &ipcache->fwd_ver, - ci_ni_fwd_table_id(ni)); + oo_cp_arp_confirm( + ni->cplane, &ipcache->fwd_ver, ci_ni_fwd_table_id(ni)); if( CI_IPX_IS_MULTICAST(ipcache_raddr(ipcache)) ) ci_udp_sendmsg_mcast(ni, us, ipcache, first_pkt); - us->udpflags &=~ CI_UDPF_LAST_SEND_NOMAC; - } - else { + us->udpflags &= ~CI_UDPF_LAST_SEND_NOMAC; + } else { /* Packet should go via an onload interface, but ipcache is not valid. * Could be that we don't have a mac, or could be that we need to drop * into the kernel to keep the mac entry alive. 
@@ -820,28 +800,26 @@ static void ci_udp_sendmsg_send(ci_netif* ni, ci_udp_state* us, break; pkt = PKT_CHK(ni, next); #ifdef __KERNEL__ - if(CI_UNLIKELY( i++ > ni->pkt_sets_n << CI_CFG_PKTS_PER_SET_S )) { - ci_netif_error_detected(ni, CI_NETIF_ERROR_UDP_SEND_PKTS_LIST, - __FUNCTION__); + if( CI_UNLIKELY(i++ > ni->pkt_sets_n << CI_CFG_PKTS_PER_SET_S) ) { + ci_netif_error_detected( + ni, CI_NETIF_ERROR_UDP_SEND_PKTS_LIST, __FUNCTION__); } #endif } } - } - else if( CI_IPX_IS_MULTICAST(ipcache_raddr(ipcache)) ) { + } else if( CI_IPX_IS_MULTICAST(ipcache_raddr(ipcache)) ) { fixup_pkt_not_transmitted(ni, first_pkt); ci_udp_sendmsg_mcast(ni, us, ipcache, first_pkt); - } - else { + } else { fixup_pkt_not_transmitted(ni, first_pkt); - LOG_U(ci_log("%s: do not send UDP packet because IP TTL = 0", - __FUNCTION__)); + LOG_U( + ci_log("%s: do not send UDP packet because IP TTL = 0", __FUNCTION__)); } ni->state->send_may_poll = 0; return; - send_pkt_via_os: +send_pkt_via_os: ++us->stats.n_tx_os_late; fixup_pkt_not_transmitted(ni, pkt); @@ -856,7 +834,7 @@ static void ci_udp_sendmsg_send(ci_netif* ni, ci_udp_state* us, } return; - no_longer_connected: +no_longer_connected: /* We were connected when we entered ci_udp_sendmsg(), but we're not now. * If not draining tx_async_q, return error to caller. Otherwise just * drop this datagram. @@ -876,12 +854,12 @@ static void ci_udp_sendmsg_send(ci_netif* ni, ci_udp_state* us, } -static int ci_udp_tx_datagram_level(ci_netif* ni, ci_ip_pkt_fmt* pkt, - ci_boolean_t ni_locked) +static int ci_udp_tx_datagram_level( + ci_netif* ni, ci_ip_pkt_fmt* pkt, ci_boolean_t ni_locked) { /* Sum the contributions from each IP fragment. 
*/ int level = 0; - for( ; ; pkt = PKT_CHK_NML(ni, pkt->next, ni_locked) ) { + for( ;; pkt = PKT_CHK_NML(ni, pkt->next, ni_locked) ) { level += pkt->pf.udp.tx_length; if( OO_PP_IS_NULL(pkt->next) ) return level; @@ -898,7 +876,8 @@ void ci_udp_sendmsg_send_async_q(ci_netif* ni, ci_udp_state* us) /* Grab the contents of [tx_async_q]. */ do { OO_PP_INIT(ni, pp, us->tx_async_q); - if( OO_PP_IS_NULL(pp) ) return; + if( OO_PP_IS_NULL(pp) ) + return; } while( ci_cas32_fail(&us->tx_async_q, OO_PP_ID(pp), OO_PP_ID_NULL) ); /* Reverse the list. */ @@ -909,8 +888,7 @@ void ci_udp_sendmsg_send_async_q(ci_netif* ni, ci_udp_state* us) pp = pkt->netif.tx.dmaq_next; pkt->netif.tx.dmaq_next = send_list; send_list = OO_PKT_P(pkt); - } - while( OO_PP_NOT_NULL(pp) ); + } while( OO_PP_NOT_NULL(pp) ); oo_atomic_add(&us->tx_async_q_level, -level); @@ -924,24 +902,25 @@ void ci_udp_sendmsg_send_async_q(ci_netif* ni, ci_udp_state* us) ++us->stats.n_tx_lock_defer; ci_udp_sendmsg_send(ni, us, pkt, flags, NULL); ci_netif_pkt_release(ni, pkt); - if( OO_PP_IS_NULL(pp) ) break; + if( OO_PP_IS_NULL(pp) ) + break; pkt = PKT_CHK(ni, pp); } } -static void ci_udp_sendmsg_async_q_enqueue(ci_netif* ni, ci_udp_state* us, - ci_ip_pkt_fmt* pkt, int flags) +static void ci_udp_sendmsg_async_q_enqueue( + ci_netif* ni, ci_udp_state* us, ci_ip_pkt_fmt* pkt, int flags) { if( flags & MSG_CONFIRM ) /* Only setting this for first IP fragment -- that should be fine. 
*/ pkt->flags |= CI_PKT_FLAG_MSG_CONFIRM; - oo_atomic_add(&us->tx_async_q_level, - ci_udp_tx_datagram_level(ni, pkt, CI_FALSE)); + oo_atomic_add( + &us->tx_async_q_level, ci_udp_tx_datagram_level(ni, pkt, CI_FALSE)); do OO_PP_INIT(ni, pkt->netif.tx.dmaq_next, us->tx_async_q); - while( ci_cas32_fail(&us->tx_async_q, - OO_PP_ID(pkt->netif.tx.dmaq_next), OO_PKT_ID(pkt)) ); + while( ci_cas32_fail( + &us->tx_async_q, OO_PP_ID(pkt->netif.tx.dmaq_next), OO_PKT_ID(pkt)) ); if( ci_netif_lock_or_defer_work(ni, &us->s.b) ) ci_netif_unlock(ni); @@ -950,7 +929,8 @@ static void ci_udp_sendmsg_async_q_enqueue(ci_netif* ni, ci_udp_state* us, #ifndef __KERNEL__ /* Check if provided address struct/content is OK for us. */ -static int ci_udp_name_is_ok(int af, ci_udp_state* us, const struct msghdr* msg) +static int ci_udp_name_is_ok( + int af, ci_udp_state* us, const struct msghdr* msg) { ci_assert(us); ci_assert(msg != NULL); @@ -960,14 +940,14 @@ static int ci_udp_name_is_ok(int af, ci_udp_state* us, const struct msghdr* msg) if( msg->msg_name == NULL ) return 0; -#if CI_CFG_FAKE_IPV6 && !CI_CFG_IPV6 +#if CI_CFG_FAKE_IPV6 && ! CI_CFG_IPV6 if( us->s.domain == AF_INET6 ) { return msg->msg_namelen >= SIN6_LEN_RFC2133 && af == AF_INET6 && - ci_tcp_ipv6_is_ipv4((struct sockaddr*) msg->msg_name); + ci_tcp_ipv6_is_ipv4((struct sockaddr*) msg->msg_name); } #endif - if( af != AF_INET && !IS_AF_INET6(us->s.domain) ) + if( af != AF_INET && ! 
IS_AF_INET6(us->s.domain) ) return 0; return msg_namelen_ok(af, msg->msg_namelen); @@ -975,7 +955,7 @@ static int ci_udp_name_is_ok(int af, ci_udp_state* us, const struct msghdr* msg) #endif -#define OO_TIMEVAL_UNINITIALISED ((struct oo_timeval*) 1) +#define OO_TIMEVAL_UNINITIALISED ((struct oo_timeval*) 1) static int ci_udp_sendmsg_may_send(ci_udp_state* us, int bytes_to_send) @@ -1002,8 +982,7 @@ static int ci_udp_sendmsg_may_send(ci_udp_state* us, int bytes_to_send) static int ci_udp_sendmsg_wait(ci_netif* ni, ci_udp_state* us, - unsigned bytes_to_send, int flags, - struct udp_send_info* sinf) + unsigned bytes_to_send, int flags, struct udp_send_info* sinf) { ci_uint64 start_frc = 0, now_frc = 0; ci_uint64 schedule_frc = 0; @@ -1030,11 +1009,11 @@ static int ci_udp_sendmsg_wait(ci_netif* ni, ci_udp_state* us, if( si_trylock_and_inc(ni, sinf, us->stats.n_tx_lock_poll) ) ci_netif_poll(ni); - no_error: +no_error: while( 1 ) { sleep_seq = us->s.b.sleep_seq.all; ci_rmb(); - if(CI_UNLIKELY( (rc = ci_get_so_error(&us->s)) != 0 || us->s.tx_errno )) + if( CI_UNLIKELY((rc = ci_get_so_error(&us->s)) != 0 || us->s.tx_errno) ) goto so_error; if( ci_udp_sendmsg_may_send(us, bytes_to_send) ) { us->stats.n_tx_poll_avoids_full += first_time; @@ -1043,7 +1022,8 @@ static int ci_udp_sendmsg_wait(ci_netif* ni, ci_udp_state* us, return 0; } if( (flags & MSG_DONTWAIT) || - (us->s.b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK|CI_SB_AFLAG_O_NDELAY)) ) { + (us->s.b.sb_aflags & + (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY)) ) { ++us->stats.n_tx_eagain; return -EAGAIN; } @@ -1073,28 +1053,24 @@ static int ci_udp_sendmsg_wait(ci_netif* ni, ci_udp_state* us, if( ci_netif_need_poll_spinning(ni, now_frc) ) { if( si_trylock(ni, sinf) ) ci_netif_poll(ni); - } - else if( ! ni->state->is_spinner ) + } else if( ! 
ni->state->is_spinner ) ni->state->is_spinner = 1; } if( sinf->stack_locked ) { ci_netif_unlock(ni); sinf->stack_locked = 0; } - rc = OO_SPINLOOP_PAUSE_CHECK_SIGNALS(ni, now_frc, &schedule_frc, - us->s.so.sndtimeo_msec, - NULL, si); + rc = OO_SPINLOOP_PAUSE_CHECK_SIGNALS( + ni, now_frc, &schedule_frc, us->s.so.sndtimeo_msec, NULL, si); if( rc != 0 ) { ni->state->is_spinner = 0; return rc; } - } - else if( spin_limit_by_so ) { + } else if( spin_limit_by_so ) { ++us->stats.n_tx_eagain; return -EAGAIN; } - } - else { + } else { if( sinf->timeout && udp_send_spin ) { ci_uint32 spin_ms = NI_OPTS(ni).spin_usec >> 10; if( spin_ms < sinf->timeout ) @@ -1106,15 +1082,15 @@ static int ci_udp_sendmsg_wait(ci_netif* ni, ci_udp_state* us, } ++us->stats.n_tx_block; rc = ci_sock_sleep(ni, &us->s.b, CI_SB_FLAG_WAKE_TX, - sinf->stack_locked ? CI_SLEEP_NETIF_LOCKED : 0, - sleep_seq, &sinf->timeout); + sinf->stack_locked ? CI_SLEEP_NETIF_LOCKED : 0, sleep_seq, + &sinf->timeout); sinf->stack_locked = 0; if( rc < 0 ) return rc; } } - so_error: +so_error: if( udp_send_spin ) ni->state->is_spinner = 0; if( rc == 0 ) @@ -1123,10 +1099,10 @@ static int ci_udp_sendmsg_wait(ci_netif* ni, ci_udp_state* us, goto no_error; return rc; } - -ci_inline ci_udp_hdr* udp_init(ci_udp_state* us, ci_ip_pkt_fmt* pkt, - unsigned payload_bytes, bool is_frag) + +ci_inline ci_udp_hdr* udp_init( + ci_udp_state* us, ci_ip_pkt_fmt* pkt, unsigned payload_bytes, bool is_frag) { int af = ipcache_af(&us->s.pkt); ci_udp_hdr* udp = TX_PKT_IPX_UDP(af, pkt, is_frag); @@ -1140,8 +1116,8 @@ ci_inline ci_udp_hdr* udp_init(ci_udp_state* us, ci_ip_pkt_fmt* pkt, /* put in the def. eth hdr, IP hdr then update the address * and IP ID fields. 
*/ -ci_inline ci_ip4_hdr* eth_ip_init(ci_netif* ni, ci_udp_state* us, - ci_ip_pkt_fmt* pkt) +ci_inline ci_ip4_hdr* eth_ip_init( + ci_netif* ni, ci_udp_state* us, ci_ip_pkt_fmt* pkt) { ci_ip4_hdr* ip; @@ -1158,8 +1134,8 @@ ci_inline ci_ip4_hdr* eth_ip_init(ci_netif* ni, ci_udp_state* us, } #if CI_CFG_IPV6 -ci_inline ci_ip6_hdr* eth_ip6_init(ci_netif* ni, ci_udp_state* us, - ci_ip_pkt_fmt* pkt, bool is_frag) +ci_inline ci_ip6_hdr* eth_ip6_init( + ci_netif* ni, ci_udp_state* us, ci_ip_pkt_fmt* pkt, bool is_frag) { ci_ip6_hdr* ip6 = oo_tx_ip6_hdr(pkt); ci_uint8 tclass = ci_ip6_tclass(&us->s.pkt.ipx.ip6); @@ -1176,13 +1152,9 @@ ci_inline ci_ip6_hdr* eth_ip6_init(ci_netif* ni, ci_udp_state* us, * * Returns [bytes_to_send] on success, -errno on failure. */ -static -int ci_udp_sendmsg_fill(ci_netif* ni, ci_udp_state* us, - ci_iovec_ptr* piov, int bytes_to_send, - int flags, - struct oo_pkt_filler* pf, - struct udp_send_info* sinf, - bool need_frag) +static int ci_udp_sendmsg_fill(ci_netif* ni, ci_udp_state* us, + ci_iovec_ptr* piov, int bytes_to_send, int flags, struct oo_pkt_filler* pf, + struct udp_send_info* sinf, bool need_frag) { ci_ip_pkt_fmt* first_pkt; ci_ip_pkt_fmt* new_pkt; @@ -1192,13 +1164,14 @@ int ci_udp_sendmsg_fill(ci_netif* ni, ci_udp_state* us, int pmtu = sinf->ipcache.mtu; int can_block = ! ((NI_OPTS(ni).udp_nonblock_no_pkts_mode) && ((flags & MSG_DONTWAIT) || - (us->s.b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK|CI_SB_AFLAG_O_NDELAY)))); + (us->s.b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | + CI_SB_AFLAG_O_NDELAY)))); int af = ipcache_af(&us->s.pkt); ci_udp_hdr* udp; ci_assert(pmtu > 0); - ci_assert_equiv( need_frag, - bytes_to_send > pmtu - CI_IPX_HDR_SIZE(af) - sizeof(ci_udp_hdr) ); + ci_assert_equiv(need_frag, + bytes_to_send > pmtu - CI_IPX_HDR_SIZE(af) - sizeof(ci_udp_hdr)); frag_off = 0; bytes_left = bytes_to_send; @@ -1210,21 +1183,22 @@ int ci_udp_sendmsg_fill(ci_netif* ni, ci_udp_state* us, ! 
sinf->stack_locked ) sinf->stack_locked = ci_netif_trylock(ni); - rc = ci_netif_pkt_alloc_block(ni, &us->s, &sinf->stack_locked, can_block, - &first_pkt); + rc = ci_netif_pkt_alloc_block( + ni, &us->s, &sinf->stack_locked, can_block, &first_pkt); if( rc != 0 ) return rc; oo_tx_pkt_layout_init(first_pkt); /* ID for IPv6 case should only be generated when fragmentation is really * required. */ - if( !IS_AF_INET6(af) || need_frag ) + if( ! IS_AF_INET6(af) || need_frag ) ipx_id = ci_next_ipx_id_be(af, ni); udp = udp_init(us, first_pkt, bytes_to_send, need_frag); oo_pkt_filler_init(pf, first_pkt, (uint8_t*) udp + sizeof(ci_udp_hdr)); - first_pkt->pay_len = ((char*) udp + sizeof(ci_udp_hdr) - PKT_START(first_pkt)); + first_pkt->pay_len = + ((char*) udp + sizeof(ci_udp_hdr) - PKT_START(first_pkt)); oo_pkt_af_set(first_pkt, af); @@ -1232,8 +1206,7 @@ int ci_udp_sendmsg_fill(ci_netif* ni, ci_udp_state* us, if( payload_bytes >= bytes_left ) { payload_bytes = bytes_left; bytes_left = 0; - } - else { + } else { payload_bytes = UDP_PAYLOAD1_SPACE_PMTU(af, pmtu); bytes_left -= payload_bytes; } @@ -1241,26 +1214,25 @@ int ci_udp_sendmsg_fill(ci_netif* ni, ci_udp_state* us, while( 1 ) { pf->pkt->pf.udp.tx_length = payload_bytes + sizeof(ci_udp_hdr) + - CI_IPX_HDR_SIZE(af) + sizeof(ci_ether_hdr); + CI_IPX_HDR_SIZE(af) + sizeof(ci_ether_hdr); if( need_frag ) pf->pkt->pf.udp.tx_length += CI_IPX_FRAG_HDR_SIZE(af); #if CI_CFG_IPV6 if( IS_AF_INET6(af) ) { - ci_ip6_hdr *ip6 = eth_ip6_init(ni, us, pf->pkt, need_frag); + ci_ip6_hdr* ip6 = eth_ip6_init(ni, us, pf->pkt, need_frag); ip6->payload_len = frag_bytes; if( need_frag ) { ci_ip6_frag_hdr_init(ci_ip6_data(ip6), IPPROTO_UDP, frag_off, - (bytes_left > 0) ? 1 : 0, ipx_id.ip6); + (bytes_left > 0) ? 
1 : 0, ipx_id.ip6); ip6->payload_len += CI_IPX_FRAG_HDR_SIZE(af); } ip6->payload_len = CI_BSWAP_BE16(ip6->payload_len); - } - else + } else #endif { - ci_ip4_hdr *ip = eth_ip_init(ni, us, pf->pkt); + ci_ip4_hdr* ip = eth_ip_init(ni, us, pf->pkt); ip->ip_tot_len_be16 = frag_bytes + sizeof(ci_ip4_hdr); ip->ip_tot_len_be16 = CI_BSWAP_BE16(ip->ip_tot_len_be16); ip->ip_frag_off_be16 = frag_off >> 3u; @@ -1268,8 +1240,8 @@ int ci_udp_sendmsg_fill(ci_netif* ni, ci_udp_state* us, if( bytes_left > 0 ) ip->ip_frag_off_be16 |= CI_IP4_FRAG_MORE; else if( us->s.s_flags & CI_SOCK_FLAG_ALWAYS_DF || - ( us->s.s_flags & CI_SOCK_FLAG_PMTU_DO && - pf->pkt == first_pkt ) ) { + (us->s.s_flags & CI_SOCK_FLAG_PMTU_DO && + pf->pkt == first_pkt) ) { ip->ip_frag_off_be16 = CI_IP4_FRAG_DONT; } ip->ip_id_be16 = ipx_id.ip4; @@ -1280,8 +1252,8 @@ int ci_udp_sendmsg_fill(ci_netif* ni, ci_udp_state* us, ci_netif_pkt_hold(ni, pf->pkt); rc = oo_pkt_fill(ni, &us->s, &sinf->stack_locked, can_block, pf, piov, - payload_bytes CI_KERNEL_ARG(CI_ADDR_SPC_CURRENT)); - if( CI_UNLIKELY( rc != 0 ) ) + payload_bytes CI_KERNEL_ARG(CI_ADDR_SPC_CURRENT)); + if( CI_UNLIKELY(rc != 0) ) goto fill_failed; if( bytes_left == 0 ) @@ -1290,9 +1262,9 @@ int ci_udp_sendmsg_fill(ci_netif* ni, ci_udp_state* us, /* This counts the number of fragments not including the first. */ ++us->stats.n_tx_fragments; - rc = ci_netif_pkt_alloc_block(ni, &us->s, &sinf->stack_locked, - can_block, &new_pkt); - if( CI_UNLIKELY( rc != 0 )) + rc = ci_netif_pkt_alloc_block( + ni, &us->s, &sinf->stack_locked, can_block, &new_pkt); + if( CI_UNLIKELY(rc != 0) ) goto fill_failed; oo_tx_pkt_layout_init(new_pkt); @@ -1317,7 +1289,7 @@ int ci_udp_sendmsg_fill(ci_netif* ni, ci_udp_state* us, return bytes_to_send; - fill_failed: +fill_failed: if( ! 
sinf->stack_locked && ci_netif_lock(ni) == 0 ) sinf->stack_locked = 1; @@ -1344,27 +1316,25 @@ int ci_udp_sendmsg_fill(ci_netif* ni, ci_udp_state* us, } } } - pkt_chain_released: +pkt_chain_released: /* Free the packet chain by freeing the first fragment. */ - #ifdef __KERNEL__ - if( ! sinf->stack_locked ) - ci_netif_set_merge_atomic_flag(ni); - ci_netif_pkt_release_mnl(ni, first_pkt, &sinf->stack_locked); - #else - /* ci_netif_lock() can't fail in UL */ - ci_assert(sinf->stack_locked); - ci_netif_pkt_release(ni, first_pkt); - #endif +#ifdef __KERNEL__ + if( ! sinf->stack_locked ) + ci_netif_set_merge_atomic_flag(ni); + ci_netif_pkt_release_mnl(ni, first_pkt, &sinf->stack_locked); +#else + /* ci_netif_lock() can't fail in UL */ + ci_assert(sinf->stack_locked); + ci_netif_pkt_release(ni, first_pkt); +#endif return rc; } -static -void ci_udp_sendmsg_onload(ci_netif* ni, ci_udp_state* us, - const ci_msghdr* msg, int flags, - struct udp_send_info* sinf) +static void ci_udp_sendmsg_onload(ci_netif* ni, ci_udp_state* us, + const ci_msghdr* msg, int flags, struct udp_send_info* sinf) { int rc, i; unsigned long bytes_to_send; @@ -1390,27 +1360,26 @@ void ci_udp_sendmsg_onload(ci_netif* ni, ci_udp_state* us, goto efault; } while( --i >= 0 ); ci_iovec_ptr_init_nz(&piov, msg->msg_iov, msg->msg_iovlen); - } - else { + } else { ci_iovec_ptr_init(&piov, NULL, 0); } - if( bytes_to_send > sinf->ipcache.mtu - CI_IPX_HDR_SIZE(af) - - sizeof(ci_udp_hdr) ) + if( bytes_to_send > + sinf->ipcache.mtu - CI_IPX_HDR_SIZE(af) - sizeof(ci_udp_hdr) ) need_frag = true; /* For now we don't allocate packets in advance, so init to NULL */ pf.alloc_pkt = NULL; - if( ! UDP_HAS_SENDQ_SPACE(us, bytes_to_send) | + if( ! 
UDP_HAS_SENDQ_SPACE(us, bytes_to_send) | (bytes_to_send > (unsigned long) CI_UDP_MAX_PAYLOAD_BYTES(af)) ) goto no_space_or_too_big; - back_to_fast_path: +back_to_fast_path: was_locked = sinf->stack_locked; if( need_frag && is_sock_flag_always_df_set(&us->s, af) ) { /* We are trying to send too large a datagram with DontFragment bit */ - if( is_sockopt_flag_ip_recverr_set(&us->s, af ) ) { + if( is_sockopt_flag_ip_recverr_set(&us->s, af) ) { /* We have to add an error message to the error queue. * Let OS do it! */ goto send_via_os; @@ -1419,8 +1388,7 @@ void ci_udp_sendmsg_onload(ci_netif* ni, ci_udp_state* us, /* IP_PMTUDISC_DO */ sinf->rc = -EMSGSIZE; return; - } - else + } else #ifndef __KERNEL__ if( msg->msg_namelen == 0 ) #endif @@ -1442,8 +1410,8 @@ void ci_udp_sendmsg_onload(ci_netif* ni, ci_udp_state* us, } /* IP_PMTUDISC_PROBE does not do anything in non-connected case */ } - rc = ci_udp_sendmsg_fill(ni, us, &piov, bytes_to_send, flags, &pf, sinf, - need_frag); + rc = ci_udp_sendmsg_fill( + ni, us, &piov, bytes_to_send, flags, &pf, sinf, need_frag); #if CI_CFG_TIMESTAMPING if( us->s.timestamping_flags & ONLOAD_SOF_TIMESTAMPING_OPT_ID ) { pf.pkt->ts_key = us->s.ts_key; @@ -1452,7 +1420,7 @@ void ci_udp_sendmsg_onload(ci_netif* ni, ci_udp_state* us, #endif if( sinf->stack_locked && ! 
was_locked ) ++us->stats.n_tx_lock_pkt; - if(CI_LIKELY( rc >= 0 )) { + if( CI_LIKELY(rc >= 0) ) { sinf->rc = bytes_to_send; TX_PKT_SET_DADDR(af, pf.pkt, ipcache_raddr(&sinf->ipcache)); TX_PKT_IPX_UDP(af, pf.pkt, need_frag)->udp_dest_be16 = @@ -1463,23 +1431,21 @@ void ci_udp_sendmsg_onload(ci_netif* ni, ci_udp_state* us, ci_netif_pkt_release(ni, pf.pkt); ci_netif_unlock(ni); sinf->stack_locked = 0; - } - else { + } else { ci_udp_sendmsg_async_q_enqueue(ni, us, pf.pkt, flags); } - } - else { + } else { sinf->rc = rc; } return; /* *********************** */ - efault: +efault: sinf->rc = -EFAULT; return; - send_via_os: +send_via_os: if( sinf->stack_locked ) { ci_netif_unlock(ni); sinf->stack_locked = 0; @@ -1487,7 +1453,7 @@ void ci_udp_sendmsg_onload(ci_netif* ni, ci_udp_state* us, sinf->rc = ci_udp_sendmsg_os(ni, us, msg, flags, 1, 0); return; - no_space_or_too_big: +no_space_or_too_big: /* TODO: If we implement IP options we'll have to calculate * CI_UDP_MAX_PAYLOAD_BYTES depending on them. */ @@ -1498,19 +1464,19 @@ void ci_udp_sendmsg_onload(ci_netif* ni, ci_udp_state* us, /* There may be insufficient room in the sendq. */ rc = ci_udp_sendmsg_wait(ni, us, bytes_to_send, flags, sinf); - if(CI_UNLIKELY( rc != 0 )) { + if( CI_UNLIKELY(rc != 0) ) { sinf->rc = rc; return; } - LOG_UV(ci_log("%s: "NT_FMT"back to fast path", __FUNCTION__, - NT_PRI_ARGS(ni,us))); + LOG_UV(ci_log( + "%s: " NT_FMT "back to fast path", __FUNCTION__, NT_PRI_ARGS(ni, us))); goto back_to_fast_path; } -#if !defined(__KERNEL__) && defined(__i386__) -static int ci_udp_sendmsg_control_os(ci_fd_t fd, ci_udp_state *us, - const struct msghdr* msg, int flags) +#if ! 
defined(__KERNEL__) && defined(__i386__) +static int ci_udp_sendmsg_control_os( + ci_fd_t fd, ci_udp_state* us, const struct msghdr* msg, int flags) { ci_fd_t os_sock; int rc; @@ -1524,11 +1490,10 @@ static int ci_udp_sendmsg_control_os(ci_fd_t fd, ci_udp_state *us, } #endif -int ci_udp_sendmsg(ci_udp_iomsg_args *a, - const ci_msghdr* msg, int flags) +int ci_udp_sendmsg(ci_udp_iomsg_args* a, const ci_msghdr* msg, int flags) { - ci_netif *ni = a->ni; - ci_udp_state *us = a->us; + ci_netif* ni = a->ni; + ci_udp_state* us = a->us; struct udp_send_info sinf; int rc; @@ -1547,10 +1512,10 @@ int ci_udp_sendmsg(ci_udp_iomsg_args *a, /* We do not want to re-pack msg_control field or to find out sys_sendmsg32() * syscall when sending from a 32-bit application. So, let the kernel to take * care of it. */ - if(CI_UNLIKELY( msg->msg_controllen != 0 )) + if( CI_UNLIKELY(msg->msg_controllen != 0) ) return ci_udp_sendmsg_control_os(a->fd, us, msg, flags); #else - if(CI_UNLIKELY( CMSG_FIRSTHDR(msg) != NULL )) { + if( CI_UNLIKELY(CMSG_FIRSTHDR(msg) != NULL) ) { void* info = NULL; if( ci_ip_cmsg_send(msg, &info) != 0 || info != NULL ) goto send_via_os; @@ -1558,29 +1523,29 @@ int ci_udp_sendmsg(ci_udp_iomsg_args *a, #endif #endif - if(CI_UNLIKELY( flags & MSG_MORE )) { + if( CI_UNLIKELY(flags & MSG_MORE) ) { LOG_E(ci_log("%s: MSG_MORE not yet supported", __FUNCTION__)); CI_SET_ERROR(rc, EOPNOTSUPP); return rc; } - if(CI_UNLIKELY( flags & MSG_OOB )) + if( CI_UNLIKELY(flags & MSG_OOB) ) /* This returns an error, so very unlikely! */ goto send_via_os; - if(CI_UNLIKELY( us->s.so_error | us->s.tx_errno )) + if( CI_UNLIKELY(us->s.so_error | us->s.tx_errno) ) goto so_error; - no_error: +no_error: if( ! NI_OPTS(ni).udp_send_unlocked ) { -# ifndef __KERNEL__ +#ifndef __KERNEL__ ci_netif_lock(ni); -# else +#else if( (rc = ci_netif_lock(ni)) < 0 ) { rc = -ERESTARTSYS; goto error; } -# endif +#endif sinf.stack_locked = 1; } @@ -1613,14 +1578,13 @@ int ci_udp_sendmsg(ci_udp_iomsg_args *a, /* ?? 
TODO: put some code here to avoid conditional branch forward on * fast path. */ - } - else { + } else { /* In the case of a control plane change and stack lock contention we * may use old info here. Worst case is that we'll send via OS when * we could have accelerated (and that can only happen if the control * plane change affected this connection). */ - if(CI_UNLIKELY( ! oo_cp_ipcache_is_valid(ni, &us->s.pkt) )) { + if( CI_UNLIKELY(! oo_cp_ipcache_is_valid(ni, &us->s.pkt)) ) { if( si_trylock_and_inc(ni, &sinf, us->stats.n_tx_lock_cp) ) { ++us->stats.n_tx_cp_c_lookup; cicp_user_retrieve(ni, &us->s.pkt, &us->s.cp); @@ -1634,11 +1598,10 @@ int ci_udp_sendmsg(ci_udp_iomsg_args *a, sinf.ipcache.mtu = us->s.pkt.mtu; } #ifndef __KERNEL__ - else if(CI_UNLIKELY( msg->msg_name == NULL )) { + else if( CI_UNLIKELY(msg->msg_name == NULL) ) { rc = -EFAULT; goto error; - } - else { + } else { /********************************************************************** * Unconnected send -- dest IP and port provided. */ @@ -1649,8 +1612,7 @@ int ci_udp_sendmsg(ci_udp_iomsg_args *a, if( msg->msg_name != NULL && msg_namelen_ok(af, msg->msg_namelen) && (! CI_CFG_FAKE_IPV6 || us->s.domain == AF_INET) && af == AF_INET ) { /* Fast check -- we're okay. */ - } - else if( ! ci_udp_name_is_ok(af, us, msg) ) + } else if( ! ci_udp_name_is_ok(af, us, msg) ) /* Fast check and more detailed check failed. 
*/ goto send_via_os; @@ -1658,8 +1620,8 @@ int ci_udp_sendmsg(ci_udp_iomsg_args *a, #if CI_CFG_IPV6 if( CI_IPX_IS_LINKLOCAL(pkt_daddr) && - ci_sock_set_ip6_scope_id(ni, &us->s, CI_SA(msg->msg_name), - msg->msg_namelen, 1) ) + ci_sock_set_ip6_scope_id( + ni, &us->s, CI_SA(msg->msg_name), msg->msg_namelen, 1) ) goto send_via_os; ci_udp_ipcache_convert(CI_ADDR_AF(pkt_daddr), us); sinf.ipcache.ether_type = us->s.pkt.ether_type; @@ -1672,7 +1634,7 @@ int ci_udp_sendmsg(ci_udp_iomsg_args *a, goto send_via_os; #ifndef __KERNEL__ - if(CI_UNLIKELY( udp_lport_be16(us) == 0 )) { + if( CI_UNLIKELY(udp_lport_be16(us) == 0) ) { /* We haven't yet allocated a local port. Do it now. */ if( sinf.stack_locked ) ci_netif_unlock(ni); @@ -1682,14 +1644,12 @@ int ci_udp_sendmsg(ci_udp_iomsg_args *a, } #endif - reuse_ipcache = (sinf.ipcache.dport_be16 == - us->ephemeral_pkt.dport_be16) && - CI_IPX_ADDR_EQ(pkt_daddr, - ipcache_raddr(&us->ephemeral_pkt)); + reuse_ipcache = + (sinf.ipcache.dport_be16 == us->ephemeral_pkt.dport_be16) && + CI_IPX_ADDR_EQ(pkt_daddr, ipcache_raddr(&us->ephemeral_pkt)); if( ! reuse_ipcache ) - us->udpflags &=~ CI_UDPF_LAST_SEND_NOMAC; - if( reuse_ipcache && - oo_cp_ipcache_is_valid(ni, &us->ephemeral_pkt) ) { + us->udpflags &= ~CI_UDPF_LAST_SEND_NOMAC; + if( reuse_ipcache && oo_cp_ipcache_is_valid(ni, &us->ephemeral_pkt) ) { /* Looks like [us->ephemeral_pkt] has up-to-date info for this * destination, so go with it. This is racey if another thread is * sending on the same socket concurrently (and happens to be @@ -1705,14 +1665,13 @@ int ci_udp_sendmsg(ci_udp_iomsg_args *a, goto send_via_os; sinf.ipcache.mtu = us->ephemeral_pkt.mtu; ++us->stats.n_tx_cp_match; - } - else if( si_trylock_and_inc(ni, &sinf, us->stats.n_tx_lock_cp) ) { - if( !reuse_ipcache ) { + } else if( si_trylock_and_inc(ni, &sinf, us->stats.n_tx_lock_cp) ) { + if( ! 
reuse_ipcache ) { ci_ipcache_set_daddr(&us->ephemeral_pkt, ipcache_raddr(&sinf.ipcache)); us->ephemeral_pkt.dport_be16 = sinf.ipcache.dport_be16; ci_ip_cache_invalidate(&us->ephemeral_pkt); } - if(CI_UNLIKELY( ! oo_cp_ipcache_is_valid(ni, &us->ephemeral_pkt) )) { + if( CI_UNLIKELY(! oo_cp_ipcache_is_valid(ni, &us->ephemeral_pkt)) ) { ++us->stats.n_tx_cp_uc_lookup; cicp_user_retrieve(ni, &us->ephemeral_pkt, &us->s.cp); if( reuse_ipcache ) @@ -1722,8 +1681,7 @@ int ci_udp_sendmsg(ci_udp_iomsg_args *a, us->ephemeral_pkt.status != retrrc_nomac ) goto send_via_os; sinf.ipcache.mtu = us->ephemeral_pkt.mtu; - } - else { + } else { /* Need control plane lookup and could not grab stack lock; so do * lookup with temporary ipcache [sinf.ipcache]. */ @@ -1744,21 +1702,21 @@ int ci_udp_sendmsg(ci_udp_iomsg_args *a, if( sinf.stack_locked ) ci_netif_unlock(ni); if( sinf.rc < 0 ) - CI_SET_ERROR(sinf.rc, -sinf.rc); + CI_SET_ERROR(sinf.rc, -sinf.rc); return sinf.rc; - so_error: +so_error: if( (rc = -ci_get_so_error(&us->s)) == 0 && (rc = -us->s.tx_errno) == 0 ) goto no_error; goto error; - error: +error: if( sinf.stack_locked ) ci_netif_unlock(ni); CI_SET_ERROR(rc, -rc); return rc; - send_via_os: +send_via_os: if( sinf.stack_locked ) ci_netif_unlock(ni); rc = ci_udp_sendmsg_os(ni, us, msg, flags, 1, 0); diff --git a/src/lib/transport/unix/dpdk.c b/src/lib/transport/unix/dpdk.c new file mode 100644 index 000000000..43362a10c --- /dev/null +++ b/src/lib/transport/unix/dpdk.c @@ -0,0 +1,38 @@ +#include +#include "internal.h" + +static char *__dpdk_eal_argv[] = { "-l", "1", "--proc-type=secondary", + "--log-level", "0" }; + +/* These values should be kept in sync with the argv list above. 
The last two + * should always be the log level so that we can conditionally enable or + * disable verbose logging by simply changing the number of args passed to + * dpdk*/ +static const int __dpdk_eal_argc = 5; + +int dpdk_cleanup(void) +{ + rte_eal_cleanup(); + return 0; +} + +int dpdk_init(void) +{ + int ret; + int argc = __dpdk_eal_argc; + Log_V(ci_log("Enabling Verbose DPDK initialization logging"); argc -= 2); + ret = rte_eal_init(argc, __dpdk_eal_argv); + if( ret < 0 ) { + LOG_U(ci_log("Unable to intialize DPDK")); + return -1; + } + + ret = rte_eal_primary_proc_alive(NULL); + if( ret == 0 ) { + LOG_U(ci_log("DPDK Primary process is not alive")); + dpdk_cleanup(); + return -2; + } + + return 0; +} diff --git a/src/lib/transport/unix/fdtable.c b/src/lib/transport/unix/fdtable.c index 0cd4fd12f..a2b21d63a 100644 --- a/src/lib/transport/unix/fdtable.c +++ b/src/lib/transport/unix/fdtable.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2003-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author djr/ctk ** \brief Table mapping [fd]s to userlevel state. @@ -10,7 +10,7 @@ ** *//* \**************************************************************************/ - + /*! \cidoxg_lib_transport_unix */ #include "internal.h" @@ -32,20 +32,20 @@ #include /* FIXME Yes, it is ugly. 
But we do not have any appropriate header */ -#define CI_ID_POOL_ID_NONE ((unsigned)(-1)) +#define CI_ID_POOL_ID_NONE ((unsigned) (-1)) #define DEBUGPREINIT(x) -citp_fdtable_globals citp_fdtable; +citp_fdtable_globals citp_fdtable; /* Initial seqno should differ from the seqno in special fdi, such as * citp_the_closed_fd */ ci_uint64 fdtable_seq_no = 1; -static void dup2_complete(citp_fdinfo* prev_newfdi, - citp_fdinfo_p prev_newfdip, int fdt_locked); +static void dup2_complete( + citp_fdinfo* prev_newfdi, citp_fdinfo_p prev_newfdip, int fdt_locked); static void exit_with_status(int status) @@ -54,15 +54,15 @@ static void exit_with_status(int status) if( WIFEXITED(status) ) ci_sys__exit(WEXITSTATUS(status)); else if( WIFSIGNALED(status) ) - ci_sys_syscall(__NR_tgkill, getpid(), ci_sys_syscall(__NR_gettid), - WTERMSIG(status)); + ci_sys_syscall( + __NR_tgkill, getpid(), ci_sys_syscall(__NR_gettid), WTERMSIG(status)); return; } void oo_signal_terminate(int signum) { - struct sigaction act = { }; + struct sigaction act = {}; Log_CALL(ci_log("%s(%d)", __func__, signum)); @@ -78,7 +78,8 @@ void oo_signal_terminate(int signum) static void sighandler_sigonload(int sig, siginfo_t* info, void* context) { - /* The signal was sent solely in order to wake up the app, so nothing to do */ + /* The signal was sent solely in order to wake up the app, so nothing to do + */ } /* Hook to be called at gracious exit */ @@ -98,8 +99,8 @@ void oo_exit_hook(int status) do { old_status = exit_status; - } while( ci_cas32u_fail(&exit_status, old_status, - status | OO_EXIT_STATUS_SET) ); + } while( + ci_cas32u_fail(&exit_status, old_status, status | OO_EXIT_STATUS_SET) ); if( old_status != 0 ) { if( status != 0 ) { @@ -108,8 +109,7 @@ void oo_exit_hook(int status) */ exit_with_status(status); return; - } - else { + } else { /* This hook have already been called, from either _exit() or signal. * Now we are in _fini(): return. 
*/ @@ -140,24 +140,22 @@ static citp_fdinfo_p citp_fdtable_closing_wait(unsigned fd, int fdt_locked); #ifdef __x86_64__ #if __GNUC__ >= 6 -__attribute__((force_align_arg_pointer)) -static long oo_close_nocancel_entry(long fd) +__attribute__((force_align_arg_pointer)) static long oo_close_nocancel_entry( + long fd) #else extern long oo_close_nocancel_entry(long fd); __asm__( - ".globl oo_close_nocancel_entry;" - "oo_close_nocancel_entry:" + ".globl oo_close_nocancel_entry;" + "oo_close_nocancel_entry:" "push %rbp;" "mov %rsp,%rbp;" "and $0xfffffffffffffff0,%rsp;" "call close_nocancel_entry_fixed;" "mov %rbp,%rsp;" "pop %rbp;" - "ret;" -); + "ret;"); -__attribute__((used)) -static long close_nocancel_entry_fixed(long fd) +__attribute__((used)) static long close_nocancel_entry_fixed(long fd) #endif #else static long oo_close_nocancel_entry(long fd) @@ -179,7 +177,7 @@ static long oo_close_nocancel_entry(long fd) Log_CALL(ci_log("%s: close_nocancel(%ld)", __func__, fd)); citp_enter_lib(&lib_context); - rc = citp_ep_close((int)fd); + rc = citp_ep_close((int) fd); citp_exit_lib(&lib_context, false); Log_CALL_RESULT(rc); return rc; @@ -190,7 +188,7 @@ static long oo_close_nocancel_entry(long fd) static void aarch64_write_ptr_insns(void* dst, const void* value) { unsigned* u = dst; - uintptr_t v = (uintptr_t)value; + uintptr_t v = (uintptr_t) value; u[0] |= ((v >> 0) & 0xffff) << 5; u[1] |= ((v >> 16) & 0xffff) << 5; u[2] |= ((v >> 32) & 0xffff) << 5; @@ -208,8 +206,8 @@ static int modify_glibc_code(void* dst, const void* src, size_t n) /* This patching is thread-unsafe, but happens at process startup when * there's only one thread */ patch_page_start = CI_PTR_ALIGN_BACK(dst, CI_PAGE_SIZE); - patch_page_size = (char*)CI_PTR_ALIGN_FWD((char*)dst + n, CI_PAGE_SIZE) - - (char*)patch_page_start; + patch_page_size = (char*) CI_PTR_ALIGN_FWD((char*) dst + n, CI_PAGE_SIZE) - + (char*) patch_page_start; rc = mprotect(patch_page_start, patch_page_size, PROT_READ | PROT_WRITE); if( 
rc != 0 ) { rc = -errno; @@ -220,23 +218,25 @@ static int modify_glibc_code(void* dst, const void* src, size_t n) rc = mprotect(patch_page_start, patch_page_size, PROT_READ | PROT_EXEC); if( rc != 0 ) { rc = -errno; - ci_log("CRITICAL: mprotect(glibc exec) = %d. " - "Process will likely crash now", errno); + ci_log( + "CRITICAL: mprotect(glibc exec) = %d. " + "Process will likely crash now", + errno); return rc; } return 0; } #ifdef __x86_64__ -static const unsigned char x64_endbr[] = {0xf3, 0x0f, 0x1e, 0xfa}; -static const unsigned char x64_nop[] = {0x90}; +static const unsigned char x64_endbr[] = { 0xf3, 0x0f, 0x1e, 0xfa }; +static const unsigned char x64_nop[] = { 0x90 }; /* Returns the length of the given instruction, if it's one of the * instructions that we expect to find in the implementation of libc's * _IO_file_close. Currently this is just some movs */ static int is_io_file_close_insn(const unsigned char* insn) { - if( insn[0] == 0x8b ) { /* mov r,r/m */ + if( insn[0] == 0x8b ) { /* mov r,r/m */ bool has_sib = (insn[1] & 7) == 4 && insn[1] < 0xc0; bool has_disp8 = (insn[1] >> 6) == 1; bool has_disp32 = (insn[1] >> 6) == 2 || (insn[1] & 0xc7) == 0x04; @@ -273,9 +273,9 @@ static void* find_close_nocancel(void) io_file_close += sizeof(x64_nop); while( (n = is_io_file_close_insn(io_file_close)) != 0 ) io_file_close += n; - if( *io_file_close != 0xe9 ) /* jmp rel32 */ + if( *io_file_close != 0xe9 ) /* jmp rel32 */ return NULL; - return io_file_close + 5 + *(uint32_t*)(io_file_close + 1); + return io_file_close + 5 + *(uint32_t*) (io_file_close + 1); } #else /* We do not do extra searching on aarch64, since we don't support old glibc @@ -296,17 +296,17 @@ static int patch_libc_close_nocancel(void) #ifdef __x86_64__ { static const unsigned char sysclose[] = { - 0xb8, 0x03, 0x00, 0x00, 0x00, /* mov $3, %eax */ - 0x0f, 0x05 /* syscall */ + 0xb8, 0x03, 0x00, 0x00, 0x00, /* mov $3, %eax */ + 0x0f, 0x05 /* syscall */ }; static const unsigned char call_rax[] = { - 
0xff, 0xd0 /* call *%rax */ + 0xff, 0xd0 /* call *%rax */ }; static const unsigned char trampo_code[] = { - 0xf3, 0x0f, 0x1e, 0xfa, /* endbr64 */ - 0x48, 0xb8, 0xef, 0xcd, 0xab, 0x89, 0x67, - 0x45, 0x23, 0x01, /* movabs $0x123456789abcdef,%rax */ - 0xff, 0xe0, /* jmpq *%rax */ + 0xf3, 0x0f, 0x1e, 0xfa, /* endbr64 */ + 0x48, 0xb8, 0xef, 0xcd, 0xab, 0x89, 0x67, 0x45, 0x23, + 0x01, /* movabs $0x123456789abcdef,%rax */ + 0xff, 0xe0, /* jmpq *%rax */ }; unsigned char new_glibc_bytes[6]; unsigned char* trampoline; @@ -328,13 +328,13 @@ static int patch_libc_close_nocancel(void) return -ESRCH; } trampoline = mmap(NULL, sizeof(trampo_code), PROT_READ | PROT_WRITE, - MAP_32BIT | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + MAP_32BIT | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if( trampoline == MAP_FAILED ) { rc = -errno; LOG_S(ci_log("__close_nocancel mmap failed: %d", errno)); return rc; } - ci_assert_le((uintptr_t)trampoline, 0xffffffff); + ci_assert_le((uintptr_t) trampoline, 0xffffffff); memcpy(trampoline, trampo_code, sizeof(trampo_code)); memcpy(trampoline + 6, &target, sizeof(void*)); rc = mprotect(trampoline, CI_PAGE_SIZE, PROT_READ | PROT_EXEC); @@ -346,35 +346,35 @@ static int patch_libc_close_nocancel(void) memcpy(new_glibc_bytes, &trampoline, 4); memcpy(new_glibc_bytes + 4, call_rax, sizeof(call_rax)); - return modify_glibc_code(close_nocancel + 1, new_glibc_bytes, - sizeof(new_glibc_bytes)); + return modify_glibc_code( + close_nocancel + 1, new_glibc_bytes, sizeof(new_glibc_bytes)); } #elif defined __aarch64__ { static const unsigned expected[] = { - 0x93407c00, /* sxtw x0, w0 */ - 0xd2800728, /* mov x8, #57 */ - 0xd4000001, /* svc #0 */ + 0x93407c00, /* sxtw x0, w0 */ + 0xd2800728, /* mov x8, #57 */ + 0xd4000001, /* svc #0 */ }; unsigned replacement[] = { - 0xd2a00008, /* mov x8, #0xnnnn0000 */ - 0xf2c00008, /* movk x8, #0xnnnn, lsl #32 */ - 0xd61f0100, /* br x8 */ + 0xd2a00008, /* mov x8, #0xnnnn0000 */ + 0xf2c00008, /* movk x8, #0xnnnn, lsl #32 */ + 0xd61f0100, /* br 
x8 */ }; static const unsigned trampo_code[] = { - 0xa9bf7bfd, /* stp x29, x30, [sp, #-16]! */ - 0x910003fd, /* mov x29, sp */ - 0xd2800008, /* mov x8, #0xnnnn */ - 0xf2a00008, /* movk x8, #0xnnnn, lsl #16 */ - 0xf2c00008, /* movk x8, #0xnnnn, lsl #32 */ - 0xf2e00008, /* movk x8, #0xnnnn, lsl #48 */ - 0xd63f0100, /* blr x8 */ - 0xa8c17bfd, /* ldp x29, x30, [sp], #16 */ - 0xd2800008, /* mov x8, #0xnnnn */ - 0xf2a00008, /* movk x8, #0xnnnn, lsl #16 */ - 0xf2c00008, /* movk x8, #0xnnnn, lsl #32 */ - 0xf2e00008, /* movk x8, #0xnnnn, lsl #48 */ - 0xd61f0100, /* br x8 */ + 0xa9bf7bfd, /* stp x29, x30, [sp, #-16]! */ + 0x910003fd, /* mov x29, sp */ + 0xd2800008, /* mov x8, #0xnnnn */ + 0xf2a00008, /* movk x8, #0xnnnn, lsl #16 */ + 0xf2c00008, /* movk x8, #0xnnnn, lsl #32 */ + 0xf2e00008, /* movk x8, #0xnnnn, lsl #48 */ + 0xd63f0100, /* blr x8 */ + 0xa8c17bfd, /* ldp x29, x30, [sp], #16 */ + 0xd2800008, /* mov x8, #0xnnnn */ + 0xf2a00008, /* movk x8, #0xnnnn, lsl #16 */ + 0xf2c00008, /* movk x8, #0xnnnn, lsl #32 */ + 0xf2e00008, /* movk x8, #0xnnnn, lsl #48 */ + 0xd61f0100, /* br x8 */ }; void* trampo_area; unsigned* trampoline; @@ -389,22 +389,22 @@ static int patch_libc_close_nocancel(void) * and map only the useful page. AArch64 has only a 48-bit virtual address * space, so we get a pointer we can fit in to two mov instructions. 
*/ trampo_area = mmap(NULL, 65536, PROT_NONE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); if( trampo_area == MAP_FAILED ) { rc = -errno; LOG_S(ci_log("__close_nocancel reservation failed: %d", errno)); return rc; } - trampoline = (unsigned*)CI_PTR_ALIGN_FWD(trampo_area, 65536); + trampoline = (unsigned*) CI_PTR_ALIGN_FWD(trampo_area, 65536); if( mmap(trampoline, CI_PAGE_SIZE, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) != trampoline ) { + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) != trampoline ) { rc = -errno; LOG_S(ci_log("__close_nocancel mmap failed: %d", errno)); return rc; } memcpy(trampoline, trampo_code, sizeof(trampo_code)); aarch64_write_ptr_insns(trampoline + 2, oo_close_nocancel_entry); - aarch64_write_ptr_insns(trampoline + 8, (unsigned*)close_nocancel + 3); + aarch64_write_ptr_insns(trampoline + 8, (unsigned*) close_nocancel + 3); rc = mprotect(trampoline, CI_PAGE_SIZE, PROT_READ | PROT_EXEC); if( rc != 0 ) { rc = -errno; @@ -412,10 +412,10 @@ static int patch_libc_close_nocancel(void) return rc; } - ci_assert_equal((uintptr_t)trampoline & 0xffff, 0); - ci_assert_equal((uintptr_t)trampoline >> 48, 0); - replacement[0] |= (((uintptr_t)trampoline >> 16) & 0xffff) << 5; - replacement[1] |= (((uintptr_t)trampoline >> 32) & 0xffff) << 5; + ci_assert_equal((uintptr_t) trampoline & 0xffff, 0); + ci_assert_equal((uintptr_t) trampoline >> 48, 0); + replacement[0] |= (((uintptr_t) trampoline >> 16) & 0xffff) << 5; + replacement[1] |= (((uintptr_t) trampoline >> 32) & 0xffff) << 5; return modify_glibc_code(close_nocancel, replacement, sizeof(replacement)); } #else @@ -437,11 +437,11 @@ int citp_fdtable_ctor() Log_S(log("%s:", __FUNCTION__)); - /* How big should our fdtable be by default? It's pretty arbitrary, but we have - * seen a few apps that use setrlimit to set the fdtable to 4096 entries on - * start-up (see bugs 3253 and 3373), so we choose that. 
(Note: we can't grow - * the table if the app later does setrlimit, and unused entries consume virtual - * space only, so it's worth allocating a table of reasonable sized.) + /* How big should our fdtable be by default? It's pretty arbitrary, but we + * have seen a few apps that use setrlimit to set the fdtable to 4096 entries + * on start-up (see bugs 3253 and 3373), so we choose that. (Note: we can't + * grow the table if the app later does setrlimit, and unused entries consume + * virtual space only, so it's worth allocating a table of reasonable sized.) */ citp_fdtable.size = 4096; @@ -449,34 +449,33 @@ int citp_fdtable_ctor() citp_fdtable.size = rlim.rlim_max; if( CITP_OPTS.fdtable_size != 0 && CITP_OPTS.fdtable_size != rlim.rlim_max ) { - Log_S(ci_log("Set the limits for the number of opened files " - "to EF_FDTABLE_SIZE=%u value.", - CITP_OPTS.fdtable_size)); + Log_S( + ci_log("Set the limits for the number of opened files " + "to EF_FDTABLE_SIZE=%u value.", + CITP_OPTS.fdtable_size)); rlim.rlim_max = CITP_OPTS.fdtable_size; if( rlim.rlim_cur > rlim.rlim_max ) rlim.rlim_cur = rlim.rlim_max; if( ci_sys_setrlimit(RLIMIT_NOFILE, &rlim) == 0 ) - citp_fdtable.size = rlim.rlim_max; + citp_fdtable.size = rlim.rlim_max; else { /* Most probably, we've got EPERM */ ci_assert_lt(citp_fdtable.size, CITP_OPTS.fdtable_size); ci_log("Can't set EF_FDTABLE_SIZE=%u; using %u", - CITP_OPTS.fdtable_size, citp_fdtable.size); + CITP_OPTS.fdtable_size, citp_fdtable.size); rlim.rlim_max = rlim.rlim_cur = citp_fdtable.size; CI_TRY(ci_sys_setrlimit(RLIMIT_NOFILE, &rlim)); } } - } - else + } else Log_S(ci_log("Assume EF_FDTABLE_SIZE=%u", citp_fdtable.size)); citp_fdtable.inited_count = 0; - citp_fdtable.table = malloc(sizeof (citp_fdtable_entry) * - citp_fdtable.size); + citp_fdtable.table = malloc(sizeof(citp_fdtable_entry) * citp_fdtable.size); if( ! 
citp_fdtable.table ) { Log_U(log("%s: failed to allocate fdtable (0x%x)", __FUNCTION__, - citp_fdtable.size)); + citp_fdtable.size)); return -1; } @@ -496,7 +495,8 @@ int citp_fdtable_ctor() rc = patch_libc_close_nocancel(); if( rc < 0 ) { - Log_E(log("%s: Didn't intercept libc internal close %d", __FUNCTION__, rc)); + Log_E( + log("%s: Didn't intercept libc internal close %d", __FUNCTION__, rc)); /* Which is bad, but not fatal */ } @@ -509,17 +509,17 @@ int citp_fdtable_ctor() } -#if !defined (NDEBUG) || CI_CFG_FDTABLE_CHECKS +#if ! defined(NDEBUG) || CI_CFG_FDTABLE_CHECKS /* This function does some simple tests to ensure that the fdtable makes sense. * There are many more tests we could do; feel free to add them at your * leisure! */ -void -citp_fdtable_assert_valid(void) +void citp_fdtable_assert_valid(void) { int i; - if( ! citp_fdtable.table ) return; + if( ! citp_fdtable.table ) + return; CITP_FDTABLE_LOCK_RD(); @@ -527,26 +527,26 @@ citp_fdtable_assert_valid(void) citp_fdinfo_p fdip = citp_fdtable.table[i].fdip; if( fdip_is_normal(fdip) ) { - citp_fdinfo * fdi = fdip_to_fdi(fdip); + citp_fdinfo* fdi = fdip_to_fdi(fdip); ci_assert(fdi); ci_assert(fdi->protocol); - if( ( fdi->protocol->type == CITP_TCP_SOCKET || - fdi->protocol->type == CITP_UDP_SOCKET ) - && fdi_to_socket(fdi)->s ) - ci_assert(! (fdi_to_socket(fdi)->s->b.sb_aflags & CI_SB_AFLAG_ORPHAN)); + if( (fdi->protocol->type == CITP_TCP_SOCKET || + fdi->protocol->type == CITP_UDP_SOCKET) && + fdi_to_socket(fdi)->s ) + ci_assert(! (fdi_to_socket(fdi)->s->b.sb_aflags & CI_SB_AFLAG_ORPHAN)); - if (!fdi->is_special) { + if( ! 
fdi->is_special ) { /* Ensure the "back pointer" makes sense */ - ci_assert (fdi->fd == i); + ci_assert(fdi->fd == i); /* Ensure that the reference count is in a vaguely sensible range */ - ci_assert ((oo_atomic_read (&fdi->ref_count) > 0) && - (oo_atomic_read (&fdi->ref_count) < 10000)); + ci_assert((oo_atomic_read(&fdi->ref_count) > 0) && + (oo_atomic_read(&fdi->ref_count) < 10000)); /* 10,000 threads is a bit mad, warn if more than 20 */ - if (oo_atomic_read (&fdi->ref_count) > 20) { - Log_U (log ("Warning: fd %d's ref-count suspiciously large (%d)\n", - i, oo_atomic_read (&fdi->ref_count))); + if( oo_atomic_read(&fdi->ref_count) > 20 ) { + Log_U(log("Warning: fd %d's ref-count suspiciously large (%d)\n", i, + oo_atomic_read(&fdi->ref_count))); } } } @@ -557,19 +557,21 @@ citp_fdtable_assert_valid(void) #endif -static void fdtable_swap(unsigned fd, citp_fdinfo_p from, - citp_fdinfo_p to, int fdt_locked) +static void fdtable_swap( + unsigned fd, citp_fdinfo_p from, citp_fdinfo_p to, int fdt_locked) { volatile citp_fdinfo_p* p_fdip; citp_fdinfo_p fdip; p_fdip = &citp_fdtable.table[fd].fdip; - again: +again: fdip = *p_fdip; - if( fdip_is_busy(fdip) ) fdip = citp_fdtable_busy_wait(fd, fdt_locked); + if( fdip_is_busy(fdip) ) + fdip = citp_fdtable_busy_wait(fd, fdt_locked); ci_assert_equal(fdip, from); - if( fdip_cas_fail(p_fdip, from, to) ) goto again; + if( fdip_cas_fail(p_fdip, from, to) ) + goto again; } /* If this is called with OO_IOC_TCP_HANDOVER the stack lock must be held */ @@ -597,9 +599,8 @@ static int fdtable_fd_move(ci_fd_t sock_fd, int op) return rc; } -static int -citp_fdtable_probe_restore(int fd, ci_ep_info_t * info, int print_banner, - citp_fdinfo_p* fdip_out) +static int citp_fdtable_probe_restore( + int fd, ci_ep_info_t* info, int print_banner, citp_fdinfo_p* fdip_out) { citp_protocol_impl* proto = 0; citp_fdinfo* fdi = 0; @@ -613,25 +614,30 @@ citp_fdtable_probe_restore(int fd, ci_ep_info_t * info, int print_banner, /* Will need to review this 
function if the following assert fires */ switch( info->fd_flags & OO_FDFLAG_EP_MASK ) { - case OO_FDFLAG_EP_TCP: proto = &citp_tcp_protocol_impl; break; - case OO_FDFLAG_EP_UDP: proto = &citp_udp_protocol_impl; break; - case OO_FDFLAG_EP_PASSTHROUGH: - proto = &citp_passthrough_protocol_impl; - c_sock_fdi = 0; - break; - case OO_FDFLAG_EP_ALIEN: - proto = NULL; - c_sock_fdi = 0; - break; - case OO_FDFLAG_EP_PIPE_READ: - proto = &citp_pipe_read_protocol_impl; - c_sock_fdi = 0; - break; - case OO_FDFLAG_EP_PIPE_WRITE: - proto = &citp_pipe_write_protocol_impl; - c_sock_fdi = 0; - break; - default: ci_assert(0); + case OO_FDFLAG_EP_TCP: + proto = &citp_tcp_protocol_impl; + break; + case OO_FDFLAG_EP_UDP: + proto = &citp_udp_protocol_impl; + break; + case OO_FDFLAG_EP_PASSTHROUGH: + proto = &citp_passthrough_protocol_impl; + c_sock_fdi = 0; + break; + case OO_FDFLAG_EP_ALIEN: + proto = NULL; + c_sock_fdi = 0; + break; + case OO_FDFLAG_EP_PIPE_READ: + proto = &citp_pipe_read_protocol_impl; + c_sock_fdi = 0; + break; + case OO_FDFLAG_EP_PIPE_WRITE: + proto = &citp_pipe_write_protocol_impl; + c_sock_fdi = 0; + break; + default: + ci_assert(0); } /* Attempt to find the user-level netif for this endpoint */ @@ -641,17 +647,16 @@ citp_fdtable_probe_restore(int fd, ci_ep_info_t * info, int print_banner, /* Not found, rebuild/restore the netif for this endpoint */ rc = citp_netif_recreate_probed(fd, &netif_fd, &ni); - if ( rc < 0 ) { - Log_E(log("%s: citp_netif_recreate_probed failed! (%d)", - __FUNCTION__, rc)); + if( rc < 0 ) { + Log_E(log( + "%s: citp_netif_recreate_probed failed! 
(%d)", __FUNCTION__, rc)); goto fail; } if( print_banner ) { ci_netif_log_startup_banner(ni, "Importing"); } - } - else + } else citp_netif_add_ref(ni); /* There is a race condition where the fd can have been created, but it has @@ -667,7 +672,7 @@ citp_fdtable_probe_restore(int fd, ci_ep_info_t * info, int print_banner, return rc; } - if (c_sock_fdi) { + if( c_sock_fdi ) { citp_sock_fdi* sock_fdi; sock_fdi = CI_ALLOC_OBJ(citp_sock_fdi); @@ -680,14 +685,13 @@ citp_fdtable_probe_restore(int fd, ci_ep_info_t * info, int print_banner, sock_fdi->sock.s = SP_TO_SOCK_CMN(ni, info->sock_id); sock_fdi->sock.netif = ni; - } - else if( info->fd_flags & OO_FDFLAG_EP_PASSTHROUGH ) { + } else if( info->fd_flags & OO_FDFLAG_EP_PASSTHROUGH ) { citp_waitable* w = SP_TO_WAITABLE(ni, info->sock_id); citp_alien_fdi* alien_fdi; if( ~w->sb_aflags & CI_SB_AFLAG_MOVED_AWAY_IN_EPOLL && fdtable_fd_move(fd, OO_IOC_FILE_MOVED) == 0 ) { citp_netif_release_ref(ni, 1); - *fdip_out = fdip_passthru; + *fdip_out = fdip_passthru; return rc; } @@ -756,11 +760,12 @@ citp_fdtable_probe_restore(int fd, ci_ep_info_t * info, int print_banner, /* We're returning a reference to the caller. */ citp_fdinfo_ref(fdi); citp_fdtable_insert(fdi, fd, 1); - *fdip_out = fdi_to_fdip(fdi); + *fdip_out = fdi_to_fdip(fdi); return rc; - - fail: - if( ni ) citp_netif_release_ref(ni, 1); + +fail: + if( ni ) + citp_netif_release_ref(ni, 1); *fdip_out = fdip_unknown; return rc; } @@ -769,9 +774,8 @@ citp_fdtable_probe_restore(int fd, ci_ep_info_t * info, int print_banner, /* Find out what sort of thing [fd] is, and if it is a user-level socket * then map in the user-level state. 
*/ -static int -citp_fdtable_probe_locked(unsigned fd, int print_banner, - int fdip_is_already_busy, citp_fdinfo** fdi_out) +static int citp_fdtable_probe_locked(unsigned fd, int print_banner, + int fdip_is_already_busy, citp_fdinfo** fdi_out) { citp_fdinfo* fdi = NULL; struct stat64 st; @@ -785,14 +789,17 @@ citp_fdtable_probe_locked(unsigned fd, int print_banner, ** this keeps it cleaner. May optimise down the line when I understand ** what other code needs to call this. */ - + p_fdip = &citp_fdtable.table[fd].fdip; - again: + again: fdip = *p_fdip; - if( fdip_is_busy(fdip) ) fdip = citp_fdtable_busy_wait(fd, 1); - if( ! fdip_is_unknown(fdip) && ! fdip_is_normal(fdip) ) goto exit; - if( fdip_cas_fail(p_fdip, fdip, fdip_busy) ) goto again; - + if( fdip_is_busy(fdip) ) + fdip = citp_fdtable_busy_wait(fd, 1); + if( ! fdip_is_unknown(fdip) && ! fdip_is_normal(fdip) ) + goto exit; + if( fdip_cas_fail(p_fdip, fdip, fdip_busy) ) + goto again; + if( fdip_is_normal(fdip) ) { fdi = fdip_to_fdi(fdip); citp_fdinfo_ref(fdi); @@ -814,61 +821,62 @@ citp_fdtable_probe_locked(unsigned fd, int print_banner, /* oo_get_st_rdev() and oo_onloadfs_dev_t() open-and-close fd, so * fdtable should be locked if strict mode requested. 
*/ - if( fdtable_strict() ) { CITP_FDTABLE_ASSERT_LOCKED(1); } + if( fdtable_strict() ) { + CITP_FDTABLE_ASSERT_LOCKED(1); + } - if( st.st_dev == oo_onloadfs_dev_t() ) { + if( st.st_dev == oo_onloadfs_dev_t() ) { /* Retrieve user-level endpoint info */ if( oo_ep_info(fd, &info) < 0 ) { - Log_V(log("%s: fd=%d unknown type "OO_FDFLAG_FMT, - __FUNCTION__, fd, OO_FDFLAG_ARG(info.fd_flags))); + Log_V(log("%s: fd=%d unknown type " OO_FDFLAG_FMT, __FUNCTION__, fd, + OO_FDFLAG_ARG(info.fd_flags))); citp_fdtable_busy_clear(fd, fdip_passthru, 1); goto exit; } switch( info.fd_flags & (OO_FDFLAG_EP_MASK | OO_FDFLAG_STACK) ) { - case OO_FDFLAG_EP_TCP: - case OO_FDFLAG_EP_UDP: - case OO_FDFLAG_EP_PASSTHROUGH: - case OO_FDFLAG_EP_ALIEN: - case OO_FDFLAG_EP_PIPE_READ: - case OO_FDFLAG_EP_PIPE_WRITE: - { - citp_fdinfo_p fdip; - - Log_V(log("%s: fd=%d restore type "OO_FDFLAG_FMT, __FUNCTION__, fd, - OO_FDFLAG_ARG(info.fd_flags))); - rc = citp_fdtable_probe_restore(fd, &info, print_banner, &fdip); - if( fdip_is_normal(fdip) ) - fdi = fdip_to_fdi(fdip); - else - citp_fdtable_busy_clear(fd, fdip, 1); - goto exit; - } - - case OO_FDFLAG_STACK: - /* This should never happen, because netif fds are close-on-exec. - ** But let's leave this code here just in case my reasoning is bad. 
- */ - Log_U(log("%s: fd=%d NETIF reserved", __FUNCTION__, fd)); - citp_fdtable_busy_clear(fd, fdip_reserved, 1); - fdi = &citp_the_reserved_fd; - citp_fdinfo_ref(fdi); - goto exit; + case OO_FDFLAG_EP_TCP: + case OO_FDFLAG_EP_UDP: + case OO_FDFLAG_EP_PASSTHROUGH: + case OO_FDFLAG_EP_ALIEN: + case OO_FDFLAG_EP_PIPE_READ: + case OO_FDFLAG_EP_PIPE_WRITE: { + citp_fdinfo_p fdip; + + Log_V(log("%s: fd=%d restore type " OO_FDFLAG_FMT, __FUNCTION__, fd, + OO_FDFLAG_ARG(info.fd_flags))); + rc = citp_fdtable_probe_restore(fd, &info, print_banner, &fdip); + if( fdip_is_normal(fdip) ) + fdi = fdip_to_fdi(fdip); + else + citp_fdtable_busy_clear(fd, fdip, 1); + goto exit; + } - default: - /* This happens if a thread gets at an onload driver fd that has just - * been created, but not yet specialised. On Linux I think this - * means it will shortly be a new netif internal fd. (fds associated - * with sockets and pipes are never unspecialised). - */ - Log_V(log("%s: fd=%d TYPE_NONE", __FUNCTION__, fd)); - citp_fdtable_busy_clear(fd, fdip_passthru, 1); - goto exit; + case OO_FDFLAG_STACK: + /* This should never happen, because netif fds are close-on-exec. + ** But let's leave this code here just in case my reasoning is bad. + */ + Log_U(log("%s: fd=%d NETIF reserved", __FUNCTION__, fd)); + citp_fdtable_busy_clear(fd, fdip_reserved, 1); + fdi = &citp_the_reserved_fd; + citp_fdinfo_ref(fdi); + goto exit; + + default: + /* This happens if a thread gets at an onload driver fd that has just + * been created, but not yet specialised. On Linux I think this + * means it will shortly be a new netif internal fd. (fds associated + * with sockets and pipes are never unspecialised). 
+ */ + Log_V(log("%s: fd=%d TYPE_NONE", __FUNCTION__, fd)); + citp_fdtable_busy_clear(fd, fdip_passthru, 1); + goto exit; } } #if CI_CFG_EPOLL2 else if( ci_major(st.st_rdev) == ci_major(oo_get_st_rdev(OO_EPOLL_DEV)) ) { - citp_epollb_fdi *epi = CI_ALLOC_OBJ(citp_epollb_fdi); + citp_epollb_fdi* epi = CI_ALLOC_OBJ(citp_epollb_fdi); if( ! epi ) { Log_E(log("%s: out of memory (epoll_fdi)", __FUNCTION__)); citp_fdtable_busy_clear(fd, fdip_passthru, 1); @@ -894,13 +902,12 @@ citp_fdtable_probe_locked(unsigned fd, int print_banner, Log_V(log("%s: fd=%u non-efab", __FUNCTION__, fd)); citp_fdtable_busy_clear(fd, fdip_passthru, 1); - exit: +exit: *fdi_out = fdi; return rc; } -static citp_fdinfo * -citp_fdtable_probe(unsigned fd) +static citp_fdinfo* citp_fdtable_probe(unsigned fd) { citp_fdinfo* fdi; int saved_errno; @@ -919,19 +926,18 @@ citp_fdtable_probe(unsigned fd) return fdi; } -static int -citp_fdinfo_is_consistent(citp_fdinfo* fdi) +static int citp_fdinfo_is_consistent(citp_fdinfo* fdi) { switch( fdi->protocol->type ) { - case CITP_TCP_SOCKET: - case CITP_UDP_SOCKET: - return ~fdi_to_sock_fdi(fdi)->sock.s->b.sb_aflags & CI_SB_AFLAG_MOVED_AWAY; + case CITP_TCP_SOCKET: + case CITP_UDP_SOCKET: + return ~fdi_to_sock_fdi(fdi)->sock.s->b.sb_aflags & + CI_SB_AFLAG_MOVED_AWAY; } return CI_TRUE; } -citp_fdinfo * -citp_fdtable_lookup(unsigned fd) +citp_fdinfo* citp_fdtable_lookup(unsigned fd) { /* Note that if we haven't yet initialised this module, then ** [inited_count] will be zero, and the following test will fail. 
So the @@ -946,7 +952,6 @@ citp_fdtable_lookup(unsigned fd) ci_assert(oo_per_thread_get()->sig.c.inside_lib); if( fd < citp_fdtable.inited_count ) { - volatile citp_fdinfo_p* p_fdip = &citp_fdtable.table[fd].fdip; citp_fdinfo_p fdip; @@ -956,49 +961,48 @@ citp_fdtable_lookup(unsigned fd) if( fdip_is_normal(fdip) ) { if( citp_fdtable_not_mt_safe() ) { - if( fdip_cas_succeed(p_fdip, fdip, fdip_busy) ) { - fdi = fdip_to_fdi(fdip); - ci_assert(fdi); - ci_assert_gt(oo_atomic_read(&fdi->ref_count), 0); - ci_assert(fdip_is_closing(fdip) || fdip_is_reserved(fdip) || - fdi->fd == fd); - /* Bump the reference count. */ - citp_fdinfo_ref(fdi); + if( fdip_cas_succeed(p_fdip, fdip, fdip_busy) ) { + fdi = fdip_to_fdi(fdip); + ci_assert(fdi); + ci_assert_gt(oo_atomic_read(&fdi->ref_count), 0); + ci_assert(fdip_is_closing(fdip) || fdip_is_reserved(fdip) || + fdi->fd == fd); + /* Bump the reference count. */ + citp_fdinfo_ref(fdi); if( ! citp_fdinfo_is_consistent(fdi) ) { /* Something is wrong. Re-probe. */ fdi = citp_reprobe_moved(fdi, CI_FALSE, CI_TRUE); - } - else { + } else { /* Swap the busy marker out again. */ citp_fdtable_busy_clear(fd, fdip, 0); } - return fdi; - } - goto again; - } - else { - /* No need to use atomic ops when single-threaded. The definition + return fdi; + } + goto again; + } else { + /* No need to use atomic ops when single-threaded. The definition * of "fds_mt_safe" is that the app does not change the meaning of * a file descriptor in one thread when it is being used in another * thread. In that case I'm hoping this should be safe, but at * time of writing I'm really not confident. (FIXME). */ - fdi = fdip_to_fdi(fdip); + fdi = fdip_to_fdi(fdip); if( ci_is_multithreaded() ) - citp_fdinfo_ref(fdi); + citp_fdinfo_ref(fdi); else ++fdi->ref_count.n; if( ! citp_fdinfo_is_consistent(fdi) ) fdi = citp_reprobe_moved(fdi, CI_FALSE, CI_FALSE); - return fdi; + return fdi; } } /* Not normal! 
*/ - if( fdip_is_passthru(fdip) ) return NULL; + if( fdip_is_passthru(fdip) ) + return NULL; if( fdip_is_busy(fdip) ) { citp_fdtable_busy_wait(fd, 0); @@ -1009,24 +1013,24 @@ citp_fdtable_lookup(unsigned fd) goto probe; } - if (citp.init_level < CITP_INIT_FDTABLE) { - if (_citp_do_init_inprogress == 0) + if( citp.init_level < CITP_INIT_FDTABLE ) { + if( _citp_do_init_inprogress == 0 ) CI_TRY(citp_do_init(CITP_INIT_MAX)); else CI_TRY(citp_do_init(CITP_INIT_FDTABLE)); /* get what we need */ } - if( fd >= citp_fdtable.size ) return NULL; + if( fd >= citp_fdtable.size ) + return NULL; - probe: +probe: fdi = citp_fdtable_probe(fd); return fdi; } -citp_fdinfo* -citp_fdtable_lookup_fast(citp_lib_context_t* ctx, unsigned fd) +citp_fdinfo* citp_fdtable_lookup_fast(citp_lib_context_t* ctx, unsigned fd) { /* Note that if we haven't yet initialised this module, then ** [inited_count] will be zero, and the following test will fail. So the @@ -1040,38 +1044,36 @@ citp_fdtable_lookup_fast(citp_lib_context_t* ctx, unsigned fd) /* Try to avoid entering lib. */ ctx->thread = NULL; - if(CI_LIKELY( fd < citp_fdtable.inited_count )) { + if( CI_LIKELY(fd < citp_fdtable.inited_count) ) { volatile citp_fdinfo_p* p_fdip = &citp_fdtable.table[fd].fdip; citp_fdinfo_p fdip; again: fdip = *p_fdip; - if(CI_LIKELY( fdip_is_normal(fdip) )) { - + if( CI_LIKELY(fdip_is_normal(fdip)) ) { citp_enter_lib_if(ctx); if( citp_fdtable_is_mt_safe() ) { - /* No need to use atomic ops or add a ref to the fdi when MT-safe. + /* No need to use atomic ops or add a ref to the fdi when MT-safe. * The definition of "fds_mt_safe" is that the app does not change * the meaning of a file descriptor in one thread when it is being * used in another thread. */ fdi = fdip_to_fdi(fdip); - if(CI_UNLIKELY( ! citp_fdinfo_is_consistent(fdi) )) + if( CI_UNLIKELY(! citp_fdinfo_is_consistent(fdi)) ) fdi = citp_reprobe_moved(fdi, CI_TRUE, CI_FALSE); - return fdi; - } - else { + return fdi; + } else { /* Swap in the busy marker. 
*/ - if( fdip_cas_succeed(p_fdip, fdip, fdip_busy) ) { - fdi = fdip_to_fdi(fdip); + if( fdip_cas_succeed(p_fdip, fdip, fdip_busy) ) { + fdi = fdip_to_fdi(fdip); - ci_assert(fdi); - ci_assert_gt(oo_atomic_read(&fdi->ref_count), 0); - ci_assert(fdip_is_closing(fdip) || fdip_is_reserved(fdip) || - fdi->fd == fd); - /* Bump the reference count. */ - citp_fdinfo_ref(fdi); + ci_assert(fdi); + ci_assert_gt(oo_atomic_read(&fdi->ref_count), 0); + ci_assert(fdip_is_closing(fdip) || fdip_is_reserved(fdip) || + fdi->fd == fd); + /* Bump the reference count. */ + citp_fdinfo_ref(fdi); if( ! citp_fdinfo_is_consistent(fdi) ) fdi = citp_reprobe_moved(fdi, CI_FALSE, CI_TRUE); @@ -1079,9 +1081,9 @@ citp_fdtable_lookup_fast(citp_lib_context_t* ctx, unsigned fd) /* Swap the busy marker out again. */ citp_fdtable_busy_clear(fd, fdip, 0); } - return fdi; - } - goto again; + return fdi; + } + goto again; } } @@ -1109,7 +1111,7 @@ citp_fdtable_lookup_fast(citp_lib_context_t* ctx, unsigned fd) if( fd >= citp_fdtable.size ) return NULL; - probe: +probe: citp_enter_lib_if(ctx); fdi = citp_fdtable_probe(fd); if( fdi && citp_fdtable_is_mt_safe() ) @@ -1132,7 +1134,7 @@ citp_fdinfo* citp_fdtable_lookup_noprobe(unsigned fd, int fdt_locked) ** fdtable lock, and on fail see if we need to initialise it. */ if( CI_UNLIKELY(citp.init_level < CITP_INIT_FDTABLE) ) { - if (_citp_do_init_inprogress == 0) + if( _citp_do_init_inprogress == 0 ) CI_TRY(citp_do_init(CITP_INIT_MAX)); else CI_TRY(citp_do_init(CITP_INIT_FDTABLE)); /* get what we need */ @@ -1141,7 +1143,6 @@ citp_fdinfo* citp_fdtable_lookup_noprobe(unsigned fd, int fdt_locked) } if( fd < citp_fdtable.inited_count ) { - volatile citp_fdinfo_p* p_fdip = &citp_fdtable.table[fd].fdip; citp_fdinfo_p fdip; @@ -1150,11 +1151,11 @@ citp_fdinfo* citp_fdtable_lookup_noprobe(unsigned fd, int fdt_locked) fdip = *p_fdip; if( fdip_is_normal(fdip) ) { if( fdip_cas_succeed(p_fdip, fdip, fdip_busy) ) { - /* Bump the reference count. 
*/ - citp_fdinfo* fdi = fdip_to_fdi(fdip); - citp_fdinfo_ref(fdi); - /* Swap the busy marker out again. */ - citp_fdtable_busy_clear(fd, fdip, fdt_locked); + /* Bump the reference count. */ + citp_fdinfo* fdi = fdip_to_fdi(fdip); + citp_fdinfo_ref(fdi); + /* Swap the busy marker out again. */ + citp_fdtable_busy_clear(fd, fdip, fdt_locked); return fdi; } goto again; @@ -1164,7 +1165,6 @@ citp_fdinfo* citp_fdtable_lookup_noprobe(unsigned fd, int fdt_locked) citp_fdtable_busy_wait(fd, fdt_locked); goto again; } - } return NULL; @@ -1177,8 +1177,7 @@ static ci_netif* fd_to_netif(int fd, int fdt_locked) if( oo_ep_info(fd, &info) < 0 ) { Log_V(log("%s: fd=%d unknown", __FUNCTION__, fd)); - } - else { + } else { ni = citp_find_ul_netif(info.resource_id, fdt_locked); } @@ -1201,13 +1200,13 @@ static void citp_fdinfo_do_handover(citp_fdinfo* fdi, int fdt_locked) Log_V(ci_log("%s: fd=%d nonb_switch=%d", __FUNCTION__, fdi->fd, - fdi->on_rcz.handover_nonb_switch)); + fdi->on_rcz.handover_nonb_switch)); epoll_fdi = citp_epoll_fdi_from_member(fdi, fdt_locked); #if CI_CFG_EPOLL2 if( fdi->epoll_fd >= 0 && epoll_fdi != NULL && epoll_fdi->protocol->type == CITP_EPOLLB_FD ) { - citp_epollb_on_handover(epoll_fdi, fdi); + citp_epollb_on_handover(epoll_fdi, fdi); } #endif @@ -1223,7 +1222,7 @@ static void citp_fdinfo_do_handover(citp_fdinfo* fdi, int fdt_locked) ci_assert(ni); ci_netif_lock(ni); /* Remove SO_LINGER flag from the old ep: we want to close it silently */ - sock->s_flags &=~ CI_SOCK_FLAG_LINGER; + sock->s_flags &= ~CI_SOCK_FLAG_LINGER; citp_waitable_cleanup(ni, SOCK_TO_WAITABLE_OBJ(sock), 0); rc = fdtable_fd_move(fdi->fd, OO_IOC_TCP_HANDOVER); ci_netif_unlock(ni); @@ -1238,15 +1237,17 @@ static void citp_fdinfo_do_handover(citp_fdinfo* fdi, int fdt_locked) } if( rc != 0 ) { citp_fdinfo* new_fdi; - if( ! fdt_locked ) CITP_FDTABLE_LOCK(); + if( ! 
fdt_locked ) + CITP_FDTABLE_LOCK(); citp_fdtable_probe_locked(fdi->fd, CI_TRUE, CI_TRUE, &new_fdi); citp_fdinfo_release_ref(new_fdi, 1); - if( ! fdt_locked ) CITP_FDTABLE_UNLOCK(); + if( ! fdt_locked ) + CITP_FDTABLE_UNLOCK(); ci_assert_equal(citp_fdinfo_get_type(new_fdi), CITP_PASSTHROUGH_FD); os_fd = fdi_to_alien_fdi(new_fdi)->os_socket; } if( fdi->on_rcz.handover_nonb_switch >= 0 ) { - int on_off = !! fdi->on_rcz.handover_nonb_switch; + int on_off = ! ! fdi->on_rcz.handover_nonb_switch; int rc = ci_sys_ioctl(os_fd, FIONBIO, &on_off); if( rc < 0 ) Log_E(ci_log("%s: ioctl failed on_off=%d", __FUNCTION__, on_off)); @@ -1258,8 +1259,7 @@ static void citp_fdinfo_do_handover(citp_fdinfo* fdi, int fdt_locked) citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked); if( epoll_fdi != NULL && epoll_fdi->protocol->type == CITP_EPOLL_FD ) { citp_epoll_on_handover(epoll_fdi, fdi, fdt_locked); - } - else { + } else { if( epoll_fdi != NULL ) citp_fdinfo_release_ref(epoll_fdi, fdt_locked); citp_fdinfo_free(fdi); @@ -1268,23 +1268,23 @@ static void citp_fdinfo_do_handover(citp_fdinfo* fdi, int fdt_locked) #if CI_CFG_FD_CACHING /* Closes a cached fd. In the typical case, this boils down to sys_close. */ -static int uncache_fd_ul(ci_netif* ni, ci_tcp_state* ts, int cur_tgid, int quiet) +static int uncache_fd_ul( + ci_netif* ni, ci_tcp_state* ts, int cur_tgid, int quiet) { - int fd = ts->cached_on_fd; + int fd = ts->cached_on_fd; int pid = ts->cached_on_pid; - Log_V(ci_log("Uncaching fd %d on pid %d running pid %d", fd, - pid, cur_tgid)); + Log_V(ci_log("Uncaching fd %d on pid %d running pid %d", fd, pid, cur_tgid)); /* No tasklets or other bottom-halves - we always have "current" */ if( ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE && - !(ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE_NO_FD) ) { + ! 
(ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE_NO_FD) ) { if( pid != cur_tgid ) { if( quiet ) return -1; Log_V(ci_log("%s: file cached on unexpected PID %d , expected %d", - __FUNCTION__, pid, cur_tgid)); + __FUNCTION__, pid, cur_tgid)); return -1; } - S_TO_EPS(ni,ts)->fd = CI_FD_BAD; + S_TO_EPS(ni, ts)->fd = CI_FD_BAD; /* simply close kernel FD, it should not affect endpoint at all */ ci_tcp_helper_close_no_trampoline(fd); CITP_STATS_NETIF_INC(ni, epoll_fd_uncache); @@ -1293,8 +1293,8 @@ static int uncache_fd_ul(ci_netif* ni, ci_tcp_state* ts, int cur_tgid, int quiet } -static void -__citp_uncache_fds_ul(ci_netif* netif, struct oo_p_dllink_state list) +static void __citp_uncache_fds_ul( + ci_netif* netif, struct oo_p_dllink_state list) { int cur_tgid = getpid(); ci_tcp_state** eps; @@ -1307,7 +1307,8 @@ __citp_uncache_fds_ul(ci_netif* netif, struct oo_p_dllink_state list) struct oo_p_dllink_state l; eps = malloc(sizeof(ci_tcp_state*) * n_ep_bufs); - oo_p_dllink_for_each(netif, l, list) { + oo_p_dllink_for_each(netif, l, list) + { if( n >= n_ep_bufs ) { ci_log("%s: ep %d with n_ep_bufs %d", __FUNCTION__, n, n_ep_bufs); break; @@ -1316,7 +1317,7 @@ __citp_uncache_fds_ul(ci_netif* netif, struct oo_p_dllink_state list) ci_assert(ts); ci_assert(ci_tcp_is_cached(ts)); if( ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE && - !(ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE_NO_FD) && + ! 
(ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE_NO_FD) && ts->cached_on_pid == cur_tgid ) eps[n++] = ts; } @@ -1325,18 +1326,17 @@ __citp_uncache_fds_ul(ci_netif* netif, struct oo_p_dllink_state list) { int i; for( i = 0; i < n; ++i ) - uncache_fd_ul(netif, eps[i], cur_tgid, 1/*quiet*/); + uncache_fd_ul(netif, eps[i], cur_tgid, 1 /*quiet*/); } free(eps); } -void -citp_uncache_fds_ul(ci_netif* netif) +void citp_uncache_fds_ul(ci_netif* netif) { { int i; - for( i = 0; i < netif->state->n_ep_bufs; ++i) { + for( i = 0; i < netif->state->n_ep_bufs; ++i ) { int fd = netif->eps[i].fd; if( fd != CI_FD_BAD ) ci_tcp_helper_close_no_trampoline(fd); @@ -1345,17 +1345,17 @@ citp_uncache_fds_ul(ci_netif* netif) return; if( netif->cached_count == 0 ) return; - Log_V(ci_log("%s: %d: %s: cached_count %d", __func__, - getpid(), netif->state->pretty_name, netif->cached_count)); + Log_V(ci_log("%s: %d: %s: cached_count %d", __func__, getpid(), + netif->state->pretty_name, netif->cached_count)); /* Remove all fds from the cache that belong to the current process */ - __citp_uncache_fds_ul(netif, oo_p_dllink_ptr(netif, - &netif->state->passive_scalable_cache.cache)); - __citp_uncache_fds_ul(netif, oo_p_dllink_ptr(netif, - &netif->state->passive_scalable_cache.pending)); - __citp_uncache_fds_ul(netif, oo_p_dllink_ptr(netif, - &netif->state->active_cache.cache)); - __citp_uncache_fds_ul(netif, oo_p_dllink_ptr(netif, - &netif->state->active_cache.pending)); + __citp_uncache_fds_ul(netif, + oo_p_dllink_ptr(netif, &netif->state->passive_scalable_cache.cache)); + __citp_uncache_fds_ul(netif, + oo_p_dllink_ptr(netif, &netif->state->passive_scalable_cache.pending)); + __citp_uncache_fds_ul( + netif, oo_p_dllink_ptr(netif, &netif->state->active_cache.cache)); + __citp_uncache_fds_ul( + netif, oo_p_dllink_ptr(netif, &netif->state->active_cache.pending)); } #endif @@ -1380,8 +1380,8 @@ void __citp_fdinfo_ref_count_zero(citp_fdinfo* fdi, int fdt_locked) #if CI_CFG_FD_CACHING int cached; #endif - 
Log_V(log("%s: fd=%d on_rcz=%d", __FUNCTION__, fdi->fd, - fdi->on_ref_count_zero)); + Log_V(log( + "%s: fd=%d on_rcz=%d", __FUNCTION__, fdi->fd, fdi->on_ref_count_zero)); citp_fdinfo_assert_valid(fdi); ci_assert(oo_atomic_read(&fdi->ref_count) == 0); @@ -1390,80 +1390,85 @@ void __citp_fdinfo_ref_count_zero(citp_fdinfo* fdi, int fdt_locked) ci_assert_nequal(fdi_to_fdip(fdi), citp_fdtable.table[fdi->fd].fdip); switch( fdi->on_ref_count_zero ) { - case FDI_ON_RCZ_CLOSE: + case FDI_ON_RCZ_CLOSE: #if CI_CFG_FD_CACHING - cached = citp_fdinfo_get_ops(fdi)->cache(fdi); - if( cached == 1 ) { - if( ! fdt_locked && fdtable_strict() ) CITP_FDTABLE_LOCK(); - fdi_to_socket(fdi)->netif->cached_count++; - fdtable_swap(fdi->fd, fdip_closing, fdip_unknown, - fdt_locked | fdtable_strict()); - citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked | fdtable_strict()); - if( ! fdt_locked && fdtable_strict() ) CITP_FDTABLE_UNLOCK(); - citp_fdinfo_free(fdi); - break; - } + cached = citp_fdinfo_get_ops(fdi)->cache(fdi); + if( cached == 1 ) { + if( ! fdt_locked && fdtable_strict() ) + CITP_FDTABLE_LOCK(); + fdi_to_socket(fdi)->netif->cached_count++; + fdtable_swap(fdi->fd, fdip_closing, fdip_unknown, + fdt_locked | fdtable_strict()); + citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked | fdtable_strict()); + if( ! fdt_locked && fdtable_strict() ) + CITP_FDTABLE_UNLOCK(); + citp_fdinfo_free(fdi); + break; + } #endif - { + { #if CI_CFG_UL_INTERRUPT_HELPER - ci_netif* netif = fdi_to_stack(fdi); + ci_netif* netif = fdi_to_stack(fdi); #endif - /* We mark the fd as busy before closing it to avoid races. This means - * that if this fd is looked up during this phase of the close the looker - * upper will have to wait. - * - * There are problems if we try and keep this safe just by swapping - * the unknown and closing fdi entries. If we set to unknown before - * close that could result in things being re-probed in the gap between - * setting to uknown and actually closing the fd. 
If we close before - * setting to unknown then the fd could be re-used by the kernel - * without onload seeing it, and lookups would still return the closing - * fdi until the unknown entry had been swapped in. - */ - if( ! fdt_locked && fdtable_strict() ) CITP_FDTABLE_LOCK(); + /* We mark the fd as busy before closing it to avoid races. This means + * that if this fd is looked up during this phase of the close the + * looker upper will have to wait. + * + * There are problems if we try and keep this safe just by swapping + * the unknown and closing fdi entries. If we set to unknown before + * close that could result in things being re-probed in the gap between + * setting to uknown and actually closing the fd. If we close before + * setting to unknown then the fd could be re-used by the kernel + * without onload seeing it, and lookups would still return the closing + * fdi until the unknown entry had been swapped in. + */ + if( ! fdt_locked && fdtable_strict() ) + CITP_FDTABLE_LOCK(); - fdtable_swap(fdi->fd, fdip_closing, fdip_busy, - fdt_locked | fdtable_strict()); - if( fdi->protocol->type == CITP_TCP_SOCKET ) - SC_TO_EPS(fdi_to_socket(fdi)->netif,fdi_to_socket(fdi)->s)->fd = CI_FD_BAD; + fdtable_swap( + fdi->fd, fdip_closing, fdip_busy, fdt_locked | fdtable_strict()); + if( fdi->protocol->type == CITP_TCP_SOCKET ) + SC_TO_EPS(fdi_to_socket(fdi)->netif, fdi_to_socket(fdi)->s)->fd = + CI_FD_BAD; - if( fdi->on_ref_count_zero == FDI_ON_RCZ_CLOSE ) - ci_tcp_helper_close_no_trampoline(fdi->fd); + if( fdi->on_ref_count_zero == FDI_ON_RCZ_CLOSE ) + ci_tcp_helper_close_no_trampoline(fdi->fd); #if CI_CFG_UL_INTERRUPT_HELPER - /* If it was the last fd for this socket, then we should proceed with - * the real closing right now. - * Todo: In case of SO_LINGER it is really important to handle it all - * here. 
- */ - if( netif != NULL && ci_netif_trylock(netif) ) { - ci_netif_handle_actions(netif); - ci_netif_unlock(netif); - } + /* If it was the last fd for this socket, then we should proceed with + * the real closing right now. + * Todo: In case of SO_LINGER it is really important to handle it all + * here. + */ + if( netif != NULL && ci_netif_trylock(netif) ) { + ci_netif_handle_actions(netif); + ci_netif_unlock(netif); + } #endif - citp_fdtable_busy_clear(fdi->fd, fdip_unknown, - fdt_locked | fdtable_strict()); - citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked | fdtable_strict()); - if( ! fdt_locked && fdtable_strict() ) CITP_FDTABLE_UNLOCK(); + citp_fdtable_busy_clear( + fdi->fd, fdip_unknown, fdt_locked | fdtable_strict()); + citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked | fdtable_strict()); + if( ! fdt_locked && fdtable_strict() ) + CITP_FDTABLE_UNLOCK(); + citp_fdinfo_free(fdi); + break; + } + case FDI_ON_RCZ_DUP2: + dup2_complete(fdi, fdi_to_fdip(fdi), fdt_locked); + break; + case FDI_ON_RCZ_HANDOVER: + citp_fdinfo_do_handover(fdi, fdt_locked); + break; + case FDI_ON_RCZ_MOVED: + citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked); citp_fdinfo_free(fdi); break; - } - case FDI_ON_RCZ_DUP2: - dup2_complete(fdi, fdi_to_fdip(fdi), fdt_locked); - break; - case FDI_ON_RCZ_HANDOVER: - citp_fdinfo_do_handover(fdi, fdt_locked); - break; - case FDI_ON_RCZ_MOVED: - citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked); - citp_fdinfo_free(fdi); - break; - default: - CI_DEBUG(ci_log("%s: fd=%d on_ref_count_zero=%d", __FUNCTION__, - fdi->fd, fdi->on_ref_count_zero)); - ci_assert(0); + default: + CI_DEBUG(ci_log("%s: fd=%d on_ref_count_zero=%d", __FUNCTION__, fdi->fd, + fdi->on_ref_count_zero)); + ci_assert(0); } } @@ -1483,21 +1488,23 @@ void citp_fdinfo_handover(citp_fdinfo* fdi, int nonb_switch) citp_fdinfo_p fdip; unsigned fd = fdi->fd; + LOG_U(ci_log("handing FD over: %d", fdi->fd)); + /* We're about to free some user-level state, so we need to interlock ** against select and poll. 
*/ CITP_FDTABLE_LOCK(); p_fdip = &citp_fdtable.table[fd].fdip; - again: +again: fdip = *p_fdip; - if( fdip_is_busy(fdip) ) fdip = citp_fdtable_busy_wait(fd, 1); + if( fdip_is_busy(fdip) ) + fdip = citp_fdtable_busy_wait(fd, 1); if( fdip == fdi_to_fdip(fdi) ) { if( fdip_cas_fail(p_fdip, fdip, fdip_busy) ) goto again; - } - else { + } else { /* [fd] must have changed meaning under our feet. It must be closing, ** so do nothing except drop the ref passed in. */ @@ -1531,14 +1538,14 @@ void citp_fdtable_fork_hook(void) { unsigned fd; - for (fd = 0; fd < citp_fdtable.inited_count; fd++) { + for( fd = 0; fd < citp_fdtable.inited_count; fd++ ) { citp_fdinfo_p fdip = citp_fdtable.table[fd].fdip; /* Parent has forked when one of its threads had made an fdtable * entry busy. Here in the child no-one will clear the busy state. * We can't do any better than just clearing back to the unknown * state. */ - if (fdip_is_busy(fdip)) { + if( fdip_is_busy(fdip) ) { citp_fdtable.table[fd].fdip = fdip_unknown; continue; } @@ -1546,17 +1553,19 @@ void citp_fdtable_fork_hook(void) } -citp_fdinfo_p -citp_fdtable_new_fd_set(unsigned fd, citp_fdinfo_p new_fdip, int fdt_locked) +citp_fdinfo_p citp_fdtable_new_fd_set( + unsigned fd, citp_fdinfo_p new_fdip, int fdt_locked) { volatile citp_fdinfo_p* p_fdip; citp_fdinfo_p prev; if( fd >= citp_fdtable.inited_count ) { ci_assert_lt(fd, citp_fdtable.size); - if( ! fdt_locked ) CITP_FDTABLE_LOCK(); + if( ! fdt_locked ) + CITP_FDTABLE_LOCK(); __citp_fdtable_extend(fd); - if( ! fdt_locked ) CITP_FDTABLE_UNLOCK(); + if( ! fdt_locked ) + CITP_FDTABLE_UNLOCK(); } p_fdip = &citp_fdtable.table[fd].fdip; @@ -1575,17 +1584,16 @@ citp_fdtable_new_fd_set(unsigned fd, citp_fdinfo_p new_fdip, int fdt_locked) /* Reserved? Perhaps it was a netif fd that has just been closed. So it ** should be about to be unreserved. 
*/ - } while (fdip_is_reserved(prev) || fdip_cas_fail(p_fdip, prev, new_fdip) ); + } while( fdip_is_reserved(prev) || fdip_cas_fail(p_fdip, prev, new_fdip) ); if( fdip_is_normal(prev) ) { /* We can get here is close-trampolining fails. So for release ** builds we accept that the user-level state got out-of-sync, and ** leak [fdi] since it seems like a suitably cautious thing to do. */ - ci_log("%s: ERROR: Orphaned entry %d in user-level fd-table", - __FUNCTION__, fd); - } - else + ci_log("%s: ERROR: Orphaned entry %d in user-level fd-table", __FUNCTION__, + fd); + } else /* We (at time of writing) only register a trampoline handler when we ** create a netif, so we can miss the closing of pass-through ** descriptors. @@ -1610,8 +1618,8 @@ void citp_fdtable_insert(citp_fdinfo* fdi, unsigned fd, int fdt_locked) } -void __citp_fdtable_busy_clear_slow(unsigned fd, citp_fdinfo_p new_fdip, - int fdt_locked) +void __citp_fdtable_busy_clear_slow( + unsigned fd, citp_fdinfo_p new_fdip, int fdt_locked) { volatile citp_fdinfo_p* p_fdip = &citp_fdtable.table[fd].fdip; citp_fdinfo_p fdip, next; @@ -1623,24 +1631,28 @@ void __citp_fdtable_busy_clear_slow(unsigned fd, citp_fdinfo_p new_fdip, /* We need to write-lock citp_ul_lock to avoid races between * this oo_rwlock_cond_broadcast() and oo_rwlock_cond_wait() below. */ - if( !fdt_locked ) + if( ! fdt_locked ) CITP_FDTABLE_LOCK(); - again: +again: fdip = *p_fdip; ci_assert(fdip_is_busy(fdip)); waiter = fdip_to_waiter(fdip); ci_assert(waiter); ci_assert(fdip_is_busy(waiter->next)); - if( waiter->next == fdip_busy ) next = new_fdip; - else next = waiter->next; - if( fdip_cas_fail(p_fdip, fdip, next) ) goto again; + if( waiter->next == fdip_busy ) + next = new_fdip; + else + next = waiter->next; + if( fdip_cas_fail(p_fdip, fdip, next) ) + goto again; oo_rwlock_cond_broadcast(&waiter->cond); - if( next != new_fdip ) goto again; + if( next != new_fdip ) + goto again; - if( !fdt_locked ) + if( ! 
fdt_locked ) CITP_FDTABLE_UNLOCK(); } @@ -1659,10 +1671,10 @@ citp_fdinfo_p citp_fdtable_busy_wait(unsigned fd, int fdt_locked) /* We should lock citp_ul_lock before checking the condition which can * lead to oo_rwlock_cond_wait() call. */ - if( !fdt_locked ) + if( ! fdt_locked ) CITP_FDTABLE_LOCK(); - again: +again: waiter.next = *p_fdip; if( fdip_is_busy(waiter.next) ) { /* we can replace one "busy" fdip by another without fdtable lock */ @@ -1671,7 +1683,7 @@ citp_fdinfo_p citp_fdtable_busy_wait(unsigned fd, int fdt_locked) goto again; } - if( !fdt_locked ) + if( ! fdt_locked ) CITP_FDTABLE_UNLOCK(); oo_rwlock_cond_destroy(&waiter.cond); @@ -1692,9 +1704,10 @@ static citp_fdinfo_p citp_fdtable_closing_wait(unsigned fd, int fdt_locked) Log_V(ci_log("%s: fd=%u", __FUNCTION__, fd)); - again: +again: fdip = *p_fdip; - if( fdip_is_busy(fdip) ) fdip = citp_fdtable_busy_wait(fd, fdt_locked); + if( fdip_is_busy(fdip) ) + fdip = citp_fdtable_busy_wait(fd, fdt_locked); if( fdip_is_closing(fdip) ) { if( fdt_locked ) { /* Need to drop the lock to avoid deadlock with the other thread @@ -1714,8 +1727,10 @@ void __citp_fdtable_reserve(int fd, int protect) CITP_FDTABLE_ASSERT_LOCKED(1); ci_assert_lt((unsigned) fd, citp_fdtable.size); - if( protect ) citp_fdtable_new_fd_set(fd, fdip_reserved, 1); - else fdtable_swap(fd, fdip_reserved, fdip_unknown, 1); + if( protect ) + citp_fdtable_new_fd_set(fd, fdip_reserved, 1); + else + fdtable_swap(fd, fdip_reserved, fdip_unknown, 1); } @@ -1743,8 +1758,7 @@ int citp_ep_dup_fcntl_dup_cloexec(int oldfd, long arg) ** Why do these live here? Because they need to hack into the low-level ** dirty nastiness of the fdtable. */ -int citp_ep_dup(unsigned oldfd, int (*syscall)(int oldfd, long arg), - long arg) +int citp_ep_dup(unsigned oldfd, int (*syscall)(int oldfd, long arg), long arg) { /* This implements dup(oldfd) and fcntl(oldfd, F_DUPFD, arg). 
*/ @@ -1756,8 +1770,8 @@ int citp_ep_dup(unsigned oldfd, int (*syscall)(int oldfd, long arg), Log_V(log("%s(%d)", __FUNCTION__, oldfd)); - if(CI_UNLIKELY( citp.init_level < CITP_INIT_FDTABLE || - oo_per_thread_get()->in_vfork_child )) + if( CI_UNLIKELY(citp.init_level < CITP_INIT_FDTABLE || + oo_per_thread_get()->in_vfork_child) ) /* Lib not initialised, so no U/L state, and therefore system dup() ** will do just fine. */ return syscall(oldfd, arg); @@ -1773,7 +1787,7 @@ int citp_ep_dup(unsigned oldfd, int (*syscall)(int oldfd, long arg), } p_oldfdip = &citp_fdtable.table[oldfd].fdip; - again: +again: oldfdip = *p_oldfdip; if( fdip_is_busy(oldfdip) ) oldfdip = citp_fdtable_busy_wait(oldfd, 0); @@ -1814,18 +1828,20 @@ int citp_ep_dup(unsigned oldfd, int (*syscall)(int oldfd, long arg), return -1; } - if( fdtable_strict() ) CITP_FDTABLE_LOCK(); + if( fdtable_strict() ) + CITP_FDTABLE_LOCK(); newfd = syscall(oldfd, arg); if( newfd >= 0 ) citp_fdtable_new_fd_set(newfd, fdip_busy, fdtable_strict()); - if( fdtable_strict() ) CITP_FDTABLE_UNLOCK(); + if( fdtable_strict() ) + CITP_FDTABLE_UNLOCK(); if( newfd >= 0 ) { citp_fdtable_insert(newfdi, newfd, 0); newfdi = 0; } - } - else { - if( fdtable_strict() ) CITP_FDTABLE_LOCK(); + } else { + if( fdtable_strict() ) + CITP_FDTABLE_LOCK(); newfd = syscall(oldfd, arg); if( newfd >= 0 && newfd < citp_fdtable.inited_count ) { /* Mark newfd as unknown. When used, it'll get probed. 
@@ -1838,19 +1854,21 @@ int citp_ep_dup(unsigned oldfd, int (*syscall)(int oldfd, long arg), */ citp_fdtable_new_fd_set(newfd, fdip_unknown, fdtable_strict()); } - if( fdtable_strict() ) CITP_FDTABLE_UNLOCK(); + if( fdtable_strict() ) + CITP_FDTABLE_UNLOCK(); } citp_fdtable_busy_clear(oldfd, oldfdip, 0); - if( newfdi ) citp_fdinfo_free(newfdi); + if( newfdi ) + citp_fdinfo_free(newfdi); return newfd; } -static void dup2_complete(citp_fdinfo* prev_tofdi, - citp_fdinfo_p prev_tofdip, int fdt_locked) +static void dup2_complete( + citp_fdinfo* prev_tofdi, citp_fdinfo_p prev_tofdip, int fdt_locked) { - volatile citp_fdinfo_p *p_fromfdip; + volatile citp_fdinfo_p* p_fromfdip; unsigned fromfd = prev_tofdi->on_rcz.dup3_args.fd; unsigned tofd = prev_tofdi->fd; citp_fdinfo_p fromfdip; @@ -1865,7 +1883,7 @@ static void dup2_complete(citp_fdinfo* prev_tofdi, citp_fdinfo* fromfdi; p_fromfdip = &citp_fdtable.table[fromfd].fdip; - lock_fromfdip_again: +lock_fromfdip_again: fromfdip = *p_fromfdip; if( fdip_is_busy(fromfdip) ) fromfdip = citp_fdtable_busy_wait(fromfd, fdt_locked); @@ -1878,9 +1896,11 @@ static void dup2_complete(citp_fdinfo* prev_tofdi, #if CI_CFG_FD_CACHING /* Need to check in case this sucker's cached */ if( fdip_is_unknown(fromfdip) ) { - if( !fdt_locked ) CITP_FDTABLE_LOCK(); + if( ! fdt_locked ) + CITP_FDTABLE_LOCK(); citp_fdtable_probe_locked(fromfd, CI_FALSE, CI_FALSE, &fromfdi); - if( !fdt_locked ) CITP_FDTABLE_UNLOCK(); + if( ! 
fdt_locked ) + CITP_FDTABLE_UNLOCK(); if( fromfdi == &citp_the_closed_fd ) { prev_tofdi->on_rcz.dup2_result = -EBADF; ci_wmb(); @@ -1907,16 +1927,16 @@ static void dup2_complete(citp_fdinfo* prev_tofdi, } ci_assert(fdip_is_normal(fromfdip) | fdip_is_passthru(fromfdip) | - fdip_is_unknown(fromfdip)); + fdip_is_unknown(fromfdip)); if( fdip_is_normal(fromfdip) && - (((fromfdi = fdip_to_fdi(fromfdip))->protocol->type) == CITP_EPOLL_FD) ) { + (((fromfdi = fdip_to_fdi(fromfdip))->protocol->type) == + CITP_EPOLL_FD) ) { citp_fdinfo* newfdi = citp_fdinfo_get_ops(fromfdi)->dup(fromfdi); if( newfdi ) { citp_fdinfo_init(newfdi, fdip_to_fdi(fromfdip)->protocol); citp_fdtable_insert(newfdi, tofd, fdt_locked); - } - else { + } else { /* Out of memory. Can't probe epoll1 fd later on, so fail. */ citp_fdtable_busy_clear(fromfd, fromfdip, fdt_locked); prev_tofdi->on_rcz.dup2_result = -ENOMEM; @@ -1924,8 +1944,7 @@ static void dup2_complete(citp_fdinfo* prev_tofdi, prev_tofdi->on_ref_count_zero = FDI_ON_RCZ_DONE; return; } - } - else { + } else { /* Mark newfd as unknown. When used, it'll get probed. * * We are not just being lazy here: Setting to unknown rather than @@ -2001,7 +2020,7 @@ int citp_ep_dup3(unsigned fromfd, unsigned tofd, int flags) pthread_mutex_lock(&citp_dup_lock); CITP_FDTABLE_LOCK(); p_tofdip = &citp_fdtable.table[tofd].fdip; - lock_tofdip_again: +lock_tofdip_again: tofdip = *p_tofdip; if( fdip_is_busy(tofdip) ) tofdip = citp_fdtable_busy_wait(tofd, 1); @@ -2011,7 +2030,7 @@ int citp_ep_dup3(unsigned fromfd, unsigned tofd, int flags) /* ?? 
FIXME: we can't cope with this at the moment */ CITP_FDTABLE_UNLOCK(); Log_U(log("%s(%d, %d): target is reserved, see EF_ONLOAD_FD_BASE", - __FUNCTION__, fromfd, tofd)); + __FUNCTION__, fromfd, tofd)); errno = EBUSY; tofd = -1; goto out; @@ -2020,7 +2039,7 @@ int citp_ep_dup3(unsigned fromfd, unsigned tofd, int flags) goto lock_tofdip_again; CITP_FDTABLE_UNLOCK(); ci_assert(fdip_is_normal(tofdip) | fdip_is_passthru(tofdip) | - fdip_is_unknown(tofdip)); + fdip_is_unknown(tofdip)); if( fdip_is_normal(tofdip) ) { /* We're duping onto a user-level socket. */ @@ -2055,7 +2074,7 @@ int citp_ep_dup3(unsigned fromfd, unsigned tofd, int flags) while( tofdi->on_ref_count_zero != FDI_ON_RCZ_DONE ) { if( ci_is_multithreaded() && i % 10000 == 9999 ) { pthread_t pth = tofdi->thread_id; - if( pth != pthread_self() && pth != PTHREAD_NULL ) { + if( pth != pthread_self() && pth != PTHREAD_NULL ) { pthread_kill(pth, SIGONLOAD); sleep(1); } @@ -2073,8 +2092,7 @@ int citp_ep_dup3(unsigned fromfd, unsigned tofd, int flags) CI_DEBUG(tofdi->on_ref_count_zero = FDI_ON_RCZ_NONE); citp_fdtable_busy_clear(tofd, tofdip, 0); tofd = -1; - } - else { + } else { ci_assert(tofdi->on_rcz.dup2_result == tofd); citp_fdinfo_get_ops(tofdi)->dtor(tofdi, 0); citp_fdinfo_free(tofdi); @@ -2096,12 +2114,11 @@ int citp_ep_dup3(unsigned fromfd, unsigned tofd, int flags) errno = -fdi.on_rcz.dup2_result; citp_fdtable_busy_clear(tofd, tofdip, 0); tofd = -1; - } - else + } else ci_assert(fdi.on_rcz.dup2_result == tofd); } - out: +out: pthread_mutex_unlock(&citp_dup_lock); return tofd; } @@ -2153,14 +2170,15 @@ int citp_ep_close(unsigned fd) got_lock = 1; p_fdip = &citp_fdtable.table[fd].fdip; - again: +again: fdip = *p_fdip; - if( fdip_is_busy(fdip) ) fdip = citp_fdtable_busy_wait(fd, 1); + if( fdip_is_busy(fdip) ) + fdip = citp_fdtable_busy_wait(fd, 1); if( fdip_is_closing(fdip) | fdip_is_reserved(fdip) ) { /* Concurrent close or attempt to close reserved. 
*/ Log_V(ci_log("%s: fd=%d closing=%d reserved=%d", __FUNCTION__, fd, - fdip_is_closing(fdip), fdip_is_reserved(fdip))); + fdip_is_closing(fdip), fdip_is_reserved(fdip))); errno = EBADF; rc = -1; goto done; @@ -2181,8 +2199,8 @@ int citp_ep_close(unsigned fd) } #endif - ci_assert(fdip_is_normal(fdip) | fdip_is_passthru(fdip) | - fdip_is_unknown(fdip)); + ci_assert( + fdip_is_normal(fdip) | fdip_is_passthru(fdip) | fdip_is_unknown(fdip)); /* Swap in the "closed" pseudo-fdinfo. This lets any other thread know ** that we're in the middle of closing this fd. @@ -2227,22 +2245,21 @@ int citp_ep_close(unsigned fd) citp_fdinfo_release_ref(fdi, 0); rc = 0; - } - else { - ci_assert(fdip_is_passthru(fdip) || - fdip_is_unknown(fdip)); + } else { + ci_assert(fdip_is_passthru(fdip) || fdip_is_unknown(fdip)); if( ! fdtable_strict() ) { CITP_FDTABLE_UNLOCK(); got_lock = 0; } Log_V(ci_log("%s: fd=%d passthru=%d unknown=%d", __FUNCTION__, fd, - fdip_is_passthru(fdip), fdip_is_unknown(fdip))); + fdip_is_passthru(fdip), fdip_is_unknown(fdip))); fdtable_swap(fd, fdip_closing, fdip_unknown, fdtable_strict()); rc = ci_tcp_helper_close_no_trampoline(fd); } - done: - if( got_lock ) CITP_FDTABLE_UNLOCK(); +done: + if( got_lock ) + CITP_FDTABLE_UNLOCK(); FDTABLE_ASSERT_VALID(); return rc; } @@ -2252,8 +2269,7 @@ int citp_ep_close(unsigned fd) * or from citp_fdtable_lookup_fast(). The _fast() variant is used by * read/write/recvmsg/sendto/... socket call interceptors. */ int citp_reprobe_moved_common(citp_fdinfo* fdinfo, int from_fast_lookup, - int fdip_is_already_busy, - citp_fdinfo** fdinfo_out) + int fdip_is_already_busy, citp_fdinfo** fdinfo_out) { int fd = fdinfo->fd; citp_fdinfo* new_fdinfo = NULL; @@ -2264,14 +2280,16 @@ int citp_reprobe_moved_common(citp_fdinfo* fdinfo, int from_fast_lookup, if( ! 
fdip_is_already_busy ) { volatile citp_fdinfo_p* p_fdip; citp_fdinfo_p fdip; - + p_fdip = &citp_fdtable.table[fd].fdip; - again: + again: fdip = *p_fdip; - if( fdip_is_busy(fdip) ) fdip = citp_fdtable_busy_wait(fd, 1); - ci_assert( fdip_is_normal(fdip) || fdip_is_passthru(fdip) ); - if( fdip_cas_fail(p_fdip, fdip, fdip_busy) ) goto again; - + if( fdip_is_busy(fdip) ) + fdip = citp_fdtable_busy_wait(fd, 1); + ci_assert(fdip_is_normal(fdip) || fdip_is_passthru(fdip)); + if( fdip_cas_fail(p_fdip, fdip, fdip_busy) ) + goto again; + /* Possibly, a parrallel thread have already called * citp_reprobe_moved() for us. */ if( fdip_is_passthru(fdip) ) { @@ -2280,16 +2298,15 @@ int citp_reprobe_moved_common(citp_fdinfo* fdinfo, int from_fast_lookup, citp_fdinfo_ref(new_fdinfo); goto done; } - ci_assert( fdip_is_normal(fdip) ); + ci_assert(fdip_is_normal(fdip)); new_fdinfo = fdip_to_fdi(fdip); - if( new_fdinfo != fdinfo) { + if( new_fdinfo != fdinfo ) { citp_fdtable_busy_clear(fd, fdip, 1); if( new_fdinfo != NULL ) citp_fdinfo_ref(new_fdinfo); goto done; } - } - else + } else ci_assert(fdip_is_busy(citp_fdtable.table[fd].fdip)); /* re-probe new fd */ @@ -2301,8 +2318,7 @@ int citp_reprobe_moved_common(citp_fdinfo* fdinfo, int from_fast_lookup, if( epoll_fdi->protocol->type == CITP_EPOLLB_FD ) { citp_epollb_on_handover(epoll_fdi, fdinfo); citp_fdinfo_release_ref(epoll_fdi, 1); - } - else + } else #endif { citp_epoll_on_move(epoll_fdi, fdinfo, new_fdinfo, 1); @@ -2313,7 +2329,7 @@ int citp_reprobe_moved_common(citp_fdinfo* fdinfo, int from_fast_lookup, fdinfo->on_ref_count_zero = FDI_ON_RCZ_MOVED; citp_fdinfo_release_ref(fdinfo, 1); - done: +done: /* One refcount from the caller */ if( from_fast_lookup ) citp_fdinfo_release_ref_fast(fdinfo); @@ -2340,7 +2356,8 @@ void __oo_service_fd(bool fdtable_locked) int fd; ci_assert_equal(citp.onload_fd, -1); - if( ef_onload_driver_open(&fd, OO_STACK_DEV, 1) ) return; + if( ef_onload_driver_open(&fd, OO_STACK_DEV, 1) ) + return; if( 
ci_cas32_succeed(&citp.onload_fd, -1, fd) ) { if( fdtable_locked ) { /* __citp_fdtable_extend() handles citp.onload_fd, so there is no @@ -2349,8 +2366,7 @@ void __oo_service_fd(bool fdtable_locked) __citp_fdtable_extend(fd); else __citp_fdtable_reserve(fd, 0); - } - else { + } else { /* We do not know the current context, so we can't lock fdtable, * or leverage the already-taken lock. * Let's hope that logging happens at start of day, so our fd is @@ -2360,11 +2376,10 @@ void __oo_service_fd(bool fdtable_locked) if( citp_fdtable.table ) { ci_assert_lt(fd, citp_fdtable.size); citp_fdtable.table[citp.onload_fd].fdip = - fdi_to_fdip(&citp_the_reserved_fd); + fdi_to_fdip(&citp_the_reserved_fd); } } - } - else { + } else { /* Unspecialised /dev/onload does not trampoline, * so simple close is OK. */ ci_sys_close(fd); diff --git a/src/lib/transport/unix/internal.h b/src/lib/transport/unix/internal.h index 0b524b38e..02cf9a887 100644 --- a/src/lib/transport/unix/internal.h +++ b/src/lib/transport/unix/internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2003-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author ** \brief @@ -47,7 +47,7 @@ /* Define onload_ functions to use in the library. 
*/ #define CI_MK_DECL(ret, fn, args) extern ret onload_##fn args; -# include +#include /********************************************************************** @@ -55,15 +55,15 @@ */ typedef struct { - int log_fd; - int onload_fd; + int log_fd; + int onload_fd; - ci_uint64 spin_cycles; - ci_uint64 poll_nonblock_fast_cycles; - ci_uint64 poll_fast_cycles; - ci_uint64 select_nonblock_fast_cycles; - ci_uint64 select_fast_cycles; - ci_uint32 cpu_khz; + ci_uint64 spin_cycles; + ci_uint64 poll_nonblock_fast_cycles; + ci_uint64 poll_fast_cycles; + ci_uint64 select_nonblock_fast_cycles; + ci_uint64 select_fast_cycles; + ci_uint32 cpu_khz; enum { CITP_INIT_NONE = 0, @@ -75,43 +75,51 @@ typedef struct { * application request. * CITP_INIT_MAX may contains some post-init actions. */ -#define CITP_INIT_ALL CITP_INIT_PROTO -#define CITP_INIT_MAX CITP_INIT_SIGNALS +#define CITP_INIT_ALL CITP_INIT_PROTO +#define CITP_INIT_MAX CITP_INIT_SIGNALS - char process_path[128]; - char* process_name; + char process_path[128]; + char* process_name; #if CI_CFG_FD_CACHING - ci_uint32 pid; + ci_uint32 pid; #endif } citp_globals_t; -extern citp_globals_t citp CI_HV; +extern citp_globals_t citp CI_HV; -#define fdtable_strict() (CITP_OPTS.fdtable_strict) +#define fdtable_strict() (CITP_OPTS.fdtable_strict) /********************************************************************** ** Atomic ops that are not atomic when app is single-threaded. */ -ci_inline int citp_fdtable_is_mt_safe(void) { +ci_inline int citp_fdtable_is_mt_safe(void) +{ return ! ci_is_multithreaded() || CITP_OPTS.fds_mt_safe; } -ci_inline int citp_fdtable_not_mt_safe(void) { +ci_inline int citp_fdtable_not_mt_safe(void) +{ return ci_is_multithreaded() && ! 
CITP_OPTS.fds_mt_safe; } -ci_inline void oo_atomic_quick_inc (oo_atomic_t *a) { - if( ci_is_multithreaded() ) oo_atomic_inc(a); - else ++a->n; +ci_inline void oo_atomic_quick_inc(oo_atomic_t* a) +{ + if( ci_is_multithreaded() ) + oo_atomic_inc(a); + else + ++a->n; } -ci_inline int oo_atomic_quick_dec_and_test(oo_atomic_t* a) { - if( ci_is_multithreaded() ) return oo_atomic_dec_and_test(a); - else return --a->n == 0; +ci_inline int oo_atomic_quick_dec_and_test(oo_atomic_t* a) +{ + if( ci_is_multithreaded() ) + return oo_atomic_dec_and_test(a); + else + return --a->n == 0; } @@ -131,110 +139,109 @@ typedef struct { */ typedef struct citp_protocol_impl_s citp_protocol_impl; -typedef struct citp_fdinfo_s citp_fdinfo; +typedef struct citp_fdinfo_s citp_fdinfo; struct oo_ul_poll_state; struct oo_ul_select_state; struct citp_epoll_member; struct oo_ul_epoll_state; typedef struct { - int (*socket )(int domain, int type, int protocol); - citp_fdinfo* - (*dup )(citp_fdinfo*); + int (*socket)(int domain, int type, int protocol); + citp_fdinfo* (*dup)(citp_fdinfo*); #if CI_CFG_FD_CACHING - void (*close )(citp_fdinfo*); + void (*close)(citp_fdinfo*); #endif - void (*dtor )(citp_fdinfo*, int fdt_locked); - int (*bind )(citp_fdinfo*, const struct sockaddr*, socklen_t); - int (*listen )(citp_fdinfo*, int); - int (*accept )(citp_fdinfo*, struct sockaddr*, socklen_t*, int flags, - citp_lib_context_t*); - int (*connect )(citp_fdinfo*, const struct sockaddr*, socklen_t, - citp_lib_context_t*); - int (*shutdown )(citp_fdinfo*, int); - int (*getsockname )(citp_fdinfo*, struct sockaddr*, socklen_t*); - int (*getpeername )(citp_fdinfo*, struct sockaddr*, socklen_t*); - int (*getsockopt )(citp_fdinfo*, int, int, void*, socklen_t*); - int (*setsockopt )(citp_fdinfo*, int, int, const void*, socklen_t); - int (*recv )(citp_fdinfo*, struct msghdr*, int); - int (*recvmmsg )(citp_fdinfo*, struct mmsghdr*, unsigned, int, - ci_recvmmsg_timespec*); - int (*send )(citp_fdinfo*, const struct msghdr*, 
int); - int (*sendmmsg )(citp_fdinfo*, struct mmsghdr*, unsigned, int); - int (*fcntl )(citp_fdinfo*, int, long); - int (*ioctl )(citp_fdinfo*, int, void *); + void (*dtor)(citp_fdinfo*, int fdt_locked); + int (*bind)(citp_fdinfo*, const struct sockaddr*, socklen_t); + int (*listen)(citp_fdinfo*, int); + int (*accept)(citp_fdinfo*, struct sockaddr*, socklen_t*, int flags, + citp_lib_context_t*); + int (*connect)( + citp_fdinfo*, const struct sockaddr*, socklen_t, citp_lib_context_t*); + int (*shutdown)(citp_fdinfo*, int); + int (*getsockname)(citp_fdinfo*, struct sockaddr*, socklen_t*); + int (*getpeername)(citp_fdinfo*, struct sockaddr*, socklen_t*); + int (*getsockopt)(citp_fdinfo*, int, int, void*, socklen_t*); + int (*setsockopt)(citp_fdinfo*, int, int, const void*, socklen_t); + int (*recv)(citp_fdinfo*, struct msghdr*, int); + int (*recvmmsg)( + citp_fdinfo*, struct mmsghdr*, unsigned, int, ci_recvmmsg_timespec*); + int (*send)(citp_fdinfo*, const struct msghdr*, int); + int (*sendmmsg)(citp_fdinfo*, struct mmsghdr*, unsigned, int); + int (*fcntl)(citp_fdinfo*, int, long); + int (*ioctl)(citp_fdinfo*, int, void*); /* poll() and select() return "I've handled it" bool */ - int (*select )(citp_fdinfo*, int*, int, int, int, - struct oo_ul_select_state*); - int (*poll )(citp_fdinfo*, struct pollfd*, struct oo_ul_poll_state*); + int (*select)(citp_fdinfo*, int*, int, int, int, struct oo_ul_select_state*); + int (*poll)(citp_fdinfo*, struct pollfd*, struct oo_ul_poll_state*); /* epoll() and sleep_seq() should be present both or none. 
* epoll() returns "poll again" bool */ - int (*epoll )(citp_fdinfo*, struct citp_epoll_member* eitem, - struct oo_ul_epoll_state*, int* stored_event); + int (*epoll)(citp_fdinfo*, struct citp_epoll_member* eitem, + struct oo_ul_epoll_state*, int* stored_event); ci_uint64 (*sleep_seq)(citp_fdinfo*); - int (*zc_send )(citp_fdinfo*, struct onload_zc_mmsg*, int); - int (*zc_recv )(citp_fdinfo*, struct onload_zc_recv_args*); - int (*zc_recv_filter)(citp_fdinfo*, onload_zc_recv_filter_callback, - void*, int); - int (*recvmsg_kernel)(citp_fdinfo*, struct msghdr*, int); - int (*tmpl_alloc)(citp_fdinfo*, const struct iovec*, int, - struct oo_msg_template**, unsigned); - int (*tmpl_update)(citp_fdinfo*, struct oo_msg_template*, - const struct onload_template_msg_update_iovec*, int, - unsigned); - int (*tmpl_abort)(citp_fdinfo*, struct oo_msg_template*); + int (*zc_send)(citp_fdinfo*, struct onload_zc_mmsg*, int); + int (*zc_recv)(citp_fdinfo*, struct onload_zc_recv_args*); + int (*zc_recv_filter)( + citp_fdinfo*, onload_zc_recv_filter_callback, void*, int); + int (*recvmsg_kernel)(citp_fdinfo*, struct msghdr*, int); + int (*tmpl_alloc)(citp_fdinfo*, const struct iovec*, int, + struct oo_msg_template**, unsigned); + int (*tmpl_update)(citp_fdinfo*, struct oo_msg_template*, + const struct onload_template_msg_update_iovec*, int, unsigned); + int (*tmpl_abort)(citp_fdinfo*, struct oo_msg_template*); #if CI_CFG_TIMESTAMPING /* Examines receive queue up to timespec limit, and fills in first_out * with the timestamp of the first data available, and bytes_out with the * number of bytes available to be read before reaching limit. 
*/ - int (*ordered_data)(citp_fdinfo*, struct timespec* limit, - struct timespec* first_out, int* bytes_out); + int (*ordered_data)(citp_fdinfo*, struct timespec* limit, + struct timespec* first_out, int* bytes_out); #endif int (*is_spinning)(citp_fdinfo*); #if CI_CFG_FD_CACHING - int (*cache )(citp_fdinfo*); + int (*cache)(citp_fdinfo*); #endif - enum onload_delegated_send_rc - (*dsend_prepare)(citp_fdinfo*, int size, unsigned flags, - struct onload_delegated_send* out); - int (*dsend_complete)(citp_fdinfo*, const ci_iovec *iov, int iovlen, - int flags); - int (*dsend_cancel)(citp_fdinfo*); + enum onload_delegated_send_rc (*dsend_prepare)(citp_fdinfo*, int size, + unsigned flags, struct onload_delegated_send* out); + int (*dsend_complete)( + citp_fdinfo*, const ci_iovec* iov, int iovlen, int flags); + int (*dsend_cancel)(citp_fdinfo*); } citp_fdops; struct citp_protocol_impl_s { - int type; -# define CITP_CI_SOCKET 0 -# define CITP_TCP_SOCKET 1 -# define CITP_UDP_SOCKET 2 -# define CITP_PASSTHROUGH_FD 3 -# define CITP_EPOLL_FD 4 -# define CITP_EPOLLB_FD 5 -# define CITP_PIPE_FD 6 - - citp_fdops ops; - - ci_dllink link; + int type; +#define CITP_CI_SOCKET 0 +#define CITP_TCP_SOCKET 1 +#define CITP_UDP_SOCKET 2 +#define CITP_PASSTHROUGH_FD 3 +#define CITP_EPOLL_FD 4 +#define CITP_EPOLLB_FD 5 +#define CITP_PIPE_FD 6 + + citp_fdops ops; + + ci_dllink link; }; -#define CITP_PROTOCOL_IMPL_ASSERT_VALID citp_protocol_impl_assert_valid +#define CITP_PROTOCOL_IMPL_ASSERT_VALID citp_protocol_impl_assert_valid extern void citp_protocol_impl_assert_valid(citp_protocol_impl*) CI_HF; -extern void citp_protocol_manager_add(citp_protocol_impl*, - int is_stream) CI_HF; +extern void citp_protocol_manager_add( + citp_protocol_impl*, int is_stream) CI_HF; -extern int citp_protocol_manager_create_socket(int dom, - int type, int proto) CI_HF; +extern int citp_protocol_manager_create_socket( + int dom, int type, int proto) CI_HF; + +extern int dpdk_init(void); +extern int 
dpdk_cleanup(void); #define citp_protocol_impl_get_ops(p) (&(p)->ops) -#define citp_protocol_impl_get_type(p) ((p)->type) +#define citp_protocol_impl_get_type(p) ((p)->type) /*! Call not handled - this is not the same as a handover! */ -#define CITP_NOT_HANDLED -2 +#define CITP_NOT_HANDLED -2 /******************************************************************************* @@ -242,12 +249,12 @@ extern int citp_protocol_manager_create_socket(int dom, */ #if CI_CFG_FDTABLE_CHECKS extern void citp_fdtable_assert_valid(void) CI_HF; -# define FDTABLE_ASSERT_VALID() citp_fdtable_assert_valid() +#define FDTABLE_ASSERT_VALID() citp_fdtable_assert_valid() #else -# define FDTABLE_ASSERT_VALID() +#define FDTABLE_ASSERT_VALID() #endif -#define PTHREAD_NULL ((pthread_t)(-1L)) +#define PTHREAD_NULL ((pthread_t) (-1L)) /********************************************************************** ** File descriptor info. @@ -265,49 +272,49 @@ extern void citp_fdtable_assert_valid(void) CI_HF; */ struct citp_fdinfo_s { /* Sequence no. Used by epoll to detect change in meaning of an fd. */ - ci_uint64 seq; + ci_uint64 seq; /* Seq no. of epoll fd this fd has been added to. */ - ci_uint64 epoll_fd_seq; + ci_uint64 epoll_fd_seq; /* The implementation for this fdinfo. */ - citp_protocol_impl* protocol; + citp_protocol_impl* protocol; /* Number of threads using this (+1 if it's in the table). */ - oo_atomic_t ref_count; + oo_atomic_t ref_count; union { struct { - unsigned fd; - int flags; + unsigned fd; + int flags; } dup3_args; - int dup2_result; - int handover_nonb_switch; + int dup2_result; + int handover_nonb_switch; } on_rcz; /* The O/S file descriptor. */ - int fd; + int fd; /* epoll fd this fd has been added to, or -1 if not in an epoll set. */ - int epoll_fd; + int epoll_fd; /* thread id using this fdi */ - pthread_t thread_id; + pthread_t thread_id; /* What to do when the ref count goes to zero. 
*/ -# define FDI_ON_RCZ_NONE 0 -# define FDI_ON_RCZ_CLOSE 1 -# define FDI_ON_RCZ_DUP2 2 -# define FDI_ON_RCZ_HANDOVER 3 -# define FDI_ON_RCZ_MOVED 5 -# define FDI_ON_RCZ_DONE 6 - volatile char on_ref_count_zero; +#define FDI_ON_RCZ_NONE 0 +#define FDI_ON_RCZ_CLOSE 1 +#define FDI_ON_RCZ_DUP2 2 +#define FDI_ON_RCZ_HANDOVER 3 +#define FDI_ON_RCZ_MOVED 5 +#define FDI_ON_RCZ_DONE 6 + volatile char on_ref_count_zero; #if CI_CFG_FD_CACHING /* Non-zero if this fd is eligable for caching (i.e. if created via * accept). */ - char can_cache; + char can_cache; #endif /* This bit is redundant -- can be calculated from other state. However, @@ -321,44 +328,45 @@ struct citp_fdinfo_s { * 'chars' rather than bit-fields because this is quicker on * architectures that allow byte- aligned access (e.g. x86). */ - char is_special; + char is_special; }; extern ci_uint64 fdtable_seq_no; -ci_inline void citp_fdinfo_init(citp_fdinfo* fdi, citp_protocol_impl* p) { +ci_inline void citp_fdinfo_init(citp_fdinfo* fdi, citp_protocol_impl* p) +{ /* The rest of the initialisation is done in citp_fdtable_insert(). 
*/ oo_atomic_set(&fdi->ref_count, 1); #if CI_CFG_FD_CACHING fdi->can_cache = 0; #endif - fdi->protocol = p; - fdi->seq = fdtable_seq_no++; - fdi->epoll_fd = -1; + fdi->protocol = p; + fdi->seq = fdtable_seq_no++; + fdi->epoll_fd = -1; fdi->thread_id = PTHREAD_NULL; } ci_inline int citp_sys_socket(int domain, int type, int protocol) { int s = -1; - s = ci_sys_socket(domain, type | SOCK_CLOEXEC, protocol); + s = ci_sys_socket(domain, type | SOCK_CLOEXEC, protocol); return s; } -#define CITP_FDINFO_ASSERT_VALID citp_fdinfo_assert_valid +#define CITP_FDINFO_ASSERT_VALID citp_fdinfo_assert_valid extern void citp_fdinfo_assert_valid(citp_fdinfo*) CI_HF; -#define citp_fdinfo_get_ops(fdinfo) \ +#define citp_fdinfo_get_ops(fdinfo) \ (citp_protocol_impl_get_ops((fdinfo)->protocol)) -#define citp_fdinfo_get_type(fdinfo) \ +#define citp_fdinfo_get_type(fdinfo) \ (citp_protocol_impl_get_type((fdinfo)->protocol)) -extern citp_fdinfo citp_the_closed_fd CI_HV; -extern citp_fdinfo citp_the_reserved_fd CI_HV; +extern citp_fdinfo citp_the_closed_fd CI_HV; +extern citp_fdinfo citp_the_reserved_fd CI_HV; extern citp_protocol_impl citp_closed_protocol_impl CI_HV; @@ -368,27 +376,27 @@ extern citp_protocol_impl citp_closed_protocol_impl CI_HV; */ extern void __citp_fdinfo_ref_count_zero(citp_fdinfo*, int fdt_locked) CI_HF; -#define citp_fdinfo_ref(fdi) \ +#define citp_fdinfo_ref(fdi) \ do { \ oo_atomic_quick_inc(&(fdi)->ref_count); \ if( ci_is_multithreaded() ) \ (fdi)->thread_id = pthread_self(); \ - } while(0) + } while( 0 ) /*! Release one ref count. When the ref count hits zero will cause - * release of resources, handles etc. + * release of resources, handles etc. * Call with [fdt_locked] = 0 if the fd table lock is NOT held (this - * is the legacy operation) or [fdt_locked] != 0 if * the fd table + * is the legacy operation) or [fdt_locked] != 0 if * the fd table * lock IS held. 
*/ -ci_inline void citp_fdinfo_release_ref(citp_fdinfo* fdinfo, - int fdt_locked) { +ci_inline void citp_fdinfo_release_ref(citp_fdinfo* fdinfo, int fdt_locked) +{ /* If we're releasing a reference from one of our "magic" fd's, then we * should never drop the ref-count below 1,000,000 */ - ci_assert (((fdinfo != &citp_the_closed_fd) && - (fdinfo != &citp_the_reserved_fd)) || - (oo_atomic_read (&fdinfo->ref_count) > 1000000)); + ci_assert( + ((fdinfo != &citp_the_closed_fd) && (fdinfo != &citp_the_reserved_fd)) || + (oo_atomic_read(&fdinfo->ref_count) > 1000000)); ci_assert_gt(oo_atomic_read(&fdinfo->ref_count), 0); /* We might call ref_count_zero, which locks fdtable. Assert that @@ -400,18 +408,20 @@ ci_inline void citp_fdinfo_release_ref(citp_fdinfo* fdinfo, } /*! Release reference obtained by calling citp_fdtable_lookup_fast(). */ -ci_inline void citp_fdinfo_release_ref_fast(citp_fdinfo* fdinfo) { +ci_inline void citp_fdinfo_release_ref_fast(citp_fdinfo* fdinfo) +{ if( citp_fdtable_not_mt_safe() ) citp_fdinfo_release_ref(fdinfo, 0); } /*! Take the same number of references as with citp_fdtable_lookup_fast(). */ -ci_inline void citp_fdinfo_ref_fast(citp_fdinfo* fdinfo) { +ci_inline void citp_fdinfo_ref_fast(citp_fdinfo* fdinfo) +{ if( citp_fdtable_not_mt_safe() ) citp_fdinfo_ref(fdinfo); } /* Called when refcount reaches zero. */ -# define citp_fdinfo_free CI_FREE_OBJ +#define citp_fdinfo_free CI_FREE_OBJ /* Hands-over the socket to the kernel. That is, it replaces the @@ -428,7 +438,7 @@ ci_inline void citp_fdinfo_ref_fast(citp_fdinfo* fdinfo) { ** If [nonb_switch == 0] O_NONBLOCK is cleared for the socket. If ** [nonb_switch > 0] if is set. Otherwise nothing is done. 
*/ -extern void citp_fdinfo_handover(citp_fdinfo* fdi, int nonb_switch) CI_HF; +extern void citp_fdinfo_handover(citp_fdinfo* fdi, int nonb_switch) CI_HF; ci_inline int citp_fdinfo_is_socket(const citp_fdinfo* fdi) { @@ -436,7 +446,7 @@ ci_inline int citp_fdinfo_is_socket(const citp_fdinfo* fdi) (fdi->protocol->type == CITP_UDP_SOCKET); } -extern int citp_ep_dup(unsigned oldfd, int (*syscall)(int,long), long) CI_HF; +extern int citp_ep_dup(unsigned oldfd, int (*syscall)(int, long), long) CI_HF; /* One of these should be used as an arg to citp_ep_dup(). */ extern int citp_ep_dup_dup(int oldfd, long arg_unused) CI_HF; @@ -450,66 +460,66 @@ extern int citp_ep_close(unsigned fd) CI_HF; /********************************************************************** ** exec() support */ -extern int __citp_exec_restore( int fd ) CI_HF; +extern int __citp_exec_restore(int fd) CI_HF; /********************************************************************** ** Transport implementations. */ -extern citp_protocol_impl citp_tcp_protocol_impl CI_HV; -extern citp_protocol_impl citp_udp_protocol_impl CI_HV; +extern citp_protocol_impl citp_tcp_protocol_impl CI_HV; +extern citp_protocol_impl citp_udp_protocol_impl CI_HV; extern citp_protocol_impl citp_epoll_protocol_impl CI_HV; #if CI_CFG_EPOLL2 extern citp_protocol_impl citp_epollb_protocol_impl CI_HV; #endif -extern citp_protocol_impl citp_pipe_read_protocol_impl CI_HV; +extern citp_protocol_impl citp_pipe_read_protocol_impl CI_HV; extern citp_protocol_impl citp_pipe_write_protocol_impl CI_HV; extern citp_protocol_impl citp_passthrough_protocol_impl; typedef struct { - citp_fdinfo fdinfo; - citp_socket sock; + citp_fdinfo fdinfo; + citp_socket sock; } citp_sock_fdi; -#define fdi_to_sock_fdi(fdi) CI_CONTAINER(citp_sock_fdi, fdinfo, (fdi)) -#define fdi_to_socket(fdi) (&fdi_to_sock_fdi(fdi)->sock) +#define fdi_to_sock_fdi(fdi) CI_CONTAINER(citp_sock_fdi, fdinfo, (fdi)) +#define fdi_to_socket(fdi) (&fdi_to_sock_fdi(fdi)->sock) typedef struct { 
- citp_fdinfo fdinfo; - ci_netif* netif; + citp_fdinfo fdinfo; + ci_netif* netif; citp_waitable* ep; - int os_socket; + int os_socket; } citp_alien_fdi; -#define fdi_to_alien_fdi(fdi) CI_CONTAINER(citp_alien_fdi, fdinfo, (fdi)) +#define fdi_to_alien_fdi(fdi) CI_CONTAINER(citp_alien_fdi, fdinfo, (fdi)) extern void citp_passthrough_init(citp_alien_fdi* epi); #include typedef struct { - citp_fdinfo fdinfo; - int kepfd; - int is_accel; - - int not_mt_safe; - int have_postponed; - pthread_mutex_t lock_postponed; - struct oo_epoll_item postponed[CI_CFG_EPOLL_MAX_POSTPONED]; + citp_fdinfo fdinfo; + int kepfd; + int is_accel; + + int not_mt_safe; + int have_postponed; + pthread_mutex_t lock_postponed; + struct oo_epoll_item postponed[CI_CFG_EPOLL_MAX_POSTPONED]; } citp_epollb_fdi; -#define fdi_to_epollb_fdi(fdi) CI_CONTAINER(citp_epollb_fdi, fdinfo, (fdi)) -extern void oo_epollb_ctor(citp_epollb_fdi *epi); +#define fdi_to_epollb_fdi(fdi) CI_CONTAINER(citp_epollb_fdi, fdinfo, (fdi)) +extern void oo_epollb_ctor(citp_epollb_fdi* epi); /********************************************************************** ** Netif initialisation (netif_init.c). */ -extern int citp_netif_init_ctor(void) CI_HF; +extern int citp_netif_init_ctor(void) CI_HF; /* Set up fork handling at start-of-day */ -extern int ci_setup_fork(void); +extern int ci_setup_fork(void); /*! Handles user-level netif internals pre bproc_move() */ extern void citp_netif_pre_bproc_move_hook(void) CI_HF; @@ -527,22 +537,22 @@ extern bool have_active_netifs(void); ** Misc. 
*/ -extern void citp_log_fn_ul(const char* msg) CI_HF; -extern void citp_log_fn_drv(const char* msg) CI_HF; +extern void citp_log_fn_ul(const char* msg) CI_HF; +extern void citp_log_fn_drv(const char* msg) CI_HF; extern void citp_setup_logging_prefix(void) CI_HF; -extern int citp_packet_interceptor_startup(void) CI_HF; +extern int citp_packet_interceptor_startup(void) CI_HF; extern int _citp_do_init_inprogress CI_HV; -extern int citp_do_init(int max_init_level) CI_HF; +extern int citp_do_init(int max_init_level) CI_HF; -extern int citp_basic_syscall_init(void) CI_HF; -extern int citp_syscall_init(void) CI_HF; +extern int citp_basic_syscall_init(void) CI_HF; +extern int citp_syscall_init(void) CI_HF; #undef socklen_t -extern citp_fdinfo* citp_tcp_dup(citp_fdinfo* orig_fdi); +extern citp_fdinfo* citp_tcp_dup(citp_fdinfo* orig_fdi); /* Locking order: * - citp_pkt_map_lock is the innermost lock; @@ -554,11 +564,11 @@ extern citp_fdinfo* citp_tcp_dup(citp_fdinfo* orig_fdi); extern pthread_mutex_t citp_dup_lock; extern pthread_mutex_t citp_pkt_map_lock; -extern int citp_timespec_compare(const struct timespec* a, - const struct timespec* b) CI_HF; +extern int citp_timespec_compare( + const struct timespec* a, const struct timespec* b) CI_HF; -extern int citp_oo_timespec_compare(const struct oo_timespec* a, - const struct timespec* b) CI_HF; +extern int citp_oo_timespec_compare( + const struct oo_timespec* a, const struct timespec* b) CI_HF; /********************************************************************** ** fdtable internals. @@ -568,31 +578,32 @@ extern int citp_oo_timespec_compare(const struct oo_timespec* a, ** may be accessed concurrently from multiple threads, and also take ** certain special values. 
*/ -typedef ci_uintptr_t citp_fdinfo_p; - -#define fdip_passthru ((citp_fdinfo_p)1u) -#define fdip_unknown ((citp_fdinfo_p)2u) -#define fdip_busy ((citp_fdinfo_p)3u) -#define fdip_closing fdi_to_fdip(&citp_the_closed_fd) -#define fdip_reserved fdi_to_fdip(&citp_the_reserved_fd) - -#define fdip_is_normal(fdip) (((fdip) & fdip_busy) == 0) -#define fdip_is_busy(fdip) (((fdip) & fdip_busy) == fdip_busy) -#define fdip_is_passthru(fdip) ((fdip) == fdip_passthru) -#define fdip_is_unknown(fdip) ((fdip) == fdip_unknown) -#define fdip_is_closing(fdip) ((fdip) == fdip_closing) -#define fdip_is_reserved(fdip) ((fdip) == fdip_reserved) - -ci_inline citp_fdinfo* fdip_to_fdi(citp_fdinfo_p fdip) { +typedef ci_uintptr_t citp_fdinfo_p; + +#define fdip_passthru ((citp_fdinfo_p) 1u) +#define fdip_unknown ((citp_fdinfo_p) 2u) +#define fdip_busy ((citp_fdinfo_p) 3u) +#define fdip_closing fdi_to_fdip(&citp_the_closed_fd) +#define fdip_reserved fdi_to_fdip(&citp_the_reserved_fd) + +#define fdip_is_normal(fdip) (((fdip) &fdip_busy) == 0) +#define fdip_is_busy(fdip) (((fdip) &fdip_busy) == fdip_busy) +#define fdip_is_passthru(fdip) ((fdip) == fdip_passthru) +#define fdip_is_unknown(fdip) ((fdip) == fdip_unknown) +#define fdip_is_closing(fdip) ((fdip) == fdip_closing) +#define fdip_is_reserved(fdip) ((fdip) == fdip_reserved) + +ci_inline citp_fdinfo* fdip_to_fdi(citp_fdinfo_p fdip) +{ ci_assert(fdip_is_normal(fdip)); ci_assert(fdip); return (citp_fdinfo*) fdip; } -#define fdi_to_fdip(fdi) ((citp_fdinfo_p)(ci_uintptr_t) (fdi)) +#define fdi_to_fdip(fdi) ((citp_fdinfo_p) (ci_uintptr_t) (fdi)) -#define fdip_cas_succeed ci_cas_uintptr_succeed -#define fdip_cas_fail ci_cas_uintptr_fail +#define fdip_cas_succeed ci_cas_uintptr_succeed +#define fdip_cas_fail ci_cas_uintptr_fail typedef struct { @@ -601,31 +612,32 @@ typedef struct { typedef struct { - citp_fdtable_entry* table; - unsigned size; - unsigned inited_count; + citp_fdtable_entry* table; + unsigned size; + unsigned inited_count; } 
citp_fdtable_globals; -extern citp_fdtable_globals citp_fdtable CI_HV; +extern citp_fdtable_globals citp_fdtable CI_HV; /* The following stuff is used to block when an fdtable entry is busy. */ typedef struct { - citp_fdinfo_p next; - oo_rwlock_cond cond; + citp_fdinfo_p next; + oo_rwlock_cond cond; } citp_fdtable_waiter; -#define fdip_to_waiter(fdip) ((citp_fdtable_waiter*)(ci_uintptr_t) \ - ((fdip) & ~(fdip_busy))) +#define fdip_to_waiter(fdip) \ + ((citp_fdtable_waiter*) (ci_uintptr_t) ((fdip) & ~(fdip_busy))) -#define waiter_to_fdip(w) ((citp_fdinfo_p)(ci_uintptr_t)(w) | fdip_busy) +#define waiter_to_fdip(w) ((citp_fdinfo_p) (ci_uintptr_t) (w) | fdip_busy) -extern void __citp_fdtable_busy_clear_slow(unsigned fd, citp_fdinfo_p, - int fdt_locked) CI_HF; +extern void __citp_fdtable_busy_clear_slow( + unsigned fd, citp_fdinfo_p, int fdt_locked) CI_HF; -ci_inline void citp_fdtable_busy_clear(unsigned fd, citp_fdinfo_p fdip, - int fdt_locked) { +ci_inline void citp_fdtable_busy_clear( + unsigned fd, citp_fdinfo_p fdip, int fdt_locked) +{ if( fdip_cas_fail(&citp_fdtable.table[fd].fdip, fdip_busy, fdip) ) __citp_fdtable_busy_clear_slow(fd, fdip, fdt_locked); } @@ -635,8 +647,6 @@ ci_inline void citp_fdtable_busy_clear(unsigned fd, citp_fdinfo_p fdip, extern citp_fdinfo_p citp_fdtable_busy_wait(unsigned fd, int fdt_locked) CI_HF; - - /********************************************************************** ** Transport library user-level lock */ @@ -653,33 +663,37 @@ extern citp_fdinfo_p citp_fdtable_busy_wait(unsigned fd, int fdt_locked) CI_HF; * private to the process for which we can play this game. 
*/ -#define __CITP_LOCK(l) do { \ - if (ci_is_multithreaded()) \ - oo_rwlock_lock_write(l); \ - } while(0) +#define __CITP_LOCK(l) \ + do { \ + if( ci_is_multithreaded() ) \ + oo_rwlock_lock_write(l); \ + } while( 0 ) -#define __CITP_LOCK_RD(l) do { \ - if (ci_is_multithreaded()) \ - oo_rwlock_lock_read(l); \ - } while(0) +#define __CITP_LOCK_RD(l) \ + do { \ + if( ci_is_multithreaded() ) \ + oo_rwlock_lock_read(l); \ + } while( 0 ) -#define __CITP_UNLOCK(l) do { \ - if (ci_is_multithreaded()) \ - oo_rwlock_unlock_write(l);\ - } while(0) +#define __CITP_UNLOCK(l) \ + do { \ + if( ci_is_multithreaded() ) \ + oo_rwlock_unlock_write(l); \ + } while( 0 ) -#define __CITP_UNLOCK_RD(l) do {\ - if (ci_is_multithreaded()) \ - oo_rwlock_unlock_read(l); \ - } while(0) +#define __CITP_UNLOCK_RD(l) \ + do { \ + if( ci_is_multithreaded() ) \ + oo_rwlock_unlock_read(l); \ + } while( 0 ) /* \TODO Add specific unlock read and unlock write operations */ #ifdef NDEBUG -#define CITP_FDTABLE_LOCK() __CITP_LOCK(&citp_ul_lock) -#define CITP_FDTABLE_LOCK_RD() __CITP_LOCK_RD(&citp_ul_lock) -#define CITP_FDTABLE_UNLOCK() __CITP_UNLOCK(&citp_ul_lock) +#define CITP_FDTABLE_LOCK() __CITP_LOCK(&citp_ul_lock) +#define CITP_FDTABLE_LOCK_RD() __CITP_LOCK_RD(&citp_ul_lock) +#define CITP_FDTABLE_UNLOCK() __CITP_UNLOCK(&citp_ul_lock) #define CITP_FDTABLE_UNLOCK_RD() __CITP_UNLOCK_RD(&citp_ul_lock) #define CITP_FDTABLE_ASSERT_LOCKED(fdt_locked) @@ -716,8 +730,8 @@ ci_inline void CITP_FDTABLE_UNLOCK_RD(void) __CITP_UNLOCK_RD(&citp_ul_lock); } -ci_inline void -_CITP_FDTABLE_ASSERT_LOCKED(int fdt_locked, char* file, int line) +ci_inline void _CITP_FDTABLE_ASSERT_LOCKED( + int fdt_locked, char* file, int line) { citp_signal_info* si = &oo_per_thread_get()->sig; if( ! 
fdt_locked ) @@ -725,7 +739,7 @@ _CITP_FDTABLE_ASSERT_LOCKED(int fdt_locked, char* file, int line) _ci_assert(si->c.aflags & OO_SIGNAL_FLAG_FDTABLE_LOCKED, file, line); _ci_assert(si->c.inside_lib, file, line); } -#define CITP_FDTABLE_ASSERT_LOCKED(fdt_locked) \ +#define CITP_FDTABLE_ASSERT_LOCKED(fdt_locked) \ _CITP_FDTABLE_ASSERT_LOCKED(fdt_locked, __FILE__, __LINE__) #define CITP_FDTABLE_ASSERT_LOCKED_RD CITP_FDTABLE_ASSERT_LOCKED(1) @@ -735,7 +749,7 @@ _CITP_FDTABLE_ASSERT_LOCKED(int fdt_locked, char* file, int line) ** File-descriptor table. */ -extern int citp_fdtable_ctor(void) CI_HF; +extern int citp_fdtable_ctor(void) CI_HF; /* Looks up the user-level fdinfo for a given file descriptor. If found, * increments the ref count and returns pointer to the fdinfo. @@ -744,55 +758,56 @@ extern int citp_fdtable_ctor(void) CI_HF; * * Must have "entered" the library before calling this. */ -extern citp_fdinfo* citp_fdtable_lookup(unsigned fd) CI_HF; +extern citp_fdinfo* citp_fdtable_lookup(unsigned fd) CI_HF; /* Faster version of citp_fdtable_lookup(). In most cases (but not always) * avoids entering the library when [fd] is pass-through. 
*/ -extern citp_fdinfo* citp_fdtable_lookup_fast(citp_lib_context_t*, - unsigned fd) CI_HF; +extern citp_fdinfo* citp_fdtable_lookup_fast( + citp_lib_context_t*, unsigned fd) CI_HF; -extern citp_fdinfo* citp_fdtable_lookup_noprobe(unsigned fd, int fdt_locked) CI_HF; +extern citp_fdinfo* citp_fdtable_lookup_noprobe( + unsigned fd, int fdt_locked) CI_HF; -extern int -citp_reprobe_moved_common(citp_fdinfo* fdinfo, int from_fast_lookup, - int fdip_is_already_busy, citp_fdinfo** fdinfo_out); +extern int citp_reprobe_moved_common(citp_fdinfo* fdinfo, int from_fast_lookup, + int fdip_is_already_busy, citp_fdinfo** fdinfo_out); -ci_inline citp_fdinfo* -citp_reprobe_moved(citp_fdinfo* fdinfo, int from_fast_lookup, - int fdip_is_already_busy) +ci_inline citp_fdinfo* citp_reprobe_moved( + citp_fdinfo* fdinfo, int from_fast_lookup, int fdip_is_already_busy) { citp_fdinfo* new_fdinfo; - citp_reprobe_moved_common(fdinfo, from_fast_lookup, fdip_is_already_busy, - &new_fdinfo); + citp_reprobe_moved_common( + fdinfo, from_fast_lookup, fdip_is_already_busy, &new_fdinfo); return new_fdinfo; } #if CI_CFG_FD_CACHING -extern void citp_netif_cache_disable(void) CI_HF; -extern void citp_netif_cache_warn_on_fork(void) CI_HF; +extern void citp_netif_cache_disable(void) CI_HF; +extern void citp_netif_cache_warn_on_fork(void) CI_HF; #endif extern void citp_fdtable_fork_hook(void) CI_HF; -extern citp_fdinfo_p citp_fdtable_new_fd_set(unsigned fd, citp_fdinfo_p, - int fdt_locked) CI_HF; -extern void citp_fdtable_insert(citp_fdinfo*, - unsigned fd, int fdt_locked) CI_HF; -extern void __citp_fdtable_reserve(int fd, int reserve) CI_HF; - - /* Marks [fd] as reserved, so any external attempt to use it will give - ** EBADF. Caller should ensure that the fdtable lock is held (or can be - ** called at init time without the lock). - ** - ** If [reserve] is zero, the entry is un-reserved. 
- */ - - -ci_inline bool citp_fd_is_special(int fd) { - return fd == citp.log_fd || fd == citp.onload_fd; +extern citp_fdinfo_p citp_fdtable_new_fd_set( + unsigned fd, citp_fdinfo_p, int fdt_locked) CI_HF; +extern void citp_fdtable_insert( + citp_fdinfo*, unsigned fd, int fdt_locked) CI_HF; +extern void __citp_fdtable_reserve(int fd, int reserve) CI_HF; + +/* Marks [fd] as reserved, so any external attempt to use it will give +** EBADF. Caller should ensure that the fdtable lock is held (or can be +** called at init time without the lock). +** +** If [reserve] is zero, the entry is un-reserved. +*/ + + +ci_inline bool citp_fd_is_special(int fd) +{ + return fd == citp.log_fd || fd == citp.onload_fd; } /* Extend the initialisation of the FD table, marking each FD as unknown */ -ci_inline void __citp_fdtable_extend(unsigned fd) { +ci_inline void __citp_fdtable_extend(unsigned fd) +{ unsigned i, max; CITP_FDTABLE_ASSERT_LOCKED(1); @@ -804,9 +819,9 @@ ci_inline void __citp_fdtable_extend(unsigned fd) { if( max > citp_fdtable.inited_count ) { for( i = citp_fdtable.inited_count; i < max; ++i ) { if( ! citp_fd_is_special(i) ) - citp_fdtable.table[i].fdip = fdip_unknown; + citp_fdtable.table[i].fdip = fdip_unknown; else - citp_fdtable.table[i].fdip = fdi_to_fdip(&citp_the_reserved_fd); + citp_fdtable.table[i].fdip = fdi_to_fdip(&citp_the_reserved_fd); } ci_wmb(); citp_fdtable.inited_count = max; @@ -815,30 +830,27 @@ ci_inline void __citp_fdtable_extend(unsigned fd) { /*! Marks an fdtable entry as pass-through. */ -ci_inline void citp_fdtable_passthru(int fd, int fdt_locked) { +ci_inline void citp_fdtable_passthru(int fd, int fdt_locked) +{ if( fd >= 0 && fd < citp_fdtable.inited_count && - !oo_per_thread_get()->in_vfork_child ) + ! 
oo_per_thread_get()->in_vfork_child ) citp_fdtable_new_fd_set(fd, fdip_passthru, fdt_locked); } -extern int -citp_passthrough_bind(citp_fdinfo* fdi, - const struct sockaddr* sa, socklen_t sa_len); -extern int -citp_passthrough_accept(citp_fdinfo* fdi, - struct sockaddr* sa, socklen_t* p_sa_len, int flags, - citp_lib_context_t* lib_context); -extern int -citp_passthrough_connect(citp_fdinfo* fdi, - const struct sockaddr* sa, socklen_t sa_len, - citp_lib_context_t* lib_context); +extern int citp_passthrough_bind( + citp_fdinfo* fdi, const struct sockaddr* sa, socklen_t sa_len); +extern int citp_passthrough_accept(citp_fdinfo* fdi, struct sockaddr* sa, + socklen_t* p_sa_len, int flags, citp_lib_context_t* lib_context); +extern int citp_passthrough_connect(citp_fdinfo* fdi, + const struct sockaddr* sa, socklen_t sa_len, + citp_lib_context_t* lib_context); /********************************************************************** ** Stack name state access */ extern int oo_extensions_init(void); -ci_inline struct oo_stackname_state *oo_stackname_thread_get(void) +ci_inline struct oo_stackname_state* oo_stackname_thread_get(void) { struct oo_per_thread* pt = oo_per_thread_get(); return &pt->stackname; @@ -849,101 +861,95 @@ ci_inline struct oo_stackname_state *oo_stackname_thread_get(void) ** Signal deferral and errno propagation */ -ci_inline int __citp_checked_enter_lib(citp_lib_context_t *lib_context - CI_DEBUG_ARG(const char *fn) - CI_DEBUG_ARG(int line) ) +ci_inline int __citp_checked_enter_lib( + citp_lib_context_t* lib_context CI_DEBUG_ARG(const char* fn) + CI_DEBUG_ARG(int line)) { lib_context->saved_errno = errno; - lib_context->thread = __oo_per_thread_get(); + lib_context->thread = __oo_per_thread_get(); Log_LIB(log(" citp_checked_enter_lib(%p) [was_in=%d] %s (%d)", - lib_context->thread, lib_context->thread->sig.c.inside_lib > 0, - fn, line)); - ci_assert(~lib_context->thread->sig.c.aflags & - OO_SIGNAL_FLAG_FDTABLE_LOCKED); - 
++lib_context->thread->sig.c.inside_lib; - return (lib_context->thread->sig.c.inside_lib==1); + lib_context->thread, lib_context->thread->sig.c.inside_lib > 0, fn, + line)); + ci_assert( + ~lib_context->thread->sig.c.aflags & OO_SIGNAL_FLAG_FDTABLE_LOCKED); + ++lib_context->thread->sig.c.inside_lib; + return (lib_context->thread->sig.c.inside_lib == 1); } -ci_inline void __citp_enter_lib(citp_lib_context_t *lib_context - CI_DEBUG_ARG(const char *fn) - CI_DEBUG_ARG(int line) ) +ci_inline void __citp_enter_lib(citp_lib_context_t* lib_context CI_DEBUG_ARG( + const char* fn) CI_DEBUG_ARG(int line)) { lib_context->saved_errno = errno; - lib_context->thread = __oo_per_thread_get(); + lib_context->thread = __oo_per_thread_get(); Log_LIB(log(" citp_enter_lib(%p) inside_lib=%d %s (%d)", - lib_context->thread, lib_context->thread->sig.c.inside_lib, - fn, line)); + lib_context->thread, lib_context->thread->sig.c.inside_lib, fn, line)); ci_assert_ge(lib_context->thread->sig.c.inside_lib, 0); - ci_assert(~lib_context->thread->sig.c.aflags & - OO_SIGNAL_FLAG_FDTABLE_LOCKED); + ci_assert( + ~lib_context->thread->sig.c.aflags & OO_SIGNAL_FLAG_FDTABLE_LOCKED); ++lib_context->thread->sig.c.inside_lib; } -ci_inline void __citp_reenter_lib(citp_lib_context_t *lib_context - CI_DEBUG_ARG(const char *fn) - CI_DEBUG_ARG(int line) ) +ci_inline void __citp_reenter_lib(citp_lib_context_t* lib_context CI_DEBUG_ARG( + const char* fn) CI_DEBUG_ARG(int line)) { Log_LIB(log(" citp_reenter_lib(%p) inside_lib=%d %s (%d)", - lib_context->thread, lib_context->thread->sig.c.inside_lib, - fn, line)); + lib_context->thread, lib_context->thread->sig.c.inside_lib, fn, line)); ci_assert_ge(lib_context->thread->sig.c.inside_lib, 0); - ci_assert(~lib_context->thread->sig.c.aflags & - OO_SIGNAL_FLAG_FDTABLE_LOCKED); + ci_assert( + ~lib_context->thread->sig.c.aflags & OO_SIGNAL_FLAG_FDTABLE_LOCKED); ++lib_context->thread->sig.c.inside_lib; } -ci_inline void __citp_exit_lib(citp_lib_context_t *lib_context, int 
do_errno - CI_DEBUG_ARG(const char *fn) - CI_DEBUG_ARG(int line) ) +ci_inline void __citp_exit_lib(citp_lib_context_t* lib_context, + int do_errno CI_DEBUG_ARG(const char* fn) CI_DEBUG_ARG(int line)) { - Log_LIB(log(" citp_exit_lib(%p) inside_lib=%d %s (%d)", - lib_context->thread, lib_context->thread->sig.c.inside_lib, - fn, line)); + Log_LIB(log(" citp_exit_lib(%p) inside_lib=%d %s (%d)", lib_context->thread, + lib_context->thread->sig.c.inside_lib, fn, line)); ci_assert_ge(lib_context->thread->sig.c.inside_lib, 1); - ci_assert(~lib_context->thread->sig.c.aflags & - OO_SIGNAL_FLAG_FDTABLE_LOCKED); + ci_assert( + ~lib_context->thread->sig.c.aflags & OO_SIGNAL_FLAG_FDTABLE_LOCKED); --lib_context->thread->sig.c.inside_lib; ci_compiler_barrier(); - if(CI_UNLIKELY( lib_context->thread->sig.c.inside_lib == 0 && - (lib_context->thread->sig.c.aflags & - OO_SIGNAL_FLAG_HAVE_PENDING) )) + if( CI_UNLIKELY( + lib_context->thread->sig.c.inside_lib == 0 && + (lib_context->thread->sig.c.aflags & OO_SIGNAL_FLAG_HAVE_PENDING)) ) citp_signal_run_pending(&lib_context->thread->sig); if( do_errno ) errno = lib_context->saved_errno; } -#define citp_checked_enter_lib(c) \ - __citp_checked_enter_lib((c) CI_DEBUG_ARG(__FUNCTION__) \ - CI_DEBUG_ARG(__LINE__)) +#define citp_checked_enter_lib(c) \ + __citp_checked_enter_lib( \ + (c) CI_DEBUG_ARG(__FUNCTION__) CI_DEBUG_ARG(__LINE__)) -#define citp_enter_lib(c) \ +#define citp_enter_lib(c) \ __citp_enter_lib((c) CI_DEBUG_ARG(__FUNCTION__) CI_DEBUG_ARG(__LINE__)) -#define citp_reenter_lib(c) \ +#define citp_reenter_lib(c) \ __citp_reenter_lib((c) CI_DEBUG_ARG(__FUNCTION__) CI_DEBUG_ARG(__LINE__)) -#define citp_exit_lib(c, de) \ +#define citp_exit_lib(c, de) \ __citp_exit_lib((c), (de) CI_DEBUG_ARG(__FUNCTION__) CI_DEBUG_ARG(__LINE__)) -#define citp_enter_lib_if(c) \ - do { \ - if( (c)->thread == NULL ) \ - citp_enter_lib(c); \ +#define citp_enter_lib_if(c) \ + do { \ + if( (c)->thread == NULL ) \ + citp_enter_lib(c); \ } while( 0 ) -#define 
citp_exit_lib_if(c, de) \ - do { \ - if(CI_UNLIKELY( (c)->thread != NULL )) \ - citp_exit_lib((c), (de)); \ +#define citp_exit_lib_if(c, de) \ + do { \ + if( CI_UNLIKELY((c)->thread != NULL) ) \ + citp_exit_lib((c), (de)); \ } while( 0 ) #ifndef TRUE -#define TRUE 1 +#define TRUE 1 #define FALSE 0 #endif @@ -953,13 +959,13 @@ ci_inline void __citp_exit_lib(citp_lib_context_t *lib_context, int do_errno */ extern size_t citp_environ_count_args(const char* arg, va_list args) CI_HF; -extern void citp_environ_handle_args(char** argv, const char* arg, - va_list args, char*** env_ptr) CI_HF; -extern char* const* citp_environ_check_preload(char* const* env, - size_t* bytes_reqd) CI_HF; -extern void citp_environ_make_preload(char* const* env, char** new_env, - size_t new_env_bytes) CI_HF; -extern int citp_environ_init(void) CI_HF; +extern void citp_environ_handle_args( + char** argv, const char* arg, va_list args, char*** env_ptr) CI_HF; +extern char* const* citp_environ_check_preload( + char* const* env, size_t* bytes_reqd) CI_HF; +extern void citp_environ_make_preload( + char* const* env, char** new_env, size_t new_env_bytes) CI_HF; +extern int citp_environ_init(void) CI_HF; /********************************************************************** @@ -967,8 +973,7 @@ extern int citp_environ_init(void) CI_HF; */ extern int citp_sock_fcntl_os_sock(citp_sock_fdi* epi, int fd, int cmd, - long arg, const char* cmd_str, - int* fcntl_result) CI_HF; + long arg, const char* cmd_str, int* fcntl_result) CI_HF; /*! Handler for fcntl() cmds that are common across sockets. 
* @@ -980,7 +985,7 @@ extern int citp_sock_fcntl_os_sock(citp_sock_fdi* epi, int fd, int cmd, */ extern int citp_sock_fcntl(citp_sock_fdi*, int fd, int cmd, long arg) CI_HF; -#define ci_major(dev) ((dev) & 0xff00) +#define ci_major(dev) ((dev) &0xff00) /********************************************************************** * poll, select, epoll @@ -992,10 +997,9 @@ extern int citp_sock_fcntl(citp_sock_fdi*, int fd, int cmd, long arg) CI_HF; * At exit time, if *timeout_ms!=0 and rc==0, caller should block in system * call for the specified timeout. */ -int citp_ul_do_poll(struct pollfd*__restrict__ fds, nfds_t nfds, - ci_uint64 timeout_ms, ci_uint64 *used_ms, - citp_lib_context_t *lib_context, - const sigset_t *sigmask); +int citp_ul_do_poll(struct pollfd* __restrict__ fds, nfds_t nfds, + ci_uint64 timeout_ms, ci_uint64* used_ms, citp_lib_context_t* lib_context, + const sigset_t* sigmask); /* Generic select/pselect implementation. * This function is called after citp_enter_lib(), and it MUST NOT call * citp_exit_lib(). @@ -1003,9 +1007,8 @@ int citp_ul_do_poll(struct pollfd*__restrict__ fds, nfds_t nfds, * call for the specified timeout. */ int citp_ul_do_select(int nfds, fd_set* rds, fd_set* wrs, fd_set* exs, - ci_uint64 timeout_ms, ci_uint64 *used_ms, - citp_lib_context_t *lib_context, - const sigset_t *sigmask); + ci_uint64 timeout_ms, ci_uint64* used_ms, citp_lib_context_t* lib_context, + const sigset_t* sigmask); /* ppoll/pselect common code. * @@ -1043,9 +1046,8 @@ int citp_ul_do_select(int nfds, fd_set* rds, fd_set* wrs, fd_set* exs, * to the latency-critical path without any visible benefit. 
*/ -static inline int -citp_ul_pwait_spin_pre(citp_lib_context_t *lib_context, - const sigset_t *sigmask, sigset_t *sigsaved) +static inline int citp_ul_pwait_spin_pre(citp_lib_context_t* lib_context, + const sigset_t* sigmask, sigset_t* sigsaved) { Log_POLL(log("%s(%p,%p)", __func__, sigmask, sigsaved)); citp_exit_lib(lib_context, CI_FALSE); @@ -1056,8 +1058,8 @@ citp_ul_pwait_spin_pre(citp_lib_context_t *lib_context, return 0; sigprocmask(SIG_SETMASK, sigmask, NULL); - if(CI_UNLIKELY( lib_context->thread->sig.c.aflags & - OO_SIGNAL_FLAG_HAVE_PENDING )) { + if( CI_UNLIKELY( + lib_context->thread->sig.c.aflags & OO_SIGNAL_FLAG_HAVE_PENDING) ) { sigprocmask(SIG_SETMASK, sigsaved, NULL); Log_POLL(log("%s: interrupted", __func__)); errno = EINTR; @@ -1065,14 +1067,13 @@ citp_ul_pwait_spin_pre(citp_lib_context_t *lib_context, } return 0; } -static inline void -citp_ul_pwait_spin_done(citp_lib_context_t *lib_context, - sigset_t *sigsaved, int *p_rc) +static inline void citp_ul_pwait_spin_done( + citp_lib_context_t* lib_context, sigset_t* sigsaved, int* p_rc) { Log_POLL(log("%s(%p,%d)", __func__, sigsaved, *p_rc)); sigprocmask(SIG_BLOCK, sigsaved, NULL); - if(CI_UNLIKELY( lib_context->thread->sig.c.aflags & - OO_SIGNAL_FLAG_HAVE_PENDING )) { + if( CI_UNLIKELY( + lib_context->thread->sig.c.aflags & OO_SIGNAL_FLAG_HAVE_PENDING) ) { Log_POLL(log("%s: interrupted", __func__)); errno = EINTR; *p_rc = -1; @@ -1086,8 +1087,8 @@ citp_ul_pwait_spin_done(citp_lib_context_t *lib_context, /* As citp_poll_if_needed(), but leaves the stack locked after polling. 
*/ -static inline int __citp_poll_if_needed(ci_netif* ni, ci_uint64 recent_frc, - int is_spinning) +static inline int __citp_poll_if_needed( + ci_netif* ni, ci_uint64 recent_frc, int is_spinning) { if( ci_netif_may_poll(ni) && ci_netif_need_poll_maybe_spinning(ni, recent_frc, is_spinning) && @@ -1106,8 +1107,8 @@ static inline int __citp_poll_if_needed(ci_netif* ni, ci_uint64 recent_frc, * * Used by poll(), select() and epoll_wait(). */ -static inline int citp_poll_if_needed(ci_netif* ni, ci_uint64 recent_frc, - int is_spinning) +static inline int citp_poll_if_needed( + ci_netif* ni, ci_uint64 recent_frc, int is_spinning) { int rc = __citp_poll_if_needed(ni, recent_frc, is_spinning); if( rc != 0 ) @@ -1117,26 +1118,27 @@ static inline int citp_poll_if_needed(ci_netif* ni, ci_uint64 recent_frc, /* Query onload driver for the cpu_khz value */ -void citp_oo_get_cpu_khz(ci_uint32* cpu_khz); +void citp_oo_get_cpu_khz(ci_uint32* cpu_khz); ci_inline ci_uint64 citp_usec_to_cycles64(unsigned usec) -{ return (ci_uint64) usec * citp.cpu_khz / 1000; } +{ + return (ci_uint64) usec * citp.cpu_khz / 1000; +} -extern int citp_sock_is_spinning(citp_fdinfo* fdi); +extern int citp_sock_is_spinning(citp_fdinfo* fdi); /********************************************************************** * Utils */ -ci_inline void -ms2timespec(ci_uint64 timeout, ci_uint64 spent, struct timespec* tv) +ci_inline void ms2timespec( + ci_uint64 timeout, ci_uint64 spent, struct timespec* tv) { if( timeout > spent ) { - tv->tv_sec = (timeout - spent) / 1000; + tv->tv_sec = (timeout - spent) / 1000; tv->tv_nsec = ((timeout - spent) % 1000) * 1000000; - } - else { + } else { tv->tv_sec = tv->tv_nsec = 0; } } @@ -1156,7 +1158,7 @@ ci_inline int citp_getpid(void) * fdtable_locked == false means that we do not know (typically from * ci_log). 
*/ -extern void __oo_service_fd(bool fdtable_locked); +extern void __oo_service_fd(bool fdtable_locked); ci_inline int oo_service_fd(void) { if( citp.onload_fd < 0 ) @@ -1167,5 +1169,5 @@ ci_inline int oo_service_fd(void) extern void oo_signal_terminate(int signum); -#endif /* __CI_TRANSPORT_INTERNAL_H__ */ +#endif /* __CI_TRANSPORT_INTERNAL_H__ */ /*! \cidoxg_end */ diff --git a/src/lib/transport/unix/mmake.mk b/src/lib/transport/unix/mmake.mk index da265c867..88759a40e 100644 --- a/src/lib/transport/unix/mmake.mk +++ b/src/lib/transport/unix/mmake.mk @@ -8,6 +8,13 @@ ifeq ($(ISA),i386) BUILD_TREE_COPY := mapfile.ilp32 endif +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +MMAKE_DPDK := $(DEFAULT_DPDK) +MMAKE_INCLUDE += -I$(RTE_SDK)/build/install/include + TARGET := libcitransport0.so MMAKE_TYPE := DLL @@ -15,7 +22,7 @@ LDEP := $(CITPCOMMON_LIB_DEPEND) $(CIIP_LIB_DEPEND) $(CPLANE_LIB_DEPEND) \ $(CITOOLS_LIB_DEPEND) $(CIUL_LIB_DEPEND) LLNK := $(LINK_CITPCOMMON_LIB) $(LINK_CIIP_LIB) $(LINK_CPLANE_LIB) \ - $(LINK_CITOOLS_LIB) $(LINK_CIUL_LIB) + $(LINK_CITOOLS_LIB) $(LINK_CIUL_LIB) $(MMAKE_DPDK) LIB_SRCS := \ startup.c \ @@ -44,7 +51,8 @@ LIB_SRCS := \ wqlock.c \ poll_select.c \ passthrough_fd.c \ - utils.c + utils.c \ + dpdk.c MMAKE_OBJ_PREFIX := ci_tp_unix_ LIB_OBJS := $(LIB_SRCS:%.c=$(MMAKE_OBJ_PREFIX)%.o) diff --git a/src/lib/transport/unix/netif_init.c b/src/lib/transport/unix/netif_init.c index e18266dc1..5129526ef 100644 --- a/src/lib/transport/unix/netif_init.c +++ b/src/lib/transport/unix/netif_init.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2004-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file netif_init.c + *//*! \file netif_init.c ** ** \author stg ** \brief Common functionality used by TCP & UDP @@ -10,7 +10,7 @@ ** *//* \**************************************************************************/ - + /*! 
\cidoxg_lib_transport_unix */ #include @@ -22,8 +22,8 @@ #include -#define LPF "citp_netif_" -#define LPFIN "-> " LPF +#define LPF "citp_netif_" +#define LPFIN "-> " LPF #define LPFOUT "<- " LPF @@ -35,44 +35,43 @@ int citp_netif_init_ctor(void) citp_cmn_netif_init_ctor(CITP_OPTS.netif_dtor); - return 0; + return dpdk_init(); } /* Storage for stackname context across fork() */ static struct oo_stackname_state stackname_config_across_fork; /* Storage for library context across fork() */ -static citp_lib_context_t citp_lib_context_across_fork; +static citp_lib_context_t citp_lib_context_across_fork; /*! Handles user-level netif internals pre fork() */ -static void citp_netif_pre_fork_hook(void); +static void citp_netif_pre_fork_hook(void); /*! Handles user-level netif internals post fork() in the parent */ -static void citp_netif_parent_fork_hook(void); +static void citp_netif_parent_fork_hook(void); /* Handles user-level netif internals post fork() in the child */ -static void citp_netif_child_fork_hook(void); +static void citp_netif_child_fork_hook(void); /* I do not understand why, but __register_atfork seems to work better than * __libc_atfork */ -extern int __register_atfork(void (*prepare)(void), void (*parent)(void), - void (*child)(void), void *dso); +extern int __register_atfork(void (*prepare)(void), void (*parent)(void), + void (*child)(void), void* dso); -int ci_setup_fork(void) +int ci_setup_fork(void) { - Log_CALL(ci_log("%s()", __FUNCTION__)); - return __register_atfork(citp_netif_pre_fork_hook, - citp_netif_parent_fork_hook, - citp_netif_child_fork_hook, NULL); + Log_CALL(ci_log("%s()", __FUNCTION__)); + return __register_atfork(citp_netif_pre_fork_hook, + citp_netif_parent_fork_hook, citp_netif_child_fork_hook, NULL); } /* Handles user-level netif internals pre fork() */ static void citp_netif_pre_fork_hook(void) { - struct oo_stackname_state *stackname_state; + struct oo_stackname_state* stackname_state; /* If we have not inited fork hook, how can 
we get here in the first * place? */ - if( citp.init_level < CITP_INIT_FORK_HOOKS) { + if( citp.init_level < CITP_INIT_FORK_HOOKS ) { ci_assert(0); return; } @@ -105,9 +104,9 @@ static void citp_netif_pre_fork_hook(void) return; stackname_state = oo_stackname_thread_get(); - memcpy(&stackname_config_across_fork, stackname_state, - sizeof(stackname_config_across_fork)); - + memcpy(&stackname_config_across_fork, stackname_state, + sizeof(stackname_config_across_fork)); + /* If the call to _fork() subsequently fails we potentially have * marked all of our netifs as shared when ideally we shouldn't * have. However, this is non-fatal and is probably the least of @@ -123,7 +122,7 @@ static void citp_netif_parent_fork_hook(void) { /* If we have not inited fork hook, how can we get here in the first * place? */ - if( citp.init_level < CITP_INIT_FORK_HOOKS) { + if( citp.init_level < CITP_INIT_FORK_HOOKS ) { ci_assert(0); return; } @@ -132,12 +131,12 @@ static void citp_netif_parent_fork_hook(void) pthread_mutex_unlock(&citp_pkt_map_lock); oo_rwlock_unlock_write(&citp_dup2_lock); - if( citp.init_level < CITP_INIT_FDTABLE) + if( citp.init_level < CITP_INIT_FDTABLE ) goto unlock_fork; - else if( citp.init_level < CITP_INIT_NETIF) + else if( citp.init_level < CITP_INIT_NETIF ) goto unlock; - if( CITP_OPTS.fork_netif == CI_UNIX_FORK_NETIF_PARENT ) + if( CITP_OPTS.fork_netif == CI_UNIX_FORK_NETIF_PARENT ) __citp_netif_mark_all_dont_use(); unlock: @@ -152,7 +151,7 @@ static void citp_netif_child_fork_hook(void) { /* If we have not inited fork hook, how can we get here in the first * place? 
*/ - if( citp.init_level < CITP_INIT_FORK_HOOKS) { + if( citp.init_level < CITP_INIT_FORK_HOOKS ) { ci_assert(0); return; } @@ -179,13 +178,13 @@ static void citp_netif_child_fork_hook(void) oo_rwlock_ctor(&citp_dup2_lock); pthread_mutex_init(&citp_pkt_map_lock, NULL); - if( citp.init_level < CITP_INIT_FDTABLE) + if( citp.init_level < CITP_INIT_FDTABLE ) return; pthread_mutex_lock(&citp_dup_lock); CITP_FDTABLE_LOCK(); - if( citp.init_level < CITP_INIT_NETIF) + if( citp.init_level < CITP_INIT_NETIF ) goto setup_fdtable; citp_setup_logging_prefix(); @@ -193,7 +192,7 @@ static void citp_netif_child_fork_hook(void) oo_stackname_update(&stackname_config_across_fork); - if( CITP_OPTS.fork_netif == CI_UNIX_FORK_NETIF_CHILD ) + if( CITP_OPTS.fork_netif == CI_UNIX_FORK_NETIF_CHILD ) __citp_netif_mark_all_dont_use(); setup_fdtable: @@ -233,7 +232,7 @@ void citp_netif_pre_bproc_move_hook(void) * time of the bproc_move(). */ __citp_netif_unprotect_all(); - + CITP_FDTABLE_UNLOCK(); } @@ -248,34 +247,36 @@ static void ci_netif_check_process_config(ci_netif* ni) #if CI_CFG_FD_CACHING if( ni->state->opts.sock_cache_max > 0 ) { if( citp_fdtable_not_mt_safe() ) { - NI_LOG(ni, CONFIG_WARNINGS, "Socket caching is not supported when " - "EF_FDS_MT_SAFE=0, and has been disabled"); + NI_LOG(ni, CONFIG_WARNINGS, + "Socket caching is not supported when " + "EF_FDS_MT_SAFE=0, and has been disabled"); citp_netif_cache_disable(); - } - else if( CITP_OPTS.ul_epoll != 3 ) - NI_LOG(ni, CONFIG_WARNINGS, "Sockets that are added to an epoll set can " - "only be cached if EF_UL_EPOLL=3"); + } else if( CITP_OPTS.ul_epoll != 3 ) + NI_LOG(ni, CONFIG_WARNINGS, + "Sockets that are added to an epoll set can " + "only be cached if EF_UL_EPOLL=3"); } - if( (NI_OPTS(ni).scalable_filter_enable) && - (CITP_OPTS.ul_epoll != 1) && (CITP_OPTS.ul_epoll != 3) ) { - NI_LOG(ni, CONFIG_WARNINGS, "When using a scalable filters mode handover " - "of TCP sockets in an epoll set is only " - "supported if EF_UL_EPOLL=1 or 
3."); + if( (NI_OPTS(ni).scalable_filter_enable) && (CITP_OPTS.ul_epoll != 1) && + (CITP_OPTS.ul_epoll != 3) ) { + NI_LOG(ni, CONFIG_WARNINGS, + "When using a scalable filters mode handover " + "of TCP sockets in an epoll set is only " + "supported if EF_UL_EPOLL=1 or 3."); } #endif if( NI_OPTS(ni).scalable_filter_enable && CITP_OPTS.stack_per_thread ) { - NI_LOG(ni, CONFIG_WARNINGS, "EF_STACK_PER_THREAD=1 cannot be used in " - "scalable filters mode as a single filter " - "configuration can only be used by one stack."); + NI_LOG(ni, CONFIG_WARNINGS, + "EF_STACK_PER_THREAD=1 cannot be used in " + "scalable filters mode as a single filter " + "configuration can only be used by one stack."); } } /* Platform specific code, called after netif construction */ -void citp_netif_ctor_hook(ci_netif* ni, int realloc) +void citp_netif_ctor_hook(ci_netif* ni, int realloc) { - - if (!realloc) + if( ! realloc ) /* Protect the netif's FD table entry */ __citp_fdtable_reserve(ci_netif_get_driver_handle(ni), 1); @@ -284,11 +285,14 @@ void citp_netif_ctor_hook(ci_netif* ni, int realloc) /* Platform specific code, called proir to netif destruction */ -void citp_netif_free_hook(ci_netif* ni) +void citp_netif_free_hook(ci_netif* ni) { #if CI_CFG_FD_CACHING citp_uncache_fds_ul(ni); #endif + + dpdk_cleanup(); + /* Unprotect the netif's FD table entry */ __citp_fdtable_reserve(ci_netif_get_driver_handle(ni), 0); } diff --git a/src/lib/transport/unix/startup.c b/src/lib/transport/unix/startup.c index 8384190cf..d5e0d3185 100644 --- a/src/lib/transport/unix/startup.c +++ b/src/lib/transport/unix/startup.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2003-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! 
\file ** ** \author djr/ctk ** \brief Sockets interface to user level TCP @@ -10,15 +10,15 @@ ** *//* \**************************************************************************/ - + /*! \cidoxg_lib_transport_unix */ #include #include #include -#include /* for getpid() */ -#include /* for mkdir() */ -#include /* for mkdir() */ +#include /* for getpid() */ +#include /* for mkdir() */ +#include /* for mkdir() */ #include #include @@ -43,9 +43,9 @@ static int citp_setup_logging_early(void) return 0; } -static void citp_setup_logging_change(void *new_log_fn) +static void citp_setup_logging_change(void* new_log_fn) { - if( ci_log_fn != new_log_fn && citp.log_fd >= 0) { + if( ci_log_fn != new_log_fn && citp.log_fd >= 0 ) { ci_sys_close(citp.log_fd); citp.log_fd = -1; } @@ -55,7 +55,8 @@ static void citp_setup_logging_change(void *new_log_fn) void citp_setup_logging_prefix(void) { static char s0[64]; - snprintf(s0, sizeof(s0), "oo:%.16s[%d]: ", citp.process_name, (int) getpid()); + snprintf( + s0, sizeof(s0), "oo:%.16s[%d]: ", citp.process_name, (int) getpid()); ci_set_log_prefix(s0); } @@ -117,8 +118,6 @@ static void __oo_per_thread_init_thread(struct oo_per_thread* pt) } - - static void citp_dump_config(void) { char buf[80]; @@ -130,89 +129,88 @@ static void citp_dump_config(void) log("ci_is_multithreaded = %d", ci_is_multithreaded()); } -static void citp_dump_opts(citp_opts_t *o) +static void citp_dump_opts(citp_opts_t* o) { /* ?? 
TODO: should be using opts_cittp_def.h here */ -# define DUMP_OPT_INT(envstr, name) \ - ci_log("%s=%d", (envstr), (int) o->name) -# define DUMP_OPT_HEX(envstr, name) \ +#define DUMP_OPT_INT(envstr, name) ci_log("%s=%d", (envstr), (int) o->name) +#define DUMP_OPT_HEX(envstr, name) \ ci_log("%s=%x", (envstr), (unsigned) o->name) - DUMP_OPT_HEX("EF_UNIX_LOG", log_level); - DUMP_OPT_INT("EF_PROBE", probe); - DUMP_OPT_INT("EF_TCP", ul_tcp); - DUMP_OPT_INT("EF_UDP", ul_udp); - DUMP_OPT_INT("EF_UL_SELECT", ul_select); - DUMP_OPT_INT("EF_SELECT_SPIN", ul_select_spin); - DUMP_OPT_INT("EF_SELECT_FAST", ul_select_fast); - DUMP_OPT_INT("EF_UL_POLL", ul_poll); - DUMP_OPT_INT("EF_POLL_SPIN", ul_poll_spin); - DUMP_OPT_INT("EF_POLL_FAST", ul_poll_fast); - DUMP_OPT_INT("EF_POLL_FAST_USEC", ul_poll_fast_usec); + DUMP_OPT_HEX("EF_UNIX_LOG", log_level); + DUMP_OPT_INT("EF_PROBE", probe); + DUMP_OPT_INT("EF_TCP", ul_tcp); + DUMP_OPT_INT("EF_UDP", ul_udp); + DUMP_OPT_INT("EF_UL_SELECT", ul_select); + DUMP_OPT_INT("EF_SELECT_SPIN", ul_select_spin); + DUMP_OPT_INT("EF_SELECT_FAST", ul_select_fast); + DUMP_OPT_INT("EF_UL_POLL", ul_poll); + DUMP_OPT_INT("EF_POLL_SPIN", ul_poll_spin); + DUMP_OPT_INT("EF_POLL_FAST", ul_poll_fast); + DUMP_OPT_INT("EF_POLL_FAST_USEC", ul_poll_fast_usec); DUMP_OPT_INT("EF_POLL_NONBLOCK_FAST_USEC", ul_poll_nonblock_fast_usec); - DUMP_OPT_INT("EF_SELECT_FAST_USEC", ul_select_fast_usec); + DUMP_OPT_INT("EF_SELECT_FAST_USEC", ul_select_fast_usec); DUMP_OPT_INT("EF_SELECT_NONBLOCK_FAST_USEC", ul_select_nonblock_fast_usec); - DUMP_OPT_INT("EF_UDP_RECV_SPIN", udp_recv_spin); - DUMP_OPT_INT("EF_UDP_SEND_SPIN", udp_send_spin); - DUMP_OPT_INT("EF_TCP_RECV_SPIN", tcp_recv_spin); - DUMP_OPT_INT("EF_TCP_SEND_SPIN", tcp_send_spin); - DUMP_OPT_INT("EF_TCP_ACCEPT_SPIN", tcp_accept_spin); - DUMP_OPT_INT("EF_TCP_CONNECT_SPIN", tcp_connect_spin); - DUMP_OPT_INT("EF_PKT_WAIT_SPIN", pkt_wait_spin); - DUMP_OPT_INT("EF_PIPE_RECV_SPIN", pipe_recv_spin); - 
DUMP_OPT_INT("EF_PIPE_SEND_SPIN", pipe_send_spin); - DUMP_OPT_INT("EF_PIPE_SIZE", pipe_size); - DUMP_OPT_INT("EF_SOCK_LOCK_BUZZ", sock_lock_buzz); - DUMP_OPT_INT("EF_STACK_LOCK_BUZZ", stack_lock_buzz); - DUMP_OPT_INT("EF_SO_BUSY_POLL_SPIN", so_busy_poll_spin); - DUMP_OPT_INT("EF_UL_EPOLL", ul_epoll); - DUMP_OPT_INT("EF_EPOLL_SPIN", ul_epoll_spin); - DUMP_OPT_INT("EF_EPOLL_CTL_FAST", ul_epoll_ctl_fast); - DUMP_OPT_INT("EF_EPOLL_CTL_HANDOFF", ul_epoll_ctl_handoff); - DUMP_OPT_INT("EF_EPOLL_MT_SAFE", ul_epoll_mt_safe); - DUMP_OPT_INT("EF_FDTABLE_SIZE", fdtable_size); - DUMP_OPT_INT("EF_SPIN_USEC", ul_spin_usec); - DUMP_OPT_INT("EF_SLEEP_SPIN_USEC", sleep_spin_usec); - DUMP_OPT_INT("EF_STACK_PER_THREAD", stack_per_thread); - DUMP_OPT_INT("EF_DONT_ACCELERATE", dont_accelerate); - DUMP_OPT_INT("EF_FDTABLE_STRICT", fdtable_strict); - DUMP_OPT_INT("EF_FDS_MT_SAFE", fds_mt_safe); - DUMP_OPT_INT("EF_FORK_NETIF", fork_netif); - DUMP_OPT_INT("EF_NETIF_DTOR", netif_dtor); - DUMP_OPT_INT("EF_NO_FAIL", no_fail); - DUMP_OPT_INT("EF_SA_ONSTACK_INTERCEPT", sa_onstack_intercept); + DUMP_OPT_INT("EF_UDP_RECV_SPIN", udp_recv_spin); + DUMP_OPT_INT("EF_UDP_SEND_SPIN", udp_send_spin); + DUMP_OPT_INT("EF_TCP_RECV_SPIN", tcp_recv_spin); + DUMP_OPT_INT("EF_TCP_SEND_SPIN", tcp_send_spin); + DUMP_OPT_INT("EF_TCP_ACCEPT_SPIN", tcp_accept_spin); + DUMP_OPT_INT("EF_TCP_CONNECT_SPIN", tcp_connect_spin); + DUMP_OPT_INT("EF_PKT_WAIT_SPIN", pkt_wait_spin); + DUMP_OPT_INT("EF_PIPE_RECV_SPIN", pipe_recv_spin); + DUMP_OPT_INT("EF_PIPE_SEND_SPIN", pipe_send_spin); + DUMP_OPT_INT("EF_PIPE_SIZE", pipe_size); + DUMP_OPT_INT("EF_SOCK_LOCK_BUZZ", sock_lock_buzz); + DUMP_OPT_INT("EF_STACK_LOCK_BUZZ", stack_lock_buzz); + DUMP_OPT_INT("EF_SO_BUSY_POLL_SPIN", so_busy_poll_spin); + DUMP_OPT_INT("EF_UL_EPOLL", ul_epoll); + DUMP_OPT_INT("EF_EPOLL_SPIN", ul_epoll_spin); + DUMP_OPT_INT("EF_EPOLL_CTL_FAST", ul_epoll_ctl_fast); + DUMP_OPT_INT("EF_EPOLL_CTL_HANDOFF", ul_epoll_ctl_handoff); + 
DUMP_OPT_INT("EF_EPOLL_MT_SAFE", ul_epoll_mt_safe); + DUMP_OPT_INT("EF_FDTABLE_SIZE", fdtable_size); + DUMP_OPT_INT("EF_SPIN_USEC", ul_spin_usec); + DUMP_OPT_INT("EF_SLEEP_SPIN_USEC", sleep_spin_usec); + DUMP_OPT_INT("EF_STACK_PER_THREAD", stack_per_thread); + DUMP_OPT_INT("EF_DONT_ACCELERATE", dont_accelerate); + DUMP_OPT_INT("EF_FDTABLE_STRICT", fdtable_strict); + DUMP_OPT_INT("EF_FDS_MT_SAFE", fds_mt_safe); + DUMP_OPT_INT("EF_FORK_NETIF", fork_netif); + DUMP_OPT_INT("EF_NETIF_DTOR", netif_dtor); + DUMP_OPT_INT("EF_NO_FAIL", no_fail); + DUMP_OPT_INT("EF_SA_ONSTACK_INTERCEPT", sa_onstack_intercept); DUMP_OPT_INT("EF_ACCEPT_INHERIT_NONBLOCK", accept_force_inherit_nonblock); DUMP_OPT_INT("EF_PIPE", ul_pipe); DUMP_OPT_HEX("EF_SIGNALS_NOPOSTPONE", signals_no_postpone); DUMP_OPT_HEX("EF_SYNC_CPLANE_AT_CREATE", sync_cplane); - DUMP_OPT_INT("EF_CLUSTER_SIZE", cluster_size); - DUMP_OPT_INT("EF_CLUSTER_RESTART", cluster_restart_opt); + DUMP_OPT_INT("EF_CLUSTER_SIZE", cluster_size); + DUMP_OPT_INT("EF_CLUSTER_RESTART", cluster_restart_opt); DUMP_OPT_INT("EF_CLUSTER_HOT_RESTART", cluster_hot_restart_opt); ci_log("EF_CLUSTER_NAME=%s", o->cluster_name); if( o->tcp_reuseports == 0 ) { DUMP_OPT_INT("EF_TCP_FORCE_REUSEPORT", tcp_reuseports); } else { - struct ci_port_list *force_reuseport; + struct ci_port_list* force_reuseport; CI_DLLIST_FOR_EACH2(struct ci_port_list, force_reuseport, link, - (ci_dllist*)(ci_uintptr_t)o->tcp_reuseports) - ci_log("%s=%d", "EF_TCP_FORCE_REUSEPORT", ntohs(force_reuseport->port)); + (ci_dllist*) (ci_uintptr_t) o->tcp_reuseports) + ci_log("%s=%d", "EF_TCP_FORCE_REUSEPORT", ntohs(force_reuseport->port)); } if( o->udp_reuseports == 0 ) { DUMP_OPT_INT("EF_UDP_FORCE_REUSEPORT", udp_reuseports); } else { - struct ci_port_list *force_reuseport; + struct ci_port_list* force_reuseport; CI_DLLIST_FOR_EACH2(struct ci_port_list, force_reuseport, link, - (ci_dllist*)(ci_uintptr_t)o->udp_reuseports) - ci_log("%s=%d", "EF_UDP_FORCE_REUSEPORT", 
ntohs(force_reuseport->port)); + (ci_dllist*) (ci_uintptr_t) o->udp_reuseports) + ci_log("%s=%d", "EF_UDP_FORCE_REUSEPORT", ntohs(force_reuseport->port)); } } -static void citp_log_to_file(const char *s) +static void citp_log_to_file(const char* s) { int fd; - ci_assert(!CITP_OPTS.log_via_ioctl); + ci_assert(! CITP_OPTS.log_via_ioctl); fd = open(s, O_WRONLY | O_CREAT | O_TRUNC, S_IREAD | S_IWRITE); if( fd >= 0 ) { if( citp.log_fd >= 0 ) @@ -233,39 +231,43 @@ static void citp_get_process_name(void) ci_snprintf(citp.process_path, sizeof(citp.process_path), ""); - n = readlink("/proc/self/exe", citp.process_path, - sizeof(citp.process_path)); - if (n < 0) + n = readlink("/proc/self/exe", citp.process_path, sizeof(citp.process_path)); + if( n < 0 ) return; n = CI_MIN(n + 1, sizeof(citp.process_path)); citp.process_path[n - 1] = '\0'; citp.process_name = citp.process_path + n - 2; - while (citp.process_name > citp.process_path && - citp.process_name[-1] != '/') + while( + citp.process_name > citp.process_path && citp.process_name[-1] != '/' ) --citp.process_name; } static int get_env_opt_int(const char* name, int old_val, int hex) -{ const char* s; +{ + const char* s; int new_val; char dummy; if( (s = getenv(name)) ) { if( sscanf(s, hex ? "%x %c" : "%d %c", &new_val, &dummy) == 1 ) /*! 
TODO: should use option value range checking here */ return new_val; - else if (s[0] != '\0') + else if( s[0] != '\0' ) ci_log("citp: bad option '%s=%s'", name, s); } return old_val; } -#define GET_ENV_OPT_INT(envstr, var) \ - do{ opts->var = get_env_opt_int((envstr), opts->var, 0); }while(0) +#define GET_ENV_OPT_INT(envstr, var) \ + do { \ + opts->var = get_env_opt_int((envstr), opts->var, 0); \ + } while( 0 ) -#define GET_ENV_OPT_HEX(envstr, var) \ - do{ opts->var = get_env_opt_int((envstr), opts->var, 1); }while(0) +#define GET_ENV_OPT_HEX(envstr, var) \ + do { \ + opts->var = get_env_opt_int((envstr), opts->var, 1); \ + } while( 0 ) /* This function assumes an option of the same form and types as @@ -273,18 +275,19 @@ static int get_env_opt_int(const char* name, int old_val, int hex) */ static void get_env_opt_port_list(ci_uint64* opt, const char* name) { - char *s; + char* s; unsigned v; if( (s = getenv(name)) ) { + ci_log("GETTING LIST %s", name); /* The memory used for this list is never freed, as we need it - * persist until the process terminates + * persist until the process terminates */ - *opt = (ci_uint64)(ci_uintptr_t)malloc(sizeof(ci_dllist)); + *opt = (ci_uint64) (ci_uintptr_t) malloc(sizeof(ci_dllist)); if( ! 
*opt ) log("Could not allocate memory for %s list", name); else { - struct ci_port_list *curr; - ci_dllist *opt_list = (ci_dllist*)(ci_uintptr_t)*opt; + struct ci_port_list* curr; + ci_dllist* opt_list = (ci_dllist*) (ci_uintptr_t) *opt; ci_dllist_init(opt_list); while( sscanf(s, "%u", &v) == 1 ) { @@ -297,8 +300,7 @@ static void get_env_opt_port_list(ci_uint64* opt, const char* name) if( curr->port != v ) { log("ERROR: %s contains value that is too large: %u", name, v); free(curr); - } - else { + } else { curr->port = htons(curr->port); ci_dllist_push(opt_list, &curr->link); } @@ -311,8 +313,8 @@ static void get_env_opt_port_list(ci_uint64* opt, const char* name) } } -static void citp_update_and_crosscheck(ci_netif_config_opts* netif_opts, - citp_opts_t* citp_opts) +static void citp_update_and_crosscheck( + ci_netif_config_opts* netif_opts, citp_opts_t* citp_opts) { /* * ci_netif_config_opts_getenv() is called before @@ -321,14 +323,15 @@ static void citp_update_and_crosscheck(ci_netif_config_opts* netif_opts, * making netifs to inherit flags if the O/S is * being forced to do so */ - if (citp_opts->accept_force_inherit_nonblock) + if( citp_opts->accept_force_inherit_nonblock ) netif_opts->accept_inherit_nonblock = 1; if( citp_opts->ul_epoll == 0 && netif_opts->int_driven == 0 ) { - ci_log("EF_INT_DRIVEN=0 and EF_UL_EPOLL=0 are not compatible. " - "EF_INT_DRIVEN can be set to 0 implicitly, because of non-zero " - "EF_POLL_USEC. If you need both spinning and EF_UL_EPOLL=0, " - "please set EF_INT_DRIVEN=1 explicitly."); + ci_log( + "EF_INT_DRIVEN=0 and EF_UL_EPOLL=0 are not compatible. " + "EF_INT_DRIVEN can be set to 0 implicitly, because of non-zero " + "EF_POLL_USEC. 
If you need both spinning and EF_UL_EPOLL=0, " + "please set EF_INT_DRIVEN=1 explicitly."); } return; } @@ -341,9 +344,9 @@ static void citp_opts_getenv(citp_opts_t* opts) unsigned v; opts->log_via_ioctl = 3; - GET_ENV_OPT_INT("EF_LOG_VIA_IOCTL", log_via_ioctl); + GET_ENV_OPT_INT("EF_LOG_VIA_IOCTL", log_via_ioctl); - if( (s = getenv("EF_LOG_FILE")) && opts->log_via_ioctl == 3) { + if( (s = getenv("EF_LOG_FILE")) && opts->log_via_ioctl == 3 ) { opts->log_via_ioctl = 0; citp_log_to_file(s); } else if( opts->log_via_ioctl == 3 ) { @@ -356,7 +359,7 @@ static void citp_opts_getenv(citp_opts_t* opts) } if( opts->log_via_ioctl ) { - ci_log_options &=~ CI_LOG_PID; + ci_log_options &= ~CI_LOG_PID; citp_setup_logging_change(citp_log_fn_drv); } else { GET_ENV_OPT_INT("EF_LOG_TIMESTAMPS", log_timestamps); @@ -401,52 +404,53 @@ static void citp_opts_getenv(citp_opts_t* opts) opts->stack_lock_buzz = 1; } - GET_ENV_OPT_HEX("EF_UNIX_LOG", log_level); - GET_ENV_OPT_INT("EF_PROBE", probe); - GET_ENV_OPT_INT("EF_TCP", ul_tcp); - GET_ENV_OPT_INT("EF_UDP", ul_udp); - GET_ENV_OPT_INT("EF_UL_SELECT", ul_select); - GET_ENV_OPT_INT("EF_SELECT_SPIN", ul_select_spin); - GET_ENV_OPT_INT("EF_SELECT_FAST", ul_select_fast); - GET_ENV_OPT_INT("EF_UL_POLL", ul_poll); - GET_ENV_OPT_INT("EF_POLL_SPIN", ul_poll_spin); - GET_ENV_OPT_INT("EF_POLL_FAST", ul_poll_fast); - GET_ENV_OPT_INT("EF_POLL_FAST_USEC", ul_poll_fast_usec); + GET_ENV_OPT_HEX("EF_UNIX_LOG", log_level); + GET_ENV_OPT_INT("EF_PROBE", probe); + GET_ENV_OPT_INT("EF_TCP", ul_tcp); + GET_ENV_OPT_INT("EF_UDP", ul_udp); + GET_ENV_OPT_INT("EF_UL_SELECT", ul_select); + GET_ENV_OPT_INT("EF_SELECT_SPIN", ul_select_spin); + GET_ENV_OPT_INT("EF_SELECT_FAST", ul_select_fast); + GET_ENV_OPT_INT("EF_UL_POLL", ul_poll); + GET_ENV_OPT_INT("EF_POLL_SPIN", ul_poll_spin); + GET_ENV_OPT_INT("EF_POLL_FAST", ul_poll_fast); + GET_ENV_OPT_INT("EF_POLL_FAST_USEC", ul_poll_fast_usec); GET_ENV_OPT_INT("EF_POLL_NONBLOCK_FAST_USEC", ul_poll_nonblock_fast_usec); - 
GET_ENV_OPT_INT("EF_SELECT_FAST_USEC", ul_select_fast_usec); - GET_ENV_OPT_INT("EF_SELECT_NONBLOCK_FAST_USEC", ul_select_nonblock_fast_usec); - GET_ENV_OPT_INT("EF_UDP_RECV_SPIN", udp_recv_spin); - GET_ENV_OPT_INT("EF_UDP_SEND_SPIN", udp_send_spin); - GET_ENV_OPT_INT("EF_TCP_RECV_SPIN", tcp_recv_spin); - GET_ENV_OPT_INT("EF_TCP_SEND_SPIN", tcp_send_spin); + GET_ENV_OPT_INT("EF_SELECT_FAST_USEC", ul_select_fast_usec); + GET_ENV_OPT_INT( + "EF_SELECT_NONBLOCK_FAST_USEC", ul_select_nonblock_fast_usec); + GET_ENV_OPT_INT("EF_UDP_RECV_SPIN", udp_recv_spin); + GET_ENV_OPT_INT("EF_UDP_SEND_SPIN", udp_send_spin); + GET_ENV_OPT_INT("EF_TCP_RECV_SPIN", tcp_recv_spin); + GET_ENV_OPT_INT("EF_TCP_SEND_SPIN", tcp_send_spin); GET_ENV_OPT_INT("EF_TCP_ACCEPT_SPIN", tcp_accept_spin); - GET_ENV_OPT_INT("EF_TCP_CONNECT_SPIN",tcp_connect_spin); - GET_ENV_OPT_INT("EF_PKT_WAIT_SPIN", pkt_wait_spin); - GET_ENV_OPT_INT("EF_PIPE_RECV_SPIN", pipe_recv_spin); - GET_ENV_OPT_INT("EF_PIPE_SEND_SPIN", pipe_send_spin); - GET_ENV_OPT_INT("EF_PIPE_SIZE", pipe_size); - GET_ENV_OPT_INT("EF_SOCK_LOCK_BUZZ", sock_lock_buzz); + GET_ENV_OPT_INT("EF_TCP_CONNECT_SPIN", tcp_connect_spin); + GET_ENV_OPT_INT("EF_PKT_WAIT_SPIN", pkt_wait_spin); + GET_ENV_OPT_INT("EF_PIPE_RECV_SPIN", pipe_recv_spin); + GET_ENV_OPT_INT("EF_PIPE_SEND_SPIN", pipe_send_spin); + GET_ENV_OPT_INT("EF_PIPE_SIZE", pipe_size); + GET_ENV_OPT_INT("EF_SOCK_LOCK_BUZZ", sock_lock_buzz); GET_ENV_OPT_INT("EF_STACK_LOCK_BUZZ", stack_lock_buzz); GET_ENV_OPT_INT("EF_SO_BUSY_POLL_SPIN", so_busy_poll_spin); - GET_ENV_OPT_INT("EF_UL_EPOLL", ul_epoll); - GET_ENV_OPT_INT("EF_EPOLL_SPIN", ul_epoll_spin); - GET_ENV_OPT_INT("EF_EPOLL_CTL_FAST", ul_epoll_ctl_fast); - GET_ENV_OPT_INT("EF_EPOLL_CTL_HANDOFF",ul_epoll_ctl_handoff); - GET_ENV_OPT_INT("EF_EPOLL_MT_SAFE", ul_epoll_mt_safe); + GET_ENV_OPT_INT("EF_UL_EPOLL", ul_epoll); + GET_ENV_OPT_INT("EF_EPOLL_SPIN", ul_epoll_spin); + GET_ENV_OPT_INT("EF_EPOLL_CTL_FAST", ul_epoll_ctl_fast); + 
GET_ENV_OPT_INT("EF_EPOLL_CTL_HANDOFF", ul_epoll_ctl_handoff); + GET_ENV_OPT_INT("EF_EPOLL_MT_SAFE", ul_epoll_mt_safe); GET_ENV_OPT_INT("EF_WODA_SINGLE_INTERFACE", woda_single_if); - GET_ENV_OPT_INT("EF_FDTABLE_SIZE", fdtable_size); - GET_ENV_OPT_INT("EF_SPIN_USEC", ul_spin_usec); - GET_ENV_OPT_INT("EF_SLEEP_SPIN_USEC", sleep_spin_usec); - GET_ENV_OPT_INT("EF_STACK_PER_THREAD",stack_per_thread); - GET_ENV_OPT_INT("EF_DONT_ACCELERATE", dont_accelerate); - GET_ENV_OPT_INT("EF_FDTABLE_STRICT", fdtable_strict); - GET_ENV_OPT_INT("EF_FDS_MT_SAFE", fds_mt_safe); - GET_ENV_OPT_INT("EF_NO_FAIL", no_fail); - GET_ENV_OPT_INT("EF_SA_ONSTACK_INTERCEPT", sa_onstack_intercept); - GET_ENV_OPT_INT("EF_ACCEPT_INHERIT_NONBLOCK", accept_force_inherit_nonblock); - GET_ENV_OPT_INT("EF_VFORK_MODE", vfork_mode); - GET_ENV_OPT_INT("EF_PIPE", ul_pipe); - GET_ENV_OPT_INT("EF_SYNC_CPLANE_AT_CREATE", sync_cplane); + GET_ENV_OPT_INT("EF_FDTABLE_SIZE", fdtable_size); + GET_ENV_OPT_INT("EF_SPIN_USEC", ul_spin_usec); + GET_ENV_OPT_INT("EF_SLEEP_SPIN_USEC", sleep_spin_usec); + GET_ENV_OPT_INT("EF_STACK_PER_THREAD", stack_per_thread); + GET_ENV_OPT_INT("EF_DONT_ACCELERATE", dont_accelerate); + GET_ENV_OPT_INT("EF_FDTABLE_STRICT", fdtable_strict); + GET_ENV_OPT_INT("EF_FDS_MT_SAFE", fds_mt_safe); + GET_ENV_OPT_INT("EF_NO_FAIL", no_fail); + GET_ENV_OPT_INT("EF_SA_ONSTACK_INTERCEPT", sa_onstack_intercept); + GET_ENV_OPT_INT("EF_ACCEPT_INHERIT_NONBLOCK", accept_force_inherit_nonblock); + GET_ENV_OPT_INT("EF_VFORK_MODE", vfork_mode); + GET_ENV_OPT_INT("EF_PIPE", ul_pipe); + GET_ENV_OPT_INT("EF_SYNC_CPLANE_AT_CREATE", sync_cplane); if( (s = getenv("EF_FORK_NETIF")) && sscanf(s, "%x", &v) == 1 ) { opts->fork_netif = CI_MIN(v, CI_UNIX_FORK_NETIF_BOTH); @@ -458,7 +462,7 @@ static void citp_opts_getenv(citp_opts_t* opts) if( (s = getenv("EF_SIGNALS_NOPOSTPONE")) ) { opts->signals_no_postpone = 0; while( sscanf(s, "%u", &v) == 1 ) { - opts->signals_no_postpone |= (1ULL << (v-1)); + opts->signals_no_postpone 
|= (1ULL << (v - 1)); s = strchr(s, ','); if( s == NULL ) break; @@ -466,19 +470,19 @@ static void citp_opts_getenv(citp_opts_t* opts) } } /* SIGONLOAD is used internally, and should not be postponed. */ - opts->signals_no_postpone |= (1ULL << (SIGONLOAD-1)); + opts->signals_no_postpone |= (1ULL << (SIGONLOAD - 1)); if( (s = getenv("EF_CLUSTER_NAME")) ) { strncpy(opts->cluster_name, s, CI_CFG_CLUSTER_NAME_LEN); opts->cluster_name[CI_CFG_CLUSTER_NAME_LEN] = '\0'; - } - else { + } else { opts->cluster_name[0] = '\0'; } - GET_ENV_OPT_INT("EF_CLUSTER_SIZE", cluster_size); + GET_ENV_OPT_INT("EF_CLUSTER_SIZE", cluster_size); if( opts->cluster_size < 0 ) - log("ERROR: invalid cluster_size. cluster_size needs to be 0 or a positive number"); - GET_ENV_OPT_INT("EF_CLUSTER_RESTART", cluster_restart_opt); + log("ERROR: invalid cluster_size. cluster_size needs to be 0 or a " + "positive number"); + GET_ENV_OPT_INT("EF_CLUSTER_RESTART", cluster_restart_opt); GET_ENV_OPT_INT("EF_CLUSTER_HOT_RESTART", cluster_hot_restart_opt); get_env_opt_port_list(&opts->tcp_reuseports, "EF_TCP_FORCE_REUSEPORT"); get_env_opt_port_list(&opts->udp_reuseports, "EF_UDP_FORCE_REUSEPORT"); @@ -487,7 +491,7 @@ static void citp_opts_getenv(citp_opts_t* opts) get_env_opt_port_list(&opts->sock_cache_ports, "EF_SOCKET_CACHE_PORTS"); #endif - GET_ENV_OPT_INT("EF_ONLOAD_FD_BASE", fd_base); + GET_ENV_OPT_INT("EF_ONLOAD_FD_BASE", fd_base); } @@ -498,59 +502,47 @@ static void citp_opts_validate_env(void) #undef CI_CFG_OPTGROUP #undef CI_CFG_OPT -#define CI_CFG_OPT(env, name, type, doc, bits, group, default, minimum, maximum, pres) env, +#define CI_CFG_OPT( \ + env, name, type, doc, bits, group, default, minimum, maximum, pres) \ + env, char* ef_names[] = { #include #include #include - "EF_NAME", - "EF_USERBUILD", - "EF_NO_PRELOAD_RESTORE", - "EF_LD_PRELOAD", - "EF_CLUSTER_NAME", - "EF_LOG_THREAD", - "EF_LOG_FILE", - "EF_VI_TXQ_SIZE", - "EF_VI_RXQ_SIZE", - "EF_VI_EVQ_SIZE", - "EF_VI_CTPIO_WB_TICKS", - 
"EF_VI_CTPIO_MODE", - "EF_VI_CLUSTER_SOCKET", - "EF_VI_PD_FLAGS", - "EF_VI_LOG_LEVEL", - "EF_VI_EVQ_CLEAR_STRIDE", - "EF_BUILDTREE_UL", - NULL + "EF_NAME", "EF_USERBUILD", "EF_NO_PRELOAD_RESTORE", "EF_LD_PRELOAD", + "EF_CLUSTER_NAME", "EF_LOG_THREAD", "EF_LOG_FILE", "EF_VI_TXQ_SIZE", + "EF_VI_RXQ_SIZE", "EF_VI_EVQ_SIZE", "EF_VI_CTPIO_WB_TICKS", + "EF_VI_CTPIO_MODE", "EF_VI_CLUSTER_SOCKET", "EF_VI_PD_FLAGS", + "EF_VI_LOG_LEVEL", "EF_VI_EVQ_CLEAR_STRIDE", "EF_BUILDTREE_UL", NULL }; char** env_name; int i; int len; char* s; - + s = getenv("EF_VALIDATE_ENV"); if( s ) { char* s_end; long v; v = strtol(s, &s_end, 0); - + if( ! s_end ) ci_log("Invalid option for EF_VALIDATE_ENV: \"%s\"", s); else if( ! v ) return; } - + env_name = environ; while( *env_name != NULL ) { - if( ! strncmp(*env_name, "EF_", 3) ) { - len = strchrnul(*env_name, '=') - *env_name; - for( i = 0; ef_names[i]; ++i ) { + len = strchrnul(*env_name, '=') - *env_name; + for( i = 0; ef_names[i]; ++i ) { if( strlen(ef_names[i]) == len && ! strncmp(ef_names[i], *env_name, len) ) break; } - + if( ! 
ef_names[i] ) ci_log("Unknown option \"%s\" identified", *env_name); } @@ -559,16 +551,14 @@ static void citp_opts_validate_env(void) } -static int -citp_cfg_init(void) +static int citp_cfg_init(void) { ci_cfg_query(); return 0; } -static int -citp_transport_init(void) +static int citp_transport_init(void) { const char* s; @@ -591,14 +581,13 @@ citp_transport_init(void) citp_oo_get_cpu_khz(&citp.cpu_khz); citp.spin_cycles = citp_usec_to_cycles64(CITP_OPTS.ul_spin_usec); - citp.poll_nonblock_fast_cycles = - citp_usec_to_cycles64(CITP_OPTS.ul_poll_nonblock_fast_usec); - citp.poll_fast_cycles = - citp_usec_to_cycles64(CITP_OPTS.ul_poll_fast_usec); - citp.select_nonblock_fast_cycles = - citp_usec_to_cycles64(CITP_OPTS.ul_select_nonblock_fast_usec); - citp.select_fast_cycles = - citp_usec_to_cycles64(CITP_OPTS.ul_select_fast_usec); + citp.poll_nonblock_fast_cycles = + citp_usec_to_cycles64(CITP_OPTS.ul_poll_nonblock_fast_usec); + citp.poll_fast_cycles = citp_usec_to_cycles64(CITP_OPTS.ul_poll_fast_usec); + citp.select_nonblock_fast_cycles = + citp_usec_to_cycles64(CITP_OPTS.ul_select_nonblock_fast_usec); + citp.select_fast_cycles = + citp_usec_to_cycles64(CITP_OPTS.ul_select_fast_usec); ci_tp_init(__oo_per_thread_init_thread, oo_signal_terminate); citp_update_and_crosscheck(&ci_cfg_opts.netif_opts, &CITP_OPTS); @@ -619,8 +608,7 @@ static int citp_transport_register(void) int _citp_do_init_inprogress = 0; typedef int (*cipt_init_func_t)(void); -cipt_init_func_t cipt_init_funcs[] = -{ +cipt_init_func_t cipt_init_funcs[] = { #define STARTUP_ITEM(level, func) func, #include "startup_order.h" #undef STARTUP_ITEM @@ -633,21 +621,20 @@ int citp_do_init(int max_init_level) int saved_errno = errno; if( citp.init_level < max_init_level ) { - /* If threads are launched very early in program startup, then there could be - * a race here as multiple threads attempt to initialise on first access. 
- * The guard must be recursive, since this function might be re-entered during - * initialisation. + /* If threads are launched very early in program startup, then there could + * be a race here as multiple threads attempt to initialise on first + * access. The guard must be recursive, since this function might be + * re-entered during initialisation. */ static pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; pthread_mutex_lock(&mutex); _citp_do_init_inprogress++; - for (level = citp.init_level; - level < CI_MIN(max_init_level, CITP_INIT_MAX); - level++) { + for( level = citp.init_level; + level < CI_MIN(max_init_level, CITP_INIT_MAX); level++ ) { rc = cipt_init_funcs[level](); - if (rc < 0) + if( rc < 0 ) break; citp.init_level = level + 1; } @@ -663,14 +650,15 @@ int citp_do_init(int max_init_level) void _init(void) { - if (getpagesize() != CI_PAGE_SIZE) - ci_fail(("Page size mismatch, expected %u, " - "but the current value is %u", - CI_PAGE_SIZE, getpagesize())); + if( getpagesize() != CI_PAGE_SIZE ) + ci_fail( + ("Page size mismatch, expected %u, " + "but the current value is %u", + CI_PAGE_SIZE, getpagesize())); /* must not do any logging yet... */ if( citp_do_init(CITP_INIT_MAX) < 0 ) ci_fail(("EtherFabric transport library: failed to initialise (%d)", - citp.init_level)); + citp.init_level)); Log_S(log("citp: initialisation done.")); } @@ -683,7 +671,6 @@ void _fini(void) } - /* We can't use variables from onload_version_msg(), because strlen() * is not available when the library is run as an executable. 
* And even a simple function like `local_strlen()` results in calling @@ -694,15 +681,18 @@ void _fini(void) Ensure that no libc() functions are used */ void onload_version_msg(void) { - const char *msg = ONLOAD_PRODUCT" "ONLOAD_VERSION"\n"ONLOAD_COPYRIGHT"\n" - "Built: "__DATE__" "__TIME__" " + const char* msg = ONLOAD_PRODUCT + " " ONLOAD_VERSION "\n" ONLOAD_COPYRIGHT + "\n" + "Built: "__DATE__ + " "__TIME__ + " " #ifdef NDEBUG - "(release)" + "(release)" #else - "(debug)" + "(debug)" #endif - "\nBuild profile header: " - OO_STRINGIFY(TRANSPORT_CONFIG_OPT_HDR) "\n"; + "\nBuild profile header: " OO_STRINGIFY(TRANSPORT_CONFIG_OPT_HDR) "\n"; my_syscall3(write, STDOUT_FILENO, (long) msg, strlen(msg)); my_syscall3(exit, 0, 0, 0); diff --git a/src/lib/transport/unix/tcp_fd.c b/src/lib/transport/unix/tcp_fd.c index 9e5830d2f..88301341c 100644 --- a/src/lib/transport/unix/tcp_fd.c +++ b/src/lib/transport/unix/tcp_fd.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2003-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author djr/ctk ** \brief Sockets interface to user level TCP @@ -30,7 +30,7 @@ #include -#define LPF "citp_tcp_" +#define LPF "citp_tcp_" #if CI_CFG_FD_CACHING int ci_tcp_close(ci_netif* netif, ci_tcp_state* ts); @@ -41,35 +41,37 @@ int ci_tcp_close(ci_netif* netif, ci_tcp_state* ts); #if CI_CFG_FD_CACHING #define CI_CACHE_FIXUP_FLAGS (O_NONBLOCK | O_CLOEXEC) -static int -citp_tcp_cached_fixup_flags(ci_netif* ni, ci_tcp_state* ts, int fd, int flags) +static int citp_tcp_cached_fixup_flags( + ci_netif* ni, ci_tcp_state* ts, int fd, int flags) { int rc = 0; /* Socket caching is only supported on linux, where these are identical */ CI_BUILD_ASSERT(O_NONBLOCK == O_NDELAY); - if( !!(flags & O_NONBLOCK) != - !!(ts->s.b.sb_aflags & CI_SB_AFLAG_O_NONBLOCK) ) { + if( ! ! (flags & O_NONBLOCK) != + ! ! 
(ts->s.b.sb_aflags & CI_SB_AFLAG_O_NONBLOCK) ) { /* Flip the value of the onload flag */ ci_atomic32_merge(&ts->s.b.sb_aflags, ~ts->s.b.sb_aflags, - CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY); + CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY); /* And tell others that our flag is to be trusted, * but the OS flag is not. */ ci_atomic32_or(&ts->s.b.sb_aflags, CI_SB_AFLAG_O_NONBLOCK_UNSYNCED); } - if( !!(flags & O_CLOEXEC) != - !!(ts->s.b.sb_aflags & CI_SB_AFLAG_O_CLOEXEC) ) { + if( ! ! (flags & O_CLOEXEC) != + ! ! (ts->s.b.sb_aflags & CI_SB_AFLAG_O_CLOEXEC) ) { /* Set new value */ rc = ci_sys_fcntl(fd, F_SETFD, (flags & O_CLOEXEC) ? FD_CLOEXEC : 0); /* Flip the value of the onload flag */ if( rc == 0 ) - ci_atomic32_merge(&ts->s.b.sb_aflags, ~ts->s.b.sb_aflags, - CI_SB_AFLAG_O_CLOEXEC); + ci_atomic32_merge( + &ts->s.b.sb_aflags, ~ts->s.b.sb_aflags, CI_SB_AFLAG_O_CLOEXEC); else - NI_LOG(ni, RESOURCE_WARNINGS, "%s: Failed to modify O_CLOEXEC setting of" - " cached socket to new value", __FUNCTION__); + NI_LOG(ni, RESOURCE_WARNINGS, + "%s: Failed to modify O_CLOEXEC setting of" + " cached socket to new value", + __FUNCTION__); } return rc; @@ -89,8 +91,7 @@ citp_tcp_cached_fixup_flags(ci_netif* ni, ci_tcp_state* ts, int fd, int flags) * hold the stack lock. */ static ci_fd_t citp_tcp_ep_acquire_fd(ci_netif* netif, ci_tcp_state* ts, - ci_tcp_socket_listen* listener, - int domain, int type, int flags) + ci_tcp_socket_listen* listener, int domain, int type, int flags) { ci_fd_t fd = -1; #if CI_CFG_FD_CACHING @@ -102,10 +103,11 @@ static ci_fd_t citp_tcp_ep_acquire_fd(ci_netif* netif, ci_tcp_state* ts, /* As well as protecting the explicit fdtable operations that follow, the * fdtable lock prevents a probe of the new fd until we've finished setting * it up. 
*/ - if( fdtable_strict() ) CITP_FDTABLE_LOCK(); + if( fdtable_strict() ) + CITP_FDTABLE_LOCK(); #if CI_CFG_FD_CACHING - from_cache = ci_tcp_is_cached(ts); + from_cache = ci_tcp_is_cached(ts); pid_matches = ts->cached_on_pid == citp_getpid(); /* It is possible that someone is concurrently trying to dup2/3 onto the @@ -120,10 +122,9 @@ static ci_fd_t citp_tcp_ep_acquire_fd(ci_netif* netif, ci_tcp_state* ts, * consider only doing this check if fdtable_strict()... */ if( (from_cache != 0) && - !(ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE_NO_FD) && - pid_matches ) { - int prev = citp_fdtable_new_fd_set(ts->cached_on_fd, fdip_busy, - fdtable_strict()); + ! (ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE_NO_FD) && pid_matches ) { + int prev = + citp_fdtable_new_fd_set(ts->cached_on_fd, fdip_busy, fdtable_strict()); /* Now we're in one of two states: * - there was dup2/3 in progress onto our fd, but it's now completed @@ -137,55 +138,57 @@ static ci_fd_t citp_tcp_ep_acquire_fd(ci_netif* netif, ci_tcp_state* ts, has_fd = ! (ts->s.b.sb_aflags & CI_SB_AFLAG_IN_CACHE_NO_FD); if( ! from_cache ) - ci_assert_equal(S_TO_EPS(netif,ts)->fd, CI_FD_BAD); + ci_assert_equal(S_TO_EPS(netif, ts)->fd, CI_FD_BAD); if( (from_cache == 0) || (! has_fd || ! pid_matches) ) { if( ! pid_matches ) - Log_EP(ci_log("%s: calling sock attach ep %d:%d", __FUNCTION__, NI_ID(netif), S_SP(ts))); + Log_EP(ci_log("%s: calling sock attach ep %d:%d", __FUNCTION__, + NI_ID(netif), S_SP(ts))); #endif /* Need to create new fd */ ci_fd_t stack_fd = ci_netif_get_driver_handle(netif); - oo_sp sp = S_SP(ts); - fd = ( listener != NULL ) ? - ci_tcp_helper_tcp_accept_sock_attach(stack_fd, sp, flags) : - ci_tcp_helper_sock_attach(stack_fd, sp, domain, type); + oo_sp sp = S_SP(ts); + fd = (listener != NULL) + ? 
ci_tcp_helper_tcp_accept_sock_attach(stack_fd, sp, flags) + : ci_tcp_helper_sock_attach(stack_fd, sp, domain, type); if( fd < 0 ) { - if( fdtable_strict() ) CITP_FDTABLE_UNLOCK(); + if( fdtable_strict() ) + CITP_FDTABLE_UNLOCK(); return fd; } citp_fdtable_new_fd_set(fd, fdip_busy, fdtable_strict()); #if CI_CFG_FD_CACHING - } - else { + } else { /* Got endpoint with fd belonging to current process */ ci_assert_equal(ts->cached_on_pid, citp_getpid()); fd = ts->cached_on_fd; // See bug 78546 - //ci_assert_equal(fd, S_TO_EPS(netif, ts)->fd); + // ci_assert_equal(fd, S_TO_EPS(netif, ts)->fd); - /* It's possible that the cached socket has different flags from those - * requested - if so we need to sort that out. - */ + /* It's possible that the cached socket has different flags from those + * requested - if so we need to sort that out. + */ citp_tcp_cached_fixup_flags(netif, ts, fd, flags); ts->s.domain = domain; - /* We're reusing a cached socket. We don't attach, but need to set the - * flags that would be set on attach. We also clear the cached_on_fd - * state. - * - * This state must be consistent before we add the entry to the fdtable. - */ + /* We're reusing a cached socket. We don't attach, but need to set the + * flags that would be set on attach. We also clear the cached_on_fd + * state. + * + * This state must be consistent before we add the entry to the fdtable. + */ ci_atomic32_and(&ts->s.b.sb_aflags, - ~(CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_TCP_IN_ACCEPTQ | - CI_SB_AFLAG_IN_CACHE | CI_SB_AFLAG_IN_PASSIVE_CACHE)); - ts->cached_on_fd = -1; + ~(CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_TCP_IN_ACCEPTQ | + CI_SB_AFLAG_IN_CACHE | CI_SB_AFLAG_IN_PASSIVE_CACHE)); + ts->cached_on_fd = -1; ts->cached_on_pid = -1; } #endif - if( fdtable_strict() ) CITP_FDTABLE_UNLOCK(); + if( fdtable_strict() ) + CITP_FDTABLE_UNLOCK(); /* By this point, [ts] should have had its cache bit cleared. */ ci_assert(! 
ci_tcp_is_cached(ts)); @@ -198,7 +201,7 @@ static ci_fd_t citp_tcp_ep_acquire_fd(ci_netif* netif, ci_tcp_state* ts, ci_fd_t ci_tcp_ep_ctor(citp_socket* ep, ci_netif* netif, int domain, int type) { ci_tcp_state* ts = NULL; - ci_fd_t fd; + ci_fd_t fd; ci_assert(ep); ci_assert(netif); @@ -216,23 +219,24 @@ ci_fd_t ci_tcp_ep_ctor(citp_socket* ep, ci_netif* netif, int domain, int type) ci_netif_unlock(netif); if( ts == NULL ) { - LOG_E(ci_log("%s: [%d] out of socket buffers", __FUNCTION__,NI_ID(netif))); + LOG_E( + ci_log("%s: [%d] out of socket buffers", __FUNCTION__, NI_ID(netif))); return -EMFILE; } fd = citp_tcp_ep_acquire_fd(netif, ts, NULL, domain, type, - type + type #if CI_CFG_FD_CACHING - & CI_CACHE_FIXUP_FLAGS + & CI_CACHE_FIXUP_FLAGS #endif - ); + ); if( fd < 0 ) { if( fd == -EAFNOSUPPORT ) LOG_U(ci_log("%s: citp_tcp_ep_acquire_fd (domain=%d, type=%d) failed %d", - __FUNCTION__, domain, type, fd)); + __FUNCTION__, domain, type, fd)); else LOG_E(ci_log("%s: citp_tcp_ep_acquire_fd (domain=%d, type=%d) failed %d", - __FUNCTION__, domain, type, fd)); + __FUNCTION__, domain, type, fd)); return fd; } @@ -249,7 +253,7 @@ ci_fd_t ci_tcp_ep_ctor(citp_socket* ep, ci_netif* netif, int domain, int type) ts->s.laddr = addr_any; #endif ep->netif = netif; - ep->s = &ts->s; + ep->s = &ts->s; #ifndef NDEBUG /* We hold the only reference to [ep] and its fd is marked busy, so its @@ -265,13 +269,12 @@ ci_fd_t ci_tcp_ep_ctor(citp_socket* ep, ci_netif* netif, int domain, int type) #endif -static int -citp_tcp_socket(int domain, int type, int protocol) +static int citp_tcp_socket(int domain, int type, int protocol) { - citp_fdinfo* fdi; + citp_fdinfo* fdi; citp_sock_fdi* epi; - int fd, rc; - ci_netif* ni; + int fd, rc; + ci_netif* ni; Log_VSS(ci_log(LPF "socket(%d, %d, %d)", domain, type, protocol)); @@ -298,7 +301,7 @@ citp_tcp_socket(int domain, int type, int protocol) goto fail2; } - if((fd = ci_tcp_ep_ctor( &epi->sock, ni, domain, type)) < 0) { + if( (fd = 
ci_tcp_ep_ctor(&epi->sock, ni, domain, type)) < 0 ) { Log_U(ci_log(LPF "socket: tcp_ep_ctor failed")); errno = -fd; goto fail3; @@ -311,21 +314,22 @@ citp_tcp_socket(int domain, int type, int protocol) ci_atomic32_and(&epi->sock.s->b.sb_aflags, ~CI_SB_AFLAG_NOT_READY); citp_fdtable_insert(fdi, fd, 0); - Log_VSS(ci_log(LPF "socket(%d, %d, %d) = "EF_FMT, domain, - type, protocol, NI_ID(ni), SC_FMT(epi->sock.s), fd)); + Log_VSS(ci_log(LPF "socket(%d, %d, %d) = " EF_FMT, domain, type, protocol, + NI_ID(ni), SC_FMT(epi->sock.s), fd)); return fd; - fail3: +fail3: if( CITP_OPTS.no_fail && errno != ELIBACC ) CITP_STATS_NETIF(++ni->state->stats.tcp_handover_socket); citp_netif_release_ref(ni, 0); - fail2: +fail2: CI_FREE_OBJ(epi); - fail1: +fail1: /* BUG1408: Fail gracefully. We let the OS have a go at this so long as it's * not been caused by a driver/library mis-match */ if( CITP_OPTS.no_fail && errno != ELIBACC ) { - Log_U(ci_log("%s: failed (errno:%d) - PASSING TO OS", __FUNCTION__, errno)); + Log_U( + ci_log("%s: failed (errno:%d) - PASSING TO OS", __FUNCTION__, errno)); return CI_SOCKET_HANDOVER; } return -1; @@ -334,8 +338,8 @@ citp_tcp_socket(int domain, int type, int protocol) citp_fdinfo* citp_tcp_dup(citp_fdinfo* orig_fdi) { - citp_socket* orig_sock = fdi_to_socket(orig_fdi); - citp_sock_fdi* sock_fdi = CI_ALLOC_OBJ(citp_sock_fdi); + citp_socket* orig_sock = fdi_to_socket(orig_fdi); + citp_sock_fdi* sock_fdi = CI_ALLOC_OBJ(citp_sock_fdi); if( sock_fdi ) { citp_fdinfo_init(&sock_fdi->fdinfo, orig_fdi->protocol); sock_fdi->sock = *orig_sock; @@ -347,9 +351,9 @@ citp_fdinfo* citp_tcp_dup(citp_fdinfo* orig_fdi) ci_inline ci_uint64 linger_hash(ci_sock_cmn* s) { - return (ci_uint64)(sock_lport_be16(s) << 16) | - (ci_uint64)sock_rport_be16(s) | - ((ci_uint64)sock_raddr_be32(s) << 32); + return (ci_uint64) (sock_lport_be16(s) << 16) | + (ci_uint64) sock_rport_be16(s) | + ((ci_uint64) sock_raddr_be32(s) << 32); } #if CI_CFG_FD_CACHING @@ -358,19 +362,20 @@ static void 
citp_tcp_close(citp_fdinfo* fdinfo) citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); if( epi->sock.s->b.state == CI_TCP_LISTEN ) { - ci_netif* ni = epi->sock.netif; + ci_netif* ni = epi->sock.netif; ci_tcp_socket_listen* tls = SOCK_TO_TCP_LISTEN(epi->sock.s); if( ! (tls->s.s_flags & CI_SOCK_FLAG_SCALPASSIVE) ) { struct oo_p_dllink_state fd_list = - oo_p_dllink_sb(ni, &tls->s.b, &tls->epcache.fd_states); + oo_p_dllink_sb(ni, &tls->s.b, &tls->epcache.fd_states); struct oo_p_dllink_state l; ci_netif_lock(ni); - oo_p_dllink_for_each(ni, l, fd_list) { + oo_p_dllink_for_each(ni, l, fd_list) + { ci_tcp_state* ts = CI_CONTAINER(ci_tcp_state, epcache_fd_link, l.l); if( ts->cached_on_pid == citp_getpid() && - S_TO_EPS(epi->sock.netif, ts)->fd != CI_FD_BAD) { + S_TO_EPS(epi->sock.netif, ts)->fd != CI_FD_BAD ) { /* Fixme: should we move all the content of * ci_tcp_listen_uncache_fds() here? */ S_TO_EPS(epi->sock.netif, ts)->fd = CI_FD_BAD; @@ -378,6 +383,15 @@ static void citp_tcp_close(citp_fdinfo* fdinfo) } ci_netif_unlock(ni); } + } else if( epi->sock.s->b.state != CI_TCP_CLOSED ) { + int rc; + ci_netif* ni = epi->sock.netif; + ci_netif_lock(ni); + rc = ci_tcp_close(ni, SOCK_TO_TCP(epi->sock.s)); + if( rc != 0 ) { + LOG_TC(ci_log("Failed to close TCP Socket")); + } + ci_netif_unlock(ni); } } #endif @@ -391,7 +405,7 @@ static void citp_tcp_dtor(citp_fdinfo* fdinfo, int fdt_locked) * UL netif structure knows about this. Normal close() is already * handled in the fd caching code, but there are a lot of ways to close * a socket in abnormal way: handover, onload_move_fd, dup2/dup3. - * + * * We can insert this line in tcp_handover() at all, but here is just * one place for all the cases mentioned above. */ @@ -407,8 +421,8 @@ static void tcp_handover(citp_sock_fdi* sock_fdi) /* The O_NONBLOCK flag is not propagated to the O/S socket, so we have to ** fix it up when we handover. 
*/ - ci_sock_cmn* s = sock_fdi->sock.s; - int nonb_switch = -1; + ci_sock_cmn* s = sock_fdi->sock.s; + int nonb_switch = -1; ci_assert_flags(s->b.sb_aflags, CI_SB_AFLAG_OS_BACKED); @@ -416,27 +430,27 @@ static void tcp_handover(citp_sock_fdi* sock_fdi) /* O/S socket is already has O_NONBLOCK. Turn it off? */ if( ! (s->b.sb_aflags & CI_SB_AFLAG_O_NONBLOCK) ) nonb_switch = 0; - } - else if( s->b.sb_aflags & CI_SB_AFLAG_O_NONBLOCK ) + } else if( s->b.sb_aflags & CI_SB_AFLAG_O_NONBLOCK ) nonb_switch = 1; citp_fdinfo_handover(&sock_fdi->fdinfo, nonb_switch); } -static int citp_tcp_bind(citp_fdinfo* fdinfo, const struct sockaddr* sa, - socklen_t sa_len) +static int citp_tcp_bind( + citp_fdinfo* fdinfo, const struct sockaddr* sa, socklen_t sa_len) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); - citp_socket* ep = &epi->sock; - ci_sock_cmn* s = ep->s; - int rc; - -#if !CI_CFG_FAKE_IPV6 - Log_VSS(const struct sockaddr_in* sai = (const struct sockaddr_in*) sa; - ci_log(LPF "bind("EF_FMT", %s:%d, %d)", EF_PRI_ARGS(epi, fdinfo->fd), - (sai != NULL) ? ip_addr_str(sai->sin_addr.s_addr) : "(null)", - (sai != NULL) ? CI_BSWAP_BE16(sai->sin_port) : 0, sa_len)); + citp_socket* ep = &epi->sock; + ci_sock_cmn* s = ep->s; + int rc; + +#if ! CI_CFG_FAKE_IPV6 + Log_VSS( + const struct sockaddr_in* sai = (const struct sockaddr_in*) sa; + ci_log(LPF "bind(" EF_FMT ", %s:%d, %d)", EF_PRI_ARGS(epi, fdinfo->fd), + (sai != NULL) ? ip_addr_str(sai->sin_addr.s_addr) : "(null)", + (sai != NULL) ? 
CI_BSWAP_BE16(sai->sin_port) : 0, sa_len)); #endif ci_netif_lock_fdi(epi); @@ -454,7 +468,7 @@ static int citp_tcp_bind(citp_fdinfo* fdinfo, const struct sockaddr* sa, if( fdinfo == NULL ) return ci_sys_bind(fd, sa, sa_len); else { - ci_assert_equal( fdinfo->protocol->type, CITP_PASSTHROUGH_FD); + ci_assert_equal(fdinfo->protocol->type, CITP_PASSTHROUGH_FD); return citp_passthrough_bind(fdinfo, sa, sa_len); } } @@ -470,9 +484,8 @@ static int citp_tcp_bind(citp_fdinfo* fdinfo, const struct sockaddr* sa, /* The socket has moved so need to reprobe the fd. This will also * map the the new stack into user space of the executing process. */ - fdinfo = citp_reprobe_moved(fdinfo, - CI_FALSE/* ! from_fast_lookup */, - CI_FALSE/* ! fdip_is_busy */); + fdinfo = citp_reprobe_moved(fdinfo, CI_FALSE /* ! from_fast_lookup */, + CI_FALSE /* ! fdip_is_busy */); /* We want to prefault the packets for the new clustered stack. This * is only needed if we successfully reprobed a valid fd. This might @@ -485,15 +498,14 @@ static int citp_tcp_bind(citp_fdinfo* fdinfo, const struct sockaddr* sa, */ if( fdinfo && fdinfo->protocol == &citp_tcp_protocol_impl ) { epi = fdi_to_sock_fdi(fdinfo); - ep = &epi->sock; + ep = &epi->sock; ci_netif_cluster_prefault(ep->netif); - } - else { + } else { CI_SET_ERROR(rc, EBADF); } } #else - (void)s; /* appease compiler when NDEBUG */ + (void) s; /* appease compiler when NDEBUG */ #endif if( fdinfo ) @@ -501,8 +513,7 @@ static int citp_tcp_bind(citp_fdinfo* fdinfo, const struct sockaddr* sa, return rc; } -static bool -tcp_rc_means_handover(int rc) +static bool tcp_rc_means_handover(int rc) { /* ENOMEM or EBUSY means we are out of some sort of resource, so hand * this socket over to the OS. @@ -512,21 +523,22 @@ tcp_rc_means_handover(int rc) * Onload iptables so traffic should go via the OS. The case of Onload * iptables preventing filter insertion on only some interfaces is already * handled correctly in ci_tcp_listen(). 
*/ - return rc == CI_SOCKET_HANDOVER - || ( (rc < 0) && CITP_OPTS.no_fail && - ( errno == ENOMEM || errno == EBUSY || errno == ENOBUFS || - errno == ERFKILL || errno == ENOENT ) ); + return rc == CI_SOCKET_HANDOVER || + ((rc < 0) && CITP_OPTS.no_fail && + (errno == ENOMEM || errno == EBUSY || errno == ENOBUFS || + errno == ERFKILL || errno == ENOENT)); } static int citp_tcp_listen(citp_fdinfo* fdinfo, int backlog) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); - int rc; + int rc; - Log_VSS(ci_log(LPF "listen("EF_FMT", %d)", EF_PRI_ARGS(epi,fdinfo->fd), - backlog)); + Log_VSS(ci_log( + LPF "listen(" EF_FMT ", %d)", EF_PRI_ARGS(epi, fdinfo->fd), backlog)); - if( epi->sock.s->s_flags & (CI_SOCK_FLAGS_SCALABLE & ~CI_SOCK_FLAG_SCALPASSIVE) ) { + if( epi->sock.s->s_flags & + (CI_SOCK_FLAGS_SCALABLE & ~CI_SOCK_FLAG_SCALPASSIVE) ) { /* We do not support IP_TRANSPARENT on listening sockets. If this has * already been bound then we're past the point where we should have * created the OS socket, otherwise we can just handover. @@ -535,16 +547,15 @@ static int citp_tcp_listen(citp_fdinfo* fdinfo, int backlog) * then we hand over sockets as soon as IP_TRANSPARENT is applied. 
*/ if( epi->sock.s->s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND ) { - NI_LOG(epi->sock.netif, USAGE_WARNINGS, "Listening sockets using " - "socket option IP_TRANSPARENT cannot be accelerated"); + NI_LOG(epi->sock.netif, USAGE_WARNINGS, + "Listening sockets using " + "socket option IP_TRANSPARENT cannot be accelerated"); rc = CI_SOCKET_HANDOVER; - } - else { + } else { errno = EINVAL; - rc = -1; + rc = -1; } - } - else { + } else { rc = ci_tcp_listen(&(epi->sock), fdinfo->fd, backlog); } @@ -560,21 +571,20 @@ static int citp_tcp_listen(citp_fdinfo* fdinfo, int backlog) return rc; } - citp_fdinfo_release_ref( fdinfo, 0 ); + citp_fdinfo_release_ref(fdinfo, 0); return rc; } -static int citp_tcp_accept_os(citp_sock_fdi* epi, int fd, - struct sockaddr* sa, socklen_t* p_sa_len, - int flags) +static int citp_tcp_accept_os(citp_sock_fdi* epi, int fd, struct sockaddr* sa, + socklen_t* p_sa_len, int flags) { int rc; - rc = oo_os_sock_accept(epi->sock.netif, SC_SP(epi->sock.s), - sa, p_sa_len, flags); - Log_VSS(ci_log(LPF "accept("EF_FMT", sa, %d) = SYSTEM FD %d", - EF_PRI_ARGS(epi,fd), p_sa_len ? *p_sa_len:-1, rc)); + rc = oo_os_sock_accept( + epi->sock.netif, SC_SP(epi->sock.s), sa, p_sa_len, flags); + Log_VSS(ci_log(LPF "accept(" EF_FMT ", sa, %d) = SYSTEM FD %d", + EF_PRI_ARGS(epi, fd), p_sa_len ? 
*p_sa_len : -1, rc)); if( rc >= 0 ) citp_fdtable_passthru(rc, 0); else @@ -583,10 +593,9 @@ static int citp_tcp_accept_os(citp_sock_fdi* epi, int fd, } -static int citp_tcp_accept_complete(ci_netif* ni, - struct sockaddr* sa, socklen_t* p_sa_len, - ci_tcp_socket_listen* listener, - ci_tcp_state* ts, int newfd) +static int citp_tcp_accept_complete(ci_netif* ni, struct sockaddr* sa, + socklen_t* p_sa_len, ci_tcp_socket_listen* listener, ci_tcp_state* ts, + int newfd) { CITP_STATS_NETIF(++ni->state->stats.ul_accepts); @@ -594,13 +603,12 @@ static int citp_tcp_accept_complete(ci_netif* ni, ci_tcp_get_peer_addr(ts, sa, p_sa_len); Log_VSS(ci_log(LPF "%d ACCEPTING %d " IPX_FMT - ":%u rcv=%08x-%08x snd=%08x-%08x-%08x " - "enq=%08x", S_FMT(listener), S_FMT(ts), - IPX_ARG(AF_IP(tcp_ipx_raddr(ts))), - (unsigned) CI_BSWAP_BE16(TS_IPX_TCP(ts)->tcp_dest_be16), - tcp_rcv_nxt(ts), tcp_rcv_wnd_right_edge_sent(ts), - tcp_snd_una(ts), tcp_snd_nxt(ts), ts->snd_max, - tcp_enq_nxt(ts))); + ":%u rcv=%08x-%08x snd=%08x-%08x-%08x " + "enq=%08x", + S_FMT(listener), S_FMT(ts), IPX_ARG(AF_IP(tcp_ipx_raddr(ts))), + (unsigned) CI_BSWAP_BE16(TS_IPX_TCP(ts)->tcp_dest_be16), tcp_rcv_nxt(ts), + tcp_rcv_wnd_right_edge_sent(ts), tcp_snd_una(ts), tcp_snd_nxt(ts), + ts->snd_max, tcp_enq_nxt(ts))); /* Considered safe to take inode/uid from listening socket. 
As this * socket is necessarily in the same stack as the listener we know that @@ -618,19 +626,19 @@ static int citp_tcp_accept_complete(ci_netif* ni, #define CI_ACCEPT_FAKED_UP -3 static int citp_tcp_accept_alien(ci_netif* ni, ci_tcp_socket_listen* listener, - struct sockaddr* sa, socklen_t* p_sa_len, - int flags, citp_waitable* w) + struct sockaddr* sa, socklen_t* p_sa_len, int flags, citp_waitable* w) { - ci_netif *ani; - oo_sp sp = w->moved_to_sock_id; - ci_uint32 stack_id = w->moved_to_stack_id; + ci_netif* ani; + oo_sp sp = w->moved_to_sock_id; + ci_uint32 stack_id = w->moved_to_stack_id; citp_sock_fdi* newepi; - citp_fdinfo* newfdi; - citp_waitable *neww; - ci_tcp_state* ts; - int newfd, rc; + citp_fdinfo* newfdi; + citp_waitable* neww; + ci_tcp_state* ts; + int newfd, rc; - if( fdtable_strict() ) CITP_FDTABLE_LOCK(); + if( fdtable_strict() ) + CITP_FDTABLE_LOCK(); rc = citp_netif_by_id(stack_id, &ani, fdtable_strict()); if( rc != 0 ) { @@ -638,38 +646,41 @@ static int citp_tcp_accept_alien(ci_netif* ni, ci_tcp_socket_listen* listener, * and return it to the user. We've lost all the data we might have * received via this connection. */ ci_tcp_state* ts; - static int printed = 0; + static int printed = 0; - if( fdtable_strict() ) CITP_FDTABLE_UNLOCK(); + if( fdtable_strict() ) + CITP_FDTABLE_UNLOCK(); if( ! printed ) { - ci_log("EF_TCP_SERVER_LOOPBACK=2 limitation: the real client has gone, " - "faking it up. All data sent via this loopback connection " - "is lost."); + ci_log( + "EF_TCP_SERVER_LOOPBACK=2 limitation: the real client has gone, " + "faking it up. 
All data sent via this loopback connection " + "is lost."); printed = 1; } CITP_STATS_TCP_LISTEN(++listener->stats.n_accept_loop2_closed); ci_assert_equal(w->sb_aflags, - CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_NOT_READY | - CI_SB_AFLAG_MOVED_AWAY); + CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_NOT_READY | CI_SB_AFLAG_MOVED_AWAY); w->sb_aflags = CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_TCP_IN_ACCEPTQ | CI_SB_AFLAG_NOT_READY; - ci_tcp_state_init(ni, &CI_CONTAINER(citp_waitable_obj, waitable, w)->tcp, 0); - ts = &CI_CONTAINER(citp_waitable_obj, waitable, w)->tcp; + ci_tcp_state_init( + ni, &CI_CONTAINER(citp_waitable_obj, waitable, w)->tcp, 0); + ts = &CI_CONTAINER(citp_waitable_obj, waitable, w)->tcp; - ts->tcpflags = CI_TCPT_FLAG_LOOP_FAKE | CI_TCPT_FLAG_PASSIVE_OPENED; - ts->s.domain = AF_INET; - tcp_laddr_be32(ts) = sock_laddr_be32(&listener->s) == INADDR_ANY ? - INADDR_LOOPBACK : sock_laddr_be32(&listener->s); + ts->tcpflags = CI_TCPT_FLAG_LOOP_FAKE | CI_TCPT_FLAG_PASSIVE_OPENED; + ts->s.domain = AF_INET; + tcp_laddr_be32(ts) = sock_laddr_be32(&listener->s) == INADDR_ANY + ? INADDR_LOOPBACK + : sock_laddr_be32(&listener->s); tcp_lport_be16(ts) = sock_lport_be16(&listener->s); tcp_raddr_be32(ts) = INADDR_LOOPBACK; tcp_rport_be16(ts) = 0; /* We do not have any hint about this port! 
*/ - ts->s.tx_errno = EPIPE; - ts->s.rx_errno = CI_SHUT_RD; - ts->s.so_error = ECONNRESET; + ts->s.tx_errno = EPIPE; + ts->s.rx_errno = CI_SHUT_RD; + ts->s.so_error = ECONNRESET; return CI_ACCEPT_FAKED_UP; } @@ -681,23 +692,25 @@ static int citp_tcp_accept_alien(ci_netif* ni, ci_tcp_socket_listen* listener, ci_netif_unlock(ni); - newfd = ci_tcp_helper_tcp_accept_sock_attach(ci_netif_get_driver_handle(ani), - sp, flags); + newfd = ci_tcp_helper_tcp_accept_sock_attach( + ci_netif_get_driver_handle(ani), sp, flags); if( newfd < 0 ) { citp_netif_release_ref(ani, fdtable_strict()); - if( fdtable_strict() ) CITP_FDTABLE_UNLOCK(); + if( fdtable_strict() ) + CITP_FDTABLE_UNLOCK(); return -1; } citp_fdtable_new_fd_set(newfd, fdip_busy, fdtable_strict()); - if( fdtable_strict() ) CITP_FDTABLE_UNLOCK(); + if( fdtable_strict() ) + CITP_FDTABLE_UNLOCK(); Log_EP(ci_log("%s: %d:%d accepted fd=%d %d:%d", __FUNCTION__, - ni->state->stack_id, S_ID(listener), newfd, - ani->state->stack_id, OO_SP_TO_INT(sp))); + ni->state->stack_id, S_ID(listener), newfd, ani->state->stack_id, + OO_SP_TO_INT(sp))); /* Check that this ts looks in the way we expect; * there is no guarantee that it is the same stack it used to be. */ neww = SP_TO_WAITABLE(ani, sp); - if( !(neww->state & CI_TCP_STATE_TCP) || neww->state == CI_TCP_LISTEN ) { + if( ! (neww->state & CI_TCP_STATE_TCP) || neww->state == CI_TCP_LISTEN ) { errno = EINVAL; goto fail; } @@ -705,13 +718,13 @@ static int citp_tcp_accept_alien(ci_netif* ni, ci_tcp_socket_listen* listener, ts = SP_TO_TCP(ani, sp); if( sock_lport_be16(&ts->s) != sock_lport_be16(&listener->s) || (sock_laddr_be32(&listener->s) != INADDR_ANY && - (sock_laddr_be32(&listener->s) != sock_laddr_be32(&ts->s)) )) { + (sock_laddr_be32(&listener->s) != sock_laddr_be32(&ts->s))) ) { errno = EINVAL; goto fail; } - ci_assert(!(ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN)); - ci_assert(!(ts->s.b.sb_aflags & CI_SB_AFLAG_TCP_IN_ACCEPTQ)); + ci_assert(! 
(ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN)); + ci_assert(! (ts->s.b.sb_aflags & CI_SB_AFLAG_TCP_IN_ACCEPTQ)); newepi = CI_ALLOC_OBJ(citp_sock_fdi); if( newepi == 0 ) { @@ -723,7 +736,7 @@ static int citp_tcp_accept_alien(ci_netif* ni, ci_tcp_socket_listen* listener, #if CI_CFG_FD_CACHING newfdi->can_cache = 0; #endif - newepi->sock.s = &ts->s; + newepi->sock.s = &ts->s; newepi->sock.netif = ani; /* get new file descriptor into table */ @@ -733,8 +746,10 @@ static int citp_tcp_accept_alien(ci_netif* ni, ci_tcp_socket_listen* listener, return citp_tcp_accept_complete(ni, sa, p_sa_len, listener, ts, newfd); fail: - Log_E (ci_log(LPF "failed to get accepted socket from alien stack [%d]:" - " errno=%d", NI_ID(ani), errno)); + Log_E( + ci_log(LPF "failed to get accepted socket from alien stack [%d]:" + " errno=%d", + NI_ID(ani), errno)); ef_onload_driver_close(newfd); citp_netif_release_ref(ani, 0); return -1; @@ -743,22 +758,21 @@ static int citp_tcp_accept_alien(ci_netif* ni, ci_tcp_socket_listen* listener, static int citp_tcp_accept_ul(citp_fdinfo* fdinfo, ci_netif* ni, - ci_tcp_socket_listen* listener, - struct sockaddr* sa, socklen_t* p_sa_len, - int flags) + ci_tcp_socket_listen* listener, struct sockaddr* sa, socklen_t* p_sa_len, + int flags) { citp_sock_fdi* newepi; - citp_fdinfo* newfdi; - ci_tcp_state* ts; + citp_fdinfo* newfdi; + ci_tcp_state* ts; citp_waitable* w; - int newfd; + int newfd; #if CI_CFG_FD_CACHING int from_cache; #endif int unlocked = 0; - Log_VSS(ci_log(LPF "accept(%d:%d, sa, %d)", fdinfo->fd, - S_FMT(listener), p_sa_len ? *p_sa_len : -1)); + Log_VSS(ci_log(LPF "accept(%d:%d, sa, %d)", fdinfo->fd, S_FMT(listener), + p_sa_len ? 
*p_sa_len : -1)); #if CI_CFG_FD_CACHING redo: #endif @@ -783,11 +797,11 @@ static int citp_tcp_accept_ul(citp_fdinfo* fdinfo, ci_netif* ni, ts = &CI_CONTAINER(citp_waitable_obj, waitable, w)->tcp; #if CI_CFG_FD_CACHING if( S_TO_EPS(ni, ts)->fd != CI_FD_BAD ) { - /* we have fd to ep already this also means we are not at risk of concurrent - * sys_close() - * and are able to fixup state to reflect it coming from our cache */ + /* we have fd to ep already this also means we are not at risk of + * concurrent sys_close() and are able to fixup state to reflect it coming + * from our cache */ ci_assert_nflags(ts->s.b.sb_aflags, CI_SB_AFLAG_IN_CACHE_NO_FD); - ts->cached_on_fd = S_TO_EPS(ni,ts)->fd; + ts->cached_on_fd = S_TO_EPS(ni, ts)->fd; ts->cached_on_pid = citp_getpid(); } from_cache = ci_tcp_is_cached(ts); @@ -796,24 +810,24 @@ static int citp_tcp_accept_ul(citp_fdinfo* fdinfo, ci_netif* ni, * But faked-up loopback connection can't be cached, so we are safe * here. */ ci_assert(! unlocked); - oo_p_dllink_del_init(ni, oo_p_dllink_sb(ni, &ts->s.b, - &ts->epcache_fd_link)); + oo_p_dllink_del_init( + ni, oo_p_dllink_sb(ni, &ts->s.b, &ts->epcache_fd_link)); } #endif if( ! unlocked ) ci_sock_unlock(ni, &listener->s.b); - newfd = citp_tcp_ep_acquire_fd(ni, ts, listener, ts->s.domain, SOCK_STREAM, - flags); + newfd = citp_tcp_ep_acquire_fd( + ni, ts, listener, ts->s.domain, SOCK_STREAM, flags); if( newfd < 0 ) { - Log_E(ci_log(LPF "%s: citp_tcp_ep_acquire_fd failed: %d", - __FUNCTION__, newfd)); + Log_E(ci_log( + LPF "%s: citp_tcp_ep_acquire_fd failed: %d", __FUNCTION__, newfd)); ci_sock_lock(ni, &listener->s.b); ci_assert(ts->s.b.sb_aflags & CI_SB_AFLAG_TCP_IN_ACCEPTQ); #if CI_CFG_FD_CACHING if( newfd == -ENOANO ) { Log_EP(ci_log("%s: [%d:%d]. 
puttint accepted socket back on acceptq", - __FUNCTION__, NI_ID(ni), S_SP(ts))); + __FUNCTION__, NI_ID(ni), S_SP(ts))); ci_tcp_acceptq_put_back_tail(ni, listener, &ts->s.b); CITP_STATS_NETIF_INC(ni, accept_attach_fd_retry); sched_yield(); @@ -831,16 +845,16 @@ static int citp_tcp_accept_ul(citp_fdinfo* fdinfo, ci_netif* ni, /* Whether [ts] came from the cache or not, we need to create the u/l state * for the fd (i.e. fdinfo). */ - ci_assert(!(ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN)); - ci_assert(!(ts->s.b.sb_aflags & CI_SB_AFLAG_TCP_IN_ACCEPTQ)); + ci_assert(! (ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN)); + ci_assert(! (ts->s.b.sb_aflags & CI_SB_AFLAG_TCP_IN_ACCEPTQ)); newepi = CI_ALLOC_OBJ(citp_sock_fdi); if( newepi == 0 ) { - Log_E (ci_log(LPF "accept: newepi malloc failed")); + Log_E(ci_log(LPF "accept: newepi malloc failed")); citp_fdtable_busy_clear(newfd, fdip_unknown, 0); /* FIXME close the EP in case of shared EP cache */ ci_tcp_helper_close_no_trampoline(newfd); - S_TO_EPS(ni,ts)->fd = CI_FD_BAD; + S_TO_EPS(ni, ts)->fd = CI_FD_BAD; return -1; } newfdi = &newepi->fdinfo; @@ -848,7 +862,7 @@ static int citp_tcp_accept_ul(citp_fdinfo* fdinfo, ci_netif* ni, #if CI_CFG_FD_CACHING newfdi->can_cache = 1; #endif - newepi->sock.s = &ts->s; + newepi->sock.s = &ts->s; newepi->sock.netif = ni; citp_netif_add_ref(ni); @@ -856,11 +870,11 @@ static int citp_tcp_accept_ul(citp_fdinfo* fdinfo, ci_netif* ni, if( from_cache ) { ci_atomic32_inc(&ni->state->passive_cache_avail_stack); ci_atomic32_inc(&listener->cache_avail_sock); - ci_assert_le(ni->state->passive_cache_avail_stack, - ni->state->opts.sock_cache_max); + ci_assert_le( + ni->state->passive_cache_avail_stack, ni->state->opts.sock_cache_max); if( ~NI_OPTS(ni).scalable_filter_mode & CITP_SCALABLE_MODE_PASSIVE ) - ci_assert_le(listener->cache_avail_sock, - ni->state->opts.per_sock_cache_max); + ci_assert_le( + listener->cache_avail_sock, ni->state->opts.per_sock_cache_max); } #endif @@ -873,32 +887,29 @@ static int 
citp_tcp_accept_ul(citp_fdinfo* fdinfo, ci_netif* ni, } -static int citp_tcp_accept(citp_fdinfo* fdinfo, - struct sockaddr* sa, socklen_t* p_sa_len, - int flags, - citp_lib_context_t* lib_context) +static int citp_tcp_accept(citp_fdinfo* fdinfo, struct sockaddr* sa, + socklen_t* p_sa_len, int flags, citp_lib_context_t* lib_context) { ci_tcp_socket_listen* listener; - citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); - ci_netif* ni; - int have_polled = 0; - ci_uint64 start_frc = 0 /* for effing stoopid compilers */; - int rc = 0; - ci_uint64 max_spin; - int spin_limit_by_so = 0; - int timeout; - unsigned tcp_accept_spin = oo_per_thread_get()->spinstate & - (1 << ONLOAD_SPIN_TCP_ACCEPT); - - ni = epi->sock.netif; + citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); + ci_netif* ni; + ci_uint64 start_frc = 0 /* for effing stoopid compilers */; + int rc = 0; + ci_uint64 max_spin; + int spin_limit_by_so = 0; + int timeout; + unsigned tcp_accept_spin = + oo_per_thread_get()->spinstate & (1 << ONLOAD_SPIN_TCP_ACCEPT); + + ni = epi->sock.netif; /* Prepare to spin if necessary */ max_spin = epi->sock.s->b.spin_cycles; if( epi->sock.s->so.rcvtimeo_msec && tcp_accept_spin ) { - ci_uint64 max_so_spin = (ci_uint64)epi->sock.s->so.rcvtimeo_msec * - IPTIMER_STATE(ni)->khz; + ci_uint64 max_so_spin = + (ci_uint64) epi->sock.s->so.rcvtimeo_msec * IPTIMER_STATE(ni)->khz; if( max_so_spin <= max_spin ) { - max_spin = max_so_spin; + max_spin = max_so_spin; spin_limit_by_so = 1; } } @@ -918,30 +929,28 @@ static int citp_tcp_accept(citp_fdinfo* fdinfo, } if( ci_tcp_acceptq_n(listener) ) { - ci_sock_lock(ni, &listener->s.b); - if( ci_tcp_acceptq_not_empty(listener) ) { - /* delayed error report (after a connect came) */ - if( CI_UNLIKELY(p_sa_len == NULL && sa != NULL) ) { - ci_sock_unlock(ni, &listener->s.b); - CI_SET_ERROR(rc, EFAULT); - return rc; - } - return citp_tcp_accept_ul(fdinfo, ni, listener, sa, p_sa_len, flags); + ci_sock_lock(ni, &listener->s.b); + if( 
ci_tcp_acceptq_not_empty(listener) ) { + /* delayed error report (after a connect came) */ + if( CI_UNLIKELY(p_sa_len == NULL && sa != NULL) ) { + ci_sock_unlock(ni, &listener->s.b); + CI_SET_ERROR(rc, EFAULT); + return rc; } - ci_sock_unlock(ni, &listener->s.b); + return citp_tcp_accept_ul(fdinfo, ni, listener, sa, p_sa_len, flags); + } + ci_sock_unlock(ni, &listener->s.b); } /* User-level accept queue is empty. Are we up-to-date? */ - if( ! have_polled ) { - have_polled = 1; - ci_frc64(&start_frc); - if( ci_netif_may_poll(ni) && ci_netif_need_poll_frc(ni, start_frc) && - ci_netif_trylock(ni) ) { - int any_evs = ci_netif_poll(ni); - ci_netif_unlock(ni); - if( any_evs ) goto check_ul_accept_q; - } + ci_frc64(&start_frc); + if( ci_netif_may_poll(ni) && ci_netif_need_poll_frc(ni, start_frc) && + ci_netif_trylock(ni) ) { + int any_evs = ci_netif_poll(ni); + ci_netif_unlock(ni); + if( any_evs ) + goto check_ul_accept_q; } /* What about the O/S socket? */ @@ -949,19 +958,20 @@ static int citp_tcp_accept(citp_fdinfo* fdinfo, if( listener->s.os_sock_status & OO_OS_STATUS_RX ) { rc = citp_tcp_accept_os(epi, fdinfo->fd, sa, p_sa_len, flags); if( rc >= 0 ) { - CITP_STATS_TCP_LISTEN(++listener->stats.n_accept_os); - ++ni->state->stats.tcp_accept_os; - goto unlock_out; + CITP_STATS_TCP_LISTEN(++listener->stats.n_accept_os); + ++ni->state->stats.tcp_accept_os; + goto unlock_out; } - if( errno != EAGAIN ) goto unlock_out; + if( errno != EAGAIN ) + goto unlock_out; } } - if( listener->s.b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | - CI_SB_AFLAG_O_NDELAY) ) { + if( listener->s.b.sb_aflags & + (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) ) { CITP_STATS_NETIF(++ni->state->stats.accept_eagain); errno = EAGAIN; - rc = -1; + rc = -1; goto unlock_out; } @@ -976,38 +986,38 @@ static int citp_tcp_accept(citp_fdinfo* fdinfo, ni->state->stats.spin_tcp_accept++; #endif if( ci_netif_may_poll(ni) && ci_netif_need_poll_frc(ni, now_frc) ) { - if( ci_netif_trylock(ni) ) { - ci_netif_poll(ni); + if( 
ci_netif_trylock(ni) ) { + ci_netif_poll(ni); ci_netif_unlock(ni); - } - } - else if( ! ni->state->is_spinner ) + } + } else if( ! ni->state->is_spinner ) ni->state->is_spinner = 1; - if(CI_UNLIKELY( lib_context->thread->sig.c.aflags & - OO_SIGNAL_FLAG_HAVE_PENDING )) { + if( CI_UNLIKELY(lib_context->thread->sig.c.aflags & + OO_SIGNAL_FLAG_HAVE_PENDING) ) { if( listener->s.so.rcvtimeo_msec ) { ni->state->is_spinner = 0; - errno = EINTR; + errno = EINTR; return -1; } /* run any pending signals: */ { int inside_lib = - oo_exit_lib_temporary_begin(&lib_context->thread->sig); + oo_exit_lib_temporary_begin(&lib_context->thread->sig); oo_exit_lib_temporary_end(&lib_context->thread->sig, inside_lib); } - if( ~lib_context->thread->sig.c.aflags & OO_SIGNAL_FLAG_NEED_RESTART ) { + if( ~lib_context->thread->sig.c.aflags & + OO_SIGNAL_FLAG_NEED_RESTART ) { ni->state->is_spinner = 0; - errno = EINTR; + errno = EINTR; return -1; } if( oo_atomic_read(&fdinfo->ref_count) == 1 ) { ni->state->is_spinner = 0; - errno = EBADF; + errno = EBADF; return -1; } } @@ -1024,7 +1034,7 @@ static int citp_tcp_accept(citp_fdinfo* fdinfo, { struct pollfd pfd; - pfd.fd = fdinfo->fd; + pfd.fd = fdinfo->fd; pfd.events = POLLIN; if( timeout == 0 ) @@ -1037,7 +1047,7 @@ static int citp_tcp_accept(citp_fdinfo* fdinfo, * * See also ci_udp_recvmsg(). */ - restart_select: + restart_select: citp_exit_lib(lib_context, FALSE); rc = ci_sys_poll(&pfd, 1, timeout); citp_reenter_lib(lib_context); @@ -1046,11 +1056,11 @@ static int citp_tcp_accept(citp_fdinfo* fdinfo, goto check_ul_accept_q; else if( rc == 0 ) { errno = EAGAIN; - rc = -1; - } - else if( errno == EINTR && - (lib_context->thread->sig.c.aflags & OO_SIGNAL_FLAG_NEED_RESTART) && - timeout == -1 ) { + rc = -1; + } else if( errno == EINTR && + (lib_context->thread->sig.c.aflags & + OO_SIGNAL_FLAG_NEED_RESTART) && + timeout == -1 ) { /* Before restarting because of SA_RESTART, let's check the fd was * not closed. 
One refcount is ours - so we exit if it is the last * one. */ @@ -1062,24 +1072,23 @@ static int citp_tcp_accept(citp_fdinfo* fdinfo, } } - unlock_out: +unlock_out: ni->state->is_spinner = 0; return rc; } -static int citp_tcp_connect(citp_fdinfo* fdinfo, - const struct sockaddr* sa, socklen_t sa_len, - citp_lib_context_t* lib_context) +static int citp_tcp_connect(citp_fdinfo* fdinfo, const struct sockaddr* sa, + socklen_t sa_len, citp_lib_context_t* lib_context) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); - ci_sock_cmn* s = epi->sock.s; - int rc; - int moved = 0; + ci_sock_cmn* s = epi->sock.s; + int rc; + int moved = 0; -#if !CI_CFG_FAKE_IPV6 +#if ! CI_CFG_FAKE_IPV6 Log_VSS(const struct sockaddr_in* sai = (const struct sockaddr_in*) sa; - ci_log(LPF "connect("EF_FMT", %s:%d, %d)", - EF_PRI_ARGS(epi,fdinfo->fd), + ci_log(LPF "connect(" EF_FMT ", %s:%d, %d)", + EF_PRI_ARGS(epi, fdinfo->fd), (sai != NULL) ? ip_addr_str(sai->sin_addr.s_addr) : "(null)", (sai != NULL) ? CI_BSWAP_BE16(sai->sin_port) : 0, sa_len)); #endif @@ -1095,18 +1104,18 @@ static int citp_tcp_connect(citp_fdinfo* fdinfo, */ if( (s->s_flags & CI_SOCK_FLAG_TPROXY) && (s->s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND) ) { - NI_LOG(epi->sock.netif, USAGE_WARNINGS, "Sockets using socket option " - "IP_TRANSPARENT must explicitly bind to a port to be accelerated"); + NI_LOG(epi->sock.netif, USAGE_WARNINGS, + "Sockets using socket option " + "IP_TRANSPARENT must explicitly bind to a port to be accelerated"); rc = CI_SOCKET_HANDOVER; - } - else { - rc = ci_tcp_connect( &epi->sock, sa, sa_len, fdinfo->fd, &moved); + } else { + rc = ci_tcp_connect(&epi->sock, sa, sa_len, fdinfo->fd, &moved); } if( moved ) { citp_fdinfo* new_fdinfo; - int reprobe_rc = citp_reprobe_moved_common(fdinfo, CI_FALSE, CI_FALSE, - &new_fdinfo); + int reprobe_rc = + citp_reprobe_moved_common(fdinfo, CI_FALSE, CI_FALSE, &new_fdinfo); fdinfo = new_fdinfo; if( fdinfo == NULL ) { /* Most probably, it is EMFILE, but we can't know for sure. 
@@ -1118,7 +1127,7 @@ static int citp_tcp_connect(citp_fdinfo* fdinfo, /* Possibly we also should handover. To do it properly, we need * current epi value. */ epi = fdi_to_sock_fdi(fdinfo); - s = epi->sock.s; + s = epi->sock.s; } if( tcp_rc_means_handover(rc) ) { @@ -1127,20 +1136,19 @@ static int citp_tcp_connect(citp_fdinfo* fdinfo, * been bound (ie they wouldn't have an os socket even if they weren't * tproxy. */ - if( !(s->s_flags & CI_SOCK_FLAG_TPROXY) || - (s->s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND) ) { + if( ! (s->s_flags & CI_SOCK_FLAG_TPROXY) || + (s->s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND) ) { int fd = fdinfo->fd; - rc = 0; + rc = 0; ci_netif_lock_fdi(epi); if( ~epi->sock.s->b.sb_aflags & CI_SB_AFLAG_OS_BACKED ) { - rc = ci_tcp_helper_os_sock_create_and_set(epi->sock.netif, fdinfo->fd, - epi->sock.s, - -1, 0, NULL, 0); + rc = ci_tcp_helper_os_sock_create_and_set( + epi->sock.netif, fdinfo->fd, epi->sock.s, -1, 0, NULL, 0); } ci_netif_unlock_fdi(epi); if( rc < 0 ) { /* Too bad, but we can't do anything. Return to the user. 
*/ - citp_fdinfo_release_ref( fdinfo, 0 ); + citp_fdinfo_release_ref(fdinfo, 0); RET_WITH_ERRNO(-rc); } @@ -1154,38 +1162,36 @@ static int citp_tcp_connect(citp_fdinfo* fdinfo, rc = ci_sys_connect(fd, sa, sa_len); citp_reenter_lib(lib_context); return rc; - } - else { - ci_assert_equal( fdinfo->protocol->type, CITP_PASSTHROUGH_FD); + } else { + ci_assert_equal(fdinfo->protocol->type, CITP_PASSTHROUGH_FD); return citp_passthrough_connect(fdinfo, sa, sa_len, lib_context); } - } - else { - NI_LOG(epi->sock.netif, USAGE_WARNINGS, "Sockets using socket option " - "IP_TRANSPARENT cannot be handed over after bind"); + } else { + NI_LOG(epi->sock.netif, USAGE_WARNINGS, + "Sockets using socket option " + "IP_TRANSPARENT cannot be handed over after bind"); errno = EINVAL; - rc = -1; + rc = -1; } } - citp_fdinfo_release_ref( fdinfo, 0 ); + citp_fdinfo_release_ref(fdinfo, 0); return rc; } #if CI_CFG_FD_CACHING -static void citp_tcp_close_cached(citp_fdinfo* fdinfo, - struct oo_p_dllink_state cache_pending, - int active) +static void citp_tcp_close_cached( + citp_fdinfo* fdinfo, struct oo_p_dllink_state cache_pending, int active) { - ci_socket_cache_t* cache = CI_CONTAINER(ci_socket_cache_t, pending, - cache_pending.l); - citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); - ci_sock_cmn* s = epi->sock.s; - ci_netif* netif = epi->sock.netif; - ci_tcp_state* ts = SOCK_TO_TCP(s); + ci_socket_cache_t* cache = + CI_CONTAINER(ci_socket_cache_t, pending, cache_pending.l); + citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); + ci_sock_cmn* s = epi->sock.s; + ci_netif* netif = epi->sock.netif; + ci_tcp_state* ts = SOCK_TO_TCP(s); struct oo_p_dllink_state link; - ci_assert(!ci_tcp_is_cached(ts)); + ci_assert(! ci_tcp_is_cached(ts)); /* We've decided to cache. There are two lots of things to do. 
Firstly, * set up the state needed to cache: @@ -1203,21 +1209,22 @@ static void citp_tcp_close_cached(citp_fdinfo* fdinfo, * We don't go via citp_waitable_all_fds_gone as we must not set the * ORPHAN flag - we remain attached to our fd. */ - ci_atomic32_dec((volatile ci_uint32*)CI_NETIF_PTR(netif, - cache->avail_stack)); + ci_atomic32_dec( + (volatile ci_uint32*) CI_NETIF_PTR(netif, cache->avail_stack)); ci_assert_ge(cache->avail_stack, 0); - ci_assert_lt(*(ci_uint32*)CI_NETIF_PTR(netif, cache->avail_stack), - netif->state->opts.sock_cache_max); + ci_assert_lt(*(ci_uint32*) CI_NETIF_PTR(netif, cache->avail_stack), + netif->state->opts.sock_cache_max); if( S_TO_EPS(netif, ts)->fd != CI_FD_BAD ) ci_assert_equal(fdinfo->fd, S_TO_EPS(netif, ts)->fd); - ts->cached_on_fd = fdinfo->fd; + ts->cached_on_fd = fdinfo->fd; ts->cached_on_pid = citp_getpid(); - ci_assert(!(s->b.sb_aflags & CI_SB_AFLAG_NOT_READY)); - ci_atomic32_or(&s->b.sb_aflags, CI_SB_AFLAG_NOT_READY | CI_SB_AFLAG_IN_CACHE - | (active ? 0 : CI_SB_AFLAG_IN_PASSIVE_CACHE)); + ci_assert(! (s->b.sb_aflags & CI_SB_AFLAG_NOT_READY)); + ci_atomic32_or( + &s->b.sb_aflags, CI_SB_AFLAG_NOT_READY | CI_SB_AFLAG_IN_CACHE | + (active ? 0 : CI_SB_AFLAG_IN_PASSIVE_CACHE)); /* If this socket was previously accepted from cache it may already be on * the connected list, so it needs removing before pushing to the pending @@ -1238,18 +1245,18 @@ static void citp_tcp_close_cached(citp_fdinfo* fdinfo, /* We calculate cache->fd_states state pointer from cache_pending * and offest between these 2 lists in the cache structure. */ if( ! oo_p_dllink_concurrent_add(netif, - oo_p_dllink_statep(netif, cache_pending.p + - ((uintptr_t)&cache->fd_states - (uintptr_t)&cache->pending)), - link) ) { + oo_p_dllink_statep( + netif, cache_pending.p + ((uintptr_t) &cache->fd_states - + (uintptr_t) &cache->pending)), + link) ) { /* When cache is shared sys_close will only release FD and * decrease reference on system file. 
However, we need this * endpoint to be really closed */ S_TO_EPS(netif, ts)->fd = CI_FD_BAD; ci_tcp_helper_close_no_trampoline(ts->cached_on_fd); - } - else { + } else { /* store sys fd for reuse */ - S_TO_EPS(netif,ts)->fd = fdinfo->fd; + S_TO_EPS(netif, ts)->fd = fdinfo->fd; } /* We're more of a kidnapped child than an orphan, but we still need to @@ -1258,7 +1265,7 @@ static void citp_tcp_close_cached(citp_fdinfo* fdinfo, * * NB. This socket cannot now be added to the deferred list, because * no-one has a reference to it. - */ + */ ci_netif_purge_deferred_socket_list(netif); /* We also need to remove the socket from the post-poll list. It may @@ -1279,8 +1286,7 @@ static void citp_tcp_close_cached(citp_fdinfo* fdinfo, */ if( ts->s.b.state != CI_TCP_CLOSED ) { ci_tcp_close(netif, ts); - } - else { + } else { /* Only active cached sockets can go directly to the cached list - we * can only cache passive sockets that still have their hw filter ref. */ @@ -1291,8 +1297,8 @@ static void citp_tcp_close_cached(citp_fdinfo* fdinfo, } -static void citp_tcp_close_passive_cached(ci_netif* netif, citp_fdinfo* fdinfo, - ci_tcp_socket_listen* tls) +static void citp_tcp_close_passive_cached( + ci_netif* netif, citp_fdinfo* fdinfo, ci_tcp_socket_listen* tls) { struct oo_p_dllink_state cache_pending; ci_atomic32_dec(&tls->cache_avail_sock); @@ -1303,18 +1309,16 @@ static void citp_tcp_close_passive_cached(ci_netif* netif, citp_fdinfo* fdinfo, if( (tls->s.s_flags & CI_SOCK_FLAG_SCALPASSIVE) == 0 ) cache_pending = oo_p_dllink_sb(netif, &tls->s.b, &tls->epcache.pending); else - cache_pending = oo_p_dllink_ptr(netif, - &netif->state->passive_scalable_cache.pending); + cache_pending = + oo_p_dllink_ptr(netif, &netif->state->passive_scalable_cache.pending); citp_tcp_close_cached(fdinfo, cache_pending, 0); } static void citp_tcp_close_active_cached(ci_netif* netif, citp_fdinfo* fdinfo) { - citp_tcp_close_cached(fdinfo, - oo_p_dllink_ptr(netif, - &netif->state->active_cache.pending), - 
1); + citp_tcp_close_cached( + fdinfo, oo_p_dllink_ptr(netif, &netif->state->active_cache.pending), 1); } @@ -1325,16 +1329,17 @@ static void citp_tcp_close_active_cached(ci_netif* netif, citp_fdinfo* fdinfo) /* Check whether a socket's local port is in the list of permitted ports for * caching. */ -static int citp_tcp_cache_port_eligible(ci_sock_cmn* s) { - struct ci_port_list *sock_cache_port; +static int citp_tcp_cache_port_eligible(ci_sock_cmn* s) +{ + struct ci_port_list* sock_cache_port; if( CITP_OPTS.sock_cache_ports == 0 ) return 1; CI_DLLIST_FOR_EACH2(struct ci_port_list, sock_cache_port, link, - (ci_dllist*)(ci_uintptr_t)CITP_OPTS.sock_cache_ports) - if( sock_cache_port->port == sock_lport_be16(s) ) - return 1; + (ci_dllist*) (ci_uintptr_t) CITP_OPTS.sock_cache_ports) + if( sock_cache_port->port == sock_lport_be16(s) ) + return 1; return 0; } @@ -1348,14 +1353,14 @@ static int citp_tcp_cache_port_eligible(ci_sock_cmn* s) { */ static int citp_tcp_cache(citp_fdinfo* fdinfo) { - int rc = 0; - citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); - ci_sock_cmn* s = epi->sock.s; - ci_tcp_state* ts; - ci_netif* netif = epi->sock.netif; + int rc = 0; + citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); + ci_sock_cmn* s = epi->sock.s; + ci_tcp_state* ts; + ci_netif* netif = epi->sock.netif; ci_tcp_socket_listen* tls; - Log_VSS(ci_log(LPF "cache("EF_FMT")", EF_PRI_ARGS(epi, fdinfo->fd))); + Log_VSS(ci_log(LPF "cache(" EF_FMT ")", EF_PRI_ARGS(epi, fdinfo->fd))); /* We don't cache OS-backed sockets as managing the backing socket would * require going into the kernel. This stops us from caching listening @@ -1375,10 +1380,10 @@ static int citp_tcp_cache(citp_fdinfo* fdinfo) Log_EP(ci_log("FD %d not cached - SO_LINGER set", fdinfo->fd)); return 0; } - + /* Loopback sockets lack hw filter - shouldn't cache. 
*/ if( OO_SP_NOT_NULL(ts->local_peer) && - ts->tcpflags & CI_TCPT_FLAG_PASSIVE_OPENED) { + ts->tcpflags & CI_TCPT_FLAG_PASSIVE_OPENED ) { Log_EP(ci_log("FD %d not cached - accelerated loopback", fdinfo->fd)); return 0; } @@ -1389,21 +1394,21 @@ static int citp_tcp_cache(citp_fdinfo* fdinfo) */ if( s->b.sb_aflags & (CI_SB_AFLAG_O_ASYNC | CI_SB_AFLAG_O_APPEND) ) { Log_EP(ci_log("FD %d not cached - invalid flags set 0x%x", fdinfo->fd, - s->b.sb_aflags & (CI_SB_AFLAG_O_ASYNC | CI_SB_AFLAG_O_APPEND))); + s->b.sb_aflags & (CI_SB_AFLAG_O_ASYNC | CI_SB_AFLAG_O_APPEND))); return 0; } /* We'd need to go into the kernel to reset sigown - shouldn't cache */ if( s->b.sigown != 0 ) { Log_EP(ci_log("FD %d not cached - owner's PID is set to %d", fdinfo->fd, - s->b.sigown)); + s->b.sigown)); return 0; } /* We may not be cacheable, for example if we've been duped, or added to * a ul_epoll=2 set. */ - if( !fdinfo->can_cache ) { + if( ! fdinfo->can_cache ) { Log_EP(ci_log("FD %d not cached - fdinfo not cacheable", fdinfo->fd)); return 0; } @@ -1433,9 +1438,8 @@ static int citp_tcp_cache(citp_fdinfo* fdinfo) /* We need to decide whether this socket should go on the passive- or * active-open cache, as the remaining work is different in each case. */ if( ts->tcpflags & CI_TCPT_FLAG_PASSIVE_OPENED ) { - oo_sp sock = ci_netif_listener_lookup(netif, sock_af_space(s), - sock_ipx_laddr(s), - sock_lport_be16(s)); + oo_sp sock = ci_netif_listener_lookup( + netif, sock_af_space(s), sock_ipx_laddr(s), sock_lport_be16(s)); if( OO_SP_IS_NULL(sock) ) { /* If the listener has been closed, we can't cache this socket. */ rc = 0; @@ -1448,8 +1452,8 @@ static int citp_tcp_cache(citp_fdinfo* fdinfo) /* We limit the maximum number of sockets cached in a stack. 
*/ if( netif->state->passive_cache_avail_stack == 0 ) { - Log_EP(ci_log("FD %d not cached - passive stack limit reached", - fdinfo->fd)); + Log_EP(ci_log( + "FD %d not cached - passive stack limit reached", fdinfo->fd)); CITP_STATS_NETIF(++netif->state->stats.passive_sockcache_stacklim); goto unlock_out; } @@ -1457,22 +1461,23 @@ static int citp_tcp_cache(citp_fdinfo* fdinfo) /* The tcp state needs to still have its filters, or we'd have to go into * kernel anyway. */ - if( !(s->b.state & CI_TCP_STATE_TCP_CONN) ) { + if( ! (s->b.state & CI_TCP_STATE_TCP_CONN) ) { Log_EP(ci_log("FD %d not cached - not in suitable state (0x%x)", - fdinfo->fd, s->b.state)); + fdinfo->fd, s->b.state)); goto unlock_out; } if( s->s_flags & CI_SOCK_FLAG_FILTER ) { - Log_EP(ci_log("FD %d not cached - full match hw filter is installed", - fdinfo->fd)); + Log_EP(ci_log( + "FD %d not cached - full match hw filter is installed", fdinfo->fd)); goto unlock_out; } tls = SP_TO_TCP_LISTEN(netif, sock); if( tls->cache_avail_sock == 0 ) { - Log_EP(ci_log("FD %d not cached - per-socket limit reached", fdinfo->fd)); + Log_EP( + ci_log("FD %d not cached - per-socket limit reached", fdinfo->fd)); CITP_STATS_NETIF(++netif->state->stats.sockcache_socklim); goto unlock_out; } @@ -1480,24 +1485,24 @@ static int citp_tcp_cache(citp_fdinfo* fdinfo) /* Woohoo! Cache this sucker! */ citp_tcp_close_passive_cached(netif, fdinfo, tls); Log_EP(ci_log("FD %d cached on passive-open cache", fdinfo->fd)); - } - else { + } else { /* Non-scalable non-closed sockets might carry some other state that prevails our partial cache-based reinitialisation. Lack of hw filter and lack of backing socket are not enough to make this sockets cacheable */ if( (s->s_flags & CI_SOCK_FLAGS_SCALABLE) == 0 && - !(s->b.state == CI_TCP_CLOSED && - (s->s_flags & CI_SOCK_FLAG_BOUND) == 0) ) { - Log_EP(ci_log("FD %d not cached - active nonscalable socket", fdinfo->fd)); + ! 
(s->b.state == CI_TCP_CLOSED && + (s->s_flags & CI_SOCK_FLAG_BOUND) == 0) ) { + Log_EP( + ci_log("FD %d not cached - active nonscalable socket", fdinfo->fd)); goto unlock_out; } #if ! CI_CFG_IPV6 /* Don't cache IPv6 sockets */ if( s->domain != AF_INET ) { - Log_EP(ci_log("FD %d not cached - non-IPv4 domain %d", - fdinfo->fd, s->domain)); + Log_EP(ci_log( + "FD %d not cached - non-IPv4 domain %d", fdinfo->fd, s->domain)); CITP_STATS_NETIF(++netif->state->stats.active_sockcache_non_ip4); goto unlock_out; } @@ -1505,8 +1510,8 @@ static int citp_tcp_cache(citp_fdinfo* fdinfo) /* We limit the maximum number of sockets cached in a stack. */ if( netif->state->active_cache_avail_stack == 0 ) { - Log_EP(ci_log("FD %d not cached - active stack limit reached", - fdinfo->fd)); + Log_EP( + ci_log("FD %d not cached - active stack limit reached", fdinfo->fd)); CITP_STATS_NETIF(++netif->state->stats.active_sockcache_stacklim); goto unlock_out; } @@ -1518,7 +1523,7 @@ static int citp_tcp_cache(citp_fdinfo* fdinfo) rc = 1; CITP_STATS_NETIF(++netif->state->stats.sockcache_cached); - unlock_out: +unlock_out: ci_netif_unlock_fdi(epi); return rc; } @@ -1528,21 +1533,22 @@ static int citp_tcp_cache(citp_fdinfo* fdinfo) static int citp_tcp_shutdown(citp_fdinfo* fdinfo, int how) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); - int rc; + int rc; - Log_VSS(ci_log(LPF "shutdown("EF_FMT", %d)", EF_PRI_ARGS(epi,fdinfo->fd), how)); + Log_VSS(ci_log( + LPF "shutdown(" EF_FMT ", %d)", EF_PRI_ARGS(epi, fdinfo->fd), how)); rc = ci_tcp_shutdown(&(epi->sock), how, fdinfo->fd); return rc; } -static int citp_tcp_getsockname(citp_fdinfo* fdinfo, - struct sockaddr* sa, socklen_t* p_sa_len) +static int citp_tcp_getsockname( + citp_fdinfo* fdinfo, struct sockaddr* sa, socklen_t* p_sa_len) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); - int rc; + int rc; - Log_VSC(ci_log(LPF "getsockname("EF_FMT")", EF_PRI_ARGS(epi,fdinfo->fd))); + Log_VSC(ci_log(LPF "getsockname(" EF_FMT ")", EF_PRI_ARGS(epi, 
fdinfo->fd))); rc = ci_tcp_getsockname(&epi->sock, fdinfo->fd, sa, p_sa_len); if( rc == 0 ) __citp_getsockname(epi->sock.s, sa, p_sa_len); @@ -1550,13 +1556,13 @@ static int citp_tcp_getsockname(citp_fdinfo* fdinfo, } -static int citp_tcp_getpeername(citp_fdinfo* fdinfo, - struct sockaddr* sa, socklen_t* p_sa_len) +static int citp_tcp_getpeername( + citp_fdinfo* fdinfo, struct sockaddr* sa, socklen_t* p_sa_len) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); - int rc; + int rc; - Log_VSC(ci_log(LPF "getpeername("EF_FMT")", EF_PRI_ARGS(epi,fdinfo->fd))); + Log_VSC(ci_log(LPF "getpeername(" EF_FMT ")", EF_PRI_ARGS(epi, fdinfo->fd))); ci_netif_lock_fdi(epi); rc = ci_tcp_getpeername(&epi->sock, sa, p_sa_len); ci_netif_unlock_fdi(epi); @@ -1564,34 +1570,34 @@ static int citp_tcp_getpeername(citp_fdinfo* fdinfo, } -static int citp_tcp_getsockopt(citp_fdinfo* fdinfo, int level, - int optname, void* optval, socklen_t* optlen) +static int citp_tcp_getsockopt(citp_fdinfo* fdinfo, int level, int optname, + void* optval, socklen_t* optlen) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); - int rc; + int rc; - Log_VSC(ci_log(LPF "getsockopt("EF_FMT", %d, %d)", - EF_PRI_ARGS(epi,fdinfo->fd), level, optname)); + Log_VSC(ci_log(LPF "getsockopt(" EF_FMT ", %d, %d)", + EF_PRI_ARGS(epi, fdinfo->fd), level, optname)); ci_netif_lock_count(epi->sock.netif, getsockopt_ni_lock_contends); - rc = ci_tcp_getsockopt(&epi->sock, fdinfo->fd, - level, optname, optval, optlen); + rc = ci_tcp_getsockopt( + &epi->sock, fdinfo->fd, level, optname, optval, optlen); ci_netif_unlock_fdi(epi); return rc; } -static int citp_tcp_setsockopt(citp_fdinfo* fdinfo, int level, - int optname, const void* optval, socklen_t optlen) +static int citp_tcp_setsockopt(citp_fdinfo* fdinfo, int level, int optname, + const void* optval, socklen_t optlen) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); - int rc; + int rc; - Log_VSC(ci_log(LPF "setsockopt("EF_FMT", %d, %d)", - EF_PRI_ARGS(epi,fdinfo->fd), level, optname)); + 
Log_VSC(ci_log(LPF "setsockopt(" EF_FMT ", %d, %d)", + EF_PRI_ARGS(epi, fdinfo->fd), level, optname)); - rc = ci_tcp_setsockopt(&epi->sock, fdinfo->fd, - level, optname, optval, optlen); + rc = ci_tcp_setsockopt( + &epi->sock, fdinfo->fd, level, optname, optval, optlen); if( rc == CI_SOCKET_HANDOVER ) { CITP_STATS_NETIF(++epi->sock.netif->state->stats.tcp_handover_setsockopt); @@ -1612,13 +1618,11 @@ static int citp_tcp_setsockopt(citp_fdinfo* fdinfo, int level, ci_netif_lock_fdi(epi); ci_tcp_helper_ep_clear_filters( - ci_netif_get_driver_handle(epi->sock.netif), - SC_SP(epi->sock.s), 0); + ci_netif_get_driver_handle(epi->sock.netif), SC_SP(epi->sock.s), 0); ci_netif_unlock_fdi(epi); citp_fdinfo_release_ref(fdinfo, 0); return 0; - } - else if( epi->sock.s->b.state == CI_TCP_CLOSED ) { + } else if( epi->sock.s->b.state == CI_TCP_CLOSED ) { ci_netif_lock_fdi(epi); if( (epi->sock.s->b.sb_aflags & CI_SB_AFLAG_OS_BACKED) == 0 ) { /* Non os-backed TCP sockets are one of three things: @@ -1633,22 +1637,22 @@ static int citp_tcp_setsockopt(citp_fdinfo* fdinfo, int level, * For tproxy bound sockets we can't handover, so we fail. 
*/ if( (SOCK_TO_TCP(epi->sock.s)->tcpflags & - CI_TCPT_FLAG_PASSIVE_OPENED) ) { + CI_TCPT_FLAG_PASSIVE_OPENED) ) { ci_netif_unlock_fdi(epi); RET_WITH_ERRNO(EINVAL); - } - else if( (epi->sock.s->s_flags & CI_SOCK_FLAG_TPROXY) && - (epi->sock.s->s_flags & CI_SOCK_FLAG_PORT_BOUND) ) { + } else if( (epi->sock.s->s_flags & CI_SOCK_FLAG_TPROXY) && + (epi->sock.s->s_flags & CI_SOCK_FLAG_PORT_BOUND) ) { ci_netif_unlock_fdi(epi); - NI_LOG(epi->sock.netif, USAGE_WARNINGS, "Sockets that have been " - "bound with IP_TRANSPARENT set cannot be handed over, and " - "socket option %d %d requires handover", level, optname); + NI_LOG(epi->sock.netif, USAGE_WARNINGS, + "Sockets that have been " + "bound with IP_TRANSPARENT set cannot be handed over, and " + "socket option %d %d requires handover", + level, optname); RET_WITH_ERRNO(EINVAL); } rc = ci_tcp_helper_os_sock_create_and_set(epi->sock.netif, fdinfo->fd, - epi->sock.s, level, optname, - optval, optlen); + epi->sock.s, level, optname, optval, optlen); if( rc < 0 ) { ci_netif_unlock_fdi(epi); RET_WITH_ERRNO(-rc); @@ -1659,14 +1663,12 @@ static int citp_tcp_setsockopt(citp_fdinfo* fdinfo, int level, ci_assert_flags(epi->sock.s->b.sb_aflags, CI_SB_AFLAG_OS_BACKED); tcp_handover(epi); return 0; - } - else /* Can't handover connected socket */ + } else /* Can't handover connected socket */ RET_WITH_ERRNO(EINVAL); } #if CI_CFG_ENDPOINT_MOVE - if( rc == 0 && - (epi->sock.s->s_flags & CI_SOCK_FLAG_PORT_BOUND) != 0 && + if( rc == 0 && (epi->sock.s->s_flags & CI_SOCK_FLAG_PORT_BOUND) != 0 && (epi->sock.s->s_flags & CI_SOCK_FLAG_FILTER) == 0 && ci_opt_is_setting_reuseport(level, optname, optval, optlen) != 0 ) /* If the following fails, we are not undoing the bind() done @@ -1677,10 +1679,9 @@ static int citp_tcp_setsockopt(citp_fdinfo* fdinfo, int level, /* The socket has moved so need to reprobe the fd. This will also * map the the new stack into user space of the executing process. 
*/ - fdinfo = citp_reprobe_moved(fdinfo, - CI_FALSE/* ! from_fast_lookup */, - CI_FALSE/* ! fdip_is_busy */); - epi = fdi_to_sock_fdi(fdinfo); + fdinfo = citp_reprobe_moved(fdinfo, CI_FALSE /* ! from_fast_lookup */, + CI_FALSE /* ! fdip_is_busy */); + epi = fdi_to_sock_fdi(fdinfo); ci_netif_cluster_prefault(epi->sock.netif); } #endif @@ -1692,11 +1693,12 @@ static int citp_tcp_setsockopt(citp_fdinfo* fdinfo, int level, static int citp_tcp_recv(citp_fdinfo* fdinfo, struct msghdr* msg, int flags) { - citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); + citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); ci_tcp_recvmsg_args a; - int rc; + int rc; - if (epi->sock.s->b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK|CI_SB_AFLAG_O_NDELAY)) + if( epi->sock.s->b.sb_aflags & + (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) ) flags |= MSG_DONTWAIT; if( (flags & (MSG_WAITALL | ONLOAD_MSG_ONEPKT)) == @@ -1706,75 +1708,75 @@ static int citp_tcp_recv(citp_fdinfo* fdinfo, struct msghdr* msg, int flags) return -1; }; - Log_V(ci_log(LPF "recv("EF_FMT", len=%d, "CI_SOCKCALL_FLAGS_FMT")", - EF_PRI_ARGS(epi,fdinfo->fd), - ci_iovec_bytes(msg->msg_iov, msg->msg_iovlen), - CI_SOCKCALL_FLAGS_PRI_ARG(flags))); + Log_V(ci_log(LPF "recv(" EF_FMT ", len=%d, " CI_SOCKCALL_FLAGS_FMT ")", + EF_PRI_ARGS(epi, fdinfo->fd), + ci_iovec_bytes(msg->msg_iov, msg->msg_iovlen), + CI_SOCKCALL_FLAGS_PRI_ARG(flags))); if( epi->sock.s->b.state != CI_TCP_LISTEN ) { if( (msg->msg_iovlen == 0 || msg->msg_iov == NULL) && ! 
(flags & MSG_ERRQUEUE) ) { - msg->msg_flags = 0; + msg->msg_flags = 0; msg->msg_controllen = 0; return 0; } - ci_tcp_recvmsg_args_init(&a, epi->sock.netif, SOCK_TO_TCP(epi->sock.s), - msg, flags); + ci_tcp_recvmsg_args_init( + &a, epi->sock.netif, SOCK_TO_TCP(epi->sock.s), msg, flags); rc = ci_tcp_recvmsg(&a); - Log_V(ci_log(LPF "recv("EF_FMT") = %d", EF_PRI_ARGS(epi, fdinfo->fd), rc)); + Log_V( + ci_log(LPF "recv(" EF_FMT ") = %d", EF_PRI_ARGS(epi, fdinfo->fd), rc)); return rc; } CI_SET_ERROR(rc, SOCK_RX_ERRNO(epi->sock.s)); - Log_V(ci_log(LPF "recv("EF_FMT") = %d", EF_PRI_ARGS(epi, fdinfo->fd), rc)); + Log_V(ci_log(LPF "recv(" EF_FMT ") = %d", EF_PRI_ARGS(epi, fdinfo->fd), rc)); return rc; } -static int citp_tcp_recvmmsg(citp_fdinfo* fdinfo, struct mmsghdr* msg, - unsigned vlen, int flags, - ci_recvmmsg_timespec* timeout) +static int citp_tcp_recvmmsg(citp_fdinfo* fdinfo, struct mmsghdr* msg, + unsigned vlen, int flags, ci_recvmmsg_timespec* timeout) { - Log_E(ci_log("%s: TCP fd recvmmsg not supported by OpenOnload", - __FUNCTION__)); + Log_E( + ci_log("%s: TCP fd recvmmsg not supported by OpenOnload", __FUNCTION__)); errno = ENOSYS; return -1; } -static int citp_tcp_sendmmsg(citp_fdinfo* fdinfo, struct mmsghdr* msg, - unsigned vlen, int flags) +static int citp_tcp_sendmmsg( + citp_fdinfo* fdinfo, struct mmsghdr* msg, unsigned vlen, int flags) { - Log_E(ci_log("%s: TCP fd sendmmsg not supported by OpenOnload", - __FUNCTION__)); + Log_E( + ci_log("%s: TCP fd sendmmsg not supported by OpenOnload", __FUNCTION__)); errno = ENOSYS; return -1; } -static int citp_tcp_send(citp_fdinfo* fdinfo, const struct msghdr* msg, - int flags) +static int citp_tcp_send( + citp_fdinfo* fdinfo, const struct msghdr* msg, int flags) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); - int rc; + int rc; ci_assert(msg != NULL); - if( epi->sock.s->b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | - CI_SB_AFLAG_O_NDELAY) ) { + if( epi->sock.s->b.sb_aflags & + (CI_SB_AFLAG_O_NONBLOCK | 
CI_SB_AFLAG_O_NDELAY) ) { flags |= MSG_DONTWAIT; } - if(CI_LIKELY( msg->msg_iov != NULL && msg->msg_iovlen > 0 )) { + if( CI_LIKELY(msg->msg_iov != NULL && msg->msg_iovlen > 0) ) { ci_uint32 state; - Log_V(ci_log(LPF "send("EF_FMT", len=%d, "CI_SOCKCALL_FLAGS_FMT")", - EF_PRI_ARGS(epi,fdinfo->fd), - ci_iovec_bytes(msg->msg_iov, msg->msg_iovlen), - CI_SOCKCALL_FLAGS_PRI_ARG(flags))); + Log_V(ci_log(LPF "send(" EF_FMT ", len=%d, " CI_SOCKCALL_FLAGS_FMT ")", + EF_PRI_ARGS(epi, fdinfo->fd), + ci_iovec_bytes(msg->msg_iov, msg->msg_iovlen), + CI_SOCKCALL_FLAGS_PRI_ARG(flags))); state = OO_ACCESS_ONCE(epi->sock.s->b.state); - /* Process CI_TCP_CLOSED without entering ci_tcp_sendmsg() because TCP state - * can be changed under our feet and we do not want to meet CI_TCP_LISTEN - * state inside ci_tcp_sendmsg(). */ + /* Process CI_TCP_CLOSED without entering ci_tcp_sendmsg() because TCP + * state can be changed under our feet and we do not want to meet + * CI_TCP_LISTEN state inside ci_tcp_sendmsg(). */ if( CI_UNLIKELY(state == CI_TCP_CLOSED || state == CI_TCP_LISTEN || state == CI_TCP_INVALID) ) { if( CI_UNLIKELY(flags & ONLOAD_MSG_WARM) ) @@ -1783,31 +1785,27 @@ static int citp_tcp_send(citp_fdinfo* fdinfo, const struct msghdr* msg, CI_SET_ERROR(rc, rc); else CI_SET_ERROR(rc, EPIPE); - } - else { + } else { rc = ci_tcp_sendmsg(epi->sock.netif, SOCK_TO_TCP(epi->sock.s), - msg->msg_iov, msg->msg_iovlen, flags); + msg->msg_iov, msg->msg_iovlen, flags); } - } - else if( msg != NULL && msg->msg_iovlen == 0 ) { + } else if( msg != NULL && msg->msg_iovlen == 0 ) { if( epi->sock.s->tx_errno ) { errno = epi->sock.s->tx_errno; - rc = -1; - } - else { + rc = -1; + } else { rc = 0; } - } - else { + } else { errno = EFAULT; - rc = -1; + rc = -1; } if( rc == -1 && errno == EPIPE && ! 
(flags & MSG_NOSIGNAL) ) { - oo_resource_op(ci_netif_get_driver_handle(epi->sock.netif), - OO_IOC_KILL_SELF_SIGPIPE, NULL); + oo_resource_op(ci_netif_get_driver_handle(epi->sock.netif), + OO_IOC_KILL_SELF_SIGPIPE, NULL); } - Log_V(log(LPF "send("EF_FMT") = %d", EF_PRI_ARGS(epi,fdinfo->fd),rc)); + Log_V(log(LPF "send(" EF_FMT ") = %d", EF_PRI_ARGS(epi, fdinfo->fd), rc)); return rc; } @@ -1820,28 +1818,28 @@ static int citp_tcp_fcntl(citp_fdinfo* fdinfo, int cmd, long arg) static int citp_tcp_ioctl(citp_fdinfo* fdinfo, int request, void* arg) { - citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo); - int rc; + citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); + int rc; - Log_VSC(ci_log(LPF "ioctl("EF_FMT", %d, %#lx)", - EF_PRI_ARGS(epi,fdinfo->fd), - request, (long) arg)); + Log_VSC(ci_log(LPF "ioctl(" EF_FMT ", %d, %#lx)", + EF_PRI_ARGS(epi, fdinfo->fd), request, (long) arg)); rc = ci_tcp_ioctl(&epi->sock, fdinfo->fd, request, arg); - Log_VSC(ci_log(LPF "ioctl: "EF_FMT" rc=%d", EF_PRI_ARGS(epi,fdinfo->fd),rc)); + Log_VSC( + ci_log(LPF "ioctl: " EF_FMT " rc=%d", EF_PRI_ARGS(epi, fdinfo->fd), rc)); if( rc < -1 ) CI_SET_ERROR(rc, -rc); return rc; } -/* ATTENTION! This function should be kept is sync with +/* ATTENTION! This function should be kept is sync with * ci_tcp_poll_events_listen() and ci_tcp_poll_events_nolisten() */ static int citp_tcp_select(citp_fdinfo* fdi, int* n, int rd, int wr, int ex, - struct oo_ul_select_state*__restrict__ ss) + struct oo_ul_select_state* __restrict__ ss) { - citp_sock_fdi *epi = fdi_to_sock_fdi(fdi); - ci_sock_cmn* s = epi->sock.s; - ci_netif* ni = epi->sock.netif; + citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); + ci_sock_cmn* s = epi->sock.s; + ci_netif* ni = epi->sock.netif; #if CI_CFG_SPIN_STATS if( CI_UNLIKELY(! 
ss->stat_incremented) ) { @@ -1857,28 +1855,26 @@ static int citp_tcp_select(citp_fdinfo* fdi, int* n, int rd, int wr, int ex, * Everything else goes via ci_tcp_poll_events_nolisten() */ if( (s->b.state & CI_TCP_STATE_SYNCHRONISED) && s->tx_errno == 0 ) { ci_tcp_state* ts = SOCK_TO_TCP(s); - if( rd && ( ci_tcp_recv_not_blocked(ts) - || ci_tcp_poll_timestamp_q_nonempty(ni, ts) ) ) { - FD_SET(fdi->fd, ss->rdu); - ++*n; + if( rd && (ci_tcp_recv_not_blocked(ts) || + ci_tcp_poll_timestamp_q_nonempty(ni, ts)) ) { + FD_SET(fdi->fd, ss->rdu); + ++*n; } - if( wr && ( ci_tcp_tx_advertise_space(ni, ts) - || ci_tcp_poll_timestamp_q_nonempty(ni, ts) ) ) { - FD_SET(fdi->fd, ss->wru); - ++*n; + if( wr && (ci_tcp_tx_advertise_space(ni, ts) || + ci_tcp_poll_timestamp_q_nonempty(ni, ts)) ) { + FD_SET(fdi->fd, ss->wru); + ++*n; } if( ex && ci_tcp_poll_events_nolisten_haspri(ni, ts) ) { FD_SET(fdi->fd, ss->exu); ++*n; } - } - else if( s->b.state == CI_TCP_LISTEN ) { + } else if( s->b.state == CI_TCP_LISTEN ) { if( rd && ci_tcp_poll_events_listen(ni, SOCK_TO_TCP_LISTEN(s)) ) { FD_SET(fdi->fd, ss->rdu); ++*n; } - } - else { + } else { /* slow path: instead of copying ci_tcp_poll_events_nolisten(), just * call it. And avoid races by calling ci_tcp_poll_events(). 
*/ unsigned mask = ci_tcp_poll_events(ni, s); @@ -1899,24 +1895,23 @@ static int citp_tcp_select(citp_fdinfo* fdi, int* n, int rd, int wr, int ex, return 1; } -static int citp_tcp_poll(citp_fdinfo*__restrict__ fdi, - struct pollfd*__restrict__ pfd, - struct oo_ul_poll_state*__restrict__ ps) +static int citp_tcp_poll(citp_fdinfo* __restrict__ fdi, + struct pollfd* __restrict__ pfd, struct oo_ul_poll_state* __restrict__ ps) { - citp_sock_fdi *epi = fdi_to_sock_fdi(fdi); - ci_sock_cmn* s = epi->sock.s; - ci_netif* ni = epi->sock.netif; - unsigned mask; + citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); + ci_sock_cmn* s = epi->sock.s; + ci_netif* ni = epi->sock.netif; + unsigned mask; #if CI_CFG_SPIN_STATS ni->state->stats.spin_poll++; #endif - mask = ci_tcp_poll_events(ni, s); + mask = ci_tcp_poll_events(ni, s); pfd->revents = mask & (pfd->events | POLLERR | POLLHUP); if( pfd->revents == 0 ) if( citp_poll_if_needed(ni, ps->this_poll_frc, ps->ul_poll_spin) ) { - mask = ci_tcp_poll_events(ni, s); + mask = ci_tcp_poll_events(ni, s); pfd->revents = mask & (pfd->events | POLLERR | POLLHUP); } @@ -1924,20 +1919,18 @@ static int citp_tcp_poll(citp_fdinfo*__restrict__ fdi, } - #include "ul_epoll.h" /* More-or-less copy of citp_tcp_poll */ -static int citp_tcp_epoll(citp_fdinfo*__restrict__ fdi, - struct citp_epoll_member*__restrict__ eitem, - struct oo_ul_epoll_state*__restrict__ eps, - int* stored_event) +static int citp_tcp_epoll(citp_fdinfo* __restrict__ fdi, + struct citp_epoll_member* __restrict__ eitem, + struct oo_ul_epoll_state* __restrict__ eps, int* stored_event) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); - ci_sock_cmn* s = epi->sock.s; - ci_netif* ni = epi->sock.netif; - ci_uint64 sleep_seq; - ci_uint32 mask; - int seq_mismatch = 0; + ci_sock_cmn* s = epi->sock.s; + ci_netif* ni = epi->sock.netif; + ci_uint64 sleep_seq; + ci_uint32 mask; + int seq_mismatch = 0; #if CI_CFG_SPIN_STATS if( CI_UNLIKELY(! 
eps->stat_incremented) ) { @@ -1947,28 +1940,26 @@ static int citp_tcp_epoll(citp_fdinfo*__restrict__ fdi, #endif /* Try to return a result without polling if we can. */ - sleep_seq = s->b.sleep_seq.all; - mask = ci_tcp_poll_events(ni, s); - *stored_event = citp_ul_epoll_set_ul_events(eps, eitem, mask, sleep_seq, - &s->b.sleep_seq.all, - &seq_mismatch); + sleep_seq = s->b.sleep_seq.all; + mask = ci_tcp_poll_events(ni, s); + *stored_event = citp_ul_epoll_set_ul_events( + eps, eitem, mask, sleep_seq, &s->b.sleep_seq.all, &seq_mismatch); /* Try a poll if we don't already have events. If this is an ordered wait * (ie we have ordering_info) another netif poll will be too late, so don't * bother. */ - if( (*stored_event == 0) && !eps->ordering_info ) { + if( (*stored_event == 0) && ! eps->ordering_info ) { if( citp_poll_if_needed(ni, eps->this_poll_frc, eps->ul_epoll_spin) ) { - sleep_seq = s->b.sleep_seq.all; - mask = ci_tcp_poll_events(ni, s); - seq_mismatch = 0; - *stored_event = citp_ul_epoll_set_ul_events(eps, eitem, mask, sleep_seq, - &s->b.sleep_seq.all, - &seq_mismatch); + sleep_seq = s->b.sleep_seq.all; + mask = ci_tcp_poll_events(ni, s); + seq_mismatch = 0; + *stored_event = citp_ul_epoll_set_ul_events( + eps, eitem, mask, sleep_seq, &s->b.sleep_seq.all, &seq_mismatch); } } /* We shouldn't have stored an event if there was a mismatch */ - ci_assert( !(seq_mismatch == 1 && *stored_event == 1) ); + ci_assert(! 
(seq_mismatch == 1 && *stored_event == 1)); return seq_mismatch; } @@ -1978,36 +1969,35 @@ ci_uint64 citp_sock_sleep_seq(citp_fdinfo* fdi) } -static int citp_tcp_zc_send(citp_fdinfo* fdi, struct onload_zc_mmsg* msg, - int flags) +static int citp_tcp_zc_send( + citp_fdinfo* fdi, struct onload_zc_mmsg* msg, int flags) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); - ci_netif* ni = epi->sock.netif; - ci_tcp_state *ts = SOCK_TO_TCP(epi->sock.s); - int rc = 0; + ci_netif* ni = epi->sock.netif; + ci_tcp_state* ts = SOCK_TO_TCP(epi->sock.s); + int rc = 0; if( epi->sock.s->b.state != CI_TCP_LISTEN ) { if( flags & ~ONLOAD_ZC_SEND_FLAGS_MASK ) { msg->rc = -EINVAL; - rc = 1; + rc = 1; } - - if( epi->sock.s->b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | - CI_SB_AFLAG_O_NDELAY) ) + + if( epi->sock.s->b.sb_aflags & + (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) ) flags |= MSG_DONTWAIT; if( rc == 0 ) rc = ci_tcp_zc_send(ni, ts, msg, flags); - } - else { + } else { msg->rc = -epi->sock.s->tx_errno; - rc = 1; + rc = 1; } ci_assert_equal(rc, 1); if( msg->rc == -EPIPE && ! (flags & MSG_NOSIGNAL) ) { - oo_resource_op(ci_netif_get_driver_handle(epi->sock.netif), - OO_IOC_KILL_SELF_SIGPIPE, NULL); + oo_resource_op(ci_netif_get_driver_handle(epi->sock.netif), + OO_IOC_KILL_SELF_SIGPIPE, NULL); } return rc; } @@ -2015,16 +2005,16 @@ static int citp_tcp_zc_send(citp_fdinfo* fdi, struct onload_zc_mmsg* msg, static int citp_tcp_zc_recv(citp_fdinfo* fdi, struct onload_zc_recv_args* args) { - citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); + citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); ci_tcp_recvmsg_args a; - int rc; + int rc; ci_tcp_recvmsg_args_init(&a, epi->sock.netif, SOCK_TO_TCP(epi->sock.s), - &args->msg.msghdr, args->flags); + &args->msg.msghdr, args->flags); /* Pointless for TCP, but we allow it to be specified anyway because it's * not strictly meaningless. 
*/ - a.flags &=~ ONLOAD_MSG_RECV_OS_INLINE; + a.flags &= ~ONLOAD_MSG_RECV_OS_INLINE; if( (a.flags & (MSG_WAITALL | ONLOAD_MSG_ONEPKT)) == (MSG_WAITALL | ONLOAD_MSG_ONEPKT) ) { @@ -2040,32 +2030,31 @@ static int citp_tcp_zc_recv(citp_fdinfo* fdi, struct onload_zc_recv_args* args) return -EINVAL; } - if (epi->sock.s->b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK|CI_SB_AFLAG_O_NDELAY)) + if( epi->sock.s->b.sb_aflags & + (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) ) a.flags |= MSG_DONTWAIT; - Log_V(ci_log(LPF "zc_recv("EF_FMT", "CI_SOCKCALL_FLAGS_FMT")", - EF_PRI_ARGS(epi, fdi->fd), - CI_SOCKCALL_FLAGS_PRI_ARG(a.flags))); + Log_V(ci_log(LPF "zc_recv(" EF_FMT ", " CI_SOCKCALL_FLAGS_FMT ")", + EF_PRI_ARGS(epi, fdi->fd), CI_SOCKCALL_FLAGS_PRI_ARG(a.flags))); if( epi->sock.s->b.state != CI_TCP_LISTEN ) rc = ci_tcp_zc_recvmsg(&a, args); else rc = -SOCK_RX_ERRNO(epi->sock.s); - Log_V(ci_log(LPF "zc_recv("EF_FMT") = %d", EF_PRI_ARGS(epi, fdi->fd), rc)); + Log_V(ci_log(LPF "zc_recv(" EF_FMT ") = %d", EF_PRI_ARGS(epi, fdi->fd), rc)); return rc; } -static int citp_tcp_recvmsg_kernel(citp_fdinfo* fdi, struct msghdr *msg, - int flags) +static int citp_tcp_recvmsg_kernel( + citp_fdinfo* fdi, struct msghdr* msg, int flags) { return -EOPNOTSUPP; } static int citp_tcp_zc_recv_filter(citp_fdinfo* fdi, - onload_zc_recv_filter_callback filter, - void* cb_arg, int flags) + onload_zc_recv_filter_callback filter, void* cb_arg, int flags) { #if CI_CFG_ZC_RECV_FILTER return -EOPNOTSUPP; @@ -2075,13 +2064,12 @@ static int citp_tcp_zc_recv_filter(citp_fdinfo* fdi, } int citp_tcp_tmpl_alloc(citp_fdinfo* fdi, const struct iovec* initial_msg, - int mlen, struct oo_msg_template** omt_pp, - unsigned flags) + int mlen, struct oo_msg_template** omt_pp, unsigned flags) { #if CI_CFG_PIO citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); - ci_tcp_state* ts = SOCK_TO_TCP(epi->sock.s); - ci_netif* ni = epi->sock.netif; + ci_tcp_state* ts = SOCK_TO_TCP(epi->sock.s); + ci_netif* ni = epi->sock.netif; 
ci_assert(ts->s.b.state != CI_TCP_LISTEN); return ci_tcp_tmpl_alloc(ni, ts, omt_pp, initial_msg, mlen, flags); @@ -2094,15 +2082,14 @@ int citp_tcp_tmpl_alloc(citp_fdinfo* fdi, const struct iovec* initial_msg, } -int -citp_tcp_tmpl_update(citp_fdinfo* fdi, struct oo_msg_template* omt, - const struct onload_template_msg_update_iovec* updates, - int ulen, unsigned flags) +int citp_tcp_tmpl_update(citp_fdinfo* fdi, struct oo_msg_template* omt, + const struct onload_template_msg_update_iovec* updates, int ulen, + unsigned flags) { #if CI_CFG_PIO citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); - ci_tcp_state* ts = SOCK_TO_TCP(epi->sock.s); - ci_netif* ni = epi->sock.netif; + ci_tcp_state* ts = SOCK_TO_TCP(epi->sock.s); + ci_netif* ni = epi->sock.netif; ci_assert(ts->s.b.state != CI_TCP_LISTEN); return ci_tcp_tmpl_update(ni, ts, omt, updates, ulen, flags); @@ -2116,8 +2103,8 @@ int citp_tcp_tmpl_abort(citp_fdinfo* fdi, struct oo_msg_template* omt) { #if CI_CFG_PIO citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); - ci_tcp_state* ts = SOCK_TO_TCP(epi->sock.s); - ci_netif* ni = epi->sock.netif; + ci_tcp_state* ts = SOCK_TO_TCP(epi->sock.s); + ci_netif* ni = epi->sock.netif; ci_assert(ts->s.b.state != CI_TCP_LISTEN); return ci_tcp_tmpl_abort(ni, ts, omt); @@ -2128,35 +2115,34 @@ int citp_tcp_tmpl_abort(citp_fdinfo* fdi, struct oo_msg_template* omt) #if CI_CFG_TIMESTAMPING -static int -citp_tcp_ordered_data(citp_fdinfo* fdi, struct timespec* limit, - struct timespec* next_out, int* bytes_out) +static int citp_tcp_ordered_data(citp_fdinfo* fdi, struct timespec* limit, + struct timespec* next_out, int* bytes_out) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); - ci_sock_cmn* s = epi->sock.s; - ci_tcp_state* ts; + ci_sock_cmn* s = epi->sock.s; + ci_tcp_state* ts; ci_ip_pkt_fmt* pkt; - *bytes_out = 0; + *bytes_out = 0; next_out->tv_sec = 0; if( s->b.state != CI_TCP_LISTEN ) { ts = SOCK_TO_TCP(s); - + if( OO_SP_NOT_NULL(ts->local_peer) ) return 0; ci_sock_lock(epi->sock.netif, &ts->s.b); - if( 
tcp_rcv_usr(ts) <= 0 || OO_PP_IS_NULL(ts->recv1_extract)) { + if( tcp_rcv_usr(ts) <= 0 || OO_PP_IS_NULL(ts->recv1_extract) ) { ci_sock_unlock(epi->sock.netif, &ts->s.b); return 0; } pkt = PKT_CHK_NNL(epi->sock.netif, ts->recv1_extract); if( oo_offbuf_is_empty(&pkt->buf) ) { - if( OO_PP_IS_NULL(pkt->next) ) { + if( OO_PP_IS_NULL(pkt->next) ) { ci_sock_unlock(epi->sock.netif, &ts->s.b); - return 0; /* recv1 is empty. */ + return 0; /* recv1 is empty. */ } pkt = PKT_CHK_NNL(epi->sock.netif, pkt->next); ci_assert(oo_offbuf_not_empty(&pkt->buf)); @@ -2164,13 +2150,12 @@ citp_tcp_ordered_data(citp_fdinfo* fdi, struct timespec* limit, do { struct timespec stamp; - ci_rx_pkt_timespec(pkt, &stamp, - NI_OPTS(epi->sock.netif).rx_timestamping_ordering); + ci_rx_pkt_timespec( + pkt, &stamp, NI_OPTS(epi->sock.netif).rx_timestamping_ordering); if( citp_timespec_compare(&stamp, limit) < 1 ) { *bytes_out += oo_offbuf_left(&pkt->buf); - } - else { + } else { *next_out = stamp; break; } @@ -2178,8 +2163,7 @@ citp_tcp_ordered_data(citp_fdinfo* fdi, struct timespec* limit, pkt = PKT_CHK_NNL(epi->sock.netif, pkt->next); else break; - } - while( 1 ); + } while( 1 ); ci_sock_unlock(epi->sock.netif, &ts->s.b); } @@ -2190,19 +2174,17 @@ citp_tcp_ordered_data(citp_fdinfo* fdi, struct timespec* limit, int citp_sock_is_spinning(citp_fdinfo* fdi) { - return !!fdi_to_sock_fdi(fdi)->sock.s->b.spin_cycles; + return ! ! 
fdi_to_sock_fdi(fdi)->sock.s->b.spin_cycles; } - -enum onload_delegated_send_rc -citp_tcp_ds_prepare(citp_fdinfo* fdi, int size, unsigned flags, - struct onload_delegated_send* out) +enum onload_delegated_send_rc citp_tcp_ds_prepare(citp_fdinfo* fdi, int size, + unsigned flags, struct onload_delegated_send* out) { - citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); - ci_netif* ni = epi->sock.netif; - ci_sock_cmn* s = epi->sock.s; - ci_tcp_state* ts; + citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); + ci_netif* ni = epi->sock.netif; + ci_sock_cmn* s = epi->sock.s; + ci_tcp_state* ts; enum onload_delegated_send_rc rc = ONLOAD_DELEGATED_SEND_RC_OK; enum onload_delegated_send_rc rc1; @@ -2211,7 +2193,7 @@ citp_tcp_ds_prepare(citp_fdinfo* fdi, int size, unsigned flags, #if CI_CFG_TIMESTAMPING || (s->timestamping_flags & ONLOAD_SOF_TIMESTAMPING_STREAM) #endif - ) + ) return ONLOAD_DELEGATED_SEND_RC_BAD_SOCKET; ts = SOCK_TO_TCP(epi->sock.s); if( ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE ) @@ -2227,27 +2209,26 @@ citp_tcp_ds_prepare(citp_fdinfo* fdi, int size, unsigned flags, } /* Calculate the windows */ - out->mss = tcp_eff_mss(ts); - out->send_wnd = CI_MIN(SEQ_SUB(ts->snd_max, tcp_snd_nxt(ts)), - ci_tcp_tx_send_space(ni, ts) * tcp_eff_mss(ts)); - out->cong_wnd = ts->cwnd + ts->cwnd_extra - ci_tcp_inflight(ts); + out->mss = tcp_eff_mss(ts); + out->send_wnd = CI_MIN(SEQ_SUB(ts->snd_max, tcp_snd_nxt(ts)), + ci_tcp_tx_send_space(ni, ts) * tcp_eff_mss(ts)); + out->cong_wnd = ts->cwnd + ts->cwnd_extra - ci_tcp_inflight(ts); out->user_size = size; if( out->cong_wnd < out->mss ) { - ci_assert( ci_ip_queue_not_empty(&ts->retrans) ); + ci_assert(ci_ip_queue_not_empty(&ts->retrans)); rc = ONLOAD_DELEGATED_SEND_RC_NOCWIN; /* We allow user to violate congestion window, and intentionally fill * in the headers in this case. 
*/ } if( out->send_wnd <= 0 ) { out->send_wnd = 0; - rc = ONLOAD_DELEGATED_SEND_RC_NOWIN; + rc = ONLOAD_DELEGATED_SEND_RC_NOWIN; goto unlock_out; } rc1 = ci_tcp_ds_fill_headers(ni, ts, flags, out->headers, &out->headers_len, - &out->ip_tcp_hdr_len, - &out->tcp_seq_offset, &out->ip_len_offset); + &out->ip_tcp_hdr_len, &out->tcp_seq_offset, &out->ip_len_offset); if( rc1 != ONLOAD_DELEGATED_SEND_RC_OK ) { rc = rc1; goto unlock_out; @@ -2256,18 +2237,18 @@ citp_tcp_ds_prepare(citp_fdinfo* fdi, int size, unsigned flags, /* Tell TCP state to be ready for ACKs from future */ ts->snd_delegated = CI_MIN(size, out->send_wnd); - unlock_out: +unlock_out: ci_netif_unlock(ni); return rc; } -int citp_tcp_ds_complete(citp_fdinfo* fdi, const ci_iovec *iov, int iovlen, - int flags) +int citp_tcp_ds_complete( + citp_fdinfo* fdi, const ci_iovec* iov, int iovlen, int flags) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); - ci_netif* ni = epi->sock.netif; - ci_sock_cmn* s = epi->sock.s; - int rc; + ci_netif* ni = epi->sock.netif; + ci_sock_cmn* s = epi->sock.s; + int rc; if( (~s->b.state & CI_TCP_STATE_TCP) || s->b.state == CI_TCP_LISTEN ) { errno = EINVAL; @@ -2278,7 +2259,7 @@ int citp_tcp_ds_complete(citp_fdinfo* fdi, const ci_iovec *iov, int iovlen, if( rc == -1 && errno == EPIPE && ! 
(flags & MSG_NOSIGNAL) ) { oo_resource_op(ci_netif_get_driver_handle(epi->sock.netif), - OO_IOC_KILL_SELF_SIGPIPE, NULL); + OO_IOC_KILL_SELF_SIGPIPE, NULL); } return rc; } @@ -2286,7 +2267,7 @@ int citp_tcp_ds_complete(citp_fdinfo* fdi, const ci_iovec *iov, int iovlen, int citp_tcp_ds_cancel(citp_fdinfo* fdi) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); - ci_sock_cmn* s = epi->sock.s; + ci_sock_cmn* s = epi->sock.s; if( (~s->b.state & CI_TCP_STATE_TCP) || s->b.state == CI_TCP_LISTEN ) { errno = ENOTTY; @@ -2298,52 +2279,50 @@ int citp_tcp_ds_cancel(citp_fdinfo* fdi) } -citp_protocol_impl citp_tcp_protocol_impl = { - .type = CITP_TCP_SOCKET, - .ops = { - .socket = citp_tcp_socket, +citp_protocol_impl citp_tcp_protocol_impl = { .type = CITP_TCP_SOCKET, + .ops = { + .socket = citp_tcp_socket, #if CI_CFG_FD_CACHING - .close = citp_tcp_close, + .close = citp_tcp_close, #endif - .dtor = citp_tcp_dtor, - .dup = citp_tcp_dup, - .bind = citp_tcp_bind, - .listen = citp_tcp_listen, - .accept = citp_tcp_accept, - .connect = citp_tcp_connect, - .shutdown = citp_tcp_shutdown, - .getsockname = citp_tcp_getsockname, - .getpeername = citp_tcp_getpeername, - .getsockopt = citp_tcp_getsockopt, - .setsockopt = citp_tcp_setsockopt, - .recv = citp_tcp_recv, - .recvmmsg = citp_tcp_recvmmsg, - .send = citp_tcp_send, - .sendmmsg = citp_tcp_sendmmsg, - .fcntl = citp_tcp_fcntl, - .ioctl = citp_tcp_ioctl, - .select = citp_tcp_select, - .poll = citp_tcp_poll, - .epoll = citp_tcp_epoll, - .sleep_seq = citp_sock_sleep_seq, - .zc_send = citp_tcp_zc_send, - .zc_recv = citp_tcp_zc_recv, - .zc_recv_filter = citp_tcp_zc_recv_filter, - .recvmsg_kernel = citp_tcp_recvmsg_kernel, - .tmpl_alloc = citp_tcp_tmpl_alloc, - .tmpl_update = citp_tcp_tmpl_update, - .tmpl_abort = citp_tcp_tmpl_abort, + .dtor = citp_tcp_dtor, + .dup = citp_tcp_dup, + .bind = citp_tcp_bind, + .listen = citp_tcp_listen, + .accept = citp_tcp_accept, + .connect = citp_tcp_connect, + .shutdown = citp_tcp_shutdown, + .getsockname = 
citp_tcp_getsockname, + .getpeername = citp_tcp_getpeername, + .getsockopt = citp_tcp_getsockopt, + .setsockopt = citp_tcp_setsockopt, + .recv = citp_tcp_recv, + .recvmmsg = citp_tcp_recvmmsg, + .send = citp_tcp_send, + .sendmmsg = citp_tcp_sendmmsg, + .fcntl = citp_tcp_fcntl, + .ioctl = citp_tcp_ioctl, + .select = citp_tcp_select, + .poll = citp_tcp_poll, + .epoll = citp_tcp_epoll, + .sleep_seq = citp_sock_sleep_seq, + .zc_send = citp_tcp_zc_send, + .zc_recv = citp_tcp_zc_recv, + .zc_recv_filter = citp_tcp_zc_recv_filter, + .recvmsg_kernel = citp_tcp_recvmsg_kernel, + .tmpl_alloc = citp_tcp_tmpl_alloc, + .tmpl_update = citp_tcp_tmpl_update, + .tmpl_abort = citp_tcp_tmpl_abort, #if CI_CFG_TIMESTAMPING - .ordered_data = citp_tcp_ordered_data, + .ordered_data = citp_tcp_ordered_data, #endif - .is_spinning = citp_sock_is_spinning, + .is_spinning = citp_sock_is_spinning, #if CI_CFG_FD_CACHING - .cache = citp_tcp_cache, + .cache = citp_tcp_cache, #endif - .dsend_prepare = citp_tcp_ds_prepare, - .dsend_complete = citp_tcp_ds_complete, - .dsend_cancel = citp_tcp_ds_cancel, - } -}; + .dsend_prepare = citp_tcp_ds_prepare, + .dsend_complete = citp_tcp_ds_complete, + .dsend_cancel = citp_tcp_ds_cancel, + } }; /*! \cidoxg_end */ diff --git a/src/lib/transport/unix/udp_fd.c b/src/lib/transport/unix/udp_fd.c index a69c82ad7..25785e3a9 100644 --- a/src/lib/transport/unix/udp_fd.c +++ b/src/lib/transport/unix/udp_fd.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2004-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! 
\file ** ** \author djr/ctk/stg ** \brief Sockets interface to user level UDP @@ -25,10 +25,10 @@ #define VERB(x) Log_VTC(x) -#define LPF "citp_udp_" +#define LPF "citp_udp_" #ifndef MSG_CONFIRM -#define MSG_CONFIRM 0 /* so we never see it in our flags */ +#define MSG_CONFIRM 0 /* so we never see it in our flags */ #endif @@ -37,35 +37,35 @@ */ #ifndef NDEBUG -ci_inline char * __decode_flags(int fl) +ci_inline char* __decode_flags(int fl) { static char buf[32]; - char * t = buf; + char* t = buf; size_t n = sizeof(buf); *buf = 0; - if( fl & MSG_OOB) { + if( fl & MSG_OOB ) { t += snprintf(t, n, "OOB "); n = t - buf; } - if( fl & MSG_PEEK) { + if( fl & MSG_PEEK ) { t += snprintf(t, n, "PEEK "); n = t - buf; } - if( fl & MSG_NOSIGNAL) { + if( fl & MSG_NOSIGNAL ) { t += snprintf(t, n, "NSIG "); n = t - buf; } - if( fl & MSG_TRUNC) { + if( fl & MSG_TRUNC ) { t += snprintf(t, n, "TRNC "); n = t - buf; } - if( fl & MSG_DONTWAIT) { + if( fl & MSG_DONTWAIT ) { t += snprintf(t, n, "NWT "); n = t - buf; } - if( fl & MSG_WAITALL) { + if( fl & MSG_WAITALL ) { t += snprintf(t, n, "WALL "); n = t - buf; } @@ -86,7 +86,7 @@ static int citp_udp_socket(int domain, int type, int protocol) ci_netif* ni; int /*bool*/ orderly_handover = CI_FALSE; - Log_V(log(LPF "socket(%d, %d, %d)", domain, type, protocol)); + LOG_UC(log(LPF "socket(%d, %d, %d)", domain, type, protocol)); epi = CI_ALLOC_OBJ(citp_sock_fdi); if( ! epi ) { @@ -108,17 +108,20 @@ static int citp_udp_socket(int domain, int type, int protocol) } /* Protect the fdtable entry until we're done initialising. */ - if( fdtable_strict() ) CITP_FDTABLE_LOCK(); - if((fd = ci_udp_ep_ctor(&epi->sock, ni, domain, type)) < 0) { + if( fdtable_strict() ) + CITP_FDTABLE_LOCK(); + if( (fd = ci_udp_ep_ctor(&epi->sock, ni, domain, type)) < 0 ) { /*! ?? 
\TODO unpick the ci_udp_ep_ctor according to how failed */ - if( fdtable_strict() ) CITP_FDTABLE_UNLOCK(); + if( fdtable_strict() ) + CITP_FDTABLE_UNLOCK(); Log_U(ci_log(LPF "socket: udp_ep_ctor failed")); errno = -fd; goto fail3; } citp_fdtable_new_fd_set(fd, fdip_busy, fdtable_strict()); - if( fdtable_strict() ) CITP_FDTABLE_UNLOCK(); + if( fdtable_strict() ) + CITP_FDTABLE_UNLOCK(); CI_DEBUG(epi->sock.s->pid = getpid()); @@ -127,23 +130,23 @@ static int citp_udp_socket(int domain, int type, int protocol) ci_atomic32_and(&epi->sock.s->b.sb_aflags, ~CI_SB_AFLAG_NOT_READY); citp_fdtable_insert(fdi, fd, 0); - Log_VSS(log(LPF "socket(%d, %d, %d) = "EF_FMT, domain, type, protocol, - EF_PRI_ARGS(epi,fd))); + Log_VSS(log(LPF "socket(%d, %d, %d) = " EF_FMT, domain, type, protocol, + EF_PRI_ARGS(epi, fd))); return fd; - fail3: +fail3: if( (CITP_OPTS.no_fail || orderly_handover) && errno != ELIBACC ) CITP_STATS_NETIF(++ni->state->stats.udp_handover_socket); citp_netif_release_ref(ni, 0); - fail2: +fail2: CI_FREE_OBJ(epi); - fail1: +fail1: /* BUG1408: Graceful failure. We'll only fail outright if there's a * driver/library mismatch */ if( (CITP_OPTS.no_fail || orderly_handover) && errno != ELIBACC ) { if( ! 
orderly_handover ) - Log_U(ci_log("%s: failed (errno:%d) - PASSING TO OS", __FUNCTION__, - errno)); + Log_U(ci_log( + "%s: failed (errno:%d) - PASSING TO OS", __FUNCTION__, errno)); return CI_SOCKET_HANDOVER; } return -1; @@ -156,10 +159,10 @@ static void citp_udp_dtor(citp_fdinfo* fdinfo, int fdt_locked) } -static int citp_udp_bind(citp_fdinfo* fdinfo, const struct sockaddr* sa, - socklen_t sa_len) +static int citp_udp_bind( + citp_fdinfo* fdinfo, const struct sockaddr* sa, socklen_t sa_len) { - citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo); + citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); citp_socket* ep = &epi->sock; ci_sock_cmn* s = ep->s; ci_uint16 lport; @@ -169,11 +172,11 @@ static int citp_udp_bind(citp_fdinfo* fdinfo, const struct sockaddr* sa, if( sa != NULL ) Log_V(log("%s: Bind to port = %d", __FUNCTION__, - ntohs(((struct sockaddr_in*)sa)->sin_port))); + ntohs(((struct sockaddr_in*) sa)->sin_port))); /* There should be address length check before address family validation to * match Linux errno value set in inet6_bind(). */ - if (s->domain == PF_INET6 && sa_len < SIN6_LEN_RFC2133) { + if( s->domain == PF_INET6 && sa_len < SIN6_LEN_RFC2133 ) { CI_SET_ERROR(rc, EINVAL); goto done; } @@ -204,16 +207,15 @@ static int citp_udp_bind(citp_fdinfo* fdinfo, const struct sockaddr* sa, #if CI_CFG_ENDPOINT_MOVE /* multicast sockets do not do clustering */ - if( (s->s_flags & CI_SOCK_FLAG_REUSEPORT) != 0 && - CI_SOCK_NOT_BOUND(s) && + if( (s->s_flags & CI_SOCK_FLAG_REUSEPORT) != 0 && CI_SOCK_NOT_BOUND(s) && ! CI_IPX_IS_MULTICAST(ci_get_addr(sa)) ) { - if( (rc = ci_udp_reuseport_bind(ep, fdinfo->fd, sa, sa_len, lport)) == 0 ) { + if( (rc = ci_udp_reuseport_bind(ep, fdinfo->fd, sa, sa_len, lport)) == + 0 ) { /* The socket has moved so need to reprobe the fd. This will also * map the the new stack into user space of the executing process. */ - fdinfo = citp_reprobe_moved(fdinfo, - CI_FALSE/* ! from_fast_lookup */, - CI_FALSE/* ! 
fdip_is_busy */); + fdinfo = citp_reprobe_moved(fdinfo, CI_FALSE /* ! from_fast_lookup */, + CI_FALSE /* ! fdip_is_busy */); /* We want to prefault the packets for the new clustered stack. This * is only needed if we successfully reprobed a valid fd. This might * not happen if the fd has been closed or re-used under our feet. @@ -227,13 +229,11 @@ static int citp_udp_bind(citp_fdinfo* fdinfo, const struct sockaddr* sa, ep = &epi->sock; UDP_SET_FLAG(SOCK_TO_UDP(ep->s), CI_UDPF_FILTERED); ci_netif_cluster_prefault(ep->netif); - } - else { + } else { CI_SET_ERROR(rc, EBADF); goto done; } - } - else { + } else { goto done; } } @@ -243,27 +243,35 @@ static int citp_udp_bind(citp_fdinfo* fdinfo, const struct sockaddr* sa, rc = ci_udp_bind_conclude(ep, sa, sa_len, lport); ci_netif_unlock_fdi(epi); - done: + // This will happen when we try to set userland filters that have already + // been set We currently don't support that so we have to return that the + // Address is in use regardless of any flags set on the socket + if( rc == CI_SOCKET_ERROR ) { + errno = EADDRINUSE; + } + +done: if( rc == CI_SOCKET_HANDOVER ) { CITP_STATS_NETIF(++epi->sock.netif->state->stats.udp_handover_bind); citp_fdinfo_handover(fdinfo, -1); return 0; } - if( fdinfo ) - citp_fdinfo_release_ref( fdinfo, 0 ); + if( fdinfo ) { + citp_fdinfo_release_ref(fdinfo, 0); + } + return rc; } -static int citp_udp_connect(citp_fdinfo* fdinfo, - const struct sockaddr* sa, socklen_t sa_len, - citp_lib_context_t* lib_context) +static int citp_udp_connect(citp_fdinfo* fdinfo, const struct sockaddr* sa, + socklen_t sa_len, citp_lib_context_t* lib_context) { - citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo); + citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); int rc; - Log_V(log(LPF "connect(%d, sa, %d)", fdinfo->fd, sa_len)); + LOG_UC(log(LPF "connect(%d, sa, %d)", fdinfo->fd, sa_len)); ci_netif_lock_fdi(epi); rc = ci_udp_connect(&epi->sock, fdinfo->fd, sa, sa_len); @@ -275,45 +283,46 @@ static int 
citp_udp_connect(citp_fdinfo* fdinfo, return 0; } - citp_fdinfo_release_ref( fdinfo, 0 ); + citp_fdinfo_release_ref(fdinfo, 0); return rc; } static int citp_udp_shutdown(citp_fdinfo* fdinfo, int how) { - citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo); + citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); int rc; - Log_V(ci_log("%s("EF_FMT", %d)", __FUNCTION__, EF_PRI_ARGS(epi,fdinfo->fd), how)); + LOG_UC(ci_log( + "%s(" EF_FMT ", %d)", __FUNCTION__, EF_PRI_ARGS(epi, fdinfo->fd), how)); ci_netif_lock_fdi(epi); rc = ci_udp_shutdown(&epi->sock, fdinfo->fd, how); ci_netif_unlock_fdi(epi); - Log_V(log(LPF "shutdown: fd=%d rc=%d", fdinfo->fd, rc)); + LOG_UC(log(LPF "shutdown: fd=%d rc=%d", fdinfo->fd, rc)); return rc; } -static int citp_udp_getsockname(citp_fdinfo* fdinfo, - struct sockaddr* sa, socklen_t* p_sa_len) +static int citp_udp_getsockname( + citp_fdinfo* fdinfo, struct sockaddr* sa, socklen_t* p_sa_len) { - citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo); + citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); - Log_VSC(log(LPF "getsockname("EF_FMT")", EF_PRI_ARGS(epi, fdinfo->fd))); + Log_VSC(log(LPF "getsockname(" EF_FMT ")", EF_PRI_ARGS(epi, fdinfo->fd))); __citp_getsockname(epi->sock.s, sa, p_sa_len); return 0; } -static int citp_udp_getpeername(citp_fdinfo* fdinfo, - struct sockaddr* sa, socklen_t* p_sa_len) +static int citp_udp_getpeername( + citp_fdinfo* fdinfo, struct sockaddr* sa, socklen_t* p_sa_len) { - citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo); + citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); int rc; - Log_V(log("%s("EF_FMT")", __FUNCTION__, EF_PRI_ARGS(epi,fdinfo->fd))); + Log_V(log("%s(" EF_FMT ")", __FUNCTION__, EF_PRI_ARGS(epi, fdinfo->fd))); ci_netif_lock_fdi(epi); rc = ci_udp_getpeername(&epi->sock, sa, p_sa_len); @@ -322,33 +331,33 @@ static int citp_udp_getpeername(citp_fdinfo* fdinfo, } -static int citp_udp_getsockopt(citp_fdinfo* fdinfo, int level, - int optname, void* optval, socklen_t* optlen) +static int citp_udp_getsockopt(citp_fdinfo* fdinfo, int 
level, int optname, + void* optval, socklen_t* optlen) { - citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo); + citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); int rc; - Log_V(log("%s("EF_FMT", %d, %d)", __FUNCTION__, EF_PRI_ARGS(epi,fdinfo->fd), - level, optname )); + LOG_UC(log("%s(" EF_FMT ", %d, %d)", __FUNCTION__, + EF_PRI_ARGS(epi, fdinfo->fd), level, optname)); ci_netif_lock_fdi(epi); - rc = ci_udp_getsockopt(&epi->sock, fdinfo->fd, - level, optname, optval, optlen); + rc = ci_udp_getsockopt( + &epi->sock, fdinfo->fd, level, optname, optval, optlen); ci_netif_unlock_fdi(epi); return rc; } -static int citp_udp_setsockopt(citp_fdinfo* fdinfo, int level, - int optname, const void* optval, socklen_t optlen) +static int citp_udp_setsockopt(citp_fdinfo* fdinfo, int level, int optname, + const void* optval, socklen_t optlen) { - citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo); - citp_socket* ep = &epi->sock; - ci_sock_cmn* s = ep->s; + citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); + citp_socket* ep = &epi->sock; + ci_sock_cmn* s = ep->s; int rc; - Log_VSC(log("%s("EF_FMT", %d, %d)", __FUNCTION__, - EF_PRI_ARGS(epi, fdinfo->fd), level, optname)); + LOG_UC(log("%s(" EF_FMT ", %d, %d)", __FUNCTION__, + EF_PRI_ARGS(epi, fdinfo->fd), level, optname)); if( ci_opt_is_setting_reuseport(level, optname, optval, optlen) != 0 && ! 
CI_SOCK_NOT_BOUND(s) ) { @@ -356,15 +365,15 @@ static int citp_udp_setsockopt(citp_fdinfo* fdinfo, int level, CITP_STATS_NETIF(++epi->sock.netif->state->stats.udp_handover_setsockopt); citp_fdinfo_handover(fdinfo, -1); NI_LOG(epi->sock.netif, USAGE_WARNINGS, - "%s: setting reuseport after binding on udp not supported", - __FUNCTION__); + "%s: setting reuseport after binding on udp not supported", + __FUNCTION__); return ci_sys_setsockopt(fd, level, optname, optval, optlen); } - rc = ci_udp_setsockopt(&epi->sock, fdinfo->fd, - level, optname, optval, optlen); + rc = ci_udp_setsockopt( + &epi->sock, fdinfo->fd, level, optname, optval, optlen); - Log_V(log(LPF "setsockopt: fd=%d rc=%d", fdinfo->fd, rc)); + LOG_UC(log(LPF "setsockopt: fd=%d rc=%d", fdinfo->fd, rc)); if( rc == CI_SOCKET_HANDOVER ) { CITP_STATS_NETIF(++epi->sock.netif->state->stats.udp_handover_setsockopt); @@ -376,15 +385,14 @@ static int citp_udp_setsockopt(citp_fdinfo* fdinfo, int level, return rc; } -static int citp_udp_recvmmsg(citp_fdinfo* fdinfo, struct mmsghdr* msg, - unsigned vlen, int flags, - ci_recvmmsg_timespec* timeout) +static int citp_udp_recvmmsg(citp_fdinfo* fdinfo, struct mmsghdr* msg, + unsigned vlen, int flags, ci_recvmmsg_timespec* timeout) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); ci_udp_iomsg_args a; - Log_V(log(LPF "recvmmsg(%d, msg, %u, %#x)", fdinfo->fd, vlen, - (unsigned) flags)); + Log_V(log( + LPF "recvmmsg(%d, msg, %u, %#x)", fdinfo->fd, vlen, (unsigned) flags)); a.fd = fdinfo->fd; a.ep = &epi->sock; @@ -406,14 +414,14 @@ static int citp_udp_recv(citp_fdinfo* fdinfo, struct msghdr* msg, int flags) a.ni = epi->sock.netif; a.us = SOCK_TO_UDP(epi->sock.s); - return ci_udp_recvmsg( &a, msg, flags); + return ci_udp_recvmsg(&a, msg, flags); } -static int citp_udp_send(citp_fdinfo* fdinfo, const struct msghdr * msg, - int flags) +static int citp_udp_send( + citp_fdinfo* fdinfo, const struct msghdr* msg, int flags) { - citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo); + 
citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); ci_udp_iomsg_args a; int rc; @@ -425,10 +433,9 @@ static int citp_udp_send(citp_fdinfo* fdinfo, const struct msghdr * msg, a.us = SOCK_TO_UDP(epi->sock.s); /* NB. msg_name[len] validated in ci_udp_sendmsg(). */ - if(CI_LIKELY( msg->msg_iov != NULL || msg->msg_iovlen == 0 )) { - rc = ci_udp_sendmsg( &a, msg, flags); - } - else { + if( CI_LIKELY(msg->msg_iov != NULL || msg->msg_iovlen == 0) ) { + rc = ci_udp_sendmsg(&a, msg, flags); + } else { rc = -1; errno = EFAULT; } @@ -436,17 +443,17 @@ static int citp_udp_send(citp_fdinfo* fdinfo, const struct msghdr * msg, } -static int citp_udp_sendmmsg(citp_fdinfo* fdinfo, struct mmsghdr* mmsg, - unsigned vlen, int flags) +static int citp_udp_sendmmsg( + citp_fdinfo* fdinfo, struct mmsghdr* mmsg, unsigned vlen, int flags) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); ci_udp_iomsg_args a; int i, rc; - Log_V(log(LPF "sendmmsg(%d, msg, %u, %#x)", fdinfo->fd, vlen, - (unsigned) flags)); + Log_V(log( + LPF "sendmmsg(%d, msg, %u, %#x)", fdinfo->fd, vlen, (unsigned) flags)); - if( vlen == 0 ) + if( vlen == 0 ) return 0; a.ep = &epi->sock; @@ -458,31 +465,30 @@ static int citp_udp_sendmmsg(citp_fdinfo* fdinfo, struct mmsghdr* mmsg, do { rc = ci_udp_sendmsg(&a, &mmsg[i].msg_hdr, flags); - if(CI_LIKELY( rc >= 0 ) ) + if( CI_LIKELY(rc >= 0) ) mmsg[i].msg_len = rc; ++i; } while( rc >= 0 && i < vlen ); - return (rc>=0) ? i : rc; + return (rc >= 0) ? 
i : rc; } static int citp_udp_fcntl(citp_fdinfo* fdinfo, int cmd, long arg) { - return citp_sock_fcntl(fdi_to_sock_fdi(fdinfo), - fdinfo->fd, cmd, arg); + return citp_sock_fcntl(fdi_to_sock_fdi(fdinfo), fdinfo->fd, cmd, arg); } static int citp_udp_ioctl(citp_fdinfo* fdinfo, int request, void* arg) { - citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo); + citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo); int rc; - Log_V(log("%s("EF_FMT", %d, 0x%lx)", __FUNCTION__, - EF_PRI_ARGS(epi, fdinfo->fd), request, (long) arg)); + Log_U(log("%s(" EF_FMT ", %d, 0x%lx)", __FUNCTION__, + EF_PRI_ARGS(epi, fdinfo->fd), request, (long) arg)); rc = ci_udp_ioctl(&epi->sock, fdinfo->fd, request, arg); - Log_V(log(LPF "ioctl()=%d", rc)); + Log_U(log(LPF "ioctl()=%d", rc)); if( rc < 0 ) CI_SET_ERROR(rc, -rc); return rc; @@ -490,7 +496,7 @@ static int citp_udp_ioctl(citp_fdinfo* fdinfo, int request, void* arg) static int citp_udp_select(citp_fdinfo* fdi, int* n, int rd, int wr, int ex, - struct oo_ul_select_state*__restrict__ ss) + struct oo_ul_select_state* __restrict__ ss) { citp_sock_fdi* epi; ci_udp_state* us; @@ -530,11 +536,10 @@ static int citp_udp_select(citp_fdinfo* fdi, int* n, int rd, int wr, int ex, } -static int citp_udp_poll(citp_fdinfo*__restrict__ fdi, - struct pollfd*__restrict__ pfd, - struct oo_ul_poll_state*__restrict__ ps) +static int citp_udp_poll(citp_fdinfo* __restrict__ fdi, + struct pollfd* __restrict__ pfd, struct oo_ul_poll_state* __restrict__ ps) { - citp_sock_fdi *epi = fdi_to_sock_fdi(fdi); + citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); ci_udp_state* us = SOCK_TO_UDP(epi->sock.s); ci_netif* ni = epi->sock.netif; unsigned mask; @@ -555,13 +560,11 @@ static int citp_udp_poll(citp_fdinfo*__restrict__ fdi, } - #include "ul_epoll.h" /* More-or-less copy of citp_udp_poll */ -static int citp_udp_epoll(citp_fdinfo*__restrict__ fdi, - struct citp_epoll_member*__restrict__ eitem, - struct oo_ul_epoll_state*__restrict__ eps, - int* stored_event) +static int 
citp_udp_epoll(citp_fdinfo* __restrict__ fdi, + struct citp_epoll_member* __restrict__ eitem, + struct oo_ul_epoll_state* __restrict__ eps, int* stored_event) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); ci_udp_state* us = SOCK_TO_UDP(epi->sock.s); @@ -580,21 +583,19 @@ static int citp_udp_epoll(citp_fdinfo*__restrict__ fdi, /* Try to return a result without polling if we can. */ sleep_seq = us->s.b.sleep_seq.all; mask = ci_udp_poll_events(ni, us); - *stored_event = citp_ul_epoll_set_ul_events(eps, eitem, mask, sleep_seq, - &us->s.b.sleep_seq.all, - &seq_mismatch); - if( (*stored_event == 0) && !eps->ordering_info ) + *stored_event = citp_ul_epoll_set_ul_events( + eps, eitem, mask, sleep_seq, &us->s.b.sleep_seq.all, &seq_mismatch); + if( (*stored_event == 0) && ! eps->ordering_info ) if( citp_poll_if_needed(ni, eps->this_poll_frc, eps->ul_epoll_spin) ) { sleep_seq = us->s.b.sleep_seq.all; mask = ci_udp_poll_events(ni, us); seq_mismatch = 0; - *stored_event = citp_ul_epoll_set_ul_events(eps, eitem, mask, sleep_seq, - &us->s.b.sleep_seq.all, - &seq_mismatch); + *stored_event = citp_ul_epoll_set_ul_events( + eps, eitem, mask, sleep_seq, &us->s.b.sleep_seq.all, &seq_mismatch); } /* We shouldn't have stored an event if there was a mismatch */ - ci_assert( !(seq_mismatch == 1 && *stored_event == 1) ); + ci_assert(! 
(seq_mismatch == 1 && *stored_event == 1)); return seq_mismatch; } @@ -602,22 +603,20 @@ static int citp_udp_epoll(citp_fdinfo*__restrict__ fdi, static int citp_udp_listen(citp_fdinfo* fdinfo, int backlog) { Log_V(log(LPF "listen: not supported by dg protocol")); - citp_fdinfo_release_ref( fdinfo, 0 ); - RET_WITH_ERRNO( EOPNOTSUPP ); + citp_fdinfo_release_ref(fdinfo, 0); + RET_WITH_ERRNO(EOPNOTSUPP); } -static int citp_udp_accept(citp_fdinfo* fdinfo, - struct sockaddr* sa, socklen_t* p_sa_len, - int flags, - citp_lib_context_t* lib_context) +static int citp_udp_accept(citp_fdinfo* fdinfo, struct sockaddr* sa, + socklen_t* p_sa_len, int flags, citp_lib_context_t* lib_context) { Log_V(log(LPF "accept: not supported by dg protocol")); - RET_WITH_ERRNO( EOPNOTSUPP ); + RET_WITH_ERRNO(EOPNOTSUPP); } -static int citp_udp_zc_send(citp_fdinfo* fdi, struct onload_zc_mmsg* msg, - int flags) +static int citp_udp_zc_send( + citp_fdinfo* fdi, struct onload_zc_mmsg* msg, int flags) { msg->rc = -EOPNOTSUPP; return 1; @@ -629,7 +628,7 @@ static int citp_udp_zc_recv(citp_fdinfo* fdi, struct onload_zc_recv_args* args) citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); ci_udp_iomsg_args a; - if( args->flags & ~ONLOAD_ZC_RECV_FLAGS_MASK ) + if( args->flags & ~ONLOAD_ZC_RECV_FLAGS_MASK ) return -EINVAL; a.fd = fdi->fd; @@ -642,8 +641,7 @@ static int citp_udp_zc_recv(citp_fdinfo* fdi, struct onload_zc_recv_args* args) static int citp_udp_zc_recv_filter(citp_fdinfo* fdi, - onload_zc_recv_filter_callback filter, - void* cb_arg, int flags) + onload_zc_recv_filter_callback filter, void* cb_arg, int flags) { #if CI_CFG_ZC_RECV_FILTER citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); @@ -652,8 +650,8 @@ static int citp_udp_zc_recv_filter(citp_fdinfo* fdi, /* flags not yet used */ ci_assert_equal(flags, 0); - us->recv_q_filter = (ci_uintptr_t)filter; - us->recv_q_filter_arg = (ci_uintptr_t)cb_arg; + us->recv_q_filter = (ci_uintptr_t) filter; + us->recv_q_filter_arg = (ci_uintptr_t) cb_arg; return 0; #else 
return -ENOSYS; @@ -661,28 +659,26 @@ static int citp_udp_zc_recv_filter(citp_fdinfo* fdi, } -static int citp_udp_recvmsg_kernel(citp_fdinfo* fdi, struct msghdr* msg, - int flags) +static int citp_udp_recvmsg_kernel( + citp_fdinfo* fdi, struct msghdr* msg, int flags) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); - return ci_udp_recvmsg_kernel(fdi->fd, epi->sock.netif, - SOCK_TO_UDP(epi->sock.s), - msg, flags); + return ci_udp_recvmsg_kernel( + fdi->fd, epi->sock.netif, SOCK_TO_UDP(epi->sock.s), msg, flags); } int citp_udp_tmpl_alloc(citp_fdinfo* fdi, const struct iovec* initial_msg, - int mlen, struct oo_msg_template** omt_pp, - unsigned flags) + int mlen, struct oo_msg_template** omt_pp, unsigned flags) { return -EOPNOTSUPP; } int citp_udp_tmpl_update(citp_fdinfo* fdi, struct oo_msg_template* omt, - const struct onload_template_msg_update_iovec* updates, - int ulen, unsigned flags) + const struct onload_template_msg_update_iovec* updates, int ulen, + unsigned flags) { return -EOPNOTSUPP; } @@ -695,9 +691,8 @@ int citp_udp_tmpl_abort(citp_fdinfo* fdi, struct oo_msg_template* omt) #if CI_CFG_TIMESTAMPING -static int -citp_udp_ordered_data(citp_fdinfo* fdi, struct timespec* limit, - struct timespec* next_out, int* bytes_out) +static int citp_udp_ordered_data(citp_fdinfo* fdi, struct timespec* limit, + struct timespec* next_out, int* bytes_out) { citp_sock_fdi* epi = fdi_to_sock_fdi(fdi); ci_udp_state* us = SOCK_TO_UDP(epi->sock.s); @@ -710,72 +705,68 @@ citp_udp_ordered_data(citp_fdinfo* fdi, struct timespec* limit, if( (pkt = ci_udp_recv_q_get(epi->sock.netif, &us->recv_q)) == NULL ) { ci_sock_unlock(epi->sock.netif, &us->s.b); return 0; - } + } do { struct timespec stamp; - ci_rx_pkt_timespec(pkt, &stamp, - NI_OPTS(epi->sock.netif).rx_timestamping_ordering); + ci_rx_pkt_timespec( + pkt, &stamp, NI_OPTS(epi->sock.netif).rx_timestamping_ordering); if( citp_timespec_compare(&stamp, limit) < 1 ) { /* We have data before the limit, add on the number of readable bytes. 
*/ *bytes_out += pkt->pf.udp.pay_len; - } - else { + } else { /* We have more data, but it's after the limit. Set the next data * limit here, and stop. */ *next_out = stamp; break; } - } - while( (pkt = ci_udp_recv_q_next(epi->sock.netif, pkt)) != NULL ); + } while( (pkt = ci_udp_recv_q_next(epi->sock.netif, pkt)) != NULL ); ci_sock_unlock(epi->sock.netif, &us->s.b); return 1; } #endif -citp_protocol_impl citp_udp_protocol_impl = { - .type = CITP_UDP_SOCKET, - .ops = { - .socket = citp_udp_socket, - .dtor = citp_udp_dtor, - .dup = citp_tcp_dup, - .bind = citp_udp_bind, - .listen = citp_udp_listen, - .accept = citp_udp_accept, - .connect = citp_udp_connect, - .shutdown = citp_udp_shutdown, - .getsockname = citp_udp_getsockname, - .getpeername = citp_udp_getpeername, - .getsockopt = citp_udp_getsockopt, - .setsockopt = citp_udp_setsockopt, - .recv = citp_udp_recv, - .recvmmsg = citp_udp_recvmmsg, - .send = citp_udp_send, - .sendmmsg = citp_udp_sendmmsg, - .fcntl = citp_udp_fcntl, - .ioctl = citp_udp_ioctl, - .select = citp_udp_select, - .poll = citp_udp_poll, - .epoll = citp_udp_epoll, - .sleep_seq = citp_sock_sleep_seq, - .zc_send = citp_udp_zc_send, - .zc_recv = citp_udp_zc_recv, - .zc_recv_filter = citp_udp_zc_recv_filter, - .recvmsg_kernel = citp_udp_recvmsg_kernel, - .tmpl_alloc = citp_udp_tmpl_alloc, - .tmpl_update = citp_udp_tmpl_update, - .tmpl_abort = citp_udp_tmpl_abort, +citp_protocol_impl citp_udp_protocol_impl = { .type = CITP_UDP_SOCKET, + .ops = { + .socket = citp_udp_socket, + .dtor = citp_udp_dtor, + .dup = citp_tcp_dup, + .bind = citp_udp_bind, + .listen = citp_udp_listen, + .accept = citp_udp_accept, + .connect = citp_udp_connect, + .shutdown = citp_udp_shutdown, + .getsockname = citp_udp_getsockname, + .getpeername = citp_udp_getpeername, + .getsockopt = citp_udp_getsockopt, + .setsockopt = citp_udp_setsockopt, + .recv = citp_udp_recv, + .recvmmsg = citp_udp_recvmmsg, + .send = citp_udp_send, + .sendmmsg = citp_udp_sendmmsg, + .fcntl = 
citp_udp_fcntl, + .ioctl = citp_udp_ioctl, + .select = citp_udp_select, + .poll = citp_udp_poll, + .epoll = citp_udp_epoll, + .sleep_seq = citp_sock_sleep_seq, + .zc_send = citp_udp_zc_send, + .zc_recv = citp_udp_zc_recv, + .zc_recv_filter = citp_udp_zc_recv_filter, + .recvmsg_kernel = citp_udp_recvmsg_kernel, + .tmpl_alloc = citp_udp_tmpl_alloc, + .tmpl_update = citp_udp_tmpl_update, + .tmpl_abort = citp_udp_tmpl_abort, #if CI_CFG_TIMESTAMPING - .ordered_data = citp_udp_ordered_data, + .ordered_data = citp_udp_ordered_data, #endif - .is_spinning = citp_sock_is_spinning, + .is_spinning = citp_sock_is_spinning, #if CI_CFG_FD_CACHING - .cache = citp_nonsock_cache, + .cache = citp_nonsock_cache, #endif - } -}; + } }; /*! \cidoxg_end */ diff --git a/src/lib/transport/unix/zc_intercept.c b/src/lib/transport/unix/zc_intercept.c index 24aa81e7d..f51747041 100644 --- a/src/lib/transport/unix/zc_intercept.c +++ b/src/lib/transport/unix/zc_intercept.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* X-SPDX-Copyright-Text: (c) Copyright 2011-2020 Xilinx, Inc. */ /**************************************************************************\ -*//*! \file + *//*! \file ** ** \author kjm ** \brief Intercept of zero-copy API calls @@ -34,29 +34,29 @@ static int fd_to_stack(int fd, ci_netif** pni, citp_fdinfo** pfdi) int rc; citp_sock_fdi* epi; citp_fdinfo* fdi = citp_fdtable_lookup(fd); - if( ! fdi ) /* Not an Onload socket */ + if( ! 
fdi ) /* Not an Onload socket */ return -ESOCKTNOSUPPORT; switch( citp_fdinfo_get_type(fdi) ) { - case CITP_UDP_SOCKET: - case CITP_TCP_SOCKET: - epi = fdi_to_sock_fdi(fdi); - *pfdi = fdi; - *pni = epi->sock.netif; - return 0; - case CITP_EPOLL_FD: - rc = -ENOTSOCK; - break; - case CITP_PIPE_FD: - rc = -ENOTSOCK; - break; - case CITP_PASSTHROUGH_FD: - rc = -ESOCKTNOSUPPORT; - break; - default: - LOG_U(log("%s: unknown fdinfo type %d", __FUNCTION__, - citp_fdinfo_get_type(fdi))); - rc = -EINVAL; + case CITP_UDP_SOCKET: + case CITP_TCP_SOCKET: + epi = fdi_to_sock_fdi(fdi); + *pfdi = fdi; + *pni = epi->sock.netif; + return 0; + case CITP_EPOLL_FD: + rc = -ENOTSOCK; + break; + case CITP_PIPE_FD: + rc = -ENOTSOCK; + break; + case CITP_PASSTHROUGH_FD: + rc = -ESOCKTNOSUPPORT; + break; + default: + LOG_U(log("%s: unknown fdinfo type %d", __FUNCTION__, + citp_fdinfo_get_type(fdi))); + rc = -EINVAL; } citp_fdinfo_release_ref(fdi, 0); return rc; @@ -66,7 +66,8 @@ static int fd_to_stack(int fd, ci_netif** pni, citp_fdinfo** pfdi) static bool txqs_have_reached(ci_netif* ni, const uint32_t* dest) { int i; - OO_STACK_FOR_EACH_INTF_I(ni, i) { + OO_STACK_FOR_EACH_INTF_I(ni, i) + { int32_t diff = dest[i] - ni->state->nic[i].tx_dmaq_done_seq; if( diff > 0 ) return false; @@ -91,7 +92,7 @@ int onload_zc_await_stack_sync(int fd) int tries = 0; ci_netif_lock(ni); OO_STACK_FOR_EACH_INTF_I(ni, i) - added[i] = ni->state->nic[i].tx_dmaq_insert_seq; + added[i] = ni->state->nic[i].tx_dmaq_insert_seq; while( ! txqs_have_reached(ni, added) ) { if( ++tries > 100 ) { /* This hack exists for the purpose of coping with NIC reset. 
The @@ -115,22 +116,20 @@ int onload_zc_await_stack_sync(int fd) } - int onload_zc_alloc_buffers(int fd, struct onload_zc_iovec* iovecs, - int iovecs_len, - enum onload_zc_buffer_type_flags flags) + int iovecs_len, enum onload_zc_buffer_type_flags flags) { int rc = 0, i; citp_lib_context_t lib_context; citp_fdinfo* fdi; citp_sock_fdi* epi; ci_netif* ni; - ci_ip_pkt_fmt *pkt; + ci_ip_pkt_fmt* pkt; unsigned max_len; ci_tcp_state* ts = NULL; - Log_CALL(ci_log("%s(%d, %p, %d, %x)", __FUNCTION__, fd, iovecs, - iovecs_len, flags)); + Log_CALL(ci_log( + "%s(%d, %p, %d, %x)", __FUNCTION__, fd, iovecs, iovecs_len, flags)); citp_enter_lib(&lib_context); @@ -152,42 +151,39 @@ int onload_zc_alloc_buffers(int fd, struct onload_zc_iovec* iovecs, goto out; } /* Make sure this is clear as it affects behaviour when freeing */ - pkt->rx_flags &=~ CI_PKT_RX_FLAG_KEEP; + pkt->rx_flags &= ~CI_PKT_RX_FLAG_KEEP; pkt->user_refcount = CI_ZC_USER_REFCOUNT_ONE; iovecs[i].buf = zc_pktbuf_to_handle(pkt); if( flags & ONLOAD_ZC_BUFFER_HDR_TCP ) { if( ts != NULL ) { oo_tx_pkt_layout_init(pkt); - iovecs[i].iov_base = ((char *)oo_tx_ip_hdr(pkt)) + - ts->outgoing_hdrs_len; + iovecs[i].iov_base = + ((char*) oo_tx_ip_hdr(pkt)) + ts->outgoing_hdrs_len; max_len = tcp_eff_mss(ts); - } - else { + } else { /* Best guess. We can fix it up later. 
Magic 12 leaves * space for time stamp option (common case) */ oo_tx_pkt_layout_init(pkt); iovecs[i].iov_base = - (uint8_t*) oo_tx_ip_data(pkt) + sizeof(ci_tcp_hdr) + 12; + (uint8_t*) oo_tx_ip_data(pkt) + sizeof(ci_tcp_hdr) + 12; } - } - else if( flags & ONLOAD_ZC_BUFFER_HDR_UDP ) { + } else if( flags & ONLOAD_ZC_BUFFER_HDR_UDP ) { oo_tx_pkt_layout_init(pkt); iovecs[i].iov_base = - (uint8_t*) oo_tx_ip_data(pkt) + sizeof(ci_udp_hdr); - } - else + (uint8_t*) oo_tx_ip_data(pkt) + sizeof(ci_udp_hdr); + } else iovecs[i].iov_base = PKT_START(pkt); - iovecs[i].iov_len = CI_CFG_PKT_BUF_SIZE - - ((char *)iovecs[i].iov_base - (char *)pkt); + iovecs[i].iov_len = + CI_CFG_PKT_BUF_SIZE - ((char*) iovecs[i].iov_base - (char*) pkt); if( iovecs[i].iov_len > max_len ) iovecs[i].iov_len = max_len; } ni->state->n_async_pkts += iovecs_len; - out: + out: ci_netif_unlock(ni); citp_fdinfo_release_ref(fdi, 0); - } + } citp_exit_lib(&lib_context, TRUE); Log_CALL_RESULT(rc); @@ -215,7 +211,7 @@ int onload_zc_release_buffers(int fd, onload_zc_handle* bufs, int bufs_len) pkt = zc_handle_to_pktbuf(bufs[i]); if( pkt->stack_id != ni->state->stack_id ) { LOG_U(log("%s: attempt to free buffer from stack %d to stack %d", - __FUNCTION__, pkt->stack_id, ni->state->stack_id)); + __FUNCTION__, pkt->stack_id, ni->state->stack_id)); rc = -EINVAL; break; } @@ -223,16 +219,16 @@ int onload_zc_release_buffers(int fd, onload_zc_handle* bufs, int bufs_len) if( rc == 0 ) { for( i = 0; i < bufs_len; ++i ) { pkt = zc_handle_to_pktbuf(bufs[i]); - pkt->pio_addr = -1; /* Got reused by user_refcount */ + pkt->pio_addr = -1; /* Got reused by user_refcount */ /* If we are releasing a packet without the RX_FLAG then the user - * allocated and then freed the packet (without using it). - * We detect this to decrement n_asyn_pkts. - * RX packets (kept via ONLOAD_ZC_KEEP) are counted differently - * so don't decrement here. (But may release) - */ + * allocated and then freed the packet (without using it). 
+ * We detect this to decrement n_asyn_pkts. + * RX packets (kept via ONLOAD_ZC_KEEP) are counted differently + * so don't decrement here. (But may release) + */ rx_pkt = pkt->flags & CI_PKT_FLAG_RX; released = ci_netif_pkt_release_check_keep(ni, pkt); - if ( ! rx_pkt ) { + if( ! rx_pkt ) { ci_assert(released == 1); (void) released; --ni->state->n_async_pkts; @@ -241,7 +237,7 @@ int onload_zc_release_buffers(int fd, onload_zc_handle* bufs, int bufs_len) } ci_netif_unlock(ni); citp_fdinfo_release_ref(fdi, 0); - } + } citp_exit_lib(&lib_context, TRUE); Log_CALL_RESULT(rc); @@ -250,8 +246,8 @@ int onload_zc_release_buffers(int fd, onload_zc_handle* bufs, int bufs_len) } -int onload_zc_query_rx_memregs(int fd, struct onload_zc_iovec* iov, - int* iovecs_len, int flags) +int onload_zc_query_rx_memregs( + int fd, struct onload_zc_iovec* iov, int* iovecs_len, int flags) { int rc = 0; unsigned i; @@ -259,8 +255,8 @@ int onload_zc_query_rx_memregs(int fd, struct onload_zc_iovec* iov, citp_fdinfo* fdi; ci_netif* ni; - Log_CALL(ci_log("%s(%d, %p, %p, %d)", - __FUNCTION__, fd, iov, iovecs_len, flags)); + Log_CALL( + ci_log("%s(%d, %p, %p, %d)", __FUNCTION__, fd, iov, iovecs_len, flags)); citp_enter_lib(&lib_context); @@ -322,7 +318,8 @@ int onload_zc_recv(int fd, struct onload_zc_recv_args* args) citp_lib_context_t lib_context; citp_fdinfo* fdi; - Log_CALL(ci_log("%s(%d, %p(flags=%x))", __FUNCTION__, fd, args, args->flags)); + Log_CALL( + ci_log("%s(%d, %p(flags=%x))", __FUNCTION__, fd, args, args->flags)); if( (fdi = citp_fdtable_lookup_fast(&lib_context, fd)) ) { rc = citp_fdinfo_get_ops(fdi)->zc_recv(fdi, args); @@ -338,7 +335,6 @@ int onload_zc_recv(int fd, struct onload_zc_recv_args* args) } - int onload_zc_send(struct onload_zc_mmsg* msgs, int mlen, int flags) { int done = 0, last_fd = -1, i; @@ -362,7 +358,7 @@ int onload_zc_send(struct onload_zc_mmsg* msgs, int mlen, int flags) last_fd = msgs[i].fd; } - CI_TRY_EQ( citp_fdinfo_get_ops(fdi)->zc_send(fdi, &msgs[i], flags), 
1); + CI_TRY_EQ(citp_fdinfo_get_ops(fdi)->zc_send(fdi, &msgs[i], flags), 1); /* If we got an error, return the number of msgs that have had * rc set and exit. fd_op should have updated msgs.rc appropriately */ @@ -371,7 +367,7 @@ int onload_zc_send(struct onload_zc_mmsg* msgs, int mlen, int flags) goto out; } - out: +out: if( fdi != NULL ) citp_fdinfo_release_ref(fdi, 0); @@ -386,15 +382,15 @@ int onload_zc_send(struct onload_zc_mmsg* msgs, int mlen, int flags) } -int onload_set_recv_filter(int fd, onload_zc_recv_filter_callback filter, - void* cb_arg, int flags) +int onload_set_recv_filter( + int fd, onload_zc_recv_filter_callback filter, void* cb_arg, int flags) { int rc; citp_lib_context_t lib_context; citp_fdinfo* fdi; - Log_CALL(ci_log("%s(%d, %p, %p, %x)", __FUNCTION__, fd, filter, - cb_arg, flags)); + Log_CALL( + ci_log("%s(%d, %p, %p, %x)", __FUNCTION__, fd, filter, cb_arg, flags)); if( (fdi = citp_fdtable_lookup_fast(&lib_context, fd)) ) { rc = citp_fdinfo_get_ops(fdi)->zc_recv_filter(fdi, filter, cb_arg, flags); @@ -425,12 +421,13 @@ static int verify_addrspace_override(ci_netif* ni) /* Only EF100 hardware supports address space overrides. Fail if the * machine only contains older NICs. */ - OO_STACK_FOR_EACH_INTF_I(ni, nic_i) { + OO_STACK_FOR_EACH_INTF_I(ni, nic_i) + { if( ci_netif_vi(ni, nic_i)->nic_type.arch == EF_VI_ARCH_EF100 ) has_ef100 = 1; } - if( !has_ef100 ) + if( ! 
has_ef100 ) return -EINVAL; /* Address space override is only allowed in physical addressing @@ -447,17 +444,17 @@ static bool have_unsupported_nic(ci_netif* ni) int nic_i; OO_STACK_FOR_EACH_INTF_I(ni, nic_i) - if( ci_netif_vi(ni, nic_i)->nic_type.arch == EF_VI_ARCH_AF_XDP || - ci_netif_vi(ni, nic_i)->nic_type.nic_flags & EFHW_VI_NIC_CTPIO_ONLY ) - return true; + if( ci_netif_vi(ni, nic_i)->nic_type.arch == EF_VI_ARCH_AF_XDP || + ci_netif_vi(ni, nic_i)->nic_type.arch == EF_VI_ARCH_SWXTCH || + ci_netif_vi(ni, nic_i)->nic_type.nic_flags & EFHW_VI_NIC_CTPIO_ONLY ) + return true; return false; } int onload_zc_register_buffers(int fd, ef_addrspace addr_space, - uint64_t base_ptr, uint64_t len, int flags, - onload_zc_handle* handle) + uint64_t base_ptr, uint64_t len, int flags, onload_zc_handle* handle) { int rc; citp_lib_context_t lib_context; @@ -465,7 +462,7 @@ int onload_zc_register_buffers(int fd, ef_addrspace addr_space, ci_netif* ni; Log_CALL(ci_log("%s(%d, %" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %d, %p)", - __FUNCTION__, fd, addr_space, base_ptr, len, flags, handle)); + __FUNCTION__, fd, addr_space, base_ptr, len, flags, handle)); citp_enter_lib(&lib_context); @@ -480,29 +477,27 @@ int onload_zc_register_buffers(int fd, ef_addrspace addr_space, rc = -E2BIG; else if( (rc = fd_to_stack(fd, &ni, &fdi)) == 0 ) { int num_pages = len >> EF_VI_NIC_PAGE_SHIFT; - struct ci_zc_usermem* um = malloc(sizeof(struct ci_zc_usermem) + - sizeof(um->hw_addrs[0]) * num_pages * - oo_stack_intf_max(ni)); + struct ci_zc_usermem* um = + malloc(sizeof(struct ci_zc_usermem) + + sizeof(um->hw_addrs[0]) * num_pages * oo_stack_intf_max(ni)); if( ! 
um ) rc = -ENOMEM; else if( addr_space != EF_ADDRSPACE_LOCAL && (rc = verify_addrspace_override(ni)) < 0 ) { /* error code already set appropriately */ - } - else if( have_unsupported_nic(ni) ) { + } else if( have_unsupported_nic(ni) ) { /* Because these NICs don't support checksum offload, the code necessary * to compute checksums on the host is gnarly and thus non-existant. */ rc = -ENOTSUP; - } - else { + } else { um->addr_space = addr_space; um->base = base_ptr; um->size = len; if( addr_space == EF_ADDRSPACE_LOCAL ) - rc = ci_tcp_helper_zc_register_buffers(ni, (void*)(uintptr_t)base_ptr, - num_pages, um->hw_addrs, - &um->kernel_id); + rc = + ci_tcp_helper_zc_register_buffers(ni, (void*) (uintptr_t) base_ptr, + num_pages, um->hw_addrs, &um->kernel_id); if( rc == 0 ) *handle = zc_usermem_to_handle(um); @@ -552,7 +547,7 @@ int onload_zc_unregister_buffers(int fd, onload_zc_handle handle, int flags) } -int onload_recvmsg_kernel(int fd, struct msghdr *msg, int flags) +int onload_recvmsg_kernel(int fd, struct msghdr* msg, int flags) { int rc; citp_lib_context_t lib_context; @@ -570,7 +565,7 @@ int onload_recvmsg_kernel(int fd, struct msghdr *msg, int flags) } Log_CALL_RESULT(rc); - return rc; + return rc; } diff --git a/src/tests/ef_vi/mmake.mk b/src/tests/ef_vi/mmake.mk index b2ed50efb..abb228d09 100644 --- a/src/tests/ef_vi/mmake.mk +++ b/src/tests/ef_vi/mmake.mk @@ -13,9 +13,10 @@ endif TARGETS := $(TEST_APPS:%=$(AppPattern)) -MMAKE_LIBS := $(LINK_CIUL_LIB) $(LINK_CIAPP_LIB) +MMAKE_LIBS := $(LINK_CIUL_LIB) $(LINK_CIAPP_LIB) $(DEFAULT_DPDK) MMAKE_LIB_DEPS := $(CIUL_LIB_DEPEND) $(CIAPP_LIB_DEPEND) +MMAKE_INCLUDE += -I$(RTE_SDK)/build/install/include all: $(TARGETS) diff --git a/src/tests/rtt/mmake.mk b/src/tests/rtt/mmake.mk index d63d8e26d..82c805310 100644 --- a/src/tests/rtt/mmake.mk +++ b/src/tests/rtt/mmake.mk @@ -11,8 +11,7 @@ clean: @$(MakeClean) -MMAKE_LIBS := $(LINK_CIAPP_LIB) $(LINK_CITOOLS_LIB) $(LINK_CIUL_LIB) +MMAKE_LIBS := $(LINK_CIAPP_LIB) 
$(LINK_CITOOLS_LIB) $(LINK_CIUL_LIB) $(DEFAULT_DPDK) MMAKE_LIB_DEPS := $(CIAPP_LIB_DEPEND) $(CITOOLS_LIB_DEPEND) $(CIUL_LIB_DEPEND) - rtt: rtt.o rtt_socket.o rtt_efvi.o diff --git a/src/tests/trade_sim/mmake.mk b/src/tests/trade_sim/mmake.mk index a36197087..5f42ee654 100644 --- a/src/tests/trade_sim/mmake.mk +++ b/src/tests/trade_sim/mmake.mk @@ -12,13 +12,12 @@ all: $(TARGETS) clean: @$(MakeClean) - exchange: exchange.o utils.o -exchange: MMAKE_LIBS += $(LINK_ONLOAD_EXT_LIB) +exchange: MMAKE_LIBS += $(LINK_ONLOAD_EXT_LIB) $(DEFAULT_DPDK) exchange: MMAKE_LIB_DEPS += $(ONLOAD_EXT_LIB_DEPEND) trader_onload_ds_efvi: trader_onload_ds_efvi.o utils.o trader_onload_ds_efvi: \ - MMAKE_LIBS += $(LINK_ONLOAD_EXT_LIB) $(LINK_CIUL_LIB) + MMAKE_LIBS += $(LINK_ONLOAD_EXT_LIB) $(LINK_CIUL_LIB) $(DEFAULT_DPDK) trader_onload_ds_efvi: \ MMAKE_LIB_DEPS += $(ONLOAD_EXT_LIB_DEPEND) $(CIUL_LIB_DEPEND) diff --git a/src/tests/unit/mmake.mk b/src/tests/unit/mmake.mk index 15f9ddcb7..7398fb18c 100644 --- a/src/tests/unit/mmake.mk +++ b/src/tests/unit/mmake.mk @@ -19,6 +19,7 @@ TARGETS := $(TESTS:%=$(AppPattern)) OBJECTS := $(TESTS:%=%.o) PASSED := $(TESTS:%=%.passed) + # Library objects names are mangled with a prefix. Deal with that madness here. 
LIB_PREFIXES := transport/common/ci_tp_common_ transport/ip/ci_ip_ lib_prefix = $(notdir $(filter $(dir $(1))%,$(LIB_PREFIXES))) diff --git a/src/tools/ip/mmake.mk b/src/tools/ip/mmake.mk index 85c43797c..46a757ba5 100644 --- a/src/tools/ip/mmake.mk +++ b/src/tools/ip/mmake.mk @@ -26,7 +26,7 @@ endif MMAKE_LIBS := $(LINK_CIIP_LIB) $(LINK_CIAPP_LIB) \ $(LINK_CITOOLS_LIB) $(LINK_CIUL_LIB) \ - $(LINK_CPLANE_LIB) $(MMAKE_LIBS_LIBPCAP) + $(LINK_CPLANE_LIB) $(MMAKE_LIBS_LIBPCAP) $(DEFAULT_DPDK) MMAKE_LIB_DEPS := $(CIIP_LIB_DEPEND) $(CIAPP_LIB_DEPEND) \ $(CITOOLS_LIB_DEPEND) $(CIUL_LIB_DEPEND) \ $(CPLANE_LIB_DEPEND) diff --git a/src/tools/onload_remote_monitor/mmake.mk b/src/tools/onload_remote_monitor/mmake.mk index b45ca0d03..ec4452c3d 100644 --- a/src/tools/onload_remote_monitor/mmake.mk +++ b/src/tools/onload_remote_monitor/mmake.mk @@ -26,7 +26,7 @@ endif MMAKE_LIBS := $(LINK_CIIP_LIB) $(LINK_CIAPP_LIB) $(MMAKE_LIBS_LIBPCAP) \ $(LINK_CITOOLS_LIB) $(LINK_CIUL_LIB) \ - -lpthread $(LINK_CPLANE_LIB) + -lpthread $(LINK_CPLANE_LIB) $(DEFAULT_DPDK) MMAKE_INCLUDE += -I$(TOPPATH)/src/tools/ip LIBS += $(MMAKE_LIBS) $(ZMQ_LIBS) diff --git a/test_programs/bin/sockperf b/test_programs/bin/sockperf new file mode 100755 index 000000000..670ff5b81 Binary files /dev/null and b/test_programs/bin/sockperf differ diff --git a/test_programs/mock_xnic/Makefile b/test_programs/mock_xnic/Makefile new file mode 100644 index 000000000..f72688274 --- /dev/null +++ b/test_programs/mock_xnic/Makefile @@ -0,0 +1,24 @@ +CFLAGS = -march=native -O3 -msse3 -mavx +INCLUDES = -I./ -I$(RTE_SDK)/build/install/include +LFLAGS = -L$(RTE_SDK)/build/drivers -L$(RTE_SDK)/build/lib +LIBS = -lrte_hash -lrte_cmdline -lrte_pci -lrte_bus_pci -lrte_bus_vdev -lrte_mempool_ring -lrte_kni -lrte_ethdev -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring -lrte_kvargs -lrte_pmd_bond -lrte_pmd_virtio -lrte_pmd_enic -lrte_pmd_i40e -lrte_pmd_ixgbe -lrte_net -lrte_pmd_e1000 -lrte_pmd_ring -lrte_pmd_af_packet -lrte_pmd_mlx4 
-lrte_pmd_mlx5 -lrte_pmd_ena -lrte_pmd_failsafe -lrte_pmd_netvsc -lrte_pmd_vdev_netvsc -lrte_bus_vmbus -lrte_pmd_tap -lrte_gso -lrte_timer -lrte_meter +DYN_LIBS = -lm -ldl -lnuma -libverbs -lmlx4 -lmlx5 +STATIC = -Wl,-Bstatic -Wl,--whole-archive +NO_STATIC = -Wl,--no-whole-archive -Wl,-Bdynamic +SRCS = main.c mp_commands.c +OBJS = $(SRCS:.c=.o) +MAIN = xnic_data + +.PHONY: depend clean + +all: $(MAIN) + @echo done + +$(MAIN): $(OBJS) + $(CC) $(CFLAGS) $(INCLUDES) -o $(MAIN) $(OBJS) $(LFLAGS) $(STATIC) $(LIBS) $(NO_STATIC) $(DYN_LIBS) + +.c.o: + $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ + +clean: + $(RM) *.o *~ $(MAIN) diff --git a/test_programs/mock_xnic/README.md b/test_programs/mock_xnic/README.md new file mode 100644 index 000000000..c0dae8f7b --- /dev/null +++ b/test_programs/mock_xnic/README.md @@ -0,0 +1,14 @@ +This test application is intended to behave semi-similar to how our xNIC implementation would. There currently is no code +for Tunneling and Untunneling multicast traffic, but it is easy to see how we could implement that in the receive/transmit +functions. + +This was pieced together from two dpdk samples. The first being `simple_mp` and the other being `???`. + +Build and run with: +`make` + +`sudo ./xnic_data ` +E.G. +`sudo ./xnic_data -l 2-3 -w "06e6:00:02.0" --vdev="net_vdev_netvsc0,iface=eth1" --proc-type=primary` + +To quit the application literally type `quit`. Thats leftovers from the `simple_mp`. diff --git a/test_programs/mock_xnic/main.c b/test_programs/mock_xnic/main.c new file mode 100644 index 000000000..50254629d --- /dev/null +++ b/test_programs/mock_xnic/main.c @@ -0,0 +1,558 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +/* + * This sample application is a simple multi-process application which + * demostrates sharing of queues and memory pools between processes, and + * using those queues/pools for communication between the processes. 
+ * + * Application is designed to run with two processes, a primary and a + * secondary, and each accepts commands on the commandline, the most + * important of which is "send", which just sends a string to the other + * process. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mp_commands.h" + +#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1 +#define DEBUG_TX 0 +#define DEBUG_RX 0 + +static const char *_TX_RING = "TX_RING"; +static const char *_RX_PENDING_RING = "RX_PENDING_RING"; +static const char *_TX_COMP_RING = "TX_COMP_RING"; +static const char *_RX_RING = "RX_RING"; +static const char *_RX_FILL_RING = "RX_FILL_RING"; +static const char *_RX_MBUF_POOL = "RX_MBUF_POOL"; + +struct rte_ring *tx_ring, *tx_completion_ring, *rx_ring, *rx_fill_ring, + *rx_pending_ring; + +static const unsigned MAX_MESSAGE_SIZE = 2048; + +#define RX_RING_SIZE 1024 +#define TX_RING_SIZE 2048 + +#define NUM_MBUFS 8192 +#define MBUF_CACHE_SIZE 0 +#define BURST_SIZE 32 + +struct rte_mempool *mbuf_pool; +unsigned long idle_count = 0; +unsigned long packet_count = 0; +volatile int quit = 0; + +struct rte_mbuf *tx_bufs[BURST_SIZE]; +void *rx_fill_pkts[BURST_SIZE]; +struct rte_mbuf *rx_bufs[BURST_SIZE]; +struct rte_mbuf *rx_final_bufs[BURST_SIZE]; + +static const struct rte_eth_conf port_conf_default = { + .rxmode = { + .max_rx_pkt_len = RTE_ETHER_MAX_LEN, + }, +}; + +/* basicfwd.c: Basic DPDK skeleton forwarding example. */ + +/* + * Initializes a given port using global settings and with the RX buffers + * coming from the mbuf_pool passed as a parameter. 
+ */ +static inline int port_init(uint16_t port) +{ + struct rte_eth_conf port_conf = port_conf_default; + const uint16_t rx_rings = 1, tx_rings = 1; + uint16_t nb_rxd = RX_RING_SIZE; + uint16_t nb_txd = TX_RING_SIZE; + int retval; + uint16_t q; + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf txconf; + + if( ! rte_eth_dev_is_valid_port(port) ) + return -1; + + retval = rte_eth_dev_info_get(port, &dev_info); + if( retval != 0 ) { + printf("Error during getting device (port %u) info: %s\n", port, + strerror(-retval)); + return retval; + } + + if( dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE ) + port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE; + + /* Configure the Ethernet device. */ + retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); + if( retval != 0 ) + return retval; + + retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd); + if( retval != 0 ) + return retval; + + printf("Setting up RX Desc: %d TX Desc: %d\n", nb_rxd, nb_txd); + + /* Allocate and set up 1 RX queue per Ethernet port. */ + for( q = 0; q < rx_rings; q++ ) { + printf("setting up rx ring: %d\n", q); + retval = rte_eth_rx_queue_setup( + port, q, nb_rxd, rte_eth_dev_socket_id(port), NULL, mbuf_pool); + if( retval < 0 ) + return retval; + } + + txconf = dev_info.default_txconf; + txconf.offloads = port_conf.txmode.offloads; + /* Allocate and set up 1 TX queue per Ethernet port. */ + for( q = 0; q < tx_rings; q++ ) { + retval = rte_eth_tx_queue_setup( + port, q, nb_txd, rte_eth_dev_socket_id(port), NULL); + if( retval < 0 ) + return retval; + } + + /* Start the Ethernet port. */ + retval = rte_eth_dev_start(port); + if( retval < 0 ) + return retval; + + /* Display the port MAC address. 
*/ + struct rte_ether_addr addr; + retval = rte_eth_macaddr_get(port, &addr); + if( retval != 0 ) + return retval; + + printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 + " %02" PRIx8 " %02" PRIx8 "\n", + port, addr.addr_bytes[0], addr.addr_bytes[1], addr.addr_bytes[2], + addr.addr_bytes[3], addr.addr_bytes[4], addr.addr_bytes[5]); + + /* Enable RX in promiscuous mode for the Ethernet device. */ + retval = rte_eth_promiscuous_enable(port); + if( retval != 0 ) + return retval; + + return 0; +} + +static inline int init_ports(void) +{ + uint16_t portid; + + /* Initialize all ports. */ + RTE_ETH_FOREACH_DEV(portid) + { + if( port_init(portid) != 0 ) { + printf("Cannot init port %d \n", portid); + } + } + + return 0; +} + +static void dump_ring_state() +{ + printf("MPOOL free: %d\n", rte_mempool_avail_count(mbuf_pool)); + printf("TX RING count: %d\n", rte_ring_count(tx_ring)); + printf("TX COMP RING count: %d\n", rte_ring_count(tx_completion_ring)); + printf("RX FILL RING count: %d\n", rte_ring_count(rx_fill_ring)); + printf("RX RING count: %d\n", rte_ring_count(rx_ring)); +} + +static void print_mbufs(struct rte_mbuf **mbufs, int length, char *type) +{ + if( length != 0 ) { + printf("TYPE: %s\n", type); + } + for( int i = 0; i < length; i++ ) { + uint8_t *data = rte_pktmbuf_mtod(mbufs[i], uint8_t *); + int data_len = rte_pktmbuf_data_len(mbufs[i]); + printf("Data Len: %d\n", data_len); + for( int j = 0; j < data_len; j++ ) { + printf("%02X ", data[j]); + } + printf("\n"); + } +} + +static void drain_queue(struct rte_ring *ring) +{ + unsigned available = 0; + int count = 0; + int burst_size = 32; + struct rte_mbuf *bufs[burst_size]; + + do { + count = rte_ring_dequeue_burst( + ring, (void **) &bufs[0], burst_size, &available); + + for( int i = 0; i < count; ++i ) { + struct rte_mbuf *buf = bufs[i]; + printf("%p\n", buf); + rte_pktmbuf_free(buf); + } + + } while( available != 0 ); +} + + +static void bulk_free(struct rte_mbuf **mbufs, unsigned 
length) +{ + for( int i = 0; i < length; ++i ) { + rte_pktmbuf_free(mbufs[i]); + } +} + +static void handle_arp(struct rte_ether_hdr *eth_h) +{ + struct rte_arp_hdr *arp = (struct rte_arp_hdr *) (eth_h + 1); + if( ntohs(arp->arp_opcode) == RTE_ARP_OP_REPLY ) { + char ethStr[50]; + struct in_addr add; + add.s_addr = arp->arp_data.arp_sip; + rte_ether_format_addr(ethStr, 50, &arp->arp_data.arp_sha); + char *ipAddr = inet_ntoa(add); + char str[100]; + sprintf(str, "ip neigh replace %s dev eth1 lladdr %s nud reachable", + ipAddr, ethStr); + if( system(str) != 0 ) { + printf("Failed to add neighbor\n"); + } + } +} + +static int receive(uint16_t port) +{ + int allowed = + rte_ring_dequeue_burst(rx_fill_ring, rx_fill_pkts, BURST_SIZE, NULL); + + if( unlikely(allowed == 0) ) { + return 0; + } + + + uint16_t recv = rte_eth_rx_burst(port, 0, &rx_bufs[0], allowed); + + // recycle any unused packets + if( unlikely(recv != allowed) ) { + rte_ring_enqueue_bulk( + rx_fill_ring, &rx_fill_pkts[recv], allowed - recv, NULL); + } + + + int enqueueCount = 0; + for( int i = 0; i < recv; ++i ) { + struct rte_ether_hdr *eth_h = + rte_pktmbuf_mtod(rx_bufs[i], struct rte_ether_hdr *); + + switch( ntohs(eth_h->ether_type) ) { + case RTE_ETHER_TYPE_IPV6: // this is flooding everything for some + // reason + rte_ring_enqueue(rx_fill_ring, rx_fill_pkts[i]); + break; + case RTE_ETHER_TYPE_ARP: + rte_ring_enqueue(rx_fill_ring, rx_fill_pkts[i]); + // handle_arp(eth_h); + break; + case RTE_ETHER_TYPE_IPV4: + struct rte_ipv4_hdr *hdr = (struct rte_ipv4_hdr *) (eth_h + 1); + if( unlikely(hdr->next_proto_id == IPPROTO_ICMP) ) { + rte_ring_enqueue(rx_fill_ring, rx_fill_pkts[i]); + } else { + rx_final_bufs[enqueueCount++] = rx_bufs[i]; + } + break; + default: + rx_final_bufs[enqueueCount++] = rx_bufs[i]; + } + } + +#if DEBUG_RX + print_mbufs(rx_final_bufs, enqueueCount, "RX"); +#endif + + if( likely(enqueueCount != 0) ) { + if( rte_ring_enqueue_bulk( + rx_ring, (void **) rx_final_bufs, enqueueCount, 
NULL) == 0 ) { + printf("FAILED TO HAND TO RX RING\n"); + rte_pktmbuf_free_bulk(rx_final_bufs, enqueueCount); + } + } +} + +// Array to keep track of the amount of transactions sent +static void *null_array[BURST_SIZE]; + +static int transmit(uint16_t port) +{ + // might be better to manually move the consumer tail manually? + int allowed = + rte_ring_dequeue_burst(tx_ring, (void **) &tx_bufs[0], BURST_SIZE, NULL); + if( unlikely(allowed == 0) ) { + return 0; + } + + +#if DEBUG_TX + print_mbufs(tx_bufs, allowed, "TX"); +#endif + + int start = 0; + int end = allowed; + do { + uint16_t txed = rte_eth_tx_burst(port, 0, &tx_bufs[start], end); + start += txed; + end -= txed; + } while( unlikely(start != allowed) ); + + if( unlikely(start != allowed) ) { + printf("Unable to send all tx packets\n"); + rte_pktmbuf_free_bulk(&tx_bufs[start], end); + } + + if( unlikely(start == 0) ) { + return start; + } + + // the tx_completion ring is more of a + if( unlikely(rte_ring_enqueue_bulk( + tx_completion_ring, null_array, allowed, NULL) == 0) ) { + printf("FAILED to mark tx completed\n"); + } + return start; +} + +static int lcore_run(__attribute__((unused)) void *arg) +{ + int nb_ports = rte_eth_dev_count_avail(); + if( nb_ports < 1 ) { + printf("ERROR: failed to init. Bad number of ports: %d\n", nb_ports); + return -1; + } + unsigned lcore_id = rte_lcore_id(); + uint16_t port; + + printf("Starting core %u\n", lcore_id); + + int inited = init_ports(); + if( inited < 0 ) { + rte_eal_cleanup(); + rte_exit(-1, "Failed to init ports\n"); + } + + printf("Ports initialized\n"); + + /* + * Check that the port is on the same NUMA node as the polling thread + * for best performance. 
+ */ + RTE_ETH_FOREACH_DEV(port) + { + if( rte_eth_dev_socket_id(port) > 0 && + rte_eth_dev_socket_id(port) != (int) rte_socket_id() ) + printf( + "WARNING, port %u is on remote NUMA node to " + "polling thread.\n\tPerformance will " + "not be optimal.\n", + port); + + printf("Using PORT: %d\n", port); + + while( likely(! quit) ) { + int recved = receive(port); + int sent = transmit(port); + if( recved == 0 && sent == 0 ) { + idle_count++; + } + } + } + + return 0; +} + +static int lcore_run_test_consumer(__attribute__((unused)) void *arg) +{ + while( likely(! quit) ) { + int count = rte_ring_dequeue_burst(tx_ring, (void **) tx_bufs, 32, NULL); + if( unlikely(count == 0) ) { + continue; + } + + + rte_ring_enqueue_bulk(tx_completion_ring, (void **) tx_bufs, count, NULL); + } +} + +static int lcore_run_test_comp(__attribute__((unused)) void *arg) +{ + while( likely(! quit) ) { + int count = rte_ring_dequeue_burst( + tx_completion_ring, (void **) tx_bufs, 32, NULL); + packet_count += count; + bulk_free(tx_bufs, count); + } +} + + +static int run_test_producer() +{ + const int size = 1; + struct rte_mbuf *mbufs[size]; + time_t t; + time(&t); + time_t end = t + 10; + + while( t < end ) { + rte_mempool_get_bulk(mbuf_pool, (void **) mbufs, size); + rte_ring_enqueue_bulk(tx_ring, (void **) mbufs, size, NULL); + time(&t); + } +} + +static void signal_handler(int signum) +{ + /* When we receive a RTMIN or SIGINT signal, stop kni processing */ + if( signum == SIGRTMIN || signum == SIGINT ) { + printf("\nSIGRTMIN/SIGINT received. 
processing stopping.\n"); + quit = 1; + return; + } +} + +int main(int argc, char **argv) +{ + const unsigned flags = 0; + const unsigned ring_size = 512; + const unsigned fill_ring_size = 1024; + const unsigned pool_size = 1024; + const unsigned pool_cache = 0; + const unsigned priv_data_sz = 0; + + int ret; + unsigned lcore_id; + + signal(SIGRTMIN, signal_handler); + signal(SIGINT, signal_handler); + + ret = rte_eal_init(argc, argv); + if( ret < 0 ) + rte_exit(EXIT_FAILURE, "Cannot init EAL\n"); + + if( rte_eal_process_type() != RTE_PROC_PRIMARY ) { + rte_exit(EXIT_FAILURE, "This program must be run as primary\n"); + } + + if( rte_eal_process_type() == RTE_PROC_PRIMARY ) { + tx_ring = rte_ring_create( + _TX_RING, ring_size * 8, SOCKET_ID_ANY, RING_F_SC_DEQ | RING_F_SP_ENQ); + rx_ring = rte_ring_create( + _RX_RING, ring_size * 2, SOCKET_ID_ANY, RING_F_SC_DEQ | RING_F_SP_ENQ); + rx_fill_ring = rte_ring_create( + _RX_FILL_RING, fill_ring_size, SOCKET_ID_ANY, RING_F_SC_DEQ); + tx_completion_ring = rte_ring_create(_TX_COMP_RING, fill_ring_size * 2, + SOCKET_ID_ANY, RING_F_SC_DEQ | RING_F_SP_ENQ); + + mbuf_pool = rte_pktmbuf_pool_create(_RX_MBUF_POOL, NUM_MBUFS - 1, + MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, SOCKET_ID_ANY); + rx_pending_ring = rte_ring_create(_RX_PENDING_RING, ring_size / 4, + SOCKET_ID_ANY, RING_F_SC_DEQ | RING_F_SP_ENQ); + + for( int i = 0; i < BURST_SIZE; i++ ) { + null_array[i] = NULL; + } + } else { + tx_ring = rte_ring_lookup(_TX_RING); + rx_ring = rte_ring_lookup(_RX_RING); + rx_fill_ring = rte_ring_lookup(_RX_FILL_RING); + tx_completion_ring = rte_ring_lookup(_TX_COMP_RING); + mbuf_pool = rte_mempool_lookup(_RX_MBUF_POOL); + rx_pending_ring = rte_ring_lookup(_RX_PENDING_RING); + } + + + if( tx_ring == NULL ) + rte_exit(EXIT_FAILURE, "Problem getting sending ring\n"); + if( rx_ring == NULL ) + rte_exit(EXIT_FAILURE, "Problem getting receiving ring\n"); + if( rx_fill_ring == NULL ) + rte_exit(EXIT_FAILURE, "Problem getting fill ring\n"); + 
if( tx_completion_ring == NULL ) + rte_exit(EXIT_FAILURE, "Problem getting comp ring\n"); + if( mbuf_pool == NULL ) + rte_exit(EXIT_FAILURE, "Problem getting message pool\n"); + if( rx_pending_ring == NULL ) + rte_exit(EXIT_FAILURE, "Problem getting pending ring\n"); + + RTE_LOG(INFO, APP, "Finished Process Init.\n"); + + /* launch the worker function on every slave lcore */ + if( rte_eal_process_type() == RTE_PROC_PRIMARY ) { + RTE_LCORE_FOREACH_SLAVE(lcore_id) + { + rte_eal_remote_launch(lcore_run, NULL, lcore_id); + } + struct cmdline *cl = cmdline_stdin_new(simple_mp_ctx, "\nsimple_mp > "); + if( cl == NULL ) + rte_exit(EXIT_FAILURE, "Cannot create cmdline instance\n"); + cmdline_interact(cl); + cmdline_stdin_exit(cl); + + rte_eal_mp_wait_lcore(); /* join all worker lcores before tearing down */ + + rte_ring_free(tx_ring); + rte_ring_free(rx_ring); + rte_ring_free(tx_completion_ring); + rte_ring_free(rx_fill_ring); + rte_ring_free(rx_pending_ring); + rte_mempool_free(mbuf_pool); + } else { + RTE_LCORE_FOREACH_SLAVE(lcore_id) + { + rte_eal_remote_launch(lcore_run_test_comp, NULL, lcore_id); + } + run_test_producer(); + quit = 1; + printf("Total packets round tripped = %ld. 
PPS = %ld\n", packet_count, + packet_count / 10); + rte_eal_mp_wait_lcore(); + } + + + return 0; +} diff --git a/test_programs/mock_xnic/mp_commands.c b/test_programs/mock_xnic/mp_commands.c new file mode 100644 index 000000000..9aa478427 --- /dev/null +++ b/test_programs/mock_xnic/mp_commands.c @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "mp_commands.h" + +/**********************************************************/ + +/**********************************************************/ + +struct cmd_quit_result { + cmdline_fixed_string_t quit; +}; + +static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, __attribute__((unused)) void *data) +{ + quit = 1; + cmdline_quit(cl); +} + +cmdline_parse_token_string_t cmd_quit_quit = + TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit"); + +cmdline_parse_inst_t cmd_quit = { + .f = cmd_quit_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "close the application", + .tokens = { + /* token list, NULL terminated */ + (void *)&cmd_quit_quit, + NULL, + }, +}; + +/**********************************************************/ + +struct cmd_help_result { + cmdline_fixed_string_t help; +}; + +static void cmd_help_parsed(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, __attribute__((unused)) void *data) +{ + cmdline_printf(cl, + "Simple demo example of multi-process in RTE\n\n" + "This is a readline-like interface that can be used to\n" + "send commands to the simple app. 
Commands supported are:\n\n" + "- dump\n" + "- help\n" + "- quit\n\n"); +} + +cmdline_parse_token_string_t cmd_help_help = + TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help"); + +cmdline_parse_inst_t cmd_help = { + .f = cmd_help_parsed, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "show help", + .tokens = { + /* token list, NULL terminated */ + (void *)&cmd_help_help, + NULL, + }, +}; + +static void dump_rings(__attribute__((unused)) void *parsed_result, + struct cmdline *cl, __attribute__((unused)) void *data) +{ + rte_mempool_dump(stdout, mbuf_pool); /* pool statistics go to stdout */ + cmdline_printf(cl, /* each ring prints as count:capacity */ + "Idle Count: %lu\n" + "TX Ring Count: %u:%u\n" + "TX Comp Ring Count: %u:%u\n" + "RX Fill Ring Count: %u:%u\n" + "RX Ring Count: %u:%u\n" + "RX Pending Ring Count: %u:%u\n", + idle_count, rte_ring_count(tx_ring), rte_ring_get_capacity(tx_ring), + rte_ring_count(tx_completion_ring), + rte_ring_get_capacity(tx_completion_ring), rte_ring_count(rx_fill_ring), + rte_ring_get_capacity(rx_fill_ring), rte_ring_count(rx_ring), + rte_ring_get_capacity(rx_ring), rte_ring_count(rx_pending_ring), + rte_ring_get_capacity(rx_pending_ring)); +} + +cmdline_parse_token_string_t cmd_dump_rings_help = + TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "dump"); + +cmdline_parse_inst_t cmd_dump_rings= { + .f = dump_rings, /* function to call */ + .data = NULL, /* 2nd arg of func */ + .help_str = "show ring state", + .tokens = { + /* token list, NULL terminated */ + (void *)&cmd_dump_rings_help, + NULL, + }, +}; + +/****** CONTEXT (list of instruction) */ +cmdline_parse_ctx_t simple_mp_ctx[] = { + (cmdline_parse_inst_t *) &cmd_quit, + (cmdline_parse_inst_t *) &cmd_help, + (cmdline_parse_inst_t *) &cmd_dump_rings, + NULL, +}; diff --git a/test_programs/mock_xnic/mp_commands.h b/test_programs/mock_xnic/mp_commands.h new file mode 100644 index 000000000..aa78dc15b --- /dev/null +++ b/test_programs/mock_xnic/mp_commands.h @@ -0,0 +1,16 @@ +/* 
SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef _SIMPLE_MP_COMMANDS_H_ +#define _SIMPLE_MP_COMMANDS_H_ + +extern struct rte_ring *tx_ring, *tx_completion_ring, *rx_ring, *rx_fill_ring, + *rx_pending_ring; +extern struct rte_mempool *mbuf_pool; +extern unsigned long idle_count; +extern volatile int quit; + +extern cmdline_parse_ctx_t simple_mp_ctx[]; + +#endif /* _SIMPLE_MP_COMMANDS_H_ */ diff --git a/test_programs/share/doc/sockperf/README.md b/test_programs/share/doc/sockperf/README.md new file mode 100644 index 000000000..4fa01b32b --- /dev/null +++ b/test_programs/share/doc/sockperf/README.md @@ -0,0 +1,81 @@ +## Introduction + +**sockperf** is a network benchmarking utility over socket API that was designed for testing performance (latency and throughput) of high-performance systems (it is also good for testing performance of regular networking systems). It covers most of the socket API calls and options. + +Specifically, in addition to the standard throughput tests, **sockperf** does the following: + + * Measure latency of each discrete packet at sub-nanosecond resolution (using TSC register that counts CPU ticks with very low overhead). + + * Does the above for both ping-pong mode and latency under load mode. This means that we measure latency of single packets even under load of millions of Packets Per Second (without waiting for reply of packet before sending subsequent packet on time) + + * Enable spike analysis by providing histogram, with various percentiles of the packets’ latencies (for example: median, min, max, 99% percentile, and more), (this is in addition to average and standard deviation). Also, **sockperf** provides a full log with all packet’s tx/rx times that can be further analyzed with external tools, such as MS-Excel or matplotlib - All this without affecting the benchmark itself. 
+ + * Support MANY optional settings for good coverage of socket API and network configurations, while still keeping very low overhead in the fast path to allow cleanest results. + +## Prereqs: What you will need to compile sockperf on Unix systems + + * Perl 5.8+ (used by the automake tools) + + * GNU make tools: automake 1.7+, autoconf 2.57+, m4 1.4+ and libtool 1.4+ + + * A C++11 Compiler, among those tested are: + + * GCC + * Clang + * icc + + `sudo apt install perl make automake autoconf m4 libtool-bin g++` + +## How to install + + The sockperf package uses the GNU autotools compilation and installation + framework. +``` +./autogen.sh (only when cloning from repository) +./configure --prefix=<path to install> +make +make install + ``` +### Configuration + + Type `./configure --help` for a list of all the configure + options. Some of the options are generic autoconf options, while the sockperf + specific options are prefixed with "SOCKPERF:" in the help text. + + * To enable TLS support + * `./configure --prefix=<path to install> --with-tls=<path to openssl install>` + * Use OpenSSL 3.0.0 or higher + + * To enable unit tests + * `./configure --prefix=<path to install> --enable-test` + + * To enable the documentation + * `./configure --prefix=<path to install> --enable-doc` + + * To enable the special scripts + * `./configure --prefix=<path to install> --enable-tool` + + * To compile with debug symbols and information: + * `./configure --prefix=<path to install> --enable-debug` + * This will define the DEBUG variable at compile time. + +### To build for ARM + +1) Define CROSS_COMPILE in the environment to point to the cross compilation tools, e.g. +set `CROSS_COMPILE=/opt/gcc-linaro-arm-linux-gnueabihf-4.7-2012.11-20121123_linux/bin/arm-linux-gnueabihf-` +2) Use `./autogen.sh` to create the configure script. +3) Invoke `./configure` with the following options: +`./configure CXX=${CROSS_COMPILE}g++ STRIP=${CROSS_COMPILE}strip +LD=${CROSS_COMPILE}ld CC=${CROSS_COMPILE}gcc --host i386` +4) Invoke `make` + +### To build for FreeBSD + +* Make sure automake tools are installed. 
+ +## Licensing + + [View Here](https://github.com/Mellanox/sockperf/blob/sockperf_v2/copying) + +~Good luck! + diff --git a/test_programs/share/doc/sockperf/authors b/test_programs/share/doc/sockperf/authors new file mode 100644 index 000000000..c4c5ac53f --- /dev/null +++ b/test_programs/share/doc/sockperf/authors @@ -0,0 +1,2 @@ +Avner BenHanoch +Igor Ivanov diff --git a/test_programs/share/doc/sockperf/copying b/test_programs/share/doc/sockperf/copying new file mode 100644 index 000000000..cadcb1d1b --- /dev/null +++ b/test_programs/share/doc/sockperf/copying @@ -0,0 +1,28 @@ + /* + * Copyright (c) 2011-2022 Mellanox Technologies Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of the Mellanox Technologies Ltd nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY + * OF SUCH DAMAGE. + * + */ diff --git a/test_programs/share/doc/sockperf/news b/test_programs/share/doc/sockperf/news new file mode 100644 index 000000000..e69de29bb diff --git a/test_programs/share/doc/sockperf/version b/test_programs/share/doc/sockperf/version new file mode 100644 index 000000000..c8cfe3959 --- /dev/null +++ b/test_programs/share/doc/sockperf/version @@ -0,0 +1 @@ +3.10 diff --git a/test_programs/tcp_receiver.py b/test_programs/tcp_receiver.py new file mode 100644 index 000000000..8e3bd51df --- /dev/null +++ b/test_programs/tcp_receiver.py @@ -0,0 +1,20 @@ +import socket + +HOST = "10.2.164.5" +PORT = 31339 + +with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind((HOST, PORT)) + s.listen() + print("Listening on port: "+str(PORT)) + while True: + conn, addr = s.accept() + with conn: + print(f"Connected by {addr}") + while True: + data = conn.recv(1024) + if not data: + break + print(f"Received {data}") + #conn.sendall(data) + #conn.shutdown(socket.SHUT_RDWR) diff --git a/test_programs/tcp_sender.py b/test_programs/tcp_sender.py new file mode 100644 index 000000000..a0cd0cbd1 --- /dev/null +++ b/test_programs/tcp_sender.py @@ -0,0 +1,18 @@ +import socket +import time + +import socket + +HOST = "10.2.164.7" +PORT = 31330 + +with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.connect((HOST, PORT)) + print("connected") + for i in range(0, 5): + s.sendall(b"Hello, world") + print(f"Sent") + #data = s.recv(1024) + time.sleep(1) + + 
#s.shutdown(socket.SHUT_RDWR) diff --git a/test_programs/udp_receiver.py b/test_programs/udp_receiver.py new file mode 100644 index 000000000..248baa75f --- /dev/null +++ b/test_programs/udp_receiver.py @@ -0,0 +1,21 @@ +import socket + +server_address = '10.2.164.5' +server_port = 31337 +connected = False +connected_port = 0 + +with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s: + s.bind((server_address, server_port)) + print("Binding to " + server_address + ":" + str(server_port)) + while True: + payload, (client_address, client_port)= s.recvfrom(1024) + if connected_port != client_port: + s.connect((client_address, client_port)) + connected_port = client_port + + print(str(payload) + " Echoing to: " + str(client_address) + ":" + str(client_port)) + s.send(payload) + + #s.shutdown(socket.SHUT_RDWR) + diff --git a/test_programs/udp_sender.py b/test_programs/udp_sender.py new file mode 100644 index 000000000..a47fc3956 --- /dev/null +++ b/test_programs/udp_sender.py @@ -0,0 +1,17 @@ +import socket +import time + +server_address = '10.2.164.7' +server_port = 31337 + +with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as client_socket: + client_socket.connect((server_address, server_port)) + print("connected on port: ", client_socket.getsockname()[1]) + + for i in range(0, 5): + message = 'Hello World' + client_socket.send(message.encode()) + print('sent') + time.sleep(1) + #response = client_socket.recv(1024).decode() + #print(str(response)) diff --git a/test_programs/udp_sender_eth0.py b/test_programs/udp_sender_eth0.py new file mode 100644 index 000000000..2b5d43ba8 --- /dev/null +++ b/test_programs/udp_sender_eth0.py @@ -0,0 +1,37 @@ +import socket + +server_address = '10.2.164.5' +server_port = 31337 + +def create_server_sock() -> socket.socket: + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1) + s.bind((server_address, server_port)) + return s + + +server_sock1 = create_server_sock() 
+try: + server_sock2 = create_server_sock() # second bind to the same address/port +except Exception as err: + print(err) +else: + server_sock2.close() # release the second socket when the bind succeeded + +server_sock1.close() + +''' +for i in range(2): + active_sock = server_sock + for i in range(5): + payload, client = active_sock.recvfrom(1024) + if i == 0: + print("Connected by: ", client[0], client[1]) + active_sock.connect(client) + print(payload.decode()) + active_sock.send(payload) + #active_sock.close() + #server_sock = create_server_sock() + +server_sock.close() +'''