From 2839f87e5c75021fdf312e84efbd74f2eeb7af9b Mon Sep 17 00:00:00 2001 From: alp317 <12beemaarslan@seecs.edu.pk> Date: Wed, 13 Feb 2019 13:20:25 +0500 Subject: [PATCH 1/2] pci_dev in dpdk/device/cli.c for RTE_SCHED_COLLECT_STATS --- src/plugins/dpdk/device/cli.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c index 7e20f565a8c9..fd59509b5fc4 100644 --- a/src/plugins/dpdk/device/cli.c +++ b/src/plugins/dpdk/device/cli.c @@ -1848,6 +1848,7 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, dpdk_device_t *xd; uword *p = 0; struct rte_eth_dev_info dev_info; + struct rte_pci_device *pci_dev; dpdk_device_config_t *devconf = 0; u32 qindex; struct rte_sched_queue_stats stats; @@ -1893,14 +1894,16 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, xd = vec_elt_at_index (dm->devices, hw->dev_instance); rte_eth_dev_info_get (xd->port_id, &dev_info); - if (dev_info.pci_dev) + pci_dev = dpdk_get_pci_device (&dev_info); + + if (pci_dev) { /* bonded interface has no pci info */ vlib_pci_addr_t pci_addr; - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; + pci_addr.domain = pci_dev->addr.domain; + pci_addr.bus = pci_dev->addr.bus; + pci_addr.slot = pci_dev->addr.devid; + pci_addr.function = pci_dev->addr.function; p = hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); From 20d963a3bdb67533525a82f792b1d0887831e664 Mon Sep 17 00:00:00 2001 From: alp317 <12beemaarslan@seecs.edu.pk> Date: Wed, 20 Feb 2019 11:34:18 +0500 Subject: [PATCH 2/2] 1901 + HQoS --- .gitignore | 1 + Makefile | 33 +- build-data/packages/vom.mk | 4 + build-data/packages/vpp.mk | 7 + build-root/Makefile | 4 + build/external/ebuild-install | 1 + build/external/packages/dpdk.mk | 4 +- 
build/external/packages/nasm.mk | 2 +- docs/about.rst | 4 +- .../developers/buildwireshark.md | 49 +- docs/gettingstarted/developers/vnet.md | 60 +- docs/usecases/vmxnet3.rst | 22 + extras/scripts/vfctl | 10 + extras/vom/vom/CMakeLists.txt | 19 +- extras/vom/vom/api_types.cpp | 59 +- extras/vom/vom/api_types.hpp | 10 +- extras/vom/vom/arp_proxy_config.cpp | 5 +- extras/vom/vom/arp_proxy_config_cmds.cpp | 13 +- extras/vom/vom/enum_base.hpp | 21 + extras/vom/vom/gbp_bridge_domain.cpp | 65 +- extras/vom/vom/gbp_bridge_domain.hpp | 34 +- extras/vom/vom/gbp_bridge_domain_cmds.cpp | 14 +- extras/vom/vom/gbp_bridge_domain_cmds.hpp | 6 +- extras/vom/vom/gbp_endpoint_group.cpp | 52 +- extras/vom/vom/gbp_endpoint_group.hpp | 18 +- extras/vom/vom/gbp_endpoint_group_cmds.cpp | 3 + extras/vom/vom/gbp_endpoint_group_cmds.hpp | 2 + extras/vom/vom/gbp_route_domain.cpp | 2 +- extras/vom/vom/gbp_route_domain.hpp | 2 +- extras/vom/vom/gbp_vxlan_cmds.cpp | 2 - extras/vom/vom/interface_cmds.hpp | 1 - extras/vom/vom/interface_factory.hpp | 1 - extras/vom/vom/l2_binding.cpp | 37 +- extras/vom/vom/l2_binding.hpp | 20 +- extras/vom/vom/l2_binding_cmds.cpp | 44 - extras/vom/vom/l2_binding_cmds.hpp | 41 - extras/vom/vom/l2_vtr.cpp | 45 + extras/vom/vom/l2_vtr.hpp | 50 + extras/vom/vom/l2_vtr_cmds.cpp | 71 + extras/vom/vom/l2_vtr_cmds.hpp | 78 + extras/vom/vom/l2_xconnect.cpp | 27 + extras/vom/vom/l2_xconnect.hpp | 16 + extras/vom/vom/neighbour.cpp | 53 +- extras/vom/vom/neighbour.hpp | 32 +- extras/vom/vom/neighbour_cmds.cpp | 29 +- extras/vom/vom/neighbour_cmds.hpp | 8 +- extras/vom/vom/ra_prefix.cpp | 5 +- extras/vom/vom/route.cpp | 2 +- extras/vom/vom/tap_interface_cmds.cpp | 2 - extras/vom/vom/tap_interface_cmds.hpp | 1 - extras/vom/vom/vxlan_tunnel.cpp | 8 +- extras/vom/vom/vxlan_tunnel.hpp | 5 + src/CMakeLists.txt | 54 +- src/cmake/cpu.cmake | 7 +- src/cmake/library.cmake | 2 +- src/cmake/misc.cmake | 67 + src/cmake/pack.cmake | 1 + src/cmake/plugin.cmake | 5 +- src/pkg/CMakeLists.txt 
| 44 + src/pkg/debian/changelog.in | 6 + src/pkg/debian/control | 74 + src/pkg/debian/copyright | 9 + src/pkg/debian/rules.in | 47 + src/pkg/debian/vpp.postinst | 8 + src/pkg/debian/vpp.postrm | 21 + src/pkg/debian/vpp.preinst | 4 + src/pkg/debian/vpp.service | 13 + src/plugins/abf/abf_policy.c | 2 + src/plugins/acl/fa_node.h | 2 +- src/plugins/avf/avf.h | 3 +- src/plugins/avf/device.c | 8 +- src/plugins/avf/input.c | 32 +- src/plugins/avf/output.c | 2 +- src/plugins/dpdk/CMakeLists.txt | 255 ++- src/plugins/dpdk/buffer.c | 813 +++---- src/plugins/dpdk/buffer.h | 36 + src/plugins/dpdk/device/cli.c | 109 +- src/plugins/dpdk/device/common.c | 22 +- src/plugins/dpdk/device/device.c | 15 +- src/plugins/dpdk/device/dpdk.h | 17 +- src/plugins/dpdk/device/dpdk_priv.h | 11 - src/plugins/dpdk/device/init.c | 115 +- src/plugins/dpdk/device/node.c | 39 +- src/plugins/dpdk/ipsec/cli.c | 20 +- src/plugins/dpdk/ipsec/crypto_node.c | 87 +- src/plugins/dpdk/ipsec/esp_decrypt.c | 5 +- src/plugins/dpdk/ipsec/esp_encrypt.c | 5 +- src/plugins/dpdk/ipsec/ipsec.c | 91 +- src/plugins/dpdk/ipsec/ipsec.h | 8 +- src/plugins/flowprobe/node.c | 4 - src/plugins/gbp/CMakeLists.txt | 1 + src/plugins/gbp/gbp.api | 2 + src/plugins/gbp/gbp_api.c | 6 +- src/plugins/gbp/gbp_bridge_domain.c | 48 +- src/plugins/gbp/gbp_bridge_domain.h | 9 +- src/plugins/gbp/gbp_endpoint.c | 2 +- src/plugins/gbp/gbp_endpoint_group.c | 19 +- src/plugins/gbp/gbp_endpoint_group.h | 33 + src/plugins/gbp/gbp_learn.c | 8 +- src/plugins/gbp/gbp_route_domain.c | 5 + src/plugins/gbp/gbp_sclass.c | 386 ++++ src/plugins/gbp/gbp_sclass.h | 34 + src/plugins/gbp/gbp_types.h | 1 + src/plugins/gbp/gbp_vxlan.c | 35 +- src/plugins/igmp/igmp_pkt.c | 3 - src/plugins/ixge/ixge.c | 4 +- src/plugins/lb/lb.c | 16 +- src/plugins/mactime/mactime.c | 3 +- src/plugins/mactime/mactime.h | 2 +- src/plugins/map/map.c | 2 +- src/plugins/map/map_doc.md | 2 +- src/plugins/marvell/pp2/output.c | 2 +- src/plugins/marvell/pp2/pp2.c | 2 +- 
src/plugins/memif/memif.c | 12 +- src/plugins/memif/node.c | 46 +- src/plugins/memif/private.h | 3 +- src/plugins/nat/in2out.c | 10 +- src/plugins/nat/in2out_ed.c | 12 +- src/plugins/nat/nat.c | 109 +- src/plugins/nat/nat44_classify.c | 8 +- src/plugins/nat/nat44_cli.c | 8 +- src/plugins/nat/nat64.c | 29 +- src/plugins/nat/nat64.h | 4 +- src/plugins/nat/nat64_cli.c | 2 +- src/plugins/nat/nat64_db.c | 56 +- src/plugins/nat/nat64_db.h | 30 +- src/plugins/nat/nat64_in2out.c | 47 +- src/plugins/nat/nat64_out2in.c | 14 +- src/plugins/nat/nat_api.c | 2 +- src/plugins/nat/nat_det.h | 8 +- src/plugins/nat/nat_det_in2out.c | 15 +- src/plugins/nat/nat_ipfix_logging.c | 780 +++---- src/plugins/nat/nat_ipfix_logging.h | 45 +- src/plugins/nat/nat_reass.c | 12 +- src/plugins/nat/nat_reass.h | 8 +- src/plugins/nat/out2in.c | 8 +- src/plugins/nat/out2in_ed.c | 8 +- src/plugins/nsim/nsim_input.c | 3 - src/plugins/perfmon/perfmon.c | 90 +- src/plugins/perfmon/perfmon.h | 17 +- src/plugins/perfmon/perfmon_periodic.c | 360 +-- src/plugins/pppoe/pppoe.h | 2 +- src/plugins/tlsmbedtls/tls_mbedtls.c | 33 +- src/plugins/tlsopenssl/tls_async.c | 2 +- src/plugins/tlsopenssl/tls_openssl.c | 42 +- src/plugins/tlsopenssl/tls_openssl.h | 3 +- src/plugins/unittest/session_test.c | 119 +- src/plugins/unittest/string_test.c | 12 + src/plugins/unittest/tcp_test.c | 4 +- src/plugins/vmxnet3/README.md | 2 +- src/plugins/vmxnet3/cli.c | 18 +- src/plugins/vmxnet3/format.c | 140 +- src/plugins/vmxnet3/output.c | 8 +- src/plugins/vmxnet3/vmxnet3.api | 46 +- src/plugins/vmxnet3/vmxnet3.c | 157 +- src/plugins/vmxnet3/vmxnet3.h | 41 +- src/plugins/vmxnet3/vmxnet3_api.c | 29 +- src/plugins/vmxnet3/vmxnet3_test.c | 42 +- src/scripts/generate_version_h | 2 +- src/scripts/remove-rpath | 37 + src/svm/svm_fifo.c | 23 + src/svm/svm_fifo.h | 27 +- src/tests/vnet/session/tcp_echo.c | 256 +-- src/tools/g2/g2version.c | 9 +- src/tools/g2/view1.c | 771 +++++-- src/tools/vppapigen/test_vppapigen.py | 5 +- 
src/tools/vppapigen/vppapigen.py | 19 +- src/vat/api_format.c | 1231 +++++----- src/vat/main.c | 34 + src/vcl/ldp.c | 57 +- src/vcl/vcl_bapi.c | 5 +- src/vcl/vcl_locked.c | 705 +++++- src/vcl/vcl_locked.h | 2 + src/vcl/vcl_private.c | 181 +- src/vcl/vcl_private.h | 84 +- src/vcl/vppcom.c | 472 +--- src/vcl/vppcom.h | 10 +- src/vlib/buffer.c | 693 ++---- src/vlib/buffer.h | 307 +-- src/vlib/buffer_funcs.h | 718 +++--- src/vlib/buffer_node.h | 12 +- src/vlib/cli.c | 43 +- src/vlib/error.c | 31 - src/vlib/format.c | 24 + src/vlib/format_funcs.h | 3 + src/vlib/linux/pci.c | 28 +- src/vlib/linux/vmbus.c | 8 +- src/vlib/log.c | 4 +- src/vlib/log.h | 2 - src/vlib/main.c | 392 ++-- src/vlib/main.h | 16 +- src/vlib/node.h | 32 +- src/vlib/node_cli.c | 93 +- src/vlib/pci/pci.h | 3 + src/vlib/threads.c | 66 +- src/vlib/unix/input.c | 12 +- src/vlib/unix/mc_socket.c | 6 +- src/vlib/vlib.h | 1 + src/vlibapi/node_serialize.c | 11 +- src/vlibmemory/memory_client.c | 10 + src/vlibmemory/memory_client.h | 3 + src/vlibmemory/memory_shared.c | 1 + src/vlibmemory/socket_client.c | 19 +- src/vlibmemory/socket_client.h | 5 +- src/vnet/CMakeLists.txt | 32 +- src/vnet/bfd/bfd_main.c | 2 - src/vnet/bfd/bfd_main.h | 2 +- src/vnet/bfd/bfd_udp.c | 1 - src/vnet/bier/bier_fmask.h | 2 +- src/vnet/bonding/node.c | 2 + src/vnet/bonding/node.h | 2 +- src/vnet/buffer.h | 6 +- src/vnet/devices/af_packet/node.c | 2 +- src/vnet/devices/netmap/node.c | 2 +- src/vnet/devices/pipe/pipe_api.c | 2 +- src/vnet/devices/tap/cli.c | 100 +- src/vnet/devices/tap/tap.c | 27 +- src/vnet/devices/virtio/cli.c | 205 ++ src/vnet/devices/virtio/device.c | 144 +- src/vnet/devices/virtio/node.c | 53 +- src/vnet/devices/virtio/pci.c | 1021 +++++++++ src/vnet/devices/virtio/pci.h | 254 +++ src/vnet/devices/virtio/vhost_user_input.c | 8 +- src/vnet/devices/virtio/vhost_user_output.c | 2 +- src/vnet/devices/virtio/virtio.api | 96 + src/vnet/devices/virtio/virtio.c | 180 +- src/vnet/devices/virtio/virtio.h | 98 +- 
src/vnet/devices/virtio/virtio_api.c | 237 ++ src/vnet/dhcp/dhcp6_proxy_node.c | 6 +- src/vnet/dns/dns.c | 1 - src/vnet/dpo/dvr_dpo.c | 2 + src/vnet/dpo/dvr_dpo.h | 2 +- src/vnet/dpo/interface_rx_dpo.c | 2 + src/vnet/dpo/interface_rx_dpo.h | 2 +- src/vnet/dpo/l3_proxy_dpo.h | 2 +- src/vnet/dpo/receive_dpo.h | 2 +- src/vnet/ethernet/arp.c | 232 +- src/vnet/ethernet/arp.h | 58 +- src/vnet/ethernet/arp_packet.h | 35 +- src/vnet/ethernet/ethernet.h | 72 +- src/vnet/ethernet/interface.c | 4 +- src/vnet/ethernet/mac_address.c | 25 +- src/vnet/ethernet/mac_address.h | 68 +- src/vnet/ethernet/node.c | 8 +- src/vnet/fib/fib_node.h | 4 +- src/vnet/fib/ip4_fib.c | 11 +- src/vnet/interface.c | 4 +- src/vnet/interface.h | 31 +- src/vnet/interface_funcs.h | 2 - src/vnet/interface_output.c | 36 +- src/vnet/ip/ip.api | 162 +- src/vnet/ip/ip4_forward.c | 14 +- src/vnet/ip/ip6.h | 38 +- src/vnet/ip/ip6_forward.c | 152 +- src/vnet/ip/ip6_neighbor.c | 166 +- src/vnet/ip/ip6_neighbor.h | 26 +- src/vnet/ip/ip6_packet.h | 7 + src/vnet/ip/ip_api.c | 336 +-- src/vnet/ip/ip_checksum.c | 23 +- src/vnet/ip/ip_frag.c | 6 +- src/vnet/ip/ip_neighbor.c | 39 +- src/vnet/ip/ip_neighbor.h | 15 +- src/vnet/ip/ip_types_api.c | 28 +- src/vnet/ip/ip_types_api.h | 10 + src/vnet/ip/lookup.c | 1 + src/vnet/ip/ping.c | 11 +- src/vnet/ip/punt.c | 2 +- src/vnet/ip/rd_cp.c | 8 +- src/vnet/ipfix-export/flow_report.c | 5 +- src/vnet/ipfix-export/ipfix_doc.md | 4 - src/vnet/ipsec/ah_decrypt.c | 6 +- src/vnet/ipsec/ah_encrypt.c | 26 +- src/vnet/ipsec/esp_decrypt.c | 7 +- src/vnet/ipsec/esp_encrypt.c | 32 +- src/vnet/ipsec/ikev2.c | 16 +- src/vnet/ipsec/ikev2_crypto.c | 24 + src/vnet/ipsec/ipsec.api | 379 ++-- src/vnet/ipsec/ipsec.c | 501 +--- src/vnet/ipsec/ipsec.h | 279 +-- src/vnet/ipsec/ipsec_api.c | 497 ++-- src/vnet/ipsec/ipsec_cli.c | 482 +--- src/vnet/ipsec/ipsec_format.c | 148 ++ src/vnet/ipsec/ipsec_if.c | 171 +- src/vnet/ipsec/ipsec_if.h | 94 + src/vnet/ipsec/ipsec_if_in.c | 17 +- 
src/vnet/ipsec/ipsec_input.c | 206 +- src/vnet/ipsec/ipsec_io.h | 61 + src/vnet/ipsec/ipsec_output.c | 77 +- src/vnet/ipsec/ipsec_sa.c | 385 ++++ src/vnet/ipsec/ipsec_sa.h | 182 ++ src/vnet/ipsec/ipsec_spd.c | 116 + src/vnet/ipsec/ipsec_spd.h | 71 + src/vnet/ipsec/ipsec_spd_policy.c | 238 ++ src/vnet/ipsec/ipsec_spd_policy.h | 95 + src/vnet/l2/l2_bvi.h | 2 +- src/vnet/l2/l2_input.h | 1 + src/vnet/l2/l2_learn.h | 2 +- src/vnet/l2/l2_output.h | 1 + src/vnet/l2/l2_patch.c | 2 +- src/vnet/lisp-cp/control.c | 4 +- src/vnet/lisp-cp/lisp_cli.c | 1 + src/vnet/lisp-cp/lisp_msg_serdes.c | 1 + src/vnet/lisp-cp/one_cli.c | 3 + src/vnet/lldp/lldp_node.h | 2 +- src/vnet/mpls/mpls_lookup.h | 2 +- src/vnet/mpls/mpls_tunnel.c | 5 +- src/vnet/pg/cli.c | 103 +- src/vnet/pg/edit.c | 6 +- src/vnet/pg/input.c | 13 +- src/vnet/pg/pg.h | 1 + src/vnet/pg/stream.c | 27 +- src/vnet/sctp/sctp.c | 8 +- src/vnet/sctp/sctp_input.c | 2 +- src/vnet/sctp/sctp_output.c | 5 +- src/vnet/session-apps/echo_client.c | 41 +- src/vnet/session-apps/echo_server.c | 21 +- src/vnet/session-apps/http_server.c | 21 +- src/vnet/session-apps/proxy.c | 54 +- src/vnet/session/application.c | 2015 ++++++----------- src/vnet/session/application.h | 224 +- src/vnet/session/application_interface.c | 560 +---- src/vnet/session/application_interface.h | 78 +- src/vnet/session/application_namespace.c | 21 +- src/vnet/session/application_namespace.h | 5 +- src/vnet/session/application_worker.c | 1165 ++++++++++ src/vnet/session/mma_16.h | 2 +- src/vnet/session/mma_40.h | 2 +- src/vnet/session/mma_template.c | 2 +- src/vnet/session/mma_template.h | 2 +- src/vnet/session/segment_manager.c | 9 +- src/vnet/session/segment_manager.h | 2 +- src/vnet/session/session.c | 302 +-- src/vnet/session/session.h | 461 ++-- src/vnet/session/session_api.c | 297 ++- src/vnet/session/session_cli.c | 142 +- src/vnet/session/session_debug.h | 2 +- src/vnet/session/session_lookup.c | 130 +- src/vnet/session/session_lookup.h | 44 +- 
src/vnet/session/session_node.c | 138 +- src/vnet/session/session_rules_table.c | 10 +- src/vnet/session/session_rules_table.h | 7 +- src/vnet/session/session_table.c | 2 +- src/vnet/session/session_table.h | 2 +- src/vnet/session/session_types.h | 491 ++++ src/vnet/session/transport.c | 38 +- src/vnet/session/transport.h | 303 +-- src/vnet/session/transport_types.h | 197 ++ src/vnet/srp/node.c | 8 +- src/vnet/syslog/syslog.c | 4 - src/vnet/tcp/tcp.c | 14 +- src/vnet/tcp/tcp.h | 1 - src/vnet/tcp/tcp_input.c | 19 +- src/vnet/tcp/tcp_output.c | 130 +- src/vnet/tls/tls.c | 108 +- src/vnet/tls/tls.h | 35 +- src/vnet/tls/tls_test.h | 87 + src/vnet/udp/udp.c | 12 +- src/vnet/udp/udp_input.c | 9 +- src/vnet/unix/tuntap.c | 4 +- src/vnet/vnet_all_api_h.h | 2 +- src/vnet/vxlan-gbp/decap.c | 6 +- src/vnet/vxlan-gbp/encap.c | 12 +- src/vpp-api/python/CMakeLists.txt | 1 - src/vpp/CMakeLists.txt | 3 + src/vpp/api/api.c | 5 +- src/vpp/api/custom_dump.c | 181 +- src/vpp/api/types.c | 59 +- src/vpp/api/types.h | 5 + src/vpp/api/vpe.api | 1 - src/vpp/conf/startup.conf | 16 +- src/vpp/vnet/main.c | 6 +- src/vppinfra/cache.h | 1 + src/vppinfra/config.h.in | 2 + src/vppinfra/cpu.h | 81 +- src/vppinfra/error_bootstrap.h | 4 + src/vppinfra/pmalloc.c | 3 + src/vppinfra/pool.h | 27 +- src/vppinfra/string.h | 68 - test/Makefile | 26 +- test/doc/conf.py | 10 +- test/ext/vom_test.cpp | 8 +- test/framework.py | 12 +- test/hook.py | 37 +- test/lisp.py | 6 +- test/requirements.txt | 2 +- test/template_ipsec.py | 132 +- test/test_acl_plugin_conns.py | 4 + test/test_bfd.py | 33 +- test/test_classifier.py | 8 +- test/test_classifier_ip6.py | 8 +- test/test_classify_l2_acl.py | 8 +- test/test_container.py | 4 + test/test_dhcp.py | 31 +- test/test_flowprobe.py | 54 +- test/test_gbp.py | 249 +- test/test_ip6.py | 59 +- test/test_ipip.py | 1 + test/test_ipsec_ah.py | 262 ++- test/test_ipsec_api.py | 61 +- test/test_ipsec_esp.py | 259 ++- test/test_ipsec_nat.py | 144 +- test/test_ipsec_tun_if_esp.py | 
16 +- test/test_jvpp.py | 8 + test/test_l2bd_arp_term.py | 11 +- test/test_nat.py | 91 +- test/test_neighbor.py | 67 +- test/test_punt.py | 25 + test/test_srv6.py | 12 +- test/test_syslog.py | 12 +- test/test_vapi.py | 8 + test/test_vcl.py | 108 +- test/test_vom.py | 8 + test/test_vxlan6.py | 4 + test/test_vxlan_gpe.py | 4 + test/util.py | 12 +- test/vpp_interface.py | 16 +- test/vpp_ip.py | 10 +- test/vpp_ip_route.py | 9 +- test/vpp_ipsec.py | 254 +++ test/vpp_neighbor.py | 54 +- test/vpp_papi_provider.py | 195 +- 421 files changed, 18543 insertions(+), 12657 deletions(-) create mode 100644 build/external/ebuild-install create mode 100644 extras/vom/vom/l2_vtr.cpp create mode 100644 extras/vom/vom/l2_vtr.hpp create mode 100644 extras/vom/vom/l2_vtr_cmds.cpp create mode 100644 extras/vom/vom/l2_vtr_cmds.hpp create mode 100644 src/cmake/misc.cmake create mode 100644 src/pkg/CMakeLists.txt create mode 100644 src/pkg/debian/changelog.in create mode 100644 src/pkg/debian/control create mode 100644 src/pkg/debian/copyright create mode 100755 src/pkg/debian/rules.in create mode 100644 src/pkg/debian/vpp.postinst create mode 100644 src/pkg/debian/vpp.postrm create mode 100644 src/pkg/debian/vpp.preinst create mode 100644 src/pkg/debian/vpp.service create mode 100644 src/plugins/dpdk/buffer.h create mode 100644 src/plugins/gbp/gbp_sclass.c create mode 100644 src/plugins/gbp/gbp_sclass.h create mode 100755 src/scripts/remove-rpath create mode 100644 src/vnet/devices/virtio/cli.c create mode 100644 src/vnet/devices/virtio/pci.c create mode 100644 src/vnet/devices/virtio/pci.h create mode 100644 src/vnet/devices/virtio/virtio.api create mode 100644 src/vnet/devices/virtio/virtio_api.c create mode 100644 src/vnet/ipsec/ipsec_if.h create mode 100644 src/vnet/ipsec/ipsec_io.h create mode 100644 src/vnet/ipsec/ipsec_sa.c create mode 100644 src/vnet/ipsec/ipsec_sa.h create mode 100644 src/vnet/ipsec/ipsec_spd.c create mode 100644 src/vnet/ipsec/ipsec_spd.h create mode 100644 
src/vnet/ipsec/ipsec_spd_policy.c create mode 100644 src/vnet/ipsec/ipsec_spd_policy.h create mode 100644 src/vnet/session/application_worker.c create mode 100644 src/vnet/session/session_types.h create mode 100644 src/vnet/session/transport_types.h create mode 100644 src/vnet/tls/tls_test.h create mode 100644 test/vpp_ipsec.py diff --git a/.gitignore b/.gitignore index 7d46d3d0ad3f..0dcf62b0f3ec 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ /build-config.mk /build/external/*.tar.gz /build/external/*.tar.xz +/build/external/vpp-*.rpm /build/external/vpp-*.deb /build/external/vpp-*.changes /build/external/downloads/ diff --git a/Makefile b/Makefile index 86ac60c735c2..015db5294319 100644 --- a/Makefile +++ b/Makefile @@ -65,7 +65,7 @@ DEB_DEPENDS += debhelper dkms git libtool libapr1-dev dh-systemd DEB_DEPENDS += libconfuse-dev git-review exuberant-ctags cscope pkg-config DEB_DEPENDS += lcov chrpath autoconf indent clang-format libnuma-dev DEB_DEPENDS += python-all python-dev python-virtualenv python-pip libffi6 check -DEB_DEPENDS += libboost-all-dev libffi-dev python-ply libmbedtls-dev +DEB_DEPENDS += libboost-all-dev libffi-dev python3-ply libmbedtls-dev DEB_DEPENDS += cmake ninja-build uuid-dev ifeq ($(OS_VERSION_ID),14.04) DEB_DEPENDS += openjdk-8-jdk-headless @@ -95,7 +95,7 @@ ifeq ($(OS_ID),fedora) RPM_DEPENDS += dnf-utils RPM_DEPENDS += subunit subunit-devel RPM_DEPENDS += compat-openssl10-devel - RPM_DEPENDS += python2-devel python2-ply + RPM_DEPENDS += python2-devel python34-ply RPM_DEPENDS += python2-virtualenv RPM_DEPENDS += mbedtls-devel RPM_DEPENDS += cmake @@ -103,7 +103,7 @@ ifeq ($(OS_ID),fedora) else RPM_DEPENDS += yum-utils RPM_DEPENDS += openssl-devel - RPM_DEPENDS += python-devel python-ply + RPM_DEPENDS += python-devel python34-ply RPM_DEPENDS += python-virtualenv RPM_DEPENDS += devtoolset-7 RPM_DEPENDS += cmake3 @@ -117,7 +117,7 @@ RPM_DEPENDS += chrpath libffi-devel rpm-build SUSE_NAME= $(shell grep '^NAME=' /etc/os-release | cut 
-f2- -d= | sed -e 's/\"//g' | cut -d' ' -f2) SUSE_ID= $(shell grep '^VERSION_ID=' /etc/os-release | cut -f2- -d= | sed -e 's/\"//g' | cut -d' ' -f2) RPM_SUSE_BUILDTOOLS_DEPS = autoconf automake ccache check-devel chrpath -RPM_SUSE_BUILDTOOLS_DEPS += clang cmake indent libtool make ninja python-ply +RPM_SUSE_BUILDTOOLS_DEPS += clang cmake indent libtool make ninja python3-ply RPM_SUSE_DEVEL_DEPS = glibc-devel-static java-1_8_0-openjdk-devel libnuma-devel RPM_SUSE_DEVEL_DEPS += libopenssl-devel openssl-devel mbedtls-devel libuuid-devel @@ -130,11 +130,11 @@ RPM_SUSE_PLATFORM_DEPS = distribution-release shadow rpm-build ifeq ($(OS_ID),opensuse) ifeq ($(SUSE_NAME),Tumbleweed) RPM_SUSE_DEVEL_DEPS = libboost_headers1_68_0-devel-1.68.0 libboost_thread1_68_0-devel-1.68.0 gcc - RPM_SUSE_PYTHON_DEPS += python2-ply python2-virtualenv + RPM_SUSE_PYTHON_DEPS += python3-ply python2-virtualenv endif ifeq ($(SUSE_ID),15.0) RPM_SUSE_DEVEL_DEPS = libboost_headers1_68_0-devel-1.68.0 libboost_thread1_68_0-devel-1.68.0 gcc6 - RPM_SUSE_PYTHON_DEPS += python2-ply python2-virtualenv + RPM_SUSE_PYTHON_DEPS += python3-ply python2-virtualenv else RPM_SUSE_DEVEL_DEPS += libboost_headers1_68_0-devel-1.68.0 gcc6 RPM_SUSE_PYTHON_DEPS += python-virtualenv @@ -144,7 +144,7 @@ endif ifeq ($(OS_ID),opensuse-leap) ifeq ($(SUSE_ID),15.0) RPM_SUSE_DEVEL_DEPS = libboost_headers-devel libboost_thread-devel gcc6 - RPM_SUSE_PYTHON_DEPS += python2-ply python2-virtualenv + RPM_SUSE_PYTHON_DEPS += python3-ply python2-virtualenv endif endif @@ -204,6 +204,7 @@ help: @echo " test-help - show help on test framework" @echo " run-vat - run vpp-api-test tool" @echo " pkg-deb - build DEB packages" + @echo " vom-pkg-deb - build vom DEB packages" @echo " pkg-rpm - build RPM packages" @echo " install-ext-deps - install external development dependencies" @echo " ctags - (re)generate ctags database" @@ -299,7 +300,7 @@ ifeq ($(OS_ID),rhel) @sudo -E yum install $(CONFIRM) $(RPM_DEPENDS) @sudo -E debuginfo-install 
$(CONFIRM) glibc openssl-libs mbedtls-devel zlib else ifeq ($(OS_ID),centos) - @sudo -E yum install $(CONFIRM) centos-release-scl-rh + @sudo -E yum install $(CONFIRM) centos-release-scl-rh epel-release @sudo -E yum groupinstall $(CONFIRM) $(RPM_DEPENDS_GROUPS) @sudo -E yum install $(CONFIRM) $(RPM_DEPENDS) @sudo -E debuginfo-install $(CONFIRM) glibc openssl-libs mbedtls-devel zlib @@ -434,6 +435,7 @@ test-wipe-doc: @make -C test wipe-doc test-cov: + @make -C $(BR) PLATFORM=vpp TAG=vpp_gcov vom-install japi-install $(eval EXTENDED_TESTS=yes) $(call test,vpp,vpp_gcov,cov) @@ -492,7 +494,18 @@ run-vat: @$(SUDO) $(BR)/install-$(PLATFORM)_debug-native/vpp/bin/vpp_api_test pkg-deb: - $(call make,$(PLATFORM),install-deb) + $(call make,$(PLATFORM),vpp-package-deb) + +vom-pkg-deb: + $(call make,$(PLATFORM),vpp-package-deb) + $(call make,$(PLATFORM),vom-package-deb) + +pkg-deb-debug: + $(call make,$(PLATFORM)_debug,vpp-package-deb) + +vom-pkg-deb-debug: + $(call make,$(PLATFORM)_debug,vpp-package-deb) + $(call make,$(PLATFORM)_debug,vom-package-deb) pkg-rpm: dist make -C extras/rpm @@ -578,5 +591,3 @@ ifeq ($(OS_ID)-$(OS_VERSION_ID),ubuntu-18.04) $(call banner,"Running tests") @make COMPRESS_FAILED_TEST_LOGS=yes RETRIES=3 test endif - - diff --git a/build-data/packages/vom.mk b/build-data/packages/vom.mk index e7e02282db44..fefd49ac8415 100644 --- a/build-data/packages/vom.mk +++ b/build-data/packages/vom.mk @@ -39,3 +39,7 @@ vom_configure = \ vom_build = $(CMAKE) --build $(PACKAGE_BUILD_DIR) -- $(MAKE_PARALLEL_FLAGS) vom_install = $(CMAKE) --build $(PACKAGE_BUILD_DIR) -- install + +vom-package-deb: vom-install + @$(CMAKE) --build $(PACKAGE_BUILD_DIR)/vom -- package + @find $(PACKAGE_BUILD_DIR)/vom -name '*.deb' -exec mv {} $(CURDIR) \; diff --git a/build-data/packages/vpp.mk b/build-data/packages/vpp.mk index 2a1b9a77de98..9ce557a99461 100644 --- a/build-data/packages/vpp.mk +++ b/build-data/packages/vpp.mk @@ -44,3 +44,10 @@ vpp_configure = \ #vpp_make_args = 
--no-print-directory vpp_build = $(CMAKE) --build $(PACKAGE_BUILD_DIR) vpp_install = $(CMAKE) --build $(PACKAGE_BUILD_DIR) -- install | grep -v 'Set runtime path' + +vpp-package-deb: vpp-install + @$(CMAKE) --build $(PACKAGE_BUILD_DIR)/vpp -- package-deb + @find $(PACKAGE_BUILD_DIR) \ + -maxdepth 1 \ + \( -name '*.changes' -o -name '*.deb' -o -name '*.buildinfo' \) \ + -exec mv {} $(CURDIR) \; diff --git a/build-root/Makefile b/build-root/Makefile index a3335236c041..d810abb86262 100644 --- a/build-root/Makefile +++ b/build-root/Makefile @@ -625,6 +625,10 @@ configure_check_timestamp = \ mkdir -p $(PACKAGE_INSTALL_DIR) ; \ conf="$(TIMESTAMP_DIR)/$(CONFIGURE_TIMESTAMP)" ; \ dirs="$(call package_mk_fn,$(PACKAGE)) \ + $(SOURCE_PATH_BUILD_DATA_DIRS)/platforms/$(PLATFORM).mk \ + $(wildcard $(call find_source_fn,$(PACKAGE_SOURCE))/cmake) \ + $(shell find $(call find_source_fn,$(PACKAGE_SOURCE)) \ + -name CMakeLists.txt) \ $(wildcard $(call find_source_fn, \ $(PACKAGE_SOURCE))$(PACKAGE_SUBDIR)/configure) \ $(MU_BUILD_ROOT_DIR)/config.site" ; \ diff --git a/build/external/ebuild-install b/build/external/ebuild-install new file mode 100644 index 000000000000..a5fd84119b9a --- /dev/null +++ b/build/external/ebuild-install @@ -0,0 +1 @@ +CONFIG_RTE_SCHED_COLLECT_STATS=y diff --git a/build/external/packages/dpdk.mk b/build/external/packages/dpdk.mk index 6c46ac298341..9e10fca1a49b 100644 --- a/build/external/packages/dpdk.mk +++ b/build/external/packages/dpdk.mk @@ -21,12 +21,12 @@ DPDK_MLX5_PMD_DLOPEN_DEPS ?= n DPDK_TAP_PMD ?= n DPDK_FAILSAFE_PMD ?= n -DPDK_VERSION ?= 18.11 +DPDK_VERSION ?= 19.02 DPDK_BASE_URL ?= http://fast.dpdk.org/rel DPDK_TARBALL := dpdk-$(DPDK_VERSION).tar.xz DPDK_TAR_URL := $(DPDK_BASE_URL)/$(DPDK_TARBALL) -DPDK_18.08_TARBALL_MD5_CKSUM := da5e7fb25ab063c47e53929fb8c58be5 DPDK_18.11_TARBALL_MD5_CKSUM := 04b86f4a77f4f81a7fbd26467dd2ea9f +DPDK_19.02_TARBALL_MD5_CKSUM := 23944a2cdee061aa4bd72ebe7d836db0 MACHINE=$(shell uname -m) # replace dot with space, 
and if 3rd word exists we deal with stable dpdk rel diff --git a/build/external/packages/nasm.mk b/build/external/packages/nasm.mk index 1219b3310afb..b782b53f54cd 100644 --- a/build/external/packages/nasm.mk +++ b/build/external/packages/nasm.mk @@ -15,7 +15,7 @@ nasm_version := 2.13.03 nasm_tarball := nasm-$(nasm_version).tar.xz nasm_tarball_md5sum := d5ca2ad7121ccbae69dd606b1038532c nasm_tarball_strip_dirs := 1 -nasm_url := http://www.nasm.us/pub/nasm/releasebuilds/$(nasm_version)/$(nasm_tarball) +nasm_url := https://ftp.osuosl.org/pub/blfs/conglomeration/nasm/$(nasm_tarball) nasm_cflags := -Wno-implicit-fallthrough -std=c11 $(eval $(call package,nasm)) diff --git a/docs/about.rst b/docs/about.rst index cd95bafbb7ca..76e83d5ca9ca 100644 --- a/docs/about.rst +++ b/docs/about.rst @@ -4,6 +4,6 @@ About ===== -**VPP Version:** 19.01-rc0~497-g05ce4b8 +**VPP Version:** 19.04-rc0~102-g22c0ece -**Built on:** Mon Dec 17 20:43:52 GMT 2018 +**Built on:** Wed Jan 30 15:37:57 GMT 2019 diff --git a/docs/gettingstarted/developers/buildwireshark.md b/docs/gettingstarted/developers/buildwireshark.md index 3da70e9e0bab..5d87b7a9b7b4 100644 --- a/docs/gettingstarted/developers/buildwireshark.md +++ b/docs/gettingstarted/developers/buildwireshark.md @@ -1,27 +1,20 @@ How to build a vpp dispatch trace aware Wireshark ================================================= -At some point, we will upstream our vpp pcap dispatch trace dissector. -It's not finished - contributions welcome - and we have to work through -whatever issues will be discovered during the upstreaming process. +The vpp pcap dispatch trace dissector has been merged into the wireshark +main branch, so the process is simple. Download wireshark, compile it, +and install it. -On the other hand, it's ready for some tire-kicking. Here's how to build -wireshark. 
+Download wireshark source code +------------------------------ -Download and patch wireshark source code ------------------------------------------ - -The wireshark git repo is large, so it takes a while to clone. +The wireshark git repo is large, so it takes a while to clone. ``` - git clone https://code.wireshark.org/review/wireshark - cp .../extras/wireshark/packet-vpp.c wireshark/epan/dissectors - patch -p1 < .../extras/wireshark/diffs.txt + git clone https://code.wireshark.org/review/wireshark ``` -The small patch adds packet-vpp.c to the dissector list. - -Install prerequisite Debian packages +Install prerequisite packages ------------------------------------ Here is a list of prerequisite packages which must be present in order @@ -29,22 +22,24 @@ to compile wireshark, beyond what's typically installed on an Ubuntu 18.04 system: ``` - libgcrypt11-dev flex bison qtbase5-dev qttools5-dev-tools qttools5-dev - qtmultimedia5-dev libqt5svg5-dev libpcap-dev qt5-default + libgcrypt11-dev flex bison qtbase5-dev qttools5-dev-tools qttools5-dev + qtmultimedia5-dev libqt5svg5-dev libpcap-dev qt5-default ``` Compile Wireshark ----------------- Mercifully, Wireshark uses cmake, so it's relatively easy to build, at -least on Ubuntu 18.04. +least on Ubuntu 18.04. ``` - $ cd wireshark - $ cmake -G Ninja - $ ninja -j 8 - $ sudo ninja install + $ cd wireshark + $ mkdir build + $ cd build + $ cmake -G Ninja ../ + $ ninja -j 8 + $ sudo ninja install ``` Make a pcap dispatch trace @@ -52,11 +47,12 @@ Make a pcap dispatch trace Configure vpp to pass traffic in some fashion or other, and then: + ``` vpp# pcap dispatch trace on max 10000 file vppcapture buffer-trace dpdk-input 1000 - ``` + or similar. Run traffic for long enough to capture some data. Save the dispatch trace capture like so: @@ -73,9 +69,4 @@ dispatch trace pcap files because they won't understand the encap type. Set wireshark to filter on vpp.bufferindex to watch a single packet traverse the forwarding graph. 
Otherwise, you'll see a vector of packets -in e.g. ip4-lookup, then a vector of packets in ip4-rewrite, etc. - - - - - +in e.g. ip4-lookup, then a vector of packets in ip4-rewrite, etc. diff --git a/docs/gettingstarted/developers/vnet.md b/docs/gettingstarted/developers/vnet.md index 092da1a28a63..79d539676b92 100644 --- a/docs/gettingstarted/developers/vnet.md +++ b/docs/gettingstarted/developers/vnet.md @@ -54,16 +54,16 @@ units to convert buffer indices to buffer pointers: n_left_from = frame->n_vectors; from = vlib_frame_vector_args (frame); - /* - * Convert up to VLIB_FRAME_SIZE indices in "from" to + /* + * Convert up to VLIB_FRAME_SIZE indices in "from" to * buffer pointers in bufs[] */ vlib_get_buffers (vm, from, bufs, n_left_from); b = bufs; next = nexts; - /* - * While we have at least 4 vector elements (pkts) to process.. + /* + * While we have at least 4 vector elements (pkts) to process.. */ while (n_left_from >= 4) { @@ -76,7 +76,7 @@ units to convert buffer indices to buffer pointers: vlib_prefetch_buffer_header (b[7], STORE); } - /* + /* * $$$ Process 4x packets right here... * set next[0..3] to send the packets where they need to go */ @@ -91,12 +91,12 @@ units to convert buffer indices to buffer pointers: next += 4; n_left_from -= 4; } - /* + /* * Clean up 0...3 remaining packets at the end of the incoming frame */ while (n_left_from > 0) { - /* + /* * $$$ Process one packet right here... * set next[0..3] to send the packets where they need to go */ @@ -117,7 +117,7 @@ units to convert buffer indices to buffer pointers: vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); return frame->n_vectors; - } + } ``` Given a packet processing task to implement, it pays to scout around @@ -150,12 +150,11 @@ tcp_get_free_buffer_index(...) for an example. The following example shows the **main points**, but is not to be blindly cut-'n-pasted. 
-```c +```c u32 bi0; vlib_buffer_t *b0; ip4_header_t *ip; udp_header_t *udp; - vlib_buffer_free_list_t *fl; /* Allocate a buffer */ if (vlib_buffer_alloc (vm, &bi0, 1) != 1) @@ -164,13 +163,11 @@ blindly cut-'n-pasted. b0 = vlib_get_buffer (vm, bi0); /* Initialize the buffer */ - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); /* At this point b0->current_data = 0, b0->current_length = 0 */ - /* + /* * Copy data into the buffer. This example ASSUMES that data will fit * in a single buffer, and is e.g. an ip4 packet. */ @@ -179,7 +176,7 @@ blindly cut-'n-pasted. clib_memcpy (b0->data, data, vec_len (data)); b0->current_length = vec_len (data); } - else + else { /* OR, build a udp-ip packet (for example) */ ip = vlib_buffer_get_current (b0); @@ -204,7 +201,7 @@ blindly cut-'n-pasted. udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip); if (udp->checksum == 0) udp->checksum = 0xffff; - } + } b0->current_length = vec_len (sizeof (*ip) + sizeof (*udp) + vec_len (udp_data)); @@ -217,7 +214,7 @@ blindly cut-'n-pasted. /* Use the default FIB index for tx lookup. Set non-zero to use another fib */ vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0; -``` +``` If your use-case calls for large packet transmission, use vlib_buffer_chain_append_data_with_alloc(...) to create the requisite @@ -239,8 +236,8 @@ indices, and dispatch the frame using vlib_put_frame_to_node(...). for (i = 0; i < vec_len (buffer_indices_to_send); i++) to_next[i] = buffer_indices_to_send[i]; - vlib_put_frame_to_node (vm, ip4_lookup_node_index, f); -``` + vlib_put_frame_to_node (vm, ip4_lookup_node_index, f); +``` It is inefficient to allocate and schedule single packet frames. 
That's typical in case you need to send one packet per second, but @@ -282,7 +279,7 @@ Here's a simple example: s = format (s, "My trace data was: %d", t->); return s; - } + } ``` The trace framework hands the per-node format function the data it @@ -381,17 +378,17 @@ To save the pcap trace, e.g. in /tmp/dispatch.pcap: ``` pcap dispatch trace off -``` +``` ### Wireshark dissection of dispatch pcap traces It almost goes without saying that we built a companion wireshark -dissector to display these traces. As of this writing, we're in the -process of trying to upstream the wireshark dissector. +dissector to display these traces. As of this writing, we have +upstreamed the wireshark dissector. -Until we manage to upstream the wireshark dissector, please see the -"How to build a vpp dispatch trace aware Wireshark" page for build -info, and/or take a look at .../extras/wireshark. +Since it will be a while before wireshark/master/latest makes it into +all of the popular Linux distros, please see the "How to build a vpp +dispatch trace aware Wireshark" page for build info. Here is a sample packet dissection, with some fields omitted for clarity. The point is that the wireshark dissector accurately @@ -406,15 +403,15 @@ node in question. 
BufferIndex: 0x00036663 NodeName: ethernet-input VPP Buffer Metadata - Metadata: flags: + Metadata: flags: Metadata: current_data: 0, current_length: 102 Metadata: current_config_index: 0, flow_id: 0, next_buffer: 0 Metadata: error: 0, n_add_refs: 0, buffer_pool_index: 0 Metadata: trace_index: 0, recycle_count: 0, len_not_first_buf: 0 Metadata: free_list_index: 0 - Metadata: + Metadata: VPP Buffer Opaque - Opaque: raw: 00000007 ffffffff 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 + Opaque: raw: 00000007 ffffffff 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 Opaque: sw_if_index[VLIB_RX]: 7, sw_if_index[VLIB_TX]: -1 Opaque: L2 offset 0, L3 offset 0, L4 offset 0, feature arc index 0 Opaque: ip.adj_index[VLIB_RX]: 0, ip.adj_index[VLIB_TX]: 0 @@ -443,14 +440,14 @@ node in question. Opaque: sctp.connection_index: 0, sctp.sid: 0, sctp.ssn: 0, sctp.tsn: 0, sctp.hdr_offset: 0 Opaque: sctp.data_offset: 0, sctp.data_len: 0, sctp.subconn_idx: 0, sctp.flags: 0x0 Opaque: snat.flags: 0x0 - Opaque: + Opaque: VPP Buffer Opaque2 - Opaque2: raw: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 + Opaque2: raw: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 Opaque2: qos.bits: 0, qos.source: 0 Opaque2: loop_counter: 0 Opaque2: gbp.flags: 0, gbp.src_epg: 0 Opaque2: pg_replay_timestamp: 0 - Opaque2: + Opaque2: Ethernet II, Src: 06:d6:01:41:3b:92 (06:d6:01:41:3b:92), Dst: IntelCor_3d:f6 Transmission Control Protocol, Src Port: 22432, Dst Port: 54084, Seq: 1, Ack: 1, Len: 36 Source Port: 22432 Destination Port: 54084 @@ -472,4 +469,3 @@ metadata changes, header checksum changes, and so forth. This should be of significant value when developing new vpp graph nodes. If new code mispositions b->current_data, it will be completely obvious from looking at the dispatch trace in wireshark. 
- diff --git a/docs/usecases/vmxnet3.rst b/docs/usecases/vmxnet3.rst index 7e7210f83fc4..e556207158b3 100644 --- a/docs/usecases/vmxnet3.rst +++ b/docs/usecases/vmxnet3.rst @@ -39,6 +39,28 @@ Prerequisites without IOMMU vfio driver can still be used with recent kernels which support no-iommu mode. +VMware Fusion for Mac +--------------------- + +VMware Fusion does not have a menu option to change the default driver (e1000) +to the **vmxnet3** driver. VPP supports the **vmxnet3** driver. + +These instructions describe how to change the e1000 driver to **vmxnet3** for VMware Fusion. + +* From the VMware Fusion menu bar select **Window** then **Virtual Machine Library**. +* From the Virtual Machine menu right click on the Virtual Machine you are using and select **Show in Finder** +* Find the name associated with the VM you are using, right click on it and select **Show Package Contents** +* Find the **.vmx** file and edit it. +* Find all the occurrences of **e1000** and change them to **vmxnet3** + +If you are more concerned with configuration than performance, the vmxnet3 driver can be set to +**interrupt** mode in VPP. This will save a great deal on battery usage. Do this with the following command: + +.. 
code-block:: console + + # vppctl set interface rx-mode interrupt + + System setup ~~~~~~~~~~~~ diff --git a/extras/scripts/vfctl b/extras/scripts/vfctl index 21868b021bb7..9fe6c8b02e76 100755 --- a/extras/scripts/vfctl +++ b/extras/scripts/vfctl @@ -4,6 +4,15 @@ function die() { echo "ERROR: $*" >&2 exit 1 } +function pci-unbind() { + echo $1 | sudo tee /sys/bus/pci/devices/$1/driver/unbind > /dev/null +} +function pci-bind() { + pci-unbind $1 + echo $2 | sudo tee /sys/bus/pci/devices/$1/driver_override > /dev/null + echo $1 | sudo tee /sys/bus/pci/drivers/$2/bind > /dev/null + echo | sudo tee /sys/bus/pci/devices/$1/driver_override > /dev/null +} function show_vfs() { path=$1 @@ -92,6 +101,7 @@ function create () { sudo ip link set dev ${netdev} vf ${vfid} mac ${mac} sudo ip link set dev ${netdev} vf ${vfid} trust on sudo ip link set dev ${netdev} vf ${vfid} spoofchk off + pci-bind ${vf} vfio-pci done [ $(cat ${path}/sriov_numvfs) -gt 0 ] && show_vfs ${path} ${netdev} diff --git a/extras/vom/vom/CMakeLists.txt b/extras/vom/vom/CMakeLists.txt index 475672a001a7..cebf152d1f82 100644 --- a/extras/vom/vom/CMakeLists.txt +++ b/extras/vom/vom/CMakeLists.txt @@ -146,6 +146,8 @@ list(APPEND VOM_SOURCES ip_unnumbered.cpp l2_binding_cmds.cpp l2_binding.cpp + l2_vtr.cpp + l2_vtr_cmds.cpp l2_xconnect_cmds.cpp l2_xconnect.cpp l3_binding_cmds.cpp @@ -252,6 +254,7 @@ list(APPEND VOM_HEADERS ip_punt_redirect.hpp ip_unnumbered.hpp l2_binding.hpp + l2_vtr.hpp l2_xconnect.hpp l3_binding.hpp lldp_binding.hpp @@ -279,17 +282,11 @@ list(APPEND VOM_HEADERS ) add_definitions(-Wall -Werror -std=gnu++11) - -add_vpp_library(vom - SOURCES ${VOM_SOURCES} - - INSTALL_HEADERS ${VOM_HEADERS} - - LINK_LIBRARIES ${VPPAPICLIENT_LIB} ${VAPICLIENT_LIB} Threads::Threads - ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY} m rt - - COMPONENT libvom -) +add_library(vom SHARED ${VOM_SOURCES}) +target_link_libraries(vom ${VPPAPICLIENT_LIB} ${VAPICLIENT_LIB} Threads::Threads + ${Boost_SYSTEM_LIBRARY} 
${Boost_FILESYSTEM_LIBRARY} m rt) +install(TARGETS vom DESTINATION lib COMPONENT vom) +install(FILES ${VOM_HEADERS} DESTINATION include/vom COMPONENT vom) if (Boost_FOUND) if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") diff --git a/extras/vom/vom/api_types.cpp b/extras/vom/vom/api_types.cpp index 4a81a41daa99..ea75d7fd8ee4 100644 --- a/extras/vom/vom/api_types.cpp +++ b/extras/vom/vom/api_types.cpp @@ -17,6 +17,45 @@ namespace VOM { +vapi_enum_ip_neighbor_flags +to_api(const neighbour::flags_t& f) +{ + vapi_enum_ip_neighbor_flags out = IP_API_NEIGHBOR_FLAG_NONE; + + if (f & neighbour::flags_t::STATIC) + out = static_cast(out | + IP_API_NEIGHBOR_FLAG_STATIC); + if (f & neighbour::flags_t::NO_FIB_ENTRY) + out = static_cast( + out | IP_API_NEIGHBOR_FLAG_NO_FIB_ENTRY); + + return (out); +} + +const neighbour::flags_t +from_api(vapi_enum_ip_neighbor_flags f) +{ + neighbour::flags_t out = neighbour::flags_t::NONE; + + if (f & IP_API_NEIGHBOR_FLAG_STATIC) + out |= neighbour::flags_t::STATIC; + if (f & IP_API_NEIGHBOR_FLAG_NO_FIB_ENTRY) + out |= neighbour::flags_t::NO_FIB_ENTRY; + + return out; +} + +void +to_api(const boost::asio::ip::address_v4& a, vapi_type_ip4_address& v) +{ + std::copy_n(std::begin(a.to_bytes()), a.to_bytes().size(), v); +} +void +to_api(const boost::asio::ip::address_v6& a, vapi_type_ip6_address& v) +{ + std::copy_n(std::begin(a.to_bytes()), a.to_bytes().size(), v); +} + void to_api(const ip_address_t& a, vapi_type_address& v) { @@ -43,10 +82,24 @@ to_api(const ip_address_t& a, } } -void -to_api(const boost::asio::ip::address& a, vapi_type_ip4_address& v) +boost::asio::ip::address_v6 +from_api(const vapi_type_ip6_address& v) { - memcpy(v, a.to_v4().to_bytes().data(), 4); + std::array a; + std::copy(v, v + 16, std::begin(a)); + boost::asio::ip::address_v6 v6(a); + + return v6; +} + +boost::asio::ip::address_v4 +from_api(const vapi_type_ip4_address& v) +{ + std::array a; + std::copy(v, v + 4, std::begin(a)); + boost::asio::ip::address_v4 v4(a); + + 
return v4; } ip_address_t diff --git a/extras/vom/vom/api_types.hpp b/extras/vom/vom/api_types.hpp index 5856c22d339b..789bbb19401b 100644 --- a/extras/vom/vom/api_types.hpp +++ b/extras/vom/vom/api_types.hpp @@ -14,6 +14,7 @@ */ #include +#include #include #include @@ -23,14 +24,19 @@ namespace VOM { typedef boost::asio::ip::address ip_address_t; +vapi_enum_ip_neighbor_flags to_api(const neighbour::flags_t& f); +const neighbour::flags_t from_api(vapi_enum_ip_neighbor_flags f); + void to_api(const ip_address_t& a, vapi_type_address& v); -void to_api(const boost::asio::ip::address& a, vapi_type_ip4_address& v); +void to_api(const boost::asio::ip::address_v4& a, vapi_type_ip4_address& v); +void to_api(const boost::asio::ip::address_v6& a, vapi_type_ip6_address& v); void to_api(const boost::asio::ip::address& a, vapi_union_address_union& u, vapi_enum_address_family& af); +boost::asio::ip::address_v4 from_api(const vapi_type_ip4_address& v); +boost::asio::ip::address_v6 from_api(const vapi_type_ip6_address& v); ip_address_t from_api(const vapi_type_address& v); -ip_address_t from_api(const vapi_type_ip4_address& v); ip_address_t from_api(const vapi_union_address_union& u, vapi_enum_address_family af); diff --git a/extras/vom/vom/arp_proxy_config.cpp b/extras/vom/vom/arp_proxy_config.cpp index 275d9f9bf74d..e21fd4a97b2b 100644 --- a/extras/vom/vom/arp_proxy_config.cpp +++ b/extras/vom/vom/arp_proxy_config.cpp @@ -14,6 +14,7 @@ */ #include "vom/arp_proxy_config.hpp" +#include "vom/api_types.hpp" #include "vom/arp_proxy_config_cmds.hpp" #include "vom/prefix.hpp" #include "vom/singular_db_funcs.hpp" @@ -125,8 +126,8 @@ arp_proxy_config::event_handler::handle_populate(const client_db::key_t& key) for (auto& record : *cmd) { auto& payload = record.get_payload(); - boost::asio::ip::address lo = from_bytes(0, payload.proxy.low_address); - boost::asio::ip::address hi = from_bytes(0, payload.proxy.hi_address); + boost::asio::ip::address lo = from_api(payload.proxy.low); + 
boost::asio::ip::address hi = from_api(payload.proxy.hi); arp_proxy_config ap(lo.to_v4(), hi.to_v4()); OM::commit(key, ap); diff --git a/extras/vom/vom/arp_proxy_config_cmds.cpp b/extras/vom/vom/arp_proxy_config_cmds.cpp index deb52c459b72..8185c0910d27 100644 --- a/extras/vom/vom/arp_proxy_config_cmds.cpp +++ b/extras/vom/vom/arp_proxy_config_cmds.cpp @@ -14,6 +14,7 @@ */ #include "vom/arp_proxy_config_cmds.hpp" +#include "vom/api_types.hpp" namespace VOM { namespace arp_proxy_config_cmds { @@ -41,10 +42,8 @@ config_cmd::issue(connection& con) auto& payload = req.get_request().get_payload(); payload.is_add = 1; - std::copy_n(std::begin(m_low.to_bytes()), m_low.to_bytes().size(), - payload.proxy.low_address); - std::copy_n(std::begin(m_high.to_bytes()), m_high.to_bytes().size(), - payload.proxy.hi_address); + to_api(m_low, payload.proxy.low); + to_api(m_high, payload.proxy.hi); VAPI_CALL(req.execute()); @@ -86,10 +85,8 @@ unconfig_cmd::issue(connection& con) auto& payload = req.get_request().get_payload(); payload.is_add = 0; - std::copy_n(std::begin(m_low.to_bytes()), m_low.to_bytes().size(), - payload.proxy.low_address); - std::copy_n(std::begin(m_high.to_bytes()), m_high.to_bytes().size(), - payload.proxy.hi_address); + to_api(m_low, payload.proxy.low); + to_api(m_high, payload.proxy.hi); VAPI_CALL(req.execute()); diff --git a/extras/vom/vom/enum_base.hpp b/extras/vom/vom/enum_base.hpp index 6756e2498de1..015410a57c58 100644 --- a/extras/vom/vom/enum_base.hpp +++ b/extras/vom/vom/enum_base.hpp @@ -51,6 +51,27 @@ class enum_base return (*this); } + /** + * bitwise or assignment + */ + enum_base& operator|=(const enum_base& e) + { + m_value += e.m_value; + m_desc += ":" + e.m_desc; + + return *this; + } + + /** + * bitwise or + */ + enum_base operator|(const enum_base& e1) const + { + enum_base e = *this; + e |= e1; + return e; + } + /** * Comparison operator */ diff --git a/extras/vom/vom/gbp_bridge_domain.cpp b/extras/vom/vom/gbp_bridge_domain.cpp index 
03be83ddb9f9..e370a6abb0ce 100644 --- a/extras/vom/vom/gbp_bridge_domain.cpp +++ b/extras/vom/vom/gbp_bridge_domain.cpp @@ -21,6 +21,16 @@ namespace VOM { +const gbp_bridge_domain::flags_t gbp_bridge_domain::flags_t::NONE(0, "none"); +const gbp_bridge_domain::flags_t gbp_bridge_domain::flags_t::DO_NOT_LEARN( + 0, + "do-not-learn"); + +gbp_bridge_domain::flags_t::flags_t(int v, const std::string& s) + : enum_base(v, s) +{ +} + /** * A DB of al the interfaces, key on the name */ @@ -32,40 +42,56 @@ gbp_bridge_domain::event_handler gbp_bridge_domain::m_evh; * Construct a new object matching the desried state */ gbp_bridge_domain::gbp_bridge_domain(const bridge_domain& bd, - const interface& bvi) + const interface& bvi, + const flags_t& flags) : m_id(bd.id()) , m_bd(bd.singular()) , m_bvi(bvi.singular()) + , m_uu_fwd() + , m_bm_flood() + , m_flags(flags) { } gbp_bridge_domain::gbp_bridge_domain(const bridge_domain& bd, const interface& bvi, - const interface& uu_fwd) + const interface& uu_fwd, + const interface& bm_flood, + const flags_t& flags) : m_id(bd.id()) , m_bd(bd.singular()) , m_bvi(bvi.singular()) , m_uu_fwd(uu_fwd.singular()) + , m_bm_flood(bm_flood.singular()) + , m_flags(flags) { } gbp_bridge_domain::gbp_bridge_domain(const bridge_domain& bd, const std::shared_ptr bvi, - const std::shared_ptr uu_fwd) + const std::shared_ptr uu_fwd, + const std::shared_ptr bm_flood, + const flags_t& flags) : m_id(bd.id()) , m_bd(bd.singular()) - , m_bvi(bvi->singular()) - , m_uu_fwd(uu_fwd->singular()) + , m_bvi(bvi) + , m_uu_fwd(uu_fwd) + , m_bm_flood(bm_flood) + , m_flags(flags) { } gbp_bridge_domain::gbp_bridge_domain(const bridge_domain& bd, const interface& bvi, - const std::shared_ptr uu_fwd) + const std::shared_ptr uu_fwd, + const std::shared_ptr bm_flood, + const flags_t& flags) : m_id(bd.id()) , m_bd(bd.singular()) , m_bvi(bvi.singular()) - , m_uu_fwd(uu_fwd->singular()) + , m_uu_fwd(uu_fwd) + , m_bm_flood(bm_flood) + , m_flags(flags) { } @@ -74,6 +100,8 @@ 
gbp_bridge_domain::gbp_bridge_domain(const gbp_bridge_domain& bd) , m_bd(bd.m_bd) , m_bvi(bd.m_bvi) , m_uu_fwd(bd.m_uu_fwd) + , m_bm_flood(bd.m_bm_flood) + , m_flags(bd.m_flags) { } @@ -90,13 +118,13 @@ gbp_bridge_domain::id() const } const std::shared_ptr -gbp_bridge_domain::get_bridge_domain() +gbp_bridge_domain::get_bridge_domain() const { return m_bd; } const std::shared_ptr -gbp_bridge_domain::get_bvi() +gbp_bridge_domain::get_bvi() const { return m_bvi; } @@ -120,6 +148,13 @@ gbp_bridge_domain::operator==(const gbp_bridge_domain& b) const else equal = false; + if (m_bm_flood && b.m_bm_flood) + equal &= (m_bm_flood->key() == b.m_bm_flood->key()); + else if (!m_bm_flood && !b.m_bm_flood) + ; + else + equal = false; + return ((m_bd->key() == b.m_bd->key()) && equal); } @@ -138,7 +173,8 @@ gbp_bridge_domain::replay() if (rc_t::OK == m_id.rc()) { HW::enqueue(new gbp_bridge_domain_cmds::create_cmd( m_id, (m_bvi ? m_bvi->handle() : handle_t::INVALID), - (m_uu_fwd ? m_uu_fwd->handle() : handle_t::INVALID))); + (m_uu_fwd ? m_uu_fwd->handle() : handle_t::INVALID), + (m_bm_flood ? m_bm_flood->handle() : handle_t::INVALID), m_flags)); } } @@ -181,7 +217,8 @@ gbp_bridge_domain::update(const gbp_bridge_domain& desired) if (rc_t::OK != m_id.rc()) { HW::enqueue(new gbp_bridge_domain_cmds::create_cmd( m_id, (m_bvi ? m_bvi->handle() : handle_t::INVALID), - (m_uu_fwd ? m_uu_fwd->handle() : handle_t::INVALID))); + (m_uu_fwd ? m_uu_fwd->handle() : handle_t::INVALID), + (m_bm_flood ? 
m_bm_flood->handle() : handle_t::INVALID), m_flags)); } } @@ -220,11 +257,13 @@ gbp_bridge_domain::event_handler::handle_populate(const client_db::key_t& key) std::shared_ptr uu_fwd = interface::find(payload.bd.uu_fwd_sw_if_index); + std::shared_ptr bm_flood = + interface::find(payload.bd.bm_flood_sw_if_index); std::shared_ptr bvi = interface::find(payload.bd.bvi_sw_if_index); - if (uu_fwd && bvi) { - gbp_bridge_domain bd(payload.bd.bd_id, bvi, uu_fwd); + if (uu_fwd && bm_flood && bvi) { + gbp_bridge_domain bd(payload.bd.bd_id, bvi, uu_fwd, bm_flood); OM::commit(key, bd); VOM_LOG(log_level_t::DEBUG) << "dump: " << bd.to_string(); } else if (bvi) { diff --git a/extras/vom/vom/gbp_bridge_domain.hpp b/extras/vom/vom/gbp_bridge_domain.hpp index c86c53ebf24e..2c470aa3a6ec 100644 --- a/extras/vom/vom/gbp_bridge_domain.hpp +++ b/extras/vom/vom/gbp_bridge_domain.hpp @@ -34,19 +34,39 @@ class gbp_bridge_domain : public object_base */ typedef bridge_domain::key_t key_t; + struct flags_t : enum_base + { + const static flags_t NONE; + const static flags_t DO_NOT_LEARN; + + static const flags_t& from_vpp(int i); + + private: + flags_t(int v, const std::string& s); + flags_t(); + }; + /** * Construct a GBP bridge_domain */ - gbp_bridge_domain(const bridge_domain& bd, const interface& bvi); gbp_bridge_domain(const bridge_domain& bd, const interface& bvi, - const interface& uu_fwd); + const flags_t& flags = flags_t::NONE); + gbp_bridge_domain(const bridge_domain& bd, + const interface& bvi, + const interface& uu_fwd, + const interface& bm_flood, + const flags_t& flags = flags_t::NONE); gbp_bridge_domain(const bridge_domain& bd, const std::shared_ptr bvi, - const std::shared_ptr uu_fwd); + const std::shared_ptr uu_fwd, + const std::shared_ptr bm_flood, + const flags_t& flags = flags_t::NONE); gbp_bridge_domain(const bridge_domain& bd, const interface& bvi, - const std::shared_ptr uu_fwd); + const std::shared_ptr uu_fwd, + const std::shared_ptr bm_flood, + const flags_t& flags = 
flags_t::NONE); /** * Copy Construct @@ -98,8 +118,8 @@ class gbp_bridge_domain : public object_base */ std::string to_string() const; - const std::shared_ptr get_bridge_domain(); - const std::shared_ptr get_bvi(); + const std::shared_ptr get_bridge_domain() const; + const std::shared_ptr get_bvi() const; private: /** @@ -171,6 +191,8 @@ class gbp_bridge_domain : public object_base std::shared_ptr m_bd; std::shared_ptr m_bvi; std::shared_ptr m_uu_fwd; + std::shared_ptr m_bm_flood; + const flags_t& m_flags; /** * A map of all bridge_domains diff --git a/extras/vom/vom/gbp_bridge_domain_cmds.cpp b/extras/vom/vom/gbp_bridge_domain_cmds.cpp index 60f7cddd9f36..f5a6888b8a7f 100644 --- a/extras/vom/vom/gbp_bridge_domain_cmds.cpp +++ b/extras/vom/vom/gbp_bridge_domain_cmds.cpp @@ -20,10 +20,14 @@ namespace gbp_bridge_domain_cmds { create_cmd::create_cmd(HW::item& item, const handle_t bvi, - const handle_t uu_fwd) + const handle_t uu_fwd, + const handle_t bm_flood, + const gbp_bridge_domain::flags_t& flags) : rpc_cmd(item) , m_bvi(bvi) , m_uu_fwd(uu_fwd) + , m_bm_flood(bm_flood) + , m_flags(flags) { } @@ -31,7 +35,8 @@ bool create_cmd::operator==(const create_cmd& other) const { return ((m_hw_item.data() == other.m_hw_item.data()) && - (m_bvi == other.m_bvi) && (m_uu_fwd == other.m_uu_fwd)); + (m_bvi == other.m_bvi) && (m_uu_fwd == other.m_uu_fwd) && + (m_bm_flood == other.m_bm_flood) && (m_flags == other.m_flags)); } rc_t @@ -44,6 +49,11 @@ create_cmd::issue(connection& con) payload.bd.bd_id = m_hw_item.data(); payload.bd.bvi_sw_if_index = m_bvi.value(); payload.bd.uu_fwd_sw_if_index = m_uu_fwd.value(); + payload.bd.bm_flood_sw_if_index = m_bm_flood.value(); + + payload.bd.flags = GBP_BD_API_FLAG_NONE; + if (gbp_bridge_domain::flags_t::DO_NOT_LEARN == m_flags) + payload.bd.flags = GBP_BD_API_FLAG_DO_NOT_LEARN; VAPI_CALL(req.execute()); diff --git a/extras/vom/vom/gbp_bridge_domain_cmds.hpp b/extras/vom/vom/gbp_bridge_domain_cmds.hpp index e7c501fc598b..a4fd0d51b45f 100644 
--- a/extras/vom/vom/gbp_bridge_domain_cmds.hpp +++ b/extras/vom/vom/gbp_bridge_domain_cmds.hpp @@ -36,7 +36,9 @@ class create_cmd */ create_cmd(HW::item& item, const handle_t bvi, - const handle_t uu_fwd); + const handle_t uu_fwd, + const handle_t bm_flood, + const gbp_bridge_domain::flags_t& flags); /** * Issue the command to VPP/HW @@ -55,6 +57,8 @@ class create_cmd private: const handle_t m_bvi; const handle_t m_uu_fwd; + const handle_t m_bm_flood; + const gbp_bridge_domain::flags_t& m_flags; }; /** diff --git a/extras/vom/vom/gbp_endpoint_group.cpp b/extras/vom/vom/gbp_endpoint_group.cpp index 5f09ed1ffc30..44bdcdb3524a 100644 --- a/extras/vom/vom/gbp_endpoint_group.cpp +++ b/extras/vom/vom/gbp_endpoint_group.cpp @@ -30,6 +30,7 @@ gbp_endpoint_group::gbp_endpoint_group(epg_id_t epg_id, const gbp_bridge_domain& bd) : m_hw(false) , m_epg_id(epg_id) + , m_sclass(0xffff) , m_itf(itf.singular()) , m_rd(rd.singular()) , m_bd(bd.singular()) @@ -41,6 +42,34 @@ gbp_endpoint_group::gbp_endpoint_group(epg_id_t epg_id, const gbp_bridge_domain& bd) : m_hw(false) , m_epg_id(epg_id) + , m_sclass(0xffff) + , m_itf() + , m_rd(rd.singular()) + , m_bd(bd.singular()) +{ +} + +gbp_endpoint_group::gbp_endpoint_group(epg_id_t epg_id, + uint16_t sclass, + const interface& itf, + const gbp_route_domain& rd, + const gbp_bridge_domain& bd) + : m_hw(false) + , m_epg_id(epg_id) + , m_sclass(sclass) + , m_itf(itf.singular()) + , m_rd(rd.singular()) + , m_bd(bd.singular()) +{ +} + +gbp_endpoint_group::gbp_endpoint_group(epg_id_t epg_id, + uint16_t sclass, + const gbp_route_domain& rd, + const gbp_bridge_domain& bd) + : m_hw(false) + , m_epg_id(epg_id) + , m_sclass(sclass) , m_itf() , m_rd(rd.singular()) , m_bd(bd.singular()) @@ -50,6 +79,7 @@ gbp_endpoint_group::gbp_endpoint_group(epg_id_t epg_id, gbp_endpoint_group::gbp_endpoint_group(const gbp_endpoint_group& epg) : m_hw(epg.m_hw) , m_epg_id(epg.m_epg_id) + , m_sclass(epg.m_sclass) , m_itf(epg.m_itf) , m_rd(epg.m_rd) , m_bd(epg.m_bd) @@ 
-77,8 +107,8 @@ gbp_endpoint_group::id() const bool gbp_endpoint_group::operator==(const gbp_endpoint_group& gg) const { - return (key() == gg.key() && (m_itf == gg.m_itf) && (m_rd == gg.m_rd) && - (m_bd == gg.m_bd)); + return (key() == gg.key() && (m_sclass == gg.m_sclass) && + (m_itf == gg.m_itf) && (m_rd == gg.m_rd) && (m_bd == gg.m_bd)); } void @@ -95,7 +125,7 @@ gbp_endpoint_group::replay() { if (m_hw) { HW::enqueue(new gbp_endpoint_group_cmds::create_cmd( - m_hw, m_epg_id, m_bd->id(), m_rd->id(), + m_hw, m_epg_id, m_sclass, m_bd->id(), m_rd->id(), (m_itf ? m_itf->handle() : handle_t::INVALID))); } } @@ -105,8 +135,9 @@ gbp_endpoint_group::to_string() const { std::ostringstream s; s << "gbp-endpoint-group:[" - << "epg:" << m_epg_id << ", " << (m_itf ? m_itf->to_string() : "NULL") - << ", " << m_bd->to_string() << ", " << m_rd->to_string() << "]"; + << "epg:" << m_epg_id << ", sclass:" << m_sclass << ", " + << (m_itf ? m_itf->to_string() : "NULL") << ", " << m_bd->to_string() + << ", " << m_rd->to_string() << "]"; return (s.str()); } @@ -116,7 +147,7 @@ gbp_endpoint_group::update(const gbp_endpoint_group& r) { if (rc_t::OK != m_hw.rc()) { HW::enqueue(new gbp_endpoint_group_cmds::create_cmd( - m_hw, m_epg_id, m_bd->id(), m_rd->id(), + m_hw, m_epg_id, m_sclass, m_bd->id(), m_rd->id(), (m_itf ? 
m_itf->handle() : handle_t::INVALID))); } } @@ -146,13 +177,13 @@ gbp_endpoint_group::dump(std::ostream& os) } const std::shared_ptr -gbp_endpoint_group::get_route_domain() +gbp_endpoint_group::get_route_domain() const { return m_rd; } const std::shared_ptr -gbp_endpoint_group::get_bridge_domain() +gbp_endpoint_group::get_bridge_domain() const { return m_bd; } @@ -194,12 +225,13 @@ gbp_endpoint_group::event_handler::handle_populate(const client_db::key_t& key) << payload.epg.bd_id << "]"; if (itf && bd && rd) { - gbp_endpoint_group gbpe(payload.epg.epg_id, *itf, *rd, *bd); + gbp_endpoint_group gbpe(payload.epg.epg_id, payload.epg.sclass, *itf, *rd, + *bd); OM::commit(key, gbpe); VOM_LOG(log_level_t::DEBUG) << "read: " << gbpe.to_string(); } else if (bd && rd) { - gbp_endpoint_group gbpe(payload.epg.epg_id, *rd, *bd); + gbp_endpoint_group gbpe(payload.epg.epg_id, payload.epg.sclass, *rd, *bd); OM::commit(key, gbpe); VOM_LOG(log_level_t::DEBUG) << "read: " << gbpe.to_string(); diff --git a/extras/vom/vom/gbp_endpoint_group.hpp b/extras/vom/vom/gbp_endpoint_group.hpp index ed0d8d93cdee..b60b1553e3fc 100644 --- a/extras/vom/vom/gbp_endpoint_group.hpp +++ b/extras/vom/vom/gbp_endpoint_group.hpp @@ -51,6 +51,15 @@ class gbp_endpoint_group : public object_base gbp_endpoint_group(epg_id_t epg_id, const gbp_route_domain& rd, const gbp_bridge_domain& bd); + gbp_endpoint_group(epg_id_t epg_id, + uint16_t sclass, + const interface& itf, + const gbp_route_domain& rd, + const gbp_bridge_domain& bd); + gbp_endpoint_group(epg_id_t epg_id, + uint16_t sclass, + const gbp_route_domain& rd, + const gbp_bridge_domain& bd); /** * Copy Construct @@ -102,8 +111,8 @@ class gbp_endpoint_group : public object_base */ epg_id_t id() const; - const std::shared_ptr get_route_domain(); - const std::shared_ptr get_bridge_domain(); + const std::shared_ptr get_route_domain() const; + const std::shared_ptr get_bridge_domain() const; private: /** @@ -177,6 +186,11 @@ class gbp_endpoint_group : public 
object_base */ epg_id_t m_epg_id; + /** + * The SClass on the wire + */ + uint16_t m_sclass; + /** * The uplink interface for the endpoint group */ diff --git a/extras/vom/vom/gbp_endpoint_group_cmds.cpp b/extras/vom/vom/gbp_endpoint_group_cmds.cpp index 45523a6326ef..8d0e48e5c2f0 100644 --- a/extras/vom/vom/gbp_endpoint_group_cmds.cpp +++ b/extras/vom/vom/gbp_endpoint_group_cmds.cpp @@ -20,11 +20,13 @@ namespace gbp_endpoint_group_cmds { create_cmd::create_cmd(HW::item& item, epg_id_t epg_id, + uint16_t sclass, uint32_t bd_id, route::table_id_t rd_id, const handle_t& itf) : rpc_cmd(item) , m_epg_id(epg_id) + , m_sclass(sclass) , m_bd_id(bd_id) , m_rd_id(rd_id) , m_itf(itf) @@ -46,6 +48,7 @@ create_cmd::issue(connection& con) auto& payload = req.get_request().get_payload(); payload.epg.uplink_sw_if_index = m_itf.value(); payload.epg.epg_id = m_epg_id; + payload.epg.sclass = m_sclass; payload.epg.bd_id = m_bd_id; payload.epg.rd_id = m_rd_id; diff --git a/extras/vom/vom/gbp_endpoint_group_cmds.hpp b/extras/vom/vom/gbp_endpoint_group_cmds.hpp index 39f69e081efb..2294629ee160 100644 --- a/extras/vom/vom/gbp_endpoint_group_cmds.hpp +++ b/extras/vom/vom/gbp_endpoint_group_cmds.hpp @@ -35,6 +35,7 @@ class create_cmd : public rpc_cmd, vapi::Gbp_endpoint_group_add> */ create_cmd(HW::item& item, epg_id_t epg_id, + uint16_t sclass, uint32_t bd_id, route::table_id_t rd_id, const handle_t& itf); @@ -56,6 +57,7 @@ class create_cmd : public rpc_cmd, vapi::Gbp_endpoint_group_add> private: const epg_id_t m_epg_id; + const uint16_t m_sclass; const uint32_t m_bd_id; const route::table_id_t m_rd_id; const handle_t m_itf; diff --git a/extras/vom/vom/gbp_route_domain.cpp b/extras/vom/vom/gbp_route_domain.cpp index 8e44db5a4f88..96216a6cab50 100644 --- a/extras/vom/vom/gbp_route_domain.cpp +++ b/extras/vom/vom/gbp_route_domain.cpp @@ -77,7 +77,7 @@ gbp_route_domain::id() const } const std::shared_ptr -gbp_route_domain::get_route_domain() +gbp_route_domain::get_route_domain() const { 
return m_rd; } diff --git a/extras/vom/vom/gbp_route_domain.hpp b/extras/vom/vom/gbp_route_domain.hpp index ff13d1d6e05e..0d133b377a57 100644 --- a/extras/vom/vom/gbp_route_domain.hpp +++ b/extras/vom/vom/gbp_route_domain.hpp @@ -96,7 +96,7 @@ class gbp_route_domain : public object_base */ std::string to_string() const; - const std::shared_ptr get_route_domain(); + const std::shared_ptr get_route_domain() const; private: /** diff --git a/extras/vom/vom/gbp_vxlan_cmds.cpp b/extras/vom/vom/gbp_vxlan_cmds.cpp index 90a77fbb8960..a3565902b5d9 100644 --- a/extras/vom/vom/gbp_vxlan_cmds.cpp +++ b/extras/vom/vom/gbp_vxlan_cmds.cpp @@ -15,8 +15,6 @@ #include "vom/gbp_vxlan_cmds.hpp" -#include - namespace VOM { namespace gbp_vxlan_cmds { create_cmd::create_cmd(HW::item& item, diff --git a/extras/vom/vom/interface_cmds.hpp b/extras/vom/vom/interface_cmds.hpp index 13a47e6a6d7f..218d4b083d11 100644 --- a/extras/vom/vom/interface_cmds.hpp +++ b/extras/vom/vom/interface_cmds.hpp @@ -25,7 +25,6 @@ #include #include -#include #include #include diff --git a/extras/vom/vom/interface_factory.hpp b/extras/vom/vom/interface_factory.hpp index c90c7942c521..fef2b638c183 100644 --- a/extras/vom/vom/interface_factory.hpp +++ b/extras/vom/vom/interface_factory.hpp @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/extras/vom/vom/l2_binding.cpp b/extras/vom/vom/l2_binding.cpp index 9e2c7228dd0c..5f0c7a79f94d 100644 --- a/extras/vom/vom/l2_binding.cpp +++ b/extras/vom/vom/l2_binding.cpp @@ -15,6 +15,7 @@ #include "vom/l2_binding.hpp" #include "vom/l2_binding_cmds.hpp" +#include "vom/l2_vtr_cmds.hpp" #include "vom/singular_db_funcs.hpp" namespace VOM { @@ -25,36 +26,6 @@ singular_db l2_binding::m_db; l2_binding::event_handler l2_binding::m_evh; -/* - * Make sure these are in sync with the smae enum in VPP - */ -const l2_binding::l2_vtr_op_t l2_binding::l2_vtr_op_t::L2_VTR_DISABLED( - 0, - "disabled"); -const l2_binding::l2_vtr_op_t 
l2_binding::l2_vtr_op_t::L2_VTR_PUSH_1(1, - "push-1"); -const l2_binding::l2_vtr_op_t l2_binding::l2_vtr_op_t::L2_VTR_PUSH_2(2, - "push-2"); -const l2_binding::l2_vtr_op_t l2_binding::l2_vtr_op_t::L2_VTR_POP_1(3, "pop-1"); -const l2_binding::l2_vtr_op_t l2_binding::l2_vtr_op_t::L2_VTR_POP_2(4, "pop-2"); -const l2_binding::l2_vtr_op_t l2_binding::l2_vtr_op_t::L2_VTR_TRANSLATE_1_1( - 5, - "translate-1-1"); -const l2_binding::l2_vtr_op_t l2_binding::l2_vtr_op_t::L2_VTR_TRANSLATE_1_2( - 6, - "translate-1-2"); -const l2_binding::l2_vtr_op_t l2_binding::l2_vtr_op_t::L2_VTR_TRANSLATE_2_1( - 7, - "translate-2-1"); -const l2_binding::l2_vtr_op_t l2_binding::l2_vtr_op_t::L2_VTR_TRANSLATE_2_2( - 5, - "translate-2-2"); - -l2_binding::l2_vtr_op_t::l2_vtr_op_t(int v, const std::string s) - : enum_base(v, s) -{ -} - const l2_binding::l2_port_type_t l2_binding::l2_port_type_t::L2_PORT_TYPE_NORMAL(0, "normal"); const l2_binding::l2_port_type_t l2_binding::l2_port_type_t::L2_PORT_TYPE_BVI( @@ -148,8 +119,7 @@ l2_binding::replay() } if (m_vtr_op && handle_t::INVALID != m_itf->handle()) { - HW::enqueue(new l2_binding_cmds::set_vtr_op_cmd(m_vtr_op, m_itf->handle(), - m_vtr_op_tag)); + HW::enqueue(new set_vtr_op_cmd(m_vtr_op, m_itf->handle(), m_vtr_op_tag)); } } @@ -204,8 +174,7 @@ l2_binding::update(const l2_binding& desired) * set the VTR operation if request */ if (m_vtr_op.update(desired.m_vtr_op)) { - HW::enqueue(new l2_binding_cmds::set_vtr_op_cmd(m_vtr_op, m_itf->handle(), - m_vtr_op_tag)); + HW::enqueue(new set_vtr_op_cmd(m_vtr_op, m_itf->handle(), m_vtr_op_tag)); } } diff --git a/extras/vom/vom/l2_binding.hpp b/extras/vom/vom/l2_binding.hpp index 0a30a0cc894c..19082892f5b8 100644 --- a/extras/vom/vom/l2_binding.hpp +++ b/extras/vom/vom/l2_binding.hpp @@ -20,6 +20,7 @@ #include "vom/hw.hpp" #include "vom/inspect.hpp" #include "vom/interface.hpp" +#include "vom/l2_vtr.hpp" #include "vom/object_base.hpp" #include "vom/om.hpp" #include "vom/singular_db.hpp" @@ -37,25 +38,6 @@ class 
l2_binding : public object_base */ typedef interface::key_t key_t; - struct l2_vtr_op_t : public enum_base - { - l2_vtr_op_t(const l2_vtr_op_t& l) = default; - ~l2_vtr_op_t() = default; - - const static l2_vtr_op_t L2_VTR_DISABLED; - const static l2_vtr_op_t L2_VTR_PUSH_1; - const static l2_vtr_op_t L2_VTR_PUSH_2; - const static l2_vtr_op_t L2_VTR_POP_1; - const static l2_vtr_op_t L2_VTR_POP_2; - const static l2_vtr_op_t L2_VTR_TRANSLATE_1_1; - const static l2_vtr_op_t L2_VTR_TRANSLATE_1_2; - const static l2_vtr_op_t L2_VTR_TRANSLATE_2_1; - const static l2_vtr_op_t L2_VTR_TRANSLATE_2_2; - - private: - l2_vtr_op_t(int v, const std::string s); - }; - struct l2_port_type_t : public enum_base { l2_port_type_t(const l2_port_type_t& l) = default; diff --git a/extras/vom/vom/l2_binding_cmds.cpp b/extras/vom/vom/l2_binding_cmds.cpp index 8769444032c3..70413adb411c 100644 --- a/extras/vom/vom/l2_binding_cmds.cpp +++ b/extras/vom/vom/l2_binding_cmds.cpp @@ -121,50 +121,6 @@ unbind_cmd::to_string() const return (s.str()); } - -set_vtr_op_cmd::set_vtr_op_cmd(HW::item& item, - const handle_t& itf, - uint16_t tag) - : rpc_cmd(item) - , m_itf(itf) - , m_tag(tag) -{ -} - -bool -set_vtr_op_cmd::operator==(const set_vtr_op_cmd& other) const -{ - return ( - (m_hw_item.data() == other.m_hw_item.data() && m_itf == other.m_itf) && - (m_tag == other.m_tag)); -} - -rc_t -set_vtr_op_cmd::issue(connection& con) -{ - msg_t req(con.ctx(), std::ref(*this)); - - auto& payload = req.get_request().get_payload(); - payload.sw_if_index = m_itf.value(); - payload.vtr_op = m_hw_item.data().value(); - payload.push_dot1q = 1; - payload.tag1 = m_tag; - - VAPI_CALL(req.execute()); - - return (wait()); -} - -std::string -set_vtr_op_cmd::to_string() const -{ - std::ostringstream s; - s << "L2-set-vtr-op: " << m_hw_item.to_string() - << " itf:" << m_itf.to_string() << " tag:" << m_tag; - - return (s.str()); -} - }; // namespace l2_binding_cmds }; // namespace VOM diff --git 
a/extras/vom/vom/l2_binding_cmds.hpp b/extras/vom/vom/l2_binding_cmds.hpp index e864f9d01492..45f90b0787f0 100644 --- a/extras/vom/vom/l2_binding_cmds.hpp +++ b/extras/vom/vom/l2_binding_cmds.hpp @@ -117,47 +117,6 @@ class unbind_cmd const l2_binding::l2_port_type_t& m_port_type; }; -/** - * A cmd class sets the VTR operation - */ -class set_vtr_op_cmd : public rpc_cmd, - vapi::L2_interface_vlan_tag_rewrite> -{ -public: - /** - * Constructor - */ - set_vtr_op_cmd(HW::item& item, - const handle_t& itf, - uint16_t tag); - - /** - * Issue the command to VPP/HW - */ - rc_t issue(connection& con); - - /** - * convert to string format for debug purposes - */ - std::string to_string() const; - - /** - * Comparison operator - only used for UT - */ - bool operator==(const set_vtr_op_cmd& i) const; - -private: - /** - * The interface to bind - */ - const handle_t m_itf; - - /** - * The tag for the operation - */ - uint16_t m_tag; -}; - }; // namespace l2_binding_cmds }; // namespace VOM diff --git a/extras/vom/vom/l2_vtr.cpp b/extras/vom/vom/l2_vtr.cpp new file mode 100644 index 000000000000..707a2239ebe6 --- /dev/null +++ b/extras/vom/vom/l2_vtr.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "vom/l2_vtr.hpp" + +namespace VOM { + +/* + * Make sure these are in sync with the same enum in VPP + */ +const l2_vtr_op_t l2_vtr_op_t::L2_VTR_DISABLED(0, "disabled"); +const l2_vtr_op_t l2_vtr_op_t::L2_VTR_PUSH_1(1, "push-1"); +const l2_vtr_op_t l2_vtr_op_t::L2_VTR_PUSH_2(2, "push-2"); +const l2_vtr_op_t l2_vtr_op_t::L2_VTR_POP_1(3, "pop-1"); +const l2_vtr_op_t l2_vtr_op_t::L2_VTR_POP_2(4, "pop-2"); +const l2_vtr_op_t l2_vtr_op_t::L2_VTR_TRANSLATE_1_1(5, "translate-1-1"); +const l2_vtr_op_t l2_vtr_op_t::L2_VTR_TRANSLATE_1_2(6, "translate-1-2"); +const l2_vtr_op_t l2_vtr_op_t::L2_VTR_TRANSLATE_2_1(7, "translate-2-1"); +const l2_vtr_op_t l2_vtr_op_t::L2_VTR_TRANSLATE_2_2(8, "translate-2-2"); + +l2_vtr_op_t::l2_vtr_op_t(int v, const std::string s) + : enum_base(v, s) +{ +} +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "mozilla") + * End: + */ diff --git a/extras/vom/vom/l2_vtr.hpp b/extras/vom/vom/l2_vtr.hpp new file mode 100644 index 000000000000..540cc83fd076 --- /dev/null +++ b/extras/vom/vom/l2_vtr.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __VOM_L2_VTR_H__ +#define __VOM_L2_VTR_H__ + +#include "vom/hw.hpp" +#include "vom/object_base.hpp" +#include "vom/om.hpp" + +namespace VOM { +struct l2_vtr_op_t : public enum_base +{ + l2_vtr_op_t(const l2_vtr_op_t& l) = default; + ~l2_vtr_op_t() = default; + + const static l2_vtr_op_t L2_VTR_DISABLED; + const static l2_vtr_op_t L2_VTR_PUSH_1; + const static l2_vtr_op_t L2_VTR_PUSH_2; + const static l2_vtr_op_t L2_VTR_POP_1; + const static l2_vtr_op_t L2_VTR_POP_2; + const static l2_vtr_op_t L2_VTR_TRANSLATE_1_1; + const static l2_vtr_op_t L2_VTR_TRANSLATE_1_2; + const static l2_vtr_op_t L2_VTR_TRANSLATE_2_1; + const static l2_vtr_op_t L2_VTR_TRANSLATE_2_2; + +private: + l2_vtr_op_t(int v, const std::string s); +}; +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "mozilla") + * End: + */ +#endif diff --git a/extras/vom/vom/l2_vtr_cmds.cpp b/extras/vom/vom/l2_vtr_cmds.cpp new file mode 100644 index 000000000000..df50ae46fe76 --- /dev/null +++ b/extras/vom/vom/l2_vtr_cmds.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "vom/l2_vtr_cmds.hpp" + +namespace VOM { + +set_vtr_op_cmd::set_vtr_op_cmd(HW::item& item, + const handle_t& itf, + uint16_t tag) + : rpc_cmd(item) + , m_itf(itf) + , m_tag(tag) +{ +} + +bool +set_vtr_op_cmd::operator==(const set_vtr_op_cmd& other) const +{ + return ( + (m_hw_item.data() == other.m_hw_item.data() && m_itf == other.m_itf) && + (m_tag == other.m_tag)); +} + +rc_t +set_vtr_op_cmd::issue(connection& con) +{ + msg_t req(con.ctx(), std::ref(*this)); + + auto& payload = req.get_request().get_payload(); + payload.sw_if_index = m_itf.value(); + payload.vtr_op = m_hw_item.data().value(); + payload.push_dot1q = 1; + payload.tag1 = m_tag; + + VAPI_CALL(req.execute()); + + return (wait()); +} + +std::string +set_vtr_op_cmd::to_string() const +{ + std::ostringstream s; + s << "L2-set-vtr-op: " << m_hw_item.to_string() + << " itf:" << m_itf.to_string() << " tag:" << m_tag; + + return (s.str()); +} + +}; // namespace VOM + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "mozilla") + * End: + */ diff --git a/extras/vom/vom/l2_vtr_cmds.hpp b/extras/vom/vom/l2_vtr_cmds.hpp new file mode 100644 index 000000000000..d26b71958171 --- /dev/null +++ b/extras/vom/vom/l2_vtr_cmds.hpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __VOM_L2_VTR_CMDS_H__ +#define __VOM_L2_VTR_CMDS_H__ + +#include "vom/l2_vtr.hpp" +#include "vom/rpc_cmd.hpp" + +#include +#include + +namespace VOM { + +/** + * A cmd class sets the VTR operation + */ +class set_vtr_op_cmd + : public rpc_cmd, vapi::L2_interface_vlan_tag_rewrite> +{ +public: + /** + * Constructor + */ + set_vtr_op_cmd(HW::item& item, + const handle_t& itf, + uint16_t tag); + + /** + * Issue the command to VPP/HW + */ + rc_t issue(connection& con); + + /** + * convert to string format for debug purposes + */ + std::string to_string() const; + + /** + * Comparison operator - only used for UT + */ + bool operator==(const set_vtr_op_cmd& i) const; + +private: + /** + * The interface to bind + */ + const handle_t m_itf; + + /** + * The tag for the operation + */ + uint16_t m_tag; +}; + +}; // namespace VOM + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "mozilla") + * End: + */ + +#endif diff --git a/extras/vom/vom/l2_xconnect.cpp b/extras/vom/vom/l2_xconnect.cpp index 83d6541fad40..1bdb651ff9bb 100644 --- a/extras/vom/vom/l2_xconnect.cpp +++ b/extras/vom/vom/l2_xconnect.cpp @@ -14,6 +14,7 @@ */ #include "vom/l2_xconnect.hpp" +#include "vom/l2_vtr_cmds.hpp" #include "vom/l2_xconnect_cmds.hpp" #include "vom/singular_db_funcs.hpp" @@ -33,6 +34,8 @@ l2_xconnect::l2_xconnect(const interface& east_itf, const interface& west_itf) , m_west_itf(west_itf.singular()) , m_xconnect_east(0) , m_xconnect_west(0) + , m_vtr_op(l2_vtr_op_t::L2_VTR_DISABLED, rc_t::UNSET) + , m_vtr_op_tag(0) { } @@ -41,6 +44,8 @@ l2_xconnect::l2_xconnect(const l2_xconnect& o) , m_west_itf(o.m_west_itf) , m_xconnect_east(o.m_xconnect_east) , m_xconnect_west(o.m_xconnect_west) + , m_vtr_op(o.m_vtr_op) + , m_vtr_op_tag(o.m_vtr_op_tag) { } @@ -90,6 +95,11 @@ l2_xconnect::replay() HW::enqueue(new l2_xconnect_cmds::bind_cmd( m_xconnect_west, m_west_itf->handle(), m_east_itf->handle())); } + + if (m_vtr_op && handle_t::INVALID 
!= m_east_itf->handle()) { + HW::enqueue( + new set_vtr_op_cmd(m_vtr_op, m_east_itf->handle(), m_vtr_op_tag)); + } } l2_xconnect::~l2_xconnect() @@ -111,6 +121,15 @@ l2_xconnect::to_string() const return (s.str()); } +void +l2_xconnect::set(const l2_vtr_op_t& op, uint16_t tag) +{ + assert(rc_t::UNSET == m_vtr_op.rc()); + m_vtr_op.set(rc_t::NOOP); + m_vtr_op.update(op); + m_vtr_op_tag = tag; +} + void l2_xconnect::update(const l2_xconnect& desired) { @@ -123,6 +142,14 @@ l2_xconnect::update(const l2_xconnect& desired) HW::enqueue(new l2_xconnect_cmds::bind_cmd( m_xconnect_west, m_west_itf->handle(), m_east_itf->handle())); } + + /* + * set the VTR operation if request + */ + if (m_vtr_op.update(desired.m_vtr_op)) { + HW::enqueue( + new set_vtr_op_cmd(m_vtr_op, m_east_itf->handle(), m_vtr_op_tag)); + } } std::shared_ptr diff --git a/extras/vom/vom/l2_xconnect.hpp b/extras/vom/vom/l2_xconnect.hpp index 0699869bcdb5..32acd61dd8ef 100644 --- a/extras/vom/vom/l2_xconnect.hpp +++ b/extras/vom/vom/l2_xconnect.hpp @@ -19,6 +19,7 @@ #include "vom/hw.hpp" #include "vom/inspect.hpp" #include "vom/interface.hpp" +#include "vom/l2_vtr.hpp" #include "vom/object_base.hpp" #include "vom/om.hpp" #include "vom/singular_db.hpp" @@ -77,6 +78,11 @@ class l2_xconnect : public object_base */ static void dump(std::ostream& os); + /** + * Set the VTR operation on the binding/interface + */ + void set(const l2_vtr_op_t& op, uint16_t tag); + /** * Static function to find the bridge_domain in the model */ @@ -174,6 +180,16 @@ class l2_xconnect : public object_base */ HW::item m_xconnect_west; + /** + * HW configuration for the VTR option + */ + HW::item m_vtr_op; + + /** + * The Dot1q tag for the VTR operation + */ + uint16_t m_vtr_op_tag; + /** * A map of all L2 interfaces key against the interface's handle_t */ diff --git a/extras/vom/vom/neighbour.cpp b/extras/vom/vom/neighbour.cpp index 44e2760a1ecd..a97892d7cdb7 100644 --- a/extras/vom/vom/neighbour.cpp +++ b/extras/vom/vom/neighbour.cpp 
@@ -14,6 +14,7 @@ */ #include "vom/neighbour.hpp" +#include "vom/api_types.hpp" #include "vom/neighbour_cmds.hpp" #include "vom/singular_db_funcs.hpp" @@ -21,21 +22,33 @@ namespace VOM { singular_db neighbour::m_db; neighbour::event_handler neighbour::m_evh; +const neighbour::flags_t neighbour::flags_t::NONE(0, ""); +const neighbour::flags_t neighbour::flags_t::STATIC(1, "static"); +const neighbour::flags_t neighbour::flags_t::NO_FIB_ENTRY(2, "no-fib-entry"); + +neighbour::flags_t::flags_t(int v, const std::string s) + : enum_base(v, s) +{ +} + neighbour::neighbour(const interface& itf, const boost::asio::ip::address& ip_addr, - const mac_address_t& mac) + const mac_address_t& mac, + const flags_t flags) : m_hw(false) , m_itf(itf.singular()) , m_ip_addr(ip_addr) , m_mac(mac) + , m_flags(flags) { } -neighbour::neighbour(const neighbour& bde) - : m_hw(bde.m_hw) - , m_itf(bde.m_itf) - , m_ip_addr(bde.m_ip_addr) - , m_mac(bde.m_mac) +neighbour::neighbour(const neighbour& n) + : m_hw(n.m_hw) + , m_itf(n.m_itf) + , m_ip_addr(n.m_ip_addr) + , m_mac(n.m_mac) + , m_flags(n.m_flags) { } @@ -63,8 +76,8 @@ void neighbour::sweep() { if (m_hw) { - HW::enqueue( - new neighbour_cmds::delete_cmd(m_hw, m_itf->handle(), m_mac, m_ip_addr)); + HW::enqueue(new neighbour_cmds::delete_cmd(m_hw, m_itf->handle(), m_mac, + m_ip_addr, m_flags)); } HW::write(); } @@ -73,8 +86,8 @@ void neighbour::replay() { if (m_hw) { - HW::enqueue( - new neighbour_cmds::create_cmd(m_hw, m_itf->handle(), m_mac, m_ip_addr)); + HW::enqueue(new neighbour_cmds::create_cmd(m_hw, m_itf->handle(), m_mac, + m_ip_addr, m_flags)); } } @@ -83,7 +96,7 @@ neighbour::to_string() const { std::ostringstream s; s << "neighbour:[" << m_itf->to_string() << ", " << m_mac.to_string() << ", " - << m_ip_addr.to_string() << "]"; + << m_ip_addr.to_string() << " " << m_flags.to_string() << "]"; return (s.str()); } @@ -95,8 +108,8 @@ neighbour::update(const neighbour& r) * create the table if it is not yet created */ if (rc_t::OK != 
m_hw.rc()) { - HW::enqueue( - new neighbour_cmds::create_cmd(m_hw, m_itf->handle(), m_mac, m_ip_addr)); + HW::enqueue(new neighbour_cmds::create_cmd(m_hw, m_itf->handle(), m_mac, + m_ip_addr, m_flags)); } } @@ -165,13 +178,15 @@ neighbour::populate_i(const client_db::key_t& key, */ auto& payload = record.get_payload(); - mac_address_t mac(payload.mac_address); - boost::asio::ip::address ip_addr = - from_bytes(payload.is_ipv6, payload.ip_address); - neighbour n(*itf, ip_addr, mac); + mac_address_t mac = from_api(payload.neighbor.mac_address); + boost::asio::ip::address ip_addr = from_api(payload.neighbor.ip_address); + neighbour::flags_t f = from_api(payload.neighbor.flags); + neighbour n(*itf, ip_addr, mac, f); + ; - VOM_LOG(log_level_t::DEBUG) << "neighbour-dump: " << itf->to_string() - << mac.to_string() << ip_addr.to_string(); + VOM_LOG(log_level_t::DEBUG) << "neighbour-dump: " << itf->to_string() << " " + << mac.to_string() << " " << ip_addr.to_string() + << " " << f.to_string(); /* * Write each of the discovered interfaces into the OM, diff --git a/extras/vom/vom/neighbour.hpp b/extras/vom/vom/neighbour.hpp index 500f03d0a61d..4e074bf7f453 100644 --- a/extras/vom/vom/neighbour.hpp +++ b/extras/vom/vom/neighbour.hpp @@ -27,6 +27,30 @@ namespace VOM { class neighbour : public object_base { public: + struct flags_t : public enum_base + { + /** + * Constructor + */ + flags_t(int v, const std::string s); + + /** + * Destructor + */ + ~flags_t() = default; + + flags_t operator|(const flags_t& e1) const + { + flags_t e = *this; + e |= e1; + return e; + } + + const static flags_t NONE; + const static flags_t STATIC; + const static flags_t NO_FIB_ENTRY; + }; + /** * The key for a neighbour entry; * the interface and IP address @@ -38,7 +62,8 @@ class neighbour : public object_base */ neighbour(const interface& itf, const boost::asio::ip::address& ip_addr, - const mac_address_t& mac); + const mac_address_t& mac, + const flags_t flags = flags_t::STATIC); /** * Copy 
Construct @@ -173,6 +198,11 @@ class neighbour : public object_base */ mac_address_t m_mac; + /** + * flags on the entry + */ + flags_t m_flags; + /** * A map of all bridge_domains */ diff --git a/extras/vom/vom/neighbour_cmds.cpp b/extras/vom/vom/neighbour_cmds.cpp index 9bd32923379b..5f9e180b01d8 100644 --- a/extras/vom/vom/neighbour_cmds.cpp +++ b/extras/vom/vom/neighbour_cmds.cpp @@ -14,17 +14,20 @@ */ #include "vom/neighbour_cmds.hpp" +#include "vom/api_types.hpp" namespace VOM { namespace neighbour_cmds { create_cmd::create_cmd(HW::item& item, handle_t itf, const mac_address_t& mac, - const boost::asio::ip::address& ip_addr) + const boost::asio::ip::address& ip_addr, + const neighbour::flags_t& flags) : rpc_cmd(item) , m_itf(itf) , m_mac(mac) , m_ip_addr(ip_addr) + , m_flags(flags) { } @@ -32,7 +35,7 @@ bool create_cmd::operator==(const create_cmd& other) const { return ((m_mac == other.m_mac) && (m_ip_addr == other.m_ip_addr) && - (m_itf == other.m_itf)); + (m_itf == other.m_itf) && (m_flags == other.m_flags)); } rc_t @@ -41,11 +44,12 @@ create_cmd::issue(connection& con) msg_t req(con.ctx(), std::ref(*this)); auto& payload = req.get_request().get_payload(); - payload.sw_if_index = m_itf.value(); payload.is_add = 1; - payload.is_static = 1; - m_mac.to_bytes(payload.mac_address, 6); - to_bytes(m_ip_addr, &payload.is_ipv6, payload.dst_address); + payload.neighbor.sw_if_index = m_itf.value(); + + to_api(m_mac, payload.neighbor.mac_address); + to_api(m_ip_addr, payload.neighbor.ip_address); + payload.neighbor.flags = to_api(m_flags); VAPI_CALL(req.execute()); @@ -66,11 +70,13 @@ create_cmd::to_string() const delete_cmd::delete_cmd(HW::item& item, handle_t itf, const mac_address_t& mac, - const boost::asio::ip::address& ip_addr) + const boost::asio::ip::address& ip_addr, + const neighbour::flags_t& flags) : rpc_cmd(item) , m_itf(itf) , m_mac(mac) , m_ip_addr(ip_addr) + , m_flags(flags) { } @@ -87,11 +93,12 @@ delete_cmd::issue(connection& con) msg_t 
req(con.ctx(), std::ref(*this)); auto& payload = req.get_request().get_payload(); - payload.sw_if_index = m_itf.value(); payload.is_add = 0; - payload.is_static = 1; - m_mac.to_bytes(payload.mac_address, 6); - to_bytes(m_ip_addr, &payload.is_ipv6, payload.dst_address); + payload.neighbor.sw_if_index = m_itf.value(); + + to_api(m_mac, payload.neighbor.mac_address); + to_api(m_ip_addr, payload.neighbor.ip_address); + payload.neighbor.flags = to_api(m_flags); VAPI_CALL(req.execute()); diff --git a/extras/vom/vom/neighbour_cmds.hpp b/extras/vom/vom/neighbour_cmds.hpp index 388dbf1b7ba9..d43a6fe8f3b4 100644 --- a/extras/vom/vom/neighbour_cmds.hpp +++ b/extras/vom/vom/neighbour_cmds.hpp @@ -37,7 +37,8 @@ class create_cmd : public rpc_cmd, create_cmd(HW::item& item, handle_t itf, const mac_address_t& mac, - const boost::asio::ip::address& ip_addr); + const boost::asio::ip::address& ip_addr, + const neighbour::flags_t &flags); /** * Issue the command to VPP/HW @@ -58,6 +59,7 @@ class create_cmd : public rpc_cmd, handle_t m_itf; mac_address_t m_mac; boost::asio::ip::address m_ip_addr; + const neighbour::flags_t &m_flags; }; /** @@ -73,7 +75,8 @@ class delete_cmd : public rpc_cmd, delete_cmd(HW::item& item, handle_t itf, const mac_address_t& mac, - const boost::asio::ip::address& ip_addr); + const boost::asio::ip::address& ip_addr, + const neighbour::flags_t &flags); /** * Issue the command to VPP/HW @@ -94,6 +97,7 @@ class delete_cmd : public rpc_cmd, handle_t m_itf; mac_address_t m_mac; boost::asio::ip::address m_ip_addr; + const neighbour::flags_t &m_flags; }; /** diff --git a/extras/vom/vom/ra_prefix.cpp b/extras/vom/vom/ra_prefix.cpp index 1cf096370826..fe3ad327f6dc 100644 --- a/extras/vom/vom/ra_prefix.cpp +++ b/extras/vom/vom/ra_prefix.cpp @@ -15,6 +15,7 @@ #include +#include "vom/api_types.hpp" #include "vom/ra_prefix.hpp" namespace VOM { @@ -37,9 +38,7 @@ ra_prefix::ra_prefix(const route::prefix_t& pfx, void 
ra_prefix::to_vpp(vapi_payload_sw_interface_ip6nd_ra_prefix& ra_prefix) const { - uint8_t is_ipv6 = 0; - - m_pfx.to_vpp(&is_ipv6, ra_prefix.address, &ra_prefix.address_length); + ra_prefix.prefix = to_api(m_pfx); ra_prefix.use_default = m_use_default; ra_prefix.no_advertise = m_no_advertise; diff --git a/extras/vom/vom/route.cpp b/extras/vom/vom/route.cpp index ae80fd9e55cd..722628fee873 100644 --- a/extras/vom/vom/route.cpp +++ b/extras/vom/vom/route.cpp @@ -678,7 +678,7 @@ ip_mroute::event_handler::handle_populate(const client_db::key_t& key) ip_r.add(from_vpp(p.path, nh_proto_t::IPV4), itf_flags_t::from_vpp(p.itf_flags)); } - VOM_LOG(log_level_t::INFO) << "ip-mroute-dump: " << ip_r.to_string(); + VOM_LOG(log_level_t::DEBUG) << "ip-mroute-dump: " << ip_r.to_string(); /* * Write each of the discovered interfaces into the OM, diff --git a/extras/vom/vom/tap_interface_cmds.cpp b/extras/vom/vom/tap_interface_cmds.cpp index ffe3e97ba0cc..1d16aff217e1 100644 --- a/extras/vom/vom/tap_interface_cmds.cpp +++ b/extras/vom/vom/tap_interface_cmds.cpp @@ -15,10 +15,8 @@ #include "vom/tap_interface_cmds.hpp" -#include #include -DEFINE_VAPI_MSG_IDS_TAP_API_JSON; DEFINE_VAPI_MSG_IDS_TAPV2_API_JSON; namespace VOM { diff --git a/extras/vom/vom/tap_interface_cmds.hpp b/extras/vom/vom/tap_interface_cmds.hpp index 84720fa3f8e2..386dafaa6482 100644 --- a/extras/vom/vom/tap_interface_cmds.hpp +++ b/extras/vom/vom/tap_interface_cmds.hpp @@ -22,7 +22,6 @@ #include "vom/rpc_cmd.hpp" #include -#include #include namespace VOM { diff --git a/extras/vom/vom/vxlan_tunnel.cpp b/extras/vom/vom/vxlan_tunnel.cpp index ca0790103fff..faf49bd93cd3 100644 --- a/extras/vom/vom/vxlan_tunnel.cpp +++ b/extras/vom/vom/vxlan_tunnel.cpp @@ -116,6 +116,12 @@ vxlan_tunnel::vxlan_tunnel(const vxlan_tunnel& o) { } +bool +vxlan_tunnel::operator==(const vxlan_tunnel& other) const +{ + return ((m_tep == other.m_tep) && (m_mode == other.m_mode)); +} + const handle_t& vxlan_tunnel::handle() const { @@ -177,7 +183,7 
@@ vxlan_tunnel::update(const vxlan_tunnel& desired) /* * the desired state is always that the interface should be created */ - if (!m_hdl) { + if (rc_t::OK != m_hdl.rc()) { if (mode_t::STANDARD == m_mode) HW::enqueue(new vxlan_tunnel_cmds::create_cmd( m_hdl, name(), m_tep, diff --git a/extras/vom/vom/vxlan_tunnel.hpp b/extras/vom/vom/vxlan_tunnel.hpp index 63124e734260..c085ba0b6298 100644 --- a/extras/vom/vom/vxlan_tunnel.hpp +++ b/extras/vom/vom/vxlan_tunnel.hpp @@ -113,6 +113,11 @@ class vxlan_tunnel : public interface */ vxlan_tunnel(const vxlan_tunnel& o); + /** + * comparison operator + */ + bool operator==(const vxlan_tunnel& vx) const; + /** * Return the matching 'singular instance' */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fedf931f041b..161e3c57c447 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.5 FATAL_ERROR) project(vpp C) include(CheckCCompilerFlag) -include(cmake/message.cmake) +include(cmake/misc.cmake) include(cmake/cpu.cmake) include(cmake/ccache.cmake) @@ -32,20 +32,42 @@ execute_process( string(REPLACE "-" ";" VPP_LIB_VERSION ${VPP_VERSION}) list(GET VPP_LIB_VERSION 0 VPP_LIB_VERSION) +############################################################################## +# cross compiling +############################################################################## +if(CMAKE_CROSSCOMPILING) + set(CMAKE_IGNORE_PATH + /usr/lib/${CMAKE_HOST_SYSTEM_PROCESSOR}-linux-gnu/ + /usr/lib/${CMAKE_HOST_SYSTEM_PROCESSOR}-linux-gnu/lib/ + ) +endif() +set(CMAKE_C_COMPILER_TARGET ${CMAKE_SYSTEM_PROCESSOR}-linux-gnu) + ############################################################################## # build config ############################################################################## +check_c_compiler_flag("-Wno-address-of-packed-member" + compiler_flag_no_address_of_packed_member) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY 
${CMAKE_BINARY_DIR}/lib) -set(CMAKE_C_FLAGS_COMMON "-DFORTIFY_SOURCE=2 -fstack-protector-all -Werror") -set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${CMAKE_C_FLAGS_COMMON} -DCLIB_DEBUG") -set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${CMAKE_C_FLAGS_COMMON}") -check_c_compiler_flag("-Wno-address-of-packed-member" compiler_flag_no_address_of_packed_member) +if (CMAKE_BUILD_TYPE) + set(CMAKE_C_FLAGS "-g -fPIC ${CMAKE_C_FLAGS}") + set(CMAKE_C_FLAGS "-fstack-protector-all ${CMAKE_C_FLAGS}") + set(CMAKE_C_FLAGS "-Werror ${CMAKE_C_FLAGS}") + set(CMAKE_C_FLAGS "-DFORTIFY_SOURCE=2 ${CMAKE_C_FLAGS}") +endif() + if (compiler_flag_no_address_of_packed_member) - add_definitions(-Wno-address-of-packed-member) + set(CMAKE_C_FLAGS "-Wno-address-of-packed-member ${CMAKE_C_FLAGS}") endif() +set(CMAKE_C_FLAGS_RELEASE "-O2 ${CMAKE_C_FLAGS_RELEASE}") +set(CMAKE_C_FLAGS_DEBUG "-O0 -DCLIB_DEBUG ${CMAKE_C_FLAGS_DEBUG}") +set(CMAKE_LINKER_FLAGS_RELEASE "-pie -Wl,-z,now ${CMAKE_LINKER_FLAGS_RELEASE}") +string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UC) + ############################################################################## # install config ############################################################################## @@ -75,7 +97,7 @@ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") find_package(OpenSSL REQUIRED) set(SUBDIRS vppinfra svm vlib vlibmemory vlibapi vnet vpp vat vcl plugins - vpp-api tools/vppapigen tools/g2 tools/elftool tools/perftool cmake + vpp-api tools/vppapigen tools/g2 tools/elftool tools/perftool cmake pkg ) elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") set(SUBDIRS vppinfra) @@ -87,16 +109,6 @@ foreach(DIR ${SUBDIRS}) add_subdirectory(${DIR}) endforeach() -############################################################################## -# packaging -############################################################################## -include(cmake/pack.cmake) -add_vpp_packaging( - NAME "vpp" - VENDOR "fd.io" - DESCRIPTION "Vector Packet Processor" -) - 
############################################################################## # detect if we are inside git repo and add configure dependency ############################################################################## @@ -105,6 +117,7 @@ execute_process( COMMAND git rev-parse --show-toplevel OUTPUT_VARIABLE VPP_GIT_TOPLEVEL_DIR OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET ) if (VPP_GIT_TOPLEVEL_DIR) @@ -121,10 +134,11 @@ message(STATUS "Configuration:") pr("VPP version" "${VPP_VERSION}") pr("VPP library version" "${VPP_LIB_VERSION}") pr("GIT toplevel dir" "${VPP_GIT_TOPLEVEL_DIR}") -pr("C flags" "${CMAKE_C_FLAGS}") -pr("Linker flags" "${CMAKE_LINKER_FLAGS}") -pr("Target processor" "${CMAKE_SYSTEM_PROCESSOR}") pr("Build type" "${CMAKE_BUILD_TYPE}") +pr("C flags" "${CMAKE_C_FLAGS}${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UC}}") +pr("Linker flags" "${CMAKE_LINKER_FLAGS}${CMAKE_LINKER_FLAGS_${CMAKE_BUILD_TYPE_UC}}") +pr("Host processor" "${CMAKE_HOST_SYSTEM_PROCESSOR}") +pr("Target processor" "${CMAKE_SYSTEM_PROCESSOR}") pr("Prefix path" "${CMAKE_PREFIX_PATH}") pr("Install prefix" "${CMAKE_INSTALL_PREFIX}") diff --git a/src/cmake/cpu.cmake b/src/cmake/cpu.cmake index 20dab7bfce1b..60cf3b9dcf00 100644 --- a/src/cmake/cpu.cmake +++ b/src/cmake/cpu.cmake @@ -14,7 +14,10 @@ ############################################################################## # Cache line size detection ############################################################################## -if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") +if(CMAKE_CROSSCOMPILING) + message(STATUS "Cross-compiling - cache line size detection disabled") + set(VPP_LOG2_CACHE_LINE_SIZE 6) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") file(READ "/proc/cpuinfo" cpuinfo) string(REPLACE "\n" ";" cpuinfo ${cpuinfo}) foreach(l ${cpuinfo}) @@ -90,7 +93,7 @@ macro(vpp_library_set_multiarch_sources lib) set(l ${lib}_${VARIANT}) add_library(${l} OBJECT ${ARGN}) set_target_properties(${l} PROPERTIES 
POSITION_INDEPENDENT_CODE ON) - target_compile_options(${l} PUBLIC "-DCLIB_MARCH_VARIANT=${VARIANT}") + target_compile_options(${l} PUBLIC "-DCLIB_MARCH_VARIANT=${VARIANT}" -Wall -fno-common) separate_arguments(VARIANT_FLAGS) target_compile_options(${l} PUBLIC ${VARIANT_FLAGS}) target_sources(${lib} PRIVATE $) diff --git a/src/cmake/library.cmake b/src/cmake/library.cmake index 984d6eb8d753..747aeb4705f4 100644 --- a/src/cmake/library.cmake +++ b/src/cmake/library.cmake @@ -20,7 +20,7 @@ macro(add_vpp_library lib) ) add_library(${lib} SHARED ${ARG_SOURCES}) - target_compile_options(${lib} PRIVATE -Wall) + target_compile_options(${lib} PRIVATE -Wall -fno-common) if(VPP_LIB_VERSION) set_target_properties(${lib} PROPERTIES SOVERSION ${VPP_LIB_VERSION}) endif() diff --git a/src/cmake/misc.cmake b/src/cmake/misc.cmake new file mode 100644 index 000000000000..9542557ce9d9 --- /dev/null +++ b/src/cmake/misc.cmake @@ -0,0 +1,67 @@ +# Copyright (c) 2018 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +############################################################################## +# Highlight WARNING and ERROR messages +############################################################################## +function(message) + list(GET ARGV 0 type) + if("$ENV{TERM}" STREQUAL "xterm-256color") + string(ASCII 27 esc) + set(red "${esc}[1;31m") + set(yellow "${esc}[1;33m") + set(reset "${esc}[m") + endif() + if(type STREQUAL FATAL_ERROR OR type STREQUAL SEND_ERROR) + list(REMOVE_AT ARGV 0) + _message(${type} "${red}${ARGV}${reset}") + elseif(type STREQUAL WARNING) + list(REMOVE_AT ARGV 0) + _message(STATUS "${yellow}${ARGV}${reset}") + elseif(type STREQUAL STATUS) + list(REMOVE_AT ARGV 0) + _message(STATUS "${ARGV}") + else() + _message(${ARGV}) + endif() +endfunction() + +############################################################################## +# aligned config output +############################################################################## +function(pr desc val) + if("$ENV{TERM}" STREQUAL "xterm-256color") + string(ASCII 27 esc) + set(reset "${esc}[m") + set(cyan "${esc}[36m") + endif() + string(LENGTH ${desc} len) + while (len LESS 20) + set (desc "${desc} ") + string(LENGTH ${desc} len) + endwhile() + _message("${cyan}${desc}${reset}: ${val}") +endfunction() + +############################################################################## +# string append +############################################################################## + +macro(string_append var str) + if (NOT ${var}) + set(${var} "${str}") + else() + set(${var} "${${var}} ${str}") + endif() +endmacro() + diff --git a/src/cmake/pack.cmake b/src/cmake/pack.cmake index 1df19811f5b5..0da3093b1a03 100644 --- a/src/cmake/pack.cmake +++ b/src/cmake/pack.cmake @@ -36,6 +36,7 @@ macro(add_vpp_packaging name) # extract version from git execute_process( COMMAND git describe --long --match v* + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE VER OUTPUT_STRIP_TRAILING_WHITESPACE ) diff --git 
a/src/cmake/plugin.cmake b/src/cmake/plugin.cmake index 1cff910db5f8..47296ed451fe 100644 --- a/src/cmake/plugin.cmake +++ b/src/cmake/plugin.cmake @@ -21,7 +21,7 @@ macro(add_vpp_plugin name) set(plugin_name ${name}_plugin) set(api_includes) if(NOT PLUGIN_COMPONENT) - set(PLUGIN_COMPONENT vpp-plugin-misc) + set(PLUGIN_COMPONENT vpp-plugin-core) endif() if(NOT PLUGIN_DEV_COMPONENT) if(NOT VPP_EXTERNAL_PROJECT) @@ -44,6 +44,7 @@ macro(add_vpp_plugin name) ) endforeach() add_library(${plugin_name} SHARED ${PLUGIN_SOURCES} ${api_includes}) + set_target_properties(${plugin_name} PROPERTIES NO_SONAME 1) target_compile_options(${plugin_name} PRIVATE -Wall) if(NOT VPP_EXTERNAL_PROJECT) add_dependencies(${plugin_name} vpp_version_h api_headers) @@ -74,6 +75,7 @@ macro(add_vpp_plugin name) set(test_plugin_name ${name}_test_plugin) add_library(${test_plugin_name} SHARED ${PLUGIN_API_TEST_SOURCES} ${api_includes}) + set_target_properties(${test_plugin_name} PROPERTIES NO_SONAME 1) if(NOT VPP_EXTERNAL_PROJECT) add_dependencies(${test_plugin_name} api_headers) endif() @@ -92,4 +94,3 @@ macro(add_vpp_plugin name) COMPONENT ${PLUGIN_COMPONENT} ) endmacro() - diff --git a/src/pkg/CMakeLists.txt b/src/pkg/CMakeLists.txt new file mode 100644 index 000000000000..357d966ddee7 --- /dev/null +++ b/src/pkg/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +get_cmake_property(VPP_COMPONENTS COMPONENTS) +string(REPLACE ";" " " VPP_COMPONENTS "${VPP_COMPONENTS}") + +execute_process( + COMMAND date -R + OUTPUT_VARIABLE TIMESTAMP + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +foreach(f rules changelog) + configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/debian/${f}.in + ${CMAKE_BINARY_DIR}/debian/${f} + ) +endforeach() + +foreach(f control copyright vpp.preinst vpp.postrm vpp.postinst vpp.service) + file(COPY + ${CMAKE_CURRENT_SOURCE_DIR}/debian/${f} + DESTINATION ${CMAKE_BINARY_DIR}/debian + ) +endforeach() + +file(WRITE ${CMAKE_BINARY_DIR}/debian/compat "9\n") + +add_custom_target(package-deb + COMMENT "Building .deb packages..." + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMAND "dpkg-buildpackage" "-us" "-uc" "-b" + USES_TERMINAL +) diff --git a/src/pkg/debian/changelog.in b/src/pkg/debian/changelog.in new file mode 100644 index 000000000000..4d9b87ab553b --- /dev/null +++ b/src/pkg/debian/changelog.in @@ -0,0 +1,6 @@ +vpp (@VPP_VERSION@) unstable; urgency=low + + * no description + + -- fd.io VPP @TIMESTAMP@ + diff --git a/src/pkg/debian/control b/src/pkg/debian/control new file mode 100644 index 000000000000..a89371f3b88c --- /dev/null +++ b/src/pkg/debian/control @@ -0,0 +1,74 @@ +Source: vpp +Section: net +Priority: extra +Maintainer: fd.io VPP Packaging Team +Build-Depends: debhelper (>= 9), + dh-systemd, + dh-python, + python-all +Standards-Version: 3.9.4 + +Package: vpp +Architecture: any +Depends: libvppinfra (= ${source:Version}), + ${shlibs:Depends}, + ${misc:Depends} +Description: Vector Packet Processing--executables + This package provides VPP executables: vpp, vpp_api_test, vpp_json_test + vpp - the vector packet engine + vpp_api_test - vector packet engine API test tool + vpp_json_test - vector packet engine JSON test tool + +Package: vpp-dbg +Architecture: any +Depends: ${misc:Depends} +Description: Vector Packet Processing--debug symbols + +Package: vpp-dev +Architecture: any +Depends: libvppinfra-dev (= 
${source:Version}), + ${misc:Depends}, + ${python:Depends} +Description: Vector Packet Processing--development support + This package contains development support files for the VPP libraries + . + +Package: libvppinfra +Architecture: any +Depends: ${shlibs:Depends}, + ${misc:Depends} +Description: Vector Packet Processing--runtime libraries + This package contains the VPP shared libraries, including: + . + +Package: libvppinfra-dev +Architecture: any +Depends: ${misc:Depends} +Description: Vector Packet Processing--runtime libraries + This package contains the VPP shared libraries, including: + . + +Package: vpp-plugin-core +Architecture: any +Depends: vpp (= ${source:Version}), + ${shlibs:Depends} +Description: Vector Packet Processing--runtime core plugins + This package contains VPP core plugins + . + +Package: vpp-plugin-dpdk +Architecture: any +Depends: vpp (= ${source:Version}), + ${shlibs:Depends} +Description: Vector Packet Processing--runtime dpdk plugin + This package contains the VPP dpdk plugin + . + +Package: vpp-api-python +Architecture: any +Depends: vpp (= ${source:Version}), + ${python:Depends}, + ${misc:Depends} +Description: VPP Python API bindings + This package contains VPP python api bindings + . diff --git a/src/pkg/debian/copyright b/src/pkg/debian/copyright new file mode 100644 index 000000000000..f9775c158f98 --- /dev/null +++ b/src/pkg/debian/copyright @@ -0,0 +1,9 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: optional. +Upstream-Contact: optional. +Source: optional. +Disclaimer: optional. +Comment: optional. +License: Apache-2.0 +Copyright: 2015 Cisco and/or its affiliates and others. + diff --git a/src/pkg/debian/rules.in b/src/pkg/debian/rules.in new file mode 100755 index 000000000000..0351ee503e6a --- /dev/null +++ b/src/pkg/debian/rules.in @@ -0,0 +1,47 @@ +#!/usr/bin/make -f +# See debhelper(7) (uncomment to enable) +# output every command that modifies files on the build system. 
+DH_VERBOSE = 1 + +# see EXAMPLES in dpkg-buildflags(1) and read /usr/share/dpkg/* +DPKG_EXPORT_BUILDFLAGS = 1 +include /usr/share/dpkg/default.mk + +export PYBUILD_NAME = vpp-api-python +export PYBUILD_DIR = @CMAKE_SOURCE_DIR@/vpp-api/python +export PYBUILD_DESTDIR_python2=debian/vpp-api-python/ +export PYBUILD_DISABLE_python2=test +export PYBUILD_SYSTEM=distutils + +# main packaging script based on dh7 syntax +%: + dh $@ --with systemd,python2 --buildsystem=pybuild + +override_dh_strip: + dh_strip --dbg-package=vpp-dbg + +DEB_HOST_MULTIARCH = $(shell dpkg-architecture -qDEB_HOST_MULTIARCH) + +override_dh_install: + @for c in @VPP_COMPONENTS@; do \ + @CMAKE_COMMAND@ \ + -D CMAKE_INSTALL_CONFIG_NAME=@CMAKE_BUILD_TYPE@ \ + -D CMAKE_INSTALL_COMPONENT=$$c \ + -D CMAKE_INSTALL_PREFIX=@CMAKE_BINARY_DIR@/debian/$$c \ + -P @CMAKE_BINARY_DIR@/cmake_install.cmake 2>&1 \ + | grep -v 'Set runtime path of' ; \ + if [ -d debian/$$c/lib ] ; then \ + mv debian/$$c/lib debian/$$c/$(DEB_HOST_MULTIARCH) ; \ + mkdir -p debian/$$c/usr/lib ; \ + mv debian/$$c/$(DEB_HOST_MULTIARCH) debian/$$c/usr/lib ; \ + fi ; \ + for d in bin include share ; do \ + if [ -d debian/$$c/$$d ] ; then \ + mkdir -p debian/$$c/usr ; \ + mv debian/$$c/$$d debian/$$c/usr/$$d ; \ + fi ; \ + done ; \ + if [ -d debian/$$c ] ; then \ + @CMAKE_SOURCE_DIR@/scripts/remove-rpath debian/$$c ; \ + fi ; \ + done diff --git a/src/pkg/debian/vpp.postinst b/src/pkg/debian/vpp.postinst new file mode 100644 index 000000000000..78fcac226a85 --- /dev/null +++ b/src/pkg/debian/vpp.postinst @@ -0,0 +1,8 @@ +#!/bin/sh -e + +# try to set the required values now. This may or may not work. 
+sysctl --system
+
+#DEBHELPER#
+
+exit 0
diff --git a/src/pkg/debian/vpp.postrm b/src/pkg/debian/vpp.postrm
new file mode 100644
index 000000000000..24b4842fd0a2
--- /dev/null
+++ b/src/pkg/debian/vpp.postrm
@@ -0,0 +1,21 @@
+#!/bin/sh -e
+
+removed=
+
+# Unbind every PCI device still bound to a user-mode driver (igb_uio, uio_pci_generic, vfio-pci); -type d is grouped so it applies to all three names
+pci_dirs=`find /sys/bus/pci/drivers -type d \( -name igb_uio -o -name uio_pci_generic -o -name vfio-pci \)`
+for d in $pci_dirs; do
+    for f in ${d}/*; do
+        [ -e "${f}/config" ] || continue
+        echo "${f##*/}" > "${d}/unbind"
+        basename `dirname ${f}` | xargs echo -n "Removing driver"; echo " for PCI ID" `basename ${f}`
+        removed=y
+    done
+done
+if [ -n "${removed}" ]; then
+    echo "There are changes in PCI drivers, rescaning"
+    echo 1 > /sys/bus/pci/rescan
+else
+    echo "There weren't PCI devices binded"
+fi
+
diff --git a/src/pkg/debian/vpp.preinst b/src/pkg/debian/vpp.preinst
new file mode 100644
index 000000000000..d33cacfc3fa7
--- /dev/null
+++ b/src/pkg/debian/vpp.preinst
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+
+# Add the vpp group
+groupadd -f -r vpp
diff --git a/src/pkg/debian/vpp.service b/src/pkg/debian/vpp.service
new file mode 100644
index 000000000000..2e86941de8b5
--- /dev/null
+++ b/src/pkg/debian/vpp.service
@@ -0,0 +1,13 @@
+[Unit]
+Description=vector packet processing engine
+After=network.target
+
+[Service]
+Type=simple
+ExecStartPre=-/sbin/modprobe uio_pci_generic
+ExecStart=/usr/bin/vpp -c /etc/vpp/startup.conf
+ExecStopPost=/bin/rm -f /dev/shm/db /dev/shm/global_vm /dev/shm/vpe-api
+Restart=always
+
+[Install]
+WantedBy=multi-user.target
diff --git a/src/plugins/abf/abf_policy.c b/src/plugins/abf/abf_policy.c
index 1fde97f4c504..c411f3bae629 100644
--- a/src/plugins/abf/abf_policy.c
+++ b/src/plugins/abf/abf_policy.c
@@ -198,6 +198,7 @@ abf_policy_delete (u32 policy_id, const fib_route_path_t * rpaths)
 
       ap = abf_policy_get (api);
       old_pl = ap->ap_pl;
+      fib_path_list_lock (old_pl);
       ap->ap_pl =
         fib_path_list_copy_and_path_remove (ap->ap_pl,
                                             (FIB_PATH_LIST_FLAG_SHARED |
@@ -227,6 +228,7
@@ abf_policy_delete (u32 policy_id, const fib_route_path_t * rpaths) fib_walk_sync (abf_policy_fib_node_type, api, &ctx); } + fib_path_list_unlock (old_pl); } return (0); diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h index 83a1984c22f4..c969377ded77 100644 --- a/src/plugins/acl/fa_node.h +++ b/src/plugins/acl/fa_node.h @@ -241,7 +241,7 @@ typedef enum { } acl_fa_next_t; -enum +typedef enum { ACL_FA_CLEANER_RESCHEDULE = 1, ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, diff --git a/src/plugins/avf/avf.h b/src/plugins/avf/avf.h index 518c7d8329aa..9836451e0128 100644 --- a/src/plugins/avf/avf.h +++ b/src/plugins/avf/avf.h @@ -102,6 +102,7 @@ typedef struct u32 *bufs; u16 n_enqueued; u8 int_mode; + u8 buffer_pool_index; } avf_rxq_t; typedef struct @@ -167,7 +168,7 @@ typedef struct #define AVF_RX_VECTOR_SZ VLIB_FRAME_SIZE -enum +typedef enum { AVF_PROCESS_EVENT_START = 1, AVF_PROCESS_EVENT_STOP = 2, diff --git a/src/plugins/avf/device.c b/src/plugins/avf/device.c index e5c87ed1a2a4..248e3c173bd4 100644 --- a/src/plugins/avf/device.c +++ b/src/plugins/avf/device.c @@ -229,6 +229,9 @@ avf_rxq_init (vlib_main_t * vm, avf_device_t * ad, u16 qid, u16 rxq_size) 2 * CLIB_CACHE_LINE_BYTES, ad->numa_node); + rxq->buffer_pool_index = + vlib_buffer_pool_get_default_for_numa (vm, ad->numa_node); + if (rxq->descs == 0) return vlib_physmem_last_error (vm); @@ -239,7 +242,8 @@ avf_rxq_init (vlib_main_t * vm, avf_device_t * ad, u16 qid, u16 rxq_size) vec_validate_aligned (rxq->bufs, rxq->size, CLIB_CACHE_LINE_BYTES); rxq->qrx_tail = ad->bar0 + AVF_QRX_TAIL (qid); - n_alloc = vlib_buffer_alloc (vm, rxq->bufs, rxq->size - 8); + n_alloc = vlib_buffer_alloc_from_pool (vm, rxq->bufs, rxq->size - 8, + rxq->buffer_pool_index); if (n_alloc == 0) return clib_error_return (0, "buffer allocation error"); @@ -590,7 +594,7 @@ avf_op_config_vsi_queues (vlib_main_t * vm, avf_device_t * ad) { avf_rxq_t *q = vec_elt_at_index (ad->rxqs, i); rxq->ring_len = q->size; - rxq->databuffer_size 
= VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; + rxq->databuffer_size = vlib_buffer_get_default_data_size (vm); rxq->dma_ring_addr = avf_dma_addr (vm, ad, (void *) q->descs); avf_reg_write (ad, AVF_QRX_TAIL (i), q->size - 1); } diff --git a/src/plugins/avf/input.c b/src/plugins/avf/input.c index 8072e94346b4..32fa593dde66 100644 --- a/src/plugins/avf/input.c +++ b/src/plugins/avf/input.c @@ -72,7 +72,9 @@ avf_rxq_refill (vlib_main_t * vm, vlib_node_runtime_t * node, avf_rxq_t * rxq, slot = (rxq->next - n_refill - 1) & mask; n_refill &= ~7; /* round to 8 */ - n_alloc = vlib_buffer_alloc_to_ring (vm, rxq->bufs, slot, size, n_refill); + n_alloc = + vlib_buffer_alloc_to_ring_from_pool (vm, rxq->bufs, slot, size, n_refill, + rxq->buffer_pool_index); if (PREDICT_FALSE (n_alloc != n_refill)) { @@ -146,7 +148,7 @@ avf_rx_attach_tail (vlib_main_t * vm, vlib_buffer_t * bt, vlib_buffer_t * b, b->next_buffer = t->buffers[i]; b->flags |= VLIB_BUFFER_NEXT_PRESENT; b = vlib_get_buffer (vm, b->next_buffer); - clib_memcpy_fast (b, bt, sizeof (vlib_buffer_t)); + vlib_buffer_copy_template (b, bt); tlnifb += b->current_length = qw1 >> AVF_RXD_LEN_SHIFT; i++; } @@ -161,12 +163,15 @@ avf_process_rx_burst (vlib_main_t * vm, vlib_node_runtime_t * node, avf_per_thread_data_t * ptd, u32 n_left, int maybe_multiseg) { - vlib_buffer_t *bt = &ptd->buffer_template; + vlib_buffer_t bt; vlib_buffer_t **b = ptd->bufs; u64 *qw1 = ptd->qw1s; avf_rx_tail_t *tail = ptd->tails; uword n_rx_bytes = 0; + /* copy template into local variable - will save per packet load */ + vlib_buffer_copy_template (&bt, &ptd->buffer_template); + while (n_left >= 4) { if (n_left >= 12) @@ -177,7 +182,10 @@ avf_process_rx_burst (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_prefetch_buffer_header (b[11], LOAD); } - clib_memcpy64_x4 (b[0], b[1], b[2], b[3], bt); + vlib_buffer_copy_template (b[0], &bt); + vlib_buffer_copy_template (b[1], &bt); + vlib_buffer_copy_template (b[2], &bt); + vlib_buffer_copy_template (b[3], &bt); 
n_rx_bytes += b[0]->current_length = qw1[0] >> AVF_RXD_LEN_SHIFT; n_rx_bytes += b[1]->current_length = qw1[1] >> AVF_RXD_LEN_SHIFT; @@ -186,10 +194,10 @@ avf_process_rx_burst (vlib_main_t * vm, vlib_node_runtime_t * node, if (maybe_multiseg) { - n_rx_bytes += avf_rx_attach_tail (vm, bt, b[0], qw1[0], tail + 0); - n_rx_bytes += avf_rx_attach_tail (vm, bt, b[1], qw1[1], tail + 1); - n_rx_bytes += avf_rx_attach_tail (vm, bt, b[2], qw1[2], tail + 2); - n_rx_bytes += avf_rx_attach_tail (vm, bt, b[3], qw1[3], tail + 3); + n_rx_bytes += avf_rx_attach_tail (vm, &bt, b[0], qw1[0], tail + 0); + n_rx_bytes += avf_rx_attach_tail (vm, &bt, b[1], qw1[1], tail + 1); + n_rx_bytes += avf_rx_attach_tail (vm, &bt, b[2], qw1[2], tail + 2); + n_rx_bytes += avf_rx_attach_tail (vm, &bt, b[3], qw1[3], tail + 3); } VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); @@ -205,12 +213,12 @@ avf_process_rx_burst (vlib_main_t * vm, vlib_node_runtime_t * node, } while (n_left) { - clib_memcpy_fast (b[0], bt, sizeof (vlib_buffer_t)); + vlib_buffer_copy_template (b[0], &bt); n_rx_bytes += b[0]->current_length = qw1[0] >> AVF_RXD_LEN_SHIFT; if (maybe_multiseg) - n_rx_bytes += avf_rx_attach_tail (vm, bt, b[0], qw1[0], tail + 0); + n_rx_bytes += avf_rx_attach_tail (vm, &bt, b[0], qw1[0], tail + 0); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); @@ -289,7 +297,7 @@ avf_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, or_q1x4 |= q1x4; u64x4_store_unaligned (q1x4, ptd->qw1s + n_rx_packets); - clib_memcpy_fast (bi, rxq->bufs + next, 4 * sizeof (u32)); + vlib_buffer_copy_indices (bi, rxq->bufs + next, 4); /* next */ next = (next + 4) & mask; @@ -362,6 +370,8 @@ avf_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (bt)->sw_if_index[VLIB_RX] = ad->sw_if_index; vnet_buffer (bt)->sw_if_index[VLIB_TX] = ~0; + bt->buffer_pool_index = rxq->buffer_pool_index; + bt->ref_count = 1; if (n_tail_desc) n_rx_bytes = avf_process_rx_burst (vm, node, ptd, n_rx_packets, 1); diff --git 
a/src/plugins/avf/output.c b/src/plugins/avf/output.c index 0db333be823c..47803fc2729e 100644 --- a/src/plugins/avf/output.c +++ b/src/plugins/avf/output.c @@ -66,7 +66,7 @@ avf_tx_enqueue (vlib_main_t * vm, avf_txq_t * txq, u32 * buffers, if (or_flags & VLIB_BUFFER_NEXT_PRESENT) goto one_by_one; - clib_memcpy_fast (txq->bufs + next, buffers, sizeof (u32) * 4); + vlib_buffer_copy_indices (txq->bufs + next, buffers, 4); if (use_va_dma) { diff --git a/src/plugins/dpdk/CMakeLists.txt b/src/plugins/dpdk/CMakeLists.txt index 45605baada0c..badf9af8349a 100644 --- a/src/plugins/dpdk/CMakeLists.txt +++ b/src/plugins/dpdk/CMakeLists.txt @@ -11,6 +11,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +############################################################################## +# macros +############################################################################## +macro(dpdk_find_library var name) + find_library(${var} NAMES ${name} ${ARGN}) +if (NOT ${var}) + message(WARNING "-- ${name} library not found - dpdk_plugin disabled") + return() +endif() + message(STATUS "DPDK plugin needs ${name} library - found at ${${var}}") +endmacro() + ############################################################################## # Find lib and include files ############################################################################## @@ -18,69 +30,38 @@ find_path(DPDK_INCLUDE_DIR PATH_SUFFIXES dpdk NAMES rte_config.h) find_library(DPDK_LIB NAMES libdpdk.a) if (NOT DPDK_INCLUDE_DIR) - message(WARNING "-- DPDK not found - dpdk_plugin disabled") + message(WARNING "-- DPDK headers not found - dpdk_plugin disabled") return() endif() +if (NOT DPDK_LIB) + dpdk_find_library(DPDK_SHLIB "libdpdk.so") + set(DPDK_IS_SHARED_LIB 1) + message(WARNING "-- linking dpdk plugin against DPDK shared libs") +endif() + +############################################################################## +# Parse DPDK config and version header files 
############################################################################## -# Find DPDK Version -############################################################################## -file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dpdk_vars.c -" -#include -#include -int main() -{ - printf(\"VERSION=%s\\n\", strchr(rte_version(), ' ') + 1); - printf(\"RTE_PKTMBUF_HEADROOM=%u\\n\", RTE_PKTMBUF_HEADROOM); -#ifdef RTE_LIBRTE_PMD_AESNI_MB - printf(\"RTE_LIBRTE_PMD_AESNI_MB=%u\\n\", RTE_LIBRTE_PMD_AESNI_MB); -#endif -#ifdef RTE_LIBRTE_PMD_AESNI_GCM - printf(\"RTE_LIBRTE_PMD_AESNI_GCM=%u\\n\", RTE_LIBRTE_PMD_AESNI_GCM); -#endif -#ifdef RTE_LIBRTE_MLX4_PMD - printf(\"RTE_LIBRTE_MLX4_PMD=%u\\n\", RTE_LIBRTE_MLX4_PMD); -#endif -#ifdef RTE_LIBRTE_MLX5_PMD - printf(\"RTE_LIBRTE_MLX5_PMD=%u\\n\", RTE_LIBRTE_MLX5_PMD); -#ifdef RTE_LIBRTE_MLX5_DLOPEN_DEPS - printf(\"RTE_LIBRTE_MLX5_DLOPEN_DEPS=%u\\n\", RTE_LIBRTE_MLX5_DLOPEN_DEPS); -#endif -#endif - return 0; -} -") - -try_compile(DPDK_VARS_COMPILED - ${CMAKE_CURRENT_BINARY_DIR} - ${CMAKE_CURRENT_BINARY_DIR}/dpdk_vars.c - CMAKE_FLAGS - -DINCLUDE_DIRECTORIES=${DPDK_INCLUDE_DIR} - COPY_FILE ${CMAKE_CURRENT_BINARY_DIR}/dpdk_vars.bin -) -if(DPDK_VARS_COMPILED) - execute_process( - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMAND ./dpdk_vars.bin - OUTPUT_VARIABLE DPDK_VARS - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - string(REPLACE "\n" ";" DPDK_VARS ${DPDK_VARS}) - foreach(v ${DPDK_VARS}) - string(REPLACE "=" ";" v ${v}) +file(STRINGS ${DPDK_INCLUDE_DIR}/rte_config.h rte_config) +file(STRINGS ${DPDK_INCLUDE_DIR}/rte_version.h rte_version) + +foreach(l ${rte_config} ${rte_version}) + if (l MATCHES "^#define[\t ]*RTE_") + STRING(REGEX REPLACE "^#define[\t ]*([A-Z1-9_]+)[\t ]*(.+)" "\\1;\\2" v "${l}") list(GET v 0 name) list(GET v 1 value) set(DPDK_${name} ${value}) - endforeach() -endif() + endif() +endforeach() -file(REMOVE - ${CMAKE_CURRENT_BINARY_DIR}/dpdk_vars.c - ${CMAKE_CURRENT_BINARY_DIR}/dpdk_vars.bin -) +set(DPDK_VERSION + 
"${DPDK_RTE_VER_YEAR}.${DPDK_RTE_VER_MONTH}.${DPDK_RTE_VER_MINOR}") +############################################################################## +# verify headroom size +############################################################################## if(NOT ${DPDK_RTE_PKTMBUF_HEADROOM} EQUAL ${PRE_DATA_SIZE}) message( FATAL_ERROR @@ -90,75 +71,111 @@ if(NOT ${DPDK_RTE_PKTMBUF_HEADROOM} EQUAL ${PRE_DATA_SIZE}) endif() ############################################################################## -# DPDK plugin +# static or dynamic linking ############################################################################## -if(DPDK_INCLUDE_DIR AND DPDK_LIB) - include_directories (${DPDK_INCLUDE_DIR}) - - message(STATUS "Found DPDK ${DPDK_VERSION} in ${DPDK_INCLUDE_DIR}") +unset(DPDK_LINK_LIBRARIES) +unset(DPDK_LINK_FLAGS) +message(STATUS "Found DPDK ${DPDK_VERSION} in ${DPDK_INCLUDE_DIR}") +include_directories (${DPDK_INCLUDE_DIR}) + +if(DPDK_IS_SHARED_LIB) + get_filename_component(DPDK_LIB_DIR ${DPDK_SHLIB} DIRECTORY) + string_append(DPDK_LINK_FLAGS "-L${DPDK_LIB_DIR}") + list(APPEND DPDK_LINK_LIBRARIES ${DPDK_SHLIB}) +else() get_filename_component(DPDK_LIB_DIR ${DPDK_LIB} DIRECTORY) - set(DPDK_LINK_FLAGS "-L${DPDK_LIB_DIR} -Wl,--whole-archive,${DPDK_LIB},--no-whole-archive") - if(DPDK_RTE_LIBRTE_PMD_AESNI_MB OR DPDK_RTE_LIBRTE_PMD_AESNI_GCM) - set(DPDK_LINK_FLAGS "${DPDK_LINK_FLAGS} -Wl,--exclude-libs,libIPSec_MB.a,-l:libIPSec_MB.a") - message(STATUS "DPDK depends on IPSec MB library") + string_append(DPDK_LINK_FLAGS "-L${DPDK_LIB_DIR}") + string_append(DPDK_LINK_FLAGS "-Wl,--whole-archive,${DPDK_LIB},--no-whole-archive") +endif() + +############################################################################## +# libnuma +############################################################################## +dpdk_find_library(NUMA_LIB "numa") +list(APPEND DPDK_LINK_LIBRARIES ${NUMA_LIB}) + +############################################################################## +# AESNI 
libraries +############################################################################## +if(DPDK_RTE_LIBRTE_PMD_AESNI_MB OR DPDK_RTE_LIBRTE_PMD_AESNI_GCM) + if(DPDK_IS_SHARED_LIB) + dpdk_find_library(IPSECMB_LIB "libIPSec_MB.so") + list(APPEND DPDK_LINK_LIBRARIES "${IPSECMB_LIB}") + else() + dpdk_find_library(IPSECMB_LIB "libIPSec_MB.a") + get_filename_component(IPSECMB_LIB_DIR ${IPSECMB_LIB} DIRECTORY) + string_append(DPDK_LINK_FLAGS "-L${IPSECMB_LIB_DIR}") + string_append(DPDK_LINK_FLAGS "-Wl,--exclude-libs,libIPSec_MB.a,-l:libIPSec_MB.a") endif() - if(DPDK_RTE_LIBRTE_MLX4_PMD OR DPDK_RTE_LIBRTE_MLX5_PMD) - if (DPDK_RTE_LIBRTE_MLX5_DLOPEN_DEPS) - set(DPDK_LINK_FLAGS "${DPDK_LINK_FLAGS} -Wl,-lmnl") - message(STATUS "DPDK depends on libmnl (Mellanox PMD requirement)") - else() - set(DPDK_LINK_FLAGS "${DPDK_LINK_FLAGS} -Wl,-lmnl,-libverbs,-lmlx5") - message(STATUS "DPDK depends on libmnl, libibverbs, libmlx5 (Mellanox PMD requirement)") - endif() +endif() + +############################################################################## +# Mellanox libraries +############################################################################## +if(DPDK_RTE_LIBRTE_MLX4_PMD OR DPDK_RTE_LIBRTE_MLX5_PMD) + dpdk_find_library(MNL_LIB "mnl") + list(APPEND DPDK_LINK_LIBRARIES "${MNL_LIB}") + if (DPDK_RTE_LIBRTE_MLX5_DLOPEN_DEPS) + message(STATUS "DPDK depends on libmnl (Mellanox PMD requirement)") + else() + dpdk_find_library(IBVERBS_LIB "ibverbs") + list(APPEND DPDK_LINK_LIBRARIES "${IBVERBS_LIB}") + dpdk_find_library(MLX5_LIB "mlx5") + list(APPEND DPDK_LINK_LIBRARIES "${MLX5_LIB}") + message(STATUS "DPDK depends on libmnl, libibverbs, libmlx5 (Mellanox PMD requirement)") endif() - set(DPDK_LINK_FLAGS "${DPDK_LINK_FLAGS} -Wl,-lnuma") - add_vpp_plugin(dpdk - SOURCES - buffer.c - main.c - thread.c - api/dpdk_api.c - api/dpdk_test.c - device/cli.c - device/common.c - device/device.c - device/flow.c - device/format.c - device/init.c - device/node.c - hqos/hqos.c - ipsec/cli.c - 
ipsec/crypto_node.c - ipsec/esp_decrypt.c - ipsec/esp_encrypt.c - ipsec/ipsec.c - - MULTIARCH_SOURCES - buffer.c - device/device.c - device/node.c - ipsec/crypto_node.c - ipsec/esp_decrypt.c - ipsec/esp_encrypt.c - - API_FILES - api/dpdk.api - - API_TEST_SOURCES - api/dpdk_test.c - - INSTALL_HEADERS - device/dpdk.h - api/dpdk_all_api_h.h - ipsec/ipsec.h - - LINK_FLAGS - ${DPDK_LINK_FLAGS} - - COMPONENT - vpp-plugin-dpdk - ) -else() - message(WARNING "DPDK not found - dpdk disabled") endif() +############################################################################## +# DPDK plugin +############################################################################## +add_vpp_plugin(dpdk + SOURCES + buffer.c + main.c + thread.c +# api/dpdk_api.c +# api/dpdk_test.c + device/cli.c + device/common.c + device/device.c + device/flow.c + device/format.c + device/init.c + device/node.c +# hqos/hqos.c + ipsec/cli.c + ipsec/crypto_node.c + ipsec/esp_decrypt.c + ipsec/esp_encrypt.c + ipsec/ipsec.c + + MULTIARCH_SOURCES + buffer.c + device/device.c + device/node.c + ipsec/crypto_node.c + ipsec/esp_decrypt.c + ipsec/esp_encrypt.c + +# API_FILES +# api/dpdk.api + +# API_TEST_SOURCES +# api/dpdk_test.c + + INSTALL_HEADERS + device/dpdk.h +# api/dpdk_all_api_h.h + ipsec/ipsec.h + + LINK_FLAGS + "${DPDK_LINK_FLAGS}" + + LINK_LIBRARIES + ${DPDK_LINK_LIBRARIES} + + COMPONENT + vpp-plugin-dpdk +) + diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c index ee63f76b0d4f..dd7bf4cc9d1d 100644 --- a/src/plugins/dpdk/buffer.c +++ b/src/plugins/dpdk/buffer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -12,633 +12,392 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -/* - * buffer.c: allocate/free network buffers. - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @file - * - * Allocate/free network buffers. 
- */ #include +#include #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include +#include #include -#include -#include -#include -#include +#include STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM, "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM"); -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - struct rte_mbuf **mbuf_alloc_list; -} dpdk_buffer_per_thread_data; - -typedef struct -{ - int vfio_container_fd; - dpdk_buffer_per_thread_data *ptd; -} dpdk_buffer_main_t; - -dpdk_buffer_main_t dpdk_buffer_main; - -static_always_inline void -dpdk_rte_pktmbuf_free (vlib_main_t * vm, u32 thread_index, vlib_buffer_t * b, - int maybe_next) -{ - struct rte_mbuf *mb; - u32 next, flags; - -next: - flags = b->flags; - next = b->next_buffer; - mb = rte_mbuf_from_vlib_buffer (b); - - if (PREDICT_FALSE (b->n_add_refs)) - { - rte_mbuf_refcnt_update (mb, b->n_add_refs); - b->n_add_refs = 0; - } - - if ((mb = rte_pktmbuf_prefree_seg (mb))) - rte_mempool_put (mb->pool, mb); - - if (maybe_next && (flags & VLIB_BUFFER_NEXT_PRESENT)) - { - b = vlib_get_buffer (vm, next); - goto next; - } -} - #ifndef CLIB_MARCH_VARIANT -static void -del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) -{ - u32 i; - vlib_buffer_t *b; - u32 thread_index = vlib_get_thread_index (); - - for (i = 0; i < vec_len (f->buffers); i++) - { - b = vlib_get_buffer (vm, f->buffers[i]); - dpdk_rte_pktmbuf_free (vm, thread_index, b, 1); - } - - vec_free (f->name); - vec_free (f->buffers); - /* Poison it. */ - clib_memset (f, 0xab, sizeof (f[0])); -} - -/* Add buffer free list. 
*/ -static void -dpdk_buffer_delete_free_list (vlib_main_t * vm, - vlib_buffer_free_list_index_t free_list_index) -{ - vlib_buffer_free_list_t *f; - int i; - - ASSERT (vlib_get_thread_index () == 0); - - f = vlib_buffer_get_free_list (vm, free_list_index); +struct rte_mempool **dpdk_mempool_by_buffer_pool_index = 0; +struct rte_mempool **dpdk_no_cache_mempool_by_buffer_pool_index = 0; - del_free_list (vm, f); - - pool_put (vm->buffer_free_list_pool, f); - - for (i = 1; i < vec_len (vlib_mains); i++) - { - vlib_main_t *wvm = vlib_mains[i]; - f = vlib_buffer_get_free_list (vlib_mains[i], free_list_index); - del_free_list (wvm, f); - pool_put (wvm->buffer_free_list_pool, f); - } -} -#endif - -/* Make sure free list has at least given number of free buffers. */ -uword -CLIB_MULTIARCH_FN (dpdk_buffer_fill_free_list) (vlib_main_t * vm, - vlib_buffer_free_list_t * fl, - uword min_free_buffers) +clib_error_t * +dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp) { - dpdk_main_t *dm = &dpdk_main; - dpdk_buffer_main_t *dbm = &dpdk_buffer_main; - struct rte_mbuf **mb; - uword n_left, first; - word n_alloc; - unsigned socket_id = rte_socket_id (); - u32 thread_index = vlib_get_thread_index (); - dpdk_buffer_per_thread_data *d = vec_elt_at_index (dbm->ptd, thread_index); - struct rte_mempool *rmp = dm->pktmbuf_pools[socket_id]; - dpdk_mempool_private_t *privp = rte_mempool_get_priv (rmp); - vlib_buffer_t bt; + uword buffer_mem_start = vm->buffer_main->buffer_mem_start; + struct rte_mempool *mp, *nmp; + struct rte_pktmbuf_pool_private priv; + enum rte_iova_mode iova_mode; u32 *bi; + u8 *name = 0; - /* Too early? */ - if (PREDICT_FALSE (rmp == 0)) - return 0; + u32 elt_size = + sizeof (struct rte_mbuf) + sizeof (vlib_buffer_t) + bp->data_size; - /* Already have enough free buffers on free list? 
*/ - n_alloc = min_free_buffers - vec_len (fl->buffers); - if (n_alloc <= 0) - return min_free_buffers; + /* create empty mempools */ + vec_validate_aligned (dpdk_mempool_by_buffer_pool_index, bp->index, + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (dpdk_no_cache_mempool_by_buffer_pool_index, bp->index, + CLIB_CACHE_LINE_BYTES); - /* Always allocate round number of buffers. */ - n_alloc = round_pow2 (n_alloc, CLIB_CACHE_LINE_BYTES / sizeof (u32)); + /* normal mempool */ + name = format (name, "vpp pool %u%c", bp->index, 0); + mp = rte_mempool_create_empty ((char *) name, vec_len (bp->buffers), + elt_size, 512, sizeof (priv), + bp->numa_node, 0); + vec_reset_length (name); - /* Always allocate new buffers in reasonably large sized chunks. */ - n_alloc = clib_max (n_alloc, fl->min_n_buffers_each_alloc); + /* non-cached mempool */ + name = format (name, "vpp pool %u (no cache)%c", bp->index, 0); + nmp = rte_mempool_create_empty ((char *) name, vec_len (bp->buffers), + elt_size, 0, sizeof (priv), + bp->numa_node, 0); + vec_free (name); - vec_validate_aligned (d->mbuf_alloc_list, n_alloc - 1, - CLIB_CACHE_LINE_BYTES); + dpdk_mempool_by_buffer_pool_index[bp->index] = mp; + dpdk_no_cache_mempool_by_buffer_pool_index[bp->index] = nmp; - if (rte_mempool_get_bulk (rmp, (void *) d->mbuf_alloc_list, n_alloc) < 0) - return 0; + mp->pool_id = nmp->pool_id = bp->index; - clib_memset (&bt, 0, sizeof (vlib_buffer_t)); - vlib_buffer_init_for_free_list (&bt, fl); - bt.buffer_pool_index = privp->buffer_pool_index; + rte_mempool_set_ops_byname (mp, "vpp", NULL); + rte_mempool_set_ops_byname (nmp, "vpp-no-cache", NULL); - _vec_len (d->mbuf_alloc_list) = n_alloc; + /* Call the mempool priv initializer */ + priv.mbuf_data_room_size = VLIB_BUFFER_PRE_DATA_SIZE + + vlib_buffer_get_default_data_size (vm); + priv.mbuf_priv_size = VLIB_BUFFER_HDR_SIZE; + rte_pktmbuf_pool_init (mp, &priv); + rte_pktmbuf_pool_init (nmp, &priv); - first = vec_len (fl->buffers); - vec_resize_aligned 
(fl->buffers, n_alloc, CLIB_CACHE_LINE_BYTES); + iova_mode = rte_eal_iova_mode (); - n_left = n_alloc; - mb = d->mbuf_alloc_list; - bi = fl->buffers + first; + /* populate mempool object buffer header */ + /* *INDENT-OFF* */ + vec_foreach (bi, bp->buffers) + { + struct rte_mempool_objhdr *hdr; + vlib_buffer_t *b = vlib_get_buffer (vm, *bi); + struct rte_mbuf *mb = rte_mbuf_from_vlib_buffer (b); + hdr = (struct rte_mempool_objhdr *) RTE_PTR_SUB (mb, sizeof (*hdr)); + hdr->mp = mp; + hdr->iova = (iova_mode == RTE_IOVA_VA) ? + pointer_to_uword (mb) : vlib_physmem_get_pa (vm, mb); + STAILQ_INSERT_TAIL (&mp->elt_list, hdr, next); + STAILQ_INSERT_TAIL (&nmp->elt_list, hdr, next); + mp->populated_size++; + nmp->populated_size++; + } + /* *INDENT-ON* */ - ASSERT (n_left % 8 == 0); + /* call the object initializers */ + rte_mempool_obj_iter (mp, rte_pktmbuf_init, 0); - while (n_left >= 8) + /* *INDENT-OFF* */ + vec_foreach (bi, bp->buffers) { - if (PREDICT_FALSE (n_left < 24)) - goto no_prefetch; - - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf (mb[16]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf (mb[17]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf (mb[18]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf (mb[19]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf (mb[20]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf (mb[21]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf (mb[22]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf (mb[23]), STORE); - - no_prefetch: - vlib_get_buffer_indices_with_offset (vm, (void **) mb, bi, 8, - sizeof (struct rte_mbuf)); - clib_memcpy64_x4 (vlib_buffer_from_rte_mbuf (mb[0]), - vlib_buffer_from_rte_mbuf (mb[1]), - vlib_buffer_from_rte_mbuf (mb[2]), - vlib_buffer_from_rte_mbuf (mb[3]), &bt); - clib_memcpy64_x4 (vlib_buffer_from_rte_mbuf (mb[4]), - vlib_buffer_from_rte_mbuf (mb[5]), - 
vlib_buffer_from_rte_mbuf (mb[6]), - vlib_buffer_from_rte_mbuf (mb[7]), &bt); - - n_left -= 8; - mb += 8; - bi += 8; + vlib_buffer_t *b; + b = vlib_buffer_ptr_from_index (buffer_mem_start, *bi, 0); + vlib_buffer_copy_template (b, &bp->buffer_template); } + /* *INDENT-ON* */ + + /* map DMA pages if at least one physical device exists */ + if (rte_eth_dev_count_avail ()) + { + uword i; + size_t page_sz; + vlib_physmem_map_t *pm; + int do_vfio_map = 1; - if (fl->buffer_init_function) - fl->buffer_init_function (vm, fl, fl->buffers + first, n_alloc); + pm = vlib_physmem_get_map (vm, bp->physmem_map_index); + page_sz = 1ULL << pm->log2_page_size; - fl->n_alloc += n_alloc; + for (i = 0; i < pm->n_pages; i++) + { + char *va = ((char *) pm->base) + i * page_sz; + uword pa = (iova_mode == RTE_IOVA_VA) ? + pointer_to_uword (va) : pm->page_table[i]; + + if (do_vfio_map && + rte_vfio_dma_map (pointer_to_uword (va), pa, page_sz)) + do_vfio_map = 0; + + struct rte_mempool_memhdr *memhdr; + memhdr = clib_mem_alloc (sizeof (*memhdr)); + memhdr->mp = mp; + memhdr->addr = va; + memhdr->iova = pa; + memhdr->len = page_sz; + memhdr->free_cb = 0; + memhdr->opaque = 0; + + STAILQ_INSERT_TAIL (&mp->mem_list, memhdr, next); + mp->nb_mem_chunks++; + } + } - return n_alloc; + return 0; } -static_always_inline void -dpdk_prefetch_buffer (vlib_buffer_t * b) +static int +dpdk_ops_vpp_alloc (struct rte_mempool *mp) { - struct rte_mbuf *mb; - mb = rte_mbuf_from_vlib_buffer (b); - CLIB_PREFETCH (mb, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); + clib_warning (""); + return 0; } -static_always_inline void -recycle_or_free (vlib_main_t * vm, vlib_buffer_main_t * bm, u32 bi, - vlib_buffer_t * b) +static void +dpdk_ops_vpp_free (struct rte_mempool *mp) { - u32 thread_index = vlib_get_thread_index (); - - dpdk_rte_pktmbuf_free (vm, thread_index, b, 1); + clib_warning (""); } +#endif + static_always_inline void -vlib_buffer_free_inline (vlib_main_t * vm, - u32 * 
buffers, u32 n_buffers, u32 follow_buffer_next) +dpdk_ops_vpp_enqueue_one (vlib_buffer_t * bt, void *obj) { - vlib_buffer_main_t *bm = &buffer_main; - vlib_buffer_t *bufp[n_buffers], **b = bufp; - u32 thread_index = vlib_get_thread_index (); - int i = 0; - u32 simple_mask = (VLIB_BUFFER_NON_DEFAULT_FREELIST | - VLIB_BUFFER_NEXT_PRESENT); - u32 n_left, *bi; - u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, - u32 follow_buffer_next); + /* Only non-replicated packets (b->ref_count == 1) expected */ - cb = bm->buffer_free_callback; - - if (PREDICT_FALSE (cb != 0)) - n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); + struct rte_mbuf *mb = obj; + vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); + ASSERT (b->ref_count == 1); + ASSERT (b->buffer_pool_index == bt->buffer_pool_index); + vlib_buffer_copy_template (b, bt); +} - if (!n_buffers) - return; +int +CLIB_MULTIARCH_FN (dpdk_ops_vpp_enqueue) (struct rte_mempool * mp, + void *const *obj_table, unsigned n) +{ + const int batch_size = 32; + vlib_main_t *vm = vlib_get_main (); + vlib_buffer_t bt; + u8 buffer_pool_index = mp->pool_id; + vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index); + u32 bufs[batch_size]; + u32 n_left = n; + void *const *obj = obj_table; - n_left = n_buffers; - bi = buffers; - b = bufp; - vlib_get_buffers (vm, bi, b, n_buffers); + vlib_buffer_copy_template (&bt, &bp->buffer_template); while (n_left >= 4) { - u32 or_flags; - vlib_buffer_t **p; - - if (n_left < 16) - goto no_prefetch; - - p = b + 12; - dpdk_prefetch_buffer (p[0]); - dpdk_prefetch_buffer (p[1]); - dpdk_prefetch_buffer (p[2]); - dpdk_prefetch_buffer (p[3]); - no_prefetch: - - for (i = 0; i < 4; i++) - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[i]); - - or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags; - - if (or_flags & simple_mask) - { - recycle_or_free (vm, bm, bi[0], b[0]); - recycle_or_free (vm, bm, bi[1], b[1]); - recycle_or_free (vm, bm, bi[2], b[2]); - recycle_or_free (vm, 
bm, bi[3], b[3]); - } - else - { - dpdk_rte_pktmbuf_free (vm, thread_index, b[0], 0); - dpdk_rte_pktmbuf_free (vm, thread_index, b[1], 0); - dpdk_rte_pktmbuf_free (vm, thread_index, b[2], 0); - dpdk_rte_pktmbuf_free (vm, thread_index, b[3], 0); - } - bi += 4; - b += 4; + dpdk_ops_vpp_enqueue_one (&bt, obj[0]); + dpdk_ops_vpp_enqueue_one (&bt, obj[1]); + dpdk_ops_vpp_enqueue_one (&bt, obj[2]); + dpdk_ops_vpp_enqueue_one (&bt, obj[3]); + obj += 4; n_left -= 4; } + while (n_left) { - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); - recycle_or_free (vm, bm, bi[0], b[0]); - bi += 1; - b += 1; + dpdk_ops_vpp_enqueue_one (&bt, obj[0]); + obj += 1; n_left -= 1; } -} - -void -CLIB_MULTIARCH_FN (dpdk_buffer_free) (vlib_main_t * vm, u32 * buffers, - u32 n_buffers) -{ - vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ - 1); -} - -void -CLIB_MULTIARCH_FN (dpdk_buffer_free_no_next) (vlib_main_t * vm, u32 * buffers, - u32 n_buffers) -{ - vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ - 0); -} -#ifndef CLIB_MARCH_VARIANT -clib_error_t * -dpdk_pool_create (vlib_main_t * vm, u8 * pool_name, u32 elt_size, - u32 num_elts, u32 pool_priv_size, u16 cache_size, u8 numa, - struct rte_mempool **_mp, u32 * map_index) -{ - struct rte_mempool *mp; - enum rte_iova_mode iova_mode; - dpdk_mempool_private_t priv; - vlib_physmem_map_t *pm; - clib_error_t *error = 0; - size_t min_chunk_size, align; - int map_dma = 1; - u32 size; - i32 ret; - uword i; - - mp = rte_mempool_create_empty ((char *) pool_name, num_elts, elt_size, - 512, pool_priv_size, numa, 0); - if (!mp) - return clib_error_return (0, "failed to create %s", pool_name); - - rte_mempool_set_ops_byname (mp, RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL); - - size = rte_mempool_op_calc_mem_size_default (mp, num_elts, 21, - &min_chunk_size, &align); - - if ((error = vlib_physmem_shared_map_create (vm, (char *) pool_name, size, - 0, numa, map_index))) + while (n >= batch_size) { - rte_mempool_free (mp); - 
return error; + vlib_get_buffer_indices_with_offset (vm, (void **) obj_table, bufs, + batch_size, + sizeof (struct rte_mbuf)); + vlib_buffer_pool_put (vm, buffer_pool_index, bufs, batch_size); + n -= batch_size; + obj_table += batch_size; } - pm = vlib_physmem_get_map (vm, *map_index); - - /* Call the mempool priv initializer */ - priv.mbp_priv.mbuf_data_room_size = VLIB_BUFFER_PRE_DATA_SIZE + - VLIB_BUFFER_DATA_SIZE; - priv.mbp_priv.mbuf_priv_size = VLIB_BUFFER_HDR_SIZE; - rte_pktmbuf_pool_init (mp, &priv); - - if (rte_eth_dev_count_avail () == 0) - map_dma = 0; - iova_mode = rte_eal_iova_mode (); - for (i = 0; i < pm->n_pages; i++) + if (n) { - size_t page_sz = 1ULL << pm->log2_page_size; - char *va = ((char *) pm->base) + i * page_sz; - uword pa = iova_mode == RTE_IOVA_VA ? - pointer_to_uword (va) : pm->page_table[i]; - ret = rte_mempool_populate_iova (mp, va, pa, page_sz, 0, 0); - if (ret < 0) - { - rte_mempool_free (mp); - return clib_error_return (0, "failed to populate %s", pool_name); - } - /* -1 likely means there is no PCI devices assigned to vfio - container or noiommu mode is used so we stop trying */ - if (map_dma && rte_vfio_dma_map (pointer_to_uword (va), pa, page_sz)) - map_dma = 0; + vlib_get_buffer_indices_with_offset (vm, (void **) obj_table, bufs, + n, sizeof (struct rte_mbuf)); + vlib_buffer_pool_put (vm, buffer_pool_index, bufs, n); } - _mp[0] = mp; - return 0; } -clib_error_t * -dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, - unsigned socket_id) -{ - dpdk_main_t *dm = &dpdk_main; - struct rte_mempool *rmp; - clib_error_t *error = 0; - u8 *pool_name; - u32 elt_size, i; - u32 map_index; - - vec_validate_aligned (dm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES); - - /* pool already exists, nothing to do */ - if (dm->pktmbuf_pools[socket_id]) - return 0; - - pool_name = format (0, "dpdk_mbuf_pool_socket%u%c", socket_id, 0); - - elt_size = sizeof (struct rte_mbuf) + - VLIB_BUFFER_HDR_SIZE /* priv size */ + - 
VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE; /*data room size */ - - error = dpdk_pool_create (vm, pool_name, elt_size, num_mbufs, - sizeof (dpdk_mempool_private_t), 512, socket_id, - &rmp, &map_index); +CLIB_MARCH_FN_REGISTRATION (dpdk_ops_vpp_enqueue); - vec_free (pool_name); +static_always_inline void +dpdk_ops_vpp_enqueue_no_cache_one (vlib_main_t * vm, struct rte_mempool *old, + struct rte_mempool *new, void *obj, + vlib_buffer_t * bt) +{ + struct rte_mbuf *mb = obj; + vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); - if (!error) + if (clib_atomic_sub_fetch (&b->ref_count, 1) == 0) { - /* call the object initializers */ - rte_mempool_obj_iter (rmp, rte_pktmbuf_init, 0); - - dpdk_mempool_private_t *privp = rte_mempool_get_priv (rmp); - privp->buffer_pool_index = - vlib_buffer_register_physmem_map (vm, map_index); + u32 bi = vlib_get_buffer_index (vm, b); + mb->pool = new; + vlib_buffer_copy_template (b, bt); + vlib_buffer_pool_put (vm, bt->buffer_pool_index, &bi, 1); + return; + } +} - dm->pktmbuf_pools[socket_id] = rmp; +int +CLIB_MULTIARCH_FN (dpdk_ops_vpp_enqueue_no_cache) (struct rte_mempool * cmp, + void *const *obj_table, + unsigned n) +{ + vlib_main_t *vm = vlib_get_main (); + vlib_buffer_t bt; + struct rte_mempool *mp; + mp = dpdk_mempool_by_buffer_pool_index[cmp->pool_id]; + u8 buffer_pool_index = cmp->pool_id; + vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index); + vlib_buffer_copy_template (&bt, &bp->buffer_template); - return 0; + while (n >= 4) + { + dpdk_ops_vpp_enqueue_no_cache_one (vm, cmp, mp, obj_table[0], &bt); + dpdk_ops_vpp_enqueue_no_cache_one (vm, cmp, mp, obj_table[1], &bt); + dpdk_ops_vpp_enqueue_no_cache_one (vm, cmp, mp, obj_table[2], &bt); + dpdk_ops_vpp_enqueue_no_cache_one (vm, cmp, mp, obj_table[3], &bt); + obj_table += 4; + n -= 4; } - clib_error_report (error); - - /* no usable pool for this socket, try to use pool from another one */ - for (i = 0; i < vec_len (dm->pktmbuf_pools); i++) + while (n) { - 
if (dm->pktmbuf_pools[i]) - { - clib_warning ("WARNING: Failed to allocate mempool for CPU socket " - "%u. Threads running on socket %u will use socket %u " - "mempool.", socket_id, socket_id, i); - dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i]; - return 0; - } + dpdk_ops_vpp_enqueue_no_cache_one (vm, cmp, mp, obj_table[0], &bt); + obj_table += 1; + n -= 1; } - return clib_error_return (0, "failed to allocate mempool on socket %u", - socket_id); + return 0; } -#if CLIB_DEBUG > 0 - -u32 *vlib_buffer_state_validation_lock; -uword *vlib_buffer_state_validation_hash; -void *vlib_buffer_state_heap; +CLIB_MARCH_FN_REGISTRATION (dpdk_ops_vpp_enqueue_no_cache); -static clib_error_t * -buffer_state_validation_init (vlib_main_t * vm) +int +CLIB_MULTIARCH_FN (dpdk_ops_vpp_dequeue) (struct rte_mempool * mp, + void **obj_table, unsigned n) { - void *oldheap; + const int batch_size = 32; + vlib_main_t *vm = vlib_get_main (); + u32 bufs[batch_size], total = 0, n_alloc = 0; + u8 buffer_pool_index = mp->pool_id; + void **obj = obj_table; - vlib_buffer_state_heap = - mheap_alloc_with_lock (0, 10 << 20, 0 /* locked */ ); - oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + while (n >= batch_size) + { + n_alloc = vlib_buffer_alloc_from_pool (vm, bufs, batch_size, + buffer_pool_index); + if (n_alloc != batch_size) + goto alloc_fail; + + vlib_get_buffers_with_offset (vm, bufs, obj, batch_size, + -(i32) sizeof (struct rte_mbuf)); + total += batch_size; + obj += batch_size; + n -= batch_size; + } - vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); - vec_validate_aligned (vlib_buffer_state_validation_lock, 0, - CLIB_CACHE_LINE_BYTES); - clib_mem_set_heap (oldheap); - return 0; -} + if (n) + { + n_alloc = vlib_buffer_alloc_from_pool (vm, bufs, n, buffer_pool_index); -VLIB_INIT_FUNCTION (buffer_state_validation_init); -#endif + if (n_alloc != n) + goto alloc_fail; -#if CLI_DEBUG -struct dpdk_validate_buf_result -{ - u32 invalid; - u32 uninitialized; -}; + 
vlib_get_buffers_with_offset (vm, bufs, obj, n, + -(i32) sizeof (struct rte_mbuf)); + } -#define DPDK_TRAJECTORY_POISON 31 + return 0; -static void -dpdk_buffer_validate_trajectory (struct rte_mempool *mp, void *opaque, - void *obj, unsigned obj_idx) -{ - vlib_buffer_t *b; - struct dpdk_validate_buf_result *counter = opaque; - b = vlib_buffer_from_rte_mbuf ((struct rte_mbuf *) obj); - if (b->pre_data[0] != 0) +alloc_fail: + /* dpdk doesn't support partial alloc, so we need to return what we + already got */ + if (n_alloc) + vlib_buffer_pool_put (vm, buffer_pool_index, bufs, n_alloc); + obj = obj_table; + while (total) { - if (b->pre_data[0] == DPDK_TRAJECTORY_POISON) - counter->uninitialized++; - else - counter->invalid++; + vlib_get_buffer_indices_with_offset (vm, obj, bufs, batch_size, + sizeof (struct rte_mbuf)); + vlib_buffer_pool_put (vm, buffer_pool_index, bufs, batch_size); + + obj += batch_size; + total -= batch_size; } + return -ENOENT; } -int -dpdk_buffer_validate_trajectory_all (u32 * uninitialized) +CLIB_MARCH_FN_REGISTRATION (dpdk_ops_vpp_dequeue); + +#ifndef CLIB_MARCH_VARIANT + +static int +dpdk_ops_vpp_dequeue_no_cache (struct rte_mempool *mp, void **obj_table, + unsigned n) { - dpdk_main_t *dm = &dpdk_main; - struct dpdk_validate_buf_result counter = { 0 }; - int i; - - for (i = 0; i < vec_len (dm->pktmbuf_pools); i++) - rte_mempool_obj_iter (dm->pktmbuf_pools[i], - dpdk_buffer_validate_trajectory, &counter); - if (uninitialized) - *uninitialized = counter.uninitialized; - return counter.invalid; + clib_error ("bug"); + return 0; } -static void -dpdk_buffer_poison_trajectory (struct rte_mempool *mp, void *opaque, - void *obj, unsigned obj_idx) +static unsigned +dpdk_ops_vpp_get_count (const struct rte_mempool *mp) { - vlib_buffer_t *b; - b = vlib_buffer_from_rte_mbuf ((struct rte_mbuf *) obj); - b->pre_data[0] = DPDK_TRAJECTORY_POISON; + clib_warning (""); + return 0; } -void -dpdk_buffer_poison_trajectory_all (void) +static unsigned 
+dpdk_ops_vpp_get_count_no_cache (const struct rte_mempool *mp) { - dpdk_main_t *dm = &dpdk_main; - int i; - - for (i = 0; i < vec_len (dm->pktmbuf_pools); i++) - rte_mempool_obj_iter (dm->pktmbuf_pools[i], dpdk_buffer_poison_trajectory, - 0); + struct rte_mempool *cmp; + cmp = dpdk_no_cache_mempool_by_buffer_pool_index[mp->pool_id]; + return dpdk_ops_vpp_get_count (cmp); } -#endif -static clib_error_t * -dpdk_buffer_init (vlib_main_t * vm) +clib_error_t * +dpdk_buffer_pools_create (vlib_main_t * vm) { - dpdk_buffer_main_t *dbm = &dpdk_buffer_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - - vec_validate_aligned (dbm->ptd, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - - dbm->vfio_container_fd = -1; - + clib_error_t *err; + vlib_buffer_pool_t *bp; + + struct rte_mempool_ops ops = { }; + + strncpy (ops.name, "vpp", 4); + ops.alloc = dpdk_ops_vpp_alloc; + ops.free = dpdk_ops_vpp_free; + ops.get_count = dpdk_ops_vpp_get_count; + ops.enqueue = CLIB_MARCH_FN_POINTER (dpdk_ops_vpp_enqueue); + ops.dequeue = CLIB_MARCH_FN_POINTER (dpdk_ops_vpp_dequeue); + rte_mempool_register_ops (&ops); + + strncpy (ops.name, "vpp-no-cache", 13); + ops.get_count = dpdk_ops_vpp_get_count_no_cache; + ops.enqueue = CLIB_MARCH_FN_POINTER (dpdk_ops_vpp_enqueue_no_cache); + ops.dequeue = dpdk_ops_vpp_dequeue_no_cache; + rte_mempool_register_ops (&ops); + + /* *INDENT-OFF* */ + vec_foreach (bp, vm->buffer_main->buffer_pools) + if (bp->start && (err = dpdk_buffer_pool_init (vm, bp))) + return err; + /* *INDENT-ON* */ return 0; } -VLIB_INIT_FUNCTION (dpdk_buffer_init); - -/* *INDENT-OFF* */ -VLIB_BUFFER_REGISTER_CALLBACKS (dpdk, static) = { - .vlib_buffer_fill_free_list_cb = &dpdk_buffer_fill_free_list, - .vlib_buffer_free_cb = &dpdk_buffer_free, - .vlib_buffer_free_no_next_cb = &dpdk_buffer_free_no_next, - .vlib_buffer_delete_free_list_cb = &dpdk_buffer_delete_free_list, -}; -/* *INDENT-ON* */ - -#if __x86_64__ -vlib_buffer_fill_free_list_cb_t __clib_weak 
dpdk_buffer_fill_free_list_avx512; -vlib_buffer_fill_free_list_cb_t __clib_weak dpdk_buffer_fill_free_list_avx2; -vlib_buffer_free_cb_t __clib_weak dpdk_buffer_free_avx512; -vlib_buffer_free_cb_t __clib_weak dpdk_buffer_free_avx2; -vlib_buffer_free_no_next_cb_t __clib_weak dpdk_buffer_free_no_next_avx512; -vlib_buffer_free_no_next_cb_t __clib_weak dpdk_buffer_free_no_next_avx2; - -static void __clib_constructor -dpdk_input_multiarch_select (void) -{ - vlib_buffer_callbacks_t *cb = &__dpdk_buffer_callbacks; - if (dpdk_buffer_fill_free_list_avx512 && clib_cpu_supports_avx512f ()) - { - cb->vlib_buffer_fill_free_list_cb = dpdk_buffer_fill_free_list_avx512; - cb->vlib_buffer_free_cb = dpdk_buffer_free_avx512; - cb->vlib_buffer_free_no_next_cb = dpdk_buffer_free_no_next_avx512; - } - else if (dpdk_buffer_fill_free_list_avx2 && clib_cpu_supports_avx2 ()) - { - cb->vlib_buffer_fill_free_list_cb = dpdk_buffer_fill_free_list_avx2; - cb->vlib_buffer_free_cb = dpdk_buffer_free_avx2; - cb->vlib_buffer_free_no_next_cb = dpdk_buffer_free_no_next_avx2; - } -} -#endif +VLIB_BUFFER_SET_EXT_HDR_SIZE (sizeof (struct rte_mempool_objhdr) + + sizeof (struct rte_mbuf)); + #endif /** @endcond */ diff --git a/src/plugins/dpdk/buffer.h b/src/plugins/dpdk/buffer.h new file mode 100644 index 000000000000..2c2579e98038 --- /dev/null +++ b/src/plugins/dpdk/buffer.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef include_dpdk_buffer_h +#define include_dpdk_buffer_h + +#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1) +#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1)) + +extern struct rte_mempool **dpdk_mempool_by_buffer_pool_index; +extern struct rte_mempool **dpdk_no_cache_mempool_by_buffer_pool_index; + +clib_error_t *dpdk_buffer_pools_create (vlib_main_t * vm); + +#endif /* include_dpdk_buffer_h */ + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c index fd59509b5fc4..41f3fdef8382 100644 --- a/src/plugins/dpdk/device/cli.c +++ b/src/plugins/dpdk/device/cli.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -39,6 +40,7 @@ */ +#if 0 static clib_error_t * get_hqos (u32 hw_if_index, u32 subport_id, dpdk_device_t ** xd, dpdk_device_config_t ** devconf) @@ -91,6 +93,7 @@ get_hqos (u32 hw_if_index, u32 subport_id, dpdk_device_t ** xd, done: return error; } +#endif static inline clib_error_t * pcap_trace_command_internal (vlib_main_t * vm, @@ -381,27 +384,27 @@ static clib_error_t * show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - struct rte_mempool *rmp; - int i; + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_pool_t *bp; - for (i = 0; i < vec_len (dpdk_main.pktmbuf_pools); i++) - { - rmp = dpdk_main.pktmbuf_pools[i]; - if (rmp) - { - unsigned count = rte_mempool_avail_count (rmp); - unsigned free_count = rte_mempool_in_use_count (rmp); + vec_foreach (bp, bm->buffer_pools) + { + struct rte_mempool *rmp = dpdk_mempool_by_buffer_pool_index[bp->index]; + if (rmp) + { + unsigned count = rte_mempool_avail_count (rmp); + unsigned free_count = rte_mempool_in_use_count (rmp); - vlib_cli_output (vm, - "name=\"%s\" available = %7d allocated = %7d total = %7d\n", - rmp->name, (u32) count, (u32) free_count, - 
(u32) (count + free_count)); - } - else - { - vlib_cli_output (vm, "rte_mempool is NULL (!)\n"); - } - } + vlib_cli_output (vm, + "name=\"%s\" available = %7d allocated = %7d total = %7d\n", + rmp->name, (u32) count, (u32) free_count, + (u32) (count + free_count)); + } + else + { + vlib_cli_output (vm, "rte_mempool is NULL (!)\n"); + } + } return 0; } @@ -690,6 +693,7 @@ VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = { }; /* *INDENT-ON* */ +#if 0 static int dpdk_device_queue_sort (void *a1, void *a2) { @@ -1848,7 +1852,6 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, dpdk_device_t *xd; uword *p = 0; struct rte_eth_dev_info dev_info; - struct rte_pci_device *pci_dev; dpdk_device_config_t *devconf = 0; u32 qindex; struct rte_sched_queue_stats stats; @@ -1894,16 +1897,14 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, xd = vec_elt_at_index (dm->devices, hw->dev_instance); rte_eth_dev_info_get (xd->port_id, &dev_info); - pci_dev = dpdk_get_pci_device (&dev_info); - - if (pci_dev) + if (dev_info.pci_dev) { /* bonded interface has no pci info */ vlib_pci_addr_t pci_addr; - pci_addr.domain = pci_dev->addr.domain; - pci_addr.bus = pci_dev->addr.bus; - pci_addr.slot = pci_dev->addr.devid; - pci_addr.function = pci_dev->addr.function; + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; p = hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); @@ -1988,6 +1989,7 @@ VLIB_CLI_COMMAND (cmd_show_dpdk_hqos_queue_stats, static) = { .function = show_dpdk_hqos_queue_stats, }; /* *INDENT-ON* */ +#endif static clib_error_t * show_dpdk_version_command_fn (vlib_main_t * vm, @@ -2020,59 +2022,6 @@ VLIB_CLI_COMMAND (show_vpe_version_command, static) = { }; /* *INDENT-ON* */ -#if CLI_DEBUG - -static clib_error_t * -dpdk_validate_buffers_fn (vlib_main_t * vm, 
unformat_input_t * input, - vlib_cli_command_t * cmd_arg) -{ - u32 n_invalid_bufs = 0, uninitialized = 0; - u32 is_poison = 0, is_test = 0; - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "poison")) - is_poison = 1; - else if (unformat (input, "trajectory")) - is_test = 1; - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - - if (VLIB_BUFFER_TRACE_TRAJECTORY == 0) - { - vlib_cli_output (vm, "Trajectory not enabled. Recompile with " - "VLIB_BUFFER_TRACE_TRAJECTORY 1"); - return 0; - } - if (is_poison) - { - dpdk_buffer_poison_trajectory_all (); - } - if (is_test) - { - n_invalid_bufs = dpdk_buffer_validate_trajectory_all (&uninitialized); - if (!n_invalid_bufs) - vlib_cli_output (vm, "All buffers are valid %d uninitialized", - uninitialized); - else - vlib_cli_output (vm, "Found %d invalid buffers and %d uninitialized", - n_invalid_bufs, uninitialized); - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (test_dpdk_buffers_command, static) = -{ - .path = "test dpdk buffers", - .short_help = "test dpdk buffers [poison] [trajectory]", - .function = dpdk_validate_buffers_fn, -}; -/* *INDENT-ON* */ - -#endif - clib_error_t * dpdk_cli_init (vlib_main_t * vm) { diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c index 57430eff9774..b239616b7d9d 100644 --- a/src/plugins/dpdk/device/common.c +++ b/src/plugins/dpdk/device/common.c @@ -22,8 +22,8 @@ #include #include #include +#include #include - #include #include @@ -40,6 +40,7 @@ void dpdk_device_setup (dpdk_device_t * xd) { dpdk_main_t *dm = &dpdk_main; + vlib_main_t *vm = vlib_get_main (); vnet_main_t *vnm = vnet_get_main (); vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index); vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index); @@ -116,26 +117,23 @@ dpdk_device_setup (dpdk_device_t * xd) CLIB_CACHE_LINE_BYTES); for (j = 0; j < xd->rx_q_used; j++) { - 
dpdk_mempool_private_t *privp; uword tidx = vnet_get_device_input_thread_index (dm->vnet_main, xd->hw_if_index, j); unsigned lcore = vlib_worker_threads[tidx].cpu_id; u16 socket_id = rte_lcore_to_socket_id (lcore); + u8 bpidx = vlib_buffer_pool_get_default_for_numa (vm, socket_id); + vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, bpidx); + struct rte_mempool *mp = dpdk_mempool_by_buffer_pool_index[bpidx]; - rv = - rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc, - xd->cpu_socket, 0, - dm->pktmbuf_pools[socket_id]); + rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc, + xd->cpu_socket, 0, mp); /* retry with any other CPU socket */ if (rv < 0) - rv = - rte_eth_rx_queue_setup (xd->port_id, j, - xd->nb_rx_desc, SOCKET_ID_ANY, 0, - dm->pktmbuf_pools[socket_id]); + rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc, + SOCKET_ID_ANY, 0, mp); - privp = rte_mempool_get_priv (dm->pktmbuf_pools[socket_id]); - xd->buffer_pool_for_queue[j] = privp->buffer_pool_index; + xd->buffer_pool_for_queue[j] = bp->index; if (rv < 0) dpdk_device_error (xd, "rte_eth_rx_queue_setup", rv); diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c index 9b54f34a5b2a..466abc37e681 100644 --- a/src/plugins/dpdk/device/device.c +++ b/src/plugins/dpdk/device/device.c @@ -19,8 +19,8 @@ #include #include +#include #include - #include #include @@ -127,11 +127,9 @@ dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b, mb->pkt_len = b->current_length; mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; first_mb->nb_segs++; - if (PREDICT_FALSE (b->n_add_refs)) - { - rte_mbuf_refcnt_update (mb, b->n_add_refs); - b->n_add_refs = 0; - } + if (PREDICT_FALSE (b->ref_count > 1)) + mb->pool = + dpdk_no_cache_mempool_by_buffer_pool_index[b->buffer_pool_index]; } } @@ -168,6 +166,7 @@ static_always_inline queue_id = (queue_id + 1) % xd->tx_q_used; } +#if 0 if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */ { /* no wrap, 
transmit in one burst */ @@ -180,7 +179,9 @@ static_always_inline n_sent = rte_ring_sp_enqueue_burst (hqos->swq, (void **) mb, n_left, 0); } - else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) + else +#endif + if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) { /* no wrap, transmit in one burst */ n_sent = rte_eth_tx_burst (xd->port_id, queue_id, mb, n_left); diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h index a1e07b42ca7f..b2ca56cbe4d8 100644 --- a/src/plugins/dpdk/device/dpdk.h +++ b/src/plugins/dpdk/device/dpdk.h @@ -56,8 +56,6 @@ #include #include -#define NB_MBUF (16<<10) - extern vnet_device_class_t dpdk_device_class; extern vlib_node_registration_t dpdk_input_node; extern vlib_node_registration_t admin_up_down_process_node; @@ -317,7 +315,9 @@ typedef struct dpdk_device_config_hqos_t int dpdk_hqos_validate_mask (u64 mask, u32 n); void dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * hqos, u32 pipe_profile_id); +#if 0 void dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos); +#endif clib_error_t *dpdk_port_setup_hqos (dpdk_device_t * xd, dpdk_device_config_hqos_t * hqos); void dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, @@ -364,7 +364,7 @@ typedef struct u8 nchannels_set_manually; u32 coremask; u32 nchannels; - u32 num_mbufs; + u32 num_crypto_mbufs; /* * format interface names ala xxxEthernet%d/%d/%d instead of @@ -443,9 +443,6 @@ typedef struct vnet_main_t *vnet_main; dpdk_config_main_t *conf; - /* mempool */ - struct rte_mempool **pktmbuf_pools; - /* API message ID base */ u16 msg_id_base; @@ -530,14 +527,6 @@ clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn); clib_error_t *unformat_hqos (unformat_input_t * input, dpdk_device_config_hqos_t * hqos); -clib_error_t *dpdk_pool_create (vlib_main_t * vm, u8 * pool_name, - u32 elt_size, u32 num_elts, - u32 pool_priv_size, u16 cache_size, u8 numa, - struct rte_mempool **_mp, u32 * 
map_index); - -clib_error_t *dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, - unsigned socket_id); - struct rte_pci_device *dpdk_get_pci_device (const struct rte_eth_dev_info *info); diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h index b44cd49142c0..e0e068269352 100644 --- a/src/plugins/dpdk/device/dpdk_priv.h +++ b/src/plugins/dpdk/device/dpdk_priv.h @@ -13,9 +13,6 @@ * limitations under the License. */ -#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1) -#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1)) - #define DPDK_NB_RX_DESC_DEFAULT 1024 #define DPDK_NB_TX_DESC_DEFAULT 1024 #define DPDK_NB_RX_DESC_VIRTIO 256 @@ -52,14 +49,6 @@ _(file-prefix) \ _(vdev) \ _(log-level) -typedef struct -{ - /* must be first */ - struct rte_pktmbuf_pool_private mbp_priv; - u8 buffer_pool_index; -} dpdk_mempool_private_t; - - static inline void dpdk_get_xstats (dpdk_device_t * xd) { diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index df1a823f6da0..ffd63ea0514e 100644 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -150,68 +151,10 @@ dpdk_device_lock_init (dpdk_device_t * xd) } } -static struct rte_mempool_ops * -get_ops_by_name (char *ops_name) -{ - u32 i; - - for (i = 0; i < rte_mempool_ops_table.num_ops; i++) - { - if (!strcmp (ops_name, rte_mempool_ops_table.ops[i].name)) - return &rte_mempool_ops_table.ops[i]; - } - - return 0; -} - -static int -dpdk_ring_alloc (struct rte_mempool *mp) -{ - u32 rg_flags = 0, count; - i32 ret; - char rg_name[RTE_RING_NAMESIZE]; - struct rte_ring *r; - - ret = snprintf (rg_name, sizeof (rg_name), RTE_MEMPOOL_MZ_FORMAT, mp->name); - if (ret < 0 || ret >= (i32) sizeof (rg_name)) - return -ENAMETOOLONG; - - /* ring flags */ - if (mp->flags & MEMPOOL_F_SP_PUT) - rg_flags |= RING_F_SP_ENQ; - if (mp->flags & MEMPOOL_F_SC_GET) - rg_flags |= 
RING_F_SC_DEQ; - - count = rte_align32pow2 (mp->size + 1); - /* - * Allocate the ring that will be used to store objects. - * Ring functions will return appropriate errors if we are - * running as a secondary process etc., so no checks made - * in this function for that condition. - */ - /* XXX can we get memory from the right socket? */ - r = clib_mem_alloc_aligned (rte_ring_get_memsize (count), - CLIB_CACHE_LINE_BYTES); - - /* XXX rte_ring_lookup will not work */ - - ret = rte_ring_init (r, rg_name, count, rg_flags); - if (ret) - return ret; - - mp->pool_data = r; - - return 0; -} - static int dpdk_port_crc_strip_enabled (dpdk_device_t * xd) { -#if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0) - return ! !(xd->port_conf.rxmode.offloads & DEV_RX_OFFLOAD_CRC_STRIP); -#else return !(xd->port_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC); -#endif } static clib_error_t * @@ -219,7 +162,6 @@ dpdk_lib_init (dpdk_main_t * dm) { u32 nports; u32 mtu, max_rx_frame; - u32 nb_desc = 0; int i; clib_error_t *error; vlib_main_t *vm = vlib_get_main (); @@ -273,11 +215,8 @@ dpdk_lib_init (dpdk_main_t * dm) CLIB_CACHE_LINE_BYTES); for (i = 0; i < tm->n_vlib_mains; i++) { - vlib_buffer_free_list_t *fl; dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data, i); - fl = vlib_buffer_get_free_list (vm, - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (&ptd->buffer_template, fl); + clib_memset (&ptd->buffer_template, 0, sizeof (vlib_buffer_t)); ptd->buffer_template.flags = dm->buffer_flags_template; vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_TX] = (u32) ~ 0; } @@ -478,16 +417,10 @@ dpdk_lib_init (dpdk_main_t * dm) case VNET_DPDK_PMD_IXGBEVF: case VNET_DPDK_PMD_I40EVF: xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF; -#if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0) - xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP; -#endif break; case VNET_DPDK_PMD_THUNDERX: xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF; -#if RTE_VERSION < 
RTE_VERSION_NUM(18, 11, 0, 0) - xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP; -#endif if (dm->conf->no_tx_checksum_offload == 0) { @@ -517,9 +450,6 @@ dpdk_lib_init (dpdk_main_t * dm) /* Intel Red Rock Canyon */ case VNET_DPDK_PMD_FM10K: xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; -#if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0) - xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP; -#endif break; /* virtio */ @@ -633,9 +563,6 @@ dpdk_lib_init (dpdk_main_t * dm) dq->queue_id = 0; } - /* count the number of descriptors used for this device */ - nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used; - error = ethernet_register_interface (dm->vnet_main, dpdk_device_class.index, xd->device_index, /* ethernet address */ addr, @@ -813,10 +740,6 @@ dpdk_lib_init (dpdk_main_t * dm) } /* *INDENT-ON* */ - if (nb_desc > dm->conf->num_mbufs) - dpdk_log_err ("%d mbufs allocated but total rx/tx ring size is %d\n", - dm->conf->num_mbufs, nb_desc); - return 0; } @@ -1026,7 +949,9 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, devconf->pci_addr.as_u32 = pci_addr.as_u32; devconf->hqos_enabled = 0; +#if 0 dpdk_device_config_hqos_default (&devconf->hqos); +#endif if (!input) return 0; @@ -1211,7 +1136,8 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) } else if (unformat (input, "num-mem-channels %d", &conf->nchannels)) conf->nchannels_set_manually = 0; - else if (unformat (input, "num-mbufs %d", &conf->num_mbufs)) + else if (unformat (input, "num-crypto-mbufs %d", + &conf->num_crypto_mbufs)) ; else if (unformat (input, "uio-driver %s", &conf->uio_driver_name)) ; @@ -1454,36 +1380,10 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) if (ret < 0) return clib_error_return (0, "rte_eal_init returned %d", ret); - /* set custom ring memory allocator */ - { - struct rte_mempool_ops *ops = NULL; - - ops = get_ops_by_name ("ring_sp_sc"); - ops->alloc = dpdk_ring_alloc; - - ops = get_ops_by_name ("ring_mp_sc"); - 
ops->alloc = dpdk_ring_alloc; - - ops = get_ops_by_name ("ring_sp_mc"); - ops->alloc = dpdk_ring_alloc; - - ops = get_ops_by_name ("ring_mp_mc"); - ops->alloc = dpdk_ring_alloc; - } - /* main thread 1st */ - error = dpdk_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); - if (error) + if ((error = dpdk_buffer_pools_create (vm))) return error; - for (i = 0; i < RTE_MAX_LCORE; i++) - { - error = dpdk_buffer_pool_create (vm, conf->num_mbufs, - rte_lcore_to_socket_id (i)); - if (error) - return error; - } - done: return error; } @@ -1770,7 +1670,6 @@ dpdk_init (vlib_main_t * vm) dm->conf = &dpdk_config_main; dm->conf->nchannels = 4; - dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); vec_add1 (dm->conf->eal_init_args, (u8 *) "--in-memory"); diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index 194c359dbacc..bd23319d5afe 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -40,7 +41,7 @@ STATIC_ASSERT ((PKT_RX_IP_CKSUM_BAD | PKT_RX_FDIR) < static_always_inline uword dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, - struct rte_mbuf *mb, vlib_buffer_free_list_t * fl) + struct rte_mbuf *mb, vlib_buffer_t * bt) { u8 nb_seg = 1; struct rte_mbuf *mb_seg = 0; @@ -59,10 +60,7 @@ dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, ASSERT (mb_seg != 0); b_seg = vlib_buffer_from_rte_mbuf (mb_seg); - vlib_buffer_init_for_free_list (b_seg, fl); - - ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - ASSERT (b_seg->current_data == 0); + vlib_buffer_copy_template (b_seg, bt); /* * The driver (e.g. 
virtio) may not put the packet data at the start @@ -167,17 +165,16 @@ dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd, { u32 n_left = n_rx_packets; vlib_buffer_t *b[4]; - vlib_buffer_free_list_t *fl; struct rte_mbuf **mb = ptd->mbufs; uword n_bytes = 0; u8 *flags, or_flags = 0; - - if (maybe_multiseg) - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + vlib_buffer_t bt; mb = ptd->mbufs; flags = ptd->flags; + /* copy template into local variable - will save per packet load */ + vlib_buffer_copy_template (&bt, &ptd->buffer_template); while (n_left >= 8) { dpdk_prefetch_buffer_x4 (mb + 4); @@ -187,7 +184,10 @@ dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd, b[2] = vlib_buffer_from_rte_mbuf (mb[2]); b[3] = vlib_buffer_from_rte_mbuf (mb[3]); - clib_memcpy64_x4 (b[0], b[1], b[2], b[3], &ptd->buffer_template); + vlib_buffer_copy_template (b[0], &bt); + vlib_buffer_copy_template (b[1], &bt); + vlib_buffer_copy_template (b[2], &bt); + vlib_buffer_copy_template (b[3], &bt); dpdk_prefetch_mbuf_x4 (mb + 4); @@ -208,10 +208,10 @@ dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd, if (maybe_multiseg) { - n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], fl); - n_bytes += dpdk_process_subseq_segs (vm, b[1], mb[1], fl); - n_bytes += dpdk_process_subseq_segs (vm, b[2], mb[2], fl); - n_bytes += dpdk_process_subseq_segs (vm, b[3], mb[3], fl); + n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], &bt); + n_bytes += dpdk_process_subseq_segs (vm, b[1], mb[1], &bt); + n_bytes += dpdk_process_subseq_segs (vm, b[2], mb[2], &bt); + n_bytes += dpdk_process_subseq_segs (vm, b[3], mb[3], &bt); } VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); @@ -227,7 +227,7 @@ dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd, while (n_left) { b[0] = vlib_buffer_from_rte_mbuf (mb[0]); - clib_memcpy_fast (b[0], &ptd->buffer_template, 64); + vlib_buffer_copy_template (b[0], &bt); or_flags |= 
dpdk_ol_flags_extract (mb, flags, 1); flags += 1; @@ -235,7 +235,7 @@ dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd, n_bytes += b[0]->current_length = mb[0]->data_len; if (maybe_multiseg) - n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], fl); + n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], &bt); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); /* next */ @@ -324,6 +324,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, /* as DPDK is allocating empty buffers from mempool provided before interface start for each queue, it is safe to store this in the template */ bt->buffer_pool_index = xd->buffer_pool_for_queue[queue_id]; + bt->ref_count = 1; vnet_buffer (bt)->feature_arc_index = 0; bt->current_config_index = 0; @@ -440,6 +441,12 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, if (PREDICT_FALSE (dm->pcap[VLIB_RX].pcap_enable)) { u32 bi0; + + if (single_next) + vlib_get_buffer_indices_with_offset (vm, (void **) ptd->mbufs, + ptd->buffers, n_rx_packets, + sizeof (struct rte_mbuf)); + n_left = n_rx_packets; buffers = ptd->buffers; while (n_left) diff --git a/src/plugins/dpdk/ipsec/cli.c b/src/plugins/dpdk/ipsec/cli.c index 4efa88e4bc04..b773cd2529ab 100644 --- a/src/plugins/dpdk/ipsec/cli.c +++ b/src/plugins/dpdk/ipsec/cli.c @@ -141,9 +141,9 @@ format_crypto_worker (u8 * s, va_list * args) { ind = " "; res = vec_elt_at_index (dcm->resource, res_idx[0]); - s = format (s, "%s%-20s dev-id %2u inbound-queue %2u outbound-queue %2u\n", + s = format (s, "%s%-20s dev-id %2u queue-pair %2u\n", ind, vec_elt_at_index (dcm->dev, res->dev_id)->name, - res->dev_id, res->qp_id, res->qp_id + 1); + res->dev_id, res->qp_id); ind = " "; if (verbose) @@ -226,12 +226,12 @@ show_dpdk_crypto_placement_v_fn (vlib_main_t * vm, unformat_input_t * input, * @cliexstart{show dpdk crypto placement} * vpp# show dpdk crypto placement * Thread 1 (vpp_wk_0): - * cryptodev_aesni_mb_p dev-id 0 inbound-queue 0 
outbound-queue 1 - * cryptodev_aesni_gcm_ dev-id 1 inbound-queue 0 outbound-queue 1 + * cryptodev_aesni_mb_p dev-id 0 queue-pair 0 + * cryptodev_aesni_gcm_ dev-id 1 queue-pair 0 * * Thread 2 (vpp_wk_1): - * cryptodev_aesni_mb_p dev-id 0 inbound-queue 2 outbound-queue 3 - * cryptodev_aesni_gcm_ dev-id 1 inbound-queue 2 outbound-queue 3 + * cryptodev_aesni_mb_p dev-id 0 queue-pair 1 + * cryptodev_aesni_gcm_ dev-id 1 queue-pair 1 * @cliexend ?*/ /* *INDENT-OFF* */ @@ -251,18 +251,18 @@ VLIB_CLI_COMMAND (show_dpdk_crypto_placement, static) = { * @cliexstart{show dpdk crypto placement verbose} * vpp# show dpdk crypto placement verbose * Thread 1 (vpp_wk_0): - * cryptodev_aesni_mb_p dev-id 0 inbound-queue 0 outbound-queue 1 + * cryptodev_aesni_mb_p dev-id 0 queue-pair 0 * Cipher: aes-cbc-128, aes-cbc-192, aes-cbc-256, aes-ctr-128, aes-ctr-192, aes-ctr-256 * Auth: md5-96, sha1-96, sha-256-128, sha-384-192, sha-512-256 - * cryptodev_aesni_gcm_ dev-id 1 inbound-queue 0 outbound-queue 1 + * cryptodev_aesni_gcm_ dev-id 1 queue-pair 0 * Cipher: aes-gcm-128, aes-gcm-192, aes-gcm-256 * Auth: * * Thread 2 (vpp_wk_1): - * cryptodev_aesni_mb_p dev-id 0 inbound-queue 2 outbound-queue 3 + * cryptodev_aesni_mb_p dev-id 0 queue-pair 1 * Cipher: aes-cbc-128, aes-cbc-192, aes-cbc-256, aes-ctr-128, aes-ctr-192, aes-ctr-256 * Auth: md5-96, sha1-96, sha-256-128, sha-384-192, sha-512-256 - * cryptodev_aesni_gcm_ dev-id 1 inbound-queue 2 outbound-queue 3 + * cryptodev_aesni_gcm_ dev-id 1 queue-pair 1 * Cipher: aes-gcm-128, aes-gcm-192, aes-gcm-256 * Auth: * diff --git a/src/plugins/dpdk/ipsec/crypto_node.c b/src/plugins/dpdk/ipsec/crypto_node.c index 966e86da2886..6b57069ed220 100644 --- a/src/plugins/dpdk/ipsec/crypto_node.c +++ b/src/plugins/dpdk/ipsec/crypto_node.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -49,34 +50,11 @@ extern vlib_node_registration_t dpdk_crypto_input_node; typedef struct { - u32 status; + /* dev id of this cryptodev */ + u16 dev_id; + 
u16 next_index; } dpdk_crypto_input_trace_t; -#define foreach_cryptodev_status \ - _(SUCCESS, "success") \ - _(NOT_PROCESSED, "not processed") \ - _(AUTH_FAILED, "auth failed") \ - _(INVALID_SESSION, "invalid session") \ - _(INVALID_ARGS, "invalid arguments") \ - _(ERROR, "error") - -static u8 * -format_cryptodev_status (u8 * s, va_list * args) -{ - u32 status = va_arg (*args, u32); - char *str = 0; - - switch (status) - { -#define _(x, z) case RTE_CRYPTO_OP_STATUS_##x: str = z; break; - foreach_cryptodev_status -#undef _ - } - s = format (s, "%s", str); - - return s; -} - static u8 * format_dpdk_crypto_input_trace (u8 * s, va_list * args) { @@ -84,7 +62,7 @@ format_dpdk_crypto_input_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); dpdk_crypto_input_trace_t *t = va_arg (*args, dpdk_crypto_input_trace_t *); - s = format (s, "status: %U", format_cryptodev_status, t->status); + s = format (s, "cryptodev-id %d next-index %d", t->dev_id, t->next_index); return s; } @@ -109,9 +87,10 @@ dpdk_crypto_input_check_op (vlib_main_t * vm, vlib_node_runtime_t * node, always_inline void dpdk_crypto_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node, - struct rte_crypto_op **ops, u32 n_deq) + u8 dev_id, u32 * bis, u16 * nexts, u32 n_deq) { u32 n_left, n_trace; + if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node)))) { n_left = n_deq; @@ -119,24 +98,25 @@ dpdk_crypto_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node, while (n_trace && n_left) { vlib_buffer_t *b0; - struct rte_crypto_op *op0; u16 next; + u32 bi; - op0 = ops[0]; + bi = bis[0]; + next = nexts[0]; - next = crypto_op_get_priv (op0)->next; - - b0 = vlib_buffer_from_rte_mbuf (op0->sym[0].m_src); + b0 = vlib_get_buffer (vm, bi); vlib_trace_buffer (vm, node, next, b0, /* follow_chain */ 0); dpdk_crypto_input_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->status = op0->status; + tr->dev_id = dev_id; + tr->next_index = next; n_trace--; 
n_left--; - ops++; + nexts++; + bis++; } vlib_set_trace_count (vm, node, n_trace); } @@ -144,7 +124,7 @@ dpdk_crypto_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node, static_always_inline u32 dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, - crypto_resource_t * res, u8 outbound) + crypto_resource_t * res) { u32 thread_idx = vlib_get_thread_index (); u8 numa = rte_socket_id (); @@ -163,12 +143,14 @@ dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, ops = cwm->ops; n_ops = n_deq = rte_cryptodev_dequeue_burst (res->dev_id, - res->qp_id + outbound, + res->qp_id, ops, VLIB_FRAME_SIZE); - res->inflights[outbound] -= n_ops; + /* no op dequeued, do not proceed */ + if (n_deq == 0) + return 0; - dpdk_crypto_input_trace (vm, node, ops, n_deq); + res->inflights -= n_ops; while (n_ops >= 4) { @@ -183,14 +165,14 @@ dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, CLIB_PREFETCH (ops[6], CLIB_CACHE_LINE_BYTES, LOAD); CLIB_PREFETCH (ops[7], CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (crypto_op_get_priv (ops[4]), CLIB_CACHE_LINE_BYTES, - LOAD); - CLIB_PREFETCH (crypto_op_get_priv (ops[5]), CLIB_CACHE_LINE_BYTES, - LOAD); - CLIB_PREFETCH (crypto_op_get_priv (ops[6]), CLIB_CACHE_LINE_BYTES, - LOAD); - CLIB_PREFETCH (crypto_op_get_priv (ops[7]), CLIB_CACHE_LINE_BYTES, - LOAD); + CLIB_PREFETCH (crypto_op_get_priv (ops[4]), + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (crypto_op_get_priv (ops[5]), + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (crypto_op_get_priv (ops[6]), + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (crypto_op_get_priv (ops[7]), + CLIB_CACHE_LINE_BYTES, LOAD); } op0 = ops[0]; @@ -258,6 +240,8 @@ dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_enqueue_to_next (vm, node, bis, nexts, n_deq); + dpdk_crypto_input_trace (vm, node, res->dev_id, bis, nexts, n_deq); + crypto_free_ops (numa, cwm->ops, n_deq); return n_deq; @@ -280,13 +264,10 @@ dpdk_crypto_input_inline 
(vlib_main_t * vm, vlib_node_runtime_t * node, { res = vec_elt_at_index (dcm->resource, res_idx[0]); - if (res->inflights[0]) - n_deq += dpdk_crypto_dequeue (vm, node, res, 0); - - if (res->inflights[1]) - n_deq += dpdk_crypto_dequeue (vm, node, res, 1); + if (res->inflights) + n_deq += dpdk_crypto_dequeue (vm, node, res); - if (PREDICT_FALSE (res->remove && !(res->inflights[0] || res->inflights[1]))) + if (PREDICT_FALSE (res->remove && !(res->inflights))) vec_add1 (remove, res_idx[0]); } /* *INDENT-ON* */ diff --git a/src/plugins/dpdk/ipsec/esp_decrypt.c b/src/plugins/dpdk/ipsec/esp_decrypt.c index dff22439ce42..265877f3a186 100644 --- a/src/plugins/dpdk/ipsec/esp_decrypt.c +++ b/src/plugins/dpdk/ipsec/esp_decrypt.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -375,7 +376,7 @@ dpdk_esp_decrypt_inline (vlib_main_t * vm, ESP_DECRYPT_ERROR_RX_PKTS, from_frame->n_vectors); - crypto_enqueue_ops (vm, cwm, 0, dpdk_esp6_decrypt_node.index, + crypto_enqueue_ops (vm, cwm, dpdk_esp6_decrypt_node.index, ESP_DECRYPT_ERROR_ENQ_FAIL, numa); } else @@ -384,7 +385,7 @@ dpdk_esp_decrypt_inline (vlib_main_t * vm, ESP_DECRYPT_ERROR_RX_PKTS, from_frame->n_vectors); - crypto_enqueue_ops (vm, cwm, 0, dpdk_esp4_decrypt_node.index, + crypto_enqueue_ops (vm, cwm, dpdk_esp4_decrypt_node.index, ESP_DECRYPT_ERROR_ENQ_FAIL, numa); } diff --git a/src/plugins/dpdk/ipsec/esp_encrypt.c b/src/plugins/dpdk/ipsec/esp_encrypt.c index cdeeb4022f6c..279cfea82499 100644 --- a/src/plugins/dpdk/ipsec/esp_encrypt.c +++ b/src/plugins/dpdk/ipsec/esp_encrypt.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -547,7 +548,7 @@ dpdk_esp_encrypt_inline (vlib_main_t * vm, ESP_ENCRYPT_ERROR_RX_PKTS, from_frame->n_vectors); - crypto_enqueue_ops (vm, cwm, 1, dpdk_esp6_encrypt_node.index, + crypto_enqueue_ops (vm, cwm, dpdk_esp6_encrypt_node.index, ESP_ENCRYPT_ERROR_ENQ_FAIL, numa); } else @@ -556,7 +557,7 @@ dpdk_esp_encrypt_inline (vlib_main_t * vm, 
ESP_ENCRYPT_ERROR_RX_PKTS, from_frame->n_vectors); - crypto_enqueue_ops (vm, cwm, 1, dpdk_esp4_encrypt_node.index, + crypto_enqueue_ops (vm, cwm, dpdk_esp4_encrypt_node.index, ESP_ENCRYPT_ERROR_ENQ_FAIL, numa); } diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c index 4910696fd81b..f79b4301e9c0 100644 --- a/src/plugins/dpdk/ipsec/ipsec.c +++ b/src/plugins/dpdk/ipsec/ipsec.c @@ -19,11 +19,13 @@ #include #include +#include #include dpdk_crypto_main_t dpdk_crypto_main; #define EMPTY_STRUCT {0} +#define NUM_CRYPTO_MBUFS 16384 static void algos_init (u32 n_mains) @@ -250,7 +252,7 @@ crypto_set_aead_xform (struct rte_crypto_sym_xform *xform, xform->type = RTE_CRYPTO_SYM_XFORM_AEAD; xform->aead.algo = c->alg; - xform->aead.key.data = sa->crypto_key; + xform->aead.key.data = sa->crypto_key.data; xform->aead.key.length = c->key_len; xform->aead.iv.offset = crypto_op_get_priv_offset () + offsetof (dpdk_op_priv_t, cb); @@ -278,7 +280,7 @@ crypto_set_cipher_xform (struct rte_crypto_sym_xform *xform, xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; xform->cipher.algo = c->alg; - xform->cipher.key.data = sa->crypto_key; + xform->cipher.key.data = sa->crypto_key.data; xform->cipher.key.length = c->key_len; xform->cipher.iv.offset = crypto_op_get_priv_offset () + offsetof (dpdk_op_priv_t, cb); @@ -304,7 +306,7 @@ crypto_set_auth_xform (struct rte_crypto_sym_xform *xform, xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; xform->auth.algo = a->alg; - xform->auth.key.data = sa->integ_key; + xform->auth.key.data = sa->integ_key.data; xform->auth.key.length = a->key_len; xform->auth.digest_length = a->trunc_size; xform->next = NULL; @@ -329,7 +331,7 @@ create_sym_session (struct rte_cryptodev_sym_session **session, struct rte_crypto_sym_xform auth_xform = { 0 }; struct rte_crypto_sym_xform *xfs; struct rte_cryptodev_sym_session **s; - clib_error_t *erorr = 0; + clib_error_t *error = 0; sa = pool_elt_at_index (im->sad, sa_idx); @@ -374,7 +376,7 @@ create_sym_session (struct 
rte_cryptodev_sym_session **session, if (!session[0]) { data->session_h_failed += 1; - erorr = clib_error_return (0, "failed to create session header"); + error = clib_error_return (0, "failed to create session header"); goto done; } hash_set (data->session_by_sa_index, sa_idx, session[0]); @@ -391,7 +393,7 @@ create_sym_session (struct rte_cryptodev_sym_session **session, if (ret) { data->session_drv_failed[res->drv_id] += 1; - erorr = clib_error_return (0, "failed to init session for drv %u", + error = clib_error_return (0, "failed to init session for drv %u", res->drv_id); goto done; } @@ -400,7 +402,7 @@ create_sym_session (struct rte_cryptodev_sym_session **session, done: clib_spinlock_unlock_if_init (&data->lockp); - return erorr; + return error; } static void __attribute__ ((unused)) clear_and_free_obj (void *obj) @@ -417,7 +419,14 @@ static inline void * get_session_private_data (const struct rte_cryptodev_sym_session *sess, uint8_t driver_id) { +#if RTE_VERSION < RTE_VERSION_NUM(19, 2, 0, 0) return sess->sess_private_data[driver_id]; +#else + if (unlikely (sess->nb_drivers <= driver_id)) + return 0; + + return sess->sess_data[driver_id].data; +#endif } /* This is from rte_cryptodev_pmd.h */ @@ -425,7 +434,13 @@ static inline void set_session_private_data (struct rte_cryptodev_sym_session *sess, uint8_t driver_id, void *private_data) { +#if RTE_VERSION < RTE_VERSION_NUM(19, 2, 0, 0) sess->sess_private_data[driver_id] = private_data; +#else + if (unlikely (sess->nb_drivers <= driver_id)) + return; + sess->sess_data[driver_id].data = private_data; +#endif } static clib_error_t * @@ -496,7 +511,8 @@ add_del_sa_session (u32 sa_index, u8 is_add) case IPSEC_CRYPTO_ALG_AES_GCM_128: case IPSEC_CRYPTO_ALG_AES_GCM_192: case IPSEC_CRYPTO_ALG_AES_GCM_256: - clib_memcpy (&sa->salt, &sa->crypto_key[sa->crypto_key_len - 4], 4); + clib_memcpy (&sa->salt, + &sa->crypto_key.data[sa->crypto_key.len - 4], 4); break; default: seed = (u32) clib_cpu_time_now (); @@ -629,8 +645,8 
@@ crypto_parse_capabilities (crypto_dev_t * dev, static clib_error_t * crypto_dev_conf (u8 dev, u16 n_qp, u8 numa) { - struct rte_cryptodev_config dev_conf; - struct rte_cryptodev_qp_conf qp_conf; + struct rte_cryptodev_config dev_conf = { 0 }; + struct rte_cryptodev_qp_conf qp_conf = { 0 }; i32 ret; u16 qp; char *error_str; @@ -647,7 +663,11 @@ crypto_dev_conf (u8 dev, u16 n_qp, u8 numa) qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC; for (qp = 0; qp < n_qp; qp++) { +#if RTE_VERSION < RTE_VERSION_NUM(19, 2, 0, 0) ret = rte_cryptodev_queue_pair_setup (dev, qp, &qp_conf, numa, NULL); +#else + ret = rte_cryptodev_queue_pair_setup (dev, qp, &qp_conf, numa); +#endif if (ret < 0) return clib_error_return (0, error_str, dev, qp); } @@ -664,7 +684,7 @@ crypto_scan_devs (u32 n_mains) { dpdk_crypto_main_t *dcm = &dpdk_crypto_main; struct rte_cryptodev *cryptodev; - struct rte_cryptodev_info info; + struct rte_cryptodev_info info = { 0 }; crypto_dev_t *dev; crypto_resource_t *res; clib_error_t *error; @@ -704,7 +724,7 @@ crypto_scan_devs (u32 n_mains) continue; } - max_res_idx = (dev->max_qp / 2) - 1; + max_res_idx = dev->max_qp - 1; vec_validate (dev->free_resources, max_res_idx); @@ -713,13 +733,13 @@ crypto_scan_devs (u32 n_mains) (crypto_resource_t) EMPTY_STRUCT, CLIB_CACHE_LINE_BYTES); - for (j = 0; j <= max_res_idx; j++, res_idx++) + for (j = 0; j <= max_res_idx; j++) { - vec_elt (dev->free_resources, max_res_idx - j) = res_idx; - res = &dcm->resource[res_idx]; + vec_elt (dev->free_resources, max_res_idx - j) = res_idx + j; + res = &dcm->resource[res_idx + j]; res->dev_id = i; res->drv_id = drv_id; - res->qp_id = j * 2; + res->qp_id = j; res->numa = dev->numa; res->thread_idx = (u16) ~ 0; } @@ -835,11 +855,12 @@ crypto_create_crypto_op_pool (vlib_main_t * vm, u8 numa) pool_name = format (0, "crypto_pool_numa%u%c", numa, 0); - mp = - rte_mempool_create ((char *) pool_name, - conf->num_mbufs, - crypto_op_len (), 512, pool_priv_size, NULL, NULL, - crypto_op_init, 
NULL, numa, 0); + if (conf->num_crypto_mbufs == 0) + conf->num_crypto_mbufs = NUM_CRYPTO_MBUFS; + + mp = rte_mempool_create ((char *) pool_name, conf->num_crypto_mbufs, + crypto_op_len (), 512, pool_priv_size, NULL, NULL, + crypto_op_init, NULL, numa, 0); vec_free (pool_name); @@ -875,10 +896,15 @@ crypto_create_session_h_pool (vlib_main_t * vm, u8 numa) elt_size = rte_cryptodev_sym_get_header_session_size (); - mp = - rte_mempool_create ((char *) pool_name, DPDK_CRYPTO_NB_SESS_OBJS, - elt_size, 512, 0, NULL, NULL, NULL, NULL, numa, 0); - +#if RTE_VERSION < RTE_VERSION_NUM(19, 2, 0, 0) + mp = rte_mempool_create ((char *) pool_name, DPDK_CRYPTO_NB_SESS_OBJS, + elt_size, 512, 0, NULL, NULL, NULL, NULL, numa, 0); +#else + /* XXX Experimental tag in DPDK 19.02 */ + mp = rte_cryptodev_sym_session_pool_create ((char *) pool_name, + DPDK_CRYPTO_NB_SESS_OBJS, + elt_size, 512, 0, numa); +#endif vec_free (pool_name); if (!mp) @@ -1041,12 +1067,15 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, } - ipsec_register_esp_backend (vm, im, "dpdk backend", - "dpdk-esp4-encrypt", - "dpdk-esp4-decrypt", - "dpdk-esp6-encrypt", - "dpdk-esp6-decrypt", - dpdk_ipsec_check_support, add_del_sa_session); + u32 idx = ipsec_register_esp_backend (vm, im, "dpdk backend", + "dpdk-esp4-encrypt", + "dpdk-esp4-decrypt", + "dpdk-esp6-encrypt", + "dpdk-esp6-decrypt", + dpdk_ipsec_check_support, + add_del_sa_session); + int rv = ipsec_select_esp_backend (im, idx); + ASSERT (rv == 0); vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "dpdk-crypto-input"); ASSERT (node); diff --git a/src/plugins/dpdk/ipsec/ipsec.h b/src/plugins/dpdk/ipsec/ipsec.h index 054fe9b0223f..ac2b9b875a17 100644 --- a/src/plugins/dpdk/ipsec/ipsec.h +++ b/src/plugins/dpdk/ipsec/ipsec.h @@ -114,7 +114,7 @@ typedef struct u8 dev_id; u8 numa; u16 qp_id; - u16 inflights[2]; + u16 inflights; u16 n_ops; u16 __unused; struct rte_crypto_op *ops[VLIB_FRAME_SIZE]; @@ -301,7 +301,7 @@ crypto_free_ops (u8 numa, struct 
rte_crypto_op **ops, u32 n) } static_always_inline void -crypto_enqueue_ops (vlib_main_t * vm, crypto_worker_main_t * cwm, u8 outbound, +crypto_enqueue_ops (vlib_main_t * vm, crypto_worker_main_t * cwm, u32 node_index, u32 error, u8 numa) { dpdk_crypto_main_t *dcm = &dpdk_crypto_main; @@ -317,9 +317,9 @@ crypto_enqueue_ops (vlib_main_t * vm, crypto_worker_main_t * cwm, u8 outbound, if (!res->n_ops) continue; - enq = rte_cryptodev_enqueue_burst (res->dev_id, res->qp_id + outbound, + enq = rte_cryptodev_enqueue_burst (res->dev_id, res->qp_id, res->ops, res->n_ops); - res->inflights[outbound] += enq; + res->inflights += enq; if (PREDICT_FALSE (enq < res->n_ops)) { diff --git a/src/plugins/flowprobe/node.c b/src/plugins/flowprobe/node.c index 2cd754b67bb1..8dfc47cace96 100644 --- a/src/plugins/flowprobe/node.c +++ b/src/plugins/flowprobe/node.c @@ -631,7 +631,6 @@ flowprobe_get_buffer (vlib_main_t * vm, flowprobe_variant_t which) flow_report_main_t *frm = &flow_report_main; vlib_buffer_t *b0; u32 bi0; - vlib_buffer_free_list_t *fl; u32 my_cpu_number = vm->thread_index; /* Find or allocate a buffer */ @@ -650,9 +649,6 @@ flowprobe_get_buffer (vlib_main_t * vm, flowprobe_variant_t which) /* Initialize the buffer */ b0 = fm->context[which].buffers_per_worker[my_cpu_number] = vlib_get_buffer (vm, bi0); - fl = - vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); b0->current_data = 0; diff --git a/src/plugins/gbp/CMakeLists.txt b/src/plugins/gbp/CMakeLists.txt index 4b511413b826..ef254024d438 100644 --- a/src/plugins/gbp/CMakeLists.txt +++ b/src/plugins/gbp/CMakeLists.txt @@ -29,6 +29,7 @@ add_vpp_plugin(gbp gbp_recirc.c gbp_route_domain.c gbp_scanner.c + gbp_sclass.c gbp_subnet.c gbp_vxlan.c diff --git a/src/plugins/gbp/gbp.api b/src/plugins/gbp/gbp.api index e96cb508387e..a7a9a7e8dc86 100644 --- a/src/plugins/gbp/gbp.api +++ b/src/plugins/gbp/gbp.api @@ -31,6 +31,7 @@ 
typedef gbp_bridge_domain vl_api_gbp_bridge_domain_flags_t flags; u32 bvi_sw_if_index; u32 uu_fwd_sw_if_index; + u32 bm_flood_sw_if_index; }; autoreply define gbp_bridge_domain_add @@ -159,6 +160,7 @@ define gbp_endpoint_details typeonly define gbp_endpoint_group { u16 epg_id; + u16 sclass; u32 bd_id; u32 rd_id; u32 uplink_sw_if_index; diff --git a/src/plugins/gbp/gbp_api.c b/src/plugins/gbp/gbp_api.c index 8d80365c55dc..665d97ea0b13 100644 --- a/src/plugins/gbp/gbp_api.c +++ b/src/plugins/gbp/gbp_api.c @@ -298,6 +298,7 @@ static void int rv = 0; rv = gbp_endpoint_group_add_and_lock (ntohs (mp->epg.epg_id), + ntohs (mp->epg.sclass), ntohl (mp->epg.bd_id), ntohl (mp->epg.rd_id), ntohl (mp->epg.uplink_sw_if_index)); @@ -341,7 +342,8 @@ vl_api_gbp_bridge_domain_add_t_handler (vl_api_gbp_bridge_domain_add_t * mp) gbp_bridge_domain_flags_from_api (mp->bd.flags), ntohl (mp->bd.bvi_sw_if_index), - ntohl (mp->bd.uu_fwd_sw_if_index)); + ntohl (mp->bd.uu_fwd_sw_if_index), + ntohl (mp->bd.bm_flood_sw_if_index)); REPLY_MACRO (VL_API_GBP_BRIDGE_DOMAIN_ADD_REPLY + GBP_MSG_BASE); } @@ -523,6 +525,7 @@ gbp_endpoint_group_send_details (gbp_endpoint_group_t * gg, void *args) mp->epg.uplink_sw_if_index = ntohl (gg->gg_uplink_sw_if_index); mp->epg.epg_id = ntohs (gg->gg_id); + mp->epg.sclass = ntohs (gg->gg_sclass); mp->epg.bd_id = ntohl (gbp_endpoint_group_get_bd_id (gg)); mp->epg.rd_id = ntohl (gbp_route_domain_get_rd_id (gg->gg_rd)); @@ -567,6 +570,7 @@ gbp_bridge_domain_send_details (gbp_bridge_domain_t * gb, void *args) mp->bd.bd_id = ntohl (gb->gb_bd_id); mp->bd.bvi_sw_if_index = ntohl (gb->gb_bvi_sw_if_index); mp->bd.uu_fwd_sw_if_index = ntohl (gb->gb_uu_fwd_sw_if_index); + mp->bd.bm_flood_sw_if_index = ntohl (gb->gb_bm_flood_sw_if_index); vl_api_send_msg (ctx->reg, (u8 *) mp); diff --git a/src/plugins/gbp/gbp_bridge_domain.c b/src/plugins/gbp/gbp_bridge_domain.c index 21ffe9cc314d..049c89bebabb 100644 --- a/src/plugins/gbp/gbp_bridge_domain.c +++ 
b/src/plugins/gbp/gbp_bridge_domain.c @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -147,7 +148,9 @@ format_gbp_bridge_domain (u8 * s, va_list * args) int gbp_bridge_domain_add_and_lock (u32 bd_id, gbp_bridge_domain_flags_t flags, - u32 bvi_sw_if_index, u32 uu_fwd_sw_if_index) + u32 bvi_sw_if_index, + u32 uu_fwd_sw_if_index, + u32 bm_flood_sw_if_index) { gbp_bridge_domain_t *gb; index_t gbi; @@ -175,6 +178,7 @@ gbp_bridge_domain_add_and_lock (u32 bd_id, gb->gb_bd_index = bd_index; gb->gb_uu_fwd_sw_if_index = uu_fwd_sw_if_index; gb->gb_bvi_sw_if_index = bvi_sw_if_index; + gb->gb_bm_flood_sw_if_index = bm_flood_sw_if_index; gb->gb_locks = 1; gb->gb_flags = flags; @@ -185,9 +189,19 @@ gbp_bridge_domain_add_and_lock (u32 bd_id, MODE_L2_BRIDGE, gb->gb_bvi_sw_if_index, bd_index, L2_BD_PORT_TYPE_BVI, 0, 0); if (~0 != gb->gb_uu_fwd_sw_if_index) - set_int_l2_mode (vlib_get_main (), vnet_get_main (), - MODE_L2_BRIDGE, gb->gb_uu_fwd_sw_if_index, - bd_index, L2_BD_PORT_TYPE_UU_FWD, 0, 0); + { + set_int_l2_mode (vlib_get_main (), vnet_get_main (), + MODE_L2_BRIDGE, gb->gb_uu_fwd_sw_if_index, + bd_index, L2_BD_PORT_TYPE_UU_FWD, 0, 0); + gbp_sclass_enable_l2 (gb->gb_uu_fwd_sw_if_index); + } + if (~0 != gb->gb_bm_flood_sw_if_index) + { + set_int_l2_mode (vlib_get_main (), vnet_get_main (), + MODE_L2_BRIDGE, gb->gb_bm_flood_sw_if_index, + bd_index, L2_BD_PORT_TYPE_NORMAL, 0, 0); + gbp_sclass_enable_l2 (gb->gb_bm_flood_sw_if_index); + } /* * Add the BVI's MAC to the L2FIB @@ -232,9 +246,19 @@ gbp_bridge_domain_unlock (index_t index) MODE_L3, gb->gb_bvi_sw_if_index, gb->gb_bd_index, L2_BD_PORT_TYPE_BVI, 0, 0); if (~0 != gb->gb_uu_fwd_sw_if_index) - set_int_l2_mode (vlib_get_main (), vnet_get_main (), - MODE_L3, gb->gb_uu_fwd_sw_if_index, - gb->gb_bd_index, L2_BD_PORT_TYPE_UU_FWD, 0, 0); + { + set_int_l2_mode (vlib_get_main (), vnet_get_main (), + MODE_L3, gb->gb_uu_fwd_sw_if_index, + gb->gb_bd_index, L2_BD_PORT_TYPE_UU_FWD, 0, 0); + gbp_sclass_disable_l2 
(gb->gb_uu_fwd_sw_if_index); + } + if (~0 != gb->gb_bm_flood_sw_if_index) + { + set_int_l2_mode (vlib_get_main (), vnet_get_main (), + MODE_L3, gb->gb_bm_flood_sw_if_index, + gb->gb_bd_index, L2_BD_PORT_TYPE_NORMAL, 0, 0); + gbp_sclass_disable_l2 (gb->gb_bm_flood_sw_if_index); + } gbp_bridge_domain_db_remove (gb); @@ -280,6 +304,7 @@ gbp_bridge_domain_cli (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { vnet_main_t *vnm = vnet_get_main (); + u32 bm_flood_sw_if_index = ~0; u32 uu_fwd_sw_if_index = ~0; u32 bvi_sw_if_index = ~0; u32 bd_id = ~0; @@ -290,9 +315,12 @@ gbp_bridge_domain_cli (vlib_main_t * vm, if (unformat (input, "bvi %U", unformat_vnet_sw_interface, vnm, &bvi_sw_if_index)) ; - else if (unformat (input, "uu-flood %U", unformat_vnet_sw_interface, + else if (unformat (input, "uu-fwd %U", unformat_vnet_sw_interface, vnm, &uu_fwd_sw_if_index)) ; + else if (unformat (input, "bm-flood %U", unformat_vnet_sw_interface, + vnm, &bm_flood_sw_if_index)) + ; else if (unformat (input, "add")) add = 1; else if (unformat (input, "del")) @@ -312,7 +340,9 @@ gbp_bridge_domain_cli (vlib_main_t * vm, return clib_error_return (0, "interface must be specified"); gbp_bridge_domain_add_and_lock (bd_id, GBP_BD_FLAG_NONE, - bvi_sw_if_index, uu_fwd_sw_if_index); + bvi_sw_if_index, + uu_fwd_sw_if_index, + bm_flood_sw_if_index); } else gbp_bridge_domain_delete (bd_id); diff --git a/src/plugins/gbp/gbp_bridge_domain.h b/src/plugins/gbp/gbp_bridge_domain.h index 65f133c84da7..95b53dc2088b 100644 --- a/src/plugins/gbp/gbp_bridge_domain.h +++ b/src/plugins/gbp/gbp_bridge_domain.h @@ -57,6 +57,11 @@ typedef struct gbp_bridge_domain_t_ */ u32 gb_uu_fwd_sw_if_index; + /** + * The BD's interface to send Broadcast and multicast packets + */ + u32 gb_bm_flood_sw_if_index; + /** * The BD's VNI interface on which packets from unkown endpoints * arrive @@ -73,7 +78,9 @@ typedef struct gbp_bridge_domain_t_ extern int gbp_bridge_domain_add_and_lock (u32 bd_id, 
gbp_bridge_domain_flags_t flags, u32 bvi_sw_if_index, - u32 uu_fwd_sw_if_index); + u32 uu_fwd_sw_if_index, + u32 bm_flood_sw_if_index); + extern void gbp_bridge_domain_unlock (index_t gbi); extern index_t gbp_bridge_domain_find_and_lock (u32 bd_id); extern int gbp_bridge_domain_delete (u32 bd_id); diff --git a/src/plugins/gbp/gbp_endpoint.c b/src/plugins/gbp/gbp_endpoint.c index 0746b70d57fb..f32a8820238b 100644 --- a/src/plugins/gbp/gbp_endpoint.c +++ b/src/plugins/gbp/gbp_endpoint.c @@ -111,7 +111,7 @@ static void gbp_endpoint_extract_key_mac_itf (const clib_bihash_kv_16_8_t * key, mac_address_t * mac, u32 * sw_if_index) { - mac_address_from_u64 (key->key[0], mac); + mac_address_from_u64 (mac, key->key[0]); *sw_if_index = key->key[1]; } diff --git a/src/plugins/gbp/gbp_endpoint_group.c b/src/plugins/gbp/gbp_endpoint_group.c index 834f865bc927..cefdbea3652b 100644 --- a/src/plugins/gbp/gbp_endpoint_group.c +++ b/src/plugins/gbp/gbp_endpoint_group.c @@ -33,6 +33,12 @@ gbp_endpoint_group_t *gbp_endpoint_group_pool; * DB of endpoint_groups */ gbp_endpoint_group_db_t gbp_endpoint_group_db; + +/** + * Map sclass to EPG + */ +uword *gbp_epg_sclass_db; + vlib_log_class_t gg_logger; #define GBP_EPG_DBG(...) 
\ @@ -68,6 +74,7 @@ gbp_endpoint_group_find (epg_id_t epg_id) int gbp_endpoint_group_add_and_lock (epg_id_t epg_id, + u16 sclass, u32 bd_id, u32 rd_id, u32 uplink_sw_if_index) { gbp_endpoint_group_t *gg; @@ -105,6 +112,10 @@ gbp_endpoint_group_add_and_lock (epg_id_t epg_id, gg->gg_uplink_sw_if_index = uplink_sw_if_index; gg->gg_locks = 1; + gg->gg_sclass = sclass; + + if (SCLASS_INVALID != gg->gg_sclass) + hash_set (gbp_epg_sclass_db, gg->gg_sclass, gg->gg_id); /* * an egress DVR dpo for internal subnets to use when sending @@ -179,6 +190,8 @@ gbp_endpoint_group_unlock (index_t ggi) gbp_bridge_domain_unlock (gg->gg_gbd); gbp_route_domain_unlock (gg->gg_rd); + if (SCLASS_INVALID != gg->gg_sclass) + hash_unset (gbp_epg_sclass_db, gg->gg_sclass); hash_unset (gbp_endpoint_group_db.gg_hash, gg->gg_id); pool_put (gbp_endpoint_group_pool, gg); @@ -243,8 +256,8 @@ static clib_error_t * gbp_endpoint_group_cli (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { + epg_id_t epg_id = EPG_INVALID, sclass; vnet_main_t *vnm = vnet_get_main (); - epg_id_t epg_id = EPG_INVALID; u32 uplink_sw_if_index = ~0; u32 bd_id = ~0; u32 rd_id = ~0; @@ -261,6 +274,8 @@ gbp_endpoint_group_cli (vlib_main_t * vm, add = 0; else if (unformat (input, "epg %d", &epg_id)) ; + else if (unformat (input, "sclass %d", &sclass)) + ; else if (unformat (input, "bd %d", &bd_id)) ; else if (unformat (input, "rd %d", &rd_id)) @@ -281,7 +296,7 @@ gbp_endpoint_group_cli (vlib_main_t * vm, if (~0 == rd_id) return clib_error_return (0, "route-domain must be specified"); - gbp_endpoint_group_add_and_lock (epg_id, bd_id, rd_id, + gbp_endpoint_group_add_and_lock (epg_id, sclass, bd_id, rd_id, uplink_sw_if_index); } else diff --git a/src/plugins/gbp/gbp_endpoint_group.h b/src/plugins/gbp/gbp_endpoint_group.h index 763a80e4d878..123954f63ea4 100644 --- a/src/plugins/gbp/gbp_endpoint_group.h +++ b/src/plugins/gbp/gbp_endpoint_group.h @@ -30,6 +30,11 @@ typedef struct gpb_endpoint_group_t_ */ epg_id_t 
gg_id; + /** + * Sclass. Could be unset => ~0 + */ + u16 gg_sclass; + /** * Bridge-domain ID the EPG is in */ @@ -71,6 +76,7 @@ typedef struct gbp_endpoint_group_db_t_ } gbp_endpoint_group_db_t; extern int gbp_endpoint_group_add_and_lock (epg_id_t epg_id, + u16 sclass, u32 bd_id, u32 rd_id, u32 uplink_sw_if_index); @@ -96,6 +102,19 @@ extern u8 *format_gbp_endpoint_group (u8 * s, va_list * args); */ extern gbp_endpoint_group_db_t gbp_endpoint_group_db; extern gbp_endpoint_group_t *gbp_endpoint_group_pool; +extern uword *gbp_epg_sclass_db; + +always_inline gbp_endpoint_group_t * +gbp_epg_get (epg_id_t epg) +{ + uword *p; + + p = hash_get (gbp_endpoint_group_db.gg_hash, epg); + + if (NULL != p) + return (pool_elt_at_index (gbp_endpoint_group_pool, p[0])); + return (NULL); +} always_inline u32 gbp_epg_itf_lookup (epg_id_t epg) @@ -114,6 +133,20 @@ gbp_epg_itf_lookup (epg_id_t epg) return (~0); } +always_inline epg_id_t +gbp_epg_sclass_2_id (u16 sclass) +{ + uword *p; + + p = hash_get (gbp_epg_sclass_db, sclass); + + if (NULL != p) + { + return (p[0]); + } + return (EPG_INVALID); +} + always_inline const dpo_id_t * gbp_epg_dpo_lookup (epg_id_t epg, fib_protocol_t fproto) { diff --git a/src/plugins/gbp/gbp_learn.c b/src/plugins/gbp/gbp_learn.c index 762b463223e6..514aca26ef90 100644 --- a/src/plugins/gbp/gbp_learn.c +++ b/src/plugins/gbp/gbp_learn.c @@ -706,7 +706,9 @@ void gbp_learn_enable (u32 sw_if_index, gbb_learn_mode_t mode) { if (GBP_LEARN_MODE_L2 == mode) - l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_GBP_LEARN, 1); + { + l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_GBP_LEARN, 1); + } else { vnet_feature_enable_disable ("ip4-unicast", @@ -720,7 +722,9 @@ void gbp_learn_disable (u32 sw_if_index, gbb_learn_mode_t mode) { if (GBP_LEARN_MODE_L2 == mode) - l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_GBP_LEARN, 0); + { + l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_GBP_LEARN, 0); + } else { vnet_feature_enable_disable 
("ip4-unicast", diff --git a/src/plugins/gbp/gbp_route_domain.c b/src/plugins/gbp/gbp_route_domain.c index 67b6915b4638..6a3f4fa7f1ec 100644 --- a/src/plugins/gbp/gbp_route_domain.c +++ b/src/plugins/gbp/gbp_route_domain.c @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -182,6 +183,8 @@ gbp_route_domain_add_and_lock (u32 rd_id, &ADJ_BCAST_ADDR, grd->grd_uu_sw_if_index[fproto], rewrite); + + gbp_sclass_enable_ip (grd->grd_uu_sw_if_index[fproto]); } else { @@ -223,6 +226,8 @@ gbp_route_domain_unlock (index_t index) fproto, FIB_SOURCE_PLUGIN_HI); if (INDEX_INVALID != grd->grd_adj[fproto]) adj_unlock (grd->grd_adj[fproto]); + if (~0 != grd->grd_uu_sw_if_index[fproto]) + gbp_sclass_disable_ip (grd->grd_uu_sw_if_index[fproto]); } gbp_route_domain_db_remove (grd); diff --git a/src/plugins/gbp/gbp_sclass.c b/src/plugins/gbp/gbp_sclass.c new file mode 100644 index 000000000000..e25ea38de011 --- /dev/null +++ b/src/plugins/gbp/gbp_sclass.c @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +/** + * Grouping of global data for the GBP source EPG classification feature + */ +typedef struct gbp_sclass_main_t_ +{ + /** + * Next nodes for L2 output features + */ + u32 gel_l2_input_feat_next[32]; + u32 gel_l2_output_feat_next[32]; +} gbp_sclass_main_t; + +static gbp_sclass_main_t gbp_sclass_main; + +#define foreach_gbp_sclass \ + _(DROP, "drop") + + +typedef enum +{ +#define _(sym,str) GBP_SCLASS_NEXT_##sym, + foreach_gbp_sclass +#undef _ + GBP_SCLASS_N_NEXT, +} gbp_sclass_next_t; + +typedef struct gbp_sclass_trace_t_ +{ + /* per-pkt trace data */ + u32 epg; + u32 sclass; +} gbp_sclass_trace_t; + +static_always_inline uword +gbp_sclass_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int is_id_2_sclass, int is_l2) +{ + u32 n_left_from, *from, *to_next, next_index; + gbp_sclass_main_t *glm; + + glm = &gbp_sclass_main; + next_index = 0; + n_left_from = frame->n_vectors; + from = vlib_frame_vector_args (frame); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + gbp_sclass_next_t next0; + vlib_buffer_t *b0; + epg_id_t epg0; + u16 sclass0; + u32 bi0; + + next0 = GBP_SCLASS_NEXT_DROP; + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (is_id_2_sclass) + { + // output direction - convert from the SRC-EPD to the sclass + gbp_endpoint_group_t *gg; + + epg0 = vnet_buffer2 (b0)->gbp.src_epg; + gg = gbp_epg_get (epg0); + + if (NULL != gg) + { + sclass0 = vnet_buffer2 (b0)->gbp.sclass = gg->gg_sclass; + if (is_l2) + next0 = + vnet_l2_feature_next (b0, glm->gel_l2_output_feat_next, + L2OUTPUT_FEAT_GBP_ID_2_SCLASS); + else + vnet_feature_next (&next0, b0); + } + else + sclass0 = 0; + } + else + { + /* input direction - convert from the sclass to the SRC-EGD */ + sclass0 = 
vnet_buffer2 (b0)->gbp.sclass; + vnet_buffer2 (b0)->gbp.src_epg = + gbp_epg_sclass_2_id (vnet_buffer2 (b0)->gbp.sclass); + epg0 = vnet_buffer2 (b0)->gbp.src_epg; + + if (EPG_INVALID != epg0) + { + if (is_l2) + next0 = + vnet_l2_feature_next (b0, glm->gel_l2_input_feat_next, + L2INPUT_FEAT_GBP_SCLASS_2_ID); + else + vnet_feature_next (&next0, b0); + } + } + + if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED))) + { + gbp_sclass_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->epg = epg0; + t->sclass = sclass0; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +uword +l2_gbp_id_2_sclass (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return (gbp_sclass_inline (vm, node, frame, 1, 1)); +} + +uword +l2_gbp_sclass_2_id (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return (gbp_sclass_inline (vm, node, frame, 0, 1)); +} + +uword +ip4_gbp_id_2_sclass (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return (gbp_sclass_inline (vm, node, frame, 1, 0)); +} + +uword +ip4_gbp_sclass_2_id (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return (gbp_sclass_inline (vm, node, frame, 0, 0)); +} + +uword +ip6_gbp_id_2_sclass (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return (gbp_sclass_inline (vm, node, frame, 1, 0)); +} + +uword +ip6_gbp_sclass_2_id (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return (gbp_sclass_inline (vm, node, frame, 0, 0)); +} + +/* packet trace format function */ +static u8 * +format_gbp_sclass_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + gbp_sclass_trace_t *t = va_arg (*args, 
gbp_sclass_trace_t *); + + s = format (s, "epg:%d sclass:%d", t->epg, t->sclass); + + return s; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (l2_gbp_id_2_sclass_node) = { + .function = l2_gbp_id_2_sclass, + .name = "l2-gbp-id-2-sclass", + .vector_size = sizeof (u32), + .format_trace = format_gbp_sclass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = GBP_SCLASS_N_NEXT, + + .next_nodes = { + [GBP_SCLASS_NEXT_DROP] = "error-drop", + }, +}; +VLIB_REGISTER_NODE (l2_gbp_sclass_2_id_node) = { + .function = l2_gbp_sclass_2_id, + .name = "l2-gbp-sclass-2-id", + .vector_size = sizeof (u32), + .format_trace = format_gbp_sclass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = GBP_SCLASS_N_NEXT, + + .next_nodes = { + [GBP_SCLASS_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE (ip4_gbp_id_2_sclass_node) = { + .function = ip4_gbp_id_2_sclass, + .name = "ip4-gbp-id-2-sclass", + .vector_size = sizeof (u32), + .format_trace = format_gbp_sclass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = GBP_SCLASS_N_NEXT, + + .next_nodes = { + [GBP_SCLASS_NEXT_DROP] = "error-drop", + }, +}; +VLIB_REGISTER_NODE (ip4_gbp_sclass_2_id_node) = { + .function = ip4_gbp_sclass_2_id, + .name = "ip4-gbp-sclass-2-id", + .vector_size = sizeof (u32), + .format_trace = format_gbp_sclass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = GBP_SCLASS_N_NEXT, + + .next_nodes = { + [GBP_SCLASS_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE (ip6_gbp_id_2_sclass_node) = { + .function = ip6_gbp_id_2_sclass, + .name = "ip6-gbp-id-2-sclass", + .vector_size = sizeof (u32), + .format_trace = format_gbp_sclass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = GBP_SCLASS_N_NEXT, + + .next_nodes = { + [GBP_SCLASS_NEXT_DROP] = "error-drop", + }, +}; +VLIB_REGISTER_NODE (ip6_gbp_sclass_2_id_node) = { + .function = ip6_gbp_sclass_2_id, + .name = "ip6-gbp-sclass-2-id", + .vector_size = sizeof (u32), + .format_trace = format_gbp_sclass_trace, + 
.type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = GBP_SCLASS_N_NEXT, + + .next_nodes = { + [GBP_SCLASS_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (l2_gbp_id_2_sclass_node, l2_gbp_id_2_sclass); +VLIB_NODE_FUNCTION_MULTIARCH (l2_gbp_sclass_2_id_node, l2_gbp_sclass_2_id); + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_gbp_id_2_sclass_node, ip4_gbp_id_2_sclass); +VLIB_NODE_FUNCTION_MULTIARCH (ip4_gbp_sclass_2_id_node, ip4_gbp_sclass_2_id); +VLIB_NODE_FUNCTION_MULTIARCH (ip6_gbp_id_2_sclass_node, ip6_gbp_id_2_sclass); +VLIB_NODE_FUNCTION_MULTIARCH (ip6_gbp_sclass_2_id_node, ip6_gbp_sclass_2_id); + +VNET_FEATURE_INIT (ip4_gbp_sclass_2_id_feat, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "ip4-gbp-sclass-2-id", + .runs_before = VNET_FEATURES ("gbp-learn-ip4"), +}; +VNET_FEATURE_INIT (ip6_gbp_sclass_2_id_feat, static) = +{ + .arc_name = "ip6-unicast", + .node_name = "ip6-gbp-sclass-2-id", + .runs_before = VNET_FEATURES ("gbp-learn-ip6"), +}; +VNET_FEATURE_INIT (ip4_gbp_id_2_sclass_feat, static) = +{ + .arc_name = "ip4-output", + .node_name = "ip4-gbp-id-2-sclass", +}; +VNET_FEATURE_INIT (ip6_gbp_id_2_sclass_feat, static) = +{ + .arc_name = "ip6-output", + .node_name = "ip6-gbp-id-2-sclass", +}; +/* *INDENT-ON* */ + +void +gbp_sclass_enable_l2 (u32 sw_if_index) +{ + l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_GBP_SCLASS_2_ID, 1); + l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_GBP_ID_2_SCLASS, 1); +} + +void +gbp_sclass_disable_l2 (u32 sw_if_index) +{ + l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_GBP_SCLASS_2_ID, 0); + l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_GBP_ID_2_SCLASS, 0); +} + +void +gbp_sclass_enable_ip (u32 sw_if_index) +{ + vnet_feature_enable_disable ("ip4-unicast", + "ip4-gbp-sclass-2-id", sw_if_index, 1, 0, 0); + vnet_feature_enable_disable ("ip6-unicast", + "ip6-gbp-sclass-2-id", sw_if_index, 1, 0, 0); + vnet_feature_enable_disable ("ip4-output", + "ip4-gbp-id-2-sclass", sw_if_index, 
1, 0, 0); + vnet_feature_enable_disable ("ip6-output", + "ip6-gbp-id-2-sclass", sw_if_index, 1, 0, 0); +} + +void +gbp_sclass_disable_ip (u32 sw_if_index) +{ + vnet_feature_enable_disable ("ip4-unicast", + "ip4-gbp-sclass-2-id", sw_if_index, 0, 0, 0); + vnet_feature_enable_disable ("ip6-unicast", + "ip6-gbp-sclass-2-id", sw_if_index, 0, 0, 0); + vnet_feature_enable_disable ("ip4-output", + "ip4-gbp-id-2-sclass", sw_if_index, 0, 0, 0); + vnet_feature_enable_disable ("ip6-output", + "ip6-gbp-id-2-sclass", sw_if_index, 0, 0, 0); +} + +static clib_error_t * +gbp_sclass_init (vlib_main_t * vm) +{ + gbp_sclass_main_t *glm = &gbp_sclass_main; + + /* Initialize the feature next-node indices */ + feat_bitmap_init_next_nodes (vm, + l2_gbp_sclass_2_id_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names (), + glm->gel_l2_input_feat_next); + feat_bitmap_init_next_nodes (vm, + l2_gbp_id_2_sclass_node.index, + L2OUTPUT_N_FEAT, + l2output_get_feat_names (), + glm->gel_l2_output_feat_next); + + return (NULL); +} + +VLIB_INIT_FUNCTION (gbp_sclass_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/gbp/gbp_sclass.h b/src/plugins/gbp/gbp_sclass.h new file mode 100644 index 000000000000..07c5fffcc96f --- /dev/null +++ b/src/plugins/gbp/gbp_sclass.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __GBP_SCLASS_H__ +#define __GBP_SCLASS_H__ + +#include + +extern void gbp_sclass_enable_ip (u32 sw_if_index); +extern void gbp_sclass_enable_l2 (u32 sw_if_index); +extern void gbp_sclass_disable_ip (u32 sw_if_index); +extern void gbp_sclass_disable_l2 (u32 sw_if_index); + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/gbp/gbp_types.h b/src/plugins/gbp/gbp_types.h index afb17e3a5d5e..0faa74b694e0 100644 --- a/src/plugins/gbp/gbp_types.h +++ b/src/plugins/gbp/gbp_types.h @@ -20,6 +20,7 @@ typedef u16 epg_id_t; #define EPG_INVALID ((u16)~0) +#define SCLASS_INVALID ((u16)~0) #endif diff --git a/src/plugins/gbp/gbp_vxlan.c b/src/plugins/gbp/gbp_vxlan.c index 2b264f813ed4..7fbd7e9e7d50 100644 --- a/src/plugins/gbp/gbp_vxlan.c +++ b/src/plugins/gbp/gbp_vxlan.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -106,13 +107,12 @@ format_vxlan_tunnel_ref (u8 * s, va_list * args) static u32 gdb_vxlan_dep_add (gbp_vxlan_tunnel_t * gt, - u32 vni, const ip46_address_t * src, const ip46_address_t * dst) { vnet_vxlan_gbp_tunnel_add_del_args_t args = { .is_add = 1, .is_ip6 = !ip46_address_is_ip4 (src), - .vni = vni, + .vni = gt->gt_vni, .src = *src, .dst = *dst, .instance = ~0, @@ -140,7 +140,7 @@ gdb_vxlan_dep_add (gbp_vxlan_tunnel_t * gt, GBP_VXLAN_TUN_DBG ("add-dep:%U %U %U %d", format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index, format_ip46_address, src, IP46_TYPE_ANY, - format_ip46_address, dst, IP46_TYPE_ANY, vni); + format_ip46_address, dst, IP46_TYPE_ANY, gt->gt_vni); pool_get_zero (vxlan_tunnel_ref_pool, vxr); @@ -161,13 +161,25 @@ gdb_vxlan_dep_add (gbp_vxlan_tunnel_t * gt, if (GBP_VXLAN_TUN_L2 == vxr->vxr_layer) { + l2output_feat_masks_t ofeat; + l2input_feat_masks_t ifeat; + gbp_bridge_domain_t *gbd; + + gbd = gbp_bridge_domain_get (gt->gt_gbd); vxr->vxr_itf = gbp_itf_add_and_lock (vxr->vxr_sw_if_index, 
gt->gt_bd_index); - gbp_itf_set_l2_output_feature (vxr->vxr_itf, vxr->vxr_sw_if_index, - L2OUTPUT_FEAT_GBP_POLICY_MAC); - gbp_itf_set_l2_input_feature (vxr->vxr_itf, vxr->vxr_sw_if_index, - L2INPUT_FEAT_GBP_LEARN); + ofeat = (L2OUTPUT_FEAT_GBP_POLICY_MAC | + L2OUTPUT_FEAT_GBP_ID_2_SCLASS); + ifeat = L2INPUT_FEAT_GBP_SCLASS_2_ID; + + if (!(gbd->gb_flags & GBP_BD_FLAG_DO_NOT_LEARN)) + ifeat |= L2INPUT_FEAT_GBP_LEARN; + + gbp_itf_set_l2_output_feature (vxr->vxr_itf, + vxr->vxr_sw_if_index, ofeat); + gbp_itf_set_l2_input_feature (vxr->vxr_itf, + vxr->vxr_sw_if_index, ifeat); } else { @@ -181,6 +193,7 @@ gdb_vxlan_dep_add (gbp_vxlan_tunnel_t * gt, grd->grd_table_id[fproto], 1); gbp_learn_enable (vxr->vxr_sw_if_index, GBP_LEARN_MODE_L3); + gbp_sclass_enable_ip (vxr->vxr_sw_if_index); } } @@ -235,7 +248,7 @@ gbp_vxlan_tunnel_clone_and_lock (u32 sw_if_index, gt = pool_elt_at_index (gbp_vxlan_tunnel_pool, gti); - return (gdb_vxlan_dep_add (gt, gt->gt_vni, src, dst)); + return (gdb_vxlan_dep_add (gt, src, dst)); } static void @@ -270,6 +283,8 @@ gdb_vxlan_dep_del (index_t vxri) FOR_EACH_FIB_IP_PROTOCOL (fproto) ip_table_bind (fproto, vxr->vxr_sw_if_index, 0, 0); + gbp_sclass_disable_ip (vxr->vxr_sw_if_index); + gbp_learn_disable (vxr->vxr_sw_if_index, GBP_LEARN_MODE_L3); } vnet_vxlan_gbp_tunnel_del (vxr->vxr_sw_if_index); @@ -712,6 +727,7 @@ gbp_vxlan_tunnel_add (u32 vni, gbp_vxlan_tunnel_layer_t layer, gt->gt_itf = gbp_itf_add_and_lock (gt->gt_sw_if_index, gt->gt_bd_index); gbp_learn_enable (gt->gt_sw_if_index, GBP_LEARN_MODE_L2); + gbp_sclass_enable_l2 (gt->gt_sw_if_index); } else { @@ -724,6 +740,7 @@ gbp_vxlan_tunnel_add (u32 vni, gbp_vxlan_tunnel_layer_t layer, grd->grd_vni_sw_if_index = gt->gt_sw_if_index; gbp_learn_enable (gt->gt_sw_if_index, GBP_LEARN_MODE_L3); + gbp_sclass_enable_ip (gt->gt_sw_if_index); ip4_sw_interface_enable_disable (gt->gt_sw_if_index, 1); ip6_sw_interface_enable_disable (gt->gt_sw_if_index, 1); @@ -788,6 +805,7 @@ gbp_vxlan_tunnel_del (u32 vni) 
if (GBP_VXLAN_TUN_L2 == gt->gt_layer) { gbp_learn_disable (gt->gt_sw_if_index, GBP_LEARN_MODE_L2); + gbp_sclass_disable_l2 (gt->gt_sw_if_index); gbp_itf_unlock (gt->gt_itf); gbp_bridge_domain_unlock (gt->gt_gbd); } @@ -802,6 +820,7 @@ gbp_vxlan_tunnel_del (u32 vni) ip6_sw_interface_enable_disable (gt->gt_sw_if_index, 0); gbp_learn_disable (gt->gt_sw_if_index, GBP_LEARN_MODE_L3); + gbp_sclass_disable_ip (gt->gt_sw_if_index); gbp_route_domain_unlock (gt->gt_grd); } diff --git a/src/plugins/igmp/igmp_pkt.c b/src/plugins/igmp/igmp_pkt.c index 81b8ecb18bc2..e93dd9c26678 100644 --- a/src/plugins/igmp/igmp_pkt.c +++ b/src/plugins/igmp/igmp_pkt.c @@ -27,7 +27,6 @@ vlib_buffer_append (vlib_buffer_t * b, uword l) static vlib_buffer_t * igmp_pkt_get_buffer (igmp_pkt_build_t * bk) { - vlib_buffer_free_list_t *fl; vlib_main_t *vm; vlib_buffer_t *b; u32 bi; @@ -38,8 +37,6 @@ igmp_pkt_get_buffer (igmp_pkt_build_t * bk) return (NULL); b = vlib_get_buffer (vm, bi); - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b, fl); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; diff --git a/src/plugins/ixge/ixge.c b/src/plugins/ixge/ixge.c index a3c0c978ab38..8faa9a91ca25 100644 --- a/src/plugins/ixge/ixge.c +++ b/src/plugins/ixge/ixge.c @@ -2238,8 +2238,6 @@ VLIB_REGISTER_NODE (ixge_input_node, static) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH_CLONE (ixge_input) -CLIB_MULTIARCH_SELECT_FN (ixge_input) /* *INDENT-ON* */ static u8 * @@ -2851,7 +2849,7 @@ ixge_pci_init (vlib_main_t * vm, vlib_pci_dev_handle_t h) if (vec_len (xm->devices) == 1) { - ixge_input_node.function = ixge_input_multiarch_select (); + ixge_input_node.function = ixge_input; } xd->pci_dev_handle = h; diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c index 532643398115..fe7e54505127 100644 --- a/src/plugins/lb/lb.c +++ b/src/plugins/lb/lb.c @@ -984,10 +984,15 @@ static int lb_vip_del_port_filter(lb_main_t *lbm, lb_vip_t *vip) 
key.vip_prefix_index = vip->vip_prefix_index; key.protocol = vip->protocol; key.port = clib_host_to_net_u16(vip->port); + key.rsv = 0; kv.key = key.as_u64; - if(clib_bihash_search_8_8(&lbm->vip_index_per_port, &kv, &value) == 0) - m = pool_elt_at_index (lbm->vips, value.value); + if(clib_bihash_search_8_8(&lbm->vip_index_per_port, &kv, &value) != 0) + { + clib_warning("looking up vip_index_per_port failed."); + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + m = pool_elt_at_index (lbm->vips, value.value); ASSERT (m); kv.value = m - lbm->vips; @@ -1190,10 +1195,11 @@ int lb_vip_del(u32 vip_index) { lb_main_t *lbm = &lb_main; lb_vip_t *vip; + int rv = 0; /* Does not remove default vip, i.e. vip_index = 0 */ if (vip_index == 0) - return 0; + return VNET_API_ERROR_INVALID_VALUE; lb_get_writer_lock(); if (!(vip = lb_vip_get_by_index(vip_index))) { @@ -1225,14 +1231,14 @@ int lb_vip_del(u32 vip_index) //Delete per-port vip filtering entry if (vip->port != 0) { - lb_vip_del_port_filter(lbm, vip); + rv = lb_vip_del_port_filter(lbm, vip); } //Set the VIP as unused vip->flags &= ~LB_VIP_FLAGS_USED; lb_put_writer_lock(); - return 0; + return rv; } /* *INDENT-OFF* */ diff --git a/src/plugins/mactime/mactime.c b/src/plugins/mactime/mactime.c index 837b317027d8..572b913ba07a 100644 --- a/src/plugins/mactime/mactime.c +++ b/src/plugins/mactime/mactime.c @@ -553,8 +553,7 @@ show_mactime_command_fn (vlib_main_t * vm, for (j = 0; j < vec_len (mm->arp_cache_copy); j++) { n = mm->arp_cache_copy + j; - if (!memcmp (dp->mac_address, n->ethernet_address, - sizeof (n->ethernet_address))) + if (!memcmp (dp->mac_address, n->mac.bytes, sizeof (n->mac))) { vlib_cli_output (vm, "%17s%U", " ", format_ip4_address, &n->ip4_address); diff --git a/src/plugins/mactime/mactime.h b/src/plugins/mactime/mactime.h index 1a13e41f6065..8d4165212a44 100644 --- a/src/plugins/mactime/mactime.h +++ b/src/plugins/mactime/mactime.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include 
diff --git a/src/plugins/map/map.c b/src/plugins/map/map.c index 6b15ee1919ed..47af8947fb9d 100644 --- a/src/plugins/map/map.c +++ b/src/plugins/map/map.c @@ -1048,7 +1048,7 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input, map_main_t *mm = &map_main; map_domain_t *d; int domains = 0, rules = 0, domaincount = 0, rulecount = 0; - if (pool_elts (mm->domains) == 0) + if (pool_elts (mm->domains) <= 1) { vlib_cli_output (vm, "No MAP domains are configured..."); return 0; diff --git a/src/plugins/map/map_doc.md b/src/plugins/map/map_doc.md index 17f3c51174bf..f3e2a56706d8 100644 --- a/src/plugins/map/map_doc.md +++ b/src/plugins/map/map_doc.md @@ -33,7 +33,7 @@ IPv4 and IPv6 virtual reassembly support the following configuration: map params reassembly [ip4 | ip6] [lifetime ] [pool-size ] [buffers ] [ht-ratio ] lifetime: - The time in milliseconds a reassembly structure is considered valid. The longer, the more reliable is reassembly, but the more likely it is to exhaust the pool of reassembly structures. IPv4 standard suggests a lifetime of 15 seconds. IPv6 specifies a lifetime of 60 people. Those values are not realistic for high-throughput cases. + The time in milliseconds a reassembly structure is considered valid. The longer, the more reliable is reassembly, but the more likely it is to exhaust the pool of reassembly structures. IPv4 standard suggests a lifetime of 15 seconds. IPv6 specifies a lifetime of 60 seconds. Those values are not realistic for high-throughput cases. buffers: The upper limit of buffers that are allowed to be cached. It can be used to protect against fragmentation attacks which would aim to exhaust the global buffers pool. 
diff --git a/src/plugins/marvell/pp2/output.c b/src/plugins/marvell/pp2/output.c index dbd106a59306..911b2f55a178 100644 --- a/src/plugins/marvell/pp2/output.c +++ b/src/plugins/marvell/pp2/output.c @@ -101,7 +101,7 @@ mrvl_pp2_interface_tx (vlib_main_t * vm, buffers = vlib_frame_vector_args (frame); u16 n_copy = clib_min (outq->size - slot, n_sent); - clib_memcpy_fast (outq->buffers + slot, buffers, n_copy * sizeof (u32)); + vlib_buffer_copy_indices (outq->buffers + slot, buffers, n_copy); if (PREDICT_FALSE (n_copy < n_sent)) clib_memcpy_fast (outq->buffers, buffers + n_copy, (n_sent - n_copy) * sizeof (u32)); diff --git a/src/plugins/marvell/pp2/pp2.c b/src/plugins/marvell/pp2/pp2.c index 016ad53cc969..d40b9ec7dd6a 100644 --- a/src/plugins/marvell/pp2/pp2.c +++ b/src/plugins/marvell/pp2/pp2.c @@ -244,7 +244,7 @@ mrvl_pp2_create_if (mrvl_pp2_create_if_args_t * args) /* FIXME bpool bit select per pp */ s = format (s, "pool-%d:%d%c", pp2_id, pp2_id + 8, 0); bpool_params.match = (char *) s; - bpool_params.buff_len = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; + bpool_params.buff_len = vlib_buffer_get_default_data_size (vm); /* FIXME +64 ? 
*/ if (pp2_bpool_init (&bpool_params, &ppif->inqs[0].bpool)) { diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index f976f16dec84..7f29f4d7f877 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -185,6 +185,7 @@ memif_int_fd_read_ready (clib_file_t * uf) clib_error_t * memif_connect (memif_if_t * mif) { + vlib_main_t *vm = vlib_get_main (); vnet_main_t *vnm = vnet_get_main (); clib_file_t template = { 0 }; memif_region_t *mr; @@ -235,6 +236,7 @@ memif_connect (memif_if_t * mif) vec_foreach_index (i, mif->rx_queues) { memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i); + u32 ti; int rv; mq->ring = mif->regions[mq->region].shm + mq->offset; @@ -254,6 +256,9 @@ memif_connect (memif_if_t * mif) memif_file_add (&mq->int_clib_file_index, &template); } vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, i, ~0); + ti = vnet_get_device_input_thread_index (vnm, mif->hw_if_index, i); + mq->buffer_pool_index = + vlib_buffer_pool_get_default_for_numa (vm, vlib_mains[ti]->numa_node); rv = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, i, VNET_HW_INTERFACE_RX_MODE_DEFAULT); if (rv) @@ -338,7 +343,7 @@ memif_init_regions_and_queues (memif_if_t * mif) { vlib_buffer_pool_t *bp; /* *INDENT-OFF* */ - vec_foreach (bp, buffer_main.buffer_pools) + vec_foreach (bp, vm->buffer_main->buffer_pools) { vlib_physmem_map_t *pm; pm = vlib_physmem_get_map (vm, bp->physmem_map_index); @@ -848,19 +853,16 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) if (mm->per_thread_data == 0) { int i; - vlib_buffer_free_list_t *fl; vec_validate_aligned (mm->per_thread_data, tm->n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); - fl = - vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); for (i = 0; i < tm->n_vlib_mains; i++) { memif_per_thread_data_t *ptd = vec_elt_at_index (mm->per_thread_data, i); vlib_buffer_t *bt = &ptd->buffer_template; - vlib_buffer_init_for_free_list (bt, fl); + clib_memset (bt, 0, sizeof 
(vlib_buffer_t)); bt->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; bt->total_length_not_including_first_buffer = 0; vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0; diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c index 3cb79541c17d..3f4f5c571440 100644 --- a/src/plugins/memif/node.c +++ b/src/plugins/memif/node.c @@ -180,7 +180,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, memif_main_t *mm = &memif_main; memif_ring_t *ring; memif_queue_t *mq; - u16 buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; + u16 buffer_size = vlib_buffer_get_default_data_size (vm); uword n_trace = vlib_get_trace_count (vm, node); u16 nexts[MEMIF_RX_VECTOR_SZ], *next = nexts; u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi; @@ -190,7 +190,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u32 thread_index = vm->thread_index; memif_per_thread_data_t *ptd = vec_elt_at_index (mm->per_thread_data, thread_index); - vlib_buffer_t *bt = &ptd->buffer_template; + vlib_buffer_t bt; u16 cur_slot, last_slot, ring_size, n_slots, mask; i16 start_offset; u16 n_buffers = 0, n_alloc; @@ -280,7 +280,8 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* allocate free buffers */ vec_validate_aligned (ptd->buffers, n_buffers - 1, CLIB_CACHE_LINE_BYTES); - n_alloc = vlib_buffer_alloc (vm, ptd->buffers, n_buffers); + n_alloc = vlib_buffer_alloc_from_pool (vm, ptd->buffers, n_buffers, + mq->buffer_pool_index); if (PREDICT_FALSE (n_alloc != n_buffers)) { if (n_alloc) @@ -338,10 +339,13 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } /* prepare buffer template and next indices */ - vnet_buffer (bt)->sw_if_index[VLIB_RX] = mif->sw_if_index; - vnet_buffer (bt)->feature_arc_index = 0; - bt->current_data = start_offset; - bt->current_config_index = 0; + vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_RX] = + mif->sw_if_index; + vnet_buffer 
(&ptd->buffer_template)->feature_arc_index = 0; + ptd->buffer_template.current_data = start_offset; + ptd->buffer_template.current_config_index = 0; + ptd->buffer_template.buffer_pool_index = mq->buffer_pool_index; + ptd->buffer_template.ref_count = 1; if (mode == MEMIF_INTERFACE_MODE_ETHERNET) { @@ -350,7 +354,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, next_index = mif->per_interface_next_index; else vnet_feature_start_device_input_x1 (mif->sw_if_index, &next_index, - bt); + &ptd->buffer_template); vlib_get_new_next_frame (vm, node, next_index, to_next_bufs, n_left_to_next); @@ -374,12 +378,16 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, po = ptd->packet_ops; bi = to_next_bufs; + /* copy template into local variable - will save per packet load */ + vlib_buffer_copy_template (&bt, &ptd->buffer_template); + while (n_from >= 8) { - b0 = vlib_get_buffer (vm, po[4].first_buffer_vec_index); - b1 = vlib_get_buffer (vm, po[5].first_buffer_vec_index); - b2 = vlib_get_buffer (vm, po[6].first_buffer_vec_index); - b3 = vlib_get_buffer (vm, po[7].first_buffer_vec_index); + b0 = vlib_get_buffer (vm, ptd->buffers[po[0].first_buffer_vec_index]); + b1 = vlib_get_buffer (vm, ptd->buffers[po[1].first_buffer_vec_index]); + b2 = vlib_get_buffer (vm, ptd->buffers[po[2].first_buffer_vec_index]); + b3 = vlib_get_buffer (vm, ptd->buffers[po[3].first_buffer_vec_index]); + vlib_prefetch_buffer_header (b0, STORE); vlib_prefetch_buffer_header (b1, STORE); vlib_prefetch_buffer_header (b2, STORE); @@ -402,7 +410,10 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, b2 = vlib_get_buffer (vm, bi[2]); b3 = vlib_get_buffer (vm, bi[3]); - clib_memcpy64_x4 (b0, b1, b2, b3, bt); + vlib_buffer_copy_template (b0, &bt); + vlib_buffer_copy_template (b1, &bt); + vlib_buffer_copy_template (b2, &bt); + vlib_buffer_copy_template (b3, &bt); b0->current_length = po[0].packet_len; n_rx_bytes += b0->current_length; @@ 
-439,7 +450,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, fbvi[0] = po[0].first_buffer_vec_index; bi[0] = ptd->buffers[fbvi[0]]; b0 = vlib_get_buffer (vm, bi[0]); - clib_memcpy_fast (b0, bt, 64); + vlib_buffer_copy_template (b0, &bt); b0->current_length = po->packet_len; n_rx_bytes += b0->current_length; @@ -559,7 +570,7 @@ memif_device_input_zc_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* asume that somebody will want to add ethernet header on the packet so start with IP header at offset 14 */ start_offset = (mode == MEMIF_INTERFACE_MODE_IP) ? 14 : 0; - buffer_length = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES - start_offset; + buffer_length = vlib_buffer_get_default_data_size (vm) - start_offset; cur_slot = mq->last_tail; last_slot = ring->tail; @@ -776,8 +787,9 @@ memif_device_input_zc_inline (vlib_main_t * vm, vlib_node_runtime_t * node, clib_memset (dt, 0, sizeof (memif_desc_t)); dt->length = buffer_length; - n_alloc = vlib_buffer_alloc_to_ring (vm, mq->buffers, head & mask, - ring_size, n_slots); + n_alloc = vlib_buffer_alloc_to_ring_from_pool (vm, mq->buffers, head & mask, + ring_size, n_slots, + mq->buffer_pool_index); if (PREDICT_FALSE (n_alloc != n_slots)) { diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h index a938b85e7da1..4613512041d2 100644 --- a/src/plugins/memif/private.h +++ b/src/plugins/memif/private.h @@ -123,6 +123,7 @@ typedef struct u16 last_head; u16 last_tail; u32 *buffers; + u8 buffer_pool_index; /* interrupts */ int int_fd; @@ -254,7 +255,7 @@ extern memif_main_t memif_main; extern vnet_device_class_t memif_device_class; extern vlib_node_registration_t memif_input_node; -enum +typedef enum { MEMIF_PROCESS_EVENT_START = 1, MEMIF_PROCESS_EVENT_STOP = 2, diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index 15dae6508445..abb916060f1e 100755 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -217,7 +217,8 @@ nat44_i2o_is_idle_session_cb 
(clib_bihash_kv_8_8_t * kv, void *arg) if (clib_bihash_add_del_8_8 (&tsm->out2in, &s_kv, 0)) nat_log_warn ("out2in key del failed"); - snat_ipfix_logging_nat44_ses_delete (s->in2out.addr.as_u32, + snat_ipfix_logging_nat44_ses_delete (ctx->thread_index, + s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->in2out.protocol, s->in2out.port, @@ -269,7 +270,7 @@ slow_path (snat_main_t * sm, vlib_buffer_t * b0, if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index))) { b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED]; - nat_ipfix_logging_max_sessions (sm->max_translations); + nat_ipfix_logging_max_sessions (thread_index, sm->max_translations); nat_log_notice ("maximum sessions exceeded"); return SNAT_IN2OUT_NEXT_DROP; } @@ -373,7 +374,8 @@ slow_path (snat_main_t * sm, vlib_buffer_t * b0, nat_log_notice ("out2in key add failed"); /* log NAT event */ - snat_ipfix_logging_nat44_ses_create (s->in2out.addr.as_u32, + snat_ipfix_logging_nat44_ses_create (thread_index, + s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->in2out.protocol, s->in2out.port, @@ -1844,7 +1846,7 @@ nat44_in2out_reass_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) { if (nat_ip4_reass_add_fragment - (reass0, bi0, &fragments_to_drop)) + (thread_index, reass0, bi0, &fragments_to_drop)) { b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG]; nat_log_notice diff --git a/src/plugins/nat/in2out_ed.c b/src/plugins/nat/in2out_ed.c index 9a61af9dad9a..2cde378e801a 100644 --- a/src/plugins/nat/in2out_ed.c +++ b/src/plugins/nat/in2out_ed.c @@ -199,7 +199,8 @@ nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg) if (snat_is_unk_proto_session (s)) goto delete; - snat_ipfix_logging_nat44_ses_delete (s->in2out.addr.as_u32, + snat_ipfix_logging_nat44_ses_delete (ctx->thread_index, + s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->in2out.protocol, s->in2out.port, @@ -296,7 +297,7 @@ slow_path_ed (snat_main_t * sm, if (PREDICT_FALSE 
(maximum_sessions_exceeded (sm, thread_index))) { b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; - nat_ipfix_logging_max_sessions (sm->max_translations); + nat_ipfix_logging_max_sessions (thread_index, sm->max_translations); nat_log_notice ("maximum sessions exceeded"); return NAT_IN2OUT_ED_NEXT_DROP; } @@ -420,7 +421,8 @@ slow_path_ed (snat_main_t * sm, *sessionp = s; /* log NAT event */ - snat_ipfix_logging_nat44_ses_create (s->in2out.addr.as_u32, + snat_ipfix_logging_nat44_ses_create (thread_index, + s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->in2out.protocol, s->in2out.port, @@ -767,7 +769,7 @@ nat44_ed_in2out_unknown_proto (snat_main_t * sm, if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index))) { b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; - nat_ipfix_logging_max_sessions (sm->max_translations); + nat_ipfix_logging_max_sessions (thread_index, sm->max_translations); nat_log_notice ("maximum sessions exceeded"); return 0; } @@ -1965,7 +1967,7 @@ nat44_ed_in2out_reass_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) { if (nat_ip4_reass_add_fragment - (reass0, bi0, &fragments_to_drop)) + (thread_index, reass0, bi0, &fragments_to_drop)) { b0->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_FRAG]; nat_log_notice diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 0cfcbf16ceb2..dabb8122adf1 100755 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -264,7 +264,8 @@ nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index) return; /* log NAT event */ - snat_ipfix_logging_nat44_ses_delete (s->in2out.addr.as_u32, + snat_ipfix_logging_nat44_ses_delete (thread_index, + s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->in2out.protocol, s->in2out.port, @@ -454,7 +455,7 @@ nat_ed_session_alloc (snat_main_t * sm, snat_user_t * u, u32 thread_index, nat_log_warn ("max translations per user %U", format_ip4_address, &u->addr); 
snat_ipfix_logging_max_entries_per_user - (sm->max_translations_per_user, u->addr.as_u32); + (thread_index, sm->max_translations_per_user, u->addr.as_u32); return 0; } else @@ -1768,7 +1769,7 @@ snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del) { if (is_del) { - outside_fib->refcount--; + outside_fib->refcount--; if (!outside_fib->refcount) vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs); } @@ -1968,6 +1969,10 @@ snat_interface_add_del_output_feature (u32 sw_if_index, snat_interface_t *i; snat_address_t *ap; snat_static_mapping_t *m; + nat_outside_fib_t *outside_fib; + u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, + sw_if_index); + if (sm->deterministic || (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))) @@ -1981,6 +1986,34 @@ snat_interface_add_del_output_feature (u32 sw_if_index, })); /* *INDENT-ON* */ + if (!is_inside) + { + /* *INDENT-OFF* */ + vec_foreach (outside_fib, sm->outside_fibs) + { + if (outside_fib->fib_index == fib_index) + { + if (is_del) + { + outside_fib->refcount--; + if (!outside_fib->refcount) + vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs); + } + else + outside_fib->refcount++; + goto feature_set; + } + } + /* *INDENT-ON* */ + if (!is_del) + { + vec_add2 (sm->outside_fibs, outside_fib, 1); + outside_fib->refcount = 1; + outside_fib->fib_index = fib_index; + } + } + +feature_set: if (is_inside) { if (sm->endpoint_dependent) @@ -2111,6 +2144,65 @@ snat_set_workers (uword * bitmap) return 0; } +static void +snat_update_outside_fib (u32 sw_if_index, u32 new_fib_index, + u32 old_fib_index) +{ + snat_main_t *sm = &snat_main; + nat_outside_fib_t *outside_fib; + snat_interface_t *i; + u8 is_add = 1; + + if (new_fib_index == old_fib_index) + return; + + if (!vec_len (sm->outside_fibs)) + return; + + pool_foreach (i, sm->interfaces, ( + { + if (i->sw_if_index == sw_if_index) + { + if (!(nat_interface_is_outside (i))) + return;} + } + )); + vec_foreach 
(outside_fib, sm->outside_fibs) + { + if (outside_fib->fib_index == old_fib_index) + { + outside_fib->refcount--; + if (!outside_fib->refcount) + vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs); + break; + } + } + + vec_foreach (outside_fib, sm->outside_fibs) + { + if (outside_fib->fib_index == new_fib_index) + { + outside_fib->refcount++; + is_add = 0; + break; + } + } + + if (is_add) + { + vec_add2 (sm->outside_fibs, outside_fib, 1); + outside_fib->refcount = 1; + outside_fib->fib_index = new_fib_index; + } +} + +static void +snat_ip4_table_bind (ip4_main_t * im, + uword opaque, + u32 sw_if_index, u32 new_fib_index, u32 old_fib_index) +{ + snat_update_outside_fib (sw_if_index, new_fib_index, old_fib_index); +} static void snat_ip4_add_del_interface_address_cb (ip4_main_t * im, @@ -2239,6 +2331,11 @@ snat_init (vlib_main_t * vm) nat66_init (); + ip4_table_bind_callback_t cbt4 = { + .function = snat_ip4_table_bind, + }; + vec_add1 (ip4_main.table_bind_callbacks, cbt4); + /* Init virtual fragmenentation reassembly */ return nat_reass_init (vm); } @@ -2525,7 +2622,7 @@ nat_alloc_addr_and_port_default (snat_address_t * addresses, } /* Totally out of translations to use... */ - snat_ipfix_logging_addresses_exhausted (0); + snat_ipfix_logging_addresses_exhausted (thread_index, 0); return 1; } @@ -2575,7 +2672,7 @@ nat_alloc_addr_and_port_mape (snat_address_t * addresses, exhausted: /* Totally out of translations to use... */ - snat_ipfix_logging_addresses_exhausted (0); + snat_ipfix_logging_addresses_exhausted (thread_index, 0); return 1; } @@ -2623,7 +2720,7 @@ nat_alloc_addr_and_port_range (snat_address_t * addresses, exhausted: /* Totally out of translations to use... 
*/ - snat_ipfix_logging_addresses_exhausted (0); + snat_ipfix_logging_addresses_exhausted (thread_index, 0); return 1; } diff --git a/src/plugins/nat/nat44_classify.c b/src/plugins/nat/nat44_classify.c index 8a417dcf5d9c..ed0c37ea21a1 100644 --- a/src/plugins/nat/nat44_classify.c +++ b/src/plugins/nat/nat44_classify.c @@ -217,8 +217,8 @@ nat44_classify_node_fn_inline (vlib_main_t * vm, !(reass0->flags & NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE)) { /* first fragment still hasn't arrived, cache this fragment */ - if (nat_ip4_reass_add_fragment (reass0, bi0, - &fragments_to_drop)) + if (nat_ip4_reass_add_fragment + (thread_index, reass0, bi0, &fragments_to_drop)) { b0->error = node->errors[NAT44_CLASSIFY_ERROR_MAX_FRAG]; @@ -328,8 +328,8 @@ nat44_classify_node_fn_inline (vlib_main_t * vm, if (reass0->classify_next == NAT_REASS_IP4_CLASSIFY_NONE) /* first fragment still hasn't arrived */ { - if (nat_ip4_reass_add_fragment (reass0, bi0, - &fragments_to_drop)) + if (nat_ip4_reass_add_fragment + (thread_index, reass0, bi0, &fragments_to_drop)) { b0->error = node->errors[NAT44_CLASSIFY_ERROR_MAX_FRAG]; diff --git a/src/plugins/nat/nat44_cli.c b/src/plugins/nat/nat44_cli.c index 9aaa8498e22a..eba5d575cd68 100644 --- a/src/plugins/nat/nat44_cli.c +++ b/src/plugins/nat/nat44_cli.c @@ -128,7 +128,13 @@ snat_ipfix_logging_enable_disable_command_fn (vlib_main_t * vm, /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) - return 0; + { + rv = snat_ipfix_logging_enable_disable (enable, domain_id, + (u16) src_port); + if (rv) + return clib_error_return (0, "ipfix logging enable failed"); + return 0; + } while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { diff --git a/src/plugins/nat/nat64.c b/src/plugins/nat/nat64.c index 4dbfb05c78cc..2f665ab4cfe5 100644 --- a/src/plugins/nat/nat64.c +++ b/src/plugins/nat/nat64.c @@ -82,12 +82,14 @@ nat64_ip4_add_del_interface_address_cb (ip4_main_t * im, uword opaque, if (nm->addr_pool[j].addr.as_u32 == address->as_u32) return; - (void) nat64_add_del_pool_addr (address, ~0, 1); + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + address, ~0, 1); return; } else { - (void) nat64_add_del_pool_addr (address, ~0, 0); + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + address, ~0, 0); return; } } @@ -280,7 +282,8 @@ nat64_set_hash (u32 bib_buckets, u32 bib_memory_size, u32 st_buckets, } int -nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) +nat64_add_del_pool_addr (u32 thread_index, + ip4_address_t * addr, u32 vrf_id, u8 is_add) { nat64_main_t *nm = &nat64_main; snat_address_t *a = 0; @@ -330,7 +333,7 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) /* *INDENT-OFF* */ vec_foreach (db, nm->db) { - nat64_db_free_out_addr (db, &a->addr); + nat64_db_free_out_addr (thread_index, db, &a->addr); vlib_set_simple_counter (&nm->total_bibs, db - nm->db, 0, db->bib.bib_entries_num); vlib_set_simple_counter (&nm->total_sessions, db - nm->db, 0, @@ -394,8 +397,8 @@ nat64_add_interface_address (u32 sw_if_index, int is_add) { /* if have address remove it */ if (first_int_addr) - (void) nat64_add_del_pool_addr (first_int_addr, ~0, 0); - + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + first_int_addr, ~0, 0); vec_del1 (nm->auto_add_sw_if_indices, i); return 0; } @@ -410,7 +413,8 @@ nat64_add_interface_address (u32 
sw_if_index, int is_add) /* If the address is already bound - or static - add it now */ if (first_int_addr) - (void) nat64_add_del_pool_addr (first_int_addr, ~0, 1); + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + first_int_addr, ~0, 1); return 0; } @@ -601,7 +605,8 @@ nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, if (static_bib->is_add) { - (void) nat64_db_bib_entry_create (db, &static_bib->in_addr, + (void) nat64_db_bib_entry_create (thread_index, db, + &static_bib->in_addr, &static_bib->out_addr, static_bib->in_port, static_bib->out_port, @@ -619,7 +624,7 @@ nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, static_bib->fib_index, 1); if (bibe) { - nat64_db_bib_entry_free (db, bibe); + nat64_db_bib_entry_free (thread_index, db, bibe); vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, db->bib.bib_entries_num); vlib_set_simple_counter (&nm->total_sessions, thread_index, 0, @@ -723,7 +728,7 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr, if (!nm->sm->num_workers) { bibe = - nat64_db_bib_entry_create (db, in_addr, out_addr, + nat64_db_bib_entry_create (thread_index, db, in_addr, out_addr, clib_host_to_net_u16 (in_port), clib_host_to_net_u16 (out_port), fib_index, proto, 1); @@ -741,7 +746,7 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr, if (!nm->sm->num_workers) { - nat64_db_bib_entry_free (db, bibe); + nat64_db_bib_entry_free (thread_index, db, bibe); vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, db->bib.bib_entries_num); } @@ -1175,7 +1180,7 @@ nat64_expire_worker_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, nat64_db_t *db = &nm->db[thread_index]; u32 now = (u32) vlib_time_now (vm); - nad64_db_st_free_expired (db, now); + nad64_db_st_free_expired (thread_index, db, now); vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, db->bib.bib_entries_num); vlib_set_simple_counter (&nm->total_sessions, thread_index, 0, diff --git 
a/src/plugins/nat/nat64.h b/src/plugins/nat/nat64.h index 0a60571a069f..b9251402a0c5 100644 --- a/src/plugins/nat/nat64.h +++ b/src/plugins/nat/nat64.h @@ -123,13 +123,15 @@ extern vlib_node_registration_t nat64_out2in_node; /** * @brief Add/delete address to NAT64 pool. * + * @param thread_index Thread index used by ipfix nat logging (not address per thread). * @param addr IPv4 address. * @param vrf_id VRF id of tenant, ~0 means independent of VRF. * @param is_add 1 if add, 0 if delete. * * @returns 0 on success, non-zero value otherwise. */ -int nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add); +int nat64_add_del_pool_addr (u32 thread_index, + ip4_address_t * addr, u32 vrf_id, u8 is_add); /** * @brief Call back function when walking addresses in NAT64 pool, non-zero diff --git a/src/plugins/nat/nat64_cli.c b/src/plugins/nat/nat64_cli.c index efeaa0be0c23..4efdf1330427 100644 --- a/src/plugins/nat/nat64_cli.c +++ b/src/plugins/nat/nat64_cli.c @@ -73,7 +73,7 @@ nat64_add_del_pool_addr_command_fn (vlib_main_t * vm, for (i = 0; i < count; i++) { - rv = nat64_add_del_pool_addr (&this_addr, vrf_id, is_add); + rv = nat64_add_del_pool_addr (0, &this_addr, vrf_id, is_add); switch (rv) { diff --git a/src/plugins/nat/nat64_db.c b/src/plugins/nat/nat64_db.c index ca8358ef8a21..178e483ff989 100644 --- a/src/plugins/nat/nat64_db.c +++ b/src/plugins/nat/nat64_db.c @@ -50,7 +50,8 @@ nat64_db_init (nat64_db_t * db, u32 bib_buckets, u32 bib_memory_size, } nat64_db_bib_entry_t * -nat64_db_bib_entry_create (nat64_db_t * db, ip6_address_t * in_addr, +nat64_db_bib_entry_create (u32 thread_index, nat64_db_t * db, + ip6_address_t * in_addr, ip4_address_t * out_addr, u16 in_port, u16 out_port, u32 fib_index, u8 proto, u8 is_static) @@ -63,7 +64,7 @@ nat64_db_bib_entry_create (nat64_db_t * db, ip6_address_t * in_addr, if (db->bib.bib_entries_num >= db->bib.limit) { db->free_addr_port_cb (db, out_addr, out_port, proto); - nat_ipfix_logging_max_bibs (db->bib.limit); + 
nat_ipfix_logging_max_bibs (thread_index, db->bib.limit); return 0; } @@ -119,13 +120,14 @@ nat64_db_bib_entry_create (nat64_db_t * db, ip6_address_t * in_addr, clib_bihash_add_del_24_8 (&db->bib.out2in, &kv, 1); fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6); - nat_ipfix_logging_nat64_bib (in_addr, out_addr, proto, in_port, out_port, - fib->ft_table_id, 1); + nat_ipfix_logging_nat64_bib (thread_index, in_addr, out_addr, proto, + in_port, out_port, fib->ft_table_id, 1); return bibe; } void -nat64_db_bib_entry_free (nat64_db_t * db, nat64_db_bib_entry_t * bibe) +nat64_db_bib_entry_free (u32 thread_index, nat64_db_t * db, + nat64_db_bib_entry_t * bibe) { nat64_db_bib_entry_key_t bibe_key; clib_bihash_kv_24_8_t kv; @@ -164,7 +166,8 @@ nat64_db_bib_entry_free (nat64_db_t * db, nat64_db_bib_entry_t * bibe) vec_add1 (ste_to_be_free, ste - st);} )); vec_foreach (ste_index, ste_to_be_free) - nat64_db_st_entry_free (db, pool_elt_at_index (st, ste_index[0])); + nat64_db_st_entry_free (thread_index, db, + pool_elt_at_index (st, ste_index[0])); vec_free (ste_to_be_free); } @@ -193,8 +196,8 @@ nat64_db_bib_entry_free (nat64_db_t * db, nat64_db_bib_entry_t * bibe) db->free_addr_port_cb (db, &bibe->out_addr, bibe->out_port, bibe->proto); fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6); - nat_ipfix_logging_nat64_bib (&bibe->in_addr, &bibe->out_addr, bibe->proto, - bibe->in_port, bibe->out_port, + nat_ipfix_logging_nat64_bib (thread_index, &bibe->in_addr, &bibe->out_addr, + bibe->proto, bibe->in_port, bibe->out_port, fib->ft_table_id, 0); /* delete from pool */ @@ -370,7 +373,8 @@ nat64_db_st_walk (nat64_db_t * db, u8 proto, } nat64_db_st_entry_t * -nat64_db_st_entry_create (nat64_db_t * db, nat64_db_bib_entry_t * bibe, +nat64_db_st_entry_create (u32 thread_index, nat64_db_t * db, + nat64_db_bib_entry_t * bibe, ip6_address_t * in_r_addr, ip4_address_t * out_r_addr, u16 r_port) { @@ -382,7 +386,7 @@ nat64_db_st_entry_create (nat64_db_t * db, nat64_db_bib_entry_t * 
bibe, if (db->st.st_entries_num >= db->st.limit) { - nat_ipfix_logging_max_sessions (db->st.limit); + nat_ipfix_logging_max_sessions (thread_index, db->st.limit); return 0; } @@ -452,8 +456,9 @@ nat64_db_st_entry_create (nat64_db_t * db, nat64_db_bib_entry_t * bibe, clib_bihash_add_del_48_8 (&db->st.out2in, &kv, 1); fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6); - nat_ipfix_logging_nat64_session (&bibe->in_addr, &bibe->out_addr, - bibe->proto, bibe->in_port, bibe->out_port, + nat_ipfix_logging_nat64_session (thread_index, &bibe->in_addr, + &bibe->out_addr, bibe->proto, + bibe->in_port, bibe->out_port, &ste->in_r_addr, &ste->out_r_addr, ste->r_port, ste->r_port, fib->ft_table_id, 1); @@ -464,7 +469,8 @@ nat64_db_st_entry_create (nat64_db_t * db, nat64_db_bib_entry_t * bibe, } void -nat64_db_st_entry_free (nat64_db_t * db, nat64_db_st_entry_t * ste) +nat64_db_st_entry_free (u32 thread_index, + nat64_db_t * db, nat64_db_st_entry_t * ste) { nat64_db_st_entry_t *st; nat64_db_bib_entry_t *bib, *bibe; @@ -526,8 +532,9 @@ nat64_db_st_entry_free (nat64_db_t * db, nat64_db_st_entry_t * ste) clib_bihash_add_del_48_8 (&db->st.out2in, &kv, 0); fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6); - nat_ipfix_logging_nat64_session (&bibe->in_addr, &bibe->out_addr, - bibe->proto, bibe->in_port, bibe->out_port, + nat_ipfix_logging_nat64_session (thread_index, &bibe->in_addr, + &bibe->out_addr, bibe->proto, + bibe->in_port, bibe->out_port, &ste->in_r_addr, &ste->out_r_addr, ste->r_port, ste->r_port, fib->ft_table_id, 0); @@ -543,7 +550,7 @@ nat64_db_st_entry_free (nat64_db_t * db, nat64_db_st_entry_t * ste) /* delete BIB entry if last session and dynamic */ if (!bibe->is_static && !bibe->ses_num) - nat64_db_bib_entry_free (db, bibe); + nat64_db_bib_entry_free (thread_index, db, bibe); } nat64_db_st_entry_t * @@ -641,7 +648,7 @@ nat64_db_st_entry_by_index (nat64_db_t * db, u8 proto, u32 ste_index) } void -nad64_db_st_free_expired (nat64_db_t * db, u32 now) 
+nad64_db_st_free_expired (u32 thread_index, nat64_db_t * db, u32 now) { u32 *ste_to_be_free = 0, *ste_index; nat64_db_st_entry_t *st, *ste; @@ -656,7 +663,8 @@ nad64_db_st_free_expired (nat64_db_t * db, u32 now) vec_add1 (ste_to_be_free, ste - st); \ })); \ vec_foreach (ste_index, ste_to_be_free) \ - nat64_db_st_entry_free (db, pool_elt_at_index(st, ste_index[0])); \ + nat64_db_st_entry_free (thread_index, db, \ + pool_elt_at_index(st, ste_index[0])); \ vec_free (ste_to_be_free); \ ste_to_be_free = 0; foreach_snat_protocol @@ -667,13 +675,15 @@ nad64_db_st_free_expired (nat64_db_t * db, u32 now) vec_add1 (ste_to_be_free, ste - st); })); vec_foreach (ste_index, ste_to_be_free) - nat64_db_st_entry_free (db, pool_elt_at_index(st, ste_index[0])); + nat64_db_st_entry_free (thread_index, db, + pool_elt_at_index(st, ste_index[0])); vec_free (ste_to_be_free); /* *INDENT-ON* */ } void -nat64_db_free_out_addr (nat64_db_t * db, ip4_address_t * out_addr) +nat64_db_free_out_addr (u32 thread_index, + nat64_db_t * db, ip4_address_t * out_addr) { u32 *ste_to_be_free = 0, *ste_index; nat64_db_st_entry_t *st, *ste; @@ -689,7 +699,8 @@ nat64_db_free_out_addr (nat64_db_t * db, ip4_address_t * out_addr) vec_add1 (ste_to_be_free, ste - st); \ })); \ vec_foreach (ste_index, ste_to_be_free) \ - nat64_db_st_entry_free (db, pool_elt_at_index(st, ste_index[0])); \ + nat64_db_st_entry_free (thread_index, db, \ + pool_elt_at_index(st, ste_index[0])); \ vec_free (ste_to_be_free); \ ste_to_be_free = 0; foreach_snat_protocol @@ -701,7 +712,8 @@ nat64_db_free_out_addr (nat64_db_t * db, ip4_address_t * out_addr) vec_add1 (ste_to_be_free, ste - st); })); vec_foreach (ste_index, ste_to_be_free) - nat64_db_st_entry_free (db, pool_elt_at_index(st, ste_index[0])); + nat64_db_st_entry_free (thread_index, db, + pool_elt_at_index(st, ste_index[0])); vec_free (ste_to_be_free); db->addr_free = 0; /* *INDENT-ON* */ diff --git a/src/plugins/nat/nat64_db.h b/src/plugins/nat/nat64_db.h index 
f1b93cf89287..e6fa8e727ad1 100644 --- a/src/plugins/nat/nat64_db.h +++ b/src/plugins/nat/nat64_db.h @@ -160,6 +160,7 @@ int nat64_db_init (nat64_db_t * db, u32 bib_buckets, u32 bib_memory_size, /** * @brief Create new NAT64 BIB entry. * + * @param thread_index thread index. * @param db NAT64 DB. * @param in_addr Inside IPv6 address. * @param out_addr Outside IPv4 address. @@ -171,20 +172,23 @@ int nat64_db_init (nat64_db_t * db, u32 bib_buckets, u32 bib_memory_size, * * @returns BIB entry on success, 0 otherwise. */ -nat64_db_bib_entry_t *nat64_db_bib_entry_create (nat64_db_t * db, +nat64_db_bib_entry_t *nat64_db_bib_entry_create (u32 thread_index, + nat64_db_t * db, ip6_address_t * in_addr, ip4_address_t * out_addr, u16 in_port, u16 out_port, - u32 fib_index, - u8 proto, u8 is_static); + u32 fib_index, u8 proto, + u8 is_static); /** * @brief Free NAT64 BIB entry. * + * @param thread_index thread index. * @param db NAT64 DB. * @param bibe BIB entry. */ -void nat64_db_bib_entry_free (nat64_db_t * db, nat64_db_bib_entry_t * bibe); +void nat64_db_bib_entry_free (u32 thread_index, nat64_db_t * db, + nat64_db_bib_entry_t * bibe); /** * @brief Call back function when walking NAT64 BIB, non-zero @@ -201,7 +205,8 @@ typedef int (*nat64_db_bib_walk_fn_t) (nat64_db_bib_entry_t * bibe, * - 6 TCP BIB * - 17 UDP BIB * - 1/58 ICMP BIB - * - otherwise "unknown" protocol BIB + * + * - otherwise "unknown" protocol BIB * @param fn The function to invoke on each entry visited. * @param ctx A context passed in the visit function. */ @@ -240,6 +245,7 @@ nat64_db_bib_entry_t *nat64_db_bib_entry_by_index (nat64_db_t * db, /** * @brief Create new NAT64 session table entry. * + * @param thread_index thread index. * @param db NAT64 DB. * @param bibe Corresponding BIB entry. * @param in_r_addr Inside IPv6 address of the remote host. @@ -248,7 +254,8 @@ nat64_db_bib_entry_t *nat64_db_bib_entry_by_index (nat64_db_t * db, * * @returns BIB entry on success, 0 otherwise.
*/ -nat64_db_st_entry_t *nat64_db_st_entry_create (nat64_db_t * db, +nat64_db_st_entry_t *nat64_db_st_entry_create (u32 thread_index, + nat64_db_t * db, nat64_db_bib_entry_t * bibe, ip6_address_t * in_r_addr, ip4_address_t * out_r_addr, @@ -257,10 +264,12 @@ nat64_db_st_entry_t *nat64_db_st_entry_create (nat64_db_t * db, /** * @brief Free NAT64 session table entry. * + * @param thread_index thread index. * @param db NAT64 DB. * @param ste Session table entry. */ -void nat64_db_st_entry_free (nat64_db_t * db, nat64_db_st_entry_t * ste); +void nat64_db_st_entry_free (u32 thread_index, nat64_db_t * db, + nat64_db_st_entry_t * ste); /** * @brief Find NAT64 session table entry. @@ -308,18 +317,21 @@ void nat64_db_st_walk (nat64_db_t * db, u8 proto, /** * @brief Free expired session entries in session tables. * + * @param thread_index thread index. * @param db NAT64 DB. * @param now Current time. */ -void nad64_db_st_free_expired (nat64_db_t * db, u32 now); +void nad64_db_st_free_expired (u32 thread_index, nat64_db_t * db, u32 now); /** * @brief Free sessions using specific outside address. * + * @param thread_index thread index. * @param db NAT64 DB. * @param out_addr Outside address to match. */ -void nat64_db_free_out_addr (nat64_db_t * db, ip4_address_t * out_addr); +void nat64_db_free_out_addr (u32 thread_index, nat64_db_t * db, + ip4_address_t * out_addr); /* * @brief Get ST entry index. 
diff --git a/src/plugins/nat/nat64_in2out.c b/src/plugins/nat/nat64_in2out.c index 660df093bf90..d30a9cbe519f 100644 --- a/src/plugins/nat/nat64_in2out.c +++ b/src/plugins/nat/nat64_in2out.c @@ -217,8 +217,9 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, return -1; bibe = - nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr, - sport, out_port, fib_index, proto, 0); + nat64_db_bib_entry_create (ctx->thread_index, db, + &ip6->src_address, &out_addr, sport, + out_port, fib_index, proto, 0); if (!bibe) return -1; @@ -228,8 +229,8 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (db, bibe, &ip6->dst_address, - &daddr.ip4, dport); + nat64_db_st_entry_create (ctx->thread_index, db, bibe, + &ip6->dst_address, &daddr.ip4, dport); if (!ste) return -1; @@ -313,9 +314,10 @@ nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) return -1; bibe = - nat64_db_bib_entry_create (db, &ip6->src_address, - &out_addr, in_id, out_id, - fib_index, IP_PROTOCOL_ICMP, 0); + nat64_db_bib_entry_create (ctx->thread_index, db, + &ip6->src_address, &out_addr, + in_id, out_id, fib_index, + IP_PROTOCOL_ICMP, 0); if (!bibe) return -1; @@ -325,8 +327,8 @@ nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (db, bibe, &ip6->dst_address, - &daddr.ip4, 0); + nat64_db_st_entry_create (ctx->thread_index, db, bibe, + &ip6->dst_address, &daddr.ip4, 0); if (!ste) return -1; @@ -556,9 +558,9 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, return -1; bibe = - nat64_db_bib_entry_create (db, &ip6->src_address, - &ctx.out_addr, 0, 0, fib_index, proto, - 0); + nat64_db_bib_entry_create (s_ctx->thread_index, db, + &ip6->src_address, &ctx.out_addr, + 0, 0, fib_index, proto, 0); if (!bibe) return -1; 
@@ -568,7 +570,8 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0); + nat64_db_st_entry_create (s_ctx->thread_index, db, bibe, + &ip6->dst_address, &daddr.ip4, 0); if (!ste) return -1; @@ -649,8 +652,9 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, return -1; bibe = - nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr, - sport, out_port, fib_index, proto, 0); + nat64_db_bib_entry_create (thread_index, db, &ip6->src_address, + &out_addr, sport, out_port, fib_index, + proto, 0); if (!bibe) return -1; @@ -660,7 +664,7 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (db, bibe, &ip6->dst_address, + nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address, &daddr.ip4, dport); if (!ste) return -1; @@ -909,7 +913,7 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, return -1; bibe = - nat64_db_bib_entry_create (db, &ip6->src_address, + nat64_db_bib_entry_create (thread_index, db, &ip6->src_address, &ctx.out_addr, 0, 0, fib_index, proto, 0); if (!bibe) @@ -921,7 +925,8 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0); + nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address, + &daddr.ip4, 0); if (!ste) return -1; @@ -1485,7 +1490,7 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) { if (nat_ip6_reass_add_fragment - (reass0, bi0, &fragments_to_drop)) + (thread_index, reass0, bi0, &fragments_to_drop)) { b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_FRAG]; next0 = NAT64_IN2OUT_NEXT_DROP; @@ 
-1528,7 +1533,7 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, } bibe0 = - nat64_db_bib_entry_create (db, + nat64_db_bib_entry_create (thread_index, db, &ip60->src_address, &out_addr0, udp0->src_port, out_port0, fib_index0, @@ -1546,7 +1551,7 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4, fib_index0); ste0 = - nat64_db_st_entry_create (db, bibe0, + nat64_db_st_entry_create (thread_index, db, bibe0, &ip60->dst_address, &daddr0.ip4, udp0->dst_port); if (!ste0) diff --git a/src/plugins/nat/nat64_out2in.c b/src/plugins/nat/nat64_out2in.c index 437052fc441e..6c9e216d96e5 100644 --- a/src/plugins/nat/nat64_out2in.c +++ b/src/plugins/nat/nat64_out2in.c @@ -164,7 +164,8 @@ nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index); ste = - nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, sport); + nat64_db_st_entry_create (ctx->thread_index, db, bibe, &ip6_saddr, + &saddr.ip4, sport); if (!ste) return -1; @@ -245,7 +246,8 @@ nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index); ste = - nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0); + nat64_db_st_entry_create (ctx->thread_index, db, + bibe, &ip6_saddr, &saddr.ip4, 0); if (!ste) return -1; @@ -408,7 +410,8 @@ nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, return -1; nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index); - ste = nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0); + ste = nat64_db_st_entry_create (ctx->thread_index, db, + bibe, &ip6_saddr, &saddr.ip4, 0); if (!ste) return -1; @@ -810,7 +813,8 @@ nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, nat64_compose_ip6 (&ip6_saddr0, &ip40->src_address, bibe0->fib_index); ste0 = - nat64_db_st_entry_create (db, bibe0, &ip6_saddr0, + 
nat64_db_st_entry_create (thread_index, + db, bibe0, &ip6_saddr0, &saddr0.ip4, udp0->src_port); if (!ste0) @@ -836,7 +840,7 @@ nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) { if (nat_ip4_reass_add_fragment - (reass0, bi0, &fragments_to_drop)) + (thread_index, reass0, bi0, &fragments_to_drop)) { b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_FRAG]; next0 = NAT64_OUT2IN_NEXT_DROP; diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c index b1aa3243ab9a..49b5f36946b1 100644 --- a/src/plugins/nat/nat_api.c +++ b/src/plugins/nat/nat_api.c @@ -2324,7 +2324,7 @@ static void for (i = 0; i < count; i++) { - if ((rv = nat64_add_del_pool_addr (&this_addr, vrf_id, mp->is_add))) + if ((rv = nat64_add_del_pool_addr (0, &this_addr, vrf_id, mp->is_add))) goto send_reply; increment_v4_address (&this_addr); diff --git a/src/plugins/nat/nat_det.h b/src/plugins/nat/nat_det.h index 7878a4c4ed62..d2966356d8a8 100644 --- a/src/plugins/nat/nat_det.h +++ b/src/plugins/nat/nat_det.h @@ -147,8 +147,9 @@ snat_det_find_ses_by_in (snat_det_map_t * dm, ip4_address_t * in_addr, } always_inline snat_det_session_t * -snat_det_ses_create (snat_det_map_t * dm, ip4_address_t * in_addr, - u16 in_port, snat_det_out_key_t * out) +snat_det_ses_create (u32 thread_index, snat_det_map_t * dm, + ip4_address_t * in_addr, u16 in_port, + snat_det_out_key_t * out) { u32 user_offset; u16 i; @@ -171,7 +172,8 @@ snat_det_ses_create (snat_det_map_t * dm, ip4_address_t * in_addr, } } - snat_ipfix_logging_max_entries_per_user (SNAT_DET_SES_PER_USER, + snat_ipfix_logging_max_entries_per_user (thread_index, + SNAT_DET_SES_PER_USER, in_addr->as_u32); return 0; } diff --git a/src/plugins/nat/nat_det_in2out.c b/src/plugins/nat/nat_det_in2out.c index 1366c2fce690..e4aa0463a065 100644 --- a/src/plugins/nat/nat_det_in2out.c +++ b/src/plugins/nat/nat_det_in2out.c @@ -202,7 +202,8 @@ icmp_match_in2out_det (snat_main_t * sm, 
vlib_node_runtime_t * node, continue; ses0 = - snat_det_ses_create (dm0, &in_addr, echo0->identifier, &key0); + snat_det_ses_create (thread_index, dm0, + &in_addr, echo0->identifier, &key0); break; } if (PREDICT_FALSE (!ses0)) @@ -374,8 +375,8 @@ snat_det_in2out_node_fn (vlib_main_t * vm, continue; ses0 = - snat_det_ses_create (dm0, &ip0->src_address, tcp0->src, - &key0); + snat_det_ses_create (thread_index, dm0, &ip0->src_address, + tcp0->src, &key0); break; } if (PREDICT_FALSE (!ses0)) @@ -538,8 +539,8 @@ snat_det_in2out_node_fn (vlib_main_t * vm, continue; ses1 = - snat_det_ses_create (dm1, &ip1->src_address, tcp1->src, - &key1); + snat_det_ses_create (thread_index, dm1, &ip1->src_address, + tcp1->src, &key1); break; } if (PREDICT_FALSE (!ses1)) @@ -738,8 +739,8 @@ snat_det_in2out_node_fn (vlib_main_t * vm, continue; ses0 = - snat_det_ses_create (dm0, &ip0->src_address, tcp0->src, - &key0); + snat_det_ses_create (thread_index, dm0, &ip0->src_address, + tcp0->src, &key0); break; } if (PREDICT_FALSE (!ses0)) diff --git a/src/plugins/nat/nat_ipfix_logging.c b/src/plugins/nat/nat_ipfix_logging.c index 042239fee85f..4bb96bcf4a0a 100644 --- a/src/plugins/nat/nat_ipfix_logging.c +++ b/src/plugins/nat/nat_ipfix_logging.c @@ -19,7 +19,9 @@ #include #include #include +#include +vlib_node_registration_t snat_ipfix_flush_node; snat_ipfix_logging_main_t snat_ipfix_logging_main; #define NAT44_SESSION_CREATE_LEN 26 @@ -111,11 +113,17 @@ typedef struct u32 vrf_id; } nat_ipfix_logging_nat64_bib_args_t; -#define skip_if_disabled() \ -do { \ - snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; \ - if (PREDICT_TRUE (!silm->enabled)) \ - return; \ +#define skip_if_disabled() \ +do { \ + snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; \ + if (PREDICT_TRUE (!clib_atomic_fetch_or(&silm->enabled, 0))) \ + return; \ +} while (0) + +#define update_template_id(old_id, new_id) \ +do { \ + u16 template_id = clib_atomic_fetch_or(old_id, 0); \ + 
clib_atomic_cmp_and_swap(old_id, template_id, new_id); \ } while (0) /** @@ -151,56 +159,79 @@ snat_template_rewrite (flow_report_main_t * frm, ip4_ipfix_template_packet_t *tp; u32 field_count = 0; flow_report_stream_t *stream; + u32 stream_index; stream = &frm->streams[fr->stream_index]; - silm->stream_index = fr->stream_index; + + stream_index = clib_atomic_fetch_or(&silm->stream_index, 0); + clib_atomic_cmp_and_swap (&silm->stream_index, + stream_index, fr->stream_index); if (event == NAT_ADDRESSES_EXHAUTED) { field_count = NAT_ADDRESSES_EXHAUTED_FIELD_COUNT; - silm->addr_exhausted_template_id = fr->template_id; + + update_template_id(&silm->addr_exhausted_template_id, + fr->template_id); } else if (event == NAT44_SESSION_CREATE) { field_count = NAT44_SESSION_CREATE_FIELD_COUNT; - silm->nat44_session_template_id = fr->template_id; + + update_template_id(&silm->nat44_session_template_id, + fr->template_id); } else if (event == NAT64_BIB_CREATE) { field_count = NAT64_BIB_FIELD_COUNT; - silm->nat64_bib_template_id = fr->template_id; + + update_template_id(&silm->nat64_bib_template_id, + fr->template_id); } else if (event == NAT64_SESSION_CREATE) { field_count = NAT64_SES_FIELD_COUNT; - silm->nat64_ses_template_id = fr->template_id; + + update_template_id(&silm->nat64_ses_template_id, + fr->template_id); } else if (event == QUOTA_EXCEEDED) { if (quota_event == MAX_ENTRIES_PER_USER) { field_count = MAX_ENTRIES_PER_USER_FIELD_COUNT; - silm->max_entries_per_user_template_id = fr->template_id; + + update_template_id(&silm->max_entries_per_user_template_id, + fr->template_id); + } else if (quota_event == MAX_SESSION_ENTRIES) { field_count = MAX_SESSIONS_FIELD_COUNT; - silm->max_sessions_template_id = fr->template_id; + + update_template_id(&silm->max_sessions_template_id, + fr->template_id); } else if (quota_event == MAX_BIB_ENTRIES) { field_count = MAX_BIBS_FIELD_COUNT; - silm->max_bibs_template_id = fr->template_id; + + update_template_id(&silm->max_bibs_template_id, + 
fr->template_id); } else if (quota_event == MAX_FRAGMENTS_PENDING_REASSEMBLY) { field_count = MAX_FRAGMENTS_FIELD_COUNT; - silm->max_frags_ip4_template_id = fr->template_id; + + update_template_id(&silm->max_frags_ip4_template_id, + fr->template_id); } else if (quota_event == MAX_FRAGMENTS_PENDING_REASSEMBLY_IP6) { field_count = MAX_FRAGMENTS_FIELD_COUNT; - silm->max_frags_ip6_template_id = fr->template_id; + + update_template_id(&silm->max_frags_ip6_template_id, + fr->template_id); } } @@ -530,10 +561,13 @@ snat_ipfix_header_create (flow_report_main_t * frm, ip4_ipfix_template_packet_t *tp; ipfix_message_header_t *h = 0; ipfix_set_header_t *s = 0; + u32 sequence_number; + u32 stream_index; ip4_header_t *ip; udp_header_t *udp; - - stream = &frm->streams[silm->stream_index]; + + stream_index = clib_atomic_fetch_or(&silm->stream_index, 0); + stream = &frm->streams[stream_index]; b0->current_data = 0; b0->current_length = sizeof (*ip) + sizeof (*udp) + sizeof (*h) + @@ -561,7 +595,9 @@ snat_ipfix_header_create (flow_report_main_t * frm, (((f64) frm->unix_time_0) + (vlib_time_now (frm->vlib_main) - frm->vlib_time_0))); - h->sequence_number = clib_host_to_net_u32 (stream->sequence_number++); + + sequence_number = clib_atomic_fetch_add (&stream->sequence_number, 1); + h->sequence_number = clib_host_to_net_u32 (sequence_number); h->domain_id = clib_host_to_net_u32 (stream->domain_id); *offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp); @@ -608,11 +644,13 @@ snat_ipfix_send (flow_report_main_t * frm, } static void -snat_ipfix_logging_nat44_ses (u8 nat_event, u32 src_ip, u32 nat_src_ip, - snat_protocol_t snat_proto, u16 src_port, - u16 nat_src_port, u32 vrf_id, int do_flush) +snat_ipfix_logging_nat44_ses (u32 thread_index, u8 nat_event, u32 src_ip, + u32 nat_src_ip, snat_protocol_t snat_proto, + u16 src_port, u16 nat_src_port, u32 vrf_id, + int do_flush) { snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; + snat_ipfix_per_thread_data_t *sitd = 
&silm->per_thread_data[thread_index]; flow_report_main_t *frm = &flow_report_main; vlib_frame_t *f; vlib_buffer_t *b0 = 0; @@ -620,18 +658,15 @@ snat_ipfix_logging_nat44_ses (u8 nat_event, u32 src_ip, u32 nat_src_ip, u32 offset; vlib_main_t *vm = frm->vlib_main; u64 now; - vlib_buffer_free_list_t *fl; u8 proto = ~0; - - if (!silm->enabled) - return; + u16 template_id; proto = snat_proto_to_ip_proto (snat_proto); now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3); now += silm->milisecond_time_0; - b0 = silm->nat44_session_buffer; + b0 = sitd->nat44_session_buffer; if (PREDICT_FALSE (b0 == 0)) { @@ -644,25 +679,22 @@ snat_ipfix_logging_nat44_ses (u8 nat_event, u32 src_ip, u32 nat_src_ip, return; } - b0 = silm->nat44_session_buffer = vlib_get_buffer (vm, bi0); - fl = - vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); + b0 = sitd->nat44_session_buffer = vlib_get_buffer (vm, bi0); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); offset = 0; } else { bi0 = vlib_get_buffer_index (vm, b0); - offset = silm->nat44_session_next_record_offset; + offset = sitd->nat44_session_next_record_offset; } - f = silm->nat44_session_frame; + f = sitd->nat44_session_frame; if (PREDICT_FALSE (f == 0)) { u32 *to_next; f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - silm->nat44_session_frame = f; + sitd->nat44_session_frame = f; to_next = vlib_frame_vector_args (f); to_next[0] = bi0; f->n_vectors = 1; @@ -704,18 +736,22 @@ snat_ipfix_logging_nat44_ses (u8 nat_event, u32 src_ip, u32 nat_src_ip, if (PREDICT_FALSE (do_flush || (offset + NAT44_SESSION_CREATE_LEN) > frm->path_mtu)) { - snat_ipfix_send (frm, f, b0, silm->nat44_session_template_id); - silm->nat44_session_frame = 0; - silm->nat44_session_buffer = 0; + template_id = clib_atomic_fetch_or ( + &silm->nat44_session_template_id, + 0); + snat_ipfix_send (frm, f, b0, template_id); + sitd->nat44_session_frame = 0; + sitd->nat44_session_buffer = 0; offset = 0; } - 
silm->nat44_session_next_record_offset = offset; + sitd->nat44_session_next_record_offset = offset; } static void -snat_ipfix_logging_addr_exhausted (u32 pool_id, int do_flush) +snat_ipfix_logging_addr_exhausted (u32 thread_index, u32 pool_id, int do_flush) { snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; + snat_ipfix_per_thread_data_t *sitd = &silm->per_thread_data[thread_index]; flow_report_main_t *frm = &flow_report_main; vlib_frame_t *f; vlib_buffer_t *b0 = 0; @@ -723,16 +759,13 @@ snat_ipfix_logging_addr_exhausted (u32 pool_id, int do_flush) u32 offset; vlib_main_t *vm = frm->vlib_main; u64 now; - vlib_buffer_free_list_t *fl; u8 nat_event = NAT_ADDRESSES_EXHAUTED; - - if (!silm->enabled) - return; + u16 template_id; now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3); now += silm->milisecond_time_0; - b0 = silm->addr_exhausted_buffer; + b0 = sitd->addr_exhausted_buffer; if (PREDICT_FALSE (b0 == 0)) { @@ -745,25 +778,22 @@ snat_ipfix_logging_addr_exhausted (u32 pool_id, int do_flush) return; } - b0 = silm->addr_exhausted_buffer = vlib_get_buffer (vm, bi0); - fl = - vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); + b0 = sitd->addr_exhausted_buffer = vlib_get_buffer (vm, bi0); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); offset = 0; } else { bi0 = vlib_get_buffer_index (vm, b0); - offset = silm->addr_exhausted_next_record_offset; + offset = sitd->addr_exhausted_next_record_offset; } - f = silm->addr_exhausted_frame; + f = sitd->addr_exhausted_frame; if (PREDICT_FALSE (f == 0)) { u32 *to_next; f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - silm->addr_exhausted_frame = f; + sitd->addr_exhausted_frame = f; to_next = vlib_frame_vector_args (f); to_next[0] = bi0; f->n_vectors = 1; @@ -790,18 +820,23 @@ snat_ipfix_logging_addr_exhausted (u32 pool_id, int do_flush) if (PREDICT_FALSE (do_flush || (offset + NAT_ADDRESSES_EXHAUTED_LEN) > frm->path_mtu)) { - snat_ipfix_send (frm, f, 
b0, silm->addr_exhausted_template_id); - silm->addr_exhausted_frame = 0; - silm->addr_exhausted_buffer = 0; + template_id = clib_atomic_fetch_or ( + &silm->addr_exhausted_template_id, + 0); + snat_ipfix_send (frm, f, b0, template_id); + sitd->addr_exhausted_frame = 0; + sitd->addr_exhausted_buffer = 0; offset = 0; } - silm->addr_exhausted_next_record_offset = offset; + sitd->addr_exhausted_next_record_offset = offset; } static void -snat_ipfix_logging_max_entries_per_usr (u32 limit, u32 src_ip, int do_flush) +snat_ipfix_logging_max_entries_per_usr (u32 thread_index, + u32 limit, u32 src_ip, int do_flush) { snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; + snat_ipfix_per_thread_data_t *sitd = &silm->per_thread_data[thread_index]; flow_report_main_t *frm = &flow_report_main; vlib_frame_t *f; vlib_buffer_t *b0 = 0; @@ -809,17 +844,14 @@ snat_ipfix_logging_max_entries_per_usr (u32 limit, u32 src_ip, int do_flush) u32 offset; vlib_main_t *vm = frm->vlib_main; u64 now; - vlib_buffer_free_list_t *fl; u8 nat_event = QUOTA_EXCEEDED; u32 quota_event = MAX_ENTRIES_PER_USER; - - if (!silm->enabled) - return; + u16 template_id; now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3); now += silm->milisecond_time_0; - b0 = silm->max_entries_per_user_buffer; + b0 = sitd->max_entries_per_user_buffer; if (PREDICT_FALSE (b0 == 0)) { @@ -832,25 +864,22 @@ snat_ipfix_logging_max_entries_per_usr (u32 limit, u32 src_ip, int do_flush) return; } - b0 = silm->max_entries_per_user_buffer = vlib_get_buffer (vm, bi0); - fl = - vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); + b0 = sitd->max_entries_per_user_buffer = vlib_get_buffer (vm, bi0); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); offset = 0; } else { bi0 = vlib_get_buffer_index (vm, b0); - offset = silm->max_entries_per_user_next_record_offset; + offset = sitd->max_entries_per_user_next_record_offset; } - f = silm->max_entries_per_user_frame; + f = 
sitd->max_entries_per_user_frame; if (PREDICT_FALSE (f == 0)) { u32 *to_next; f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - silm->max_entries_per_user_frame = f; + sitd->max_entries_per_user_frame = f; to_next = vlib_frame_vector_args (f); to_next[0] = bi0; f->n_vectors = 1; @@ -883,18 +912,22 @@ snat_ipfix_logging_max_entries_per_usr (u32 limit, u32 src_ip, int do_flush) if (PREDICT_FALSE (do_flush || (offset + MAX_ENTRIES_PER_USER_LEN) > frm->path_mtu)) { - snat_ipfix_send (frm, f, b0, silm->max_entries_per_user_template_id); - silm->max_entries_per_user_frame = 0; - silm->max_entries_per_user_buffer = 0; + template_id = clib_atomic_fetch_or ( + &silm->max_entries_per_user_template_id, + 0); + snat_ipfix_send (frm, f, b0, template_id); + sitd->max_entries_per_user_frame = 0; + sitd->max_entries_per_user_buffer = 0; offset = 0; } - silm->max_entries_per_user_next_record_offset = offset; + sitd->max_entries_per_user_next_record_offset = offset; } static void -nat_ipfix_logging_max_ses (u32 limit, int do_flush) +nat_ipfix_logging_max_ses (u32 thread_index, u32 limit, int do_flush) { snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; + snat_ipfix_per_thread_data_t *sitd = &silm->per_thread_data[thread_index]; flow_report_main_t *frm = &flow_report_main; vlib_frame_t *f; vlib_buffer_t *b0 = 0; @@ -902,17 +935,14 @@ nat_ipfix_logging_max_ses (u32 limit, int do_flush) u32 offset; vlib_main_t *vm = frm->vlib_main; u64 now; - vlib_buffer_free_list_t *fl; u8 nat_event = QUOTA_EXCEEDED; u32 quota_event = MAX_SESSION_ENTRIES; - - if (!silm->enabled) - return; + u16 template_id; now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3); now += silm->milisecond_time_0; - b0 = silm->max_sessions_buffer; + b0 = sitd->max_sessions_buffer; if (PREDICT_FALSE (b0 == 0)) { @@ -925,25 +955,22 @@ nat_ipfix_logging_max_ses (u32 limit, int do_flush) return; } - b0 = silm->max_sessions_buffer = vlib_get_buffer (vm, bi0); - fl = - vlib_buffer_get_free_list (vm, 
VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); + b0 = sitd->max_sessions_buffer = vlib_get_buffer (vm, bi0); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); offset = 0; } else { bi0 = vlib_get_buffer_index (vm, b0); - offset = silm->max_sessions_next_record_offset; + offset = sitd->max_sessions_next_record_offset; } - f = silm->max_sessions_frame; + f = sitd->max_sessions_frame; if (PREDICT_FALSE (f == 0)) { u32 *to_next; f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - silm->max_sessions_frame = f; + sitd->max_sessions_frame = f; to_next = vlib_frame_vector_args (f); to_next[0] = bi0; f->n_vectors = 1; @@ -973,18 +1000,22 @@ nat_ipfix_logging_max_ses (u32 limit, int do_flush) if (PREDICT_FALSE (do_flush || (offset + MAX_SESSIONS_LEN) > frm->path_mtu)) { - snat_ipfix_send (frm, f, b0, silm->max_sessions_template_id); - silm->max_sessions_frame = 0; - silm->max_sessions_buffer = 0; + template_id = clib_atomic_fetch_or ( + &silm->max_sessions_template_id, + 0); + snat_ipfix_send (frm, f, b0, template_id); + sitd->max_sessions_frame = 0; + sitd->max_sessions_buffer = 0; offset = 0; } - silm->max_sessions_next_record_offset = offset; + sitd->max_sessions_next_record_offset = offset; } static void -nat_ipfix_logging_max_bib (u32 limit, int do_flush) +nat_ipfix_logging_max_bib (u32 thread_index, u32 limit, int do_flush) { snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; + snat_ipfix_per_thread_data_t *sitd = &silm->per_thread_data[thread_index]; flow_report_main_t *frm = &flow_report_main; vlib_frame_t *f; vlib_buffer_t *b0 = 0; @@ -992,17 +1023,14 @@ nat_ipfix_logging_max_bib (u32 limit, int do_flush) u32 offset; vlib_main_t *vm = frm->vlib_main; u64 now; - vlib_buffer_free_list_t *fl; u8 nat_event = QUOTA_EXCEEDED; u32 quota_event = MAX_BIB_ENTRIES; - - if (!silm->enabled) - return; + u16 template_id; now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3); now += silm->milisecond_time_0; - b0 = silm->max_bibs_buffer; 
+ b0 = sitd->max_bibs_buffer; if (PREDICT_FALSE (b0 == 0)) { @@ -1015,25 +1043,22 @@ nat_ipfix_logging_max_bib (u32 limit, int do_flush) return; } - b0 = silm->max_bibs_buffer = vlib_get_buffer (vm, bi0); - fl = - vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); + b0 = sitd->max_bibs_buffer = vlib_get_buffer (vm, bi0); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); offset = 0; } else { bi0 = vlib_get_buffer_index (vm, b0); - offset = silm->max_bibs_next_record_offset; + offset = sitd->max_bibs_next_record_offset; } - f = silm->max_bibs_frame; + f = sitd->max_bibs_frame; if (PREDICT_FALSE (f == 0)) { u32 *to_next; f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - silm->max_bibs_frame = f; + sitd->max_bibs_frame = f; to_next = vlib_frame_vector_args (f); to_next[0] = bi0; f->n_vectors = 1; @@ -1063,18 +1088,23 @@ nat_ipfix_logging_max_bib (u32 limit, int do_flush) if (PREDICT_FALSE (do_flush || (offset + MAX_BIBS_LEN) > frm->path_mtu)) { - snat_ipfix_send (frm, f, b0, silm->max_bibs_template_id); - silm->max_bibs_frame = 0; - silm->max_bibs_buffer = 0; + template_id = clib_atomic_fetch_or ( + &silm->max_bibs_template_id, + 0); + snat_ipfix_send (frm, f, b0, template_id); + sitd->max_bibs_frame = 0; + sitd->max_bibs_buffer = 0; offset = 0; } - silm->max_bibs_next_record_offset = offset; + sitd->max_bibs_next_record_offset = offset; } static void -nat_ipfix_logging_max_frag_ip4 (u32 limit, u32 src, int do_flush) +nat_ipfix_logging_max_frag_ip4 (u32 thread_index, + u32 limit, u32 src, int do_flush) { snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; + snat_ipfix_per_thread_data_t *sitd = &silm->per_thread_data[thread_index]; flow_report_main_t *frm = &flow_report_main; vlib_frame_t *f; vlib_buffer_t *b0 = 0; @@ -1082,17 +1112,14 @@ nat_ipfix_logging_max_frag_ip4 (u32 limit, u32 src, int do_flush) u32 offset; vlib_main_t *vm = frm->vlib_main; u64 now; - vlib_buffer_free_list_t *fl; u8 nat_event = 
QUOTA_EXCEEDED; u32 quota_event = MAX_FRAGMENTS_PENDING_REASSEMBLY; - - if (!silm->enabled) - return; + u16 template_id; now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3); now += silm->milisecond_time_0; - b0 = silm->max_frags_ip4_buffer; + b0 = sitd->max_frags_ip4_buffer; if (PREDICT_FALSE (b0 == 0)) { @@ -1105,25 +1132,22 @@ nat_ipfix_logging_max_frag_ip4 (u32 limit, u32 src, int do_flush) return; } - b0 = silm->max_frags_ip4_buffer = vlib_get_buffer (vm, bi0); - fl = - vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); + b0 = sitd->max_frags_ip4_buffer = vlib_get_buffer (vm, bi0); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); offset = 0; } else { bi0 = vlib_get_buffer_index (vm, b0); - offset = silm->max_frags_ip4_next_record_offset; + offset = sitd->max_frags_ip4_next_record_offset; } - f = silm->max_frags_ip4_frame; + f = sitd->max_frags_ip4_frame; if (PREDICT_FALSE (f == 0)) { u32 *to_next; f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - silm->max_frags_ip4_frame = f; + sitd->max_frags_ip4_frame = f; to_next = vlib_frame_vector_args (f); to_next[0] = bi0; f->n_vectors = 1; @@ -1156,18 +1180,23 @@ nat_ipfix_logging_max_frag_ip4 (u32 limit, u32 src, int do_flush) if (PREDICT_FALSE (do_flush || (offset + MAX_BIBS_LEN) > frm->path_mtu)) { - snat_ipfix_send (frm, f, b0, silm->max_frags_ip4_template_id); - silm->max_frags_ip4_frame = 0; - silm->max_frags_ip4_buffer = 0; + template_id = clib_atomic_fetch_or ( + &silm->max_frags_ip4_template_id, + 0); + snat_ipfix_send (frm, f, b0, template_id); + sitd->max_frags_ip4_frame = 0; + sitd->max_frags_ip4_buffer = 0; offset = 0; } - silm->max_frags_ip4_next_record_offset = offset; + sitd->max_frags_ip4_next_record_offset = offset; } static void -nat_ipfix_logging_max_frag_ip6 (u32 limit, ip6_address_t * src, int do_flush) +nat_ipfix_logging_max_frag_ip6 (u32 thread_index, + u32 limit, ip6_address_t * src, int do_flush) { snat_ipfix_logging_main_t 
*silm = &snat_ipfix_logging_main; + snat_ipfix_per_thread_data_t *sitd = &silm->per_thread_data[thread_index]; flow_report_main_t *frm = &flow_report_main; vlib_frame_t *f; vlib_buffer_t *b0 = 0; @@ -1175,17 +1204,14 @@ nat_ipfix_logging_max_frag_ip6 (u32 limit, ip6_address_t * src, int do_flush) u32 offset; vlib_main_t *vm = frm->vlib_main; u64 now; - vlib_buffer_free_list_t *fl; u8 nat_event = QUOTA_EXCEEDED; u32 quota_event = MAX_FRAGMENTS_PENDING_REASSEMBLY; - - if (!silm->enabled) - return; + u16 template_id; now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3); now += silm->milisecond_time_0; - b0 = silm->max_frags_ip6_buffer; + b0 = sitd->max_frags_ip6_buffer; if (PREDICT_FALSE (b0 == 0)) { @@ -1198,25 +1224,22 @@ nat_ipfix_logging_max_frag_ip6 (u32 limit, ip6_address_t * src, int do_flush) return; } - b0 = silm->max_frags_ip6_buffer = vlib_get_buffer (vm, bi0); - fl = - vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); + b0 = sitd->max_frags_ip6_buffer = vlib_get_buffer (vm, bi0); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); offset = 0; } else { bi0 = vlib_get_buffer_index (vm, b0); - offset = silm->max_frags_ip6_next_record_offset; + offset = sitd->max_frags_ip6_next_record_offset; } - f = silm->max_frags_ip6_frame; + f = sitd->max_frags_ip6_frame; if (PREDICT_FALSE (f == 0)) { u32 *to_next; f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - silm->max_frags_ip6_frame = f; + sitd->max_frags_ip6_frame = f; to_next = vlib_frame_vector_args (f); to_next[0] = bi0; f->n_vectors = 1; @@ -1249,20 +1272,25 @@ nat_ipfix_logging_max_frag_ip6 (u32 limit, ip6_address_t * src, int do_flush) if (PREDICT_FALSE (do_flush || (offset + MAX_BIBS_LEN) > frm->path_mtu)) { - snat_ipfix_send (frm, f, b0, silm->max_frags_ip6_template_id); - silm->max_frags_ip6_frame = 0; - silm->max_frags_ip6_buffer = 0; + template_id = clib_atomic_fetch_or ( + &silm->max_frags_ip6_template_id, + 0); + snat_ipfix_send (frm, f, 
b0, template_id); + sitd->max_frags_ip6_frame = 0; + sitd->max_frags_ip6_buffer = 0; offset = 0; } - silm->max_frags_ip6_next_record_offset = offset; + sitd->max_frags_ip6_next_record_offset = offset; } static void -nat_ipfix_logging_nat64_bibe (u8 nat_event, ip6_address_t * src_ip, - u32 nat_src_ip, u8 proto, u16 src_port, - u16 nat_src_port, u32 vrf_id, int do_flush) +nat_ipfix_logging_nat64_bibe (u32 thread_index, u8 nat_event, + ip6_address_t * src_ip, u32 nat_src_ip, + u8 proto, u16 src_port, u16 nat_src_port, + u32 vrf_id, int do_flush) { snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; + snat_ipfix_per_thread_data_t *sitd = &silm->per_thread_data[thread_index]; flow_report_main_t *frm = &flow_report_main; vlib_frame_t *f; vlib_buffer_t *b0 = 0; @@ -1270,15 +1298,12 @@ nat_ipfix_logging_nat64_bibe (u8 nat_event, ip6_address_t * src_ip, u32 offset; vlib_main_t *vm = frm->vlib_main; u64 now; - vlib_buffer_free_list_t *fl; - - if (!silm->enabled) - return; + u16 template_id; now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3); now += silm->milisecond_time_0; - b0 = silm->nat64_bib_buffer; + b0 = sitd->nat64_bib_buffer; if (PREDICT_FALSE (b0 == 0)) { @@ -1291,25 +1316,22 @@ nat_ipfix_logging_nat64_bibe (u8 nat_event, ip6_address_t * src_ip, return; } - b0 = silm->nat64_bib_buffer = vlib_get_buffer (vm, bi0); - fl = - vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); + b0 = sitd->nat64_bib_buffer = vlib_get_buffer (vm, bi0); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); offset = 0; } else { bi0 = vlib_get_buffer_index (vm, b0); - offset = silm->nat64_bib_next_record_offset; + offset = sitd->nat64_bib_next_record_offset; } - f = silm->nat64_bib_frame; + f = sitd->nat64_bib_frame; if (PREDICT_FALSE (f == 0)) { u32 *to_next; f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - silm->nat64_bib_frame = f; + sitd->nat64_bib_frame = f; to_next = vlib_frame_vector_args (f); to_next[0] = bi0; 
f->n_vectors = 1; @@ -1351,22 +1373,27 @@ nat_ipfix_logging_nat64_bibe (u8 nat_event, ip6_address_t * src_ip, if (PREDICT_FALSE (do_flush || (offset + NAT64_BIB_LEN) > frm->path_mtu)) { - snat_ipfix_send (frm, f, b0, silm->nat64_bib_template_id); - silm->nat64_bib_frame = 0; - silm->nat64_bib_buffer = 0; + template_id = clib_atomic_fetch_or ( + &silm->nat64_bib_template_id, + 0); + snat_ipfix_send (frm, f, b0, template_id); + sitd->nat64_bib_frame = 0; + sitd->nat64_bib_buffer = 0; offset = 0; } - silm->nat64_bib_next_record_offset = offset; + sitd->nat64_bib_next_record_offset = offset; } static void -nat_ipfix_logging_nat64_ses (u8 nat_event, ip6_address_t * src_ip, - u32 nat_src_ip, u8 proto, u16 src_port, - u16 nat_src_port, ip6_address_t * dst_ip, - u32 nat_dst_ip, u16 dst_port, u16 nat_dst_port, +nat_ipfix_logging_nat64_ses (u32 thread_index, u8 nat_event, + ip6_address_t * src_ip, u32 nat_src_ip, + u8 proto, u16 src_port, u16 nat_src_port, + ip6_address_t * dst_ip, u32 nat_dst_ip, + u16 dst_port, u16 nat_dst_port, u32 vrf_id, int do_flush) { snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; + snat_ipfix_per_thread_data_t *sitd = &silm->per_thread_data[thread_index]; flow_report_main_t *frm = &flow_report_main; vlib_frame_t *f; vlib_buffer_t *b0 = 0; @@ -1374,15 +1401,12 @@ nat_ipfix_logging_nat64_ses (u8 nat_event, ip6_address_t * src_ip, u32 offset; vlib_main_t *vm = frm->vlib_main; u64 now; - vlib_buffer_free_list_t *fl; - - if (!silm->enabled) - return; + u16 template_id; now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3); now += silm->milisecond_time_0; - b0 = silm->nat64_ses_buffer; + b0 = sitd->nat64_ses_buffer; if (PREDICT_FALSE (b0 == 0)) { @@ -1395,25 +1419,22 @@ nat_ipfix_logging_nat64_ses (u8 nat_event, ip6_address_t * src_ip, return; } - b0 = silm->nat64_ses_buffer = vlib_get_buffer (vm, bi0); - fl = - vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); + b0 = 
sitd->nat64_ses_buffer = vlib_get_buffer (vm, bi0); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); offset = 0; } else { bi0 = vlib_get_buffer_index (vm, b0); - offset = silm->nat64_ses_next_record_offset; + offset = sitd->nat64_ses_next_record_offset; } - f = silm->nat64_ses_frame; + f = sitd->nat64_ses_frame; if (PREDICT_FALSE (f == 0)) { u32 *to_next; f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - silm->nat64_ses_frame = f; + sitd->nat64_ses_frame = f; to_next = vlib_frame_vector_args (f); to_next[0] = bi0; f->n_vectors = 1; @@ -1467,25 +1488,73 @@ nat_ipfix_logging_nat64_ses (u8 nat_event, ip6_address_t * src_ip, if (PREDICT_FALSE (do_flush || (offset + NAT64_SES_LEN) > frm->path_mtu)) { - snat_ipfix_send (frm, f, b0, silm->nat64_ses_template_id); - silm->nat64_ses_frame = 0; - silm->nat64_ses_buffer = 0; + template_id = clib_atomic_fetch_or ( + &silm->nat64_ses_template_id, + 0); + snat_ipfix_send (frm, f, b0, template_id); + sitd->nat64_ses_frame = 0; + sitd->nat64_ses_buffer = 0; offset = 0; } - silm->nat64_ses_next_record_offset = offset; + sitd->nat64_ses_next_record_offset = offset; } -static void -snat_ipfix_logging_nat44_ses_rpc_cb (snat_ipfix_logging_nat44_ses_args_t * a) +void +snat_ipfix_flush (u32 thread_index) { - snat_ipfix_logging_nat44_ses (a->nat_event, a->src_ip, a->nat_src_ip, - a->snat_proto, a->src_port, a->nat_src_port, - a->vrf_id, 0); + int do_flush = 1; + + snat_ipfix_logging_nat44_ses (thread_index, + 0, 0, 0, 0, 0, 0, 0, do_flush); + snat_ipfix_logging_addr_exhausted (thread_index, 0, do_flush); + snat_ipfix_logging_max_entries_per_usr (thread_index, 0, 0, do_flush); + nat_ipfix_logging_max_ses (thread_index, 0, do_flush); + nat_ipfix_logging_max_bib (thread_index, 0, do_flush); + nat_ipfix_logging_max_frag_ip4 (thread_index, 0, 0, do_flush); + nat_ipfix_logging_max_frag_ip6 (thread_index, 0, 0, do_flush); + nat_ipfix_logging_nat64_bibe (thread_index, + 0, 0, 0, 0, 0, 0, 0, do_flush); + nat_ipfix_logging_nat64_ses (thread_index, + 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, do_flush); +} + +void +snat_ipfix_flush_from_main (void) +{ + snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; + vlib_main_t *worker_vm; + int i; + + if (PREDICT_TRUE (!clib_atomic_fetch_or(&silm->enabled, 0))) + return; + + if (PREDICT_FALSE (!silm->worker_vms)) + { + for (i = 1; i < vec_len (vlib_mains); i++) + { + worker_vm = vlib_mains[i]; + if (worker_vm) + vec_add1 (silm->worker_vms, worker_vm); + } + } + + /* Trigger flush for each worker thread */ + for (i = 1; i < vec_len (silm->worker_vms); i++) + { + worker_vm = silm->worker_vms[i]; + if (worker_vm) + vlib_node_set_interrupt_pending (worker_vm, + snat_ipfix_flush_node.index); + } + + /* Finally flush main thread */ + snat_ipfix_flush (0); } /** * @brief Generate NAT44 session create event * + * @param thread_index thread index * @param src_ip source IPv4 address * @param nat_src_ip transaltes source IPv4 address * @param snat_proto NAT transport protocol @@ -1494,31 +1563,24 @@ snat_ipfix_logging_nat44_ses_rpc_cb (snat_ipfix_logging_nat44_ses_args_t * a) * @param vrf_id VRF ID */ void -snat_ipfix_logging_nat44_ses_create (u32 src_ip, +snat_ipfix_logging_nat44_ses_create (u32 thread_index, + u32 src_ip, u32 nat_src_ip, snat_protocol_t snat_proto, u16 src_port, u16 nat_src_port, u32 vrf_id) { - snat_ipfix_logging_nat44_ses_args_t a; - skip_if_disabled (); - a.nat_event = NAT44_SESSION_CREATE; - a.src_ip = src_ip; - a.nat_src_ip = nat_src_ip; - a.snat_proto = snat_proto; - a.src_port = src_port; - a.nat_src_port = nat_src_port; - a.vrf_id = vrf_id; - - vl_api_rpc_call_main_thread (snat_ipfix_logging_nat44_ses_rpc_cb, - (u8 *) & a, sizeof (a)); + snat_ipfix_logging_nat44_ses (thread_index, NAT44_SESSION_CREATE, src_ip, + nat_src_ip, snat_proto, src_port, nat_src_port, + vrf_id, 0); } /** * @brief Generate NAT44 session delete event * + * @param thread_index thread index * @param src_ip source IPv4 address * @param nat_src_ip transaltes source IPv4 address * @param 
snat_proto NAT transport protocol @@ -1527,274 +1589,131 @@ snat_ipfix_logging_nat44_ses_create (u32 src_ip, * @param vrf_id VRF ID */ void -snat_ipfix_logging_nat44_ses_delete (u32 src_ip, +snat_ipfix_logging_nat44_ses_delete (u32 thread_index, + u32 src_ip, u32 nat_src_ip, snat_protocol_t snat_proto, u16 src_port, u16 nat_src_port, u32 vrf_id) { - snat_ipfix_logging_nat44_ses_args_t a; - skip_if_disabled (); - a.nat_event = NAT44_SESSION_DELETE; - a.src_ip = src_ip; - a.nat_src_ip = nat_src_ip; - a.snat_proto = snat_proto; - a.src_port = src_port; - a.nat_src_port = nat_src_port; - a.vrf_id = vrf_id; - - vl_api_rpc_call_main_thread (snat_ipfix_logging_nat44_ses_rpc_cb, - (u8 *) & a, sizeof (a)); -} - -vlib_frame_t * -snat_data_callback_nat44_session (flow_report_main_t * frm, - flow_report_t * fr, - vlib_frame_t * f, - u32 * to_next, u32 node_index) -{ - snat_ipfix_logging_nat44_ses (0, 0, 0, 0, 0, 0, 0, 1); - return f; -} - -static void - snat_ipfix_logging_addr_exhausted_rpc_cb - (snat_ipfix_logging_addr_exhausted_args_t * a) -{ - snat_ipfix_logging_addr_exhausted (a->pool_id, 0); + snat_ipfix_logging_nat44_ses (thread_index, NAT44_SESSION_DELETE, src_ip, + nat_src_ip, snat_proto, src_port, nat_src_port, + vrf_id, 0); } /** * @brief Generate NAT addresses exhausted event * + * @param thread_index thread index * @param pool_id NAT pool ID */ void -snat_ipfix_logging_addresses_exhausted (u32 pool_id) +snat_ipfix_logging_addresses_exhausted (u32 thread_index, u32 pool_id) { //TODO: This event SHOULD be rate limited - snat_ipfix_logging_addr_exhausted_args_t a; - skip_if_disabled (); - a.pool_id = pool_id; - - vl_api_rpc_call_main_thread (snat_ipfix_logging_addr_exhausted_rpc_cb, - (u8 *) & a, sizeof (a)); -} - -vlib_frame_t * -snat_data_callback_addr_exhausted (flow_report_main_t * frm, - flow_report_t * fr, - vlib_frame_t * f, - u32 * to_next, u32 node_index) -{ - snat_ipfix_logging_addr_exhausted (0, 1); - return f; -} - -static void - 
snat_ipfix_logging_max_entries_per_usr_rpc_cb - (snat_ipfix_logging_max_entries_per_user_args_t * a) -{ - snat_ipfix_logging_max_entries_per_usr (a->limit, a->src_ip, 0); + snat_ipfix_logging_addr_exhausted (thread_index, pool_id, 0); } /** * @brief Generate maximum entries per user exceeded event * + * @param thread_index thread index * @param limit maximum NAT entries that can be created per user * @param src_ip source IPv4 address */ void -snat_ipfix_logging_max_entries_per_user (u32 limit, u32 src_ip) +snat_ipfix_logging_max_entries_per_user (u32 thread_index, u32 limit, u32 src_ip) { //TODO: This event SHOULD be rate limited - snat_ipfix_logging_max_entries_per_user_args_t a; - skip_if_disabled (); - a.limit = limit; - a.src_ip = src_ip; - - vl_api_rpc_call_main_thread (snat_ipfix_logging_max_entries_per_usr_rpc_cb, - (u8 *) & a, sizeof (a)); + snat_ipfix_logging_max_entries_per_usr (thread_index, limit, src_ip, 0); } vlib_frame_t * -snat_data_callback_max_entries_per_usr (flow_report_main_t * frm, +deterministic_nat_data_callback +(flow_report_main_t * frm, flow_report_t * fr, vlib_frame_t * f, u32 * to_next, u32 node_index) { - snat_ipfix_logging_max_entries_per_usr (0, 0, 1); - return f; -} + snat_ipfix_flush_from_main(); -static void -nat_ipfix_logging_max_ses_rpc_cb (nat_ipfix_logging_max_sessions_args_t * a) -{ - nat_ipfix_logging_max_ses (a->limit, 0); + return f; } /** * @brief Generate maximum session entries exceeded event * + * @param thread_index thread index * @param limit configured limit */ void -nat_ipfix_logging_max_sessions (u32 limit) +nat_ipfix_logging_max_sessions (u32 thread_index, u32 limit) { //TODO: This event SHOULD be rate limited - nat_ipfix_logging_max_sessions_args_t a; - skip_if_disabled (); - a.limit = limit; - - vl_api_rpc_call_main_thread (nat_ipfix_logging_max_ses_rpc_cb, - (u8 *) & a, sizeof (a)); -} - -vlib_frame_t * -nat_data_callback_max_sessions (flow_report_main_t * frm, - flow_report_t * fr, - vlib_frame_t * f, - u32 * 
to_next, u32 node_index) -{ - nat_ipfix_logging_max_ses (0, 1); - return f; -} - -static void -nat_ipfix_logging_max_bib_rpc_cb (nat_ipfix_logging_max_bibs_args_t * a) -{ - nat_ipfix_logging_max_bib (a->limit, 0); + nat_ipfix_logging_max_ses (thread_index, limit, 0); } /** * @brief Generate maximum BIB entries exceeded event * + * @param thread_index thread index * @param limit configured limit */ void -nat_ipfix_logging_max_bibs (u32 limit) +nat_ipfix_logging_max_bibs (u32 thread_index, u32 limit) { //TODO: This event SHOULD be rate limited - nat_ipfix_logging_max_bibs_args_t a; - skip_if_disabled (); - a.limit = limit; - - vl_api_rpc_call_main_thread (nat_ipfix_logging_max_bib_rpc_cb, - (u8 *) & a, sizeof (a)); -} - -vlib_frame_t * -nat_data_callback_max_bibs (flow_report_main_t * frm, - flow_report_t * fr, - vlib_frame_t * f, - u32 * to_next, u32 node_index) -{ - nat_ipfix_logging_max_bib (0, 1); - return f; -} - -static void -nat_ipfix_logging_max_frag_ip4_rpc_cb (nat_ipfix_logging_max_frags_ip4_args_t * a) -{ - nat_ipfix_logging_max_frag_ip4 (a->limit, a->src, 0); + nat_ipfix_logging_max_bib (thread_index, limit, 0); } /** * @brief Generate maximum IPv4 fragments pending reassembly exceeded event * + * @param thread_index thread index * @param limit configured limit * @param src source IPv4 address */ void -nat_ipfix_logging_max_fragments_ip4 (u32 limit, ip4_address_t * src) +nat_ipfix_logging_max_fragments_ip4 (u32 thread_index, + u32 limit, ip4_address_t * src) { //TODO: This event SHOULD be rate limited - nat_ipfix_logging_max_frags_ip4_args_t a; - skip_if_disabled (); - a.limit = limit; - a.src = src->as_u32; - - vl_api_rpc_call_main_thread (nat_ipfix_logging_max_frag_ip4_rpc_cb, - (u8 *) & a, sizeof (a)); -} - -vlib_frame_t * -nat_data_callback_max_frags_ip4 (flow_report_main_t * frm, - flow_report_t * fr, - vlib_frame_t * f, - u32 * to_next, u32 node_index) -{ - nat_ipfix_logging_max_frag_ip4 (0, 0, 1); - return f; -} - -static void 
-nat_ipfix_logging_max_frag_ip6_rpc_cb (nat_ipfix_logging_max_frags_ip6_args_t * a) -{ - ip6_address_t src; - src.as_u64[0] = a->src[0]; - src.as_u64[1] = a->src[1]; - nat_ipfix_logging_max_frag_ip6 (a->limit, &src, 0); + nat_ipfix_logging_max_frag_ip4 (thread_index, limit, src->as_u32, 0); } /** * @brief Generate maximum IPv6 fragments pending reassembly exceeded event * + * @param thread_index thread index * @param limit configured limit * @param src source IPv6 address */ void -nat_ipfix_logging_max_fragments_ip6 (u32 limit, ip6_address_t * src) +nat_ipfix_logging_max_fragments_ip6 (u32 thread_index, + u32 limit, ip6_address_t * src) { //TODO: This event SHOULD be rate limited - nat_ipfix_logging_max_frags_ip6_args_t a; - skip_if_disabled (); - a.limit = limit; - a.src[0] = src->as_u64[0]; - a.src[1] = src->as_u64[1]; - - vl_api_rpc_call_main_thread (nat_ipfix_logging_max_frag_ip6_rpc_cb, - (u8 *) & a, sizeof (a)); -} - -vlib_frame_t * -nat_data_callback_max_frags_ip6 (flow_report_main_t * frm, - flow_report_t * fr, - vlib_frame_t * f, - u32 * to_next, u32 node_index) -{ - nat_ipfix_logging_max_frag_ip6 (0, 0, 1); - return f; -} - -static void -nat_ipfix_logging_nat64_bib_rpc_cb (nat_ipfix_logging_nat64_bib_args_t * a) -{ - ip6_address_t src_ip; - src_ip.as_u64[0] = a->src_ip[0]; - src_ip.as_u64[1] = a->src_ip[1]; - nat_ipfix_logging_nat64_bibe (a->nat_event, &src_ip, a->nat_src_ip, - a->proto, a->src_port, a->nat_src_port, - a->vrf_id, 0); + nat_ipfix_logging_max_frag_ip6 (thread_index, limit, src, 0); } /** * @brief Generate NAT64 BIB create and delete events * + * @param thread_index thread index * @param src_ip source IPv6 address * @param nat_src_ip transaltes source IPv4 address * @param proto L4 protocol @@ -1804,55 +1723,26 @@ nat_ipfix_logging_nat64_bib_rpc_cb (nat_ipfix_logging_nat64_bib_args_t * a) * @param is_create non-zero value if create event otherwise delete event */ void -nat_ipfix_logging_nat64_bib (ip6_address_t * src_ip, 
+nat_ipfix_logging_nat64_bib (u32 thread_index, ip6_address_t * src_ip, ip4_address_t * nat_src_ip, u8 proto, u16 src_port, u16 nat_src_port, u32 vrf_id, u8 is_create) { - nat_ipfix_logging_nat64_bib_args_t a; + u8 nat_event; skip_if_disabled (); - a.src_ip[0] = src_ip->as_u64[0]; - a.src_ip[1] = src_ip->as_u64[1]; - a.nat_src_ip = nat_src_ip->as_u32; - a.proto = proto; - a.src_port = src_port; - a.nat_src_port = nat_src_port; - a.vrf_id = vrf_id; - a.nat_event = is_create ? NAT64_BIB_CREATE : NAT64_BIB_DELETE; - - vl_api_rpc_call_main_thread (nat_ipfix_logging_nat64_bib_rpc_cb, - (u8 *) & a, sizeof (a)); -} + nat_event = is_create ? NAT64_BIB_CREATE : NAT64_BIB_DELETE; -vlib_frame_t * -nat_data_callback_nat64_bib (flow_report_main_t * frm, - flow_report_t * fr, - vlib_frame_t * f, - u32 * to_next, u32 node_index) -{ - nat_ipfix_logging_nat64_bibe (0, 0, 0, 0, 0, 0, 0, 1); - return f; -} - -static void -nat_ipfix_logging_nat64_ses_rpc_cb (nat_ipfix_logging_nat64_ses_args_t * a) -{ - ip6_address_t src_ip, dst_ip; - src_ip.as_u64[0] = a->src_ip[0]; - src_ip.as_u64[1] = a->src_ip[1]; - dst_ip.as_u64[0] = a->dst_ip[0]; - dst_ip.as_u64[1] = a->dst_ip[1]; - nat_ipfix_logging_nat64_ses (a->nat_event, &src_ip, a->nat_src_ip, - a->proto, a->src_port, a->nat_src_port, - &dst_ip, a->nat_dst_ip, a->dst_port, - a->nat_dst_port, a->vrf_id, 0); + nat_ipfix_logging_nat64_bibe (thread_index, nat_event, src_ip, + nat_src_ip->as_u32, proto, src_port, + nat_src_port, vrf_id, 0); } /** * @brief Generate NAT64 session create and delete events * + * @param thread_index thread index * @param src_ip source IPv6 address * @param nat_src_ip transaltes source IPv4 address * @param proto L4 protocol @@ -1866,42 +1756,38 @@ nat_ipfix_logging_nat64_ses_rpc_cb (nat_ipfix_logging_nat64_ses_args_t * a) * @param is_create non-zero value if create event otherwise delete event */ void -nat_ipfix_logging_nat64_session (ip6_address_t * src_ip, +nat_ipfix_logging_nat64_session (u32 thread_index, + 
ip6_address_t * src_ip, ip4_address_t * nat_src_ip, u8 proto, u16 src_port, u16 nat_src_port, ip6_address_t * dst_ip, ip4_address_t * nat_dst_ip, u16 dst_port, u16 nat_dst_port, u32 vrf_id, u8 is_create) { - nat_ipfix_logging_nat64_ses_args_t a; + u8 nat_event; skip_if_disabled (); - a.src_ip[0] = src_ip->as_u64[0]; - a.src_ip[1] = src_ip->as_u64[1]; - a.nat_src_ip = nat_src_ip->as_u32; - a.proto = proto; - a.src_port = src_port; - a.nat_src_port = nat_src_port; - a.dst_ip[0] = dst_ip->as_u64[0]; - a.dst_ip[1] = dst_ip->as_u64[1]; - a.nat_dst_ip = nat_dst_ip->as_u32; - a.dst_port = dst_port; - a.nat_dst_port = nat_dst_port; - a.vrf_id = vrf_id; - a.nat_event = is_create ? NAT64_SESSION_CREATE : NAT64_SESSION_DELETE; - - vl_api_rpc_call_main_thread (nat_ipfix_logging_nat64_ses_rpc_cb, - (u8 *) & a, sizeof (a)); + nat_event = is_create ? NAT64_SESSION_CREATE : NAT64_SESSION_DELETE; + + nat_ipfix_logging_nat64_ses (thread_index, nat_event, src_ip, + nat_src_ip->as_u32, proto, src_port, + nat_src_port, dst_ip, nat_dst_ip->as_u32, + dst_port, nat_dst_port, vrf_id, 0); } vlib_frame_t * -nat_data_callback_nat64_session (flow_report_main_t * frm, - flow_report_t * fr, - vlib_frame_t * f, - u32 * to_next, u32 node_index) +data_callback (flow_report_main_t * frm, flow_report_t * fr, + vlib_frame_t * f, u32 * to_next, u32 node_index) { - nat_ipfix_logging_nat64_ses (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; + + if (PREDICT_FALSE (++silm->call_counter >= vec_len (frm->reports))) + { + snat_ipfix_flush_from_main(); + silm->call_counter = 0; + } + return f; } @@ -1924,20 +1810,18 @@ snat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port) int rv; u8 e = enable ? 1 : 0; - if (silm->enabled == e) + if (clib_atomic_cmp_and_swap (&silm->enabled, e ^ 1, e) == e) return 0; - silm->enabled = e; - clib_memset (&a, 0, sizeof (a)); a.is_add = enable; a.domain_id = domain_id ? domain_id : 1; a.src_port = src_port ? 
src_port : UDP_DST_PORT_ipfix; + a.flow_data_callback = data_callback; if (sm->deterministic) { a.rewrite_callback = snat_template_rewrite_max_entries_per_usr; - a.flow_data_callback = snat_data_callback_max_entries_per_usr; rv = vnet_flow_report_add_del (frm, &a, NULL); if (rv) @@ -1949,7 +1833,6 @@ snat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port) else { a.rewrite_callback = snat_template_rewrite_nat44_session; - a.flow_data_callback = snat_data_callback_nat44_session; rv = vnet_flow_report_add_del (frm, &a, NULL); if (rv) @@ -1959,7 +1842,6 @@ snat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port) } a.rewrite_callback = snat_template_rewrite_addr_exhausted; - a.flow_data_callback = snat_data_callback_addr_exhausted; rv = vnet_flow_report_add_del (frm, &a, NULL); if (rv) @@ -1969,7 +1851,6 @@ snat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port) } a.rewrite_callback = nat_template_rewrite_max_sessions; - a.flow_data_callback = nat_data_callback_max_sessions; rv = vnet_flow_report_add_del (frm, &a, NULL); if (rv) @@ -1979,7 +1860,6 @@ snat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port) } a.rewrite_callback = nat_template_rewrite_max_bibs; - a.flow_data_callback = nat_data_callback_max_bibs; rv = vnet_flow_report_add_del (frm, &a, NULL); if (rv) @@ -1989,7 +1869,6 @@ snat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port) } a.rewrite_callback = nat_template_rewrite_max_frags_ip4; - a.flow_data_callback = nat_data_callback_max_frags_ip4; rv = vnet_flow_report_add_del (frm, &a, NULL); if (rv) @@ -1999,7 +1878,6 @@ snat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port) } a.rewrite_callback = nat_template_rewrite_max_frags_ip6; - a.flow_data_callback = nat_data_callback_max_frags_ip6; rv = vnet_flow_report_add_del (frm, &a, NULL); if (rv) @@ -2009,7 +1887,6 @@ snat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port) } 
a.rewrite_callback = nat_template_rewrite_nat64_bib; - a.flow_data_callback = nat_data_callback_nat64_bib; rv = vnet_flow_report_add_del (frm, &a, NULL); if (rv) @@ -2019,7 +1896,6 @@ snat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port) } a.rewrite_callback = nat_template_rewrite_nat64_session; - a.flow_data_callback = nat_data_callback_nat64_session; rv = vnet_flow_report_add_del (frm, &a, NULL); if (rv) @@ -2031,7 +1907,6 @@ snat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port) if (sm->endpoint_dependent) { a.rewrite_callback = snat_template_rewrite_max_entries_per_usr; - a.flow_data_callback = snat_data_callback_max_entries_per_usr; rv = vnet_flow_report_add_del (frm, &a, NULL); if (rv) @@ -2054,10 +1929,33 @@ void snat_ipfix_logging_init (vlib_main_t * vm) { snat_ipfix_logging_main_t *silm = &snat_ipfix_logging_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); silm->enabled = 0; + silm->worker_vms = 0; + silm->call_counter = 0; /* Set up time reference pair */ silm->vlib_time_0 = vlib_time_now (vm); silm->milisecond_time_0 = unix_time_now_nsec () * 1e-6; + + vec_validate (silm->per_thread_data, tm->n_vlib_mains - 1); } + +static uword +ipfix_flush_process (vlib_main_t *vm, + vlib_node_runtime_t *rt, + vlib_frame_t *f) +{ + snat_ipfix_flush(vm->thread_index); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (snat_ipfix_flush_node) = { + .function = ipfix_flush_process, + .name = "snat-ipfix-flush", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, +}; +/* *INDENT-ON* */ diff --git a/src/plugins/nat/nat_ipfix_logging.h b/src/plugins/nat/nat_ipfix_logging.h index 0750149d7a54..a5cdb1a3c1aa 100644 --- a/src/plugins/nat/nat_ipfix_logging.h +++ b/src/plugins/nat/nat_ipfix_logging.h @@ -40,8 +40,6 @@ typedef enum { } quota_exceed_event_t; typedef struct { - /** NAT plugin IPFIX logging enabled */ - u8 enabled; /** ipfix buffers under construction */ vlib_buffer_t *nat44_session_buffer; @@ 
-76,10 +74,19 @@ typedef struct { u32 nat64_bib_next_record_offset; u32 nat64_ses_next_record_offset; +} snat_ipfix_per_thread_data_t; + +typedef struct { + /** NAT plugin IPFIX logging enabled */ + u8 enabled; + /** Time reference pair */ u64 milisecond_time_0; f64 vlib_time_0; + /* Per thread data */ + snat_ipfix_per_thread_data_t *per_thread_data; + /** template IDs */ u16 nat44_session_template_id; u16 addr_exhausted_template_id; @@ -93,34 +100,48 @@ typedef struct { /** stream index */ u32 stream_index; + + /** vector of worker vlib mains */ + vlib_main_t **worker_vms; + + /** nat data callbacks call counter */ + u16 call_counter; + } snat_ipfix_logging_main_t; extern snat_ipfix_logging_main_t snat_ipfix_logging_main; void snat_ipfix_logging_init (vlib_main_t * vm); int snat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port); -void snat_ipfix_logging_nat44_ses_create (u32 src_ip, u32 nat_src_ip, +void snat_ipfix_logging_nat44_ses_create (u32 thread_index, u32 src_ip, + u32 nat_src_ip, snat_protocol_t snat_proto, u16 src_port, u16 nat_src_port, u32 vrf_id); -void snat_ipfix_logging_nat44_ses_delete (u32 src_ip, u32 nat_src_ip, +void snat_ipfix_logging_nat44_ses_delete (u32 thread_index, u32 src_ip, + u32 nat_src_ip, snat_protocol_t snat_proto, u16 src_port, u16 nat_src_port, u32 vrf_id); -void snat_ipfix_logging_addresses_exhausted(u32 pool_id); -void snat_ipfix_logging_max_entries_per_user(u32 limit, u32 src_ip); -void nat_ipfix_logging_max_sessions(u32 limit); -void nat_ipfix_logging_max_bibs(u32 limit); -void nat_ipfix_logging_max_fragments_ip4(u32 limit, ip4_address_t * src); -void nat_ipfix_logging_max_fragments_ip6(u32 limit, ip6_address_t * src); -void nat_ipfix_logging_nat64_session(ip6_address_t * src_ip, +void snat_ipfix_logging_addresses_exhausted(u32 thread_index, u32 pool_id); +void snat_ipfix_logging_max_entries_per_user(u32 thread_index, + u32 limit, u32 src_ip); +void nat_ipfix_logging_max_sessions(u32 thread_index, u32 
limit); +void nat_ipfix_logging_max_bibs(u32 thread_index, u32 limit); +void nat_ipfix_logging_max_fragments_ip4(u32 thread_index, + u32 limit, ip4_address_t * src); +void nat_ipfix_logging_max_fragments_ip6(u32 thread_index, + u32 limit, ip6_address_t * src); +void nat_ipfix_logging_nat64_session(u32 thread_index, + ip6_address_t * src_ip, ip4_address_t * nat_src_ip, u8 proto, u16 src_port, u16 nat_src_port, ip6_address_t * dst_ip, ip4_address_t * nat_dst_ip, u16 dst_port, u16 nat_dst_port, u32 vrf_id, u8 is_create); -void nat_ipfix_logging_nat64_bib(ip6_address_t * src_ip, +void nat_ipfix_logging_nat64_bib(u32 thread_index, + ip6_address_t * src_ip, ip4_address_t * nat_src_ip, u8 proto, u16 src_port, u16 nat_src_port, u32 vrf_id, u8 is_create); diff --git a/src/plugins/nat/nat_reass.c b/src/plugins/nat/nat_reass.c index 1185e1bb4af3..ed827a9038bc 100755 --- a/src/plugins/nat/nat_reass.c +++ b/src/plugins/nat/nat_reass.c @@ -335,8 +335,8 @@ nat_ip4_reass_find_or_create (ip4_address_t src, ip4_address_t dst, } int -nat_ip4_reass_add_fragment (nat_reass_ip4_t * reass, u32 bi, - u32 ** bi_to_drop) +nat_ip4_reass_add_fragment (u32 thread_index, nat_reass_ip4_t * reass, + u32 bi, u32 ** bi_to_drop) { nat_reass_main_t *srm = &nat_reass_main; dlist_elt_t *elt; @@ -344,7 +344,7 @@ nat_ip4_reass_add_fragment (nat_reass_ip4_t * reass, u32 bi, if (reass->frag_n >= srm->ip4_max_frag) { - nat_ipfix_logging_max_fragments_ip4 (srm->ip4_max_frag, + nat_ipfix_logging_max_fragments_ip4 (thread_index, srm->ip4_max_frag, &reass->key.src); reass->flags |= NAT_REASS_FLAG_MAX_FRAG_DROP; nat_ip4_reass_get_frags_inline (reass, bi_to_drop); @@ -541,8 +541,8 @@ nat_ip6_reass_find_or_create (ip6_address_t src, ip6_address_t dst, } int -nat_ip6_reass_add_fragment (nat_reass_ip6_t * reass, u32 bi, - u32 ** bi_to_drop) +nat_ip6_reass_add_fragment (u32 thread_index, nat_reass_ip6_t * reass, + u32 bi, u32 ** bi_to_drop) { nat_reass_main_t *srm = &nat_reass_main; dlist_elt_t *elt; @@ -550,7 +550,7 
@@ nat_ip6_reass_add_fragment (nat_reass_ip6_t * reass, u32 bi, if (reass->frag_n >= srm->ip6_max_frag) { - nat_ipfix_logging_max_fragments_ip6 (srm->ip6_max_frag, + nat_ipfix_logging_max_fragments_ip6 (thread_index, srm->ip6_max_frag, &reass->key.src); reass->flags |= NAT_REASS_FLAG_MAX_FRAG_DROP; nat_ip6_reass_get_frags_inline (reass, bi_to_drop); diff --git a/src/plugins/nat/nat_reass.h b/src/plugins/nat/nat_reass.h index 579961d72c9e..e58db445e6e0 100644 --- a/src/plugins/nat/nat_reass.h +++ b/src/plugins/nat/nat_reass.h @@ -240,8 +240,8 @@ nat_reass_ip4_t *nat_ip4_reass_find_or_create (ip4_address_t src, * * @returns 0 on success, non-zero value otherwise. */ -int nat_ip4_reass_add_fragment (nat_reass_ip4_t * reass, u32 bi, - u32 ** bi_to_drop); +int nat_ip4_reass_add_fragment (u32 thread_index, nat_reass_ip4_t * reass, + u32 bi, u32 ** bi_to_drop); /** * @brief Get cached fragments. @@ -291,8 +291,8 @@ nat_reass_ip6_t *nat_ip6_reass_find_or_create (ip6_address_t src, * * @returns 0 on success, non-zero value otherwise. */ -int nat_ip6_reass_add_fragment (nat_reass_ip6_t * reass, u32 bi, - u32 ** bi_to_drop); +int nat_ip6_reass_add_fragment (u32 thread_index, nat_reass_ip6_t * reass, + u32 bi, u32 ** bi_to_drop); /** * @brief Get cached fragments. 
diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index 18595e9f0ad0..c687a06f4984 100755 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -133,7 +133,8 @@ nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg) if (clib_bihash_add_del_8_8 (&tsm->in2out, &s_kv, 0)) nat_log_warn ("out2in key del failed"); - snat_ipfix_logging_nat44_ses_delete (s->in2out.addr.as_u32, + snat_ipfix_logging_nat44_ses_delete (ctx->thread_index, + s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->in2out.protocol, s->in2out.port, @@ -237,7 +238,8 @@ create_session_for_static_mapping (snat_main_t * sm, nat_log_notice ("out2in key add failed"); /* log NAT event */ - snat_ipfix_logging_nat44_ses_create (s->in2out.addr.as_u32, + snat_ipfix_logging_nat44_ses_create (thread_index, + s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->in2out.protocol, s->in2out.port, @@ -1532,7 +1534,7 @@ nat44_out2in_reass_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) { if (nat_ip4_reass_add_fragment - (reass0, bi0, &fragments_to_drop)) + (thread_index, reass0, bi0, &fragments_to_drop)) { b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG]; nat_log_notice diff --git a/src/plugins/nat/out2in_ed.c b/src/plugins/nat/out2in_ed.c index c3f05592acce..c53d6d0066cd 100644 --- a/src/plugins/nat/out2in_ed.c +++ b/src/plugins/nat/out2in_ed.c @@ -174,7 +174,8 @@ nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg) if (snat_is_unk_proto_session (s)) goto delete; - snat_ipfix_logging_nat44_ses_delete (s->in2out.addr.as_u32, + snat_ipfix_logging_nat44_ses_delete (ctx->thread_index, + s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->in2out.protocol, s->in2out.port, @@ -318,7 +319,8 @@ create_session_for_static_mapping_ed (snat_main_t * sm, &ctx)) nat_log_notice ("in2out-ed key add failed"); - snat_ipfix_logging_nat44_ses_create (s->in2out.addr.as_u32, + snat_ipfix_logging_nat44_ses_create (thread_index, + s->in2out.addr.as_u32, 
s->out2in.addr.as_u32, s->in2out.protocol, s->in2out.port, @@ -1819,7 +1821,7 @@ nat44_ed_out2in_reass_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) { if (nat_ip4_reass_add_fragment - (reass0, bi0, &fragments_to_drop)) + (thread_index, reass0, bi0, &fragments_to_drop)) { b0->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_FRAG]; nat_log_notice diff --git a/src/plugins/nsim/nsim_input.c b/src/plugins/nsim/nsim_input.c index 2e328a58bf35..44c9f535f925 100644 --- a/src/plugins/nsim/nsim_input.c +++ b/src/plugins/nsim/nsim_input.c @@ -81,7 +81,6 @@ nsim_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, uword n_rx_packets = 0; vlib_buffer_t *b0; u32 bi0, next0; - vlib_buffer_free_list_t *fl; u32 *to_next; u32 next_index; u32 n_left_to_next; @@ -100,7 +99,6 @@ nsim_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, * We use per-thread buffer caches, so we need the freelist to * initialize them... */ - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); next_index = node->cached_next_index; while (wp->cursize) @@ -149,7 +147,6 @@ nsim_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); /* Initialize the buffer */ - vlib_buffer_init_for_free_list (b0, fl); b0->current_data = 0; b0->current_length = ep->current_length; diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c index c6a80224e0ef..7f621a1e6714 100644 --- a/src/plugins/perfmon/perfmon.c +++ b/src/plugins/perfmon/perfmon.c @@ -157,10 +157,16 @@ perfmon_init (vlib_main_t * vm) pm->log_class = vlib_log_register_class ("perfmon", 0); /* Default data collection interval */ - pm->timeout_interval = 3.0; - vec_validate (pm->pm_fds, vec_len (vlib_mains) - 1); - vec_validate (pm->perf_event_pages, vec_len (vlib_mains) - 1); - vec_validate (pm->rdpmc_indices, vec_len (vlib_mains) - 1); + pm->timeout_interval = 2.0; /* seconds */ + vec_validate (pm->pm_fds, 1); + vec_validate (pm->pm_fds[0], 
vec_len (vlib_mains) - 1); + vec_validate (pm->pm_fds[1], vec_len (vlib_mains) - 1); + vec_validate (pm->perf_event_pages, 1); + vec_validate (pm->perf_event_pages[0], vec_len (vlib_mains) - 1); + vec_validate (pm->perf_event_pages[1], vec_len (vlib_mains) - 1); + vec_validate (pm->rdpmc_indices, 1); + vec_validate (pm->rdpmc_indices[0], vec_len (vlib_mains) - 1); + vec_validate (pm->rdpmc_indices[1], vec_len (vlib_mains) - 1); pm->page_size = getpagesize (); ht = pm->perfmon_table = 0; @@ -295,18 +301,26 @@ set_pmc_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { perfmon_main_t *pm = &perfmon_main; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + int num_threads = 1 + vtm->n_threads; unformat_input_t _line_input, *line_input = &_line_input; perfmon_event_config_t ec; + f64 delay; u32 timeout_seconds; u32 deadman; + int last_set; + clib_error_t *error; - vec_reset_length (pm->events_to_collect); + vec_reset_length (pm->single_events_to_collect); + vec_reset_length (pm->paired_events_to_collect); pm->ipc_event_index = ~0; pm->mispredict_event_index = ~0; if (!unformat_user (input, unformat_line_input, line_input)) return clib_error_return (0, "counter names required..."); + clib_bitmap_zero (pm->thread_bitmap); + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { if (unformat (line_input, "timeout %u", &timeout_seconds)) @@ -316,28 +330,34 @@ set_pmc_command_fn (vlib_main_t * vm, ec.name = "instructions"; ec.pe_type = PERF_TYPE_HARDWARE; ec.pe_config = PERF_COUNT_HW_INSTRUCTIONS; - pm->ipc_event_index = vec_len (pm->events_to_collect); - vec_add1 (pm->events_to_collect, ec); + pm->ipc_event_index = vec_len (pm->paired_events_to_collect); + vec_add1 (pm->paired_events_to_collect, ec); ec.name = "cpu-cycles"; ec.pe_type = PERF_TYPE_HARDWARE; ec.pe_config = PERF_COUNT_HW_CPU_CYCLES; - vec_add1 (pm->events_to_collect, ec); + vec_add1 (pm->paired_events_to_collect, ec); } else if (unformat (line_input, 
"branch-mispredict-rate")) { ec.name = "branch-misses"; ec.pe_type = PERF_TYPE_HARDWARE; ec.pe_config = PERF_COUNT_HW_BRANCH_MISSES; - pm->mispredict_event_index = vec_len (pm->events_to_collect); - vec_add1 (pm->events_to_collect, ec); + pm->mispredict_event_index = vec_len (pm->paired_events_to_collect); + vec_add1 (pm->paired_events_to_collect, ec); ec.name = "branches"; ec.pe_type = PERF_TYPE_HARDWARE; ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; - vec_add1 (pm->events_to_collect, ec); + vec_add1 (pm->paired_events_to_collect, ec); } + else if (unformat (line_input, "threads %U", + unformat_bitmap_list, &pm->thread_bitmap)) + ; + else if (unformat (line_input, "thread %U", + unformat_bitmap_list, &pm->thread_bitmap)) + ; else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec)) { - vec_add1 (pm->events_to_collect, ec); + vec_add1 (pm->single_events_to_collect, ec); } #define _(type,event,str) \ else if (unformat (line_input, str)) \ @@ -345,30 +365,52 @@ set_pmc_command_fn (vlib_main_t * vm, ec.name = str; \ ec.pe_type = type; \ ec.pe_config = event; \ - vec_add1 (pm->events_to_collect, ec); \ + vec_add1 (pm->single_events_to_collect, ec); \ } foreach_perfmon_event #undef _ else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + unformat_free (line_input); + return error; + } + } + + unformat_free (line_input); + + last_set = clib_bitmap_last_set (pm->thread_bitmap); + if (last_set != ~0 && last_set >= num_threads) + return clib_error_return (0, "thread %d does not exist", last_set); + + /* Stick paired events at the front of the (unified) list */ + if (vec_len (pm->paired_events_to_collect) > 0) + { + perfmon_event_config_t *tmp; + /* first 2n events are pairs... 
*/ + vec_append (pm->paired_events_to_collect, pm->single_events_to_collect); + tmp = pm->single_events_to_collect; + pm->single_events_to_collect = pm->paired_events_to_collect; + pm->paired_events_to_collect = tmp; } - if (vec_len (pm->events_to_collect) == 0) + if (vec_len (pm->single_events_to_collect) == 0) return clib_error_return (0, "no events specified..."); + /* Figure out how long data collection will take */ + delay = + ((f64) vec_len (pm->single_events_to_collect)) * pm->timeout_interval; + delay /= 2.0; /* collect 2 stats at once */ + vlib_cli_output (vm, "Start collection for %d events, wait %.2f seconds", - vec_len (pm->events_to_collect), - (f64) (vec_len (pm->events_to_collect)) - * pm->timeout_interval); + vec_len (pm->single_events_to_collect), delay); vlib_process_signal_event (pm->vlib_main, perfmon_periodic_node.index, PERFMON_START, 0); /* Coarse-grained wait */ - vlib_process_suspend (vm, - ((f64) (vec_len (pm->events_to_collect) - * pm->timeout_interval))); + vlib_process_suspend (vm, delay); deadman = 0; /* Reasonable to guess that collection may not be quite done... */ @@ -390,7 +432,7 @@ set_pmc_command_fn (vlib_main_t * vm, VLIB_CLI_COMMAND (set_pmc_command, static) = { .path = "set pmc", - .short_help = "set pmc c1 [..., use \"show pmc events\"]", + .short_help = "set pmc [threads n,n1-n2] c1... 
[see \"show pmc events\"]", .function = set_pmc_command_fn, .is_mp_safe = 1, }; @@ -438,7 +480,7 @@ format_capture (u8 * s, va_list * args) if (i == pm->ipc_event_index) { f64 ipc_rate; - ASSERT (i + 1 < vec_len (c->counter_names)); + ASSERT ((i + 1) < vec_len (c->counter_names)); if (c->counter_values[i + 1] > 0) ipc_rate = (f64) c->counter_values[i] diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h index 47ee471d5fc1..9c4c34e36c13 100644 --- a/src/plugins/perfmon/perfmon.h +++ b/src/plugins/perfmon/perfmon.h @@ -97,8 +97,11 @@ typedef struct perfmon_cpuid_and_table_t *perfmon_tables; uword *perfmon_table; - /* vector of events to collect */ - perfmon_event_config_t *events_to_collect; + /* vector of single events to collect */ + perfmon_event_config_t *single_events_to_collect; + + /* vector of paired events to collect */ + perfmon_event_config_t *paired_events_to_collect; /* Base indices of synthetic event tuples */ u32 ipc_event_index; @@ -109,13 +112,17 @@ typedef struct /* Current event (index) being collected */ u32 current_event; - u32 *rdpmc_indices; + int n_active; + u32 **rdpmc_indices; /* mmap base / size of (mapped) struct perf_event_mmap_page */ - u8 **perf_event_pages; + u8 ***perf_event_pages; u32 page_size; /* Current perf_event file descriptors, per thread */ - int *pm_fds; + int **pm_fds; + + /* thread bitmap */ + uword *thread_bitmap; /* Logging */ vlib_log_class_t log_class; diff --git a/src/plugins/perfmon/perfmon_periodic.c b/src/plugins/perfmon/perfmon_periodic.c index 4e7e2378320a..12a1891518f8 100644 --- a/src/plugins/perfmon/perfmon_periodic.c +++ b/src/plugins/perfmon/perfmon_periodic.c @@ -21,6 +21,7 @@ #include #include +/* "not in glibc" */ static long perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) @@ -31,22 +32,34 @@ perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu, return ret; } -static u64 -read_current_perf_counter (vlib_main_t 
* vm) +static void +read_current_perf_counters (vlib_main_t * vm, u64 * c0, u64 * c1) { - if (vm->perf_counter_id) - return clib_rdpmc (vm->perf_counter_id); - else + int i; + u64 *cc; + perfmon_main_t *pm = &perfmon_main; + uword my_thread_index = vm->thread_index; + + *c0 = *c1 = 0; + + for (i = 0; i < pm->n_active; i++) { - u64 sw_value; - if (read (vm->perf_counter_fd, &sw_value, sizeof (sw_value)) != - sizeof (sw_value)) + cc = (i == 0) ? c0 : c1; + if (pm->rdpmc_indices[i][my_thread_index] != ~0) + *cc = clib_rdpmc ((int) pm->rdpmc_indices[i][my_thread_index]); + else { - clib_unix_warning ("counter read failed, disable collection..."); - vm->vlib_node_runtime_perf_counter_cb = 0; - return 0ULL; + u64 sw_value; + if (read (pm->pm_fds[i][my_thread_index], &sw_value, + sizeof (sw_value)) != sizeof (sw_value)) + { + clib_unix_warning + ("counter read failed, disable collection..."); + vm->vlib_node_runtime_perf_counter_cb = 0; + return; + } + *cc = sw_value; } - return sw_value; } } @@ -80,9 +93,11 @@ clear_counters (perfmon_main_t * pm) for (i = 0; i < vec_len (nm->nodes); i++) { n = nm->nodes[i]; - n->stats_total.perf_counter_ticks = 0; + n->stats_total.perf_counter0_ticks = 0; + n->stats_total.perf_counter1_ticks = 0; n->stats_total.perf_counter_vectors = 0; - n->stats_last_clear.perf_counter_ticks = 0; + n->stats_last_clear.perf_counter0_ticks = 0; + n->stats_last_clear.perf_counter1_ticks = 0; n->stats_last_clear.perf_counter_vectors = 0; } } @@ -90,7 +105,7 @@ clear_counters (perfmon_main_t * pm) } static void -enable_current_event (perfmon_main_t * pm) +enable_current_events (perfmon_main_t * pm) { struct perf_event_attr pe; int fd; @@ -98,91 +113,111 @@ enable_current_event (perfmon_main_t * pm) perfmon_event_config_t *c; vlib_main_t *vm = vlib_get_main (); u32 my_thread_index = vm->thread_index; + u32 index; + int i, limit = 1; + int cpu; - c = vec_elt_at_index (pm->events_to_collect, pm->current_event); - - memset (&pe, 0, sizeof (struct 
perf_event_attr)); - pe.type = c->pe_type; - pe.size = sizeof (struct perf_event_attr); - pe.config = c->pe_config; - pe.disabled = 1; - pe.pinned = 1; - /* - * Note: excluding the kernel makes the - * (software) context-switch counter read 0... - */ - if (pe.type != PERF_TYPE_SOFTWARE) - { - /* Exclude kernel and hypervisor */ - pe.exclude_kernel = 1; - pe.exclude_hv = 1; - } + if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect)) + limit = 2; - fd = perf_event_open (&pe, 0, -1, -1, 0); - if (fd == -1) + for (i = 0; i < limit; i++) { - clib_unix_warning ("event open: type %d config %d", c->pe_type, - c->pe_config); - return; - } + c = vec_elt_at_index (pm->single_events_to_collect, + pm->current_event + i); + + memset (&pe, 0, sizeof (struct perf_event_attr)); + pe.type = c->pe_type; + pe.size = sizeof (struct perf_event_attr); + pe.config = c->pe_config; + pe.disabled = 1; + pe.pinned = 1; + /* + * Note: excluding the kernel makes the + * (software) context-switch counter read 0... 
+ */ + if (pe.type != PERF_TYPE_SOFTWARE) + { + /* Exclude kernel and hypervisor */ + pe.exclude_kernel = 1; + pe.exclude_hv = 1; + } - if (pe.type != PERF_TYPE_SOFTWARE) - { - p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0); - if (p == MAP_FAILED) + cpu = vm->cpu_id; + + fd = perf_event_open (&pe, 0, cpu, -1, 0); + if (fd == -1) { - clib_unix_warning ("mmap"); - close (fd); + clib_unix_warning ("event open: type %d config %d", c->pe_type, + c->pe_config); return; } - } - if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0) - clib_unix_warning ("reset ioctl"); + if (pe.type != PERF_TYPE_SOFTWARE) + { + p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) + { + clib_unix_warning ("mmap"); + close (fd); + return; + } + } + else + p = 0; + + if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0) + clib_unix_warning ("reset ioctl"); - if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0) - clib_unix_warning ("enable ioctl"); + if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0) + clib_unix_warning ("enable ioctl"); - /* - * Software event counters - and others not capable of being - * read via the "rdpmc" instruction - will be read - * by system calls. - */ - if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0) - pm->rdpmc_indices[my_thread_index] = 0; - else /* use rdpmc instrs */ - pm->rdpmc_indices[my_thread_index] = p->index - 1; - pm->perf_event_pages[my_thread_index] = (void *) p; + /* + * Software event counters - and others not capable of being + * read via the "rdpmc" instruction - will be read + * by system calls. 
+ */ + if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0) + index = ~0; + else + index = p->index - 1; - pm->pm_fds[my_thread_index] = fd; + pm->rdpmc_indices[i][my_thread_index] = index; + pm->perf_event_pages[i][my_thread_index] = (void *) p; + pm->pm_fds[i][my_thread_index] = fd; + } + pm->n_active = i; /* Enable the main loop counter snapshot mechanism */ - vm->perf_counter_id = pm->rdpmc_indices[my_thread_index]; - vm->perf_counter_fd = fd; - vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counter; + vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counters; } static void -disable_event (perfmon_main_t * pm) +disable_events (perfmon_main_t * pm) { vlib_main_t *vm = vlib_get_main (); u32 my_thread_index = vm->thread_index; - - if (pm->pm_fds[my_thread_index] == 0) - return; + int i; /* Stop main loop collection */ vm->vlib_node_runtime_perf_counter_cb = 0; - if (ioctl (pm->pm_fds[my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < 0) - clib_unix_warning ("disable ioctl"); + for (i = 0; i < pm->n_active; i++) + { + if (pm->pm_fds[i][my_thread_index] == 0) + continue; + + if (ioctl (pm->pm_fds[i][my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < + 0) + clib_unix_warning ("disable ioctl"); - if (pm->perf_event_pages[my_thread_index]) - if (munmap (pm->perf_event_pages[my_thread_index], pm->page_size) < 0) - clib_unix_warning ("munmap"); + if (pm->perf_event_pages[i][my_thread_index]) + if (munmap (pm->perf_event_pages[i][my_thread_index], + pm->page_size) < 0) + clib_unix_warning ("munmap"); - (void) close (pm->pm_fds[my_thread_index]); - pm->pm_fds[my_thread_index] = 0; + (void) close (pm->pm_fds[i][my_thread_index]); + pm->pm_fds[i][my_thread_index] = 0; + } } static void @@ -190,7 +225,7 @@ worker_thread_start_event (vlib_main_t * vm) { perfmon_main_t *pm = &perfmon_main; - enable_current_event (pm); + enable_current_events (pm); vm->worker_thread_main_loop_callback = 0; } @@ -198,7 +233,7 @@ static void worker_thread_stop_event 
(vlib_main_t * vm) { perfmon_main_t *pm = &perfmon_main; - disable_event (pm); + disable_events (pm); vm->worker_thread_main_loop_callback = 0; } @@ -206,32 +241,45 @@ static void start_event (perfmon_main_t * pm, f64 now, uword event_data) { int i; + int last_set; + int all = 0; pm->current_event = 0; - if (vec_len (pm->events_to_collect) == 0) + + if (vec_len (pm->single_events_to_collect) == 0) { pm->state = PERFMON_STATE_OFF; return; } + + last_set = clib_bitmap_last_set (pm->thread_bitmap); + all = (last_set == ~0); + pm->state = PERFMON_STATE_RUNNING; clear_counters (pm); - /* Start collection on this thread */ - enable_current_event (pm); + /* Start collection on thread 0? */ + if (all || clib_bitmap_get (pm->thread_bitmap, 0)) + { + /* Start collection on this thread */ + enable_current_events (pm); + } /* And also on worker threads */ for (i = 1; i < vec_len (vlib_mains); i++) { if (vlib_mains[i] == 0) continue; - vlib_mains[i]->worker_thread_main_loop_callback = (void *) - worker_thread_start_event; + + if (all || clib_bitmap_get (pm->thread_bitmap, i)) + vlib_mains[i]->worker_thread_main_loop_callback = (void *) + worker_thread_start_event; } } void scrape_and_clear_counters (perfmon_main_t * pm) { - int i, j; + int i, j, k; vlib_main_t *vm = pm->vlib_main; vlib_main_t *stat_vm; vlib_node_main_t *nm; @@ -242,7 +290,6 @@ scrape_and_clear_counters (perfmon_main_t * pm) perfmon_event_config_t *current_event; uword *p; u8 *counter_name; - u64 counter_value; u64 vectors_this_counter; /* snapshoot the nodes, including pm counters */ @@ -272,17 +319,17 @@ scrape_and_clear_counters (perfmon_main_t * pm) n = nm->nodes[i]; nodes[i] = clib_mem_alloc (sizeof (*n)); clib_memcpy_fast (nodes[i], n, sizeof (*n)); - n->stats_total.perf_counter_ticks = 0; + n->stats_total.perf_counter0_ticks = 0; + n->stats_total.perf_counter1_ticks = 0; n->stats_total.perf_counter_vectors = 0; - n->stats_last_clear.perf_counter_ticks = 0; + n->stats_last_clear.perf_counter0_ticks = 0; + 
n->stats_last_clear.perf_counter1_ticks = 0; n->stats_last_clear.perf_counter_vectors = 0; } } vlib_worker_thread_barrier_release (vm); - current_event = pm->events_to_collect + pm->current_event; - for (j = 0; j < vec_len (vlib_mains); j++) { stat_vm = vlib_mains[j]; @@ -296,38 +343,69 @@ scrape_and_clear_counters (perfmon_main_t * pm) u8 *capture_name; n = nodes[i]; - if (n->stats_total.perf_counter_ticks == 0) - { - clib_mem_free (n); - continue; - } - capture_name = format (0, "t%d-%v%c", j, n->name, 0); + if (n->stats_total.perf_counter0_ticks == 0 && + n->stats_total.perf_counter1_ticks == 0) + goto skip_this_node; - p = hash_get_mem (pm->capture_by_thread_and_node_name, - capture_name); - - if (p == 0) + for (k = 0; k < 2; k++) { - pool_get (pm->capture_pool, c); - memset (c, 0, sizeof (*c)); - c->thread_and_node_name = capture_name; - hash_set_mem (pm->capture_by_thread_and_node_name, - capture_name, c - pm->capture_pool); + u64 counter_value, counter_last_clear; + + /* + * We collect 2 counters at once, except for the + * last counter when the user asks for an odd number of + * counters + */ + if ((pm->current_event + k) + >= vec_len (pm->single_events_to_collect)) + break; + + if (k == 0) + { + counter_value = n->stats_total.perf_counter0_ticks; + counter_last_clear = + n->stats_last_clear.perf_counter0_ticks; + } + else + { + counter_value = n->stats_total.perf_counter1_ticks; + counter_last_clear = + n->stats_last_clear.perf_counter1_ticks; + } + + capture_name = format (0, "t%d-%v%c", j, n->name, 0); + + p = hash_get_mem (pm->capture_by_thread_and_node_name, + capture_name); + + if (p == 0) + { + pool_get (pm->capture_pool, c); + memset (c, 0, sizeof (*c)); + c->thread_and_node_name = capture_name; + hash_set_mem (pm->capture_by_thread_and_node_name, + capture_name, c - pm->capture_pool); + } + else + { + c = pool_elt_at_index (pm->capture_pool, p[0]); + vec_free (capture_name); + } + + /* Snapshoot counters, etc. 
into the capture */ + current_event = pm->single_events_to_collect + + pm->current_event + k; + counter_name = (u8 *) current_event->name; + vectors_this_counter = n->stats_total.perf_counter_vectors - + n->stats_last_clear.perf_counter_vectors; + + vec_add1 (c->counter_names, counter_name); + vec_add1 (c->counter_values, + counter_value - counter_last_clear); + vec_add1 (c->vectors_this_counter, vectors_this_counter); } - else - c = pool_elt_at_index (pm->capture_pool, p[0]); - - /* Snapshoot counters, etc. into the capture */ - counter_name = (u8 *) current_event->name; - counter_value = n->stats_total.perf_counter_ticks - - n->stats_last_clear.perf_counter_ticks; - vectors_this_counter = n->stats_total.perf_counter_vectors - - n->stats_last_clear.perf_counter_vectors; - - vec_add1 (c->counter_names, counter_name); - vec_add1 (c->counter_values, counter_value); - vec_add1 (c->vectors_this_counter, vectors_this_counter); + skip_this_node: clib_mem_free (n); } vec_free (nodes); @@ -336,40 +414,66 @@ scrape_and_clear_counters (perfmon_main_t * pm) } static void -handle_timeout (perfmon_main_t * pm, f64 now) +handle_timeout (vlib_main_t * vm, perfmon_main_t * pm, f64 now) { int i; - disable_event (pm); + int last_set, all; + + last_set = clib_bitmap_last_set (pm->thread_bitmap); + all = (last_set == ~0); + + if (all || clib_bitmap_get (pm->thread_bitmap, 0)) + disable_events (pm); /* And also on worker threads */ for (i = 1; i < vec_len (vlib_mains); i++) { if (vlib_mains[i] == 0) continue; - vlib_mains[i]->worker_thread_main_loop_callback = (void *) - worker_thread_stop_event; + if (all || clib_bitmap_get (pm->thread_bitmap, i)) + vlib_mains[i]->worker_thread_main_loop_callback = (void *) + worker_thread_stop_event; } - /* Short delay to make sure workers have stopped collection */ + /* Make sure workers have stopped collection */ if (i > 1) - vlib_process_suspend (pm->vlib_main, 1e-3); + { + f64 deadman = vlib_time_now (vm) + 1.0; + + for (i = 1; i < vec_len 
(vlib_mains); i++) + { + /* Has the worker actually stopped collecting data? */ + while (vlib_mains[i]->worker_thread_main_loop_callback) + { + if (vlib_time_now (vm) > deadman) + { + clib_warning ("Thread %d deadman timeout!", i); + break; + } + vlib_process_suspend (pm->vlib_main, 1e-3); + } + } + } scrape_and_clear_counters (pm); - pm->current_event++; - if (pm->current_event >= vec_len (pm->events_to_collect)) + pm->current_event += pm->n_active; + if (pm->current_event >= vec_len (pm->single_events_to_collect)) { pm->current_event = 0; pm->state = PERFMON_STATE_OFF; return; } - enable_current_event (pm); + + if (all || clib_bitmap_get (pm->thread_bitmap, 0)) + enable_current_events (pm); /* And also on worker threads */ for (i = 1; i < vec_len (vlib_mains); i++) { if (vlib_mains[i] == 0) continue; - vlib_mains[i]->worker_thread_main_loop_callback = (void *) - worker_thread_start_event; + if (all || clib_bitmap_get (pm->thread_bitmap, i)) + vlib_mains[i]->worker_thread_main_loop_callback = (void *) + worker_thread_start_event; } } @@ -403,7 +507,7 @@ perfmon_periodic_process (vlib_main_t * vm, /* Handle timeout */ case ~0: - handle_timeout (pm, now); + handle_timeout (vm, pm, now); break; default: diff --git a/src/plugins/pppoe/pppoe.h b/src/plugins/pppoe/pppoe.h index f010750f420a..471727ce8931 100644 --- a/src/plugins/pppoe/pppoe.h +++ b/src/plugins/pppoe/pppoe.h @@ -96,7 +96,7 @@ typedef enum #define MTU 1500 -#define MTU_BUFFERS ((MTU + VLIB_BUFFER_DATA_SIZE - 1) / VLIB_BUFFER_DATA_SIZE) +#define MTU_BUFFERS ((MTU + vlib_buffer_get_default_data_size(vm) - 1) / vlib_buffer_get_default_data_size(vm)) #define NUM_BUFFERS_TO_ALLOC 32 /* diff --git a/src/plugins/tlsmbedtls/tls_mbedtls.c b/src/plugins/tlsmbedtls/tls_mbedtls.c index 52d124eb84ee..93beebe418ca 100644 --- a/src/plugins/tlsmbedtls/tls_mbedtls.c +++ b/src/plugins/tlsmbedtls/tls_mbedtls.c @@ -158,7 +158,7 @@ tls_get_ctr_drbg () static int tls_net_send (void *ctx_indexp, const unsigned char *buf, size_t 
len) { - stream_session_t *tls_session; + session_t *tls_session; uword ctx_index; tls_ctx_t *ctx; int rv; @@ -166,7 +166,7 @@ tls_net_send (void *ctx_indexp, const unsigned char *buf, size_t len) ctx_index = pointer_to_uword (ctx_indexp); ctx = mbedtls_ctx_get (ctx_index); tls_session = session_get_from_handle (ctx->tls_session_handle); - rv = svm_fifo_enqueue_nowait (tls_session->server_tx_fifo, len, buf); + rv = svm_fifo_enqueue_nowait (tls_session->tx_fifo, len, buf); if (rv < 0) return MBEDTLS_ERR_SSL_WANT_WRITE; tls_add_vpp_q_tx_evt (tls_session); @@ -176,7 +176,7 @@ tls_net_send (void *ctx_indexp, const unsigned char *buf, size_t len) static int tls_net_recv (void *ctx_indexp, unsigned char *buf, size_t len) { - stream_session_t *tls_session; + session_t *tls_session; uword ctx_index; tls_ctx_t *ctx; int rv; @@ -184,7 +184,7 @@ tls_net_recv (void *ctx_indexp, unsigned char *buf, size_t len) ctx_index = pointer_to_uword (ctx_indexp); ctx = mbedtls_ctx_get (ctx_index); tls_session = session_get_from_handle (ctx->tls_session_handle); - rv = svm_fifo_dequeue_nowait (tls_session->server_rx_fifo, len, buf); + rv = svm_fifo_dequeue_nowait (tls_session->rx_fifo, len, buf); return (rv < 0) ? 
0 : rv; } @@ -427,23 +427,23 @@ mbedtls_ctx_handshake_rx (tls_ctx_t * ctx) } static int -mbedtls_ctx_write (tls_ctx_t * ctx, stream_session_t * app_session) +mbedtls_ctx_write (tls_ctx_t * ctx, session_t * app_session) { mbedtls_ctx_t *mc = (mbedtls_ctx_t *) ctx; u8 thread_index = ctx->c_thread_index; mbedtls_main_t *mm = &mbedtls_main; u32 enq_max, deq_max, deq_now; - stream_session_t *tls_session; + session_t *tls_session; int wrote; ASSERT (mc->ssl.state == MBEDTLS_SSL_HANDSHAKE_OVER); - deq_max = svm_fifo_max_dequeue (app_session->server_tx_fifo); + deq_max = svm_fifo_max_dequeue (app_session->tx_fifo); if (!deq_max) return 0; tls_session = session_get_from_handle (ctx->tls_session_handle); - enq_max = svm_fifo_max_enqueue (tls_session->server_tx_fifo); + enq_max = svm_fifo_max_enqueue (tls_session->tx_fifo); deq_now = clib_min (deq_max, TLS_CHUNK_SIZE); if (PREDICT_FALSE (enq_max == 0)) @@ -453,8 +453,7 @@ mbedtls_ctx_write (tls_ctx_t * ctx, stream_session_t * app_session) } vec_validate (mm->tx_bufs[thread_index], deq_now); - svm_fifo_peek (app_session->server_tx_fifo, 0, deq_now, - mm->tx_bufs[thread_index]); + svm_fifo_peek (app_session->tx_fifo, 0, deq_now, mm->tx_bufs[thread_index]); wrote = mbedtls_ssl_write (&mc->ssl, mm->tx_bufs[thread_index], deq_now); if (wrote <= 0) @@ -463,7 +462,7 @@ mbedtls_ctx_write (tls_ctx_t * ctx, stream_session_t * app_session) return 0; } - svm_fifo_dequeue_drop (app_session->server_tx_fifo, wrote); + svm_fifo_dequeue_drop (app_session->tx_fifo, wrote); vec_reset_length (mm->tx_bufs[thread_index]); tls_add_vpp_q_tx_evt (tls_session); @@ -474,13 +473,13 @@ mbedtls_ctx_write (tls_ctx_t * ctx, stream_session_t * app_session) } static int -mbedtls_ctx_read (tls_ctx_t * ctx, stream_session_t * tls_session) +mbedtls_ctx_read (tls_ctx_t * ctx, session_t * tls_session) { mbedtls_ctx_t *mc = (mbedtls_ctx_t *) ctx; mbedtls_main_t *mm = &mbedtls_main; u8 thread_index = ctx->c_thread_index; u32 deq_max, enq_max, enq_now; - 
stream_session_t *app_session; + session_t *app_session; int read, enq; if (PREDICT_FALSE (mc->ssl.state != MBEDTLS_SSL_HANDSHAKE_OVER)) @@ -489,12 +488,12 @@ mbedtls_ctx_read (tls_ctx_t * ctx, stream_session_t * tls_session) return 0; } - deq_max = svm_fifo_max_dequeue (tls_session->server_rx_fifo); + deq_max = svm_fifo_max_dequeue (tls_session->rx_fifo); if (!deq_max) return 0; app_session = session_get_from_handle (ctx->app_session_handle); - enq_max = svm_fifo_max_enqueue (app_session->server_rx_fifo); + enq_max = svm_fifo_max_enqueue (app_session->rx_fifo); enq_now = clib_min (enq_max, TLS_CHUNK_SIZE); if (PREDICT_FALSE (enq_now == 0)) @@ -511,12 +510,12 @@ mbedtls_ctx_read (tls_ctx_t * ctx, stream_session_t * tls_session) return 0; } - enq = svm_fifo_enqueue_nowait (app_session->server_rx_fifo, read, + enq = svm_fifo_enqueue_nowait (app_session->rx_fifo, read, mm->rx_bufs[thread_index]); ASSERT (enq == read); vec_reset_length (mm->rx_bufs[thread_index]); - if (svm_fifo_max_dequeue (tls_session->server_rx_fifo)) + if (svm_fifo_max_dequeue (tls_session->rx_fifo)) tls_add_vpp_q_builtin_rx_evt (tls_session); if (enq > 0) diff --git a/src/plugins/tlsopenssl/tls_async.c b/src/plugins/tlsopenssl/tls_async.c index facb94ec2530..ade073c66bc7 100644 --- a/src/plugins/tlsopenssl/tls_async.c +++ b/src/plugins/tlsopenssl/tls_async.c @@ -342,7 +342,7 @@ event_handler (void *tls_async) openssl_resume_handler *handler; openssl_evt_t *callback; - stream_session_t *tls_session; + session_t *tls_session; int thread_index; tls_ctx_t *ctx; diff --git a/src/plugins/tlsopenssl/tls_openssl.c b/src/plugins/tlsopenssl/tls_openssl.c index d07e0f898380..0a25ecfa9438 100644 --- a/src/plugins/tlsopenssl/tls_openssl.c +++ b/src/plugins/tlsopenssl/tls_openssl.c @@ -105,14 +105,13 @@ openssl_lctx_get (u32 lctx_index) } static int -openssl_try_handshake_read (openssl_ctx_t * oc, - stream_session_t * tls_session) +openssl_try_handshake_read (openssl_ctx_t * oc, session_t * tls_session) { u32 
deq_max, deq_now; svm_fifo_t *f; int wrote, rv; - f = tls_session->server_rx_fifo; + f = tls_session->rx_fifo; deq_max = svm_fifo_max_dequeue (f); if (!deq_max) return 0; @@ -137,8 +136,7 @@ openssl_try_handshake_read (openssl_ctx_t * oc, } static int -openssl_try_handshake_write (openssl_ctx_t * oc, - stream_session_t * tls_session) +openssl_try_handshake_write (openssl_ctx_t * oc, session_t * tls_session) { u32 enq_max, deq_now; svm_fifo_t *f; @@ -147,7 +145,7 @@ openssl_try_handshake_write (openssl_ctx_t * oc, if (BIO_ctrl_pending (oc->rbio) <= 0) return 0; - f = tls_session->server_tx_fifo; + f = tls_session->tx_fifo; enq_max = svm_fifo_max_enqueue (f); if (!enq_max) return 0; @@ -196,12 +194,10 @@ vpp_ssl_async_process_event (tls_ctx_t * ctx, static int vpp_ssl_async_retry_func (tls_ctx_t * ctx, openssl_resume_handler * handler) { - openssl_ctx_t *oc = (openssl_ctx_t *) ctx; if (vpp_add_async_run_event (ctx, handler)) - { - SSL_clear_async_status (oc->ssl); - } + return 1; + return 0; } @@ -209,7 +205,7 @@ vpp_ssl_async_retry_func (tls_ctx_t * ctx, openssl_resume_handler * handler) #endif int -openssl_ctx_handshake_rx (tls_ctx_t * ctx, stream_session_t * tls_session) +openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session) { openssl_ctx_t *oc = (openssl_ctx_t *) ctx; int rv = 0, err; @@ -301,15 +297,15 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, stream_session_t * tls_session) } static inline int -openssl_ctx_write (tls_ctx_t * ctx, stream_session_t * app_session) +openssl_ctx_write (tls_ctx_t * ctx, session_t * app_session) { openssl_ctx_t *oc = (openssl_ctx_t *) ctx; int wrote = 0, rv, read, max_buf = 100 * TLS_CHUNK_SIZE, max_space; u32 enq_max, deq_max, deq_now, to_write; - stream_session_t *tls_session; + session_t *tls_session; svm_fifo_t *f; - f = app_session->server_tx_fifo; + f = app_session->tx_fifo; deq_max = svm_fifo_max_dequeue (f); if (!deq_max) goto check_tls_fifo; @@ -324,14 +320,14 @@ openssl_ctx_write (tls_ctx_t * ctx, 
stream_session_t * app_session) tls_add_vpp_q_builtin_tx_evt (app_session); goto check_tls_fifo; } - svm_fifo_dequeue_drop (app_session->server_tx_fifo, wrote); + svm_fifo_dequeue_drop (app_session->tx_fifo, wrote); if (wrote < deq_now) { to_write = clib_min (svm_fifo_max_read_chunk (f), deq_now - wrote); rv = SSL_write (oc->ssl, svm_fifo_head (f), to_write); if (rv > 0) { - svm_fifo_dequeue_drop (app_session->server_tx_fifo, rv); + svm_fifo_dequeue_drop (app_session->tx_fifo, rv); wrote += rv; } } @@ -345,7 +341,7 @@ openssl_ctx_write (tls_ctx_t * ctx, stream_session_t * app_session) return wrote; tls_session = session_get_from_handle (ctx->tls_session_handle); - f = tls_session->server_tx_fifo; + f = tls_session->tx_fifo; enq_max = svm_fifo_max_enqueue (f); if (!enq_max) { @@ -379,12 +375,12 @@ openssl_ctx_write (tls_ctx_t * ctx, stream_session_t * app_session) } static inline int -openssl_ctx_read (tls_ctx_t * ctx, stream_session_t * tls_session) +openssl_ctx_read (tls_ctx_t * ctx, session_t * tls_session) { int read, wrote = 0, max_space, max_buf = 100 * TLS_CHUNK_SIZE, rv; openssl_ctx_t *oc = (openssl_ctx_t *) ctx; u32 deq_max, enq_max, deq_now, to_read; - stream_session_t *app_session; + session_t *app_session; svm_fifo_t *f; if (PREDICT_FALSE (SSL_in_init (oc->ssl))) @@ -393,7 +389,7 @@ openssl_ctx_read (tls_ctx_t * ctx, stream_session_t * tls_session) return 0; } - f = tls_session->server_rx_fifo; + f = tls_session->rx_fifo; deq_max = svm_fifo_max_dequeue (f); max_space = max_buf - BIO_ctrl_pending (oc->wbio); max_space = max_space < 0 ? 
0 : max_space; @@ -428,7 +424,7 @@ openssl_ctx_read (tls_ctx_t * ctx, stream_session_t * tls_session) return wrote; app_session = session_get_from_handle (ctx->app_session_handle); - f = app_session->server_rx_fifo; + f = app_session->rx_fifo; enq_max = svm_fifo_max_enqueue (f); if (!enq_max) { @@ -465,7 +461,7 @@ openssl_ctx_init_client (tls_ctx_t * ctx) long flags = SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | SSL_OP_NO_COMPRESSION; openssl_ctx_t *oc = (openssl_ctx_t *) ctx; openssl_main_t *om = &openssl_main; - stream_session_t *tls_session; + session_t *tls_session; const SSL_METHOD *method; int rv, err; #ifdef HAVE_OPENSSL_ASYNC @@ -664,7 +660,7 @@ openssl_ctx_init_server (tls_ctx_t * ctx) openssl_ctx_t *oc = (openssl_ctx_t *) ctx; u32 olc_index = ctx->tls_ssl_ctx; openssl_listen_ctx_t *olc; - stream_session_t *tls_session; + session_t *tls_session; int rv, err; #ifdef HAVE_OPENSSL_ASYNC openssl_resume_handler *handler; diff --git a/src/plugins/tlsopenssl/tls_openssl.h b/src/plugins/tlsopenssl/tls_openssl.h index 712b4cac628d..66e0b364cbac 100644 --- a/src/plugins/tlsopenssl/tls_openssl.h +++ b/src/plugins/tlsopenssl/tls_openssl.h @@ -57,8 +57,7 @@ typedef struct openssl_tls_callback_ void *arg; } openssl_tls_callback_t; -typedef int openssl_resume_handler (tls_ctx_t * ctx, - stream_session_t * tls_session); +typedef int openssl_resume_handler (tls_ctx_t * ctx, session_t * tls_session); tls_ctx_t *openssl_ctx_get_w_thread (u32 ctx_index, u8 thread_index); openssl_tls_callback_t *vpp_add_async_pending_event (tls_ctx_t * ctx, diff --git a/src/plugins/unittest/session_test.c b/src/plugins/unittest/session_test.c index cc273321d321..fb1c54fcb1ca 100644 --- a/src/plugins/unittest/session_test.c +++ b/src/plugins/unittest/session_test.c @@ -41,7 +41,7 @@ } void -dummy_session_reset_callback (stream_session_t * s) +dummy_session_reset_callback (session_t * s) { clib_warning ("called..."); } @@ -50,7 +50,7 @@ volatile u32 connected_session_index = ~0; volatile u32 
connected_session_thread = ~0; int dummy_session_connected_callback (u32 app_index, u32 api_context, - stream_session_t * s, u8 is_fail) + session_t * s, u8 is_fail) { if (s) { @@ -77,7 +77,7 @@ dummy_del_segment_callback (u32 client_index, u64 segment_handle) } void -dummy_session_disconnect_callback (stream_session_t * s) +dummy_session_disconnect_callback (session_t * s) { clib_warning ("called..."); } @@ -87,7 +87,7 @@ volatile u32 accepted_session_index; volatile u32 accepted_session_thread; int -dummy_session_accept_callback (stream_session_t * s) +dummy_session_accept_callback (session_t * s) { dummy_accept = 1; accepted_session_index = s->session_index; @@ -97,7 +97,7 @@ dummy_session_accept_callback (stream_session_t * s) } int -dummy_server_rx_callback (stream_session_t * s) +dummy_server_rx_callback (session_t * s) { clib_warning ("called..."); return -1; @@ -161,8 +161,8 @@ session_test_basic (vlib_main_t * vm, unformat_input_t * input) { session_endpoint_t server_sep = SESSION_ENDPOINT_NULL; u64 options[APP_OPTIONS_N_OPTIONS], bind4_handle, bind6_handle; - clib_error_t *error = 0; u32 server_index; + int error = 0; clib_memset (options, 0, sizeof (options)); options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN; @@ -182,36 +182,36 @@ session_test_basic (vlib_main_t * vm, unformat_input_t * input) vec_free (attach_args.name); server_sep.is_ip4 = 1; - vnet_bind_args_t bind_args = { + vnet_listen_args_t bind_args = { .sep = server_sep, .app_index = 0, }; bind_args.app_index = server_index; - error = vnet_bind (&bind_args); + error = vnet_listen (&bind_args); SESSION_TEST ((error == 0), "server bind4 should work"); bind4_handle = bind_args.handle; - error = vnet_bind (&bind_args); + error = vnet_listen (&bind_args); SESSION_TEST ((error != 0), "double server bind4 should not work"); bind_args.sep.is_ip4 = 0; - error = vnet_bind (&bind_args); + error = vnet_listen (&bind_args); SESSION_TEST ((error == 0), "server bind6 should work"); bind6_handle = 
bind_args.handle; - error = vnet_bind (&bind_args); + error = vnet_listen (&bind_args); SESSION_TEST ((error != 0), "double server bind6 should not work"); - vnet_unbind_args_t unbind_args = { + vnet_unlisten_args_t unbind_args = { .handle = bind4_handle, .app_index = server_index, }; - error = vnet_unbind (&unbind_args); + error = vnet_unlisten (&unbind_args); SESSION_TEST ((error == 0), "unbind4 should work"); unbind_args.handle = bind6_handle; - error = vnet_unbind (&unbind_args); + error = vnet_unlisten (&unbind_args); SESSION_TEST ((error == 0), "unbind6 should work"); vnet_app_detach_args_t detach_args = { @@ -276,9 +276,9 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input) session_endpoint_t server_sep = SESSION_ENDPOINT_NULL; ip4_address_t intf_addr[3]; transport_connection_t *tc; - stream_session_t *s; - clib_error_t *error; + session_t *s; u8 *appns_id; + int error; /* * Create the loopbacks @@ -306,8 +306,7 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input) .is_add = 1 }; error = vnet_app_namespace_add_del (&ns_args); - SESSION_TEST ((error == 0), "app ns insertion should succeed: %d", - clib_error_get_code (error)); + SESSION_TEST ((error == 0), "app ns insertion should succeed: %d", error); /* * Attach client/server @@ -340,11 +339,11 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input) server_sep.is_ip4 = 1; server_sep.port = dummy_server_port; - vnet_bind_args_t bind_args = { + vnet_listen_args_t bind_args = { .sep = server_sep, .app_index = server_index, }; - error = vnet_bind (&bind_args); + error = vnet_listen (&bind_args); SESSION_TEST ((error == 0), "server bind should work"); /* @@ -424,13 +423,12 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) session_endpoint_t server_sep = SESSION_ENDPOINT_NULL; session_endpoint_t client_sep = SESSION_ENDPOINT_NULL; session_endpoint_t intf_sep = SESSION_ENDPOINT_NULL; - clib_error_t *error = 0; u8 *ns_id = format (0, 
"appns1"); app_namespace_t *app_ns; application_t *server; - stream_session_t *s; + session_t *s; u64 handle; - int code; + int error = 0; server_sep.is_ip4 = 1; server_sep.port = dummy_port; @@ -447,7 +445,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) .name = format (0, "session_test"), }; - vnet_bind_args_t bind_args = { + vnet_listen_args_t bind_args = { .sep = server_sep, .app_index = 0, }; @@ -458,7 +456,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) }; clib_memcpy (&connect_args.sep, &client_sep, sizeof (client_sep)); - vnet_unbind_args_t unbind_args = { + vnet_unlisten_args_t unbind_args = { .handle = bind_args.handle, .app_index = 0, }; @@ -487,8 +485,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) .is_add = 1 }; error = vnet_app_namespace_add_del (&ns_args); - SESSION_TEST ((error == 0), "app ns insertion should succeed: %d", - clib_error_get_code (error)); + SESSION_TEST ((error == 0), "app ns insertion should succeed: %d", error); app_ns = app_namespace_get_from_id (ns_id); SESSION_TEST ((app_ns != 0), "should find ns %v status", ns_id); @@ -509,9 +506,8 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) error = vnet_application_attach (&attach_args); SESSION_TEST ((error != 0), "app attachment should fail"); - code = clib_error_get_code (error); - SESSION_TEST ((code == VNET_API_ERROR_APP_WRONG_NS_SECRET), - "code should be wrong ns secret: %d", code); + SESSION_TEST ((error == VNET_API_ERROR_APP_WRONG_NS_SECRET), + "code should be wrong ns secret: %d", error); /* * Attach server with global default scope @@ -530,7 +526,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) "server should be in the default ns"); bind_args.app_index = server_index; - error = vnet_bind (&bind_args); + error = vnet_listen (&bind_args); SESSION_TEST ((error == 0), "server bind should work"); server_st_index = application_session_table (server, 
FIB_PROTOCOL_IP4); @@ -544,7 +540,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) unbind_args.app_index = server_index; unbind_args.handle = bind_args.handle; - error = vnet_unbind (&unbind_args); + error = vnet_unlisten (&unbind_args); SESSION_TEST ((error == 0), "unbind should work"); s = session_lookup_listener (server_st_index, &server_sep); @@ -570,7 +566,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) "server should be in the right ns"); bind_args.app_index = server_index; - error = vnet_bind (&bind_args); + error = vnet_listen (&bind_args); SESSION_TEST ((error == 0), "bind should work"); server_st_index = application_session_table (server, FIB_PROTOCOL_IP4); s = session_lookup_listener (server_st_index, &server_sep); @@ -594,15 +590,13 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) connect_args.app_index = client_index; error = vnet_connect (&connect_args); SESSION_TEST ((error != 0), "client connect should return error code"); - code = clib_error_get_code (error); - SESSION_TEST ((code == VNET_API_ERROR_INVALID_VALUE), + SESSION_TEST ((error == VNET_API_ERROR_INVALID_VALUE), "error code should be invalid value (zero ip)"); SESSION_TEST ((dummy_segment_count == 0), "shouldn't have received request to map new segment"); connect_args.sep.ip.ip4.as_u8[0] = 127; error = vnet_connect (&connect_args); SESSION_TEST ((error == 0), "client connect should not return error code"); - code = clib_error_get_code (error); SESSION_TEST ((dummy_segment_count == 1), "should've received request to map new segment"); SESSION_TEST ((dummy_accept == 1), "should've received accept request"); @@ -616,8 +610,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) SESSION_TEST ((error == 0), "client attachment should work"); error = vnet_connect (&connect_args); SESSION_TEST ((error != 0), "client connect should return error code"); - code = clib_error_get_code (error); - SESSION_TEST ((code 
== VNET_API_ERROR_SESSION_CONNECT), + SESSION_TEST ((error == VNET_API_ERROR_SESSION_CONNECT), "error code should be connect (nothing in local scope)"); detach_args.app_index = client_index; vnet_application_detach (&detach_args); @@ -627,7 +620,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) */ unbind_args.handle = bind_args.handle; unbind_args.app_index = server_index; - error = vnet_unbind (&unbind_args); + error = vnet_unlisten (&unbind_args); SESSION_TEST ((error == 0), "unbind should work"); s = session_lookup_listener (server_st_index, &server_sep); @@ -650,7 +643,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) "app should be in the right ns"); bind_args.app_index = server_index; - error = vnet_bind (&bind_args); + error = vnet_listen (&bind_args); SESSION_TEST ((error == 0), "bind should work"); server_st_index = application_session_table (server, FIB_PROTOCOL_IP4); @@ -662,7 +655,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) "listener should exist in local table"); unbind_args.handle = bind_args.handle; - error = vnet_unbind (&unbind_args); + error = vnet_unlisten (&unbind_args); SESSION_TEST ((error == 0), "unbind should work"); handle = session_lookup_local_endpoint (server_local_st_index, &server_sep); @@ -679,8 +672,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) vnet_application_attach (&attach_args); error = vnet_connect (&connect_args); SESSION_TEST ((error != 0), "client connect should return error code"); - code = clib_error_get_code (error); - SESSION_TEST ((code == VNET_API_ERROR_SESSION_CONNECT), + SESSION_TEST ((error == VNET_API_ERROR_SESSION_CONNECT), "error code should be connect (not in same ns)"); detach_args.app_index = client_index; vnet_application_detach (&detach_args); @@ -697,12 +689,11 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) session_create_lookpback (0, &sw_if_index, &intf_addr); /* - * Update 
namespace + * Update namespace with interface */ ns_args.sw_if_index = sw_if_index; error = vnet_app_namespace_add_del (&ns_args); - SESSION_TEST ((error == 0), "app ns insertion should succeed: %d", - clib_error_get_code (error)); + SESSION_TEST ((error == 0), "app ns insertion should succeed: %d", error); /* * Attach server with local and global scope @@ -719,7 +710,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input) server_wrk_index = application_get_default_worker (server)->wrk_index; bind_args.app_index = server_index; - error = vnet_bind (&bind_args); + error = vnet_listen (&bind_args); server_st_index = application_session_table (server, FIB_PROTOCOL_IP4); s = session_lookup_listener (server_st_index, &server_sep); SESSION_TEST ((s == 0), "zero listener should not exist in global table"); @@ -751,8 +742,7 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input) u16 lcl_port = 1234, rmt_port = 4321; u32 action_index = 1, res; ip4_address_t lcl_lkup, rmt_lkup; - clib_error_t *error; - int verbose = 0; + int verbose = 0, error; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -1023,13 +1013,14 @@ session_test_rules (vlib_main_t * vm, unformat_input_t * input) u32 dummy_server_api_index = ~0; transport_connection_t *tc; u32 dummy_port = 1111; - clib_error_t *error = 0; u8 is_filtered = 0, *ns_id = format (0, "appns1"); - stream_session_t *listener, *s; + session_t *listener, *s; app_namespace_t *default_ns = app_namespace_get_default (); u32 local_ns_index = default_ns->local_table_index; - int verbose = 0, rv; + int verbose = 0; app_namespace_t *app_ns; + app_listener_t *al; + int error = 0; u64 handle; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) @@ -1056,7 +1047,7 @@ session_test_rules (vlib_main_t * vm, unformat_input_t * input) .name = format (0, "session_test"), }; - vnet_bind_args_t bind_args = { + vnet_listen_args_t bind_args = { .sep = server_sep, .app_index = 0, }; @@ -1074,10 
+1065,11 @@ session_test_rules (vlib_main_t * vm, unformat_input_t * input) server_index = attach_args.app_index; bind_args.app_index = server_index; - error = vnet_bind (&bind_args); + error = vnet_listen (&bind_args); SESSION_TEST ((error == 0), "server bound to %U/%d", format_ip46_address, &server_sep.ip, 1, server_sep.port); - listener = listen_session_get_from_handle (bind_args.handle); + al = app_listener_get_w_handle (bind_args.handle); + listener = app_listener_get_session (al); ip4_address_t lcl_ip = { .as_u32 = clib_host_to_net_u32 (0x01020304), }; @@ -1257,8 +1249,7 @@ session_test_rules (vlib_main_t * vm, unformat_input_t * input) /* Try connecting */ error = vnet_connect (&connect_args); SESSION_TEST ((error != 0), "connect should fail"); - rv = clib_error_get_code (error); - SESSION_TEST ((rv == VNET_API_ERROR_APP_CONNECT_FILTERED), + SESSION_TEST ((error == VNET_API_ERROR_APP_CONNECT_FILTERED), "connect should be filtered"); sep.ip.ip4.as_u32 -= 1 << 24; @@ -1489,8 +1480,7 @@ session_test_rules (vlib_main_t * vm, unformat_input_t * input) .is_add = 1 }; error = vnet_app_namespace_add_del (&ns_args); - SESSION_TEST ((error == 0), "app ns insertion should succeed: %d", - clib_error_get_code (error)); + SESSION_TEST ((error == 0), "app ns insertion should succeed: %d", error); app_ns = app_namespace_get_from_id (ns_id); attach_args.namespace_id = ns_id; @@ -1533,8 +1523,7 @@ session_test_rules (vlib_main_t * vm, unformat_input_t * input) error = vnet_connect (&connect_args); SESSION_TEST ((error != 0), "connect should fail"); - rv = clib_error_get_code (error); - SESSION_TEST ((rv == VNET_API_ERROR_APP_CONNECT_FILTERED), + SESSION_TEST ((error == VNET_API_ERROR_APP_CONNECT_FILTERED), "connect should be filtered"); /* @@ -1547,8 +1536,7 @@ session_test_rules (vlib_main_t * vm, unformat_input_t * input) connect_args.app_index = server_index; error = vnet_connect (&connect_args); SESSION_TEST ((error != 0), "connect should fail"); - rv = 
clib_error_get_code (error); - SESSION_TEST ((rv == VNET_API_ERROR_APP_CONNECT_FILTERED), + SESSION_TEST ((error == VNET_API_ERROR_APP_CONNECT_FILTERED), "connect should be filtered"); args.table_args.is_add = 0; @@ -1587,13 +1575,12 @@ session_test_proxy (vlib_main_t * vm, unformat_input_t * input) unformat_input_t tmp_input; u32 server_index, app_index; u32 dummy_server_api_index = ~0, sw_if_index = 0; - clib_error_t *error = 0; u8 is_filtered = 0; - stream_session_t *s; + session_t *s; transport_connection_t *tc; u16 lcl_port = 1234, rmt_port = 4321; app_namespace_t *app_ns; - int verbose = 0; + int verbose = 0, error = 0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { diff --git a/src/plugins/unittest/string_test.c b/src/plugins/unittest/string_test.c index 41b4c61dff43..cbceb8fb3b26 100644 --- a/src/plugins/unittest/string_test.c +++ b/src/plugins/unittest/string_test.c @@ -750,6 +750,8 @@ test_clib_strncpy (vlib_main_t * vm, unformat_input_t * input) return -1; /* Verify it against strncpy */ +#if __GNUC__ < 8 + /* GCC 8 debian flunks this one at compile time */ strncpy (dst, src, strlen (src)); /* This better not fail but check anyhow */ @@ -758,6 +760,7 @@ test_clib_strncpy (vlib_main_t * vm, unformat_input_t * input) return -1; if (indicator != 0) return -1; +#endif /* limited copy -- strlen src > n, copy up to n */ err = clib_strncpy (dst, "The price of greatness is responsibility.", 10); @@ -791,12 +794,15 @@ test_clib_strncpy (vlib_main_t * vm, unformat_input_t * input) if (indicator != 0) return -1; /* Verify it against strncpy */ +#if __GNUC__ < 8 + /* GCC 8 debian flunks this one at compile time */ strncpy (dst, src, strlen (src)); if (strcmp_s (dst, clib_strnlen (dst, sizeof (dst)), src, &indicator) != EOK) return -1; if (indicator != 0) return -1; +#endif /* zero length copy */ clib_strncpy (old_dst, dst, clib_strnlen (dst, sizeof (dst))); @@ -1046,6 +1052,8 @@ test_strncat_s (vlib_main_t * vm, unformat_input_t * input) if 
(indicator != 0) return -1; /* verify it against strncat */ +#if __GNUC__ < 8 + /* GCC 8 debian flunks this one at compile time */ strcpy_s (dst, sizeof (dst), s1); strncat (dst, s2, 13); if (strcmp_s (dst, s1size - 1, "Two things are infinite: the universe ", @@ -1053,6 +1061,7 @@ test_strncat_s (vlib_main_t * vm, unformat_input_t * input) return -1; if (indicator != 0) return -1; +#endif /* negative stuff */ err = strncat_s (0, 0, 0, 1); @@ -1169,6 +1178,8 @@ test_clib_strncat (vlib_main_t * vm, unformat_input_t * input) if (indicator != 0) return -1; /* verify it against strncat */ +#if __GNUC__ < 8 + /* GCC 8 debian flunks this one at compile time */ strcpy_s (dst, sizeof (dst), s1); strncat (dst, s2, 13); if (strcmp_s (dst, s1size - 1, "Two things are infinite: the universe ", @@ -1176,6 +1187,7 @@ test_clib_strncat (vlib_main_t * vm, unformat_input_t * input) return -1; if (indicator != 0) return -1; +#endif /* negative stuff */ err = clib_strncat (0, 0, 1); diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c index 66260df2ff60..c259f9d9a03c 100644 --- a/src/plugins/unittest/tcp_test.c +++ b/src/plugins/unittest/tcp_test.c @@ -1639,7 +1639,7 @@ tcp_test_lookup (vlib_main_t * vm, unformat_input_t * input) tcp_main_t *tm = &tcp_main; transport_connection_t _tc1, *tc1 = &_tc1, _tc2, *tc2 = &_tc2, *tconn; tcp_connection_t *tc; - stream_session_t *s, *s1; + session_t *s, *s1; u8 cmp = 0, is_filtered = 0; u32 sidx; @@ -1785,7 +1785,7 @@ tcp_test_session (vlib_main_t * vm, unformat_input_t * input) TCP_EVT_DBG (TCP_EVT_OPEN, tc0); - if (stream_session_accept (&tc0->connection, 0 /* listener index */ , + if (session_stream_accept (&tc0->connection, 0 /* listener index */ , 0 /* notify */ )) clib_warning ("stream_session_accept failed"); diff --git a/src/plugins/vmxnet3/README.md b/src/plugins/vmxnet3/README.md index ef715a096965..65a0bc89f4a4 100644 --- a/src/plugins/vmxnet3/README.md +++ b/src/plugins/vmxnet3/README.md @@ -16,7 +16,7 @@ 
vfio driver can still be used with recent kernels which support no-iommu mode. ##Known issues * TSO/LRO -* RSS/multiple queues +* RSS * VLAN filter ## Usage diff --git a/src/plugins/vmxnet3/cli.c b/src/plugins/vmxnet3/cli.c index 566b0d680793..e110a4799882 100644 --- a/src/plugins/vmxnet3/cli.c +++ b/src/plugins/vmxnet3/cli.c @@ -32,7 +32,6 @@ vmxnet3_create_command_fn (vlib_main_t * vm, unformat_input_t * input, { unformat_input_t _line_input, *line_input = &_line_input; vmxnet3_create_if_args_t args; - u32 tmp; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -45,10 +44,12 @@ vmxnet3_create_command_fn (vlib_main_t * vm, unformat_input_t * input, ; else if (unformat (line_input, "elog")) args.enable_elog = 1; - else if (unformat (line_input, "rx-queue-size %u", &tmp)) - args.rxq_size = tmp; - else if (unformat (line_input, "tx-queue-size %u", &tmp)) - args.txq_size = tmp; + else if (unformat (line_input, "rx-queue-size %u", &args.rxq_size)) + ; + else if (unformat (line_input, "tx-queue-size %u", &args.txq_size)) + ; + else if (unformat (line_input, "num-tx-queues %u", &args.txq_num)) + ; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); @@ -65,7 +66,8 @@ vmxnet3_create_command_fn (vlib_main_t * vm, unformat_input_t * input, VLIB_CLI_COMMAND (vmxnet3_create_command, static) = { .path = "create interface vmxnet3", .short_help = "create interface vmxnet3 " - "[rx-queue-size ] [tx-queue-size ]", + "[rx-queue-size ] [tx-queue-size ]" + "[num-tx-queues ]", .function = vmxnet3_create_command_fn, }; /* *INDENT-ON* */ @@ -319,9 +321,9 @@ show_vmxnet3 (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, } } - vec_foreach_index (qid, vd->rxqs) + vec_foreach_index (qid, vd->txqs) { - txq = vec_elt_at_index (vd->txqs, 0); + txq = vec_elt_at_index (vd->txqs, qid); vlib_cli_output (vm, " Queue %u (TX)", qid); vlib_cli_output (vm, " TX completion next index %u", txq->tx_comp_ring.next); diff --git 
a/src/plugins/vmxnet3/format.c b/src/plugins/vmxnet3/format.c index 981cda73ff69..8e39b5faf364 100644 --- a/src/plugins/vmxnet3/format.c +++ b/src/plugins/vmxnet3/format.c @@ -58,9 +58,11 @@ format_vmxnet3_device (u8 * s, va_list * args) vmxnet3_main_t *vmxm = &vmxnet3_main; vmxnet3_device_t *vd = vec_elt_at_index (vmxm->devices, i); u32 indent = format_get_indent (s); - vmxnet3_queues *q = &vd->dma->queues; vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, 0); vmxnet3_txq_t *txq = vec_elt_at_index (vd->txqs, 0); + vmxnet3_tx_queue *tx = VMXNET3_TX_START (vd); + vmxnet3_rx_queue *rx = VMXNET3_RX_START (vd); + u16 qid; s = format (s, "flags: %U", format_vmxnet3_device_flags, vd); s = format (s, "\n%Urx queues %u, rx desc %u, tx queues %u, tx desc %u", @@ -72,69 +74,81 @@ format_vmxnet3_device (u8 * s, va_list * args) vmxnet3_reg_write (vd, 1, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS); - s = format (s, "\n%UTX:", format_white_space, indent); - s = format (s, "\n%U TSO packets %llu", - format_white_space, indent, - q->tx.stats.tso_pkts - vd->tx_stats.tso_pkts); - s = format (s, "\n%U TSO bytes %llu", - format_white_space, indent, - q->tx.stats.tso_bytes - vd->tx_stats.tso_bytes); - s = format (s, "\n%U ucast packets %llu", - format_white_space, indent, - q->tx.stats.ucast_pkts - vd->tx_stats.ucast_pkts); - s = format (s, "\n%U ucast bytes %llu", - format_white_space, indent, - q->tx.stats.ucast_bytes - vd->tx_stats.ucast_bytes); - s = format (s, "\n%U mcast packets %llu", - format_white_space, indent, - q->tx.stats.mcast_pkts - vd->tx_stats.mcast_pkts); - s = format (s, "\n%U mcast bytes %llu", - format_white_space, indent, - q->tx.stats.mcast_bytes - vd->tx_stats.mcast_bytes); - s = format (s, "\n%U bcast packets %llu", - format_white_space, indent, - q->tx.stats.bcast_pkts - vd->tx_stats.bcast_pkts); - s = format (s, "\n%U bcast bytes %llu", - format_white_space, indent, - q->tx.stats.bcast_bytes - vd->tx_stats.bcast_bytes); - s = format (s, "\n%U Errors packets %llu", 
- format_white_space, indent, - q->tx.stats.error_pkts - vd->tx_stats.error_pkts); - s = format (s, "\n%U Discard packets %llu", - format_white_space, indent, - q->tx.stats.discard_pkts - vd->tx_stats.discard_pkts); + vec_foreach_index (qid, vd->txqs) + { + vmxnet3_tx_stats *txs = vec_elt_at_index (vd->tx_stats, qid); - s = format (s, "\n%URX:", format_white_space, indent); - s = format (s, "\n%U LRO packets %llu", - format_white_space, indent, - q->rx.stats.lro_pkts - vd->rx_stats.lro_pkts); - s = format (s, "\n%U LRO bytes %llu", - format_white_space, indent, - q->rx.stats.lro_bytes - vd->rx_stats.lro_bytes); - s = format (s, "\n%U ucast packets %llu", - format_white_space, indent, - q->rx.stats.ucast_pkts - vd->rx_stats.ucast_pkts); - s = format (s, "\n%U ucast bytes %llu", - format_white_space, indent, - q->rx.stats.ucast_bytes - vd->rx_stats.ucast_bytes); - s = format (s, "\n%U mcast packets %llu", - format_white_space, indent, - q->rx.stats.mcast_pkts - vd->rx_stats.mcast_pkts); - s = format (s, "\n%U mcast bytes %llu", - format_white_space, indent, - q->rx.stats.mcast_bytes - vd->rx_stats.mcast_bytes); - s = format (s, "\n%U bcast packets %llu", - format_white_space, indent, - q->rx.stats.bcast_pkts - vd->rx_stats.bcast_pkts); - s = format (s, "\n%U bcast bytes %llu", - format_white_space, indent, - q->rx.stats.bcast_bytes - vd->rx_stats.bcast_bytes); - s = format (s, "\n%U No Bufs %llu", - format_white_space, indent, - q->rx.stats.nobuf_pkts - vd->rx_stats.nobuf_pkts); - s = format (s, "\n%U Error packets %llu", - format_white_space, indent, - q->rx.stats.error_pkts - vd->rx_stats.error_pkts); + s = format (s, "\n%UTX Queue %u:", format_white_space, indent, qid); + s = format (s, "\n%U TSO packets %llu", + format_white_space, indent, + tx->stats.tso_pkts - txs->tso_pkts); + s = format (s, "\n%U TSO bytes %llu", + format_white_space, indent, + tx->stats.tso_bytes - txs->tso_bytes); + s = format (s, "\n%U ucast packets %llu", + format_white_space, indent, + 
tx->stats.ucast_pkts - txs->ucast_pkts); + s = format (s, "\n%U ucast bytes %llu", + format_white_space, indent, + tx->stats.ucast_bytes - txs->ucast_bytes); + s = format (s, "\n%U mcast packets %llu", + format_white_space, indent, + tx->stats.mcast_pkts - txs->mcast_pkts); + s = format (s, "\n%U mcast bytes %llu", + format_white_space, indent, + tx->stats.mcast_bytes - txs->mcast_bytes); + s = format (s, "\n%U bcast packets %llu", + format_white_space, indent, + tx->stats.bcast_pkts - txs->bcast_pkts); + s = format (s, "\n%U bcast bytes %llu", + format_white_space, indent, + tx->stats.bcast_bytes - txs->bcast_bytes); + s = format (s, "\n%U Errors packets %llu", + format_white_space, indent, + tx->stats.error_pkts - txs->error_pkts); + s = format (s, "\n%U Discard packets %llu", + format_white_space, indent, + tx->stats.discard_pkts - txs->discard_pkts); + tx++; + } + + vec_foreach_index (qid, vd->rxqs) + { + vmxnet3_rx_stats *rxs = vec_elt_at_index (vd->rx_stats, qid); + + s = format (s, "\n%URX Queue %u:", format_white_space, indent, qid); + s = format (s, "\n%U LRO packets %llu", + format_white_space, indent, + rx->stats.lro_pkts - rxs->lro_pkts); + s = format (s, "\n%U LRO bytes %llu", + format_white_space, indent, + rx->stats.lro_bytes - rxs->lro_bytes); + s = format (s, "\n%U ucast packets %llu", + format_white_space, indent, + rx->stats.ucast_pkts - rxs->ucast_pkts); + s = format (s, "\n%U ucast bytes %llu", + format_white_space, indent, + rx->stats.ucast_bytes - rxs->ucast_bytes); + s = format (s, "\n%U mcast packets %llu", + format_white_space, indent, + rx->stats.mcast_pkts - rxs->mcast_pkts); + s = format (s, "\n%U mcast bytes %llu", + format_white_space, indent, + rx->stats.mcast_bytes - rxs->mcast_bytes); + s = format (s, "\n%U bcast packets %llu", + format_white_space, indent, + rx->stats.bcast_pkts - rxs->bcast_pkts); + s = format (s, "\n%U bcast bytes %llu", + format_white_space, indent, + rx->stats.bcast_bytes - rxs->bcast_bytes); + s = format (s, 
"\n%U No Bufs %llu", + format_white_space, indent, + rx->stats.nobuf_pkts - rxs->nobuf_pkts); + s = format (s, "\n%U Error packets %llu", + format_white_space, indent, + rx->stats.error_pkts - rxs->error_pkts); + rx++; + } return s; } diff --git a/src/plugins/vmxnet3/output.c b/src/plugins/vmxnet3/output.c index 2a6418dfa444..5c48549e60d0 100644 --- a/src/plugins/vmxnet3/output.c +++ b/src/plugins/vmxnet3/output.c @@ -108,8 +108,7 @@ VNET_DEVICE_CLASS_TX_FN (vmxnet3_device_class) (vlib_main_t * vm, u16 space_left; u16 n_left = frame->n_vectors; vmxnet3_txq_t *txq; - u32 thread_index = vm->thread_index; - u16 qid = thread_index, produce; + u16 qid = vm->thread_index % vd->num_tx_queues, produce; if (PREDICT_FALSE (!(vd->flags & VMXNET3_DEVICE_F_LINK_UP))) { @@ -119,7 +118,7 @@ VNET_DEVICE_CLASS_TX_FN (vmxnet3_device_class) (vlib_main_t * vm, return (0); } - txq = vec_elt_at_index (vd->txqs, qid % vd->num_tx_queues); + txq = vec_elt_at_index (vd->txqs, qid); clib_spinlock_lock_if_init (&txq->lock); vmxnet3_txq_release (vm, vd, txq); @@ -202,8 +201,7 @@ VNET_DEVICE_CLASS_TX_FN (vmxnet3_device_class) (vlib_main_t * vm, } if (PREDICT_TRUE (produce != txq->tx_ring.produce)) - vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_TXPROD, - txq->tx_ring.produce); + vmxnet3_reg_write_inline (vd, 0, txq->reg_txprod, txq->tx_ring.produce); clib_spinlock_unlock_if_init (&txq->lock); diff --git a/src/plugins/vmxnet3/vmxnet3.api b/src/plugins/vmxnet3/vmxnet3.api index 68beac030c13..8666820db69f 100644 --- a/src/plugins/vmxnet3/vmxnet3.api +++ b/src/plugins/vmxnet3/vmxnet3.api @@ -26,6 +26,7 @@ option version = "1.0.0"; @param enable_elog - turn on elog (optional - default is off) @param rxq_size - receive queue size (optional - default is 1024) @param txq_size - transmit queue size (optional - default is 1024) + @param txq_num - number of transmit queues (optional - default is 1) */ define vmxnet3_create @@ -37,6 +38,7 @@ define vmxnet3_create i32 enable_elog; u16 rxq_size; u16 txq_size; 
+ u16 txq_num; }; /** \brief @@ -66,21 +68,36 @@ autoreply define vmxnet3_delete u32 sw_if_index; }; +/** \brief vmxnet3_tx_list structure + @param tx_qsize - tx queue size + @param tx_next - tx next index + @param tx_produce - tx produce index + @param tx_consume - tx consume index +*/ + +typeonly define vmxnet3_tx_list +{ + u16 tx_qsize; + u16 tx_next; + u16 tx_produce; + u16 tx_consume; +}; + /** \brief Memory interface details structure @param context - sender context, to match reply w/ request (memif_dump) @param sw_if_index - index of the interface @param if_name - name of the interface @param hw_addr - interface MAC address - @param id - id associated with the interface - @param role - role of the interface in the connection (master/slave) - @param mode - interface mode - @param socket_id - id of the socket filename used by this interface - to establish new connections - @param ring_size - the number of entries of RX/TX rings - @param buffer_size - size of the buffer allocated for each ring entry + @param pci_addr - pci address of the interface + @param version - vmxnet3 hardware version @param admin_up_down - interface administrative status - @param link_up_down - interface link status - + @param rx_qsize - rx queue size + @param rx_fill - rx fill count + @param rx_next - rx next index + @param rx_produce - rx produce index + @param rx_consume - rx consume index + @param tx_count - number of elements in tx_list + @param tx_list - list of vmxnet3_tx_list */ define vmxnet3_details { @@ -91,21 +108,16 @@ define vmxnet3_details u8 hw_addr[6]; u32 pci_addr; u8 version; + u8 admin_up_down; - u16 rx_qid; u16 rx_qsize; u16 rx_fill[2]; u16 rx_next; u16 rx_produce[2]; u16 rx_consume[2]; - u16 tx_qid; - u16 tx_qsize; - u16 tx_next; - u16 tx_produce; - u16 tx_consume; - - u8 admin_up_down; + u8 tx_count; + vl_api_vmxnet3_tx_list_t tx_list[8]; }; /** \brief Dump all vmxnet3 interfaces diff --git a/src/plugins/vmxnet3/vmxnet3.c b/src/plugins/vmxnet3/vmxnet3.c index
f7ae58c0fe47..a3aae99f2117 100644 --- a/src/plugins/vmxnet3/vmxnet3.c +++ b/src/plugins/vmxnet3/vmxnet3.c @@ -102,7 +102,9 @@ vmxnet3_clear_hw_interface_counters (u32 instance) { vmxnet3_main_t *vmxm = &vmxnet3_main; vmxnet3_device_t *vd = pool_elt_at_index (vmxm->devices, instance); - vmxnet3_queues *q = &vd->dma->queues; + vmxnet3_tx_queue *tx = VMXNET3_TX_START (vd); + vmxnet3_rx_queue *rx = VMXNET3_RX_START (vd); + u16 qid; /* * Set the "last_cleared_stats" to the current stats, so that @@ -110,8 +112,18 @@ vmxnet3_clear_hw_interface_counters (u32 instance) */ vmxnet3_reg_write (vd, 1, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS); - clib_memcpy (&vd->tx_stats, &q->tx.stats, sizeof (vd->tx_stats)); - clib_memcpy (&vd->rx_stats, &q->rx.stats, sizeof (vd->rx_stats)); + vec_foreach_index (qid, vd->txqs) + { + vmxnet3_tx_stats *txs = vec_elt_at_index (vd->tx_stats, qid); + clib_memcpy (txs, &tx->stats, sizeof (*txs)); + tx++; + } + vec_foreach_index (qid, vd->rxqs) + { + vmxnet3_rx_stats *rxs = vec_elt_at_index (vd->rx_stats, qid); + clib_memcpy (rxs, &rx->stats, sizeof (*rxs)); + rx++; + } } static char *vmxnet3_tx_func_error_strings[] = { @@ -158,34 +170,46 @@ static clib_error_t * vmxnet3_provision_driver_shared (vlib_main_t * vm, vmxnet3_device_t * vd) { vmxnet3_shared *shared; - vmxnet3_queues *q; u64 shared_dma; - u16 qid = 0, rid; - vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid); - vmxnet3_txq_t *txq = vec_elt_at_index (vd->txqs, qid); + u16 qid, rid; + vmxnet3_tx_queue *tx = VMXNET3_TX_START (vd); + vmxnet3_rx_queue *rx = VMXNET3_RX_START (vd); - vd->dma = vlib_physmem_alloc_aligned_on_numa (vm, sizeof (*vd->dma), 512, - vd->numa_node); - if (vd->dma == 0) + vd->driver_shared = + vlib_physmem_alloc_aligned_on_numa (vm, sizeof (*vd->driver_shared), 512, + vd->numa_node); + if (vd->driver_shared == 0) return vlib_physmem_last_error (vm); - clib_memset (vd->dma, 0, sizeof (*vd->dma)); + clib_memset (vd->driver_shared, 0, sizeof (*vd->driver_shared)); - q = 
&vd->dma->queues; - q->tx.cfg.desc_address = vmxnet3_dma_addr (vm, vd, txq->tx_desc); - q->tx.cfg.comp_address = vmxnet3_dma_addr (vm, vd, txq->tx_comp); - q->tx.cfg.num_desc = txq->size; - q->tx.cfg.num_comp = txq->size; - for (rid = 0; rid < VMXNET3_RX_RING_SIZE; rid++) - { - q->rx.cfg.desc_address[rid] = vmxnet3_dma_addr (vm, vd, + vec_foreach_index (qid, vd->txqs) + { + vmxnet3_txq_t *txq = vec_elt_at_index (vd->txqs, qid); + + tx->cfg.desc_address = vmxnet3_dma_addr (vm, vd, txq->tx_desc); + tx->cfg.comp_address = vmxnet3_dma_addr (vm, vd, txq->tx_comp); + tx->cfg.num_desc = txq->size; + tx->cfg.num_comp = txq->size; + tx++; + } + + vec_foreach_index (qid, vd->rxqs) + { + vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid); + + for (rid = 0; rid < VMXNET3_RX_RING_SIZE; rid++) + { + rx->cfg.desc_address[rid] = vmxnet3_dma_addr (vm, vd, rxq->rx_desc[rid]); - q->rx.cfg.num_desc[rid] = rxq->size; - } - q->rx.cfg.comp_address = vmxnet3_dma_addr (vm, vd, rxq->rx_comp); - q->rx.cfg.num_comp = rxq->size; + rx->cfg.num_desc[rid] = rxq->size; + } + rx->cfg.comp_address = vmxnet3_dma_addr (vm, vd, rxq->rx_comp); + rx->cfg.num_comp = rxq->size; + rx++; + } - shared = &vd->dma->shared; + shared = vd->driver_shared; shared->magic = VMXNET3_SHARED_MAGIC; shared->misc.version = VMXNET3_VERSION_MAGIC; if (sizeof (void *) == 4) @@ -195,8 +219,9 @@ vmxnet3_provision_driver_shared (vlib_main_t * vm, vmxnet3_device_t * vd) shared->misc.guest_info |= VMXNET3_GOS_TYPE_LINUX; shared->misc.version_support = VMXNET3_VERSION_SELECT; shared->misc.upt_version_support = VMXNET3_UPT_VERSION_SELECT; - shared->misc.queue_desc_address = vmxnet3_dma_addr (vm, vd, q); - shared->misc.queue_desc_len = sizeof (*q); + shared->misc.queue_desc_address = vmxnet3_dma_addr (vm, vd, vd->queues); + shared->misc.queue_desc_len = sizeof (*tx) * vd->num_tx_queues + + sizeof (*rx) * vd->num_rx_queues; shared->misc.mtu = VMXNET3_MTU; shared->misc.num_tx_queues = vd->num_tx_queues; shared->misc.num_rx_queues = 
vd->num_rx_queues; @@ -217,7 +242,7 @@ static inline void vmxnet3_enable_interrupt (vmxnet3_device_t * vd) { int i; - vmxnet3_shared *shared = &vd->dma->shared; + vmxnet3_shared *shared = vd->driver_shared; shared->interrupt.control &= ~VMXNET3_IC_DISABLE_ALL; for (i = 0; i < vd->num_intrs; i++) @@ -228,7 +253,7 @@ static inline void vmxnet3_disable_interrupt (vmxnet3_device_t * vd) { int i; - vmxnet3_shared *shared = &vd->dma->shared; + vmxnet3_shared *shared = vd->driver_shared; shared->interrupt.control |= VMXNET3_IC_DISABLE_ALL; for (i = 0; i < vd->num_intrs; i++) @@ -239,8 +264,13 @@ static clib_error_t * vmxnet3_rxq_init (vlib_main_t * vm, vmxnet3_device_t * vd, u16 qid, u16 qsz) { vmxnet3_rxq_t *rxq; + vmxnet3_rx_stats *rxs; u16 rid; + vec_validate (vd->rx_stats, qid); + rxs = vec_elt_at_index (vd->rx_stats, qid); + clib_memset (rxs, 0, sizeof (*rxs)); + vec_validate_aligned (vd->rxqs, qid, CLIB_CACHE_LINE_BYTES); rxq = vec_elt_at_index (vd->rxqs, qid); clib_memset (rxq, 0, sizeof (*rxq)); @@ -280,6 +310,8 @@ static clib_error_t * vmxnet3_txq_init (vlib_main_t * vm, vmxnet3_device_t * vd, u16 qid, u16 qsz) { vmxnet3_txq_t *txq; + vmxnet3_tx_stats *txs; + u32 size; if (qid >= vd->num_tx_queues) { @@ -291,24 +323,31 @@ vmxnet3_txq_init (vlib_main_t * vm, vmxnet3_device_t * vd, u16 qid, u16 qsz) return 0; } + vec_validate (vd->tx_stats, qid); + txs = vec_elt_at_index (vd->tx_stats, qid); + clib_memset (txs, 0, sizeof (*txs)); + vec_validate_aligned (vd->txqs, qid, CLIB_CACHE_LINE_BYTES); txq = vec_elt_at_index (vd->txqs, qid); clib_memset (txq, 0, sizeof (*txq)); txq->size = qsz; + txq->reg_txprod = qid * 8 + VMXNET3_REG_TXPROD; + + size = qsz * sizeof (*txq->tx_desc); txq->tx_desc = - vlib_physmem_alloc_aligned_on_numa (vm, qsz * sizeof (*txq->tx_desc), 512, - vd->numa_node); + vlib_physmem_alloc_aligned_on_numa (vm, size, 512, vd->numa_node); if (txq->tx_desc == 0) return vlib_physmem_last_error (vm); - memset (txq->tx_desc, 0, qsz * sizeof (*txq->tx_desc)); 
+ memset (txq->tx_desc, 0, size); + + size = qsz * sizeof (*txq->tx_comp); txq->tx_comp = - vlib_physmem_alloc_aligned_on_numa (vm, qsz * sizeof (*txq->tx_comp), 512, - vd->numa_node); + vlib_physmem_alloc_aligned_on_numa (vm, size, 512, vd->numa_node); if (txq->tx_comp == 0) return vlib_physmem_last_error (vm); - clib_memset (txq->tx_comp, 0, qsz * sizeof (*txq->tx_comp)); + clib_memset (txq->tx_comp, 0, size); vec_validate_aligned (txq->tx_ring.bufs, txq->size, CLIB_CACHE_LINE_BYTES); txq->tx_ring.gen = VMXNET3_TXF_GEN; txq->tx_comp_ring.gen = VMXNET3_TXCF_GEN; @@ -321,13 +360,9 @@ vmxnet3_device_init (vlib_main_t * vm, vmxnet3_device_t * vd, vmxnet3_create_if_args_t * args) { clib_error_t *error = 0; - u32 ret, i; + u32 ret, i, size; vlib_thread_main_t *tm = vlib_get_thread_main (); - vd->num_tx_queues = 1; - vd->num_rx_queues = 1; - vd->num_intrs = 2; - /* Quiesce the device */ vmxnet3_reg_write (vd, 1, VMXNET3_REG_CMD, VMXNET3_CMD_QUIESCE_DEV); ret = vmxnet3_reg_read (vd, 1, VMXNET3_REG_CMD); @@ -388,6 +423,16 @@ vmxnet3_device_init (vlib_main_t * vm, vmxnet3_device_t * vd, ret = vmxnet3_reg_read (vd, 1, VMXNET3_REG_MACH); clib_memcpy (vd->mac_addr + 4, &ret, 2); + size = sizeof (vmxnet3_rx_queue) * vd->num_rx_queues + + sizeof (vmxnet3_tx_queue) * vd->num_tx_queues; + + vd->queues = + vlib_physmem_alloc_aligned_on_numa (vm, size, 512, vd->numa_node); + if (vd->queues == 0) + return vlib_physmem_last_error (vm); + + clib_memset (vd->queues, 0, size); + error = vmxnet3_rxq_init (vm, vd, 0, args->rxq_size); if (error) return error; @@ -482,6 +527,16 @@ vmxnet3_queue_size_valid (u16 qsz) return 1; } +static u8 +vmxnet3_queue_num_valid (u16 num) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + + if ((num > VMXNET3_TXQ_MAX) || (num > tm->n_vlib_mains)) + return 0; + return 1; +} + void vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) { @@ -491,6 +546,21 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) 
vlib_pci_dev_handle_t h; clib_error_t *error = 0; + if (args->txq_num == 0) + args->txq_num = 1; + if (!vmxnet3_queue_num_valid (args->txq_num)) + { + args->rv = VNET_API_ERROR_INVALID_VALUE; + args->error = + clib_error_return (error, + "number of queues must be <= %u and <= number of " + "CPU's assigned to VPP", VMXNET3_TXQ_MAX); + vlib_log (VLIB_LOG_LEVEL_ERR, vmxm->log_default, "%U: %s", + format_vlib_pci_addr, &args->addr, + "number of queues must be <= %u and <= number of " + "CPU's assigned to VPP", VMXNET3_TXQ_MAX); + return; + } if (args->rxq_size == 0) args->rxq_size = VMXNET3_NUM_RX_DESC; if (args->txq_size == 0) @@ -525,6 +595,7 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) /* *INDENT-ON* */ pool_get (vmxm->devices, vd); + vd->num_tx_queues = args->txq_num; vd->dev_instance = vd - vmxm->devices; vd->per_interface_next_index = ~0; vd->pci_addr = args->addr; @@ -552,6 +623,9 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) */ vd->pci_dev_handle = h; vd->numa_node = vlib_pci_get_numa_node (vm, h); + vd->num_rx_queues = 1; + vd->num_intrs = 2; + vlib_pci_set_private_data (vm, h, vd->dev_instance); if ((error = vlib_pci_bus_master_enable (vm, h))) @@ -687,6 +761,7 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd) } /* *INDENT-ON* */ vec_free (vd->rxqs); + vec_free (vd->rx_stats); /* *INDENT-OFF* */ vec_foreach_index (i, vd->txqs) @@ -711,8 +786,10 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd) } /* *INDENT-ON* */ vec_free (vd->txqs); + vec_free (vd->tx_stats); - vlib_physmem_free (vm, vd->dma); + vlib_physmem_free (vm, vd->driver_shared); + vlib_physmem_free (vm, vd->queues); clib_error_free (vd->error); clib_memset (vd, 0, sizeof (*vd)); diff --git a/src/plugins/vmxnet3/vmxnet3.h b/src/plugins/vmxnet3/vmxnet3.h index 781a9519a0fc..ffde9f0c6f8d 100644 --- a/src/plugins/vmxnet3/vmxnet3.h +++ b/src/plugins/vmxnet3/vmxnet3.h @@ -57,6 +57,11 @@ enum #undef _ }; +#define 
VMXNET3_TXQ_MAX 8 +#define VMXNET3_TX_START(vd) ((vd)->queues) +#define VMXNET3_RX_START(vd) \ + ((vd)->queues + (vd)->num_tx_queues * sizeof (vmxnet3_tx_queue)) + /* BAR 0 */ #define VMXNET3_REG_IMR 0x0000 /* Interrupt Mask Register */ #define VMXNET3_REG_TXPROD 0x0600 /* Tx Producer Index */ @@ -297,11 +302,6 @@ typedef CLIB_PACKED (struct u8 pad[88]; }) vmxnet3_rx_queue; -typedef CLIB_PACKED (struct - { - vmxnet3_tx_queue tx; vmxnet3_rx_queue rx; - }) vmxnet3_queues; - /* * flags: * buffer length -- bits 0-13 @@ -445,6 +445,7 @@ typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); u16 size; + u32 reg_txprod; clib_spinlock_t lock; vmxnet3_tx_desc *tx_desc; @@ -453,11 +454,6 @@ typedef struct vmxnet3_tx_comp_ring tx_comp_ring; } vmxnet3_txq_t; -typedef CLIB_PACKED (struct - { - vmxnet3_queues queues; vmxnet3_shared shared; - }) vmxnet3_dma; - typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -486,11 +482,12 @@ typedef struct /* error */ clib_error_t *error; - vmxnet3_dma *dma; + vmxnet3_shared *driver_shared; + void *queues; u32 link_speed; - vmxnet3_tx_stats tx_stats; - vmxnet3_rx_stats rx_stats; + vmxnet3_tx_stats *tx_stats; + vmxnet3_rx_stats *rx_stats; } vmxnet3_device_t; typedef struct @@ -508,6 +505,7 @@ typedef struct u32 enable_elog; u16 rxq_size; u16 txq_size; + u16 txq_num; /* return */ i32 rv; u32 sw_if_index; @@ -593,7 +591,7 @@ vmxnet3_rxq_refill_ring0 (vlib_main_t * vm, vmxnet3_device_t * vd, vmxnet3_rx_desc *rxd; u16 n_refill, n_alloc; vmxnet3_rx_ring *ring; - vmxnet3_queues *q; + vmxnet3_rx_queue *rx; ring = &rxq->rx_ring[0]; n_refill = rxq->size - ring->fill; @@ -617,15 +615,15 @@ vmxnet3_rxq_refill_ring0 (vlib_main_t * vm, vmxnet3_device_t * vd, vlib_buffer_t *b = vlib_get_buffer (vm, ring->bufs[ring->produce]); rxd = &rxq->rx_desc[0][ring->produce]; rxd->address = vlib_buffer_get_pa (vm, b); - rxd->flags = ring->gen | VLIB_BUFFER_DATA_SIZE; + rxd->flags = ring->gen | vlib_buffer_get_default_data_size (vm); 
vmxnet3_rx_ring_advance_produce (rxq, ring); ring->fill++; n_alloc--; } - q = &vd->dma->queues; - if (PREDICT_FALSE (q->rx.ctrl.update_prod)) + rx = VMXNET3_RX_START (vd); + if (PREDICT_FALSE (rx->ctrl.update_prod)) vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_RXPROD, ring->produce); return 0; @@ -638,7 +636,7 @@ vmxnet3_rxq_refill_ring1 (vlib_main_t * vm, vmxnet3_device_t * vd, vmxnet3_rx_desc *rxd; u16 n_refill, n_alloc; vmxnet3_rx_ring *ring; - vmxnet3_queues *q; + vmxnet3_rx_queue *rx; ring = &rxq->rx_ring[1]; n_refill = rxq->size - ring->fill; @@ -662,15 +660,16 @@ vmxnet3_rxq_refill_ring1 (vlib_main_t * vm, vmxnet3_device_t * vd, vlib_buffer_t *b = vlib_get_buffer (vm, ring->bufs[ring->produce]); rxd = &rxq->rx_desc[1][ring->produce]; rxd->address = vlib_buffer_get_pa (vm, b); - rxd->flags = ring->gen | VLIB_BUFFER_DATA_SIZE | VMXNET3_RXF_BTYPE; + rxd->flags = ring->gen | vlib_buffer_get_default_data_size (vm) | + VMXNET3_RXF_BTYPE; vmxnet3_rx_ring_advance_produce (rxq, ring); ring->fill++; n_alloc--; } - q = &vd->dma->queues; - if (PREDICT_FALSE (q->rx.ctrl.update_prod)) + rx = VMXNET3_RX_START (vd); + if (PREDICT_FALSE (rx->ctrl.update_prod)) vmxnet3_reg_write_inline (vd, 0, VMXNET3_REG_RXPROD2, ring->produce); return 0; diff --git a/src/plugins/vmxnet3/vmxnet3_api.c b/src/plugins/vmxnet3/vmxnet3_api.c index b41866be117f..635657c2bb6a 100644 --- a/src/plugins/vmxnet3/vmxnet3_api.c +++ b/src/plugins/vmxnet3/vmxnet3_api.c @@ -71,6 +71,7 @@ vl_api_vmxnet3_create_t_handler (vl_api_vmxnet3_create_t * mp) args.addr.as_u32 = ntohl (mp->pci_addr); args.rxq_size = ntohs (mp->rxq_size); args.txq_size = ntohs (mp->txq_size); + args.txq_num = ntohs (mp->txq_num); vmxnet3_create_if (vm, &args); rv = args.rv; @@ -111,8 +112,7 @@ vl_api_vmxnet3_delete_t_handler (vl_api_vmxnet3_delete_t * mp) static void send_vmxnet3_details (vl_api_registration_t * reg, vmxnet3_device_t * vd, - u16 rx_qid, vmxnet3_rxq_t * rxq, u16 tx_qid, - vmxnet3_txq_t * txq, vnet_sw_interface_t * swif, 
+ vmxnet3_rxq_t * rxq, vnet_sw_interface_t * swif, u8 * interface_name, u32 context) { vl_api_vmxnet3_details_t *mp; @@ -120,7 +120,7 @@ send_vmxnet3_details (vl_api_registration_t * reg, vmxnet3_device_t * vd, vmxnet3_main_t *vmxm = &vmxnet3_main; vnet_hw_interface_t *hwif; vmxnet3_rx_ring *ring; - u16 rid; + u16 rid, qid; hwif = vnet_get_sup_hw_interface (vnm, swif->sw_if_index); @@ -139,6 +139,7 @@ send_vmxnet3_details (vl_api_registration_t * reg, vmxnet3_device_t * vd, mp->version = vd->version; mp->pci_addr = ntohl (vd->pci_addr.as_u32); + mp->admin_up_down = (swif->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 1 : 0; mp->rx_qsize = htons (rxq->size); mp->rx_next = htons (rxq->rx_comp_ring.next); @@ -149,12 +150,19 @@ send_vmxnet3_details (vl_api_registration_t * reg, vmxnet3_device_t * vd, mp->rx_produce[rid] = htons (ring->produce); mp->rx_consume[rid] = htons (ring->consume); } - mp->tx_qsize = htons (txq->size); - mp->tx_next = htons (txq->tx_comp_ring.next); - mp->tx_produce = htons (txq->tx_ring.produce); - mp->tx_consume = htons (txq->tx_ring.consume); - mp->admin_up_down = (swif->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 
1 : 0; + mp->tx_count = clib_min (vec_len (vd->txqs), VMXNET3_TXQ_MAX); + vec_foreach_index (qid, vd->txqs) + { + vmxnet3_txq_t *txq = vec_elt_at_index (vd->txqs, qid); + vl_api_vmxnet3_tx_list_t *tx_list = &mp->tx_list[qid]; + + ASSERT (qid < VMXNET3_TXQ_MAX); + tx_list->tx_qsize = htons (txq->size); + tx_list->tx_next = htons (txq->tx_comp_ring.next); + tx_list->tx_produce = htons (txq->tx_ring.produce); + tx_list->tx_consume = htons (txq->tx_ring.consume); + } vl_api_send_msg (reg, (u8 *) mp); } @@ -173,7 +181,6 @@ vl_api_vmxnet3_dump_t_handler (vl_api_vmxnet3_dump_t * mp) u8 *if_name = 0; vl_api_registration_t *reg; vmxnet3_rxq_t *rxq; - vmxnet3_txq_t *txq; u16 qid = 0; reg = vl_api_client_index_to_registration (mp->client_index); @@ -187,9 +194,7 @@ vl_api_vmxnet3_dump_t_handler (vl_api_vmxnet3_dump_t * mp) if_name = format (if_name, "%U%c", format_vnet_sw_interface_name, vnm, swif, 0); rxq = vec_elt_at_index (vd->rxqs, qid); - txq = vec_elt_at_index (vd->txqs, qid); - send_vmxnet3_details (reg, vd, qid, rxq, qid, txq, swif, if_name, - mp->context); + send_vmxnet3_details (reg, vd, rxq, swif, if_name, mp->context); _vec_len (if_name) = 0; })); /* *INDENT-ON* */ diff --git a/src/plugins/vmxnet3/vmxnet3_test.c b/src/plugins/vmxnet3/vmxnet3_test.c index 53097f02d635..848b16987970 100644 --- a/src/plugins/vmxnet3/vmxnet3_test.c +++ b/src/plugins/vmxnet3/vmxnet3_test.c @@ -121,6 +121,8 @@ api_vmxnet3_create (vat_main_t * vam) ; else if (unformat (i, "tx-queue-size %u", &args.txq_size)) ; + else if (unformat (i, "num-tx-queues %u", &args.txq_num)) + ; else { clib_warning ("unknown input '%U'", format_unformat_error, i); @@ -134,6 +136,7 @@ api_vmxnet3_create (vat_main_t * vam) mp->enable_elog = clib_host_to_net_u16 (args.enable_elog); mp->rxq_size = clib_host_to_net_u16 (args.rxq_size); mp->txq_size = clib_host_to_net_u16 (args.txq_size); + mp->txq_num = clib_host_to_net_u16 (args.txq_num); S (mp); W (ret); @@ -240,33 +243,38 @@ vl_api_vmxnet3_details_t_handler 
(vl_api_vmxnet3_details_t * mp) { vat_main_t *vam = vmxnet3_test_main.vat_main; u32 pci_addr = ntohl (mp->pci_addr); + u16 qid; fformat (vam->ofp, "%s: sw_if_index %u mac %U\n" " version: %u\n" " PCI Address: %U\n" - " RX completion next index %u" - " RX Queue %u\n" - " ring 0 size %u fill %u consume %u produce %u\n" - " ring 1 size %u fill %u consume %u produce %u\n" - " TX completion next index %u" - " TX Queue %u\n" - " size %u consume %u produce %u\n" - " state %s\n", + " state %s\n" + " RX Queue 0\n" + " RX completion next index %u\n" + " ring 0 size %u fill %u consume %u produce %u\n" + " ring 1 size %u fill %u consume %u produce %u\n", mp->if_name, ntohl (mp->sw_if_index), format_ethernet_address, mp->hw_addr, mp->version, format_pci_addr, &pci_addr, + mp->admin_up_down ? "up" : "down", ntohs (mp->rx_next), - ntohs (mp->rx_qid), ntohs (mp->rx_qsize), ntohs (mp->rx_fill[0]), ntohs (mp->rx_consume[0]), ntohs (mp->rx_produce[0]), ntohs (mp->rx_qsize), ntohs (mp->rx_fill[1]), - ntohs (mp->rx_consume[1]), - ntohs (mp->rx_produce[1]), - ntohs (mp->tx_next), - ntohs (mp->tx_qid), - ntohs (mp->tx_qsize), ntohs (mp->tx_consume), - ntohs (mp->tx_produce), mp->admin_up_down ? 
"up" : "down"); + ntohs (mp->rx_consume[1]), ntohs (mp->rx_produce[1])); + for (qid = 0; qid < mp->tx_count; qid++) + { + vl_api_vmxnet3_tx_list_t *tx_list = &mp->tx_list[qid]; + fformat (vam->ofp, + " TX Queue %u\n" + " TX completion next index %u\n" + " size %u consume %u produce %u\n", + qid, + ntohs (tx_list->tx_next), + ntohs (tx_list->tx_qsize), ntohs (tx_list->tx_consume), + ntohs (tx_list->tx_produce)); + } } /* @@ -275,8 +283,8 @@ vl_api_vmxnet3_details_t_handler (vl_api_vmxnet3_details_t * mp) */ #define foreach_vpe_api_msg \ _(vmxnet3_create, " [rx-queue-size ] " \ - "[tx-queue-size ]") \ -_(vmxnet3_delete, "") \ + "[tx-queue-size ] [num-tx-queues ]") \ +_(vmxnet3_delete, "sw_if_index ") \ _(vmxnet3_dump, "") static void diff --git a/src/scripts/generate_version_h b/src/scripts/generate_version_h index 993a682954b2..f4705489c87f 100755 --- a/src/scripts/generate_version_h +++ b/src/scripts/generate_version_h @@ -22,7 +22,7 @@ cat > ${1} << __EOF__ #define VPP_BUILD_DATE "$(date)" #define VPP_BUILD_USER "$(whoami)" #define VPP_BUILD_HOST "$(hostname)" -#define VPP_BUILD_TOPDIR "$(git rev-parse --show-toplevel)" +#define VPP_BUILD_TOPDIR "$(git rev-parse --show-toplevel 2> /dev/null)" #define VPP_BUILD_VER "$(scripts/version)" #endif __EOF__ diff --git a/src/scripts/remove-rpath b/src/scripts/remove-rpath new file mode 100755 index 000000000000..3e20b06dbfa5 --- /dev/null +++ b/src/scripts/remove-rpath @@ -0,0 +1,37 @@ +#!/bin/bash + +# Copyright (c) 2015 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +if [ -z $1 ]; then + echo "Please specify path" + exit 1 +fi + +which chrpath &> /dev/null + +if [ $? -ne 0 ] ; then + echo "Please install chrpath tool" + exit 1 +fi + +libs=$(find $1 -type f -name \*.so\*) +execs=$(find $1 -type f -path \*/bin/\* ) + +for i in $libs $execs; do + chrpath $i 2> /dev/null | grep -q build-root + if [ $? -eq 0 ] ; then + chrpath -d $i + fi +done + diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c index e7a1188d5915..b53a2e294f73 100644 --- a/src/svm/svm_fifo.c +++ b/src/svm/svm_fifo.c @@ -845,6 +845,29 @@ svm_fifo_init_pointers (svm_fifo_t * f, u32 pointer) f->head = f->tail = pointer % f->nitems; } +void +svm_fifo_add_subscriber (svm_fifo_t * f, u8 subscriber) +{ + if (f->n_subscribers >= SVM_FIFO_MAX_EVT_SUBSCRIBERS) + return; + f->subscribers[f->n_subscribers++] = subscriber; +} + +void +svm_fifo_del_subscriber (svm_fifo_t * f, u8 subscriber) +{ + int i; + + for (i = 0; i < f->n_subscribers; i++) + { + if (f->subscribers[i] != subscriber) + continue; + f->subscribers[i] = f->subscribers[f->n_subscribers - 1]; + f->n_subscribers--; + break; + } +} + #endif /* * fd.io coding-style-patch-verification: ON diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h index 613e0cad5ebf..07614b4dac94 100644 --- a/src/svm/svm_fifo.h +++ b/src/svm/svm_fifo.h @@ -40,6 +40,7 @@ format_function_t format_ooo_list; #define OOO_SEGMENT_INVALID_INDEX ((u32)~0) #define SVM_FIFO_INVALID_SESSION_INDEX ((u32)~0) #define SVM_FIFO_INVALID_INDEX ((u32)~0) +#define SVM_FIFO_MAX_EVT_SUBSCRIBERS 8 enum { @@ -57,13 +58,13 @@ typedef struct typedef struct _svm_fifo { + CLIB_CACHE_LINE_ALIGN_MARK (shared_first); volatile u32 cursize; /**< current fifo size */ u32 nitems; - CLIB_CACHE_LINE_ALIGN_MARK (end_cursize); + CLIB_CACHE_LINE_ALIGN_MARK (shared_second); volatile u32 has_event; /**< non-zero if deq event exists */ - /* Backpointers */ u32 
master_session_index; u32 client_session_index; u8 master_thread_index; @@ -71,13 +72,15 @@ typedef struct _svm_fifo u32 segment_manager; u32 segment_index; u32 ct_session_index; /**< Local session index for vpp */ - CLIB_CACHE_LINE_ALIGN_MARK (end_shared); + u32 freelist_index; /**< aka log2(allocated_size) - const. */ + i8 refcnt; /**< reference count */ + + CLIB_CACHE_LINE_ALIGN_MARK (consumer); u32 head; volatile u32 want_tx_ntf; /**< producer wants nudge */ volatile u32 has_tx_ntf; - CLIB_CACHE_LINE_ALIGN_MARK (end_consumer); - /* producer */ + CLIB_CACHE_LINE_ALIGN_MARK (producer); u32 tail; ooo_segment_t *ooo_segments; /**< Pool of ooo segments */ @@ -85,11 +88,13 @@ typedef struct _svm_fifo u32 ooos_newest; /**< Last segment to have been updated */ struct _svm_fifo *next; /**< next in freelist/active chain */ struct _svm_fifo *prev; /**< prev in active chain */ + volatile u8 n_subscribers; + u8 subscribers[SVM_FIFO_MAX_EVT_SUBSCRIBERS]; + #if SVM_FIFO_TRACE svm_fifo_trace_elem_t *trace; #endif - u32 freelist_index; /**< aka log2(allocated_size) - const. 
*/ - i8 refcnt; /**< reference count */ + CLIB_CACHE_LINE_ALIGN_MARK (data); } svm_fifo_t; @@ -198,6 +203,8 @@ int svm_fifo_segments (svm_fifo_t * f, svm_fifo_segment_t * fs); void svm_fifo_segments_free (svm_fifo_t * f, svm_fifo_segment_t * fs); void svm_fifo_init_pointers (svm_fifo_t * f, u32 pointer); void svm_fifo_overwrite_head (svm_fifo_t * f, u8 * data, u32 len); +void svm_fifo_add_subscriber (svm_fifo_t * f, u8 subscriber); +void svm_fifo_del_subscriber (svm_fifo_t * f, u8 subscriber); format_function_t format_svm_fifo; /** @@ -297,6 +304,12 @@ svm_fifo_needs_tx_ntf (svm_fifo_t * f, u32 n_last_deq) return 0; } +always_inline u8 +svm_fifo_n_subscribers (svm_fifo_t * f) +{ + return f->n_subscribers; +} + u32 svm_fifo_number_ooo_segments (svm_fifo_t * f); ooo_segment_t *svm_fifo_first_ooo_segment (svm_fifo_t * f); diff --git a/src/tests/vnet/session/tcp_echo.c b/src/tests/vnet/session/tcp_echo.c index 1f49ab69e774..5249b8d60768 100644 --- a/src/tests/vnet/session/tcp_echo.c +++ b/src/tests/vnet/session/tcp_echo.c @@ -45,18 +45,17 @@ typedef struct { - svm_fifo_t *server_rx_fifo; - svm_fifo_t *server_tx_fifo; - - svm_msg_q_t *vpp_evt_q; - + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); +#define _(type, name) type name; + foreach_app_session_field +#undef _ u64 vpp_session_handle; u64 bytes_sent; u64 bytes_to_send; volatile u64 bytes_received; volatile u64 bytes_to_receive; f64 start; -} session_t; +} echo_session_t; typedef enum { @@ -81,7 +80,7 @@ typedef struct u8 *uri; /* Session pool */ - session_t *sessions; + echo_session_t *sessions; /* Hash table for disconnect processing */ uword *session_index_by_vpp_handles; @@ -153,62 +152,6 @@ echo_main_t echo_main; #define NITER 4000000 #endif -const char test_srv_crt_rsa[] = - "-----BEGIN CERTIFICATE-----\r\n" - "MIID5zCCAs+gAwIBAgIJALeMYCEHrTtJMA0GCSqGSIb3DQEBCwUAMIGJMQswCQYD\r\n" - "VQQGEwJVUzELMAkGA1UECAwCQ0ExETAPBgNVBAcMCFNhbiBKb3NlMQ4wDAYDVQQK\r\n" - 
"DAVDaXNjbzEOMAwGA1UECwwFZmQuaW8xFjAUBgNVBAMMDXRlc3R0bHMuZmQuaW8x\r\n" - "IjAgBgkqhkiG9w0BCQEWE3ZwcC1kZXZAbGlzdHMuZmQuaW8wHhcNMTgwMzA1MjEx\r\n" - "NTEyWhcNMjgwMzAyMjExNTEyWjCBiTELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNB\r\n" - "MREwDwYDVQQHDAhTYW4gSm9zZTEOMAwGA1UECgwFQ2lzY28xDjAMBgNVBAsMBWZk\r\n" - "LmlvMRYwFAYDVQQDDA10ZXN0dGxzLmZkLmlvMSIwIAYJKoZIhvcNAQkBFhN2cHAt\r\n" - "ZGV2QGxpc3RzLmZkLmlvMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA\r\n" - "4C1k8a1DuStgggqT4o09fP9sJ2dC54bxhS/Xk2VEfaIZ222WSo4X/syRVfVy9Yah\r\n" - "cpI1zJ/RDxaZSFhgA+nPZBrFMsrULkrdAOpOVj8eDEp9JuWdO2ODSoFnCvLxcYWB\r\n" - "Yc5kHryJpEaGJl1sFQSesnzMFty/59ta0stk0Fp8r5NhIjWvSovGzPo6Bhz+VS2c\r\n" - "ebIZh4x1t2hHaFcgm0qJoJ6DceReWCW8w+yOVovTolGGq+bpb2Hn7MnRSZ2K2NdL\r\n" - "+aLXpkZbS/AODP1FF2vTO1mYL290LO7/51vJmPXNKSDYMy5EvILr5/VqtjsFCwRL\r\n" - "Q4jcM/+GeHSAFWx4qIv0BwIDAQABo1AwTjAdBgNVHQ4EFgQUWa1SOB37xmT53tZQ\r\n" - "aXuLLhRI7U8wHwYDVR0jBBgwFoAUWa1SOB37xmT53tZQaXuLLhRI7U8wDAYDVR0T\r\n" - "BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAoUht13W4ya27NVzQuCMvqPWL3VM4\r\n" - "3xbPFk02FaGz/WupPu276zGlzJAZrbuDcQowwwU1Ni1Yygxl96s1c2M5rHDTrOKG\r\n" - "rK0hbkSFBo+i6I8u4HiiQ4rYmG0Hv6+sXn3of0HsbtDPGgWZoipPWDljPYEURu3e\r\n" - "3HRe/Dtsj9CakBoSDzs8ndWaBR+f4sM9Tk1cjD46Gq2T/qpSPXqKxEUXlzhdCAn4\r\n" - "twub17Bq2kykHpppCwPg5M+v30tHG/R2Go15MeFWbEJthFk3TZMjKL7UFs7fH+x2\r\n" - "wSonXb++jY+KmCb93C+soABBizE57g/KmiR2IxQ/LMjDik01RSUIaM0lLA==\r\n" - "-----END CERTIFICATE-----\r\n"; -const u32 test_srv_crt_rsa_len = sizeof (test_srv_crt_rsa); - -const char test_srv_key_rsa[] = - "-----BEGIN PRIVATE KEY-----\r\n" - "MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDgLWTxrUO5K2CC\r\n" - "CpPijT18/2wnZ0LnhvGFL9eTZUR9ohnbbZZKjhf+zJFV9XL1hqFykjXMn9EPFplI\r\n" - "WGAD6c9kGsUyytQuSt0A6k5WPx4MSn0m5Z07Y4NKgWcK8vFxhYFhzmQevImkRoYm\r\n" - "XWwVBJ6yfMwW3L/n21rSy2TQWnyvk2EiNa9Ki8bM+joGHP5VLZx5shmHjHW3aEdo\r\n" - "VyCbSomgnoNx5F5YJbzD7I5Wi9OiUYar5ulvYefsydFJnYrY10v5otemRltL8A4M\r\n" - "/UUXa9M7WZgvb3Qs7v/nW8mY9c0pINgzLkS8guvn9Wq2OwULBEtDiNwz/4Z4dIAV\r\n" - 
"bHioi/QHAgMBAAECggEBAMzGipP8+oT166U+NlJXRFifFVN1DvdhG9PWnOxGL+c3\r\n" - "ILmBBC08WQzmHshPemBvR6DZkA1H23cV5JTiLWrFtC00CvhXsLRMrE5+uWotI6yE\r\n" - "iofybMroHvD6/X5R510UX9hQ6MHu5ShLR5VZ9zXHz5MpTmB/60jG5dLx+jgcwBK8\r\n" - "LuGv2YB/WCUwT9QJ3YU2eaingnXtz/MrFbkbltrqlnBdlD+kTtw6Yac9y1XuuQXc\r\n" - "BPeulLNDuPolJVWbUvDBZrpt2dXTgz8ws1sv+wCNE0xwQJsqW4Nx3QkpibUL9RUr\r\n" - "CVbKlNfa9lopT6nGKlgX69R/uH35yh9AOsfasro6w0ECgYEA82UJ8u/+ORah+0sF\r\n" - "Q0FfW5MTdi7OAUHOz16pUsGlaEv0ERrjZxmAkHA/VRwpvDBpx4alCv0Hc39PFLIk\r\n" - "nhSsM2BEuBkTAs6/GaoNAiBtQVE/hN7awNRWVmlieS0go3Y3dzaE9IUMyj8sPOFT\r\n" - "5JdJ6BM69PHKCkY3dKdnnfpFEuECgYEA68mRpteunF1mdZgXs+WrN+uLlRrQR20F\r\n" - "ZyMYiUCH2Dtn26EzA2moy7FipIIrQcX/j+KhYNGM3e7MU4LymIO29E18mn8JODnH\r\n" - "sQOXzBTsf8A4yIVMkcuQD3bfb0JiUGYUPOidTp2N7IJA7+6Yc3vQOyb74lnKnJoO\r\n" - "gougPT2wS+cCgYAn7muzb6xFsXDhyW0Tm6YJYBfRS9yAWEuVufINobeBZPSl2cN1\r\n" - "Jrnw+HlrfTNbrJWuJmjtZJXUXQ6cVp2rUbjutNyRV4vG6iRwEXYQ40EJdkr1gZpi\r\n" - "CHQhuShuuPih2MNAy7EEbM+sXrDjTBR3bFqzuHPzu7dp+BshCFX3lRfAAQKBgGQt\r\n" - "K5i7IhCFDjb/+3IPLgOAK7mZvsvZ4eXD33TQ2eZgtut1PXtBtNl17/b85uv293Fm\r\n" - "VDISVcsk3eLNS8zIiT6afUoWlxAwXEs0v5WRfjl4radkGvgGiJpJYvyeM67877RB\r\n" - "EDSKc/X8ESLfOB44iGvZUEMG6zJFscx9DgN25iQZAoGAbyd+JEWwdVH9/K3IH1t2\r\n" - "PBkZX17kNWv+iVM1WyFjbe++vfKZCrOJiyiqhDeEqgrP3AuNMlaaduC3VRC3G5oV\r\n" - "Mj1tlhDWQ/qhvKdCKNdIVQYDE75nw+FRWV8yYkHAnXYW3tNoweDIwixE0hkPR1bc\r\n" - "oEjPLVNtx8SOj/M4rhaPT3I=\r\n" "-----END PRIVATE KEY-----\r\n"; -const u32 test_srv_key_rsa_len = sizeof (test_srv_key_rsa); - static u8 * format_api_error (u8 * s, va_list * args) { @@ -483,7 +426,7 @@ connect_to_vpp (char *name) return -1; } - if (vl_socket_client_init_shm (0)) + if (vl_socket_client_init_shm (0, 1 /* want_pthread */ )) { clib_warning ("init shm api failed"); return -1; @@ -534,7 +477,7 @@ vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp) } static void -session_print_stats (echo_main_t * em, session_t * session) +session_print_stats (echo_main_t * em, 
echo_session_t * session) { f64 deltat; u64 bytes; @@ -546,7 +489,7 @@ session_print_stats (echo_main_t * em, session_t * session) } static void -test_recv_bytes (session_t * s, u8 * rx_buf, u32 n_read) +test_recv_bytes (echo_session_t * s, u8 * rx_buf, u32 n_read) { int i; for (i = 0; i < n_read; i++) @@ -561,72 +504,63 @@ test_recv_bytes (session_t * s, u8 * rx_buf, u32 n_read) } static void -recv_test_chunk (echo_main_t * em, session_t * s, u8 * rx_buf) +recv_data_chunk (echo_main_t * em, echo_session_t * s, u8 * rx_buf) { - svm_fifo_t *rx_fifo = s->server_rx_fifo; - u32 n_read_now, n_to_read; - int n_read; + int n_to_read, n_read; - n_to_read = svm_fifo_max_dequeue (rx_fifo); - svm_fifo_unset_event (rx_fifo); + n_to_read = svm_fifo_max_dequeue (s->rx_fifo); + if (!n_to_read) + return; do { - n_read_now = clib_min (vec_len (rx_buf), n_to_read); - n_read = svm_fifo_dequeue_nowait (rx_fifo, n_read_now, rx_buf); - if (n_read <= 0) - break; + n_read = app_recv_stream ((app_session_t *) s, rx_buf, + vec_len (rx_buf)); - if (n_read_now != n_read) - clib_warning ("huh?"); + if (n_read > 0) + { + if (em->test_return_packets) + test_recv_bytes (s, rx_buf, n_read); - if (em->test_return_packets) - test_recv_bytes (s, rx_buf, n_read); + n_to_read -= n_read; - n_to_read -= n_read; - s->bytes_received += n_read; - s->bytes_to_receive -= n_read; + s->bytes_received += n_read; + s->bytes_to_receive -= n_read; + } + else + break; } while (n_to_read > 0); } void -client_handle_fifo_event_rx (echo_main_t * em, session_event_t * e, - u8 * rx_buf) +client_handle_rx (echo_main_t * em, session_event_t * e, u8 * rx_buf) { - session_t *s; + echo_session_t *s; s = pool_elt_at_index (em->sessions, e->fifo->client_session_index); - recv_test_chunk (em, s, rx_buf); + recv_data_chunk (em, s, rx_buf); } static void -send_test_chunk (echo_main_t * em, session_t * s) +send_data_chunk (echo_main_t * em, echo_session_t * s) { u64 test_buf_len, bytes_this_chunk, test_buf_offset; - svm_fifo_t 
*tx_fifo = s->server_tx_fifo; u8 *test_data = em->connect_test_data; - u32 enq_space = 16 << 10; - int written; + int n_sent; test_buf_len = vec_len (test_data); test_buf_offset = s->bytes_sent % test_buf_len; bytes_this_chunk = clib_min (test_buf_len - test_buf_offset, s->bytes_to_send); - enq_space = svm_fifo_max_enqueue (tx_fifo); - bytes_this_chunk = clib_min (bytes_this_chunk, enq_space); - written = svm_fifo_enqueue_nowait (tx_fifo, bytes_this_chunk, - test_data + test_buf_offset); + n_sent = app_send_stream ((app_session_t *) s, test_data + test_buf_offset, + bytes_this_chunk, 0); - if (written > 0) + if (n_sent > 0) { - s->bytes_to_send -= written; - s->bytes_sent += written; - - if (svm_fifo_set_event (tx_fifo)) - app_send_io_evt_to_vpp (s->vpp_evt_q, tx_fifo, FIFO_EVENT_APP_TX, - 0 /* do wait for mutex */ ); + s->bytes_to_send -= n_sent; + s->bytes_sent += n_sent; } } @@ -639,7 +573,7 @@ client_thread_fn (void *arg) echo_main_t *em = &echo_main; static u8 *rx_buf = 0; u32 session_index = *(u32 *) arg; - session_t *s; + echo_session_t *s; vec_validate (rx_buf, 1 << 20); @@ -649,8 +583,8 @@ client_thread_fn (void *arg) s = pool_elt_at_index (em->sessions, session_index); while (!em->time_to_stop) { - send_test_chunk (em, s); - recv_test_chunk (em, s, rx_buf); + send_data_chunk (em, s); + recv_data_chunk (em, s, rx_buf); if (!s->bytes_to_send && !s->bytes_to_receive) break; } @@ -688,7 +622,7 @@ client_rx_thread_fn (void *arg) switch (e->event_type) { case FIFO_EVENT_APP_RX: - client_handle_fifo_event_rx (em, e, rx_buf); + client_handle_rx (em, e, rx_buf); break; default: clib_warning ("unknown event type %d", e->event_type); @@ -714,7 +648,7 @@ client_send_connect (echo_main_t * em) } void -client_send_disconnect (echo_main_t * em, session_t * s) +client_send_disconnect (echo_main_t * em, echo_session_t * s) { vl_api_disconnect_session_t *dmp; dmp = vl_msg_api_alloc (sizeof (*dmp)); @@ -726,7 +660,7 @@ client_send_disconnect (echo_main_t * em, session_t * 
s) } int -client_disconnect (echo_main_t * em, session_t * s) +client_disconnect (echo_main_t * em, echo_session_t * s) { client_send_disconnect (em, s); pool_put (em->sessions, s); @@ -734,6 +668,24 @@ client_disconnect (echo_main_t * em, session_t * s) return 0; } +static void +session_bound_handler (session_bound_msg_t * mp) +{ + echo_main_t *em = &echo_main; + + if (mp->retval) + { + clib_warning ("bind failed: %U", format_api_error, + clib_net_to_host_u32 (mp->retval)); + em->state = STATE_FAILED; + return; + } + + clib_warning ("listening on %U:%u", format_ip46_address, mp->lcl_ip, + mp->lcl_is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6, mp->lcl_port); + em->state = STATE_READY; +} + static void session_accepted_handler (session_accepted_msg_t * mp) { @@ -741,7 +693,7 @@ session_accepted_handler (session_accepted_msg_t * mp) session_accepted_reply_msg_t *rmp; svm_fifo_t *rx_fifo, *tx_fifo; echo_main_t *em = &echo_main; - session_t *session; + echo_session_t *session; static f64 start_time; u32 session_index; u8 *ip_str; @@ -762,8 +714,8 @@ session_accepted_handler (session_accepted_msg_t * mp) tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *); tx_fifo->client_session_index = session_index; - session->server_rx_fifo = rx_fifo; - session->server_tx_fifo = tx_fifo; + session->rx_fifo = rx_fifo; + session->tx_fifo = tx_fifo; session->vpp_evt_q = uword_to_pointer (mp->vpp_event_queue_address, svm_msg_q_t *); @@ -799,7 +751,7 @@ static void session_connected_handler (session_connected_msg_t * mp) { echo_main_t *em = &echo_main; - session_t *session; + echo_session_t *session; u32 session_index; svm_fifo_t *rx_fifo, *tx_fifo; int rv; @@ -825,8 +777,8 @@ session_connected_handler (session_connected_msg_t * mp) tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *); tx_fifo->client_session_index = session_index; - session->server_rx_fifo = rx_fifo; - session->server_tx_fifo = tx_fifo; + session->rx_fifo = rx_fifo; + session->tx_fifo = tx_fifo; 
session->vpp_session_handle = mp->handle; session->start = clib_time_now (&em->clib_time); session->vpp_evt_q = uword_to_pointer (mp->vpp_event_queue_address, @@ -859,7 +811,7 @@ session_disconnected_handler (session_disconnected_msg_t * mp) app_session_evt_t _app_evt, *app_evt = &_app_evt; session_disconnected_reply_msg_t *rmp; echo_main_t *em = &echo_main; - session_t *session = 0; + echo_session_t *session = 0; uword *p; int rv = 0; @@ -891,7 +843,7 @@ session_reset_handler (session_reset_msg_t * mp) app_session_evt_t _app_evt, *app_evt = &_app_evt; echo_main_t *em = &echo_main; session_reset_reply_msg_t *rmp; - session_t *session = 0; + echo_session_t *session = 0; uword *p; int rv = 0; @@ -923,6 +875,9 @@ handle_mq_event (session_event_t * e) { switch (e->event_type) { + case SESSION_CTRL_EVT_BOUND: + session_bound_handler ((session_bound_msg_t *) e->data); + break; case SESSION_CTRL_EVT_ACCEPTED: session_accepted_handler ((session_accepted_msg_t *) e->data); break; @@ -946,7 +901,7 @@ clients_run (echo_main_t * em) f64 start_time, deltat, timeout = 100.0; svm_msg_q_msg_t msg; session_event_t *e; - session_t *s; + echo_session_t *s; int i; /* Init test data */ @@ -1141,37 +1096,38 @@ format_ip46_address (u8 * s, va_list * args) } static void -server_handle_fifo_event_rx (echo_main_t * em, session_event_t * e) +server_handle_rx (echo_main_t * em, session_event_t * e) { - svm_fifo_t *rx_fifo, *tx_fifo; - int n_read; - session_t *session; - int rv; - u32 max_dequeue, offset, max_transfer, rx_buf_len; + int n_read, max_dequeue, n_sent; + u32 offset, to_dequeue; + echo_session_t *s; - rx_buf_len = vec_len (em->rx_buf); - rx_fifo = e->fifo; - session = pool_elt_at_index (em->sessions, rx_fifo->client_session_index); - tx_fifo = session->server_tx_fifo; + s = pool_elt_at_index (em->sessions, e->fifo->client_session_index); - max_dequeue = svm_fifo_max_dequeue (rx_fifo); - /* Allow enqueuing of a new event */ - svm_fifo_unset_event (rx_fifo); + /* Clear event only 
once. Otherwise, if we do it in the loop by calling + * app_recv_stream, we may end up with a lot of unhandled rx events on the + * message queue */ + svm_fifo_unset_event (s->rx_fifo); + max_dequeue = svm_fifo_max_dequeue (s->rx_fifo); if (PREDICT_FALSE (!max_dequeue)) return; - /* Read the max_dequeue */ do { - max_transfer = clib_min (rx_buf_len, max_dequeue); - n_read = svm_fifo_dequeue_nowait (rx_fifo, max_transfer, em->rx_buf); + /* The options here are to limit ourselves to max_dequeue or read + * even the data that was enqueued while we were dequeueing and which + * now has an rx event in the mq. Either of the two work. */ + to_dequeue = clib_min (max_dequeue, vec_len (em->rx_buf)); + n_read = app_recv_stream_raw (s->rx_fifo, em->rx_buf, to_dequeue, + 0 /* clear evt */ , 0 /* peek */ ); if (n_read > 0) { max_dequeue -= n_read; - session->bytes_received += n_read; - session->bytes_to_receive -= n_read; + s->bytes_received += n_read; } + else + break; /* Reflect if a non-drop session */ if (!em->no_return && n_read > 0) @@ -1179,27 +1135,23 @@ server_handle_fifo_event_rx (echo_main_t * em, session_event_t * e) offset = 0; do { - rv = svm_fifo_enqueue_nowait (tx_fifo, n_read, - &em->rx_buf[offset]); - if (rv > 0) + n_sent = app_send_stream ((app_session_t *) s, + &em->rx_buf[offset], + n_read, SVM_Q_WAIT); + if (n_sent > 0) { - n_read -= rv; - offset += rv; + n_read -= n_sent; + offset += n_sent; } } - while ((rv <= 0 || n_read > 0) && !em->time_to_stop); - - /* If event wasn't set, add one */ - if (svm_fifo_set_event (tx_fifo)) - app_send_io_evt_to_vpp (session->vpp_evt_q, tx_fifo, - FIFO_EVENT_APP_TX, SVM_Q_WAIT); + while ((n_sent <= 0 || n_read > 0) && !em->time_to_stop); } } - while ((n_read < 0 || max_dequeue > 0) && !em->time_to_stop); + while (max_dequeue > 0 && !em->time_to_stop); } static void -server_handle_event_queue (echo_main_t * em) +server_handle_mq (echo_main_t * em) { svm_msg_q_msg_t msg; session_event_t *e; @@ -1211,7 +1163,7 @@ 
server_handle_event_queue (echo_main_t * em) switch (e->event_type) { case FIFO_EVENT_APP_RX: - server_handle_fifo_event_rx (em, e); + server_handle_rx (em, e); break; default: handle_mq_event (e); @@ -1283,7 +1235,7 @@ server_unbind (echo_main_t * em) void server_run (echo_main_t * em) { - session_t *session; + echo_session_t *session; int i; /* $$$$ hack preallocation */ @@ -1303,7 +1255,7 @@ server_run (echo_main_t * em) return; /* Enter handle event loop */ - server_handle_event_queue (em); + server_handle_mq (em); /* Cleanup */ server_send_unbind (em); @@ -1472,7 +1424,7 @@ main (int argc, char **argv) em->test_return_packets = test_return_packets; em->bytes_to_send = bytes_to_send; em->time_to_stop = 0; - vec_validate (em->rx_buf, 128 << 10); + vec_validate (em->rx_buf, 4 << 20); vec_validate (em->client_thread_handles, em->n_clients - 1); vec_validate (em->thread_args, em->n_clients - 1); diff --git a/src/tools/g2/g2version.c b/src/tools/g2/g2version.c index ef3d036c6d54..05961ecf81af 100644 --- a/src/tools/g2/g2version.c +++ b/src/tools/g2/g2version.c @@ -1,6 +1,6 @@ -/* +/* *------------------------------------------------------------------ - * Copyright (c) 2005-2016 Cisco and/or its affiliates. + * Copyright (c) 2005-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -14,6 +14,5 @@ * limitations under the License. 
*/ -const char *version_string = "G2 (x86_64 GNU/Linux) major version 3.0"; -const char *minor_v_string = - "Changed Thu Dec 14 17:18:36 EST 2017"; +const char *version_string = "G2 (x86_64 GNU/Linux) major version 3.1"; +const char *minor_v_string = "Last Changed Sun Feb 3 10:38:26 EST 2019"; diff --git a/src/tools/g2/view1.c b/src/tools/g2/view1.c index 9c86fcdcec4b..ca05b78267f5 100644 --- a/src/tools/g2/view1.c +++ b/src/tools/g2/view1.c @@ -1,6 +1,6 @@ -/* +/* *------------------------------------------------------------------ - * Copyright (c) 2005-2016 Cisco and/or its affiliates. + * Copyright (c) 2005-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -24,10 +24,11 @@ #include #include #include +#include /* * The main event display view. - * + * * Important variables: * * "da" -- the drawing area, aka the screen representation of the @@ -39,7 +40,7 @@ * the backing store onto the screen. * * "s_v1" -- pointer to the current v1_geometry_t object. 
- * + * * Box heirarchy: * s_view1_vbox * s_view1_hbox @@ -74,7 +75,7 @@ static boolean color_mode = FALSE; /* start out in monochrome mode */ * Locals */ -/* +/* * user_data values passed to view1_button_click_callback, * which is used by the various action buttons noted above */ @@ -84,6 +85,9 @@ enum view1_button_click { START_BUTTON, ZOOMIN_BUTTON, SEARCH_BUTTON, + ANOMALY_BUTTON, + ANOMALY_NEXT_BUTTON, + ANOMALY_THRESHOLD_BUTTON, SEARCH_AGAIN_BUTTON, ZOOMOUT_BUTTON, END_BUTTON, @@ -144,18 +148,25 @@ typedef struct v1_geometry { int total_height; /* total height of da, see configure_event */ int total_width; /* ditto, for width */ double last_time_interval; /* last time interval, in f64 seconds */ - + double anomaly_threshold_stddevs; /* Anomaly detection threshold */ + /* Derived values */ int first_pid_index; /* Index of first displayed PID */ int npids; /* Max number of displayed pids */ ulonglong minvistime; /* in usec */ ulonglong maxvistime; /* in usec */ + + /* Anomaly detection statistics */ + f64 *means, *variances, *two_stddevs; + f64 *mins, *maxes; + u32 *matches; + } v1_geometry_t; /* The active geometry object */ -static v1_geometry_t s_v1record; -static v1_geometry_t *s_v1 = &s_v1record; +static v1_geometry_t s_v1record; +static v1_geometry_t *s_v1 = &s_v1record; /* The color array */ static GdkColor *s_color; @@ -182,8 +193,8 @@ static event_t *s_last_selected_event; /* * various widgets, see the box heirarchy chart above - * The toolkit keeps track of these things, we could lose many of - * these pointers. + * The toolkit keeps track of these things, we could lose many of + * these pointers. 
*/ static GtkWidget *s_view1_vmenubox; static GtkWidget *s_view1_topbutton; @@ -197,6 +208,8 @@ static GtkWidget *s_view1_startbutton; static GtkWidget *s_view1_zoominbutton; static GtkWidget *s_view1_searchbutton; static GtkWidget *s_view1_srchagainbutton; +static GtkWidget *s_view1_anomalybutton; +static GtkWidget *s_view1_anomalynextbutton; static GtkWidget *s_view1_zoomoutbutton; static GtkWidget *s_view1_endbutton; @@ -218,6 +231,8 @@ static GtkWidget *s_view1_nosummary_button; static GtkWidget *s_view1_time_slew_right_button; static GtkWidget *s_view1_time_slew_left_button; +static GtkWidget *s_view1_anomalythresholdbutton; + static GtkWidget *s_view1_hscroll; static GtkObject *s_view1_hsadj; @@ -227,9 +242,10 @@ static GtkObject *s_view1_vsadj; static GtkWidget *s_view1_label; /* - * Search context + * Search context */ static ulong s_srchcode; /* search event code */ +static ulong s_anomalycode; /* anomaly event code */ static int s_srchindex; /* last hit was at this event index */ static boolean s_result_up; /* The SEARCH RESULT dongle is displayed */ static boolean s_srchfail_up; /* The status line "Search Failed" is up */ @@ -237,10 +253,10 @@ static int srch_chase_dir; /* search/chase dir, 0=>forward */ /* - * Print context + * Print context */ static int s_print_offset; /* Magic offset added to line, tbox fn codes */ -static FILE *s_printfp; +static FILE *s_printfp; /* * Forward reference prototypes @@ -294,7 +310,7 @@ static unsigned char zi_bkgd[] = { static GdkCursor *zi_cursor; static GdkPixmap *zi_source, *zi_mask; -/* +/* * Frequently-used small computations, best * done correctly once and instantiated. */ @@ -334,7 +350,7 @@ void set_window_title (const char *filename) /**************************************************************************** * recompute_hscrollbar * Adjust the horizontal scrollbar's adjustment object. 
-* +* * GtkAdjustments are really cool, but have to be set up exactly * right or the various client objects screw up completely. * @@ -357,7 +373,7 @@ static void recompute_hscrollbar (void) adj = GTK_ADJUSTMENT(s_view1_hsadj); - /* + /* * Structure member decoder ring * ----------------------------- * lower the minimum possible value @@ -368,7 +384,7 @@ static void recompute_hscrollbar (void) * page_size size of currently visible area */ - adj->lower = (gfloat)0.00; + adj->lower = (gfloat)0.00; adj->value = (gfloat)s_v1->minvistime; /* Minor click: move about 1/6 of a page */ @@ -382,8 +398,8 @@ static void recompute_hscrollbar (void) adj->page_size = (gfloat)(current_width); /* - * Tell all clients (e.g. the visible scrollbar) to - * make themselves look right + * Tell all clients (e.g. the visible scrollbar) to + * make themselves look right */ gtk_adjustment_changed(adj); gtk_adjustment_value_changed(adj); @@ -428,7 +444,7 @@ void format_popbox_string (char *tmpbuf, int len, event_t *ep, event_def_t *edp) u8 *s; eep = get_clib_event (ep->datum); - + s = format (0, "%U", format_elog_event, &elog_main, eep); memcpy (tmpbuf, s, vec_len(s)); tmpbuf[vec_len(s)] = 0; @@ -444,10 +460,10 @@ void format_popbox_string (char *tmpbuf, int len, event_t *ep, event_def_t *edp) snprintf(tmpbuf+strlen(tmpbuf), len - strlen(tmpbuf), ": "); /* %s only supported for cpel files */ if (fp[1] == 's') { - snprintf(tmpbuf+strlen(tmpbuf), len - strlen(tmpbuf), + snprintf(tmpbuf+strlen(tmpbuf), len - strlen(tmpbuf), edp->format, strtab_ref(ep->datum)); } else { - snprintf(tmpbuf+strlen(tmpbuf), len - strlen(tmpbuf), + snprintf(tmpbuf+strlen(tmpbuf), len - strlen(tmpbuf), edp->format, ep->datum); } return; @@ -497,10 +513,10 @@ static void next_snapshot(void) pid_data_t *pp; if (!s_snapshots) { - infobox("No snapshots", "\nNo snapshots in the ring...\n"); + infobox("No snapshots", "\nNo snapshots in the ring...\n"); return; } - + next = s_cursnap->next; if (next == 0) next = s_snapshots; 
@@ -550,7 +566,7 @@ static void del_snapshot(void) snapshot_t *this; if (!s_snapshots) { - infobox("No snapshots", "\nNo snapshots to delete...\n"); + infobox("No snapshots", "\nNo snapshots to delete...\n"); return; } @@ -563,10 +579,10 @@ static void del_snapshot(void) } if (this != s_cursnap) { - infobox("BUG", "\nSnapshot AWOL!\n"); + infobox("BUG", "\nSnapshot AWOL!\n"); return; } - + s_cursnap = this->next; /* middle of the list? */ @@ -579,7 +595,7 @@ static void del_snapshot(void) g_free(this->pidvec); g_free(this); } - + /* Note: both will be NULL after last delete */ if (s_cursnap == NULL) s_cursnap = s_snapshots; @@ -597,7 +613,7 @@ static void write_snapshot(void) snapshot_t *snap; char *error = NULL; int records = 0; - + if (s_snapshots == NULL) { error = "No snapshots defined"; errno = 0; @@ -615,17 +631,17 @@ static void write_snapshot(void) * world. Don't come running to me if you try to read it and crash. */ for (snap = s_snapshots; !error && snap != NULL; snap = snap->next) { - if (fwrite(&snap->geometry, + if (fwrite(&snap->geometry, sizeof(snap->geometry), 1, file) != 1 || - fwrite(&snap->show_event, + fwrite(&snap->show_event, sizeof(snap->show_event), 1, file) != 1 || - fwrite(snap->pidvec, + fwrite(snap->pidvec, sizeof(pid_sort_t) * g_npids, 1, file) != 1 || - fwrite(&snap->vscroll_value, + fwrite(&snap->vscroll_value, sizeof(snap->vscroll_value), 1, file) != 1 || - fwrite(&snap->summary_mode, + fwrite(&snap->summary_mode, sizeof(snap->summary_mode), 1, file) != 1 || - fwrite(&snap->color_mode, + fwrite(&snap->color_mode, sizeof(snap->color_mode), 1, file) != 1) { error = "Error writing data"; } @@ -642,7 +658,7 @@ static void write_snapshot(void) infobox(error, strerror(errno)); } else { char buf[64]; - snprintf(buf, sizeof(buf), "Wrote %d snapshots to snapshots.g2", + snprintf(buf, sizeof(buf), "Wrote %d snapshots to snapshots.g2", records); message_line(buf); } @@ -701,11 +717,11 @@ static void read_snapshot(void) error = "Problem reading 
third item from file"; break; } - if (fread(&snap->vscroll_value, + if (fread(&snap->vscroll_value, sizeof(snap->vscroll_value), 1, file) != 1 || - fread(&snap->summary_mode, + fread(&snap->summary_mode, sizeof(snap->summary_mode), 1, file) != 1 || - fread(&snap->color_mode, + fread(&snap->color_mode, sizeof(snap->color_mode), 1, file) != 1) { error = "Problem reading final items from file"; break; @@ -739,7 +755,7 @@ static void read_snapshot(void) error = "Unable to close file"; } } - + if (error) { /* * Problem - clear up any detritus @@ -759,7 +775,7 @@ static void read_snapshot(void) g_free(snap->pidvec); g_free(snap); } - + s_cursnap = s_snapshots = new_snaps; } @@ -767,7 +783,7 @@ static void read_snapshot(void) infobox(error, strerror(errno)); } else { char buf[64]; - snprintf(buf, sizeof(buf), + snprintf(buf, sizeof(buf), "Read %d snapshots from snapshots.g2", records); message_line(buf); } @@ -785,13 +801,13 @@ static void set_color(int pid_index) pid_sort_t *psp; psp = (g_pids + pid_index); - + if (psp->selected) gdk_gc_set_foreground(da->style->black_gc, &s_color[0]); else if (pid_index == COLOR_DEFAULT || !color_mode) { gdk_gc_set_foreground(da->style->black_gc, &fg_black); } else { - gdk_gc_set_foreground(da->style->black_gc, + gdk_gc_set_foreground(da->style->black_gc, &s_color[g_pids[pid_index].color_index]); } } @@ -822,8 +838,8 @@ static int toggle_event_select(GdkEventButton *event, v1_geometry_t *vp) /* Too far right? */ if (start_index >= g_nevents) return 0; - - /* + + /* * To see if the mouse hit a visible event, use a variant * of the event display loop. 
*/ @@ -832,13 +848,13 @@ static int toggle_event_select(GdkEventButton *event, v1_geometry_t *vp) hit_rect.y = (int)event->y; hit_rect.width = 1; hit_rect.height = 1; - + ep = (g_events + start_index); - - while ((ep->time < vp->maxvistime) && + + while ((ep->time < vp->maxvistime) && (ep < (g_events + g_nevents))) { pid_index = ep->pid->pid_index; - + /* First filter: pid out of range */ if ((pid_index < vp->first_pid_index) || (pid_index >= vp->first_pid_index + vp->npids)) { @@ -852,22 +868,22 @@ static int toggle_event_select(GdkEventButton *event, v1_geometry_t *vp) ep++; continue; } - - /* + + /* * At this point, we know that the point is at least on the - * screen. See if the mouse hit within the bounding box + * screen. See if the mouse hit within the bounding box */ - /* + /* * $$$$ maybe keep looping until off the edge, * maintain a "best hit", then declare that one the winner? */ pid_index -= vp->first_pid_index; - + y = pid_index*vp->strip_height + vp->event_offset; - - x = vp->pid_ax_width + + + x = vp->pid_ax_width + (int)(((double)(ep->time - vp->minvistime)) / time_per_pixel); /* Perhaps we're trying to toggle the detail box? 
*/ @@ -880,7 +896,7 @@ static int toggle_event_select(GdkEventButton *event, v1_geometry_t *vp) view1_display_when_idle(); return 0; } - } + } sprintf(tmpbuf, "%ld", ep->code); @@ -915,14 +931,14 @@ static int toggle_event_select(GdkEventButton *event, v1_geometry_t *vp) * toggle_track_select ****************************************************************************/ -static void toggle_track_select (GdkEventButton *event, +static void toggle_track_select (GdkEventButton *event, v1_geometry_t *vp) { int i; int pid_index; int y, delta_y; pid_sort_t *psp; - + if (g_nevents == 0) return; @@ -939,7 +955,7 @@ static void toggle_track_select (GdkEventButton *event, } infobox("NOTE", "\nNo PID/Track In Range\nPlease Try Again"); return; - + found: pid_index = i + vp->first_pid_index; psp = (g_pids + pid_index); @@ -966,7 +982,7 @@ static void deselect_tracks (void) typedef enum { MOVE_TOP, MOVE_BOTTOM } move_type; -static void move_current_track(GdkEventButton *event, +static void move_current_track(GdkEventButton *event, v1_geometry_t *vp, move_type type) { @@ -994,7 +1010,7 @@ static void move_current_track(GdkEventButton *event, } infobox("NOTE", "\nNo PID/Track In Range\nPlease Try Again"); return; - + found: pid_index = i + vp->first_pid_index; @@ -1026,7 +1042,7 @@ static void move_current_track(GdkEventButton *event, g_pids = new_pidvec; /* - * Revert the pid_index mapping to an identity map, + * Revert the pid_index mapping to an identity map, */ psp = g_pids; @@ -1040,7 +1056,7 @@ static void move_current_track(GdkEventButton *event, /**************************************************************************** * zoom_event -* Process a zoom gesture. The use of doubles is required to avoid +* Process a zoom gesture. The use of doubles is required to avoid * truncating the various variable values, which in turn would lead to * some pretty random-looking zoom responses. 
****************************************************************************/ @@ -1053,15 +1069,15 @@ void zoom_event(GdkEventButton *e1, GdkEventButton *e2, v1_geometry_t *vp) double center_on_time, width_in_time; double center_on_pixel; - /* - * Clip the zoom area to the event display area. + /* + * Clip the zoom area to the event display area. * Otherwise, center_on_time - width_in_time is in hyperspace - * to the left of zero + * to the left of zero */ - + if (e1->x < vp->pid_ax_width) e1->x = vp->pid_ax_width; - + if (e2->x < vp->pid_ax_width) e2->x = vp->pid_ax_width; @@ -1078,20 +1094,20 @@ void zoom_event(GdkEventButton *e1, GdkEventButton *e2, v1_geometry_t *vp) width_in_time = width_in_pixels * time_per_pixel; /* Center the screen on the center of the zoom area */ - center_on_pixel = (double)((e2->x + e1->x) / 2.00) - + center_on_pixel = (double)((e2->x + e1->x) / 2.00) - (double)vp->pid_ax_width; center_on_time = center_on_pixel*time_per_pixel + (double)vp->minvistime; /* * Transform back to 64-bit integer microseconds, reset the - * scrollbar, schedule a repaint. + * scrollbar, schedule a repaint. */ vp->minvistime = (ulonglong)(center_on_time - width_in_time); vp->maxvistime = (ulonglong)(center_on_time + width_in_time); loser_zoom_repaint: recompute_hscrollbar(); - + view1_display_when_idle(); } @@ -1108,7 +1124,7 @@ static void scroll_y(int delta) new_index = g_npids - s_v1->npids; if (new_index < 0) new_index = 0; - + if (new_index != s_v1->first_pid_index) { s_v1->first_pid_index = new_index; GTK_ADJUSTMENT(s_view1_vsadj)->value = (gdouble)new_index; @@ -1238,9 +1254,9 @@ view1_handle_key_press_event (GtkWidget *widget, GdkEventKey *event) * Relevant definitions in: /usr/include/gtk-1.2/gdk/gdktypes.h * * This routine implements three functions: zoom-to-area, time ruler, and -* show/hide event detail popup. +* show/hide event detail popup. 
* -* The left mouse button (button 1) has two simultaneous functions: event +* The left mouse button (button 1) has two simultaneous functions: event * detail popup, and zoom-to-area. If the press and release events occur * within a small delta-x, it's a detail popup event. Otherwise, it's * an area zoom. @@ -1336,19 +1352,19 @@ button_press_event (GtkWidget *widget, GdkEventButton *event) /* Fence, cursor already set */ if (zoom_bar_up) return(TRUE); - + xdelta = (int)(press1_event.x - event->x); if (xdelta < 0) xdelta = -xdelta; - + /* Haven't moved enough to declare a zoom sequence yet */ - if (xdelta < 10) + if (xdelta < 10) return(TRUE); - + /* Draw the zoom fence, use the key-down X coordinate */ time_ax_y = s_v1->npids * s_v1->strip_height + s_v1->pid_ax_offset; - - line((int)(press1_event.x), s_v1->pop_offset, + + line((int)(press1_event.x), s_v1->pop_offset, (int)(press1_event.x), time_ax_y, LINE_DRAW_BLACK); tbox("Zoom From Here...", (int)(press1_event.x), s_v1->pop_offset, TBOX_DRAW_BOXED); @@ -1361,7 +1377,7 @@ button_press_event (GtkWidget *widget, GdkEventButton *event) gdk_window_set_cursor(da->window, zi_cursor); - /* + /* * Some filtration is needed on Solaris, or the server will hang */ if (event->time - last_truler_time < 75) @@ -1369,23 +1385,23 @@ button_press_event (GtkWidget *widget, GdkEventButton *event) last_truler_time = event->time; - line((int)(press3_event.x), s_v1->pop_offset, + line((int)(press3_event.x), s_v1->pop_offset, (int)(press3_event.x), time_ax_y, LINE_DRAW_BLACK); xdelta = (int)(press3_event.x - event->x); if (xdelta < 0) xdelta = -xdelta; - - time_per_pixel = ((double)(s_v1->maxvistime - s_v1->minvistime)) / - ((double)(s_v1->total_width - s_v1->pid_ax_width)); + + time_per_pixel = ((double)(s_v1->maxvistime - s_v1->minvistime)) / + ((double)(s_v1->total_width - s_v1->pid_ax_width)); time_ax_y = s_v1->npids * s_v1->strip_height + s_v1->pid_ax_offset; - line((int)(press3_event.x), s_v1->pop_offset, + 
line((int)(press3_event.x), s_v1->pop_offset, (int)(press3_event.x), time_ax_y, LINE_DRAW_BLACK); /* * Note: use a fixed-width format so it looks like we're - * erasing and redrawing the box. + * erasing and redrawing the box. */ nsec = ((double)xdelta)*time_per_pixel; if (nsec >1e9) { @@ -1441,7 +1457,7 @@ configure_event (GtkWidget *widget, GdkEventConfigure *event) /* Toss the previous drawing area backing store pixmap */ if (pm) gdk_pixmap_unref(pm); - + /* Create a new pixmap, paint it */ pm = gdk_pixmap_new(widget->window, widget->allocation.width, @@ -1457,7 +1473,7 @@ configure_event (GtkWidget *widget, GdkEventConfigure *event) /* Reset the view geometry parameters, as required */ s_v1->total_width = widget->allocation.width; s_v1->total_height = widget->allocation.height; - s_v1->npids = (s_v1->total_height - s_v1->time_ax_height) / + s_v1->npids = (s_v1->total_height - s_v1->time_ax_height) / s_v1->strip_height; /* Schedule a repaint */ @@ -1477,7 +1493,7 @@ static gint expose_event (GtkWidget *widget, GdkEventExpose *event) event->area.x, event->area.y, event->area.x, event->area.y, event->area.width, event->area.height); - + return(FALSE); } @@ -1504,7 +1520,7 @@ boolean event_search_internal (void) ep = (g_events + s_srchindex); ep->flags &= ~EVENT_FLAG_SEARCHRSLT; - /* + /* * Assume the user wants to search [plus or minus] * from where they are. */ @@ -1517,27 +1533,27 @@ boolean event_search_internal (void) index = (srch_chase_dir == SRCH_CHASE_BACKWARD) ? (s_srchindex - i) % g_nevents : (i + s_srchindex) % g_nevents; - + ep = (g_events + index); - + if (ep->code == s_srchcode) { if (s_srchfail_up) message_line(""); s_srchindex = index; pid_index = ep->pid->pid_index; - + /* Need a vertical scroll? 
*/ if ((pid_index < s_v1->first_pid_index) || (pid_index >= s_v1->first_pid_index + s_v1->npids)) { if (pid_index > (g_npids - s_v1->npids)) pid_index = (g_npids - s_v1->npids); s_v1->first_pid_index = pid_index; - GTK_ADJUSTMENT(s_view1_vsadj)->value = + GTK_ADJUSTMENT(s_view1_vsadj)->value = (gdouble)s_v1->first_pid_index; gtk_adjustment_value_changed(GTK_ADJUSTMENT(s_view1_vsadj)); full_redisplay = TRUE; } - + /* Need a horizontal scroll? */ if (ep->time < s_v1->minvistime || ep->time > s_v1->maxvistime) { current_width = (s_v1->maxvistime - s_v1->minvistime); @@ -1559,10 +1575,10 @@ boolean event_search_internal (void) if (!s_result_up) { s_result_up = TRUE; time_per_pixel = dtime_per_pixel(s_v1); - + y = pid_index*s_v1->strip_height + s_v1->event_offset; - x = s_v1->pid_ax_width + - (int)(((double)(ep->time - s_v1->minvistime)) / + x = s_v1->pid_ax_width + + (int)(((double)(ep->time - s_v1->minvistime)) / time_per_pixel); sprintf(tmpbuf, "SEARCH RESULT"); tbox(tmpbuf, x, y - s_v1->pop_offset, TBOX_DRAW_BOXED); @@ -1595,13 +1611,277 @@ boolean event_search_callback (char *s) return(TRUE); s_srchcode = atol(s); - + if (s_srchcode == 0) return(FALSE); return(event_search_internal()); } + +/**************************************************************************** +* anomaly_statistics_init +****************************************************************************/ + +static int anomaly_statistics_init (void) +{ + elog_event_t *eep; + u32 data; + event_t *ep; + pid_data_t *pid; + int i; + int index; + int pid_index; + f64 fdata; + + /* Gather summary statistics... 
*/ + ep = g_events; + + vec_reset_length (s_v1->means); + vec_reset_length (s_v1->matches); + vec_reset_length (s_v1->variances); + vec_reset_length (s_v1->two_stddevs); + vec_reset_length (s_v1->mins); + vec_reset_length (s_v1->maxes); + + for (i = 0; i < g_nevents; i++) { + if (ep->code != s_anomalycode) { + ep++; + continue; + } + pid = ep->pid; + vec_validate_init_empty (s_v1->matches, pid->pid_index, 0); + vec_validate_init_empty (s_v1->means, pid->pid_index, 0.0); + vec_validate_init_empty (s_v1->mins, pid->pid_index, 0.0); + vec_validate_init_empty (s_v1->maxes, pid->pid_index, 0.0); + eep = get_clib_event (ep->datum); + data = clib_mem_unaligned (eep->data, u32); + fdata = data; + s_v1->means[pid->pid_index] += fdata; + s_v1->matches[pid->pid_index] += 1; + /* First data point? set min, max */ + if (PREDICT_FALSE(s_v1->matches[pid->pid_index] == 1)) { + s_v1->mins[pid->pid_index] = fdata; + s_v1->maxes[pid->pid_index] = fdata; + } else { + s_v1->mins[pid->pid_index] = (fdata < s_v1->mins[pid->pid_index]) ? + fdata : s_v1->mins[pid->pid_index]; + s_v1->maxes[pid->pid_index] = + (fdata > s_v1->maxes[pid->pid_index]) ? + fdata : s_v1->maxes[pid->pid_index]; + } + ep++; + } + if (vec_len (s_v1->matches) == 0) + return -1; + + /* Compute s_v1->means */ + for (i = 0; i < vec_len (s_v1->means); i++) + s_v1->means[i] = s_v1->matches[i] + ? (s_v1->means[i] / (f64) s_v1->matches[i]) : 0.0; + + /* Compute s_v1->variances */ + ep = g_events; + for (i = 0; i < g_nevents; i++) { + if (ep->code != s_anomalycode) { + ep++; + continue; + } + pid = ep->pid; + vec_validate_init_empty (s_v1->variances, pid->pid_index, 0); + eep = get_clib_event (ep->datum); + data = clib_mem_unaligned (eep->data, u32); + fdata = data; + s_v1->variances[pid->pid_index] += + (fdata - s_v1->means[pid->pid_index]) + * (fdata - s_v1->means[pid->pid_index]); + ep++; + } + + /* Normalize variances */ + for (i = 0; i < vec_len (s_v1->variances); i++) + s_v1->variances[i] = s_v1->matches[i] + ? 
(s_v1->variances[i] / (f64) s_v1->matches[i]) : 0.0; + + /* Compute the anomaly threshold, by default 2.5*stddev */ + for (i = 0; i < vec_len (s_v1->variances); i++) + vec_add1 (s_v1->two_stddevs, + s_v1->anomaly_threshold_stddevs * sqrt(s_v1->variances[i])); + return 0; +} + +/**************************************************************************** +* anomaly_search_internal +* This routine searches forward from s_srchindex, looking for s_srchcode; +* wraps at the end of the buffer. +****************************************************************************/ + +boolean anomaly_search_internal (void) +{ + elog_event_t *eep; + u32 data; + event_t *ep; + pid_data_t *pid; + int i; + int index; + int pid_index; + boolean full_redisplay = FALSE; + ulonglong current_width; + char tmpbuf [64]; + f64 fdata; + + if (vec_len (s_v1->matches) == 0) + anomaly_statistics_init(); + + ep = (g_events + s_srchindex); + ep->flags &= ~EVENT_FLAG_SEARCHRSLT; + + /* + * If the user rearranged the screen, start from the minimum + * visible time + */ + if (ep->time < s_v1->minvistime) + s_srchindex = find_event_index (s_v1->minvistime); + + for (i = 1; i <= g_nevents; i++) { + index = (i + s_srchindex) % g_nevents; + + ep = (g_events + index); + if (ep->code != s_anomalycode) + continue; + pid = ep->pid; + + eep = get_clib_event (ep->datum); + data = clib_mem_unaligned (eep->data, u32); + fdata = data; + + /* + * Found an anomaly? Define an anomaly as a datum + * greater than 2*stddev above average. 
+ */ + if ((fdata - s_v1->means[pid->pid_index]) > + s_v1->two_stddevs[pid->pid_index]) { + u8 *s; + + s = format (0, "%.1f*stddev {min,max,mean,threshold}: ", + s_v1->anomaly_threshold_stddevs); + + for (i = 0; i < vec_len (s_v1->means); i++) { + if (s_v1->matches[i] > 0) + s = format (s, "{%.0f, %.0f, %.0f, %.0f} ", + s_v1->mins[i], s_v1->maxes[i], + s_v1->means[i], + s_v1->means[i]+s_v1->two_stddevs[i]); + else + s = format (s, "{no match} "); + } + + message_line ((char *)s); + vec_free (s); + + s_srchindex = index; + pid_index = ep->pid->pid_index; + + /* Need a vertical scroll? */ + if ((pid_index < s_v1->first_pid_index) || + (pid_index >= s_v1->first_pid_index + s_v1->npids)) { + if (pid_index > (g_npids - s_v1->npids)) + pid_index = (g_npids - s_v1->npids); + s_v1->first_pid_index = pid_index; + GTK_ADJUSTMENT(s_view1_vsadj)->value = + (gdouble)s_v1->first_pid_index; + gtk_adjustment_value_changed(GTK_ADJUSTMENT(s_view1_vsadj)); + full_redisplay = TRUE; + } + + /* Need a horizontal scroll? 
*/ + if (ep->time < s_v1->minvistime || ep->time > s_v1->maxvistime) { + current_width = (s_v1->maxvistime - s_v1->minvistime); + if (ep->time < ((current_width+1) / 2)) { + s_v1->minvistime = 0ll; + s_v1->maxvistime = current_width; + } else { + s_v1->minvistime = ep->time - ((current_width+1)/2); + s_v1->maxvistime = ep->time + ((current_width+1)/2); + } + recompute_hscrollbar(); + full_redisplay = TRUE; + } + ep->flags |= EVENT_FLAG_SEARCHRSLT; + full_redisplay = TRUE; + + if (full_redisplay) + view1_display_when_idle(); + + return(TRUE); + } + } + sprintf (tmpbuf, "Search for an anomalous event %ld failed...\n", + s_anomalycode); + message_line(tmpbuf); + s_srchfail_up = TRUE; + return(TRUE); +} + +/**************************************************************************** +* anomaly_search_callback +****************************************************************************/ + +boolean anomaly_search_callback (char *s) +{ + ulong new_anomalycode; + + /* No events yet? Act like the search worked, to avoid a loop */ + if (g_nevents == 0) + return(TRUE); + + new_anomalycode = atol(s); + + if (new_anomalycode == 0) + return(FALSE); + + if (new_anomalycode != s_anomalycode || + vec_len (s_v1->matches) == 0) { + s_anomalycode = new_anomalycode; + if (anomaly_statistics_init()) { + u8 *s; + + s = format (0, "Search for an anomalous event %ld failed...\n", + s_anomalycode); + message_line ((char *) s); + vec_free (s); + return (TRUE); + } + } + return(anomaly_search_internal()); +} + +/**************************************************************************** +* anomaly_threshold_callback +****************************************************************************/ + +boolean anomaly_threshold_callback (char *s) +{ + f64 new_threshold; + + /* No events yet? 
Act like the search worked, to avoid a loop */ + if (g_nevents == 0) + return(TRUE); + + new_threshold = atof (s); + + if (new_threshold == 0.0 || new_threshold > 10.0) + return(FALSE); + + s_v1->anomaly_threshold_stddevs = new_threshold; + + vec_reset_length (s_v1->means); + vec_reset_length (s_v1->matches); + vec_reset_length (s_v1->variances); + vec_reset_length (s_v1->two_stddevs); + return (TRUE); +} + /**************************************************************************** * event_search ****************************************************************************/ @@ -1613,6 +1893,28 @@ static void event_search (void) event_search_callback); } +/**************************************************************************** +* anomaly_search +****************************************************************************/ + +static void anomaly_search (void) +{ + modal_dialog ("Anomaly Search: Please Enter Event Code", + "Invalid: Please Reenter Event Code", NULL, + anomaly_search_callback); +} + +/**************************************************************************** +* anomaly_threshold +****************************************************************************/ + +static void anomaly_threshold (void) +{ + modal_dialog ("Anomaly Threshold: Please Enter Threshold", + "Invalid: Please Reenter Threshold in Standard Deviations", + NULL, anomaly_threshold_callback); +} + /**************************************************************************** * init_track_colors ****************************************************************************/ @@ -1631,10 +1933,10 @@ static void init_track_colors(void) * However, it's easier just to allocate everything from fresh. As a nod in * the direction of politeness towards our poor abused X server, we at * least mop up the previously allocated GCs first, although in practice - * even omitting this didn't seem to cause a problem. + * even omitting this didn't seem to cause a problem. 
*/ if (s_color != NULL ) { - gdk_colormap_free_colors(gtk_widget_get_colormap(da), + gdk_colormap_free_colors(gtk_widget_get_colormap(da), s_color, g_npids); clib_memset(s_color, 0, sizeof(GdkColor) * g_npids); } else { @@ -1701,7 +2003,7 @@ static void init_track_colors(void) * Actually allocate the colors in one bulk operation. We ignore the return * values. */ - gdk_colormap_alloc_colors(gtk_widget_get_colormap(da), + gdk_colormap_alloc_colors(gtk_widget_get_colormap(da), s_color, g_npids+1, FALSE, TRUE, dont_care); } @@ -1725,7 +2027,7 @@ static void chase_event_etc(enum chase_mode mode) int winner; if (!s_last_selected_event) { - infobox("No selected event", + infobox("No selected event", "\nPlease select an event and try again...\n"); return; } @@ -1818,10 +2120,10 @@ static void chase_event_etc(enum chase_mode mode) g_free (g_pids); g_pids = new_pidvec; - + /* * The new g_pids vector contains the "chase" sort, so we revert - * the pid_index mapping to an identity map + * the pid_index mapping to an identity map */ psp = g_pids; @@ -1849,7 +2151,7 @@ static void unchase_event_etc(void) pid_sort_t *psp; pid_data_t *pp; - memcpy (g_pids, g_original_pids, sizeof(pid_sort_t)*g_npids); + memcpy (g_pids, g_original_pids, sizeof(pid_sort_t)*g_npids); /* Fix the pid structure index mappings */ psp = g_pids; @@ -1886,7 +2188,7 @@ static void print_ps_header (v1_geometry_t *vp, char *filename) fprintf(s_printfp, "%%%%CreationDate: %s", ctime(&now)); fprintf(s_printfp, "%%%%DocumentData: Clean7Bit\n"); fprintf(s_printfp, "%%%%Origin: 0 0\n"); - fprintf(s_printfp, "%%%%BoundingBox: 0 0 %d %d\n", vp->total_height, + fprintf(s_printfp, "%%%%BoundingBox: 0 0 %d %d\n", vp->total_height, vp->total_width); fprintf(s_printfp, "%%%%LanguageLevel: 2\n"); fprintf(s_printfp, "%%%%Pages: 1\n"); @@ -1902,9 +2204,9 @@ static void print_ps_header (v1_geometry_t *vp, char *filename) * xrt * Xcoordinate rotate and translate. 
We need to emit postscript that * has a reasonable aspect ratio for printing. To do that, we rotate the -* intended picture by 90 degrees, using the standard 2D rotation +* intended picture by 90 degrees, using the standard 2D rotation * formula: -* +* * Xr = x*cos(theta) - y*sin(theta); * Yr = x*sin(theta) + y*cos(theta); * @@ -1982,7 +2284,7 @@ static void slew_tracks (v1_geometry_t *vp, enum view1_button_click which) pid_sort_t *pp; int pid_index; ulonglong delta; - + delta = (ulonglong) (vp->last_time_interval); /* Make sure we don't push events to the left of the big bang */ @@ -1990,10 +2292,10 @@ static void slew_tracks (v1_geometry_t *vp, enum view1_button_click which) for (ep = g_events; ep < (g_events + g_nevents); ep++) { pid_index = ep->pid->pid_index; pp = (g_pids + pid_index); - + if (pp->selected) { if (ep->time < delta) { - infobox("Slew Range Error", + infobox("Slew Range Error", "\nCan't slew selected data left that far..." "\nEvents would preceed the Big Bang (t=0)..."); goto out; @@ -2025,7 +2327,7 @@ static void slew_tracks (v1_geometry_t *vp, enum view1_button_click which) } /**************************************************************************** -* view1_button_click_callback +* view1_button_click_callback ****************************************************************************/ static void view1_button_click_callback(GtkButton *item, gpointer data) @@ -2112,6 +2414,21 @@ static void view1_button_click_callback(GtkButton *item, gpointer data) event_search(); break; + case ANOMALY_THRESHOLD_BUTTON: + anomaly_threshold(); + break; + + case ANOMALY_NEXT_BUTTON: + if (s_anomalycode) { + anomaly_search_internal(); + break; + } + /* NOTE FALLTHROUGH */ + + case ANOMALY_BUTTON: + anomaly_search(); + break; + case ZOOMOUT_BUTTON: if (zoom_delta == 0LL) zoom_delta = 1; @@ -2123,14 +2440,14 @@ static void view1_button_click_callback(GtkButton *item, gpointer data) s_v1->minvistime = 0; s_v1->maxvistime += zoom_delta*2; } - - if 
((s_v1->maxvistime - s_v1->minvistime) * 8 > + + if ((s_v1->maxvistime - s_v1->minvistime) * 8 > g_events[g_nevents-1].time * 9) { s_v1->minvistime = 0; s_v1->maxvistime = g_events[g_nevents-1].time * 9 / 8; /* Single event? Make window 1s wide... */ if (g_nevents == 1) - s_v1->maxvistime = 1000000; + s_v1->maxvistime = 1000000; } recompute_hscrollbar(); @@ -2154,7 +2471,7 @@ static void view1_button_click_callback(GtkButton *item, gpointer data) } /* Recalculate the number of strips on the screen */ - s_v1->npids = (s_v1->total_height - s_v1->time_ax_height) / + s_v1->npids = (s_v1->total_height - s_v1->time_ax_height) / s_v1->strip_height; recompute_vscrollbar(); break; @@ -2167,7 +2484,7 @@ static void view1_button_click_callback(GtkButton *item, gpointer data) } /* Recalculate the number of strips on the screen */ - s_v1->npids = (s_v1->total_height - s_v1->time_ax_height) / + s_v1->npids = (s_v1->total_height - s_v1->time_ax_height) / s_v1->strip_height; recompute_vscrollbar(); break; @@ -2199,7 +2516,7 @@ static void view1_button_click_callback(GtkButton *item, gpointer data) case SLEW_LEFT_BUTTON: case SLEW_RIGHT_BUTTON: if (s_v1->last_time_interval < 10e-9) { - infobox("slew", "\nNo time interval set...\n"); + infobox("slew", "\nNo time interval set...\n"); break; } slew_tracks (s_v1, click); @@ -2232,7 +2549,7 @@ static void view1_hscroll (GtkAdjustment *adj, GtkWidget *notused) s_v1->minvistime = (ulonglong)(adj->value); s_v1->maxvistime = s_v1->minvistime + current_width; - + view1_display_when_idle(); #ifdef NOTDEF @@ -2267,7 +2584,6 @@ void set_pid_ax_width(int width) void view1_init(void) { - c_view1_draw_width = atol(getprop_default("drawbox_width", "700")); c_view1_draw_height = atol(getprop_default("drawbox_height", "400")); @@ -2281,8 +2597,9 @@ void view1_init(void) s_v1->total_height = c_view1_draw_height; s_v1->total_width = c_view1_draw_width; s_v1->first_pid_index = 0; - - s_v1->npids = (s_v1->total_height - s_v1->time_ax_height) / + 
s_v1->anomaly_threshold_stddevs = + atof(getprop_default("anomaly_threshold_stddevs", "2.5")); + s_v1->npids = (s_v1->total_height - s_v1->time_ax_height) / s_v1->strip_height; s_v1->minvistime = 0; @@ -2293,9 +2610,9 @@ void view1_init(void) s_view1_hbox = gtk_hbox_new(FALSE, 5); da = gtk_drawing_area_new(); - gtk_drawing_area_size(GTK_DRAWING_AREA(da), c_view1_draw_width, + gtk_drawing_area_size(GTK_DRAWING_AREA(da), c_view1_draw_width, c_view1_draw_height); - + #ifdef NOTDEF gtk_signal_connect (GTK_OBJECT (da), "motion_notify_event", (GtkSignalFunc) motion_notify_event, NULL); @@ -2309,15 +2626,15 @@ void view1_init(void) gtk_signal_connect (GTK_OBJECT (da), "button_press_event", (GtkSignalFunc) button_press_event, NULL); - + gtk_signal_connect (GTK_OBJECT (da), "button_release_event", (GtkSignalFunc) button_press_event, NULL); - + gtk_signal_connect (GTK_OBJECT (da), "motion_notify_event", (GtkSignalFunc) button_press_event, NULL); - - gtk_widget_set_events (da, GDK_BUTTON_PRESS_MASK - | GDK_BUTTON_RELEASE_MASK | GDK_EXPOSURE_MASK + + gtk_widget_set_events (da, GDK_BUTTON_PRESS_MASK + | GDK_BUTTON_RELEASE_MASK | GDK_EXPOSURE_MASK | GDK_BUTTON_MOTION_MASK); @@ -2332,40 +2649,40 @@ void view1_init(void) /* PID axis menu */ s_view1_vmenubox = gtk_vbox_new(FALSE, 5); - s_view1_vsadj = gtk_adjustment_new(0.0 /* initial value */, + s_view1_vsadj = gtk_adjustment_new(0.0 /* initial value */, 0.0 /* minimum value */, 2000.0 /* maximum value */, - 0.1 /* step increment */, - 10.0/* page increment */, + 0.1 /* step increment */, + 10.0/* page increment */, 10.0/* page size */); s_view1_vscroll = gtk_vscrollbar_new (GTK_ADJUSTMENT(s_view1_vsadj)); gtk_signal_connect (GTK_OBJECT (s_view1_vsadj), "value-changed", - GTK_SIGNAL_FUNC (view1_vscroll), + GTK_SIGNAL_FUNC (view1_vscroll), (gpointer)s_view1_vscroll); s_view1_topbutton = gtk_button_new_with_label("Top"); s_view1_bottombutton = gtk_button_new_with_label("Bottom"); gtk_signal_connect (GTK_OBJECT(s_view1_topbutton), 
"clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) TOP_BUTTON); - + gtk_signal_connect (GTK_OBJECT(s_view1_bottombutton), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) BOTTOM_BUTTON); /* More Traces button and Less Traces button */ s_view1_more_traces_button = gtk_button_new_with_label("More Traces"); s_view1_less_traces_button = gtk_button_new_with_label("Less Traces"); gtk_signal_connect (GTK_OBJECT(s_view1_more_traces_button), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) MORE_TRACES_BUTTON); gtk_signal_connect (GTK_OBJECT(s_view1_less_traces_button), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) LESS_TRACES_BUTTON); - + #ifdef NOTDEF /* Trick to bottom-justify the menu: */ s_view1_pad1 = gtk_vbox_new(FALSE, 0); @@ -2373,76 +2690,92 @@ void view1_init(void) TRUE, FALSE, 0); #endif - + gtk_box_pack_start (GTK_BOX(s_view1_vmenubox), s_view1_topbutton, FALSE, FALSE, 0); gtk_box_pack_start (GTK_BOX(s_view1_vmenubox), s_view1_vscroll, TRUE, TRUE, 0); - + gtk_box_pack_start (GTK_BOX(s_view1_vmenubox), s_view1_bottombutton, FALSE, FALSE, 0); gtk_box_pack_start (GTK_BOX(s_view1_vmenubox), s_view1_more_traces_button, FALSE, FALSE, 0); - + gtk_box_pack_start (GTK_BOX(s_view1_vmenubox), s_view1_less_traces_button, FALSE, FALSE, 0); - + gtk_box_pack_start (GTK_BOX(s_view1_hbox), s_view1_vmenubox, FALSE, FALSE, 0); /* Time axis menu */ s_view1_hmenubox = gtk_hbox_new(FALSE, 5); - + s_view1_startbutton = gtk_button_new_with_label("Start"); s_view1_zoominbutton = gtk_button_new_with_label("ZoomIn"); s_view1_searchbutton = gtk_button_new_with_label("Search"); - s_view1_srchagainbutton = gtk_button_new_with_label("Search Again"); + s_view1_anomalybutton = gtk_button_new_with_label("Anomaly"); + 
s_view1_anomalynextbutton = gtk_button_new_with_label("Next Anomaly"); + s_view1_anomalythresholdbutton = + gtk_button_new_with_label ("Anomaly Threshold"); + s_view1_zoomoutbutton = gtk_button_new_with_label("ZoomOut"); s_view1_endbutton = gtk_button_new_with_label("End"); gtk_signal_connect (GTK_OBJECT(s_view1_startbutton), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) START_BUTTON); - + gtk_signal_connect (GTK_OBJECT(s_view1_zoominbutton), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) ZOOMIN_BUTTON); - + gtk_signal_connect (GTK_OBJECT(s_view1_searchbutton), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) SEARCH_BUTTON); - + gtk_signal_connect (GTK_OBJECT(s_view1_srchagainbutton), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) SEARCH_AGAIN_BUTTON); - + + gtk_signal_connect (GTK_OBJECT(s_view1_anomalybutton), "clicked", + GTK_SIGNAL_FUNC(view1_button_click_callback), + (gpointer) ANOMALY_BUTTON); + + gtk_signal_connect (GTK_OBJECT(s_view1_anomalynextbutton), "clicked", + GTK_SIGNAL_FUNC(view1_button_click_callback), + (gpointer) ANOMALY_NEXT_BUTTON); + + gtk_signal_connect (GTK_OBJECT(s_view1_anomalythresholdbutton), + "clicked", GTK_SIGNAL_FUNC(view1_button_click_callback), + (gpointer) ANOMALY_THRESHOLD_BUTTON); + gtk_signal_connect (GTK_OBJECT(s_view1_zoomoutbutton), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) ZOOMOUT_BUTTON); - + gtk_signal_connect (GTK_OBJECT(s_view1_endbutton), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) END_BUTTON); - - s_view1_hsadj = gtk_adjustment_new(0.0 /* initial value */, + + s_view1_hsadj = 
gtk_adjustment_new(0.0 /* initial value */, 0.0 /* minimum value */, 2000.0 /* maximum value */, - 0.1 /* step increment */, - 10.0/* page increment */, + 0.1 /* step increment */, + 10.0/* page increment */, 10.0/* page size */); s_view1_hscroll = gtk_hscrollbar_new (GTK_ADJUSTMENT(s_view1_hsadj)); gtk_signal_connect (GTK_OBJECT (s_view1_hsadj), "value-changed", - GTK_SIGNAL_FUNC (view1_hscroll), + GTK_SIGNAL_FUNC (view1_hscroll), (gpointer)s_view1_hscroll); gtk_box_pack_start (GTK_BOX(s_view1_hmenubox), s_view1_startbutton, @@ -2457,6 +2790,10 @@ void view1_init(void) gtk_box_pack_start (GTK_BOX(s_view1_hmenubox), s_view1_zoominbutton, FALSE, FALSE, 0); + gtk_box_pack_start (GTK_BOX(s_view1_hmenubox), s_view1_anomalybutton, + FALSE, FALSE, 0); + gtk_box_pack_start (GTK_BOX(s_view1_hmenubox), s_view1_anomalynextbutton, + FALSE, FALSE, 0); gtk_box_pack_start (GTK_BOX(s_view1_hmenubox), s_view1_searchbutton, FALSE, FALSE, 0); @@ -2466,7 +2803,7 @@ void view1_init(void) gtk_box_pack_start (GTK_BOX(s_view1_hmenubox), s_view1_zoomoutbutton, FALSE, FALSE, 0); - gtk_box_pack_start (GTK_BOX(s_view1_vbox), s_view1_hbox, + gtk_box_pack_start (GTK_BOX(s_view1_vbox), s_view1_hbox, TRUE, TRUE, 0); gtk_box_pack_start (GTK_BOX(s_view1_vbox), s_view1_hmenubox, @@ -2499,60 +2836,60 @@ void view1_init(void) s_view1_time_slew_right_button = gtk_button_new_with_label("TimeSlew->"); gtk_signal_connect (GTK_OBJECT(s_view1_snapbutton), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) SNAP_BUTTON); gtk_signal_connect (GTK_OBJECT(s_view1_nextbutton), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) NEXT_BUTTON); gtk_signal_connect (GTK_OBJECT(s_view1_delbutton), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) DEL_BUTTON); gtk_signal_connect (GTK_OBJECT(s_view1_chase_event_button), 
"clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) CHASE_EVENT_BUTTON); gtk_signal_connect (GTK_OBJECT(s_view1_chase_datum_button), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) CHASE_DATUM_BUTTON); gtk_signal_connect (GTK_OBJECT(s_view1_chase_track_button), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) CHASE_TRACK_BUTTON); gtk_signal_connect (GTK_OBJECT(s_view1_unchasebutton), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) UNCHASE_BUTTON); gtk_signal_connect (GTK_OBJECT(s_view1_forward_button), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) FORWARD_BUTTON); gtk_signal_connect (GTK_OBJECT(s_view1_backward_button), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) BACKWARD_BUTTON); gtk_signal_connect (GTK_OBJECT(s_view1_summary_button), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) SUMMARY_BUTTON); gtk_signal_connect (GTK_OBJECT(s_view1_nosummary_button), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) NOSUMMARY_BUTTON); gtk_signal_connect (GTK_OBJECT(s_view1_time_slew_left_button), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) SLEW_LEFT_BUTTON); gtk_signal_connect (GTK_OBJECT(s_view1_time_slew_right_button), "clicked", - GTK_SIGNAL_FUNC(view1_button_click_callback), + GTK_SIGNAL_FUNC(view1_button_click_callback), (gpointer) SLEW_RIGHT_BUTTON); gtk_box_pack_start (GTK_BOX(s_view1_vbox), s_view1_hmenubox2, FALSE, FALSE, 0); - + 
gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_snapbutton, FALSE, FALSE, 0); @@ -2586,14 +2923,18 @@ void view1_init(void) gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_nosummary_button, FALSE, FALSE, 0); - gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), + gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_time_slew_left_button, FALSE, FALSE, 0); - gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), + gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), s_view1_time_slew_right_button, FALSE, FALSE, 0); + gtk_box_pack_start (GTK_BOX(s_view1_hmenubox2), + s_view1_anomalythresholdbutton, + FALSE, FALSE, 0); + s_view1_label = gtk_label_new(NULL); gtk_box_pack_start (GTK_BOX(s_view1_vbox), s_view1_label, @@ -2610,12 +2951,12 @@ void view1_init(void) gtk_widget_hide (summary_mode ? s_view1_summary_button : s_view1_nosummary_button); - zi_source = gdk_bitmap_create_from_data (NULL, (char *)zi_bits, zi_width, + zi_source = gdk_bitmap_create_from_data (NULL, (char *)zi_bits, zi_width, zi_height); zi_mask = gdk_bitmap_create_from_data (NULL, (char *)zi_bkgd, zi_width, zi_height); - zi_cursor = (GdkCursor *) gdk_cursor_new_from_pixmap (zi_source, + zi_cursor = (GdkCursor *) gdk_cursor_new_from_pixmap (zi_source, zi_mask, &fg_black, &bg_white, zi_x_hot, zi_y_hot); @@ -2632,7 +2973,7 @@ void view1_init(void) void line_print (int x1, int y1, int x2, int y2) { fprintf(s_printfp, "newpath\n"); - fprintf(s_printfp, "%d %d moveto\n", xrt(x1, s_v1->total_height - y1), + fprintf(s_printfp, "%d %d moveto\n", xrt(x1, s_v1->total_height - y1), yrt(x1, s_v1->total_height - y1)); fprintf(s_printfp, "%d %d lineto\n", xrt (x2, s_v1->total_height - y2), @@ -2656,23 +2997,23 @@ GdkRectangle *tbox_print (char *s, int x, int y, enum view1_tbox_fn function, fprintf(s_printfp, "newpath\n"); fprintf(s_printfp, "0 setlinewidth\n"); - fprintf(s_printfp, "%d %d moveto\n", + fprintf(s_printfp, "%d %d moveto\n", xrt(rp->x, s_v1->total_height - rp->y), yrt(rp->x, s_v1->total_height - 
rp->y)); - - fprintf(s_printfp, "%d %d lineto\n", + + fprintf(s_printfp, "%d %d lineto\n", xrt (rp->x+rp->width, s_v1->total_height - rp->y), yrt (rp->x+rp->width, s_v1->total_height - rp->y)); - fprintf(s_printfp, "%d %d lineto\n", + fprintf(s_printfp, "%d %d lineto\n", xrt(rp->x+rp->width, s_v1->total_height - (rp->y+rp->height)), yrt(rp->x+rp->width, s_v1->total_height - (rp->y+rp->height))); - fprintf(s_printfp, "%d %d lineto\n", + fprintf(s_printfp, "%d %d lineto\n", xrt(rp->x, s_v1->total_height - (rp->y+rp->height)), yrt(rp->x, s_v1->total_height - (rp->y+rp->height))); - fprintf(s_printfp, "%d %d lineto\n", + fprintf(s_printfp, "%d %d lineto\n", xrt(rp->x, s_v1->total_height - rp->y), yrt(rp->x, s_v1->total_height - rp->y)); @@ -2683,7 +3024,7 @@ GdkRectangle *tbox_print (char *s, int x, int y, enum view1_tbox_fn function, (function == TBOX_PRINT_PLAIN)) { fprintf(s_printfp, "newpath\n"); - fprintf(s_printfp, "%d %d moveto\n", + fprintf(s_printfp, "%d %d moveto\n", xrt(x, s_v1->total_height - (y-2)), yrt(x, s_v1->total_height - (y-2))); fprintf(s_printfp, "gsave\n"); @@ -2693,10 +3034,10 @@ GdkRectangle *tbox_print (char *s, int x, int y, enum view1_tbox_fn function, } return(rp); -} +} /**************************************************************************** -* tbox - draws an optionally boxed string whose lower lefthand +* tbox - draws an optionally boxed string whose lower lefthand * corner is at (x, y). As usual, Y is backwards. 
****************************************************************************/ @@ -2723,17 +3064,17 @@ GdkRectangle *tbox (char *s, int x, int y, enum view1_tbox_fn function) /* Nothing */ } } - + switch (function) { case TBOX_DRAW_BOXED: gdk_draw_rectangle (pm, da->style->white_gc, TRUE, - x, y - (ascent+descent+3), width + 2, + x, y - (ascent+descent+3), width + 2, ascent + descent + 3); - + gdk_draw_rectangle (pm, da->style->black_gc, FALSE, - x, y - (ascent+descent+3), width + 2, + x, y - (ascent+descent+3), width + 2, ascent + descent + 3); - + gdk_draw_string (pm, g_font, da->style->black_gc, x + 1, y - 1, (const gchar *)s); /* NOTE FALLTHROUGH */ @@ -2759,10 +3100,10 @@ GdkRectangle *tbox (char *s, int x, int y, enum view1_tbox_fn function) if (function == TBOX_DRAW_EVENT) gtk_widget_draw (da, &update_rect); break; - - + + case TBOX_DRAW_PLAIN: - + gdk_draw_string (pm, g_font, da->style->black_gc, x + 1, y - 1, (const gchar *)s); /* NOTE FALLTHROUGH */ @@ -2900,7 +3241,7 @@ static void display_pid_axis(v1_geometry_t *vp) pid_sort_t *pp; int pid_index; char *label_fmt; - char tmpbuf [128]; + char tmpbuf [128]; /* No pids yet? Outta here */ if (g_pids == NULL) @@ -2968,7 +3309,7 @@ void view1_read_events_callback(void) max_vis_index = 300; if (max_vis_index > g_nevents) max_vis_index = g_nevents-1; - + s_v1->minvistime = 0LL; s_v1->maxvistime = (g_events[g_nevents - 1].time * 9)/ 8; /* Single event? Make the initial display 1s wide */ @@ -3026,7 +3367,7 @@ static void display_event_data(v1_geometry_t *vp) (ep->time < vp->maxvistime)) { pid_index = ep->pid->pid_index; set_color(pid_index); - + /* First filter: pid out of range */ if ((pid_index < vp->first_pid_index) || (pid_index >= vp->first_pid_index + vp->npids)) { @@ -3040,14 +3381,14 @@ static void display_event_data(v1_geometry_t *vp) ep++; continue; } - + /* Display it... 
*/ pid_index -= vp->first_pid_index; - + y = pid_index*vp->strip_height + vp->event_offset; - - x = vp->pid_ax_width + + + x = vp->pid_ax_width + (int)(((double)(ep->time - vp->minvistime)) / time_per_pixel); if (last_x_used != NULL && x < last_x_used[pid_index]) { @@ -3066,12 +3407,12 @@ static void display_event_data(v1_geometry_t *vp) } else { sprintf(tmpbuf, "SEARCH RESULT"); } - print_rect = tbox(tmpbuf, x, y - vp->pop_offset, + print_rect = tbox(tmpbuf, x, y - vp->pop_offset, TBOX_DRAW_BOXED+s_print_offset); line(x, y-vp->pop_offset, x, y, LINE_DRAW_BLACK+s_print_offset); if (last_x_used != NULL) last_x_used[pid_index] = x + print_rect->width; - } + } if (summary_mode) { int delta = vp->strip_height / 3; if (delta < 1) @@ -3138,7 +3479,7 @@ static void display_time_axis(v1_geometry_t *vp) units = "ns"; unit_divisor = 1.00; - + if ((vp->maxvistime / unit_divisor) > 1000) { units = "us"; unit_divisor = 1000.00; @@ -3157,7 +3498,7 @@ static void display_time_axis(v1_geometry_t *vp) line(x, y, vp->total_width, y, LINE_DRAW_BLACK+s_print_offset); xoffset = 0; - + for (i = 0; i < nticks; i++) { /* Tick mark */ line(x+xoffset, y-3, x+xoffset, y+3, LINE_DRAW_BLACK+s_print_offset); @@ -3170,7 +3511,7 @@ static void display_time_axis(v1_geometry_t *vp) sprintf (tmpbuf, "%.2f%s", time, units); tbox(tmpbuf, x+xoffset, y+15, TBOX_DRAW_PLAIN+s_print_offset); - + xoffset += vp->time_ax_spacing; } } @@ -3234,7 +3575,7 @@ void view1_about (char *tmpbuf) sprintf(tmpbuf+strlen(tmpbuf), "Minvistime %lld\nMaxvistime %lld\n", s_v1->minvistime, s_v1->maxvistime); - sprintf(tmpbuf+strlen(tmpbuf), "Strip Height %d\n", + sprintf(tmpbuf+strlen(tmpbuf), "Strip Height %d\n", s_v1->strip_height); for (nsnaps = 0, snaps = s_snapshots; snaps; snaps = snaps->next) { diff --git a/src/tools/vppapigen/test_vppapigen.py b/src/tools/vppapigen/test_vppapigen.py index 09187f4c9654..a8a0a49a8dba 100755 --- a/src/tools/vppapigen/test_vppapigen.py +++ b/src/tools/vppapigen/test_vppapigen.py @@ -1,4 
+1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import unittest from vppapigen import VPPAPI, Option, ParseError @@ -60,7 +60,8 @@ def test_flags(self): test_string = ''' nonexisting_flag define foo { u8 foo; }; ''' - self.assertRaises(ParseError, self.parser.parse_string, test_string) + with self.assertRaises(ParseError): + self.parser.parse_string(test_string) class TestService(unittest.TestCase): diff --git a/src/tools/vppapigen/vppapigen.py b/src/tools/vppapigen/vppapigen.py index 4ca9b954e671..431a9dc7f939 100755 --- a/src/tools/vppapigen/vppapigen.py +++ b/src/tools/vppapigen/vppapigen.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 from __future__ import print_function import ply.lex as lex @@ -133,11 +133,11 @@ def crc_block(block): class Service(): - def __init__(self, caller, reply, events=[], stream=False): + def __init__(self, caller, reply, events=None, stream=False): self.caller = caller self.reply = reply self.stream = stream - self.events = events + self.events = [] if events is None else events class Typedef(): @@ -170,8 +170,8 @@ def __init__(self, name, alias): else: a = { 'type': alias.fieldtype } self.alias = a - self.crc = binascii.crc32(str(alias)) & 0xffffffff - global_crc = binascii.crc32(str(alias), global_crc) + self.crc = binascii.crc32(str(alias).encode()) & 0xffffffff + global_crc = binascii.crc32(str(alias).encode(), global_crc) global_type_add(name) def __repr__(self): @@ -759,12 +759,17 @@ def main(): if sys.version[0] == '2': cliparser.add_argument('--input', type=argparse.FileType('r'), default=sys.stdin) + cliparser.add_argument('--output', nargs='?', + type=argparse.FileType('w'), + default=sys.stdout) + else: cliparser.add_argument('--input', type=argparse.FileType('r', encoding='UTF-8'), default=sys.stdin) - cliparser.add_argument('--output', nargs='?', type=argparse.FileType('w'), - default=sys.stdout) + cliparser.add_argument('--output', nargs='?', + type=argparse.FileType('w', encoding='UTF-8'), + 
default=sys.stdout) cliparser.add_argument('output_module', nargs='?', default='C') cliparser.add_argument('--debug', action='store_true') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 8a9a9971af23..1d93cc176482 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,8 @@ #include #include #include +#include +#include #include "vat/json_format.h" #include #include @@ -358,20 +361,6 @@ unformat_ipsec_policy_action (unformat_input_t * input, va_list * args) return 1; } -uword -unformat_ipsec_crypto_alg (unformat_input_t * input, va_list * args) -{ - u32 *r = va_arg (*args, u32 *); - - if (0); -#define _(v,f,s) else if (unformat (input, s)) *r = IPSEC_CRYPTO_ALG_##f; - foreach_ipsec_crypto_alg -#undef _ - else - return 0; - return 1; -} - u8 * format_ipsec_crypto_alg (u8 * s, va_list * args) { @@ -389,20 +378,6 @@ format_ipsec_crypto_alg (u8 * s, va_list * args) return format (s, "%s", t); } -uword -unformat_ipsec_integ_alg (unformat_input_t * input, va_list * args) -{ - u32 *r = va_arg (*args, u32 *); - - if (0); -#define _(v,f,s) else if (unformat (input, s)) *r = IPSEC_INTEG_ALG_##f; - foreach_ipsec_integ_alg -#undef _ - else - return 0; - return 1; -} - u8 * format_ipsec_integ_alg (u8 * s, va_list * args) { @@ -470,6 +445,34 @@ api_unformat_hw_if_index (unformat_input_t * input, va_list * args) #endif /* VPP_API_TEST_BUILTIN */ +uword +unformat_ipsec_api_crypto_alg (unformat_input_t * input, va_list * args) +{ + u32 *r = va_arg (*args, u32 *); + + if (0); +#define _(v,f,s) else if (unformat (input, s)) *r = IPSEC_API_CRYPTO_ALG_##f; + foreach_ipsec_crypto_alg +#undef _ + else + return 0; + return 1; +} + +uword +unformat_ipsec_api_integ_alg (unformat_input_t * input, va_list * args) +{ + u32 *r = va_arg (*args, u32 *); + + if (0); +#define _(v,f,s) else if (unformat (input, s)) *r = IPSEC_API_INTEG_ALG_##f; + foreach_ipsec_integ_alg +#undef _ + else 
+ return 0; + return 1; +} + static uword unformat_policer_rate_type (unformat_input_t * input, va_list * args) { @@ -1441,8 +1444,8 @@ vl_api_ip4_arp_event_t_handler (vl_api_ip4_arp_event_t * mp) u32 sw_if_index = ntohl (mp->sw_if_index); errmsg ("arp %s event: pid %d address %U new mac %U sw_if_index %d\n", mp->mac_ip ? "mac/ip binding" : "address resolution", - ntohl (mp->pid), format_ip4_address, &mp->address, - format_ethernet_address, mp->new_mac, sw_if_index); + ntohl (mp->pid), format_ip4_address, mp->ip, + format_vl_api_mac_address, &mp->mac, sw_if_index); } static void @@ -1457,8 +1460,8 @@ vl_api_ip6_nd_event_t_handler (vl_api_ip6_nd_event_t * mp) u32 sw_if_index = ntohl (mp->sw_if_index); errmsg ("ip6 nd %s event: pid %d address %U new mac %U sw_if_index %d\n", mp->mac_ip ? "mac/ip binding" : "address resolution", - ntohl (mp->pid), format_ip6_address, mp->address, - format_ethernet_address, mp->new_mac, sw_if_index); + ntohl (mp->pid), format_vl_api_ip6_address, mp->ip, + format_vl_api_mac_address, mp->mac, sw_if_index); } static void @@ -1735,8 +1738,8 @@ static void vl_api_bridge_flags_reply_t_handler_json vam->result_ready = 1; } -static void vl_api_tap_connect_reply_t_handler - (vl_api_tap_connect_reply_t * mp) +static void +vl_api_tap_create_v2_reply_t_handler (vl_api_tap_create_v2_reply_t * mp) { vat_main_t *vam = &vat_main; i32 retval = ntohl (mp->retval); @@ -1753,8 +1756,8 @@ static void vl_api_tap_connect_reply_t_handler } -static void vl_api_tap_connect_reply_t_handler_json - (vl_api_tap_connect_reply_t * mp) +static void vl_api_tap_create_v2_reply_t_handler_json + (vl_api_tap_create_v2_reply_t * mp) { vat_main_t *vam = &vat_main; vat_json_node_t node; @@ -1772,41 +1775,7 @@ static void vl_api_tap_connect_reply_t_handler_json } static void -vl_api_tap_modify_reply_t_handler (vl_api_tap_modify_reply_t * mp) -{ - vat_main_t *vam = &vat_main; - i32 retval = ntohl (mp->retval); - if (vam->async_mode) - { - vam->async_errors += (retval < 0); - } 
- else - { - vam->retval = retval; - vam->sw_if_index = ntohl (mp->sw_if_index); - vam->result_ready = 1; - } -} - -static void vl_api_tap_modify_reply_t_handler_json - (vl_api_tap_modify_reply_t * mp) -{ - vat_main_t *vam = &vat_main; - vat_json_node_t node; - - vat_json_init_object (&node); - vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); - vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index)); - - vat_json_print (vam->ofp, &node); - vat_json_free (&node); - - vam->retval = ntohl (mp->retval); - vam->result_ready = 1; -} - -static void -vl_api_tap_delete_reply_t_handler (vl_api_tap_delete_reply_t * mp) +vl_api_tap_delete_v2_reply_t_handler (vl_api_tap_delete_v2_reply_t * mp) { vat_main_t *vam = &vat_main; i32 retval = ntohl (mp->retval); @@ -1821,8 +1790,8 @@ vl_api_tap_delete_reply_t_handler (vl_api_tap_delete_reply_t * mp) } } -static void vl_api_tap_delete_reply_t_handler_json - (vl_api_tap_delete_reply_t * mp) +static void vl_api_tap_delete_v2_reply_t_handler_json + (vl_api_tap_delete_v2_reply_t * mp) { vat_main_t *vam = &vat_main; vat_json_node_t node; @@ -1838,7 +1807,8 @@ static void vl_api_tap_delete_reply_t_handler_json } static void -vl_api_tap_create_v2_reply_t_handler (vl_api_tap_create_v2_reply_t * mp) +vl_api_virtio_pci_create_reply_t_handler (vl_api_virtio_pci_create_reply_t * + mp) { vat_main_t *vam = &vat_main; i32 retval = ntohl (mp->retval); @@ -1852,11 +1822,10 @@ vl_api_tap_create_v2_reply_t_handler (vl_api_tap_create_v2_reply_t * mp) vam->sw_if_index = ntohl (mp->sw_if_index); vam->result_ready = 1; } - } -static void vl_api_tap_create_v2_reply_t_handler_json - (vl_api_tap_create_v2_reply_t * mp) +static void vl_api_virtio_pci_create_reply_t_handler_json + (vl_api_virtio_pci_create_reply_t * mp) { vat_main_t *vam = &vat_main; vat_json_node_t node; @@ -1874,7 +1843,8 @@ static void vl_api_tap_create_v2_reply_t_handler_json } static void -vl_api_tap_delete_v2_reply_t_handler (vl_api_tap_delete_v2_reply_t * 
mp) +vl_api_virtio_pci_delete_reply_t_handler (vl_api_virtio_pci_delete_reply_t * + mp) { vat_main_t *vam = &vat_main; i32 retval = ntohl (mp->retval); @@ -1889,8 +1859,8 @@ vl_api_tap_delete_v2_reply_t_handler (vl_api_tap_delete_v2_reply_t * mp) } } -static void vl_api_tap_delete_v2_reply_t_handler_json - (vl_api_tap_delete_v2_reply_t * mp) +static void vl_api_virtio_pci_delete_reply_t_handler_json + (vl_api_virtio_pci_delete_reply_t * mp) { vat_main_t *vam = &vat_main; vat_json_node_t node; @@ -5238,8 +5208,8 @@ _(want_l2_macs_events_reply) \ _(input_acl_set_interface_reply) \ _(ipsec_spd_add_del_reply) \ _(ipsec_interface_add_del_spd_reply) \ -_(ipsec_spd_add_del_entry_reply) \ -_(ipsec_sad_add_del_entry_reply) \ +_(ipsec_spd_entry_add_del_reply) \ +_(ipsec_sad_entry_add_del_reply) \ _(ipsec_sa_set_key_reply) \ _(ipsec_tunnel_if_add_del_reply) \ _(ipsec_tunnel_if_set_key_reply) \ @@ -5392,13 +5362,12 @@ _(L2FIB_FLUSH_INT_REPLY, l2fib_flush_int_reply) \ _(L2FIB_FLUSH_BD_REPLY, l2fib_flush_bd_reply) \ _(L2_FLAGS_REPLY, l2_flags_reply) \ _(BRIDGE_FLAGS_REPLY, bridge_flags_reply) \ -_(TAP_CONNECT_REPLY, tap_connect_reply) \ -_(TAP_MODIFY_REPLY, tap_modify_reply) \ -_(TAP_DELETE_REPLY, tap_delete_reply) \ -_(SW_INTERFACE_TAP_DETAILS, sw_interface_tap_details) \ _(TAP_CREATE_V2_REPLY, tap_create_v2_reply) \ _(TAP_DELETE_V2_REPLY, tap_delete_v2_reply) \ _(SW_INTERFACE_TAP_V2_DETAILS, sw_interface_tap_v2_details) \ +_(VIRTIO_PCI_CREATE_REPLY, virtio_pci_create_reply) \ +_(VIRTIO_PCI_DELETE_REPLY, virtio_pci_delete_reply) \ +_(SW_INTERFACE_VIRTIO_PCI_DETAILS, sw_interface_virtio_pci_details) \ _(BOND_CREATE_REPLY, bond_create_reply) \ _(BOND_DELETE_REPLY, bond_delete_reply) \ _(BOND_ENSLAVE_REPLY, bond_enslave_reply) \ @@ -5494,8 +5463,8 @@ _(IP_ADDRESS_DETAILS, ip_address_details) \ _(IP_DETAILS, ip_details) \ _(IPSEC_SPD_ADD_DEL_REPLY, ipsec_spd_add_del_reply) \ _(IPSEC_INTERFACE_ADD_DEL_SPD_REPLY, ipsec_interface_add_del_spd_reply) \ -_(IPSEC_SPD_ADD_DEL_ENTRY_REPLY, 
ipsec_spd_add_del_entry_reply) \ -_(IPSEC_SAD_ADD_DEL_ENTRY_REPLY, ipsec_sad_add_del_entry_reply) \ +_(IPSEC_SPD_ENTRY_ADD_DEL_REPLY, ipsec_spd_entry_add_del_reply) \ +_(IPSEC_SAD_ENTRY_ADD_DEL_REPLY, ipsec_sad_entry_add_del_reply) \ _(IPSEC_SA_DETAILS, ipsec_sa_details) \ _(IPSEC_SA_SET_KEY_REPLY, ipsec_sa_set_key_reply) \ _(IPSEC_TUNNEL_IF_ADD_DEL_REPLY, ipsec_tunnel_if_add_del_reply) \ @@ -7515,206 +7484,6 @@ api_bd_ip_mac_dump (vat_main_t * vam) return ret; } -static int -api_tap_connect (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_tap_connect_t *mp; - u8 mac_address[6]; - u8 random_mac = 1; - u8 name_set = 0; - u8 *tap_name; - u8 *tag = 0; - ip4_address_t ip4_address; - u32 ip4_mask_width; - int ip4_address_set = 0; - ip6_address_t ip6_address; - u32 ip6_mask_width; - int ip6_address_set = 0; - int ret; - - clib_memset (mac_address, 0, sizeof (mac_address)); - - /* Parse args required to build the message */ - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "mac %U", unformat_ethernet_address, mac_address)) - { - random_mac = 0; - } - else if (unformat (i, "random-mac")) - random_mac = 1; - else if (unformat (i, "tapname %s", &tap_name)) - name_set = 1; - else if (unformat (i, "tag %s", &tag)) - ; - else if (unformat (i, "address %U/%d", - unformat_ip4_address, &ip4_address, &ip4_mask_width)) - ip4_address_set = 1; - else if (unformat (i, "address %U/%d", - unformat_ip6_address, &ip6_address, &ip6_mask_width)) - ip6_address_set = 1; - else - break; - } - - if (name_set == 0) - { - errmsg ("missing tap name"); - return -99; - } - if (vec_len (tap_name) > 63) - { - errmsg ("tap name too long"); - return -99; - } - vec_add1 (tap_name, 0); - - if (vec_len (tag) > 63) - { - errmsg ("tag too long"); - return -99; - } - - /* Construct the API message */ - M (TAP_CONNECT, mp); - - mp->use_random_mac = random_mac; - clib_memcpy (mp->mac_address, mac_address, 6); - clib_memcpy (mp->tap_name, tap_name, vec_len 
(tap_name)); - if (tag) - clib_memcpy (mp->tag, tag, vec_len (tag)); - - if (ip4_address_set) - { - mp->ip4_address_set = 1; - clib_memcpy (mp->ip4_address, &ip4_address, sizeof (mp->ip4_address)); - mp->ip4_mask_width = ip4_mask_width; - } - if (ip6_address_set) - { - mp->ip6_address_set = 1; - clib_memcpy (mp->ip6_address, &ip6_address, sizeof (mp->ip6_address)); - mp->ip6_mask_width = ip6_mask_width; - } - - vec_free (tap_name); - vec_free (tag); - - /* send it... */ - S (mp); - - /* Wait for a reply... */ - W (ret); - return ret; -} - -static int -api_tap_modify (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_tap_modify_t *mp; - u8 mac_address[6]; - u8 random_mac = 1; - u8 name_set = 0; - u8 *tap_name; - u32 sw_if_index = ~0; - u8 sw_if_index_set = 0; - int ret; - - clib_memset (mac_address, 0, sizeof (mac_address)); - - /* Parse args required to build the message */ - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "sw_if_index %d", &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "mac %U", unformat_ethernet_address, mac_address)) - { - random_mac = 0; - } - else if (unformat (i, "random-mac")) - random_mac = 1; - else if (unformat (i, "tapname %s", &tap_name)) - name_set = 1; - else - break; - } - - if (sw_if_index_set == 0) - { - errmsg ("missing vpp interface name"); - return -99; - } - if (name_set == 0) - { - errmsg ("missing tap name"); - return -99; - } - if (vec_len (tap_name) > 63) - { - errmsg ("tap name too long"); - } - vec_add1 (tap_name, 0); - - /* Construct the API message */ - M (TAP_MODIFY, mp); - - mp->use_random_mac = random_mac; - mp->sw_if_index = ntohl (sw_if_index); - clib_memcpy (mp->mac_address, mac_address, 6); - clib_memcpy (mp->tap_name, tap_name, vec_len (tap_name)); - vec_free (tap_name); - - /* send it... */ - S (mp); - - /* Wait for a reply... 
*/ - W (ret); - return ret; -} - -static int -api_tap_delete (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_tap_delete_t *mp; - u32 sw_if_index = ~0; - u8 sw_if_index_set = 0; - int ret; - - /* Parse args required to build the message */ - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "sw_if_index %d", &sw_if_index)) - sw_if_index_set = 1; - else - break; - } - - if (sw_if_index_set == 0) - { - errmsg ("missing vpp interface name"); - return -99; - } - - /* Construct the API message */ - M (TAP_DELETE, mp); - - mp->sw_if_index = ntohl (sw_if_index); - - /* send it... */ - S (mp); - - /* Wait for a reply... */ - W (ret); - return ret; -} - static int api_tap_create_v2 (vat_main_t * vam) { @@ -7908,6 +7677,149 @@ api_tap_delete_v2 (vat_main_t * vam) return ret; } +uword +unformat_pci_addr (unformat_input_t * input, va_list * args) +{ + struct pci_addr_t + { + u16 domain; + u8 bus; + u8 slot:5; + u8 function:3; + } *addr; + addr = va_arg (*args, struct pci_addr_t *); + u32 x[4]; + + if (!unformat (input, "%x:%x:%x.%x", &x[0], &x[1], &x[2], &x[3])) + return 0; + + addr->domain = x[0]; + addr->bus = x[1]; + addr->slot = x[2]; + addr->function = x[3]; + + return 1; +} + +static int +api_virtio_pci_create (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_virtio_pci_create_t *mp; + u8 mac_address[6]; + u8 random_mac = 1; + u32 pci_addr = 0; + u64 features = (u64) ~ (0ULL); + u32 rx_ring_sz = 0, tx_ring_sz = 0; + int ret; + + clib_memset (mac_address, 0, sizeof (mac_address)); + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "hw-addr %U", unformat_ethernet_address, mac_address)) + { + random_mac = 0; + } + else if (unformat (i, "pci-addr %U", unformat_pci_addr, &pci_addr)) + ; + else if (unformat (i, "features 0x%llx", 
&features)) + ; + else if (unformat (i, "rx-ring-size %u", &rx_ring_sz)) + ; + else if (unformat (i, "tx-ring-size %u", &tx_ring_sz)) + ; + else + break; + } + + if (pci_addr == 0) + { + errmsg ("pci address must be non zero. "); + return -99; + } + if (!is_pow2 (rx_ring_sz)) + { + errmsg ("rx ring size must be power of 2. "); + return -99; + } + if (rx_ring_sz > 32768) + { + errmsg ("rx ring size must be 32768 or lower. "); + return -99; + } + if (!is_pow2 (tx_ring_sz)) + { + errmsg ("tx ring size must be power of 2. "); + return -99; + } + if (tx_ring_sz > 32768) + { + errmsg ("tx ring size must be 32768 or lower. "); + return -99; + } + + /* Construct the API message */ + M (VIRTIO_PCI_CREATE, mp); + + mp->use_random_mac = random_mac; + + mp->pci_addr = htonl (pci_addr); + mp->features = clib_host_to_net_u64 (features); + mp->rx_ring_sz = htons (rx_ring_sz); + mp->tx_ring_sz = htons (tx_ring_sz); + + if (random_mac == 0) + clib_memcpy (mp->mac_address, mac_address, 6); + + /* send it... */ + S (mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int +api_virtio_pci_delete (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_virtio_pci_delete_t *mp; + u32 sw_if_index = ~0; + u8 sw_if_index_set = 0; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + sw_if_index_set = 1; + else + break; + } + + if (sw_if_index_set == 0) + { + errmsg ("missing vpp interface name. "); + return -99; + } + + /* Construct the API message */ + M (VIRTIO_PCI_DELETE, mp); + + mp->sw_if_index = htonl (sw_if_index); + + /* send it... */ + S (mp); + + /* Wait for a reply... 
*/ + W (ret); + return ret; +} + static int api_bond_create (vat_main_t * vam) { @@ -9208,7 +9120,7 @@ api_proxy_arp_add_del (vat_main_t * vam) vl_api_proxy_arp_add_del_t *mp; u32 vrf_id = 0; u8 is_add = 1; - ip4_address_t lo, hi; + vl_api_ip4_address_t lo, hi; u8 range_set = 0; int ret; @@ -9216,8 +9128,8 @@ api_proxy_arp_add_del (vat_main_t * vam) { if (unformat (i, "vrf %d", &vrf_id)) ; - else if (unformat (i, "%U - %U", unformat_ip4_address, &lo, - unformat_ip4_address, &hi)) + else if (unformat (i, "%U - %U", unformat_vl_api_ip4_address, &lo, + unformat_vl_api_ip4_address, &hi)) range_set = 1; else if (unformat (i, "del")) is_add = 0; @@ -9236,10 +9148,10 @@ api_proxy_arp_add_del (vat_main_t * vam) M (PROXY_ARP_ADD_DEL, mp); - mp->proxy.vrf_id = ntohl (vrf_id); + mp->proxy.table_id = ntohl (vrf_id); mp->is_add = is_add; - clib_memcpy (mp->proxy.low_address, &lo, sizeof (mp->proxy.low_address)); - clib_memcpy (mp->proxy.hi_address, &hi, sizeof (mp->proxy.hi_address)); + clib_memcpy (mp->proxy.low, &lo, sizeof (lo)); + clib_memcpy (mp->proxy.hi, &hi, sizeof (hi)); S (mp); W (ret); @@ -9447,27 +9359,25 @@ api_sw_interface_set_unnumbered (vat_main_t * vam) static int api_ip_neighbor_add_del (vat_main_t * vam) { + vl_api_mac_address_t mac_address; unformat_input_t *i = vam->input; vl_api_ip_neighbor_add_del_t *mp; + vl_api_address_t ip_address; u32 sw_if_index; u8 sw_if_index_set = 0; u8 is_add = 1; - u8 is_static = 0; - u8 is_no_fib_entry = 0; - u8 mac_address[6]; u8 mac_set = 0; - u8 v4_address_set = 0; - u8 v6_address_set = 0; - ip4_address_t v4address; - ip6_address_t v6address; + u8 address_set = 0; int ret; + ip_neighbor_flags_t flags; - clib_memset (mac_address, 0, sizeof (mac_address)); - + flags = IP_NEIGHBOR_FLAG_NONE; + clib_memset (&ip_address, 0, sizeof (ip_address)); + clib_memset (&mac_address, 0, sizeof (mac_address)); /* Parse args required to build the message */ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { - if (unformat (i, "mac 
%U", unformat_ethernet_address, mac_address)) + if (unformat (i, "mac %U", unformat_vl_api_mac_address, &mac_address)) { mac_set = 1; } @@ -9478,14 +9388,12 @@ api_ip_neighbor_add_del (vat_main_t * vam) sw_if_index_set = 1; else if (unformat (i, "sw_if_index %d", &sw_if_index)) sw_if_index_set = 1; - else if (unformat (i, "is_static")) - is_static = 1; + else if (unformat (i, "static")) + flags |= IP_NEIGHBOR_FLAG_STATIC; else if (unformat (i, "no-fib-entry")) - is_no_fib_entry = 1; - else if (unformat (i, "dst %U", unformat_ip4_address, &v4address)) - v4_address_set = 1; - else if (unformat (i, "dst %U", unformat_ip6_address, &v6address)) - v6_address_set = 1; + flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY; + else if (unformat (i, "dst %U", unformat_vl_api_address, &ip_address)) + address_set = 1; else { clib_warning ("parse error '%U'", format_unformat_error, i); @@ -9498,12 +9406,7 @@ api_ip_neighbor_add_del (vat_main_t * vam) errmsg ("missing interface name or sw_if_index"); return -99; } - if (v4_address_set && v6_address_set) - { - errmsg ("both v4 and v6 addresses set"); - return -99; - } - if (!v4_address_set && !v6_address_set) + if (!address_set) { errmsg ("no address set"); return -99; @@ -9512,22 +9415,14 @@ api_ip_neighbor_add_del (vat_main_t * vam) /* Construct the API message */ M (IP_NEIGHBOR_ADD_DEL, mp); - mp->sw_if_index = ntohl (sw_if_index); + mp->neighbor.sw_if_index = ntohl (sw_if_index); mp->is_add = is_add; - mp->is_static = is_static; - mp->is_no_adj_fib = is_no_fib_entry; - if (mac_set) - clib_memcpy (mp->mac_address, mac_address, 6); - if (v6_address_set) - { - mp->is_ipv6 = 1; - clib_memcpy (mp->dst_address, &v6address, sizeof (v6address)); - } - else - { - /* mp->is_ipv6 = 0; via clib_memset in M macro above */ - clib_memcpy (mp->dst_address, &v4address, sizeof (v4address)); - } + mp->neighbor.flags = htonl (flags); + if (mac_set) + clib_memcpy (&mp->neighbor.mac_address, &mac_address, + sizeof (mac_address)); + if (address_set) + 
clib_memcpy (&mp->neighbor.ip_address, &ip_address, sizeof (ip_address)); /* send it... */ S (mp); @@ -10226,7 +10121,7 @@ api_ip6nd_proxy_add_del (vat_main_t * vam) vl_api_ip6nd_proxy_add_del_t *mp; u32 sw_if_index = ~0; u8 v6_address_set = 0; - ip6_address_t v6address; + vl_api_ip6_address_t v6address; u8 is_del = 0; int ret; @@ -10237,7 +10132,7 @@ api_ip6nd_proxy_add_del (vat_main_t * vam) ; else if (unformat (i, "sw_if_index %d", &sw_if_index)) ; - else if (unformat (i, "%U", unformat_ip6_address, &v6address)) + else if (unformat (i, "%U", unformat_vl_api_ip6_address, &v6address)) v6_address_set = 1; if (unformat (i, "del")) is_del = 1; @@ -10264,7 +10159,7 @@ api_ip6nd_proxy_add_del (vat_main_t * vam) mp->is_del = is_del; mp->sw_if_index = ntohl (sw_if_index); - clib_memcpy (mp->address, &v6address, sizeof (v6address)); + clib_memcpy (mp->ip, v6address, sizeof (v6address)); /* send it... */ S (mp); @@ -10299,7 +10194,7 @@ static void vl_api_ip6nd_proxy_details_t_handler vat_main_t *vam = &vat_main; print (vam->ofp, "host %U sw_if_index %d", - format_ip6_address, mp->address, ntohl (mp->sw_if_index)); + format_vl_api_ip6_address, mp->ip, ntohl (mp->sw_if_index)); } static void vl_api_ip6nd_proxy_details_t_handler_json @@ -10319,7 +10214,7 @@ static void vl_api_ip6nd_proxy_details_t_handler_json vat_json_init_object (node); vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index)); - clib_memcpy (&ip6, mp->address, sizeof (ip6)); + clib_memcpy (&ip6, mp->ip, sizeof (ip6)); vat_json_object_add_ip6 (node, "host", ip6); } @@ -10332,7 +10227,7 @@ api_sw_interface_ip6nd_ra_prefix (vat_main_t * vam) u8 sw_if_index_set = 0; u32 address_length = 0; u8 v6_address_set = 0; - ip6_address_t v6address; + vl_api_prefix_t pfx; u8 use_default = 0; u8 no_advertise = 0; u8 off_link = 0; @@ -10350,8 +10245,7 @@ api_sw_interface_ip6nd_ra_prefix (vat_main_t * vam) sw_if_index_set = 1; else if (unformat (i, "sw_if_index %d", &sw_if_index)) sw_if_index_set = 1; - else 
if (unformat (i, "%U/%d", - unformat_ip6_address, &v6address, &address_length)) + else if (unformat (i, "%U", unformat_vl_api_prefix, &pfx)) v6_address_set = 1; else if (unformat (i, "val_life %d", &val_lifetime)) ; @@ -10391,8 +10285,7 @@ api_sw_interface_ip6nd_ra_prefix (vat_main_t * vam) M (SW_INTERFACE_IP6ND_RA_PREFIX, mp); mp->sw_if_index = ntohl (sw_if_index); - clib_memcpy (mp->address, &v6address, sizeof (v6address)); - mp->address_length = address_length; + clib_memcpy (&mp->prefix, &pfx, sizeof (pfx)); mp->use_default = use_default; mp->no_advertise = no_advertise; mp->off_link = off_link; @@ -12659,53 +12552,6 @@ api_sw_if_l2tpv3_tunnel_dump (vat_main_t * vam) } -static void vl_api_sw_interface_tap_details_t_handler - (vl_api_sw_interface_tap_details_t * mp) -{ - vat_main_t *vam = &vat_main; - - print (vam->ofp, "%-16s %d", - mp->dev_name, clib_net_to_host_u32 (mp->sw_if_index)); -} - -static void vl_api_sw_interface_tap_details_t_handler_json - (vl_api_sw_interface_tap_details_t * mp) -{ - vat_main_t *vam = &vat_main; - vat_json_node_t *node = NULL; - - if (VAT_JSON_ARRAY != vam->json_tree.type) - { - ASSERT (VAT_JSON_NONE == vam->json_tree.type); - vat_json_init_array (&vam->json_tree); - } - node = vat_json_array_add (&vam->json_tree); - - vat_json_init_object (node); - vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index)); - vat_json_object_add_string_copy (node, "dev_name", mp->dev_name); -} - -static int -api_sw_interface_tap_dump (vat_main_t * vam) -{ - vl_api_sw_interface_tap_dump_t *mp; - vl_api_control_ping_t *mp_ping; - int ret; - - print (vam->ofp, "\n%-16s %s", "dev_name", "sw_if_index"); - /* Get list of tap interfaces */ - M (SW_INTERFACE_TAP_DUMP, mp); - S (mp); - - /* Use a control ping for synchronization */ - MPING (CONTROL_PING, mp_ping); - S (mp_ping); - - W (ret); - return ret; -} - static void vl_api_sw_interface_tap_v2_details_t_handler (vl_api_sw_interface_tap_v2_details_t * mp) { @@ -12789,6 +12635,85 @@ 
api_sw_interface_tap_v2_dump (vat_main_t * vam) return ret; } +static void vl_api_sw_interface_virtio_pci_details_t_handler + (vl_api_sw_interface_virtio_pci_details_t * mp) +{ + vat_main_t *vam = &vat_main; + + typedef union + { + struct + { + u16 domain; + u8 bus; + u8 slot:5; + u8 function:3; + }; + u32 as_u32; + } pci_addr_t; + pci_addr_t addr; + addr.as_u32 = ntohl (mp->pci_addr); + u8 *pci_addr = format (0, "%04x:%02x:%02x.%x", addr.domain, addr.bus, + addr.slot, addr.function); + + print (vam->ofp, + "\n%-12s %-12d %-12d %-12d %-17U 0x%-08llx", + pci_addr, ntohl (mp->sw_if_index), + ntohs (mp->rx_ring_sz), ntohs (mp->tx_ring_sz), + format_ethernet_address, mp->mac_addr, + clib_net_to_host_u64 (mp->features)); + vec_free (pci_addr); +} + +static void vl_api_sw_interface_virtio_pci_details_t_handler_json + (vl_api_sw_interface_virtio_pci_details_t * mp) +{ + vat_main_t *vam = &vat_main; + vat_json_node_t *node = NULL; + + if (VAT_JSON_ARRAY != vam->json_tree.type) + { + ASSERT (VAT_JSON_NONE == vam->json_tree.type); + vat_json_init_array (&vam->json_tree); + } + node = vat_json_array_add (&vam->json_tree); + + vat_json_init_object (node); + vat_json_object_add_uint (node, "pci-addr", ntohl (mp->pci_addr)); + vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index)); + vat_json_object_add_uint (node, "rx_ring_sz", ntohs (mp->rx_ring_sz)); + vat_json_object_add_uint (node, "tx_ring_sz", ntohs (mp->tx_ring_sz)); + vat_json_object_add_uint (node, "features", + clib_net_to_host_u64 (mp->features)); + vat_json_object_add_string_copy (node, "mac_addr", + format (0, "%U", format_ethernet_address, + &mp->mac_addr)); +} + +static int +api_sw_interface_virtio_pci_dump (vat_main_t * vam) +{ + vl_api_sw_interface_virtio_pci_dump_t *mp; + vl_api_control_ping_t *mp_ping; + int ret; + + print (vam->ofp, + "\n%-12s %-12s %-12s %-12s %-17s %-08s", + "pci_addr", "sw_if_index", "rx_ring_sz", "tx_ring_sz", + "mac_addr", "features"); + + /* Get list of tap interfaces 
*/ + M (SW_INTERFACE_VIRTIO_PCI_DUMP, mp); + S (mp); + + /* Use a control ping for synchronization */ + MPING (CONTROL_PING, mp_ping); + S (mp_ping); + + W (ret); + return ret; +} + static int api_vxlan_offload_rx (vat_main_t * vam) { @@ -14369,10 +14294,9 @@ api_ip_probe_neighbor (vat_main_t * vam) { unformat_input_t *i = vam->input; vl_api_ip_probe_neighbor_t *mp; + vl_api_address_t dst_adr; u8 int_set = 0; u8 adr_set = 0; - u8 is_ipv6 = 0; - u8 dst_adr[16]; u32 sw_if_index; int ret; @@ -14382,13 +14306,8 @@ api_ip_probe_neighbor (vat_main_t * vam) int_set = 1; else if (unformat (i, "sw_if_index %d", &sw_if_index)) int_set = 1; - else if (unformat (i, "address %U", unformat_ip4_address, dst_adr)) + else if (unformat (i, "address %U", unformat_vl_api_address, dst_adr)) adr_set = 1; - else if (unformat (i, "address %U", unformat_ip6_address, dst_adr)) - { - adr_set = 1; - is_ipv6 = 1; - } else break; } @@ -14408,8 +14327,7 @@ api_ip_probe_neighbor (vat_main_t * vam) M (IP_PROBE_NEIGHBOR, mp); mp->sw_if_index = ntohl (sw_if_index); - mp->is_ipv6 = is_ipv6; - clib_memcpy (mp->dst_address, dst_adr, sizeof (dst_adr)); + clib_memcpy (&mp->dst, &dst_adr, sizeof (dst_adr)); S (mp); W (ret); @@ -14517,7 +14435,7 @@ api_want_ip4_arp_events (vat_main_t * vam) M (WANT_IP4_ARP_EVENTS, mp); mp->enable_disable = enable_disable; mp->pid = htonl (getpid ()); - mp->address = address.as_u32; + clib_memcpy (mp->ip, &address, sizeof (address)); S (mp); W (ret); @@ -14529,14 +14447,15 @@ api_want_ip6_nd_events (vat_main_t * vam) { unformat_input_t *line_input = vam->input; vl_api_want_ip6_nd_events_t *mp; - ip6_address_t address; + vl_api_ip6_address_t address; int address_set = 0; u32 enable_disable = 1; int ret; while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "address %U", unformat_ip6_address, &address)) + if (unformat + (line_input, "address %U", unformat_vl_api_ip6_address, &address)) address_set = 1; else if (unformat (line_input, 
"del")) enable_disable = 0; @@ -14553,7 +14472,7 @@ api_want_ip6_nd_events (vat_main_t * vam) M (WANT_IP6_ND_EVENTS, mp); mp->enable_disable = enable_disable; mp->pid = htonl (getpid ()); - clib_memcpy (mp->address, &address, sizeof (ip6_address_t)); + clib_memcpy (&mp->ip, &address, sizeof (address)); S (mp); W (ret); @@ -14910,26 +14829,24 @@ api_ipsec_interface_add_del_spd (vat_main_t * vam) } static int -api_ipsec_spd_add_del_entry (vat_main_t * vam) +api_ipsec_spd_entry_add_del (vat_main_t * vam) { unformat_input_t *i = vam->input; - vl_api_ipsec_spd_add_del_entry_t *mp; + vl_api_ipsec_spd_entry_add_del_t *mp; u8 is_add = 1, is_outbound = 0, is_ipv6 = 0, is_ip_any = 1; u32 spd_id = 0, sa_id = 0, protocol = 0, policy = 0; i32 priority = 0; u32 rport_start = 0, rport_stop = (u32) ~ 0; u32 lport_start = 0, lport_stop = (u32) ~ 0; - ip4_address_t laddr4_start, laddr4_stop, raddr4_start, raddr4_stop; - ip6_address_t laddr6_start, laddr6_stop, raddr6_start, raddr6_stop; + vl_api_address_t laddr_start = { }, laddr_stop = + { + }, raddr_start = + { + }, raddr_stop = + { + }; int ret; - laddr4_start.as_u32 = raddr4_start.as_u32 = 0; - laddr4_stop.as_u32 = raddr4_stop.as_u32 = (u32) ~ 0; - laddr6_start.as_u64[0] = raddr6_start.as_u64[0] = 0; - laddr6_start.as_u64[1] = raddr6_start.as_u64[1] = 0; - laddr6_stop.as_u64[0] = raddr6_stop.as_u64[0] = (u64) ~ 0; - laddr6_stop.as_u64[1] = raddr6_stop.as_u64[1] = (u64) ~ 0; - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { if (unformat (i, "del")) @@ -14954,58 +14871,18 @@ api_ipsec_spd_add_del_entry (vat_main_t * vam) ; else if (unformat (i, "rport_stop %d", &rport_stop)) ; - else - if (unformat - (i, "laddr_start %U", unformat_ip4_address, &laddr4_start)) - { - is_ipv6 = 0; - is_ip_any = 0; - } - else - if (unformat (i, "laddr_stop %U", unformat_ip4_address, &laddr4_stop)) - { - is_ipv6 = 0; - is_ip_any = 0; - } - else - if (unformat - (i, "raddr_start %U", unformat_ip4_address, &raddr4_start)) - { - is_ipv6 = 0; - 
is_ip_any = 0; - } - else - if (unformat (i, "raddr_stop %U", unformat_ip4_address, &raddr4_stop)) - { - is_ipv6 = 0; - is_ip_any = 0; - } - else - if (unformat - (i, "laddr_start %U", unformat_ip6_address, &laddr6_start)) - { - is_ipv6 = 1; - is_ip_any = 0; - } - else - if (unformat (i, "laddr_stop %U", unformat_ip6_address, &laddr6_stop)) - { - is_ipv6 = 1; - is_ip_any = 0; - } - else - if (unformat - (i, "raddr_start %U", unformat_ip6_address, &raddr6_start)) - { - is_ipv6 = 1; - is_ip_any = 0; - } - else - if (unformat (i, "raddr_stop %U", unformat_ip6_address, &raddr6_stop)) - { - is_ipv6 = 1; - is_ip_any = 0; - } + else if (unformat (i, "laddr_start %U", + unformat_vl_api_address, &laddr_start)) + is_ip_any = 0; + else if (unformat (i, "laddr_stop %U", unformat_vl_api_address, + &laddr_stop)) + is_ip_any = 0; + else if (unformat (i, "raddr_start %U", unformat_vl_api_address, + &raddr_start)) + is_ip_any = 0; + else if (unformat (i, "raddr_stop %U", unformat_vl_api_address, + &raddr_stop)) + is_ip_any = 0; else if (unformat (i, "action %U", unformat_ipsec_policy_action, &policy)) { @@ -15023,65 +14900,50 @@ api_ipsec_spd_add_del_entry (vat_main_t * vam) } - M (IPSEC_SPD_ADD_DEL_ENTRY, mp); - - mp->spd_id = ntohl (spd_id); - mp->priority = ntohl (priority); - mp->is_outbound = is_outbound; + M (IPSEC_SPD_ENTRY_ADD_DEL, mp); - mp->is_ipv6 = is_ipv6; - if (is_ipv6 || is_ip_any) - { - clib_memcpy (mp->remote_address_start, &raddr6_start, - sizeof (ip6_address_t)); - clib_memcpy (mp->remote_address_stop, &raddr6_stop, - sizeof (ip6_address_t)); - clib_memcpy (mp->local_address_start, &laddr6_start, - sizeof (ip6_address_t)); - clib_memcpy (mp->local_address_stop, &laddr6_stop, - sizeof (ip6_address_t)); - } - else - { - clib_memcpy (mp->remote_address_start, &raddr4_start, - sizeof (ip4_address_t)); - clib_memcpy (mp->remote_address_stop, &raddr4_stop, - sizeof (ip4_address_t)); - clib_memcpy (mp->local_address_start, &laddr4_start, - sizeof (ip4_address_t)); - 
clib_memcpy (mp->local_address_stop, &laddr4_stop, - sizeof (ip4_address_t)); - } - mp->protocol = (u8) protocol; - mp->local_port_start = ntohs ((u16) lport_start); - mp->local_port_stop = ntohs ((u16) lport_stop); - mp->remote_port_start = ntohs ((u16) rport_start); - mp->remote_port_stop = ntohs ((u16) rport_stop); - mp->policy = (u8) policy; - mp->sa_id = ntohl (sa_id); mp->is_add = is_add; - mp->is_ip_any = is_ip_any; + + mp->entry.spd_id = ntohl (spd_id); + mp->entry.priority = ntohl (priority); + mp->entry.is_outbound = is_outbound; + + clib_memcpy (&mp->entry.remote_address_start, &raddr_start, + sizeof (vl_api_address_t)); + clib_memcpy (&mp->entry.remote_address_stop, &raddr_stop, + sizeof (vl_api_address_t)); + clib_memcpy (&mp->entry.local_address_start, &laddr_start, + sizeof (vl_api_address_t)); + clib_memcpy (&mp->entry.local_address_stop, &laddr_stop, + sizeof (vl_api_address_t)); + + mp->entry.protocol = (u8) protocol; + mp->entry.local_port_start = ntohs ((u16) lport_start); + mp->entry.local_port_stop = ntohs ((u16) lport_stop); + mp->entry.remote_port_start = ntohs ((u16) rport_start); + mp->entry.remote_port_stop = ntohs ((u16) rport_stop); + mp->entry.policy = (u8) policy; + mp->entry.sa_id = ntohl (sa_id); + S (mp); W (ret); return ret; } static int -api_ipsec_sad_add_del_entry (vat_main_t * vam) +api_ipsec_sad_entry_add_del (vat_main_t * vam) { unformat_input_t *i = vam->input; - vl_api_ipsec_sad_add_del_entry_t *mp; + vl_api_ipsec_sad_entry_add_del_t *mp; u32 sad_id = 0, spi = 0; u8 *ck = 0, *ik = 0; u8 is_add = 1; - u8 protocol = IPSEC_PROTOCOL_AH; - u8 is_tunnel = 0, is_tunnel_ipv6 = 0; - u32 crypto_alg = 0, integ_alg = 0; - ip4_address_t tun_src4; - ip4_address_t tun_dst4; - ip6_address_t tun_src6; - ip6_address_t tun_dst6; + vl_api_ipsec_crypto_alg_t crypto_alg = IPSEC_API_CRYPTO_ALG_NONE; + vl_api_ipsec_integ_alg_t integ_alg = IPSEC_API_INTEG_ALG_NONE; + vl_api_ipsec_sad_flags_t flags = IPSEC_API_SAD_FLAG_NONE; + vl_api_ipsec_proto_t 
protocol = IPSEC_API_PROTO_AH; + vl_api_address_t tun_src, tun_dst; int ret; while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) @@ -15093,51 +14955,30 @@ api_ipsec_sad_add_del_entry (vat_main_t * vam) else if (unformat (i, "spi %d", &spi)) ; else if (unformat (i, "esp")) - protocol = IPSEC_PROTOCOL_ESP; - else if (unformat (i, "tunnel_src %U", unformat_ip4_address, &tun_src4)) - { - is_tunnel = 1; - is_tunnel_ipv6 = 0; - } - else if (unformat (i, "tunnel_dst %U", unformat_ip4_address, &tun_dst4)) - { - is_tunnel = 1; - is_tunnel_ipv6 = 0; - } - else if (unformat (i, "tunnel_src %U", unformat_ip6_address, &tun_src6)) - { - is_tunnel = 1; - is_tunnel_ipv6 = 1; - } - else if (unformat (i, "tunnel_dst %U", unformat_ip6_address, &tun_dst6)) + protocol = IPSEC_API_PROTO_ESP; + else + if (unformat (i, "tunnel_src %U", unformat_vl_api_address, &tun_src)) { - is_tunnel = 1; - is_tunnel_ipv6 = 1; + flags |= IPSEC_API_SAD_FLAG_IS_TUNNEL; + if (ADDRESS_IP6 == tun_src.af) + flags |= IPSEC_API_SAD_FLAG_IS_TUNNEL_V6; } else - if (unformat - (i, "crypto_alg %U", unformat_ipsec_crypto_alg, &crypto_alg)) + if (unformat (i, "tunnel_dst %U", unformat_vl_api_address, &tun_dst)) { - if (crypto_alg >= IPSEC_CRYPTO_N_ALG) - { - clib_warning ("unsupported crypto-alg: '%U'", - format_ipsec_crypto_alg, crypto_alg); - return -99; - } + flags |= IPSEC_API_SAD_FLAG_IS_TUNNEL; + if (ADDRESS_IP6 == tun_src.af) + flags |= IPSEC_API_SAD_FLAG_IS_TUNNEL_V6; } + else + if (unformat (i, "crypto_alg %U", + unformat_ipsec_api_crypto_alg, &crypto_alg)) + ; else if (unformat (i, "crypto_key %U", unformat_hex_string, &ck)) ; - else - if (unformat - (i, "integ_alg %U", unformat_ipsec_integ_alg, &integ_alg)) - { - if (integ_alg >= IPSEC_INTEG_N_ALG) - { - clib_warning ("unsupported integ-alg: '%U'", - format_ipsec_integ_alg, integ_alg); - return -99; - } - } + else if (unformat (i, "integ_alg %U", + unformat_ipsec_api_integ_alg, &integ_alg)) + ; else if (unformat (i, "integ_key %U", 
unformat_hex_string, &ik)) ; else @@ -15148,46 +14989,37 @@ api_ipsec_sad_add_del_entry (vat_main_t * vam) } - M (IPSEC_SAD_ADD_DEL_ENTRY, mp); + M (IPSEC_SAD_ENTRY_ADD_DEL, mp); - mp->sad_id = ntohl (sad_id); mp->is_add = is_add; - mp->protocol = protocol; - mp->spi = ntohl (spi); - mp->is_tunnel = is_tunnel; - mp->is_tunnel_ipv6 = is_tunnel_ipv6; - mp->crypto_algorithm = crypto_alg; - mp->integrity_algorithm = integ_alg; - mp->crypto_key_length = vec_len (ck); - mp->integrity_key_length = vec_len (ik); + mp->entry.sad_id = ntohl (sad_id); + mp->entry.protocol = protocol; + mp->entry.spi = ntohl (spi); + mp->entry.flags = flags; + + mp->entry.crypto_algorithm = crypto_alg; + mp->entry.integrity_algorithm = integ_alg; + mp->entry.crypto_key.length = vec_len (ck); + mp->entry.integrity_key.length = vec_len (ik); - if (mp->crypto_key_length > sizeof (mp->crypto_key)) - mp->crypto_key_length = sizeof (mp->crypto_key); + if (mp->entry.crypto_key.length > sizeof (mp->entry.crypto_key.data)) + mp->entry.crypto_key.length = sizeof (mp->entry.crypto_key.data); - if (mp->integrity_key_length > sizeof (mp->integrity_key)) - mp->integrity_key_length = sizeof (mp->integrity_key); + if (mp->entry.integrity_key.length > sizeof (mp->entry.integrity_key.data)) + mp->entry.integrity_key.length = sizeof (mp->entry.integrity_key.data); if (ck) - clib_memcpy (mp->crypto_key, ck, mp->crypto_key_length); + clib_memcpy (mp->entry.crypto_key.data, ck, mp->entry.crypto_key.length); if (ik) - clib_memcpy (mp->integrity_key, ik, mp->integrity_key_length); + clib_memcpy (mp->entry.integrity_key.data, ik, + mp->entry.integrity_key.length); - if (is_tunnel) + if (flags & IPSEC_API_SAD_FLAG_IS_TUNNEL) { - if (is_tunnel_ipv6) - { - clib_memcpy (mp->tunnel_src_address, &tun_src6, - sizeof (ip6_address_t)); - clib_memcpy (mp->tunnel_dst_address, &tun_dst6, - sizeof (ip6_address_t)); - } - else - { - clib_memcpy (mp->tunnel_src_address, &tun_src4, - sizeof (ip4_address_t)); - clib_memcpy 
(mp->tunnel_dst_address, &tun_dst4, - sizeof (ip4_address_t)); - } + clib_memcpy (&mp->entry.tunnel_src, &tun_src, + sizeof (mp->entry.tunnel_src)); + clib_memcpy (&mp->entry.tunnel_dst, &tun_dst, + sizeof (mp->entry.tunnel_dst)); } S (mp); @@ -15222,19 +15054,19 @@ api_ipsec_sa_set_key (vat_main_t * vam) M (IPSEC_SA_SET_KEY, mp); mp->sa_id = ntohl (sa_id); - mp->crypto_key_length = vec_len (ck); - mp->integrity_key_length = vec_len (ik); + mp->crypto_key.length = vec_len (ck); + mp->integrity_key.length = vec_len (ik); - if (mp->crypto_key_length > sizeof (mp->crypto_key)) - mp->crypto_key_length = sizeof (mp->crypto_key); + if (mp->crypto_key.length > sizeof (mp->crypto_key.data)) + mp->crypto_key.length = sizeof (mp->crypto_key.data); - if (mp->integrity_key_length > sizeof (mp->integrity_key)) - mp->integrity_key_length = sizeof (mp->integrity_key); + if (mp->integrity_key.length > sizeof (mp->integrity_key.data)) + mp->integrity_key.length = sizeof (mp->integrity_key.data); if (ck) - clib_memcpy (mp->crypto_key, ck, mp->crypto_key_length); + clib_memcpy (mp->crypto_key.data, ck, mp->crypto_key.length); if (ik) - clib_memcpy (mp->integrity_key, ik, mp->integrity_key_length); + clib_memcpy (mp->integrity_key.data, ik, mp->integrity_key.length); S (mp); W (ret); @@ -15286,7 +15118,7 @@ api_ipsec_tunnel_if_add_del (vat_main_t * vam) ; else if (unformat - (i, "crypto_alg %U", unformat_ipsec_crypto_alg, &crypto_alg)) + (i, "crypto_alg %U", unformat_ipsec_api_crypto_alg, &crypto_alg)) { if (crypto_alg >= IPSEC_CRYPTO_N_ALG) { @@ -15297,7 +15129,7 @@ api_ipsec_tunnel_if_add_del (vat_main_t * vam) } else if (unformat - (i, "integ_alg %U", unformat_ipsec_integ_alg, &integ_alg)) + (i, "integ_alg %U", unformat_ipsec_api_integ_alg, &integ_alg)) { if (integ_alg >= IPSEC_INTEG_N_ALG) { @@ -15383,21 +15215,21 @@ vl_api_ipsec_sa_details_t_handler (vl_api_ipsec_sa_details_t * mp) vat_main_t *vam = &vat_main; print (vam->ofp, "sa_id %u sw_if_index %u spi %u proto %u crypto_alg 
%u " - "crypto_key %U integ_alg %u integ_key %U use_esn %u " - "use_anti_replay %u is_tunnel %u is_tunnel_ip6 %u " + "crypto_key %U integ_alg %u integ_key %U flags %x " "tunnel_src_addr %U tunnel_dst_addr %U " "salt %u seq_outbound %lu last_seq_inbound %lu " "replay_window %lu total_data_size %lu\n", - ntohl (mp->sa_id), ntohl (mp->sw_if_index), ntohl (mp->spi), - mp->protocol, - mp->crypto_alg, format_hex_bytes, mp->crypto_key, mp->crypto_key_len, - mp->integ_alg, format_hex_bytes, mp->integ_key, mp->integ_key_len, - mp->use_esn, mp->use_anti_replay, mp->is_tunnel, mp->is_tunnel_ip6, - (mp->is_tunnel_ip6) ? format_ip6_address : format_ip4_address, - mp->tunnel_src_addr, - (mp->is_tunnel_ip6) ? format_ip6_address : format_ip4_address, - mp->tunnel_dst_addr, - ntohl (mp->salt), + ntohl (mp->entry.sad_id), + ntohl (mp->sw_if_index), + ntohl (mp->entry.spi), + ntohl (mp->entry.protocol), + ntohl (mp->entry.crypto_algorithm), + format_hex_bytes, mp->entry.crypto_key.data, + mp->entry.crypto_key.length, ntohl (mp->entry.integrity_algorithm), + format_hex_bytes, mp->entry.integrity_key.data, + mp->entry.integrity_key.length, ntohl (mp->entry.flags), + format_vl_api_address, &mp->entry.tunnel_src, format_vl_api_address, + &mp->entry.tunnel_dst, ntohl (mp->salt), clib_net_to_host_u64 (mp->seq_outbound), clib_net_to_host_u64 (mp->last_seq_inbound), clib_net_to_host_u64 (mp->replay_window), @@ -15407,13 +15239,32 @@ vl_api_ipsec_sa_details_t_handler (vl_api_ipsec_sa_details_t * mp) #define vl_api_ipsec_sa_details_t_endian vl_noop_handler #define vl_api_ipsec_sa_details_t_print vl_noop_handler +static void +vat_json_object_add_address (vat_json_node_t * node, + const vl_api_address_t * addr) +{ + if (ADDRESS_IP6 == addr->af) + { + struct in6_addr ip6; + + clib_memcpy (&ip6, &addr->un.ip6, sizeof (ip6)); + vat_json_object_add_ip6 (node, "ip_address", ip6); + } + else + { + struct in_addr ip4; + + clib_memcpy (&ip4, &addr->un.ip4, sizeof (ip4)); + vat_json_object_add_ip4 (node, 
"ip_address", ip4); + } +} + static void vl_api_ipsec_sa_details_t_handler_json (vl_api_ipsec_sa_details_t * mp) { vat_main_t *vam = &vat_main; vat_json_node_t *node = NULL; - struct in_addr src_ip4, dst_ip4; - struct in6_addr src_ip6, dst_ip6; + vl_api_ipsec_sad_flags_t flags; if (VAT_JSON_ARRAY != vam->json_tree.type) { @@ -15423,39 +15274,36 @@ static void vl_api_ipsec_sa_details_t_handler_json node = vat_json_array_add (&vam->json_tree); vat_json_init_object (node); - vat_json_object_add_uint (node, "sa_id", ntohl (mp->sa_id)); + vat_json_object_add_uint (node, "sa_id", ntohl (mp->entry.sad_id)); vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index)); - vat_json_object_add_uint (node, "spi", ntohl (mp->spi)); - vat_json_object_add_uint (node, "proto", mp->protocol); - vat_json_object_add_uint (node, "crypto_alg", mp->crypto_alg); - vat_json_object_add_uint (node, "integ_alg", mp->integ_alg); - vat_json_object_add_uint (node, "use_esn", mp->use_esn); - vat_json_object_add_uint (node, "use_anti_replay", mp->use_anti_replay); - vat_json_object_add_uint (node, "is_tunnel", mp->is_tunnel); - vat_json_object_add_uint (node, "is_tunnel_ip6", mp->is_tunnel_ip6); - vat_json_object_add_bytes (node, "crypto_key", mp->crypto_key, - mp->crypto_key_len); - vat_json_object_add_bytes (node, "integ_key", mp->integ_key, - mp->integ_key_len); - if (mp->is_tunnel_ip6) - { - clib_memcpy (&src_ip6, mp->tunnel_src_addr, sizeof (src_ip6)); - vat_json_object_add_ip6 (node, "tunnel_src_addr", src_ip6); - clib_memcpy (&dst_ip6, mp->tunnel_dst_addr, sizeof (dst_ip6)); - vat_json_object_add_ip6 (node, "tunnel_dst_addr", dst_ip6); - } - else - { - clib_memcpy (&src_ip4, mp->tunnel_src_addr, sizeof (src_ip4)); - vat_json_object_add_ip4 (node, "tunnel_src_addr", src_ip4); - clib_memcpy (&dst_ip4, mp->tunnel_dst_addr, sizeof (dst_ip4)); - vat_json_object_add_ip4 (node, "tunnel_dst_addr", dst_ip4); - } + vat_json_object_add_uint (node, "spi", ntohl (mp->entry.spi)); + 
vat_json_object_add_uint (node, "proto", ntohl (mp->entry.protocol)); + vat_json_object_add_uint (node, "crypto_alg", + ntohl (mp->entry.crypto_algorithm)); + vat_json_object_add_uint (node, "integ_alg", + ntohl (mp->entry.integrity_algorithm)); + flags = ntohl (mp->entry.flags); + vat_json_object_add_uint (node, "use_esn", + ! !(flags & + IPSEC_API_SAD_FLAG_USE_EXTENDED_SEQ_NUM)); + vat_json_object_add_uint (node, "use_anti_replay", + ! !(flags & IPSEC_API_SAD_FLAG_USE_ANTI_REPLAY)); + vat_json_object_add_uint (node, "is_tunnel", + ! !(flags & IPSEC_API_SAD_FLAG_IS_TUNNEL)); + vat_json_object_add_uint (node, "is_tunnel_ip6", + ! !(flags & IPSEC_API_SAD_FLAG_IS_TUNNEL_V6)); + vat_json_object_add_uint (node, "udp_encap", + ! !(flags & IPSEC_API_SAD_FLAG_UDP_ENCAP)); + vat_json_object_add_bytes (node, "crypto_key", mp->entry.crypto_key.data, + mp->entry.crypto_key.length); + vat_json_object_add_bytes (node, "integ_key", mp->entry.integrity_key.data, + mp->entry.integrity_key.length); + vat_json_object_add_address (node, &mp->entry.tunnel_src); + vat_json_object_add_address (node, &mp->entry.tunnel_dst); vat_json_object_add_uint (node, "replay_window", clib_net_to_host_u64 (mp->replay_window)); vat_json_object_add_uint (node, "total_data_size", clib_net_to_host_u64 (mp->total_data_size)); - } static int @@ -15508,15 +15356,20 @@ api_ipsec_tunnel_if_set_key (vat_main_t * vam) if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) ; else - if (unformat (i, "local crypto %U", unformat_ipsec_crypto_alg, &alg)) + if (unformat + (i, "local crypto %U", unformat_ipsec_api_crypto_alg, &alg)) key_type = IPSEC_IF_SET_KEY_TYPE_LOCAL_CRYPTO; else - if (unformat (i, "remote crypto %U", unformat_ipsec_crypto_alg, &alg)) + if (unformat + (i, "remote crypto %U", unformat_ipsec_api_crypto_alg, &alg)) key_type = IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO; - else if (unformat (i, "local integ %U", unformat_ipsec_integ_alg, &alg)) + else + if (unformat + (i, "local integ %U", 
unformat_ipsec_api_integ_alg, &alg)) key_type = IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG; else - if (unformat (i, "remote integ %U", unformat_ipsec_integ_alg, &alg)) + if (unformat + (i, "remote integ %U", unformat_ipsec_api_integ_alg, &alg)) key_type = IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG; else if (unformat (i, "%U", unformat_hex_string, &key)) ; @@ -20237,10 +20090,9 @@ static void vl_api_ip_neighbor_details_t_handler vat_main_t *vam = &vat_main; print (vam->ofp, "%c %U %U", - (mp->is_static) ? 'S' : 'D', - format_ethernet_address, &mp->mac_address, - (mp->is_ipv6) ? format_ip6_address : format_ip4_address, - &mp->ip_address); + (ntohl (mp->neighbor.flags) & IP_NEIGHBOR_FLAG_STATIC) ? 'S' : 'D', + format_vl_api_mac_address, &mp->neighbor.mac_address, + format_vl_api_address, &mp->neighbor.ip_address); } static void vl_api_ip_neighbor_details_t_handler_json @@ -20249,8 +20101,6 @@ static void vl_api_ip_neighbor_details_t_handler_json vat_main_t *vam = &vat_main; vat_json_node_t *node; - struct in_addr ip4; - struct in6_addr ip6; if (VAT_JSON_ARRAY != vam->json_tree.type) { @@ -20260,24 +20110,15 @@ static void vl_api_ip_neighbor_details_t_handler_json node = vat_json_array_add (&vam->json_tree); vat_json_init_object (node); - vat_json_object_add_string_copy (node, "flag", - (mp->is_static) ? (u8 *) "static" : (u8 *) - "dynamic"); + vat_json_object_add_string_copy + (node, "flag", + ((ntohl (mp->neighbor.flags) & IP_NEIGHBOR_FLAG_STATIC) ? 
+ (u8 *) "static" : (u8 *) "dynamic")); vat_json_object_add_string_copy (node, "link_layer", - format (0, "%U", format_ethernet_address, - &mp->mac_address)); - - if (mp->is_ipv6) - { - clib_memcpy (&ip6, &mp->ip_address, sizeof (ip6)); - vat_json_object_add_ip6 (node, "ip_address", ip6); - } - else - { - clib_memcpy (&ip4, &mp->ip_address, sizeof (ip4)); - vat_json_object_add_ip4 (node, "ip_address", ip4); - } + format (0, "%U", format_vl_api_mac_address, + &mp->neighbor.mac_address)); + vat_json_object_add_address (node, &mp->neighbor.ip_address); } static int @@ -21028,9 +20869,7 @@ api_ip_source_and_port_range_check_add_del (vat_main_t * vam) u16 *high_ports = 0; u16 this_low; u16 this_hi; - ip4_address_t ip4_addr; - ip6_address_t ip6_addr; - u32 length; + vl_api_prefix_t prefix; u32 tmp, tmp2; u8 prefix_set = 0; u32 vrf_id = ~0; @@ -21040,17 +20879,8 @@ api_ip_source_and_port_range_check_add_del (vat_main_t * vam) while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "%U/%d", unformat_ip4_address, &ip4_addr, &length)) - { - prefix_set = 1; - } - else - if (unformat - (input, "%U/%d", unformat_ip6_address, &ip6_addr, &length)) - { - prefix_set = 1; - is_ipv6 = 1; - } + if (unformat (input, "%U", unformat_vl_api_prefix, &prefix)) + prefix_set = 1; else if (unformat (input, "vrf %d", &vrf_id)) ; else if (unformat (input, "del")) @@ -21121,18 +20951,8 @@ api_ip_source_and_port_range_check_add_del (vat_main_t * vam) mp->is_add = is_add; - if (is_ipv6) - { - mp->is_ipv6 = 1; - clib_memcpy (mp->address, &ip6_addr, sizeof (ip6_addr)); - } - else - { - mp->is_ipv6 = 0; - clib_memcpy (mp->address, &ip4_addr, sizeof (ip4_addr)); - } + clib_memcpy (&mp->prefix, &prefix, sizeof (prefix)); - mp->mask_length = length; mp->number_of_ranges = vec_len (low_ports); clib_memcpy (mp->low_ports, low_ports, vec_len (low_ports)); @@ -22209,7 +22029,7 @@ api_sock_init_shm (vat_main_t * vam) config[6].count = 128; config[6].size = sizeof (uword); - rv = 
vl_socket_client_init_shm (config); + rv = vl_socket_client_init_shm (config, 1 /* want_pthread */ ); if (!rv) vam->client_index_invalid = 1; return rv; @@ -22585,10 +22405,8 @@ api_ip_container_proxy_add_del (vat_main_t * vam) { vl_api_ip_container_proxy_add_del_t *mp; unformat_input_t *i = vam->input; - u32 plen = ~0, sw_if_index = ~0; - ip4_address_t ip4; - ip6_address_t ip6; - u8 is_ip4 = 1; + u32 sw_if_index = ~0; + vl_api_prefix_t pfx = { }; u8 is_add = 1; int ret; @@ -22598,22 +22416,14 @@ api_ip_container_proxy_add_del (vat_main_t * vam) is_add = 0; else if (unformat (i, "add")) ; - if (unformat (i, "%U", unformat_ip4_address, &ip4)) - { - is_ip4 = 1; - plen = 32; - } - else if (unformat (i, "%U", unformat_ip6_address, &ip6)) - { - is_ip4 = 0; - plen = 128; - } + if (unformat (i, "%U", unformat_vl_api_prefix, &pfx)) + ; else if (unformat (i, "sw_if_index %u", &sw_if_index)) ; else break; } - if (sw_if_index == ~0 || plen == ~0) + if (sw_if_index == ~0 || pfx.address_length == 0) { errmsg ("address and sw_if_index must be set"); return -99; @@ -22621,14 +22431,9 @@ api_ip_container_proxy_add_del (vat_main_t * vam) M (IP_CONTAINER_PROXY_ADD_DEL, mp); - mp->is_ip4 = is_ip4; mp->sw_if_index = clib_host_to_net_u32 (sw_if_index); - mp->plen = plen; mp->is_add = is_add; - if (is_ip4) - clib_memcpy (mp->ip, &ip4, sizeof (ip4)); - else - clib_memcpy (mp->ip, &ip6, sizeof (ip6)); + clib_memcpy (&mp->pfx, &pfx, sizeof (pfx)); S (mp); W (ret); @@ -23166,18 +22971,16 @@ _(l2_flags, \ "sw_if | sw_if_index [learn] [forward] [uu-flood] [flood] [arp-term] [disable]\n") \ _(bridge_flags, \ "bd_id [learn] [forward] [uu-flood] [flood] [arp-term] [disable]\n") \ -_(tap_connect, \ - "tapname mac | random-mac [tag ]") \ -_(tap_modify, \ - " | sw_if_index tapname mac | random-mac") \ -_(tap_delete, \ - " | sw_if_index ") \ -_(sw_interface_tap_dump, "") \ _(tap_create_v2, \ "id [hw-addr ] [host-ns ] [rx-ring-size [tx-ring-size ]") \ _(tap_delete_v2, \ " | sw_if_index ") \ 
_(sw_interface_tap_v2_dump, "") \ +_(virtio_pci_create, \ + "pci-addr [use_random_mac | hw-addr ] [tx-ring-size [rx-ring-size ] [features ]") \ +_(virtio_pci_delete, \ + " | sw_if_index ") \ +_(sw_interface_virtio_pci_dump, "") \ _(bond_create, \ "[hw-addr ] {round-robin | active-backup | " \ "broadcast | {lacp | xor} [load-balance { l2 | l23 | l34 }]} " \ @@ -23358,10 +23161,10 @@ _(ip_dump, "ipv4 | ipv6") \ _(ipsec_spd_add_del, "spd_id [del]") \ _(ipsec_interface_add_del_spd, "( | sw_if_index )\n" \ " spid_id ") \ -_(ipsec_sad_add_del_entry, "sad_id spi crypto_alg \n" \ +_(ipsec_sad_entry_add_del, "sad_id spi crypto_alg \n" \ " crypto_key tunnel_src tunnel_dst \n" \ " integ_alg integ_key ") \ -_(ipsec_spd_add_del_entry, "spd_id priority action \n" \ +_(ipsec_spd_entry_add_del, "spd_id priority action \n" \ " (inbound|outbound) [sa_id ] laddr_start \n" \ " laddr_stop raddr_start raddr_stop \n" \ " [lport_start lport_stop ] [rport_start rport_stop ]" ) \ diff --git a/src/vat/main.c b/src/vat/main.c index 295ccec25151..a5421140fb33 100644 --- a/src/vat/main.c +++ b/src/vat/main.c @@ -15,6 +15,7 @@ #include "vat.h" #include "plugin.h" #include +#include vat_main_t vat_main; @@ -283,6 +284,37 @@ setup_signal_handlers (void) } } +static void +vat_find_plugin_path () +{ + extern char *vat_plugin_path; + char *p, path[PATH_MAX]; + int rv; + u8 *s; + + /* find executable path */ + if ((rv = readlink ("/proc/self/exe", path, PATH_MAX - 1)) == -1) + return; + + /* readlink doesn't provide null termination */ + path[rv] = 0; + + /* strip filename */ + if ((p = strrchr (path, '/')) == 0) + return; + *p = 0; + + /* strip bin/ */ + if ((p = strrchr (path, '/')) == 0) + return; + *p = 0; + + s = format (0, "%s/lib/" CLIB_TARGET_TRIPLET "/vpp_api_test_plugins:" + "%s/lib/vpp_api_test_plugins", path, path); + vec_add1 (s, 0); + vat_plugin_path = (char *) s; +} + int main (int argc, char **argv) { @@ -309,6 +341,8 @@ main (int argc, char **argv) vec_validate (vam->cmd_reply, 0); 
vec_reset_length (vam->cmd_reply); + vat_find_plugin_path (); + unformat_init_command_line (a, argv); while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) diff --git a/src/vcl/ldp.c b/src/vcl/ldp.c index 088c118b6ede..f3ac3107a50c 100644 --- a/src/vcl/ldp.c +++ b/src/vcl/ldp.c @@ -261,7 +261,12 @@ ldp_init (void) } } - clib_time_init (&ldpw->clib_time); + /* *INDENT-OFF* */ + pool_foreach (ldpw, ldp->workers, ({ + clib_memset (&ldpw->clib_time, 0, sizeof (ldpw->clib_time)); + })); + /* *INDENT-ON* */ + LDBG (0, "LDP initialization: done!"); return 0; @@ -668,6 +673,9 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, return -1; } + if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) + clib_time_init (&ldpw->clib_time); + if (timeout) { time_out = (timeout->tv_sec == 0 && timeout->tv_nsec == 0) ? @@ -739,10 +747,9 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, vec_len (ldpw->ex_bitmap) * sizeof (clib_bitmap_t)); - rv = vppcom_select (si_bits, readfds ? ldpw->rd_bitmap : NULL, - writefds ? ldpw->wr_bitmap : NULL, - exceptfds ? ldpw->ex_bitmap : NULL, - vcl_timeout); + rv = vls_select (si_bits, readfds ? ldpw->rd_bitmap : NULL, + writefds ? ldpw->wr_bitmap : NULL, + exceptfds ? ldpw->ex_bitmap : NULL, vcl_timeout); if (rv < 0) { errno = -rv; @@ -2142,7 +2149,7 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents, int timeout, const sigset_t * sigmask) { ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); - double time_to_wait = (double) 0, time_out, now = 0; + double time_to_wait = (double) 0, max_time; int libc_epfd, rv = 0; vls_handle_t ep_vlsh; @@ -2166,8 +2173,10 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents, return -1; } + if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) + clib_time_init (&ldpw->clib_time); time_to_wait = ((timeout >= 0) ? 
(double) timeout / 1000 : 0); - time_out = clib_time_now (&ldpw->clib_time) + time_to_wait; + max_time = clib_time_now (&ldpw->clib_time) + time_to_wait; libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); if (PREDICT_FALSE (libc_epfd < 0)) @@ -2179,8 +2188,7 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents, LDBG (2, "epfd %d: vep_idx %d, libc_epfd %d, events %p, maxevents %d, " "timeout %d, sigmask %p: time_to_wait %.02f", epfd, ep_vlsh, - libc_epfd, events, maxevents, timeout, sigmask, time_to_wait, - time_out); + libc_epfd, events, maxevents, timeout, sigmask, time_to_wait); do { if (!ldpw->epoll_wait_vcl) @@ -2207,11 +2215,8 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents, if (rv != 0) goto done; } - - if (timeout != -1) - now = clib_time_now (&ldpw->clib_time); } - while (now < time_out); + while ((timeout == -1) || (clib_time_now (&ldpw->clib_time) < max_time)); done: return rv; @@ -2237,14 +2242,15 @@ poll (struct pollfd *fds, nfds_t nfds, int timeout) int rv, i, n_revents = 0; vls_handle_t vlsh; vcl_poll_t *vp; - double wait_for_time; + double max_time; LDBG (3, "fds %p, nfds %d, timeout %d", fds, nfds, timeout); - if (timeout >= 0) - wait_for_time = (f64) timeout / 1000; - else - wait_for_time = -1; + if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) + clib_time_init (&ldpw->clib_time); + + max_time = (timeout >= 0) ? 
(f64) timeout / 1000 : 0; + max_time += clib_time_now (&ldpw->clib_time); for (i = 0; i < nfds; i++) { @@ -2304,8 +2310,7 @@ poll (struct pollfd *fds, nfds_t nfds, int timeout) goto done; } } - while ((wait_for_time == -1) || - (clib_time_now (&ldpw->clib_time) < wait_for_time)); + while ((timeout < 0) || (clib_time_now (&ldpw->clib_time) < max_time)); rv = 0; done: @@ -2374,16 +2379,18 @@ ldp_constructor (void) void ldp_destructor (void) { - swrap_destructor (); - if (ldp->init) - ldp->init = 0; + /* + swrap_destructor (); + if (ldp->init) + ldp->init = 0; + */ /* Don't use clib_warning() here because that calls writev() * which will call ldp_init(). */ if (LDP_DEBUG > 0) - printf ("%s:%d: LDP<%d>: LDP destructor: done!\n", - __func__, __LINE__, getpid ()); + fprintf (stderr, "%s:%d: LDP<%d>: LDP destructor: done!\n", + __func__, __LINE__, getpid ()); } diff --git a/src/vcl/vcl_bapi.c b/src/vcl/vcl_bapi.c index 5b9a9d5d3ce8..b7f47d8c5798 100644 --- a/src/vcl/vcl_bapi.c +++ b/src/vcl/vcl_bapi.c @@ -594,9 +594,8 @@ vppcom_send_bind_sock (vcl_session_t * session) } void -vppcom_send_unbind_sock (u64 vpp_handle) +vppcom_send_unbind_sock (vcl_worker_t * wrk, u64 vpp_handle) { - vcl_worker_t *wrk = vcl_worker_get_current (); vl_api_unbind_sock_t *ump; ump = vl_msg_api_alloc (sizeof (*ump)); @@ -680,7 +679,7 @@ vppcom_connect_to_vpp (char *app_name) return VPPCOM_ECONNREFUSED; } - if (vl_socket_client_init_shm (0)) + if (vl_socket_client_init_shm (0, 1 /* want_pthread */ )) { VERR ("app (%s) init shm failed!", app_name); return VPPCOM_ECONNREFUSED; diff --git a/src/vcl/vcl_locked.c b/src/vcl/vcl_locked.c index 6254bad09b61..f5892c17e734 100644 --- a/src/vcl/vcl_locked.c +++ b/src/vcl/vcl_locked.c @@ -18,22 +18,35 @@ typedef struct vcl_locked_session_ { + clib_spinlock_t lock; u32 session_index; u32 worker_index; u32 vls_index; - u32 flags; - clib_spinlock_t lock; + u32 *workers_subscribed; + clib_bitmap_t *listeners; } vcl_locked_session_t; -typedef struct vcl_main_ 
+typedef struct vls_local_ +{ + int vls_wrk_index; + volatile int vls_mt_n_threads; + pthread_mutex_t vls_mt_mq_mlock; + pthread_mutex_t vls_mt_spool_mlock; + volatile u8 select_mp_check; + volatile u8 epoll_mp_check; +} vls_process_local_t; + +static vls_process_local_t vls_local; +static vls_process_local_t *vlsl = &vls_local; + +typedef struct vls_main_ { vcl_locked_session_t *vls_pool; clib_rwlock_t vls_table_lock; uword *session_index_to_vlsh_table; } vls_main_t; -vls_main_t vls_main; -vls_main_t *vlsm = &vls_main; +vls_main_t *vlsm; static inline void vls_table_rlock (void) @@ -59,10 +72,62 @@ vls_table_wunlock (void) clib_rwlock_writer_unlock (&vlsm->vls_table_lock); } +typedef enum +{ + VLS_MT_OP_READ, + VLS_MT_OP_WRITE, + VLS_MT_OP_SPOOL, + VLS_MT_OP_XPOLL, +} vls_mt_ops_t; + +typedef enum +{ + VLS_MT_LOCK_MQ = 1 << 0, + VLS_MT_LOCK_SPOOL = 1 << 1 +} vls_mt_lock_type_t; + +static void +vls_mt_add (void) +{ + vlsl->vls_mt_n_threads += 1; + vcl_set_worker_index (vlsl->vls_wrk_index); +} + +static inline void +vls_mt_mq_lock (void) +{ + pthread_mutex_lock (&vlsl->vls_mt_mq_mlock); +} + +static inline void +vls_mt_mq_unlock (void) +{ + pthread_mutex_unlock (&vlsl->vls_mt_mq_mlock); +} + +static inline void +vls_mt_spool_lock (void) +{ + pthread_mutex_lock (&vlsl->vls_mt_spool_mlock); +} + +static inline void +vls_mt_create_unlock (void) +{ + pthread_mutex_unlock (&vlsl->vls_mt_spool_mlock); +} + +static void +vls_mt_locks_init (void) +{ + pthread_mutex_init (&vlsl->vls_mt_mq_mlock, NULL); + pthread_mutex_init (&vlsl->vls_mt_spool_mlock, NULL); +} + static inline vcl_session_handle_t vls_to_sh (vcl_locked_session_t * vls) { - return vppcom_session_handle (vls->session_index); + return vcl_session_handle_from_index (vls->session_index); } static inline vcl_session_handle_t @@ -100,12 +165,12 @@ vls_get (vls_handle_t vlsh) } static void -vls_free (vcl_locked_session_t * fde) +vls_free (vcl_locked_session_t * vls) { - ASSERT (fde != 0); - hash_unset 
(vlsm->session_index_to_vlsh_table, fde->session_index); - clib_spinlock_free (&fde->lock); - pool_put (vlsm->vls_pool, fde); + ASSERT (vls != 0); + hash_unset (vlsm->session_index_to_vlsh_table, vls->session_index); + clib_spinlock_free (&vls->lock); + pool_put (vlsm->vls_pool, vls); } static vcl_locked_session_t * @@ -130,6 +195,12 @@ vls_get_w_dlock (vls_handle_t vlsh) return vls; } +static inline void +vls_lock (vcl_locked_session_t * vls) +{ + clib_spinlock_lock (&vls->lock); +} + static inline void vls_unlock (vcl_locked_session_t * vls) { @@ -153,17 +224,258 @@ vls_dunlock (vcl_locked_session_t * vls) vls_table_runlock (); } +vcl_session_handle_t +vlsh_to_sh (vls_handle_t vlsh) +{ + vcl_locked_session_t *vls; + int rv; + + vls = vls_get_w_dlock (vlsh); + if (!vls) + return INVALID_SESSION_ID; + rv = vls_to_sh (vls); + vls_dunlock (vls); + return rv; +} + +vcl_session_handle_t +vlsh_to_session_index (vls_handle_t vlsh) +{ + vcl_session_handle_t sh; + sh = vlsh_to_sh (vlsh); + return vppcom_session_index (sh); +} + +vls_handle_t +vls_si_to_vlsh (u32 session_index) +{ + uword *vlshp; + vlshp = hash_get (vlsm->session_index_to_vlsh_table, session_index); + return vlshp ? 
*vlshp : VLS_INVALID_HANDLE; +} + +vls_handle_t +vls_session_index_to_vlsh (uint32_t session_index) +{ + vls_handle_t vlsh; + + vls_table_rlock (); + vlsh = vls_si_to_vlsh (session_index); + vls_table_runlock (); + + return vlsh; +} + +u8 +vls_is_shared (vcl_locked_session_t * vls) +{ + return vec_len (vls->workers_subscribed); +} + +u8 +vls_is_shared_by_wrk (vcl_locked_session_t * vls, u32 wrk_index) +{ + int i; + for (i = 0; i < vec_len (vls->workers_subscribed); i++) + if (vls->workers_subscribed[i] == wrk_index) + return 1; + return 0; +} + static void -vls_get_and_free (vls_handle_t vlsh) +vls_listener_wrk_set (vcl_locked_session_t * vls, u32 wrk_index, u8 is_active) +{ + clib_bitmap_set (vls->listeners, wrk_index, is_active); +} + +static u8 +vls_listener_wrk_is_active (vcl_locked_session_t * vls, u32 wrk_index) +{ + return (clib_bitmap_get (vls->listeners, wrk_index) == 1); +} + +static void +vls_listener_wrk_start_listen (vcl_locked_session_t * vls, u32 wrk_index) +{ + vppcom_session_listen (vls_to_sh (vls), ~0); + vls_listener_wrk_set (vls, wrk_index, 1 /* is_active */ ); +} + +static void +vls_listener_wrk_stop_listen (vcl_locked_session_t * vls, u32 wrk_index) +{ + vcl_worker_t *wrk; + vcl_session_t *s; + + wrk = vcl_worker_get (wrk_index); + s = vcl_session_get (wrk, vls->session_index); + if (s->session_state != STATE_LISTEN) + return; + vppcom_send_unbind_sock (wrk, s->vpp_handle); + s->session_state = STATE_LISTEN_NO_MQ; + vls_listener_wrk_set (vls, wrk_index, 0 /* is_active */ ); +} + +int +vls_unshare_session (vcl_locked_session_t * vls, vcl_worker_t * wrk) +{ + int i, do_disconnect; + vcl_session_t *s; + + s = vcl_session_get (wrk, vls->session_index); + if (s->session_state == STATE_LISTEN) + vls_listener_wrk_set (vls, wrk->wrk_index, 0 /* is_active */ ); + + for (i = 0; i < vec_len (vls->workers_subscribed); i++) + { + if (vls->workers_subscribed[i] != wrk->wrk_index) + continue; + + if (s->rx_fifo) + { + svm_fifo_del_subscriber (s->rx_fifo, 
wrk->vpp_wrk_index); + svm_fifo_del_subscriber (s->tx_fifo, wrk->vpp_wrk_index); + } + vec_del1 (vls->workers_subscribed, i); + do_disconnect = s->session_state == STATE_LISTEN; + vcl_session_cleanup (wrk, s, vcl_session_handle (s), do_disconnect); + return 0; + } + + /* Return, if this is not the owning worker */ + if (vls->worker_index != wrk->wrk_index) + return 0; + + /* Check if we can change owner or close */ + if (vec_len (vls->workers_subscribed)) + { + vls->worker_index = vls->workers_subscribed[0]; + vec_del1 (vls->workers_subscribed, 0); + vcl_send_session_worker_update (wrk, s, vls->worker_index); + if (vec_len (vls->workers_subscribed)) + clib_warning ("more workers need to be updated"); + } + else + { + vcl_session_cleanup (wrk, s, vcl_session_handle (s), + 1 /* do_disconnect */ ); + } + + return 0; +} + +void +vls_share_vcl_session (vcl_worker_t * wrk, vcl_session_t * s) { vcl_locked_session_t *vls; + vls = vls_get (vls_si_to_vlsh (s->session_index)); + if (!vls) + return; + vls_lock (vls); + vec_add1 (vls->workers_subscribed, wrk->wrk_index); + if (s->rx_fifo) + { + svm_fifo_add_subscriber (s->rx_fifo, wrk->vpp_wrk_index); + svm_fifo_add_subscriber (s->tx_fifo, wrk->vpp_wrk_index); + } + else if (s->session_state == STATE_LISTEN) + { + s->session_state = STATE_LISTEN_NO_MQ; + } + + vls_unlock (vls); +} + +void +vls_worker_copy_on_fork (vcl_worker_t * parent_wrk) +{ + vcl_worker_t *wrk = vcl_worker_get_current (); + vcl_session_t *s; + + wrk->vpp_event_queues = vec_dup (parent_wrk->vpp_event_queues); + wrk->sessions = pool_dup (parent_wrk->sessions); + wrk->session_index_by_vpp_handles = + hash_dup (parent_wrk->session_index_by_vpp_handles); vls_table_wlock (); - vls = vls_get (vlsh); - vls_free (vls); + + /* *INDENT-OFF* */ + pool_foreach (s, wrk->sessions, ({ + vls_share_vcl_session (wrk, s); + })); + /* *INDENT-ON* */ + vls_table_wunlock (); } +static void +vls_mt_acq_locks (vcl_locked_session_t * vls, vls_mt_ops_t op, int *locks_acq) +{ + 
+ vcl_worker_t *wrk = vcl_worker_get_current (); + vcl_session_t *s = 0; + int is_nonblk = 0; + + if (vls) + { + s = vcl_session_get (wrk, vls->session_index); + if (PREDICT_FALSE (!s)) + return; + is_nonblk = VCL_SESS_ATTR_TEST (s->attr, VCL_SESS_ATTR_NONBLOCK); + } + + switch (op) + { + case VLS_MT_OP_READ: + if (!is_nonblk) + is_nonblk = vcl_session_read_ready (s) != 0; + if (!is_nonblk) + { + vls_mt_mq_lock (); + *locks_acq |= VLS_MT_LOCK_MQ; + } + break; + case VLS_MT_OP_WRITE: + if (!is_nonblk) + is_nonblk = vcl_session_write_ready (s) != 0; + if (!is_nonblk) + { + vls_mt_mq_lock (); + *locks_acq |= VLS_MT_LOCK_MQ; + } + break; + case VLS_MT_OP_XPOLL: + vls_mt_mq_lock (); + *locks_acq |= VLS_MT_LOCK_MQ; + break; + case VLS_MT_OP_SPOOL: + vls_mt_spool_lock (); + *locks_acq |= VLS_MT_LOCK_SPOOL; + break; + default: + break; + } +} + +static void +vls_mt_rel_locks (int locks_acq) +{ + if (locks_acq & VLS_MT_LOCK_MQ) + vls_mt_mq_unlock (); + if (locks_acq & VLS_MT_LOCK_SPOOL) + vls_mt_create_unlock (); +} +/* MT guard: call vls_mt_add () only when the calling thread has no worker index yet, then take the MT locks once more than one thread is registered. Declares _locks_acq for the matching vls_mt_unguard (). */ +#define vls_mt_guard(_vls, _op) \ + int _locks_acq = 0; \ + if (PREDICT_FALSE (vcl_get_worker_index () == ~0)) \ + vls_mt_add (); \ + if (PREDICT_FALSE (vlsl->vls_mt_n_threads > 1)) \ + vls_mt_acq_locks (_vls, _op, &_locks_acq); \ + +#define vls_mt_unguard() \ + if (PREDICT_FALSE (_locks_acq)) \ + vls_mt_rel_locks (_locks_acq) + int vls_write (vls_handle_t vlsh, void *buf, size_t nbytes) { @@ -172,7 +484,10 @@ vls_write (vls_handle_t vlsh, void *buf, size_t nbytes) if (!(vls = vls_get_w_dlock (vlsh))) return VPPCOM_EBADFD; + + vls_mt_guard (vls, VLS_MT_OP_WRITE); rv = vppcom_session_write (vls_to_sh_tu (vls), buf, nbytes); + vls_mt_unguard (); vls_get_and_unlock (vlsh); return rv; } @@ -185,7 +500,9 @@ vls_write_msg (vls_handle_t vlsh, void *buf, size_t nbytes) if (!(vls = vls_get_w_dlock (vlsh))) return VPPCOM_EBADFD; + vls_mt_guard (vls, VLS_MT_OP_WRITE); rv = vppcom_session_write_msg (vls_to_sh_tu (vls), buf, nbytes); + vls_mt_unguard (); vls_get_and_unlock
(vlsh); return rv; } @@ -199,7 +516,9 @@ vls_sendto (vls_handle_t vlsh, void *buf, int buflen, int flags, if (!(vls = vls_get_w_dlock (vlsh))) return VPPCOM_EBADFD; + vls_mt_guard (vls, VLS_MT_OP_WRITE); rv = vppcom_session_sendto (vls_to_sh_tu (vls), buf, buflen, flags, ep); + vls_mt_unguard (); vls_get_and_unlock (vlsh); return rv; } @@ -212,7 +531,9 @@ vls_read (vls_handle_t vlsh, void *buf, size_t nbytes) if (!(vls = vls_get_w_dlock (vlsh))) return VPPCOM_EBADFD; + vls_mt_guard (vls, VLS_MT_OP_READ); rv = vppcom_session_read (vls_to_sh_tu (vls), buf, nbytes); + vls_mt_unguard (); vls_get_and_unlock (vlsh); return rv; } @@ -226,8 +547,10 @@ vls_recvfrom (vls_handle_t vlsh, void *buffer, uint32_t buflen, int flags, if (!(vls = vls_get_w_dlock (vlsh))) return VPPCOM_EBADFD; + vls_mt_guard (vls, VLS_MT_OP_READ); rv = vppcom_session_recvfrom (vls_to_sh_tu (vls), buffer, buflen, flags, ep); + vls_mt_unguard (); vls_get_and_unlock (vlsh); return rv; } @@ -266,7 +589,9 @@ vls_listen (vls_handle_t vlsh, int q_len) if (!(vls = vls_get_w_dlock (vlsh))) return VPPCOM_EBADFD; + vls_mt_guard (vls, VLS_MT_OP_XPOLL); rv = vppcom_session_listen (vls_to_sh_tu (vls), q_len); + vls_mt_unguard (); vls_get_and_unlock (vlsh); return rv; } @@ -279,11 +604,53 @@ vls_connect (vls_handle_t vlsh, vppcom_endpt_t * server_ep) if (!(vls = vls_get_w_dlock (vlsh))) return VPPCOM_EBADFD; + vls_mt_guard (vls, VLS_MT_OP_XPOLL); rv = vppcom_session_connect (vls_to_sh_tu (vls), server_ep); + vls_mt_unguard (); vls_get_and_unlock (vlsh); return rv; } +static inline void +vls_mp_checks (vcl_locked_session_t * vls, int is_add) +{ + vcl_worker_t *wrk = vcl_worker_get_current (); + vcl_session_t *s; + + s = vcl_session_get (wrk, vls->session_index); + switch (s->session_state) + { + case STATE_LISTEN: + if (is_add) + { + if (vls->worker_index == wrk->wrk_index) + vls_listener_wrk_set (vls, wrk->wrk_index, 1 /* is_active */ ); + break; + } + vls_listener_wrk_stop_listen (vls, vls->worker_index); + break; 
+ case STATE_LISTEN_NO_MQ: + if (!is_add) + break; + + /* Register worker as listener */ + vls_listener_wrk_start_listen (vls, wrk->wrk_index); + + /* If owner worker did not attempt to accept/xpoll on the session, + * force a listen stop for it, since it may not be interested in + * accepting new sessions. + * This is pretty much a hack done to give app workers the illusion + * that it is fine to listen and not accept new sessions for a + * given listener. Without it, we would accumulate unhandled + * accepts on the passive worker message queue. */ + if (!vls_listener_wrk_is_active (vls, vls->worker_index)) + vls_listener_wrk_stop_listen (vls, vls->worker_index); + break; + default: + break; + } +} + vls_handle_t vls_accept (vls_handle_t listener_vlsh, vppcom_endpt_t * ep, int flags) { @@ -293,7 +660,11 @@ vls_accept (vls_handle_t listener_vlsh, vppcom_endpt_t * ep, int flags) if (!(vls = vls_get_w_dlock (listener_vlsh))) return VPPCOM_EBADFD; + if (vcl_n_workers () > 1) + vls_mp_checks (vls, 1 /* is_add */ ); + vls_mt_guard (vls, VLS_MT_OP_SPOOL); sh = vppcom_session_accept (vls_to_sh_tu (vls), ep, flags); + vls_mt_unguard (); vls_get_and_unlock (listener_vlsh); if (sh < 0) return sh; @@ -309,7 +680,9 @@ vls_create (uint8_t proto, uint8_t is_nonblocking) vcl_session_handle_t sh; vls_handle_t vlsh; + vls_mt_guard (0, VLS_MT_OP_SPOOL); sh = vppcom_session_create (proto, is_nonblocking); + vls_mt_unguard (); if (sh == INVALID_SESSION_ID) return VLS_INVALID_HANDLE; @@ -324,23 +697,34 @@ int vls_close (vls_handle_t vlsh) { vcl_locked_session_t *vls; - vcl_session_handle_t sh; - int rv, refcnt; + int rv; - if (!(vls = vls_get_w_dlock (vlsh))) - return VPPCOM_EBADFD; + vls_table_wlock (); - sh = vls_to_sh (vls); - refcnt = vppcom_session_attr (sh, VPPCOM_ATTR_GET_REFCNT, 0, 0); - if ((rv = vppcom_session_close (sh))) + vls = vls_get_and_lock (vlsh); + if (!vls) { - vls_dunlock (vls); - return rv; + vls_table_wunlock (); + return VPPCOM_EBADFD; } - vls_dunlock (vls); - 
if (refcnt <= 1) - vls_get_and_free (vlsh); + vls_mt_guard (0, VLS_MT_OP_SPOOL); + if (vls_is_shared (vls)) + { + /* At least two workers share the session so vls won't be freed */ + vls_unshare_session (vls, vcl_worker_get_current ()); + vls_unlock (vls); + vls_mt_unguard (); + vls_table_wunlock (); + return VPPCOM_OK; + } + + rv = vppcom_session_close (vls_to_sh (vls)); + vls_free (vls); + vls_mt_unguard (); + + vls_table_wunlock (); + return rv; } @@ -361,6 +745,22 @@ vls_epoll_create (void) return vlsh; } +static void +vls_epoll_ctl_mp_checks (vcl_locked_session_t * vls, int op) +{ + if (vcl_n_workers () <= 1) + { + vlsl->epoll_mp_check = 1; + return; + } + + if (op == EPOLL_CTL_MOD) + return; + + vlsl->epoll_mp_check = 1; + vls_mp_checks (vls, op == EPOLL_CTL_ADD); +} + int vls_epoll_ctl (vls_handle_t ep_vlsh, int op, vls_handle_t vlsh, struct epoll_event *event) @@ -374,6 +774,10 @@ vls_epoll_ctl (vls_handle_t ep_vlsh, int op, vls_handle_t vlsh, vls = vls_get_and_lock (vlsh); ep_sh = vls_to_sh (ep_vls); sh = vls_to_sh (vls); + + if (PREDICT_FALSE (!vlsl->epoll_mp_check)) + vls_epoll_ctl_mp_checks (vls, op); + vls_table_runlock (); rv = vppcom_epoll_ctl (ep_sh, op, sh, event); @@ -396,55 +800,262 @@ vls_epoll_wait (vls_handle_t ep_vlsh, struct epoll_event *events, if (!(vls = vls_get_w_dlock (ep_vlsh))) return VPPCOM_EBADFD; + vls_mt_guard (0, VLS_MT_OP_XPOLL); rv = vppcom_epoll_wait (vls_to_sh_tu (vls), events, maxevents, wait_for_time); + vls_mt_unguard (); vls_get_and_unlock (ep_vlsh); return rv; } -vcl_session_handle_t -vlsh_to_sh (vls_handle_t vlsh) +static void +vls_select_mp_checks (vcl_si_set * read_map) { vcl_locked_session_t *vls; + vcl_worker_t *wrk; + vcl_session_t *s; + u32 si; + + if (vcl_n_workers () <= 1) + { + vlsl->select_mp_check = 1; + return; + } + + if (!read_map) + return; + + vlsl->select_mp_check = 1; + wrk = vcl_worker_get_current (); + + /* *INDENT-OFF* */ + clib_bitmap_foreach (si, read_map, ({ + s = vcl_session_get (wrk, si); + if 
(s->session_state == STATE_LISTEN) + { + vls = vls_get (vls_session_index_to_vlsh (si)); + vls_mp_checks (vls, 1 /* is_add */); + } + })); + /* *INDENT-ON* */ +} + +int +vls_select (int n_bits, vcl_si_set * read_map, vcl_si_set * write_map, + vcl_si_set * except_map, double wait_for_time) +{ int rv; - vls = vls_get_w_dlock (vlsh); - if (!vls) - return INVALID_SESSION_ID; - rv = vls_to_sh (vls); - vls_dunlock (vls); + vls_mt_guard (0, VLS_MT_OP_XPOLL); + if (PREDICT_FALSE (!vlsl->select_mp_check)) + vls_select_mp_checks (read_map); + rv = vppcom_select (n_bits, read_map, write_map, except_map, wait_for_time); + vls_mt_unguard (); return rv; } -vcl_session_handle_t -vlsh_to_session_index (vls_handle_t vlsh) +static void +vls_unshare_vcl_worker_sessions (vcl_worker_t * wrk) { - vcl_session_handle_t sh; - sh = vlsh_to_sh (vlsh); - return vppcom_session_index (sh); + u32 current_wrk, is_current; + vcl_locked_session_t *vls; + vcl_session_t *s; + + current_wrk = vcl_get_worker_index (); + is_current = current_wrk == wrk->wrk_index; + vls_table_wlock (); + + /* *INDENT-OFF* */ + pool_foreach (s, wrk->sessions, ({ + vls = vls_get (vls_si_to_vlsh (s->session_index)); + if (vls && (is_current || vls_is_shared_by_wrk (vls, current_wrk))) + vls_unshare_session (vls, wrk); + })); + /* *INDENT-ON* */ + + vls_table_wunlock (); } -vls_handle_t -vls_session_index_to_vlsh (uint32_t session_index) +static void +vls_cleanup_vcl_worker (vcl_worker_t * wrk) { - vls_handle_t vlsh; - uword *vlshp; + /* Unshare sessions and also cleanup worker since child may have + * called _exit () and therefore vcl may not catch the event */ + vls_unshare_vcl_worker_sessions (wrk); + vcl_worker_cleanup (wrk, 1 /* notify vpp */ ); +} - vls_table_rlock (); - vlshp = hash_get (vlsm->session_index_to_vlsh_table, session_index); - vlsh = vlshp ? 
*vlshp : VLS_INVALID_HANDLE; - vls_table_runlock (); +static void +vls_cleanup_forked_child (vcl_worker_t * wrk, vcl_worker_t * child_wrk) +{ + vcl_worker_t *sub_child; + int tries = 0; - return vlsh; + if (child_wrk->forked_child != ~0) + { + sub_child = vcl_worker_get_if_valid (child_wrk->forked_child); + if (sub_child) + { + /* Wait a bit, maybe the process is going away */ + while (kill (sub_child->current_pid, 0) >= 0 && tries++ < 50) + usleep (1e3); + if (kill (sub_child->current_pid, 0) < 0) + vls_cleanup_forked_child (child_wrk, sub_child); + } + } + vls_cleanup_vcl_worker (child_wrk); + VDBG (0, "Cleaned up forked child wrk %u", child_wrk->wrk_index); + wrk->forked_child = ~0; +} + +static struct sigaction old_sa; + +static void +vls_intercept_sigchld_handler (int signum, siginfo_t * si, void *uc) +{ + vcl_worker_t *wrk, *child_wrk; + + if (vcl_get_worker_index () == ~0) + return; + + if (sigaction (SIGCHLD, &old_sa, 0)) + { + VERR ("couldn't restore sigchld"); + exit (-1); + } + + wrk = vcl_worker_get_current (); + if (wrk->forked_child == ~0) + return; + + child_wrk = vcl_worker_get_if_valid (wrk->forked_child); + if (!child_wrk) + goto done; + + if (si && si->si_pid != child_wrk->current_pid) + { + VDBG (0, "unexpected child pid %u", si->si_pid); + goto done; + } + vls_cleanup_forked_child (wrk, child_wrk); + +done: + if (old_sa.sa_flags & SA_SIGINFO) + { + void (*fn) (int, siginfo_t *, void *) = old_sa.sa_sigaction; + fn (signum, si, uc); + } + else + { + void (*fn) (int) = old_sa.sa_handler; + if (fn) + fn (signum); + } +} + +static void +vls_incercept_sigchld () +{ + struct sigaction sa; + clib_memset (&sa, 0, sizeof (sa)); + sa.sa_sigaction = vls_intercept_sigchld_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction (SIGCHLD, &sa, &old_sa)) + { + VERR ("couldn't intercept sigchld"); + exit (-1); + } +} + +static void +vls_app_pre_fork (void) +{ + vls_incercept_sigchld (); + vcl_flush_mq_events (); +} + +static void +vls_app_fork_child_handler (void) 
+{ + vcl_worker_t *parent_wrk; + int rv, parent_wrk_index; + u8 *child_name; + + parent_wrk_index = vcl_get_worker_index (); + VDBG (0, "initializing forked child %u with parent wrk %u", getpid (), + parent_wrk_index); + + /* + * Allocate worker + */ + vcl_set_worker_index (~0); + if (!vcl_worker_alloc_and_init ()) + VERR ("couldn't allocate new worker"); + + /* + * Attach to binary api + */ + child_name = format (0, "%v-child-%u%c", vcm->app_name, getpid (), 0); + vcl_cleanup_bapi (); + vppcom_api_hookup (); + vcm->app_state = STATE_APP_START; + rv = vppcom_connect_to_vpp ((char *) child_name); + vec_free (child_name); + if (rv) + { + VERR ("couldn't connect to VPP!"); + return; + } + + /* + * Register worker with vpp and share sessions + */ + vcl_worker_register_with_vpp (); + parent_wrk = vcl_worker_get (parent_wrk_index); + vls_worker_copy_on_fork (parent_wrk); + parent_wrk->forked_child = vcl_get_worker_index (); + + /* Reset number of threads and set wrk index */ + vlsl->vls_mt_n_threads = 0; + vlsl->vls_wrk_index = vcl_get_worker_index (); + vlsl->select_mp_check = 0; + vlsl->epoll_mp_check = 0; + vls_mt_locks_init (); + + VDBG (0, "forked child main worker initialized"); + vcm->forking = 0; +} + +static void +vls_app_fork_parent_handler (void) +{ + vcm->forking = 1; + while (vcm->forking) + ; +} + +void +vls_app_exit (void) +{ + /* Unshare the sessions. 
VCL will clean up the worker */ + vls_unshare_vcl_worker_sessions (vcl_worker_get_current ()); } int vls_app_create (char *app_name) { int rv; + if ((rv = vppcom_app_create (app_name))) return rv; + vlsm = clib_mem_alloc (sizeof (vls_main_t)); + clib_memset (vlsm, 0, sizeof (*vlsm)); clib_rwlock_init (&vlsm->vls_table_lock); + pthread_atfork (vls_app_pre_fork, vls_app_fork_parent_handler, + vls_app_fork_child_handler); + atexit (vls_app_exit); + vlsl->vls_wrk_index = vcl_get_worker_index (); + vls_mt_locks_init (); return VPPCOM_OK; } diff --git a/src/vcl/vcl_locked.h b/src/vcl/vcl_locked.h index 4f40f434cc55..2c8f21152309 100644 --- a/src/vcl/vcl_locked.h +++ b/src/vcl/vcl_locked.h @@ -44,6 +44,8 @@ int vls_epoll_ctl (vls_handle_t ep_vlsh, int op, vls_handle_t vlsh, struct epoll_event *event); int vls_epoll_wait (vls_handle_t ep_vlsh, struct epoll_event *events, int maxevents, double wait_for_time); +int vls_select (int n_bits, vcl_si_set * read_map, vcl_si_set * write_map, + vcl_si_set * except_map, double wait_for_time); vcl_session_handle_t vlsh_to_sh (vls_handle_t vlsh); vcl_session_handle_t vlsh_to_session_index (vls_handle_t vlsh); vls_handle_t vls_session_index_to_vlsh (uint32_t session_index); diff --git a/src/vcl/vcl_private.c b/src/vcl/vcl_private.c index 32664312f015..6c364e376f7e 100644 --- a/src/vcl/vcl_private.c +++ b/src/vcl/vcl_private.c @@ -346,144 +346,113 @@ vcl_worker_set_bapi (void) return -1; } -vcl_shared_session_t * -vcl_shared_session_alloc (void) +void +vcl_segment_table_add (u64 segment_handle, u32 svm_segment_index) { - vcl_shared_session_t *ss; - pool_get (vcm->shared_sessions, ss); - memset (ss, 0, sizeof (*ss)); - ss->ss_index = ss - vcm->shared_sessions; - return ss; + clib_rwlock_writer_lock (&vcm->segment_table_lock); + hash_set (vcm->segment_table, segment_handle, svm_segment_index); + clib_rwlock_writer_unlock (&vcm->segment_table_lock); } -vcl_shared_session_t * -vcl_shared_session_get (u32 ss_index) +u32 
+vcl_segment_table_lookup (u64 segment_handle) { - if (pool_is_free_index (vcm->shared_sessions, ss_index)) - return 0; - return pool_elt_at_index (vcm->shared_sessions, ss_index); + uword *seg_indexp; + + clib_rwlock_reader_lock (&vcm->segment_table_lock); + seg_indexp = hash_get (vcm->segment_table, segment_handle); + clib_rwlock_reader_unlock (&vcm->segment_table_lock); + + if (!seg_indexp) + return VCL_INVALID_SEGMENT_INDEX; + return ((u32) * seg_indexp); } void -vcl_shared_session_free (vcl_shared_session_t * ss) +vcl_segment_table_del (u64 segment_handle) { - pool_put (vcm->shared_sessions, ss); + clib_rwlock_writer_lock (&vcm->segment_table_lock); + hash_unset (vcm->segment_table, segment_handle); + clib_rwlock_writer_unlock (&vcm->segment_table_lock); } void -vcl_worker_share_session (vcl_worker_t * parent, vcl_worker_t * wrk, - vcl_session_t * new_s) +vcl_cleanup_bapi (void) { - vcl_shared_session_t *ss; - vcl_session_t *old_s; + socket_client_main_t *scm = &socket_client_main; + api_main_t *am = &api_main; - if (new_s->shared_index == ~0) - { - ss = vcl_shared_session_alloc (); - ss->session_index = new_s->session_index; - vec_add1 (ss->workers, parent->wrk_index); - vec_add1 (ss->workers, wrk->wrk_index); - new_s->shared_index = ss->ss_index; - old_s = vcl_session_get (parent, new_s->session_index); - old_s->shared_index = ss->ss_index; - } - else - { - ss = vcl_shared_session_get (new_s->shared_index); - vec_add1 (ss->workers, wrk->wrk_index); - } + am->my_client_index = ~0; + am->my_registration = 0; + am->vl_input_queue = 0; + am->msg_index_by_name_and_crc = 0; + scm->socket_fd = 0; + + vl_client_api_unmap (); } int -vcl_worker_unshare_session (vcl_worker_t * wrk, vcl_session_t * s) +vcl_session_read_ready (vcl_session_t * session) { - vcl_shared_session_t *ss; - int i; - - ss = vcl_shared_session_get (s->shared_index); - for (i = 0; i < vec_len (ss->workers); i++) + /* Assumes caller has acquired spinlock: vcm->sessions_lockp */ + if (PREDICT_FALSE 
(session->is_vep)) { - if (ss->workers[i] == wrk->wrk_index) - { - vec_del1 (ss->workers, i); - break; - } + VDBG (0, "ERROR: session %u: cannot read from an epoll session!", + session->session_index); + return VPPCOM_EBADFD; } - if (vec_len (ss->workers) == 0) + if (PREDICT_FALSE (!(session->session_state & (STATE_OPEN | STATE_LISTEN)))) { - vcl_shared_session_free (ss); - return 1; - } + vcl_session_state_t state = session->session_state; + int rv; - /* If the first removed and not last, start session worker change. - * First request goes to vpp and vpp reflects it back to the right - * worker */ - if (i == 0) - vcl_send_session_worker_update (wrk, s, ss->workers[0]); - - return 0; -} - -void -vcl_worker_share_sessions (vcl_worker_t * parent_wrk) -{ - vcl_session_t *new_s; - vcl_worker_t *wrk; + rv = ((state & STATE_DISCONNECT) ? VPPCOM_ECONNRESET : VPPCOM_ENOTCONN); - if (!parent_wrk->sessions) - return; + VDBG (1, "session %u [0x%llx]: not open! state 0x%x (%s), ret %d (%s)", + session->session_index, session->vpp_handle, state, + vppcom_session_state_str (state), rv, vppcom_retval_str (rv)); + return rv; + } - wrk = vcl_worker_get_current (); - wrk->sessions = pool_dup (parent_wrk->sessions); - wrk->session_index_by_vpp_handles = - hash_dup (parent_wrk->session_index_by_vpp_handles); + if (session->session_state & STATE_LISTEN) + return clib_fifo_elts (session->accept_evts_fifo); - /* *INDENT-OFF* */ - pool_foreach (new_s, wrk->sessions, ({ - vcl_worker_share_session (parent_wrk, wrk, new_s); - })); - /* *INDENT-ON* */ + return svm_fifo_max_dequeue (session->rx_fifo); } int -vcl_session_get_refcnt (vcl_session_t * s) -{ - vcl_shared_session_t *ss; - ss = vcl_shared_session_get (s->shared_index); - if (ss) - return vec_len (ss->workers); - return 0; -} - -void -vcl_segment_table_add (u64 segment_handle, u32 svm_segment_index) +vcl_session_write_ready (vcl_session_t * session) { - clib_rwlock_writer_lock (&vcm->segment_table_lock); - hash_set 
(vcm->segment_table, segment_handle, svm_segment_index); - clib_rwlock_writer_unlock (&vcm->segment_table_lock); -} - -u32 -vcl_segment_table_lookup (u64 segment_handle) -{ - uword *seg_indexp; + /* Assumes caller has acquired spinlock: vcm->sessions_lockp */ + if (PREDICT_FALSE (session->is_vep)) + { + VDBG (0, "session %u [0x%llx]: cannot write to an epoll session!", + session->session_index, session->vpp_handle); + return VPPCOM_EBADFD; + } - clib_rwlock_reader_lock (&vcm->segment_table_lock); - seg_indexp = hash_get (vcm->segment_table, segment_handle); - clib_rwlock_reader_unlock (&vcm->segment_table_lock); + if (PREDICT_FALSE (session->session_state & STATE_LISTEN)) + { + if (session->tx_fifo) + return svm_fifo_max_enqueue (session->tx_fifo); + else + return VPPCOM_EBADFD; + } - if (!seg_indexp) - return VCL_INVALID_SEGMENT_INDEX; - return ((u32) * seg_indexp); -} + if (PREDICT_FALSE (!(session->session_state & STATE_OPEN))) + { + vcl_session_state_t state = session->session_state; + int rv; + + rv = ((state & STATE_DISCONNECT) ? VPPCOM_ECONNRESET : VPPCOM_ENOTCONN); + VDBG (0, "session %u [0x%llx]: not open! 
state 0x%x (%s), ret %d (%s)", + session->session_index, session->vpp_handle, state, + vppcom_session_state_str (state), rv, vppcom_retval_str (rv)); + return rv; + } -void -vcl_segment_table_del (u64 segment_handle) -{ - clib_rwlock_writer_lock (&vcm->segment_table_lock); - hash_unset (vcm->segment_table, segment_handle); - clib_rwlock_writer_unlock (&vcm->segment_table_lock); + return svm_fifo_max_enqueue (session->tx_fifo); } /* diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h index dd1d0cea440c..f77d4479e41b 100644 --- a/src/vcl/vcl_private.h +++ b/src/vcl/vcl_private.h @@ -63,15 +63,16 @@ typedef enum typedef enum { - STATE_START = 0x01, - STATE_CONNECT = 0x02, - STATE_LISTEN = 0x04, - STATE_ACCEPT = 0x08, - STATE_VPP_CLOSING = 0x10, - STATE_DISCONNECT = 0x20, - STATE_FAILED = 0x40, - STATE_UPDATED = 0x80, -} session_state_t; + STATE_START = 0, + STATE_CONNECT = 0x01, + STATE_LISTEN = 0x02, + STATE_ACCEPT = 0x04, + STATE_VPP_CLOSING = 0x08, + STATE_DISCONNECT = 0x10, + STATE_FAILED = 0x20, + STATE_UPDATED = 0x40, + STATE_LISTEN_NO_MQ = 0x80, +} vcl_session_state_t; #define SERVER_STATE_OPEN (STATE_ACCEPT|STATE_VPP_CLOSING) #define CLIENT_STATE_OPEN (STATE_CONNECT|STATE_VPP_CLOSING) @@ -114,7 +115,7 @@ typedef struct vcl_session_msg u32 flags; } vcl_session_msg_t; -enum +typedef enum { VCL_SESS_ATTR_SERVER, VCL_SESS_ATTR_CUT_THRU, @@ -181,7 +182,6 @@ typedef struct svm_msg_q_t *our_evt_q; u64 options[16]; vcl_session_msg_t *accept_evts_fifo; - u32 shared_index; #if VCL_ELOG elog_track_t elog_track; #endif @@ -371,7 +371,6 @@ vcl_session_alloc (vcl_worker_t * wrk) pool_get (wrk->sessions, s); memset (s, 0, sizeof (*s)); s->session_index = s - wrk->sessions; - s->shared_index = ~0; return s; } @@ -389,11 +388,17 @@ vcl_session_get (vcl_worker_t * wrk, u32 session_index) return pool_elt_at_index (wrk->sessions, session_index); } -static inline int +static inline vcl_session_handle_t +vcl_session_handle_from_index (u32 session_index) +{ + ASSERT 
(session_index < 2 << 24); + return (vcl_get_worker_index () << 24 | session_index); +} + +static inline vcl_session_handle_t vcl_session_handle (vcl_session_t * s) { - ASSERT (s->session_index < 2 << 24); - return (vcl_get_worker_index () << 24 | s->session_index); + return vcl_session_handle_from_index (s->session_index); } static inline void @@ -487,11 +492,11 @@ vcl_session_table_lookup_listener (vcl_worker_t * wrk, u64 listener_handle) } session = pool_elt_at_index (wrk->sessions, p[0]); - ASSERT (session->session_state & STATE_LISTEN); + ASSERT (session->session_state & (STATE_LISTEN | STATE_LISTEN_NO_MQ)); return session; } -const char *vppcom_session_state_str (session_state_t state); +const char *vppcom_session_state_str (vcl_session_state_t state); static inline u8 vcl_session_is_ct (vcl_session_t * s) @@ -499,6 +504,34 @@ vcl_session_is_ct (vcl_session_t * s) return (s->our_evt_q != 0); } +static inline u8 +vcl_session_is_open (vcl_session_t * s) +{ + return ((s->session_state & STATE_OPEN) + || (s->session_state == STATE_LISTEN + && s->session_type == VPPCOM_PROTO_UDP)); +} + +static inline u8 +vcl_session_is_closing (vcl_session_t * s) +{ + return (s->session_state == STATE_VPP_CLOSING + || s->session_state == STATE_DISCONNECT); +} + +static inline int +vcl_session_closing_error (vcl_session_t * s) +{ + return s->session_state == STATE_DISCONNECT ? VPPCOM_ECONNRESET : 0; +} + +static inline int +vcl_session_closed_error (vcl_session_t * s) +{ + return s->session_state == STATE_DISCONNECT + ? 
VPPCOM_ECONNRESET : VPPCOM_ENOTCONN; +} + /* * Helpers */ @@ -529,12 +562,19 @@ int vcl_worker_set_bapi (void); void vcl_worker_share_sessions (vcl_worker_t * parent_wrk); int vcl_worker_unshare_session (vcl_worker_t * wrk, vcl_session_t * s); vcl_shared_session_t *vcl_shared_session_get (u32 ss_index); -int vcl_session_get_refcnt (vcl_session_t * s); + +void vcl_flush_mq_events (void); +void vcl_cleanup_bapi (void); +int vcl_session_cleanup (vcl_worker_t * wrk, vcl_session_t * session, + vcl_session_handle_t sh, u8 do_disconnect); void vcl_segment_table_add (u64 segment_handle, u32 svm_segment_index); u32 vcl_segment_table_lookup (u64 segment_handle); void vcl_segment_table_del (u64 segment_handle); +int vcl_session_read_ready (vcl_session_t * session); +int vcl_session_write_ready (vcl_session_t * session); + static inline vcl_worker_t * vcl_worker_get (u32 wrk_index) { @@ -555,6 +595,12 @@ vcl_worker_get_current (void) return vcl_worker_get (vcl_get_worker_index ()); } +static inline u8 +vcl_n_workers (void) +{ + return pool_elts (vcm->workers); +} + static inline svm_msg_q_t * vcl_session_vpp_evt_q (vcl_worker_t * wrk, vcl_session_t * s) { @@ -577,7 +623,7 @@ void vppcom_app_send_detach (void); void vppcom_send_connect_sock (vcl_session_t * session); void vppcom_send_disconnect_session (u64 vpp_handle); void vppcom_send_bind_sock (vcl_session_t * session); -void vppcom_send_unbind_sock (u64 vpp_handle); +void vppcom_send_unbind_sock (vcl_worker_t * wrk, u64 vpp_handle); void vppcom_api_hookup (void); void vppcom_send_application_tls_cert_add (vcl_session_t * session, char *cert, u32 cert_len); diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index 1797d93c6831..eecb2f48ce20 100644 --- a/src/vcl/vppcom.c +++ b/src/vcl/vppcom.c @@ -30,7 +30,7 @@ vcl_wait_for_segment (u64 segment_handle) f64 timeout; if (segment_handle == VCL_INVALID_SEGMENT_HANDLE) - return 1; + return 0; timeout = clib_time_now (&wrk->clib_time) + wait_for_seconds; while (clib_time_now 
(&wrk->clib_time) < timeout) @@ -60,7 +60,7 @@ vcl_mq_dequeue_batch (vcl_worker_t * wrk, svm_msg_q_t * mq) } const char * -vppcom_session_state_str (session_state_t state) +vppcom_session_state_str (vcl_session_state_t state) { char *st; @@ -94,6 +94,14 @@ vppcom_session_state_str (session_state_t state) st = "STATE_FAILED"; break; + case STATE_UPDATED: + st = "STATE_UPDATED"; + break; + + case STATE_LISTEN_NO_MQ: + st = "STATE_LISTEN_NO_MQ"; + break; + default: st = "UNKNOWN_STATE"; break; @@ -505,6 +513,10 @@ vcl_session_bound_handler (vcl_worker_t * wrk, session_bound_msg_t * mp) vcl_session_table_add_listener (wrk, mp->handle, sid); session->session_state = STATE_LISTEN; + session->vpp_evt_q = uword_to_pointer (mp->vpp_evt_q, svm_msg_q_t *); + vec_validate (wrk->vpp_event_queues, 0); + wrk->vpp_event_queues[0] = session->vpp_evt_q; + if (session->is_dgram) { svm_fifo_t *rx_fifo, *tx_fifo; @@ -607,22 +619,18 @@ vcl_session_worker_update_reply_handler (vcl_worker_t * wrk, void *data) s->session_index); return; } - s->rx_fifo = uword_to_pointer (msg->rx_fifo, svm_fifo_t *); - s->tx_fifo = uword_to_pointer (msg->tx_fifo, svm_fifo_t *); - s->rx_fifo->client_session_index = s->session_index; - s->tx_fifo->client_session_index = s->session_index; - s->rx_fifo->client_thread_index = wrk->wrk_index; - s->tx_fifo->client_thread_index = wrk->wrk_index; - s->session_state = STATE_UPDATED; - - if (s->shared_index != VCL_INVALID_SESSION_INDEX) + if (s->rx_fifo) { - vcl_shared_session_t *ss; - ss = vcl_shared_session_get (s->shared_index); - if (vec_len (ss->workers) > 1) - VDBG (0, "workers need to be updated"); + s->rx_fifo = uword_to_pointer (msg->rx_fifo, svm_fifo_t *); + s->tx_fifo = uword_to_pointer (msg->tx_fifo, svm_fifo_t *); + s->rx_fifo->client_session_index = s->session_index; + s->tx_fifo->client_session_index = s->session_index; + s->rx_fifo->client_thread_index = wrk->wrk_index; + s->tx_fifo->client_thread_index = wrk->wrk_index; } + s->session_state = 
STATE_UPDATED; + VDBG (0, "session %u[0x%llx] moved to worker %u", s->session_index, s->vpp_handle, wrk->wrk_index); } @@ -653,7 +661,6 @@ vcl_handle_mq_event (vcl_worker_t * wrk, session_event_t * e) session = vcl_session_disconnected_handler (wrk, disconnected_msg); if (!session) break; - session->session_state = STATE_DISCONNECT; VDBG (0, "disconnected session %u [0x%llx]", session->session_index, session->vpp_handle); break; @@ -677,7 +684,7 @@ vcl_handle_mq_event (vcl_worker_t * wrk, session_event_t * e) static int vppcom_wait_for_session_state_change (u32 session_index, - session_state_t state, + vcl_session_state_t state, f64 wait_for_time) { vcl_worker_t *wrk = vcl_worker_get_current (); @@ -723,7 +730,7 @@ vppcom_wait_for_session_state_change (u32 session_index, static void vcl_handle_pending_wrk_updates (vcl_worker_t * wrk) { - session_state_t state; + vcl_session_state_t state; vcl_session_t *s; u32 *sip; @@ -741,7 +748,7 @@ vcl_handle_pending_wrk_updates (vcl_worker_t * wrk) vec_reset_length (wrk->pending_session_wrk_updates); } -static void +void vcl_flush_mq_events (void) { vcl_worker_t *wrk = vcl_worker_get_current (); @@ -818,11 +825,11 @@ vppcom_session_unbind (u32 session_handle) session->vpp_handle = ~0; session->session_state = STATE_DISCONNECT; - VDBG (1, "VCL<%d>: vpp handle 0x%llx, sid %u: sending unbind msg! new state" - " 0x%x (%s)", getpid (), vpp_handle, session_handle, STATE_DISCONNECT, + VDBG (1, "vpp handle 0x%llx, sid %u: sending unbind msg! 
new state" + " 0x%x (%s)", vpp_handle, session_handle, STATE_DISCONNECT, vppcom_session_state_str (STATE_DISCONNECT)); vcl_evt (VCL_EVT_UNBIND, session); - vppcom_send_unbind_sock (vpp_handle); + vppcom_send_unbind_sock (wrk, vpp_handle); return VPPCOM_OK; } @@ -833,7 +840,7 @@ vppcom_session_disconnect (u32 session_handle) vcl_worker_t *wrk = vcl_worker_get_current (); svm_msg_q_t *vpp_evt_q; vcl_session_t *session; - session_state_t state; + vcl_session_state_t state; u64 vpp_handle; session = vcl_session_get_w_handle (wrk, session_handle); @@ -872,164 +879,6 @@ vppcom_session_disconnect (u32 session_handle) return VPPCOM_OK; } -static void -vcl_cleanup_bapi (void) -{ - socket_client_main_t *scm = &socket_client_main; - api_main_t *am = &api_main; - - am->my_client_index = ~0; - am->my_registration = 0; - am->vl_input_queue = 0; - am->msg_index_by_name_and_crc = 0; - scm->socket_fd = 0; - - vl_client_api_unmap (); -} - -static void -vcl_cleanup_forked_child (vcl_worker_t * wrk, vcl_worker_t * child_wrk) -{ - vcl_worker_t *sub_child; - int tries = 0; - - if (child_wrk->forked_child != ~0) - { - sub_child = vcl_worker_get_if_valid (child_wrk->forked_child); - if (sub_child) - { - /* Wait a bit, maybe the process is going away */ - while (kill (sub_child->current_pid, 0) >= 0 && tries++ < 50) - usleep (1e3); - if (kill (sub_child->current_pid, 0) < 0) - vcl_cleanup_forked_child (child_wrk, sub_child); - } - } - vcl_worker_cleanup (child_wrk, 1 /* notify vpp */ ); - VDBG (0, "Cleaned up wrk %u", child_wrk->wrk_index); - wrk->forked_child = ~0; -} - -static struct sigaction old_sa; - -static void -vcl_intercept_sigchld_handler (int signum, siginfo_t * si, void *uc) -{ - vcl_worker_t *wrk, *child_wrk; - - if (vcl_get_worker_index () == ~0) - return; - - if (sigaction (SIGCHLD, &old_sa, 0)) - { - VERR ("couldn't restore sigchld"); - exit (-1); - } - - wrk = vcl_worker_get_current (); - if (wrk->forked_child == ~0) - return; - - child_wrk = vcl_worker_get_if_valid 
(wrk->forked_child); - if (!child_wrk) - goto done; - - if (si && si->si_pid != child_wrk->current_pid) - { - VDBG (0, "unexpected child pid %u", si->si_pid); - goto done; - } - vcl_cleanup_forked_child (wrk, child_wrk); - -done: - if (old_sa.sa_flags & SA_SIGINFO) - { - void (*fn) (int, siginfo_t *, void *) = old_sa.sa_sigaction; - fn (signum, si, uc); - } - else - { - void (*fn) (int) = old_sa.sa_handler; - if (fn) - fn (signum); - } -} - -static void -vcl_incercept_sigchld () -{ - struct sigaction sa; - clib_memset (&sa, 0, sizeof (sa)); - sa.sa_sigaction = vcl_intercept_sigchld_handler; - sa.sa_flags = SA_SIGINFO; - if (sigaction (SIGCHLD, &sa, &old_sa)) - { - VERR ("couldn't intercept sigchld"); - exit (-1); - } -} - -static void -vcl_app_pre_fork (void) -{ - vcl_incercept_sigchld (); - vcl_flush_mq_events (); -} - -static void -vcl_app_fork_child_handler (void) -{ - vcl_worker_t *parent_wrk, *wrk; - int rv, parent_wrk_index; - u8 *child_name; - - parent_wrk_index = vcl_get_worker_index (); - VDBG (0, "initializing forked child with parent wrk %u", parent_wrk_index); - - /* - * Allocate worker - */ - vcl_set_worker_index (~0); - if (!vcl_worker_alloc_and_init ()) - VERR ("couldn't allocate new worker"); - - /* - * Attach to binary api - */ - child_name = format (0, "%v-child-%u%c", vcm->app_name, getpid (), 0); - vcl_cleanup_bapi (); - vppcom_api_hookup (); - vcm->app_state = STATE_APP_START; - rv = vppcom_connect_to_vpp ((char *) child_name); - vec_free (child_name); - if (rv) - { - VERR ("couldn't connect to VPP!"); - return; - } - - /* - * Register worker with vpp and share sessions - */ - vcl_worker_register_with_vpp (); - parent_wrk = vcl_worker_get (parent_wrk_index); - wrk = vcl_worker_get_current (); - wrk->vpp_event_queues = vec_dup (parent_wrk->vpp_event_queues); - vcl_worker_share_sessions (parent_wrk); - parent_wrk->forked_child = vcl_get_worker_index (); - - VDBG (0, "forked child main worker initialized"); - vcm->forking = 0; -} - -static void 
-vcl_app_fork_parent_handler (void) -{ - vcm->forking = 1; - while (vcm->forking) - ; -} - /** * Handle app exit * @@ -1079,8 +928,6 @@ vppcom_app_create (char *app_name) pool_alloc (vcm->workers, vcl_cfg->max_workers); clib_spinlock_init (&vcm->workers_lock); clib_rwlock_init (&vcm->segment_table_lock); - pthread_atfork (vcl_app_pre_fork, vcl_app_fork_parent_handler, - vcl_app_fork_child_handler); atexit (vppcom_app_exit); /* Allocate default worker */ @@ -1177,22 +1024,14 @@ vppcom_session_create (u8 proto, u8 is_nonblocking) } int -vppcom_session_close (uint32_t session_handle) +vcl_session_cleanup (vcl_worker_t * wrk, vcl_session_t * session, + vcl_session_handle_t sh, u8 do_disconnect) { - vcl_worker_t *wrk = vcl_worker_get_current (); - u8 is_vep, do_disconnect = 1; - vcl_session_t *session = 0; - session_state_t state; + vcl_session_state_t state; u32 next_sh, vep_sh; int rv = VPPCOM_OK; u64 vpp_handle; - - session = vcl_session_get_w_handle (wrk, session_handle); - if (!session) - return VPPCOM_EBADFD; - - if (session->shared_index != ~0) - do_disconnect = vcl_worker_unshare_session (wrk, session); + u8 is_vep; is_vep = session->is_vep; next_sh = session->vep.next_sh; @@ -1200,14 +1039,13 @@ vppcom_session_close (uint32_t session_handle) state = session->session_state; vpp_handle = session->vpp_handle; - VDBG (1, "closing session handle %u vpp handle %u", session_handle, - vpp_handle); + VDBG (1, "session %u [0x%llx] closing", session->session_index, vpp_handle); if (is_vep) { while (next_sh != ~0) { - rv = vppcom_epoll_ctl (session_handle, EPOLL_CTL_DEL, next_sh, 0); + rv = vppcom_epoll_ctl (sh, EPOLL_CTL_DEL, next_sh, 0); if (PREDICT_FALSE (rv < 0)) VDBG (0, "vpp handle 0x%llx, sid %u: EPOLL_CTL_DEL vep_idx %u" " failed! 
rv %d (%s)", vpp_handle, next_sh, vep_sh, rv, @@ -1220,36 +1058,35 @@ vppcom_session_close (uint32_t session_handle) { if (session->is_vep_session) { - rv = vppcom_epoll_ctl (vep_sh, EPOLL_CTL_DEL, session_handle, 0); + rv = vppcom_epoll_ctl (vep_sh, EPOLL_CTL_DEL, sh, 0); if (rv < 0) - VDBG (0, "vpp handle 0x%llx, sid %u: EPOLL_CTL_DEL vep_idx %u " - "failed! rv %d (%s)", vpp_handle, session_handle, vep_sh, - rv, vppcom_retval_str (rv)); + VDBG (0, "session %u [0x%llx]: EPOLL_CTL_DEL vep_idx %u " + "failed! rv %d (%s)", session->session_index, vpp_handle, + vep_sh, rv, vppcom_retval_str (rv)); } if (!do_disconnect) { - VDBG (0, "session handle %u [0x%llx] disconnect skipped", - session_handle, vpp_handle); + VDBG (1, "session %u [0x%llx] disconnect skipped", + session->session_index, vpp_handle); goto cleanup; } if (state & STATE_LISTEN) { - rv = vppcom_session_unbind (session_handle); + rv = vppcom_session_unbind (sh); if (PREDICT_FALSE (rv < 0)) - VDBG (0, "vpp handle 0x%llx, sid %u: listener unbind failed! " - "rv %d (%s)", vpp_handle, session_handle, rv, + VDBG (0, "session %u [0x%llx]: listener unbind failed! " + "rv %d (%s)", session->session_index, vpp_handle, rv, vppcom_retval_str (rv)); } else if (state & STATE_OPEN) { - rv = vppcom_session_disconnect (session_handle); + rv = vppcom_session_disconnect (sh); if (PREDICT_FALSE (rv < 0)) - clib_warning ("VCL<%d>: ERROR: vpp handle 0x%llx, sid %u: " - "session disconnect failed! rv %d (%s)", - getpid (), vpp_handle, session_handle, - rv, vppcom_retval_str (rv)); + VDBG (0, "ERROR: session %u [0x%llx]: disconnect failed!" 
+ " rv %d (%s)", session->session_index, vpp_handle, + rv, vppcom_retval_str (rv)); } else if (state == STATE_DISCONNECT) { @@ -1259,8 +1096,6 @@ vppcom_session_close (uint32_t session_handle) } } -cleanup: - if (vcl_session_is_ct (session)) { vcl_cut_through_registration_t *ctr; @@ -1278,16 +1113,29 @@ vppcom_session_close (uint32_t session_handle) vcl_ct_registration_unlock (wrk); } + VDBG (0, "session %u [0x%llx] removed", session->session_index, vpp_handle); + +cleanup: vcl_session_table_del_vpp_handle (wrk, vpp_handle); vcl_session_free (wrk, session); - - VDBG (0, "session handle %u [0x%llx] removed", session_handle, vpp_handle); - vcl_evt (VCL_EVT_CLOSE, session, rv); return rv; } +int +vppcom_session_close (uint32_t session_handle) +{ + vcl_worker_t *wrk = vcl_worker_get_current (); + vcl_session_t *session; + + session = vcl_session_get_w_handle (wrk, session_handle); + if (!session) + return VPPCOM_EBADFD; + return vcl_session_cleanup (wrk, session, session_handle, + 1 /* do_disconnect */ ); +} + int vppcom_session_bind (uint32_t session_handle, vppcom_endpt_t * ep) { @@ -1665,20 +1513,12 @@ vcl_is_rx_evt_for_session (session_event_t * e, u32 sid, u8 is_ct) return (e->event_type == SESSION_IO_EVT_CT_TX); } -static inline u8 -vcl_session_is_readable (vcl_session_t * s) -{ - return ((s->session_state & STATE_OPEN) - || (s->session_state == STATE_LISTEN - && s->session_type == VPPCOM_PROTO_UDP)); -} - static inline int vppcom_session_read_internal (uint32_t session_handle, void *buf, int n, u8 peek) { vcl_worker_t *wrk = vcl_worker_get_current (); - int n_read = 0, rv, is_nonblocking; + int n_read = 0, is_nonblocking; vcl_session_t *s = 0; svm_fifo_t *rx_fifo; svm_msg_q_msg_t msg; @@ -1693,15 +1533,12 @@ vppcom_session_read_internal (uint32_t session_handle, void *buf, int n, if (PREDICT_FALSE (!s || s->is_vep)) return VPPCOM_EBADFD; - if (PREDICT_FALSE (!vcl_session_is_readable (s))) + if (PREDICT_FALSE (!vcl_session_is_open (s))) { - session_state_t state 
= s->session_state; - rv = ((state & STATE_DISCONNECT) ? VPPCOM_ECONNRESET : VPPCOM_ENOTCONN); - - VDBG (0, "session handle %u[0x%llx] is not open! state 0x%x (%s)," - " returning %d (%s)", session_handle, s->vpp_handle, state, - vppcom_session_state_str (state), rv, vppcom_retval_str (rv)); - return rv; + VDBG (0, "session handle %u[0x%llx] is not open! state 0x%x (%s)", + s->session_index, s->vpp_handle, s->session_state, + vppcom_session_state_str (s->session_state)); + return vcl_session_closed_error (s); } is_nonblocking = VCL_SESS_ATTR_TEST (s->attr, VCL_SESS_ATTR_NONBLOCK); @@ -1719,6 +1556,9 @@ vppcom_session_read_internal (uint32_t session_handle, void *buf, int n, } while (svm_fifo_is_empty (rx_fifo)) { + if (vcl_session_is_closing (s)) + return vcl_session_closing_error (s); + svm_fifo_unset_event (rx_fifo); svm_msg_q_lock (mq); if (svm_msg_q_is_empty (mq)) @@ -1730,9 +1570,6 @@ vppcom_session_read_internal (uint32_t session_handle, void *buf, int n, if (!vcl_is_rx_evt_for_session (e, s->session_index, is_ct)) vcl_handle_mq_event (wrk, e); svm_msg_q_free_msg (mq, &msg); - - if (PREDICT_FALSE (s->session_state == STATE_DISCONNECT)) - return VPPCOM_ECONNRESET; } } @@ -1774,7 +1611,7 @@ vppcom_session_read_segments (uint32_t session_handle, vppcom_data_segments_t ds) { vcl_worker_t *wrk = vcl_worker_get_current (); - int n_read = 0, rv, is_nonblocking; + int n_read = 0, is_nonblocking; vcl_session_t *s = 0; svm_fifo_t *rx_fifo; svm_msg_q_msg_t msg; @@ -1786,12 +1623,8 @@ vppcom_session_read_segments (uint32_t session_handle, if (PREDICT_FALSE (!s || s->is_vep)) return VPPCOM_EBADFD; - if (PREDICT_FALSE (!vcl_session_is_readable (s))) - { - session_state_t state = s->session_state; - rv = ((state & STATE_DISCONNECT) ? 
VPPCOM_ECONNRESET : VPPCOM_ENOTCONN); - return rv; - } + if (PREDICT_FALSE (!vcl_session_is_open (s))) + return vcl_session_closed_error (s); is_nonblocking = VCL_SESS_ATTR_TEST (s->attr, VCL_SESS_ATTR_NONBLOCK); is_ct = vcl_session_is_ct (s); @@ -1808,6 +1641,9 @@ vppcom_session_read_segments (uint32_t session_handle, } while (svm_fifo_is_empty (rx_fifo)) { + if (vcl_session_is_closing (s)) + return vcl_session_closing_error (s); + svm_fifo_unset_event (rx_fifo); svm_msg_q_lock (mq); if (svm_msg_q_is_empty (mq)) @@ -1819,9 +1655,6 @@ vppcom_session_read_segments (uint32_t session_handle, if (!vcl_is_rx_evt_for_session (e, s->session_index, is_ct)) vcl_handle_mq_event (wrk, e); svm_msg_q_free_msg (mq, &msg); - - if (PREDICT_FALSE (s->session_state == STATE_DISCONNECT)) - return VPPCOM_ECONNRESET; } } @@ -1853,37 +1686,6 @@ vppcom_session_free_segments (uint32_t session_handle, svm_fifo_segments_free (s->rx_fifo, (svm_fifo_segment_t *) ds); } -static inline int -vppcom_session_read_ready (vcl_session_t * session) -{ - /* Assumes caller has acquired spinlock: vcm->sessions_lockp */ - if (PREDICT_FALSE (session->is_vep)) - { - clib_warning ("VCL<%d>: ERROR: sid %u: cannot read from an " - "epoll session!", getpid (), session->session_index); - return VPPCOM_EBADFD; - } - - if (PREDICT_FALSE (!(session->session_state & (STATE_OPEN | STATE_LISTEN)))) - { - session_state_t state = session->session_state; - int rv; - - rv = ((state & STATE_DISCONNECT) ? VPPCOM_ECONNRESET : VPPCOM_ENOTCONN); - - VDBG (1, "VCL<%d>: vpp handle 0x%llx, sid %u: session is not open!" 
- " state 0x%x (%s), returning %d (%s)", getpid (), - session->vpp_handle, session->session_index, state, - vppcom_session_state_str (state), rv, vppcom_retval_str (rv)); - return rv; - } - - if (session->session_state & STATE_LISTEN) - return clib_fifo_elts (session->accept_evts_fifo); - - return svm_fifo_max_dequeue (session->rx_fifo); -} - int vppcom_data_segment_copy (void *buf, vppcom_data_segments_t ds, u32 max_bytes) { @@ -1912,7 +1714,7 @@ vppcom_session_write_inline (uint32_t session_handle, void *buf, size_t n, u8 is_flush) { vcl_worker_t *wrk = vcl_worker_get_current (); - int rv, n_write, is_nonblocking; + int n_write, is_nonblocking; vcl_session_t *s = 0; svm_fifo_t *tx_fifo = 0; session_evt_type_t et; @@ -1930,21 +1732,17 @@ vppcom_session_write_inline (uint32_t session_handle, void *buf, size_t n, if (PREDICT_FALSE (s->is_vep)) { - clib_warning ("VCL<%d>: ERROR: vpp handle 0x%llx, sid %u: " - "cannot write to an epoll session!", - getpid (), s->vpp_handle, session_handle); - + VDBG (0, "ERROR: session %u [0x%llx]: cannot write to an epoll" + " session!", s->session_index, s->vpp_handle); return VPPCOM_EBADFD; } - if (PREDICT_FALSE (!(s->session_state & STATE_OPEN))) + if (PREDICT_FALSE (!vcl_session_is_open (s))) { - session_state_t state = s->session_state; - rv = ((state & STATE_DISCONNECT) ? VPPCOM_ECONNRESET : VPPCOM_ENOTCONN); - VDBG (1, "VCL<%d>: vpp handle 0x%llx, sid %u: session is not open! " - "state 0x%x (%s)", getpid (), s->vpp_handle, session_handle, - state, vppcom_session_state_str (state)); - return rv; + VDBG (1, "session %u [0x%llx]: is not open! 
state 0x%x (%s)", + s->session_index, s->vpp_handle, s->session_state, + vppcom_session_state_str (s->session_state)); + return vcl_session_closed_error (s);; } tx_fifo = s->tx_fifo; @@ -1960,6 +1758,8 @@ vppcom_session_write_inline (uint32_t session_handle, void *buf, size_t n, while (svm_fifo_is_full (tx_fifo)) { svm_fifo_add_want_tx_ntf (tx_fifo, SVM_FIFO_WANT_TX_NOTIF); + if (vcl_session_is_closing (s)) + return vcl_session_closing_error (s); svm_msg_q_lock (mq); if (svm_msg_q_is_empty (mq)) svm_msg_q_wait (mq); @@ -1971,9 +1771,6 @@ vppcom_session_write_inline (uint32_t session_handle, void *buf, size_t n, if (!vcl_is_tx_evt_for_session (e, s->session_index, is_ct)) vcl_handle_mq_event (wrk, e); svm_msg_q_free_msg (mq, &msg); - - if (PREDICT_FALSE (!(s->session_state & STATE_OPEN))) - return VPPCOM_ECONNRESET; } } @@ -1991,8 +1788,8 @@ vppcom_session_write_inline (uint32_t session_handle, void *buf, size_t n, ASSERT (n_write > 0); - VDBG (2, "VCL<%d>: vpp handle 0x%llx, sid %u: wrote %d bytes", getpid (), - s->vpp_handle, session_handle, n_write); + VDBG (2, "session %u [0x%llx]: wrote %d bytes", s->session_index, + s->vpp_handle, n_write); return n_write; } @@ -2037,41 +1834,6 @@ vcl_ct_session_get_from_fifo (vcl_worker_t * wrk, svm_fifo_t * f, u8 type) return 0; } -static inline int -vppcom_session_write_ready (vcl_session_t * session) -{ - /* Assumes caller has acquired spinlock: vcm->sessions_lockp */ - if (PREDICT_FALSE (session->is_vep)) - { - VDBG (0, "session %u [0x%llx]: cannot write to an epoll session!", - session->session_index, session->vpp_handle); - return VPPCOM_EBADFD; - } - - if (PREDICT_FALSE (session->session_state & STATE_LISTEN)) - { - if (session->tx_fifo) - return svm_fifo_max_enqueue (session->tx_fifo); - else - return VPPCOM_EBADFD; - } - - if (PREDICT_FALSE (!(session->session_state & STATE_OPEN))) - { - session_state_t state = session->session_state; - int rv; - - rv = ((state & STATE_DISCONNECT) ? 
VPPCOM_ECONNRESET : VPPCOM_ENOTCONN); - VDBG (0, "session %u [0x%llx]: session is not open! state 0x%x (%s), " - "returning %d (%s)", session->session_index, session->vpp_handle, - state, vppcom_session_state_str (state), rv, - vppcom_retval_str (rv)); - return rv; - } - - return svm_fifo_max_enqueue (session->tx_fifo); -} - #define vcl_fifo_rx_evt_valid_or_break(_fifo) \ if (PREDICT_FALSE (svm_fifo_is_empty (_fifo))) \ { \ @@ -2366,7 +2128,7 @@ vppcom_select (int n_bits, vcl_si_set * read_map, vcl_si_set * write_map, continue; } - rv = vppcom_session_read_ready (session); + rv = vcl_session_read_ready (session); if (rv) { clib_bitmap_set_no_check ((uword*)read_map, sid, 1); @@ -2767,31 +2529,34 @@ vcl_epoll_wait_handle_mq_event (vcl_worker_t * wrk, session_event_t * e, if (!(session = vcl_session_get (wrk, sid))) break; session_events = session->vep.ev.events; - if (EPOLLOUT & session_events) - { - add_event = 1; - events[*num_ev].events |= EPOLLOUT; - session_evt_data = session->vep.ev.data.u64; - } + if (!(EPOLLOUT & session_events)) + break; + add_event = 1; + events[*num_ev].events |= EPOLLOUT; + session_evt_data = session->vep.ev.data.u64; break; case SESSION_CTRL_EVT_DISCONNECTED: disconnected_msg = (session_disconnected_msg_t *) e->data; session = vcl_session_disconnected_handler (wrk, disconnected_msg); if (!session) break; + session_events = session->vep.ev.events; + if (!((EPOLLHUP | EPOLLRDHUP) & session_events)) + break; add_event = 1; events[*num_ev].events |= EPOLLHUP | EPOLLRDHUP; session_evt_data = session->vep.ev.data.u64; - session_events = session->vep.ev.events; break; case SESSION_CTRL_EVT_RESET: sid = vcl_session_reset_handler (wrk, (session_reset_msg_t *) e->data); if (!(session = vcl_session_get (wrk, sid))) break; + session_events = session->vep.ev.events; + if (!((EPOLLHUP | EPOLLRDHUP) & session_events)) + break; add_event = 1; events[*num_ev].events |= EPOLLHUP | EPOLLRDHUP; session_evt_data = session->vep.ev.data.u64; - session_events 
= session->vep.ev.events; break; case SESSION_CTRL_EVT_REQ_WORKER_UPDATE: vcl_session_req_worker_update_handler (wrk, e->data); @@ -2876,7 +2641,7 @@ vppcom_epoll_wait_condvar (vcl_worker_t * wrk, struct epoll_event *events, double total_wait = 0, wait_slice; int rv; - wait_for_time = (wait_for_time == -1) ? (double) 10e9 : wait_for_time; + wait_for_time = (wait_for_time == -1) ? (double) 1e6 : wait_for_time; wait_slice = wrk->cut_through_registrations ? 10e-6 : wait_for_time; do @@ -2996,12 +2761,12 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op, switch (op) { case VPPCOM_ATTR_GET_NREAD: - rv = vppcom_session_read_ready (session); + rv = vcl_session_read_ready (session); VDBG (2, "VPPCOM_ATTR_GET_NREAD: sid %u, nread = %d", rv); break; case VPPCOM_ATTR_GET_NWRITE: - rv = vppcom_session_write_ready (session); + rv = vcl_session_write_ready (session); VDBG (2, "VCL<%d>: VPPCOM_ATTR_GET_NWRITE: sid %u, nwrite = %d", getpid (), session_handle, rv); break; @@ -3502,10 +3267,6 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op, rv = VPPCOM_EINVAL; break; - case VPPCOM_ATTR_GET_REFCNT: - rv = vcl_session_get_refcnt (session); - break; - case VPPCOM_ATTR_SET_SHUT: if (*flags == SHUT_RD || *flags == SHUT_RDWR) VCL_SESS_ATTR_SET (session->attr, VCL_SESS_ATTR_SHUT_RD); @@ -3644,7 +3405,7 @@ vppcom_poll (vcl_poll_t * vp, uint32_t n_sids, double wait_for_time) if (POLLIN & vp[i].events) { - rv = vppcom_session_read_ready (session); + rv = vcl_session_read_ready (session); if (rv > 0) { vp[i].revents |= POLLIN; @@ -3668,7 +3429,7 @@ vppcom_poll (vcl_poll_t * vp, uint32_t n_sids, double wait_for_time) if (POLLOUT & vp[i].events) { - rv = vppcom_session_write_ready (session); + rv = vcl_session_write_ready (session); if (rv > 0) { vp[i].revents |= POLLOUT; @@ -3733,12 +3494,6 @@ vppcom_session_worker (vcl_session_handle_t session_handle) return session_handle >> 24; } -int -vppcom_session_handle (uint32_t session_index) -{ - return (vcl_get_worker_index () 
<< 24) | session_index; -} - int vppcom_worker_register (void) { @@ -3760,6 +3515,15 @@ vppcom_worker_index (void) return vcl_get_worker_index (); } +int +vppcom_worker_mqs_epfd (void) +{ + vcl_worker_t *wrk = vcl_worker_get_current (); + if (!vcm->cfg.use_mq_eventfd) + return -1; + return wrk->mqs_epfd; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vcl/vppcom.h b/src/vcl/vppcom.h index d82c9f9c58c2..053abc977816 100644 --- a/src/vcl/vppcom.h +++ b/src/vcl/vppcom.h @@ -152,7 +152,6 @@ typedef enum VPPCOM_ATTR_SET_TCP_KEEPINTVL, VPPCOM_ATTR_GET_TCP_USER_MSS, VPPCOM_ATTR_SET_TCP_USER_MSS, - VPPCOM_ATTR_GET_REFCNT, VPPCOM_ATTR_SET_SHUT, VPPCOM_ATTR_GET_SHUT, } vppcom_attr_op_t; @@ -284,7 +283,6 @@ extern int vppcom_poll (vcl_poll_t * vp, uint32_t n_sids, extern int vppcom_mq_epoll_fd (void); extern int vppcom_session_index (vcl_session_handle_t session_handle); extern int vppcom_session_worker (vcl_session_handle_t session_handle); -extern int vppcom_session_handle (uint32_t session_index); extern int vppcom_session_read_segments (uint32_t session_handle, vppcom_data_segments_t ds); @@ -310,6 +308,14 @@ extern int vppcom_worker_register (void); */ extern int vppcom_worker_index (void); +/** + * Returns the current worker's message queues epoll fd + * + * This only works if vcl is configured to do eventfd based message queue + * notifications. + */ +extern int vppcom_worker_mqs_epfd (void); + /* *INDENT-OFF* */ #ifdef __cplusplus } diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 5370d5012934..c44d8e49d40c 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -43,20 +43,24 @@ * Allocate/free network buffers. 
*/ +#include #include #include -vlib_buffer_callbacks_t *vlib_buffer_callbacks = 0; +#define VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA 16384 +#define VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA_UNPRIV 8192 -/* when running unpriviledged we are limited by RLIMIT_MEMLOCK which is - typically set to 16MB so setting default size for buffer memory to 14MB - */ -static u32 vlib_buffer_physmem_sz = 14 << 20; +#ifdef CLIB_HAVE_VEC128 +/* Assumptions by vlib_buffer_free_inline: */ +STATIC_ASSERT_FITS_IN (vlib_buffer_t, flags, 16); +STATIC_ASSERT_FITS_IN (vlib_buffer_t, ref_count, 16); +STATIC_ASSERT_FITS_IN (vlib_buffer_t, buffer_pool_index, 16); +#endif -vlib_buffer_main_t buffer_main; +/* Make sure that buffer template size is not accidentally changed */ +STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, template_end, 64); -/* logging */ -static vlib_log_class_t buffer_log_default; +u16 __vlib_buffer_external_hdr_size = 0; uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, @@ -87,9 +91,9 @@ format_vlib_buffer (u8 * s, va_list * args) a = format (a, "%s ", v); foreach_vlib_buffer_flag #undef _ - s = format (s, "current data %d, length %d, free-list %d, clone-count %u", - b->current_data, b->current_length, - vlib_buffer_get_free_list_index (b), b->n_add_refs); + s = format (s, "current data %d, length %d, buffer-pool %d, " + "ref-count %u", b->current_data, b->current_length, + b->buffer_pool_index, b->ref_count); if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID) s = format (s, ", totlen-nifb %d", @@ -109,9 +113,9 @@ format_vlib_buffer (u8 * s, va_list * args) b = vlib_get_buffer (vm, next_buffer); s = - format (s, "\n%Unext-buffer 0x%x, segment length %d, clone-count %u", + format (s, "\n%Unext-buffer 0x%x, segment length %d, ref-count %u", format_white_space, indent, next_buffer, b->current_length, - b->n_add_refs); + b->ref_count); } return s; @@ -179,31 +183,26 @@ vlib_validate_buffer_helper (vlib_main_t * vm, u32 bi, uword follow_buffer_next, uword ** unique_hash) { + 
vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_t *b = vlib_get_buffer (vm, bi); - vlib_buffer_free_list_t *fl; - if (pool_is_free_index - (vm->buffer_free_list_pool, vlib_buffer_get_free_list_index (b))) - return format (0, "unknown free list 0x%x", - vlib_buffer_get_free_list_index (b)); - - fl = - pool_elt_at_index (vm->buffer_free_list_pool, - vlib_buffer_get_free_list_index (b)); + if (vec_len (bm->buffer_pools) <= b->buffer_pool_index) + return format (0, "unknown buffer pool 0x%x", b->buffer_pool_index); if ((signed) b->current_data < (signed) -VLIB_BUFFER_PRE_DATA_SIZE) return format (0, "current data %d before pre-data", b->current_data); - if (b->current_data + b->current_length > fl->n_data_bytes) - return format (0, "%d-%d beyond end of buffer %d", - b->current_data, b->current_length, fl->n_data_bytes); + if (b->current_data + b->current_length > + vlib_buffer_get_default_data_size (vm)) + return format (0, "%d-%d beyond end of buffer %d", b->current_data, + b->current_length, vlib_buffer_get_default_data_size (vm)); if (follow_buffer_next && (b->flags & VLIB_BUFFER_NEXT_PRESENT)) { vlib_buffer_known_state_t k; u8 *msg, *result; - k = vlib_buffer_is_known (b->next_buffer); + k = vlib_buffer_is_known (vm, b->next_buffer); if (k != VLIB_BUFFER_KNOWN_ALLOCATED) return format (0, "next 0x%x: %U", b->next_buffer, format_vlib_buffer_known_state, k); @@ -261,7 +260,7 @@ vlib_validate_buffers (vlib_main_t * vm, goto done; } - k = vlib_buffer_is_known (bi); + k = vlib_buffer_is_known (vm, bi); if (k != known_state) { msg = format (0, "is %U; expected %U", @@ -318,15 +317,13 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, uword n_buffers, vlib_buffer_known_state_t expected_state) { + vlib_buffer_main_t *bm = vm->buffer_main; u32 *b; uword i, bi, is_free; if (CLIB_DEBUG == 0) return; - if (vlib_buffer_callbacks) - return; - is_free = expected_state == VLIB_BUFFER_KNOWN_ALLOCATED; b = buffers; for (i = 0; i < n_buffers; i++) @@ -335,332 +332,25 @@ 
vlib_buffer_validate_alloc_free (vlib_main_t * vm, bi = b[0]; b += 1; - known = vlib_buffer_is_known (bi); - if (known != expected_state) - { - ASSERT (0); - vlib_panic_with_msg - (vm, "%s %U buffer 0x%x", - is_free ? "freeing" : "allocating", - format_vlib_buffer_known_state, known, bi); - } - - vlib_buffer_set_known_state - (bi, is_free ? VLIB_BUFFER_KNOWN_FREE : VLIB_BUFFER_KNOWN_ALLOCATED); - } -} - -/* Add buffer free list. */ -static vlib_buffer_free_list_index_t -vlib_buffer_create_free_list_helper (vlib_main_t * vm, - u32 n_data_bytes, - u32 is_public, u32 is_default, u8 * name) -{ - vlib_buffer_main_t *bm = &buffer_main; - vlib_buffer_free_list_t *f; - int i; - - ASSERT (vlib_get_thread_index () == 0); - - if (!is_default && pool_elts (vm->buffer_free_list_pool) == 0) - { - vlib_buffer_free_list_index_t default_free_free_list_index; - - /* *INDENT-OFF* */ - default_free_free_list_index = - vlib_buffer_create_free_list_helper - (vm, - /* default buffer size */ VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, - /* is_public */ 1, - /* is_default */ 1, - (u8 *) "default"); - /* *INDENT-ON* */ - ASSERT (default_free_free_list_index == - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - - if (n_data_bytes == VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES && is_public) - return default_free_free_list_index; - } - - pool_get_aligned (vm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES); - - clib_memset (f, 0, sizeof (f[0])); - f->index = f - vm->buffer_free_list_pool; - vec_validate (f->buffers, 0); - vec_reset_length (f->buffers); - f->n_data_bytes = vlib_buffer_round_size (n_data_bytes); - f->min_n_buffers_each_alloc = VLIB_FRAME_SIZE; - f->buffer_pool_index = 0; - f->name = clib_mem_is_vec (name) ? name : format (0, "%s", name); - - /* Setup free buffer template. 
*/ - vlib_buffer_set_free_list_index (&f->buffer_init_template, f->index); - f->buffer_init_template.n_add_refs = 0; - - if (is_public) - { - uword *p = hash_get (bm->free_list_by_size, f->n_data_bytes); - if (!p) - hash_set (bm->free_list_by_size, f->n_data_bytes, f->index); - } - - for (i = 1; i < vec_len (vlib_mains); i++) - { - vlib_main_t *wvm = vlib_mains[i]; - vlib_buffer_free_list_t *wf; - pool_get_aligned (wvm->buffer_free_list_pool, - wf, CLIB_CACHE_LINE_BYTES); - ASSERT (f - vm->buffer_free_list_pool == - wf - wvm->buffer_free_list_pool); - wf[0] = f[0]; - wf->buffers = 0; - vec_validate (wf->buffers, 0); - vec_reset_length (wf->buffers); - wf->n_alloc = 0; - } - - return f->index; -} - -vlib_buffer_free_list_index_t -vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes, - char *fmt, ...) -{ - va_list va; - u8 *name; - - va_start (va, fmt); - name = va_format (0, fmt, &va); - va_end (va); - - return vlib_buffer_create_free_list_helper (vm, n_data_bytes, - /* is_public */ 0, - /* is_default */ 0, - name); -} - -static void -del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) -{ - vlib_buffer_pool_t *bp = vlib_buffer_pool_get (f->buffer_pool_index); - - vec_add_aligned (bp->buffers, f->buffers, vec_len (f->buffers), - CLIB_CACHE_LINE_BYTES); - vec_free (f->name); - vec_free (f->buffers); - - /* Poison it. */ - clib_memset (f, 0xab, sizeof (f[0])); -} - -/* Add buffer free list. 
*/ -void -vlib_buffer_delete_free_list_internal (vlib_main_t * vm, - vlib_buffer_free_list_index_t index) -{ - vlib_buffer_free_list_t *f; - int i; - - ASSERT (vlib_get_thread_index () == 0); + known = vlib_buffer_is_known (vm, bi); - f = vlib_buffer_get_free_list (vm, index); + if (known == VLIB_BUFFER_UNKNOWN && + expected_state == VLIB_BUFFER_KNOWN_FREE) + known = VLIB_BUFFER_KNOWN_FREE; - ASSERT (vec_len (f->buffers) == f->n_alloc); - - del_free_list (vm, f); - - pool_put (vm->buffer_free_list_pool, f); - - for (i = 1; i < vec_len (vlib_mains); i++) - { - vlib_main_t *wvm = vlib_mains[i]; - f = vlib_buffer_get_free_list (vlib_mains[i], index); - del_free_list (wvm, f); - pool_put (wvm->buffer_free_list_pool, f); - } -} - -static_always_inline void * -vlib_buffer_pool_get_buffer (vlib_main_t * vm, vlib_buffer_pool_t * bp) -{ - return vlib_physmem_alloc_from_map (vm, bp->physmem_map_index, - bp->buffer_size, CLIB_CACHE_LINE_BYTES); -} - -/* Make sure free list has at least given number of free buffers. */ -static uword -vlib_buffer_fill_free_list_internal (vlib_main_t * vm, - vlib_buffer_free_list_t * fl, - uword min_free_buffers) -{ - vlib_buffer_t *b; - vlib_buffer_pool_t *bp = vlib_buffer_pool_get (fl->buffer_pool_index); - int n; - u32 *bi; - u32 n_alloc = 0; - - /* Already have enough free buffers on free list? */ - n = min_free_buffers - vec_len (fl->buffers); - if (n <= 0) - return min_free_buffers; - - if (vec_len (bp->buffers) > 0) - { - int n_copy, n_left; - clib_spinlock_lock (&bp->lock); - n_copy = clib_min (vec_len (bp->buffers), n); - n_left = vec_len (bp->buffers) - n_copy; - vec_add_aligned (fl->buffers, bp->buffers + n_left, n_copy, - CLIB_CACHE_LINE_BYTES); - _vec_len (bp->buffers) = n_left; - clib_spinlock_unlock (&bp->lock); - n = min_free_buffers - vec_len (fl->buffers); - if (n <= 0) - return min_free_buffers; - } - - /* Always allocate round number of buffers. 
*/ - n = round_pow2 (n, CLIB_CACHE_LINE_BYTES / sizeof (u32)); - - /* Always allocate new buffers in reasonably large sized chunks. */ - n = clib_max (n, fl->min_n_buffers_each_alloc); - - clib_spinlock_lock (&bp->lock); - while (n_alloc < n) - { - if ((b = vlib_buffer_pool_get_buffer (vm, bp)) == 0) - goto done; - - n_alloc += 1; - - vec_add2_aligned (fl->buffers, bi, 1, CLIB_CACHE_LINE_BYTES); - bi[0] = vlib_get_buffer_index (vm, b); - - if (CLIB_DEBUG > 0) - vlib_buffer_set_known_state (bi[0], VLIB_BUFFER_KNOWN_FREE); - - clib_memset (b, 0, sizeof (vlib_buffer_t)); - vlib_buffer_init_for_free_list (b, fl); - - if (fl->buffer_init_function) - fl->buffer_init_function (vm, fl, bi, 1); - } - -done: - clib_spinlock_unlock (&bp->lock); - fl->n_alloc += n_alloc; - return n_alloc; -} - -void * -vlib_set_buffer_free_callback (vlib_main_t * vm, void *fp) -{ - vlib_buffer_main_t *bm = &buffer_main; - void *rv = bm->buffer_free_callback; - - bm->buffer_free_callback = fp; - return rv; -} - -static_always_inline void -recycle_or_free (vlib_main_t * vm, vlib_buffer_main_t * bm, u32 bi, - vlib_buffer_t * b, u32 follow_buffer_next) -{ - vlib_buffer_free_list_t *fl; - vlib_buffer_free_list_index_t fi; - u32 flags, next; - - fl = vlib_buffer_get_buffer_free_list (vm, b, &fi); - - do - { - vlib_buffer_t *nb = vlib_get_buffer (vm, bi); - flags = nb->flags; - next = nb->next_buffer; - if (nb->n_add_refs) - nb->n_add_refs--; - else + if (known != expected_state) { - vlib_buffer_validate_alloc_free (vm, &bi, 1, - VLIB_BUFFER_KNOWN_ALLOCATED); - vlib_buffer_add_to_free_list (vm, fl, bi, 1); + clib_panic ("%s %U buffer 0x%x", is_free ? 
"freeing" : "allocating", + format_vlib_buffer_known_state, known, bi); } - bi = next; - } - while (follow_buffer_next && (flags & VLIB_BUFFER_NEXT_PRESENT)); -} - -static_always_inline void -vlib_buffer_free_inline (vlib_main_t * vm, - u32 * buffers, u32 n_buffers, u32 follow_buffer_next) -{ - vlib_buffer_main_t *bm = &buffer_main; - vlib_buffer_t *p, *b0, *b1, *b2, *b3; - int i = 0; - u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, - u32 follow_buffer_next); - - cb = bm->buffer_free_callback; - - if (PREDICT_FALSE (cb != 0)) - n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); - - if (!n_buffers) - return; - - while (i + 11 < n_buffers) - { - p = vlib_get_buffer (vm, buffers[i + 8]); - vlib_prefetch_buffer_header (p, LOAD); - p = vlib_get_buffer (vm, buffers[i + 9]); - vlib_prefetch_buffer_header (p, LOAD); - p = vlib_get_buffer (vm, buffers[i + 10]); - vlib_prefetch_buffer_header (p, LOAD); - p = vlib_get_buffer (vm, buffers[i + 11]); - vlib_prefetch_buffer_header (p, LOAD); - - b0 = vlib_get_buffer (vm, buffers[i]); - b1 = vlib_get_buffer (vm, buffers[i + 1]); - b2 = vlib_get_buffer (vm, buffers[i + 2]); - b3 = vlib_get_buffer (vm, buffers[i + 3]); - - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3); - - recycle_or_free (vm, bm, buffers[i], b0, follow_buffer_next); - recycle_or_free (vm, bm, buffers[i + 1], b1, follow_buffer_next); - recycle_or_free (vm, bm, buffers[i + 2], b2, follow_buffer_next); - recycle_or_free (vm, bm, buffers[i + 3], b3, follow_buffer_next); - - i += 4; - } - while (i < n_buffers) - { - b0 = vlib_get_buffer (vm, buffers[i]); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - recycle_or_free (vm, bm, buffers[i], b0, follow_buffer_next); - i++; + clib_spinlock_lock (&bm->buffer_known_hash_lockp); + hash_set (bm->buffer_known_hash, bi, is_free ? 
VLIB_BUFFER_KNOWN_FREE : + VLIB_BUFFER_KNOWN_ALLOCATED); + clib_spinlock_unlock (&bm->buffer_known_hash_lockp); } } -static void -vlib_buffer_free_internal (vlib_main_t * vm, u32 * buffers, u32 n_buffers) -{ - vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ - 1); -} - -static void -vlib_buffer_free_no_next_internal (vlib_main_t * vm, u32 * buffers, - u32 n_buffers) -{ - vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ - 0); -} - void vlib_packet_template_init (vlib_main_t * vm, vlib_packet_template_t * t, @@ -680,7 +370,6 @@ vlib_packet_template_init (vlib_main_t * vm, vec_add (t->packet_data, packet_data, n_packet_data_bytes); t->min_n_buffers_each_alloc = min_n_buffers_each_alloc; - vlib_worker_thread_barrier_release (vm); } @@ -706,22 +395,20 @@ vlib_packet_template_get_packet (vlib_main_t * vm, /* Append given data to end of buffer, possibly allocating new buffers. */ int -vlib_buffer_add_data (vlib_main_t * vm, - vlib_buffer_free_list_index_t free_list_index, - u32 * buffer_index, void *data, u32 n_data_bytes) +vlib_buffer_add_data (vlib_main_t * vm, u32 * buffer_index, void *data, + u32 n_data_bytes) { u32 n_buffer_bytes, n_left, n_left_this_buffer, bi; vlib_buffer_t *b; void *d; bi = *buffer_index; - if (bi == ~0 - && 1 != vlib_buffer_alloc_from_free_list (vm, &bi, 1, free_list_index)) + if (bi == ~0 && 1 != vlib_buffer_alloc (vm, &bi, 1)) goto out_of_buffers; d = data; n_left = n_data_bytes; - n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, free_list_index); + n_buffer_bytes = vlib_buffer_get_default_data_size (vm); b = vlib_get_buffer (vm, bi); b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID; @@ -746,9 +433,7 @@ vlib_buffer_add_data (vlib_main_t * vm, break; d += n; - if (1 != - vlib_buffer_alloc_from_free_list (vm, &b->next_buffer, 1, - free_list_index)) + if (1 != vlib_buffer_alloc (vm, &b->next_buffer, 1)) goto out_of_buffers; b->flags |= VLIB_BUFFER_NEXT_PRESENT; @@ -766,15 +451,12 @@ 
vlib_buffer_add_data (vlib_main_t * vm, u16 vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm, - vlib_buffer_free_list_index_t - free_list_index, vlib_buffer_t * first, vlib_buffer_t ** last, void *data, u16 data_len) { vlib_buffer_t *l = *last; - u32 n_buffer_bytes = - vlib_buffer_free_list_buffer_size (vm, free_list_index); + u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); u16 copied = 0; ASSERT (n_buffer_bytes >= l->current_length + l->current_data); while (data_len) @@ -782,9 +464,8 @@ vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm, u16 max = n_buffer_bytes - l->current_length - l->current_data; if (max == 0) { - if (1 != - vlib_buffer_alloc_from_free_list (vm, &l->next_buffer, 1, - free_list_index)) + if (1 != vlib_buffer_alloc_from_pool (vm, &l->next_buffer, 1, + first->buffer_pool_index)) return copied; *last = l = vlib_buffer_chain_buffer (vm, l, l->next_buffer); max = n_buffer_bytes - l->current_length - l->current_data; @@ -800,14 +481,27 @@ vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm, return copied; } -u8 -vlib_buffer_register_physmem_map (vlib_main_t * vm, u32 physmem_map_index) +clib_error_t * +vlib_buffer_pool_create (vlib_main_t * vm, u8 index, char *name, + u32 data_size, u32 physmem_map_index) { - vlib_buffer_main_t *bm = &buffer_main; - vlib_buffer_pool_t *p; + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_pool_t *bp; vlib_physmem_map_t *m = vlib_physmem_get_map (vm, physmem_map_index); uword start = pointer_to_uword (m->base); uword size = (uword) m->n_pages << m->log2_page_size; + uword i, j; + u32 alloc_size, n_alloc_per_page;; + + vec_validate_aligned (bm->buffer_pools, index, CLIB_CACHE_LINE_BYTES); + bp = vec_elt_at_index (bm->buffer_pools, index); + + if (bp->start) + return clib_error_return (0, "buffer with index %u already exists", + index); + + if (index >= 255) + return clib_error_return (0, "buffer index must be < 255", index); if (bm->buffer_mem_size == 0) { @@ -834,36 
+528,74 @@ vlib_buffer_register_physmem_map (vlib_main_t * vm, u32 physmem_map_index) clib_panic ("buffer memory size out of range!"); } - vec_add2 (bm->buffer_pools, p, 1); - p->start = start; - p->size = size; - p->physmem_map_index = physmem_map_index; + bp->start = start; + bp->size = size; + bp->index = bp - bm->buffer_pools; + bp->buffer_template.buffer_pool_index = bp->index; + bp->buffer_template.ref_count = 1; + bp->physmem_map_index = physmem_map_index; + bp->name = format (0, "%s%c", name, 0); + bp->data_size = data_size; + bp->numa_node = m->numa_node; - ASSERT (p - bm->buffer_pools < 256); - return p - bm->buffer_pools; -} + vec_validate_aligned (bp->threads, vec_len (vlib_mains) - 1, + CLIB_CACHE_LINE_BYTES); -static u8 * -format_vlib_buffer_free_list (u8 * s, va_list * va) -{ - vlib_buffer_free_list_t *f = va_arg (*va, vlib_buffer_free_list_t *); - u32 threadnum = va_arg (*va, u32); - uword bytes_alloc, bytes_free, n_free, size; + alloc_size = data_size + sizeof (vlib_buffer_t) + bm->ext_hdr_size; + n_alloc_per_page = (1ULL << m->log2_page_size) / alloc_size; + + /* preallocate buffer indices memory */ + vec_validate_aligned (bp->buffers, m->n_pages * n_alloc_per_page, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (bp->buffers); + + clib_spinlock_init (&bp->lock); + + for (j = 0; j < m->n_pages; j++) + for (i = 0; i < n_alloc_per_page; i++) + { + u8 *p; + u32 bi; + + p = m->base + (j << m->log2_page_size) + i * alloc_size; + p += bm->ext_hdr_size; + + vlib_buffer_copy_template ((vlib_buffer_t *) p, &bp->buffer_template); + + bi = vlib_get_buffer_index (vm, (vlib_buffer_t *) p); - if (!f) - return format (s, "%=7s%=30s%=12s%=12s%=12s%=12s%=12s%=12s", - "Thread", "Name", "Index", "Size", "Alloc", "Free", - "#Alloc", "#Free"); + vec_add1_aligned (bp->buffers, bi, CLIB_CACHE_LINE_BYTES); + vlib_get_buffer (vm, bi); + } - size = sizeof (vlib_buffer_t) + f->n_data_bytes; - n_free = vec_len (f->buffers); - bytes_alloc = size * f->n_alloc; - bytes_free = size 
* n_free; + bp->n_buffers = vec_len (bp->buffers); - s = format (s, "%7d%30v%12d%12d%=12U%=12U%=12d%=12d", threadnum, - f->name, f->index, f->n_data_bytes, - format_memory_size, bytes_alloc, - format_memory_size, bytes_free, f->n_alloc, n_free); + return 0; +} + +static u8 * +format_vlib_buffer_pool (u8 * s, va_list * va) +{ + vlib_main_t *vm = va_arg (*va, vlib_main_t *); + vlib_buffer_pool_t *bp = va_arg (*va, vlib_buffer_pool_t *); + vlib_buffer_pool_thread_t *bpt; + u32 cached = 0; + + if (!bp) + return format (s, "%-20s%=6s%=6s%=6s%=11s%=6s%=8s%=8s%=8s", + "Pool Name", "Index", "NUMA", "Size", "Data Size", + "Total", "Avail", "Cached", "Used"); + + /* *INDENT-OFF* */ + vec_foreach (bpt, bp->threads) + cached += vec_len (bpt->cached_buffers); + /* *INDENT-ON* */ + + s = format (s, "%-20s%=6d%=6d%=6u%=11u%=6u%=8u%=8u%=8u", + bp->name, bp->index, bp->numa_node, bp->data_size + + sizeof (vlib_buffer_t) + vm->buffer_main->ext_hdr_size, + bp->data_size, bp->n_buffers, vec_len (bp->buffers), cached, + bp->n_buffers - vec_len (bp->buffers) - cached); return s; } @@ -872,25 +604,15 @@ static clib_error_t * show_buffers (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - vlib_buffer_free_list_t *f; - vlib_main_t *curr_vm; - u32 vm_index = 0; + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_pool_t *bp; - vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, 0, 0); + vlib_cli_output (vm, "%U", format_vlib_buffer_pool, vm, 0); - do - { - curr_vm = vlib_mains[vm_index]; - - /* *INDENT-OFF* */ - pool_foreach (f, curr_vm->buffer_free_list_pool, ({ - vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f, vm_index); - })); - /* *INDENT-ON* */ - - vm_index++; - } - while (vm_index < vec_len (vlib_mains)); + /* *INDENT-OFF* */ + vec_foreach (bp, bm->buffer_pools) + vlib_cli_output (vm, "%U", format_vlib_buffer_pool, vm, bp); + /* *INDENT-ON* */ return 0; } @@ -904,71 +626,144 @@ VLIB_CLI_COMMAND (show_buffers_command, static) = { /* 
*INDENT-ON* */ clib_error_t * -vlib_buffer_main_init (struct vlib_main_t * vm) +vlib_buffer_worker_init (vlib_main_t * vm) { - vlib_buffer_main_t *bm = &buffer_main; - clib_error_t *error; - u32 physmem_map_index; - u8 pool_index; - int log2_page_size = 0; - - buffer_log_default = vlib_log_register_class ("buffer", 0); + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_pool_t *bp; - if (vlib_buffer_callbacks) + /* *INDENT-OFF* */ + vec_foreach (bp, bm->buffer_pools) { - /* external plugin has registered own buffer callbacks - so we just copy them and quit */ - clib_memcpy_fast (&bm->cb, vlib_buffer_callbacks, - sizeof (vlib_buffer_callbacks_t)); - bm->callbacks_registered = 1; - return 0; + clib_spinlock_lock (&bp->lock); + vec_validate_aligned (bp->threads, vec_len (vlib_mains) - 1, + CLIB_CACHE_LINE_BYTES); + clib_spinlock_unlock (&bp->lock); } + /* *INDENT-ON* */ - bm->cb.vlib_buffer_fill_free_list_cb = &vlib_buffer_fill_free_list_internal; - bm->cb.vlib_buffer_free_cb = &vlib_buffer_free_internal; - bm->cb.vlib_buffer_free_no_next_cb = &vlib_buffer_free_no_next_internal; - bm->cb.vlib_buffer_delete_free_list_cb = - &vlib_buffer_delete_free_list_internal; - clib_spinlock_init (&bm->buffer_known_hash_lockp); + return 0; +} + +VLIB_WORKER_INIT_FUNCTION (vlib_buffer_worker_init); + +static clib_error_t * +vlib_buffer_main_init_numa_node (struct vlib_main_t *vm, u32 numa_node) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + clib_error_t *error; + u32 physmem_map_index; + uword n_pages, pagesize; + u32 buffers_per_numa; + u32 buffer_size = CLIB_CACHE_LINE_ROUND (bm->ext_hdr_size + + sizeof (vlib_buffer_t) + + vlib_buffer_get_default_data_size + (vm)); + u8 *name; + + pagesize = clib_mem_get_default_hugepage_size (); + name = format (0, "buffers-numa-%d%c", numa_node, 0); + + buffers_per_numa = bm->buffers_per_numa ? 
bm->buffers_per_numa : + VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA; retry: - error = vlib_physmem_shared_map_create (vm, "buffers", - vlib_buffer_physmem_sz, - log2_page_size, - CLIB_PMALLOC_NUMA_LOCAL, + n_pages = (buffers_per_numa - 1) / (pagesize / buffer_size) + 1; + error = vlib_physmem_shared_map_create (vm, (char *) name, + n_pages * pagesize, + min_log2 (pagesize), numa_node, &physmem_map_index); - if (error && log2_page_size == 0) + if (error && pagesize != clib_mem_get_page_size ()) { - vlib_log_warn (buffer_log_default, "%U", format_clib_error, error); + vlib_log_warn (bm->log_default, "%U", format_clib_error, error); clib_error_free (error); - vlib_log_warn (buffer_log_default, "falling back to non-hugepage " + vlib_log_warn (bm->log_default, "falling back to non-hugepage " "backed buffer pool"); - log2_page_size = min_log2 (clib_mem_get_page_size ()); + pagesize = clib_mem_get_page_size (); + buffers_per_numa = bm->buffers_per_numa ? bm->buffers_per_numa : + VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA_UNPRIV; goto retry; } if (error) return error; - pool_index = vlib_buffer_register_physmem_map (vm, physmem_map_index); - vlib_buffer_pool_t *bp = vlib_buffer_pool_get (pool_index); - clib_spinlock_init (&bp->lock); - bp->buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES + - sizeof (vlib_buffer_t); + vec_reset_length (name); + name = format (name, "default-numa-%d%c", numa_node, 0); - return 0; + return vlib_buffer_pool_create (vm, numa_node, (char *) name, + vlib_buffer_get_default_data_size (vm), + physmem_map_index); +} + +void +vlib_buffer_main_alloc (vlib_main_t * vm) +{ + vlib_buffer_main_t *bm; + + if (vm->buffer_main) + return; + + vm->buffer_main = bm = clib_mem_alloc (sizeof (bm[0])); + clib_memset (vm->buffer_main, 0, sizeof (bm[0])); + bm->default_data_size = VLIB_BUFFER_DEFAULT_DATA_SIZE; +} + +clib_error_t * +vlib_buffer_main_init (struct vlib_main_t *vm) +{ + vlib_buffer_main_t *bm; + clib_error_t *err; + clib_bitmap_t *bmp = 0; + u32 numa_node; + + 
vlib_buffer_main_alloc (vm); + + bm = vm->buffer_main; + bm->log_default = vlib_log_register_class ("buffer", 0); + bm->ext_hdr_size = __vlib_buffer_external_hdr_size; + + clib_spinlock_init (&bm->buffer_known_hash_lockp); + + err = clib_sysfs_read ("/sys/devices/system/node/possible", "%U", + unformat_bitmap_list, &bmp); + if (err) + { + /* no info from sysfs, assuming that only numa 0 exists */ + clib_error_free (err); + bmp = clib_bitmap_set (bmp, 0, 1); + } + + /* *INDENT-OFF* */ + clib_bitmap_foreach (numa_node, bmp, { + if ((err = vlib_buffer_main_init_numa_node(vm, numa_node))) + goto done; + }); + /* *INDENT-ON* */ + + bm->n_numa_nodes = clib_bitmap_last_set (bmp) + 1; + +done: + vec_free (bmp); + return err; } static clib_error_t * vlib_buffers_configure (vlib_main_t * vm, unformat_input_t * input) { - u32 size_in_mb; + vlib_buffer_main_t *bm; + + vlib_buffer_main_alloc (vm); + + bm = vm->buffer_main; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "memory-size-in-mb %d", &size_in_mb)) - vlib_buffer_physmem_sz = size_in_mb << 20; + if (unformat (input, "buffers-per-numa %u", &bm->buffers_per_numa)) + ; + else if (unformat (input, "default data-size %u", + &bm->default_data_size)) + ; else return unformat_parse_error (input); } diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index 2c8d5a046a73..31baf5fd21ef 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -48,9 +48,10 @@ #include /* for vlib_error_t */ #include /* for __PRE_DATA_SIZE */ -#define VLIB_BUFFER_DATA_SIZE (2048) #define VLIB_BUFFER_PRE_DATA_SIZE __PRE_DATA_SIZE +#define VLIB_BUFFER_DEFAULT_DATA_SIZE (2048) + /* Minimum buffer chain segment size. Does not apply to last buffer in chain. 
Dataplane code can safely asume that specified amount of data is not split into 2 chained buffers */ @@ -59,8 +60,6 @@ /* Amount of head buffer data copied to each replica head buffer */ #define VLIB_BUFFER_CLONE_HEAD_SIZE (256) -typedef u8 vlib_buffer_free_list_index_t; - /** \file vlib buffer structure definition and a few select access methods. This structure and the buffer allocation @@ -72,11 +71,10 @@ typedef u8 vlib_buffer_free_list_index_t; * Buffer Flags */ #define foreach_vlib_buffer_flag \ - _( 0, NON_DEFAULT_FREELIST, "non-default-fl") \ - _( 1, IS_TRACED, 0) \ - _( 2, NEXT_PRESENT, 0) \ - _( 3, TOTAL_LENGTH_VALID, 0) \ - _( 4, EXT_HDR_VALID, "ext-hdr-valid") + _( 0, IS_TRACED, 0) \ + _( 1, NEXT_PRESENT, 0) \ + _( 2, TOTAL_LENGTH_VALID, 0) \ + _( 3, EXT_HDR_VALID, "ext-hdr-valid") /* NOTE: only buffer generic flags should be defined here, please consider using user flags. i.e. src/vnet/buffer.h */ @@ -98,83 +96,93 @@ enum /* User defined buffer flags. */ #define LOG2_VLIB_BUFFER_FLAG_USER(n) (32 - (n)) #define VLIB_BUFFER_FLAG_USER(n) (1 << LOG2_VLIB_BUFFER_FLAG_USER(n)) -#define VLIB_BUFFER_FLAGS_ALL (0x1f) +#define VLIB_BUFFER_FLAGS_ALL (0x0f) -/* VLIB buffer representation. */ -typedef struct +/** VLIB buffer representation. */ +typedef union { - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - STRUCT_MARK (template_start); - /* Offset within data[] that we are currently processing. - If negative current header points into predata area. */ - i16 current_data; /**< signed offset in data[], pre_data[] - that we are currently processing. - If negative current header points into predata area. - */ - u16 current_length; /**< Nbytes between current data and - the end of this buffer. - */ - u32 flags; /**< buffer flags: -
VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, -
VLIB_BUFFER_IS_TRACED: trace this buffer. -
VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer. -
VLIB_BUFFER_TOTAL_LENGTH_VALID: as it says -
VLIB_BUFFER_EXT_HDR_VALID: buffer contains valid external buffer manager header, - set to avoid adding it to a flow report -
VLIB_BUFFER_FLAG_USER(n): user-defined bit N - */ - - u32 flow_id; /**< Generic flow identifier */ - - - u32 next_buffer; /**< Next buffer for this linked-list of buffers. - Only valid if VLIB_BUFFER_NEXT_PRESENT flag is set. - */ - - STRUCT_MARK (template_end); - - u32 current_config_index; /**< Used by feature subgraph arcs to - visit enabled feature nodes - */ - vlib_error_t error; /**< Error code for buffers to be enqueued - to error handler. - */ - u8 n_add_refs; /**< Number of additional references to this buffer. */ - - u8 buffer_pool_index; /**< index of buffer pool this buffer belongs. */ - - u32 opaque[10]; /**< Opaque data used by sub-graphs for their own purposes. - See .../vnet/vnet/buffer.h - */ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); - - u32 trace_index; /**< Specifies index into trace buffer - if VLIB_PACKET_IS_TRACED flag is set. - */ - u32 recycle_count; /**< Used by L2 path recycle code */ - - u32 total_length_not_including_first_buffer; - /**< Only valid for first buffer in chain. Current length plus - total length given here give total number of bytes in buffer chain. - */ - vlib_buffer_free_list_index_t free_list_index; /** < only used if - VLIB_BUFFER_NON_DEFAULT_FREELIST - flag is set */ - u8 align_pad[3]; /**< available */ - u32 opaque2[12]; /**< More opaque data, see ../vnet/vnet/buffer.h */ - - /***** end of second cache line */ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline2); - u8 pre_data[VLIB_BUFFER_PRE_DATA_SIZE]; /**< Space for inserting data - before buffer start. - Packet rewrite string will be - rewritten backwards and may extend - back before buffer->data[0]. - Must come directly before packet data. - */ - - u8 data[0]; /**< Packet data. Hardware DMA here */ -} vlib_buffer_t; /* Must be a multiple of 64B. */ + struct + { + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /** signed offset in data[], pre_data[] that we are currently + * processing. If negative current header points into predata area. 
*/ + i16 current_data; + + /** Nbytes between current data and the end of this buffer. */ + u16 current_length; + + /** buffer flags: +
VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, +
VLIB_BUFFER_IS_TRACED: trace this buffer. +
VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer. +
VLIB_BUFFER_TOTAL_LENGTH_VALID: total_length_not_including_first_buffer holds a valid value. +
VLIB_BUFFER_EXT_HDR_VALID: buffer contains valid external buffer manager header, + set to avoid adding it to a flow report +
VLIB_BUFFER_FLAG_USER(n): user-defined bit N + */ + u32 flags; + + /** Generic flow identifier */ + u32 flow_id; + + /** Reference count for this buffer. */ + volatile u8 ref_count; + + /** index of buffer pool this buffer belongs. */ + u8 buffer_pool_index; + + /** Error code for buffers to be enqueued to error handler. */ + vlib_error_t error; + + /** Next buffer for this linked-list of buffers. Only valid if + * VLIB_BUFFER_NEXT_PRESENT flag is set. */ + u32 next_buffer; + + /** Used by feature subgraph arcs to visit enabled feature nodes */ + u32 current_config_index; + + /** Opaque data used by sub-graphs for their own purposes. */ + u32 opaque[10]; + + /** part of buffer metadata which is initialized on alloc ends here. */ + STRUCT_MARK (template_end); + + /** start of 2nd cache line */ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); + + /** Specifies index into trace buffer if VLIB_PACKET_IS_TRACED flag is + * set. */ + u32 trace_index; + + /** Only valid for first buffer in chain. Current length plus total length + * given here give total number of bytes in buffer chain. */ + u32 total_length_not_including_first_buffer; + + /**< More opaque data, see ../vnet/vnet/buffer.h */ + u32 opaque2[14]; + + /** start of third cache line */ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline2); + + /** Space for inserting data before buffer start. Packet rewrite string + * will be rewritten backwards and may extend back before + * buffer->data[0]. Must come directly before packet data. */ + u8 pre_data[VLIB_BUFFER_PRE_DATA_SIZE]; + + /** Packet data */ + u8 data[0]; + }; +#ifdef CLIB_HAVE_VEC128 + u8x16 as_u8x16[4]; +#endif +#ifdef CLIB_HAVE_VEC256 + u8x32 as_u8x32[2]; +#endif +#ifdef CLIB_HAVE_VEC512 + u8x64 as_u8x64[1]; +#endif +} vlib_buffer_t; #define VLIB_BUFFER_HDR_SIZE (sizeof(vlib_buffer_t) - VLIB_BUFFER_PRE_DATA_SIZE) @@ -358,72 +366,32 @@ vlib_buffer_pull (vlib_buffer_t * b, u8 size) /* Forward declaration. 
*/ struct vlib_main_t; -typedef struct vlib_buffer_free_list_t -{ - /* Template buffer used to initialize first 16 bytes of buffers - allocated on this free list. */ - vlib_buffer_t buffer_init_template; - - /* Our index into vlib_main_t's buffer_free_list_pool. */ - vlib_buffer_free_list_index_t index; - - /* Number of data bytes for buffers in this free list. */ - u32 n_data_bytes; - - /* Number of buffers to allocate when we need to allocate new buffers */ - u32 min_n_buffers_each_alloc; - - /* Total number of buffers allocated from this free list. */ - u32 n_alloc; - - /* Vector of free buffers. Each element is a byte offset into I/O heap. */ - u32 *buffers; - - /* index of buffer pool used to get / put buffers */ - u8 buffer_pool_index; - - /* Free list name. */ - u8 *name; - - /* Callback functions to initialize newly allocated buffers. - If null buffers are zeroed. */ - void (*buffer_init_function) (struct vlib_main_t * vm, - struct vlib_buffer_free_list_t * fl, - u32 * buffers, u32 n_buffers); - - uword buffer_init_function_opaque; -} __attribute__ ((aligned (16))) vlib_buffer_free_list_t; - -typedef uword (vlib_buffer_fill_free_list_cb_t) (struct vlib_main_t * vm, - vlib_buffer_free_list_t * fl, - uword min_free_buffers); -typedef void (vlib_buffer_free_cb_t) (struct vlib_main_t * vm, u32 * buffers, - u32 n_buffers); -typedef void (vlib_buffer_free_no_next_cb_t) (struct vlib_main_t * vm, - u32 * buffers, u32 n_buffers); - typedef struct { - vlib_buffer_fill_free_list_cb_t *vlib_buffer_fill_free_list_cb; - vlib_buffer_free_cb_t *vlib_buffer_free_cb; - vlib_buffer_free_no_next_cb_t *vlib_buffer_free_no_next_cb; - void (*vlib_buffer_delete_free_list_cb) (struct vlib_main_t * vm, - vlib_buffer_free_list_index_t - free_list_index); -} vlib_buffer_callbacks_t; - -extern vlib_buffer_callbacks_t *vlib_buffer_callbacks; - + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + u32 *cached_buffers; + u32 n_alloc; +} vlib_buffer_pool_thread_t; typedef struct { 
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); uword start; uword size; uword log2_page_size; + u8 index; + u32 numa_node; u32 physmem_map_index; - u32 buffer_size; + u32 data_size; + u32 n_buffers; u32 *buffers; + u8 *name; clib_spinlock_t lock; + + /* per-thread data */ + vlib_buffer_pool_thread_t *threads; + + /* buffer metadata template */ + vlib_buffer_t buffer_template; } vlib_buffer_pool_t; typedef struct @@ -435,46 +403,25 @@ typedef struct uword buffer_mem_size; vlib_buffer_pool_t *buffer_pools; - /* Buffer free callback, for subversive activities */ - u32 (*buffer_free_callback) (struct vlib_main_t * vm, - u32 * buffers, - u32 n_buffers, u32 follow_buffer_next); -#define VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX (0) -#define VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES VLIB_BUFFER_DATA_SIZE - - /* Hash table mapping buffer size (rounded to next unit of - sizeof (vlib_buffer_t)) to free list index. */ - uword *free_list_by_size; - /* Hash table mapping buffer index into number 0 => allocated but free, 1 => allocated and not-free. If buffer index is not in hash table then this buffer has never been allocated. 
*/ uword *buffer_known_hash; clib_spinlock_t buffer_known_hash_lockp; + u32 n_numa_nodes; - /* Callbacks */ - vlib_buffer_callbacks_t cb; - int callbacks_registered; -} vlib_buffer_main_t; - -extern vlib_buffer_main_t buffer_main; + /* config */ + u32 buffers_per_numa; + u16 ext_hdr_size; + u32 default_data_size; -static_always_inline vlib_buffer_pool_t * -vlib_buffer_pool_get (u8 buffer_pool_index) -{ - vlib_buffer_main_t *bm = &buffer_main; - return vec_elt_at_index (bm->buffer_pools, buffer_pool_index); -} - -u8 vlib_buffer_register_physmem_map (struct vlib_main_t * vm, - u32 physmem_map_index); + /* logging */ + vlib_log_class_t log_default; +} vlib_buffer_main_t; clib_error_t *vlib_buffer_main_init (struct vlib_main_t *vm); - -void *vlib_set_buffer_free_callback (struct vlib_main_t *vm, void *fp); - /* */ @@ -495,23 +442,17 @@ extern void vlib_buffer_trace_trajectory_init (vlib_buffer_t * b); #define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b) #endif /* VLIB_BUFFER_TRACE_TRAJECTORY */ -#endif /* included_vlib_buffer_h */ +extern u16 __vlib_buffer_external_hdr_size; +#define VLIB_BUFFER_SET_EXT_HDR_SIZE(x) \ +static void __clib_constructor \ +vnet_buffer_set_ext_hdr_size() \ +{ \ + if (__vlib_buffer_external_hdr_size) \ + clib_error ("buffer external header space already set"); \ + __vlib_buffer_external_hdr_size = CLIB_CACHE_LINE_ROUND (x); \ +} -#define VLIB_BUFFER_REGISTER_CALLBACKS(x,...) 
\ - __VA_ARGS__ vlib_buffer_callbacks_t __##x##_buffer_callbacks; \ -static void __vlib_add_buffer_callbacks_t_##x (void) \ - __attribute__((__constructor__)) ; \ -static void __vlib_add_buffer_callbacks_t_##x (void) \ -{ \ - if (vlib_buffer_callbacks) \ - clib_panic ("vlib buffer callbacks already registered"); \ - vlib_buffer_callbacks = &__##x##_buffer_callbacks; \ -} \ -static void __vlib_rm_buffer_callbacks_t_##x (void) \ - __attribute__((__destructor__)) ; \ -static void __vlib_rm_buffer_callbacks_t_##x (void) \ -{ vlib_buffer_callbacks = 0; } \ -__VA_ARGS__ vlib_buffer_callbacks_t __##x##_buffer_callbacks +#endif /* included_vlib_buffer_h */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index b561a91c394d..ec0c12e3ee95 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -47,6 +47,29 @@ vlib buffer access methods. */ +always_inline void +vlib_buffer_validate (vlib_main_t * vm, vlib_buffer_t * b) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_pool_t *bp; + + /* reference count in allocated buffer always must be 1 or higher */ + ASSERT (b->ref_count > 0); + + /* verify that buffer pool index is valid */ + bp = vec_elt_at_index (bm->buffer_pools, b->buffer_pool_index); + ASSERT (pointer_to_uword (b) >= bp->start); + ASSERT (pointer_to_uword (b) < bp->start + bp->size - + (bp->data_size + sizeof (vlib_buffer_t))); +} + +always_inline void * +vlib_buffer_ptr_from_index (uword buffer_mem_start, u32 buffer_index, + uword offset) +{ + offset += ((uword) buffer_index) << CLIB_LOG2_CACHE_LINE_BYTES; + return uword_to_pointer (buffer_mem_start + offset, vlib_buffer_t *); +} /** \brief Translate buffer index into buffer pointer @@ -57,11 +80,50 @@ always_inline vlib_buffer_t * vlib_get_buffer (vlib_main_t * vm, u32 buffer_index) { - vlib_buffer_main_t *bm = &buffer_main; - uword offset = ((uword) buffer_index) << CLIB_LOG2_CACHE_LINE_BYTES; - ASSERT (offset < 
bm->buffer_mem_size); + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_t *b; - return uword_to_pointer (bm->buffer_mem_start + offset, void *); + b = vlib_buffer_ptr_from_index (bm->buffer_mem_start, buffer_index, 0); + vlib_buffer_validate (vm, b); + return b; +} + +static_always_inline u32 +vlib_buffer_get_default_data_size (vlib_main_t * vm) +{ + return vm->buffer_main->default_data_size; +} + +static_always_inline void +vlib_buffer_copy_indices (u32 * dst, u32 * src, u32 n_indices) +{ + clib_memcpy_fast (dst, src, n_indices * sizeof (u32)); +} + +STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, template_end, 64); +static_always_inline void +vlib_buffer_copy_template (vlib_buffer_t * b, vlib_buffer_t * bt) +{ +#if defined CLIB_HAVE_VEC512 + b->as_u8x64[0] = bt->as_u8x64[0]; +#elif defined (CLIB_HAVE_VEC256) + b->as_u8x32[0] = bt->as_u8x32[0]; + b->as_u8x32[1] = bt->as_u8x32[1]; +#elif defined (CLIB_HAVE_VEC128) + b->as_u8x16[0] = bt->as_u8x16[0]; + b->as_u8x16[1] = bt->as_u8x16[1]; + b->as_u8x16[2] = bt->as_u8x16[2]; + b->as_u8x16[3] = bt->as_u8x16[3]; +#else + clib_memcpy_fast (b, bt, 64); +#endif +} + +always_inline u8 +vlib_buffer_pool_get_default_for_numa (vlib_main_t * vm, u32 numa_node) +{ + ASSERT (numa_node < vm->buffer_main->n_numa_nodes); + return numa_node; } /** \brief Translate array of buffer indices into buffer pointers with offset @@ -76,8 +138,9 @@ static_always_inline void vlib_get_buffers_with_offset (vlib_main_t * vm, u32 * bi, void **b, int count, i32 offset) { + uword buffer_mem_start = vm->buffer_main->buffer_mem_start; #ifdef CLIB_HAVE_VEC256 - u64x4 off = u64x4_splat (buffer_main.buffer_mem_start + offset); + u64x4 off = u64x4_splat (buffer_mem_start + offset); /* if count is not const, compiler will not unroll while loop se we maintain two-in-parallel variant */ while (count >= 8) @@ -99,7 +162,7 @@ vlib_get_buffers_with_offset (vlib_main_t * vm, u32 * bi, void **b, int count, /* shift and add to get vlib_buffer_t pointer */ 
u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b); #elif defined (CLIB_HAVE_VEC128) - u64x2 off = u64x2_splat (buffer_main.buffer_mem_start + offset); + u64x2 off = u64x2_splat (buffer_mem_start + offset); u32x4 bi4 = u32x4_load_unaligned (bi); u64x2 b0 = u32x4_extend_to_u64x2 ((u32x4) bi4); #if defined (__aarch64__) @@ -111,10 +174,10 @@ vlib_get_buffers_with_offset (vlib_main_t * vm, u32 * bi, void **b, int count, u64x2_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b); u64x2_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 2); #else - b[0] = ((u8 *) vlib_get_buffer (vm, bi[0])) + offset; - b[1] = ((u8 *) vlib_get_buffer (vm, bi[1])) + offset; - b[2] = ((u8 *) vlib_get_buffer (vm, bi[2])) + offset; - b[3] = ((u8 *) vlib_get_buffer (vm, bi[3])) + offset; + b[0] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[0], offset); + b[1] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[1], offset); + b[2] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[2], offset); + b[3] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[3], offset); #endif b += 4; bi += 4; @@ -122,7 +185,7 @@ vlib_get_buffers_with_offset (vlib_main_t * vm, u32 * bi, void **b, int count, } while (count) { - b[0] = ((u8 *) vlib_get_buffer (vm, bi[0])) + offset; + b[0] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[0], offset); b += 1; bi += 1; count -= 1; @@ -153,7 +216,7 @@ vlib_get_buffers (vlib_main_t * vm, u32 * bi, vlib_buffer_t ** b, int count) always_inline u32 vlib_get_buffer_index (vlib_main_t * vm, void *p) { - vlib_buffer_main_t *bm = &buffer_main; + vlib_buffer_main_t *bm = vm->buffer_main; uword offset = pointer_to_uword (p) - bm->buffer_mem_start; ASSERT (pointer_to_uword (p) >= bm->buffer_mem_start); ASSERT (offset < bm->buffer_mem_size); @@ -175,7 +238,7 @@ vlib_get_buffer_indices_with_offset (vlib_main_t * vm, void **b, u32 * bi, { #ifdef CLIB_HAVE_VEC256 u32x8 mask = { 0, 2, 4, 6, 1, 3, 5, 7 }; - u64x4 off4 = u64x4_splat 
(buffer_main.buffer_mem_start - offset); + u64x4 off4 = u64x4_splat (vm->buffer_main->buffer_mem_start - offset); while (count >= 8) { @@ -356,9 +419,9 @@ void vlib_buffer_validate_alloc_free (vlib_main_t * vm, u32 * buffers, expected_state); always_inline vlib_buffer_known_state_t -vlib_buffer_is_known (u32 buffer_index) +vlib_buffer_is_known (vlib_main_t * vm, u32 buffer_index) { - vlib_buffer_main_t *bm = &buffer_main; + vlib_buffer_main_t *bm = vm->buffer_main; clib_spinlock_lock (&bm->buffer_known_hash_lockp); uword *p = hash_get (bm->buffer_known_hash, buffer_index); @@ -366,51 +429,48 @@ vlib_buffer_is_known (u32 buffer_index) return p ? p[0] : VLIB_BUFFER_UNKNOWN; } -always_inline void -vlib_buffer_set_known_state (u32 buffer_index, - vlib_buffer_known_state_t state) -{ - vlib_buffer_main_t *bm = &buffer_main; - - clib_spinlock_lock (&bm->buffer_known_hash_lockp); - hash_set (bm->buffer_known_hash, buffer_index, state); - clib_spinlock_unlock (&bm->buffer_known_hash_lockp); -} - /* Validates sanity of a single buffer. Returns format'ed vector with error message if any. 
*/ u8 *vlib_validate_buffer (vlib_main_t * vm, u32 buffer_index, uword follow_chain); -always_inline u32 -vlib_buffer_round_size (u32 size) +static_always_inline vlib_buffer_pool_t * +vlib_get_buffer_pool (vlib_main_t * vm, u8 buffer_pool_index) { - return round_pow2 (size, sizeof (vlib_buffer_t)); + vlib_buffer_main_t *bm = vm->buffer_main; + return vec_elt_at_index (bm->buffer_pools, buffer_pool_index); } -always_inline vlib_buffer_free_list_index_t -vlib_buffer_get_free_list_index (vlib_buffer_t * b) +static_always_inline uword +vlib_buffer_pool_get (vlib_main_t * vm, u8 buffer_pool_index, u32 * buffers, + u32 n_buffers) { - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NON_DEFAULT_FREELIST)) - return b->free_list_index; + vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index); + u32 len; - return 0; -} + ASSERT (bp->buffers); -always_inline void -vlib_buffer_set_free_list_index (vlib_buffer_t * b, - vlib_buffer_free_list_index_t index) -{ - if (PREDICT_FALSE (index)) + clib_spinlock_lock (&bp->lock); + len = vec_len (bp->buffers); + if (PREDICT_TRUE (n_buffers < len)) { - b->flags |= VLIB_BUFFER_NON_DEFAULT_FREELIST; - b->free_list_index = index; + len -= n_buffers; + vlib_buffer_copy_indices (buffers, bp->buffers + len, n_buffers); + _vec_len (bp->buffers) = len; + clib_spinlock_unlock (&bp->lock); + return n_buffers; } else - b->flags &= ~VLIB_BUFFER_NON_DEFAULT_FREELIST; + { + vlib_buffer_copy_indices (buffers, bp->buffers, len); + _vec_len (bp->buffers) = 0; + clib_spinlock_unlock (&bp->lock); + return len; + } } -/** \brief Allocate buffers from specific freelist into supplied array + +/** \brief Allocate buffers from specific pool into supplied array @param vm - (vlib_main_t *) vlib main data structure pointer @param buffers - (u32 * ) buffer index array @@ -418,57 +478,87 @@ vlib_buffer_set_free_list_index (vlib_buffer_t * b, @return - (u32) number of buffers actually allocated, may be less than the number requested or zero */ + always_inline 
u32 -vlib_buffer_alloc_from_free_list (vlib_main_t * vm, - u32 * buffers, - u32 n_buffers, - vlib_buffer_free_list_index_t index) +vlib_buffer_alloc_from_pool (vlib_main_t * vm, u32 * buffers, u32 n_buffers, + u8 buffer_pool_index) { - vlib_buffer_main_t *bm = &buffer_main; - vlib_buffer_free_list_t *fl; - u32 *src; - uword len; - - ASSERT (bm->cb.vlib_buffer_fill_free_list_cb); + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_pool_t *bp; + vlib_buffer_pool_thread_t *bpt; + u32 *src, *dst, len, n_left; - fl = pool_elt_at_index (vm->buffer_free_list_pool, index); + bp = vec_elt_at_index (bm->buffer_pools, buffer_pool_index); + bpt = vec_elt_at_index (bp->threads, vm->thread_index); - len = vec_len (fl->buffers); + dst = buffers; + n_left = n_buffers; + len = vec_len (bpt->cached_buffers); - if (PREDICT_FALSE (len < n_buffers)) + /* per-thread cache contains enough buffers */ + if (len >= n_buffers) { - bm->cb.vlib_buffer_fill_free_list_cb (vm, fl, n_buffers); - if (PREDICT_FALSE ((len = vec_len (fl->buffers)) == 0)) - return 0; + src = bpt->cached_buffers + len - n_buffers; + vlib_buffer_copy_indices (dst, src, n_buffers); + _vec_len (bpt->cached_buffers) -= n_buffers; - /* even if fill free list didn't manage to refill free list - we should give what we have */ - n_buffers = clib_min (len, n_buffers); + if (CLIB_DEBUG > 0) + vlib_buffer_validate_alloc_free (vm, buffers, n_buffers, + VLIB_BUFFER_KNOWN_FREE); + return n_buffers; + } - /* following code is intentionaly duplicated to allow compiler - to optimize fast path when n_buffers is constant value */ - src = fl->buffers + len - n_buffers; - clib_memcpy_fast (buffers, src, n_buffers * sizeof (u32)); - _vec_len (fl->buffers) -= n_buffers; + /* take everything available in the cache */ + if (len) + { + vlib_buffer_copy_indices (dst, bpt->cached_buffers, len); + _vec_len (bpt->cached_buffers) = 0; + dst += len; + n_left -= len; + } - /* Verify that buffers are known free. 
*/ - vlib_buffer_validate_alloc_free (vm, buffers, n_buffers, - VLIB_BUFFER_KNOWN_FREE); + len = round_pow2 (n_left, 32); + vec_validate_aligned (bpt->cached_buffers, len - 1, CLIB_CACHE_LINE_BYTES); + len = vlib_buffer_pool_get (vm, buffer_pool_index, bpt->cached_buffers, + len); + _vec_len (bpt->cached_buffers) = len; - return n_buffers; + if (len) + { + u32 n_copy = clib_min (len, n_left); + src = bpt->cached_buffers + len - n_copy; + vlib_buffer_copy_indices (dst, src, n_copy); + _vec_len (bpt->cached_buffers) -= n_copy; + n_left -= n_copy; } - src = fl->buffers + len - n_buffers; - clib_memcpy_fast (buffers, src, n_buffers * sizeof (u32)); - _vec_len (fl->buffers) -= n_buffers; + n_buffers -= n_left; /* Verify that buffers are known free. */ - vlib_buffer_validate_alloc_free (vm, buffers, n_buffers, - VLIB_BUFFER_KNOWN_FREE); + if (CLIB_DEBUG > 0) + vlib_buffer_validate_alloc_free (vm, buffers, n_buffers, + VLIB_BUFFER_KNOWN_FREE); return n_buffers; } +/** \brief Allocate buffers from specific numa node into supplied array + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffers - (u32 * ) buffer index array + @param n_buffers - (u32) number of buffers requested + @param numa_node - (u32) numa node + @return - (u32) number of buffers actually allocated, may be + less than the number requested or zero +*/ +always_inline u32 +vlib_buffer_alloc_on_numa (vlib_main_t * vm, u32 * buffers, u32 n_buffers, + u32 numa_node) +{ + u8 index = vlib_buffer_pool_get_default_for_numa (vm, numa_node); + return vlib_buffer_alloc_from_pool (vm, buffers, n_buffers, index); +} + /** \brief Allocate buffers into supplied array @param vm - (vlib_main_t *) vlib main data structure pointer @@ -477,11 +567,11 @@ vlib_buffer_alloc_from_free_list (vlib_main_t * vm, @return - (u32) number of buffers actually allocated, may be less than the number requested or zero */ + always_inline u32 vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) { - return 
vlib_buffer_alloc_from_free_list (vm, buffers, n_buffers, - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + return vlib_buffer_alloc_on_numa (vm, buffers, n_buffers, vm->numa_node); } /** \brief Allocate buffers into ring @@ -513,6 +603,216 @@ vlib_buffer_alloc_to_ring (vlib_main_t * vm, u32 * ring, u32 start, return n_alloc; } +/** \brief Allocate buffers into ring from specific buffer pool + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffers - (u32 * ) buffer index ring + @param start - (u32) first slot in the ring + @param ring_size - (u32) ring size + @param n_buffers - (u32) number of buffers requested + @return - (u32) number of buffers actually allocated, may be + less than the number requested or zero +*/ +always_inline u32 +vlib_buffer_alloc_to_ring_from_pool (vlib_main_t * vm, u32 * ring, u32 start, + u32 ring_size, u32 n_buffers, + u8 buffer_pool_index) +{ + u32 n_alloc; + + ASSERT (n_buffers <= ring_size); + + if (PREDICT_TRUE (start + n_buffers <= ring_size)) + return vlib_buffer_alloc_from_pool (vm, ring + start, n_buffers, + buffer_pool_index); + + n_alloc = vlib_buffer_alloc_from_pool (vm, ring + start, ring_size - start, + buffer_pool_index); + + if (PREDICT_TRUE (n_alloc == ring_size - start)) + n_alloc += vlib_buffer_alloc_from_pool (vm, ring, n_buffers - n_alloc, + buffer_pool_index); + + return n_alloc; +} + +static_always_inline void +vlib_buffer_pool_put (vlib_main_t * vm, u8 buffer_pool_index, + u32 * buffers, u32 n_buffers) +{ + vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index); + vlib_buffer_pool_thread_t *bpt = + vec_elt_at_index (bp->threads, vm->thread_index); + + if (CLIB_DEBUG > 0) + vlib_buffer_validate_alloc_free (vm, buffers, n_buffers, + VLIB_BUFFER_KNOWN_ALLOCATED); + + vec_add_aligned (bpt->cached_buffers, buffers, n_buffers, + CLIB_CACHE_LINE_BYTES); + + if (vec_len (bpt->cached_buffers) > 4 * VLIB_FRAME_SIZE) + { + clib_spinlock_lock (&bp->lock); + /* keep last stored buffers, as they 
are more likely hot in the cache */ + vec_add_aligned (bp->buffers, bpt->cached_buffers, VLIB_FRAME_SIZE, + CLIB_CACHE_LINE_BYTES); + vec_delete (bpt->cached_buffers, VLIB_FRAME_SIZE, 0); + bpt->n_alloc -= VLIB_FRAME_SIZE; + clib_spinlock_unlock (&bp->lock); + } +} + +static_always_inline void +vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, + int maybe_next) +{ + const int queue_size = 128; + vlib_buffer_pool_t *bp = 0; + u8 buffer_pool_index = ~0; + u32 n_queue = 0, queue[queue_size + 4]; + vlib_buffer_t bt = { }; +#if defined(CLIB_HAVE_VEC128) && !__aarch64__ + vlib_buffer_t bpi_mask = {.buffer_pool_index = ~0 }; + vlib_buffer_t bpi_vec = {.buffer_pool_index = ~0 }; + vlib_buffer_t flags_refs_mask = { + .flags = VLIB_BUFFER_NEXT_PRESENT, + .ref_count = ~0 + }; +#endif + + while (n_buffers) + { + vlib_buffer_t *b[8]; + u32 bi, sum = 0, flags, next; + + if (n_buffers < 12) + goto one_by_one; + + vlib_get_buffers (vm, buffers, b, 4); + vlib_get_buffers (vm, buffers + 8, b + 4, 4); + + vlib_prefetch_buffer_header (b[4], LOAD); + vlib_prefetch_buffer_header (b[5], LOAD); + vlib_prefetch_buffer_header (b[6], LOAD); + vlib_prefetch_buffer_header (b[7], LOAD); + +#if defined(CLIB_HAVE_VEC128) && !__aarch64__ + u8x16 p0, p1, p2, p3, r; + p0 = u8x16_load_unaligned (b[0]); + p1 = u8x16_load_unaligned (b[1]); + p2 = u8x16_load_unaligned (b[2]); + p3 = u8x16_load_unaligned (b[3]); + + r = p0 ^ bpi_vec.as_u8x16[0]; + r |= p1 ^ bpi_vec.as_u8x16[0]; + r |= p2 ^ bpi_vec.as_u8x16[0]; + r |= p3 ^ bpi_vec.as_u8x16[0]; + r &= bpi_mask.as_u8x16[0]; + r |= (p0 | p1 | p2 | p3) & flags_refs_mask.as_u8x16[0]; + + sum = !u8x16_is_all_zero (r); +#else + sum |= b[0]->flags; + sum |= b[1]->flags; + sum |= b[2]->flags; + sum |= b[3]->flags; + sum &= VLIB_BUFFER_NEXT_PRESENT; + sum += b[0]->ref_count - 1; + sum += b[1]->ref_count - 1; + sum += b[2]->ref_count - 1; + sum += b[3]->ref_count - 1; + sum |= b[0]->buffer_pool_index ^ buffer_pool_index; + sum |= 
b[1]->buffer_pool_index ^ buffer_pool_index; + sum |= b[2]->buffer_pool_index ^ buffer_pool_index; + sum |= b[3]->buffer_pool_index ^ buffer_pool_index; +#endif + + if (sum) + goto one_by_one; + + vlib_buffer_copy_indices (queue + n_queue, buffers, 4); + vlib_buffer_copy_template (b[0], &bt); + vlib_buffer_copy_template (b[1], &bt); + vlib_buffer_copy_template (b[2], &bt); + vlib_buffer_copy_template (b[3], &bt); + n_queue += 4; + + vlib_buffer_validate (vm, b[0]); + vlib_buffer_validate (vm, b[1]); + vlib_buffer_validate (vm, b[2]); + vlib_buffer_validate (vm, b[3]); + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[1]); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]); + + if (n_queue >= queue_size) + { + vlib_buffer_pool_put (vm, buffer_pool_index, queue, n_queue); + n_queue = 0; + } + buffers += 4; + n_buffers -= 4; + continue; + + one_by_one: + bi = buffers[0]; + + next_in_chain: + b[0] = vlib_get_buffer (vm, bi); + flags = b[0]->flags; + next = b[0]->next_buffer; + + if (PREDICT_FALSE (buffer_pool_index != b[0]->buffer_pool_index)) + { + + if (n_queue) + { + vlib_buffer_pool_put (vm, buffer_pool_index, queue, n_queue); + n_queue = 0; + } + + buffer_pool_index = b[0]->buffer_pool_index; +#if defined(CLIB_HAVE_VEC128) && !__aarch64__ + bpi_vec.buffer_pool_index = buffer_pool_index; +#endif + bp = vlib_get_buffer_pool (vm, buffer_pool_index); + vlib_buffer_copy_template (&bt, &bp->buffer_template); + } + + vlib_buffer_validate (vm, b[0]); + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); + + if (clib_atomic_sub_fetch (&b[0]->ref_count, 1) == 0) + { + vlib_buffer_copy_template (b[0], &bt); + queue[n_queue++] = bi; + } + + if (n_queue == queue_size) + { + vlib_buffer_pool_put (vm, buffer_pool_index, queue, queue_size); + n_queue = 0; + } + + if (flags & VLIB_BUFFER_NEXT_PRESENT) + { + bi = next; + goto next_in_chain; + } + + buffers++; + n_buffers--; + } + + if (n_queue) + vlib_buffer_pool_put 
(vm, buffer_pool_index, queue, n_queue); +} + + /** \brief Free buffers Frees the entire buffer chain for each buffer @@ -528,11 +828,7 @@ vlib_buffer_free (vlib_main_t * vm, /* number of buffers to free */ u32 n_buffers) { - vlib_buffer_main_t *bm = &buffer_main; - - ASSERT (bm->cb.vlib_buffer_free_cb); - - return bm->cb.vlib_buffer_free_cb (vm, buffers, n_buffers); + vlib_buffer_free_inline (vm, buffers, n_buffers, /* maybe next */ 1); } /** \brief Free buffers, does not free the buffer chain for each buffer @@ -549,11 +845,7 @@ vlib_buffer_free_no_next (vlib_main_t * vm, /* number of buffers to free */ u32 n_buffers) { - vlib_buffer_main_t *bm = &buffer_main; - - ASSERT (bm->cb.vlib_buffer_free_no_next_cb); - - return bm->cb.vlib_buffer_free_no_next_cb (vm, buffers, n_buffers); + vlib_buffer_free_inline (vm, buffers, n_buffers, /* maybe next */ 0); } /** \brief Free one buffer @@ -565,7 +857,7 @@ vlib_buffer_free_no_next (vlib_main_t * vm, always_inline void vlib_buffer_free_one (vlib_main_t * vm, u32 buffer_index) { - vlib_buffer_free (vm, &buffer_index, /* n_buffers */ 1); + vlib_buffer_free_inline (vm, &buffer_index, 1, /* maybe next */ 1); } /** \brief Free buffers from ring @@ -618,63 +910,9 @@ vlib_buffer_free_from_ring_no_next (vlib_main_t * vm, u32 * ring, u32 start, } } -/* Add/delete buffer free lists. */ -vlib_buffer_free_list_index_t vlib_buffer_create_free_list (vlib_main_t * vm, - u32 n_data_bytes, - char *fmt, ...); -always_inline void -vlib_buffer_delete_free_list (vlib_main_t * vm, - vlib_buffer_free_list_index_t free_list_index) -{ - vlib_buffer_main_t *bm = &buffer_main; - - ASSERT (bm->cb.vlib_buffer_delete_free_list_cb); - - bm->cb.vlib_buffer_delete_free_list_cb (vm, free_list_index); -} - -/* Make sure we have at least given number of unaligned buffers. 
*/ -void vlib_buffer_free_list_fill_unaligned (vlib_main_t * vm, - vlib_buffer_free_list_t * - free_list, - uword n_unaligned_buffers); - -always_inline vlib_buffer_free_list_t * -vlib_buffer_get_buffer_free_list (vlib_main_t * vm, vlib_buffer_t * b, - vlib_buffer_free_list_index_t * index) -{ - vlib_buffer_free_list_index_t i; - - *index = i = vlib_buffer_get_free_list_index (b); - return pool_elt_at_index (vm->buffer_free_list_pool, i); -} - -always_inline vlib_buffer_free_list_t * -vlib_buffer_get_free_list (vlib_main_t * vm, - vlib_buffer_free_list_index_t free_list_index) -{ - vlib_buffer_free_list_t *f; - - f = pool_elt_at_index (vm->buffer_free_list_pool, free_list_index); - - /* Sanity: indices must match. */ - ASSERT (f->index == free_list_index); - - return f; -} - -always_inline u32 -vlib_buffer_free_list_buffer_size (vlib_main_t * vm, - vlib_buffer_free_list_index_t index) -{ - vlib_buffer_free_list_t *f = vlib_buffer_get_free_list (vm, index); - return f->n_data_bytes; -} - /* Append given data to end of buffer, possibly allocating new buffers. 
*/ -int vlib_buffer_add_data (vlib_main_t * vm, - vlib_buffer_free_list_index_t free_list_index, - u32 * buffer_index, void *data, u32 n_data_bytes); +int vlib_buffer_add_data (vlib_main_t * vm, u32 * buffer_index, void *data, + u32 n_data_bytes); /* duplicate all buffers in chain */ always_inline vlib_buffer_t * @@ -753,7 +991,7 @@ vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers, u16 i; vlib_buffer_t *s = vlib_get_buffer (vm, src_buffer); - ASSERT (s->n_add_refs == 0); + ASSERT (s->ref_count == 1); ASSERT (n_buffers); ASSERT (n_buffers <= 256); @@ -778,17 +1016,15 @@ vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers, return 1; } - n_buffers = vlib_buffer_alloc_from_free_list (vm, buffers, n_buffers, - vlib_buffer_get_free_list_index - (s)); + n_buffers = vlib_buffer_alloc_from_pool (vm, buffers, n_buffers, + s->buffer_pool_index); for (i = 0; i < n_buffers; i++) { vlib_buffer_t *d = vlib_get_buffer (vm, buffers[i]); d->current_data = s->current_data; d->current_length = head_end_offset; - vlib_buffer_set_free_list_index (d, - vlib_buffer_get_free_list_index (s)); + ASSERT (d->buffer_pool_index == s->buffer_pool_index); d->total_length_not_including_first_buffer = s->current_length - head_end_offset; @@ -806,11 +1042,11 @@ vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers, d->next_buffer = src_buffer; } vlib_buffer_advance (s, head_end_offset); - s->n_add_refs = n_buffers - 1; + s->ref_count = n_buffers; while (s->flags & VLIB_BUFFER_NEXT_PRESENT) { s = vlib_get_buffer (vm, s->next_buffer); - s->n_add_refs = n_buffers - 1; + s->ref_count = n_buffers; } return n_buffers; @@ -864,8 +1100,7 @@ vlib_buffer_attach_clone (vlib_main_t * vm, vlib_buffer_t * head, vlib_buffer_t * tail) { ASSERT ((head->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - ASSERT (vlib_buffer_get_free_list_index (head) == - vlib_buffer_get_free_list_index (tail)); + ASSERT (head->buffer_pool_index == tail->buffer_pool_index); head->flags 
|= VLIB_BUFFER_NEXT_PRESENT; head->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID; @@ -876,7 +1111,7 @@ vlib_buffer_attach_clone (vlib_main_t * vm, vlib_buffer_t * head, tail->total_length_not_including_first_buffer; next_segment: - clib_atomic_add_fetch (&tail->n_add_refs, 1); + clib_atomic_add_fetch (&tail->ref_count, 1); if (tail->flags & VLIB_BUFFER_NEXT_PRESENT) { @@ -925,12 +1160,10 @@ vlib_buffer_chain_increase_length (vlib_buffer_t * first, * Returns the number of copied bytes. */ always_inline u16 vlib_buffer_chain_append_data (vlib_main_t * vm, - vlib_buffer_free_list_index_t free_list_index, vlib_buffer_t * first, vlib_buffer_t * last, void *data, u16 data_len) { - u32 n_buffer_bytes = - vlib_buffer_free_list_buffer_size (vm, free_list_index); + u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); ASSERT (n_buffer_bytes >= last->current_length + last->current_data); u16 len = clib_min (data_len, n_buffer_bytes - last->current_length - @@ -948,8 +1181,6 @@ vlib_buffer_chain_append_data (vlib_main_t * vm, * chained and points to the last buffer in the chain. */ u16 vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm, - vlib_buffer_free_list_index_t - free_list_index, vlib_buffer_t * first, vlib_buffer_t ** last, void *data, u16 data_len); @@ -966,17 +1197,9 @@ typedef struct /* Number of buffers to allocate in each call to allocator. */ u32 min_n_buffers_each_alloc; - /* Buffer free list for this template. */ - vlib_buffer_free_list_index_t free_list_index; - - u32 *free_buffers; - u8 *name; } vlib_packet_template_t; -void vlib_packet_template_get_packet_helper (vlib_main_t * vm, - vlib_packet_template_t * t); - void vlib_packet_template_init (vlib_main_t * vm, vlib_packet_template_t * t, void *packet_data, @@ -994,148 +1217,9 @@ vlib_packet_template_free (vlib_main_t * vm, vlib_packet_template_t * t) vec_free (t->packet_data); } -/* Set a buffer quickly into "uninitialized" state. 
We want this to - be extremely cheap and arrange for all fields that need to be - initialized to be in the first 128 bits of the buffer. */ -always_inline void -vlib_buffer_init_for_free_list (vlib_buffer_t * dst, - vlib_buffer_free_list_t * fl) -{ - vlib_buffer_t *src = &fl->buffer_init_template; - - /* Make sure vlib_buffer_t is cacheline aligned and sized */ - ASSERT (STRUCT_OFFSET_OF (vlib_buffer_t, cacheline0) == 0); - ASSERT (STRUCT_OFFSET_OF (vlib_buffer_t, cacheline1) == - CLIB_CACHE_LINE_BYTES); - ASSERT (STRUCT_OFFSET_OF (vlib_buffer_t, cacheline2) == - CLIB_CACHE_LINE_BYTES * 2); - - /* Make sure buffer template is sane. */ - ASSERT (fl->index == vlib_buffer_get_free_list_index (src)); - - clib_memcpy_fast (STRUCT_MARK_PTR (dst, template_start), - STRUCT_MARK_PTR (src, template_start), - STRUCT_OFFSET_OF (vlib_buffer_t, template_end) - - STRUCT_OFFSET_OF (vlib_buffer_t, template_start)); - - /* Not in the first 16 octets. */ - dst->n_add_refs = src->n_add_refs; - vlib_buffer_set_free_list_index (dst, fl->index); - - /* Make sure it really worked. */ -#define _(f) ASSERT (dst->f == src->f); - _(current_data); - _(current_length); - _(flags); -#undef _ - /* ASSERT (dst->total_length_not_including_first_buffer == 0); */ - /* total_length_not_including_first_buffer is not in the template anymore - * so it may actually not zeroed for some buffers. 
One option is to - * uncomment the line lower (comes at a cost), the other, is to just not - * care */ - /* dst->total_length_not_including_first_buffer = 0; */ - ASSERT (dst->n_add_refs == 0); -} - -always_inline void -vlib_buffer_add_to_free_list (vlib_main_t * vm, - vlib_buffer_free_list_t * f, - u32 buffer_index, u8 do_init) -{ - vlib_buffer_pool_t *bp = vlib_buffer_pool_get (f->buffer_pool_index); - vlib_buffer_t *b; - b = vlib_get_buffer (vm, buffer_index); - if (PREDICT_TRUE (do_init)) - vlib_buffer_init_for_free_list (b, f); - vec_add1_aligned (f->buffers, buffer_index, CLIB_CACHE_LINE_BYTES); - - if (vec_len (f->buffers) > 4 * VLIB_FRAME_SIZE) - { - clib_spinlock_lock (&bp->lock); - /* keep last stored buffers, as they are more likely hot in the cache */ - vec_add_aligned (bp->buffers, f->buffers, VLIB_FRAME_SIZE, - CLIB_CACHE_LINE_BYTES); - vec_delete (f->buffers, VLIB_FRAME_SIZE, 0); - f->n_alloc -= VLIB_FRAME_SIZE; - clib_spinlock_unlock (&bp->lock); - } -} - -#if CLIB_DEBUG > 0 -extern u32 *vlib_buffer_state_validation_lock; -extern uword *vlib_buffer_state_validation_hash; -extern void *vlib_buffer_state_heap; -#endif - -static inline void -vlib_validate_buffer_in_use (vlib_buffer_t * b, u32 expected) -{ -#if CLIB_DEBUG > 0 - uword *p; - void *oldheap; - - oldheap = clib_mem_set_heap (vlib_buffer_state_heap); - - while (clib_atomic_test_and_set (vlib_buffer_state_validation_lock)) - ; - - p = hash_get (vlib_buffer_state_validation_hash, b); - - /* If we don't know about b, declare it to be in the expected state */ - if (!p) - { - hash_set (vlib_buffer_state_validation_hash, b, expected); - goto out; - } - - if (p[0] != expected) - { - void cj_stop (void); - u32 bi; - vlib_main_t *vm = &vlib_global_main; - - cj_stop (); - - bi = vlib_get_buffer_index (vm, b); - - clib_mem_set_heap (oldheap); - clib_warning ("%.6f buffer %llx (%d): %s, not %s", - vlib_time_now (vm), bi, - p[0] ? "busy" : "free", expected ? 
"busy" : "free"); - os_panic (); - } -out: - CLIB_MEMORY_BARRIER (); - *vlib_buffer_state_validation_lock = 0; - clib_mem_set_heap (oldheap); -#endif -} - -static inline void -vlib_validate_buffer_set_in_use (vlib_buffer_t * b, u32 expected) -{ -#if CLIB_DEBUG > 0 - void *oldheap; - - oldheap = clib_mem_set_heap (vlib_buffer_state_heap); - - while (clib_atomic_test_and_set (vlib_buffer_state_validation_lock)) - ; - - hash_set (vlib_buffer_state_validation_hash, b, expected); - - CLIB_MEMORY_BARRIER (); - *vlib_buffer_state_validation_lock = 0; - clib_mem_set_heap (oldheap); -#endif -} - -/** minimum data size of first buffer in a buffer chain */ -#define VLIB_BUFFER_CHAIN_MIN_FIRST_DATA_SIZE (256) - /** * @brief compress buffer chain in a way where the first buffer is at least - * VLIB_BUFFER_CHAIN_MIN_FIRST_DATA_SIZE long + * VLIB_BUFFER_CLONE_HEAD_SIZE long * * @param[in] vm - vlib_main * @param[in,out] first - first buffer in chain @@ -1146,19 +1230,15 @@ always_inline void vlib_buffer_chain_compress (vlib_main_t * vm, vlib_buffer_t * first, u32 ** discard_vector) { - if (first->current_length >= VLIB_BUFFER_CHAIN_MIN_FIRST_DATA_SIZE || + if (first->current_length >= VLIB_BUFFER_CLONE_HEAD_SIZE || !(first->flags & VLIB_BUFFER_NEXT_PRESENT)) { /* this is already big enough or not a chain */ return; } - /* probe free list to find allocated buffer size to avoid overfill */ - vlib_buffer_free_list_index_t index; - vlib_buffer_free_list_t *free_list = - vlib_buffer_get_buffer_free_list (vm, first, &index); - u32 want_first_size = clib_min (VLIB_BUFFER_CHAIN_MIN_FIRST_DATA_SIZE, - free_list->n_data_bytes - + u32 want_first_size = clib_min (VLIB_BUFFER_CLONE_HEAD_SIZE, + vlib_buffer_get_default_data_size (vm) - first->current_data); do { @@ -1205,9 +1285,7 @@ always_inline int vlib_buffer_chain_linearize (vlib_main_t * vm, vlib_buffer_t * first) { vlib_buffer_t *b = first; - vlib_buffer_free_list_t *fl = - vlib_buffer_get_free_list (vm, vlib_buffer_get_free_list_index 
(b)); - u32 buf_len = fl->n_data_bytes; + u32 buf_len = vlib_buffer_get_default_data_size (vm); // free buffer chain starting from the second buffer int free_count = (b->flags & VLIB_BUFFER_NEXT_PRESENT) != 0; u32 chain_to_free = b->next_buffer; diff --git a/src/vlib/buffer_node.h b/src/vlib/buffer_node.h index 2163b72de5e0..7e488783fc5e 100644 --- a/src/vlib/buffer_node.h +++ b/src/vlib/buffer_node.h @@ -383,7 +383,7 @@ vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node, #ifdef CLIB_HAVE_VEC512 if (n_enqueued >= 32) { - clib_memcpy_fast (to_next, buffers, 32 * sizeof (u32)); + vlib_buffer_copy_indices (to_next, buffers, 32); nexts += 32; to_next += 32; buffers += 32; @@ -397,7 +397,7 @@ vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node, #ifdef CLIB_HAVE_VEC256 if (n_enqueued >= 16) { - clib_memcpy_fast (to_next, buffers, 16 * sizeof (u32)); + vlib_buffer_copy_indices (to_next, buffers, 16); nexts += 16; to_next += 16; buffers += 16; @@ -411,7 +411,7 @@ vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node, #ifdef CLIB_HAVE_VEC128 if (n_enqueued >= 8) { - clib_memcpy_fast (to_next, buffers, 8 * sizeof (u32)); + vlib_buffer_copy_indices (to_next, buffers, 8); nexts += 8; to_next += 8; buffers += 8; @@ -424,7 +424,7 @@ vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node, if (n_enqueued >= 4) { - clib_memcpy_fast (to_next, buffers, 4 * sizeof (u32)); + vlib_buffer_copy_indices (to_next, buffers, 4); nexts += 4; to_next += 4; buffers += 4; @@ -459,7 +459,7 @@ vlib_buffer_enqueue_to_single_next (vlib_main_t * vm, if (PREDICT_TRUE (n_left_to_next >= count)) { - clib_memcpy_fast (to_next, buffers, count * sizeof (u32)); + vlib_buffer_copy_indices (to_next, buffers, count); n_left_to_next -= count; vlib_put_next_frame (vm, node, next_index, n_left_to_next); return; @@ -467,7 +467,7 @@ vlib_buffer_enqueue_to_single_next (vlib_main_t * vm, n_enq = n_left_to_next; next: - clib_memcpy_fast 
(to_next, buffers, n_enq * sizeof (u32)); + vlib_buffer_copy_indices (to_next, buffers, n_enq); n_left_to_next -= n_enq; if (PREDICT_FALSE (count > n_enq)) diff --git a/src/vlib/cli.c b/src/vlib/cli.c index 4c33914e8ccb..4e8f3ae5ae58 100644 --- a/src/vlib/cli.c +++ b/src/vlib/cli.c @@ -1465,7 +1465,8 @@ elog_trace_command_fn (vlib_main_t * vm, { unformat_input_t _line_input, *line_input = &_line_input; int enable = 1; - int api = 0, cli = 0, barrier = 0; + int api = 0, cli = 0, barrier = 0, dispatch = 0, circuit = 0; + u32 circuit_node_index; if (!unformat_user (input, unformat_line_input, line_input)) goto print_status; @@ -1474,6 +1475,11 @@ elog_trace_command_fn (vlib_main_t * vm, { if (unformat (line_input, "api")) api = 1; + else if (unformat (line_input, "dispatch")) + dispatch = 1; + else if (unformat (line_input, "circuit-node %U", + unformat_vlib_node, vm, &circuit_node_index)) + circuit = 1; else if (unformat (line_input, "cli")) cli = 1; else if (unformat (line_input, "barrier")) @@ -1489,8 +1495,28 @@ elog_trace_command_fn (vlib_main_t * vm, vm->elog_trace_api_messages = api ? enable : vm->elog_trace_api_messages; vm->elog_trace_cli_commands = cli ? enable : vm->elog_trace_cli_commands; + vm->elog_trace_graph_dispatch = dispatch ? + enable : vm->elog_trace_graph_dispatch; + vm->elog_trace_graph_circuit = circuit ? + enable : vm->elog_trace_graph_circuit; vlib_worker_threads->barrier_elog_enabled = barrier ? enable : vlib_worker_threads->barrier_elog_enabled; + vm->elog_trace_graph_circuit_node_index = circuit_node_index; + + /* + * Set up start-of-buffer logic-analyzer trigger + * for main loop event logs, which are fairly heavyweight. + * See src/vlib/main/vlib_elog_main_loop_event(...), which + * will fully disable the scheme when the elog buffer fills. 
+ */ + if (dispatch || circuit) + { + elog_main_t *em = &vm->elog_main; + + em->n_total_events_disable_limit = + em->n_total_events + vec_len (em->event_ring); + } + print_status: vlib_cli_output (vm, "Current status:"); @@ -1502,6 +1528,16 @@ elog_trace_command_fn (vlib_main_t * vm, vlib_cli_output (vm, " Barrier sync trace: %s", vlib_worker_threads->barrier_elog_enabled ? "on" : "off"); + vlib_cli_output + (vm, " Graph Dispatch: %s", + vm->elog_trace_graph_dispatch ? "on" : "off"); + vlib_cli_output + (vm, " Graph Circuit: %s", + vm->elog_trace_graph_circuit ? "on" : "off"); + if (vm->elog_trace_graph_circuit) + vlib_cli_output + (vm, " node %U", + format_vlib_node_name, vm, vm->elog_trace_graph_circuit_node_index); return 0; } @@ -1515,6 +1551,8 @@ elog_trace_command_fn (vlib_main_t * vm, * @clistart * elog trace api cli barrier * elog trace api cli barrier disable + * elog trace dispatch + * elog trace circuit-node ethernet-input * elog trace * @cliend * @cliexcmd{elog trace [api][cli][barrier][disable]} @@ -1523,7 +1561,8 @@ elog_trace_command_fn (vlib_main_t * vm, VLIB_CLI_COMMAND (elog_trace_command, static) = { .path = "elog trace", - .short_help = "elog trace [api][cli][barrier][disable]", + .short_help = "elog trace [api][cli][barrier][dispatch]\n" + "[circuit-node e.g. ethernet-input][disable]", .function = elog_trace_command_fn, }; /* *INDENT-ON* */ diff --git a/src/vlib/error.c b/src/vlib/error.c index 691e6396d971..a416649cfa79 100644 --- a/src/vlib/error.c +++ b/src/vlib/error.c @@ -109,37 +109,6 @@ vlib_error_drop_buffers (vlib_main_t * vm, return n_buffers; } -/* Convenience node to drop a vector of buffers with a "misc error". 
*/ -static uword -misc_drop_buffers (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return vlib_error_drop_buffers (vm, node, vlib_frame_vector_args (frame), - /* buffer stride */ 1, - frame->n_vectors, - /* next */ 0, - node->node_index, - /* error */ 0); -} - -static char *misc_drop_buffers_error_strings[] = { - [0] = "misc. errors", -}; - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (misc_drop_buffers_node,static) = { - .function = misc_drop_buffers, - .name = "misc-drop-buffers", - .vector_size = sizeof (u32), - .n_errors = 1, - .n_next_nodes = 1, - .next_nodes = { - "error-drop", - }, - .error_strings = misc_drop_buffers_error_strings, -}; -/* *INDENT-ON* */ - void vlib_stats_register_error_index (u8 *, u64 *, u64) __attribute__ ((weak)); void diff --git a/src/vlib/format.c b/src/vlib/format.c index 79a4d6866db9..ee730bd1c28c 100644 --- a/src/vlib/format.c +++ b/src/vlib/format.c @@ -187,6 +187,30 @@ unformat_vlib_number_by_name (unformat_input_t * input, va_list * args) return p != 0; } +/* Parse a filename to dump debug info */ +uword +unformat_vlib_tmpfile (unformat_input_t * input, va_list * args) +{ + u8 **chroot_filename = va_arg (*args, u8 **); + u8 *filename; + + if (!unformat (input, "%s", &filename)) + return 0; + + /* Brain-police user path input */ + if (strstr ((char *) filename, "..") || index ((char *) filename, '/')) + { + vec_free (filename); + return 0; + } + + *chroot_filename = format (0, "/tmp/%s%c", filename, 0); + vec_free (filename); + + return 1; +} + + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vlib/format_funcs.h b/src/vlib/format_funcs.h index f60b8940d14f..30e919d7e96c 100644 --- a/src/vlib/format_funcs.h +++ b/src/vlib/format_funcs.h @@ -59,6 +59,9 @@ uword unformat_vlib_number_by_name (unformat_input_t * input, va_list * args); /* Parse an int either %d or 0x%x. 
*/ uword unformat_vlib_number (unformat_input_t * input, va_list * args); +/* Parse a filename to dump debug info */ +uword unformat_vlib_tmpfile (unformat_input_t * input, va_list * args); + /* Flag to format_vlib_*_header functions to tell them not to recurse into the next layer's header. For example, tells format_vlib_ethernet_header not to format ip header. */ diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c index b99f54f2a62b..ec35adb1c37d 100644 --- a/src/vlib/linux/pci.c +++ b/src/vlib/linux/pci.c @@ -173,6 +173,24 @@ vlib_pci_get_numa_node (vlib_main_t * vm, vlib_pci_dev_handle_t h) return d->numa_node; } +u32 +vlib_pci_get_num_msix_interrupts (vlib_main_t * vm, vlib_pci_dev_handle_t h) +{ + linux_pci_device_t *d = linux_pci_get_device (h); + + if (d->type == LINUX_PCI_DEVICE_TYPE_VFIO) + { + struct vfio_irq_info ii = { 0 }; + + ii.argsz = sizeof (struct vfio_irq_info); + ii.index = VFIO_PCI_MSIX_IRQ_INDEX; + if (ioctl (d->fd, VFIO_DEVICE_GET_IRQ_INFO, &ii) < 0) + return 0; + return ii.count; + } + return 0; +} + /* Call to allocate/initialize the pci subsystem. This is not an init function so that users can explicitly enable pci only when it's needed. 
*/ @@ -466,8 +484,8 @@ vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr, clib_memset (&ifr, 0, sizeof ifr); clib_memset (&drvinfo, 0, sizeof drvinfo); ifr.ifr_data = (char *) &drvinfo; - strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name)); - ifr.ifr_name[ARRAY_LEN (ifr.ifr_name) - 1] = '\0'; + clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1); + drvinfo.cmd = ETHTOOL_GDRVINFO; if (ioctl (fd, SIOCETHTOOL, &ifr) < 0) { @@ -482,8 +500,8 @@ vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr, continue; clib_memset (&ifr, 0, sizeof (ifr)); - strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name)); - ifr.ifr_name[ARRAY_LEN (ifr.ifr_name) - 1] = '\0'; + clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1); + if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0) { error = clib_error_return_unix (0, "ioctl fetch intf %s flags", @@ -967,7 +985,7 @@ add_device_vfio (vlib_main_t * vm, linux_pci_device_t * p, { vlib_buffer_pool_t *bp; /* *INDENT-OFF* */ - vec_foreach (bp, buffer_main.buffer_pools) + vec_foreach (bp, vm->buffer_main->buffer_pools) { u32 i; vlib_physmem_map_t *pm; diff --git a/src/vlib/linux/vmbus.c b/src/vlib/linux/vmbus.c index 2af62241d4dc..c1d8eb9b7158 100644 --- a/src/vlib/linux/vmbus.c +++ b/src/vlib/linux/vmbus.c @@ -157,7 +157,7 @@ vlib_vmbus_raise_lower (int fd, const char *upper_name) u8 *dev_net_dir; DIR *dir; - memset (&ifr, 0, sizeof (ifr)); + clib_memset (&ifr, 0, sizeof (ifr)); dev_net_dir = format (0, "%s/%s%c", sysfs_class_net_path, upper_name, 0); @@ -175,7 +175,7 @@ vlib_vmbus_raise_lower (int fd, const char *upper_name) if (strncmp (e->d_name, "lower_", 6)) continue; - strncpy (ifr.ifr_name, e->d_name + 6, IFNAMSIZ); + strncpy (ifr.ifr_name, e->d_name + 6, IFNAMSIZ - 1); break; } closedir (dir); @@ -249,8 +249,8 @@ vlib_vmbus_bind_to_uio (vlib_vmbus_addr_t * addr) } - memset (&ifr, 0, sizeof (ifr)); - strncpy (ifr.ifr_name, ifname, IFNAMSIZ); + clib_memset (&ifr, 0, sizeof (ifr)); + strncpy 
(ifr.ifr_name, ifname, IFNAMSIZ - 1); /* read up/down flags */ fd = socket (PF_INET, SOCK_DGRAM, 0); diff --git a/src/vlib/log.c b/src/vlib/log.c index 76490fcd3bcb..9a0d75fe3ac8 100644 --- a/src/vlib/log.c +++ b/src/vlib/log.c @@ -232,6 +232,8 @@ vlib_log_register_class (char *class, char *subclass) vlib_log_class_data_t *tmp; vec_foreach (tmp, lm->classes) { + if (vec_len (tmp->name) != strlen (class)) + continue; if (!memcmp (class, tmp->name, vec_len (tmp->name))) { c = tmp; @@ -689,7 +691,7 @@ test_log_class_subclass (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_test_log, static) = { .path = "test log", - .short_help = "test log -typedef u32 vlib_log_class_t; - #define foreach_vlib_log_level \ _(0, EMERG, emerg) \ _(1, ALERT, alert) \ diff --git a/src/vlib/main.c b/src/vlib/main.c index ae78c4301348..eed627cf2150 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -463,7 +463,7 @@ vlib_put_next_frame (vlib_main_t * vm, vlib_frame_t *f; u32 n_vectors_in_frame; - if (buffer_main.callbacks_registered == 0 && CLIB_DEBUG > 0) + if (CLIB_DEBUG > 0) vlib_put_next_frame_validate (vm, r, next_index, n_vectors_left); nf = vlib_node_runtime_get_next_frame (vm, r, next_index); @@ -543,15 +543,17 @@ never_inline void vlib_node_runtime_sync_stats (vlib_main_t * vm, vlib_node_runtime_t * r, uword n_calls, uword n_vectors, uword n_clocks, - uword n_ticks) + uword n_ticks0, uword n_ticks1) { vlib_node_t *n = vlib_get_node (vm, r->node_index); n->stats_total.calls += n_calls + r->calls_since_last_overflow; n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow; n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow; - n->stats_total.perf_counter_ticks += n_ticks + - r->perf_counter_ticks_since_last_overflow; + n->stats_total.perf_counter0_ticks += n_ticks0 + + r->perf_counter0_ticks_since_last_overflow; + n->stats_total.perf_counter1_ticks += n_ticks1 + + r->perf_counter1_ticks_since_last_overflow; n->stats_total.perf_counter_vectors += 
n_vectors + r->perf_counter_vectors_since_last_overflow; n->stats_total.max_clock = r->max_clock; @@ -560,7 +562,8 @@ vlib_node_runtime_sync_stats (vlib_main_t * vm, r->calls_since_last_overflow = 0; r->vectors_since_last_overflow = 0; r->clocks_since_last_overflow = 0; - r->perf_counter_ticks_since_last_overflow = 0ULL; + r->perf_counter0_ticks_since_last_overflow = 0ULL; + r->perf_counter1_ticks_since_last_overflow = 0ULL; r->perf_counter_vectors_since_last_overflow = 0ULL; } @@ -568,12 +571,12 @@ always_inline void __attribute__ ((unused)) vlib_process_sync_stats (vlib_main_t * vm, vlib_process_t * p, uword n_calls, uword n_vectors, uword n_clocks, - uword n_ticks) + uword n_ticks0, uword n_ticks1) { vlib_node_runtime_t *rt = &p->node_runtime; vlib_node_t *n = vlib_get_node (vm, rt->node_index); vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks, - n_ticks); + n_ticks0, n_ticks1); n->stats_total.suspends += p->n_suspends; p->n_suspends = 0; } @@ -599,7 +602,7 @@ vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n) vec_elt_at_index (vm->node_main.nodes_by_type[n->type], n->runtime_index); - vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0); + vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0, 0); /* Sync up runtime next frame vector counters with main node structure. 
*/ { @@ -620,27 +623,30 @@ vlib_node_runtime_update_stats (vlib_main_t * vm, vlib_node_runtime_t * node, uword n_calls, uword n_vectors, uword n_clocks, - uword n_ticks) + uword n_ticks0, uword n_ticks1) { u32 ca0, ca1, v0, v1, cl0, cl1, r; - u32 ptick0, ptick1, pvec0, pvec1; + u32 ptick00, ptick01, ptick10, ptick11, pvec0, pvec1; cl0 = cl1 = node->clocks_since_last_overflow; ca0 = ca1 = node->calls_since_last_overflow; v0 = v1 = node->vectors_since_last_overflow; - ptick0 = ptick1 = node->perf_counter_ticks_since_last_overflow; + ptick00 = ptick01 = node->perf_counter0_ticks_since_last_overflow; + ptick10 = ptick11 = node->perf_counter1_ticks_since_last_overflow; pvec0 = pvec1 = node->perf_counter_vectors_since_last_overflow; ca1 = ca0 + n_calls; v1 = v0 + n_vectors; cl1 = cl0 + n_clocks; - ptick1 = ptick0 + n_ticks; + ptick01 = ptick00 + n_ticks0; + ptick11 = ptick10 + n_ticks1; pvec1 = pvec0 + n_vectors; node->calls_since_last_overflow = ca1; node->clocks_since_last_overflow = cl1; node->vectors_since_last_overflow = v1; - node->perf_counter_ticks_since_last_overflow = ptick1; + node->perf_counter0_ticks_since_last_overflow = ptick01; + node->perf_counter1_ticks_since_last_overflow = ptick11; node->perf_counter_vectors_since_last_overflow = pvec1; node->max_clock_n = node->max_clock > n_clocks ? 
@@ -649,38 +655,39 @@ vlib_node_runtime_update_stats (vlib_main_t * vm, r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors); - if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick1 < ptick0) - || (pvec1 < pvec0)) + if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick01 < ptick00) + || (ptick11 < ptick10) || (pvec1 < pvec0)) { node->calls_since_last_overflow = ca0; node->clocks_since_last_overflow = cl0; node->vectors_since_last_overflow = v0; - node->perf_counter_ticks_since_last_overflow = ptick0; + node->perf_counter0_ticks_since_last_overflow = ptick00; + node->perf_counter1_ticks_since_last_overflow = ptick10; node->perf_counter_vectors_since_last_overflow = pvec0; vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks, - n_ticks); + n_ticks0, n_ticks1); } return r; } -static inline u64 -vlib_node_runtime_perf_counter (vlib_main_t * vm) +static inline void +vlib_node_runtime_perf_counter (vlib_main_t * vm, u64 * pmc0, u64 * pmc1) { + *pmc0 = 0; + *pmc1 = 0; if (PREDICT_FALSE (vm->vlib_node_runtime_perf_counter_cb != 0)) - return ((*vm->vlib_node_runtime_perf_counter_cb) (vm)); - return 0ULL; + (*vm->vlib_node_runtime_perf_counter_cb) (vm, pmc0, pmc1); } always_inline void vlib_process_update_stats (vlib_main_t * vm, vlib_process_t * p, - uword n_calls, uword n_vectors, uword n_clocks, - uword n_ticks) + uword n_calls, uword n_vectors, uword n_clocks) { vlib_node_runtime_update_stats (vm, &p->node_runtime, - n_calls, n_vectors, n_clocks, n_ticks); + n_calls, n_vectors, n_clocks, 0ULL, 0ULL); } static clib_error_t * @@ -905,18 +912,35 @@ vlib_elog_main_loop_event (vlib_main_t * vm, { vlib_main_t *evm = &vlib_global_main; elog_main_t *em = &evm->elog_main; + int enabled = evm->elog_trace_graph_dispatch | + evm->elog_trace_graph_circuit; - if (VLIB_ELOG_MAIN_LOOP && n_vectors) - elog_track (em, - /* event type */ - vec_elt_at_index (is_return - ? 
evm->node_return_elog_event_types - : evm->node_call_elog_event_types, - node_index), - /* track */ - (vm->thread_index ? &vlib_worker_threads[vm->thread_index]. - elog_track : &em->default_track), - /* data to log */ n_vectors); + if (PREDICT_FALSE (enabled && n_vectors)) + { + if (PREDICT_FALSE (!elog_is_enabled (em))) + { + evm->elog_trace_graph_dispatch = 0; + evm->elog_trace_graph_circuit = 0; + return; + } + if (PREDICT_TRUE + (evm->elog_trace_graph_dispatch || + (evm->elog_trace_graph_circuit && + node_index == evm->elog_trace_graph_circuit_node_index))) + { + elog_track (em, + /* event type */ + vec_elt_at_index (is_return + ? evm->node_return_elog_event_types + : evm->node_call_elog_event_types, + node_index), + /* track */ + (vm->thread_index ? + &vlib_worker_threads[vm->thread_index].elog_track + : &em->default_track), + /* data to log */ n_vectors); + } + } } #if VLIB_BUFFER_TRACE_TRAJECTORY > 0 @@ -980,14 +1004,12 @@ format_buffer_metadata (u8 * s, va_list * args) (i32) (b->current_data), (i32) (b->current_length)); s = format (s, "current_config_index: %d, flow_id: %x, next_buffer: %x\n", b->current_config_index, b->flow_id, b->next_buffer); - s = format (s, "error: %d, n_add_refs: %d, buffer_pool_index: %d\n", - (u32) (b->error), (u32) (b->n_add_refs), + s = format (s, "error: %d, ref_count: %d, buffer_pool_index: %d\n", + (u32) (b->error), (u32) (b->ref_count), (u32) (b->buffer_pool_index)); s = format (s, - "trace_index: %d, recycle_count: %d, len_not_first_buf: %d\n", - b->trace_index, b->recycle_count, - b->total_length_not_including_first_buffer); - s = format (s, "free_list_index: %d\n", (u32) (b->free_list_index)); + "trace_index: %d, len_not_first_buf: %d\n", + b->trace_index, b->total_length_not_including_first_buffer); return s; } @@ -1111,6 +1133,7 @@ dispatch_node (vlib_main_t * vm, u64 t; vlib_node_main_t *nm = &vm->node_main; vlib_next_frame_t *nf; + u64 pmc_before[2], pmc_after[2], pmc_delta[2]; if (CLIB_DEBUG > 0) { @@ -1146,142 
+1169,142 @@ dispatch_node (vlib_main_t * vm, vm->cpu_time_last_node_dispatch = last_time_stamp; - if (1 /* || vm->thread_index == node->thread_index */ ) - { - u64 pmc_before, pmc_delta; - - vlib_elog_main_loop_event (vm, node->node_index, - last_time_stamp, - frame ? frame->n_vectors : 0, - /* is_after */ 0); + vlib_elog_main_loop_event (vm, node->node_index, + last_time_stamp, frame ? frame->n_vectors : 0, + /* is_after */ 0); - /* - * To validate accounting: pmc_before = last_time_stamp - * perf ticks should equal clocks/pkt... - */ - pmc_before = vlib_node_runtime_perf_counter (vm); + vlib_node_runtime_perf_counter (vm, &pmc_before[0], &pmc_before[1]); - /* - * Turn this on if you run into - * "bad monkey" contexts, and you want to know exactly - * which nodes they've visited... See ixge.c... - */ - if (VLIB_BUFFER_TRACE_TRAJECTORY && frame) - { - int i; - u32 *from; - from = vlib_frame_vector_args (frame); - for (i = 0; i < frame->n_vectors; i++) - { - vlib_buffer_t *b = vlib_get_buffer (vm, from[i]); - add_trajectory_trace (b, node->node_index); - } - if (PREDICT_FALSE (vm->dispatch_pcap_enable)) - dispatch_pcap_trace (vm, node, frame); - n = node->function (vm, node, frame); - } - else + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See ixge.c... 
+ */ + if (VLIB_BUFFER_TRACE_TRAJECTORY && frame) + { + int i; + u32 *from; + from = vlib_frame_vector_args (frame); + for (i = 0; i < frame->n_vectors; i++) { - if (PREDICT_FALSE (vm->dispatch_pcap_enable)) - dispatch_pcap_trace (vm, node, frame); - n = node->function (vm, node, frame); + vlib_buffer_t *b = vlib_get_buffer (vm, from[i]); + add_trajectory_trace (b, node->node_index); } + if (PREDICT_FALSE (vm->dispatch_pcap_enable)) + dispatch_pcap_trace (vm, node, frame); + n = node->function (vm, node, frame); + } + else + { + if (PREDICT_FALSE (vm->dispatch_pcap_enable)) + dispatch_pcap_trace (vm, node, frame); + n = node->function (vm, node, frame); + } - t = clib_cpu_time_now (); + t = clib_cpu_time_now (); - /* - * To validate accounting: pmc_delta = t - pmc_before; - * perf ticks should equal clocks/pkt... - */ - pmc_delta = vlib_node_runtime_perf_counter (vm) - pmc_before; - - vlib_elog_main_loop_event (vm, node->node_index, t, n, /* is_after */ - 1); - - vm->main_loop_vectors_processed += n; - vm->main_loop_nodes_processed += n > 0; - - v = vlib_node_runtime_update_stats (vm, node, - /* n_calls */ 1, - /* n_vectors */ n, - /* n_clocks */ t - last_time_stamp, - pmc_delta /* PMC ticks */ ); - - /* When in interrupt mode and vector rate crosses threshold switch to - polling mode. */ - if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT) - || (dispatch_state == VLIB_NODE_STATE_POLLING - && (node->flags - & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))) - { -#ifdef DISPATCH_NODE_ELOG_REQUIRED - ELOG_TYPE_DECLARE (e) = - { - .function = (char *) __FUNCTION__,.format = - "%s vector length %d, switching to %s",.format_args = - "T4i4t4",.n_enum_strings = 2,.enum_strings = - { - "interrupt", "polling",},}; - struct - { - u32 node_name, vector_length, is_polling; - } *ed; - vlib_worker_thread_t *w = vlib_worker_threads + vm->thread_index; -#endif + /* + * To validate accounting: pmc_delta = t - pmc_before; + * perf ticks should equal clocks/pkt... 
+ */ + vlib_node_runtime_perf_counter (vm, &pmc_after[0], &pmc_after[1]); + + pmc_delta[0] = pmc_after[0] - pmc_before[0]; + pmc_delta[1] = pmc_after[1] - pmc_before[1]; + + vlib_elog_main_loop_event (vm, node->node_index, t, n, 1 /* is_after */ ); + + vm->main_loop_vectors_processed += n; + vm->main_loop_nodes_processed += n > 0; + + v = vlib_node_runtime_update_stats (vm, node, + /* n_calls */ 1, + /* n_vectors */ n, + /* n_clocks */ t - last_time_stamp, + pmc_delta[0] /* PMC0 */ , + pmc_delta[1] /* PMC1 */ ); + + /* When in interrupt mode and vector rate crosses threshold switch to + polling mode. */ + if (PREDICT_FALSE ((dispatch_state == VLIB_NODE_STATE_INTERRUPT) + || (dispatch_state == VLIB_NODE_STATE_POLLING + && (node->flags + & + VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)))) + { + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = + { + .function = (char *) __FUNCTION__, + .format = "%s vector length %d, switching to %s", + .format_args = "T4i4t4", + .n_enum_strings = 2, + .enum_strings = { + "interrupt", "polling", + }, + }; + /* *INDENT-ON* */ + struct + { + u32 node_name, vector_length, is_polling; + } *ed; - if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT - && v >= nm->polling_threshold_vector_length) && - !(node->flags & - VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) + if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT + && v >= nm->polling_threshold_vector_length) && + !(node->flags & + VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) + { + vlib_node_t *n = vlib_get_node (vm, node->node_index); + n->state = VLIB_NODE_STATE_POLLING; + node->state = VLIB_NODE_STATE_POLLING; + node->flags &= + ~VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE; + node->flags |= VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE; + nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] -= 1; + nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] += 1; + + if (PREDICT_FALSE (vlib_global_main.elog_trace_graph_dispatch)) { - vlib_node_t *n = 
vlib_get_node (vm, node->node_index); - n->state = VLIB_NODE_STATE_POLLING; - node->state = VLIB_NODE_STATE_POLLING; - node->flags &= - ~VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE; - node->flags |= - VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE; - nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] -= 1; - nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] += 1; + vlib_worker_thread_t *w = vlib_worker_threads + + vm->thread_index; -#ifdef DISPATCH_NODE_ELOG_REQUIRED ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); ed->node_name = n->name_elog_string; ed->vector_length = v; ed->is_polling = 1; -#endif } - else if (dispatch_state == VLIB_NODE_STATE_POLLING - && v <= nm->interrupt_threshold_vector_length) + } + else if (dispatch_state == VLIB_NODE_STATE_POLLING + && v <= nm->interrupt_threshold_vector_length) + { + vlib_node_t *n = vlib_get_node (vm, node->node_index); + if (node->flags & + VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) { - vlib_node_t *n = vlib_get_node (vm, node->node_index); - if (node->flags & - VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) - { - /* Switch to interrupt mode after dispatch in polling one more time. - This allows driver to re-enable interrupts. */ - n->state = VLIB_NODE_STATE_INTERRUPT; - node->state = VLIB_NODE_STATE_INTERRUPT; - node->flags &= - ~VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE; - nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] -= - 1; - nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] += - 1; + /* Switch to interrupt mode after dispatch in polling one more time. + This allows driver to re-enable interrupts. 
*/ + n->state = VLIB_NODE_STATE_INTERRUPT; + node->state = VLIB_NODE_STATE_INTERRUPT; + node->flags &= + ~VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE; + nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] -= 1; + nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] += 1; - } - else + } + else + { + vlib_worker_thread_t *w = vlib_worker_threads + + vm->thread_index; + node->flags |= + VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE; + if (PREDICT_FALSE (vlib_global_main.elog_trace_graph_dispatch)) { - node->flags |= - VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE; -#ifdef DISPATCH_NODE_ELOG_REQUIRED ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); ed->node_name = n->name_elog_string; ed->vector_length = v; ed->is_polling = 0; -#endif } } } @@ -1544,8 +1567,7 @@ dispatch_process (vlib_main_t * vm, vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, - /* n_clocks */ t - last_time_stamp, - /* pmc_ticks */ 0ULL); + /* n_clocks */ t - last_time_stamp); return t; } @@ -1628,8 +1650,7 @@ dispatch_suspended_process (vlib_main_t * vm, vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, - /* n_clocks */ t - last_time_stamp, - /* pmc_ticks */ 0ULL); + /* n_clocks */ t - last_time_stamp); return t; } @@ -1679,8 +1700,8 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (!nm->interrupt_threshold_vector_length) nm->interrupt_threshold_vector_length = 5; - /* Make sure the performance monitor counter is disabled */ - vm->perf_counter_id = ~0; + vm->cpu_id = clib_get_current_cpu_id (); + vm->numa_node = clib_get_current_numa_node (); /* Start all processes. 
*/ if (is_main) @@ -1775,9 +1796,30 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (is_main) { + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (es) = + { + .format = "process tw start", + .format_args = "", + }; + ELOG_TYPE_DECLARE (ee) = + { + .format = "process tw end: %d", + .format_args = "i4", + }; + /* *INDENT-ON* */ + + struct + { + int nready_procs; + } *ed; + /* Check if process nodes have expired from timing wheel. */ ASSERT (nm->data_from_advancing_timing_wheel != 0); + if (PREDICT_FALSE (vm->elog_trace_graph_dispatch)) + ed = ELOG_DATA (&vlib_global_main.elog_main, es); + nm->data_from_advancing_timing_wheel = TW (tw_timer_expire_timers_vec) ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm), @@ -1785,6 +1827,13 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) ASSERT (nm->data_from_advancing_timing_wheel != 0); + if (PREDICT_FALSE (vm->elog_trace_graph_dispatch)) + { + ed = ELOG_DATA (&vlib_global_main.elog_main, ee); + ed->nready_procs = + _vec_len (nm->data_from_advancing_timing_wheel); + } + if (PREDICT_FALSE (_vec_len (nm->data_from_advancing_timing_wheel) > 0)) { @@ -1990,10 +2039,6 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) if ((error = vlib_call_all_init_functions (vm))) goto done; - /* Create default buffer free list. 
*/ - vlib_buffer_create_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, - "default"); - nm->timing_wheel = clib_mem_alloc_aligned (sizeof (TWT (tw_timer_wheel)), CLIB_CACHE_LINE_BYTES); @@ -2127,7 +2172,9 @@ pcap_dispatch_trace_command_internal (vlib_main_t * vm, } pm->n_packets_to_capture = max; } - else if (unformat (line_input, "file %s", &filename)) + else + if (unformat + (line_input, "file %U", unformat_vlib_tmpfile, &filename)) { if (vm->dispatch_pcap_enable) { @@ -2136,21 +2183,6 @@ pcap_dispatch_trace_command_internal (vlib_main_t * vm, errorFlag = 1; break; } - - /* Brain-police user path input */ - if (strstr ((char *) filename, "..") - || index ((char *) filename, '/')) - { - vlib_cli_output (vm, "illegal characters in filename '%s'", - filename); - vlib_cli_output (vm, "Hint: .. and / are not allowed."); - vec_free (filename); - errorFlag = 1; - break; - } - - chroot_filename = format (0, "/tmp/%s%c", filename, 0); - vec_free (filename); } else if (unformat (line_input, "status")) { diff --git a/src/vlib/main.h b/src/vlib/main.h index 474756bebd64..f89ecd3299f1 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -84,9 +84,8 @@ typedef struct vlib_main_t u32 node_counts_per_main_loop[2]; /* Main loop hw / sw performance counters */ - u64 (*vlib_node_runtime_perf_counter_cb) (struct vlib_main_t *); - int perf_counter_id; - int perf_counter_fd; + void (*vlib_node_runtime_perf_counter_cb) (struct vlib_main_t *, + u64 *, u64 *); /* Every so often we switch to the next counter. */ #define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7 @@ -116,8 +115,8 @@ typedef struct vlib_main_t /* Size of the heap */ uword heap_size; - /* Pool of buffer free lists. */ - vlib_buffer_free_list_t *buffer_free_list_pool; + /* buffer main structure. */ + vlib_buffer_main_t *buffer_main; /* physical memory main structure. 
*/ vlib_physmem_main_t physmem_main; @@ -156,6 +155,9 @@ typedef struct vlib_main_t /* Event logger trace flags */ int elog_trace_api_messages; int elog_trace_cli_commands; + int elog_trace_graph_dispatch; + int elog_trace_graph_circuit; + u32 elog_trace_graph_circuit_node_index; /* Node call and return event types. */ elog_event_type_t *node_call_elog_event_types; @@ -172,8 +174,10 @@ typedef struct vlib_main_t /* Hash table to record which init functions have been called. */ uword *init_functions_called; - /* to compare with node runtime */ + /* thread, cpu and numa_node indices */ u32 thread_index; + u32 cpu_id; + u32 numa_node; /* List of init functions to call, setup by constructors */ _vlib_init_function_list_elt_t *init_function_registrations; diff --git a/src/vlib/node.h b/src/vlib/node.h index f41eb60aa2c4..e5d46d83665e 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -217,31 +217,9 @@ CLIB_MARCH_SFX (node##_multiarch_register) (void) \ } \ uword CLIB_CPU_OPTIMIZED CLIB_MARCH_SFX (node##_fn) -#if CLIB_DEBUG > 0 -#define VLIB_NODE_FUNCTION_CLONE_TEMPLATE(arch, fn) -#define VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn) + +/* FIXME to be removed */ #define VLIB_NODE_FUNCTION_MULTIARCH(node, fn) -#else -#define VLIB_NODE_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt) \ - uword \ - __attribute__ ((flatten)) \ - __attribute__ ((target (tgt))) \ - CLIB_CPU_OPTIMIZED \ - fn ## _ ## arch ( struct vlib_main_t * vm, \ - struct vlib_node_runtime_t * node, \ - struct vlib_frame_t * frame) \ - { return fn (vm, node, frame); } - -#define VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn) \ - foreach_march_variant(VLIB_NODE_FUNCTION_CLONE_TEMPLATE, fn) - -#define VLIB_NODE_FUNCTION_MULTIARCH(node, fn) \ - VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn) \ - CLIB_MULTIARCH_SELECT_FN(fn, static inline) \ - static void __attribute__((__constructor__)) \ - __vlib_node_function_multiarch_select_##node (void) \ - { node.function = fn ## _multiarch_select(); } -#endif always_inline vlib_node_registration_t * 
vlib_node_next_registered (vlib_node_registration_t * c) @@ -258,7 +236,8 @@ typedef struct u64 calls, vectors, clocks, suspends; u64 max_clock; u64 max_clock_n; - u64 perf_counter_ticks; + u64 perf_counter0_ticks; + u64 perf_counter1_ticks; u64 perf_counter_vectors; } vlib_node_stats_t; @@ -507,7 +486,8 @@ typedef struct vlib_node_runtime_t u32 vectors_since_last_overflow; /**< Number of vector elements processed by this node. */ - u32 perf_counter_ticks_since_last_overflow; /**< Perf counter ticks */ + u32 perf_counter0_ticks_since_last_overflow; /**< Perf counter 0 ticks */ + u32 perf_counter1_ticks_since_last_overflow; /**< Perf counter 1 ticks */ u32 perf_counter_vectors_since_last_overflow; /**< Perf counter vectors */ u32 next_frame_index; /**< Start of next frames for this diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c index 062854af5bcb..c8e32b58a693 100644 --- a/src/vlib/node_cli.c +++ b/src/vlib/node_cli.c @@ -37,6 +37,9 @@ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include +#include +#include #include #include @@ -88,6 +91,81 @@ VLIB_CLI_COMMAND (show_node_graph_command, static) = { }; /* *INDENT-ON* */ +static clib_error_t * +show_node_graphviz (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + clib_error_t *error = 0; + vlib_node_main_t *nm = &vm->node_main; + u8 *chroot_filename = 0; + int fd; + vlib_node_t **nodes = 0; + uword i, j; + + if (!unformat_user (input, unformat_vlib_tmpfile, &chroot_filename)) + { + fd = -1; + } + else + { + fd = + open ((char *) chroot_filename, O_CREAT | O_TRUNC | O_WRONLY, 0664); + } + +#define format__(vm__, fd__, ...) 
\ + if ((fd) < 0) \ + { \ + vlib_cli_output((vm__), ## __VA_ARGS__); \ + } \ + else \ + { \ + fdformat((fd__), ## __VA_ARGS__); \ + } + + format__ (vm, fd, "%s", "digraph {\n"); + + nodes = vec_dup (nm->nodes); + vec_sort_with_function (nodes, node_cmp); + + for (i = 0; i < vec_len (nodes); i++) + { + for (j = 0; j < vec_len (nodes[i]->next_nodes); j++) + { + vlib_node_t *x; + + if (nodes[i]->next_nodes[j] == VLIB_INVALID_NODE_INDEX) + continue; + + x = vec_elt (nm->nodes, nodes[i]->next_nodes[j]); + format__ (vm, fd, " \"%v\" -> \"%v\"\n", nodes[i]->name, x->name); + } + } + + format__ (vm, fd, "%s", "}"); + + if (fd >= 0) + { + vlib_cli_output (vm, + "vlib graph dumped into `%s'. Run eg. `fdp -Tsvg -O %s'.", + chroot_filename, chroot_filename); + } + + vec_free (nodes); + vec_free (chroot_filename); + vec_free (nodes); + if (fd >= 0) + close (fd); + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_node_graphviz_command, static) = { + .path = "show vlib graphviz", + .short_help = "Dump packet processing node graph as a graphviz dotfile", + .function = show_node_graphviz, +}; +/* *INDENT-ON* */ + static u8 * format_vlib_node_state (u8 * s, va_list * va) { @@ -148,8 +226,6 @@ format_vlib_node_stats (u8 * s, va_list * va) f64 maxc, maxcn; u32 maxn; u32 indent; - u64 pmc_ticks; - f64 pmc_ticks_per_packet; if (!n) { @@ -163,9 +239,6 @@ format_vlib_node_stats (u8 * s, va_list * va) "%=30s%=12s%=16s%=16s%=16s%=16s%=16s", "Name", "State", "Calls", "Vectors", "Suspends", "Clocks", "Vectors/Call"); - if (vm->perf_counter_id) - s = format (s, "%=16s", "Perf Ticks"); - return s; } @@ -182,13 +255,6 @@ format_vlib_node_stats (u8 * s, va_list * va) else maxcn = 0.0; - pmc_ticks = n->stats_total.perf_counter_ticks - - n->stats_last_clear.perf_counter_ticks; - if (p > 0) - pmc_ticks_per_packet = (f64) pmc_ticks / (f64) p; - else - pmc_ticks_per_packet = 0.0; - /* Clocks per packet, per call or per suspend. 
*/ x = 0; if (p > 0) @@ -221,9 +287,6 @@ format_vlib_node_stats (u8 * s, va_list * va) s = format (s, "%-30v%=12U%16Ld%16Ld%16Ld%16.2e%16.2f", ns, format_vlib_node_state, vm, n, c, p, d, x, v); - if (pmc_ticks_per_packet > 0.0) - s = format (s, "%16.2e", pmc_ticks_per_packet); - if (ns != n->name) vec_free (ns); diff --git a/src/vlib/pci/pci.h b/src/vlib/pci/pci.h index 71d4baedd20e..d3b43ba8d90d 100644 --- a/src/vlib/pci/pci.h +++ b/src/vlib/pci/pci.h @@ -103,6 +103,9 @@ vlib_pci_addr_t *vlib_pci_get_all_dev_addrs (); vlib_pci_addr_t *vlib_pci_get_addr (vlib_main_t * vm, vlib_pci_dev_handle_t h); u32 vlib_pci_get_numa_node (vlib_main_t * vm, vlib_pci_dev_handle_t h); +u32 vlib_pci_get_num_msix_interrupts (vlib_main_t * vm, + vlib_pci_dev_handle_t h); + uword vlib_pci_get_private_data (vlib_main_t * vm, vlib_pci_dev_handle_t h); void vlib_pci_set_private_data (vlib_main_t * vm, vlib_pci_dev_handle_t h, uword private_data); diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 3e184e3e1cd3..cdb4eb082b55 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -719,8 +719,6 @@ start_workers (vlib_main_t * vm) for (i = 0; i < vec_len (tm->registrations); i++) { vlib_node_main_t *nm, *nm_clone; - vlib_buffer_free_list_t *fl_clone, *fl_orig; - vlib_buffer_free_list_t *orig_freelist_pool; int k; tr = tm->registrations[i]; @@ -801,7 +799,7 @@ start_workers (vlib_main_t * vm) /* fork the frame dispatch queue */ nm_clone->pending_frames = 0; - vec_validate (nm_clone->pending_frames, 10); /* $$$$$?????? 
*/ + vec_validate (nm_clone->pending_frames, 10); _vec_len (nm_clone->pending_frames) = 0; /* fork nodes */ @@ -850,6 +848,21 @@ start_workers (vlib_main_t * vm) n->runtime_data_bytes)); } + nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] = + vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT], + CLIB_CACHE_LINE_BYTES); + vec_foreach (rt, + nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) + { + vlib_node_t *n = vlib_get_node (vm, rt->node_index); + rt->thread_index = vm_clone->thread_index; + /* copy initial runtime_data from node */ + if (n->runtime_data && n->runtime_data_bytes > 0) + clib_memcpy (rt->runtime_data, n->runtime_data, + clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, + n->runtime_data_bytes)); + } + nm_clone->processes = vec_dup_aligned (nm->processes, CLIB_CACHE_LINE_BYTES); @@ -868,26 +881,6 @@ start_workers (vlib_main_t * vm) (vlib_mains[0]->error_main.counters_last_clear, CLIB_CACHE_LINE_BYTES); - /* Fork the vlib_buffer_main_t free lists, etc. */ - orig_freelist_pool = vm_clone->buffer_free_list_pool; - vm_clone->buffer_free_list_pool = 0; - - /* *INDENT-OFF* */ - pool_foreach (fl_orig, orig_freelist_pool, - ({ - pool_get_aligned (vm_clone->buffer_free_list_pool, - fl_clone, CLIB_CACHE_LINE_BYTES); - ASSERT (fl_orig - orig_freelist_pool - == fl_clone - vm_clone->buffer_free_list_pool); - - fl_clone[0] = fl_orig[0]; - fl_clone->buffers = 0; - vec_validate(fl_clone->buffers, 0); - vec_reset_length(fl_clone->buffers); - fl_clone->n_alloc = 0; - })); -/* *INDENT-ON* */ - worker_thread_index++; } } @@ -1173,6 +1166,33 @@ vlib_worker_thread_node_refork (void) vec_free (old_rt); + /* re-clone pre-input nodes */ + old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]; + nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] = + vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT], + CLIB_CACHE_LINE_BYTES); + + vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) + { + vlib_node_t *n = vlib_get_node (vm, rt->node_index); + 
rt->thread_index = vm_clone->thread_index; + /* copy runtime_data, will be overwritten later for existing rt */ + if (n->runtime_data && n->runtime_data_bytes > 0) + clib_memcpy_fast (rt->runtime_data, n->runtime_data, + clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, + n->runtime_data_bytes)); + } + + for (j = 0; j < vec_len (old_rt); j++) + { + rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); + rt->state = old_rt[j].state; + clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data, + VLIB_NODE_RUNTIME_DATA_SIZE); + } + + vec_free (old_rt); + nm_clone->processes = vec_dup_aligned (nm->processes, CLIB_CACHE_LINE_BYTES); } diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index 8be0770bfd38..6b519e5ce955 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -223,12 +223,13 @@ linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, em->epoll_events, vec_len (em->epoll_events), timeout_ms); } + } else { if (timeout_ms) usleep (timeout_ms * 1000); - return 0; + goto done; } } @@ -238,7 +239,7 @@ linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait")); /* non fatal error (e.g. EINTR). 
*/ - return 0; + goto done; } em->epoll_waits += 1; @@ -314,6 +315,13 @@ linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } } +done: + if (PREDICT_FALSE (vm->cpu_id != clib_get_current_cpu_id ())) + { + vm->cpu_id = clib_get_current_cpu_id (); + vm->numa_node = clib_get_current_numa_node (); + } + return 0; } diff --git a/src/vlib/unix/mc_socket.c b/src/vlib/unix/mc_socket.c index c820981dae06..86ee43619006 100644 --- a/src/vlib/unix/mc_socket.c +++ b/src/vlib/unix/mc_socket.c @@ -165,7 +165,7 @@ recvmsg_helper (mc_socket_main_t * msm, vlib_main_t *vm = msm->mc_main.vlib_main; vlib_buffer_t *b; uword n_left, n_alloc, n_mtu, i, i_rx; - const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; + const uword buffer_size = vlib_buffer_get_default_data_size (vm); word n_bytes_left; /* Make sure we have at least a MTU worth of buffers. */ @@ -1012,9 +1012,9 @@ mc_socket_main_init (mc_socket_main_t * msm, char **intfc_probe_list, msm->rx_mtu_n_bytes = mtu; msm->rx_mtu_n_buffers = - msm->rx_mtu_n_bytes / VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; + msm->rx_mtu_n_bytes / vlib_buffer_get_default_data_size (vm); msm->rx_mtu_n_buffers += - (msm->rx_mtu_n_bytes % VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES) != 0; + (msm->rx_mtu_n_bytes % vlib_buffer_get_default_data_size (vm)) != 0; error = socket_setup (msm); if (error) diff --git a/src/vlib/vlib.h b/src/vlib/vlib.h index 49bb51bfbe3c..2149c4763ab5 100644 --- a/src/vlib/vlib.h +++ b/src/vlib/vlib.h @@ -48,6 +48,7 @@ /* Forward declarations of structs to avoid circular dependencies. */ struct vlib_main_t; +typedef u32 vlib_log_class_t; /* All includes in alphabetical order. 
*/ #include diff --git a/src/vlibapi/node_serialize.c b/src/vlibapi/node_serialize.c index 0774eea251ec..b50d79e29229 100644 --- a/src/vlibapi/node_serialize.c +++ b/src/vlibapi/node_serialize.c @@ -57,7 +57,7 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector, u8 *namep; u32 name_bytes; uword i, j, k; - u64 l, v, c, d, pmc; + u64 l, v, c, d; state_string_enum_t state_code; serialize_open_vector (sm, vector); @@ -77,8 +77,6 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector, v = n->stats_total.vectors - n->stats_last_clear.vectors; c = n->stats_total.calls - n->stats_last_clear.calls; d = n->stats_total.suspends - n->stats_last_clear.suspends; - pmc = n->stats_total.perf_counter_ticks - - n->stats_last_clear.perf_counter_ticks; state_code = STATE_INTERNAL; @@ -151,8 +149,6 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector, serialize_integer (sm, v, 8); /* Total suspends */ serialize_integer (sm, d, 8); - /* PMC counter */ - serialize_integer (sm, pmc, 8); } else /* no stats */ serialize_likely_small_unsigned_integer (sm, 0); @@ -171,7 +167,7 @@ vlib_node_unserialize (u8 * vector) vlib_node_t **nodes; vlib_node_t ***nodes_by_thread = 0; int i, j, k; - u64 l, v, c, d, pmc; + u64 l, v, c, d; state_string_enum_t state_code; int stats_present; @@ -229,9 +225,6 @@ vlib_node_unserialize (u8 * vector) /* Total suspends */ unserialize_integer (sm, &d, 8); node->stats_total.suspends = d; - /* PMC counter */ - unserialize_integer (sm, &pmc, 8); - node->stats_total.perf_counter_ticks = pmc; } } } diff --git a/src/vlibmemory/memory_client.c b/src/vlibmemory/memory_client.c index 3add39a43285..fb1173406620 100644 --- a/src/vlibmemory/memory_client.c +++ b/src/vlibmemory/memory_client.c @@ -468,6 +468,16 @@ vl_client_connect_to_vlib_no_map (const char *svm_name, 0 /* dont map */ ); } +int +vl_client_connect_to_vlib_no_rx_pthread_no_map (const char *svm_name, + const char *client_name, + int 
rx_queue_size) +{ + return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, + 0 /* want pthread */ , + 0 /* dont map */ ); +} + static void disconnect_from_vlib_internal (u8 do_unmap) { diff --git a/src/vlibmemory/memory_client.h b/src/vlibmemory/memory_client.h index 6aaf6d7e5697..8400d963dcf9 100644 --- a/src/vlibmemory/memory_client.h +++ b/src/vlibmemory/memory_client.h @@ -35,6 +35,9 @@ int vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name, int vl_client_connect_to_vlib_no_map (const char *svm_name, const char *client_name, int rx_queue_size); +int vl_client_connect_to_vlib_no_rx_pthread_no_map (const char *svm_name, + const char *client_name, + int rx_queue_size); void vl_client_install_client_message_handlers (void); u8 vl_mem_client_is_connected (void); diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c index 1ccd563639f7..1263f2c9433d 100644 --- a/src/vlibmemory/memory_shared.c +++ b/src/vlibmemory/memory_shared.c @@ -176,6 +176,7 @@ vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null) rv = clib_mem_alloc (nbytes); rv->q = 0; + rv->gc_mark_timestamp = 0; svm_pop_heap (oldheap); pthread_mutex_unlock (&am->vlib_rp->mutex); diff --git a/src/vlibmemory/socket_client.c b/src/vlibmemory/socket_client.c index 38bcc2a51569..ca1e53538ae7 100644 --- a/src/vlibmemory/socket_client.c +++ b/src/vlibmemory/socket_client.c @@ -304,8 +304,18 @@ static void vl_api_sock_init_shm_reply_t_handler new_name = format (0, "%v[shm]%c", scm->name, 0); vl_client_install_client_message_handlers (); - vl_client_connect_to_vlib_no_map ("pvt", (char *) new_name, - 32 /* input_queue_length */ ); + if (scm->want_shm_pthread) + { + vl_client_connect_to_vlib_no_map ("pvt", (char *) new_name, + 32 /* input_queue_length */ ); + } + else + { + vl_client_connect_to_vlib_no_rx_pthread_no_map ("pvt", + (char *) new_name, 32 + /* input_queue_length */ + ); + } vl_socket_client_enable_disable (0); vec_free (new_name); } @@ 
-402,13 +412,16 @@ vl_socket_client_connect (char *socket_path, char *client_name, } int -vl_socket_client_init_shm (vl_api_shm_elem_config_t * config) +vl_socket_client_init_shm (vl_api_shm_elem_config_t * config, + int want_pthread) { socket_client_main_t *scm = &socket_client_main; vl_api_sock_init_shm_t *mp; int rv, i; u64 *cfg; + scm->want_shm_pthread = want_pthread; + mp = vl_socket_client_msg_alloc (sizeof (*mp) + vec_len (config) * sizeof (u64)); clib_memset (mp, 0, sizeof (*mp)); diff --git a/src/vlibmemory/socket_client.h b/src/vlibmemory/socket_client.h index 46e2c865250a..68cd0f219944 100644 --- a/src/vlibmemory/socket_client.h +++ b/src/vlibmemory/socket_client.h @@ -40,6 +40,8 @@ typedef struct u8 *name; clib_time_t clib_time; ssvm_private_t memfd_segment; + + int want_shm_pthread; } socket_client_main_t; extern socket_client_main_t socket_client_main; @@ -53,7 +55,8 @@ int vl_socket_client_read (int wait); int vl_socket_client_write (void); void vl_socket_client_enable_disable (int enable); void *vl_socket_client_msg_alloc (int nbytes); -int vl_socket_client_init_shm (vl_api_shm_elem_config_t * config); +int vl_socket_client_init_shm (vl_api_shm_elem_config_t * config, + int want_pthread); clib_error_t *vl_socket_client_recv_fd_msg (int fds[], int n_fds, u32 wait); #endif /* SRC_VLIBMEMORY_SOCKET_CLIENT_H_ */ diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index 353bd3f7a6d9..f03c95d7eff5 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -434,7 +434,11 @@ list(APPEND VNET_API_FILES ip/punt.api ) -list(APPEND VNET_MULTIARCH_SOURCES ip/ip4_forward.c ip/ip4_input.c) +list(APPEND VNET_MULTIARCH_SOURCES + ip/ip4_forward.c + ip/ip6_forward.c + ip/ip4_input.c +) ############################################################################## # Layer 2/3 ARP @@ -480,6 +484,9 @@ list(APPEND VNET_SOURCES ipsec/ipsec_input.c ipsec/ipsec_if.c ipsec/ipsec_if_in.c + ipsec/ipsec_sa.c + ipsec/ipsec_spd.c + ipsec/ipsec_spd_policy.c 
ipsec/esp_format.c ipsec/esp_encrypt.c ipsec/esp_decrypt.c @@ -498,6 +505,9 @@ list(APPEND VNET_MULTIARCH_SOURCES ipsec/esp_decrypt.c ipsec/ah_decrypt.c ipsec/ah_encrypt.c + ipsec/ipsec_if_in.c + ipsec/ipsec_output.c + ipsec/ipsec_input.c ) list(APPEND VNET_API_FILES ipsec/ipsec.api) @@ -971,9 +981,13 @@ list(APPEND VNET_SOURCES devices/virtio/vhost_user_output.c devices/virtio/vhost_user_api.c devices/virtio/virtio.c + devices/virtio/virtio_api.c + devices/virtio/cli.c + devices/virtio/pci.c ) list(APPEND VNET_HEADERS + devices/virtio/pci.h devices/virtio/virtio.h devices/virtio/vhost_user.h ) @@ -983,7 +997,10 @@ list(APPEND VNET_MULTIARCH_SOURCES devices/virtio/vhost_user_output.c ) -list(APPEND VNET_API_FILES devices/virtio/vhost_user.api) +list(APPEND VNET_API_FILES + devices/virtio/vhost_user.api + devices/virtio/virtio.api +) ############################################################################## # tap interface (with virtio backend) @@ -1028,6 +1045,7 @@ list(APPEND VNET_SOURCES session/session_node.c session/transport.c session/application.c + session/application_worker.c session/session_cli.c session/application_interface.c session/application_namespace.c @@ -1039,11 +1057,11 @@ list(APPEND VNET_HEADERS session/session.h session/session_table.h session/session_rules_table.h - session/stream_session.h + session/session_types.h session/session_lookup.h session/application.h session/transport.h - session/transport_interface.h + session/transport_types.h session/application_interface.h session/application_namespace.h session/session_debug.h @@ -1082,6 +1100,7 @@ list(APPEND VNET_SOURCES list(APPEND VNET_HEADERS tls/tls.h + tls/tls_test.h ) ############################################################################## @@ -1144,18 +1163,13 @@ list(APPEND VNET_API_FILES feature/feature.api) list(APPEND VNET_SOURCES unix/gdb_funcs.c - unix/tap_api.c - unix/tapcli.c unix/tuntap.c ) list(APPEND VNET_HEADERS unix/tuntap.h - unix/tapcli.h ) -list(APPEND 
VNET_API_FILES unix/tap.api) - ############################################################################## # FIB ############################################################################## diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index ea584130d58e..0cc7d72c600a 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -908,7 +908,6 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt, } vlib_buffer_t *b = vlib_get_buffer (vm, bi); ASSERT (b->current_data == 0); - clib_memset (vnet_buffer (b), 0, sizeof (*vnet_buffer (b))); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); bfd_echo_pkt_t *pkt = vlib_buffer_get_current (b); clib_memset (pkt, 0, sizeof (*pkt)); @@ -982,7 +981,6 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, } vlib_buffer_t *b = vlib_get_buffer (vm, bi); ASSERT (b->current_data == 0); - clib_memset (vnet_buffer (b), 0, sizeof (*vnet_buffer (b))); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); bfd_init_control_frame (bm, bs, b); switch (bs->poll_state) diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index c587c86801f6..9f072e118370 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -345,7 +345,7 @@ typedef struct u8 data[400]; } bfd_input_trace_t; -enum +typedef enum { BFD_EVENT_RESCHEDULE = 1, BFD_EVENT_NEW_SESSION, diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index d35c6dee5e19..041e5840e0b8 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -1239,7 +1239,6 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt, { b0->current_data = 0; b0->current_length = 0; - clib_memset (vnet_buffer (b0), 0, sizeof (*vnet_buffer (b0))); bfd_init_final_control_frame (vm, b0, bfd_udp_main.bfd_main, bs, 0); if (is_ipv6) diff --git a/src/vnet/bier/bier_fmask.h b/src/vnet/bier/bier_fmask.h index 0666605c23f5..2317a5094aac 100644 --- a/src/vnet/bier/bier_fmask.h +++ b/src/vnet/bier/bier_fmask.h @@ -171,7 +171,7 @@ extern void bier_fmask_encode 
(index_t bfmi, /* * provided for fast data-path access */ -bier_fmask_t *bier_fmask_pool; +extern bier_fmask_t *bier_fmask_pool; static inline bier_fmask_t * bier_fmask_get (u32 index) diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c index 80013428101b..636146f84492 100644 --- a/src/vnet/bonding/node.c +++ b/src/vnet/bonding/node.c @@ -21,7 +21,9 @@ #include #include +#ifndef CLIB_MARCH_VARIANT bond_main_t bond_main; +#endif /* CLIB_MARCH_VARIANT */ #define foreach_bond_input_error \ _(NONE, "no error") \ diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h index 15640071aa8f..9a194b8db0b8 100644 --- a/src/vnet/bonding/node.h +++ b/src/vnet/bonding/node.h @@ -70,7 +70,7 @@ typedef enum #undef _ } bond_load_balance_t; -enum +typedef enum { BOND_SEND_GARP_NA = 1, } bond_send_garp_na_process_event_t; diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 89dd84567bc4..06696515ecd5 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -389,7 +389,11 @@ typedef struct { u8 __unused; u8 flags; - u16 src_epg; + union + { + u16 src_epg; + u16 sclass; + }; } gbp; union diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c index 243a38a42b1b..2aa52e06a621 100644 --- a/src/vnet/devices/af_packet/node.c +++ b/src/vnet/devices/af_packet/node.c @@ -192,7 +192,7 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, u8 *block_start = apif->rx_ring + block * block_size; uword n_trace = vlib_get_trace_count (vm, node); u32 thread_index = vm->thread_index; - u32 n_buffer_bytes = VLIB_BUFFER_DATA_SIZE; + u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); u32 min_bufs = apif->rx_req->tp_frame_size / n_buffer_bytes; if (apif->per_interface_next_index != ~0) diff --git a/src/vnet/devices/netmap/node.c b/src/vnet/devices/netmap/node.c index 577d4a394b22..9293c0d811f3 100644 --- a/src/vnet/devices/netmap/node.c +++ b/src/vnet/devices/netmap/node.c @@ -99,7 +99,7 @@ netmap_device_input_fn (vlib_main_t 
* vm, vlib_node_runtime_t * node, struct netmap_ring *ring; int cur_ring; u32 thread_index = vm->thread_index; - u32 n_buffer_bytes = VLIB_BUFFER_DATA_SIZE; + u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); if (nif->per_interface_next_index != ~0) next_index = nif->per_interface_next_index; diff --git a/src/vnet/devices/pipe/pipe_api.c b/src/vnet/devices/pipe/pipe_api.c index cb67d1f04043..1dcb8bc76c65 100644 --- a/src/vnet/devices/pipe/pipe_api.c +++ b/src/vnet/devices/pipe/pipe_api.c @@ -36,7 +36,7 @@ #undef vl_printfun #include -vpe_api_main_t vpe_api_main; +extern vpe_api_main_t vpe_api_main; #define foreach_vpe_api_msg \ _(PIPE_CREATE, pipe_create) \ diff --git a/src/vnet/devices/tap/cli.c b/src/vnet/devices/tap/cli.c index 9d86159c5741..ee57a72268e1 100644 --- a/src/vnet/devices/tap/cli.c +++ b/src/vnet/devices/tap/cli.c @@ -172,29 +172,6 @@ tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input, int show_descr = 0; clib_error_t *error = 0; u32 hw_if_index, *hw_if_indices = 0; - virtio_vring_t *vring; - int i, j; - struct feat_struct - { - u8 bit; - char *str; - }; - struct feat_struct *feat_entry; - - static struct feat_struct feat_array[] = { -#define _(s,b) { .str = #s, .bit = b, }, - foreach_virtio_net_features -#undef _ - {.str = NULL} - }; - - struct feat_struct *flag_entry; - static struct feat_struct flags_array[] = { -#define _(b,e,s) { .bit = b, .str = s, }, - foreach_virtio_if_flag -#undef _ - {.str = NULL} - }; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -220,81 +197,8 @@ tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input, /* *INDENT-ON* */ } - for (hw_if_index = 0; hw_if_index < vec_len (hw_if_indices); hw_if_index++) - { - vnet_hw_interface_t *hi = - vnet_get_hw_interface (vnm, hw_if_indices[hw_if_index]); - vif = pool_elt_at_index (mm->interfaces, hi->dev_instance); - vlib_cli_output (vm, "interface %U", format_vnet_sw_if_index_name, - vnm, vif->sw_if_index); - if (vif->host_if_name) - 
vlib_cli_output (vm, " name \"%s\"", vif->host_if_name); - if (vif->net_ns) - vlib_cli_output (vm, " host-ns \"%s\"", vif->net_ns); - vlib_cli_output (vm, " flags 0x%x", vif->flags); - flag_entry = (struct feat_struct *) &flags_array; - while (flag_entry->str) - { - if (vif->flags & (1ULL << flag_entry->bit)) - vlib_cli_output (vm, " %s (%d)", flag_entry->str, - flag_entry->bit); - flag_entry++; - } - vlib_cli_output (vm, " fd %d", vif->fd); - vlib_cli_output (vm, " tap-fd %d", vif->tap_fd); - vlib_cli_output (vm, " features 0x%lx", vif->features); - feat_entry = (struct feat_struct *) &feat_array; - while (feat_entry->str) - { - if (vif->features & (1ULL << feat_entry->bit)) - vlib_cli_output (vm, " %s (%d)", feat_entry->str, - feat_entry->bit); - feat_entry++; - } - vlib_cli_output (vm, " remote-features 0x%lx", vif->remote_features); - feat_entry = (struct feat_struct *) &feat_array; - while (feat_entry->str) - { - if (vif->remote_features & (1ULL << feat_entry->bit)) - vlib_cli_output (vm, " %s (%d)", feat_entry->str, - feat_entry->bit); - feat_entry++; - } - vec_foreach_index (i, vif->vrings) - { - // RX = 0, TX = 1 - vring = vec_elt_at_index (vif->vrings, i); - vlib_cli_output (vm, " Virtqueue (%s)", (i & 1) ? 
"TX" : "RX"); - vlib_cli_output (vm, - " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d", - vring->size, vring->last_used_idx, vring->desc_next, - vring->desc_in_use); - vlib_cli_output (vm, - " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d", - vring->avail->flags, vring->avail->idx, - vring->used->flags, vring->used->idx); - vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd, - vring->call_fd); - if (show_descr) - { - vlib_cli_output (vm, "\n descriptor table:\n"); - vlib_cli_output (vm, - " id addr len flags next user_addr\n"); - vlib_cli_output (vm, - " ===== ================== ===== ====== ===== ==================\n"); - vring = vif->vrings; - for (j = 0; j < vring->size; j++) - { - struct vring_desc *desc = &vring->desc[j]; - vlib_cli_output (vm, - " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", - j, desc->addr, - desc->len, - desc->flags, desc->next, desc->addr); - } - } - } - } + virtio_show (vm, hw_if_indices, show_descr, VIRTIO_IF_TYPE_TAP); + done: vec_free (hw_if_indices); return error; diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c index d0ed58c1f06f..101576c274b9 100644 --- a/src/vnet/devices/tap/tap.c +++ b/src/vnet/devices/tap/tap.c @@ -130,7 +130,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) _IOCTL (vif->fd, VHOST_GET_FEATURES, &vif->remote_features); - if ((vif->remote_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) == 0) + if ((vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF)) == 0) { args->rv = VNET_API_ERROR_UNSUPPORTED; args->error = clib_error_return (0, "vhost-net backend doesn't support " @@ -138,7 +138,8 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) goto error; } - if ((vif->remote_features & (1ULL << VIRTIO_RING_F_INDIRECT_DESC)) == 0) + if ((vif->remote_features & VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC)) == + 0) { args->rv = VNET_API_ERROR_UNSUPPORTED; args->error = clib_error_return (0, "vhost-net backend doesn't support " @@ -146,7 
+147,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) goto error; } - if ((vif->remote_features & (1ULL << VIRTIO_F_VERSION_1)) == 0) + if ((vif->remote_features & VIRTIO_FEATURE (VIRTIO_F_VERSION_1)) == 0) { args->rv = VNET_API_ERROR_UNSUPPORTED; args->error = clib_error_return (0, "vhost-net backend doesn't support " @@ -154,9 +155,11 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) goto error; } - vif->features |= 1ULL << VIRTIO_NET_F_MRG_RXBUF; - vif->features |= 1ULL << VIRTIO_F_VERSION_1; - vif->features |= 1ULL << VIRTIO_RING_F_INDIRECT_DESC; + vif->features |= VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF); + vif->features |= VIRTIO_FEATURE (VIRTIO_F_VERSION_1); + vif->features |= VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC); + + virtio_set_net_hdr_size (vif); _IOCTL (vif->fd, VHOST_SET_FEATURES, &vif->features); @@ -349,6 +352,8 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) } vif->rx_ring_sz = args->rx_ring_sz != 0 ? args->rx_ring_sz : 256; vif->tx_ring_sz = args->tx_ring_sz != 0 ? 
args->tx_ring_sz : 256; + clib_memcpy (vif->mac_addr, args->mac_addr, 6); + vif->host_if_name = args->host_if_name; args->host_if_name = 0; vif->net_ns = args->host_namespace; @@ -363,9 +368,10 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) if (args->host_ip6_prefix_len) clib_memcpy (&vif->host_ip6_addr, &args->host_ip6_addr, 16); + vif->type = VIRTIO_IF_TYPE_TAP; args->error = ethernet_register_interface (vnm, virtio_device_class.index, vif->dev_instance, - args->mac_addr, + vif->mac_addr, &vif->hw_if_index, virtio_eth_flag_change); if (args->error) @@ -386,7 +392,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, 0, VNET_HW_INTERFACE_RX_MODE_DEFAULT); vif->per_interface_next_index = ~0; - vif->type = VIRTIO_IF_TYPE_TAP; + virtio_vring_set_numa_node (vm, vif, 0); vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP; vnet_hw_interface_set_flags (vnm, vif->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); @@ -433,6 +439,9 @@ tap_delete_if (vlib_main_t * vm, u32 sw_if_index) vif = pool_elt_at_index (mm->interfaces, hw->dev_instance); + if (vif->type != VIRTIO_IF_TYPE_TAP) + return VNET_API_ERROR_INVALID_INTERFACE; + /* bring down the interface */ vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); vnet_sw_interface_set_flags (vnm, vif->sw_if_index, 0); @@ -469,6 +478,8 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids) /* *INDENT-OFF* */ pool_foreach (vif, mm->interfaces, + if (vif->type != VIRTIO_IF_TYPE_TAP) + continue; vec_add2(r_tapids, tapid, 1); clib_memset (tapid, 0, sizeof (*tapid)); tapid->id = vif->id; diff --git a/src/vnet/devices/virtio/cli.c b/src/vnet/devices/virtio/cli.c new file mode 100644 index 000000000000..92e7e93831a7 --- /dev/null +++ b/src/vnet/devices/virtio/cli.c @@ -0,0 +1,205 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2018 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +#include +#include +#include +#include +#include +#include +#include +#include + +static clib_error_t * +virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + virtio_pci_create_if_args_t args; + u32 tmp; + u64 feature_mask = (u64) ~ (0ULL); + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + memset (&args, 0, sizeof (args)); + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_vlib_pci_addr, &args.addr)) + ; + else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask)) + args.features = feature_mask; + else if (unformat (line_input, "rx-queue-size %u", &tmp)) + args.rxq_size = tmp; + else if (unformat (line_input, "tx-queue-size %u", &tmp)) + args.txq_size = tmp; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + virtio_pci_create_if (vm, &args); + + return args.error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (virtio_pci_create_command, static) = { + .path = "create interface virtio", + .short_help = "create interface virtio " + "[feature-mask ] [rx-queue-size ] [tx-queue-size ]", + .function = virtio_pci_create_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +virtio_pci_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 sw_if_index = ~0; + vnet_hw_interface_t *hw; + virtio_main_t *vim = &virtio_main; + virtio_if_t *vif; + vnet_main_t *vnm = vnet_get_main (); + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (line_input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (sw_if_index == ~0) + return clib_error_return (0, + "please specify interface name or sw_if_index"); + + hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + if (hw == NULL || virtio_device_class.index != hw->dev_class_index) + return clib_error_return (0, "not a virtio interface"); + + vif = pool_elt_at_index (vim->interfaces, hw->dev_instance); + + if (virtio_pci_delete_if (vm, vif) < 0) + return clib_error_return (0, "not a virtio pci interface"); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (virtio_pci_delete_command, static) = { + .path = "delete interface virtio", + .short_help = "delete interface virtio " + "{ | sw_if_index }", + .function = virtio_pci_delete_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_virtio_pci_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + virtio_main_t *vim = &virtio_main; + vnet_main_t *vnm = &vnet_main; + virtio_if_t *vif; + clib_error_t *error = 0; + u32 hw_if_index, *hw_if_indices = 0; + vnet_hw_interface_t *hi; + u8 show_descr = 0, show_device_config = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + { + hi = vnet_get_hw_interface (vnm, hw_if_index); + if (virtio_device_class.index != hi->dev_class_index) + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + vec_add1 (hw_if_indices, hw_if_index); + } + else if (unformat (input, "descriptors") || unformat (input, "desc")) 
+ show_descr = 1; + else if (unformat (input, "debug-device")) + show_device_config = 1; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + if (vec_len (hw_if_indices) == 0) + { + pool_foreach (vif, vim->interfaces, + vec_add1 (hw_if_indices, vif->hw_if_index); + ); + } + else if (show_device_config) + { + vif = pool_elt_at_index (vim->interfaces, hi->dev_instance); + if (vif->type == VIRTIO_IF_TYPE_PCI) + debug_device_config_space (vm, vif); + } + + virtio_show (vm, hw_if_indices, show_descr, VIRTIO_IF_TYPE_PCI); + +done: + vec_free (hw_if_indices); + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_virtio_pci_command, static) = { + .path = "show virtio pci", + .short_help = "show virtio pci [] [descriptors | desc] [debug-device]", + .function = show_virtio_pci_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +virtio_pci_cli_init (vlib_main_t * vm) +{ + virtio_main_t *vim = &virtio_main; + vim->log_default = vlib_log_register_class ("virtio-pci", 0); + return 0; +} + +VLIB_INIT_FUNCTION (virtio_pci_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c index 7c66a60cf999..aa6a342f90b9 100644 --- a/src/vnet/devices/virtio/device.c +++ b/src/vnet/devices/virtio/device.c @@ -18,8 +18,6 @@ #include #include #include -#include -#include #include #include @@ -36,10 +34,10 @@ _(NO_TX_QUEUES, "no tx queues") typedef enum { -#define _(f,s) TAP_TX_ERROR_##f, +#define _(f,s) VIRTIO_TX_ERROR_##f, foreach_virtio_tx_func_error #undef _ - TAP_TX_N_ERROR, + VIRTIO_TX_N_ERROR, } virtio_tx_func_error_t; static char *virtio_tx_func_error_strings[] = { @@ -56,11 +54,13 @@ format_virtio_device_name (u8 * s, va_list * args) virtio_if_t *vif = pool_elt_at_index (mm->interfaces, dev_instance); if (vif->type == VIRTIO_IF_TYPE_TAP) - { - s = format (s, 
"tap%u", vif->id); - } + s = format (s, "tap%u", vif->id); + else if (vif->type == VIRTIO_IF_TYPE_PCI) + s = format (s, "virtio-%x/%x/%x/%x", vif->pci_addr.domain, + vif->pci_addr.bus, vif->pci_addr.slot, + vif->pci_addr.function); else - s = format (s, "virtio%lu", vif->dev_instance); + s = format (s, "virtio-%lu", vif->dev_instance); return s; } @@ -104,13 +104,6 @@ virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring) { struct vring_used_elem *e = &vring->used->ring[last & mask]; u16 slot = e->id; - struct vring_desc *d = &vring->desc[slot]; - - if (PREDICT_FALSE (d->flags & VRING_DESC_F_INDIRECT)) - { - d = uword_to_pointer (d->addr, struct vring_desc *); - vec_free (d); - } vlib_buffer_free (vm, &vring->buffers[slot], 1); used--; @@ -122,11 +115,12 @@ virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring) } static_always_inline u16 -add_buffer_to_slot (vlib_main_t * vm, virtio_vring_t * vring, u32 bi, - u16 avail, u16 next, u16 mask) +add_buffer_to_slot (vlib_main_t * vm, virtio_if_t * vif, + virtio_vring_t * vring, u32 bi, u16 avail, u16 next, + u16 mask) { u16 n_added = 0; - const int hdr_sz = sizeof (struct virtio_net_hdr_v1); + int hdr_sz = vif->virtio_net_hdr_sz; struct vring_desc *d; d = &vring->desc[next]; vlib_buffer_t *b = vlib_get_buffer (vm, bi); @@ -136,31 +130,85 @@ add_buffer_to_slot (vlib_main_t * vm, virtio_vring_t * vring, u32 bi, if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0)) { - d->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz; + d->addr = + ((vif->type == VIRTIO_IF_TYPE_PCI) ? 
vlib_buffer_get_current_pa (vm, + b) : + pointer_to_uword (vlib_buffer_get_current (b))) - hdr_sz; d->len = b->current_length + hdr_sz; d->flags = 0; } else { - struct vring_desc *id, *descs = 0; - - /* first buffer in chain */ - vec_add2_aligned (descs, id, 1, CLIB_CACHE_LINE_BYTES); - id->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz; - id->len = b->current_length + hdr_sz; - - while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + /* + * We are using single vlib_buffer_t for indirect descriptor(s) + * chain. Single descriptor is 16 bytes and vlib_buffer_t + * has 2048 bytes space. So maximum long chain can have 128 + * (=2048/16) indirect descriptors. + * It can easily support 65535 bytes of Jumbo frames with + * each data buffer size of 512 bytes minimum. + */ + vlib_buffer_t *indirect_desc = + vlib_get_buffer (vm, vring->indirect_buffers[next]); + indirect_desc->current_data = 0; + + struct vring_desc *id = + (struct vring_desc *) vlib_buffer_get_current (indirect_desc); + u32 count = 1; + if (vif->type == VIRTIO_IF_TYPE_PCI) { - id->flags = VRING_DESC_F_NEXT; - id->next = vec_len (descs); - vec_add2_aligned (descs, id, 1, CLIB_CACHE_LINE_BYTES); - b = vlib_get_buffer (vm, b->next_buffer); - id->addr = pointer_to_uword (vlib_buffer_get_current (b)); - id->len = b->current_length; + d->addr = vlib_physmem_get_pa (vm, id); + id->addr = vlib_buffer_get_current_pa (vm, b) - hdr_sz; + + /* + * If VIRTIO_F_ANY_LAYOUT is not negotiated, then virtio_net_hdr + * should be presented in separate descriptor and data will start + * from next descriptor. 
+ */ + if (PREDICT_TRUE + (vif->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT))) + id->len = b->current_length + hdr_sz; + else + { + id->len = hdr_sz; + id->flags = VRING_DESC_F_NEXT; + id->next = count; + count++; + id++; + id->addr = vlib_buffer_get_current_pa (vm, b); + id->len = b->current_length; + } + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + id->flags = VRING_DESC_F_NEXT; + id->next = count; + count++; + id++; + b = vlib_get_buffer (vm, b->next_buffer); + id->addr = vlib_buffer_get_current_pa (vm, b); + id->len = b->current_length; + } } - - d->addr = pointer_to_uword (descs); - d->len = vec_len (descs) * sizeof (struct vring_desc); + else /* VIRTIO_IF_TYPE_TAP */ + { + d->addr = pointer_to_uword (id); + /* first buffer in chain */ + id->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz; + id->len = b->current_length + hdr_sz; + + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + id->flags = VRING_DESC_F_NEXT; + id->next = count; + count++; + id++; + b = vlib_get_buffer (vm, b->next_buffer); + id->addr = pointer_to_uword (vlib_buffer_get_current (b)); + id->len = b->current_length; + } + } + id->flags = 0; + id->next = 0; + d->len = count * sizeof (struct vring_desc); d->flags = VRING_DESC_F_INDIRECT; } vring->buffers[next] = bi; @@ -184,8 +232,8 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, clib_spinlock_lock_if_init (&vif->lockp); if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0 && - vring->last_kick_avail_idx != vring->avail->idx) - virtio_kick (vring); + (vring->last_kick_avail_idx != vring->avail->idx)) + virtio_kick (vm, vring, vif); /* free consumed buffers */ virtio_free_used_desc (vm, vring); @@ -196,8 +244,11 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, while (n_left && used < sz) { - u16 n_added; - n_added = add_buffer_to_slot (vm, vring, buffers[0], avail, next, mask); + u16 n_added = 0; + n_added = + add_buffer_to_slot (vm, vif, vring, buffers[0], 
avail, next, mask); + if (!n_added) + break; avail += n_added; next = (next + n_added) & mask; used += n_added; @@ -212,13 +263,12 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vring->desc_next = next; vring->desc_in_use = used; if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0) - virtio_kick (vring); + virtio_kick (vm, vring, vif); } - if (n_left) { - vlib_error_count (vm, node->node_index, TAP_TX_ERROR_NO_FREE_SLOTS, + vlib_error_count (vm, node->node_index, VIRTIO_TX_ERROR_NO_FREE_SLOTS, n_left); vlib_buffer_free (vm, buffers, n_left); } @@ -274,6 +324,12 @@ virtio_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid, virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance); virtio_vring_t *vring = vec_elt_at_index (vif->vrings, qid); + if (vif->type == VIRTIO_IF_TYPE_PCI && !(vif->support_int_mode)) + { + vring->avail->flags |= VIRTIO_RING_FLAG_MASK_INT; + return clib_error_return (0, "interrupt mode is not supported"); + } + if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) vring->avail->flags |= VIRTIO_RING_FLAG_MASK_INT; else @@ -313,7 +369,7 @@ VNET_DEVICE_CLASS (virtio_device_class) = { .format_device_name = format_virtio_device_name, .format_device = format_virtio_device, .format_tx_trace = format_virtio_tx_trace, - .tx_function_n_errors = TAP_TX_N_ERROR, + .tx_function_n_errors = VIRTIO_TX_N_ERROR, .tx_function_error_strings = virtio_tx_func_error_strings, .rx_redirect_to_node = virtio_set_interface_next_node, .clear_counters = virtio_clear_hw_interface_counters, diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c index d7a0b3964b4c..6b82c418ffb3 100644 --- a/src/vnet/devices/virtio/node.c +++ b/src/vnet/devices/virtio/node.c @@ -21,8 +21,6 @@ #include #include #include -#include -#include #include #include @@ -40,10 +38,10 @@ typedef enum { -#define _(f,s) TAP_INPUT_ERROR_##f, +#define _(f,s) VIRTIO_INPUT_ERROR_##f, foreach_virtio_input_error #undef _ - 
TAP_INPUT_N_ERROR, + VIRTIO_INPUT_N_ERROR, } virtio_input_error_t; static char *virtio_input_error_strings[] = { @@ -80,9 +78,9 @@ format_virtio_input_trace (u8 * s, va_list * args) } static_always_inline void -virtio_refill_vring (vlib_main_t * vm, virtio_vring_t * vring) +virtio_refill_vring (vlib_main_t * vm, virtio_if_t * vif, + virtio_vring_t * vring, const int hdr_sz) { - const int hdr_sz = sizeof (struct virtio_net_hdr_v1); u16 used, next, avail, n_slots; u16 sz = vring->size; u16 mask = sz - 1; @@ -98,8 +96,10 @@ virtio_refill_vring (vlib_main_t * vm, virtio_vring_t * vring) next = vring->desc_next; avail = vring->avail->idx; - n_slots = vlib_buffer_alloc_to_ring (vm, vring->buffers, next, vring->size, - n_slots); + n_slots = + vlib_buffer_alloc_to_ring_from_pool (vm, vring->buffers, next, + vring->size, n_slots, + vring->buffer_pool_index); if (n_slots == 0) return; @@ -108,8 +108,19 @@ virtio_refill_vring (vlib_main_t * vm, virtio_vring_t * vring) { struct vring_desc *d = &vring->desc[next];; vlib_buffer_t *b = vlib_get_buffer (vm, vring->buffers[next]); - d->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz; - d->len = VLIB_BUFFER_DATA_SIZE + hdr_sz; + /* + * current_data may not be initialized with 0 and may contain + * previous offset. Here we want to make sure, it should be 0 + * initialized. + */ + b->current_data = 0; + b->current_data -= hdr_sz; + memset (vlib_buffer_get_current (b), 0, hdr_sz); + d->addr = + ((vif->type == VIRTIO_IF_TYPE_PCI) ? 
vlib_buffer_get_current_pa (vm, + b) : + pointer_to_uword (vlib_buffer_get_current (b))); + d->len = vlib_buffer_get_default_data_size (vm) + hdr_sz; d->flags = VRING_DESC_F_WRITE; vring->avail->ring[avail & mask] = next; avail++; @@ -123,7 +134,9 @@ virtio_refill_vring (vlib_main_t * vm, virtio_vring_t * vring) vring->desc_in_use = used; if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0) - virtio_kick (vring); + { + virtio_kick (vm, vring, vif); + } goto more; } @@ -136,7 +149,7 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, uword n_trace = vlib_get_trace_count (vm, node); virtio_vring_t *vring = vec_elt_at_index (vif->vrings, 0); u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - const int hdr_sz = sizeof (struct virtio_net_hdr_v1); + const int hdr_sz = vif->virtio_net_hdr_sz; u32 *to_next = 0; u32 n_rx_packets = 0; u32 n_rx_bytes = 0; @@ -146,7 +159,7 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0 && vring->last_kick_avail_idx != vring->avail->idx) - virtio_kick (vring); + virtio_kick (vm, vring, vif); if (n_left == 0) goto refill; @@ -159,17 +172,18 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, while (n_left && n_left_to_next) { - u16 num_buffers; + u16 num_buffers = 1; struct vring_used_elem *e = &vring->used->ring[last & mask]; struct virtio_net_hdr_v1 *hdr; u16 slot = e->id; u16 len = e->len - hdr_sz; u32 bi0 = vring->buffers[slot]; vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - hdr = vlib_buffer_get_current (b0) - hdr_sz; - num_buffers = hdr->num_buffers; + hdr = vlib_buffer_get_current (b0); + if (hdr_sz == sizeof (struct virtio_net_hdr_v1)) + num_buffers = hdr->num_buffers; - b0->current_data = 0; + b0->current_data += hdr_sz; b0->current_length = len; b0->total_length_not_including_first_buffer = 0; b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; @@ -189,7 +203,6 @@ virtio_device_input_inline 
(vlib_main_t * vm, vlib_node_runtime_t * node, cb = vlib_get_buffer (vm, cbi); /* current buffer */ - cb->current_data = -hdr_sz; cb->current_length = e->len; /* previous buffer */ @@ -253,7 +266,7 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, n_rx_bytes); refill: - virtio_refill_vring (vm, vring); + virtio_refill_vring (vm, vif, vring, hdr_sz); return n_rx_packets; } @@ -289,7 +302,7 @@ VLIB_REGISTER_NODE (virtio_input_node) = { .format_trace = format_virtio_input_trace, .type = VLIB_NODE_TYPE_INPUT, .state = VLIB_NODE_STATE_INTERRUPT, - .n_errors = TAP_INPUT_N_ERROR, + .n_errors = VIRTIO_INPUT_N_ERROR, .error_strings = virtio_input_error_strings, }; diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c new file mode 100644 index 000000000000..020b088d3460 --- /dev/null +++ b/src/vnet/devices/virtio/pci.c @@ -0,0 +1,1021 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define PCI_VENDOR_ID_VIRTIO 0x1af4 +#define PCI_DEVICE_ID_VIRTIO_NIC 0x1000 +/* Doesn't support modern device */ +#define PCI_DEVICE_ID_VIRTIO_NIC_MODERN 0x1041 + +#define PCI_CAPABILITY_LIST 0x34 +#define PCI_CAP_ID_VNDR 0x09 +#define PCI_CAP_ID_MSIX 0x11 + +#define PCI_MSIX_ENABLE 0x8000 + +#define PCI_CONFIG_SIZE(vif) ((vif->msix_enabled == VIRTIO_MSIX_ENABLED) ? 
\ + 24 : 20) + +static pci_device_id_t virtio_pci_device_ids[] = { + { + .vendor_id = PCI_VENDOR_ID_VIRTIO, + .device_id = PCI_DEVICE_ID_VIRTIO_NIC}, + { + .vendor_id = PCI_VENDOR_ID_VIRTIO, + .device_id = PCI_DEVICE_ID_VIRTIO_NIC_MODERN}, + {0}, +}; + +static void +virtio_pci_legacy_read_config (vlib_main_t * vm, virtio_if_t * vif, void *dst, + int len, u32 addr) +{ + u32 size = 0; + vlib_pci_dev_handle_t h = vif->pci_dev_handle; + + while (len > 0) + { + if (len >= 4) + { + size = 4; + vlib_pci_read_io_u32 (vm, h, PCI_CONFIG_SIZE (vif) + addr, dst); + } + else if (len >= 2) + { + size = 2; + vlib_pci_read_io_u16 (vm, h, PCI_CONFIG_SIZE (vif) + addr, dst); + } + else + { + size = 1; + vlib_pci_read_io_u8 (vm, h, PCI_CONFIG_SIZE (vif) + addr, dst); + } + dst = (u8 *) dst + size; + addr += size; + len -= size; + } +} + +static void +virtio_pci_legacy_write_config (vlib_main_t * vm, virtio_if_t * vif, + void *src, int len, u32 addr) +{ + u32 size = 0; + vlib_pci_dev_handle_t h = vif->pci_dev_handle; + + while (len > 0) + { + if (len >= 4) + { + size = 4; + vlib_pci_write_io_u32 (vm, h, PCI_CONFIG_SIZE (vif) + addr, src); + } + else if (len >= 2) + { + size = 2; + vlib_pci_write_io_u16 (vm, h, PCI_CONFIG_SIZE (vif) + addr, src); + } + else + { + size = 1; + vlib_pci_write_io_u8 (vm, h, PCI_CONFIG_SIZE (vif) + addr, src); + } + src = (u8 *) src + size; + addr += size; + len -= size; + } +} + +static u64 +virtio_pci_legacy_get_features (vlib_main_t * vm, virtio_if_t * vif) +{ + u32 features; + vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_HOST_FEATURES, + &features); + return features; +} + +static u32 +virtio_pci_legacy_set_features (vlib_main_t * vm, virtio_if_t * vif, + u64 features) +{ + if ((features >> 32) != 0) + { + clib_warning ("only 32 bit features are allowed for legacy virtio!"); + } + u32 feature = 0, guest_features = (u32) features; + vlib_pci_write_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_GUEST_FEATURES, + &guest_features); + 
vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_GUEST_FEATURES, + &feature); + return feature; +} + +static u8 +virtio_pci_legacy_get_status (vlib_main_t * vm, virtio_if_t * vif) +{ + u8 status = 0; + vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_STATUS, &status); + return status; +} + +static void +virtio_pci_legacy_set_status (vlib_main_t * vm, virtio_if_t * vif, u8 status) +{ + if (status != VIRTIO_CONFIG_STATUS_RESET) + status |= virtio_pci_legacy_get_status (vm, vif); + vlib_pci_write_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_STATUS, &status); +} + +static u8 +virtio_pci_legacy_reset (vlib_main_t * vm, virtio_if_t * vif) +{ + virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_RESET); + return virtio_pci_legacy_get_status (vm, vif); +} + +static u8 +virtio_pci_legacy_get_isr (vlib_main_t * vm, virtio_if_t * vif) +{ + u8 isr = 0; + vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_ISR, &isr); + return isr; +} + +static u16 +virtio_pci_legacy_get_queue_num (vlib_main_t * vm, virtio_if_t * vif, + u16 queue_id) +{ + u16 queue_num = 0; + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, + &queue_id); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NUM, + &queue_num); + return queue_num; +} + + +static void +virtio_pci_legacy_setup_queue (vlib_main_t * vm, virtio_if_t * vif, + u16 queue_id, void *p) +{ + u64 addr = vlib_physmem_get_pa (vm, p) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, + &queue_id); + vlib_pci_write_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_PFN, + (u32 *) & addr); +} + +static void +virtio_pci_legacy_del_queue (vlib_main_t * vm, virtio_if_t * vif, + u16 queue_id) +{ + u32 src = 0; + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, + &queue_id); + vlib_pci_write_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_PFN, &src); +} + +inline void +virtio_pci_legacy_notify_queue (vlib_main_t * 
vm, virtio_if_t * vif, + u16 queue_id) +{ + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NOTIFY, + &queue_id); +} + +/* Enable one vector (0) for Link State Interrupt */ +static u16 +virtio_pci_legacy_set_config_irq (vlib_main_t * vm, virtio_if_t * vif, + u16 vec) +{ + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_CONFIG_VECTOR, + &vec); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_CONFIG_VECTOR, + &vec); + return vec; +} + +static u16 +virtio_pci_legacy_set_queue_irq (vlib_main_t * vm, virtio_if_t * vif, u16 vec, + u16 queue_id) +{ + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, + &queue_id); + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_QUEUE_VECTOR, + &vec); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_QUEUE_VECTOR, + &vec); + return vec; +} + +static u32 +virtio_pci_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, + u32 flags) +{ + return 0; +} + +static clib_error_t * +virtio_pci_get_max_virtqueue_pairs (vlib_main_t * vm, virtio_if_t * vif) +{ + virtio_main_t *vim = &virtio_main; + virtio_net_config_t config; + clib_error_t *error = 0; + u16 max_queue_pairs = 1; + + if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MQ)) + { + virtio_pci_legacy_read_config (vm, vif, &config.max_virtqueue_pairs, + sizeof (config.max_virtqueue_pairs), 8); + max_queue_pairs = config.max_virtqueue_pairs; + } + + virtio_log_debug (vim, vif, "max queue pair is %x", max_queue_pairs); + if (max_queue_pairs < 1 || max_queue_pairs > 0x8000) + return clib_error_return (error, "max queue pair is %x", max_queue_pairs); /* out-of-range count is reported to the caller, not stored */ + + vif->max_queue_pairs = max_queue_pairs; + return error; +} + +static void +virtio_pci_set_mac (vlib_main_t * vm, virtio_if_t * vif) +{ + virtio_pci_legacy_write_config (vm, vif, vif->mac_addr, + sizeof (vif->mac_addr), 0); +} + +static u32 +virtio_pci_get_mac (vlib_main_t * vm, virtio_if_t * vif) +{ + if (vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_MAC)) + { +
virtio_pci_legacy_read_config (vm, vif, vif->mac_addr, + sizeof (vif->mac_addr), 0); + return 0; + } + return 1; +} + +static u16 +virtio_pci_is_link_up (vlib_main_t * vm, virtio_if_t * vif) +{ + /* + * Minimal driver: assumes link is up + */ + u16 status = 1; + if (vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_STATUS)) + virtio_pci_legacy_read_config (vm, vif, &status, sizeof (status), /* mac */ + 6); + return status; +} + +static void +virtio_pci_irq_0_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line) +{ + vnet_main_t *vnm = vnet_get_main (); + virtio_main_t *vim = &virtio_main; + uword pd = vlib_pci_get_private_data (vm, h); + virtio_if_t *vif = pool_elt_at_index (vim->interfaces, pd); + u16 qid = line; + + vnet_device_input_set_interrupt_pending (vnm, vif->hw_if_index, qid); +} + +static void +virtio_pci_irq_1_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line) +{ + vnet_main_t *vnm = vnet_get_main (); + virtio_main_t *vim = &virtio_main; + uword pd = vlib_pci_get_private_data (vm, h); + virtio_if_t *vif = pool_elt_at_index (vim->interfaces, pd); + + if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP) + { + vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP; + vnet_hw_interface_set_flags (vnm, vif->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + } + else + { + vif->flags &= ~VIRTIO_IF_FLAG_ADMIN_UP; + vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); + } +} + +static void +virtio_pci_irq_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h) +{ + virtio_main_t *vim = &virtio_main; + uword pd = vlib_pci_get_private_data (vm, h); + virtio_if_t *vif = pool_elt_at_index (vim->interfaces, pd); + u8 isr = 0; + u16 line = 0; + + isr = virtio_pci_legacy_get_isr (vm, vif); + + /* + * If the lower bit is set: look through the used rings of + * all virtqueues for the device, to see if any progress has + * been made by the device which requires servicing. 
+ */ + if (isr & VIRTIO_PCI_ISR_INTR) + virtio_pci_irq_0_handler (vm, h, line); + + if (isr & VIRTIO_PCI_ISR_CONFIG) + virtio_pci_irq_1_handler (vm, h, line); +} + +inline void +device_status (vlib_main_t * vm, virtio_if_t * vif) +{ + struct status_struct + { + u8 bit; + char *str; + }; + struct status_struct *status_entry; + static struct status_struct status_array[] = { +#define _(s,b) { .str = #s, .bit = b, }, + foreach_virtio_config_status_flags +#undef _ + {.str = NULL} + }; + + vlib_cli_output (vm, " status 0x%x", vif->status); + + status_entry = (struct status_struct *) &status_array; + while (status_entry->str) + { + if (vif->status & status_entry->bit) + vlib_cli_output (vm, " %s (%x)", status_entry->str, + status_entry->bit); + status_entry++; + } +} + +inline void +debug_device_config_space (vlib_main_t * vm, virtio_if_t * vif) +{ + u32 data_u32; + u16 data_u16; + u8 data_u8; + vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_HOST_FEATURES, + &data_u32); + vlib_cli_output (vm, "remote features 0x%lx", data_u32); + vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_GUEST_FEATURES, + &data_u32); + vlib_cli_output (vm, "guest features 0x%lx", data_u32); + vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_PFN, + &data_u32); + vlib_cli_output (vm, "queue address 0x%lx", data_u32); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NUM, + &data_u16); + vlib_cli_output (vm, "queue size 0x%x", data_u16); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, + &data_u16); + vlib_cli_output (vm, "queue select 0x%x", data_u16); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NOTIFY, + &data_u16); + vlib_cli_output (vm, "queue notify 0x%x", data_u16); + vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_STATUS, &data_u8); + vlib_cli_output (vm, "status 0x%x", data_u8); + vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_ISR, &data_u8); + vlib_cli_output (vm, "isr 0x%x", 
data_u8); + + if (vif->msix_enabled == VIRTIO_MSIX_ENABLED) + { + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_CONFIG_VECTOR, + &data_u16); + vlib_cli_output (vm, "config vector 0x%x", data_u16); + u16 queue_id = 0; + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, + &queue_id); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_QUEUE_VECTOR, + &data_u16); + vlib_cli_output (vm, "queue vector for queue (0) 0x%x", data_u16); + } + + u8 mac[6]; + virtio_pci_legacy_read_config (vm, vif, mac, sizeof (mac), 0); + vlib_cli_output (vm, "mac %U", format_ethernet_address, mac); + virtio_pci_legacy_read_config (vm, vif, &data_u16, sizeof (u16), /* offset to status */ + 6); + vlib_cli_output (vm, "link up/down status 0x%x", data_u16); + virtio_pci_legacy_read_config (vm, vif, &data_u16, sizeof (u16), + /* offset to max_virtqueue */ 8); + vlib_cli_output (vm, "num of virtqueue 0x%x", data_u16); + virtio_pci_legacy_read_config (vm, vif, &data_u16, sizeof (u16), /* offset to mtu */ + 10); + vlib_cli_output (vm, "mtu 0x%x", data_u16); + + u32 i = PCI_CONFIG_SIZE (vif) + 12, a = 4; + i += a; + i &= ~a; + for (; i < 64; i += 4) + { + u32 data = 0; + vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, i, &data); + vlib_cli_output (vm, "0x%lx", data); + } +} + +static u8 +virtio_pci_queue_size_valid (u16 qsz) +{ + if (qsz < 64 || qsz > 4096) + return 0; + if ((qsz % 64) != 0) + return 0; + return 1; +} + +clib_error_t * +virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx) +{ + clib_error_t *error = 0; + u16 queue_size = 0; + virtio_vring_t *vring; + struct vring vr; + u32 i = 0; + void *ptr; + + queue_size = virtio_pci_legacy_get_queue_num (vm, vif, idx); + if (!virtio_pci_queue_size_valid (queue_size)) + clib_warning ("queue size is not valid"); + + if (!is_pow2 (queue_size)) + return clib_error_return (0, "ring size must be power of 2"); + + if (queue_size > 32768) + return clib_error_return (0, "ring size must be 32768 or 
lower"); + + if (queue_size == 0) + queue_size = 256; + + vec_validate_aligned (vif->vrings, idx, CLIB_CACHE_LINE_BYTES); + vring = vec_elt_at_index (vif->vrings, idx); + + i = vring_size (queue_size, VIRTIO_PCI_VRING_ALIGN); + i = round_pow2 (i, VIRTIO_PCI_VRING_ALIGN); + ptr = vlib_physmem_alloc_aligned (vm, i, VIRTIO_PCI_VRING_ALIGN); + memset (ptr, 0, i); + vring_init (&vr, queue_size, ptr, VIRTIO_PCI_VRING_ALIGN); + vring->desc = vr.desc; + vring->avail = vr.avail; + vring->used = vr.used; + vring->queue_id = idx; + vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT; + + ASSERT (vring->buffers == 0); + vec_validate_aligned (vring->buffers, queue_size, CLIB_CACHE_LINE_BYTES); + ASSERT (vring->indirect_buffers == 0); + vec_validate_aligned (vring->indirect_buffers, queue_size, + CLIB_CACHE_LINE_BYTES); + if (idx % 2) + { + u32 n_alloc = 0; + do + { + if (n_alloc < queue_size) + n_alloc = + vlib_buffer_alloc (vm, vring->indirect_buffers + n_alloc, + queue_size - n_alloc); + } + while (n_alloc != queue_size); + vif->tx_ring_sz = queue_size; + } + else + vif->rx_ring_sz = queue_size; + vring->size = queue_size; + + virtio_pci_legacy_setup_queue (vm, vif, idx, ptr); + vring->kick_fd = -1; + + return error; +} + +static void +virtio_negotiate_features (vlib_main_t * vm, virtio_if_t * vif, + u64 req_features) +{ + /* + * if features are not requested + * default: all supported features + */ + u64 supported_features = VIRTIO_FEATURE (VIRTIO_NET_F_MTU) + | VIRTIO_FEATURE (VIRTIO_NET_F_MAC) + | VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) + | VIRTIO_FEATURE (VIRTIO_NET_F_STATUS) + | VIRTIO_FEATURE (VIRTIO_F_NOTIFY_ON_EMPTY) + | VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT) + | VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC); + + if (req_features == 0) + { + req_features = supported_features; + } + + vif->features = req_features & vif->remote_features & supported_features; + + if (vif-> + remote_features & vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MTU)) + { + virtio_net_config_t config; + 
virtio_pci_legacy_read_config (vm, vif, &config.mtu, + sizeof (config.mtu), 10); + if (config.mtu < 64) + vif->features &= ~VIRTIO_FEATURE (VIRTIO_NET_F_MTU); + } + + vif->features = virtio_pci_legacy_set_features (vm, vif, vif->features); +} + +void +virtio_pci_read_device_feature (vlib_main_t * vm, virtio_if_t * vif) +{ + vif->remote_features = virtio_pci_legacy_get_features (vm, vif); +} + +int +virtio_pci_reset_device (vlib_main_t * vm, virtio_if_t * vif) +{ + u8 status = 0; + + /* + * Reset the device + */ + status = virtio_pci_legacy_reset (vm, vif); + + /* + * Set the Acknowledge status bit + */ + virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_ACK); + + /* + * Set the Driver status bit + */ + virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_DRIVER); + + /* + * Read the status and verify it + */ + status = virtio_pci_legacy_get_status (vm, vif); + if (! + ((status & VIRTIO_CONFIG_STATUS_ACK) + && (status & VIRTIO_CONFIG_STATUS_DRIVER))) + return -1; + vif->status = status; + + return 0; +} + +clib_error_t * +virtio_pci_read_caps (vlib_main_t * vm, virtio_if_t * vif) +{ + clib_error_t *error = 0; + virtio_main_t *vim = &virtio_main; + struct virtio_pci_cap cap; + u8 pos, common_cfg = 0, notify_base = 0, dev_cfg = 0, isr = 0, pci_cfg = 0; + vlib_pci_dev_handle_t h = vif->pci_dev_handle; + + if ((error = vlib_pci_read_config_u8 (vm, h, PCI_CAPABILITY_LIST, &pos))) + { + virtio_log_error (vim, vif, "error in reading capabilty list position"); + clib_error_return (error, "error in reading capabilty list position"); + } + while (pos) + { + if ((error = + vlib_pci_read_write_config (vm, h, VLIB_READ, pos, &cap, + sizeof (cap)))) + { + virtio_log_error (vim, vif, "%s [%2x]", + "error in reading the capability at", pos); + clib_error_return (error, + "error in reading the capability at [%2x]", pos); + } + + if (cap.cap_vndr == PCI_CAP_ID_MSIX) + { + u16 flags, table_size, table_size_mask = 0x07FF; + + if ((error = + vlib_pci_read_write_config 
(vm, h, VLIB_READ, pos + 2, &flags, + sizeof (flags)))) + clib_error_return (error, + "error in reading the capability at [%2x]", + pos + 2); + + table_size = flags & table_size_mask; + virtio_log_debug (vim, vif, "flags:0x%x %s 0x%x", flags, + "msix interrupt vector table-size", table_size); + + if (flags & PCI_MSIX_ENABLE) + { + virtio_log_debug (vim, vif, "msix interrupt enabled"); + vif->msix_enabled = VIRTIO_MSIX_ENABLED; + } + else + { + virtio_log_debug (vim, vif, "msix interrupt disabled"); + vif->msix_enabled = VIRTIO_MSIX_DISABLED; + } + } + + if (cap.cap_vndr != PCI_CAP_ID_VNDR) + { + virtio_log_debug (vim, vif, "[%2x] %s %2x ", pos, + "skipping non VNDR cap id:", cap.cap_vndr); + goto next; + } + + virtio_log_debug (vim, vif, + "[%4x] cfg type: %u, bar: %u, offset: %04x, len: %u", + pos, cap.cfg_type, cap.bar, cap.offset, cap.length); + switch (cap.cfg_type) + { + case VIRTIO_PCI_CAP_COMMON_CFG: + common_cfg = 1; + break; + case VIRTIO_PCI_CAP_NOTIFY_CFG: + notify_base = 1; + break; + case VIRTIO_PCI_CAP_DEVICE_CFG: + dev_cfg = 1; + break; + case VIRTIO_PCI_CAP_ISR_CFG: + isr = 1; + break; + case VIRTIO_PCI_CAP_PCI_CFG: + if (cap.bar == 0) + pci_cfg = 1; + break; + } + next: + pos = cap.cap_next; + } + + if (common_cfg == 0 || notify_base == 0 || dev_cfg == 0 || isr == 0) + { + virtio_log_debug (vim, vif, "legacy virtio pci device found"); + return error; + } + + if (!pci_cfg) + clib_error_return (error, "modern virtio pci device found"); + + virtio_log_debug (vim, vif, "transitional virtio pci device found"); + return error; +} + +static clib_error_t * +virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif, + virtio_pci_create_if_args_t * args) +{ + clib_error_t *error = 0; + virtio_main_t *vim = &virtio_main; + u8 status = 0; + + if ((error = virtio_pci_read_caps (vm, vif))) + clib_error_return (error, "Device not supported"); + + if (virtio_pci_reset_device (vm, vif) < 0) + { + virtio_log_error (vim, vif, "Failed to reset the device"); + 
clib_error_return (error, "Failed to reset the device"); + } + /* + * read device features and negotiate (user) requested features + */ + virtio_pci_read_device_feature (vm, vif); + virtio_negotiate_features (vm, vif, args->features); + + /* + * After FEATURE_OK, driver should not accept new feature bits + */ + virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_FEATURES_OK); + status = virtio_pci_legacy_get_status (vm, vif); + if (!(status & VIRTIO_CONFIG_STATUS_FEATURES_OK)) + { + virtio_log_error (vim, vif, + "error encountered: Device doesn't support requested features"); + clib_error_return (error, "Device doesn't support requested features"); + } + vif->status = status; + + if (virtio_pci_get_mac (vm, vif)) + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (vif->mac_addr + 2, &rnd, sizeof (rnd)); + vif->mac_addr[0] = 2; + vif->mac_addr[1] = 0xfe; + virtio_pci_set_mac (vm, vif); + } + + virtio_set_net_hdr_size (vif); + + if ((error = virtio_pci_get_max_virtqueue_pairs (vm, vif))) + goto error; + + if ((error = virtio_pci_vring_init (vm, vif, 0))) + goto error; + + if ((error = virtio_pci_vring_init (vm, vif, 1))) + goto error; + + if (vif->msix_enabled == VIRTIO_MSIX_ENABLED) + { + if (virtio_pci_legacy_set_config_irq (vm, vif, 1) == + VIRTIO_MSI_NO_VECTOR) + virtio_log_warning (vim, vif, "config vector 1 is not set"); + if (virtio_pci_legacy_set_queue_irq (vm, vif, 0, 0) == + VIRTIO_MSI_NO_VECTOR) + virtio_log_warning (vim, vif, "queue vector 0 is not set"); + } + virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_DRIVER_OK); + vif->status = virtio_pci_legacy_get_status (vm, vif); +error: + return error; +} + +void +virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) +{ + vnet_main_t *vnm = vnet_get_main (); + virtio_main_t *vim = &virtio_main; + virtio_if_t *vif; + vlib_pci_dev_handle_t h; + clib_error_t *error = 0; + + if (args->rxq_size == 0) + 
args->rxq_size = VIRTIO_NUM_RX_DESC; + if (args->txq_size == 0) + args->txq_size = VIRTIO_NUM_TX_DESC; + + if (!virtio_pci_queue_size_valid (args->rxq_size) || + !virtio_pci_queue_size_valid (args->txq_size)) + { + args->rv = VNET_API_ERROR_INVALID_VALUE; + args->error = + clib_error_return (error, + "queue size must be <= 4096, >= 64, " + "and multiples of 64"); + vlib_log (VLIB_LOG_LEVEL_ERR, vim->log_default, "%U: %s", + format_vlib_pci_addr, &args->addr, + "queue size must be <= 4096, >= 64, and multiples of 64"); + return; + } + + /* *INDENT-OFF* */ + pool_foreach (vif, vim->interfaces, ({ + if (vif->pci_addr.as_u32 == args->addr) + { + args->rv = VNET_API_ERROR_INVALID_VALUE; + args->error = + clib_error_return (error, "PCI address in use"); + vlib_log (VLIB_LOG_LEVEL_ERR, vim->log_default, "%U: %s", + format_vlib_pci_addr, &args->addr, + " PCI address in use"); + return; + } + })); + /* *INDENT-ON* */ + + pool_get (vim->interfaces, vif); + vif->dev_instance = vif - vim->interfaces; + vif->per_interface_next_index = ~0; + vif->pci_addr.as_u32 = args->addr; + + if ((error = + vlib_pci_device_open (vm, (vlib_pci_addr_t *) & vif->pci_addr, + virtio_pci_device_ids, &h))) + { + pool_put (vim->interfaces, vif); + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = + clib_error_return (error, "pci-addr %U", format_vlib_pci_addr, + &vif->pci_addr); + vlib_log (VLIB_LOG_LEVEL_ERR, vim->log_default, "%U: %s", + format_vlib_pci_addr, &vif->pci_addr, + "error encountered on pci device open"); + return; + } + vif->pci_dev_handle = h; + vlib_pci_set_private_data (vm, h, vif->dev_instance); + + if ((error = vlib_pci_bus_master_enable (vm, h))) + { + virtio_log_error (vim, vif, + "error encountered on pci bus master enable"); + goto error; + } + + if ((error = vlib_pci_io_region (vm, h, 0))) + { + virtio_log_error (vim, vif, "error encountered on pci io region"); + goto error; + } + + if (vlib_pci_get_num_msix_interrupts (vm, h) > 1) + { + if ((error = 
vlib_pci_register_msix_handler (vm, h, 0, 1, + &virtio_pci_irq_0_handler))) + { + virtio_log_error (vim, vif, + "error encountered on pci register msix handler 0"); + goto error; + } + if ((error = vlib_pci_register_msix_handler (vm, h, 1, 1, + &virtio_pci_irq_1_handler))) + { + virtio_log_error (vim, vif, + "error encountered on pci register msix handler 1"); + goto error; + } + + if ((error = vlib_pci_enable_msix_irq (vm, h, 0, 2))) + { + virtio_log_error (vim, vif, + "error encountered on pci enable msix irq"); + goto error; + } + vif->support_int_mode = 1; + virtio_log_debug (vim, vif, "device supports msix interrupts"); + } + else if (vlib_pci_get_num_msix_interrupts (vm, h) == 1) + { + /* + * if msix table-size is 1, fall back to intX. + */ + if ((error = + vlib_pci_register_intx_handler (vm, h, &virtio_pci_irq_handler))) + { + virtio_log_error (vim, vif, + "error encountered on pci register interrupt handler"); + goto error; + } + vif->support_int_mode = 1; + virtio_log_debug (vim, vif, "pci register interrupt handler"); + } + else + { + /* + * WARN: intX is showing some weird behaviour. + * Please don't use interrupt mode with UIO driver. 
+ */ + vif->support_int_mode = 0; + virtio_log_debug (vim, vif, "driver is configured in poll mode only"); + } + + if ((error = vlib_pci_intr_enable (vm, h))) + { + virtio_log_error (vim, vif, + "error encountered on pci interrupt enable"); + goto error; + } + + if ((error = virtio_pci_device_init (vm, vif, args))) + { + virtio_log_error (vim, vif, "error encountered on device init"); + goto error; + } + + vif->type = VIRTIO_IF_TYPE_PCI; + /* create interface */ + error = ethernet_register_interface (vnm, virtio_device_class.index, + vif->dev_instance, vif->mac_addr, + &vif->hw_if_index, + virtio_pci_flag_change); + + if (error) + { + virtio_log_error (vim, vif, + "error encountered on ethernet register interface"); + goto error; + } + + vnet_sw_interface_t *sw = vnet_get_hw_sw_interface (vnm, vif->hw_if_index); + vif->sw_if_index = sw->sw_if_index; + args->sw_if_index = sw->sw_if_index; + + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index); + hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; + vnet_hw_interface_set_input_node (vnm, vif->hw_if_index, + virtio_input_node.index); + vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, 0, ~0); + virtio_vring_set_numa_node (vm, vif, 0); + + vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, 0, + VNET_HW_INTERFACE_RX_MODE_POLLING); + if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP) + { + vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP; + vnet_hw_interface_set_flags (vnm, vif->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + } + else + vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); + return; + +error: + virtio_pci_delete_if (vm, vif); + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = error; +} + +int +virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif) +{ + vnet_main_t *vnm = vnet_get_main (); + virtio_main_t *vim = &virtio_main; + u32 i = 0; + + if (vif->type != VIRTIO_IF_TYPE_PCI) + return VNET_API_ERROR_INVALID_INTERFACE; + + vlib_pci_intr_disable (vm, 
vif->pci_dev_handle); + + virtio_pci_legacy_del_queue (vm, vif, 0); + virtio_pci_legacy_del_queue (vm, vif, 1); + + virtio_pci_legacy_reset (vm, vif); + + if (vif->hw_if_index) + { + vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); + vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, 0); + ethernet_delete_interface (vnm, vif->hw_if_index); + } + + vlib_pci_device_close (vm, vif->pci_dev_handle); + + vec_foreach_index (i, vif->vrings) + { + virtio_vring_t *vring = vec_elt_at_index (vif->vrings, i); + if (vring->kick_fd != -1) + close (vring->kick_fd); + if (vring->used) + { + if ((i & 1) == 1) + virtio_free_used_desc (vm, vring); + else + virtio_free_rx_buffers (vm, vring); + } + if (vring->queue_id % 2) + { + vlib_buffer_free_no_next (vm, vring->indirect_buffers, vring->size); + } + vec_free (vring->buffers); + vec_free (vring->indirect_buffers); + vlib_physmem_free (vm, vring->desc); + } + + vec_free (vif->vrings); + + if (vif->fd != -1) + vif->fd = -1; + if (vif->tap_fd != -1) + vif->tap_fd = -1; + clib_error_free (vif->error); + memset (vif, 0, sizeof (*vif)); + pool_put (vim->interfaces, vif); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/pci.h b/src/vnet/devices/virtio/pci.h new file mode 100644 index 000000000000..40648c85c3e6 --- /dev/null +++ b/src/vnet/devices/virtio/pci.h @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_virtio_pci_h__ +#define __included_virtio_pci_h__ + +/* VirtIO ABI version, this must match exactly. */ +#define VIRTIO_PCI_ABI_VERSION 0 + +/* + * VirtIO Header, located in BAR 0. + */ +#define VIRTIO_PCI_HOST_FEATURES 0 /* host's supported features (32bit, RO) */ +#define VIRTIO_PCI_GUEST_FEATURES 4 /* guest's supported features (32, RW) */ +#define VIRTIO_PCI_QUEUE_PFN 8 /* physical address of VQ (32, RW) */ +#define VIRTIO_PCI_QUEUE_NUM 12 /* number of ring entries (16, RO) */ +#define VIRTIO_PCI_QUEUE_SEL 14 /* current VQ selection (16, RW) */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16 /* notify host regarding VQ (16, RW) */ +#define VIRTIO_PCI_STATUS 18 /* device status register (8, RW) */ +#define VIRTIO_PCI_ISR 19 /* interrupt status register, reading + * also clears the register (8, RO) */ +/* Only if MSIX is enabled: */ +#define VIRTIO_MSI_CONFIG_VECTOR 20 /* configuration change vector (16, RW) */ +#define VIRTIO_MSI_QUEUE_VECTOR 22 /* vector for selected VQ notifications + (16, RW) */ + +/* + * Vector value used to disable MSI for queue. + * Defined in include/linux/virtio_pci.h + * #define VIRTIO_MSI_NO_VECTOR 0xFFFF + */ + +/* The bit of the ISR which indicates a device has an interrupt. */ +#define VIRTIO_PCI_ISR_INTR 0x1 +/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2 + +/* VirtIO device IDs. */ +#define VIRTIO_ID_NETWORK 0x01 + +/* Status byte for guest to report progress. 
*/ +#define foreach_virtio_config_status_flags \ + _ (VIRTIO_CONFIG_STATUS_RESET, 0x00) \ + _ (VIRTIO_CONFIG_STATUS_ACK, 0x01) \ + _ (VIRTIO_CONFIG_STATUS_DRIVER, 0x02) \ + _ (VIRTIO_CONFIG_STATUS_DRIVER_OK, 0x04) \ + _ (VIRTIO_CONFIG_STATUS_FEATURES_OK, 0x08) \ + _ (VIRTIO_CONFIG_STATUS_DEVICE_NEEDS_RESET, 0x40) \ + _ (VIRTIO_CONFIG_STATUS_FAILED, 0x80) + +typedef enum +{ +#define _(a, b) a = b, + foreach_virtio_config_status_flags +#undef _ +} virtio_config_status_flags_t; + +#define foreach_virtio_net_feature_flags \ + _ (VIRTIO_NET_F_CSUM, 0) /* Host handles pkts w/ partial csum */ \ + _ (VIRTIO_NET_F_GUEST_CSUM, 1) /* Guest handles pkts w/ partial csum */ \ + _ (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, 2) /* Dynamic offload configuration. */ \ + _ (VIRTIO_NET_F_MTU, 3) /* Initial MTU advice. */ \ + _ (VIRTIO_NET_F_MAC, 5) /* Host has given MAC address. */ \ + _ (VIRTIO_NET_F_GSO, 6) /* Host handles pkts w/ any GSO. */ \ + _ (VIRTIO_NET_F_GUEST_TSO4, 7) /* Guest can handle TSOv4 in. */ \ + _ (VIRTIO_NET_F_GUEST_TSO6, 8) /* Guest can handle TSOv6 in. */ \ + _ (VIRTIO_NET_F_GUEST_ECN, 9) /* Guest can handle TSO[6] w/ ECN in. */ \ + _ (VIRTIO_NET_F_GUEST_UFO, 10) /* Guest can handle UFO in. */ \ + _ (VIRTIO_NET_F_HOST_TSO4, 11) /* Host can handle TSOv4 in. */ \ + _ (VIRTIO_NET_F_HOST_TSO6, 12) /* Host can handle TSOv6 in. */ \ + _ (VIRTIO_NET_F_HOST_ECN, 13) /* Host can handle TSO[6] w/ ECN in. */ \ + _ (VIRTIO_NET_F_HOST_UFO, 14) /* Host can handle UFO in. */ \ + _ (VIRTIO_NET_F_MRG_RXBUF, 15) /* Host can merge receive buffers. 
*/ \ + _ (VIRTIO_NET_F_STATUS, 16) /* virtio_net_config.status available */ \ + _ (VIRTIO_NET_F_CTRL_VQ, 17) /* Control channel available */ \ + _ (VIRTIO_NET_F_CTRL_RX, 18) /* Control channel RX mode support */ \ + _ (VIRTIO_NET_F_CTRL_VLAN, 19) /* Control channel VLAN filtering */ \ + _ (VIRTIO_NET_F_CTRL_RX_EXTRA, 20) /* Extra RX mode control support */ \ + _ (VIRTIO_NET_F_GUEST_ANNOUNCE, 21) /* Guest can announce device on the network */ \ + _ (VIRTIO_NET_F_MQ, 22) /* Device supports Receive Flow Steering */ \ + _ (VIRTIO_NET_F_CTRL_MAC_ADDR, 23) /* Set MAC address */ \ + _ (VIRTIO_F_NOTIFY_ON_EMPTY, 24) \ + _ (VHOST_F_LOG_ALL, 26) /* Log all write descriptors */ \ + _ (VIRTIO_F_ANY_LAYOUT, 27) /* Can the device handle any descripor layout */ \ + _ (VIRTIO_RING_F_INDIRECT_DESC, 28) /* Support indirect buffer descriptors */ \ + _ (VIRTIO_RING_F_EVENT_IDX, 29) /* The Guest publishes the used index for which it expects an interrupt \ + * at the end of the avail ring. Host should ignore the avail->flags field. */ \ +/* The Host publishes the avail index for which it expects a kick \ + * at the end of the used ring. Guest should ignore the used->flags field. */ \ + _ (VHOST_USER_F_PROTOCOL_FEATURES, 30) + +#define VIRTIO_NET_F_MTU 3 +#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ +#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ + +/* Common configuration */ +#define VIRTIO_PCI_CAP_COMMON_CFG 1 +/* Notifications */ +#define VIRTIO_PCI_CAP_NOTIFY_CFG 2 +/* ISR Status */ +#define VIRTIO_PCI_CAP_ISR_CFG 3 +/* Device specific configuration */ +#define VIRTIO_PCI_CAP_DEVICE_CFG 4 +/* PCI configuration access */ +#define VIRTIO_PCI_CAP_PCI_CFG 5 + +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + +#define VIRTIO_PCI_VRING_ALIGN 4096 + +#define virtio_log_debug(vim, vif, f, ...) \ +{ \ + vlib_log(VLIB_LOG_LEVEL_DEBUG, vim->log_default, "%U: " f, \ + format_vlib_pci_addr, &vif->pci_addr, \ + ##__VA_ARGS__); \ +}; + +#define virtio_log_warning(vim, vif, f, ...) 
\ +{ \ + vlib_log(VLIB_LOG_LEVEL_WARNING, vim->log_default, "%U: " f, \ + format_vlib_pci_addr, &vif->pci_addr, \ + ##__VA_ARGS__); \ +}; + +#define virtio_log_error(vim, vif, f, ...) \ +{ \ + vlib_log(VLIB_LOG_LEVEL_ERR, vim->log_default, "%U: " f, \ + format_vlib_pci_addr, &vif->pci_addr, \ + ##__VA_ARGS__); \ +}; + +typedef enum +{ + VIRTIO_MSIX_NONE = 0, + VIRTIO_MSIX_DISABLED = 1, + VIRTIO_MSIX_ENABLED = 2 +} virtio_msix_status_t; + +/* This is the PCI capability header: */ +typedef struct +{ + u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ + u8 cap_next; /* Generic PCI field: next ptr. */ + u8 cap_len; /* Generic PCI field: capability length */ + u8 cfg_type; /* Identifies the structure. */ + u8 bar; /* Where to find it. */ + u8 padding[3]; /* Pad to full dword. */ + u32 offset; /* Offset within bar. */ + u32 length; /* Length of the structure, in bytes. */ +} virtio_pci_cap_t; + +typedef struct +{ + struct virtio_pci_cap cap; + u32 notify_off_multiplier; /* Multiplier for queue_notify_off. */ +} virtio_pci_notify_cap_t; + +/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */ +typedef struct +{ + /* About the whole device. */ + u32 device_feature_select; /* read-write */ + u32 device_feature; /* read-only */ + u32 guest_feature_select; /* read-write */ + u32 guest_feature; /* read-write */ + u16 msix_config; /* read-write */ + u16 num_queues; /* read-only */ + u8 device_status; /* read-write */ + u8 config_generation; /* read-only */ + + /* About a specific virtqueue. */ + u16 queue_select; /* read-write */ + u16 queue_size; /* read-write, power of 2. 
*/ + u16 queue_msix_vector; /* read-write */ + u16 queue_enable; /* read-write */ + u16 queue_notify_off; /* read-only */ + u32 queue_desc_lo; /* read-write */ + u32 queue_desc_hi; /* read-write */ + u32 queue_avail_lo; /* read-write */ + u32 queue_avail_hi; /* read-write */ + u32 queue_used_lo; /* read-write */ + u32 queue_used_hi; /* read-write */ +} virtio_pci_common_cfg_t; + +typedef struct +{ + u64 addr; + u32 len; + u16 flags; + u16 next; +} vring_desc_t; + +typedef struct +{ + u16 flags; + u16 idx; + u16 ring[0]; + /* u16 used_event; */ +} vring_avail_t; + +typedef struct +{ + u32 id; + u32 len; +} vring_used_elem_t; + +typedef struct +{ + u16 flags; + u16 idx; + vring_used_elem_t ring[0]; + /* u16 avail_event; */ +} vring_used_t; + +typedef struct +{ + u32 addr; + u16 rxq_size; + u16 txq_size; + /* return */ + i32 rv; + u32 sw_if_index; + u8 mac_addr_set; + u8 mac_addr[6]; + u64 features; + clib_error_t *error; +} virtio_pci_create_if_args_t; + +extern void debug_device_config_space (vlib_main_t * vm, virtio_if_t * vif); +extern void device_status (vlib_main_t * vm, virtio_if_t * vif); +void virtio_pci_create_if (vlib_main_t * vm, + virtio_pci_create_if_args_t * args); +int virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * ad); + +#endif /* __included_virtio_pci_h__ */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/vhost_user_input.c b/src/vnet/devices/virtio/vhost_user_input.c index 812f9d3f6ab4..286fa7ed7441 100644 --- a/src/vnet/devices/virtio/vhost_user_input.c +++ b/src/vnet/devices/virtio/vhost_user_input.c @@ -65,7 +65,7 @@ */ #define VHOST_USER_RX_COPY_THRESHOLD 64 -vlib_node_registration_t vhost_user_input_node; +extern vlib_node_registration_t vhost_user_input_node; #define foreach_vhost_user_input_func_error \ _(NO_ERROR, "no error") \ @@ -258,6 +258,7 @@ vhost_user_if_input (vlib_main_t * vm, u32 n_left_to_next, *to_next; u32 
next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; u32 n_trace = vlib_get_trace_count (vm, node); + u32 buffer_data_size = vlib_buffer_get_default_data_size (vm); u32 map_hint = 0; vhost_cpu_t *cpu = &vum->cpus[vm->thread_index]; u16 copy_len = 0; @@ -500,8 +501,7 @@ vhost_user_if_input (vlib_main_t * vm, } /* Get more output if necessary. Or end of packet. */ - if (PREDICT_FALSE - (b_current->current_length == VLIB_BUFFER_DATA_SIZE)) + if (PREDICT_FALSE (b_current->current_length == buffer_data_size)) { if (PREDICT_FALSE (cpu->rx_buffers_len == 0)) { @@ -534,7 +534,7 @@ vhost_user_if_input (vlib_main_t * vm, vhost_copy_t *cpy = &cpu->copy[copy_len]; copy_len++; u32 desc_data_l = desc_table[desc_current].len - desc_data_offset; - cpy->len = VLIB_BUFFER_DATA_SIZE - b_current->current_length; + cpy->len = buffer_data_size - b_current->current_length; cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len; cpy->dst = (uword) (vlib_buffer_get_current (b_current) + b_current->current_length); diff --git a/src/vnet/devices/virtio/vhost_user_output.c b/src/vnet/devices/virtio/vhost_user_output.c index 4fbd63ea7082..c0c54d1b13a5 100644 --- a/src/vnet/devices/virtio/vhost_user_output.c +++ b/src/vnet/devices/virtio/vhost_user_output.c @@ -53,7 +53,7 @@ */ #define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 40) -vnet_device_class_t vhost_user_device_class; +extern vnet_device_class_t vhost_user_device_class; #define foreach_vhost_user_tx_func_error \ _(NONE, "no error") \ diff --git a/src/vnet/devices/virtio/virtio.api b/src/vnet/devices/virtio/virtio.api new file mode 100644 index 000000000000..cb672960afd3 --- /dev/null +++ b/src/vnet/devices/virtio/virtio.api @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +option version = "1.0.0"; + +/** \brief Initialize a new virtio pci interface with the given parameters + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param pci_addr - pci address as unsigned 32bit integer: + 0-15 domain, 16-23 bus, 24-28 slot, 29-31 function + @param use_random_mac - let the system generate a unique mac address + @param mac_address - mac addr to assign to the interface if use_random_mac is not set + @param tx_ring_sz - the number of entries of TX ring + @param rx_ring_sz - the number of entries of RX ring + @param features - the virtio features which the driver should negotiate with the device +*/ +define virtio_pci_create +{ + u32 client_index; + u32 context; + u32 pci_addr; + u8 use_random_mac; + u8 mac_address[6]; + u16 tx_ring_sz; /* optional, default is 256 entries, must be power of 2 */ + u16 rx_ring_sz; /* optional, default is 256 entries, must be power of 2 */ + u64 features; +}; + +/** \brief Reply for virtio pci create + @param context - returned sender context, to match reply w/ request + @param retval - return code + @param sw_if_index - software index allocated for the new virtio pci interface +*/ +define virtio_pci_create_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief Delete virtio pci interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface index of existing virtio pci interface +*/ +autoreply define 
virtio_pci_delete +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief Dump virtio pci interfaces request */ +define sw_interface_virtio_pci_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply for virtio pci interface dump request + @param sw_if_index - software index of virtio pci interface + @param pci_addr - pci address as unsigned 32bit integer: + 0-15 domain, 16-23 bus, 24-28 slot, 29-31 function + @param mac_addr - native virtio device mac address + @param tx_ring_sz - the number of entries of TX ring + @param rx_ring_sz - the number of entries of RX ring + @param features - the virtio features which driver have negotiated with device +*/ +define sw_interface_virtio_pci_details +{ + u32 context; + u32 sw_if_index; + u32 pci_addr; + u8 mac_addr[6]; + u16 tx_ring_sz; + u16 rx_ring_sz; + u64 features; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c index 17de781921df..cfeb30246f04 100644 --- a/src/vnet/devices/virtio/virtio.c +++ b/src/vnet/devices/virtio/virtio.c @@ -26,11 +26,13 @@ #include #include +#include #include #include #include #include #include +#include virtio_main_t virtio_main; @@ -101,6 +103,20 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz) ASSERT (vring->buffers == 0); vec_validate_aligned (vring->buffers, sz, CLIB_CACHE_LINE_BYTES); + ASSERT (vring->indirect_buffers == 0); + vec_validate_aligned (vring->indirect_buffers, sz, CLIB_CACHE_LINE_BYTES); + if (idx % 2) + { + u32 n_alloc = 0; + do + { + if (n_alloc < sz) + n_alloc = + vlib_buffer_alloc (vm, vring->indirect_buffers + n_alloc, + sz - n_alloc); + } + while (n_alloc != sz); + } vring->size = sz; vring->call_fd = eventfd (0, EFD_NONBLOCK | EFD_CLOEXEC); @@ -136,7 +152,7 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz) return err; } -static_always_inline void +inline void virtio_free_rx_buffers 
(vlib_main_t * vm, virtio_vring_t * vring) { u16 used = vring->desc_in_use; @@ -171,10 +187,172 @@ virtio_vring_free (vlib_main_t * vm, virtio_if_t * vif, u32 idx) clib_mem_free (vring->desc); if (vring->avail) clib_mem_free (vring->avail); + if (vring->queue_id % 2) + { + vlib_buffer_free_no_next (vm, vring->indirect_buffers, vring->size); + } vec_free (vring->buffers); + vec_free (vring->indirect_buffers); return 0; } +void +virtio_vring_set_numa_node (vlib_main_t * vm, virtio_if_t * vif, u32 idx) +{ + vnet_main_t *vnm = vnet_get_main (); + u32 thread_index; + virtio_vring_t *vring = vec_elt_at_index (vif->vrings, idx); + thread_index = + vnet_get_device_input_thread_index (vnm, vif->hw_if_index, + vring->queue_id); + vring->buffer_pool_index = + vlib_buffer_pool_get_default_for_numa (vm, + vlib_mains + [thread_index]->numa_node); +} + +inline void +virtio_set_net_hdr_size (virtio_if_t * vif) +{ + if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) || + vif->features & VIRTIO_FEATURE (VIRTIO_F_VERSION_1)) + vif->virtio_net_hdr_sz = sizeof (struct virtio_net_hdr_v1); + else + vif->virtio_net_hdr_sz = sizeof (struct virtio_net_hdr); +} + +inline void +virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type) +{ + u32 i, j, hw_if_index; + virtio_if_t *vif; + vnet_main_t *vnm = &vnet_main; + virtio_main_t *mm = &virtio_main; + virtio_vring_t *vring; + struct feat_struct + { + u8 bit; + char *str; + }; + struct feat_struct *feat_entry; + + static struct feat_struct feat_array[] = { +#define _(s,b) { .str = #s, .bit = b, }, + foreach_virtio_net_features +#undef _ + {.str = NULL} + }; + + struct feat_struct *flag_entry; + static struct feat_struct flags_array[] = { +#define _(b,e,s) { .bit = b, .str = s, }, + foreach_virtio_if_flag +#undef _ + {.str = NULL} + }; + + if (!hw_if_indices) + return; + + for (hw_if_index = 0; hw_if_index < vec_len (hw_if_indices); hw_if_index++) + { + vnet_hw_interface_t *hi = + vnet_get_hw_interface (vnm, 
hw_if_indices[hw_if_index]); + vif = pool_elt_at_index (mm->interfaces, hi->dev_instance); + if (vif->type != type) + continue; + vlib_cli_output (vm, "Interface: %U (ifindex %d)", + format_vnet_hw_if_index_name, vnm, + hw_if_indices[hw_if_index], vif->hw_if_index); + if (type == VIRTIO_IF_TYPE_PCI) + { + vlib_cli_output (vm, " PCI Address: %U", format_vlib_pci_addr, + &vif->pci_addr); + } + if (type == VIRTIO_IF_TYPE_TAP) + { + if (vif->host_if_name) + vlib_cli_output (vm, " name \"%s\"", vif->host_if_name); + if (vif->net_ns) + vlib_cli_output (vm, " host-ns \"%s\"", vif->net_ns); + vlib_cli_output (vm, " fd %d", vif->fd); + vlib_cli_output (vm, " tap-fd %d", vif->tap_fd); + } + vlib_cli_output (vm, " Mac Address: %U", format_ethernet_address, + vif->mac_addr); + vlib_cli_output (vm, " Device instance: %u", vif->dev_instance); + vlib_cli_output (vm, " flags 0x%x", vif->flags); + flag_entry = (struct feat_struct *) &flags_array; + while (flag_entry->str) + { + if (vif->flags & (1ULL << flag_entry->bit)) + vlib_cli_output (vm, " %s (%d)", flag_entry->str, + flag_entry->bit); + flag_entry++; + } + if (type == VIRTIO_IF_TYPE_PCI) + { + device_status (vm, vif); + } + vlib_cli_output (vm, " features 0x%lx", vif->features); + feat_entry = (struct feat_struct *) &feat_array; + while (feat_entry->str) + { + if (vif->features & (1ULL << feat_entry->bit)) + vlib_cli_output (vm, " %s (%d)", feat_entry->str, + feat_entry->bit); + feat_entry++; + } + vlib_cli_output (vm, " remote-features 0x%lx", vif->remote_features); + feat_entry = (struct feat_struct *) &feat_array; + while (feat_entry->str) + { + if (vif->remote_features & (1ULL << feat_entry->bit)) + vlib_cli_output (vm, " %s (%d)", feat_entry->str, + feat_entry->bit); + feat_entry++; + } + vec_foreach_index (i, vif->vrings) + { + // RX = 0, TX = 1 + vring = vec_elt_at_index (vif->vrings, i); + vlib_cli_output (vm, " Virtqueue (%s)", (i & 1) ? 
"TX" : "RX"); + vlib_cli_output (vm, + " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d", + vring->size, vring->last_used_idx, vring->desc_next, + vring->desc_in_use); + vlib_cli_output (vm, + " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d", + vring->avail->flags, vring->avail->idx, + vring->used->flags, vring->used->idx); + if (type == VIRTIO_IF_TYPE_TAP) + { + vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd, + vring->call_fd); + } + if (show_descr) + { + vlib_cli_output (vm, "\n descriptor table:\n"); + vlib_cli_output (vm, + " id addr len flags next user_addr\n"); + vlib_cli_output (vm, + " ===== ================== ===== ====== ===== ==================\n"); + vring = vif->vrings; + for (j = 0; j < vring->size; j++) + { + struct vring_desc *desc = &vring->desc[j]; + vlib_cli_output (vm, + " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", + j, desc->addr, + desc->len, + desc->flags, desc->next, desc->addr); + } + } + } + } + +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h index 841441bd0c6b..af61ca5968f4 100644 --- a/src/vnet/devices/virtio/virtio.h +++ b/src/vnet/devices/virtio/virtio.h @@ -18,6 +18,11 @@ #ifndef _VNET_DEVICES_VIRTIO_VIRTIO_H_ #define _VNET_DEVICES_VIRTIO_VIRTIO_H_ +#include +#include +#include +#include + #define foreach_virtio_net_features \ _ (VIRTIO_NET_F_CSUM, 0) /* Host handles pkts w/ partial csum */ \ _ (VIRTIO_NET_F_GUEST_CSUM, 1) /* Guest handles pkts w/ partial csum */ \ @@ -53,6 +58,7 @@ _ (VHOST_USER_F_PROTOCOL_FEATURES, 30) \ _ (VIRTIO_F_VERSION_1, 32) + #define foreach_virtio_if_flag \ _(0, ADMIN_UP, "admin-up") \ _(1, DELETING, "deleting") @@ -64,13 +70,29 @@ typedef enum #undef _ } virtio_if_flag_t; +#define VIRTIO_NUM_RX_DESC 256 +#define VIRTIO_NUM_TX_DESC 256 + +#define VIRTIO_FEATURE(X) (1ULL << X) + typedef enum { VIRTIO_IF_TYPE_TAP, + VIRTIO_IF_TYPE_PCI, VIRTIO_IF_N_TYPES, } virtio_if_type_t; +typedef struct 
+{ + u8 mac[6]; + u16 status; + u16 max_virtqueue_pairs; + u16 mtu; +} virtio_net_config_t; + +#define VIRTIO_RING_FLAG_MASK_INT 1 + typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -81,34 +103,68 @@ typedef struct u16 desc_next; int kick_fd; int call_fd; + u8 buffer_pool_index; u16 size; -#define VIRTIO_RING_FLAG_MASK_INT 1 - u32 flags; + u16 queue_id; + u16 flags; u32 call_file_index; u32 *buffers; + u32 *indirect_buffers; u16 last_used_idx; u16 last_kick_avail_idx; } virtio_vring_t; +typedef union +{ + struct + { + u16 domain; + u8 bus; + u8 slot:5; + u8 function:3; + }; + u32 as_u32; +} pci_addr_t; + typedef struct { + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); u32 flags; clib_spinlock_t lockp; - u32 id; u32 dev_instance; u32 hw_if_index; u32 sw_if_index; + u16 virtio_net_hdr_sz; + virtio_if_type_t type; + union + { + u32 id; + pci_addr_t pci_addr; + }; u32 per_interface_next_index; - int fd; - int tap_fd; + union + { + int fd; + u32 msix_enabled; + }; + union + { + int tap_fd; + u32 pci_dev_handle; + }; virtio_vring_t *vrings; u64 features, remote_features; - virtio_if_type_t type; + /* error */ + clib_error_t *error; + u8 support_int_mode; /* support interrupt mode */ + u16 max_queue_pairs; u16 tx_ring_sz; u16 rx_ring_sz; + u8 status; + u8 mac_addr[6]; u8 *host_if_name; u8 *net_ns; u8 *host_bridge; @@ -117,12 +173,14 @@ typedef struct u8 host_ip4_prefix_len; ip6_address_t host_ip6_addr; u8 host_ip6_prefix_len; - int ifindex; } virtio_if_t; typedef struct { + /* logging */ + vlib_log_class_t log_default; + virtio_if_t *interfaces; } virtio_main_t; @@ -134,18 +192,30 @@ clib_error_t *virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz); clib_error_t *virtio_vring_free (vlib_main_t * vm, virtio_if_t * vif, u32 idx); +void virtio_vring_set_numa_node (vlib_main_t * vm, virtio_if_t * vif, + u32 idx); extern void virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring); - +extern void virtio_free_rx_buffers (vlib_main_t * vm, 
virtio_vring_t * vring); +extern void virtio_set_net_hdr_size (virtio_if_t * vif); +extern void virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, + u32 type); +extern void virtio_pci_legacy_notify_queue (vlib_main_t * vm, + virtio_if_t * vif, u16 queue_id); format_function_t format_virtio_device_name; static_always_inline void -virtio_kick (virtio_vring_t * vring) +virtio_kick (vlib_main_t * vm, virtio_vring_t * vring, virtio_if_t * vif) { - u64 x = 1; - int __clib_unused r; - - r = write (vring->kick_fd, &x, sizeof (x)); - vring->last_kick_avail_idx = vring->avail->idx; + if (vif->type == VIRTIO_IF_TYPE_PCI) + virtio_pci_legacy_notify_queue (vm, vif, vring->queue_id); + else + { + u64 x = 1; + int __clib_unused r; + + r = write (vring->kick_fd, &x, sizeof (x)); + vring->last_kick_avail_idx = vring->avail->idx; + } } #endif /* _VNET_DEVICES_VIRTIO_VIRTIO_H_ */ diff --git a/src/vnet/devices/virtio/virtio_api.c b/src/vnet/devices/virtio/virtio_api.c new file mode 100644 index 000000000000..5035799befec --- /dev/null +++ b/src/vnet/devices/virtio/virtio_api.c @@ -0,0 +1,237 @@ +/* + *------------------------------------------------------------------ + * virtio_api.c - vnet virtio pci device driver API support + * + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_virtio_pci_api_msg \ +_(VIRTIO_PCI_CREATE, virtio_pci_create) \ +_(VIRTIO_PCI_DELETE, virtio_pci_delete) \ +_(SW_INTERFACE_VIRTIO_PCI_DUMP, sw_interface_virtio_pci_dump) + +static void +vl_api_virtio_pci_create_t_handler (vl_api_virtio_pci_create_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_virtio_pci_create_reply_t *rmp; + vl_api_registration_t *reg; + virtio_pci_create_if_args_t _a, *ap = &_a; + + clib_memset (ap, 0, sizeof (*ap)); + + ap->addr = ntohl (mp->pci_addr); + if (!mp->use_random_mac) + { + clib_memcpy (ap->mac_addr, mp->mac_address, 6); + ap->mac_addr_set = 1; + } + ap->rxq_size = ntohs (mp->rx_ring_sz); + ap->txq_size = ntohs (mp->tx_ring_sz); + ap->sw_if_index = (u32) ~ 0; + ap->features = clib_net_to_host_u64 (mp->features); + + virtio_pci_create_if (vm, ap); + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return;; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = htons (VL_API_VIRTIO_PCI_CREATE_REPLY); + rmp->context = mp->context; + rmp->retval = htonl (ap->rv); + rmp->sw_if_index = htonl (ap->sw_if_index); + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +virtio_pci_send_sw_interface_event_deleted (vpe_api_main_t * am, + vl_api_registration_t * reg, + u32 sw_if_index) +{ + vl_api_sw_interface_event_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + clib_memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = htons (VL_API_SW_INTERFACE_EVENT); + 
mp->sw_if_index = htonl (sw_if_index); + + mp->admin_up_down = 0; + mp->link_up_down = 0; + mp->deleted = 1; + vl_api_send_msg (reg, (u8 *) mp); +} + +static void +vl_api_virtio_pci_delete_t_handler (vl_api_virtio_pci_delete_t * mp) +{ + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + virtio_main_t *vim = &virtio_main; + int rv = 0; + vnet_hw_interface_t *hw; + virtio_if_t *vif; + vpe_api_main_t *vam = &vpe_api_main; + vl_api_virtio_pci_delete_reply_t *rmp; + vl_api_registration_t *reg; + u32 sw_if_index = ntohl (mp->sw_if_index); + + hw = vnet_get_sup_hw_interface (vnm, htonl (mp->sw_if_index)); + if (hw == NULL || virtio_device_class.index != hw->dev_class_index) + { + rv = VNET_API_ERROR_INVALID_INTERFACE; + goto reply; + } + + vif = pool_elt_at_index (vim->interfaces, hw->dev_instance); + + rv = virtio_pci_delete_if (vm, vif); + +reply: + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = htons (VL_API_VIRTIO_PCI_DELETE_REPLY); + rmp->context = mp->context; + rmp->retval = htonl (rv); + + vl_api_send_msg (reg, (u8 *) rmp); + + if (!rv) + { + virtio_pci_send_sw_interface_event_deleted (vam, reg, sw_if_index); + } +} + +static void +virtio_pci_send_sw_interface_details (vpe_api_main_t * am, + vl_api_registration_t * reg, + virtio_if_t * vif, u32 context) +{ + vl_api_sw_interface_virtio_pci_details_t *mp; + mp = vl_msg_api_alloc (sizeof (*mp)); + + clib_memset (mp, 0, sizeof (*mp)); + + mp->_vl_msg_id = htons (VL_API_SW_INTERFACE_VIRTIO_PCI_DETAILS); + mp->pci_addr = htonl (vif->pci_addr.as_u32); + mp->sw_if_index = htonl (vif->sw_if_index); + mp->rx_ring_sz = htons (vif->rx_ring_sz); + mp->tx_ring_sz = htons (vif->tx_ring_sz); + clib_memcpy (mp->mac_addr, vif->mac_addr, 6); + mp->features = clib_host_to_net_u64 (vif->features); + + mp->context = context; + vl_api_send_msg (reg, (u8 *) mp); +} + +static void + 
vl_api_sw_interface_virtio_pci_dump_t_handler + (vl_api_sw_interface_virtio_pci_dump_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + vl_api_registration_t *reg; + virtio_main_t *vmx = &virtio_main; + virtio_if_t *vif; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + pool_foreach (vif, vmx->interfaces, ( + { + if (vif->type == VIRTIO_IF_TYPE_PCI) + { + virtio_pci_send_sw_interface_details + (am, reg, vif, mp->context);} + } + )); +} + +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_virtio; +#undef _ +} + +static clib_error_t * +virtio_pci_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_virtio_pci_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (virtio_pci_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index 432bc8dbe686..3958af86c208 100644 --- a/src/vnet/dhcp/dhcp6_proxy_node.c +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -179,7 +179,6 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, dhcpv6_client_mac_t *cmac; // client mac ethernet_header_t *e_h0; u8 client_src_mac[6]; - vlib_buffer_free_list_t *fl; dhcp_vss_t *vss; u8 is_solicit = 0; @@ -315,12 +314,9 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, copy_ip6_address (&r1->link_addr, ia0); link_address_set: - fl = - vlib_buffer_get_free_list (vm, - vlib_buffer_get_free_list_index (b0)); if 
((b0->current_length + sizeof (*id1) + sizeof (*vss1) + - sizeof (*cmac)) > fl->n_data_bytes) + sizeof (*cmac)) > vlib_buffer_get_default_data_size (vm)) { error0 = DHCPV6_PROXY_ERROR_PKT_TOO_BIG; next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; diff --git a/src/vnet/dns/dns.c b/src/vnet/dns/dns.c index 93b7fb8b9f48..15d40beb7107 100644 --- a/src/vnet/dns/dns.c +++ b/src/vnet/dns/dns.c @@ -2810,7 +2810,6 @@ vnet_send_dns4_reply (dns_main_t * dm, dns_pending_request_t * pr, * In the resolution-required / deferred case, resetting a freshly-allocated * buffer won't hurt. We hope. */ - b0->flags &= VLIB_BUFFER_NON_DEFAULT_FREELIST; b0->flags |= (VNET_BUFFER_F_LOCALLY_ORIGINATED | VLIB_BUFFER_TOTAL_LENGTH_VALID); b0->current_data = 0; diff --git a/src/vnet/dpo/dvr_dpo.c b/src/vnet/dpo/dvr_dpo.c index 4b440f81c599..a362d23e890e 100644 --- a/src/vnet/dpo/dvr_dpo.c +++ b/src/vnet/dpo/dvr_dpo.c @@ -18,6 +18,8 @@ #include #include +dvr_dpo_t *dvr_dpo_pool; + /** * The 'DB' of DVR DPOs. * There is one per-interface per-L3 proto, so this is a per-interface vector diff --git a/src/vnet/dpo/dvr_dpo.h b/src/vnet/dpo/dvr_dpo.h index 15fe113c5965..669c49201c82 100644 --- a/src/vnet/dpo/dvr_dpo.h +++ b/src/vnet/dpo/dvr_dpo.h @@ -54,7 +54,7 @@ extern void dvr_dpo_module_init(void); /** * @brief pool of all interface DPOs */ -dvr_dpo_t *dvr_dpo_pool; +extern dvr_dpo_t *dvr_dpo_pool; static inline dvr_dpo_t * dvr_dpo_get (index_t index) diff --git a/src/vnet/dpo/interface_rx_dpo.c b/src/vnet/dpo/interface_rx_dpo.c index 82767e73fc74..90868b953cac 100644 --- a/src/vnet/dpo/interface_rx_dpo.c +++ b/src/vnet/dpo/interface_rx_dpo.c @@ -17,6 +17,8 @@ #include #include +interface_rx_dpo_t *interface_rx_dpo_pool; + /* * The 'DB' of interface DPOs. 
* There is only one per-interface per-protocol, so this is a per-interface diff --git a/src/vnet/dpo/interface_rx_dpo.h b/src/vnet/dpo/interface_rx_dpo.h index edecce08675f..d3406a223879 100644 --- a/src/vnet/dpo/interface_rx_dpo.h +++ b/src/vnet/dpo/interface_rx_dpo.h @@ -58,7 +58,7 @@ extern void interface_rx_dpo_module_init(void); /** * @brief pool of all interface DPOs */ -interface_rx_dpo_t *interface_rx_dpo_pool; +extern interface_rx_dpo_t *interface_rx_dpo_pool; static inline interface_rx_dpo_t * interface_rx_dpo_get (index_t index) diff --git a/src/vnet/dpo/l3_proxy_dpo.h b/src/vnet/dpo/l3_proxy_dpo.h index fcc28b3bf97f..3a578e299241 100644 --- a/src/vnet/dpo/l3_proxy_dpo.h +++ b/src/vnet/dpo/l3_proxy_dpo.h @@ -52,7 +52,7 @@ extern void l3_proxy_dpo_module_init(void); /** * @brief pool of all l3_proxy DPOs */ -l3_proxy_dpo_t *l3_proxy_dpo_pool; +extern l3_proxy_dpo_t *l3_proxy_dpo_pool; static inline l3_proxy_dpo_t * l3_proxy_dpo_get (index_t index) diff --git a/src/vnet/dpo/receive_dpo.h b/src/vnet/dpo/receive_dpo.h index ee3c7836aff6..9459fbcc2790 100644 --- a/src/vnet/dpo/receive_dpo.h +++ b/src/vnet/dpo/receive_dpo.h @@ -59,7 +59,7 @@ extern void receive_dpo_module_init(void); /** * @brief pool of all receive DPOs */ -receive_dpo_t *receive_dpo_pool; +extern receive_dpo_t *receive_dpo_pool; static inline receive_dpo_t * receive_dpo_get (index_t index) diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index 13b718d4c627..dded56af9954 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -16,6 +16,7 @@ */ #include +#include #include #include #include @@ -65,7 +66,7 @@ typedef struct uword type_opaque; uword data; /* Used for arp event notification only */ - void *data_callback; + arp_change_event_cb_t data_callback; u32 pid; } pending_resolution_t; @@ -103,10 +104,10 @@ static ethernet_arp_main_t ethernet_arp_main; typedef struct { u32 sw_if_index; - ethernet_arp_ip4_over_ethernet_address_t a; - int is_static; - int 
is_no_fib_entry; - int flags; + ip4_address_t ip4; + mac_address_t mac; + ip_neighbor_flags_t nbr_flags; + u32 flags; #define ETHERNET_ARP_ARGS_REMOVE (1<<0) #define ETHERNET_ARP_ARGS_FLUSH (1<<1) #define ETHERNET_ARP_ARGS_POPULATE (1<<2) @@ -226,9 +227,9 @@ format_ethernet_arp_header (u8 * s, va_list * va) { s = format (s, "\n%U%U/%U -> %U/%U", format_white_space, indent, - format_ethernet_address, a->ip4_over_ethernet[0].ethernet, + format_mac_address_t, &a->ip4_over_ethernet[0].mac, format_ip4_address, &a->ip4_over_ethernet[0].ip4, - format_ethernet_address, a->ip4_over_ethernet[1].ethernet, + format_mac_address_t, &a->ip4_over_ethernet[1].mac, format_ip4_address, &a->ip4_over_ethernet[1].ip4); } else @@ -260,20 +261,20 @@ format_ethernet_arp_ip4_entry (u8 * s, va_list * va) si = vnet_get_sw_interface (vnm, e->sw_if_index); - if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) + if (e->flags & IP_NEIGHBOR_FLAG_STATIC) flags = format (flags, "S"); - if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) + if (e->flags & IP_NEIGHBOR_FLAG_DYNAMIC) flags = format (flags, "D"); - if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY) + if (e->flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY) flags = format (flags, "N"); s = format (s, "%=12U%=16U%=6s%=20U%U", format_vlib_time, vnm->vlib_main, e->time_last_updated, format_ip4_address, &e->ip4_address, flags ? 
(char *) flags : "", - format_ethernet_address, e->ethernet_address, + format_mac_address_t, &e->mac, format_vnet_sw_interface_name, vnm, si); vec_free (flags); @@ -358,9 +359,7 @@ arp_nbr_probe (ip_adjacency_t * adj) hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index); - clib_memcpy_fast (h->ip4_over_ethernet[0].ethernet, - hi->hw_address, - sizeof (h->ip4_over_ethernet[0].ethernet)); + mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address); h->ip4_over_ethernet[0].ip4 = src[0]; h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4; @@ -389,7 +388,7 @@ arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e) (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE, ethernet_build_rewrite (vnet_get_main (), e->sw_if_index, - adj_get_link_type (ai), e->ethernet_address)); + adj_get_link_type (ai), &e->mac)); } static void @@ -601,7 +600,7 @@ force_reuse_arp_entry (void) am->arp_delete_rotor = index; index = pool_next_index (am->ip4_entry_pool, index); } - while (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC); + while (e->flags & IP_NEIGHBOR_FLAG_STATIC); /* Remove ARP entry from its interface and update fib */ hash_unset @@ -621,15 +620,12 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, { ethernet_arp_ip4_entry_t *e = 0; ethernet_arp_main_t *am = ðernet_arp_main; - ethernet_arp_ip4_over_ethernet_address_t *a = &args->a; vlib_main_t *vm = vlib_get_main (); int make_new_arp_cache_entry = 1; uword *p; pending_resolution_t *pr, *mc; ethernet_arp_interface_t *arp_int; - int is_static = args->is_static; u32 sw_if_index = args->sw_if_index; - int is_no_fib_entry = args->is_no_fib_entry; vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index); @@ -637,17 +633,17 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, if (NULL != arp_int->arp_entries) { - p = hash_get (arp_int->arp_entries, a->ip4.as_u32); + p = hash_get (arp_int->arp_entries, args->ip4.as_u32); if (p) { e = pool_elt_at_index (am->ip4_entry_pool, p[0]); /* 
Refuse to over-write static arp. */ - if (!is_static && (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)) + if (!(args->nbr_flags & IP_NEIGHBOR_FLAG_STATIC) && + (e->flags & IP_NEIGHBOR_FLAG_STATIC)) { /* if MAC address match, still check to send event */ - if (0 == memcmp (e->ethernet_address, - a->ethernet, sizeof (e->ethernet_address))) + if (mac_address_equal (&e->mac, &args->mac)) goto check_customers; return -2; } @@ -670,15 +666,15 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, if (NULL == arp_int->arp_entries) arp_int->arp_entries = hash_create (0, sizeof (u32)); - hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool); + hash_set (arp_int->arp_entries, args->ip4.as_u32, + e - am->ip4_entry_pool); e->sw_if_index = sw_if_index; - e->ip4_address = a->ip4; + e->ip4_address = args->ip4; e->fib_entry_index = FIB_NODE_INDEX_INVALID; - clib_memcpy_fast (e->ethernet_address, - a->ethernet, sizeof (e->ethernet_address)); + mac_address_copy (&e->mac, &args->mac); - if (!is_no_fib_entry) + if (!(args->nbr_flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY)) { arp_adj_fib_add (e, ip4_fib_table_get_index_for_sw_if_index @@ -686,7 +682,7 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, } else { - e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY; + e->flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY; } } else @@ -695,36 +691,34 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, * prevent a DoS attack from the data-plane that * spams us with no-op updates to the MAC address */ - if (0 == memcmp (e->ethernet_address, - a->ethernet, sizeof (e->ethernet_address))) + if (mac_address_equal (&e->mac, &args->mac)) { e->time_last_updated = vlib_time_now (vm); goto check_customers; } /* Update ethernet address. */ - clib_memcpy_fast (e->ethernet_address, a->ethernet, - sizeof (e->ethernet_address)); + mac_address_copy (&e->mac, &args->mac); } /* Update time stamp and flags. 
*/ e->time_last_updated = vlib_time_now (vm); - if (is_static) + if (args->nbr_flags & IP_NEIGHBOR_FLAG_STATIC) { - e->flags &= ~ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC; - e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC; + e->flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC; + e->flags |= IP_NEIGHBOR_FLAG_STATIC; } else { - e->flags &= ~ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC; - e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC; + e->flags &= ~IP_NEIGHBOR_FLAG_STATIC; + e->flags |= IP_NEIGHBOR_FLAG_DYNAMIC; } adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e); check_customers: /* Customer(s) waiting for this address to be resolved? */ - p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32); + p = hash_get (am->pending_resolutions_by_address, args->ip4.as_u32); if (p) { u32 next_index; @@ -739,11 +733,11 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, pool_put (am->pending_resolutions, pr); } - hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32); + hash_unset (am->pending_resolutions_by_address, args->ip4.as_u32); } /* Customer(s) requesting ARP event for this address? 
*/ - p = hash_get (am->mac_changes_by_address, a->ip4.as_u32); + p = hash_get (am->mac_changes_by_address, args->ip4.as_u32); if (p) { u32 next_index; @@ -751,14 +745,12 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, while (next_index != (u32) ~ 0) { - int (*fp) (u32, u8 *, u32, u32); int rv = 1; mc = pool_elt_at_index (am->mac_changes, next_index); - fp = mc->data_callback; /* Call the user's data callback, return 1 to suppress dup events */ - if (fp) - rv = (*fp) (mc->data, a->ethernet, sw_if_index, 0); + if (mc->data_callback) + rv = (mc->data_callback) (mc->data, &args->mac, sw_if_index, 0); /* * Signal the resolver process, as long as the user @@ -807,7 +799,7 @@ vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm, int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, - void *data_callback, + arp_change_event_cb_t data_callback, u32 pid, void *address_arg, uword node_index, @@ -836,11 +828,17 @@ vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, return VNET_API_ERROR_ENTRY_ALREADY_EXISTS; pool_get (am->mac_changes, mc); + /* *INDENT-OFF* */ *mc = (pending_resolution_t) { - .next_index = ~0,.node_index = node_index,.type_opaque = - type_opaque,.data = data,.data_callback = data_callback,.pid = - pid,}; + .next_index = ~0, + .node_index = node_index, + .type_opaque = type_opaque, + .data = data, + .data_callback = data_callback, + .pid = pid, + }; + /* *INDENT-ON* */ /* Insert new resolution at the end of the list */ u32 new_idx = mc - am->mac_changes; @@ -855,9 +853,9 @@ vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, return VNET_API_ERROR_NO_SUCH_ENTRY; /* Clients may need to clean up pool entries, too */ - void (*fp) (u32, u8 *) = data_callback; - if (fp) - (*fp) (mc->data, 0 /* no new mac addrs */ ); + if (data_callback) + /* no new mac addrs */ + (data_callback) (mc->data, NULL, ~0, NULL); /* Remove the entry from the list and delete the entry */ *p = mc->next_index; @@ -894,6 +892,7 @@ typedef enum _ (gratuitous_arp, "ARP 
probe or announcement dropped") \ _ (interface_no_table, "Interface is not mapped to an IP table") \ _ (interface_not_ip_enabled, "Interface is not IP enabled") \ + _ (unnumbered_mismatch, "RX interface is unnumbered to different subnet") \ typedef enum { @@ -933,7 +932,7 @@ arp_learn (vnet_main_t * vnm, ethernet_arp_main_t * am, u32 sw_if_index, const ethernet_arp_ip4_over_ethernet_address_t * addr) { - vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, addr, 0, 0); + vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, addr, 0); return (ETHERNET_ARP_ERROR_l3_src_address_learned); } @@ -1177,15 +1176,15 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)) && (!memcmp - (arp0->ip4_over_ethernet[0].ethernet, vrrp_prefix, + (arp0->ip4_over_ethernet[0].mac.bytes, vrrp_prefix, sizeof (vrrp_prefix)))); /* Trash ARP packets whose ARP-level source addresses do not match their L2-frame-level source addresses, unless it's a reply from a VRRP virtual router */ - if (memcmp - (eth_rx->src_address, arp0->ip4_over_ethernet[0].ethernet, - sizeof (eth_rx->src_address)) && !is_vrrp_reply0) + if (!ethernet_mac_address_equal + (eth_rx->src_address, + arp0->ip4_over_ethernet[0].mac.bytes) && !is_vrrp_reply0) { error0 = ETHERNET_ARP_ERROR_l2_address_mismatch; goto drop2; @@ -1239,8 +1238,8 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0]; - clib_memcpy_fast (arp0->ip4_over_ethernet[0].ethernet, - hw_if0->hw_address, 6); + mac_address_from_bytes (&arp0->ip4_over_ethernet[0].mac, + hw_if0->hw_address); clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) = if_addr0->data_u32; @@ -1258,7 +1257,10 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) if (is_unnum0) { if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0)) - goto drop2; + { + error0 = 
ETHERNET_ARP_ERROR_unnumbered_mismatch; + goto drop2; + } } } @@ -1477,7 +1479,7 @@ typedef struct pg_edit_t opcode; struct { - pg_edit_t ethernet; + pg_edit_t mac; pg_edit_t ip4; } ip4_over_ethernet[2]; } pg_ethernet_arp_header_t; @@ -1492,9 +1494,9 @@ pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p) _(n_l2_address_bytes); _(n_l3_address_bytes); _(opcode); - _(ip4_over_ethernet[0].ethernet); + _(ip4_over_ethernet[0].mac); _(ip4_over_ethernet[0].ip4); - _(ip4_over_ethernet[1].ethernet); + _(ip4_over_ethernet[1].mac); _(ip4_over_ethernet[1].ip4); #undef _ } @@ -1520,11 +1522,11 @@ unformat_pg_arp_header (unformat_input_t * input, va_list * args) unformat_pg_edit, unformat_ethernet_arp_opcode_net_byte_order, &p->opcode, unformat_pg_edit, - unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet, + unformat_mac_address_t, &p->ip4_over_ethernet[0].mac, unformat_pg_edit, unformat_ip4_address, &p->ip4_over_ethernet[0].ip4, unformat_pg_edit, - unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet, + unformat_mac_address_t, &p->ip4_over_ethernet[1].mac, unformat_pg_edit, unformat_ip4_address, &p->ip4_over_ethernet[1].ip4)) { @@ -1554,11 +1556,12 @@ vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm, ethernet_arp_ip4_over_ethernet_address_t * a) { - vnet_arp_set_ip4_over_ethernet_rpc_args_t args; - - args.sw_if_index = sw_if_index; - args.flags = ETHERNET_ARP_ARGS_REMOVE; - clib_memcpy_fast (&args.a, a, sizeof (*a)); + vnet_arp_set_ip4_over_ethernet_rpc_args_t args = { + .sw_if_index = sw_if_index, + .flags = ETHERNET_ARP_ARGS_REMOVE, + .ip4 = a->ip4, + .mac = a->mac, + }; vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, (u8 *) & args, sizeof (args)); @@ -1576,7 +1579,8 @@ vnet_arp_wc_publish (u32 sw_if_index, vnet_arp_set_ip4_over_ethernet_rpc_args_t args = { .flags = ETHERNET_ARP_ARGS_WC_PUB, .sw_if_index = sw_if_index, - .a = *a + .ip4 = a->ip4, + .mac = a->mac, }; vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, @@ 
-1598,9 +1602,9 @@ vnet_arp_wc_publish_internal (vnet_main_t * vnm, return; wc_arp_report_t *r = vlib_process_signal_event_data (vm, ni, et, 1, sizeof *r); - r->ip4 = args->a.ip4.as_u32; + r->ip.as_u32 = args->ip4.as_u32; r->sw_if_index = args->sw_if_index; - memcpy (r->mac, args->a.ethernet, sizeof r->mac); + mac_address_copy (&r->mac, &args->mac); } void @@ -1628,7 +1632,7 @@ vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm, eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index]; - e = arp_entry_find (eai, &args->a.ip4); + e = arp_entry_find (eai, &args->ip4); if (NULL != e) { @@ -1642,11 +1646,11 @@ vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm, * does in response to interface events. unset is only done * by the control plane. */ - if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) + if (e->flags & IP_NEIGHBOR_FLAG_STATIC) { - e->flags &= ~ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC; + e->flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC; } - else if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) + else if (e->flags & IP_NEIGHBOR_FLAG_DYNAMIC) { arp_entry_free (eai, e); } @@ -1703,11 +1707,11 @@ arp_add_del_interface_address (ip4_main_t * im, e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]); vnet_arp_set_ip4_over_ethernet_rpc_args_t delme = { - .a.ip4.as_u32 = e->ip4_address.as_u32, + .ip4.as_u32 = e->ip4_address.as_u32, .sw_if_index = e->sw_if_index, .flags = ETHERNET_ARP_ARGS_FLUSH, }; - clib_memcpy_fast (&delme.a.ethernet, e->ethernet_address, 6); + mac_address_copy (&delme.mac, &e->mac); vnet_arp_flush_ip4_over_ethernet_internal (vnet_get_main (), &delme); @@ -1833,7 +1837,7 @@ vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm, eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index]; - e = arp_entry_find (eai, &args->a.ip4); + e = arp_entry_find (eai, &args->ip4); if (NULL != e) { @@ -1858,7 +1862,7 @@ vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm, vec_validate (am->ethernet_arp_by_sw_if_index, 
args->sw_if_index); eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index]; - e = arp_entry_find (eai, &args->a.ip4); + e = arp_entry_find (eai, &args->ip4); if (NULL != e) { @@ -1912,11 +1916,10 @@ ethernet_arp_sw_interface_up_down (vnet_main_t * vnm, e = pool_elt_at_index (am->ip4_entry_pool, to_update[i]); vnet_arp_set_ip4_over_ethernet_rpc_args_t update_me = { - .a.ip4.as_u32 = e->ip4_address.as_u32, + .ip4.as_u32 = e->ip4_address.as_u32, .sw_if_index = e->sw_if_index, }; - - clib_memcpy_fast (&update_me.a.ethernet, e->ethernet_address, 6); + mac_address_copy (&update_me.mac, &e->mac); if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) { @@ -1952,9 +1955,9 @@ increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a) for (i = 5; i >= 0; i--) { - old = a->ethernet[i]; - a->ethernet[i] += 1; - if (old < a->ethernet[i]) + old = a->mac.bytes[i]; + a->mac.bytes[i] += 1; + if (old < a->mac.bytes[i]) break; } } @@ -1963,15 +1966,15 @@ int vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm, u32 sw_if_index, const ethernet_arp_ip4_over_ethernet_address_t - * a, int is_static, int is_no_fib_entry) + * a, ip_neighbor_flags_t flags) { - vnet_arp_set_ip4_over_ethernet_rpc_args_t args; - - args.sw_if_index = sw_if_index; - args.is_static = is_static; - args.is_no_fib_entry = is_no_fib_entry; - args.flags = 0; - clib_memcpy_fast (&args.a, a, sizeof (*a)); + vnet_arp_set_ip4_over_ethernet_rpc_args_t args = { + .sw_if_index = sw_if_index, + .nbr_flags = flags, + .flags = 0, + .ip4.as_u32 = a->ip4.as_u32, + .mac = a->mac, + }; vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, (u8 *) & args, sizeof (args)); @@ -2075,9 +2078,10 @@ ip_arp_add_del_command_fn (vlib_main_t * vm, int count = 1; u32 fib_index = 0; u32 fib_id; - int is_static = 0; - int is_no_fib_entry = 0; int is_proxy = 0; + ip_neighbor_flags_t flags; + + flags = IP_NEIGHBOR_FLAG_NONE; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -2085,17 +2089,17 @@ 
ip_arp_add_del_command_fn (vlib_main_t * vm, if (unformat (input, "%U %U %U", unformat_vnet_sw_interface, vnm, &sw_if_index, unformat_ip4_address, &addr.ip4, - unformat_ethernet_address, &addr.ethernet)) + unformat_mac_address_t, &addr.mac)) addr_valid = 1; else if (unformat (input, "delete") || unformat (input, "del")) is_del = 1; else if (unformat (input, "static")) - is_static = 1; + flags |= IP_NEIGHBOR_FLAG_STATIC; else if (unformat (input, "no-fib-entry")) - is_no_fib_entry = 1; + flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY; else if (unformat (input, "count %d", &count)) ; @@ -2138,8 +2142,7 @@ ip_arp_add_del_command_fn (vlib_main_t * vm, (vnm, &addr.ip4, vlib_current_process (vm), 1 /* type */ , 0 /* data */ ); - vnet_arp_set_ip4_over_ethernet - (vnm, sw_if_index, &addr, is_static, is_no_fib_entry); + vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, &addr, flags); vlib_process_wait_for_event (vm); event_type = vlib_process_get_events (vm, &event_data); @@ -2367,13 +2370,14 @@ arp_term_l2bd (vlib_main_t * vm, /* Trash ARP packets whose ARP-level source addresses do not match, or if requester address is mcast */ if (PREDICT_FALSE - (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet, - sizeof (eth0->src_address)) || - ethernet_address_cast (arp0->ip4_over_ethernet[0].ethernet))) + (!ethernet_mac_address_equal (eth0->src_address, + arp0->ip4_over_ethernet[0]. 
+ mac.bytes)) + || ethernet_address_cast (arp0->ip4_over_ethernet[0].mac.bytes)) { /* VRRP virtual MAC may be different to SMAC in ARP reply */ - if (memcmp (arp0->ip4_over_ethernet[0].ethernet, vrrp_prefix, - sizeof (vrrp_prefix))) + if (memcmp (arp0->ip4_over_ethernet[0].mac.bytes, vrrp_prefix, + sizeof (vrrp_prefix))) { error0 = ETHERNET_ARP_ERROR_l2_address_mismatch; goto drop; @@ -2412,7 +2416,7 @@ arp_term_l2bd (vlib_main_t * vm, arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply); arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0]; arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0; - clib_memcpy_fast (arp0->ip4_over_ethernet[0].ethernet, macp0, 6); + mac_address_from_bytes (&arp0->ip4_over_ethernet[0].mac, macp0); clib_memcpy_fast (eth0->dst_address, eth0->src_address, 6); clib_memcpy_fast (eth0->src_address, macp0, 6); n_replies_sent += 1; @@ -2579,10 +2583,8 @@ send_ip4_garp_w_addr (vlib_main_t * vm, if (!h) return; - clib_memcpy_fast (h->ip4_over_ethernet[0].ethernet, hi->hw_address, - sizeof (h->ip4_over_ethernet[0].ethernet)); - clib_memcpy_fast (h->ip4_over_ethernet[1].ethernet, hi->hw_address, - sizeof (h->ip4_over_ethernet[1].ethernet)); + mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address); + mac_address_from_bytes (&h->ip4_over_ethernet[1].mac, hi->hw_address); h->ip4_over_ethernet[0].ip4 = ip4_addr[0]; h->ip4_over_ethernet[1].ip4 = ip4_addr[0]; @@ -2622,8 +2624,10 @@ vnet_arp_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) pool_foreach (e, am->ip4_entry_pool, ({ if (e->sw_if_index != sw_if_index) continue; - vnet_arp_set_ip4_over_ethernet_rpc_args_t args = { .sw_if_index = sw_if_index, - .a.ip4 = e->ip4_address }; + vnet_arp_set_ip4_over_ethernet_rpc_args_t args = { + .sw_if_index = sw_if_index, + .ip4 = e->ip4_address, + }; vnet_arp_unset_ip4_over_ethernet_internal (vnm, &args); })); /* *INDENT-ON* */ diff --git a/src/vnet/ethernet/arp.h b/src/vnet/ethernet/arp.h index 6c89d1e68cbb..e99d7a82db83
100644 --- a/src/vnet/ethernet/arp.h +++ b/src/vnet/ethernet/arp.h @@ -19,6 +19,29 @@ #include #include #include +#include + +typedef struct +{ + u32 sw_if_index; + ip4_address_t ip4_address; + + mac_address_t mac; + + ip_neighbor_flags_t flags; + + f64 time_last_updated; + + /** + * The index of the adj-fib entry created + */ + fib_node_index_t fib_entry_index; +} ethernet_arp_ip4_entry_t; + +extern u8 *format_ethernet_arp_ip4_entry (u8 * s, va_list * va); + +ethernet_arp_ip4_entry_t *ip4_neighbors_pool (void); +ethernet_arp_ip4_entry_t *ip4_neighbor_entries (u32 sw_if_index); extern int vnet_proxy_arp_add_del (ip4_address_t * lo_addr, ip4_address_t * hi_addr, @@ -28,8 +51,7 @@ extern int vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm, u32 sw_if_index, const ethernet_arp_ip4_over_ethernet_address_t - * a, int is_static, - int is_no_fib_entry); + * a, ip_neighbor_flags_t flags); extern int vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm, u32 sw_if_index, @@ -39,6 +61,38 @@ extern int vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm, extern int vnet_proxy_arp_fib_reset (u32 fib_id); +void vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm, + void *address_arg, + uword node_index, + uword type_opaque, uword data); + +typedef int (*arp_change_event_cb_t) (u32 pool_index, + const mac_address_t * mac, + u32 sw_if_index, + const ip4_address_t * address); + +int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, + arp_change_event_cb_t data_callback, + u32 pid, + void *address_arg, + uword node_index, + uword type_opaque, + uword data, int is_add); + +void wc_arp_set_publisher_node (uword inode_index, uword event_type); + +void ethernet_arp_change_mac (u32 sw_if_index); +void ethernet_ndp_change_mac (u32 sw_if_index); + +void arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai); + +typedef struct +{ + u32 sw_if_index; + ip4_address_t ip; + mac_address_t mac; +} wc_arp_report_t; + /** * call back function when walking the DB of proxy ARPs * 
@return 0 to stop the walk !0 to continue diff --git a/src/vnet/ethernet/arp_packet.h b/src/vnet/ethernet/arp_packet.h index 206be4162d2d..b4e021cce19e 100644 --- a/src/vnet/ethernet/arp_packet.h +++ b/src/vnet/ethernet/arp_packet.h @@ -18,6 +18,8 @@ #ifndef included_ethernet_arp_packet_h #define included_ethernet_arp_packet_h +#include + #define foreach_ethernet_arp_hardware_type \ _ (0, reserved) \ _ (1, ethernet) \ @@ -119,11 +121,14 @@ typedef enum /* *INDENT-OFF* */ typedef CLIB_PACKED (struct { - u8 ethernet[6]; + mac_address_t mac; ip4_address_t ip4; }) ethernet_arp_ip4_over_ethernet_address_t; /* *INDENT-ON* */ +STATIC_ASSERT (sizeof (ethernet_arp_ip4_over_ethernet_address_t) == 10, + "Packet ethernet address and IP4 address too big"); + typedef struct { u16 l2_type; @@ -140,34 +145,6 @@ typedef struct }; } ethernet_arp_header_t; -typedef enum ethernet_arp_entry_flags_t_ -{ - ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC = (1 << 0), - ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC = (1 << 1), - ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY = (1 << 2), -} __attribute__ ((packed)) ethernet_arp_entry_flags_t; - -typedef struct -{ - u32 sw_if_index; - ip4_address_t ip4_address; - - u8 ethernet_address[6]; - - ethernet_arp_entry_flags_t flags; - - f64 time_last_updated; - - /** - * The index of the adj-fib entry created - */ - fib_node_index_t fib_entry_index; -} ethernet_arp_ip4_entry_t; - -ethernet_arp_ip4_entry_t *ip4_neighbors_pool (void); -ethernet_arp_ip4_entry_t *ip4_neighbor_entries (u32 sw_if_index); -u8 *format_ethernet_arp_ip4_entry (u8 * s, va_list * va); - void send_ip4_garp (vlib_main_t * vm, u32 sw_if_index); void send_ip4_garp_w_addr (vlib_main_t * vm, const ip4_address_t * ip4_addr, u32 sw_if_index); diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h index ceaadd26c1cd..9b19143b03fa 100644 --- a/src/vnet/ethernet/ethernet.h +++ b/src/vnet/ethernet/ethernet.h @@ -42,6 +42,7 @@ #include #include +#include #include #include @@ -59,40 +60,6 @@ typedef 
struct u32 hw_if_index; } ethernet_input_frame_t; -always_inline u64 -ethernet_mac_address_u64 (const u8 * a) -{ - return (((u64) a[0] << (u64) (5 * 8)) - | ((u64) a[1] << (u64) (4 * 8)) - | ((u64) a[2] << (u64) (3 * 8)) - | ((u64) a[3] << (u64) (2 * 8)) - | ((u64) a[4] << (u64) (1 * 8)) | ((u64) a[5] << (u64) (0 * 8))); -} - -always_inline void -ethernet_mac_address_from_u64 (u64 u, u8 * a) -{ - i8 ii; - - for (ii = 5; ii >= 0; ii--) - { - a[ii] = u & 0xFF; - u = u >> 8; - } -} - -static inline int -ethernet_mac_address_is_multicast_u64 (u64 a) -{ - return (a & (1ULL << (5 * 8))) != 0; -} - -static inline int -ethernet_mac_address_is_zero (const u8 * mac) -{ - return ((*((u32 *) mac) == 0) && (*((u16 *) (mac + 4)) == 0)); -} - #ifdef CLIB_HAVE_VEC128 static const u16x8 tagged_ethertypes = { (u16) ETHERNET_TYPE_VLAN, @@ -565,42 +532,12 @@ eth_identify_subint (vnet_hw_interface_t * hi, return 1; } -// Compare two ethernet macs. Return 1 if they are the same, 0 if different -always_inline u32 -eth_mac_equal (const u8 * mac1, const u8 * mac2) -{ - return (*((u32 *) (mac1 + 0)) == *((u32 *) (mac2 + 0)) && - *((u32 *) (mac1 + 2)) == *((u32 *) (mac2 + 2))); -} - - always_inline ethernet_main_t * vnet_get_ethernet_main (void) { return ðernet_main; } -void vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm, - void *address_arg, - uword node_index, - uword type_opaque, uword data); - - -int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, - void *data_callback, - u32 pid, - void *address_arg, - uword node_index, - uword type_opaque, - uword data, int is_add); - -void wc_arp_set_publisher_node (uword inode_index, uword event_type); - -void ethernet_arp_change_mac (u32 sw_if_index); -void ethernet_ndp_change_mac (u32 sw_if_index); - -void arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai); - void ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai); u8 *ethernet_build_rewrite (vnet_main_t * vnm, u32 sw_if_index, @@ -610,13 +547,6 
@@ const u8 *ethernet_ip6_mcast_dst_addr (void); extern vlib_node_registration_t ethernet_input_node; -typedef struct -{ - u32 sw_if_index; - u32 ip4; - u8 mac[6]; -} wc_arp_report_t; - #endif /* included_ethernet_h */ /* diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c index f1e6785cc6b8..1c13c405a61b 100644 --- a/src/vnet/ethernet/interface.c +++ b/src/vnet/ethernet/interface.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -658,9 +659,6 @@ VNET_DEVICE_CLASS (ethernet_simulated_device_class) = { }; /* *INDENT-ON* */ -VLIB_DEVICE_TX_FUNCTION_MULTIARCH (ethernet_simulated_device_class, - simulated_ethernet_interface_tx); - /* * Maintain a bitmap of allocated loopback instance numbers. */ diff --git a/src/vnet/ethernet/mac_address.c b/src/vnet/ethernet/mac_address.c index 419a5b0879fe..6f40e50efa2c 100644 --- a/src/vnet/ethernet/mac_address.c +++ b/src/vnet/ethernet/mac_address.c @@ -37,13 +37,28 @@ uword unformat_mac_address_t (unformat_input_t * input, va_list * args) { mac_address_t *mac = va_arg (*args, mac_address_t *); + u32 i, a[3]; - if (!unformat (input, "%_%x:%x:%x:%x:%x:%x%_", - &mac->bytes[0], &mac->bytes[1], &mac->bytes[2], - &mac->bytes[3], &mac->bytes[4], &mac->bytes[5])) - return 0; + if (unformat (input, "%_%X:%X:%X:%X:%X:%X%_", + 1, &mac->bytes[0], 1, &mac->bytes[1], 1, &mac->bytes[2], + 1, &mac->bytes[3], 1, &mac->bytes[4], 1, &mac->bytes[5])) + return (1); + else if (unformat (input, "%_%x.%x.%x%_", &a[0], &a[1], &a[2])) + { + for (i = 0; i < ARRAY_LEN (a); i++) + if (a[i] >= (1 << 16)) + return 0; - return 1; + mac->bytes[0] = (a[0] >> 8) & 0xff; + mac->bytes[1] = (a[0] >> 0) & 0xff; + mac->bytes[2] = (a[1] >> 8) & 0xff; + mac->bytes[3] = (a[1] >> 0) & 0xff; + mac->bytes[4] = (a[2] >> 8) & 0xff; + mac->bytes[5] = (a[2] >> 0) & 0xff; + + return (1); + } + return (0); } /* diff --git a/src/vnet/ethernet/mac_address.h b/src/vnet/ethernet/mac_address.h index e89fb6516a07..87a66a242be9 
100644 --- a/src/vnet/ethernet/mac_address.h +++ b/src/vnet/ethernet/mac_address.h @@ -16,7 +16,7 @@ #ifndef __MAC_ADDRESS_H__ #define __MAC_ADDRESS_H__ -#include +#include typedef struct mac_address_t_ { @@ -36,18 +36,59 @@ STATIC_ASSERT ((sizeof (mac_address_t) == 6), extern const mac_address_t ZERO_MAC_ADDRESS; +always_inline u64 +ethernet_mac_address_u64 (const u8 * a) +{ + return (((u64) a[0] << (u64) (5 * 8)) + | ((u64) a[1] << (u64) (4 * 8)) + | ((u64) a[2] << (u64) (3 * 8)) + | ((u64) a[3] << (u64) (2 * 8)) + | ((u64) a[4] << (u64) (1 * 8)) | ((u64) a[5] << (u64) (0 * 8))); +} + +always_inline void +ethernet_mac_address_from_u64 (u64 u, u8 * a) +{ + i8 ii; + + for (ii = 5; ii >= 0; ii--) + { + a[ii] = u & 0xFF; + u = u >> 8; + } +} + +static inline int +ethernet_mac_address_is_multicast_u64 (u64 a) +{ + return (a & (1ULL << (5 * 8))) != 0; +} + +static inline int +ethernet_mac_address_is_zero (const u8 * mac) +{ + return ((*((u32 *) mac) == 0) && (*((u16 *) (mac + 4)) == 0)); +} + +static inline int +ethernet_mac_address_equal (const u8 * a, const u8 * b) +{ + return ((*((u32 *) a) == (*((u32 *) b))) && + (*((u16 *) (a + 4)) == (*((u16 *) (b + 4))))); +} + static_always_inline void mac_address_from_bytes (mac_address_t * mac, const u8 * bytes) { /* zero out the last 2 bytes, then copy over only 6 */ - clib_memcpy (mac->bytes, bytes, 6); + clib_memcpy_fast (mac->bytes, bytes, 6); } static_always_inline void mac_address_to_bytes (const mac_address_t * mac, u8 * bytes) { /* zero out the last 2 bytes, then copy over only 6 */ - clib_memcpy (bytes, mac->bytes, 6); + clib_memcpy_fast (bytes, mac->bytes, 6); } static_always_inline int @@ -67,7 +108,7 @@ mac_address_as_u64 (const mac_address_t * mac) } static_always_inline void -mac_address_from_u64 (u64 u, mac_address_t * mac) +mac_address_from_u64 (mac_address_t * mac, u64 u) { clib_memcpy (mac->bytes, &u, 6); } @@ -78,6 +119,25 @@ mac_address_copy (mac_address_t * dst, const mac_address_t * src) 
mac_address_from_bytes (dst, src->bytes); } +static_always_inline int +mac_address_cmp (const mac_address_t * a, const mac_address_t * b) +{ + return (memcmp (a->bytes, b->bytes, 6)); +} + +static_always_inline int +mac_address_equal (const mac_address_t * a, const mac_address_t * b) +{ + return (a->u.last_2 == b->u.last_2 && a->u.first_4 == b->u.first_4); +} + +static_always_inline void +mac_address_set_zero (mac_address_t * mac) +{ + mac->u.first_4 = 0; + mac->u.last_2 = 0; +} + extern uword unformat_mac_address_t (unformat_input_t * input, va_list * args); extern u8 *format_mac_address_t (u8 * s, va_list * args); diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index 268b171a6afe..3264bdc03afb 100755 --- a/src/vnet/ethernet/node.c +++ b/src/vnet/ethernet/node.c @@ -223,7 +223,7 @@ identify_subint (vnet_hw_interface_t * hi, if (!(ethernet_address_cast (e0->dst_address))) { - if (!eth_mac_equal ((u8 *) e0, hi->hw_address)) + if (!ethernet_mac_address_equal ((u8 *) e0, hi->hw_address)) { *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH; } @@ -1123,11 +1123,11 @@ ethernet_input_inline (vlib_main_t * vm, { if (!ethernet_address_cast (e0->dst_address) && (hi->hw_address != 0) && - !eth_mac_equal ((u8 *) e0, hi->hw_address)) + !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address)) error0 = ETHERNET_ERROR_L3_MAC_MISMATCH; if (!ethernet_address_cast (e1->dst_address) && (hi->hw_address != 0) && - !eth_mac_equal ((u8 *) e1, hi->hw_address)) + !ethernet_mac_address_equal ((u8 *) e1, hi->hw_address)) error1 = ETHERNET_ERROR_L3_MAC_MISMATCH; vlib_buffer_advance (b0, sizeof (ethernet_header_t)); determine_next_node (em, variant, 0, type0, b0, @@ -1347,7 +1347,7 @@ ethernet_input_inline (vlib_main_t * vm, { if (!ethernet_address_cast (e0->dst_address) && (hi->hw_address != 0) && - !eth_mac_equal ((u8 *) e0, hi->hw_address)) + !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address)) error0 = ETHERNET_ERROR_L3_MAC_MISMATCH; vlib_buffer_advance (b0, sizeof 
(ethernet_header_t)); determine_next_node (em, variant, 0, type0, b0, diff --git a/src/vnet/fib/fib_node.h b/src/vnet/fib/fib_node.h index 1179dfc0a502..e9c45c55efd6 100644 --- a/src/vnet/fib/fib_node.h +++ b/src/vnet/fib/fib_node.h @@ -48,6 +48,7 @@ typedef enum fib_node_type_t_ { FIB_NODE_TYPE_BIER_FMASK, FIB_NODE_TYPE_BIER_ENTRY, FIB_NODE_TYPE_VXLAN_GBP_TUNNEL, + FIB_NODE_TYPE_IPSEC_SA, /** * Marker. New types before this one. leave the test last. */ @@ -75,7 +76,8 @@ typedef enum fib_node_type_t_ { [FIB_NODE_TYPE_UDP_ENCAP] = "udp-encap", \ [FIB_NODE_TYPE_BIER_FMASK] = "bier-fmask", \ [FIB_NODE_TYPE_BIER_ENTRY] = "bier-entry", \ - [FIB_NODE_TYPE_VXLAN_GBP_TUNNEL] = "vxlan-gbp-tunnel" \ + [FIB_NODE_TYPE_VXLAN_GBP_TUNNEL] = "vxlan-gbp-tunnel", \ + [FIB_NODE_TYPE_IPSEC_SA] = "ipsec-sa" \ } /** diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index 9ad1842dd1a7..45ccb4f273db 100644 --- a/src/vnet/fib/ip4_fib.c +++ b/src/vnet/fib/ip4_fib.c @@ -643,11 +643,13 @@ ip4_show_fib (vlib_main_t * vm, if (memory) { - uword mtrie_size, hash_size; + uword mtrie_size, hash_size, *old_heap; + mtrie_size = ip4_fib_mtrie_memory_usage(&fib->mtrie); hash_size = 0; + old_heap = clib_mem_set_heap (ip4_main.mtrie_mheap); for (i = 0; i < ARRAY_LEN (fib->fib_entry_by_dst_address); i++) { uword * hash = fib->fib_entry_by_dst_address[i]; @@ -656,6 +658,8 @@ ip4_show_fib (vlib_main_t * vm, hash_size += hash_bytes(hash); } } + clib_mem_set_heap (old_heap); + if (verbose) vlib_cli_output (vm, "%U mtrie:%d hash:%d", format_fib_table_name, fib->index, @@ -717,11 +721,14 @@ ip4_show_fib (vlib_main_t * vm, })); if (memory) + { vlib_cli_output (vm, "totals: mtrie:%ld hash:%ld all:%ld", total_mtrie_memory, total_hash_memory, total_mtrie_memory + total_hash_memory); - + vlib_cli_output (vm, "\nMtrie Mheap Usage: %U\n", + format_mheap, ip4_main.mtrie_mheap, 1); + } return 0; } diff --git a/src/vnet/interface.c b/src/vnet/interface.c index 37f27ea49ea6..12204bd57186 100644 --- 
a/src/vnet/interface.c +++ b/src/vnet/interface.c @@ -837,7 +837,7 @@ vnet_register_interface (vnet_main_t * vnm, /* The new class may differ from the old one. * Functions have to be updated. */ node = vlib_get_node (vm, hw->output_node_index); - node->function = vnet_interface_output_node_multiarch_select (); + node->function = vnet_interface_output_node; node->format_trace = format_vnet_interface_output_trace; /* *INDENT-OFF* */ foreach_vlib_main ({ @@ -887,7 +887,7 @@ vnet_register_interface (vnet_main_t * vnm, r.flags = 0; r.name = output_node_name; - r.function = vnet_interface_output_node_multiarch_select (); + r.function = vnet_interface_output_node; r.format_trace = format_vnet_interface_output_trace; { diff --git a/src/vnet/interface.h b/src/vnet/interface.h index f6f486e45912..174e5347ad22 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -302,30 +302,8 @@ CLIB_MARCH_SFX (devclass##_tx_fn_multiarch_register) (void) \ } \ uword CLIB_CPU_OPTIMIZED CLIB_MARCH_SFX (devclass##_tx_fn) -#define VLIB_DEVICE_TX_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt) \ - uword \ - __attribute__ ((flatten)) \ - __attribute__ ((target (tgt))) \ - CLIB_CPU_OPTIMIZED \ - fn ## _ ## arch ( vlib_main_t * vm, \ - vlib_node_runtime_t * node, \ - vlib_frame_t * frame) \ - { return fn (vm, node, frame); } - -#define VLIB_DEVICE_TX_FUNCTION_MULTIARCH_CLONE(fn) \ - foreach_march_variant(VLIB_DEVICE_TX_FUNCTION_CLONE_TEMPLATE, fn) - -#if CLIB_DEBUG > 0 -#define VLIB_MULTIARCH_CLONE_AND_SELECT_FN(fn,...) +/* FIXME to be removed */ #define VLIB_DEVICE_TX_FUNCTION_MULTIARCH(dev, fn) -#else -#define VLIB_DEVICE_TX_FUNCTION_MULTIARCH(dev, fn) \ - VLIB_DEVICE_TX_FUNCTION_MULTIARCH_CLONE(fn) \ - CLIB_MULTIARCH_SELECT_FN(fn, static inline) \ - static void __attribute__((__constructor__)) \ - __vlib_device_tx_function_multiarch_select_##dev (void) \ - { dev.tx_function = fn ## _multiarch_select(); } -#endif /** * Link Type: A description of the protocol of packets on the link. 
@@ -774,7 +752,8 @@ typedef enum _(RX_NO_BUF, rx-no-buf, if) \ _(RX_MISS, rx-miss, if) \ _(RX_ERROR, rx-error, if) \ - _(TX_ERROR, tx-error, if) + _(TX_ERROR, tx-error, if) \ + _(MPLS, mpls, if) #define foreach_combined_interface_counter_name \ _(RX, rx, if) \ @@ -871,6 +850,10 @@ void vnet_pcap_drop_trace_filter_add_del (u32 error_index, int is_add); int vnet_interface_name_renumber (u32 sw_if_index, u32 new_show_dev_instance); +uword vnet_interface_output_node (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame); + #endif /* included_vnet_interface_h */ /* diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h index c4c66179bb5b..9a674b180b98 100644 --- a/src/vnet/interface_funcs.h +++ b/src/vnet/interface_funcs.h @@ -418,8 +418,6 @@ typedef struct } vnet_interface_output_runtime_t; /* Interface output function. */ -void *vnet_interface_output_node_multiarch_select (void); - word vnet_sw_interface_compare (vnet_main_t * vnm, uword sw_if_index0, uword sw_if_index1); word vnet_hw_interface_compare (vnet_main_t * vnm, uword hw_if_index0, diff --git a/src/vnet/interface_output.c b/src/vnet/interface_output.c index ba69d36b6c46..beeb62a2db4d 100644 --- a/src/vnet/interface_output.c +++ b/src/vnet/interface_output.c @@ -446,7 +446,7 @@ vnet_interface_output_node_inline (vlib_main_t * vm, return n_buffers; } -static uword +uword vnet_interface_output_node (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { @@ -463,9 +463,6 @@ vnet_interface_output_node (vlib_main_t * vm, vlib_node_runtime_t * node, /* do_tx_offloads */ 1); } -VLIB_NODE_FUNCTION_MULTIARCH_CLONE (vnet_interface_output_node); -CLIB_MULTIARCH_SELECT_FN (vnet_interface_output_node); - /* Use buffer's sw_if_index[VNET_TX] to choose output interface. 
*/ static uword vnet_per_buffer_interface_output (vlib_main_t * vm, @@ -1019,6 +1016,37 @@ VLIB_REGISTER_NODE (vnet_per_buffer_interface_output_node,static) = { }; /* *INDENT-ON* */ +/* Convenience node to drop a vector of buffers with a "misc error". */ +static uword +misc_drop_buffers (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return vlib_error_drop_buffers (vm, node, vlib_frame_vector_args (frame), + /* buffer stride */ 1, + frame->n_vectors, + /* next */ 0, + node->node_index, + /* error */ 0); +} + +static char *misc_drop_buffers_error_strings[] = { + [0] = "misc. errors", +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (misc_drop_buffers_node,static) = { + .function = misc_drop_buffers, + .name = "misc-drop-buffers", + .vector_size = sizeof (u32), + .n_errors = 1, + .n_next_nodes = 1, + .next_nodes = { + "error-drop", + }, + .error_strings = misc_drop_buffers_error_strings, +}; +/* *INDENT-ON* */ + VLIB_NODE_FUNCTION_MULTIARCH (vnet_per_buffer_interface_output_node, vnet_per_buffer_interface_output); diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api index 025fd577cf53..b67e2e5f0c85 100644 --- a/src/vnet/ip/ip.api +++ b/src/vnet/ip/ip.api @@ -20,7 +20,7 @@ called through a shared memory interface. */ -option version = "1.4.0"; +option version = "2.0.0"; import "vnet/ip/ip_types.api"; import "vnet/fib/fib_types.api"; import "vnet/ethernet/ethernet_types.api"; @@ -103,66 +103,47 @@ manual_endian manual_print define ip6_fib_details vl_api_fib_path_t path[count]; }; -/** \brief Dump IP neighboors - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface to dump neighboors, ~0 == all - @param is_ipv6 - [1|0] to indicate if address family is ipv[6|4] +/** \brief IP neighbor flags + @param is_static - A static neighbor Entry - there are not flushed + If the interface goes down. 
+ @param is_no_fib_entry - Do not create a corresponding entry in the FIB + table for the neighbor. */ -define ip_neighbor_dump +enum ip_neighbor_flags { - u32 client_index; - u32 context; - u32 sw_if_index; - u8 is_ipv6; + IP_API_NEIGHBOR_FLAG_NONE = 0, + IP_API_NEIGHBOR_FLAG_STATIC = 0x1, + IP_API_NEIGHBOR_FLAG_NO_FIB_ENTRY = 0x2, + IP_API_NEIGHBOR_FLAG_FIX_ME_OLE = 0x3, }; -/** \brief IP neighboors dump response - @param context - sender context which was passed in the request - @param sw_if_index - The interface used to reach the neighbor - @param stats_index - An index in the stats segment that can be used to read - the counters for this neighbour. - @param is_static - [1|0] to indicate if neighbor is statically configured - @param is_ipv6 - [1|0] to indicate if address family is ipv[6|4] +/** \brief IP neighbor + @param sw_if_index - interface used to reach neighbor + @param mac_address - l2 address of the neighbor + @param ip_address - ip4 or ip6 address of the neighbor + @param flags - flags for the neighbor */ -define ip_neighbor_details { - u32 context; - u32 sw_if_index; - u32 stats_index; - u8 is_static; - u8 is_ipv6; - u8 mac_address[6]; - u8 ip_address[16]; +typedef ip_neighbor { + u32 sw_if_index; + vl_api_ip_neighbor_flags_t flags; + vl_api_mac_address_t mac_address; + vl_api_address_t ip_address; }; /** \brief IP neighbor add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param sw_if_index - interface used to reach neighbor @param is_add - 1 to add neighbor, 0 to delete - @param is_ipv6 - 1 for IPv6 neighbor, 0 for IPv4 - @param is_ipv6 - 1 for IPv6 neighbor, 0 for IPv4 - @param is_static - A static neighbor Entry - there are not flushed - If the interface goes down. - @param is_no_adj_fib - Do not create a corresponding entry in the FIB - table for the neighbor.
- @param mac_address - l2 address of the neighbor - @param dst_address - ip4 or ip6 address of the neighbor + @param neighbor - the neighbor to add/remove */ define ip_neighbor_add_del { u32 client_index; u32 context; - u32 sw_if_index; /* 1 = add, 0 = delete */ u8 is_add; - u8 is_ipv6; - u8 is_static; - u8 is_no_adj_fib; - u8 mac_address[6]; - u8 dst_address[16]; + vl_api_ip_neighbor_t neighbor; }; - define ip_neighbor_add_del_reply { u32 context; @@ -170,6 +151,29 @@ define ip_neighbor_add_del_reply u32 stats_index; }; +/** \brief Dump IP neighbors + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface to dump neighbors, ~0 == all + @param is_ipv6 - [1|0] to indicate if address family is ipv[6|4] +*/ +define ip_neighbor_dump +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 is_ipv6; +}; + +/** \brief IP neighbors dump response + @param context - sender context which was passed in the request + @param neighbor - the neighbor +*/ +define ip_neighbor_details { + u32 context; + vl_api_ip_neighbor_t neighbor; +}; + /** \brief Set the ip flow hash config for a fib request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -239,8 +243,7 @@ autoreply define sw_interface_ip6nd_ra_config @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - The interface the RA prefix information is for - @param address[] - The prefix to advertise - @param address_length - the prefix length + @param prefix - The prefix to advertise @param use_default - Revert to default settings @param no_advertise - Do not advertise this prefix @param off_link - The prefix is off link (it is not configured on the interface) @@ -269,8 +272,7 @@ autoreply define sw_interface_ip6nd_ra_prefix u32 client_index; u32 context; u32 sw_if_index;
- u8 address[16]; - u8 address_length; + vl_api_prefix_t prefix; u8 use_default; u8 no_advertise; u8 off_link; @@ -294,7 +296,7 @@ autoreply define ip6nd_proxy_add_del u32 context; u32 sw_if_index; u8 is_del; - u8 address[16]; + vl_api_ip6_address_t ip; }; /** \brief IPv6 ND proxy details returned after request @@ -305,7 +307,7 @@ define ip6nd_proxy_details { u32 context; u32 sw_if_index; - u8 address[16]; + vl_api_ip6_address_t ip; }; /** \brief IPv6 ND proxy dump request @@ -683,9 +685,7 @@ autoreply define ip_container_proxy_add_del { u32 client_index; u32 context; - u8 ip[16]; - u8 is_ip4; - u8 plen; + vl_api_prefix_t pfx; u32 sw_if_index; u8 is_add; }; @@ -708,8 +708,7 @@ define ip_container_proxy_details @param context - sender context, to match reply w/ request @param is_ip6 - 1 if source address type is IPv6 @param is_add - 1 if add, 0 if delete - @param mask_length - mask length for address entry - @param address - array of address bytes + @param prefix - prefix to match @param number_of_ranges - length of low_port and high_port arrays (must match) @param low_ports[32] - up to 32 low end of port range entries (must have corresponding high_ports entry) @param high_ports[32] - up to 32 high end of port range entries (must have corresponding low_ports entry) @@ -720,10 +719,8 @@ autoreply define ip_source_and_port_range_check_add_del { u32 client_index; u32 context; - u8 is_ipv6; u8 is_add; - u8 mask_length; - u8 address[16]; + vl_api_prefix_t prefix; u8 number_of_ranges; u16 low_ports[32]; u16 high_ports[32]; @@ -794,16 +791,14 @@ autoreply define ip_scan_neighbor_enable_disable @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - interface index - @param dst_address - target IP address to send IP addr resolution request - @param is_ipv6 - [1|0] to indicate if address family is IPv[6|4] + @param dst - target IP address to send IP addr resolution request */ autoreply define
ip_probe_neighbor { u32 client_index; u32 context; u32 sw_if_index; - u8 dst_address[16]; - u8 is_ipv6; + vl_api_address_t dst; }; /** \brief Register for IP4 ARP resolution event on receing ARP reply or @@ -812,8 +807,8 @@ autoreply define ip_probe_neighbor @param context - sender context, to match reply w/ request @param enable_disable - 1 => register for events, 0 => cancel registration @param pid - sender's pid - @param address - exact IP4 address of interested arp resolution event, or - 0 to get MAC/IP info from ARP requests in BDs + @param ip - exact IP4 address of interested arp resolution event, or + 0 to get MAC/IP info from ARP requests in BDs */ autoreply define want_ip4_arp_events { @@ -821,25 +816,25 @@ autoreply define want_ip4_arp_events u32 context; u8 enable_disable; u32 pid; - u32 address; + vl_api_ip4_address_t ip; }; /** \brief Tell client about an IP4 ARP resolution event or MAC/IP info from ARP requests in L2 BDs @param client_index - opaque cookie to identify the sender - @param address - the exact ip4 address of interest + @param ip - the exact ip4 address of interest @param pid - client pid registered to receive notification @param sw_if_index - interface which received ARP packet - @param new_mac - the new mac address + @param mac - the new mac address @param mac_ip - 0: ARP resolution event, 1: MAC/IP info from L2 BDs */ define ip4_arp_event { u32 client_index; - u32 address; + vl_api_ip4_address_t ip; u32 pid; u32 sw_if_index; - u8 new_mac[6]; + vl_api_mac_address_t mac; u8 mac_ip; }; @@ -854,8 +849,8 @@ service { @param context - sender context, to match reply w/ request @param enable_disable - 1 => register for events, 0 => cancel registration @param pid - sender's pid - @param address - the exact IP6 address of interested ND resolution event, or - 0 to get MAC/IP info from ICMP6 NS in L2 BDs. + @param ip - the exact IP6 address of interested ND resolution event, or + 0 to get MAC/IP info from ICMP6 NS in L2 BDs. 
*/ autoreply define want_ip6_nd_events { @@ -863,7 +858,7 @@ autoreply define want_ip6_nd_events u32 context; u8 enable_disable; u32 pid; - u8 address[16]; + vl_api_ip6_address_t ip; }; /** \brief Tell client about an IP6 ND resolution or @@ -871,7 +866,7 @@ autoreply define want_ip6_nd_events @param client_index - opaque cookie to identify the sender @param pid - client pid registered to receive notification @param sw_if_index - interface which received ARP packet - @param address - the exact ip6 address of interest + @param ip - the exact ip6 address of interest @param new_mac - the new mac address @param mac_ip - 0: ND resolution event, 1: MAC/IP info from L2 BDs */ @@ -880,8 +875,8 @@ define ip6_nd_event u32 client_index; u32 pid; u32 sw_if_index; - u8 address[16]; - u8 new_mac[6]; + vl_api_ip6_address_t ip; + vl_api_mac_address_t mac; u8 mac_ip; }; @@ -905,16 +900,14 @@ autoreply define want_ip6_ra_events }; /** \brief Struct representing RA prefix info - @param dst_address - RA prefix info destination address - @param dst_address_length - RA prefix info destination address length + @param prefix - RA prefix info destination address @param flags - RA prefix info flags @param valid_time - RA prefix info valid time @param preferred_time - RA prefix info preferred time */ typeonly define ip6_ra_prefix_info { - u8 dst_address[16]; - u8 dst_address_length; + vl_api_prefix_t prefix; u8 flags; u32 valid_time; u32 preferred_time; @@ -926,6 +919,7 @@ typeonly define ip6_ra_prefix_info @param current_hop_limit - RA current hop limit @param flags - RA flags @param router_lifetime_in_sec - RA lifetime in seconds + @param router_addr - The router's address @param neighbor_reachable_time_in_msec - RA neighbor reachable time in msec @param time_in_msec_between_retransmitted_neighbor_solicitations - time in msec between retransmitted neighbor solicitations @@ -937,7 +931,7 @@ define ip6_ra_event u32 client_index; u32 pid; u32 sw_if_index; - u8 router_address[16]; + 
vl_api_ip6_address_t router_addr; u8 current_hop_limit; u8 flags; u16 router_lifetime_in_sec; @@ -953,15 +947,15 @@ service { }; /** \brief Proxy ARP configuration type - @param vrf_id - VRF / Fib table ID - @param low_address[4] - Low address of the Proxy ARP range - @param hi_address[4] - High address of the Proxy ARP range + @param table_id - VRF / Fib table ID + @param low - Low address of the Proxy ARP range + @param hi - High address of the Proxy ARP range */ typeonly define proxy_arp { - u32 vrf_id; - u8 low_address[4]; - u8 hi_address[4]; + u32 table_id; + vl_api_ip4_address_t low; + vl_api_ip4_address_t hi; }; /** \brief Proxy ARP add / del request diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 1a1b7e5182b0..b3ae29a2790e 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -900,7 +900,9 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del); /* Global IP4 main. */ +#ifndef CLIB_MARCH_VARIANT ip4_main_t ip4_main; +#endif /* CLIB_MARCH_VARIANT */ static clib_error_t * ip4_lookup_init (vlib_main_t * vm) @@ -948,10 +950,6 @@ ip4_lookup_init (vlib_main_t * vm) clib_memset (&h, 0, sizeof (h)); - /* Set target ethernet address to all zeros. */ - clib_memset (h.ip4_over_ethernet[1].ethernet, 0, - sizeof (h.ip4_over_ethernet[1].ethernet)); - #define _16(f,v) h.f = clib_host_to_net_u16 (v); #define _8(f,v) h.f = v; _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet); @@ -1872,9 +1870,8 @@ ip4_arp_inline (vlib_main_t * vm, hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); /* Src ethernet address in ARP header. 
*/ - clib_memcpy_fast (h0->ip4_over_ethernet[0].ethernet, - hw_if0->hw_address, - sizeof (h0->ip4_over_ethernet[0].ethernet)); + mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac, + hw_if0->hw_address); if (is_glean) { /* The interface's source address is stashed in the Glean Adj */ @@ -2046,8 +2043,7 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index, sw_if_index); } - clib_memcpy_fast (h->ip4_over_ethernet[0].ethernet, hi->hw_address, - sizeof (h->ip4_over_ethernet[0].ethernet)); + mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address); h->ip4_over_ethernet[0].ip4 = src[0]; h->ip4_over_ethernet[1].ip4 = dst[0]; diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index ab17f6621987..1a57c4173424 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -393,14 +394,34 @@ serialize_function_t serialize_vnet_ip6_main, unserialize_vnet_ip6_main; void ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai); - -void +always_inline void ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip, - u8 * mac); + u8 * mac) +{ + ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL); + /* Invert the "u" bit */ + ip->as_u8[8] = mac[0] ^ (1 << 1); + ip->as_u8[9] = mac[1]; + ip->as_u8[10] = mac[2]; + ip->as_u8[11] = 0xFF; + ip->as_u8[12] = 0xFE; + ip->as_u8[13] = mac[3]; + ip->as_u8[14] = mac[4]; + ip->as_u8[15] = mac[5]; +} -void +always_inline void ip6_ethernet_mac_address_from_link_local_address (u8 * mac, - ip6_address_t * ip); + ip6_address_t * ip) +{ + /* Invert the previously inverted "u" bit */ + mac[0] = ip->as_u8[8] ^ (1 << 1); + mac[1] = ip->as_u8[9]; + mac[2] = ip->as_u8[10]; + mac[3] = ip->as_u8[13]; + mac[4] = ip->as_u8[14]; + mac[5] = ip->as_u8[15]; +} int vnet_set_ip6_flow_hash (u32 table_id, flow_hash_config_t flow_hash_config); @@ -415,8 +436,13 @@ clib_error_t *set_ip6_link_local_address (vlib_main_t * vm, 
u32 sw_if_index, ip6_address_t * address); +typedef int (*ip6_nd_change_event_cb_t) (u32 pool_index, + const mac_address_t * new_mac, + u32 sw_if_index, + const ip6_address_t * address); + int vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm, - void *data_callback, + ip6_nd_change_event_cb_t data_callback, u32 pid, void *address_arg, uword node_index, diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 56cef4aa43ea..2c3879b13d1c 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -50,7 +50,9 @@ #include #include +#ifndef CLIB_MARCH_VARIANT #include +#endif #include /* Flag used by IOAM code. Classifier sets it pop-hop-by-hop checks it */ @@ -138,6 +140,7 @@ ip6_del_interface_routes (ip6_main_t * im, fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE); } +#ifndef CLIB_MARCH_VARIANT void ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) { @@ -326,7 +329,9 @@ ip6_add_del_interface_address (vlib_main_t * vm, return error; } -clib_error_t * +#endif + +static clib_error_t * ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) { ip6_main_t *im = &ip6_main; @@ -493,7 +498,7 @@ VNET_FEATURE_INIT (ip6_interface_output, static) = { }; /* *INDENT-ON* */ -clib_error_t * +static clib_error_t * ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) { ip6_main_t *im = &ip6_main; @@ -533,9 +538,9 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del); -static uword -ip6_lookup (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { return ip6_lookup_inline (vm, node, frame); } @@ -545,7 +550,6 @@ static u8 *format_ip6_lookup_trace (u8 * s, va_list * args); /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_lookup_node) = { - .function = ip6_lookup, .name = "ip6-lookup", 
.vector_size = sizeof (u32), .format_trace = format_ip6_lookup_trace, @@ -554,11 +558,9 @@ VLIB_REGISTER_NODE (ip6_lookup_node) = }; /* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup); - -static uword -ip6_load_balance (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters; u32 n_left_from, n_left_to_next, *from, *to_next; @@ -776,7 +778,6 @@ ip6_load_balance (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_load_balance_node) = { - .function = ip6_load_balance, .name = "ip6-load-balance", .vector_size = sizeof (u32), .sibling_of = "ip6-lookup", @@ -784,8 +785,6 @@ VLIB_REGISTER_NODE (ip6_load_balance_node) = }; /* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance); - typedef struct { /* Adjacency taken. */ @@ -798,6 +797,7 @@ typedef struct } ip6_forward_next_trace_t; +#ifndef CLIB_MARCH_VARIANT u8 * format_ip6_forward_next_trace (u8 * s, va_list * args) { @@ -811,6 +811,7 @@ format_ip6_forward_next_trace (u8 * s, va_list * args) format_ip6_header, t->packet_data, sizeof (t->packet_data)); return s; } +#endif static u8 * format_ip6_lookup_trace (u8 * s, va_list * args) @@ -848,6 +849,7 @@ format_ip6_rewrite_trace (u8 * s, va_list * args) } /* Common trace function for all ip6-forward next nodes. */ +#ifndef CLIB_MARCH_VARIANT void ip6_forward_next_trace (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1045,6 +1047,7 @@ ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) return p0->flags; } +#endif /** * @brief returns number of links on which src is reachable. 
@@ -1403,16 +1406,15 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node, return frame->n_vectors; } -static uword -ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) { return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ ); } /* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip6_local_node, static) = +VLIB_REGISTER_NODE (ip6_local_node) = { - .function = ip6_local, .name = "ip6-local", .vector_size = sizeof (u32), .format_trace = format_ip6_forward_next_trace, @@ -1428,19 +1430,15 @@ VLIB_REGISTER_NODE (ip6_local_node, static) = }; /* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local); - - -static uword -ip6_local_end_of_arc (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_local_end_of_arc_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */ ); } /* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip6_local_end_of_arc_node,static) = { - .function = ip6_local_end_of_arc, +VLIB_REGISTER_NODE (ip6_local_end_of_arc_node) = { .name = "ip6-local-end-of-arc", .vector_size = sizeof (u32), @@ -1448,8 +1446,6 @@ VLIB_REGISTER_NODE (ip6_local_end_of_arc_node,static) = { .sibling_of = "ip6-local", }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_end_of_arc_node, ip6_local_end_of_arc) - VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = { .arc_name = "ip6-local", .node_name = "ip6-local-end-of-arc", @@ -1457,6 +1453,11 @@ VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = { }; /* *INDENT-ON* */ +#ifdef CLIB_MARCH_VARIANT +extern vlib_node_registration_t ip6_local_node; + +#else + void ip6_register_protocol (u32 protocol, u32 node_index) { @@ -1576,6 +1577,7 @@ ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index, adj_unlock (ai); return /* no error */ 0; } +#endif 
typedef enum { @@ -1969,9 +1971,9 @@ ip6_rewrite_inline (vlib_main_t * vm, return frame->n_vectors; } -static uword -ip6_rewrite (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_rewrite_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { if (adj_are_counters_enabled ()) return ip6_rewrite_inline (vm, node, frame, 1, 0, 0); @@ -1979,9 +1981,9 @@ ip6_rewrite (vlib_main_t * vm, return ip6_rewrite_inline (vm, node, frame, 0, 0, 0); } -static uword -ip6_rewrite_bcast (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_rewrite_bcast_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { if (adj_are_counters_enabled ()) return ip6_rewrite_inline (vm, node, frame, 1, 0, 0); @@ -1989,9 +1991,9 @@ ip6_rewrite_bcast (vlib_main_t * vm, return ip6_rewrite_inline (vm, node, frame, 0, 0, 0); } -static uword -ip6_rewrite_mcast (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_rewrite_mcast_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { if (adj_are_counters_enabled ()) return ip6_rewrite_inline (vm, node, frame, 1, 0, 1); @@ -1999,9 +2001,9 @@ ip6_rewrite_mcast (vlib_main_t * vm, return ip6_rewrite_inline (vm, node, frame, 0, 0, 1); } -static uword -ip6_midchain (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_midchain_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { if (adj_are_counters_enabled ()) return ip6_rewrite_inline (vm, node, frame, 1, 1, 0); @@ -2009,9 +2011,9 @@ ip6_midchain (vlib_main_t * vm, return ip6_rewrite_inline (vm, node, frame, 0, 1, 0); } -static uword -ip6_mcast_midchain (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { if (adj_are_counters_enabled 
()) return ip6_rewrite_inline (vm, node, frame, 1, 1, 1); @@ -2022,18 +2024,14 @@ ip6_mcast_midchain (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_midchain_node) = { - .function = ip6_midchain, .name = "ip6-midchain", .vector_size = sizeof (u32), .format_trace = format_ip6_forward_next_trace, .sibling_of = "ip6-rewrite", }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain); - VLIB_REGISTER_NODE (ip6_rewrite_node) = { - .function = ip6_rewrite, .name = "ip6-rewrite", .vector_size = sizeof (u32), .format_trace = format_ip6_rewrite_trace, @@ -2046,45 +2044,39 @@ VLIB_REGISTER_NODE (ip6_rewrite_node) = }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite); - VLIB_REGISTER_NODE (ip6_rewrite_bcast_node) = { - .function = ip6_rewrite_bcast, .name = "ip6-rewrite-bcast", .vector_size = sizeof (u32), .format_trace = format_ip6_rewrite_trace, .sibling_of = "ip6-rewrite", }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_bcast_node, ip6_rewrite_bcast) VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) = { - .function = ip6_rewrite_mcast, .name = "ip6-rewrite-mcast", .vector_size = sizeof (u32), .format_trace = format_ip6_rewrite_trace, .sibling_of = "ip6-rewrite", }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_mcast_node, ip6_rewrite_mcast); -VLIB_REGISTER_NODE (ip6_mcast_midchain_node, static) = +VLIB_REGISTER_NODE (ip6_mcast_midchain_node) = { - .function = ip6_mcast_midchain, .name = "ip6-mcast-midchain", .vector_size = sizeof (u32), .format_trace = format_ip6_rewrite_trace, .sibling_of = "ip6-rewrite", }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_mcast_midchain_node, ip6_mcast_midchain); /* *INDENT-ON* */ /* * Hop-by-Hop handling */ +#ifndef CLIB_MARCH_VARIANT ip6_hop_by_hop_main_t ip6_hop_by_hop_main; +#endif /* CLIB_MARCH_VARIANT */ #define foreach_ip6_hop_by_hop_error \ _(PROCESSED, "pkts with ip6 hop-by-hop options") \ @@ -2112,7 +2104,7 @@ typedef struct u8 option_data[256]; } ip6_hop_by_hop_trace_t; -vlib_node_registration_t 
ip6_hop_by_hop_node; +extern vlib_node_registration_t ip6_hop_by_hop_node; static char *ip6_hop_by_hop_error_strings[] = { #define _(sym,string) string, @@ -2120,6 +2112,7 @@ static char *ip6_hop_by_hop_error_strings[] = { #undef _ }; +#ifndef CLIB_MARCH_VARIANT u8 * format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args) { @@ -2163,6 +2156,7 @@ format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args) } return s; } +#endif static u8 * format_ip6_hop_by_hop_trace (u8 * s, va_list * args) @@ -2290,9 +2284,9 @@ ip6_scan_hbh_options (vlib_buffer_t * b0, /* * Process the Hop-by-Hop Options header */ -static uword -ip6_hop_by_hop (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index); @@ -2530,7 +2524,6 @@ ip6_hop_by_hop (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = { - .function = ip6_hop_by_hop, .name = "ip6-hop-by-hop", .sibling_of = "ip6-lookup", .vector_size = sizeof (u32), @@ -2542,8 +2535,6 @@ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = }; /* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (ip6_hop_by_hop_node, ip6_hop_by_hop); - static clib_error_t * ip6_hop_by_hop_init (vlib_main_t * vm) { @@ -2556,6 +2547,7 @@ ip6_hop_by_hop_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (ip6_hop_by_hop_init); +#ifndef CLIB_MARCH_VARIANT void ip6_hbh_set_next_override (uword next) { @@ -2622,6 +2614,7 @@ ip6_hbh_unregister_option (u8 option) /* Global IP6 main. 
*/ ip6_main_t ip6_main; +#endif static clib_error_t * ip6_lookup_init (vlib_main_t * vm) @@ -2717,35 +2710,6 @@ ip6_lookup_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (ip6_lookup_init); -void -ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip, - u8 * mac) -{ - ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL); - /* Invert the "u" bit */ - ip->as_u8[8] = mac[0] ^ (1 << 1); - ip->as_u8[9] = mac[1]; - ip->as_u8[10] = mac[2]; - ip->as_u8[11] = 0xFF; - ip->as_u8[12] = 0xFE; - ip->as_u8[13] = mac[3]; - ip->as_u8[14] = mac[4]; - ip->as_u8[15] = mac[5]; -} - -void -ip6_ethernet_mac_address_from_link_local_address (u8 * mac, - ip6_address_t * ip) -{ - /* Invert the previously inverted "u" bit */ - mac[0] = ip->as_u8[8] ^ (1 << 1); - mac[1] = ip->as_u8[9]; - mac[2] = ip->as_u8[10]; - mac[3] = ip->as_u8[13]; - mac[4] = ip->as_u8[14]; - mac[5] = ip->as_u8[15]; -} - static clib_error_t * test_ip6_link_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -2785,6 +2749,7 @@ VLIB_CLI_COMMAND (test_link_command, static) = }; /* *INDENT-ON* */ +#ifndef CLIB_MARCH_VARIANT int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config) { @@ -2800,6 +2765,7 @@ vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config) return 0; } +#endif static clib_error_t * set_ip6_flow_hash_command_fn (vlib_main_t * vm, @@ -2977,6 +2943,7 @@ VLIB_CLI_COMMAND (show_ip6_local, static) = }; /* *INDENT-ON* */ +#ifndef CLIB_MARCH_VARIANT int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, u32 table_index) @@ -3036,6 +3003,7 @@ vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, return 0; } +#endif static clib_error_t * set_ip6_classify_command_fn (vlib_main_t * vm, diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 3acfb1a48977..ded5b868702c 100755 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -26,6 +26,7 @@ #include #include #include +#include /** * @file @@ -173,7 
+174,7 @@ typedef struct uword type_opaque; uword data; /* Used for nd event notification only */ - void *data_callback; + ip6_nd_change_event_cb_t data_callback; u32 pid; } pending_resolution_t; @@ -266,15 +267,15 @@ ip6_neighbor_get_link_local_address (u32 sw_if_index) * @param sw_if_index The interface on which the ARP entires are acted */ static int -vnet_nd_wc_publish (u32 sw_if_index, u8 * mac, ip6_address_t * ip6) +vnet_nd_wc_publish (u32 sw_if_index, + const mac_address_t * mac, const ip6_address_t * ip6) { wc_nd_report_t r = { .sw_if_index = sw_if_index, .ip6 = *ip6, + .mac = *mac, }; - memcpy (r.mac, mac, sizeof r.mac); - void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); vl_api_rpc_call_main_thread (wc_nd_signal_report, (u8 *) & r, sizeof r); return 0; } @@ -347,13 +348,13 @@ format_ip6_neighbor_ip6_entry (u8 * s, va_list * va) return format (s, "%=12s%=45s%=6s%=20s%=40s", "Time", "Address", "Flags", "Link layer", "Interface"); - if (n->flags & IP6_NEIGHBOR_FLAG_DYNAMIC) + if (n->flags & IP_NEIGHBOR_FLAG_DYNAMIC) flags = format (flags, "D"); - if (n->flags & IP6_NEIGHBOR_FLAG_STATIC) + if (n->flags & IP_NEIGHBOR_FLAG_STATIC) flags = format (flags, "S"); - if (n->flags & IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY) + if (n->flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY) flags = format (flags, "N"); si = vnet_get_sw_interface (vnm, n->key.sw_if_index); @@ -361,7 +362,7 @@ format_ip6_neighbor_ip6_entry (u8 * s, va_list * va) format_vlib_time, vm, n->time_last_updated, format_ip6_address, &n->key.ip6_address, flags ? 
(char *) flags : "", - format_ethernet_address, n->link_layer_address, + format_mac_address_t, &n->mac, format_vnet_sw_interface_name, vnm, si); vec_free (flags); @@ -402,9 +403,8 @@ ip6_neighbor_adj_fib_remove (ip6_neighbor_t * n, u32 fib_index) typedef struct { u8 is_add; - u8 is_static; - u8 is_no_fib_entry; - u8 link_layer_address[6]; + ip_neighbor_flags_t flags; + mac_address_t mac; u32 sw_if_index; ip6_address_t addr; } ip6_neighbor_set_unset_rpc_args_t; @@ -416,19 +416,18 @@ static void set_unset_ip6_neighbor_rpc (vlib_main_t * vm, u32 sw_if_index, const ip6_address_t * a, - const u8 * link_layer_address, - int is_add, int is_static, int is_no_fib_entry) + const mac_address_t * mac, int is_add, ip_neighbor_flags_t flags) { ip6_neighbor_set_unset_rpc_args_t args; void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); args.sw_if_index = sw_if_index; args.is_add = is_add; - args.is_static = is_static; - args.is_no_fib_entry = is_no_fib_entry; - clib_memcpy (&args.addr, a, sizeof (*a)); - if (NULL != link_layer_address) - clib_memcpy (args.link_layer_address, link_layer_address, 6); + args.flags = flags; + ip6_address_copy (&args.addr, a); + /* mac is NULL when called for an unset; copy it only when supplied. */ + if (NULL != mac) + mac_address_copy (&args.mac, mac); vl_api_rpc_call_main_thread (ip6_neighbor_set_unset_rpc_callback, (u8 *) & args, sizeof (args)); @@ -511,7 +508,7 @@ ip6_nd_mk_complete (adj_index_t ai, ip6_neighbor_t * nbr) ethernet_build_rewrite (vnet_get_main (), nbr->key.sw_if_index, adj_get_link_type (ai), - nbr->link_layer_address)); + nbr->mac.bytes)); } static void @@ -604,7 +601,7 @@ ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm, n = pool_elt_at_index (nm->neighbor_pool, to_delete[i]); adj_nbr_walk_nh6 (n->key.sw_if_index, &n->key.ip6_address, ip6_nd_mk_incomplete_walk, NULL); - if (n->flags & IP6_NEIGHBOR_FLAG_STATIC) + if (n->flags & IP_NEIGHBOR_FLAG_STATIC) continue; ip6_neighbor_adj_fib_remove (n, ip6_fib_table_get_index_for_sw_if_index @@ -759,7 +756,7 @@ force_reuse_neighbor_entry (void)
nm->neighbor_delete_rotor = index; index = pool_next_index (nm->neighbor_pool, index); } - while (n->flags & IP6_NEIGHBOR_FLAG_STATIC); + while (n->flags & IP_NEIGHBOR_FLAG_STATIC); /* Remove ARP entry from its interface and update fib */ adj_nbr_walk_nh6 (n->key.sw_if_index, @@ -775,9 +772,8 @@ int vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, u32 sw_if_index, const ip6_address_t * a, - const u8 * link_layer_address, - uword n_bytes_link_layer_address, - int is_static, int is_no_fib_entry) + const mac_address_t * mac, + ip_neighbor_flags_t flags) { ip6_neighbor_main_t *nm = &ip6_neighbor_main; ip6_neighbor_key_t k; @@ -789,9 +785,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, if (vlib_get_thread_index ()) { - set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address, - 1 /* set new neighbor */ , is_static, - is_no_fib_entry); + set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, mac, 1, flags); return 0; } @@ -804,11 +798,11 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, { n = pool_elt_at_index (nm->neighbor_pool, p[0]); /* Refuse to over-write static neighbor entry. */ - if (!is_static && (n->flags & IP6_NEIGHBOR_FLAG_STATIC)) + if (!(flags & IP_NEIGHBOR_FLAG_STATIC) && + (n->flags & IP_NEIGHBOR_FLAG_STATIC)) { /* if MAC address match, still check to send event */ - if (0 == memcmp (n->link_layer_address, - link_layer_address, n_bytes_link_layer_address)) + if (0 == mac_address_cmp (&n->mac, mac)) goto check_customers; return -2; } @@ -832,20 +826,19 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, n->key = k; n->fib_entry_index = FIB_NODE_INDEX_INVALID; - clib_memcpy (n->link_layer_address, - link_layer_address, n_bytes_link_layer_address); + mac_address_copy (&n->mac, mac); /* * create the adj-fib. the entry in the FIB table for and to the peer. 
*/ - if (!is_no_fib_entry) + if (!(flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY)) { ip6_neighbor_adj_fib_add (n, ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index)); } else { - n->flags |= IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY; + n->flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY; } } else @@ -854,28 +847,26 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, * prevent a DoS attack from the data-plane that * spams us with no-op updates to the MAC address */ - if (0 == memcmp (n->link_layer_address, - link_layer_address, n_bytes_link_layer_address)) + if (0 == mac_address_cmp (&n->mac, mac)) { n->time_last_updated = vlib_time_now (vm); goto check_customers; } - clib_memcpy (n->link_layer_address, - link_layer_address, n_bytes_link_layer_address); + mac_address_copy (&n->mac, mac); } /* Update time stamp and flags. */ n->time_last_updated = vlib_time_now (vm); - if (is_static) + if (flags & IP_NEIGHBOR_FLAG_STATIC) { - n->flags |= IP6_NEIGHBOR_FLAG_STATIC; - n->flags &= ~IP6_NEIGHBOR_FLAG_DYNAMIC; + n->flags |= IP_NEIGHBOR_FLAG_STATIC; + n->flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC; } else { - n->flags |= IP6_NEIGHBOR_FLAG_DYNAMIC; - n->flags &= ~IP6_NEIGHBOR_FLAG_STATIC; + n->flags |= IP_NEIGHBOR_FLAG_DYNAMIC; + n->flags &= ~IP_NEIGHBOR_FLAG_STATIC; } adj_nbr_walk_nh6 (sw_if_index, @@ -908,16 +899,13 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, while (next_index != (u32) ~ 0) { - int (*fp) (u32, u8 *, u32, ip6_address_t *); int rv = 1; + mc = pool_elt_at_index (nm->mac_changes, next_index); - fp = mc->data_callback; /* Call the user's data callback, return 1 to suppress dup events */ - if (fp) - rv = - (*fp) (mc->data, (u8 *) link_layer_address, sw_if_index, - &ip6a_zero); + if (mc->data_callback) + rv = (mc->data_callback) (mc->data, mac, sw_if_index, &ip6a_zero); /* * Signal the resolver process, as long as the user * says they want to be notified @@ -944,8 +932,8 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, if (vlib_get_thread_index ()) { - 
set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, NULL, - 0 /* unset */ , 0, 0); + set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, NULL, 0, + IP_NEIGHBOR_FLAG_NONE); return 0; } @@ -980,8 +968,7 @@ static void ip6_neighbor_set_unset_rpc_callback vlib_main_t *vm = vlib_get_main (); if (a->is_add) vnet_set_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr, - a->link_layer_address, 6, a->is_static, - a->is_no_fib_entry); + &a->mac, a->flags); else vnet_unset_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr); } @@ -1010,7 +997,7 @@ ip6_neighbor_t * ip6_neighbors_entries (u32 sw_if_index) { ip6_neighbor_main_t *nm = &ip6_neighbor_main; - ip6_neighbor_t *n, *ns = 0; + ip6_neighbor_t *n, *ns = NULL; /* *INDENT-OFF* */ pool_foreach (n, nm->neighbor_pool, @@ -1085,13 +1072,12 @@ static clib_error_t * set_ip6_neighbor (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { + ip_neighbor_flags_t flags = IP_NEIGHBOR_FLAG_NONE; vnet_main_t *vnm = vnet_get_main (); ip6_address_t addr; - u8 mac_address[6]; + mac_address_t mac; int addr_valid = 0; int is_del = 0; - int is_static = 0; - int is_no_fib_entry = 0; u32 sw_if_index; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) @@ -1100,15 +1086,15 @@ set_ip6_neighbor (vlib_main_t * vm, if (unformat (input, "%U %U %U", unformat_vnet_sw_interface, vnm, &sw_if_index, unformat_ip6_address, &addr, - unformat_ethernet_address, mac_address)) + unformat_mac_address_t, &mac)) addr_valid = 1; else if (unformat (input, "delete") || unformat (input, "del")) is_del = 1; else if (unformat (input, "static")) - is_static = 1; + flags |= IP_NEIGHBOR_FLAG_STATIC; else if (unformat (input, "no-fib-entry")) - is_no_fib_entry = 1; + flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY; else break; } @@ -1117,9 +1103,7 @@ set_ip6_neighbor (vlib_main_t * vm, return clib_error_return (0, "Missing interface, ip6 or hw address"); if (!is_del) - vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, &addr, - mac_address, sizeof (mac_address), - 
is_static, is_no_fib_entry); + vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, &addr, &mac, flags); else vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index, &addr); return 0; @@ -1258,9 +1242,9 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm, is_solicitation ? &ip0->src_address : &h0->target_address, + (mac_address_t *) o0->ethernet_address, - sizeof (o0->ethernet_address), - 0, 0); + IP_NEIGHBOR_FLAG_NONE); } if (is_solicitation && error0 == ICMP6_ERROR_NONE) @@ -1591,11 +1575,11 @@ icmp6_router_solicitation (vlib_main_t * vm, if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 && !is_unspecified && !is_link_local)) { - vnet_set_ip6_ethernet_neighbor (vm, sw_if_index0, - &ip0->src_address, - o0->ethernet_address, - sizeof (o0->ethernet_address), - 0, 0); + vnet_set_ip6_ethernet_neighbor + (vm, sw_if_index0, + &ip0->src_address, + (mac_address_t *) o0->ethernet_address, + IP_NEIGHBOR_FLAG_NONE); } /* default is to drop */ @@ -1691,8 +1675,7 @@ icmp6_router_solicitation (vlib_main_t * vm, sizeof (icmp6_router_advertisement_header_t); if (vlib_buffer_add_data - (vm, vlib_buffer_get_free_list_index - (p0), &bi0, (void *) &rh, + (vm, &bi0, (void *) &rh, sizeof (icmp6_router_advertisement_header_t))) { /* buffer allocation failed, drop the pkt */ @@ -1714,8 +1697,7 @@ icmp6_router_solicitation (vlib_main_t * vm, eth_if0->address, 6); if (vlib_buffer_add_data - (vm, vlib_buffer_get_free_list_index - (p0), &bi0, (void *) &h, + (vm, &bi0, (void *) &h, sizeof (icmp6_neighbor_discovery_ethernet_link_layer_address_option_t))) { @@ -1743,8 +1725,7 @@ icmp6_router_solicitation (vlib_main_t * vm, sizeof (icmp6_neighbor_discovery_mtu_option_t); if (vlib_buffer_add_data - (vm, vlib_buffer_get_free_list_index - (p0), &bi0, (void *) &h, + (vm, &bi0, (void *) &h, sizeof (icmp6_neighbor_discovery_mtu_option_t))) { @@ -1800,10 +1781,8 @@ icmp6_router_solicitation (vlib_main_t * vm, payload_length += sizeof( icmp6_neighbor_discovery_prefix_information_option_t); if 
(vlib_buffer_add_data - (vm, vlib_buffer_get_free_list_index (p0), - &bi0, - (void *)&h, - sizeof(icmp6_neighbor_discovery_prefix_information_option_t))) + (vm, &bi0, (void *)&h, + sizeof(icmp6_neighbor_discovery_prefix_information_option_t))) { error0 = ICMP6_ERROR_ALLOC_FAILURE; goto drop0; @@ -2018,7 +1997,7 @@ icmp6_router_advertisement (vlib_main_t * vm, ra_report_t r; r.sw_if_index = sw_if_index0; - memcpy (r.router_address, &ip0->src_address, 16); + memcpy (&r.router_address, &ip0->src_address, 16); r.current_hop_limit = h0->current_hop_limit; r.flags = h0->flags; r.router_lifetime_in_sec = @@ -2213,9 +2192,9 @@ icmp6_router_advertisement (vlib_main_t * vm, prefix->preferred_time = preferred; prefix->valid_time = valid; prefix->flags = h->flags & 0xc0; - prefix->dst_address_length = - h->dst_address_length; - prefix->dst_address = h->dst_address; + prefix->prefix.fp_len = h->dst_address_length; + prefix->prefix.fp_addr.ip6 = h->dst_address; + prefix->prefix.fp_proto = FIB_PROTOCOL_IP6; /* look for matching prefix - if we our advertising it, it better be consistant */ /* *INDENT-OFF* */ @@ -2312,7 +2291,6 @@ create_buffer_for_rs (vlib_main_t * vm, ip6_radv_t * radv_info) { u32 bi0; vlib_buffer_t *p0; - vlib_buffer_free_list_t *fl; icmp6_router_solicitation_header_t *rh; u16 payload_length; int bogus_length; @@ -2327,8 +2305,6 @@ create_buffer_for_rs (vlib_main_t * vm, ip6_radv_t * radv_info) } p0 = vlib_get_buffer (vm, bi0); - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (p0, fl); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (p0); p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; @@ -2846,9 +2822,8 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index) num_addr_records++; - if(vlib_buffer_add_data - (vm, vlib_buffer_get_free_list_index (b0), &bo0, - (void *)&rr, sizeof(icmp6_multicast_address_record_t))) + if(vlib_buffer_add_data (vm, &bo0, (void *)&rr, + sizeof(icmp6_multicast_address_record_t))) { 
vlib_buffer_free (vm, &bo0, 1); goto alloc_fail; @@ -4695,7 +4670,7 @@ vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm, int vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm, - void *data_callback, + ip6_nd_change_event_cb_t data_callback, u32 pid, void *address_arg, uword node_index, @@ -4749,9 +4724,8 @@ vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm, return VNET_API_ERROR_NO_SUCH_ENTRY; /* Clients may need to clean up pool entries, too */ - void (*fp) (u32, u8 *) = data_callback; - if (fp) - (*fp) (mc->data, 0 /* no new mac addrs */ ); + if (data_callback) + (data_callback) (mc->data, NULL /* no new mac addrs */ , 0, NULL); /* Remove the entry from the list and delete the entry */ *p = mc->next_index; @@ -4773,7 +4747,9 @@ vnet_ip6_nd_term (vlib_main_t * vm, { ip6_neighbor_main_t *nm = &ip6_neighbor_main; icmp6_neighbor_solicitation_or_advertisement_header_t *ndh; + mac_address_t mac; + mac_address_from_bytes (&mac, eth->src_address); ndh = ip6_next_header (ip); if (ndh->icmp.type != ICMP6_neighbor_solicitation && ndh->icmp.type != ICMP6_neighbor_advertisement) @@ -4792,7 +4768,7 @@ vnet_ip6_nd_term (vlib_main_t * vm, (nm->wc_ip6_nd_publisher_node != (uword) ~ 0 && !ip6_address_is_link_local_unicast (&ip->src_address))) { - vnet_nd_wc_publish (sw_if_index, eth->src_address, &ip->src_address); + vnet_nd_wc_publish (sw_if_index, &mac, &ip->src_address); } /* Check if MAC entry exsist for solicited target IP */ diff --git a/src/vnet/ip/ip6_neighbor.h b/src/vnet/ip/ip6_neighbor.h index e273a10799f4..0707a2a8d217 100644 --- a/src/vnet/ip/ip6_neighbor.h +++ b/src/vnet/ip/ip6_neighbor.h @@ -20,6 +20,8 @@ #define included_ip6_neighbor_h #include +#include +#include typedef struct { @@ -28,18 +30,11 @@ typedef struct u32 pad; } ip6_neighbor_key_t; -typedef enum ip6_neighbor_flags_t_ -{ - IP6_NEIGHBOR_FLAG_STATIC = (1 << 0), - IP6_NEIGHBOR_FLAG_DYNAMIC = (1 << 1), - IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY = (1 << 2), -} __attribute__ ((packed)) 
ip6_neighbor_flags_t; - typedef struct { ip6_neighbor_key_t key; - u8 link_layer_address[8]; - ip6_neighbor_flags_t flags; + mac_address_t mac; + ip_neighbor_flags_t flags; f64 time_last_updated; fib_node_index_t fib_entry_index; } ip6_neighbor_t; @@ -81,10 +76,8 @@ extern void vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm, extern int vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, u32 sw_if_index, const ip6_address_t * a, - const u8 * link_layer_address, - uword n_bytes_link_layer_address, - int is_static, - int is_no_fib_entry); + const mac_address_t * mac, + ip_neighbor_flags_t flags); extern int vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, u32 sw_if_index, @@ -99,7 +92,7 @@ typedef struct { u32 sw_if_index; ip6_address_t ip6; - u8 mac[6]; + mac_address_t mac; } wc_nd_report_t; void wc_nd_set_publisher_node (uword node_index, uword event_type); @@ -119,8 +112,7 @@ void icmp6_send_router_solicitation (vlib_main_t * vm, u32 sw_if_index, typedef struct { - ip6_address_t dst_address; - u8 dst_address_length; + fib_prefix_t prefix; u8 flags; u32 valid_time; u32 preferred_time; @@ -129,7 +121,7 @@ typedef struct typedef struct { u32 sw_if_index; - u8 router_address[16]; + ip6_address_t router_address; u8 current_hop_limit; u8 flags; u16 router_lifetime_in_sec; diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index 015dd01f8aa5..5d33cc77925d 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -181,6 +181,13 @@ ip46_address_is_multicast (const ip46_address_t * a) ip6_address_is_multicast (&a->ip6); } +always_inline void +ip6_address_copy (ip6_address_t * dst, const ip6_address_t * src) +{ + dst->as_u64[0] = src->as_u64[0]; + dst->as_u64[1] = src->as_u64[1]; +} + always_inline void ip6_set_reserved_multicast_address (ip6_address_t * a, ip6_multicast_address_scope_t scope, diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index 42ce883c2344..749cb889be96 100644 --- a/src/vnet/ip/ip_api.c +++ 
b/src/vnet/ip/ip_api.c @@ -23,8 +23,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -120,13 +122,25 @@ _(IP_PUNT_REDIRECT_DUMP, ip_punt_redirect_dump) extern void stats_dslock_with_hint (int hint, int tag); extern void stats_dsunlock (void); +static vl_api_ip_neighbor_flags_t +ip_neighbor_flags_encode (ip_neighbor_flags_t f) +{ + vl_api_ip_neighbor_flags_t v = IP_API_NEIGHBOR_FLAG_NONE; + + if (f & IP_NEIGHBOR_FLAG_STATIC) + v |= IP_API_NEIGHBOR_FLAG_STATIC; + if (f & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY) + v |= IP_API_NEIGHBOR_FLAG_NO_FIB_ENTRY; + + return (clib_host_to_net_u32 (v)); +} + static void send_ip_neighbor_details (u32 sw_if_index, - u8 is_ipv6, - u8 is_static, - u8 * mac_address, - u8 * ip_address, vl_api_registration_t * reg, - u32 context) + const ip46_address_t * ip_address, + const mac_address_t * mac, + ip_neighbor_flags_t flags, + vl_api_registration_t * reg, u32 context) { vl_api_ip_neighbor_details_t *mp; @@ -134,11 +148,11 @@ send_ip_neighbor_details (u32 sw_if_index, clib_memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_IP_NEIGHBOR_DETAILS); mp->context = context; - mp->sw_if_index = htonl (sw_if_index); - mp->is_ipv6 = is_ipv6; - mp->is_static = is_static; - memcpy (mp->mac_address, mac_address, 6); - memcpy (mp->ip_address, ip_address, (is_ipv6) ? 16 : 4); + mp->neighbor.sw_if_index = htonl (sw_if_index); + mp->neighbor.flags = ip_neighbor_flags_encode (flags); + + ip_address_encode (ip_address, IP46_TYPE_ANY, &mp->neighbor.ip_address); + mac_address_encode (mac, mp->neighbor.mac_address); vl_api_send_msg (reg, (u8 *) mp); } @@ -162,12 +176,15 @@ vl_api_ip_neighbor_dump_t_handler (vl_api_ip_neighbor_dump_t * mp) /* *INDENT-OFF* */ vec_foreach (n, ns) { - send_ip_neighbor_details - (n->key.sw_if_index, mp->is_ipv6, - ((n->flags & IP6_NEIGHBOR_FLAG_STATIC) ? 
1 : 0), - (u8 *) n->link_layer_address, - (u8 *) & (n->key.ip6_address.as_u8), - reg, mp->context); + ip46_address_t nh = { + .ip6 = { + .as_u64[0] = n->key.ip6_address.as_u64[0], + .as_u64[1] = n->key.ip6_address.as_u64[1], + }, + }; + send_ip_neighbor_details (n->key.sw_if_index, &nh, + &n->mac, n->flags, + reg, mp->context); } /* *INDENT-ON* */ vec_free (ns); @@ -180,11 +197,15 @@ vl_api_ip_neighbor_dump_t_handler (vl_api_ip_neighbor_dump_t * mp) /* *INDENT-OFF* */ vec_foreach (n, ns) { - send_ip_neighbor_details (n->sw_if_index, mp->is_ipv6, - ((n->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) ? 1 : 0), - (u8*) n->ethernet_address, - (u8*) & (n->ip4_address.as_u8), - reg, mp->context); + ip46_address_t nh = { + .ip4 = { + .as_u32 = n->ip4_address.as_u32, + }, + }; + + send_ip_neighbor_details (n->sw_if_index, &nh, + &n->mac, n->flags, + reg, mp->context); } /* *INDENT-ON* */ vec_free (ns); @@ -654,36 +675,52 @@ vl_api_ip_punt_redirect_t_handler (vl_api_ip_punt_redirect_t * mp, REPLY_MACRO (VL_API_IP_PUNT_REDIRECT_REPLY); } +static ip_neighbor_flags_t +ip_neighbor_flags_decode (vl_api_ip_neighbor_flags_t v) +{ + ip_neighbor_flags_t f = IP_NEIGHBOR_FLAG_NONE; + + v = clib_net_to_host_u32 (v); + + if (v & IP_API_NEIGHBOR_FLAG_STATIC) + f |= IP_NEIGHBOR_FLAG_STATIC; + if (v & IP_API_NEIGHBOR_FLAG_NO_FIB_ENTRY) + f |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY; + + return (f); +} + static void vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp, vlib_main_t * vm) { - ip46_address_t ip = ip46_address_initializer; vl_api_ip_neighbor_add_del_reply_t *rmp; ip_neighbor_flags_t flags; u32 stats_index = ~0; + ip46_address_t ip; + mac_address_t mac; + ip46_type_t type; int rv = 0; - VALIDATE_SW_IF_INDEX (mp); + VALIDATE_SW_IF_INDEX ((&mp->neighbor)); stats_dslock_with_hint (1 /* release hint */ , 7 /* tag */ ); - flags = IP_NEIGHBOR_FLAG_NODE; - if (mp->is_static) - flags |= IP_NEIGHBOR_FLAG_STATIC; - if (mp->is_no_adj_fib) - flags |= IP_NEIGHBOR_FLAG_NO_ADJ_FIB; - - 
if (mp->is_ipv6) - clib_memcpy (&ip.ip6, mp->dst_address, 16); - else - clib_memcpy (&ip.ip4, mp->dst_address, 4); + flags = ip_neighbor_flags_decode (mp->neighbor.flags); + type = ip_address_decode (&mp->neighbor.ip_address, &ip); + mac_address_decode (mp->neighbor.mac_address, &mac); + /* + * there's no validation here of the ND/ARP entry being added. + * The expectation is that the FIB will ensure that nothing bad + * will come of adding bogus entries. + */ if (mp->is_add) - rv = ip_neighbor_add (&ip, mp->is_ipv6, mp->mac_address, - ntohl (mp->sw_if_index), flags, &stats_index); + rv = ip_neighbor_add (&ip, type, &mac, + ntohl (mp->neighbor.sw_if_index), + flags, &stats_index); else - rv = ip_neighbor_del (&ip, mp->is_ipv6, ntohl (mp->sw_if_index)); + rv = ip_neighbor_del (&ip, type, ntohl (mp->neighbor.sw_if_index)); stats_dsunlock (); @@ -808,7 +845,13 @@ add_del_route_t_handler (u8 is_multipath, path.frp_eos = MPLS_NON_EOS; } if (is_local) - path_flags |= FIB_ROUTE_PATH_LOCAL; + { + path_flags |= FIB_ROUTE_PATH_LOCAL; + if (~0 != next_hop_sw_if_index) + { + entry_flags |= (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL); + } + } if (is_dvr) path_flags |= FIB_ROUTE_PATH_DVR; if (is_resolve_host) @@ -838,7 +881,8 @@ add_del_route_t_handler (u8 is_multipath, stats_dslock_with_hint (1 /* release hint */ , 2 /* tag */ ); - if (is_drop || is_local || is_classify || is_unreach || is_prohibit) + if (is_drop || (is_local && (~0 == next_hop_sw_if_index)) || + is_classify || is_unreach || is_prohibit) { /* * special route types that link directly to the adj @@ -1653,11 +1697,13 @@ static void { vlib_main_t *vm = vlib_get_main (); vl_api_sw_interface_ip6nd_ra_prefix_reply_t *rmp; + fib_prefix_t pfx; int rv = 0; u8 is_no, use_default, no_advertise, off_link, no_autoconfig, no_onlink; VALIDATE_SW_IF_INDEX (mp); + ip_prefix_decode (&mp->prefix, &pfx); is_no = mp->is_no == 1; use_default = mp->use_default == 1; no_advertise = mp->no_advertise == 1; @@ -1666,8 +1712,8 @@ 
static void no_onlink = mp->no_onlink == 1; rv = ip6_neighbor_ra_prefix (vm, ntohl (mp->sw_if_index), - (ip6_address_t *) mp->address, - mp->address_length, use_default, + &pfx.fp_addr.ip6, + pfx.fp_len, use_default, ntohl (mp->val_lifetime), ntohl (mp->pref_lifetime), no_advertise, off_link, no_autoconfig, no_onlink, is_no); @@ -1688,7 +1734,8 @@ send_ip6nd_proxy_details (vl_api_registration_t * reg, mp->_vl_msg_id = ntohs (VL_API_IP6ND_PROXY_DETAILS); mp->context = context; mp->sw_if_index = htonl (sw_if_index); - memcpy (mp->address, addr, 16); + + ip6_address_encode (&addr->ip6, mp->ip); vl_api_send_msg (reg, (u8 *) mp); } @@ -1756,12 +1803,13 @@ static void vl_api_ip6nd_proxy_add_del_t_handler (vl_api_ip6nd_proxy_add_del_t * mp) { vl_api_ip6nd_proxy_add_del_reply_t *rmp; + ip6_address_t ip6; int rv = 0; VALIDATE_SW_IF_INDEX (mp); - rv = ip6_neighbor_proxy_add_del (ntohl (mp->sw_if_index), - (ip6_address_t *) mp->address, mp->is_del); + ip6_address_decode (mp->ip, &ip6); + rv = ip6_neighbor_proxy_add_del (ntohl (mp->sw_if_index), &ip6, mp->is_del); BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_IP6ND_PROXY_ADD_DEL_REPLY); @@ -1903,8 +1951,9 @@ static void clib_error_t *error; clib_memset (&args, 0, sizeof (args)); - ip_set (&args.prefix.fp_addr, mp->ip, mp->is_ip4); - args.prefix.fp_len = mp->plen ? mp->plen : (mp->is_ip4 ? 
32 : 128); + + ip_prefix_decode (&mp->pfx, &args.prefix); + args.sw_if_index = clib_net_to_host_u32 (mp->sw_if_index); args.is_add = mp->is_add; if ((error = vnet_ip_container_proxy_add_del (&args))) @@ -2007,11 +2056,8 @@ static void vl_api_ip_source_and_port_range_check_add_del_reply_t *rmp; int rv = 0; - u8 is_ipv6 = mp->is_ipv6; u8 is_add = mp->is_add; - u8 mask_length = mp->mask_length; - ip4_address_t ip4_addr; - ip6_address_t ip6_addr; + fib_prefix_t pfx; u16 *low_ports = 0; u16 *high_ports = 0; u32 vrf_id; @@ -2019,6 +2065,8 @@ static void u8 num_ranges; int i; + ip_prefix_decode (&mp->prefix, &pfx); + // Validate port range num_ranges = mp->number_of_ranges; if (num_ranges > 32) @@ -2045,13 +2093,6 @@ static void vec_add1 (high_ports, tmp_high + 1); } - // Validate mask_length - if ((is_ipv6 && mask_length > 128) || (!is_ipv6 && mask_length > 32)) - { - rv = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH; - goto reply; - } - vrf_id = ntohl (mp->vrf_id); if (vrf_id < 1) @@ -2061,20 +2102,18 @@ static void } - if (is_ipv6) + if (FIB_PROTOCOL_IP6 == pfx.fp_proto) { - clib_memcpy (ip6_addr.as_u8, mp->address, sizeof (ip6_addr.as_u8)); - rv = ip6_source_and_port_range_check_add_del (&ip6_addr, - mask_length, + rv = ip6_source_and_port_range_check_add_del (&pfx.fp_addr.ip6, + pfx.fp_len, vrf_id, low_ports, high_ports, is_add); } else { - clib_memcpy (ip4_addr.data, mp->address, sizeof (ip4_addr)); - rv = ip4_source_and_port_range_check_add_del (&ip4_addr, - mask_length, + rv = ip4_source_and_port_range_check_add_del (&pfx.fp_addr.ip4, + pfx.fp_len, vrf_id, low_ports, high_ports, is_add); @@ -2174,10 +2213,22 @@ static void #define IP4_ARP_EVENT 3 #define IP6_ND_EVENT 4 -static int arp_change_delete_callback (u32 pool_index, u8 * notused); -static int nd_change_delete_callback (u32 pool_index, u8 * notused); static vlib_node_registration_t ip_resolver_process_node; +static int +arp_change_delete_callback (u32 pool_index, + const mac_address_t * mac, + u32 sw_if_index, 
const ip4_address_t * address) +{ + vpe_api_main_t *am = &vpe_api_main; + + if (pool_is_free_index (am->arp_events, pool_index)) + return 1; + + pool_put_index (am->arp_events, pool_index); + return 0; +} + static void handle_ip4_arp_event (u32 pool_index) { @@ -2199,7 +2250,7 @@ handle_ip4_arp_event (u32 pool_index) { (void) vnet_add_del_ip4_arp_change_event (vnm, arp_change_delete_callback, - event->pid, &event->address, + event->pid, event->ip, ip_resolver_process_node.index, IP4_ARP_EVENT, ~0 /* pool index, notused */ , 0 /* is_add */ ); return; @@ -2221,12 +2272,26 @@ handle_ip4_arp_event (u32 pool_index) if (vlib_time_now (vm) > last_time + 10.0) { clib_warning ("arp event for %U to pid %d: queue stuffed!", - format_ip4_address, &event->address, event->pid); + format_ip4_address, event->ip, event->pid); last_time = vlib_time_now (vm); } } } +static int +nd_change_delete_callback (u32 pool_index, + const mac_address_t * mac, + u32 sw_if_index, const ip6_address_t * addr) +{ + vpe_api_main_t *am = &vpe_api_main; + + if (pool_is_free_index (am->nd_events, pool_index)) + return 1; + + pool_put_index (am->nd_events, pool_index); + return 0; +} + static void handle_ip6_nd_event (u32 pool_index) { @@ -2248,7 +2313,7 @@ handle_ip6_nd_event (u32 pool_index) { (void) vnet_add_del_ip6_nd_change_event (vnm, nd_change_delete_callback, - event->pid, &event->address, + event->pid, event->ip, ip_resolver_process_node.index, IP6_ND_EVENT, ~0 /* pool index, notused */ , 0 /* is_add */ ); return; @@ -2270,7 +2335,7 @@ handle_ip6_nd_event (u32 pool_index) if (vlib_time_now (vm) > last_time + 10.0) { clib_warning ("ip6 nd event for %U to pid %d: queue stuffed!", - format_ip6_address, &event->address, event->pid); + format_ip6_address, event->ip, event->pid); last_time = vlib_time_now (vm); } } @@ -2321,8 +2386,8 @@ VLIB_REGISTER_NODE (ip_resolver_process_node,static) = { /* *INDENT-ON* */ static int -nd_change_data_callback (u32 pool_index, u8 * new_mac, - u32 sw_if_index, 
ip6_address_t * address) +nd_change_data_callback (u32 pool_index, const mac_address_t * new_mac, + u32 sw_if_index, const ip6_address_t * address) { vpe_api_main_t *am = &vpe_api_main; vl_api_ip6_nd_event_t *event; @@ -2331,41 +2396,17 @@ nd_change_data_callback (u32 pool_index, u8 * new_mac, return 1; event = pool_elt_at_index (am->nd_events, pool_index); - if (eth_mac_equal (event->new_mac, new_mac) && + if (ethernet_mac_address_equal (event->mac, new_mac->bytes) && sw_if_index == ntohl (event->sw_if_index)) { return 1; } - clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac)); + mac_address_encode (new_mac, event->mac); event->sw_if_index = htonl (sw_if_index); return 0; } -static int -arp_change_delete_callback (u32 pool_index, u8 * notused) -{ - vpe_api_main_t *am = &vpe_api_main; - - if (pool_is_free_index (am->arp_events, pool_index)) - return 1; - - pool_put_index (am->arp_events, pool_index); - return 0; -} - -static int -nd_change_delete_callback (u32 pool_index, u8 * notused) -{ - vpe_api_main_t *am = &vpe_api_main; - - if (pool_is_free_index (am->nd_events, pool_index)) - return 1; - - pool_put_index (am->nd_events, pool_index); - return 0; -} - static vlib_node_registration_t wc_arp_process_node; enum @@ -2396,9 +2437,10 @@ wc_arp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) wc_arp_report_t *arp_events = event_data; for (i = 0; i < vec_len (arp_events); i++) { - /* discard dup event */ - if (arp_prev.ip4 == arp_events[i].ip4 && - eth_mac_equal ((u8 *) arp_prev.mac, arp_events[i].mac) && + /* discard dup event - cast away volatile */ + if (arp_prev.ip.as_u32 == arp_events[i].ip.as_u32 && + mac_address_equal ((const mac_address_t *) &arp_prev.mac, + &arp_events[i].mac) && arp_prev.sw_if_index == arp_events[i].sw_if_index && (now - last_arp) < 10.0) { @@ -2421,9 +2463,9 @@ wc_arp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) event->client_index = reg->client_index; event->pid = reg->client_pid; 
event->mac_ip = 1; - event->address = arp_events[i].ip4; + ip4_address_encode(&arp_events[i].ip, event->ip); event->sw_if_index = htonl(arp_events[i].sw_if_index); - memcpy(event->new_mac, arp_events[i].mac, sizeof event->new_mac); + mac_address_encode(&arp_events[i].mac, event->mac); vl_api_send_msg (vl_reg, (u8 *) event); } })); @@ -2435,10 +2477,11 @@ wc_arp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) wc_nd_report_t *nd_events = event_data; for (i = 0; i < vec_len (nd_events); i++) { - /* discard dup event */ - if (ip6_address_is_equal - ((ip6_address_t *) & nd_prev.ip6, &nd_events[i].ip6) - && eth_mac_equal ((u8 *) nd_prev.mac, nd_events[i].mac) + /* discard dup event - cast away volatile */ + if (ip6_address_is_equal ((const ip6_address_t *) &nd_prev.ip6, + &nd_events[i].ip6) + && mac_address_equal ((const mac_address_t *) &nd_prev.mac, + &nd_events[i].mac) && nd_prev.sw_if_index == nd_events[i].sw_if_index && (now - last_nd) < 10.0) { @@ -2460,9 +2503,9 @@ wc_arp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) event->client_index = reg->client_index; event->pid = reg->client_pid; event->mac_ip = 1; - memcpy(event->address, nd_events[i].ip6.as_u8, sizeof event->address); + ip6_address_encode(&nd_events[i].ip6, event->ip); event->sw_if_index = htonl(nd_events[i].sw_if_index); - memcpy(event->new_mac, nd_events[i].mac, sizeof event->new_mac); + mac_address_encode(&nd_events[i].mac, event->mac); vl_api_send_msg (vl_reg, (u8 *) event); } })); @@ -2500,7 +2543,8 @@ wc_arp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) event->sw_if_index = clib_host_to_net_u32 (ra_events[i].sw_if_index); - memcpy (event->router_address, ra_events[i].router_address, 16); + ip6_address_encode (&ra_events[i].router_address, + event->router_addr); event->current_hop_limit = ra_events[i].current_hop_limit; event->flags = ra_events[i].flags; @@ -2524,9 +2568,7 @@ wc_arp_process (vlib_main_t * vm, vlib_node_runtime_t 
* rt, vlib_frame_t * f) { ra_report_prefix_info_t *info = &ra_events[i].prefixes[j]; - memcpy (prefix->dst_address, info->dst_address.as_u8, - 16); - prefix->dst_address_length = info->dst_address_length; + ip_prefix_encode(&info->prefix, &prefix->prefix); prefix->flags = info->flags; prefix->valid_time = clib_host_to_net_u32 (info->valid_time); @@ -2557,8 +2599,9 @@ VLIB_REGISTER_NODE (wc_arp_process_node,static) = { /* *INDENT-ON* */ static int -arp_change_data_callback (u32 pool_index, u8 * new_mac, - u32 sw_if_index, u32 address) +arp_change_data_callback (u32 pool_index, + const mac_address_t * mac, + u32 sw_if_index, const ip4_address_t * address) { vpe_api_main_t *am = &vpe_api_main; vl_api_ip4_arp_event_t *event; @@ -2567,13 +2610,13 @@ arp_change_data_callback (u32 pool_index, u8 * new_mac, return 1; event = pool_elt_at_index (am->arp_events, pool_index); - if (eth_mac_equal (event->new_mac, new_mac) && + if (ethernet_mac_address_equal (event->mac, mac->bytes) && sw_if_index == ntohl (event->sw_if_index)) { return 1; } - clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac)); + mac_address_encode (mac, event->mac); event->sw_if_index = htonl (sw_if_index); return 0; } @@ -2584,9 +2627,12 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) vpe_api_main_t *am = &vpe_api_main; vnet_main_t *vnm = vnet_get_main (); vl_api_want_ip4_arp_events_reply_t *rmp; + ip4_address_t ip; int rv = 0; - if (mp->address == 0) + ip4_address_decode (mp->ip, &ip); + + if (ip.as_u32 == 0) { uword *p = hash_get (am->wc_ip4_arp_events_registration_hash, mp->client_index); @@ -2632,7 +2678,7 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) pool_get (am->arp_events, event); rv = vnet_add_del_ip4_arp_change_event (vnm, arp_change_data_callback, - mp->pid, &mp->address /* addr, in net byte order */ , + mp->pid, mp->ip /* addr, in net byte order */ , ip_resolver_process_node.index, IP4_ARP_EVENT, event - am->arp_events, 1 /* 
is_add */ ); @@ -2646,16 +2692,16 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) /* Python API expects events to have no context */ event->_vl_msg_id = htons (VL_API_IP4_ARP_EVENT); event->client_index = mp->client_index; - event->address = mp->address; + memcpy (event->ip, mp->ip, 4); event->pid = mp->pid; - if (mp->address == 0) + if (ip.as_u32 == 0) event->mac_ip = 1; } else { rv = vnet_add_del_ip4_arp_change_event (vnm, arp_change_delete_callback, - mp->pid, &mp->address /* addr, in net byte order */ , + mp->pid, mp->ip /* addr, in net byte order */ , ip_resolver_process_node.index, IP4_ARP_EVENT, ~0 /* pool index */ , 0 /* is_add */ ); } @@ -2693,7 +2739,7 @@ want_ip4_arp_events_reaper (u32 client_index) event = pool_elt_at_index (am->arp_events, *event_id); vnet_add_del_ip4_arp_change_event (vnm, arp_change_delete_callback, - event->pid, &event->address, + event->pid, event->ip, ip_resolver_process_node.index, IP4_ARP_EVENT, ~0 /* pool index, notused */ , 0 /* is_add */ ); } @@ -2721,9 +2767,12 @@ vl_api_want_ip6_nd_events_t_handler (vl_api_want_ip6_nd_events_t * mp) vpe_api_main_t *am = &vpe_api_main; vnet_main_t *vnm = vnet_get_main (); vl_api_want_ip6_nd_events_reply_t *rmp; + ip6_address_t ip6; int rv = 0; - if (ip6_address_is_zero ((ip6_address_t *) mp->address)) + ip6_address_decode (mp->ip, &ip6); + + if (ip6_address_is_zero (&ip6)) { uword *p = hash_get (am->wc_ip6_nd_events_registration_hash, mp->client_index); @@ -2770,7 +2819,7 @@ vl_api_want_ip6_nd_events_t_handler (vl_api_want_ip6_nd_events_t * mp) rv = vnet_add_del_ip6_nd_change_event (vnm, nd_change_data_callback, - mp->pid, mp->address /* addr, in net byte order */ , + mp->pid, &ip6, ip_resolver_process_node.index, IP6_ND_EVENT, event - am->nd_events, 1 /* is_add */ ); @@ -2783,14 +2832,14 @@ vl_api_want_ip6_nd_events_t_handler (vl_api_want_ip6_nd_events_t * mp) event->_vl_msg_id = ntohs (VL_API_IP6_ND_EVENT); event->client_index = mp->client_index; - clib_memcpy 
(event->address, mp->address, sizeof event->address); + ip6_address_encode (&ip6, event->ip); event->pid = mp->pid; } else { rv = vnet_add_del_ip6_nd_change_event (vnm, nd_change_delete_callback, - mp->pid, mp->address /* addr, in net byte order */ , + mp->pid, &ip6 /* addr, in net byte order */ , ip_resolver_process_node.index, IP6_ND_EVENT, ~0 /* pool index */ , 0 /* is_add */ ); } @@ -2829,7 +2878,7 @@ want_ip6_nd_events_reaper (u32 client_index) event = pool_elt_at_index (am->nd_events, *event_id); vnet_add_del_ip6_nd_change_event (vnm, nd_change_delete_callback, - event->pid, &event->address, + event->pid, event->ip, ip_resolver_process_node.index, IP6_ND_EVENT, ~0 /* pool index, notused */ , 0 /* is_add */ ); } @@ -2916,26 +2965,24 @@ static void vl_api_proxy_arp_add_del_t_handler (vl_api_proxy_arp_add_del_t * mp) { vl_api_proxy_arp_add_del_reply_t *rmp; + ip4_address_t lo, hi; u32 fib_index; int rv; - ip4_main_t *im = &ip4_main; - uword *p; stats_dslock_with_hint (1 /* release hint */ , 6 /* tag */ ); - p = hash_get (im->fib_index_by_table_id, ntohl (mp->proxy.vrf_id)); + fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->proxy.table_id)); - if (!p) + if (~0 == fib_index) { rv = VNET_API_ERROR_NO_SUCH_FIB; goto out; } - fib_index = p[0]; + ip4_address_decode (mp->proxy.low, &lo); + ip4_address_decode (mp->proxy.hi, &hi); - rv = vnet_proxy_arp_add_del ((ip4_address_t *) mp->proxy.low_address, - (ip4_address_t *) mp->proxy.hi_address, - fib_index, mp->is_add == 0); + rv = vnet_proxy_arp_add_del (&lo, &hi, fib_index, mp->is_add == 0); out: stats_dsunlock (); @@ -2962,10 +3009,10 @@ send_proxy_arp_details (const ip4_address_t * lo_addr, clib_memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_PROXY_ARP_DETAILS); mp->context = ctx->context; - mp->proxy.vrf_id = htonl (fib_index); - clib_memcpy (mp->proxy.low_address, lo_addr, - sizeof (mp->proxy.low_address)); - clib_memcpy (mp->proxy.hi_address, hi_addr, sizeof (mp->proxy.hi_address)); + 
mp->proxy.table_id = htonl (fib_index); + + ip4_address_encode (lo_addr, mp->proxy.low); + ip4_address_encode (hi_addr, mp->proxy.hi); vl_api_send_msg (ctx->reg, (u8 *) mp); @@ -3062,17 +3109,18 @@ vl_api_ip_probe_neighbor_t_handler (vl_api_ip_probe_neighbor_t * mp) vlib_main_t *vm = vlib_get_main (); vl_api_ip_probe_neighbor_reply_t *rmp; clib_error_t *error; + ip46_address_t dst; + ip46_type_t itype; VALIDATE_SW_IF_INDEX (mp); u32 sw_if_index = ntohl (mp->sw_if_index); + itype = ip_address_decode (&mp->dst, &dst); - if (mp->is_ipv6) - error = ip6_probe_neighbor (vm, (ip6_address_t *) mp->dst_address, - sw_if_index, 0); + if (IP46_TYPE_IP6 == itype) + error = ip6_probe_neighbor (vm, &dst.ip6, sw_if_index, 0); else - error = ip4_probe_neighbor (vm, (ip4_address_t *) mp->dst_address, - sw_if_index, 0); + error = ip4_probe_neighbor (vm, &dst.ip4, sw_if_index, 0); if (error) { diff --git a/src/vnet/ip/ip_checksum.c b/src/vnet/ip/ip_checksum.c index 36467a2eb01a..d586649646de 100644 --- a/src/vnet/ip/ip_checksum.c +++ b/src/vnet/ip/ip_checksum.c @@ -120,34 +120,13 @@ do { \ * function which uses the function pointer we set up in * ip_checksum_init(). 
*/ -#if CLIB_DEBUG > 0 -#define IP_INCREMENTAL_CHECKSUM_CLONE_TEMPLATE(arch, fn) -#define IP_INCREMENTAL_CHECKSUM_MULTIARCH_CLONE(fn) -#else -#define IP_INCREMENTAL_CHECKSUM_CLONE_TEMPLATE(arch, fn, tgt) \ - uword \ - __attribute__ ((flatten)) \ - __attribute__ ((target (tgt))) \ - CLIB_CPU_OPTIMIZED \ - fn ## _ ## arch (ip_csum_t sum, \ - void *_data, \ - uword n_bytes) \ - { return fn (sum, _data, n_bytes); } - -#define IP_INCREMENTAL_CHECKSUM_MULTIARCH_CLONE(fn) \ - foreach_march_variant(IP_INCREMENTAL_CHECKSUM_CLONE_TEMPLATE,fn) -#endif - -IP_INCREMENTAL_CHECKSUM_MULTIARCH_CLONE (_ip_incremental_checksum); - -CLIB_MULTIARCH_SELECT_FN (_ip_incremental_checksum, static inline); ip_csum_t (*vnet_incremental_checksum_fp) (ip_csum_t, void *, uword); static clib_error_t * ip_checksum_init (vlib_main_t * vm) { - vnet_incremental_checksum_fp = _ip_incremental_checksum_multiarch_select (); + vnet_incremental_checksum_fp = _ip_incremental_checksum; return 0; } diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c index de0998779a84..fd5bc6fa0bad 100644 --- a/src/vnet/ip/ip_frag.c +++ b/src/vnet/ip/ip_frag.c @@ -78,9 +78,6 @@ frag_buffer_alloc (vlib_buffer_t * org_b, u32 * bi) return 0; vlib_buffer_t *b = vlib_get_buffer (vm, *bi); - vlib_buffer_free_list_t *fl = - vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b, fl); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); vlib_buffer_copy_trace_flag (vm, org_b, *bi); @@ -109,7 +106,8 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t); max = - (clib_min (mtu, VLIB_BUFFER_DATA_SIZE) - sizeof (ip4_header_t)) & ~0x7; + (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) - + sizeof (ip4_header_t)) & ~0x7; if (rem > (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip4_header_t))) diff --git a/src/vnet/ip/ip_neighbor.c b/src/vnet/ip/ip_neighbor.c index 7d1998d25b85..ad89d3ff6795 100644 --- 
a/src/vnet/ip/ip_neighbor.c +++ b/src/vnet/ip/ip_neighbor.c @@ -49,8 +49,8 @@ static ip_neighbor_scan_config_t ip_neighbor_scan_conf; int ip_neighbor_add (const ip46_address_t * ip, - u8 is_ip6, - const u8 * mac, + ip46_type_t type, + const mac_address_t * mac, u32 sw_if_index, ip_neighbor_flags_t flags, u32 * stats_index) { @@ -63,13 +63,10 @@ ip_neighbor_add (const ip46_address_t * ip, * The expectation is that the FIB will ensure that nothing bad * will come of adding bogus entries. */ - if (is_ip6) + if (IP46_TYPE_IP6 == type) { rv = vnet_set_ip6_ethernet_neighbor (vlib_get_main (), - sw_if_index, &ip->ip6, mac, 6, - (flags & IP_NEIGHBOR_FLAG_STATIC), - (flags & - IP_NEIGHBOR_FLAG_NO_ADJ_FIB)); + sw_if_index, &ip->ip6, mac, flags); fproto = FIB_PROTOCOL_IP6; linkt = VNET_LINK_IP6; } @@ -77,16 +74,12 @@ ip_neighbor_add (const ip46_address_t * ip, { ethernet_arp_ip4_over_ethernet_address_t a = { .ip4 = ip->ip4, + .mac = *mac, }; - clib_memcpy (&a.ethernet, mac, 6); - - rv = vnet_arp_set_ip4_over_ethernet (vnet_get_main (), - sw_if_index, - &a, - (flags & IP_NEIGHBOR_FLAG_STATIC), - (flags & - IP_NEIGHBOR_FLAG_NO_ADJ_FIB)); + rv = + vnet_arp_set_ip4_over_ethernet (vnet_get_main (), sw_if_index, &a, + flags); fproto = FIB_PROTOCOL_IP4; linkt = VNET_LINK_IP4; } @@ -98,11 +91,11 @@ ip_neighbor_add (const ip46_address_t * ip, } int -ip_neighbor_del (const ip46_address_t * ip, u8 is_ip6, u32 sw_if_index) +ip_neighbor_del (const ip46_address_t * ip, ip46_type_t type, u32 sw_if_index) { int rv; - if (is_ip6) + if (IP46_TYPE_IP6 == type) { rv = vnet_unset_ip6_ethernet_neighbor (vlib_get_main (), sw_if_index, &ip->ip6); @@ -180,14 +173,14 @@ ip_neighbor_scan (vlib_main_t * vm, f64 start_time, u32 start_idx, if (!is_ip6) { n4 = pool_elt_at_index (np4, curr_idx); - if (n4->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) + if (n4->flags & IP_NEIGHBOR_FLAG_STATIC) goto next_neighbor; update_time = n4->time_last_updated; } else { n6 = pool_elt_at_index (np6, curr_idx); - if 
(n6->flags & IP6_NEIGHBOR_FLAG_STATIC) + if (n6->flags & IP_NEIGHBOR_FLAG_STATIC) goto next_neighbor; update_time = n6->time_last_updated; } @@ -199,9 +192,11 @@ ip_neighbor_scan (vlib_main_t * vm, f64 start_time, u32 start_idx, /* delete stale neighbor */ if (!is_ip6) { - ethernet_arp_ip4_over_ethernet_address_t delme; - clib_memcpy (&delme.ethernet, n4->ethernet_address, 6); - delme.ip4.as_u32 = n4->ip4_address.as_u32; + ethernet_arp_ip4_over_ethernet_address_t delme = { + .ip4.as_u32 = n4->ip4_address.as_u32, + .mac = n4->mac, + }; + vnet_arp_unset_ip4_over_ethernet (vnm, n4->sw_if_index, &delme); } else diff --git a/src/vnet/ip/ip_neighbor.h b/src/vnet/ip/ip_neighbor.h index 9eeebdb1555d..84247f22f47f 100644 --- a/src/vnet/ip/ip_neighbor.h +++ b/src/vnet/ip/ip_neighbor.h @@ -37,19 +37,22 @@ void ip_neighbor_scan_enable_disable (ip_neighbor_scan_arg_t * arg); typedef enum ip_neighbor_flags_t_ { - IP_NEIGHBOR_FLAG_NODE = 0, + IP_NEIGHBOR_FLAG_NONE = 0, IP_NEIGHBOR_FLAG_STATIC = (1 << 0), - IP_NEIGHBOR_FLAG_NO_ADJ_FIB = (1 << 1), -} ip_neighbor_flags_t; + IP_NEIGHBOR_FLAG_DYNAMIC = (1 << 1), + IP_NEIGHBOR_FLAG_NO_FIB_ENTRY = (1 << 2), +} __attribute__ ((packed)) ip_neighbor_flags_t; + +extern u8 *format_ip_neighbor_flags (u8 * s, va_list * args); extern int ip_neighbor_add (const ip46_address_t * ip, - u8 is_ip6, - const u8 * mac, + ip46_type_t type, + const mac_address_t * mac, u32 sw_if_index, ip_neighbor_flags_t flags, u32 * stats_index); extern int ip_neighbor_del (const ip46_address_t * ip, - u8 is_ip6, u32 sw_if_index); + ip46_type_t type, u32 sw_if_index); #endif /* included_ip_neighbor_h */ diff --git a/src/vnet/ip/ip_types_api.c b/src/vnet/ip/ip_types_api.c index 6bc035ebef22..b56101c355f5 100644 --- a/src/vnet/ip/ip_types_api.c +++ b/src/vnet/ip/ip_types_api.c @@ -29,6 +29,30 @@ #include #undef vl_printfun +void +ip6_address_encode (const ip6_address_t * in, vl_api_ip6_address_t out) +{ + clib_memcpy (out, in, sizeof (*in)); +} + +void 
+ip6_address_decode (const vl_api_ip6_address_t in, ip6_address_t * out) +{ + clib_memcpy (out, in, sizeof (*out)); +} + +void +ip4_address_encode (const ip4_address_t * in, vl_api_ip4_address_t out) +{ + clib_memcpy (out, in, sizeof (*in)); +} + +void +ip4_address_decode (const vl_api_ip4_address_t in, ip4_address_t * out) +{ + clib_memcpy (out, in, sizeof (*out)); +} + static ip46_type_t ip_address_union_decode (const vl_api_address_union_t * in, vl_api_address_family_t af, ip46_address_t * out) @@ -67,9 +91,9 @@ ip_address_union_encode (const ip46_address_t * in, vl_api_address_union_t * out) { if (ADDRESS_IP6 == clib_net_to_host_u32 (af)) - memcpy (&out->ip6, &in->ip6, sizeof (out->ip6)); + ip6_address_encode (&in->ip6, out->ip6); else - memcpy (&out->ip4, &in->ip4, sizeof (out->ip4)); + ip4_address_encode (&in->ip4, out->ip4); } void diff --git a/src/vnet/ip/ip_types_api.h b/src/vnet/ip/ip_types_api.h index be41bf59ec34..a67134c9a865 100644 --- a/src/vnet/ip/ip_types_api.h +++ b/src/vnet/ip/ip_types_api.h @@ -27,6 +27,8 @@ /** * Forward declarations so we need not #include the API definitions here */ +typedef u8 vl_api_ip6_address_t[16]; +typedef u8 vl_api_ip4_address_t[4]; struct _vl_api_address; struct _vl_api_prefix; struct _vl_api_mprefix; @@ -35,6 +37,14 @@ extern ip46_type_t ip_address_decode (const struct _vl_api_address *in, ip46_address_t * out); extern void ip_address_encode (const ip46_address_t * in, ip46_type_t type, struct _vl_api_address *out); +extern void ip6_address_encode (const ip6_address_t * in, + vl_api_ip6_address_t out); +extern void ip6_address_decode (const vl_api_ip6_address_t in, + ip6_address_t * out); +extern void ip4_address_encode (const ip4_address_t * in, + vl_api_ip4_address_t out); +extern void ip4_address_decode (const vl_api_ip4_address_t in, + ip4_address_t * out); extern void ip_prefix_decode (const struct _vl_api_prefix *in, fib_prefix_t * out); diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index 
2a9b3fed0c9f..5c6fec1810b0 100644 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -51,6 +51,7 @@ #include #include #include +#include /** * @file diff --git a/src/vnet/ip/ping.c b/src/vnet/ip/ping.c index f147e47f1c27..2376d9a2f5b0 100755 --- a/src/vnet/ip/ping.c +++ b/src/vnet/ip/ping.c @@ -388,7 +388,7 @@ init_icmp46_echo_request (vlib_main_t * vm, vlib_buffer_t * b0, int l34_len = l4_header_offset + sizeof (icmp46_header_t) + offsetof (icmp46_echo_request_t, data); - int max_data_len = VLIB_BUFFER_DATA_SIZE - l34_len; + int max_data_len = vlib_buffer_get_default_data_size (vm) - l34_len; int first_buf_data_len = data_len < max_data_len ? data_len : max_data_len; @@ -403,9 +403,9 @@ init_icmp46_echo_request (vlib_main_t * vm, vlib_buffer_t * b0, { int this_buf_data_len = remaining_data_len < - VLIB_BUFFER_DATA_SIZE ? remaining_data_len : VLIB_BUFFER_DATA_SIZE; - int n_alloc = vlib_buffer_alloc_from_free_list (vm, &b0->next_buffer, 1, - hb->free_list_index); + vlib_buffer_get_default_data_size (vm) ? remaining_data_len : + vlib_buffer_get_default_data_size (vm); + int n_alloc = vlib_buffer_alloc (vm, &b0->next_buffer, 1); if (n_alloc < 1) { /* That is how much we have so far - return it... 
*/ @@ -746,15 +746,12 @@ send_ip46_ping (vlib_main_t * vm, u32 bi0 = 0; int n_buf0 = 0; vlib_buffer_t *b0; - vlib_buffer_free_list_t *fl; n_buf0 = vlib_buffer_alloc (vm, &bi0, 1); if (n_buf0 < 1) ERROR_OUT (SEND_PING_ALLOC_FAIL); b0 = vlib_get_buffer (vm, bi0); - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); /* diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c index 65e62c07abae..6e2fcebf69e4 100644 --- a/src/vnet/ip/punt.c +++ b/src/vnet/ip/punt.c @@ -497,7 +497,7 @@ format_punt_trace (u8 * s, va_list * va) static uword punt_socket_rx_fd (vlib_main_t * vm, vlib_node_runtime_t * node, u32 fd) { - const uword buffer_size = VLIB_BUFFER_DATA_SIZE; + const uword buffer_size = vlib_buffer_get_default_data_size (vm); u32 n_trace = vlib_get_trace_count (vm, node); u32 next = node->cached_next_index; u32 n_left_to_next, next_index; diff --git a/src/vnet/ip/rd_cp.c b/src/vnet/ip/rd_cp.c index 1a6122fc9d3c..a0894fa3d7c8 100644 --- a/src/vnet/ip/rd_cp.c +++ b/src/vnet/ip/rd_cp.c @@ -297,7 +297,7 @@ ip6_ra_report_handler (void *data) if (default_route->sw_if_index != sw_if_index) ; else if (0 != memcmp (&default_route->router_address, - r->router_address, 16)) + &r->router_address, 16)) ; else { @@ -311,7 +311,7 @@ ip6_ra_report_handler (void *data) if (!route_already_present) { if (router_lifetime_in_sec != 0) - add_default_route (vm, sw_if_index, (void *) r->router_address, + add_default_route (vm, sw_if_index, &r->router_address, current_time + router_lifetime_in_sec); } else @@ -346,8 +346,8 @@ ip6_ra_report_handler (void *data) if (!(prefix->flags & PREFIX_FLAG_A)) continue; - dst_address = &prefix->dst_address; - prefix_length = prefix->dst_address_length; + dst_address = &prefix->prefix.fp_addr.ip6; + prefix_length = prefix->prefix.fp_len; if (ip6_address_is_link_local_unicast (dst_address)) continue; diff --git a/src/vnet/ipfix-export/flow_report.c 
b/src/vnet/ipfix-export/flow_report.c index c9370a748cf4..103392ad81ec 100644 --- a/src/vnet/ipfix-export/flow_report.c +++ b/src/vnet/ipfix-export/flow_report.c @@ -86,7 +86,6 @@ send_template_packet (flow_report_main_t * frm, udp_header_t *udp; vlib_main_t *vm = frm->vlib_main; flow_report_stream_t *stream; - vlib_buffer_free_list_t *fl; ASSERT (buffer_indexp); @@ -120,11 +119,9 @@ send_template_packet (flow_report_main_t * frm, b0 = vlib_get_buffer (vm, bi0); /* Initialize the buffer */ - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - ASSERT (vec_len (fr->rewrite) < VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES); + ASSERT (vec_len (fr->rewrite) < vlib_buffer_get_default_data_size (vm)); clib_memcpy_fast (b0->data, fr->rewrite, vec_len (fr->rewrite)); b0->current_data = 0; diff --git a/src/vnet/ipfix-export/ipfix_doc.md b/src/vnet/ipfix-export/ipfix_doc.md index 95db15cc341c..5feea0e19fa7 100644 --- a/src/vnet/ipfix-export/ipfix_doc.md +++ b/src/vnet/ipfix-export/ipfix_doc.md @@ -284,7 +284,6 @@ This function creates the packet header for an ipfix data packet vlib_buffer_t *b0 = 0; u32 bi0 = ~0; u32 offset; - vlib_buffer_free_list_t *fl; b0 = mlm->buffers_by_thread[thread_index]; @@ -300,9 +299,6 @@ This function creates the packet header for an ipfix data packet } b0 = vlib_get_buffer (vm, bi0); - fl = - vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b0, fl); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); offset = 0; mlm->buffers_by_thread[thread_index] = b0; diff --git a/src/vnet/ipsec/ah_decrypt.c b/src/vnet/ipsec/ah_decrypt.c index c8c89028f9d5..7d2bf814fcc2 100644 --- a/src/vnet/ipsec/ah_decrypt.c +++ b/src/vnet/ipsec/ah_decrypt.c @@ -200,9 +200,9 @@ ah_decrypt_inline (vlib_main_t * vm, icv_padding_len = ah_calc_icv_padding_len (icv_size, 0 /* is_ipv6 */ ); } - hmac_calc (sa0->integ_alg, sa0->integ_key, 
sa0->integ_key_len, - (u8 *) ih4, i_b0->current_length, sig, sa0->use_esn, - sa0->seq_hi); + hmac_calc (sa0->integ_alg, sa0->integ_key.data, + sa0->integ_key.len, (u8 *) ih4, i_b0->current_length, + sig, sa0->use_esn, sa0->seq_hi); if (PREDICT_FALSE (memcmp (digest, sig, icv_size))) { diff --git a/src/vnet/ipsec/ah_encrypt.c b/src/vnet/ipsec/ah_encrypt.c index 0dc1612db5e2..66286094682e 100644 --- a/src/vnet/ipsec/ah_encrypt.c +++ b/src/vnet/ipsec/ah_encrypt.c @@ -59,6 +59,7 @@ static char *ah_encrypt_error_strings[] = { typedef struct { + u32 sa_index; u32 spi; u32 seq; ipsec_integ_alg_t integ_alg; @@ -72,8 +73,9 @@ format_ah_encrypt_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ah_encrypt_trace_t *t = va_arg (*args, ah_encrypt_trace_t *); - s = format (s, "ah: spi %u seq %u integrity %U", - t->spi, t->seq, format_ipsec_integ_alg, t->integ_alg); + s = format (s, "ah: sa-index %d spi %u seq %u integrity %U", + t->sa_index, t->spi, t->seq, + format_ipsec_integ_alg, t->integ_alg); return s; } @@ -125,13 +127,8 @@ ah_encrypt_inline (vlib_main_t * vm, if (PREDICT_FALSE (esp_seq_advance (sa0))) { - clib_warning ("sequence number counter has cycled SPI %u", - sa0->spi); vlib_node_increment_counter (vm, node->node_index, AH_ENCRYPT_ERROR_SEQ_CYCLED, 1); - //TODO need to confirm if below is needed - to_next[0] = i_bi0; - to_next += 1; goto trace; } @@ -242,8 +239,9 @@ ah_encrypt_inline (vlib_main_t * vm, oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32; oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32; - next0 = AH_ENCRYPT_NEXT_IP4_LOOKUP; - vnet_buffer (i_b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + next0 = sa0->dpo[IPSEC_PROTOCOL_AH].dpoi_next_node; + vnet_buffer (i_b0)->ip.adj_index[VLIB_TX] = + sa0->dpo[IPSEC_PROTOCOL_AH].dpoi_index; } else if (is_ip6 && sa0->is_tunnel && sa0->is_tunnel_ip6) { @@ -256,8 +254,9 @@ ah_encrypt_inline (vlib_main_t * vm, oh6_0->ip6.dst_address.as_u64[1] = 
sa0->tunnel_dst_addr.ip6.as_u64[1]; - next0 = AH_ENCRYPT_NEXT_IP6_LOOKUP; - vnet_buffer (i_b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + next0 = sa0->dpo[IPSEC_PROTOCOL_AH].dpoi_next_node; + vnet_buffer (i_b0)->ip.adj_index[VLIB_TX] = + sa0->dpo[IPSEC_PROTOCOL_AH].dpoi_index; } u8 sig[64]; @@ -267,8 +266,8 @@ ah_encrypt_inline (vlib_main_t * vm, sizeof (ah_header_t); clib_memset (digest, 0, icv_size); - unsigned size = hmac_calc (sa0->integ_alg, sa0->integ_key, - sa0->integ_key_len, + unsigned size = hmac_calc (sa0->integ_alg, sa0->integ_key.data, + sa0->integ_key.len, vlib_buffer_get_current (i_b0), i_b0->current_length, sig, sa0->use_esn, sa0->seq_hi); @@ -302,6 +301,7 @@ ah_encrypt_inline (vlib_main_t * vm, tr->spi = sa0->spi; tr->seq = sa0->seq - 1; tr->integ_alg = sa0->integ_alg; + tr->sa_index = sa_index0; } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c index 3f463505e01a..5a3ccdcacd94 100644 --- a/src/vnet/ipsec/esp_decrypt.c +++ b/src/vnet/ipsec/esp_decrypt.c @@ -206,8 +206,9 @@ esp_decrypt_inline (vlib_main_t * vm, icv_size; i_b0->current_length -= icv_size; - hmac_calc (sa0->integ_alg, sa0->integ_key, sa0->integ_key_len, - (u8 *) esp0, i_b0->current_length, sig, sa0->use_esn, + hmac_calc (sa0->integ_alg, sa0->integ_key.data, + sa0->integ_key.len, (u8 *) esp0, + i_b0->current_length, sig, sa0->use_esn, sa0->seq_hi); if (PREDICT_FALSE (memcmp (icv, sig, icv_size))) @@ -299,7 +300,7 @@ esp_decrypt_inline (vlib_main_t * vm, esp0->data + IV_SIZE, (u8 *) vlib_buffer_get_current (o_b0) + ip_hdr_size, BLOCK_SIZE * blocks, - sa0->crypto_key, esp0->data); + sa0->crypto_key.data, esp0->data); o_b0->current_length = (blocks * BLOCK_SIZE) - 2 + ip_hdr_size; o_b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c index 16f985c6cfe6..e1690439c885 100644 --- a/src/vnet/ipsec/esp_encrypt.c +++ b/src/vnet/ipsec/esp_encrypt.c @@ -23,7 
+23,9 @@ #include #include +#ifndef CLIB_MARCH_VARIANT ipsec_proto_main_t ipsec_proto_main; +#endif /* CLIB_MARCH_VARIANT */ #define foreach_esp_encrypt_next \ _(DROP, "error-drop") \ @@ -62,6 +64,7 @@ static char *esp_encrypt_error_strings[] = { typedef struct { + u32 sa_index; u32 spi; u32 seq; u8 udp_encap; @@ -77,8 +80,8 @@ format_esp_encrypt_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *); - s = format (s, "esp: spi %u seq %u crypto %U integrity %U%s", - t->spi, t->seq, + s = format (s, "esp: sa-index %d spi %u seq %u crypto %U integrity %U%s", + t->sa_index, t->spi, t->seq, format_ipsec_crypto_alg, t->crypto_alg, format_ipsec_integ_alg, t->integ_alg, t->udp_encap ? " udp-encap-enabled" : ""); @@ -279,7 +282,9 @@ esp_encrypt_inline (vlib_main_t * vm, oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32; oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32; - vnet_buffer (o_b0)->sw_if_index[VLIB_TX] = sa0->tx_fib_index; + next0 = sa0->dpo[IPSEC_PROTOCOL_ESP].dpoi_next_node; + vnet_buffer (o_b0)->ip.adj_index[VLIB_TX] = + sa0->dpo[IPSEC_PROTOCOL_ESP].dpoi_index; } else if (is_ip6 && sa0->is_tunnel && sa0->is_tunnel_ip6) { @@ -292,7 +297,9 @@ esp_encrypt_inline (vlib_main_t * vm, oh6_0->ip6.dst_address.as_u64[1] = sa0->tunnel_dst_addr.ip6.as_u64[1]; - vnet_buffer (o_b0)->sw_if_index[VLIB_TX] = sa0->tx_fib_index; + next0 = sa0->dpo[IPSEC_PROTOCOL_ESP].dpoi_next_node; + vnet_buffer (o_b0)->ip.adj_index[VLIB_TX] = + sa0->dpo[IPSEC_PROTOCOL_ESP].dpoi_index; } else { @@ -367,17 +374,15 @@ esp_encrypt_inline (vlib_main_t * vm, (u8 *) vlib_buffer_get_current (o_b0) + ip_udp_hdr_size + sizeof (esp_header_t) + IV_SIZE, BLOCK_SIZE * blocks, - sa0->crypto_key, iv); + sa0->crypto_key.data, iv); } - o_b0->current_length += hmac_calc (sa0->integ_alg, sa0->integ_key, - sa0->integ_key_len, - (u8 *) o_esp0, - o_b0->current_length - - ip_udp_hdr_size, - 
vlib_buffer_get_current (o_b0) + - o_b0->current_length, - sa0->use_esn, sa0->seq_hi); + o_b0->current_length += + hmac_calc (sa0->integ_alg, sa0->integ_key.data, + sa0->integ_key.len, (u8 *) o_esp0, + o_b0->current_length - ip_udp_hdr_size, + vlib_buffer_get_current (o_b0) + o_b0->current_length, + sa0->use_esn, sa0->seq_hi); if (is_ip6) @@ -412,6 +417,7 @@ esp_encrypt_inline (vlib_main_t * vm, o_b0->trace_index = i_b0->trace_index; esp_encrypt_trace_t *tr = vlib_add_trace (vm, node, o_b0, sizeof (*tr)); + tr->sa_index = sa_index0; tr->spi = sa0->spi; tr->seq = sa0->seq - 1; tr->udp_encap = sa0->udp_encap; diff --git a/src/vnet/ipsec/ikev2.c b/src/vnet/ipsec/ikev2.c index 80497f20807a..3d5c0f766cfb 100644 --- a/src/vnet/ipsec/ikev2.c +++ b/src/vnet/ipsec/ikev2.c @@ -1492,16 +1492,16 @@ ikev2_create_tunnel_interface (vnet_main_t * vnm, ikev2_sa_t * sa, a.is_add = 1; if (sa->is_initiator) { - a.local_ip.as_u32 = sa->iaddr.as_u32; - a.remote_ip.as_u32 = sa->raddr.as_u32; + a.local_ip.ip4.as_u32 = sa->iaddr.as_u32; + a.remote_ip.ip4.as_u32 = sa->raddr.as_u32; proposals = child->i_proposals; a.local_spi = child->r_proposals[0].spi; a.remote_spi = child->i_proposals[0].spi; } else { - a.local_ip.as_u32 = sa->raddr.as_u32; - a.remote_ip.as_u32 = sa->iaddr.as_u32; + a.local_ip.ip4.as_u32 = sa->raddr.as_u32; + a.remote_ip.ip4.as_u32 = sa->iaddr.as_u32; proposals = child->r_proposals; a.local_spi = child->i_proposals[0].spi; a.remote_spi = child->r_proposals[0].spi; @@ -1642,8 +1642,8 @@ ikev2_delete_tunnel_interface (vnet_main_t * vnm, ikev2_sa_t * sa, return 0; a.is_add = 0; - a.local_ip.as_u32 = sa->iaddr.as_u32; - a.remote_ip.as_u32 = sa->raddr.as_u32; + a.local_ip.ip4.as_u32 = sa->iaddr.as_u32; + a.remote_ip.ip4.as_u32 = sa->raddr.as_u32; a.local_spi = child->r_proposals[0].spi; a.remote_spi = child->i_proposals[0].spi; } @@ -1653,8 +1653,8 @@ ikev2_delete_tunnel_interface (vnet_main_t * vnm, ikev2_sa_t * sa, return 0; a.is_add = 0; - a.local_ip.as_u32 = 
sa->raddr.as_u32; - a.remote_ip.as_u32 = sa->iaddr.as_u32; + a.local_ip.ip4.as_u32 = sa->raddr.as_u32; + a.remote_ip.ip4.as_u32 = sa->iaddr.as_u32; a.local_spi = child->i_proposals[0].spi; a.remote_spi = child->r_proposals[0].spi; } diff --git a/src/vnet/ipsec/ikev2_crypto.c b/src/vnet/ipsec/ikev2_crypto.c index 037a3f5777e6..ffe15891b753 100644 --- a/src/vnet/ipsec/ikev2_crypto.c +++ b/src/vnet/ipsec/ikev2_crypto.c @@ -533,7 +533,11 @@ ikev2_generate_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t) y = BN_new (); len = t->key_len / 2; +#if OPENSSL_VERSION_NUMBER >= 0x30000000L + EC_POINT_get_affine_coordinates (group, r_point, x, y, bn_ctx); +#else EC_POINT_get_affine_coordinates_GFp (group, r_point, x, y, bn_ctx); +#endif if (sa->is_initiator) { @@ -562,12 +566,20 @@ ikev2_generate_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t) x = BN_bin2bn (sa->i_dh_data, len, x); y = BN_bin2bn (sa->i_dh_data + len, len, y); +#if OPENSSL_VERSION_NUMBER >= 0x30000000L + EC_POINT_set_affine_coordinates (group, i_point, x, y, bn_ctx); +#else EC_POINT_set_affine_coordinates_GFp (group, i_point, x, y, bn_ctx); +#endif sa->dh_shared_key = vec_new (u8, t->key_len); EC_POINT_mul (group, shared_point, NULL, i_point, EC_KEY_get0_private_key (ec), NULL); +#if OPENSSL_VERSION_NUMBER >= 0x30000000L + EC_POINT_get_affine_coordinates (group, shared_point, x, y, bn_ctx); +#else EC_POINT_get_affine_coordinates_GFp (group, shared_point, x, y, bn_ctx); +#endif x_off = len - BN_num_bytes (x); clib_memset (sa->dh_shared_key, 0, x_off); BN_bn2bin (x, sa->dh_shared_key + x_off); @@ -644,7 +656,11 @@ ikev2_complete_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t) x = BN_bin2bn (sa->r_dh_data, len, x); y = BN_bin2bn (sa->r_dh_data + len, len, y); EC_POINT *r_point = EC_POINT_new (group); +#if OPENSSL_VERSION_NUMBER >= 0x30000000L + EC_POINT_set_affine_coordinates (group, r_point, x, y, bn_ctx); +#else EC_POINT_set_affine_coordinates_GFp (group, r_point, x, y, bn_ctx); +#endif EC_KEY_set_public_key (ec, 
r_point); EC_POINT *i_point = EC_POINT_new (group); @@ -652,10 +668,18 @@ ikev2_complete_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t) x = BN_bin2bn (sa->i_dh_data, len, x); y = BN_bin2bn (sa->i_dh_data + len, len, y); +#if OPENSSL_VERSION_NUMBER >= 0x30000000L + EC_POINT_set_affine_coordinates (group, i_point, x, y, bn_ctx); +#else EC_POINT_set_affine_coordinates_GFp (group, i_point, x, y, bn_ctx); +#endif EC_POINT_mul (group, shared_point, NULL, r_point, EC_KEY_get0_private_key (ec), NULL); +#if OPENSSL_VERSION_NUMBER >= 0x30000000L + EC_POINT_get_affine_coordinates (group, shared_point, x, y, bn_ctx); +#else EC_POINT_get_affine_coordinates_GFp (group, shared_point, x, y, bn_ctx); +#endif sa->dh_shared_key = vec_new (u8, t->key_len); x_off = len - BN_num_bytes (x); clib_memset (sa->dh_shared_key, 0, x_off); diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api index 2b015f9c223a..ece0b024485f 100644 --- a/src/vnet/ipsec/ipsec.api +++ b/src/vnet/ipsec/ipsec.api @@ -1,3 +1,4 @@ +/* Hey Emacs use -*- mode: C -*- */ /* * Copyright (c) 2015-2016 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,7 +14,9 @@ * limitations under the License. 
*/ -option version = "2.1.0"; +option version = "3.0.0"; + +import "vnet/ip/ip_types.api"; /** \brief IPsec: Add/delete Security Policy Database @param client_index - opaque cookie to identify the sender @@ -50,121 +53,252 @@ autoreply define ipsec_interface_add_del_spd u32 spd_id; }; -/** \brief IPsec: Add/delete Security Policy Database entry + +enum ipsec_spd_action +{ + /* bypass - no IPsec processing */ + IPSEC_API_SPD_ACTION_BYPASS = 0, + /* discard - discard packet with ICMP processing */ + IPSEC_API_SPD_ACTION_DISCARD, + /* resolve - send request to control plane for SA resolving */ + IPSEC_API_SPD_ACTION_RESOLVE, + /* protect - apply IPsec policy using following parameters */ + IPSEC_API_SPD_ACTION_PROTECT, +}; + +/** \brief IPsec: Security Policy Database entry See RFC 4301, 4.4.1.1 on how to match packet to selectors - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param is_add - add SPD if non-zero, else delete @param spd_id - SPD instance id (control plane allocated) @param priority - priority of SPD entry (non-unique value). Used to order SPD matching - higher priorities match before lower @param is_outbound - entry applies to outbound traffic if non-zero, otherwise applies to inbound traffic - @param is_ipv6 - remote/local address are IPv6 if non-zero, else IPv4 @param remote_address_start - start of remote address range to match @param remote_address_stop - end of remote address range to match @param local_address_start - start of local address range to match @param local_address_stop - end of local address range to match - @param protocol - protocol type to match [0 means any] + @param protocol - protocol type to match [0 means any] otherwise IANA value @param remote_port_start - start of remote port range to match ... 
@param remote_port_stop - end of remote port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE] @param local_port_start - start of local port range to match ... @param local_port_stop - end of remote port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE] - @param policy - 0 = bypass (no IPsec processing), 1 = discard (discard packet with ICMP processing), 2 = resolve (send request to control plane for SA resolving, and discard without ICMP processing), 3 = protect (apply IPsec policy using following parameters) + @param policy - action to perform on match @param sa_id - SAD instance id (control plane allocated) - */ - -autoreply define ipsec_spd_add_del_entry +typedef ipsec_spd_entry { - u32 client_index; - u32 context; - u8 is_add; - u32 spd_id; i32 priority; u8 is_outbound; - // Selector - u8 is_ipv6; - u8 is_ip_any; - u8 remote_address_start[16]; - u8 remote_address_stop[16]; - u8 local_address_start[16]; - u8 local_address_stop[16]; - + u32 sa_id; + vl_api_ipsec_spd_action_t policy; u8 protocol; + // Selector + vl_api_address_t remote_address_start; + vl_api_address_t remote_address_stop; + vl_api_address_t local_address_start; + vl_api_address_t local_address_stop; + u16 remote_port_start; u16 remote_port_stop; u16 local_port_start; u16 local_port_stop; - - // Policy - u8 policy; - u32 sa_id; }; -/** \brief IPsec: Add/delete Security Association Database entry +/** \brief IPsec: Add/delete Security Policy Database entry + @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param is_add - add SAD entry if non-zero, else delete + @param is_add - add SPD if non-zero, else delete + @param entry - Description of the entry to add/delete +*/ +define ipsec_spd_entry_add_del +{ + u32 client_index; + u32 context; + u8 is_add; + vl_api_ipsec_spd_entry_t entry; +}; - @param sad_id - sad id +/** \brief IPsec: Reply Add/delete Security Policy Database entry - @param spi - security
parameter index + @param context - sender context, to match reply w/ request + @param retval - success/fail return code + @param stat_index - An index for the policy in the stats segment @ /net/ipsec/policy +*/ +define ipsec_spd_entry_add_del_reply +{ + u32 context; + i32 retval; + u32 stat_index; +}; - @param protocol - 0 = AH, 1 = ESP +/** \brief Dump IPsec all SPD IDs + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define ipsec_spds_dump { + u32 client_index; + u32 context; +}; - @param crypto_algorithm - 0 = Null, 1 = AES-CBC-128, 2 = AES-CBC-192, 3 = AES-CBC-256, 4 = 3DES-CBC - @param crypto_key_length - length of crypto_key in bytes - @param crypto_key - crypto keying material +/** \brief Dump IPsec all SPD IDs response + @param client_index - opaque cookie to identify the sender + @param spd_id - SPD instance id (control plane allocated) + @param npolicies - number of policies in SPD +*/ +define ipsec_spds_details { + u32 context; + u32 spd_id; + u32 npolicies; +}; - @param integrity_algorithm - 0 = None, 1 = MD5-96, 2 = SHA1-96, 3 = SHA-256, 4 = SHA-384, 5=SHA-512 - @param integrity_key_length - length of integrity_key in bytes - @param integrity_key - integrity keying material +/** \brief Dump ipsec policy database data + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param spd_id - SPD instance id + @param sa_id - SA id, optional, set to ~0 to see all policies in SPD +*/ +define ipsec_spd_dump { + u32 client_index; + u32 context; + u32 spd_id; + u32 sa_id; +}; - @param use_extended_sequence_number - use ESN when non-zero +/** \brief IPsec policy database response + @param context - sender context which was passed in the request + @param entry - The SPD entry.
+ @param bytes - byte count of packets matching this policy + @param packets - count of packets matching this policy +*/ +define ipsec_spd_details { + u32 context; + vl_api_ipsec_spd_entry_t entry; +}; - @param is_tunnel - IPsec tunnel mode if non-zero, else transport mode - @param is_tunnel_ipv6 - IPsec tunnel mode is IPv6 if non-zero, else IPv4 tunnel only valid if is_tunnel is non-zero - @param tunnel_src_address - IPsec tunnel source address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. Only valid if is_tunnel is non-zero - @param tunnel_dst_address - IPsec tunnel destination address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. Only valid if is_tunnel is non-zero - @param udp_encap - enable UDP encapsulation for NAT traversal +/* + * @brief Supported cryptographic algorithms + */ +enum ipsec_crypto_alg +{ + IPSEC_API_CRYPTO_ALG_NONE = 0, + IPSEC_API_CRYPTO_ALG_AES_CBC_128, + IPSEC_API_CRYPTO_ALG_AES_CBC_192, + IPSEC_API_CRYPTO_ALG_AES_CBC_256, + IPSEC_API_CRYPTO_ALG_AES_CTR_128, + IPSEC_API_CRYPTO_ALG_AES_CTR_192, + IPSEC_API_CRYPTO_ALG_AES_CTR_256, + IPSEC_API_CRYPTO_ALG_AES_GCM_128, + IPSEC_API_CRYPTO_ALG_AES_GCM_192, + IPSEC_API_CRYPTO_ALG_AES_GCM_256, + IPSEC_API_CRYPTO_ALG_DES_CBC, + IPSEC_API_CRYPTO_ALG_3DES_CBC, +}; - To be added: - Anti-replay - IPsec tunnel address copy mode (to support GDOI) +/* + * @brief Supported Integrity Algorithms */ +enum ipsec_integ_alg +{ + IPSEC_API_INTEG_ALG_NONE = 0, + /* RFC2403 */ + IPSEC_API_INTEG_ALG_MD5_96, + /* RFC2404 */ + IPSEC_API_INTEG_ALG_SHA1_96, + /* draft-ietf-ipsec-ciph-sha-256-00 */ + IPSEC_API_INTEG_ALG_SHA_256_96, + /* RFC4868 */ + IPSEC_API_INTEG_ALG_SHA_256_128, + /* RFC4868 */ + IPSEC_API_INTEG_ALG_SHA_384_192, + /* RFC4868 */ + IPSEC_API_INTEG_ALG_SHA_512_256, +}; + +enum ipsec_sad_flags +{ + IPSEC_API_SAD_FLAG_NONE = 0, + /* Enable extended sequence numbers */ + IPSEC_API_SAD_FLAG_USE_EXTENDED_SEQ_NUM = 0x01, + /* Enable Anti-replay */ + IPSEC_API_SAD_FLAG_USE_ANTI_REPLAY = 0x02, + /* IPsec tunnel
mode if non-zero, else transport mode */ + IPSEC_API_SAD_FLAG_IS_TUNNEL = 0x04, + /* IPsec tunnel mode is IPv6 if non-zero, + * else IPv4 tunnel only valid if is_tunnel is non-zero */ + IPSEC_API_SAD_FLAG_IS_TUNNEL_V6 = 0x08, + /* enable UDP encapsulation for NAT traversal */ + IPSEC_API_SAD_FLAG_UDP_ENCAP = 0x10, + + /* come-on Ole please fix this */ + IPSEC_API_SAD_COMBO_12 = 12, + IPSEC_API_SAD_COMBO_20 = 20, +}; + +enum ipsec_proto +{ + IPSEC_API_PROTO_ESP, + IPSEC_API_PROTO_AH, +}; -autoreply define ipsec_sad_add_del_entry +typedef key { - u32 client_index; - u32 context; - u8 is_add; + /* the length of the key */ + u8 length; + /* The data for the key */ + u8 data[128]; +}; +/** \brief IPsec: Security Association Database entry + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - add SAD entry if non-zero, else delete + @param sad_id - sad id + @param spi - security parameter index + @param protocol - 0 = AH, 1 = ESP + @param crypto_algorithm - a supported crypto algorithm + @param crypto_key - crypto keying material + @param integrity_algorithm - one of the supported algorithms + @param integrity_key - integrity keying material + @param tunnel_src_address - IPsec tunnel source address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. Only valid if is_tunnel is non-zero + @param tunnel_dst_address - IPsec tunnel destination address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. 
Only valid if is_tunnel is non-zero + @param tx_table_id - the FIB id used for encapsulated packets + */ +typedef ipsec_sad_entry +{ u32 sad_id; u32 spi; - u8 protocol; + vl_api_ipsec_proto_t protocol; - u8 crypto_algorithm; - u8 crypto_key_length; - u8 crypto_key[128]; + vl_api_ipsec_crypto_alg_t crypto_algorithm; + vl_api_key_t crypto_key; - u8 integrity_algorithm; - u8 integrity_key_length; - u8 integrity_key[128]; + vl_api_ipsec_integ_alg_t integrity_algorithm; + vl_api_key_t integrity_key; - u8 use_extended_sequence_number; - u8 use_anti_replay; + vl_api_ipsec_sad_flags_t flags; - u8 is_tunnel; - u8 is_tunnel_ipv6; - u8 tunnel_src_address[16]; - u8 tunnel_dst_address[16]; - u8 udp_encap; + vl_api_address_t tunnel_src; + vl_api_address_t tunnel_dst; + u32 tx_table_id; +}; + +/** \brief IPsec: Add/delete Security Association Database entry + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param entry - Entry to add or delete + */ +autoreply define ipsec_sad_entry_add_del +{ + u32 client_index; + u32 context; + u8 is_add; + vl_api_ipsec_sad_entry_t entry; }; /** \brief IPsec: Update Security Association keys @@ -173,10 +307,7 @@ autoreply define ipsec_sad_add_del_entry @param sa_id - sa id - @param crypto_key_length - length of crypto_key in bytes @param crypto_key - crypto keying material - - @param integrity_key_length - length of integrity_key in bytes @param integrity_key - integrity keying material */ @@ -187,11 +318,8 @@ autoreply define ipsec_sa_set_key u32 sa_id; - u8 crypto_key_length; - u8 crypto_key[128]; - - u8 integrity_key_length; - u8 integrity_key[128]; + vl_api_key_t crypto_key; + vl_api_key_t integrity_key; }; /** \brief IKEv2: Add/delete profile @@ -441,80 +569,6 @@ autoreply define ikev2_initiate_rekey_child_sa u32 ispi; }; -/** \brief Dump IPsec all SPD IDs - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ 
request -*/ -define ipsec_spds_dump { - u32 client_index; - u32 context; -}; - -/** \brief Dump IPsec all SPD IDs response - @param client_index - opaque cookie to identify the sender - @param spd_id - SPD instance id (control plane allocated) - @param npolicies - number of policies in SPD -*/ -define ipsec_spds_details { - u32 context; - u32 spd_id; - u32 npolicies; -}; - -/** \brief Dump ipsec policy database data - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param spd_id - SPD instance id - @param sa_id - SA id, optional, set to ~0 to see all policies in SPD -*/ -define ipsec_spd_dump { - u32 client_index; - u32 context; - u32 spd_id; - u32 sa_id; -}; - -/** \brief IPsec policy database response - @param context - sender context which was passed in the request - @param spd_id - SPD instance id - @param priority - numeric value to control policy evaluation order - @param is_outbound - [1|0] to indicate if direction is [out|in]bound - @param is_ipv6 - [1|0] to indicate if address family is ipv[6|4] - @param local_start_addr - first address in local traffic selector range - @param local_stop_addr - last address in local traffic selector range - @param local_start_port - first port in local traffic selector range - @param local_stop_port - last port in local traffic selector range - @param remote_start_addr - first address in remote traffic selector range - @param remote_stop_addr - last address in remote traffic selector range - @param remote_start_port - first port in remote traffic selector range - @param remote_stop_port - last port in remote traffic selector range - @param protocol - traffic selector protocol - @param policy - policy action - @param sa_id - SA id - @param bytes - byte count of packets matching this policy - @param packets - count of packets matching this policy -*/ -define ipsec_spd_details { - u32 context; - u32 spd_id; - i32 priority; - u8 is_outbound; - u8 
is_ipv6; - u8 local_start_addr[16]; - u8 local_stop_addr[16]; - u16 local_start_port; - u16 local_stop_port; - u8 remote_start_addr[16]; - u8 remote_stop_addr[16]; - u16 remote_start_port; - u16 remote_stop_port; - u8 protocol; - u8 policy; - u32 sa_id; - u64 bytes; - u64 packets; -}; - /** \brief IPsec: Get SPD interfaces @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -639,41 +693,18 @@ define ipsec_sa_dump { @param replay_window - bit map of seq nums received relative to last_seq if using anti-replay @param total_data_size - total bytes sent or received @param udp_encap - 1 if UDP encap enabled, 0 otherwise - @param tx_table_id - the FIB id used for encapsulated packets */ define ipsec_sa_details { u32 context; - u32 sa_id; - u32 sw_if_index; - - u32 spi; - u8 protocol; - - u8 crypto_alg; - u8 crypto_key_len; - u8 crypto_key[128]; - - u8 integ_alg; - u8 integ_key_len; - u8 integ_key[128]; - - u8 use_esn; - u8 use_anti_replay; - - u8 is_tunnel; - u8 is_tunnel_ip6; - u8 tunnel_src_addr[16]; - u8 tunnel_dst_addr[16]; + vl_api_ipsec_sad_entry_t entry; + u32 sw_if_index; u32 salt; u64 seq_outbound; u64 last_seq_inbound; u64 replay_window; u64 total_data_size; - u8 udp_encap; - - u32 tx_table_id; }; /** \brief Set key on IPsec interface @@ -728,7 +759,7 @@ define ipsec_backend_dump { define ipsec_backend_details { u32 context; u8 name[128]; - u8 protocol; + vl_api_ipsec_proto_t protocol; u8 index; u8 active; }; @@ -742,7 +773,7 @@ define ipsec_backend_details { autoreply define ipsec_select_backend { u32 client_index; u32 context; - u8 protocol; + vl_api_ipsec_proto_t protocol; u8 index; }; diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c index 7b79af2d902a..0ad11ba842e9 100644 --- a/src/vnet/ipsec/ipsec.c +++ b/src/vnet/ipsec/ipsec.c @@ -1,5 +1,5 @@ /* - * decap.c : IPSec tunnel support + * ipsec.c : IPSEC module functions * * Copyright (c) 2015 Cisco and/or its affiliates. 
* Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,495 +26,8 @@ #include #include - ipsec_main_t ipsec_main; -u32 -ipsec_get_sa_index_by_sa_id (u32 sa_id) -{ - ipsec_main_t *im = &ipsec_main; - uword *p = hash_get (im->sa_index_by_sa_id, sa_id); - if (!p) - return ~0; - - return p[0]; -} - -int -ipsec_set_interface_spd (vlib_main_t * vm, u32 sw_if_index, u32 spd_id, - int is_add) -{ - ipsec_main_t *im = &ipsec_main; - ip4_ipsec_config_t config; - - u32 spd_index; - uword *p; - - p = hash_get (im->spd_index_by_spd_id, spd_id); - if (!p) - return VNET_API_ERROR_SYSCALL_ERROR_1; /* no such spd-id */ - - spd_index = p[0]; - - p = hash_get (im->spd_index_by_sw_if_index, sw_if_index); - if (p && is_add) - return VNET_API_ERROR_SYSCALL_ERROR_1; /* spd already assigned */ - - if (is_add) - { - hash_set (im->spd_index_by_sw_if_index, sw_if_index, spd_index); - } - else - { - hash_unset (im->spd_index_by_sw_if_index, sw_if_index); - } - - clib_warning ("sw_if_index %u spd_id %u spd_index %u", - sw_if_index, spd_id, spd_index); - - /* enable IPsec on TX */ - vnet_feature_enable_disable ("ip4-output", "ipsec4-output-feature", - sw_if_index, is_add, 0, 0); - vnet_feature_enable_disable ("ip6-output", "ipsec6-output-feature", - sw_if_index, is_add, 0, 0); - - config.spd_index = spd_index; - - /* enable IPsec on RX */ - vnet_feature_enable_disable ("ip4-unicast", "ipsec4-input-feature", - sw_if_index, is_add, &config, sizeof (config)); - vnet_feature_enable_disable ("ip6-unicast", "ipsec6-input-feature", - sw_if_index, is_add, &config, sizeof (config)); - - return 0; -} - -int -ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add) -{ - ipsec_main_t *im = &ipsec_main; - ipsec_spd_t *spd = 0; - uword *p; - u32 spd_index, k, v; - - p = hash_get (im->spd_index_by_spd_id, spd_id); - if (p && is_add) - return VNET_API_ERROR_INVALID_VALUE; - if (!p && !is_add) - return VNET_API_ERROR_INVALID_VALUE; - - if (!is_add) /* delete */ - { - spd_index = p[0]; - spd 
= pool_elt_at_index (im->spds, spd_index); - if (!spd) - return VNET_API_ERROR_INVALID_VALUE; - /* *INDENT-OFF* */ - hash_foreach (k, v, im->spd_index_by_sw_if_index, ({ - if (v == spd_index) - ipsec_set_interface_spd(vm, k, spd_id, 0); - })); - /* *INDENT-ON* */ - hash_unset (im->spd_index_by_spd_id, spd_id); - pool_free (spd->policies); - vec_free (spd->ipv4_outbound_policies); - vec_free (spd->ipv6_outbound_policies); - vec_free (spd->ipv4_inbound_protect_policy_indices); - vec_free (spd->ipv4_inbound_policy_discard_and_bypass_indices); - pool_put (im->spds, spd); - } - else /* create new SPD */ - { - pool_get (im->spds, spd); - clib_memset (spd, 0, sizeof (*spd)); - spd_index = spd - im->spds; - spd->id = spd_id; - hash_set (im->spd_index_by_spd_id, spd_id, spd_index); - } - return 0; -} - -static int -ipsec_spd_entry_sort (void *a1, void *a2) -{ - u32 *id1 = a1; - u32 *id2 = a2; - ipsec_spd_t *spd = ipsec_main.spd_to_sort; - ipsec_policy_t *p1, *p2; - - p1 = pool_elt_at_index (spd->policies, *id1); - p2 = pool_elt_at_index (spd->policies, *id2); - if (p1 && p2) - return p2->priority - p1->priority; - - return 0; -} - -int -ipsec_add_del_policy (vlib_main_t * vm, ipsec_policy_t * policy, int is_add) -{ - ipsec_main_t *im = &ipsec_main; - ipsec_spd_t *spd = 0; - ipsec_policy_t *vp; - uword *p; - u32 spd_index; - - clib_warning ("policy-id %u priority %d is_outbound %u", policy->id, - policy->priority, policy->is_outbound); - - if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) - { - p = hash_get (im->sa_index_by_sa_id, policy->sa_id); - if (!p) - return VNET_API_ERROR_SYSCALL_ERROR_1; - policy->sa_index = p[0]; - } - - p = hash_get (im->spd_index_by_spd_id, policy->id); - - if (!p) - return VNET_API_ERROR_SYSCALL_ERROR_1; - - spd_index = p[0]; - spd = pool_elt_at_index (im->spds, spd_index); - if (!spd) - return VNET_API_ERROR_SYSCALL_ERROR_1; - - if (is_add) - { - u32 policy_index; - - pool_get (spd->policies, vp); - clib_memcpy (vp, policy, sizeof (*vp)); - 
policy_index = vp - spd->policies; - - ipsec_main.spd_to_sort = spd; - - if (policy->is_outbound) - { - if (policy->is_ipv6) - { - vec_add1 (spd->ipv6_outbound_policies, policy_index); - vec_sort_with_function (spd->ipv6_outbound_policies, - ipsec_spd_entry_sort); - } - else - { - vec_add1 (spd->ipv4_outbound_policies, policy_index); - vec_sort_with_function (spd->ipv4_outbound_policies, - ipsec_spd_entry_sort); - } - } - else - { - if (policy->is_ipv6) - { - if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) - { - vec_add1 (spd->ipv6_inbound_protect_policy_indices, - policy_index); - vec_sort_with_function - (spd->ipv6_inbound_protect_policy_indices, - ipsec_spd_entry_sort); - } - else - { - vec_add1 - (spd->ipv6_inbound_policy_discard_and_bypass_indices, - policy_index); - vec_sort_with_function - (spd->ipv6_inbound_policy_discard_and_bypass_indices, - ipsec_spd_entry_sort); - } - } - else - { - if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) - { - vec_add1 (spd->ipv4_inbound_protect_policy_indices, - policy_index); - vec_sort_with_function - (spd->ipv4_inbound_protect_policy_indices, - ipsec_spd_entry_sort); - } - else - { - vec_add1 - (spd->ipv4_inbound_policy_discard_and_bypass_indices, - policy_index); - vec_sort_with_function - (spd->ipv4_inbound_policy_discard_and_bypass_indices, - ipsec_spd_entry_sort); - } - } - } - - ipsec_main.spd_to_sort = NULL; - } - else - { - u32 i, j; - /* *INDENT-OFF* */ - pool_foreach_index(i, spd->policies, ({ - vp = pool_elt_at_index(spd->policies, i); - if (vp->priority != policy->priority) - continue; - if (vp->is_outbound != policy->is_outbound) - continue; - if (vp->policy != policy->policy) - continue; - if (vp->sa_id != policy->sa_id) - continue; - if (vp->protocol != policy->protocol) - continue; - if (vp->lport.start != policy->lport.start) - continue; - if (vp->lport.stop != policy->lport.stop) - continue; - if (vp->rport.start != policy->rport.start) - continue; - if (vp->rport.stop != policy->rport.stop) - 
continue; - if (vp->is_ipv6 != policy->is_ipv6) - continue; - if (policy->is_ipv6) - { - if (vp->laddr.start.ip6.as_u64[0] != policy->laddr.start.ip6.as_u64[0]) - continue; - if (vp->laddr.start.ip6.as_u64[1] != policy->laddr.start.ip6.as_u64[1]) - continue; - if (vp->laddr.stop.ip6.as_u64[0] != policy->laddr.stop.ip6.as_u64[0]) - continue; - if (vp->laddr.stop.ip6.as_u64[1] != policy->laddr.stop.ip6.as_u64[1]) - continue; - if (vp->raddr.start.ip6.as_u64[0] != policy->raddr.start.ip6.as_u64[0]) - continue; - if (vp->raddr.start.ip6.as_u64[1] != policy->raddr.start.ip6.as_u64[1]) - continue; - if (vp->raddr.stop.ip6.as_u64[0] != policy->raddr.stop.ip6.as_u64[0]) - continue; - if (vp->laddr.stop.ip6.as_u64[1] != policy->laddr.stop.ip6.as_u64[1]) - continue; - if (policy->is_outbound) - { - vec_foreach_index(j, spd->ipv6_outbound_policies) { - if (vec_elt(spd->ipv6_outbound_policies, j) == i) { - vec_del1 (spd->ipv6_outbound_policies, j); - break; - } - } - } - else - { - if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) - { - vec_foreach_index(j, spd->ipv6_inbound_protect_policy_indices) { - if (vec_elt(spd->ipv6_inbound_protect_policy_indices, j) == i) { - vec_del1 (spd->ipv6_inbound_protect_policy_indices, j); - break; - } - } - } - else - { - vec_foreach_index(j, spd->ipv6_inbound_policy_discard_and_bypass_indices) { - if (vec_elt(spd->ipv6_inbound_policy_discard_and_bypass_indices, j) == i) { - vec_del1 (spd->ipv6_inbound_policy_discard_and_bypass_indices, j); - break; - } - } - } - } - } - else - { - if (vp->laddr.start.ip4.as_u32 != policy->laddr.start.ip4.as_u32) - continue; - if (vp->laddr.stop.ip4.as_u32 != policy->laddr.stop.ip4.as_u32) - continue; - if (vp->raddr.start.ip4.as_u32 != policy->raddr.start.ip4.as_u32) - continue; - if (vp->raddr.stop.ip4.as_u32 != policy->raddr.stop.ip4.as_u32) - continue; - if (policy->is_outbound) - { - vec_foreach_index(j, spd->ipv4_outbound_policies) { - if (vec_elt(spd->ipv4_outbound_policies, j) == i) { - vec_del1 
(spd->ipv4_outbound_policies, j); - break; - } - } - } - else - { - if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) - { - vec_foreach_index(j, spd->ipv4_inbound_protect_policy_indices) { - if (vec_elt(spd->ipv4_inbound_protect_policy_indices, j) == i) { - vec_del1 (spd->ipv4_inbound_protect_policy_indices, j); - break; - } - } - } - else - { - vec_foreach_index(j, spd->ipv4_inbound_policy_discard_and_bypass_indices) { - if (vec_elt(spd->ipv4_inbound_policy_discard_and_bypass_indices, j) == i) { - vec_del1 (spd->ipv4_inbound_policy_discard_and_bypass_indices, j); - break; - } - } - } - } - } - pool_put (spd->policies, vp); - break; - })); - /* *INDENT-ON* */ - } - - return 0; -} - -u8 -ipsec_is_sa_used (u32 sa_index) -{ - ipsec_main_t *im = &ipsec_main; - ipsec_spd_t *spd; - ipsec_policy_t *p; - ipsec_tunnel_if_t *t; - - /* *INDENT-OFF* */ - pool_foreach(spd, im->spds, ({ - pool_foreach(p, spd->policies, ({ - if (p->policy == IPSEC_POLICY_ACTION_PROTECT) - { - if (p->sa_index == sa_index) - return 1; - } - })); - })); - - pool_foreach(t, im->tunnel_interfaces, ({ - if (t->input_sa_index == sa_index) - return 1; - if (t->output_sa_index == sa_index) - return 1; - })); - /* *INDENT-ON* */ - - return 0; -} - -clib_error_t * -ipsec_call_add_del_callbacks (ipsec_main_t * im, ipsec_sa_t * sa, - u32 sa_index, int is_add) -{ - ipsec_ah_backend_t *ab; - ipsec_esp_backend_t *eb; - switch (sa->protocol) - { - case IPSEC_PROTOCOL_AH: - ab = pool_elt_at_index (im->ah_backends, im->ah_current_backend); - if (ab->add_del_sa_sess_cb) - return ab->add_del_sa_sess_cb (sa_index, is_add); - break; - case IPSEC_PROTOCOL_ESP: - eb = pool_elt_at_index (im->esp_backends, im->esp_current_backend); - if (eb->add_del_sa_sess_cb) - return eb->add_del_sa_sess_cb (sa_index, is_add); - break; - } - return 0; -} - -int -ipsec_add_del_sa (vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add) -{ - ipsec_main_t *im = &ipsec_main; - ipsec_sa_t *sa = 0; - uword *p; - u32 sa_index; - clib_error_t *err; 
- - clib_warning ("id %u spi %u", new_sa->id, new_sa->spi); - - p = hash_get (im->sa_index_by_sa_id, new_sa->id); - if (p && is_add) - return VNET_API_ERROR_SYSCALL_ERROR_1; /* already exists */ - if (!p && !is_add) - return VNET_API_ERROR_SYSCALL_ERROR_1; - - if (!is_add) /* delete */ - { - sa_index = p[0]; - sa = pool_elt_at_index (im->sad, sa_index); - if (ipsec_is_sa_used (sa_index)) - { - clib_warning ("sa_id %u used in policy", sa->id); - return VNET_API_ERROR_SYSCALL_ERROR_1; /* sa used in policy */ - } - hash_unset (im->sa_index_by_sa_id, sa->id); - err = ipsec_call_add_del_callbacks (im, sa, sa_index, 0); - if (err) - return VNET_API_ERROR_SYSCALL_ERROR_1; - pool_put (im->sad, sa); - } - else /* create new SA */ - { - pool_get (im->sad, sa); - clib_memcpy (sa, new_sa, sizeof (*sa)); - sa_index = sa - im->sad; - hash_set (im->sa_index_by_sa_id, sa->id, sa_index); - err = ipsec_call_add_del_callbacks (im, sa, sa_index, 1); - if (err) - return VNET_API_ERROR_SYSCALL_ERROR_1; - } - return 0; -} - -int -ipsec_set_sa_key (vlib_main_t * vm, ipsec_sa_t * sa_update) -{ - ipsec_main_t *im = &ipsec_main; - uword *p; - u32 sa_index; - ipsec_sa_t *sa = 0; - clib_error_t *err; - - p = hash_get (im->sa_index_by_sa_id, sa_update->id); - if (!p) - return VNET_API_ERROR_SYSCALL_ERROR_1; /* no such sa-id */ - - sa_index = p[0]; - sa = pool_elt_at_index (im->sad, sa_index); - - /* new crypto key */ - if (0 < sa_update->crypto_key_len) - { - clib_memcpy (sa->crypto_key, sa_update->crypto_key, - sa_update->crypto_key_len); - sa->crypto_key_len = sa_update->crypto_key_len; - } - - /* new integ key */ - if (0 < sa_update->integ_key_len) - { - clib_memcpy (sa->integ_key, sa_update->integ_key, - sa_update->integ_key_len); - sa->integ_key_len = sa_update->integ_key_len; - } - - if (0 < sa_update->crypto_key_len || 0 < sa_update->integ_key_len) - { - err = ipsec_call_add_del_callbacks (im, sa, sa_index, 0); - if (err) - return VNET_API_ERROR_SYSCALL_ERROR_1; - } - - return 0; -} - 
static void ipsec_rand_seed (void) { @@ -668,6 +181,14 @@ ipsec_register_esp_backend (vlib_main_t * vm, ipsec_main_t * im, return b - im->esp_backends; } +static walk_rc_t +ipsec_sa_restack (ipsec_sa_t * sa, void *ctx) +{ + ipsec_sa_stack (sa); + + return (WALK_CONTINUE); +} + int ipsec_select_ah_backend (ipsec_main_t * im, u32 backend_idx) { @@ -686,6 +207,8 @@ ipsec_select_ah_backend (ipsec_main_t * im, u32 backend_idx) im->ah6_decrypt_node_index = b->ah6_decrypt_node_index; im->ah6_encrypt_next_index = b->ah6_encrypt_next_index; im->ah6_decrypt_next_index = b->ah6_decrypt_next_index; + + ipsec_sa_walk (ipsec_sa_restack, NULL); return 0; } @@ -707,6 +230,8 @@ ipsec_select_esp_backend (ipsec_main_t * im, u32 backend_idx) im->esp6_decrypt_node_index = b->esp6_decrypt_node_index; im->esp6_encrypt_next_index = b->esp6_encrypt_next_index; im->esp6_decrypt_next_index = b->esp6_decrypt_next_index; + + ipsec_sa_walk (ipsec_sa_restack, NULL); return 0; } diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h index 691bc071bba4..3d81884064ca 100644 --- a/src/vnet/ipsec/ipsec.h +++ b/src/vnet/ipsec/ipsec.h @@ -25,242 +25,11 @@ #include #include -#define IPSEC_FLAG_IPSEC_GRE_TUNNEL (1 << 0) - -#define foreach_ipsec_output_next \ - _ (DROP, "error-drop") \ - _ (ESP4_ENCRYPT, "esp4-encrypt") \ - _ (AH4_ENCRYPT, "ah4-encrypt") \ - _ (ESP6_ENCRYPT, "esp6-encrypt") \ - _ (AH6_ENCRYPT, "ah6-encrypt") - -#define _(v, s) IPSEC_OUTPUT_NEXT_##v, -typedef enum -{ - foreach_ipsec_output_next -#undef _ - IPSEC_OUTPUT_N_NEXT, -} ipsec_output_next_t; - -#define foreach_ipsec_input_next \ - _ (DROP, "error-drop") \ - _ (ESP4_DECRYPT, "esp4-decrypt") \ - _ (AH4_DECRYPT, "ah4-decrypt") \ - _ (ESP6_DECRYPT, "esp6-decrypt") \ - _ (AH6_DECRYPT, "ah6-decrypt") - -#define _(v, s) IPSEC_INPUT_NEXT_##v, -typedef enum -{ - foreach_ipsec_input_next -#undef _ - IPSEC_INPUT_N_NEXT, -} ipsec_input_next_t; - -#define foreach_ipsec_policy_action \ - _ (0, BYPASS, "bypass") \ - _ (1, DISCARD, 
"discard") \ - _ (2, RESOLVE, "resolve") \ - _ (3, PROTECT, "protect") - -typedef enum -{ -#define _(v, f, s) IPSEC_POLICY_ACTION_##f = v, - foreach_ipsec_policy_action -#undef _ - IPSEC_POLICY_N_ACTION, -} ipsec_policy_action_t; - -#define foreach_ipsec_crypto_alg \ - _ (0, NONE, "none") \ - _ (1, AES_CBC_128, "aes-cbc-128") \ - _ (2, AES_CBC_192, "aes-cbc-192") \ - _ (3, AES_CBC_256, "aes-cbc-256") \ - _ (4, AES_CTR_128, "aes-ctr-128") \ - _ (5, AES_CTR_192, "aes-ctr-192") \ - _ (6, AES_CTR_256, "aes-ctr-256") \ - _ (7, AES_GCM_128, "aes-gcm-128") \ - _ (8, AES_GCM_192, "aes-gcm-192") \ - _ (9, AES_GCM_256, "aes-gcm-256") \ - _ (10, DES_CBC, "des-cbc") \ - _ (11, 3DES_CBC, "3des-cbc") - -typedef enum -{ -#define _(v, f, s) IPSEC_CRYPTO_ALG_##f = v, - foreach_ipsec_crypto_alg -#undef _ - IPSEC_CRYPTO_N_ALG, -} ipsec_crypto_alg_t; - -#define foreach_ipsec_integ_alg \ - _ (0, NONE, "none") \ - _ (1, MD5_96, "md5-96") /* RFC2403 */ \ - _ (2, SHA1_96, "sha1-96") /* RFC2404 */ \ - _ (3, SHA_256_96, "sha-256-96") /* draft-ietf-ipsec-ciph-sha-256-00 */ \ - _ (4, SHA_256_128, "sha-256-128") /* RFC4868 */ \ - _ (5, SHA_384_192, "sha-384-192") /* RFC4868 */ \ - _ (6, SHA_512_256, "sha-512-256") /* RFC4868 */ - -typedef enum -{ -#define _(v, f, s) IPSEC_INTEG_ALG_##f = v, - foreach_ipsec_integ_alg -#undef _ - IPSEC_INTEG_N_ALG, -} ipsec_integ_alg_t; - -typedef enum -{ - IPSEC_PROTOCOL_AH = 0, - IPSEC_PROTOCOL_ESP = 1 -} ipsec_protocol_t; - -typedef struct -{ - u32 id; - u32 spi; - ipsec_protocol_t protocol; - - ipsec_crypto_alg_t crypto_alg; - u8 crypto_key_len; - u8 crypto_key[128]; - - ipsec_integ_alg_t integ_alg; - u8 integ_key_len; - u8 integ_key[128]; - - u8 use_esn; - u8 use_anti_replay; - - u8 is_tunnel; - u8 is_tunnel_ip6; - u8 udp_encap; - ip46_address_t tunnel_src_addr; - ip46_address_t tunnel_dst_addr; - - u32 tx_fib_index; - u32 salt; - - /* runtime */ - u32 seq; - u32 seq_hi; - u32 last_seq; - u32 last_seq_hi; - u64 replay_window; - - /* lifetime data */ - u64 
total_data_size; -} ipsec_sa_t; - -typedef struct -{ - ip46_address_t start, stop; -} ip46_address_range_t; - -typedef struct -{ - u16 start, stop; -} port_range_t; - -typedef struct -{ - u8 is_add; - u8 esn; - u8 anti_replay; - ip4_address_t local_ip, remote_ip; - u32 local_spi; - u32 remote_spi; - ipsec_crypto_alg_t crypto_alg; - u8 local_crypto_key_len; - u8 local_crypto_key[128]; - u8 remote_crypto_key_len; - u8 remote_crypto_key[128]; - ipsec_integ_alg_t integ_alg; - u8 local_integ_key_len; - u8 local_integ_key[128]; - u8 remote_integ_key_len; - u8 remote_integ_key[128]; - u8 renumber; - u32 show_instance; - u8 udp_encap; - u32 tx_table_id; -} ipsec_add_del_tunnel_args_t; - -typedef struct -{ - u8 is_add; - u32 local_sa_id; - u32 remote_sa_id; - ip4_address_t local_ip; - ip4_address_t remote_ip; -} ipsec_add_del_ipsec_gre_tunnel_args_t; - -typedef enum -{ - IPSEC_IF_SET_KEY_TYPE_NONE, - IPSEC_IF_SET_KEY_TYPE_LOCAL_CRYPTO, - IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO, - IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG, - IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG, -} ipsec_if_set_key_type_t; - -typedef struct -{ - u32 id; - i32 priority; - u8 is_outbound; - - // Selector - u8 is_ipv6; - ip46_address_range_t laddr; - ip46_address_range_t raddr; - u8 protocol; - port_range_t lport; - port_range_t rport; - - // Policy - u8 policy; - u32 sa_id; - u32 sa_index; - - // Counter - vlib_counter_t counter; -} ipsec_policy_t; - -typedef struct -{ - u32 id; - /* pool of policies */ - ipsec_policy_t *policies; - /* vectors of policy indices */ - u32 *ipv4_outbound_policies; - u32 *ipv6_outbound_policies; - u32 *ipv4_inbound_protect_policy_indices; - u32 *ipv4_inbound_policy_discard_and_bypass_indices; - u32 *ipv6_inbound_protect_policy_indices; - u32 *ipv6_inbound_policy_discard_and_bypass_indices; -} ipsec_spd_t; - -typedef struct -{ - u32 spd_index; -} ip4_ipsec_config_t; - -typedef struct -{ - u32 spd_index; -} ip6_ipsec_config_t; - -typedef struct -{ - /* Required for pool_get_aligned */ - 
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u32 input_sa_index; - u32 output_sa_index; - u32 hw_if_index; - u32 show_instance; -} ipsec_tunnel_if_t; +#include +#include +#include +#include +#include typedef clib_error_t *(*add_del_sa_sess_cb_t) (u32 sa_index, u8 is_add); typedef clib_error_t *(*check_support_cb_t) (ipsec_sa_t * sa); @@ -350,11 +119,13 @@ typedef struct { /* pool of tunnel instances */ ipsec_spd_t *spds; + /* Pool of security associations */ ipsec_sa_t *sad; + /* pool of policies */ + ipsec_policy_t *policies; /* pool of tunnel interfaces */ ipsec_tunnel_if_t *tunnel_interfaces; - u32 *free_tunnel_if_indices; u32 **empty_buffers; @@ -364,9 +135,6 @@ typedef struct vlib_main_t *vlib_main; vnet_main_t *vnet_main; - /* next node indices */ - u32 feature_next_node_index[32]; - /* hashes */ uword *spd_index_by_spd_id; uword *spd_index_by_sw_if_index; @@ -406,9 +174,6 @@ typedef struct u32 ah_default_backend; /* index of default esp backend */ u32 esp_default_backend; - - /* helper for sort function */ - ipsec_spd_t *spd_to_sort; } ipsec_main_t; extern ipsec_main_t ipsec_main; @@ -431,35 +196,7 @@ extern vlib_node_registration_t ipsec_if_input_node; /* * functions */ -int ipsec_set_interface_spd (vlib_main_t * vm, u32 sw_if_index, u32 spd_id, - int is_add); -int ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add); -int ipsec_add_del_policy (vlib_main_t * vm, ipsec_policy_t * policy, - int is_add); -int ipsec_add_del_sa (vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add); -int ipsec_set_sa_key (vlib_main_t * vm, ipsec_sa_t * sa_update); - -u32 ipsec_get_sa_index_by_sa_id (u32 sa_id); -u8 ipsec_is_sa_used (u32 sa_index); -u8 *format_ipsec_policy_action (u8 * s, va_list * args); -u8 *format_ipsec_crypto_alg (u8 * s, va_list * args); -u8 *format_ipsec_integ_alg (u8 * s, va_list * args); u8 *format_ipsec_replay_window (u8 * s, va_list * args); -uword unformat_ipsec_policy_action (unformat_input_t * input, va_list * args); -uword 
unformat_ipsec_crypto_alg (unformat_input_t * input, va_list * args); -uword unformat_ipsec_integ_alg (unformat_input_t * input, va_list * args); - -int ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm, - ipsec_add_del_tunnel_args_t * args, - u32 * sw_if_index); -int ipsec_add_del_tunnel_if (ipsec_add_del_tunnel_args_t * args); -int ipsec_add_del_ipsec_gre_tunnel (vnet_main_t * vnm, - ipsec_add_del_ipsec_gre_tunnel_args_t * - args); -int ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index, - ipsec_if_set_key_type_t type, u8 alg, u8 * key); -int ipsec_set_interface_sa (vnet_main_t * vnm, u32 hw_if_index, u32 sa_id, - u8 is_outbound); /* * inline functions diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c index fc09f77db6c8..2d464b312903 100644 --- a/src/vnet/ipsec/ipsec_api.c +++ b/src/vnet/ipsec/ipsec_api.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -51,8 +52,8 @@ #define foreach_vpe_api_msg \ _(IPSEC_SPD_ADD_DEL, ipsec_spd_add_del) \ _(IPSEC_INTERFACE_ADD_DEL_SPD, ipsec_interface_add_del_spd) \ -_(IPSEC_SPD_ADD_DEL_ENTRY, ipsec_spd_add_del_entry) \ -_(IPSEC_SAD_ADD_DEL_ENTRY, ipsec_sad_add_del_entry) \ +_(IPSEC_SPD_ENTRY_ADD_DEL, ipsec_spd_entry_add_del) \ +_(IPSEC_SAD_ENTRY_ADD_DEL, ipsec_sad_entry_add_del) \ _(IPSEC_SA_SET_KEY, ipsec_sa_set_key) \ _(IPSEC_SA_DUMP, ipsec_sa_dump) \ _(IPSEC_SPDS_DUMP, ipsec_spds_dump) \ @@ -119,11 +120,30 @@ static void vl_api_ipsec_interface_add_del_spd_t_handler REPLY_MACRO (VL_API_IPSEC_INTERFACE_ADD_DEL_SPD_REPLY); } -static void vl_api_ipsec_spd_add_del_entry_t_handler - (vl_api_ipsec_spd_add_del_entry_t * mp) +static int +ipsec_spd_action_decode (vl_api_ipsec_spd_action_t in, + ipsec_policy_action_t * out) +{ + in = clib_net_to_host_u32 (in); + + switch (in) + { +#define _(v,f,s) case IPSEC_API_SPD_ACTION_##f: \ + *out = IPSEC_POLICY_ACTION_##f; \ + return (0); + foreach_ipsec_policy_action +#undef _ + } + return (VNET_API_ERROR_UNIMPLEMENTED); +} + +static void 
vl_api_ipsec_spd_entry_add_del_t_handler + (vl_api_ipsec_spd_entry_add_del_t * mp) { vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main (); - vl_api_ipsec_spd_add_del_entry_reply_t *rmp; + vl_api_ipsec_spd_entry_add_del_reply_t *rmp; + ip46_type_t itype; + u32 stat_index; int rv; #if WITH_LIBSSL > 0 @@ -131,128 +151,255 @@ static void vl_api_ipsec_spd_add_del_entry_t_handler clib_memset (&p, 0, sizeof (p)); - p.id = ntohl (mp->spd_id); - p.priority = ntohl (mp->priority); - p.is_outbound = mp->is_outbound; - p.is_ipv6 = mp->is_ipv6; + p.id = ntohl (mp->entry.spd_id); + p.priority = ntohl (mp->entry.priority); + p.is_outbound = mp->entry.is_outbound; + + itype = ip_address_decode (&mp->entry.remote_address_start, &p.raddr.start); + ip_address_decode (&mp->entry.remote_address_stop, &p.raddr.stop); + ip_address_decode (&mp->entry.local_address_start, &p.laddr.start); + ip_address_decode (&mp->entry.local_address_stop, &p.laddr.stop); + + p.is_ipv6 = (itype == IP46_TYPE_IP6); + + p.protocol = mp->entry.protocol; + p.rport.start = ntohs (mp->entry.remote_port_start); + p.rport.stop = ntohs (mp->entry.remote_port_stop); + p.lport.start = ntohs (mp->entry.local_port_start); + p.lport.stop = ntohs (mp->entry.local_port_stop); + + rv = ipsec_spd_action_decode (mp->entry.policy, &p.policy); + + if (rv) + goto out; - if (mp->is_ipv6 || mp->is_ip_any) - { - clib_memcpy (&p.raddr.start, mp->remote_address_start, 16); - clib_memcpy (&p.raddr.stop, mp->remote_address_stop, 16); - clib_memcpy (&p.laddr.start, mp->local_address_start, 16); - clib_memcpy (&p.laddr.stop, mp->local_address_stop, 16); - } - else - { - clib_memcpy (&p.raddr.start.ip4.data, mp->remote_address_start, 4); - clib_memcpy (&p.raddr.stop.ip4.data, mp->remote_address_stop, 4); - clib_memcpy (&p.laddr.start.ip4.data, mp->local_address_start, 4); - clib_memcpy (&p.laddr.stop.ip4.data, mp->local_address_stop, 4); - } - p.protocol = mp->protocol; - p.rport.start = ntohs (mp->remote_port_start); - 
p.rport.stop = ntohs (mp->remote_port_stop); - p.lport.start = ntohs (mp->local_port_start); - p.lport.stop = ntohs (mp->local_port_stop); /* policy action resolve unsupported */ - if (mp->policy == IPSEC_POLICY_ACTION_RESOLVE) + if (p.policy == IPSEC_POLICY_ACTION_RESOLVE) { clib_warning ("unsupported action: 'resolve'"); rv = VNET_API_ERROR_UNIMPLEMENTED; goto out; } - p.policy = mp->policy; - p.sa_id = ntohl (mp->sa_id); + p.sa_id = ntohl (mp->entry.sa_id); - rv = ipsec_add_del_policy (vm, &p, mp->is_add); + rv = ipsec_add_del_policy (vm, &p, mp->is_add, &stat_index); if (rv) goto out; - if (mp->is_ip_any) - { - p.is_ipv6 = 1; - rv = ipsec_add_del_policy (vm, &p, mp->is_add); - } #else rv = VNET_API_ERROR_UNIMPLEMENTED; goto out; #endif out: - REPLY_MACRO (VL_API_IPSEC_SPD_ADD_DEL_ENTRY_REPLY); + /* *INDENT-OFF* */ + REPLY_MACRO2 (VL_API_IPSEC_SPD_ENTRY_ADD_DEL_REPLY, + ({ + rmp->stat_index = ntohl(stat_index); + })); + /* *INDENT-ON* */ } -static void vl_api_ipsec_sad_add_del_entry_t_handler - (vl_api_ipsec_sad_add_del_entry_t * mp) +static int +ipsec_proto_decode (vl_api_ipsec_proto_t in, ipsec_protocol_t * out) { - vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main (); - vl_api_ipsec_sad_add_del_entry_reply_t *rmp; - int rv; -#if WITH_LIBSSL > 0 - ipsec_main_t *im = &ipsec_main; - ipsec_sa_t sa; + in = clib_net_to_host_u32 (in); - clib_memset (&sa, 0, sizeof (sa)); + switch (in) + { + case IPSEC_API_PROTO_ESP: + *out = IPSEC_PROTOCOL_ESP; + return (0); + case IPSEC_API_PROTO_AH: + *out = IPSEC_PROTOCOL_AH; + return (0); + } + return (VNET_API_ERROR_UNIMPLEMENTED); +} - sa.id = ntohl (mp->sad_id); - sa.spi = ntohl (mp->spi); - sa.protocol = mp->protocol; - /* check for unsupported crypto-alg */ - if (mp->crypto_algorithm >= IPSEC_CRYPTO_N_ALG) +static vl_api_ipsec_proto_t +ipsec_proto_encode (ipsec_protocol_t p) +{ + switch (p) { - clib_warning ("unsupported crypto-alg: '%U'", format_ipsec_crypto_alg, - mp->crypto_algorithm); - rv = 
VNET_API_ERROR_UNIMPLEMENTED; - goto out; + case IPSEC_PROTOCOL_ESP: + return clib_host_to_net_u32 (IPSEC_API_PROTO_ESP); + case IPSEC_PROTOCOL_AH: + return clib_host_to_net_u32 (IPSEC_API_PROTO_AH); } - sa.crypto_alg = mp->crypto_algorithm; - sa.crypto_key_len = mp->crypto_key_length; - clib_memcpy (&sa.crypto_key, mp->crypto_key, sizeof (sa.crypto_key)); - /* check for unsupported integ-alg */ - if (mp->integrity_algorithm >= IPSEC_INTEG_N_ALG) + return (VNET_API_ERROR_UNIMPLEMENTED); +} + +static int +ipsec_crypto_algo_decode (vl_api_ipsec_crypto_alg_t in, + ipsec_crypto_alg_t * out) +{ + in = clib_net_to_host_u32 (in); + + switch (in) { - clib_warning ("unsupported integ-alg: '%U'", format_ipsec_integ_alg, - mp->integrity_algorithm); - rv = VNET_API_ERROR_UNIMPLEMENTED; - goto out; +#define _(v,f,s) case IPSEC_API_CRYPTO_ALG_##f: \ + *out = IPSEC_CRYPTO_ALG_##f; \ + return (0); + foreach_ipsec_crypto_alg +#undef _ } + return (VNET_API_ERROR_UNIMPLEMENTED); +} - sa.integ_alg = mp->integrity_algorithm; - sa.integ_key_len = mp->integrity_key_length; - clib_memcpy (&sa.integ_key, mp->integrity_key, sizeof (sa.integ_key)); - sa.use_esn = mp->use_extended_sequence_number; - sa.is_tunnel = mp->is_tunnel; - sa.is_tunnel_ip6 = mp->is_tunnel_ipv6; - sa.udp_encap = mp->udp_encap; - if (sa.is_tunnel_ip6) +static vl_api_ipsec_crypto_alg_t +ipsec_crypto_algo_encode (ipsec_crypto_alg_t c) +{ + switch (c) { - clib_memcpy (&sa.tunnel_src_addr, mp->tunnel_src_address, 16); - clib_memcpy (&sa.tunnel_dst_addr, mp->tunnel_dst_address, 16); +#define _(v,f,s) case IPSEC_CRYPTO_ALG_##f: \ + return clib_host_to_net_u32(IPSEC_API_CRYPTO_ALG_##f); + foreach_ipsec_crypto_alg +#undef _ + case IPSEC_CRYPTO_N_ALG: + break; } - else + ASSERT (0); + return (VNET_API_ERROR_UNIMPLEMENTED); +} + + +static int +ipsec_integ_algo_decode (vl_api_ipsec_integ_alg_t in, ipsec_integ_alg_t * out) +{ + in = clib_net_to_host_u32 (in); + + switch (in) { - clib_memcpy (&sa.tunnel_src_addr.ip4.data, 
mp->tunnel_src_address, 4); - clib_memcpy (&sa.tunnel_dst_addr.ip4.data, mp->tunnel_dst_address, 4); +#define _(v,f,s) case IPSEC_API_INTEG_ALG_##f: \ + *out = IPSEC_INTEG_ALG_##f; \ + return (0); + foreach_ipsec_integ_alg +#undef _ } - sa.use_anti_replay = mp->use_anti_replay; + return (VNET_API_ERROR_UNIMPLEMENTED); +} - clib_error_t *err = ipsec_check_support_cb (im, &sa); - if (err) +static vl_api_ipsec_integ_alg_t +ipsec_integ_algo_encode (ipsec_integ_alg_t i) +{ + switch (i) { - clib_warning ("%s", err->what); - rv = VNET_API_ERROR_UNIMPLEMENTED; - goto out; +#define _(v,f,s) case IPSEC_INTEG_ALG_##f: \ + return (clib_host_to_net_u32(IPSEC_API_INTEG_ALG_##f)); + foreach_ipsec_integ_alg +#undef _ + case IPSEC_INTEG_N_ALG: + break; } + ASSERT (0); + return (VNET_API_ERROR_UNIMPLEMENTED); +} + +static void +ipsec_key_decode (const vl_api_key_t * key, ipsec_key_t * out) +{ + ipsec_mk_key (out, key->data, key->length); +} + +static void +ipsec_key_encode (const ipsec_key_t * in, vl_api_key_t * out) +{ + out->length = in->len; + clib_memcpy (out->data, in->data, out->length); +} + +static ipsec_sa_flags_t +ipsec_sa_flags_decode (vl_api_ipsec_sad_flags_t in) +{ + ipsec_sa_flags_t flags = IPSEC_SA_FLAG_NONE; + in = clib_net_to_host_u32 (in); + +#define _(v,f,s) if (in & IPSEC_API_SAD_FLAG_##f) \ + flags |= IPSEC_SA_FLAG_##f; + foreach_ipsec_sa_flags +#undef _ + return (flags); +} + +static vl_api_ipsec_sad_flags_t +ipsec_sad_flags_encode (const ipsec_sa_t * sa) +{ + vl_api_ipsec_sad_flags_t flags = IPSEC_API_SAD_FLAG_NONE; + + if (sa->use_esn) + flags |= IPSEC_API_SAD_FLAG_USE_EXTENDED_SEQ_NUM; + if (sa->use_anti_replay) + flags |= IPSEC_API_SAD_FLAG_USE_ANTI_REPLAY; + if (sa->is_tunnel) + flags |= IPSEC_API_SAD_FLAG_IS_TUNNEL; + if (sa->is_tunnel_ip6) + flags |= IPSEC_API_SAD_FLAG_IS_TUNNEL_V6; + if (sa->udp_encap) + flags |= IPSEC_API_SAD_FLAG_UDP_ENCAP; + + return clib_host_to_net_u32 (flags); +} + +static void vl_api_ipsec_sad_entry_add_del_t_handler + 
(vl_api_ipsec_sad_entry_add_del_t * mp) +{ + vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main (); + vl_api_ipsec_sad_entry_add_del_reply_t *rmp; + ip46_address_t tun_src = { }, tun_dst = + { + }; + ipsec_key_t crypto_key, integ_key; + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; + ipsec_protocol_t proto; + ipsec_sa_flags_t flags; + u32 id, spi; + int rv; + +#if WITH_LIBSSL > 0 + + id = ntohl (mp->entry.sad_id); + spi = ntohl (mp->entry.spi); + + rv = ipsec_proto_decode (mp->entry.protocol, &proto); + + if (rv) + goto out; + + rv = ipsec_crypto_algo_decode (mp->entry.crypto_algorithm, &crypto_alg); + + if (rv) + goto out; + + rv = ipsec_integ_algo_decode (mp->entry.integrity_algorithm, &integ_alg); + + if (rv) + goto out; + + ipsec_key_decode (&mp->entry.crypto_key, &crypto_key); + ipsec_key_decode (&mp->entry.integrity_key, &integ_key); + + flags = ipsec_sa_flags_decode (mp->entry.flags); + + ip_address_decode (&mp->entry.tunnel_src, &tun_src); + ip_address_decode (&mp->entry.tunnel_dst, &tun_dst); + + + if (mp->is_add) + rv = ipsec_sa_add (id, spi, proto, + crypto_alg, &crypto_key, + integ_alg, &integ_key, flags, + 0, &tun_src, &tun_dst, NULL); + else + rv = ipsec_sa_del (id); - rv = ipsec_add_del_sa (vm, &sa, mp->is_add); #else rv = VNET_API_ERROR_UNIMPLEMENTED; - goto out; #endif out: - REPLY_MACRO (VL_API_IPSEC_SAD_ADD_DEL_ENTRY_REPLY); + REPLY_MACRO (VL_API_IPSEC_SAD_ENTRY_ADD_DEL_REPLY); } static void @@ -260,6 +407,7 @@ send_ipsec_spds_details (ipsec_spd_t * spd, vl_api_registration_t * reg, u32 context) { vl_api_ipsec_spds_details_t *mp; + u32 n_policies = 0; mp = vl_msg_api_alloc (sizeof (*mp)); clib_memset (mp, 0, sizeof (*mp)); @@ -267,7 +415,10 @@ send_ipsec_spds_details (ipsec_spd_t * spd, vl_api_registration_t * reg, mp->context = context; mp->spd_id = htonl (spd->id); - mp->npolicies = htonl (pool_len (spd->policies)); +#define _(s, n) n_policies += vec_len (spd->policies[IPSEC_SPD_POLICY_##s]); + 
foreach_ipsec_spd_policy_type +#undef _ + mp->npolicies = htonl (n_policies); vl_api_send_msg (reg, (u8 *) mp); } @@ -293,6 +444,22 @@ vl_api_ipsec_spds_dump_t_handler (vl_api_ipsec_spds_dump_t * mp) #endif } +vl_api_ipsec_spd_action_t +ipsec_spd_action_encode (ipsec_policy_action_t in) +{ + vl_api_ipsec_spd_action_t out = IPSEC_API_SPD_ACTION_BYPASS; + + switch (in) + { +#define _(v,f,s) case IPSEC_POLICY_ACTION_##f: \ + out = IPSEC_API_SPD_ACTION_##f; \ + break; + foreach_ipsec_policy_action +#undef _ + } + return (clib_host_to_net_u32 (out)); +} + static void send_ipsec_spd_details (ipsec_policy_t * p, vl_api_registration_t * reg, u32 context) @@ -304,33 +471,25 @@ send_ipsec_spd_details (ipsec_policy_t * p, vl_api_registration_t * reg, mp->_vl_msg_id = ntohs (VL_API_IPSEC_SPD_DETAILS); mp->context = context; - mp->spd_id = htonl (p->id); - mp->priority = htonl (p->priority); - mp->is_outbound = p->is_outbound; - mp->is_ipv6 = p->is_ipv6; - if (p->is_ipv6) - { - memcpy (mp->local_start_addr, &p->laddr.start.ip6, 16); - memcpy (mp->local_stop_addr, &p->laddr.stop.ip6, 16); - memcpy (mp->remote_start_addr, &p->raddr.start.ip6, 16); - memcpy (mp->remote_stop_addr, &p->raddr.stop.ip6, 16); - } - else - { - memcpy (mp->local_start_addr, &p->laddr.start.ip4, 4); - memcpy (mp->local_stop_addr, &p->laddr.stop.ip4, 4); - memcpy (mp->remote_start_addr, &p->raddr.start.ip4, 4); - memcpy (mp->remote_stop_addr, &p->raddr.stop.ip4, 4); - } - mp->local_start_port = htons (p->lport.start); - mp->local_stop_port = htons (p->lport.stop); - mp->remote_start_port = htons (p->rport.start); - mp->remote_stop_port = htons (p->rport.stop); - mp->protocol = p->protocol; - mp->policy = p->policy; - mp->sa_id = htonl (p->sa_id); - mp->bytes = clib_host_to_net_u64 (p->counter.bytes); - mp->packets = clib_host_to_net_u64 (p->counter.packets); + mp->entry.spd_id = htonl (p->id); + mp->entry.priority = htonl (p->priority); + mp->entry.is_outbound = p->is_outbound; + + ip_address_encode 
(&p->laddr.start, IP46_TYPE_ANY, + &mp->entry.local_address_start); + ip_address_encode (&p->laddr.stop, IP46_TYPE_ANY, + &mp->entry.local_address_stop); + ip_address_encode (&p->raddr.start, IP46_TYPE_ANY, + &mp->entry.remote_address_start); + ip_address_encode (&p->raddr.stop, IP46_TYPE_ANY, + &mp->entry.remote_address_stop); + mp->entry.local_port_start = htons (p->lport.start); + mp->entry.local_port_stop = htons (p->lport.stop); + mp->entry.remote_port_start = htons (p->rport.start); + mp->entry.remote_port_stop = htons (p->rport.stop); + mp->entry.protocol = p->protocol; + mp->entry.policy = ipsec_spd_action_encode (p->policy); + mp->entry.sa_id = htonl (p->sa_id); vl_api_send_msg (reg, (u8 *) mp); } @@ -340,10 +499,11 @@ vl_api_ipsec_spd_dump_t_handler (vl_api_ipsec_spd_dump_t * mp) { vl_api_registration_t *reg; ipsec_main_t *im = &ipsec_main; + ipsec_spd_policy_t ptype; ipsec_policy_t *policy; ipsec_spd_t *spd; uword *p; - u32 spd_index; + u32 spd_index, *ii; #if WITH_LIBSSL > 0 reg = vl_api_client_index_to_registration (mp->client_index); if (!reg) @@ -357,12 +517,15 @@ vl_api_ipsec_spd_dump_t_handler (vl_api_ipsec_spd_dump_t * mp) spd = pool_elt_at_index (im->spds, spd_index); /* *INDENT-OFF* */ - pool_foreach (policy, spd->policies, - ({ - if (mp->sa_id == ~(0) || ntohl (mp->sa_id) == policy->sa_id) - send_ipsec_spd_details (policy, reg, - mp->context);} - )); + FOR_EACH_IPSEC_SPD_POLICY_TYPE(ptype) { + vec_foreach(ii, spd->policies[ptype]) + { + policy = pool_elt_at_index(im->policies, *ii); + + if (mp->sa_id == ~(0) || ntohl (mp->sa_id) == policy->sa_id) + send_ipsec_spd_details (policy, reg, mp->context); + } + } /* *INDENT-ON* */ #else clib_warning ("unimplemented"); @@ -428,16 +591,17 @@ vl_api_ipsec_sa_set_key_t_handler (vl_api_ipsec_sa_set_key_t * mp) { vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main (); vl_api_ipsec_sa_set_key_reply_t *rmp; + ipsec_key_t ck, ik; + u32 id; int rv; #if WITH_LIBSSL > 0 - ipsec_sa_t sa; - sa.id = ntohl 
(mp->sa_id); - sa.crypto_key_len = mp->crypto_key_length; - clib_memcpy (&sa.crypto_key, mp->crypto_key, sizeof (sa.crypto_key)); - sa.integ_key_len = mp->integrity_key_length; - clib_memcpy (&sa.integ_key, mp->integrity_key, sizeof (sa.integ_key)); - - rv = ipsec_set_sa_key (vm, &sa); + + id = ntohl (mp->sa_id); + + ipsec_key_decode (&mp->crypto_key, &ck); + ipsec_key_decode (&mp->integrity_key, &ik); + + rv = ipsec_set_sa_key (id, &ck, &ik); #else rv = VNET_API_ERROR_UNIMPLEMENTED; #endif @@ -473,8 +637,8 @@ vl_api_ipsec_tunnel_if_add_del_t_handler (vl_api_ipsec_tunnel_if_add_del_t * tun.remote_integ_key_len = mp->remote_integ_key_len; tun.udp_encap = mp->udp_encap; tun.tx_table_id = ntohl (mp->tx_table_id); - memcpy (&tun.local_ip, mp->local_ip, 4); - memcpy (&tun.remote_ip, mp->remote_ip, 4); + memcpy (&tun.local_ip.ip4, mp->local_ip, 4); + memcpy (&tun.remote_ip.ip4, mp->remote_ip, 4); memcpy (&tun.local_crypto_key, &mp->local_crypto_key, mp->local_crypto_key_len); memcpy (&tun.remote_crypto_key, &mp->remote_crypto_key, @@ -492,11 +656,12 @@ vl_api_ipsec_tunnel_if_add_del_t_handler (vl_api_ipsec_tunnel_if_add_del_t * rv = VNET_API_ERROR_UNIMPLEMENTED; #endif - REPLY_MACRO2 (VL_API_IPSEC_TUNNEL_IF_ADD_DEL_REPLY, ( - { - rmp->sw_if_index = - htonl (sw_if_index); - })); + /* *INDENT-OFF* */ + REPLY_MACRO2 (VL_API_IPSEC_TUNNEL_IF_ADD_DEL_REPLY, + ({ + rmp->sw_if_index = htonl (sw_if_index); + })); + /* *INDENT-ON* */ } static void @@ -510,40 +675,29 @@ send_ipsec_sa_details (ipsec_sa_t * sa, vl_api_registration_t * reg, mp->_vl_msg_id = ntohs (VL_API_IPSEC_SA_DETAILS); mp->context = context; - mp->sa_id = htonl (sa->id); - mp->sw_if_index = htonl (sw_if_index); - - mp->spi = htonl (sa->spi); - mp->protocol = sa->protocol; - - mp->crypto_alg = sa->crypto_alg; - mp->crypto_key_len = sa->crypto_key_len; - memcpy (mp->crypto_key, sa->crypto_key, sa->crypto_key_len); + mp->entry.sad_id = htonl (sa->id); + mp->entry.spi = htonl (sa->spi); + mp->entry.protocol = 
ipsec_proto_encode (sa->protocol); + mp->entry.tx_table_id = + htonl (fib_table_get_table_id (sa->tx_fib_index, FIB_PROTOCOL_IP4)); - mp->integ_alg = sa->integ_alg; - mp->integ_key_len = sa->integ_key_len; - memcpy (mp->integ_key, sa->integ_key, sa->integ_key_len); + mp->entry.crypto_algorithm = ipsec_crypto_algo_encode (sa->crypto_alg); + ipsec_key_encode (&sa->crypto_key, &mp->entry.crypto_key); - mp->use_esn = sa->use_esn; - mp->use_anti_replay = sa->use_anti_replay; + mp->entry.integrity_algorithm = ipsec_integ_algo_encode (sa->integ_alg); + ipsec_key_encode (&sa->integ_key, &mp->entry.integrity_key); - mp->is_tunnel = sa->is_tunnel; - mp->is_tunnel_ip6 = sa->is_tunnel_ip6; + mp->entry.flags = ipsec_sad_flags_encode (sa); if (sa->is_tunnel) { - if (sa->is_tunnel_ip6) - { - memcpy (mp->tunnel_src_addr, &sa->tunnel_src_addr.ip6, 16); - memcpy (mp->tunnel_dst_addr, &sa->tunnel_dst_addr.ip6, 16); - } - else - { - memcpy (mp->tunnel_src_addr, &sa->tunnel_src_addr.ip4, 4); - memcpy (mp->tunnel_dst_addr, &sa->tunnel_dst_addr.ip4, 4); - } + ip_address_encode (&sa->tunnel_src_addr, IP46_TYPE_ANY, + &mp->entry.tunnel_src); + ip_address_encode (&sa->tunnel_dst_addr, IP46_TYPE_ANY, + &mp->entry.tunnel_dst); } + mp->sw_if_index = htonl (sw_if_index); mp->salt = clib_host_to_net_u32 (sa->salt); mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq)); mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->last_seq)); @@ -555,10 +709,6 @@ send_ipsec_sa_details (ipsec_sa_t * sa, vl_api_registration_t * reg, if (sa->use_anti_replay) mp->replay_window = clib_host_to_net_u64 (sa->replay_window); mp->total_data_size = clib_host_to_net_u64 (sa->total_data_size); - mp->udp_encap = sa->udp_encap; - - mp->tx_table_id = - htonl (fib_table_get_table_id (sa->tx_fib_index, FIB_PROTOCOL_IP4)); vl_api_send_msg (reg, (u8 *) mp); } @@ -1033,7 +1183,7 @@ vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp) mp->context = context; snprintf ((char *)mp->name, sizeof 
(mp->name), "%.*s", vec_len (ab->name), ab->name); - mp->protocol = IPSEC_PROTOCOL_AH; + mp->protocol = ntohl (IPSEC_API_PROTO_AH); mp->index = ab - im->ah_backends; mp->active = mp->index == im->ah_current_backend ? 1 : 0; vl_api_send_msg (rp, (u8 *)mp); @@ -1045,7 +1195,7 @@ vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp) mp->context = context; snprintf ((char *)mp->name, sizeof (mp->name), "%.*s", vec_len (eb->name), eb->name); - mp->protocol = IPSEC_PROTOCOL_ESP; + mp->protocol = ntohl (IPSEC_API_PROTO_ESP); mp->index = eb - im->esp_backends; mp->active = mp->index == im->esp_current_backend ? 1 : 0; vl_api_send_msg (rp, (u8 *)mp); @@ -1058,14 +1208,21 @@ vl_api_ipsec_select_backend_t_handler (vl_api_ipsec_select_backend_t * mp) { ipsec_main_t *im = &ipsec_main; vl_api_ipsec_select_backend_reply_t *rmp; + ipsec_protocol_t protocol; int rv = 0; if (pool_elts (im->sad) > 0) { rv = VNET_API_ERROR_INSTANCE_IN_USE; goto done; } + + rv = ipsec_proto_decode (mp->protocol, &protocol); + + if (rv) + goto done; + #if WITH_LIBSSL > 0 - switch (mp->protocol) + switch (protocol) { case IPSEC_PROTOCOL_ESP: if (pool_is_free_index (im->esp_backends, mp->index)) diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c index f0717e91dd1a..52a30a428d08 100644 --- a/src/vnet/ipsec/ipsec_cli.c +++ b/src/vnet/ipsec/ipsec_cli.c @@ -73,87 +73,62 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - ipsec_main_t *im = &ipsec_main; unformat_input_t _line_input, *line_input = &_line_input; - ipsec_sa_t sa; - int is_add = ~0; - u8 *ck = 0, *ik = 0; - clib_error_t *error = NULL; - - clib_memset (&sa, 0, sizeof (sa)); - sa.tx_fib_index = ~((u32) 0); /* Only supported for ipsec interfaces */ + ip46_address_t tun_src = { }, tun_dst = + { + }; + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; + ipsec_protocol_t proto; + ipsec_sa_flags_t flags; + clib_error_t *error; + ipsec_key_t ck, ik; + int 
is_add, rv; + u32 id, spi; + + error = NULL; + is_add = 0; + flags = IPSEC_SA_FLAG_NONE; + proto = IPSEC_PROTOCOL_ESP; if (!unformat_user (input, unformat_line_input, line_input)) return 0; while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "add %u", &sa.id)) + if (unformat (line_input, "add %u", &id)) is_add = 1; - else if (unformat (line_input, "del %u", &sa.id)) + else if (unformat (line_input, "del %u", &id)) is_add = 0; - else if (unformat (line_input, "spi %u", &sa.spi)) + else if (unformat (line_input, "spi %u", &spi)) ; else if (unformat (line_input, "esp")) - sa.protocol = IPSEC_PROTOCOL_ESP; + proto = IPSEC_PROTOCOL_ESP; else if (unformat (line_input, "ah")) - { - sa.protocol = IPSEC_PROTOCOL_AH; - } - else - if (unformat (line_input, "crypto-key %U", unformat_hex_string, &ck)) - sa.crypto_key_len = vec_len (ck); - else - if (unformat - (line_input, "crypto-alg %U", unformat_ipsec_crypto_alg, - &sa.crypto_alg)) - { - if (sa.crypto_alg < IPSEC_CRYPTO_ALG_NONE || - sa.crypto_alg >= IPSEC_CRYPTO_N_ALG) - { - error = clib_error_return (0, "unsupported crypto-alg: '%U'", - format_ipsec_crypto_alg, - sa.crypto_alg); - goto done; - } - } - else - if (unformat (line_input, "integ-key %U", unformat_hex_string, &ik)) - sa.integ_key_len = vec_len (ik); - else if (unformat (line_input, "integ-alg %U", unformat_ipsec_integ_alg, - &sa.integ_alg)) - { - if (sa.integ_alg < IPSEC_INTEG_ALG_NONE || - sa.integ_alg >= IPSEC_INTEG_N_ALG) - { - error = clib_error_return (0, "unsupported integ-alg: '%U'", - format_ipsec_integ_alg, - sa.integ_alg); - goto done; - } - } - else if (unformat (line_input, "tunnel-src %U", - unformat_ip4_address, &sa.tunnel_src_addr.ip4)) - sa.is_tunnel = 1; - else if (unformat (line_input, "tunnel-dst %U", - unformat_ip4_address, &sa.tunnel_dst_addr.ip4)) - sa.is_tunnel = 1; + proto = IPSEC_PROTOCOL_AH; + else if (unformat (line_input, "crypto-key %U", + unformat_ipsec_key, &ck)) + ; + else if (unformat 
(line_input, "crypto-alg %U", + unformat_ipsec_crypto_alg, &crypto_alg)) + ; + else if (unformat (line_input, "integ-key %U", unformat_ipsec_key, &ik)) + ; + else if (unformat (line_input, "integ-alg %U", + unformat_ipsec_integ_alg, &integ_alg)) + ; else if (unformat (line_input, "tunnel-src %U", - unformat_ip6_address, &sa.tunnel_src_addr.ip6)) + unformat_ip46_address, &tun_src, IP46_TYPE_ANY)) { - sa.is_tunnel = 1; - sa.is_tunnel_ip6 = 1; + flags |= IPSEC_SA_FLAG_IS_TUNNEL; + if (!ip46_address_is_ip4 (&tun_src)) + flags |= IPSEC_SA_FLAG_IS_TUNNEL_V6; } else if (unformat (line_input, "tunnel-dst %U", - unformat_ip6_address, &sa.tunnel_dst_addr.ip6)) - { - sa.is_tunnel = 1; - sa.is_tunnel_ip6 = 1; - } + unformat_ip46_address, &tun_dst, IP46_TYPE_ANY)) + ; else if (unformat (line_input, "udp-encap")) - { - sa.udp_encap = 1; - } + flags |= IPSEC_SA_FLAG_UDP_ENCAP; else { error = clib_error_return (0, "parse error: '%U'", @@ -162,26 +137,15 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, } } - if (sa.crypto_key_len > sizeof (sa.crypto_key)) - sa.crypto_key_len = sizeof (sa.crypto_key); - - if (sa.integ_key_len > sizeof (sa.integ_key)) - sa.integ_key_len = sizeof (sa.integ_key); - - if (ck) - memcpy (sa.crypto_key, ck, sa.crypto_key_len); - - if (ik) - memcpy (sa.integ_key, ik, sa.integ_key_len); - if (is_add) - { - error = ipsec_check_support_cb (im, &sa); - if (error) - goto done; - } + rv = ipsec_sa_add (id, spi, proto, crypto_alg, + &ck, integ_alg, &ik, flags, + 0, &tun_src, &tun_dst, NULL); + else + rv = ipsec_sa_del (id); - ipsec_add_del_sa (vm, &sa, is_add); + if (rv) + clib_error_return (0, "failed"); done: unformat_free (line_input); @@ -258,9 +222,8 @@ ipsec_policy_add_del_command_fn (vlib_main_t * vm, { unformat_input_t _line_input, *line_input = &_line_input; ipsec_policy_t p; - int is_add = 0; - int is_ip_any = 1; - u32 tmp, tmp2; + int rv, is_add = 0; + u32 tmp, tmp2, stat_index; clib_error_t *error = NULL; clib_memset (&p, 0, sizeof (p)); @@ -304,24 
+267,22 @@ ipsec_policy_add_del_command_fn (vlib_main_t * vm, else if (unformat (line_input, "local-ip-range %U - %U", unformat_ip4_address, &p.laddr.start.ip4, unformat_ip4_address, &p.laddr.stop.ip4)) - is_ip_any = 0; + ; else if (unformat (line_input, "remote-ip-range %U - %U", unformat_ip4_address, &p.raddr.start.ip4, unformat_ip4_address, &p.raddr.stop.ip4)) - is_ip_any = 0; + ; else if (unformat (line_input, "local-ip-range %U - %U", unformat_ip6_address, &p.laddr.start.ip6, unformat_ip6_address, &p.laddr.stop.ip6)) { p.is_ipv6 = 1; - is_ip_any = 0; } else if (unformat (line_input, "remote-ip-range %U - %U", unformat_ip6_address, &p.raddr.start.ip6, unformat_ip6_address, &p.raddr.stop.ip6)) { p.is_ipv6 = 1; - is_ip_any = 0; } else if (unformat (line_input, "local-port-range %u - %u", &tmp, &tmp2)) { @@ -363,12 +324,12 @@ ipsec_policy_add_del_command_fn (vlib_main_t * vm, goto done; } } - ipsec_add_del_policy (vm, &p, is_add); - if (is_ip_any) - { - p.is_ipv6 = 1; - ipsec_add_del_policy (vm, &p, is_add); - } + rv = ipsec_add_del_policy (vm, &p, is_add, &stat_index); + + if (!rv) + vlib_cli_output (vm, "policy-index:%d", stat_index); + else + vlib_cli_output (vm, "error:%d", rv); done: unformat_free (line_input); @@ -391,25 +352,22 @@ set_ipsec_sa_key_command_fn (vlib_main_t * vm, vlib_cli_command_t * cmd) { unformat_input_t _line_input, *line_input = &_line_input; - ipsec_sa_t sa; - u8 *ck = 0, *ik = 0; clib_error_t *error = NULL; - - clib_memset (&sa, 0, sizeof (sa)); + ipsec_key_t ck, ik; + u32 id; if (!unformat_user (input, unformat_line_input, line_input)) return 0; while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "%u", &sa.id)) + if (unformat (line_input, "%u", &id)) ; else - if (unformat (line_input, "crypto-key %U", unformat_hex_string, &ck)) - sa.crypto_key_len = vec_len (ck); - else - if (unformat (line_input, "integ-key %U", unformat_hex_string, &ik)) - sa.integ_key_len = vec_len (ik); + if (unformat 
(line_input, "crypto-key %U", unformat_ipsec_key, &ck)) + ; + else if (unformat (line_input, "integ-key %U", unformat_ipsec_key, &ik)) + ; else { error = clib_error_return (0, "parse error: '%U'", @@ -418,19 +376,7 @@ set_ipsec_sa_key_command_fn (vlib_main_t * vm, } } - if (sa.crypto_key_len > sizeof (sa.crypto_key)) - sa.crypto_key_len = sizeof (sa.crypto_key); - - if (sa.integ_key_len > sizeof (sa.integ_key)) - sa.integ_key_len = sizeof (sa.integ_key); - - if (ck) - strncpy ((char *) sa.crypto_key, (char *) ck, sa.crypto_key_len); - - if (ik) - strncpy ((char *) sa.integ_key, (char *) ik, sa.integ_key_len); - - ipsec_set_sa_key (vm, &sa); + ipsec_set_sa_key (id, &ck, &ik); done: unformat_free (line_input); @@ -451,217 +397,28 @@ static clib_error_t * show_ipsec_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - ipsec_spd_t *spd; - ipsec_sa_t *sa; - ipsec_policy_t *p; ipsec_main_t *im = &ipsec_main; - u32 *i; - ipsec_tunnel_if_t *t; + u32 spd_id, sw_if_index, sai; vnet_hw_interface_t *hi; + ipsec_tunnel_if_t *t; u8 *protocol = NULL; u8 *policy = NULL; - u32 tx_table_id; + u32 i; /* *INDENT-OFF* */ - pool_foreach (sa, im->sad, ({ - if (sa->id) { - vlib_cli_output(vm, "sa %u spi %u mode %s protocol %s%s%s%s", sa->id, sa->spi, - sa->is_tunnel ? "tunnel" : "transport", - sa->protocol ? "esp" : "ah", - sa->udp_encap ? " udp-encap-enabled" : "", - sa->use_anti_replay ? " anti-replay" : "", - sa->use_esn ? " extended-sequence-number" : ""); - if (sa->protocol == IPSEC_PROTOCOL_ESP) { - vlib_cli_output(vm, " crypto alg %U%s%U integrity alg %U%s%U", - format_ipsec_crypto_alg, sa->crypto_alg, - sa->crypto_alg ? " key " : "", - format_hex_bytes, sa->crypto_key, sa->crypto_key_len, - format_ipsec_integ_alg, sa->integ_alg, - sa->integ_alg ? 
" key " : "", - format_hex_bytes, sa->integ_key, sa->integ_key_len); - } - if (sa->is_tunnel && sa->is_tunnel_ip6) { - vlib_cli_output(vm, " tunnel src %U dst %U", - format_ip6_address, &sa->tunnel_src_addr.ip6, - format_ip6_address, &sa->tunnel_dst_addr.ip6); - } else if (sa->is_tunnel) { - vlib_cli_output(vm, " tunnel src %U dst %U", - format_ip4_address, &sa->tunnel_src_addr.ip4, - format_ip4_address, &sa->tunnel_dst_addr.ip4); - } - } + pool_foreach_index (sai, im->sad, ({ + vlib_cli_output(vm, "%U", format_ipsec_sa, sai); + })); + pool_foreach_index (i, im->spds, ({ + vlib_cli_output(vm, "%U", format_ipsec_spd, i); })); - /* *INDENT-ON* */ - /* *INDENT-OFF* */ - pool_foreach (spd, im->spds, ({ - vlib_cli_output(vm, "spd %u", spd->id); - - vlib_cli_output(vm, " outbound policies"); - vec_foreach(i, spd->ipv4_outbound_policies) - { - p = pool_elt_at_index(spd->policies, *i); - vec_reset_length(protocol); - vec_reset_length(policy); - if (p->protocol) { - protocol = format(protocol, "%U", format_ip_protocol, p->protocol); - } else { - protocol = format(protocol, "any"); - } - if (p->policy == IPSEC_POLICY_ACTION_PROTECT) { - policy = format(policy, " sa %u", p->sa_id); - } + vlib_cli_output (vm, "SPD Bindings:"); - vlib_cli_output(vm, " priority %d action %U protocol %v%v", - p->priority, format_ipsec_policy_action, p->policy, - protocol, policy); - vlib_cli_output(vm, " local addr range %U - %U port range %u - %u", - format_ip4_address, &p->laddr.start.ip4, - format_ip4_address, &p->laddr.stop.ip4, - p->lport.start, p->lport.stop); - vlib_cli_output(vm, " remote addr range %U - %U port range %u - %u", - format_ip4_address, &p->raddr.start.ip4, - format_ip4_address, &p->raddr.stop.ip4, - p->rport.start, p->rport.stop); - vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets, - p->counter.bytes); - }; - vec_foreach(i, spd->ipv6_outbound_policies) - { - p = pool_elt_at_index(spd->policies, *i); - vec_reset_length(protocol); - vec_reset_length(policy); - if 
(p->protocol) { - protocol = format(protocol, "%U", format_ip_protocol, p->protocol); - } else { - protocol = format(protocol, "any"); - } - if (p->policy == IPSEC_POLICY_ACTION_PROTECT) { - policy = format(policy, " sa %u", p->sa_id); - } - vlib_cli_output(vm, " priority %d action %U protocol %v%v", - p->priority, format_ipsec_policy_action, p->policy, - protocol, policy); - vlib_cli_output(vm, " local addr range %U - %U port range %u - %u", - format_ip6_address, &p->laddr.start.ip6, - format_ip6_address, &p->laddr.stop.ip6, - p->lport.start, p->lport.stop); - vlib_cli_output(vm, " remote addr range %U - %U port range %u - %u", - format_ip6_address, &p->raddr.start.ip6, - format_ip6_address, &p->raddr.stop.ip6, - p->rport.start, p->rport.stop); - vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets, - p->counter.bytes); - }; - vlib_cli_output(vm, " inbound policies"); - vec_foreach(i, spd->ipv4_inbound_protect_policy_indices) - { - p = pool_elt_at_index(spd->policies, *i); - vec_reset_length(protocol); - vec_reset_length(policy); - if (p->protocol) { - protocol = format(protocol, "%U", format_ip_protocol, p->protocol); - } else { - protocol = format(protocol, "any"); - } - if (p->policy == IPSEC_POLICY_ACTION_PROTECT) { - policy = format(policy, " sa %u", p->sa_id); - } - vlib_cli_output(vm, " priority %d action %U protocol %v%v", - p->priority, format_ipsec_policy_action, p->policy, - protocol, policy); - vlib_cli_output(vm, " local addr range %U - %U port range %u - %u", - format_ip4_address, &p->laddr.start.ip4, - format_ip4_address, &p->laddr.stop.ip4, - p->lport.start, p->lport.stop); - vlib_cli_output(vm, " remote addr range %U - %U port range %u - %u", - format_ip4_address, &p->raddr.start.ip4, - format_ip4_address, &p->raddr.stop.ip4, - p->rport.start, p->rport.stop); - vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets, - p->counter.bytes); - }; - vec_foreach(i, spd->ipv4_inbound_policy_discard_and_bypass_indices) - { - p = 
pool_elt_at_index(spd->policies, *i); - vec_reset_length(protocol); - vec_reset_length(policy); - if (p->protocol) { - protocol = format(protocol, "%U", format_ip_protocol, p->protocol); - } else { - protocol = format(protocol, "any"); - } - if (p->policy == IPSEC_POLICY_ACTION_PROTECT) { - policy = format(policy, " sa %u", p->sa_id); - } - vlib_cli_output(vm, " priority %d action %U protocol %v%v", - p->priority, format_ipsec_policy_action, p->policy, - protocol, policy); - vlib_cli_output(vm, " local addr range %U - %U port range %u - %u", - format_ip4_address, &p->laddr.start.ip4, - format_ip4_address, &p->laddr.stop.ip4, - p->lport.start, p->lport.stop); - vlib_cli_output(vm, " remote addr range %U - %U port range %u - %u", - format_ip4_address, &p->raddr.start.ip4, - format_ip4_address, &p->raddr.stop.ip4, - p->rport.start, p->rport.stop); - vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets, - p->counter.bytes); - }; - vec_foreach(i, spd->ipv6_inbound_protect_policy_indices) - { - p = pool_elt_at_index(spd->policies, *i); - vec_reset_length(protocol); - vec_reset_length(policy); - if (p->protocol) { - protocol = format(protocol, "%U", format_ip_protocol, p->protocol); - } else { - protocol = format(protocol, "any"); - } - if (p->policy == IPSEC_POLICY_ACTION_PROTECT) { - policy = format(policy, " sa %u", p->sa_id); - } - vlib_cli_output(vm, " priority %d action %U protocol %v%v", - p->priority, format_ipsec_policy_action, p->policy, - protocol, policy); - vlib_cli_output(vm, " local addr range %U - %U port range %u - %u", - format_ip6_address, &p->laddr.start.ip6, - format_ip6_address, &p->laddr.stop.ip6, - p->lport.start, p->lport.stop); - vlib_cli_output(vm, " remote addr range %U - %U port range %u - %u", - format_ip6_address, &p->raddr.start.ip6, - format_ip6_address, &p->raddr.stop.ip6, - p->rport.start, p->rport.stop); - vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets, - p->counter.bytes); - }; - vec_foreach(i, 
spd->ipv6_inbound_policy_discard_and_bypass_indices) - { - p = pool_elt_at_index(spd->policies, *i); - vec_reset_length(protocol); - vec_reset_length(policy); - if (p->protocol) { - protocol = format(protocol, "%U", format_ip_protocol, p->protocol); - } else { - protocol = format(protocol, "any"); - } - if (p->policy == IPSEC_POLICY_ACTION_PROTECT) { - policy = format(policy, " sa %u", p->sa_id); - } - vlib_cli_output(vm, " priority %d action %U protocol %v%v", - p->priority, format_ipsec_policy_action, p->policy, - protocol, policy); - vlib_cli_output(vm, " local addr range %U - %U port range %u - %u", - format_ip6_address, &p->laddr.start.ip6, - format_ip6_address, &p->laddr.stop.ip6, - p->lport.start, p->lport.stop); - vlib_cli_output(vm, " remote addr range %U - %U port range %u - %u", - format_ip6_address, &p->raddr.start.ip6, - format_ip6_address, &p->raddr.stop.ip6, - p->rport.start, p->rport.stop); - vlib_cli_output(vm, " packets %u bytes %u", p->counter.packets, - p->counter.bytes); - }; + hash_foreach(sw_if_index, spd_id, im->spd_index_by_sw_if_index, ({ + vlib_cli_output (vm, " %d -> %U", spd_id, + format_vnet_sw_if_index_name, im->vnet_main, + sw_if_index); })); /* *INDENT-ON* */ @@ -671,34 +428,14 @@ show_ipsec_command_fn (vlib_main_t * vm, if (t->hw_if_index == ~0) continue; hi = vnet_get_hw_interface (im->vnet_main, t->hw_if_index); - vlib_cli_output(vm, " %s seq", hi->name); - sa = pool_elt_at_index(im->sad, t->output_sa_index); - - tx_table_id = fib_table_get_table_id(sa->tx_fib_index, FIB_PROTOCOL_IP4); - - vlib_cli_output(vm, " seq %u seq-hi %u esn %u anti-replay %u udp-encap %u tx-table %u", - sa->seq, sa->seq_hi, sa->use_esn, sa->use_anti_replay, sa->udp_encap, tx_table_id); - vlib_cli_output(vm, " local-spi %u local-ip %U", sa->spi, - format_ip4_address, &sa->tunnel_src_addr.ip4); - vlib_cli_output(vm, " local-crypto %U %U", - format_ipsec_crypto_alg, sa->crypto_alg, - format_hex_bytes, sa->crypto_key, sa->crypto_key_len); - 
vlib_cli_output(vm, " local-integrity %U %U", - format_ipsec_integ_alg, sa->integ_alg, - format_hex_bytes, sa->integ_key, sa->integ_key_len); - sa = pool_elt_at_index(im->sad, t->input_sa_index); - vlib_cli_output(vm, " last-seq %u last-seq-hi %u esn %u anti-replay %u window %U", - sa->last_seq, sa->last_seq_hi, sa->use_esn, - sa->use_anti_replay, - format_ipsec_replay_window, sa->replay_window); - vlib_cli_output(vm, " remote-spi %u remote-ip %U", sa->spi, - format_ip4_address, &sa->tunnel_src_addr.ip4); - vlib_cli_output(vm, " remote-crypto %U %U", - format_ipsec_crypto_alg, sa->crypto_alg, - format_hex_bytes, sa->crypto_key, sa->crypto_key_len); - vlib_cli_output(vm, " remote-integrity %U %U", - format_ipsec_integ_alg, sa->integ_alg, - format_hex_bytes, sa->integ_key, sa->integ_key_len); + + vlib_cli_output(vm, " %s", hi->name); + + vlib_cli_output(vm, " out-bound sa"); + vlib_cli_output(vm, " %U", format_ipsec_sa, t->output_sa_index); + + vlib_cli_output(vm, " in-bound sa"); + vlib_cli_output(vm, " %U", format_ipsec_sa, t->input_sa_index); })); vec_free(policy); vec_free(protocol); @@ -856,19 +593,9 @@ clear_ipsec_counters_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - ipsec_main_t *im = &ipsec_main; - ipsec_spd_t *spd; - ipsec_policy_t *p; + vlib_clear_combined_counters (&ipsec_spd_policy_counters); - /* *INDENT-OFF* */ - pool_foreach (spd, im->spds, ({ - pool_foreach(p, spd->policies, ({ - p->counter.packets = p->counter.bytes = 0; - })); - })); - /* *INDENT-ON* */ - - return 0; + return (NULL); } /* *INDENT-OFF* */ @@ -888,6 +615,8 @@ create_ipsec_tunnel_command_fn (vlib_main_t * vm, ipsec_add_del_tunnel_args_t a; int rv; u32 num_m_args = 0; + u8 ipv4_set = 0; + u8 ipv6_set = 0; clib_error_t *error = NULL; clib_memset (&a, 0, sizeof (a)); @@ -900,24 +629,37 @@ create_ipsec_tunnel_command_fn (vlib_main_t * vm, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { if (unformat - (line_input, "local-ip %U", 
unformat_ip4_address, &a.local_ip)) - num_m_args++; + (line_input, "local-ip %U", unformat_ip46_address, &a.local_ip, + IP46_TYPE_ANY)) + { + ip46_address_is_ip4 (&a.local_ip) ? (ipv4_set = 1) : (ipv6_set = 1); + num_m_args++; + } else if (unformat - (line_input, "remote-ip %U", unformat_ip4_address, &a.remote_ip)) - num_m_args++; + (line_input, "remote-ip %U", unformat_ip46_address, &a.remote_ip, + IP46_TYPE_ANY)) + { + ip46_address_is_ip4 (&a.remote_ip) ? (ipv4_set = 1) : (ipv6_set = + 1); + num_m_args++; + } else if (unformat (line_input, "local-spi %u", &a.local_spi)) num_m_args++; else if (unformat (line_input, "remote-spi %u", &a.remote_spi)) num_m_args++; else if (unformat (line_input, "instance %u", &a.show_instance)) a.renumber = 1; - else if (unformat (line_input, "del")) - a.is_add = 0; else if (unformat (line_input, "udp-encap")) a.udp_encap = 1; + else if (unformat (line_input, "use-esn")) + a.esn = 1; + else if (unformat (line_input, "use-anti-replay")) + a.anti_replay = 1; else if (unformat (line_input, "tx-table %u", &a.tx_table_id)) ; + else if (unformat (line_input, "del")) + a.is_add = 0; else { error = clib_error_return (0, "unknown input `%U'", @@ -932,6 +674,12 @@ create_ipsec_tunnel_command_fn (vlib_main_t * vm, goto done; } + if (ipv6_set) + return clib_error_return (0, "currently only IPv4 supported"); + + if (ipv4_set && ipv6_set) + return clib_error_return (0, "both IPv4 and IPv6 addresses specified"); + rv = ipsec_add_del_tunnel_if (&a); switch (rv) @@ -961,7 +709,7 @@ create_ipsec_tunnel_command_fn (vlib_main_t * vm, VLIB_CLI_COMMAND (create_ipsec_tunnel_command, static) = { .path = "create ipsec tunnel", .short_help = "create ipsec tunnel local-ip local-spi " - "remote-ip remote-spi [instance ] [udp-encap] " + "remote-ip remote-spi [instance ] [udp-encap] [use-esn] [use-anti-replay] " "[tx-table ]", .function = create_ipsec_tunnel_command_fn, }; diff --git a/src/vnet/ipsec/ipsec_format.c b/src/vnet/ipsec/ipsec_format.c index 
38aed79a1552..04a2a0b5be1b 100644 --- a/src/vnet/ipsec/ipsec_format.c +++ b/src/vnet/ipsec/ipsec_format.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -132,6 +133,153 @@ format_ipsec_replay_window (u8 * s, va_list * args) return s; } +u8 * +format_ipsec_policy (u8 * s, va_list * args) +{ + u32 pi = va_arg (*args, u32); + ipsec_main_t *im = &ipsec_main; + ipsec_policy_t *p; + vlib_counter_t counts; + + p = pool_elt_at_index (im->policies, pi); + + s = format (s, " [%d] priority %d action %U protocol ", + pi, p->priority, format_ipsec_policy_action, p->policy); + if (p->protocol) + { + s = format (s, "%U", format_ip_protocol, p->protocol); + } + else + { + s = format (s, "any"); + } + if (p->policy == IPSEC_POLICY_ACTION_PROTECT) + { + s = format (s, " sa %u", p->sa_id); + } + if (p->is_ipv6) + { + s = format (s, "\n local addr range %U - %U port range %u - %u", + format_ip6_address, &p->laddr.start.ip6, + format_ip6_address, &p->laddr.stop.ip6, + p->lport.start, p->lport.stop); + s = format (s, "\n remote addr range %U - %U port range %u - %u", + format_ip6_address, &p->raddr.start.ip6, + format_ip6_address, &p->raddr.stop.ip6, + p->rport.start, p->rport.stop); + } + else + { + s = format (s, "\n local addr range %U - %U port range %u - %u", + format_ip4_address, &p->laddr.start.ip4, + format_ip4_address, &p->laddr.stop.ip4, + p->lport.start, p->lport.stop); + s = format (s, "\n remote addr range %U - %U port range %u - %u", + format_ip4_address, &p->raddr.start.ip4, + format_ip4_address, &p->raddr.stop.ip4, + p->rport.start, p->rport.stop); + } + vlib_get_combined_counter (&ipsec_spd_policy_counters, pi, &counts); + s = format (s, "\n packets %u bytes %u", counts.packets, counts.bytes); + + return (s); +} + +u8 * +format_ipsec_spd (u8 * s, va_list * args) +{ + u32 si = va_arg (*args, u32); + ipsec_main_t *im = &ipsec_main; + ipsec_spd_t *spd; + u32 *i; + + spd = pool_elt_at_index (im->spds, si); + + s = format (s, "spd %u", spd->id); + 
+#define _(v, n) \ + s = format (s, "\n %s:", n); \ + vec_foreach(i, spd->policies[IPSEC_SPD_POLICY_##v]) \ + { \ + s = format (s, "\n %U", format_ipsec_policy, *i); \ + } + foreach_ipsec_spd_policy_type; +#undef _ + + return (s); +} + +u8 * +format_ipsec_key (u8 * s, va_list * args) +{ + ipsec_key_t *key = va_arg (*args, ipsec_key_t *); + + return (format (s, "%U", format_hex_bytes, key->data, key->len)); +} + +uword +unformat_ipsec_key (unformat_input_t * input, va_list * args) +{ + ipsec_key_t *key = va_arg (*args, ipsec_key_t *); + u8 *data; + + if (unformat (input, "%U", unformat_hex_string, &data)) + { + ipsec_mk_key (key, data, vec_len (data)); + vec_free (data); + } + else + return 0; + return 1; +} + +u8 * +format_ipsec_sa (u8 * s, va_list * args) +{ + u32 sai = va_arg (*args, u32); + ipsec_main_t *im = &ipsec_main; + u32 tx_table_id; + ipsec_sa_t *sa; + + sa = pool_elt_at_index (im->sad, sai); + + s = format (s, "[%d] sa %u spi %u mode %s%s protocol %s%s%s%s", + sai, sa->id, sa->spi, + sa->is_tunnel ? "tunnel" : "transport", + sa->is_tunnel_ip6 ? "-ip6" : "", + sa->protocol ? "esp" : "ah", + sa->udp_encap ? " udp-encap-enabled" : "", + sa->use_anti_replay ? " anti-replay" : "", + sa->use_esn ? " extended-sequence-number" : ""); + s = format (s, "\n last-seq %u last-seq-hi %u window %U", + sa->last_seq, sa->last_seq_hi, + format_ipsec_replay_window, sa->replay_window); + s = format (s, "\n crypto alg %U%s%U", + format_ipsec_crypto_alg, sa->crypto_alg, + sa->crypto_alg ? " key " : "", + format_ipsec_key, &sa->crypto_key); + s = format (s, "\n integrity alg %U%s%U", + format_ipsec_integ_alg, sa->integ_alg, + sa->integ_alg ? 
" key " : "", format_ipsec_key, &sa->integ_key); + + if (sa->is_tunnel) + { + tx_table_id = fib_table_get_table_id (sa->tx_fib_index, + FIB_PROTOCOL_IP4); + s = format (s, "\n table-ID %d tunnel src %U dst %U", + tx_table_id, + format_ip46_address, &sa->tunnel_src_addr, IP46_TYPE_ANY, + format_ip46_address, &sa->tunnel_dst_addr, IP46_TYPE_ANY); + s = format (s, "\n resovle via fib-entry: %d", sa->fib_entry_index); + s = format (s, "\n stacked on:"); + s = + format (s, "\n %U", format_dpo_id, &sa->dpo[IPSEC_PROTOCOL_ESP], + 6); + } + + return (s); +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/ipsec/ipsec_if.c b/src/vnet/ipsec/ipsec_if.c index 0dfb6909e422..3ec4b0b460c2 100644 --- a/src/vnet/ipsec/ipsec_if.c +++ b/src/vnet/ipsec/ipsec_if.c @@ -109,7 +109,9 @@ ipsec_if_tx_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); t0 = pool_elt_at_index (im->tunnel_interfaces, hi0->dev_instance); vnet_buffer (b0)->ipsec.sad_index = t0->output_sa_index; - next0 = IPSEC_OUTPUT_NEXT_ESP4_ENCRYPT; + + /* 0, tx-node next[0] was added by vlib_node_add_next_with_slot */ + next0 = 0; len0 = vlib_buffer_length_in_chain (vm, b0); @@ -250,6 +252,18 @@ ipsec_add_del_tunnel_if (ipsec_add_del_tunnel_args_t * args) return 0; } +static u32 +ipsec_tun_mk_input_sa_id (u32 ti) +{ + return (0x80000000 | ti); +} + +static u32 +ipsec_tun_mk_output_sa_id (u32 ti) +{ + return (0xc0000000 | ti); +} + int ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm, ipsec_add_del_tunnel_args_t * args, @@ -260,12 +274,14 @@ ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm, vnet_hw_interface_t *hi = NULL; u32 hw_if_index = ~0; uword *p; - ipsec_sa_t *sa; u32 dev_instance; u32 slot; - u32 tx_fib_index = ~0; + ipsec_key_t crypto_key, integ_key; + ipsec_sa_flags_t flags; + int rv; - u64 key = (u64) args->remote_ip.as_u32 << 32 | (u64) args->remote_spi; + u64 key = ((u64) args->remote_ip.ip4.as_u32 << 32 | + (u64) 
clib_host_to_net_u32 (args->remote_spi)); p = hash_get (im->ipsec_if_pool_index_by_key, key); if (args->is_add) @@ -274,10 +290,6 @@ ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm, if (p) return VNET_API_ERROR_INVALID_VALUE; - tx_fib_index = fib_table_find (FIB_PROTOCOL_IP4, args->tx_table_id); - if (tx_fib_index == ~((u32) 0)) - return VNET_API_ERROR_NO_SUCH_FIB; - pool_get_aligned (im->tunnel_interfaces, t, CLIB_CACHE_LINE_BYTES); clib_memset (t, 0, sizeof (*t)); @@ -296,59 +308,53 @@ ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm, hash_set (im->ipsec_if_real_dev_by_show_dev, t->show_instance, dev_instance); - pool_get (im->sad, sa); - clib_memset (sa, 0, sizeof (*sa)); - t->input_sa_index = sa - im->sad; - sa->protocol = IPSEC_PROTOCOL_ESP; - sa->spi = args->remote_spi; - sa->tunnel_src_addr.ip4.as_u32 = args->remote_ip.as_u32; - sa->tunnel_dst_addr.ip4.as_u32 = args->local_ip.as_u32; - sa->is_tunnel = 1; - sa->use_esn = args->esn; - sa->use_anti_replay = args->anti_replay; - sa->integ_alg = args->integ_alg; - sa->udp_encap = args->udp_encap; - sa->tx_fib_index = ~((u32) 0); /* Not used, but set for troubleshooting */ - if (args->remote_integ_key_len <= sizeof (args->remote_integ_key)) - { - sa->integ_key_len = args->remote_integ_key_len; - clib_memcpy (sa->integ_key, args->remote_integ_key, - args->remote_integ_key_len); - } - sa->crypto_alg = args->crypto_alg; - if (args->remote_crypto_key_len <= sizeof (args->remote_crypto_key)) - { - sa->crypto_key_len = args->remote_crypto_key_len; - clib_memcpy (sa->crypto_key, args->remote_crypto_key, - args->remote_crypto_key_len); - } - - pool_get (im->sad, sa); - clib_memset (sa, 0, sizeof (*sa)); - t->output_sa_index = sa - im->sad; - sa->protocol = IPSEC_PROTOCOL_ESP; - sa->spi = args->local_spi; - sa->tunnel_src_addr.ip4.as_u32 = args->local_ip.as_u32; - sa->tunnel_dst_addr.ip4.as_u32 = args->remote_ip.as_u32; - sa->is_tunnel = 1; - sa->use_esn = args->esn; - sa->use_anti_replay = args->anti_replay; - 
sa->integ_alg = args->integ_alg; - sa->udp_encap = args->udp_encap; - sa->tx_fib_index = tx_fib_index; - if (args->local_integ_key_len <= sizeof (args->local_integ_key)) - { - sa->integ_key_len = args->local_integ_key_len; - clib_memcpy (sa->integ_key, args->local_integ_key, - args->local_integ_key_len); - } - sa->crypto_alg = args->crypto_alg; - if (args->local_crypto_key_len <= sizeof (args->local_crypto_key)) - { - sa->crypto_key_len = args->local_crypto_key_len; - clib_memcpy (sa->crypto_key, args->local_crypto_key, - args->local_crypto_key_len); - } + flags = IPSEC_SA_FLAG_IS_TUNNEL; + if (args->udp_encap) + flags |= IPSEC_SA_FLAG_UDP_ENCAP; + if (args->esn) + flags |= IPSEC_SA_FLAG_USE_EXTENDED_SEQ_NUM; + if (args->anti_replay) + flags |= IPSEC_SA_FLAG_USE_ANTI_REPLAY; + + ipsec_mk_key (&crypto_key, + args->remote_crypto_key, args->remote_crypto_key_len); + ipsec_mk_key (&integ_key, + args->remote_integ_key, args->remote_integ_key_len); + + rv = ipsec_sa_add (ipsec_tun_mk_input_sa_id (dev_instance), + args->remote_spi, + IPSEC_PROTOCOL_ESP, + args->crypto_alg, + &crypto_key, + args->integ_alg, + &integ_key, + flags, + args->tx_table_id, + &args->remote_ip, + &args->local_ip, &t->input_sa_index); + + if (rv) + return VNET_API_ERROR_UNIMPLEMENTED; + + ipsec_mk_key (&crypto_key, + args->local_crypto_key, args->local_crypto_key_len); + ipsec_mk_key (&integ_key, + args->local_integ_key, args->local_integ_key_len); + + rv = ipsec_sa_add (ipsec_tun_mk_output_sa_id (dev_instance), + args->local_spi, + IPSEC_PROTOCOL_ESP, + args->crypto_alg, + &crypto_key, + args->integ_alg, + &integ_key, + flags, + args->tx_table_id, + &args->local_ip, + &args->remote_ip, &t->output_sa_index); + + if (rv) + return VNET_API_ERROR_UNIMPLEMENTED; hash_set (im->ipsec_if_pool_index_by_key, key, t - im->tunnel_interfaces); @@ -359,12 +365,12 @@ ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm, t - im->tunnel_interfaces); hi = vnet_get_hw_interface (vnm, hw_if_index); + /* add esp4 as 
the next-node-index of this tx-node */ slot = vlib_node_add_next_with_slot - (vnm->vlib_main, hi->tx_node_index, im->esp4_encrypt_node_index, - IPSEC_OUTPUT_NEXT_ESP4_ENCRYPT); + (vnm->vlib_main, hi->tx_node_index, im->esp4_encrypt_node_index, 0); - ASSERT (slot == IPSEC_OUTPUT_NEXT_ESP4_ENCRYPT); + ASSERT (slot == 0); t->hw_if_index = hw_if_index; @@ -392,18 +398,14 @@ ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm, vnet_delete_hw_interface (vnm, t->hw_if_index); - /* delete input and output SA */ - - sa = pool_elt_at_index (im->sad, t->input_sa_index); - pool_put (im->sad, sa); - - sa = pool_elt_at_index (im->sad, t->output_sa_index); - pool_put (im->sad, sa); - hash_unset (im->ipsec_if_pool_index_by_key, key); hash_unset (im->ipsec_if_real_dev_by_show_dev, t->show_instance); pool_put (im->tunnel_interfaces, t); + + /* delete input and output SA */ + ipsec_sa_del (ipsec_tun_mk_input_sa_id (p[0])); + ipsec_sa_del (ipsec_tun_mk_output_sa_id (p[0])); } if (sw_if_index) @@ -435,9 +437,11 @@ ipsec_add_del_ipsec_gre_tunnel (vnet_main_t * vnm, sa = pool_elt_at_index (im->sad, p[0]); if (sa->is_tunnel) - key = (u64) sa->tunnel_dst_addr.ip4.as_u32 << 32 | (u64) sa->spi; + key = ((u64) sa->tunnel_dst_addr.ip4.as_u32 << 32 | + (u64) clib_host_to_net_u32 (sa->spi)); else - key = (u64) args->remote_ip.as_u32 << 32 | (u64) sa->spi; + key = ((u64) args->remote_ip.as_u32 << 32 | + (u64) clib_host_to_net_u32 (sa->spi)); p = hash_get (im->ipsec_if_pool_index_by_key, key); @@ -493,29 +497,25 @@ ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index, { sa = pool_elt_at_index (im->sad, t->output_sa_index); sa->crypto_alg = alg; - sa->crypto_key_len = vec_len (key); - clib_memcpy (sa->crypto_key, key, vec_len (key)); + ipsec_mk_key (&sa->crypto_key, key, vec_len (key)); } else if (type == IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG) { sa = pool_elt_at_index (im->sad, t->output_sa_index); sa->integ_alg = alg; - sa->integ_key_len = vec_len (key); - clib_memcpy (sa->integ_key, key, 
vec_len (key)); + ipsec_mk_key (&sa->integ_key, key, vec_len (key)); } else if (type == IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO) { sa = pool_elt_at_index (im->sad, t->input_sa_index); sa->crypto_alg = alg; - sa->crypto_key_len = vec_len (key); - clib_memcpy (sa->crypto_key, key, vec_len (key)); + ipsec_mk_key (&sa->crypto_key, key, vec_len (key)); } else if (type == IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG) { sa = pool_elt_at_index (im->sad, t->input_sa_index); sa->integ_alg = alg; - sa->integ_key_len = vec_len (key); - clib_memcpy (sa->integ_key, key, vec_len (key)); + ipsec_mk_key (&sa->integ_key, key, vec_len (key)); } else return VNET_API_ERROR_INVALID_VALUE; @@ -566,15 +566,16 @@ ipsec_set_interface_sa (vnet_main_t * vnm, u32 hw_if_index, u32 sa_id, old_sa = pool_elt_at_index (im->sad, old_sa_index); /* unset old inbound hash entry. packets should stop arriving */ - key = - (u64) old_sa->tunnel_src_addr.ip4.as_u32 << 32 | (u64) old_sa->spi; + key = ((u64) old_sa->tunnel_src_addr.ip4.as_u32 << 32 | + (u64) clib_host_to_net_u32 (old_sa->spi)); p = hash_get (im->ipsec_if_pool_index_by_key, key); if (p) hash_unset (im->ipsec_if_pool_index_by_key, key); /* set new inbound SA, then set new hash entry */ t->input_sa_index = sa_index; - key = (u64) sa->tunnel_src_addr.ip4.as_u32 << 32 | (u64) sa->spi; + key = ((u64) sa->tunnel_src_addr.ip4.as_u32 << 32 | + (u64) clib_host_to_net_u32 (sa->spi)); hash_set (im->ipsec_if_pool_index_by_key, key, hi->dev_instance); } else diff --git a/src/vnet/ipsec/ipsec_if.h b/src/vnet/ipsec/ipsec_if.h new file mode 100644 index 000000000000..67d5554e777e --- /dev/null +++ b/src/vnet/ipsec/ipsec_if.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __IPSEC_IF_H__ +#define __IPSEC_IF_H__ + +#include + +typedef enum +{ + IPSEC_IF_SET_KEY_TYPE_NONE, + IPSEC_IF_SET_KEY_TYPE_LOCAL_CRYPTO, + IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO, + IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG, + IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG, +} ipsec_if_set_key_type_t; + +typedef struct +{ + /* Required for pool_get_aligned */ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + u32 input_sa_index; + u32 output_sa_index; + u32 hw_if_index; + u32 show_instance; +} ipsec_tunnel_if_t; + +typedef struct +{ + u8 is_add; + u8 esn; + u8 anti_replay; + ip46_address_t local_ip, remote_ip; + u32 local_spi; + u32 remote_spi; + ipsec_crypto_alg_t crypto_alg; + u8 local_crypto_key_len; + u8 local_crypto_key[128]; + u8 remote_crypto_key_len; + u8 remote_crypto_key[128]; + ipsec_integ_alg_t integ_alg; + u8 local_integ_key_len; + u8 local_integ_key[128]; + u8 remote_integ_key_len; + u8 remote_integ_key[128]; + u8 renumber; + u32 show_instance; + u8 udp_encap; + u32 tx_table_id; +} ipsec_add_del_tunnel_args_t; + +typedef struct +{ + u8 is_add; + u32 local_sa_id; + u32 remote_sa_id; + ip4_address_t local_ip; + ip4_address_t remote_ip; +} ipsec_add_del_ipsec_gre_tunnel_args_t; + +extern int ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm, + ipsec_add_del_tunnel_args_t * + args, u32 * sw_if_index); +extern int ipsec_add_del_tunnel_if (ipsec_add_del_tunnel_args_t * args); +extern int ipsec_add_del_ipsec_gre_tunnel (vnet_main_t * vnm, + ipsec_add_del_ipsec_gre_tunnel_args_t + * args); + +extern int ipsec_set_interface_key (vnet_main_t * vnm, u32 
hw_if_index, + ipsec_if_set_key_type_t type, + u8 alg, u8 * key); +extern int ipsec_set_interface_sa (vnet_main_t * vnm, u32 hw_if_index, + u32 sa_id, u8 is_outbound); + +#endif /* __IPSEC_IF_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ipsec/ipsec_if_in.c b/src/vnet/ipsec/ipsec_if_in.c index 1dbd12710c9c..b8610f4864fc 100644 --- a/src/vnet/ipsec/ipsec_if_in.c +++ b/src/vnet/ipsec/ipsec_if_in.c @@ -25,7 +25,8 @@ /* Statistics (not really errors) */ #define foreach_ipsec_if_input_error \ _(RX, "good packets received") \ -_(DISABLED, "ipsec packets received on disabled interface") +_(DISABLED, "ipsec packets received on disabled interface") \ +_(NO_TUNNEL, "no matching tunnel") static char *ipsec_if_input_error_strings[] = { #define _(sym,string) string, @@ -48,7 +49,7 @@ typedef struct u32 seq; } ipsec_if_input_trace_t; -u8 * +static u8 * format_ipsec_if_input_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); @@ -76,7 +77,7 @@ VLIB_NODE_FN (ipsec_if_input_node) (vlib_main_t * vm, ipsec_sa_t *sa0; vlib_combined_counter_main_t *rx_counter; vlib_combined_counter_main_t *drop_counter; - u32 n_disabled = 0; + u32 n_disabled = 0, n_no_tunnel = 0; rx_counter = vim->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX; drop_counter = vim->combined_sw_if_counters + VNET_INTERFACE_COUNTER_DROP; @@ -111,8 +112,7 @@ VLIB_NODE_FN (ipsec_if_input_node) (vlib_main_t * vm, next0 = IPSEC_INPUT_NEXT_DROP; - u64 key = (u64) ip0->src_address.as_u32 << 32 | - (u64) clib_net_to_host_u32 (esp0->spi); + u64 key = (u64) ip0->src_address.as_u32 << 32 | (u64) esp0->spi; p = hash_get (im->ipsec_if_pool_index_by_key, key); @@ -181,6 +181,11 @@ VLIB_NODE_FN (ipsec_if_input_node) (vlib_main_t * vm, vlib_buffer_advance (b0, ip4_header_bytes (ip0)); next0 = im->esp4_decrypt_next_index; } + else + { + b0->error = 
node->errors[IPSEC_IF_INPUT_ERROR_NO_TUNNEL]; + n_no_tunnel++; + } trace: if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -216,6 +221,8 @@ VLIB_NODE_FN (ipsec_if_input_node) (vlib_main_t * vm, vlib_node_increment_counter (vm, ipsec_if_input_node.index, IPSEC_IF_INPUT_ERROR_DISABLED, n_disabled); + vlib_node_increment_counter (vm, ipsec_if_input_node.index, + IPSEC_IF_INPUT_ERROR_NO_TUNNEL, n_no_tunnel); return from_frame->n_vectors; } diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c index 1f3d6d01a93a..193b03ff0ae2 100644 --- a/src/vnet/ipsec/ipsec_input.c +++ b/src/vnet/ipsec/ipsec_input.c @@ -24,9 +24,9 @@ #include #include -#define foreach_ipsec_input_error \ - _(RX_PKTS, "IPSEC pkts received") \ - _(DECRYPTION_FAILED, "IPSEC decryption failed") +#define foreach_ipsec_input_error \ +_(RX_PKTS, "IPSEC pkts received") \ +_(RX_MATCH_PKTS, "IPSEC pkts matched") typedef enum { @@ -44,7 +44,9 @@ static char *ipsec_input_error_strings[] = { typedef struct { + ip_protocol_t proto; u32 spd; + u32 policy_index; u32 sa_id; u32 spi; u32 seq; @@ -58,23 +60,10 @@ format_ipsec_input_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ipsec_input_trace_t *t = va_arg (*args, ipsec_input_trace_t *); - if (t->spi == 0 && t->seq == 0) - { - s = format (s, "esp: no esp packet"); - return s; - } + s = format (s, "%U: sa_id %u spd %u policy %d spi %u seq %u", + format_ip_protocol, t->proto, t->sa_id, + t->spd, t->policy_index, t->spi, t->seq); - if (t->sa_id != 0) - { - s = - format (s, "esp: sa_id %u spd %u spi %u seq %u", t->sa_id, t->spd, - t->spi, t->seq); - } - else - { - s = - format (s, "esp: no sa spd %u spi %u seq %u", t->spd, t->spi, t->seq); - } return s; } @@ -86,9 +75,9 @@ ipsec_input_protect_policy_match (ipsec_spd_t * spd, u32 sa, u32 da, u32 spi) ipsec_sa_t *s; u32 *i; - vec_foreach (i, spd->ipv4_inbound_protect_policy_indices) + vec_foreach (i,
spd->policies[IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT]) { - p = pool_elt_at_index (spd->policies, *i); + p = pool_elt_at_index (im->policies, *i); s = pool_elt_at_index (im->sad, p->sa_index); if (spi != s->spi) @@ -142,9 +131,9 @@ ipsec6_input_protect_policy_match (ipsec_spd_t * spd, ipsec_sa_t *s; u32 *i; - vec_foreach (i, spd->ipv6_inbound_protect_policy_indices) + vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT]) { - p = pool_elt_at_index (spd->policies, *i); + p = pool_elt_at_index (im->policies, *i); s = pool_elt_at_index (im->sad, p->sa_index); if (spi != s->spi) @@ -178,11 +167,14 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { - u32 n_left_from, *from, next_index, *to_next; + u32 n_left_from, *from, next_index, *to_next, thread_index; ipsec_main_t *im = &ipsec_main; + u32 ipsec_unprocessed = 0; + u32 ipsec_matched = 0; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; + thread_index = vm->thread_index; next_index = node->cached_next_index; @@ -194,7 +186,7 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, while (n_left_from > 0 && n_left_to_next > 0) { - u32 bi0, next0; + u32 bi0, next0, pi0; vlib_buffer_t *b0; ip4_header_t *ip0; esp_header_t *esp0; @@ -249,38 +241,45 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, clib_net_to_host_u32 (esp0->spi)); - if (PREDICT_TRUE (p0 != 0)) + if (PREDICT_TRUE (p0 != NULL)) { - p0->counter.packets++; - p0->counter.bytes += clib_net_to_host_u16 (ip0->length); + ipsec_matched += 1; + + pi0 = p0 - im->policies; + vlib_increment_combined_counter + (&ipsec_spd_policy_counters, + thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->length)); + vnet_buffer (b0)->ipsec.sad_index = p0->sa_index; vnet_buffer (b0)->ipsec.flags = 0; next0 = im->esp4_decrypt_next_index; vlib_buffer_advance (b0, ((u8 *) esp0 - (u8 *) ip0)); goto trace0; } + else + { + pi0 = ~0; + }; /* FIXME bypass and discard */ trace0: - if 
(PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - if (ip0->protocol == IP_PROTOCOL_IPSEC_ESP || - ip0->protocol == IP_PROTOCOL_UDP) - { - if (p0) - tr->sa_id = p0->sa_id; - tr->spi = clib_host_to_net_u32 (esp0->spi); - tr->seq = clib_host_to_net_u32 (esp0->seq); - tr->spd = spd0->id; - } - } + tr->proto = ip0->protocol; + if (p0) + tr->sa_id = p0->sa_id; + tr->spi = clib_net_to_host_u32 (esp0->spi); + tr->seq = clib_net_to_host_u32 (esp0->seq); + tr->spd = spd0->id; + tr->policy_index = pi0; + } } - - - if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_AH)) + else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH) { ah0 = (ah_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); p0 = ipsec_input_protect_policy_match (spd0, @@ -295,29 +294,44 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, if (PREDICT_TRUE (p0 != 0)) { - p0->counter.packets++; - p0->counter.bytes += clib_net_to_host_u16 (ip0->length); + ipsec_matched += 1; + + pi0 = p0 - im->policies; + vlib_increment_combined_counter + (&ipsec_spd_policy_counters, + thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->length)); + vnet_buffer (b0)->ipsec.sad_index = p0->sa_index; vnet_buffer (b0)->ipsec.flags = 0; next0 = im->ah4_decrypt_next_index; goto trace1; } + else + { + pi0 = ~0; + } /* FIXME bypass and discard */ trace1: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - if (ip0->protocol == IP_PROTOCOL_IPSEC_ESP) - { - if (p0) - tr->sa_id = p0->sa_id; - tr->spi = clib_host_to_net_u32 (ah0->spi); - tr->seq = clib_host_to_net_u32 (ah0->seq_no); - tr->spd = spd0->id; - } + + tr->proto = ip0->protocol; + if (p0) + tr->sa_id = p0->sa_id; 
+ tr->spi = clib_net_to_host_u32 (ah0->spi); + tr->seq = clib_net_to_host_u32 (ah0->seq_no); + tr->spd = spd0->id; + tr->policy_index = pi0; } } + else + { + ipsec_unprocessed += 1; + } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, @@ -325,10 +339,14 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } + vlib_node_increment_counter (vm, ipsec4_input_node.index, IPSEC_INPUT_ERROR_RX_PKTS, - from_frame->n_vectors); + from_frame->n_vectors - ipsec_unprocessed); + vlib_node_increment_counter (vm, ipsec4_input_node.index, + IPSEC_INPUT_ERROR_RX_MATCH_PKTS, + ipsec_matched); return from_frame->n_vectors; } @@ -339,10 +357,8 @@ VLIB_REGISTER_NODE (ipsec4_input_node,static) = { .vector_size = sizeof (u32), .format_trace = format_ipsec_input_trace, .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN(ipsec_input_error_strings), .error_strings = ipsec_input_error_strings, - .n_next_nodes = IPSEC_INPUT_N_NEXT, .next_nodes = { #define _(s,n) [IPSEC_INPUT_NEXT_##s] = n, @@ -359,11 +375,14 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { - u32 n_left_from, *from, next_index, *to_next; + u32 n_left_from, *from, next_index, *to_next, thread_index; ipsec_main_t *im = &ipsec_main; + u32 ipsec_unprocessed = 0; + u32 ipsec_matched = 0; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; + thread_index = vm->thread_index; next_index = node->cached_next_index; @@ -375,7 +394,7 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, while (n_left_from > 0 && n_left_to_next > 0) { - u32 bi0, next0; + u32 bi0, next0, pi0; vlib_buffer_t *b0; ip6_header_t *ip0; esp_header_t *esp0; @@ -420,16 +439,25 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, if (PREDICT_TRUE (p0 != 0)) { - p0->counter.packets++; - p0->counter.bytes += - clib_net_to_host_u16 (ip0->payload_length); - p0->counter.bytes 
+= header_size; + ipsec_matched += 1; + + pi0 = p0 - im->policies; + vlib_increment_combined_counter + (&ipsec_spd_policy_counters, + thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->payload_length) + + header_size); + vnet_buffer (b0)->ipsec.sad_index = p0->sa_index; vnet_buffer (b0)->ipsec.flags = 0; next0 = im->esp6_decrypt_next_index; vlib_buffer_advance (b0, header_size); goto trace0; } + else + { + pi0 = ~0; + } } else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH) { @@ -441,30 +469,42 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, if (PREDICT_TRUE (p0 != 0)) { - p0->counter.packets++; - p0->counter.bytes += - clib_net_to_host_u16 (ip0->payload_length); - p0->counter.bytes += header_size; + ipsec_matched += 1; + pi0 = p0 - im->policies; + vlib_increment_combined_counter + (&ipsec_spd_policy_counters, + thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->payload_length) + + header_size); + vnet_buffer (b0)->ipsec.sad_index = p0->sa_index; vnet_buffer (b0)->ipsec.flags = 0; next0 = im->ah6_decrypt_next_index; goto trace0; } + else + { + pi0 = ~0; + } + } + else + { + ipsec_unprocessed += 1; } trace0: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - if (ip0->protocol == IP_PROTOCOL_IPSEC_ESP) - { - if (p0) - tr->sa_id = p0->sa_id; - tr->spi = clib_host_to_net_u32 (esp0->spi); - tr->seq = clib_host_to_net_u32 (esp0->seq); - tr->spd = spd0->id; - } + + if (p0) + tr->sa_id = p0->sa_id; + tr->proto = ip0->protocol; + tr->spi = clib_net_to_host_u32 (esp0->spi); + tr->seq = clib_net_to_host_u32 (esp0->seq); + tr->spd = spd0->id; } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, @@ -472,9 +512,14 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } + vlib_node_increment_counter (vm, 
ipsec6_input_node.index, IPSEC_INPUT_ERROR_RX_PKTS, - from_frame->n_vectors); + from_frame->n_vectors - ipsec_unprocessed); + + vlib_node_increment_counter (vm, ipsec6_input_node.index, + IPSEC_INPUT_ERROR_RX_MATCH_PKTS, + ipsec_matched); return from_frame->n_vectors; } @@ -486,11 +531,14 @@ VLIB_REGISTER_NODE (ipsec6_input_node,static) = { .vector_size = sizeof (u32), .format_trace = format_ipsec_input_trace, .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN(ipsec_input_error_strings), .error_strings = ipsec_input_error_strings, - - .sibling_of = "ipsec4-input-feature", + .n_next_nodes = IPSEC_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [IPSEC_INPUT_NEXT_##s] = n, + foreach_ipsec_input_next +#undef _ + }, }; /* *INDENT-ON* */ diff --git a/src/vnet/ipsec/ipsec_io.h b/src/vnet/ipsec/ipsec_io.h new file mode 100644 index 000000000000..c180a784eaa5 --- /dev/null +++ b/src/vnet/ipsec/ipsec_io.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __IPSEC_IO_H__ +#define __IPSEC_IO_H__ + +#define IPSEC_FLAG_IPSEC_GRE_TUNNEL (1 << 0) + +#define foreach_ipsec_output_next \ + _ (DROP, "error-drop") + +#define _(v, s) IPSEC_OUTPUT_NEXT_##v, +typedef enum +{ + foreach_ipsec_output_next +#undef _ + IPSEC_OUTPUT_N_NEXT, +} ipsec_output_next_t; + +#define foreach_ipsec_input_next \ + _ (DROP, "error-drop") + +#define _(v, s) IPSEC_INPUT_NEXT_##v, +typedef enum +{ + foreach_ipsec_input_next +#undef _ + IPSEC_INPUT_N_NEXT, +} ipsec_input_next_t; + + +typedef struct +{ + u32 spd_index; +} ip4_ipsec_config_t; + +typedef struct +{ + u32 spd_index; +} ip6_ipsec_config_t; + +#endif /* __IPSEC_IO_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ipsec/ipsec_output.c b/src/vnet/ipsec/ipsec_output.c index 2ab98e7e1408..405612692371 100644 --- a/src/vnet/ipsec/ipsec_output.c +++ b/src/vnet/ipsec/ipsec_output.c @@ -48,6 +48,7 @@ static char *ipsec_output_error_strings[] = { typedef struct { u32 spd_id; + u32 policy_id; } ipsec_output_trace_t; /* packet trace format function */ @@ -58,14 +59,8 @@ format_ipsec_output_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ipsec_output_trace_t *t = va_arg (*args, ipsec_output_trace_t *); - if (t->spd_id != ~0) - { - s = format (s, "spd %u ", t->spd_id); - } - else - { - s = format (s, "no spd"); - } + s = format (s, "spd %u policy %d", t->spd_id, t->policy_id); + return s; } @@ -73,15 +68,16 @@ always_inline ipsec_policy_t * ipsec_output_policy_match (ipsec_spd_t * spd, u8 pr, u32 la, u32 ra, u16 lp, u16 rp) { + ipsec_main_t *im = &ipsec_main; ipsec_policy_t *p; u32 *i; if (!spd) return 0; - vec_foreach (i, spd->ipv4_outbound_policies) + vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_IP4_OUTBOUND]) { - p = pool_elt_at_index (spd->policies, *i); + p = pool_elt_at_index (im->policies, *i); if (PREDICT_FALSE (p->protocol && 
(p->protocol != pr))) continue; @@ -134,15 +130,16 @@ ipsec6_output_policy_match (ipsec_spd_t * spd, ip6_address_t * la, ip6_address_t * ra, u16 lp, u16 rp, u8 pr) { + ipsec_main_t *im = &ipsec_main; ipsec_policy_t *p; u32 *i; if (!spd) return 0; - vec_foreach (i, spd->ipv6_outbound_policies) + vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_IP6_OUTBOUND]) { - p = pool_elt_at_index (spd->policies, *i); + p = pool_elt_at_index (im->policies, *i); if (PREDICT_FALSE (p->protocol && (p->protocol != pr))) continue; @@ -181,7 +178,7 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { ipsec_main_t *im = &ipsec_main; - u32 *from, *to_next = 0; + u32 *from, *to_next = 0, thread_index; u32 n_left_from, sw_if_index0, last_sw_if_index = (u32) ~ 0; u32 next_node_index = (u32) ~ 0, last_next_node_index = (u32) ~ 0; vlib_frame_t *f = 0; @@ -192,10 +189,11 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; + thread_index = vm->thread_index; while (n_left_from > 0) { - u32 bi0; + u32 bi0, pi0; vlib_buffer_t *b0; ipsec_policy_t *p0; ip4_header_t *ip0; @@ -203,6 +201,7 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, udp_header_t *udp0; u32 iph_offset = 0; tcp_header_t *tcp0; + u64 bytes0; bi0 = from[0]; b0 = vlib_get_buffer (vm, bi0); @@ -271,6 +270,21 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_TRUE (p0 != NULL)) { + pi0 = p0 - im->policies; + + vlib_prefetch_combined_counter (&ipsec_spd_policy_counters, + thread_index, pi0); + + if (is_ipv6) + { + bytes0 = clib_net_to_host_u16 (ip6_0->payload_length); + bytes0 += sizeof (ip6_header_t); + } + else + { + bytes0 = clib_net_to_host_u16 (ip0->length); + } + if (p0->policy == IPSEC_POLICY_ACTION_PROTECT) { ipsec_sa_t *sa = 0; @@ -286,12 +300,9 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, else next_node_index = im->ah4_encrypt_node_index; 
vnet_buffer (b0)->ipsec.sad_index = p0->sa_index; - p0->counter.packets++; + if (is_ipv6) { - p0->counter.bytes += - clib_net_to_host_u16 (ip6_0->payload_length); - p0->counter.bytes += sizeof (ip6_header_t); if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)) { @@ -311,7 +322,6 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - p0->counter.bytes += clib_net_to_host_u16 (ip0->length); if (b0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM) { ip0->checksum = ip4_header_checksum (ip0); @@ -338,37 +348,18 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { nc_bypass++; next_node_index = get_next_output_feature_node_index (b0, node); - p0->counter.packets++; - if (is_ipv6) - { - p0->counter.bytes += - clib_net_to_host_u16 (ip6_0->payload_length); - p0->counter.bytes += sizeof (ip6_header_t); - } - else - { - p0->counter.bytes += clib_net_to_host_u16 (ip0->length); - } } else { nc_discard++; - p0->counter.packets++; - if (is_ipv6) - { - p0->counter.bytes += - clib_net_to_host_u16 (ip6_0->payload_length); - p0->counter.bytes += sizeof (ip6_header_t); - } - else - { - p0->counter.bytes += clib_net_to_host_u16 (ip0->length); - } next_node_index = im->error_drop_node_index; } + vlib_increment_combined_counter + (&ipsec_spd_policy_counters, thread_index, pi0, 1, bytes0); } else { + pi0 = ~0; nc_nomatch++; next_node_index = im->error_drop_node_index; } @@ -397,12 +388,14 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, to_next += 1; f->n_vectors++; - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ipsec_output_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); if (spd0) tr->spd_id = spd0->id; + tr->policy_id = pi0; } } diff --git a/src/vnet/ipsec/ipsec_sa.c b/src/vnet/ipsec/ipsec_sa.c new file mode 100644 index 000000000000..c4721c7afad0 --- /dev/null +++ 
b/src/vnet/ipsec/ipsec_sa.c @@ -0,0 +1,385 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +static clib_error_t * +ipsec_call_add_del_callbacks (ipsec_main_t * im, ipsec_sa_t * sa, + u32 sa_index, int is_add) +{ + ipsec_ah_backend_t *ab; + ipsec_esp_backend_t *eb; + switch (sa->protocol) + { + case IPSEC_PROTOCOL_AH: + ab = pool_elt_at_index (im->ah_backends, im->ah_current_backend); + if (ab->add_del_sa_sess_cb) + return ab->add_del_sa_sess_cb (sa_index, is_add); + break; + case IPSEC_PROTOCOL_ESP: + eb = pool_elt_at_index (im->esp_backends, im->esp_current_backend); + if (eb->add_del_sa_sess_cb) + return eb->add_del_sa_sess_cb (sa_index, is_add); + break; + } + return 0; +} + +void +ipsec_mk_key (ipsec_key_t * key, const u8 * data, u8 len) +{ + memset (key, 0, sizeof (*key)); + + if (len > sizeof (key->data)) + key->len = sizeof (key->data); + else + key->len = len; + + memcpy (key->data, data, key->len); +} + +/** + * 'stack' (resolve the recursion for) the SA tunnel destination + */ +void +ipsec_sa_stack (ipsec_sa_t * sa) +{ + ipsec_main_t *im = &ipsec_main; + fib_forward_chain_type_t fct; + dpo_id_t tmp = DPO_INVALID; + + fct = fib_forw_chain_type_from_fib_proto ((sa->is_tunnel_ip6 ? + FIB_PROTOCOL_IP6 : + FIB_PROTOCOL_IP4)); + + fib_entry_contribute_forwarding (sa->fib_entry_index, fct, &tmp); + + dpo_stack_from_node ((sa->is_tunnel_ip6 ? 
+ im->ah6_encrypt_node_index : + im->ah4_encrypt_node_index), + &sa->dpo[IPSEC_PROTOCOL_AH], &tmp); + dpo_stack_from_node ((sa->is_tunnel_ip6 ? + im->esp6_encrypt_node_index : + im->esp4_encrypt_node_index), + &sa->dpo[IPSEC_PROTOCOL_ESP], &tmp); + dpo_reset (&tmp); +} + +int +ipsec_sa_add (u32 id, + u32 spi, + ipsec_protocol_t proto, + ipsec_crypto_alg_t crypto_alg, + const ipsec_key_t * ck, + ipsec_integ_alg_t integ_alg, + const ipsec_key_t * ik, + ipsec_sa_flags_t flags, + u32 tx_table_id, + const ip46_address_t * tun_src, + const ip46_address_t * tun_dst, u32 * sa_out_index) +{ + ipsec_main_t *im = &ipsec_main; + clib_error_t *err; + ipsec_sa_t *sa; + u32 sa_index; + uword *p; + + p = hash_get (im->sa_index_by_sa_id, id); + if (p) + return VNET_API_ERROR_ENTRY_ALREADY_EXISTS; + + pool_get_zero (im->sad, sa); + + fib_node_init (&sa->node, FIB_NODE_TYPE_IPSEC_SA); + sa_index = sa - im->sad; + + sa->id = id; + sa->spi = spi; + sa->protocol = proto; + sa->crypto_alg = crypto_alg; + clib_memcpy (&sa->crypto_key, ck, sizeof (sa->crypto_key)); + sa->integ_alg = integ_alg; + clib_memcpy (&sa->integ_key, ik, sizeof (sa->integ_key)); + ip46_address_copy (&sa->tunnel_src_addr, tun_src); + ip46_address_copy (&sa->tunnel_dst_addr, tun_dst); + + if (flags & IPSEC_SA_FLAG_USE_EXTENDED_SEQ_NUM) + sa->use_esn = 1; + if (flags & IPSEC_SA_FLAG_USE_ANTI_REPLAY) + sa->use_anti_replay = 1; + if (flags & IPSEC_SA_FLAG_IS_TUNNEL) + sa->is_tunnel = 1; + if (flags & IPSEC_SA_FLAG_IS_TUNNEL_V6) + sa->is_tunnel_ip6 = 1; + if (flags & IPSEC_SA_FLAG_UDP_ENCAP) + sa->udp_encap = 1; + + err = ipsec_check_support_cb (im, sa); + if (err) + { + clib_warning ("%s", err->what); + pool_put (im->sad, sa); + return VNET_API_ERROR_UNIMPLEMENTED; + } + + err = ipsec_call_add_del_callbacks (im, sa, sa_index, 1); + if (err) + { + pool_put (im->sad, sa); + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + + if (sa->is_tunnel) + { + fib_protocol_t fproto = (sa->is_tunnel_ip6 ? 
+ FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4); + fib_prefix_t pfx = { + .fp_addr = sa->tunnel_dst_addr, + .fp_len = (sa->is_tunnel_ip6 ? 128 : 32), + .fp_proto = fproto, + }; + sa->tx_fib_index = fib_table_find (fproto, tx_table_id); + if (sa->tx_fib_index == ~((u32) 0)) + { + pool_put (im->sad, sa); + return VNET_API_ERROR_NO_SUCH_FIB; + } + + sa->fib_entry_index = fib_table_entry_special_add (sa->tx_fib_index, + &pfx, + FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE); + sa->sibling = fib_entry_child_add (sa->fib_entry_index, + FIB_NODE_TYPE_IPSEC_SA, sa_index); + ipsec_sa_stack (sa); + } + hash_set (im->sa_index_by_sa_id, sa->id, sa_index); + + if (sa_out_index) + *sa_out_index = sa_index; + + return (0); +} + +u32 +ipsec_sa_del (u32 id) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_sa_t *sa = 0; + uword *p; + u32 sa_index; + clib_error_t *err; + + p = hash_get (im->sa_index_by_sa_id, id); + + if (!p) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + sa_index = p[0]; + sa = pool_elt_at_index (im->sad, sa_index); + if (ipsec_is_sa_used (sa_index)) + { + clib_warning ("sa_id %u used in policy", sa->id); + /* sa used in policy */ + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + hash_unset (im->sa_index_by_sa_id, sa->id); + err = ipsec_call_add_del_callbacks (im, sa, sa_index, 0); + if (err) + return VNET_API_ERROR_SYSCALL_ERROR_1; + if (sa->is_tunnel) + { + fib_entry_child_remove (sa->fib_entry_index, sa->sibling); + fib_table_entry_special_remove + (sa->tx_fib_index, + fib_entry_get_prefix (sa->fib_entry_index), FIB_SOURCE_RR); + dpo_reset (&sa->dpo[IPSEC_PROTOCOL_AH]); + dpo_reset (&sa->dpo[IPSEC_PROTOCOL_ESP]); + } + pool_put (im->sad, sa); + return 0; +} + +u8 +ipsec_is_sa_used (u32 sa_index) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_tunnel_if_t *t; + ipsec_policy_t *p; + + /* *INDENT-OFF* */ + pool_foreach(p, im->policies, ({ + if (p->policy == IPSEC_POLICY_ACTION_PROTECT) + { + if (p->sa_index == sa_index) + return 1; + } + })); + + pool_foreach(t, im->tunnel_interfaces, ({ + if 
(t->input_sa_index == sa_index) + return 1; + if (t->output_sa_index == sa_index) + return 1; + })); + /* *INDENT-ON* */ + + return 0; +} + +int +ipsec_set_sa_key (u32 id, const ipsec_key_t * ck, const ipsec_key_t * ik) +{ + ipsec_main_t *im = &ipsec_main; + uword *p; + u32 sa_index; + ipsec_sa_t *sa = 0; + clib_error_t *err; + + p = hash_get (im->sa_index_by_sa_id, id); + if (!p) + return VNET_API_ERROR_SYSCALL_ERROR_1; /* no such sa-id */ + + sa_index = p[0]; + sa = pool_elt_at_index (im->sad, sa_index); + + /* new crypto key */ + if (ck) + { + clib_memcpy (&sa->crypto_key, ck, sizeof (sa->crypto_key)); + } + + /* new integ key, taken from ik */ + if (ik) + { + clib_memcpy (&sa->integ_key, ik, sizeof (sa->integ_key)); + } + + if (ck || ik) + { + err = ipsec_call_add_del_callbacks (im, sa, sa_index, 0); + if (err) + { + clib_error_free (err); + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + } + + return 0; +} + +u32 +ipsec_get_sa_index_by_sa_id (u32 sa_id) +{ + ipsec_main_t *im = &ipsec_main; + uword *p = hash_get (im->sa_index_by_sa_id, sa_id); + if (!p) + return ~0; + + return p[0]; +} + +void +ipsec_sa_walk (ipsec_sa_walk_cb_t cb, void *ctx) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_sa_t *sa; + + /* *INDENT-OFF* */ + pool_foreach (sa, im->sad, + ({ + if (WALK_CONTINUE != cb(sa, ctx)) + break; + })); + /* *INDENT-ON* */ +} + +/** + * Function definition to get a FIB node from its index + */ +static fib_node_t * +ipsec_sa_fib_node_get (fib_node_index_t index) +{ + ipsec_main_t *im; + ipsec_sa_t *sa; + + im = &ipsec_main; + sa = pool_elt_at_index (im->sad, index); + + return (&sa->node); +} + +/** + * Function definition to inform the FIB node that its last lock has gone. + */ +static void +ipsec_sa_last_lock_gone (fib_node_t * node) +{ + /* + * The ipsec SA is a root of the graph. As such + * it never has children and thus is never locked.
+ */ + ASSERT (0); +} + +static ipsec_sa_t * +ipsec_sa_from_fib_node (fib_node_t * node) +{ + ASSERT (FIB_NODE_TYPE_IPSEC_SA == node->fn_type); + return ((ipsec_sa_t *) (((char *) node) - + STRUCT_OFFSET_OF (ipsec_sa_t, node))); + +} + +/** + * Function definition to backwalk a FIB node + */ +static fib_node_back_walk_rc_t +ipsec_sa_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx) +{ + ipsec_sa_stack (ipsec_sa_from_fib_node (node)); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/* + * Virtual function table registered by IPsec SAs + * for participation in the FIB object graph. + */ +const static fib_node_vft_t ipsec_sa_vft = { + .fnv_get = ipsec_sa_fib_node_get, + .fnv_last_lock = ipsec_sa_last_lock_gone, + .fnv_back_walk = ipsec_sa_back_walk, +}; + +/* force inclusion from application's main.c */ +clib_error_t * +ipsec_sa_interface_init (vlib_main_t * vm) +{ + fib_node_register_type (FIB_NODE_TYPE_IPSEC_SA, &ipsec_sa_vft); + + return 0; +} + +VLIB_INIT_FUNCTION (ipsec_sa_interface_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ipsec/ipsec_sa.h b/src/vnet/ipsec/ipsec_sa.h new file mode 100644 index 000000000000..2e39566bd636 --- /dev/null +++ b/src/vnet/ipsec/ipsec_sa.h @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#ifndef __IPSEC_SPD_SA_H__ +#define __IPSEC_SPD_SA_H__ + +#include +#include +#include + +#define foreach_ipsec_crypto_alg \ + _ (0, NONE, "none") \ + _ (1, AES_CBC_128, "aes-cbc-128") \ + _ (2, AES_CBC_192, "aes-cbc-192") \ + _ (3, AES_CBC_256, "aes-cbc-256") \ + _ (4, AES_CTR_128, "aes-ctr-128") \ + _ (5, AES_CTR_192, "aes-ctr-192") \ + _ (6, AES_CTR_256, "aes-ctr-256") \ + _ (7, AES_GCM_128, "aes-gcm-128") \ + _ (8, AES_GCM_192, "aes-gcm-192") \ + _ (9, AES_GCM_256, "aes-gcm-256") \ + _ (10, DES_CBC, "des-cbc") \ + _ (11, 3DES_CBC, "3des-cbc") + +typedef enum +{ +#define _(v, f, s) IPSEC_CRYPTO_ALG_##f = v, + foreach_ipsec_crypto_alg +#undef _ + IPSEC_CRYPTO_N_ALG, +} ipsec_crypto_alg_t; + +#define foreach_ipsec_integ_alg \ + _ (0, NONE, "none") \ + _ (1, MD5_96, "md5-96") /* RFC2403 */ \ + _ (2, SHA1_96, "sha1-96") /* RFC2404 */ \ + _ (3, SHA_256_96, "sha-256-96") /* draft-ietf-ipsec-ciph-sha-256-00 */ \ + _ (4, SHA_256_128, "sha-256-128") /* RFC4868 */ \ + _ (5, SHA_384_192, "sha-384-192") /* RFC4868 */ \ + _ (6, SHA_512_256, "sha-512-256") /* RFC4868 */ + +typedef enum +{ +#define _(v, f, s) IPSEC_INTEG_ALG_##f = v, + foreach_ipsec_integ_alg +#undef _ + IPSEC_INTEG_N_ALG, +} ipsec_integ_alg_t; + +typedef enum +{ + IPSEC_PROTOCOL_AH = 0, + IPSEC_PROTOCOL_ESP = 1 +} ipsec_protocol_t; + +#define IPSEC_N_PROTOCOLS (IPSEC_PROTOCOL_ESP+1) + +#define IPSEC_KEY_MAX_LEN 128 +typedef struct ipsec_key_t_ +{ + u8 len; + u8 data[IPSEC_KEY_MAX_LEN]; +} ipsec_key_t; + +/* + * Enable extended sequence numbers + * Enable Anti-replay + * IPsec tunnel mode if non-zero, else transport mode + * IPsec tunnel mode is IPv6 if non-zero, + * else IPv4 tunnel only valid if is_tunnel is non-zero + * enable UDP encapsulation for NAT traversal + */ +#define foreach_ipsec_sa_flags \ + _ (0, NONE, "none") \ + _ (1, USE_EXTENDED_SEQ_NUM, "esn") \ + _ (2, USE_ANTI_REPLAY, "anti-replay") \ + _ (4, IS_TUNNEL, "tunnel") \ + _ (8, IS_TUNNEL_V6, "tunnel-v6") \ + _ (16, UDP_ENCAP, 
"udp-encap") \ + +typedef enum ipsec_sad_flags_t_ +{ +#define _(v, f, s) IPSEC_SA_FLAG_##f = v, + foreach_ipsec_sa_flags +#undef _ +} ipsec_sa_flags_t; + +typedef struct +{ + fib_node_t node; + u32 id; + u32 spi; + ipsec_protocol_t protocol; + + ipsec_crypto_alg_t crypto_alg; + ipsec_key_t crypto_key; + + ipsec_integ_alg_t integ_alg; + ipsec_key_t integ_key; + + u8 use_esn; + u8 use_anti_replay; + + u8 is_tunnel; + u8 is_tunnel_ip6; + u8 udp_encap; + ip46_address_t tunnel_src_addr; + ip46_address_t tunnel_dst_addr; + + fib_node_index_t fib_entry_index; + u32 sibling; + dpo_id_t dpo[IPSEC_N_PROTOCOLS]; + + u32 tx_fib_index; + u32 salt; + + /* runtime */ + u32 seq; + u32 seq_hi; + u32 last_seq; + u32 last_seq_hi; + u64 replay_window; + + /* lifetime data */ + u64 total_data_size; +} ipsec_sa_t; + +extern void ipsec_mk_key (ipsec_key_t * key, const u8 * data, u8 len); + +extern int ipsec_sa_add (u32 id, + u32 spi, + ipsec_protocol_t proto, + ipsec_crypto_alg_t crypto_alg, + const ipsec_key_t * ck, + ipsec_integ_alg_t integ_alg, + const ipsec_key_t * ik, + ipsec_sa_flags_t flags, + u32 tx_table_id, + const ip46_address_t * tunnel_src_addr, + const ip46_address_t * tunnel_dst_addr, + u32 * sa_index); +extern u32 ipsec_sa_del (u32 id); +extern void ipsec_sa_stack (ipsec_sa_t * sa); + +extern u8 ipsec_is_sa_used (u32 sa_index); +extern int ipsec_set_sa_key (u32 id, + const ipsec_key_t * ck, const ipsec_key_t * ik); +extern u32 ipsec_get_sa_index_by_sa_id (u32 sa_id); + +typedef walk_rc_t (*ipsec_sa_walk_cb_t) (ipsec_sa_t * sa, void *ctx); +extern void ipsec_sa_walk (ipsec_sa_walk_cb_t cd, void *ctx); + +extern u8 *format_ipsec_crypto_alg (u8 * s, va_list * args); +extern u8 *format_ipsec_integ_alg (u8 * s, va_list * args); +extern u8 *format_ipsec_sa (u8 * s, va_list * args); +extern u8 *format_ipsec_key (u8 * s, va_list * args); +extern uword unformat_ipsec_crypto_alg (unformat_input_t * input, + va_list * args); +extern uword unformat_ipsec_integ_alg (unformat_input_t * 
input, + va_list * args); +extern uword unformat_ipsec_key (unformat_input_t * input, va_list * args); + +#endif /* __IPSEC_SPD_SA_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ipsec/ipsec_spd.c b/src/vnet/ipsec/ipsec_spd.c new file mode 100644 index 000000000000..19525b206a6a --- /dev/null +++ b/src/vnet/ipsec/ipsec_spd.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +int +ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_spd_t *spd = 0; + uword *p; + u32 spd_index, k, v; + + p = hash_get (im->spd_index_by_spd_id, spd_id); + if (p && is_add) + return VNET_API_ERROR_ENTRY_ALREADY_EXISTS; + if (!p && !is_add) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + if (!is_add) /* delete */ + { + spd_index = p[0]; + spd = pool_elt_at_index (im->spds, spd_index); + if (!spd) + return VNET_API_ERROR_INVALID_VALUE; + /* *INDENT-OFF* */ + hash_foreach (k, v, im->spd_index_by_sw_if_index, ({ + if (v == spd_index) + ipsec_set_interface_spd(vm, k, spd_id, 0); + })); + /* *INDENT-ON* */ + hash_unset (im->spd_index_by_spd_id, spd_id); +#define _(s,v) vec_free(spd->policies[IPSEC_SPD_POLICY_##s]); + foreach_ipsec_spd_policy_type +#undef _ + pool_put (im->spds, spd); + } + else /* create new SPD */ + { + pool_get (im->spds, spd); + clib_memset (spd, 0, sizeof (*spd)); + spd_index = spd - im->spds; + spd->id = spd_id; + hash_set (im->spd_index_by_spd_id, spd_id, spd_index); + } + return 0; +} + +int +ipsec_set_interface_spd (vlib_main_t * vm, u32 sw_if_index, u32 spd_id, + int is_add) +{ + ipsec_main_t *im = &ipsec_main; + ip4_ipsec_config_t config; + + u32 spd_index; + uword *p; + + p = hash_get (im->spd_index_by_spd_id, spd_id); + if (!p) + return VNET_API_ERROR_SYSCALL_ERROR_1; /* no such spd-id */ + + spd_index = p[0]; + + p = hash_get (im->spd_index_by_sw_if_index, sw_if_index); + if (p && is_add) + return VNET_API_ERROR_SYSCALL_ERROR_1; /* spd already assigned */ + + if (is_add) + { + hash_set (im->spd_index_by_sw_if_index, sw_if_index, spd_index); + } + else + { + hash_unset (im->spd_index_by_sw_if_index, sw_if_index); + } + + clib_warning ("sw_if_index %u spd_id %u spd_index %u", + sw_if_index, spd_id, spd_index); + + /* enable IPsec on TX */ + vnet_feature_enable_disable ("ip4-output", "ipsec4-output-feature", + sw_if_index, is_add, 0, 0); + vnet_feature_enable_disable 
("ip6-output", "ipsec6-output-feature", + sw_if_index, is_add, 0, 0); + + config.spd_index = spd_index; + + /* enable IPsec on RX */ + vnet_feature_enable_disable ("ip4-unicast", "ipsec4-input-feature", + sw_if_index, is_add, &config, sizeof (config)); + vnet_feature_enable_disable ("ip6-unicast", "ipsec6-input-feature", + sw_if_index, is_add, &config, sizeof (config)); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ipsec/ipsec_spd.h b/src/vnet/ipsec/ipsec_spd.h new file mode 100644 index 000000000000..dd09041b046e --- /dev/null +++ b/src/vnet/ipsec/ipsec_spd.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __IPSEC_SPD_H__ +#define __IPSEC_SPD_H__ + +#include + +#define foreach_ipsec_spd_policy_type \ + _(IP4_OUTBOUND, "ip4-outbound") \ + _(IP6_OUTBOUND, "ip6-outbound") \ + _(IP4_INBOUND_PROTECT, "ip4-inbound-protect") \ + _(IP6_INBOUND_PROTECT, "ip6-inbound-protect") \ + _(IP4_INBOUND_BYPASS, "ip4-inbound-bypass") \ + _(IP6_INBOUND_BYPASS, "ip6-inbound-bypass") + +typedef enum ipsec_spd_policy_t_ +{ +#define _(s,v) IPSEC_SPD_POLICY_##s, + foreach_ipsec_spd_policy_type +#undef _ + IPSEC_SPD_POLICY_N_TYPES, +} ipsec_spd_policy_t; + +#define FOR_EACH_IPSEC_SPD_POLICY_TYPE(_t) \ + for (_t = 0; _t < IPSEC_SPD_POLICY_N_TYPES; _t++) + +/** + * @brief A Security Policy Database + */ +typedef struct +{ + /** the User's ID for this SPD */ + u32 id; + /** vectors of policy indices, one for each of the policy types */ + u32 *policies[IPSEC_SPD_POLICY_N_TYPES]; +} ipsec_spd_t; + +/** + * @brief Add/Delete a SPD + */ +extern int ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add); + +/** + * @brief Bind/attach a SPD to an interface + */ +extern int ipsec_set_interface_spd (vlib_main_t * vm, + u32 sw_if_index, u32 spd_id, int is_add); + +extern u8 *format_ipsec_spd (u8 * s, va_list * args); + +#endif /* __IPSEC_SPD_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ipsec/ipsec_spd_policy.c b/src/vnet/ipsec/ipsec_spd_policy.c new file mode 100644 index 000000000000..5ad147b934fa --- /dev/null +++ b/src/vnet/ipsec/ipsec_spd_policy.c @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +/** + * @brief + * Policy packet & bytes counters + */ +vlib_combined_counter_main_t ipsec_spd_policy_counters = { + .name = "policy", + .stat_segment_name = "/net/ipsec/policy", +}; +/* Return 1 when p1 and p2 match on priority, direction, action, SA id, protocol, both port ranges, address family and both address ranges; 0 otherwise. */ +static int +ipsec_policy_is_equal (ipsec_policy_t * p1, ipsec_policy_t * p2) +{ + if (p1->priority != p2->priority) + return 0; + if (p1->is_outbound != p2->is_outbound) + return (0); + if (p1->policy != p2->policy) + return (0); + if (p1->sa_id != p2->sa_id) + return (0); + if (p1->protocol != p2->protocol) + return (0); + if (p1->lport.start != p2->lport.start) + return (0); + if (p1->lport.stop != p2->lport.stop) + return (0); + if (p1->rport.start != p2->rport.start) + return (0); + if (p1->rport.stop != p2->rport.stop) + return (0); + if (p1->is_ipv6 != p2->is_ipv6) + return (0); + if (p2->is_ipv6) + { + if (p1->laddr.start.ip6.as_u64[0] != p2->laddr.start.ip6.as_u64[0]) + return (0); + if (p1->laddr.start.ip6.as_u64[1] != p2->laddr.start.ip6.as_u64[1]) + return (0); + if (p1->laddr.stop.ip6.as_u64[0] != p2->laddr.stop.ip6.as_u64[0]) + return (0); + if (p1->laddr.stop.ip6.as_u64[1] != p2->laddr.stop.ip6.as_u64[1]) + return (0); + if (p1->raddr.start.ip6.as_u64[0] != p2->raddr.start.ip6.as_u64[0]) + return (0); + if (p1->raddr.start.ip6.as_u64[1] != p2->raddr.start.ip6.as_u64[1]) + return (0); + if (p1->raddr.stop.ip6.as_u64[0] != p2->raddr.stop.ip6.as_u64[0]) + return (0); + if (p1->raddr.stop.ip6.as_u64[1] != p2->raddr.stop.ip6.as_u64[1]) + return (0); + } + else + { + if (p1->laddr.start.ip4.as_u32 != p2->laddr.start.ip4.as_u32) + return
(0); + if (p1->laddr.stop.ip4.as_u32 != p2->laddr.stop.ip4.as_u32) + return (0); + if (p1->raddr.start.ip4.as_u32 != p2->raddr.start.ip4.as_u32) + return (0); + if (p1->raddr.stop.ip4.as_u32 != p2->raddr.stop.ip4.as_u32) + return (0); + } + return (1); +} + +static int +ipsec_spd_entry_sort (void *a1, void *a2) +{ + ipsec_main_t *im = &ipsec_main; + u32 *id1 = a1; + u32 *id2 = a2; + ipsec_policy_t *p1, *p2; + + p1 = pool_elt_at_index (im->policies, *id1); + p2 = pool_elt_at_index (im->policies, *id2); + if (p1 && p2) + return p2->priority - p1->priority; + + return 0; +} + +int +ipsec_add_del_policy (vlib_main_t * vm, + ipsec_policy_t * policy, int is_add, u32 * stat_index) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_spd_t *spd = 0; + ipsec_policy_t *vp; + u32 spd_index; + uword *p; + + clib_warning ("policy-id %u priority %d is_outbound %u", policy->id, + policy->priority, policy->is_outbound); + + if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) + { + p = hash_get (im->sa_index_by_sa_id, policy->sa_id); + if (!p) + return VNET_API_ERROR_SYSCALL_ERROR_1; + policy->sa_index = p[0]; + } + + p = hash_get (im->spd_index_by_spd_id, policy->id); + + if (!p) + return VNET_API_ERROR_SYSCALL_ERROR_1; + + spd_index = p[0]; + spd = pool_elt_at_index (im->spds, spd_index); + if (!spd) + return VNET_API_ERROR_SYSCALL_ERROR_1; + + if (is_add) + { + u32 policy_index; + + pool_get (im->policies, vp); + clib_memcpy (vp, policy, sizeof (*vp)); + policy_index = vp - im->policies; + + vlib_validate_combined_counter (&ipsec_spd_policy_counters, + policy_index); + vlib_zero_combined_counter (&ipsec_spd_policy_counters, policy_index); + + if (policy->is_outbound) + { + if (policy->is_ipv6) + { + vec_add1 (spd->policies[IPSEC_SPD_POLICY_IP6_OUTBOUND], + policy_index); + vec_sort_with_function (spd->policies + [IPSEC_SPD_POLICY_IP6_OUTBOUND], + ipsec_spd_entry_sort); + } + else + { + vec_add1 (spd->policies[IPSEC_SPD_POLICY_IP4_OUTBOUND], + policy_index); + vec_sort_with_function 
(spd->policies + [IPSEC_SPD_POLICY_IP4_OUTBOUND], + ipsec_spd_entry_sort); + } + } + else + { + if (policy->is_ipv6) + { + if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) + { + vec_add1 (spd->policies + [IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT], + policy_index); + vec_sort_with_function (spd->policies + [IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT], + ipsec_spd_entry_sort); + } + else + { + vec_add1 + (spd->policies[IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS], + policy_index); + vec_sort_with_function + (spd->policies[IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS], + ipsec_spd_entry_sort); + } + } + else + { + if (policy->policy == IPSEC_POLICY_ACTION_PROTECT) + { + vec_add1 (spd->policies + [IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT], + policy_index); + vec_sort_with_function (spd->policies + [IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT], + ipsec_spd_entry_sort); + } + else + { + vec_add1 + (spd->policies[IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS], + policy_index); + vec_sort_with_function + (spd->policies[IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS], + ipsec_spd_entry_sort); + } + } + } + *stat_index = policy_index; + } + else + { + ipsec_spd_policy_t ptype; + u32 ii; + + FOR_EACH_IPSEC_SPD_POLICY_TYPE (ptype) + { + vec_foreach_index (ii, (spd->policies[ptype])) + { + vp = pool_elt_at_index (im->policies, spd->policies[ptype][ii]); + if (ipsec_policy_is_equal (vp, policy)) + { + vec_del1 (spd->policies[ptype], ii); + pool_put (im->policies, vp); + goto done; + } + } + } + done:; + } + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ipsec/ipsec_spd_policy.h b/src/vnet/ipsec/ipsec_spd_policy.h new file mode 100644 index 000000000000..40fad3429783 --- /dev/null +++ b/src/vnet/ipsec/ipsec_spd_policy.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __IPSEC_SPD_POLICY_H__ +#define __IPSEC_SPD_POLICY_H__ + +#include + +#define foreach_ipsec_policy_action \ + _ (0, BYPASS, "bypass") \ + _ (1, DISCARD, "discard") \ + _ (2, RESOLVE, "resolve") \ + _ (3, PROTECT, "protect") + +typedef enum +{ +#define _(v, f, s) IPSEC_POLICY_ACTION_##f = v, + foreach_ipsec_policy_action +#undef _ +} ipsec_policy_action_t; + +#define IPSEC_POLICY_N_ACTION (IPSEC_POLICY_ACTION_PROTECT + 1) + +typedef struct +{ + ip46_address_t start, stop; +} ip46_address_range_t; + +typedef struct +{ + u16 start, stop; +} port_range_t; + +/** + * @brief + * Policy packet & bytes counters + */ +extern vlib_combined_counter_main_t ipsec_spd_policy_counters; + +/** + * @brief A Security Policy.
An entry in an SPD + */ +typedef struct ipsec_policy_t_ +{ + u32 id; + i32 priority; + u8 is_outbound; + + // Selector + u8 is_ipv6; + ip46_address_range_t laddr; + ip46_address_range_t raddr; + u8 protocol; + port_range_t lport; + port_range_t rport; + + // Policy + ipsec_policy_action_t policy; + u32 sa_id; + u32 sa_index; +} ipsec_policy_t; + +/** + * @brief Add/Delete a policy to/from an SPD + */ +extern int ipsec_add_del_policy (vlib_main_t * vm, + ipsec_policy_t * policy, + int is_add, u32 * stat_index); + +extern u8 *format_ipsec_policy (u8 * s, va_list * args); +extern u8 *format_ipsec_policy_action (u8 * s, va_list * args); +extern uword unformat_ipsec_policy_action (unformat_input_t * input, + va_list * args); + + +#endif /* __IPSEC_SPD_POLICY_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_bvi.h b/src/vnet/l2/l2_bvi.h index 7abe462054e5..51c8dac23e44 100644 --- a/src/vnet/l2/l2_bvi.h +++ b/src/vnet/l2/l2_bvi.h @@ -46,7 +46,7 @@ l2_to_bvi (vlib_main_t * vlib_main, { vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnet_main, bvi_sw_if_index); - if (!eth_mac_equal (e0->dst_address, hi->hw_address)) + if (!ethernet_mac_address_equal (e0->dst_address, hi->hw_address)) return TO_BVI_ERR_BAD_MAC; } diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h index 93da1277e676..12e7e54038c4 100644 --- a/src/vnet/l2/l2_input.h +++ b/src/vnet/l2/l2_input.h @@ -114,6 +114,7 @@ l2input_bd_config (u32 bd_index) _(GBP_NULL_CLASSIFY, "gbp-null-classify") \ _(GBP_SRC_CLASSIFY, "gbp-src-classify") \ _(GBP_LPM_CLASSIFY, "l2-gbp-lpm-classify") \ + _(GBP_SCLASS_2_ID, "l2-gbp-sclass-2-id") \ _(VTR, "l2-input-vtr") \ _(L2_IP_QOS_RECORD, "l2-ip-qos-record") \ _(VPATH, "vpath-input-l2") \ diff --git a/src/vnet/l2/l2_learn.h b/src/vnet/l2/l2_learn.h index c1071450852d..58e19babc6e2 100644 --- a/src/vnet/l2/l2_learn.h +++ b/src/vnet/l2/l2_learn.h @@ -52,7 +52,7 @@ extern l2learn_main_t
l2learn_main; extern vlib_node_registration_t l2fib_mac_age_scanner_process_node; -enum +typedef enum { L2_MAC_AGE_PROCESS_EVENT_START = 1, L2_MAC_AGE_PROCESS_EVENT_STOP = 2, diff --git a/src/vnet/l2/l2_output.h b/src/vnet/l2/l2_output.h index 74d2829839f6..fdb6167155ff 100644 --- a/src/vnet/l2/l2_output.h +++ b/src/vnet/l2/l2_output.h @@ -81,6 +81,7 @@ extern vlib_node_registration_t l2output_node; #define foreach_l2output_feat \ _(OUTPUT, "interface-output") \ _(SPAN, "span-l2-output") \ + _(GBP_ID_2_SCLASS, "l2-gbp-id-2-sclass") \ _(GBP_POLICY_PORT, "gbp-policy-port") \ _(GBP_POLICY_MAC, "gbp-policy-mac") \ _(CFM, "feature-bitmap-drop") \ diff --git a/src/vnet/l2/l2_patch.c b/src/vnet/l2/l2_patch.c index e2d2a6796621..54286c23d727 100644 --- a/src/vnet/l2/l2_patch.c +++ b/src/vnet/l2/l2_patch.c @@ -49,7 +49,7 @@ format_l2_patch_trace (u8 * s, va_list * args) return s; } -l2_patch_main_t l2_patch_main; +static l2_patch_main_t l2_patch_main; static vlib_node_registration_t l2_patch_node; diff --git a/src/vnet/lisp-cp/control.c b/src/vnet/lisp-cp/control.c index ca71bf885cf7..6369e4e82e31 100644 --- a/src/vnet/lisp-cp/control.c +++ b/src/vnet/lisp-cp/control.c @@ -774,6 +774,7 @@ vnet_lisp_map_cache_add_del (vnet_lisp_add_del_mapping_args_t * a, m->is_static = a->is_static; m->key = vec_dup (a->key); m->key_id = a->key_id; + m->authoritative = a->authoritative; map_index = m - lcm->mapping_pool; gid_dictionary_add_del (&lcm->mapping_index_by_gid, &a->eid, map_index, @@ -3457,8 +3458,7 @@ lisp_cp_lookup_inline (vlib_main_t * vm, + sizeof (*eth0)); arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply); arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0]; - clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, - (u8 *) & mac0, 6); + mac_address_from_u64 (&arp0->ip4_over_ethernet[0].mac, mac0); clib_memcpy (&arp0->ip4_over_ethernet[0].ip4, &gid_address_arp_ip4 (&dst), 4); diff --git a/src/vnet/lisp-cp/lisp_cli.c b/src/vnet/lisp-cp/lisp_cli.c index 
690850c7ac27..5cd183402b00 100644 --- a/src/vnet/lisp-cp/lisp_cli.c +++ b/src/vnet/lisp-cp/lisp_cli.c @@ -1177,6 +1177,7 @@ lisp_add_del_locator_set_command_fn (vlib_main_t * vm, &locator.weight)) { locator.local = 1; + locator.state = 1; vec_add1 (locators, locator); } else diff --git a/src/vnet/lisp-cp/lisp_msg_serdes.c b/src/vnet/lisp-cp/lisp_msg_serdes.c index f45f08fad6c0..90afb10b68b5 100644 --- a/src/vnet/lisp-cp/lisp_msg_serdes.c +++ b/src/vnet/lisp-cp/lisp_msg_serdes.c @@ -34,6 +34,7 @@ lisp_msg_put_locators (vlib_buffer_t * b, locator_t * locators) LOC_MWEIGHT (p) = loc->mweight; LOC_LOCAL (p) = loc->local; LOC_PROBED (p) = loc->probed ? 1 : 0; + LOC_REACHABLE (p) = loc->state ? 1 : 0; lisp_msg_put_gid (b, &loc->address); } } diff --git a/src/vnet/lisp-cp/one_cli.c b/src/vnet/lisp-cp/one_cli.c index 7823ea14cd98..e000c02e414e 100644 --- a/src/vnet/lisp-cp/one_cli.c +++ b/src/vnet/lisp-cp/one_cli.c @@ -178,6 +178,8 @@ lisp_add_del_local_eid_command_fn (vlib_main_t * vm, unformat_input_t * input, } locator_set_index = p[0]; } + else if (unformat (line_input, "authoritative")) + a->authoritative = 1; else { error = unformat_parse_error (line_input); @@ -1719,6 +1721,7 @@ lisp_add_del_locator_set_command_fn (vlib_main_t * vm, &locator.weight)) { locator.local = 1; + locator.state = 1; vec_add1 (locators, locator); } else diff --git a/src/vnet/lldp/lldp_node.h b/src/vnet/lldp/lldp_node.h index eca1d12e1695..f9bc95998275 100644 --- a/src/vnet/lldp/lldp_node.h +++ b/src/vnet/lldp/lldp_node.h @@ -131,7 +131,7 @@ typedef struct u8 data[400]; } lldp_input_trace_t; -enum +typedef enum { LLDP_EVENT_RESCHEDULE = 1, } lldp_process_event_t; diff --git a/src/vnet/mpls/mpls_lookup.h b/src/vnet/mpls/mpls_lookup.h index 95558e05a4ee..17f9468f0acd 100644 --- a/src/vnet/mpls/mpls_lookup.h +++ b/src/vnet/mpls/mpls_lookup.h @@ -23,7 +23,7 @@ /** * The arc/edge from the MPLS lookup node to the MPLS replicate node */ -u32 mpls_lookup_to_replicate_edge; +extern u32 
mpls_lookup_to_replicate_edge; /** * Enum of statically configred MPLS lookup next nodes diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c index 27c2a2a6f002..ad61ac2b25d4 100644 --- a/src/vnet/mpls/mpls_tunnel.c +++ b/src/vnet/mpls/mpls_tunnel.c @@ -724,6 +724,7 @@ vnet_mpls_tunnel_path_remove (u32 sw_if_index, old_pl_index = mt->mt_path_list; + fib_path_list_lock(old_pl_index); mt->mt_path_list = fib_path_list_copy_and_path_remove(old_pl_index, FIB_PATH_LIST_FLAG_SHARED, @@ -735,6 +736,7 @@ vnet_mpls_tunnel_path_remove (u32 sw_if_index, if (FIB_NODE_INDEX_INVALID == mt->mt_path_list) { /* no paths left */ + fib_path_list_unlock(old_pl_index); return (0); } else @@ -758,6 +760,7 @@ vnet_mpls_tunnel_path_remove (u32 sw_if_index, mt->mt_path_list); mpls_tunnel_restack(mt); + fib_path_list_unlock(old_pl_index); } return (fib_path_list_get_n_paths(mt->mt_path_list)); @@ -878,7 +881,7 @@ format_mpls_tunnel (u8 * s, va_list * args) mpls_tunnel_t *mt = va_arg (*args, mpls_tunnel_t *); mpls_tunnel_attribute_t attr; - s = format(s, "mpls_tunnel%d: sw_if_index:%d hw_if_index:%d", + s = format(s, "mpls-tunnel%d: sw_if_index:%d hw_if_index:%d", mt - mpls_tunnel_pool, mt->mt_sw_if_index, mt->mt_hw_if_index); diff --git a/src/vnet/pg/cli.c b/src/vnet/pg/cli.c index a09107c50c53..803db129e000 100644 --- a/src/vnet/pg/cli.c +++ b/src/vnet/pg/cli.c @@ -42,9 +42,9 @@ #include #include -#ifdef CLIB_UNIX +#include #include -#endif + /* Root of all packet generator cli commands. 
*/ /* *INDENT-OFF* */ @@ -145,40 +145,66 @@ VLIB_CLI_COMMAND (disable_streams_cli, static) = { }; /* *INDENT-ON* */ +static u8 * +format_pg_edit_group (u8 * s, va_list * va) +{ + pg_edit_group_t *g = va_arg (*va, pg_edit_group_t *); + + s = + format (s, "hdr-size %d, offset %d, ", g->n_packet_bytes, + g->start_byte_offset); + if (g->edit_function) + { + u8 *function_name; + u8 *junk_after_name; + function_name = format (0, "%U%c", format_clib_elf_symbol_with_address, + g->edit_function, 0); + junk_after_name = function_name; + while (*junk_after_name && *junk_after_name != ' ') + junk_after_name++; + *junk_after_name = 0; + s = format (s, "edit-funtion %s, ", function_name); + vec_free (function_name); + } + + return s; +} + static u8 * format_pg_stream (u8 * s, va_list * va) { pg_stream_t *t = va_arg (*va, pg_stream_t *); - u8 *v; + int verbose = va_arg (*va, int); if (!t) - return format (s, "%=16s%=12s%=16s%s", + return format (s, "%-16s%=12s%=16s%s", "Name", "Enabled", "Count", "Parameters"); - s = format (s, "%-16v%=12s%16Ld", + s = format (s, "%-16v%=12s%=16Ld", t->name, pg_stream_is_enabled (t) ? "Yes" : "No", t->n_packets_generated); - v = 0; - - v = format (v, "limit %Ld, ", t->n_packets_limit); + int indent = format_get_indent (s); - v = format (v, "rate %.2e pps, ", t->rate_packets_per_second); - - v = format (v, "size %d%c%d, ", + s = format (s, "limit %Ld, ", t->n_packets_limit); + s = format (s, "rate %.2e pps, ", t->rate_packets_per_second); + s = format (s, "size %d%c%d, ", t->min_packet_bytes, t->packet_size_edit_type == PG_EDIT_RANDOM ? 
'+' : '-', t->max_packet_bytes); + s = format (s, "buffer-size %d, ", t->buffer_bytes); + s = format (s, "worker %d, ", t->worker_index); - v = format (v, "buffer-size %d, ", t->buffer_bytes); - - v = format (v, "worker %d, ", t->worker_index); - - if (v) + if (verbose) + { + pg_edit_group_t *g; + /* *INDENT-OFF* */ + vec_foreach (g, t->edit_groups) { - s = format (s, " %v", v); - vec_free (v); + s = format (s, "\n%U%U", format_white_space, indent, format_pg_edit_group, g); + } + /* *INDENT-ON* */ } return s; @@ -190,6 +216,15 @@ show_streams (vlib_main_t * vm, { pg_main_t *pg = &pg_main; pg_stream_t *s; + int verbose = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose")) + verbose = 1; + else + break; + } if (pool_elts (pg->streams) == 0) { @@ -197,10 +232,10 @@ show_streams (vlib_main_t * vm, goto done; } - vlib_cli_output (vm, "%U", format_pg_stream, 0); + vlib_cli_output (vm, "%U", format_pg_stream, 0, 0); /* *INDENT-OFF* */ pool_foreach (s, pg->streams, ({ - vlib_cli_output (vm, "%U", format_pg_stream, s); + vlib_cli_output (vm, "%U", format_pg_stream, s, verbose); })); /* *INDENT-ON* */ @@ -210,8 +245,8 @@ show_streams (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_streams_cli, static) = { - .path = "show packet-generator", - .short_help = "Show packet generator streams", + .path = "show packet-generator ", + .short_help = "show packet-generator [verbose]", .function = show_streams, }; /* *INDENT-ON* */ @@ -277,10 +312,11 @@ validate_stream (pg_stream_t * s) if (s->max_packet_bytes < s->min_packet_bytes) return clib_error_create ("max-size < min-size"); - if (s->buffer_bytes >= 4096 || s->buffer_bytes == 0) - return - clib_error_create ("buffer-size must be positive and < 4096, given %d", - s->buffer_bytes); + u32 hdr_size = pg_edit_group_n_bytes (s, 0); + if (s->min_packet_bytes < hdr_size) + return clib_error_create ("min-size < total header size %d", hdr_size); + if (s->buffer_bytes == 0) 
+ return clib_error_create ("buffer-size must be positive"); if (s->rate_packets_per_second < 0) return clib_error_create ("negative rate"); @@ -305,7 +341,7 @@ new_stream (vlib_main_t * vm, s.sw_if_index[VLIB_RX] = s.sw_if_index[VLIB_TX] = ~0; s.node_index = ~0; s.max_packet_bytes = s.min_packet_bytes = 64; - s.buffer_bytes = VLIB_BUFFER_DATA_SIZE; + s.buffer_bytes = vlib_buffer_get_default_data_size (vm); s.if_id = 0; pcap_file_name = 0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) @@ -362,10 +398,6 @@ new_stream (vlib_main_t * vm, } } - error = validate_stream (&s); - if (error) - return error; - if (!sub_input_given && !pcap_file_name) { error = clib_error_create ("no packet data given"); @@ -419,6 +451,10 @@ new_stream (vlib_main_t * vm, } } + error = validate_stream (&s); + if (error) + return error; + pg_stream_add (pg, &s); return 0; @@ -501,7 +537,10 @@ change_stream_parameters (vlib_main_t * vm, error = validate_stream (&s_new); if (!error) - s[0] = s_new; + { + s[0] = s_new; + pg_stream_change (pg, s); + } return error; } diff --git a/src/vnet/pg/edit.c b/src/vnet/pg/edit.c index 8d1437e96bcc..7d4e1713c4cf 100644 --- a/src/vnet/pg/edit.c +++ b/src/vnet/pg/edit.c @@ -167,13 +167,13 @@ unformat_pg_payload (unformat_input_t * input, va_list * args) for (i = 0; i < len; i++) v[i] = i % ilen; - e = pg_create_edit_group (s, sizeof (e[0]), vec_len (v), 0); + e = pg_create_edit_group (s, sizeof (e[0]), len, 0); e->type = PG_EDIT_FIXED; - e->n_bits = vec_len (v) * BITS (v[0]); + e->n_bits = len * BITS (v[0]); /* Least significant bit is at end of bitstream, since everything is always bigendian. */ - e->lsb_bit_offset = e->n_bits - BITS (v[0]); + e->lsb_bit_offset = len > 0 ? 
e->n_bits - BITS (v[0]) : 0; e->values[PG_EDIT_LO] = v; diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c index 792c67cd88d5..138a5757c83f 100644 --- a/src/vnet/pg/input.c +++ b/src/vnet/pg/input.c @@ -1199,6 +1199,7 @@ pg_stream_fill_replay (pg_main_t * pg, pg_stream_t * s, u32 n_alloc) u32 *buffers; vlib_main_t *vm = vlib_get_main (); vnet_main_t *vnm = vnet_get_main (); + u32 buf_sz = vlib_buffer_get_default_data_size (vm); vnet_interface_main_t *im = &vnm->interface_main; vnet_sw_interface_t *si; @@ -1216,8 +1217,7 @@ pg_stream_fill_replay (pg_main_t * pg, pg_stream_t * s, u32 n_alloc) u8 *d0; d0 = vec_elt (s->replay_packet_templates, i); - buffer_alloc_request += (vec_len (d0) + (VLIB_BUFFER_DATA_SIZE - 1)) - / VLIB_BUFFER_DATA_SIZE; + buffer_alloc_request += (vec_len (d0) + (buf_sz - 1)) / buf_sz; i = ((i + 1) == l) ? 0 : i + 1; n_left--; @@ -1261,7 +1261,7 @@ pg_stream_fill_replay (pg_main_t * pg, pg_stream_t * s, u32 n_alloc) /* Copy the data */ while (bytes_to_copy) { - bytes_this_chunk = clib_min (bytes_to_copy, VLIB_BUFFER_DATA_SIZE); + bytes_this_chunk = clib_min (bytes_to_copy, buf_sz); ASSERT (current_buffer_index < vec_len (buffers)); b = vlib_get_buffer (vm, buffers[current_buffer_index]); clib_memcpy_fast (b->data, d0 + data_offset, bytes_this_chunk); @@ -1590,13 +1590,12 @@ pg_generate_packets (vlib_node_runtime_t * node, head = clib_fifo_head (bi0->buffer_fifo); if (head + n_this_frame <= end) - clib_memcpy_fast (to_next, head, n_this_frame * sizeof (u32)); + vlib_buffer_copy_indices (to_next, head, n_this_frame); else { u32 n = end - head; - clib_memcpy_fast (to_next + 0, head, n * sizeof (u32)); - clib_memcpy_fast (to_next + n, start, - (n_this_frame - n) * sizeof (u32)); + vlib_buffer_copy_indices (to_next + 0, head, n); + vlib_buffer_copy_indices (to_next + n, start, n_this_frame - n); } if (s->replay_packet_templates == 0) diff --git a/src/vnet/pg/pg.h b/src/vnet/pg/pg.h index 16463c2a8c50..bc3b03b7670b 100644 --- a/src/vnet/pg/pg.h +++ 
b/src/vnet/pg/pg.h @@ -345,6 +345,7 @@ vlib_node_function_t pg_input, pg_output; /* Stream add/delete. */ void pg_stream_del (pg_main_t * pg, uword index); void pg_stream_add (pg_main_t * pg, pg_stream_t * s_init); +void pg_stream_change (pg_main_t * pg, pg_stream_t * s); /* Enable/disable stream. */ void pg_stream_enable_disable (pg_main_t * pg, pg_stream_t * s, diff --git a/src/vnet/pg/stream.c b/src/vnet/pg/stream.c index ddd15d664069..c60c5845af69 100644 --- a/src/vnet/pg/stream.c +++ b/src/vnet/pg/stream.c @@ -437,7 +437,7 @@ pg_stream_add (pg_main_t * pg, pg_stream_t * s_init) { int n; - s->buffer_bytes = VLIB_BUFFER_DATA_SIZE; + s->buffer_bytes = vlib_buffer_get_default_data_size (vm); n = s->max_packet_bytes / s->buffer_bytes; n += (s->max_packet_bytes % s->buffer_bytes) != 0; @@ -483,6 +483,31 @@ pg_stream_del (pg_main_t * pg, uword index) pool_put (pg->streams, s); } +void +pg_stream_change (pg_main_t * pg, pg_stream_t * s) +{ + /* Determine packet size. */ + switch (s->packet_size_edit_type) + { + case PG_EDIT_INCREMENT: + case PG_EDIT_RANDOM: + if (s->min_packet_bytes == s->max_packet_bytes) + s->packet_size_edit_type = PG_EDIT_FIXED; + case PG_EDIT_FIXED: + break; + + default: + /* Get packet size from fixed edits. 
*/ + s->packet_size_edit_type = PG_EDIT_FIXED; + if (!s->replay_packet_templates) + s->min_packet_bytes = s->max_packet_bytes = + vec_len (s->fixed_packet_data); + break; + } + + s->last_increment_packet_size = s->min_packet_bytes; +} + /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/sctp/sctp.c b/src/vnet/sctp/sctp.c index 86aef88f432d..f84996186e05 100644 --- a/src/vnet/sctp/sctp.c +++ b/src/vnet/sctp/sctp.c @@ -892,7 +892,7 @@ sctp_main_enable (vlib_main_t * vm) vec_validate (tm->ip_lookup_tx_frames[0], num_threads - 1); vec_validate (tm->ip_lookup_tx_frames[1], num_threads - 1); - tm->bytes_per_buffer = VLIB_BUFFER_DATA_SIZE; + tm->bytes_per_buffer = vlib_buffer_get_default_data_size (vm); vec_validate (tm->time_now, num_threads - 1); return error; @@ -943,9 +943,9 @@ sctp_update_time (f64 now, u8 thread_index) /* *INDENT OFF* */ const static transport_proto_vft_t sctp_proto = { .enable = sctp_enable_disable, - .bind = sctp_session_bind, - .unbind = sctp_session_unbind, - .open = sctp_session_open, + .start_listen = sctp_session_bind, + .stop_listen = sctp_session_unbind, + .connect = sctp_session_open, .close = sctp_session_close, .cleanup = sctp_session_cleanup, .push_header = sctp_push_header, diff --git a/src/vnet/sctp/sctp_input.c b/src/vnet/sctp/sctp_input.c index 88e4eab754d5..f24e5c7a0a52 100644 --- a/src/vnet/sctp/sctp_input.c +++ b/src/vnet/sctp/sctp_input.c @@ -1764,7 +1764,7 @@ sctp46_listen_process_inline (vlib_main_t * vm, if (error0 == SCTP_ERROR_NONE) { - if (stream_session_accept + if (session_stream_accept (&child_conn-> sub_conn[SCTP_PRIMARY_PATH_IDX].connection, sctp_listener-> diff --git a/src/vnet/sctp/sctp_output.c b/src/vnet/sctp/sctp_output.c index c0e1a6eb3394..8fea714f6a2b 100644 --- a/src/vnet/sctp/sctp_output.c +++ b/src/vnet/sctp/sctp_output.c @@ -264,21 +264,20 @@ sctp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) vnet_buffer (b)->sctp.subconn_idx = MAX_SCTP_CONNECTIONS; /* Leave enough space for headers 
*/ - return vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN); } always_inline void * sctp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b) { ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - b->flags &= VLIB_BUFFER_NON_DEFAULT_FREELIST; b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; b->total_length_not_including_first_buffer = 0; vnet_buffer (b)->sctp.flags = 0; vnet_buffer (b)->sctp.subconn_idx = MAX_SCTP_CONNECTIONS; VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); /* Leave enough space for headers */ - return vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN); } always_inline int diff --git a/src/vnet/session-apps/echo_client.c b/src/vnet/session-apps/echo_client.c index d100aae5c2ad..8b7788fa266c 100644 --- a/src/vnet/session-apps/echo_client.c +++ b/src/vnet/session-apps/echo_client.c @@ -263,7 +263,7 @@ echo_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, } if (PREDICT_FALSE (delete_session == 1)) { - stream_session_t *s; + session_t *s; clib_atomic_fetch_add (&ecm->tx_total, sp->bytes_sent); clib_atomic_fetch_add (&ecm->rx_total, sp->bytes_received); @@ -356,7 +356,7 @@ echo_clients_init (vlib_main_t * vm) static int echo_clients_session_connected_callback (u32 app_index, u32 api_context, - stream_session_t * s, u8 is_fail) + session_t * s, u8 is_fail) { echo_client_main_t *ecm = &echo_client_main; eclient_session_t *session; @@ -393,9 +393,9 @@ echo_clients_session_connected_callback (u32 app_index, u32 api_context, session_index = session - ecm->sessions; session->bytes_to_send = ecm->bytes_to_send; session->bytes_to_receive = ecm->no_return ? 
0ULL : ecm->bytes_to_send; - session->data.rx_fifo = s->server_rx_fifo; + session->data.rx_fifo = s->rx_fifo; session->data.rx_fifo->client_session_index = session_index; - session->data.tx_fifo = s->server_tx_fifo; + session->data.tx_fifo = s->tx_fifo; session->data.tx_fifo->client_session_index = session_index; session->data.vpp_evt_q = ecm->vpp_event_queue[thread_index]; session->vpp_session_handle = session_handle (s); @@ -422,7 +422,7 @@ echo_clients_session_connected_callback (u32 app_index, u32 api_context, } static void -echo_clients_session_reset_callback (stream_session_t * s) +echo_clients_session_reset_callback (session_t * s) { echo_client_main_t *ecm = &echo_client_main; vnet_disconnect_args_t _a = { 0 }, *a = &_a; @@ -437,13 +437,13 @@ echo_clients_session_reset_callback (stream_session_t * s) } static int -echo_clients_session_create_callback (stream_session_t * s) +echo_clients_session_create_callback (session_t * s) { return 0; } static void -echo_clients_session_disconnect_callback (stream_session_t * s) +echo_clients_session_disconnect_callback (session_t * s) { echo_client_main_t *ecm = &echo_client_main; vnet_disconnect_args_t _a = { 0 }, *a = &_a; @@ -454,7 +454,7 @@ echo_clients_session_disconnect_callback (stream_session_t * s) } void -echo_clients_session_disconnect (stream_session_t * s) +echo_clients_session_disconnect (session_t * s) { echo_client_main_t *ecm = &echo_client_main; vnet_disconnect_args_t _a = { 0 }, *a = &_a; @@ -464,7 +464,7 @@ echo_clients_session_disconnect (stream_session_t * s) } static int -echo_clients_rx_callback (stream_session_t * s) +echo_clients_rx_callback (session_t * s) { echo_client_main_t *ecm = &echo_client_main; eclient_session_t *sp; @@ -475,15 +475,13 @@ echo_clients_rx_callback (stream_session_t * s) return -1; } - sp = pool_elt_at_index (ecm->sessions, - s->server_rx_fifo->client_session_index); + sp = pool_elt_at_index (ecm->sessions, s->rx_fifo->client_session_index); receive_data_chunk (ecm, sp); 
- if (svm_fifo_max_dequeue (s->server_rx_fifo)) + if (svm_fifo_max_dequeue (s->rx_fifo)) { - if (svm_fifo_set_event (s->server_rx_fifo)) - session_send_io_evt_to_thread (s->server_rx_fifo, - FIFO_EVENT_BUILTIN_RX); + if (svm_fifo_set_event (s->rx_fifo)) + session_send_io_evt_to_thread (s->rx_fifo, FIFO_EVENT_BUILTIN_RX); } return 0; } @@ -513,7 +511,7 @@ echo_clients_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret) echo_client_main_t *ecm = &echo_client_main; vnet_app_attach_args_t _a, *a = &_a; u64 options[16]; - clib_error_t *error = 0; + int rv; clib_memset (a, 0, sizeof (*a)); clib_memset (options, 0, sizeof (options)); @@ -543,8 +541,8 @@ echo_clients_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret) a->options = options; a->namespace_id = appns_id; - if ((error = vnet_application_attach (a))) - return error; + if ((rv = vnet_application_attach (a))) + return clib_error_return (0, "attach returned %d", rv); ecm->app_index = a->app_index; return 0; @@ -594,8 +592,7 @@ echo_clients_connect (vlib_main_t * vm, u32 n_clients) { echo_client_main_t *ecm = &echo_client_main; vnet_connect_args_t _a, *a = &_a; - clib_error_t *error = 0; - int i; + int i, rv; clib_memset (a, 0, sizeof (*a)); for (i = 0; i < n_clients; i++) @@ -604,8 +601,8 @@ echo_clients_connect (vlib_main_t * vm, u32 n_clients) a->api_context = i; a->app_index = ecm->app_index; - if ((error = vnet_connect_uri (a))) - return error; + if ((rv = vnet_connect_uri (a))) + return clib_error_return (0, "connect returned: %d", rv); /* Crude pacing for call setups */ if ((i % 4) == 0) diff --git a/src/vnet/session-apps/echo_server.c b/src/vnet/session-apps/echo_server.c index c0fdb1316036..f9d3d93c4e73 100644 --- a/src/vnet/session-apps/echo_server.c +++ b/src/vnet/session-apps/echo_server.c @@ -17,6 +17,7 @@ #include #include #include +#include typedef struct { @@ -55,7 +56,7 @@ typedef struct echo_server_main_t echo_server_main; int -echo_server_session_accept_callback (stream_session_t * s) 
+echo_server_session_accept_callback (session_t * s) { echo_server_main_t *esm = &echo_server_main; @@ -70,7 +71,7 @@ echo_server_session_accept_callback (stream_session_t * s) } void -echo_server_session_disconnect_callback (stream_session_t * s) +echo_server_session_disconnect_callback (session_t * s) { echo_server_main_t *esm = &echo_server_main; vnet_disconnect_args_t _a = { 0 }, *a = &_a; @@ -81,7 +82,7 @@ echo_server_session_disconnect_callback (stream_session_t * s) } void -echo_server_session_reset_callback (stream_session_t * s) +echo_server_session_reset_callback (session_t * s) { echo_server_main_t *esm = &echo_server_main; vnet_disconnect_args_t _a = { 0 }, *a = &_a; @@ -93,7 +94,7 @@ echo_server_session_reset_callback (stream_session_t * s) int echo_server_session_connected_callback (u32 app_index, u32 api_context, - stream_session_t * s, u8 is_fail) + session_t * s, u8 is_fail) { clib_warning ("called..."); return -1; @@ -135,15 +136,15 @@ test_bytes (echo_server_main_t * esm, int actual_transfer) * If no-echo, just drop the data and be done with it. 
*/ int -echo_server_builtin_server_rx_callback_no_echo (stream_session_t * s) +echo_server_builtin_server_rx_callback_no_echo (session_t * s) { - svm_fifo_t *rx_fifo = s->server_rx_fifo; + svm_fifo_t *rx_fifo = s->rx_fifo; svm_fifo_dequeue_drop (rx_fifo, svm_fifo_max_dequeue (rx_fifo)); return 0; } int -echo_server_rx_callback (stream_session_t * s) +echo_server_rx_callback (session_t * s) { u32 n_written, max_dequeue, max_enqueue, max_transfer; int actual_transfer; @@ -154,8 +155,8 @@ echo_server_rx_callback (stream_session_t * s) ASSERT (s->thread_index == thread_index); - rx_fifo = s->server_rx_fifo; - tx_fifo = s->server_tx_fifo; + rx_fifo = s->rx_fifo; + tx_fifo = s->tx_fifo; ASSERT (rx_fifo->master_thread_index == thread_index); ASSERT (tx_fifo->master_thread_index == thread_index); @@ -362,7 +363,7 @@ static int echo_server_listen () { echo_server_main_t *esm = &echo_server_main; - vnet_bind_args_t _a, *a = &_a; + vnet_listen_args_t _a, *a = &_a; clib_memset (a, 0, sizeof (*a)); a->app_index = esm->app_index; a->uri = esm->server_uri; diff --git a/src/vnet/session-apps/http_server.c b/src/vnet/session-apps/http_server.c index 719608a79d11..9df27fac0b01 100644 --- a/src/vnet/session-apps/http_server.c +++ b/src/vnet/session-apps/http_server.c @@ -1,5 +1,5 @@ /* -* Copyright (c) 2015-2017 Cisco and/or its affiliates. +* Copyright (c) 2015-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: @@ -16,6 +16,7 @@ #include #include #include +#include typedef enum { @@ -475,7 +476,7 @@ session_rx_request (http_session_t * hs) } static int -http_server_rx_callback (stream_session_t * s) +http_server_rx_callback (session_t * s) { http_server_args *args; http_session_t *hs; @@ -507,7 +508,7 @@ http_server_rx_callback (stream_session_t * s) } static int -http_server_rx_callback_static (stream_session_t * s) +http_server_rx_callback_static (session_t * s) { http_server_main_t *hsm = &http_server_main; vnet_disconnect_args_t _a = { 0 }, *a = &_a; @@ -578,7 +579,7 @@ http_server_rx_callback_static (stream_session_t * s) } static int -http_server_session_accept_callback (stream_session_t * s) +http_server_session_accept_callback (session_t * s) { http_server_main_t *hsm = &http_server_main; http_session_t *hs; @@ -592,8 +593,8 @@ http_server_session_accept_callback (stream_session_t * s) hs = http_server_session_alloc (s->thread_index); http_server_session_lookup_add (s->thread_index, s->session_index, hs->session_index); - hs->rx_fifo = s->server_rx_fifo; - hs->tx_fifo = s->server_tx_fifo; + hs->rx_fifo = s->rx_fifo; + hs->tx_fifo = s->tx_fifo; hs->vpp_session_index = s->session_index; hs->vpp_session_handle = session_handle (s); hs->session_state = HTTP_STATE_ESTABLISHED; @@ -606,7 +607,7 @@ http_server_session_accept_callback (stream_session_t * s) } static void -http_server_session_disconnect_callback (stream_session_t * s) +http_server_session_disconnect_callback (session_t * s) { http_server_main_t *hsm = &http_server_main; vnet_disconnect_args_t _a = { 0 }, *a = &_a; @@ -627,7 +628,7 @@ http_server_session_disconnect_callback (stream_session_t * s) } static void -http_server_session_reset_callback (stream_session_t * s) +http_server_session_reset_callback (session_t * s) { http_server_main_t *hsm = &http_server_main; vnet_disconnect_args_t _a = { 0 }, *a = &_a; @@ -649,7 +650,7 @@ http_server_session_reset_callback 
(stream_session_t * s) static int http_server_session_connected_callback (u32 app_index, u32 api_context, - stream_session_t * s, u8 is_fail) + session_t * s, u8 is_fail) { clib_warning ("called..."); return -1; @@ -727,7 +728,7 @@ static int http_server_listen () { http_server_main_t *hsm = &http_server_main; - vnet_bind_args_t _a, *a = &_a; + vnet_listen_args_t _a, *a = &_a; clib_memset (a, 0, sizeof (*a)); a->app_index = hsm->app_index; a->uri = "tcp://0.0.0.0/80"; diff --git a/src/vnet/session-apps/proxy.c b/src/vnet/session-apps/proxy.c index 06c095440bcc..ba7b05784fa5 100644 --- a/src/vnet/session-apps/proxy.c +++ b/src/vnet/session-apps/proxy.c @@ -59,13 +59,13 @@ proxy_call_main_thread (vnet_connect_args_t * a) } static void -delete_proxy_session (stream_session_t * s, int is_active_open) +delete_proxy_session (session_t * s, int is_active_open) { proxy_main_t *pm = &proxy_main; proxy_session_t *ps = 0; vnet_disconnect_args_t _a, *a = &_a; - stream_session_t *active_open_session = 0; - stream_session_t *server_session = 0; + session_t *active_open_session = 0; + session_t *server_session = 0; uword *p; u64 handle; @@ -143,7 +143,7 @@ delete_proxy_session (stream_session_t * s, int is_active_open) } static int -proxy_accept_callback (stream_session_t * s) +proxy_accept_callback (session_t * s) { proxy_main_t *pm = &proxy_main; @@ -155,13 +155,13 @@ proxy_accept_callback (stream_session_t * s) } static void -proxy_disconnect_callback (stream_session_t * s) +proxy_disconnect_callback (session_t * s) { delete_proxy_session (s, 0 /* is_active_open */ ); } static void -proxy_reset_callback (stream_session_t * s) +proxy_reset_callback (session_t * s) { clib_warning ("Reset session %U", format_stream_session, s, 2); delete_proxy_session (s, 0 /* is_active_open */ ); @@ -169,7 +169,7 @@ proxy_reset_callback (stream_session_t * s) static int proxy_connected_callback (u32 app_index, u32 api_context, - stream_session_t * s, u8 is_fail) + session_t * s, u8 is_fail) { 
clib_warning ("called..."); return -1; @@ -183,7 +183,7 @@ proxy_add_segment_callback (u32 client_index, u64 segment_handle) } static int -proxy_rx_callback (stream_session_t * s) +proxy_rx_callback (session_t * s) { u32 max_dequeue; int actual_transfer __attribute__ ((unused)); @@ -204,7 +204,7 @@ proxy_rx_callback (stream_session_t * s) if (PREDICT_TRUE (p != 0)) { clib_spinlock_unlock_if_init (&pm->sessions_lock); - active_open_tx_fifo = s->server_rx_fifo; + active_open_tx_fifo = s->rx_fifo; /* * Send event for active open tx fifo @@ -220,13 +220,13 @@ proxy_rx_callback (stream_session_t * s) } else { - rx_fifo = s->server_rx_fifo; - tx_fifo = s->server_tx_fifo; + rx_fifo = s->rx_fifo; + tx_fifo = s->tx_fifo; ASSERT (rx_fifo->master_thread_index == thread_index); ASSERT (tx_fifo->master_thread_index == thread_index); - max_dequeue = svm_fifo_max_dequeue (s->server_rx_fifo); + max_dequeue = svm_fifo_max_dequeue (s->rx_fifo); if (PREDICT_FALSE (max_dequeue == 0)) return 0; @@ -272,7 +272,7 @@ static session_cb_vft_t proxy_session_cb_vft = { static int active_open_connected_callback (u32 app_index, u32 opaque, - stream_session_t * s, u8 is_fail) + session_t * s, u8 is_fail) { proxy_main_t *pm = &proxy_main; proxy_session_t *ps; @@ -292,23 +292,23 @@ active_open_connected_callback (u32 app_index, u32 opaque, ps = pool_elt_at_index (pm->sessions, opaque); ps->vpp_active_open_handle = session_handle (s); - s->server_tx_fifo = ps->server_rx_fifo; - s->server_rx_fifo = ps->server_tx_fifo; + s->tx_fifo = ps->server_rx_fifo; + s->rx_fifo = ps->server_tx_fifo; /* * Reset the active-open tx-fifo master indices so the active-open session * will receive data, etc. 
*/ - s->server_tx_fifo->master_session_index = s->session_index; - s->server_tx_fifo->master_thread_index = s->thread_index; + s->tx_fifo->master_session_index = s->session_index; + s->tx_fifo->master_thread_index = s->thread_index; /* * Account for the active-open session's use of the fifos * so they won't disappear until the last session which uses * them disappears */ - s->server_tx_fifo->refcnt++; - s->server_rx_fifo->refcnt++; + s->tx_fifo->refcnt++; + s->rx_fifo->refcnt++; hash_set (pm->proxy_session_by_active_open_handle, ps->vpp_active_open_handle, opaque); @@ -319,36 +319,36 @@ active_open_connected_callback (u32 app_index, u32 opaque, * Send event for active open tx fifo */ ASSERT (s->thread_index == thread_index); - if (svm_fifo_set_event (s->server_tx_fifo)) - session_send_io_evt_to_thread (s->server_tx_fifo, FIFO_EVENT_APP_TX); + if (svm_fifo_set_event (s->tx_fifo)) + session_send_io_evt_to_thread (s->tx_fifo, FIFO_EVENT_APP_TX); return 0; } static void -active_open_reset_callback (stream_session_t * s) +active_open_reset_callback (session_t * s) { delete_proxy_session (s, 1 /* is_active_open */ ); } static int -active_open_create_callback (stream_session_t * s) +active_open_create_callback (session_t * s) { return 0; } static void -active_open_disconnect_callback (stream_session_t * s) +active_open_disconnect_callback (session_t * s) { delete_proxy_session (s, 1 /* is_active_open */ ); } static int -active_open_rx_callback (stream_session_t * s) +active_open_rx_callback (session_t * s) { svm_fifo_t *proxy_tx_fifo; - proxy_tx_fifo = s->server_rx_fifo; + proxy_tx_fifo = s->rx_fifo; /* * Send event for server tx fifo @@ -463,7 +463,7 @@ static int proxy_server_listen () { proxy_main_t *pm = &proxy_main; - vnet_bind_args_t _a, *a = &_a; + vnet_listen_args_t _a, *a = &_a; clib_memset (a, 0, sizeof (*a)); a->app_index = pm->server_app_index; a->uri = (char *) pm->server_uri; diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c index 
85b5f9394271..e6292157dfad 100644 --- a/src/vnet/session/application.c +++ b/src/vnet/session/application.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -20,6 +20,23 @@ static app_main_t app_main; +#define app_interface_check_thread_and_barrier(_fn, _arg) \ + if (PREDICT_FALSE (!vlib_thread_is_main_w_barrier ())) \ + { \ + vlib_rpc_call_main_thread (_fn, (u8 *) _arg, sizeof(*_arg)); \ + return 0; \ + } + +static void +application_local_listener_session_endpoint (local_session_t * ll, + session_endpoint_t * sep) +{ + sep->transport_proto = + session_type_transport_proto (ll->listener_session_type); + sep->port = ll->port; + sep->is_ip4 = ll->listener_session_type & 1; +} + static app_listener_t * app_listener_alloc (application_t * app) { @@ -27,10 +44,13 @@ app_listener_alloc (application_t * app) pool_get (app->listeners, app_listener); clib_memset (app_listener, 0, sizeof (*app_listener)); app_listener->al_index = app_listener - app->listeners; + app_listener->app_index = app->app_index; + app_listener->session_index = SESSION_INVALID_INDEX; + app_listener->local_index = SESSION_INVALID_INDEX; return app_listener; } -static app_listener_t * +app_listener_t * app_listener_get (application_t * app, u32 app_listener_index) { return pool_elt_at_index (app->listeners, app_listener_index); @@ -45,29 +65,242 @@ app_listener_free (application_t * app, app_listener_t * app_listener) clib_memset (app_listener, 0xfa, sizeof (*app_listener)); } +local_session_t * +application_local_listen_session_alloc (application_t * app) +{ + local_session_t *ll; + pool_get_zero (app->local_listen_sessions, ll); + ll->session_index = ll - app->local_listen_sessions; + ll->session_type = session_type_from_proto_and_ip 
(TRANSPORT_PROTO_NONE, 0); + ll->app_index = app->app_index; + return ll; +} + +void +application_local_listen_session_free (application_t * app, + local_session_t * ll) +{ + pool_put (app->local_listen_sessions, ll); + if (CLIB_DEBUG) + clib_memset (ll, 0xfb, sizeof (*ll)); +} + +static u32 +app_listener_id (app_listener_t * al) +{ + ASSERT (al->app_index < 1 << 16 && al->al_index < 1 << 16); + return (al->app_index << 16 | al->al_index); +} + +session_handle_t +app_listener_handle (app_listener_t * al) +{ + return ((u64) SESSION_LISTENER_PREFIX << 32 | (u64) app_listener_id (al)); +} + +static void +app_listener_id_parse (u32 listener_id, u32 * app_index, + u32 * app_listener_index) +{ + *app_index = listener_id >> 16; + *app_listener_index = listener_id & 0xFFFF; +} + +void +app_listener_handle_parse (session_handle_t handle, u32 * app_index, + u32 * app_listener_index) +{ + app_listener_id_parse (handle & 0xFFFFFFFF, app_index, app_listener_index); +} + static app_listener_t * -app_local_listener_alloc (application_t * app) +app_listener_get_w_id (u32 listener_id) +{ + u32 app_index, app_listener_index; + application_t *app; + + app_listener_id_parse (listener_id, &app_index, &app_listener_index); + app = application_get_if_valid (app_index); + if (!app) + return 0; + return app_listener_get (app, app_listener_index); +} + +app_listener_t * +app_listener_get_w_session (session_t * ls) +{ + application_t *app; + + app = application_get_if_valid (ls->app_index); + if (!app) + return 0; + return app_listener_get (app, ls->al_index); +} + +app_listener_t * +app_listener_get_w_handle (session_handle_t handle) +{ + + if (handle >> 32 != SESSION_LISTENER_PREFIX) + return 0; + + return app_listener_get_w_id (handle & 0xFFFFFFFF); +} + +app_listener_t * +app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext) +{ + u32 table_index, fib_proto; + session_endpoint_t *sep; + session_handle_t handle; + local_session_t *ll; + session_t *ls; + + sep = 
(session_endpoint_t *) sep_ext; + if (application_has_local_scope (app) && session_endpoint_is_local (sep)) + { + table_index = application_local_session_table (app); + handle = session_lookup_endpoint_listener (table_index, sep, 1); + if (handle != SESSION_INVALID_HANDLE) + { + ll = application_get_local_listener_w_handle (handle); + return app_listener_get_w_session ((session_t *) ll); + } + } + + fib_proto = session_endpoint_fib_proto (sep); + table_index = application_session_table (app, fib_proto); + handle = session_lookup_endpoint_listener (table_index, sep, 1); + if (handle != SESSION_INVALID_HANDLE) + { + ls = listen_session_get_from_handle (handle); + return app_listener_get_w_session ((session_t *) ls); + } + + return 0; +} + +int +app_listener_alloc_and_init (application_t * app, + session_endpoint_cfg_t * sep, + app_listener_t ** listener) { app_listener_t *app_listener; - pool_get (app->local_listeners, app_listener); - clib_memset (app_listener, 0, sizeof (*app_listener)); - app_listener->al_index = app_listener - app->local_listeners; - return app_listener; + local_session_t *ll = 0; + session_handle_t lh; + session_type_t st; + session_t *ls = 0; + int rv; + + app_listener = app_listener_alloc (app); + st = session_type_from_proto_and_ip (sep->transport_proto, sep->is_ip4); + + /* + * Add session endpoint to local session table. Only binds to "inaddr_any" + * (i.e., zero address) are added to local scope table. 
+ */ + if (application_has_local_scope (app) + && session_endpoint_is_local ((session_endpoint_t *) sep)) + { + u32 table_index; + + ll = application_local_listen_session_alloc (app); + ll->port = sep->port; + /* Store the original session type for the unbind */ + ll->listener_session_type = st; + table_index = application_local_session_table (app); + lh = application_local_session_handle (ll); + session_lookup_add_session_endpoint (table_index, + (session_endpoint_t *) sep, lh); + app_listener->local_index = ll->session_index; + ll->al_index = app_listener->al_index; + } + + if (application_has_global_scope (app)) + { + /* + * Start listening on local endpoint for requested transport and scope. + * Creates a stream session with state LISTENING to be used in session + * lookups, prior to establishing connection. Requests transport to + * build it's own specific listening connection. + */ + ls = listen_session_new (0, st); + ls->app_index = app->app_index; + ls->app_wrk_index = sep->app_wrk_index; + + /* Listen pool can be reallocated if the transport is + * recursive (tls) */ + lh = session_handle (ls); + + if ((rv = session_listen (ls, sep))) + { + ls = session_get_from_handle (lh); + session_free (ls); + return rv; + } + app_listener->session_index = ls->session_index; + ls->al_index = app_listener->al_index; + } + + if (!ll && !ls) + { + app_listener_free (app, app_listener); + return -1; + } + + *listener = app_listener; + return 0; } -static app_listener_t * -app_local_listener_get (application_t * app, u32 app_listener_index) +void +app_listener_cleanup (app_listener_t * al) { - return pool_elt_at_index (app->local_listeners, app_listener_index); + application_t *app = application_get (al->app_index); + + if (al->session_index != SESSION_INVALID_INDEX) + { + session_t *ls = session_get (al->session_index, 0); + session_stop_listen (ls); + listen_session_del (ls); + } + if (al->local_index != SESSION_INVALID_INDEX) + { + session_endpoint_t sep = 
SESSION_ENDPOINT_NULL; + local_session_t *ll; + u32 table_index; + + table_index = application_local_session_table (app); + ll = application_get_local_listen_session (app, al->local_index); + application_local_listener_session_endpoint (ll, &sep); + session_lookup_del_session_endpoint (table_index, &sep); + application_local_listen_session_free (app, ll); + } + app_listener_free (app, al); } -static void -app_local_listener_free (application_t * app, app_listener_t * app_listener) +app_worker_t * +app_listener_select_worker (app_listener_t * al) { - clib_bitmap_free (app_listener->workers); - pool_put (app->local_listeners, app_listener); - if (CLIB_DEBUG) - clib_memset (app_listener, 0xfa, sizeof (*app_listener)); + application_t *app; + u32 wrk_index; + + app = application_get (al->app_index); + wrk_index = clib_bitmap_next_set (al->workers, al->accept_rotor + 1); + if (wrk_index == ~0) + wrk_index = clib_bitmap_first_set (al->workers); + + ASSERT (wrk_index != ~0); + al->accept_rotor = wrk_index; + return application_get_worker (app, wrk_index); +} + +session_t * +app_listener_get_session (app_listener_t * al) +{ + if (al->session_index == SESSION_INVALID_INDEX) + return 0; + + return listen_session_get (al->session_index); } static app_worker_map_t * @@ -130,16 +363,6 @@ application_local_session_table (application_t * app) return app_ns->local_table_index; } -static void -application_local_listener_session_endpoint (local_session_t * ll, - session_endpoint_t * sep) -{ - sep->transport_proto = - session_type_transport_proto (ll->listener_session_type); - sep->port = ll->port; - sep->is_ip4 = ll->listener_session_type & 1; -} - /** * Returns app name for app-index */ @@ -199,7 +422,7 @@ application_lookup_name (const u8 * name) return 0; } -application_t * +static application_t * application_alloc (void) { application_t *app; @@ -266,7 +489,7 @@ application_verify_cfg (ssvm_segment_type_t st) return 1; } -int +static int application_alloc_and_init 
(app_init_args_t * a) { ssvm_segment_type_t seg_type = SSVM_SEGMENT_MEMFD; @@ -351,7 +574,7 @@ application_alloc_and_init (app_init_args_t * a) return 0; } -void +static void application_free (application_t * app) { app_worker_map_t *wrk_map; @@ -408,7 +631,7 @@ application_free (application_t * app) pool_put (app_main.app_pool, app); } -void +static void application_detach_process (application_t * app, u32 api_client_index) { vnet_app_worker_add_del_args_t _args = { 0 }, *args = &_args; @@ -475,126 +698,16 @@ application_n_workers (application_t * app) } app_worker_t * -application_listener_select_worker (stream_session_t * ls, u8 is_local) -{ - app_listener_t *app_listener; - application_t *app; - u32 wrk_index; - - app = application_get (ls->app_index); - if (!is_local) - app_listener = app_listener_get (app, ls->listener_db_index); - else - app_listener = app_local_listener_get (app, ls->listener_db_index); - - wrk_index = clib_bitmap_next_set (app_listener->workers, - app_listener->accept_rotor + 1); - if (wrk_index == ~0) - wrk_index = clib_bitmap_first_set (app_listener->workers); - - ASSERT (wrk_index != ~0); - app_listener->accept_rotor = wrk_index; - return application_get_worker (app, wrk_index); -} - -app_worker_t * -app_worker_alloc (application_t * app) -{ - app_worker_t *app_wrk; - pool_get (app_main.workers, app_wrk); - clib_memset (app_wrk, 0, sizeof (*app_wrk)); - app_wrk->wrk_index = app_wrk - app_main.workers; - app_wrk->app_index = app->app_index; - app_wrk->wrk_map_index = ~0; - app_wrk->connects_seg_manager = APP_INVALID_SEGMENT_MANAGER_INDEX; - app_wrk->first_segment_manager = APP_INVALID_SEGMENT_MANAGER_INDEX; - app_wrk->local_segment_manager = APP_INVALID_SEGMENT_MANAGER_INDEX; - APP_DBG ("New app %v worker %u", app_get_name (app), app_wrk->wrk_index); - return app_wrk; -} - -app_worker_t * -app_worker_get (u32 wrk_index) -{ - return pool_elt_at_index (app_main.workers, wrk_index); -} - -app_worker_t * -app_worker_get_if_valid (u32 
wrk_index) -{ - if (pool_is_free_index (app_main.workers, wrk_index)) - return 0; - return pool_elt_at_index (app_main.workers, wrk_index); -} - -void -app_worker_free (app_worker_t * app_wrk) +application_listener_select_worker (session_t * ls) { - application_t *app = application_get (app_wrk->app_index); - vnet_unbind_args_t _a, *a = &_a; - u64 handle, *handles = 0; - segment_manager_t *sm; - u32 sm_index; - int i; - - /* - * Listener cleanup - */ - - /* *INDENT-OFF* */ - hash_foreach (handle, sm_index, app_wrk->listeners_table, - ({ - vec_add1 (handles, handle); - sm = segment_manager_get (sm_index); - sm->app_wrk_index = SEGMENT_MANAGER_INVALID_APP_INDEX; - })); - /* *INDENT-ON* */ + app_listener_t *al; - for (i = 0; i < vec_len (handles); i++) - { - a->app_index = app->app_index; - a->wrk_map_index = app_wrk->wrk_map_index; - a->handle = handles[i]; - /* seg manager is removed when unbind completes */ - vnet_unbind (a); - } - - /* - * Connects segment manager cleanup - */ - - if (app_wrk->connects_seg_manager != APP_INVALID_SEGMENT_MANAGER_INDEX) - { - sm = segment_manager_get (app_wrk->connects_seg_manager); - sm->app_wrk_index = SEGMENT_MANAGER_INVALID_APP_INDEX; - segment_manager_init_del (sm); - } - - /* If first segment manager is used by a listener */ - if (app_wrk->first_segment_manager != APP_INVALID_SEGMENT_MANAGER_INDEX - && app_wrk->first_segment_manager != app_wrk->connects_seg_manager) - { - sm = segment_manager_get (app_wrk->first_segment_manager); - sm->first_is_protected = 0; - sm->app_wrk_index = SEGMENT_MANAGER_INVALID_APP_INDEX; - /* .. and has no fifos, e.g. 
it might be used for redirected sessions, - * remove it */ - if (!segment_manager_has_fifos (sm)) - segment_manager_del (sm); - } - - /* - * Local sessions - */ - app_worker_local_sessions_free (app_wrk); - - pool_put (app_main.workers, app_wrk); - if (CLIB_DEBUG) - clib_memset (app_wrk, 0xfe, sizeof (*app_wrk)); + al = app_listener_get_w_session (ls); + return app_listener_select_worker (al); } int -app_worker_alloc_and_init (application_t * app, app_worker_t ** wrk) +application_alloc_worker_and_init (application_t * app, app_worker_t ** wrk) { app_worker_map_t *wrk_map; app_worker_t *app_wrk; @@ -641,383 +754,470 @@ app_worker_alloc_and_init (application_t * app, app_worker_t ** wrk) return 0; } -application_t * -app_worker_get_app (u32 wrk_index) +int +vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a) { + svm_fifo_segment_private_t *fs; + app_worker_map_t *wrk_map; app_worker_t *app_wrk; - app_wrk = app_worker_get_if_valid (wrk_index); - if (!app_wrk) - return 0; - return application_get_if_valid (app_wrk->app_index); -} + segment_manager_t *sm; + application_t *app; + int rv; -static segment_manager_t * -app_worker_alloc_segment_manager (app_worker_t * app_wrk) -{ - segment_manager_t *sm = 0; + app = application_get (a->app_index); + if (!app) + return VNET_API_ERROR_INVALID_VALUE; - /* If the first segment manager is not in use, don't allocate a new one */ - if (app_wrk->first_segment_manager != APP_INVALID_SEGMENT_MANAGER_INDEX - && app_wrk->first_segment_manager_in_use == 0) + if (a->is_add) { + if ((rv = application_alloc_worker_and_init (app, &app_wrk))) + return rv; + + /* Map worker api index to the app */ + app_wrk->api_client_index = a->api_client_index; + application_api_table_add (app->app_index, a->api_client_index); + sm = segment_manager_get (app_wrk->first_segment_manager); - app_wrk->first_segment_manager_in_use = 1; - return sm; + fs = segment_manager_get_segment_w_lock (sm, 0); + a->segment = &fs->ssvm; + a->segment_handle = 
segment_manager_segment_handle (sm, fs); + segment_manager_segment_reader_unlock (sm); + a->evt_q = app_wrk->event_queue; + a->wrk_map_index = app_wrk->wrk_map_index; } + else + { + wrk_map = app_worker_map_get (app, a->wrk_map_index); + if (!wrk_map) + return VNET_API_ERROR_INVALID_VALUE; - sm = segment_manager_new (); - sm->app_wrk_index = app_wrk->wrk_index; + app_wrk = app_worker_get (wrk_map->wrk_index); + if (!app_wrk) + return VNET_API_ERROR_INVALID_VALUE; - return sm; + application_api_table_del (app_wrk->api_client_index); + app_worker_free (app_wrk); + app_worker_map_free (app, wrk_map); + if (application_n_workers (app) == 0) + application_free (app); + } + return 0; } -int -app_worker_start_listen (app_worker_t * app_wrk, stream_session_t * ls) +static int +app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index) { - segment_manager_t *sm; - - /* Allocate segment manager. All sessions derived out of a listen session - * have fifos allocated by the same segment manager. */ - if (!(sm = app_worker_alloc_segment_manager (app_wrk))) - return -1; - - /* Add to app's listener table. 
Useful to find all child listeners - * when app goes down, although, just for unbinding this is not needed */ - hash_set (app_wrk->listeners_table, listen_session_get_handle (ls), - segment_manager_index (sm)); - - if (!ls->server_rx_fifo - && session_transport_service_type (ls) == TRANSPORT_SERVICE_CL) + app_namespace_t *app_ns; + if (vec_len (namespace_id) == 0) { - if (session_alloc_fifos (sm, ls)) - return -1; + /* Use default namespace */ + *app_ns_index = 0; + return 0; } + + *app_ns_index = app_namespace_index_from_id (namespace_id); + if (*app_ns_index == APP_NAMESPACE_INVALID_INDEX) + return VNET_API_ERROR_APP_INVALID_NS; + app_ns = app_namespace_get (*app_ns_index); + if (!app_ns) + return VNET_API_ERROR_APP_INVALID_NS; + if (app_ns->ns_secret != secret) + return VNET_API_ERROR_APP_WRONG_NS_SECRET; return 0; } +static u8 * +app_name_from_api_index (u32 api_client_index) +{ + vl_api_registration_t *regp; + regp = vl_api_client_index_to_registration (api_client_index); + if (regp) + return format (0, "%s%c", regp->name, 0); + + clib_warning ("api client index %u does not have an api registration!", + api_client_index); + return format (0, "unknown%c", 0); +} + +/** + * Attach application to vpp + * + * Allocates a vpp app, i.e., a structure that keeps back pointers + * to external app and a segment manager for shared memory fifo based + * communication with the external app. 
+ */ int -app_worker_stop_listen (app_worker_t * app_wrk, session_handle_t handle) +vnet_application_attach (vnet_app_attach_args_t * a) { + svm_fifo_segment_private_t *fs; + application_t *app = 0; + app_worker_t *app_wrk; segment_manager_t *sm; - uword *sm_indexp; + u32 app_ns_index = 0; + u8 *app_name = 0; + u64 secret; + int rv; + + if (a->api_client_index != APP_INVALID_INDEX) + app = application_lookup (a->api_client_index); + else if (a->name) + app = application_lookup_name (a->name); + else + return VNET_API_ERROR_INVALID_VALUE; + + if (app) + return VNET_API_ERROR_APP_ALREADY_ATTACHED; - sm_indexp = hash_get (app_wrk->listeners_table, handle); - if (PREDICT_FALSE (!sm_indexp)) + if (a->api_client_index != APP_INVALID_INDEX) { - clib_warning ("listener handle was removed %llu!", handle); - return -1; + app_name = app_name_from_api_index (a->api_client_index); + a->name = app_name; + } + + secret = a->options[APP_OPTIONS_NAMESPACE_SECRET]; + if ((rv = app_validate_namespace (a->namespace_id, secret, &app_ns_index))) + return rv; + a->options[APP_OPTIONS_NAMESPACE] = app_ns_index; + + if ((rv = application_alloc_and_init ((app_init_args_t *) a))) + return rv; + + app = application_get (a->app_index); + if ((rv = application_alloc_worker_and_init (app, &app_wrk))) + return rv; + + a->app_evt_q = app_wrk->event_queue; + app_wrk->api_client_index = a->api_client_index; + sm = segment_manager_get (app_wrk->first_segment_manager); + fs = segment_manager_get_segment_w_lock (sm, 0); + + if (application_is_proxy (app)) + application_setup_proxy (app); + + ASSERT (vec_len (fs->ssvm.name) <= 128); + a->segment = &fs->ssvm; + a->segment_handle = segment_manager_segment_handle (sm, fs); + + segment_manager_segment_reader_unlock (sm); + vec_free (app_name); + return 0; +} + +/** + * Detach application from vpp + */ +int +vnet_application_detach (vnet_app_detach_args_t * a) +{ + application_t *app; + + app = application_get_if_valid (a->app_index); + if (!app) + { + 
clib_warning ("app not attached"); + return VNET_API_ERROR_APPLICATION_NOT_ATTACHED; + } + + app_interface_check_thread_and_barrier (vnet_application_detach, a); + application_detach_process (app, a->api_client_index); + return 0; +} + + +static u8 +session_endpoint_in_ns (session_endpoint_t * sep) +{ + u8 is_lep = session_endpoint_is_local (sep); + if (!is_lep && sep->sw_if_index != ENDPOINT_INVALID_INDEX + && !ip_interface_has_address (sep->sw_if_index, &sep->ip, sep->is_ip4)) + { + clib_warning ("sw_if_index %u not configured with ip %U", + sep->sw_if_index, format_ip46_address, &sep->ip, + sep->is_ip4); + return 0; } + return (is_lep || ip_is_local (sep->fib_index, &sep->ip, sep->is_ip4)); +} + +static void +session_endpoint_update_for_app (session_endpoint_cfg_t * sep, + application_t * app, u8 is_connect) +{ + app_namespace_t *app_ns; + u32 ns_index, fib_index; + + ns_index = app->ns_index; - sm = segment_manager_get (*sm_indexp); - if (app_wrk->first_segment_manager == *sm_indexp) + /* App is a transport proto, so fetch the calling app's ns */ + if (app->flags & APP_OPTIONS_FLAGS_IS_TRANSPORT_APP) { - /* Delete sessions but don't remove segment manager */ - app_wrk->first_segment_manager_in_use = 0; - segment_manager_del_sessions (sm); + app_worker_t *owner_wrk; + application_t *owner_app; + + owner_wrk = app_worker_get (sep->app_wrk_index); + owner_app = application_get (owner_wrk->app_index); + ns_index = owner_app->ns_index; } + app_ns = app_namespace_get (ns_index); + if (!app_ns) + return; + + /* Ask transport and network to bind to/connect using local interface + * that "supports" app's namespace. This will fix our local connection + * endpoint. + */ + + /* If in default namespace and user requested a fib index use it */ + if (ns_index == 0 && sep->fib_index != ENDPOINT_INVALID_INDEX) + fib_index = sep->fib_index; else + fib_index = sep->is_ip4 ? 
app_ns->ip4_fib_index : app_ns->ip6_fib_index; + sep->peer.fib_index = fib_index; + sep->fib_index = fib_index; + + if (!is_connect) { - segment_manager_init_del (sm); + sep->sw_if_index = app_ns->sw_if_index; } - hash_unset (app_wrk->listeners_table, handle); + else + { + if (app_ns->sw_if_index != APP_NAMESPACE_INVALID_INDEX + && sep->peer.sw_if_index != ENDPOINT_INVALID_INDEX + && sep->peer.sw_if_index != app_ns->sw_if_index) + clib_warning ("Local sw_if_index different from app ns sw_if_index"); - return 0; + sep->peer.sw_if_index = app_ns->sw_if_index; + } } int -app_worker_own_session (app_worker_t * app_wrk, stream_session_t * s) +vnet_listen (vnet_listen_args_t * a) { - segment_manager_t *sm; - svm_fifo_t *rxf, *txf; - - s->app_wrk_index = app_wrk->wrk_index; + app_listener_t *app_listener; + app_worker_t *app_wrk; + application_t *app; + int rv; - rxf = s->server_rx_fifo; - txf = s->server_tx_fifo; + app = application_get_if_valid (a->app_index); + if (!app) + return VNET_API_ERROR_APPLICATION_NOT_ATTACHED; - if (!rxf || !txf) - return 0; + app_wrk = application_get_worker (app, a->wrk_map_index); + if (!app_wrk) + return VNET_API_ERROR_INVALID_VALUE; - s->server_rx_fifo = 0; - s->server_tx_fifo = 0; + a->sep_ext.app_wrk_index = app_wrk->wrk_index; - sm = app_worker_get_or_alloc_connect_segment_manager (app_wrk); - if (session_alloc_fifos (sm, s)) - return -1; + session_endpoint_update_for_app (&a->sep_ext, app, 0 /* is_connect */ ); + if (!session_endpoint_in_ns (&a->sep)) + return VNET_API_ERROR_INVALID_VALUE_2; - if (!svm_fifo_is_empty (rxf)) + /* + * Check if we already have an app listener + */ + app_listener = app_listener_lookup (app, &a->sep_ext); + if (app_listener) { - clib_memcpy_fast (s->server_rx_fifo->data, rxf->data, rxf->nitems); - s->server_rx_fifo->head = rxf->head; - s->server_rx_fifo->tail = rxf->tail; - s->server_rx_fifo->cursize = rxf->cursize; + if (app_listener->app_index != app->app_index) + return VNET_API_ERROR_ADDRESS_IN_USE; + 
if (app_worker_start_listen (app_wrk, app_listener)) + return -1; + a->handle = app_listener_handle (app_listener); + return 0; } - if (!svm_fifo_is_empty (txf)) + /* + * Create new app listener + */ + if ((rv = app_listener_alloc_and_init (app, &a->sep_ext, &app_listener))) + return rv; + + if ((rv = app_worker_start_listen (app_wrk, app_listener))) { - clib_memcpy_fast (s->server_tx_fifo->data, txf->data, txf->nitems); - s->server_tx_fifo->head = txf->head; - s->server_tx_fifo->tail = txf->tail; - s->server_tx_fifo->cursize = txf->cursize; + app_listener_cleanup (app_listener); + return rv; } - segment_manager_dealloc_fifos (rxf->segment_index, rxf, txf); - + a->handle = app_listener_handle (app_listener); return 0; } -/** - * Start listening local transport endpoint for requested transport. - * - * Creates a 'dummy' stream session with state LISTENING to be used in session - * lookups, prior to establishing connection. Requests transport to build - * it's own specific listening connection. - */ int -application_start_listen (application_t * app, - session_endpoint_cfg_t * sep_ext, - session_handle_t * res) +vnet_connect (vnet_connect_args_t * a) { - app_listener_t *app_listener; - u32 table_index, fib_proto; - session_endpoint_t *sep; - app_worker_t *app_wrk; - stream_session_t *ls; - session_handle_t lh; - session_type_t sst; + app_worker_t *server_wrk, *client_wrk; + application_t *client; + local_session_t *ll; + app_listener_t *al; + u32 table_index; + session_t *ls; + u8 fib_proto; + u64 lh; + + if (session_endpoint_is_zero (&a->sep)) + return VNET_API_ERROR_INVALID_VALUE; + + client = application_get (a->app_index); + session_endpoint_update_for_app (&a->sep_ext, client, 1 /* is_connect */ ); + client_wrk = application_get_worker (client, a->wrk_map_index); /* - * Check if sep is already listened on + * First check the local scope for locally attached destinations. 
+ * If we have local scope, we pass *all* connects through it since we may + * have special policy rules even for non-local destinations, think proxy. */ - sep = (session_endpoint_t *) sep_ext; - fib_proto = session_endpoint_fib_proto (sep); - table_index = application_session_table (app, fib_proto); - lh = session_lookup_endpoint_listener (table_index, sep, 1); - if (lh != SESSION_INVALID_HANDLE) + if (application_has_local_scope (client)) { - ls = listen_session_get_from_handle (lh); - if (ls->app_index != app->app_index) - return VNET_API_ERROR_ADDRESS_IN_USE; + table_index = application_local_session_table (client); + lh = session_lookup_local_endpoint (table_index, &a->sep); + if (lh == SESSION_DROP_HANDLE) + return VNET_API_ERROR_APP_CONNECT_FILTERED; - app_wrk = app_worker_get (sep_ext->app_wrk_index); - if (ls->app_wrk_index == app_wrk->wrk_index) - return VNET_API_ERROR_ADDRESS_IN_USE; + if (lh == SESSION_INVALID_HANDLE) + goto global_scope; - if (app_worker_start_listen (app_wrk, ls)) - return -1; + ll = application_get_local_listener_w_handle (lh); + al = app_listener_get_w_session ((session_t *) ll); - app_listener = app_listener_get (app, ls->listener_db_index); - app_listener->workers = clib_bitmap_set (app_listener->workers, - app_wrk->wrk_map_index, 1); + /* + * Break loop if rule in local table points to connecting app. This + * can happen if client is a generic proxy. Route connect through + * global table instead. + */ + if (al->app_index == a->app_index) + goto global_scope; - *res = listen_session_get_handle (ls); - return 0; + server_wrk = app_listener_select_worker (al); + return app_worker_local_session_connect (client_wrk, server_wrk, ll, + a->api_context); } /* - * Allocate new listener for application + * If nothing found, check the global scope for locally attached + * destinations. Make sure first that we're allowed to. 
*/ - sst = session_type_from_proto_and_ip (sep_ext->transport_proto, - sep_ext->is_ip4); - ls = listen_session_new (0, sst); - ls->app_index = app->app_index; - lh = listen_session_get_handle (ls); - if (session_listen (ls, sep_ext)) - goto err; +global_scope: + if (session_endpoint_is_local (&a->sep)) + return VNET_API_ERROR_SESSION_CONNECT; - ls = listen_session_get_from_handle (lh); - app_listener = app_listener_alloc (app); - ls->listener_db_index = app_listener->al_index; + if (!application_has_global_scope (client)) + return VNET_API_ERROR_APP_CONNECT_SCOPE; + + fib_proto = session_endpoint_fib_proto (&a->sep); + table_index = application_session_table (client, fib_proto); + ls = session_lookup_listener (table_index, &a->sep); + if (ls) + { + al = app_listener_get_w_session (ls); + server_wrk = app_listener_select_worker (al); + ll = (local_session_t *) ls; + return app_worker_local_session_connect (client_wrk, server_wrk, ll, + a->api_context); + } /* - * Setup app worker as a listener + * Not connecting to a local server, propagate to transport */ - app_wrk = app_worker_get (sep_ext->app_wrk_index); - ls->app_wrk_index = app_wrk->wrk_index; - if (app_worker_start_listen (app_wrk, ls)) - goto err; - app_listener->workers = clib_bitmap_set (app_listener->workers, - app_wrk->wrk_map_index, 1); - - *res = lh; + if (app_worker_connect_session (client_wrk, &a->sep, a->api_context)) + return VNET_API_ERROR_SESSION_CONNECT; return 0; - -err: - listen_session_del (ls); - return -1; } -/** - * Stop listening on session associated to handle - * - * @param handle listener handle - * @param app_index index of the app owning the handle. 
- * @param app_wrk_index index of the worker requesting the stop - */ int -application_stop_listen (u32 app_index, u32 app_wrk_index, - session_handle_t handle) +vnet_unlisten (vnet_unlisten_args_t * a) { - app_listener_t *app_listener; - stream_session_t *listener; app_worker_t *app_wrk; + app_listener_t *al; application_t *app; - listener = listen_session_get_from_handle (handle); - app = application_get (app_index); - if (PREDICT_FALSE (!app || app->app_index != listener->app_index)) - { - clib_warning ("app doesn't own handle %llu!", handle); - return -1; - } + if (!(app = application_get_if_valid (a->app_index))) + return VNET_API_ERROR_APPLICATION_NOT_ATTACHED; - app_listener = app_listener_get (app, listener->listener_db_index); - if (!clib_bitmap_get (app_listener->workers, app_wrk_index)) + al = app_listener_get_w_handle (a->handle); + if (al->app_index != app->app_index) { - clib_warning ("worker %u not listening on handle %lu", app_wrk_index, - handle); - return 0; + clib_warning ("app doesn't own handle %llu!", a->handle); + return -1; } - app_wrk = application_get_worker (app, app_wrk_index); - app_worker_stop_listen (app_wrk, handle); - clib_bitmap_set_no_check (app_listener->workers, app_wrk_index, 0); - - if (clib_bitmap_is_zero (app_listener->workers)) + app_wrk = application_get_worker (app, a->wrk_map_index); + if (!app_wrk) { - session_stop_listen (listener); - app_listener_free (app, app_listener); - listen_session_del (listener); + clib_warning ("no app %u worker %u", app->app_index, a->wrk_map_index); + return -1; } - return 0; + return app_worker_stop_listen (app_wrk, al); } int -app_worker_open_session (app_worker_t * app, session_endpoint_t * sep, - u32 api_context) +vnet_disconnect_session (vnet_disconnect_args_t * a) { - int rv; + if (session_handle_is_local (a->handle)) + { + local_session_t *ls; - /* Make sure we have a segment manager for connects */ - app_worker_alloc_connects_segment_manager (app); + /* Disconnect reply came to 
worker 1 not main thread */ + app_interface_check_thread_and_barrier (vnet_disconnect_session, a); - if ((rv = session_open (app->wrk_index, sep, api_context))) - return rv; + if (!(ls = app_worker_get_local_session_from_handle (a->handle))) + return 0; - return 0; -} + return app_worker_local_session_disconnect (a->app_index, ls); + } + else + { + app_worker_t *app_wrk; + session_t *s; -int -app_worker_alloc_connects_segment_manager (app_worker_t * app_wrk) -{ - segment_manager_t *sm; + s = session_get_from_handle_if_valid (a->handle); + if (!s) + return VNET_API_ERROR_INVALID_VALUE; + app_wrk = app_worker_get (s->app_wrk_index); + if (app_wrk->app_index != a->app_index) + return VNET_API_ERROR_INVALID_VALUE; - if (app_wrk->connects_seg_manager == APP_INVALID_SEGMENT_MANAGER_INDEX) - { - sm = app_worker_alloc_segment_manager (app_wrk); - if (sm == 0) - return -1; - app_wrk->connects_seg_manager = segment_manager_index (sm); + /* We're peeking into another's thread pool. Make sure */ + ASSERT (s->session_index == session_index_from_handle (a->handle)); + + session_close (s); } return 0; } -segment_manager_t * -app_worker_get_connect_segment_manager (app_worker_t * app) -{ - ASSERT (app->connects_seg_manager != (u32) ~ 0); - return segment_manager_get (app->connects_seg_manager); -} - -segment_manager_t * -app_worker_get_or_alloc_connect_segment_manager (app_worker_t * app_wrk) +int +application_change_listener_owner (session_t * s, app_worker_t * app_wrk) { - if (app_wrk->connects_seg_manager == (u32) ~ 0) - app_worker_alloc_connects_segment_manager (app_wrk); - return segment_manager_get (app_wrk->connects_seg_manager); -} + app_worker_t *old_wrk = app_worker_get (s->app_wrk_index); + app_listener_t *app_listener; + application_t *app; -segment_manager_t * -app_worker_get_listen_segment_manager (app_worker_t * app, - stream_session_t * listener) -{ - uword *smp; - smp = hash_get (app->listeners_table, listen_session_get_handle (listener)); - ASSERT (smp != 0); - 
return segment_manager_get (*smp); -} + if (!old_wrk) + return -1; -clib_error_t * -vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a) -{ - svm_fifo_segment_private_t *fs; - app_worker_map_t *wrk_map; - app_worker_t *app_wrk; - segment_manager_t *sm; - application_t *app; - int rv; + hash_unset (old_wrk->listeners_table, listen_session_get_handle (s)); + if (session_transport_service_type (s) == TRANSPORT_SERVICE_CL + && s->rx_fifo) + segment_manager_dealloc_fifos (s->rx_fifo->segment_index, s->rx_fifo, + s->tx_fifo); - app = application_get (a->app_index); + app = application_get (old_wrk->app_index); if (!app) - return clib_error_return_code (0, VNET_API_ERROR_INVALID_VALUE, 0, - "App %u does not exist", a->app_index); + return -1; - if (a->is_add) - { - if ((rv = app_worker_alloc_and_init (app, &app_wrk))) - return clib_error_return_code (0, rv, 0, "app wrk init: %d", rv); + app_listener = app_listener_get (app, s->al_index); - /* Map worker api index to the app */ - app_wrk->api_client_index = a->api_client_index; - application_api_table_add (app->app_index, a->api_client_index); + /* Only remove from lb for now */ + app_listener->workers = clib_bitmap_set (app_listener->workers, + old_wrk->wrk_map_index, 0); - sm = segment_manager_get (app_wrk->first_segment_manager); - fs = segment_manager_get_segment_w_lock (sm, 0); - a->segment = &fs->ssvm; - a->segment_handle = segment_manager_segment_handle (sm, fs); - segment_manager_segment_reader_unlock (sm); - a->evt_q = app_wrk->event_queue; - a->wrk_map_index = app_wrk->wrk_map_index; - } - else - { - wrk_map = app_worker_map_get (app, a->wrk_map_index); - if (!wrk_map) - return clib_error_return_code (0, VNET_API_ERROR_INVALID_VALUE, 0, - "App %u does not have worker %u", - app->app_index, a->wrk_map_index); - app_wrk = app_worker_get (wrk_map->wrk_index); - if (!app_wrk) - return clib_error_return_code (0, VNET_API_ERROR_INVALID_VALUE, 0, - "No worker %u", a->wrk_map_index); - application_api_table_del 
(app_wrk->api_client_index); - app_worker_free (app_wrk); - app_worker_map_free (app, wrk_map); - if (application_n_workers (app) == 0) - application_free (app); - } - return 0; -} + if (app_worker_start_listen (app_wrk, app_listener)) + return -1; -segment_manager_t * -application_get_local_segment_manager (app_worker_t * app) -{ - return segment_manager_get (app->local_segment_manager); -} + s->app_wrk_index = app_wrk->wrk_index; -segment_manager_t * -application_get_local_segment_manager_w_session (app_worker_t * app, - local_session_t * ls) -{ - stream_session_t *listener; - if (application_local_session_listener_has_transport (ls)) - { - listener = listen_session_get (ls->listener_index); - return app_worker_get_listen_segment_manager (app, listener); - } - return segment_manager_get (app->local_segment_manager); + return 0; } int @@ -1056,78 +1256,6 @@ application_use_mq_for_ctrl (application_t * app) return app->flags & APP_OPTIONS_FLAGS_USE_MQ_FOR_CTRL_MSGS; } -/** - * Send an API message to the external app, to map new segment - */ -int -app_worker_add_segment_notify (u32 app_wrk_index, u64 segment_handle) -{ - app_worker_t *app_wrk = app_worker_get (app_wrk_index); - application_t *app = application_get (app_wrk->app_index); - return app->cb_fns.add_segment_callback (app_wrk->api_client_index, - segment_handle); -} - -u32 -application_n_listeners (app_worker_t * app) -{ - return hash_elts (app->listeners_table); -} - -stream_session_t * -app_worker_first_listener (app_worker_t * app, u8 fib_proto, - u8 transport_proto) -{ - stream_session_t *listener; - u64 handle; - u32 sm_index; - u8 sst; - - sst = session_type_from_proto_and_ip (transport_proto, - fib_proto == FIB_PROTOCOL_IP4); - - /* *INDENT-OFF* */ - hash_foreach (handle, sm_index, app->listeners_table, ({ - listener = listen_session_get_from_handle (handle); - if (listener->session_type == sst - && listener->enqueue_epoch != SESSION_PROXY_LISTENER_INDEX) - return listener; - })); - /* *INDENT-ON* 
*/ - - return 0; -} - -u8 -app_worker_application_is_builtin (app_worker_t * app_wrk) -{ - return app_wrk->app_is_builtin; -} - -stream_session_t * -application_proxy_listener (app_worker_t * app, u8 fib_proto, - u8 transport_proto) -{ - stream_session_t *listener; - u64 handle; - u32 sm_index; - u8 sst; - - sst = session_type_from_proto_and_ip (transport_proto, - fib_proto == FIB_PROTOCOL_IP4); - - /* *INDENT-OFF* */ - hash_foreach (handle, sm_index, app->listeners_table, ({ - listener = listen_session_get_from_handle (handle); - if (listener->session_type == sst - && listener->enqueue_epoch == SESSION_PROXY_LISTENER_INDEX) - return listener; - })); - /* *INDENT-ON* */ - - return 0; -} - static clib_error_t * application_start_stop_proxy_fib_proto (application_t * app, u8 fib_proto, u8 transport_proto, u8 is_start) @@ -1137,8 +1265,9 @@ application_start_stop_proxy_fib_proto (application_t * app, u8 fib_proto, session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; transport_connection_t *tc; app_worker_t *app_wrk; - stream_session_t *s; - u64 handle; + app_listener_t *al; + session_t *s; + u32 flags; /* TODO decide if we want proxy to be enabled for all workers */ app_wrk = application_get_default_worker (app); @@ -1152,14 +1281,21 @@ application_start_stop_proxy_fib_proto (application_t * app, u8 fib_proto, sep.sw_if_index = app_ns->sw_if_index; sep.transport_proto = transport_proto; sep.app_wrk_index = app_wrk->wrk_index; /* only default */ - application_start_listen (app, &sep, &handle); - s = listen_session_get_from_handle (handle); + + /* force global scope listener */ + flags = app->flags; + app->flags &= ~APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE; + app_listener_alloc_and_init (app, &sep, &al); + app->flags = flags; + + app_worker_start_listen (app_wrk, al); + s = listen_session_get (al->session_index); s->enqueue_epoch = SESSION_PROXY_LISTENER_INDEX; } } else { - s = application_proxy_listener (app_wrk, fib_proto, transport_proto); + s = app_worker_proxy_listener 
(app_wrk, fib_proto, transport_proto); ASSERT (s); } @@ -1273,707 +1409,8 @@ application_get_segment_manager_properties (u32 app_index) return &app->sm_properties; } -static inline int -app_enqueue_evt (svm_msg_q_t * mq, svm_msg_q_msg_t * msg, u8 lock) -{ - if (PREDICT_FALSE (svm_msg_q_is_full (mq))) - { - clib_warning ("evt q full"); - svm_msg_q_free_msg (mq, msg); - if (lock) - svm_msg_q_unlock (mq); - return -1; - } - - if (lock) - { - svm_msg_q_add_and_unlock (mq, msg); - return 0; - } - - /* Even when not locking the ring, we must wait for queue mutex */ - if (svm_msg_q_add (mq, msg, SVM_Q_WAIT)) - { - clib_warning ("msg q add returned"); - return -1; - } - return 0; -} - -static inline int -app_send_io_evt_rx (app_worker_t * app_wrk, stream_session_t * s, u8 lock) -{ - session_event_t *evt; - svm_msg_q_msg_t msg; - svm_msg_q_t *mq; - - if (PREDICT_FALSE (s->session_state != SESSION_STATE_READY - && s->session_state != SESSION_STATE_LISTENING)) - { - /* Session is closed so app will never clean up. 
Flush rx fifo */ - if (s->session_state == SESSION_STATE_CLOSED) - svm_fifo_dequeue_drop_all (s->server_rx_fifo); - return 0; - } - - if (app_worker_application_is_builtin (app_wrk)) - { - application_t *app = application_get (app_wrk->app_index); - return app->cb_fns.builtin_app_rx_callback (s); - } - - if (svm_fifo_has_event (s->server_rx_fifo) - || svm_fifo_is_empty (s->server_rx_fifo)) - return 0; - - mq = app_wrk->event_queue; - if (lock) - svm_msg_q_lock (mq); - - if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))) - { - clib_warning ("evt q rings full"); - if (lock) - svm_msg_q_unlock (mq); - return -1; - } - - msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); - ASSERT (!svm_msg_q_msg_is_invalid (&msg)); - - evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); - evt->fifo = s->server_rx_fifo; - evt->event_type = FIFO_EVENT_APP_RX; - - (void) svm_fifo_set_event (s->server_rx_fifo); - - if (app_enqueue_evt (mq, &msg, lock)) - return -1; - return 0; -} - -static inline int -app_send_io_evt_tx (app_worker_t * app_wrk, stream_session_t * s, u8 lock) -{ - svm_msg_q_t *mq; - session_event_t *evt; - svm_msg_q_msg_t msg; - - if (app_worker_application_is_builtin (app_wrk)) - return 0; - - mq = app_wrk->event_queue; - if (lock) - svm_msg_q_lock (mq); - - if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))) - { - clib_warning ("evt q rings full"); - if (lock) - svm_msg_q_unlock (mq); - return -1; - } - - msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); - ASSERT (!svm_msg_q_msg_is_invalid (&msg)); - - evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); - evt->event_type = FIFO_EVENT_APP_TX; - evt->fifo = s->server_tx_fifo; - - return app_enqueue_evt (mq, &msg, lock); -} - -/* *INDENT-OFF* */ -typedef int (app_send_evt_handler_fn) (app_worker_t *app, - stream_session_t *s, - u8 lock); -static app_send_evt_handler_fn * const app_send_evt_handler_fns[3] = { - app_send_io_evt_rx, - 0, - 
app_send_io_evt_tx, -}; -/* *INDENT-ON* */ - -/** - * Send event to application - * - * Logic from queue perspective is non-blocking. If there's - * not enough space to enqueue a message, we return. - */ -int -app_worker_send_event (app_worker_t * app, stream_session_t * s, u8 evt_type) -{ - ASSERT (app && evt_type <= FIFO_EVENT_APP_TX); - return app_send_evt_handler_fns[evt_type] (app, s, 0 /* lock */ ); -} - -/** - * Send event to application - * - * Logic from queue perspective is blocking. However, if queue is full, - * we return. - */ -int -app_worker_lock_and_send_event (app_worker_t * app, stream_session_t * s, - u8 evt_type) -{ - return app_send_evt_handler_fns[evt_type] (app, s, 1 /* lock */ ); -} - -local_session_t * -application_local_session_alloc (app_worker_t * app_wrk) -{ - local_session_t *s; - pool_get (app_wrk->local_sessions, s); - clib_memset (s, 0, sizeof (*s)); - s->app_wrk_index = app_wrk->wrk_index; - s->session_index = s - app_wrk->local_sessions; - s->session_type = session_type_from_proto_and_ip (TRANSPORT_PROTO_NONE, 0); - return s; -} - -void -application_local_session_free (app_worker_t * app, local_session_t * s) -{ - pool_put (app->local_sessions, s); - if (CLIB_DEBUG) - clib_memset (s, 0xfc, sizeof (*s)); -} - -local_session_t * -application_get_local_session (app_worker_t * app_wrk, u32 session_index) -{ - if (pool_is_free_index (app_wrk->local_sessions, session_index)) - return 0; - return pool_elt_at_index (app_wrk->local_sessions, session_index); -} - -local_session_t * -application_get_local_session_from_handle (session_handle_t handle) -{ - app_worker_t *server_wrk; - u32 session_index, server_wrk_index; - local_session_parse_handle (handle, &server_wrk_index, &session_index); - server_wrk = app_worker_get_if_valid (server_wrk_index); - if (!server_wrk) - return 0; - return application_get_local_session (server_wrk, session_index); -} - -local_session_t * -application_local_listen_session_alloc (application_t * app) -{ - 
local_session_t *ll; - pool_get (app->local_listen_sessions, ll); - clib_memset (ll, 0, sizeof (*ll)); - return ll; -} - -u32 -application_local_listener_index (application_t * app, local_session_t * ll) -{ - return (ll - app->local_listen_sessions); -} - -void -application_local_listen_session_free (application_t * app, - local_session_t * ll) -{ - pool_put (app->local_listen_sessions, ll); - if (CLIB_DEBUG) - clib_memset (ll, 0xfb, sizeof (*ll)); -} - -int -application_start_local_listen (application_t * app, - session_endpoint_cfg_t * sep_ext, - session_handle_t * handle) -{ - app_listener_t *app_listener; - session_endpoint_t *sep; - app_worker_t *app_wrk; - session_handle_t lh; - local_session_t *ll; - u32 table_index; - - sep = (session_endpoint_t *) sep_ext; - table_index = application_local_session_table (app); - app_wrk = app_worker_get (sep_ext->app_wrk_index); - - /* An exact sep match, as opposed to session_lookup_local_listener */ - lh = session_lookup_endpoint_listener (table_index, sep, 1); - if (lh != SESSION_INVALID_HANDLE) - { - ll = application_get_local_listener_w_handle (lh); - if (ll->app_index != app->app_index) - return VNET_API_ERROR_ADDRESS_IN_USE; - - if (ll->app_wrk_index == app_wrk->wrk_index) - return VNET_API_ERROR_ADDRESS_IN_USE; - - app_listener = app_local_listener_get (app, ll->listener_db_index); - app_listener->workers = clib_bitmap_set (app_listener->workers, - app_wrk->wrk_map_index, 1); - *handle = application_local_session_handle (ll); - return 0; - } - - ll = application_local_listen_session_alloc (app); - ll->session_type = session_type_from_proto_and_ip (TRANSPORT_PROTO_NONE, 0); - ll->app_wrk_index = app_wrk->app_index; - ll->session_index = application_local_listener_index (app, ll); - ll->port = sep_ext->port; - /* Store the original session type for the unbind */ - ll->listener_session_type = - session_type_from_proto_and_ip (sep_ext->transport_proto, - sep_ext->is_ip4); - ll->transport_listener_index = ~0; - 
ll->app_index = app->app_index; - - app_listener = app_local_listener_alloc (app); - ll->listener_db_index = app_listener->al_index; - app_listener->workers = clib_bitmap_set (app_listener->workers, - app_wrk->wrk_map_index, 1); - - *handle = application_local_session_handle (ll); - session_lookup_add_session_endpoint (table_index, sep, *handle); - - return 0; -} - -/** - * Clean up local session table. If we have a listener session use it to - * find the port and proto. If not, the handle must be a local table handle - * so parse it. - */ -int -application_stop_local_listen (u32 app_index, u32 wrk_map_index, - session_handle_t lh) -{ - session_endpoint_t sep = SESSION_ENDPOINT_NULL; - u32 table_index, ll_index, server_index; - app_listener_t *app_listener; - app_worker_t *server_wrk; - stream_session_t *sl = 0; - local_session_t *ll, *ls; - application_t *server; - - server = application_get (app_index); - table_index = application_local_session_table (server); - - /* We have both local and global table binds. Figure from global what - * the sep we should be cleaning up is. 
- */ - if (!session_handle_is_local (lh)) - { - sl = listen_session_get_from_handle (lh); - if (!sl || listen_session_get_local_session_endpoint (sl, &sep)) - { - clib_warning ("broken listener"); - return -1; - } - lh = session_lookup_endpoint_listener (table_index, &sep, 0); - if (lh == SESSION_INVALID_HANDLE) - return -1; - } - - local_session_parse_handle (lh, &server_index, &ll_index); - if (PREDICT_FALSE (server_index != app_index)) - { - clib_warning ("app %u does not own local handle 0x%lx", app_index, lh); - return -1; - } - - ll = application_get_local_listen_session (server, ll_index); - if (PREDICT_FALSE (!ll)) - { - clib_warning ("no local listener"); - return -1; - } - - app_listener = app_local_listener_get (server, ll->listener_db_index); - if (!clib_bitmap_get (app_listener->workers, wrk_map_index)) - { - clib_warning ("app wrk %u not listening on handle %lu", wrk_map_index, - lh); - return -1; - } - - server_wrk = application_get_worker (server, wrk_map_index); - /* *INDENT-OFF* */ - pool_foreach (ls, server_wrk->local_sessions, ({ - if (ls->listener_index == ll->session_index) - application_local_session_disconnect (server_wrk->app_index, ls); - })); - /* *INDENT-ON* */ - - clib_bitmap_set_no_check (app_listener->workers, wrk_map_index, 0); - if (clib_bitmap_is_zero (app_listener->workers)) - { - app_local_listener_free (server, app_listener); - application_local_listener_session_endpoint (ll, &sep); - session_lookup_del_session_endpoint (table_index, &sep); - application_local_listen_session_free (server, ll); - } - - return 0; -} - -static void -application_local_session_fix_eventds (svm_msg_q_t * sq, svm_msg_q_t * cq) -{ - int fd; - - /* - * segment manager initializes only the producer eventds, since vpp is - * typically the producer. But for local sessions, we also pass to the - * apps the mqs they listen on for events from peer apps, so they are also - * consumer fds. 
- */ - fd = svm_msg_q_get_producer_eventfd (sq); - svm_msg_q_set_consumer_eventfd (sq, fd); - fd = svm_msg_q_get_producer_eventfd (cq); - svm_msg_q_set_consumer_eventfd (cq, fd); -} - -int -application_local_session_connect (app_worker_t * client_wrk, - app_worker_t * server_wrk, - local_session_t * ll, u32 opaque) -{ - u32 seg_size, evt_q_sz, evt_q_elts, margin = 16 << 10; - u32 round_rx_fifo_sz, round_tx_fifo_sz, sm_index; - segment_manager_properties_t *props, *cprops; - int rv, has_transport, seg_index; - svm_fifo_segment_private_t *seg; - application_t *server, *client; - segment_manager_t *sm; - local_session_t *ls; - svm_msg_q_t *sq, *cq; - u64 segment_handle; - - ls = application_local_session_alloc (server_wrk); - server = application_get (server_wrk->app_index); - client = application_get (client_wrk->app_index); - - props = application_segment_manager_properties (server); - cprops = application_segment_manager_properties (client); - evt_q_elts = props->evt_q_size + cprops->evt_q_size; - evt_q_sz = segment_manager_evt_q_expected_size (evt_q_elts); - round_rx_fifo_sz = 1 << max_log2 (props->rx_fifo_size); - round_tx_fifo_sz = 1 << max_log2 (props->tx_fifo_size); - seg_size = round_rx_fifo_sz + round_tx_fifo_sz + evt_q_sz + margin; - - has_transport = session_has_transport ((stream_session_t *) ll); - if (!has_transport) - { - /* Local sessions don't have backing transport */ - ls->port = ll->port; - sm = application_get_local_segment_manager (server_wrk); - } - else - { - stream_session_t *sl = (stream_session_t *) ll; - transport_connection_t *tc; - tc = listen_session_get_transport (sl); - ls->port = tc->lcl_port; - sm = app_worker_get_listen_segment_manager (server_wrk, sl); - } - - seg_index = segment_manager_add_segment (sm, seg_size); - if (seg_index < 0) - { - clib_warning ("failed to add new cut-through segment"); - return seg_index; - } - seg = segment_manager_get_segment_w_lock (sm, seg_index); - sq = segment_manager_alloc_queue (seg, props); - 
cq = segment_manager_alloc_queue (seg, cprops); - - if (props->use_mq_eventfd) - application_local_session_fix_eventds (sq, cq); - - ls->server_evt_q = pointer_to_uword (sq); - ls->client_evt_q = pointer_to_uword (cq); - rv = segment_manager_try_alloc_fifos (seg, props->rx_fifo_size, - props->tx_fifo_size, - &ls->server_rx_fifo, - &ls->server_tx_fifo); - if (rv) - { - clib_warning ("failed to add fifos in cut-through segment"); - segment_manager_segment_reader_unlock (sm); - goto failed; - } - sm_index = segment_manager_index (sm); - ls->server_rx_fifo->ct_session_index = ls->session_index; - ls->server_tx_fifo->ct_session_index = ls->session_index; - ls->server_rx_fifo->segment_manager = sm_index; - ls->server_tx_fifo->segment_manager = sm_index; - ls->server_rx_fifo->segment_index = seg_index; - ls->server_tx_fifo->segment_index = seg_index; - ls->svm_segment_index = seg_index; - ls->listener_index = ll->session_index; - ls->client_wrk_index = client_wrk->wrk_index; - ls->client_opaque = opaque; - ls->listener_session_type = ll->session_type; - ls->session_state = SESSION_STATE_READY; - - segment_handle = segment_manager_segment_handle (sm, seg); - if ((rv = server->cb_fns.add_segment_callback (server_wrk->api_client_index, - segment_handle))) - { - clib_warning ("failed to notify server of new segment"); - segment_manager_segment_reader_unlock (sm); - goto failed; - } - segment_manager_segment_reader_unlock (sm); - if ((rv = server->cb_fns.session_accept_callback ((stream_session_t *) ls))) - { - clib_warning ("failed to send accept cut-through notify to server"); - goto failed; - } - if (server->flags & APP_OPTIONS_FLAGS_IS_BUILTIN) - application_local_session_connect_notify (ls); - - return 0; - -failed: - if (!has_transport) - segment_manager_del_segment (sm, seg); - return rv; -} - -static u64 -application_client_local_connect_key (local_session_t * ls) -{ - return (((u64) ls->app_wrk_index) << 32 | (u64) ls->session_index); -} - -static void 
-application_client_local_connect_key_parse (u64 key, u32 * app_wrk_index, - u32 * session_index) -{ - *app_wrk_index = key >> 32; - *session_index = key & 0xFFFFFFFF; -} - -int -application_local_session_connect_notify (local_session_t * ls) -{ - svm_fifo_segment_private_t *seg; - app_worker_t *client_wrk, *server_wrk; - segment_manager_t *sm; - application_t *client; - int rv, is_fail = 0; - u64 segment_handle; - u64 client_key; - - client_wrk = app_worker_get (ls->client_wrk_index); - server_wrk = app_worker_get (ls->app_wrk_index); - client = application_get (client_wrk->app_index); - - sm = application_get_local_segment_manager_w_session (server_wrk, ls); - seg = segment_manager_get_segment_w_lock (sm, ls->svm_segment_index); - segment_handle = segment_manager_segment_handle (sm, seg); - if ((rv = client->cb_fns.add_segment_callback (client_wrk->api_client_index, - segment_handle))) - { - clib_warning ("failed to notify client %u of new segment", - ls->client_wrk_index); - segment_manager_segment_reader_unlock (sm); - application_local_session_disconnect (ls->client_wrk_index, ls); - is_fail = 1; - } - else - { - segment_manager_segment_reader_unlock (sm); - } - - client->cb_fns.session_connected_callback (client_wrk->wrk_index, - ls->client_opaque, - (stream_session_t *) ls, - is_fail); - - client_key = application_client_local_connect_key (ls); - hash_set (client_wrk->local_connects, client_key, client_key); - return 0; -} - -int -application_local_session_cleanup (app_worker_t * client_wrk, - app_worker_t * server_wrk, - local_session_t * ls) -{ - svm_fifo_segment_private_t *seg; - stream_session_t *listener; - segment_manager_t *sm; - u64 client_key; - u8 has_transport; - - /* Retrieve listener transport type as it is the one that decides where - * the fifos are allocated */ - has_transport = application_local_session_listener_has_transport (ls); - if (!has_transport) - sm = application_get_local_segment_manager_w_session (server_wrk, ls); - else - { - 
listener = listen_session_get (ls->listener_index); - sm = app_worker_get_listen_segment_manager (server_wrk, listener); - } - - seg = segment_manager_get_segment (sm, ls->svm_segment_index); - if (client_wrk) - { - client_key = application_client_local_connect_key (ls); - hash_unset (client_wrk->local_connects, client_key); - } - - if (!has_transport) - { - application_t *server = application_get (server_wrk->app_index); - u64 segment_handle = segment_manager_segment_handle (sm, seg); - server->cb_fns.del_segment_callback (server_wrk->api_client_index, - segment_handle); - if (client_wrk) - { - application_t *client = application_get (client_wrk->app_index); - client->cb_fns.del_segment_callback (client_wrk->api_client_index, - segment_handle); - } - segment_manager_del_segment (sm, seg); - } - - application_local_session_free (server_wrk, ls); - - return 0; -} - -int -application_local_session_disconnect (u32 app_index, local_session_t * ls) -{ - app_worker_t *client_wrk, *server_wrk; - u8 is_server = 0, is_client = 0; - application_t *app; - - app = application_get_if_valid (app_index); - if (!app) - return 0; - - client_wrk = app_worker_get_if_valid (ls->client_wrk_index); - server_wrk = app_worker_get (ls->app_wrk_index); - - if (server_wrk->app_index == app_index) - is_server = 1; - else if (client_wrk && client_wrk->app_index == app_index) - is_client = 1; - - if (!is_server && !is_client) - { - clib_warning ("app %u is neither client nor server for session 0x%lx", - app_index, application_local_session_handle (ls)); - return VNET_API_ERROR_INVALID_VALUE; - } - - if (ls->session_state == SESSION_STATE_CLOSED) - return application_local_session_cleanup (client_wrk, server_wrk, ls); - - if (app_index == ls->client_wrk_index) - { - mq_send_local_session_disconnected_cb (ls->app_wrk_index, ls); - } - else - { - if (!client_wrk) - { - return application_local_session_cleanup (client_wrk, server_wrk, - ls); - } - else if (ls->session_state < SESSION_STATE_READY) - 
{ - application_t *client = application_get (client_wrk->app_index); - client->cb_fns.session_connected_callback (client_wrk->wrk_index, - ls->client_opaque, - (stream_session_t *) ls, - 1 /* is_fail */ ); - ls->session_state = SESSION_STATE_CLOSED; - return application_local_session_cleanup (client_wrk, server_wrk, - ls); - } - else - { - mq_send_local_session_disconnected_cb (client_wrk->wrk_index, ls); - } - } - - ls->session_state = SESSION_STATE_CLOSED; - - return 0; -} - -int -application_local_session_disconnect_w_index (u32 app_wrk_index, u32 ls_index) -{ - app_worker_t *app_wrk; - local_session_t *ls; - app_wrk = app_worker_get (app_wrk_index); - ls = application_get_local_session (app_wrk, ls_index); - return application_local_session_disconnect (app_wrk_index, ls); -} - -void -app_worker_local_sessions_free (app_worker_t * app_wrk) -{ - u32 index, server_wrk_index, session_index; - u64 handle, *handles = 0; - app_worker_t *server_wrk; - segment_manager_t *sm; - local_session_t *ls; - int i; - - /* - * Local sessions - */ - if (app_wrk->local_sessions) - { - /* *INDENT-OFF* */ - pool_foreach (ls, app_wrk->local_sessions, ({ - application_local_session_disconnect (app_wrk->wrk_index, ls); - })); - /* *INDENT-ON* */ - } - - /* - * Local connects - */ - vec_reset_length (handles); - /* *INDENT-OFF* */ - hash_foreach (handle, index, app_wrk->local_connects, ({ - vec_add1 (handles, handle); - })); - /* *INDENT-ON* */ - - for (i = 0; i < vec_len (handles); i++) - { - application_client_local_connect_key_parse (handles[i], - &server_wrk_index, - &session_index); - server_wrk = app_worker_get_if_valid (server_wrk_index); - if (server_wrk) - { - ls = application_get_local_session (server_wrk, session_index); - application_local_session_disconnect (app_wrk->wrk_index, ls); - } - } - - sm = segment_manager_get (app_wrk->local_segment_manager); - sm->app_wrk_index = SEGMENT_MANAGER_INVALID_APP_INDEX; - segment_manager_del (sm); -} - -clib_error_t * 
-vnet_app_add_tls_cert (vnet_app_add_tls_cert_args_t * a) +clib_error_t * +vnet_app_add_tls_cert (vnet_app_add_tls_cert_args_t * a) { application_t *app; app = application_get (a->app_index); @@ -1996,45 +1433,6 @@ vnet_app_add_tls_key (vnet_app_add_tls_key_args_t * a) return 0; } -u8 * -format_app_worker_listener (u8 * s, va_list * args) -{ - app_worker_t *app_wrk = va_arg (*args, app_worker_t *); - u64 handle = va_arg (*args, u64); - u32 sm_index = va_arg (*args, u32); - int verbose = va_arg (*args, int); - stream_session_t *listener; - const u8 *app_name; - u8 *str; - - if (!app_wrk) - { - if (verbose) - s = format (s, "%-40s%-25s%=10s%-15s%-15s%-10s", "Connection", "App", - "Wrk", "API Client", "ListenerID", "SegManager"); - else - s = format (s, "%-40s%-25s%=10s", "Connection", "App", "Wrk"); - - return s; - } - - app_name = application_name_from_index (app_wrk->app_index); - listener = listen_session_get_from_handle (handle); - str = format (0, "%U", format_stream_session, listener, verbose); - - if (verbose) - { - char buf[32]; - sprintf (buf, "%u(%u)", app_wrk->wrk_map_index, app_wrk->wrk_index); - s = format (s, "%-40s%-25s%=10s%-15u%-15u%-10u", str, app_name, - buf, app_wrk->api_client_index, handle, sm_index); - } - else - s = format (s, "%-40s%-25s%=10u", str, app_name, app_wrk->wrk_map_index); - - return s; -} - static void application_format_listeners (application_t * app, int verbose) { @@ -2064,69 +1462,6 @@ application_format_listeners (application_t * app, int verbose) /* *INDENT-ON* */ } -static void -app_worker_format_connects (app_worker_t * app_wrk, int verbose) -{ - svm_fifo_segment_private_t *fifo_segment; - vlib_main_t *vm = vlib_get_main (); - segment_manager_t *sm; - const u8 *app_name; - u8 *s = 0; - - /* Header */ - if (!app_wrk) - { - if (verbose) - vlib_cli_output (vm, "%-40s%-20s%-15s%-10s", "Connection", "App", - "API Client", "SegManager"); - else - vlib_cli_output (vm, "%-40s%-20s", "Connection", "App"); - return; - } - - if 
(app_wrk->connects_seg_manager == (u32) ~ 0) - return; - - app_name = application_name_from_index (app_wrk->app_index); - - /* Across all fifo segments */ - sm = segment_manager_get (app_wrk->connects_seg_manager); - - /* *INDENT-OFF* */ - segment_manager_foreach_segment_w_lock (fifo_segment, sm, ({ - svm_fifo_t *fifo; - u8 *str; - - fifo = svm_fifo_segment_get_fifo_list (fifo_segment); - while (fifo) - { - u32 session_index, thread_index; - stream_session_t *session; - - session_index = fifo->master_session_index; - thread_index = fifo->master_thread_index; - - session = session_get (session_index, thread_index); - str = format (0, "%U", format_stream_session, session, verbose); - - if (verbose) - s = format (s, "%-40s%-20s%-15u%-10u", str, app_name, - app_wrk->api_client_index, app_wrk->connects_seg_manager); - else - s = format (s, "%-40s%-20s", str, app_name); - - vlib_cli_output (vm, "%v", s); - vec_reset_length (s); - vec_free (str); - - fifo = fifo->next; - } - vec_free (s); - })); - /* *INDENT-ON* */ - -} - static void application_format_connects (application_t * app, int verbose) { @@ -2147,40 +1482,6 @@ application_format_connects (application_t * app, int verbose) /* *INDENT-ON* */ } -static void -app_worker_format_local_sessions (app_worker_t * app_wrk, int verbose) -{ - vlib_main_t *vm = vlib_get_main (); - local_session_t *ls; - transport_proto_t tp; - u8 *conn = 0; - - /* Header */ - if (app_wrk == 0) - { - vlib_cli_output (vm, "%-40s%-15s%-20s", "Connection", "ServerApp", - "ClientApp"); - return; - } - - if (!pool_elts (app_wrk->local_sessions) - && !pool_elts (app_wrk->local_connects)) - return; - - /* *INDENT-OFF* */ - pool_foreach (ls, app_wrk->local_sessions, ({ - tp = session_type_transport_proto(ls->listener_session_type); - conn = format (0, "[L][%U] *:%u", format_transport_proto_short, tp, - ls->port); - vlib_cli_output (vm, "%-40v%-15u%-20u", conn, ls->app_wrk_index, - ls->client_wrk_index); - vec_reset_length (conn); - })); - /* 
*INDENT-ON* */ - - vec_free (conn); -} - static void application_format_local_sessions (application_t * app, int verbose) { @@ -2222,43 +1523,6 @@ application_format_local_sessions (application_t * app, int verbose) /* *INDENT-ON* */ } -static void -app_worker_format_local_connects (app_worker_t * app, int verbose) -{ - vlib_main_t *vm = vlib_get_main (); - u32 app_wrk_index, session_index; - app_worker_t *server_wrk; - local_session_t *ls; - u64 client_key; - u64 value; - - /* Header */ - if (app == 0) - { - if (verbose) - vlib_cli_output (vm, "%-40s%-15s%-20s%-10s", "Connection", "App", - "Peer App", "SegManager"); - else - vlib_cli_output (vm, "%-40s%-15s%-20s", "Connection", "App", - "Peer App"); - return; - } - - if (!app->local_connects) - return; - - /* *INDENT-OFF* */ - hash_foreach (client_key, value, app->local_connects, ({ - application_client_local_connect_key_parse (client_key, &app_wrk_index, - &session_index); - server_wrk = app_worker_get (app_wrk_index); - ls = application_get_local_session (server_wrk, session_index); - vlib_cli_output (vm, "%-40s%-15s%-20s", "TODO", ls->app_wrk_index, - ls->client_wrk_index); - })); - /* *INDENT-ON* */ -} - static void application_format_local_connects (application_t * app, int verbose) { @@ -2279,19 +1543,6 @@ application_format_local_connects (application_t * app, int verbose) /* *INDENT-ON* */ } -u8 * -format_application_worker (u8 * s, va_list * args) -{ - app_worker_t *app_wrk = va_arg (*args, app_worker_t *); - u32 indent = 1; - - s = format (s, "%U wrk-index %u app-index %u map-index %u " - "api-client-index %d\n", format_white_space, indent, - app_wrk->wrk_index, app_wrk->app_index, app_wrk->wrk_map_index, - app_wrk->api_client_index); - return s; -} - u8 * format_application (u8 * s, va_list * args) { @@ -2329,7 +1580,7 @@ format_application (u8 * s, va_list * args) /* *INDENT-OFF* */ pool_foreach (wrk_map, app->worker_maps, ({ app_wrk = app_worker_get (wrk_map->wrk_index); - s = format (s, "%U", 
format_application_worker, app_wrk); + s = format (s, "%U", format_app_worker, app_wrk); })); /* *INDENT-ON* */ diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h index 1d2064df62e0..0eaca8bc95ab 100644 --- a/src/vnet/session/application.h +++ b/src/vnet/session/application.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -16,9 +16,10 @@ #ifndef SRC_VNET_SESSION_APPLICATION_H_ #define SRC_VNET_SESSION_APPLICATION_H_ -#include -#include +#include #include +#include +#include #define APP_DEBUG 0 @@ -28,35 +29,6 @@ #define APP_DBG(_fmt, _args...) #endif -typedef struct _stream_session_cb_vft -{ - /** Notify server of new segment */ - int (*add_segment_callback) (u32 api_client_index, u64 segment_handle); - - /** Notify server of new segment */ - int (*del_segment_callback) (u32 api_client_index, u64 segment_handle); - - /** Notify server of newly accepted session */ - int (*session_accept_callback) (stream_session_t * new_session); - - /** Connection request callback */ - int (*session_connected_callback) (u32 app_wrk_index, u32 opaque, - stream_session_t * s, u8 code); - - /** Notify app that session is closing */ - void (*session_disconnect_callback) (stream_session_t * s); - - /** Notify app that session was reset */ - void (*session_reset_callback) (stream_session_t * s); - - /** Direct RX callback for built-in application */ - int (*builtin_app_rx_callback) (stream_session_t * session); - - /** Direct TX callback for built-in application */ - int (*builtin_app_tx_callback) (stream_session_t * session); - -} session_cb_vft_t; - typedef struct app_worker_ { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -116,6 +88,9 @@ typedef struct app_listener_ clib_bitmap_t *workers; /**< workers 
accepting connections */ u32 accept_rotor; /**< last worker to accept a connection */ u32 al_index; + u32 app_index; + u32 local_index; + u32 session_index; } app_listener_t; typedef struct application_ @@ -174,11 +149,6 @@ typedef struct app_main_ */ application_t *app_pool; - /** - * Pool of workers associated to apps - */ - app_worker_t *workers; - /** * Hash table of apps by api client index */ @@ -190,14 +160,6 @@ typedef struct app_main_ uword *app_by_name; } app_main_t; -#define foreach_app_init_args \ - _(u32, api_client_index) \ - _(u8 *, name) \ - _(u64 *, options) \ - _(u8 *, namespace_id) \ - _(session_cb_vft_t *, session_cb_vft) \ - _(u32, app_index) \ - typedef struct app_init_args_ { #define _(_type, _name) _type _name; @@ -220,51 +182,27 @@ typedef struct _vnet_app_worker_add_del_args #define APP_NS_INVALID_INDEX ((u32)~0) #define APP_INVALID_SEGMENT_MANAGER_INDEX ((u32) ~0) -app_worker_t *app_worker_alloc (application_t * app); -int app_worker_alloc_and_init (application_t * app, app_worker_t ** wrk); -app_worker_t *app_worker_get (u32 wrk_index); -app_worker_t *app_worker_get_if_valid (u32 wrk_index); -application_t *app_worker_get_app (u32 wrk_index); -int app_worker_own_session (app_worker_t * app_wrk, stream_session_t * s); -void app_worker_free (app_worker_t * app_wrk); -int app_worker_open_session (app_worker_t * app, session_endpoint_t * tep, - u32 api_context); -segment_manager_t *app_worker_get_listen_segment_manager (app_worker_t *, - stream_session_t *); -segment_manager_t *app_worker_get_connect_segment_manager (app_worker_t *); -segment_manager_t - * app_worker_get_or_alloc_connect_segment_manager (app_worker_t *); -int app_worker_alloc_connects_segment_manager (app_worker_t * app); -int app_worker_add_segment_notify (u32 app_or_wrk, u64 segment_handle); -u32 app_worker_n_listeners (app_worker_t * app); -stream_session_t *app_worker_first_listener (app_worker_t * app, - u8 fib_proto, - u8 transport_proto); -u8 
app_worker_application_is_builtin (app_worker_t * app_wrk); -int app_worker_send_event (app_worker_t * app, stream_session_t * s, u8 evt); -int app_worker_lock_and_send_event (app_worker_t * app, stream_session_t * s, - u8 evt_type); -clib_error_t *vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a); - -int application_start_listen (application_t * app, - session_endpoint_cfg_t * tep, - session_handle_t * handle); -int application_stop_listen (u32 app_index, u32 app_or_wrk, - session_handle_t handle); - -application_t *application_alloc (void); -int application_alloc_and_init (app_init_args_t * args); -void application_free (application_t * app); -void application_detach_process (application_t * app, u32 api_client_index); +app_listener_t *app_listener_get (application_t * app, u32 al_index); +int app_listener_alloc_and_init (application_t * app, + session_endpoint_cfg_t * sep, + app_listener_t ** listener); +void app_listener_cleanup (app_listener_t * app_listener); +session_handle_t app_listener_handle (app_listener_t * app_listener); +app_listener_t *app_listener_lookup (application_t * app, + session_endpoint_cfg_t * sep); +app_listener_t *app_listener_get_w_handle (session_handle_t handle); +app_listener_t *app_listener_get_w_session (session_t * ls); +app_worker_t *app_listener_select_worker (app_listener_t * al); +session_t *app_listener_get_session (app_listener_t * al); + application_t *application_get (u32 index); application_t *application_get_if_valid (u32 index); application_t *application_lookup (u32 api_client_index); application_t *application_lookup_name (const u8 * name); app_worker_t *application_get_worker (application_t * app, u32 wrk_index); app_worker_t *application_get_default_worker (application_t * app); -app_worker_t *application_listener_select_worker (stream_session_t * ls, - u8 is_local); - +app_worker_t *application_listener_select_worker (session_t * ls); +int application_change_listener_owner (session_t * s, app_worker_t * 
app_wrk); int application_is_proxy (application_t * app); int application_is_builtin (application_t * app); int application_is_builtin_proxy (application_t * app); @@ -284,76 +222,62 @@ segment_manager_properties_t * application_segment_manager_properties (application_t * app); /* - * Local session + * App worker */ -local_session_t *application_local_session_alloc (app_worker_t * app); -void application_local_session_free (app_worker_t * app, - local_session_t * ls); -local_session_t *application_get_local_session (app_worker_t * app, - u32 session_index); -local_session_t *application_get_local_session_from_handle (session_handle_t - handle); -local_session_t - * application_get_local_listen_session_from_handle (session_handle_t lh); -int application_start_local_listen (application_t * server, - session_endpoint_cfg_t * sep, - session_handle_t * handle); -int application_stop_local_listen (u32 app_index, u32 app_or_wrk, - session_handle_t lh); -int application_local_session_connect (app_worker_t * client, - app_worker_t * server, - local_session_t * ls, u32 opaque); -int application_local_session_connect_notify (local_session_t * ls); -int application_local_session_disconnect (u32 app_or_wrk, - local_session_t * ls); -int application_local_session_disconnect_w_index (u32 app_or_wrk, - u32 ls_index); -void app_worker_local_sessions_free (app_worker_t * app); - -always_inline u32 -local_session_id (local_session_t * ls) -{ - ASSERT (ls->session_index < (2 << 16)); - u32 app_or_wrk_index; - - if (ls->session_state == SESSION_STATE_LISTENING) - { - ASSERT (ls->app_index < (2 << 16)); - app_or_wrk_index = ls->app_index; - } - else - { - ASSERT (ls->app_wrk_index < (2 << 16)); - app_or_wrk_index = ls->app_wrk_index; - } - - return ((u32) app_or_wrk_index << 16 | (u32) ls->session_index); -} - -always_inline void -local_session_parse_id (u32 ls_id, u32 * app_or_wrk, u32 * session_index) -{ - *app_or_wrk = ls_id >> 16; - *session_index = ls_id & 0xFF; -} +app_worker_t 
*app_worker_alloc (application_t * app); +int application_alloc_worker_and_init (application_t * app, + app_worker_t ** wrk); +app_worker_t *app_worker_get (u32 wrk_index); +app_worker_t *app_worker_get_if_valid (u32 wrk_index); +application_t *app_worker_get_app (u32 wrk_index); +int app_worker_own_session (app_worker_t * app_wrk, session_t * s); +void app_worker_free (app_worker_t * app_wrk); +int app_worker_connect_session (app_worker_t * app, session_endpoint_t * tep, + u32 api_context); +int app_worker_start_listen (app_worker_t * app_wrk, app_listener_t * lstnr); +int app_worker_stop_listen (app_worker_t * app_wrk, app_listener_t * al); +segment_manager_t *app_worker_get_listen_segment_manager (app_worker_t *, + session_t *); +segment_manager_t *app_worker_get_connect_segment_manager (app_worker_t *); +segment_manager_t + * app_worker_get_or_alloc_connect_segment_manager (app_worker_t *); +int app_worker_alloc_connects_segment_manager (app_worker_t * app); +int app_worker_add_segment_notify (u32 app_or_wrk, u64 segment_handle); +u32 app_worker_n_listeners (app_worker_t * app); +session_t *app_worker_first_listener (app_worker_t * app, + u8 fib_proto, u8 transport_proto); +u8 app_worker_application_is_builtin (app_worker_t * app_wrk); +int app_worker_send_event (app_worker_t * app, session_t * s, u8 evt); +int app_worker_lock_and_send_event (app_worker_t * app, session_t * s, + u8 evt_type); +session_t *app_worker_proxy_listener (app_worker_t * app, u8 fib_proto, + u8 transport_proto); +u8 *format_app_worker (u8 * s, va_list * args); +u8 *format_app_worker_listener (u8 * s, va_list * args); +void app_worker_format_connects (app_worker_t * app_wrk, int verbose); +int vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a); -always_inline void -local_session_parse_handle (session_handle_t handle, u32 * app_or_wrk_index, - u32 * session_index) -{ - u32 bottom; - ASSERT ((handle >> 32) == SESSION_LOCAL_HANDLE_PREFIX); - bottom = (handle & 0xFFFFFFFF); - 
local_session_parse_id (bottom, app_or_wrk_index, session_index); -} +/* + * Local session + */ -always_inline session_handle_t -application_local_session_handle (local_session_t * ls) -{ - return ((u64) SESSION_LOCAL_HANDLE_PREFIX << 32) - | (u64) local_session_id (ls); -} +local_session_t *app_worker_local_session_alloc (app_worker_t * app); +void app_worker_local_session_free (app_worker_t * app, local_session_t * ls); +local_session_t *app_worker_get_local_session (app_worker_t * app, + u32 session_index); +local_session_t *app_worker_get_local_session_from_handle (session_handle_t + handle); +int app_worker_local_session_connect (app_worker_t * client, + app_worker_t * server, + local_session_t * ls, u32 opaque); +int app_worker_local_session_connect_notify (local_session_t * ls); +int app_worker_local_session_disconnect (u32 app_or_wrk, + local_session_t * ls); +int app_worker_local_session_disconnect_w_index (u32 app_or_wrk, + u32 ls_index); +void app_worker_format_local_sessions (app_worker_t * app_wrk, int verbose); +void app_worker_format_local_connects (app_worker_t * app, int verbose); always_inline local_session_t * application_get_local_listen_session (application_t * app, u32 session_index) diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c index c3c84fd8da9d..ae00292d918e 100644 --- a/src/vnet/session/application_interface.c +++ b/src/vnet/session/application_interface.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Cisco and/or its affiliates. + * Copyright (c) 2016-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -13,360 +13,13 @@ * limitations under the License. 
*/ #include - +#include #include -#include -#include /** @file VPP's application/session API bind/unbind/connect/disconnect calls */ -/* - * TLS server cert and keys to be used for testing only - */ -const char test_srv_crt_rsa[] = - "-----BEGIN CERTIFICATE-----\r\n" - "MIID5zCCAs+gAwIBAgIJALeMYCEHrTtJMA0GCSqGSIb3DQEBCwUAMIGJMQswCQYD\r\n" - "VQQGEwJVUzELMAkGA1UECAwCQ0ExETAPBgNVBAcMCFNhbiBKb3NlMQ4wDAYDVQQK\r\n" - "DAVDaXNjbzEOMAwGA1UECwwFZmQuaW8xFjAUBgNVBAMMDXRlc3R0bHMuZmQuaW8x\r\n" - "IjAgBgkqhkiG9w0BCQEWE3ZwcC1kZXZAbGlzdHMuZmQuaW8wHhcNMTgwMzA1MjEx\r\n" - "NTEyWhcNMjgwMzAyMjExNTEyWjCBiTELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNB\r\n" - "MREwDwYDVQQHDAhTYW4gSm9zZTEOMAwGA1UECgwFQ2lzY28xDjAMBgNVBAsMBWZk\r\n" - "LmlvMRYwFAYDVQQDDA10ZXN0dGxzLmZkLmlvMSIwIAYJKoZIhvcNAQkBFhN2cHAt\r\n" - "ZGV2QGxpc3RzLmZkLmlvMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA\r\n" - "4C1k8a1DuStgggqT4o09fP9sJ2dC54bxhS/Xk2VEfaIZ222WSo4X/syRVfVy9Yah\r\n" - "cpI1zJ/RDxaZSFhgA+nPZBrFMsrULkrdAOpOVj8eDEp9JuWdO2ODSoFnCvLxcYWB\r\n" - "Yc5kHryJpEaGJl1sFQSesnzMFty/59ta0stk0Fp8r5NhIjWvSovGzPo6Bhz+VS2c\r\n" - "ebIZh4x1t2hHaFcgm0qJoJ6DceReWCW8w+yOVovTolGGq+bpb2Hn7MnRSZ2K2NdL\r\n" - "+aLXpkZbS/AODP1FF2vTO1mYL290LO7/51vJmPXNKSDYMy5EvILr5/VqtjsFCwRL\r\n" - "Q4jcM/+GeHSAFWx4qIv0BwIDAQABo1AwTjAdBgNVHQ4EFgQUWa1SOB37xmT53tZQ\r\n" - "aXuLLhRI7U8wHwYDVR0jBBgwFoAUWa1SOB37xmT53tZQaXuLLhRI7U8wDAYDVR0T\r\n" - "BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAoUht13W4ya27NVzQuCMvqPWL3VM4\r\n" - "3xbPFk02FaGz/WupPu276zGlzJAZrbuDcQowwwU1Ni1Yygxl96s1c2M5rHDTrOKG\r\n" - "rK0hbkSFBo+i6I8u4HiiQ4rYmG0Hv6+sXn3of0HsbtDPGgWZoipPWDljPYEURu3e\r\n" - "3HRe/Dtsj9CakBoSDzs8ndWaBR+f4sM9Tk1cjD46Gq2T/qpSPXqKxEUXlzhdCAn4\r\n" - "twub17Bq2kykHpppCwPg5M+v30tHG/R2Go15MeFWbEJthFk3TZMjKL7UFs7fH+x2\r\n" - "wSonXb++jY+KmCb93C+soABBizE57g/KmiR2IxQ/LMjDik01RSUIaM0lLA==\r\n" - "-----END CERTIFICATE-----\r\n"; -const u32 test_srv_crt_rsa_len = sizeof (test_srv_crt_rsa); - -const char test_srv_key_rsa[] = - "-----BEGIN PRIVATE KEY-----\r\n" - 
"MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDgLWTxrUO5K2CC\r\n" - "CpPijT18/2wnZ0LnhvGFL9eTZUR9ohnbbZZKjhf+zJFV9XL1hqFykjXMn9EPFplI\r\n" - "WGAD6c9kGsUyytQuSt0A6k5WPx4MSn0m5Z07Y4NKgWcK8vFxhYFhzmQevImkRoYm\r\n" - "XWwVBJ6yfMwW3L/n21rSy2TQWnyvk2EiNa9Ki8bM+joGHP5VLZx5shmHjHW3aEdo\r\n" - "VyCbSomgnoNx5F5YJbzD7I5Wi9OiUYar5ulvYefsydFJnYrY10v5otemRltL8A4M\r\n" - "/UUXa9M7WZgvb3Qs7v/nW8mY9c0pINgzLkS8guvn9Wq2OwULBEtDiNwz/4Z4dIAV\r\n" - "bHioi/QHAgMBAAECggEBAMzGipP8+oT166U+NlJXRFifFVN1DvdhG9PWnOxGL+c3\r\n" - "ILmBBC08WQzmHshPemBvR6DZkA1H23cV5JTiLWrFtC00CvhXsLRMrE5+uWotI6yE\r\n" - "iofybMroHvD6/X5R510UX9hQ6MHu5ShLR5VZ9zXHz5MpTmB/60jG5dLx+jgcwBK8\r\n" - "LuGv2YB/WCUwT9QJ3YU2eaingnXtz/MrFbkbltrqlnBdlD+kTtw6Yac9y1XuuQXc\r\n" - "BPeulLNDuPolJVWbUvDBZrpt2dXTgz8ws1sv+wCNE0xwQJsqW4Nx3QkpibUL9RUr\r\n" - "CVbKlNfa9lopT6nGKlgX69R/uH35yh9AOsfasro6w0ECgYEA82UJ8u/+ORah+0sF\r\n" - "Q0FfW5MTdi7OAUHOz16pUsGlaEv0ERrjZxmAkHA/VRwpvDBpx4alCv0Hc39PFLIk\r\n" - "nhSsM2BEuBkTAs6/GaoNAiBtQVE/hN7awNRWVmlieS0go3Y3dzaE9IUMyj8sPOFT\r\n" - "5JdJ6BM69PHKCkY3dKdnnfpFEuECgYEA68mRpteunF1mdZgXs+WrN+uLlRrQR20F\r\n" - "ZyMYiUCH2Dtn26EzA2moy7FipIIrQcX/j+KhYNGM3e7MU4LymIO29E18mn8JODnH\r\n" - "sQOXzBTsf8A4yIVMkcuQD3bfb0JiUGYUPOidTp2N7IJA7+6Yc3vQOyb74lnKnJoO\r\n" - "gougPT2wS+cCgYAn7muzb6xFsXDhyW0Tm6YJYBfRS9yAWEuVufINobeBZPSl2cN1\r\n" - "Jrnw+HlrfTNbrJWuJmjtZJXUXQ6cVp2rUbjutNyRV4vG6iRwEXYQ40EJdkr1gZpi\r\n" - "CHQhuShuuPih2MNAy7EEbM+sXrDjTBR3bFqzuHPzu7dp+BshCFX3lRfAAQKBgGQt\r\n" - "K5i7IhCFDjb/+3IPLgOAK7mZvsvZ4eXD33TQ2eZgtut1PXtBtNl17/b85uv293Fm\r\n" - "VDISVcsk3eLNS8zIiT6afUoWlxAwXEs0v5WRfjl4radkGvgGiJpJYvyeM67877RB\r\n" - "EDSKc/X8ESLfOB44iGvZUEMG6zJFscx9DgN25iQZAoGAbyd+JEWwdVH9/K3IH1t2\r\n" - "PBkZX17kNWv+iVM1WyFjbe++vfKZCrOJiyiqhDeEqgrP3AuNMlaaduC3VRC3G5oV\r\n" - "Mj1tlhDWQ/qhvKdCKNdIVQYDE75nw+FRWV8yYkHAnXYW3tNoweDIwixE0hkPR1bc\r\n" - "oEjPLVNtx8SOj/M4rhaPT3I=\r\n" "-----END PRIVATE KEY-----\r\n"; -const u32 test_srv_key_rsa_len = sizeof (test_srv_key_rsa); - -#define 
app_interface_check_thread_and_barrier(_fn, _arg) \ - if (PREDICT_FALSE (!vlib_thread_is_main_w_barrier ())) \ - { \ - vlib_rpc_call_main_thread (_fn, (u8 *) _arg, sizeof(*_arg)); \ - return 0; \ - } - -static u8 -session_endpoint_is_local (session_endpoint_t * sep) -{ - return (ip_is_zero (&sep->ip, sep->is_ip4) - || ip_is_local_host (&sep->ip, sep->is_ip4)); -} - -static u8 -session_endpoint_is_zero (session_endpoint_t * sep) -{ - return ip_is_zero (&sep->ip, sep->is_ip4); -} - -u8 -session_endpoint_in_ns (session_endpoint_t * sep) -{ - u8 is_lep = session_endpoint_is_local (sep); - if (!is_lep && sep->sw_if_index != ENDPOINT_INVALID_INDEX - && !ip_interface_has_address (sep->sw_if_index, &sep->ip, sep->is_ip4)) - { - clib_warning ("sw_if_index %u not configured with ip %U", - sep->sw_if_index, format_ip46_address, &sep->ip, - sep->is_ip4); - return 0; - } - return (is_lep || ip_is_local (sep->fib_index, &sep->ip, sep->is_ip4)); -} - -int -api_parse_session_handle (u64 handle, u32 * session_index, u32 * thread_index) -{ - session_manager_main_t *smm = vnet_get_session_manager_main (); - stream_session_t *pool; - - *thread_index = handle & 0xFFFFFFFF; - *session_index = handle >> 32; - - if (*thread_index >= vec_len (smm->wrk)) - return VNET_API_ERROR_INVALID_VALUE; - - pool = smm->wrk[*thread_index].sessions; - - if (pool_is_free_index (pool, *session_index)) - return VNET_API_ERROR_INVALID_VALUE_2; - - return 0; -} - -static void -session_endpoint_update_for_app (session_endpoint_cfg_t * sep, - application_t * app, u8 is_connect) -{ - app_namespace_t *app_ns; - u32 ns_index, fib_index; - - ns_index = app->ns_index; - - /* App is a transport proto, so fetch the calling app's ns */ - if (app->flags & APP_OPTIONS_FLAGS_IS_TRANSPORT_APP) - { - app_worker_t *owner_wrk; - application_t *owner_app; - - owner_wrk = app_worker_get (sep->app_wrk_index); - owner_app = application_get (owner_wrk->app_index); - ns_index = owner_app->ns_index; - } - app_ns = app_namespace_get 
(ns_index); - if (!app_ns) - return; - - /* Ask transport and network to bind to/connect using local interface - * that "supports" app's namespace. This will fix our local connection - * endpoint. - */ - - /* If in default namespace and user requested a fib index use it */ - if (ns_index == 0 && sep->fib_index != ENDPOINT_INVALID_INDEX) - fib_index = sep->fib_index; - else - fib_index = sep->is_ip4 ? app_ns->ip4_fib_index : app_ns->ip6_fib_index; - sep->peer.fib_index = fib_index; - sep->fib_index = fib_index; - - if (!is_connect) - { - sep->sw_if_index = app_ns->sw_if_index; - } - else - { - if (app_ns->sw_if_index != APP_NAMESPACE_INVALID_INDEX - && sep->peer.sw_if_index != ENDPOINT_INVALID_INDEX - && sep->peer.sw_if_index != app_ns->sw_if_index) - clib_warning ("Local sw_if_index different from app ns sw_if_index"); - - sep->peer.sw_if_index = app_ns->sw_if_index; - } -} - -static inline int -vnet_bind_inline (vnet_bind_args_t * a) -{ - u64 ll_handle = SESSION_INVALID_HANDLE; - app_worker_t *app_wrk; - application_t *app; - int rv; - - app = application_get_if_valid (a->app_index); - if (!app) - { - SESSION_DBG ("app not attached"); - return VNET_API_ERROR_APPLICATION_NOT_ATTACHED; - } - app_wrk = application_get_worker (app, a->wrk_map_index); - a->sep_ext.app_wrk_index = app_wrk->wrk_index; - - session_endpoint_update_for_app (&a->sep_ext, app, 0 /* is_connect */ ); - if (!session_endpoint_in_ns (&a->sep)) - return VNET_API_ERROR_INVALID_VALUE_2; - - /* - * Add session endpoint to local session table. Only binds to "inaddr_any" - * (i.e., zero address) are added to local scope table. - */ - if (application_has_local_scope (app) - && session_endpoint_is_local (&a->sep)) - { - if ((rv = application_start_local_listen (app, &a->sep_ext, - &a->handle))) - return rv; - ll_handle = a->handle; - } - - if (!application_has_global_scope (app)) - return (ll_handle == SESSION_INVALID_HANDLE ? 
-1 : 0); - - /* - * Add session endpoint to global session table - */ - - /* Setup listen path down to transport */ - rv = application_start_listen (app, &a->sep_ext, &a->handle); - if (rv && ll_handle != SESSION_INVALID_HANDLE) - { - application_stop_local_listen (a->app_index, a->wrk_map_index, - ll_handle); - return rv; - } - - /* - * Store in local table listener the index of the transport layer - * listener. We'll need if if local listeners are hit and we need to - * return global handle - */ - if (ll_handle != SESSION_INVALID_HANDLE) - { - local_session_t *ll; - stream_session_t *tl; - ll = application_get_local_listener_w_handle (ll_handle); - tl = listen_session_get_from_handle (a->handle); - if (ll->transport_listener_index == ~0) - ll->transport_listener_index = tl->session_index; - } - return rv; -} - -static inline int -vnet_unbind_inline (vnet_unbind_args_t * a) -{ - application_t *app; - int rv; - - if (!(app = application_get_if_valid (a->app_index))) - { - SESSION_DBG ("app (%d) not attached", wrk_map_index); - return VNET_API_ERROR_APPLICATION_NOT_ATTACHED; - } - - if (application_has_local_scope (app)) - { - if ((rv = application_stop_local_listen (a->app_index, - a->wrk_map_index, a->handle))) - return rv; - } - - /* - * Clear the global scope table of the listener - */ - if (application_has_global_scope (app)) - return application_stop_listen (a->app_index, a->wrk_map_index, - a->handle); - return 0; -} - -static int -application_connect (vnet_connect_args_t * a) -{ - app_worker_t *server_wrk, *client_wrk; - u32 table_index, server_index, li; - stream_session_t *listener; - application_t *client, *server; - local_session_t *ll; - u8 fib_proto; - u64 lh; - - if (session_endpoint_is_zero (&a->sep)) - return VNET_API_ERROR_INVALID_VALUE; - - client = application_get (a->app_index); - session_endpoint_update_for_app (&a->sep_ext, client, 1 /* is_connect */ ); - client_wrk = application_get_worker (client, a->wrk_map_index); - - /* - * First check 
the local scope for locally attached destinations. - * If we have local scope, we pass *all* connects through it since we may - * have special policy rules even for non-local destinations, think proxy. - */ - if (application_has_local_scope (client)) - { - table_index = application_local_session_table (client); - lh = session_lookup_local_endpoint (table_index, &a->sep); - if (lh == SESSION_DROP_HANDLE) - return VNET_API_ERROR_APP_CONNECT_FILTERED; - - if (lh == SESSION_INVALID_HANDLE) - goto global_scope; - - local_session_parse_handle (lh, &server_index, &li); - - /* - * Break loop if rule in local table points to connecting app. This - * can happen if client is a generic proxy. Route connect through - * global table instead. - */ - if (server_index != a->app_index) - { - server = application_get (server_index); - ll = application_get_local_listen_session (server, li); - listener = (stream_session_t *) ll; - server_wrk = application_listener_select_worker (listener, - 1 /* is_local */ ); - return application_local_session_connect (client_wrk, - server_wrk, ll, - a->api_context); - } - } - - /* - * If nothing found, check the global scope for locally attached - * destinations. Make sure first that we're allowed to. 
- */ - -global_scope: - if (session_endpoint_is_local (&a->sep)) - return VNET_API_ERROR_SESSION_CONNECT; - - if (!application_has_global_scope (client)) - return VNET_API_ERROR_APP_CONNECT_SCOPE; - - fib_proto = session_endpoint_fib_proto (&a->sep); - table_index = application_session_table (client, fib_proto); - listener = session_lookup_listener (table_index, &a->sep); - if (listener) - { - server_wrk = application_listener_select_worker (listener, - 0 /* is_local */ ); - ll = (local_session_t *) listener; - return application_local_session_connect (client_wrk, server_wrk, ll, - a->api_context); - } - - /* - * Not connecting to a local server, propagate to transport - */ - if (app_worker_open_session (client_wrk, &a->sep, a->api_context)) - return VNET_API_ERROR_SESSION_CONNECT; - return 0; -} - /** * unformat a vnet URI * @@ -464,129 +117,8 @@ parse_uri (char *uri, session_endpoint_cfg_t * sep) return 0; } -static int -app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index) -{ - app_namespace_t *app_ns; - if (vec_len (namespace_id) == 0) - { - /* Use default namespace */ - *app_ns_index = 0; - return 0; - } - - *app_ns_index = app_namespace_index_from_id (namespace_id); - if (*app_ns_index == APP_NAMESPACE_INVALID_INDEX) - return VNET_API_ERROR_APP_INVALID_NS; - app_ns = app_namespace_get (*app_ns_index); - if (!app_ns) - return VNET_API_ERROR_APP_INVALID_NS; - if (app_ns->ns_secret != secret) - return VNET_API_ERROR_APP_WRONG_NS_SECRET; - return 0; -} - -static u8 * -app_name_from_api_index (u32 api_client_index) -{ - vl_api_registration_t *regp; - regp = vl_api_client_index_to_registration (api_client_index); - if (regp) - return format (0, "%s%c", regp->name, 0); - - clib_warning ("api client index %u does not have an api registration!", - api_client_index); - return format (0, "unknown%c", 0); -} - -/** - * Attach application to vpp - * - * Allocates a vpp app, i.e., a structure that keeps back pointers - * to external app and a segment 
manager for shared memory fifo based - * communication with the external app. - */ -clib_error_t * -vnet_application_attach (vnet_app_attach_args_t * a) -{ - svm_fifo_segment_private_t *fs; - application_t *app = 0; - app_worker_t *app_wrk; - segment_manager_t *sm; - u32 app_ns_index = 0; - u8 *app_name = 0; - u64 secret; - int rv; - - if (a->api_client_index != APP_INVALID_INDEX) - app = application_lookup (a->api_client_index); - else if (a->name) - app = application_lookup_name (a->name); - else - return clib_error_return_code (0, VNET_API_ERROR_INVALID_VALUE, 0, - "api index or name must be provided"); - - if (app) - return clib_error_return_code (0, VNET_API_ERROR_APP_ALREADY_ATTACHED, 0, - "app already attached"); - - if (a->api_client_index != APP_INVALID_INDEX) - { - app_name = app_name_from_api_index (a->api_client_index); - a->name = app_name; - } - - secret = a->options[APP_OPTIONS_NAMESPACE_SECRET]; - if ((rv = app_validate_namespace (a->namespace_id, secret, &app_ns_index))) - return clib_error_return_code (0, rv, 0, "namespace validation: %d", rv); - a->options[APP_OPTIONS_NAMESPACE] = app_ns_index; - - if ((rv = application_alloc_and_init ((app_init_args_t *) a))) - return clib_error_return_code (0, rv, 0, "app init: %d", rv); - - app = application_get (a->app_index); - if ((rv = app_worker_alloc_and_init (app, &app_wrk))) - return clib_error_return_code (0, rv, 0, "app default wrk init: %d", rv); - - a->app_evt_q = app_wrk->event_queue; - app_wrk->api_client_index = a->api_client_index; - sm = segment_manager_get (app_wrk->first_segment_manager); - fs = segment_manager_get_segment_w_lock (sm, 0); - - if (application_is_proxy (app)) - application_setup_proxy (app); - - ASSERT (vec_len (fs->ssvm.name) <= 128); - a->segment = &fs->ssvm; - a->segment_handle = segment_manager_segment_handle (sm, fs); - - segment_manager_segment_reader_unlock (sm); - vec_free (app_name); - return 0; -} - -/** - * Detach application from vpp - */ -int 
-vnet_application_detach (vnet_app_detach_args_t * a) -{ - application_t *app; - - app = application_get_if_valid (a->app_index); - if (!app) - { - clib_warning ("app not attached"); - return VNET_API_ERROR_APPLICATION_NOT_ATTACHED; - } - - app_interface_check_thread_and_barrier (vnet_application_detach, a); - application_detach_process (app, a->api_client_index); - return 0; -} - int -vnet_bind_uri (vnet_bind_args_t * a) +vnet_bind_uri (vnet_listen_args_t * a) { session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; int rv; @@ -596,19 +128,18 @@ vnet_bind_uri (vnet_bind_args_t * a) return rv; sep.app_wrk_index = 0; clib_memcpy (&a->sep_ext, &sep, sizeof (sep)); - return vnet_bind_inline (a); + return vnet_listen (a); } int -vnet_unbind_uri (vnet_unbind_args_t * a) +vnet_unbind_uri (vnet_unlisten_args_t * a) { session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; - stream_session_t *listener; + session_t *listener; u32 table_index; int rv; - rv = parse_uri (a->uri, &sep); - if (rv) + if ((rv = parse_uri (a->uri, &sep))) return rv; /* NOTE: only default fib tables supported for uri apis */ @@ -619,86 +150,21 @@ vnet_unbind_uri (vnet_unbind_args_t * a) if (!listener) return VNET_API_ERROR_ADDRESS_NOT_IN_USE; a->handle = listen_session_get_handle (listener); - return vnet_unbind_inline (a); + return vnet_unlisten (a); } -clib_error_t * +int vnet_connect_uri (vnet_connect_args_t * a) { session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; int rv; - /* Parse uri */ - rv = parse_uri (a->uri, &sep); - if (rv) - return clib_error_return_code (0, rv, 0, "app init: %d", rv); + if ((rv = parse_uri (a->uri, &sep))) + return rv; clib_memcpy (&a->sep_ext, &sep, sizeof (sep)); - if ((rv = application_connect (a))) - return clib_error_return_code (0, rv, 0, "connect failed"); - return 0; -} - -int -vnet_disconnect_session (vnet_disconnect_args_t * a) -{ - if (session_handle_is_local (a->handle)) - { - local_session_t *ls; - - /* Disconnect reply came to worker 1 not main 
thread */ - app_interface_check_thread_and_barrier (vnet_disconnect_session, a); - - if (!(ls = application_get_local_session_from_handle (a->handle))) - return 0; - - return application_local_session_disconnect (a->app_index, ls); - } - else - { - app_worker_t *app_wrk; - stream_session_t *s; - - s = session_get_from_handle_if_valid (a->handle); - if (!s) - return VNET_API_ERROR_INVALID_VALUE; - app_wrk = app_worker_get (s->app_wrk_index); - if (app_wrk->app_index != a->app_index) - return VNET_API_ERROR_INVALID_VALUE; - - /* We're peeking into another's thread pool. Make sure */ - ASSERT (s->session_index == session_index_from_handle (a->handle)); - - session_close (s); - } - return 0; -} - -clib_error_t * -vnet_bind (vnet_bind_args_t * a) -{ - int rv; - if ((rv = vnet_bind_inline (a))) - return clib_error_return_code (0, rv, 0, "bind failed: %d", rv); - return 0; -} - -clib_error_t * -vnet_unbind (vnet_unbind_args_t * a) -{ - int rv; - if ((rv = vnet_unbind_inline (a))) - return clib_error_return_code (0, rv, 0, "unbind failed: %d", rv); - return 0; -} - -clib_error_t * -vnet_connect (vnet_connect_args_t * a) -{ - int rv; - - if ((rv = application_connect (a))) - return clib_error_return_code (0, rv, 0, "connect failed: %d", rv); + if ((rv = vnet_connect (a))) + return rv; return 0; } diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index 9c48faa8abc0..18878cdf5681 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Cisco and/or its affiliates. + * Copyright (c) 2016-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: @@ -15,11 +15,48 @@ #ifndef __included_uri_h__ #define __included_uri_h__ +#include +#include #include -#include -#include -#include -#include +#include +#include + +typedef struct _stream_session_cb_vft +{ + /** Notify server of new segment */ + int (*add_segment_callback) (u32 api_client_index, u64 segment_handle); + + /** Notify server of deleted segment */ + int (*del_segment_callback) (u32 api_client_index, u64 segment_handle); + + /** Notify server of newly accepted session */ + int (*session_accept_callback) (session_t * new_session); + + /** Connection request callback */ + int (*session_connected_callback) (u32 app_wrk_index, u32 opaque, + session_t * s, u8 code); + + /** Notify app that session is closing */ + void (*session_disconnect_callback) (session_t * s); + + /** Notify app that session was reset */ + void (*session_reset_callback) (session_t * s); + + /** Direct RX callback for built-in application */ + int (*builtin_app_rx_callback) (session_t * session); + + /** Direct TX callback for built-in application */ + int (*builtin_app_tx_callback) (session_t * session); + +} session_cb_vft_t; + +#define foreach_app_init_args \ + _(u32, api_client_index) \ + _(u8 *, name) \ + _(u64 *, options) \ + _(u8 *, namespace_id) \ + _(session_cb_vft_t *, session_cb_vft) \ + _(u32, app_index) \ typedef struct _vnet_app_attach_args_t { @@ -56,7 +93,7 @@ typedef struct _vnet_bind_args_t u32 segment_name_length; u64 server_event_queue_address; u64 handle; -} vnet_bind_args_t; +} vnet_listen_args_t; typedef struct _vnet_unbind_args_t { @@ -67,7 +104,7 @@ typedef struct _vnet_unbind_args_t }; u32 app_index; /**< Owning application index */ u32 wrk_map_index; /**< App's local pool worker index */ -} vnet_unbind_args_t; +} vnet_unlisten_args_t; typedef struct _vnet_connect_args { @@ -102,6 +139,14 @@ typedef struct _vnet_application_add_tls_key_args_t u8 *key; } vnet_app_add_tls_key_args_t; +typedef enum tls_engine_type_ +{ +
TLS_ENGINE_NONE, + TLS_ENGINE_MBEDTLS, + TLS_ENGINE_OPENSSL, + TLS_N_ENGINES +} tls_engine_type_t; + /* Application attach options */ typedef enum { @@ -167,25 +212,20 @@ typedef enum session_fd_flag_ #undef _ } session_fd_flag_t; -int vnet_bind_uri (vnet_bind_args_t *); -int vnet_unbind_uri (vnet_unbind_args_t * a); -clib_error_t *vnet_connect_uri (vnet_connect_args_t * a); +int vnet_bind_uri (vnet_listen_args_t *); +int vnet_unbind_uri (vnet_unlisten_args_t * a); +int vnet_connect_uri (vnet_connect_args_t * a); -clib_error_t *vnet_application_attach (vnet_app_attach_args_t * a); -clib_error_t *vnet_bind (vnet_bind_args_t * a); -clib_error_t *vnet_connect (vnet_connect_args_t * a); -clib_error_t *vnet_unbind (vnet_unbind_args_t * a); +int vnet_application_attach (vnet_app_attach_args_t * a); int vnet_application_detach (vnet_app_detach_args_t * a); +int vnet_listen (vnet_listen_args_t * a); +int vnet_connect (vnet_connect_args_t * a); +int vnet_unlisten (vnet_unlisten_args_t * a); int vnet_disconnect_session (vnet_disconnect_args_t * a); clib_error_t *vnet_app_add_tls_cert (vnet_app_add_tls_cert_args_t * a); clib_error_t *vnet_app_add_tls_key (vnet_app_add_tls_key_args_t * a); -extern const char test_srv_crt_rsa[]; -extern const u32 test_srv_crt_rsa_len; -extern const char test_srv_key_rsa[]; -extern const u32 test_srv_key_rsa_len; - typedef struct app_session_transport_ { ip46_address_t rmt_ip; /**< remote ip */ diff --git a/src/vnet/session/application_namespace.c b/src/vnet/session/application_namespace.c index 1896a7231153..47a369ed765e 100644 --- a/src/vnet/session/application_namespace.c +++ b/src/vnet/session/application_namespace.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: @@ -61,7 +61,7 @@ app_namespace_alloc (u8 * ns_id) return app_ns; } -clib_error_t * +int vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a) { app_namespace_t *app_ns; @@ -71,9 +71,8 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a) { if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX && !vnet_get_sw_interface_safe (vnet_get_main (), a->sw_if_index)) - return clib_error_return_code (0, VNET_API_ERROR_INVALID_SW_IF_INDEX, - 0, "sw_if_index %u doesn't exist", - a->sw_if_index); + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX) { @@ -86,9 +85,8 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a) } if (a->sw_if_index == APP_NAMESPACE_INVALID_INDEX && a->ip4_fib_id == APP_NAMESPACE_INVALID_INDEX) - return clib_error_return_code (0, VNET_API_ERROR_INVALID_VALUE, 0, - "sw_if_index or fib_id must be " - "configured"); + return VNET_API_ERROR_INVALID_VALUE; + app_ns = app_namespace_get_from_id (a->ns_id); if (!app_ns) { @@ -109,8 +107,7 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a) } else { - return clib_error_return_code (0, VNET_API_ERROR_UNIMPLEMENTED, 0, - "namespace deletion not supported"); + return VNET_API_ERROR_UNIMPLEMENTED; } return 0; } @@ -184,6 +181,7 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input, u32 sw_if_index, fib_id = APP_NAMESPACE_INVALID_INDEX; u64 secret; clib_error_t *error = 0; + int rv; session_cli_return_if_not_enabled (); @@ -228,7 +226,8 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input, .ip4_fib_id = fib_id, .is_add = 1 }; - error = vnet_app_namespace_add_del (&args); + if ((rv = vnet_app_namespace_add_del (&args))) + return clib_error_return (0, "app namespace add del returned %d", rv); } return error; diff --git a/src/vnet/session/application_namespace.h b/src/vnet/session/application_namespace.h index 6eb9d53c8644..3a24fe1e6fad 100644 --- 
a/src/vnet/session/application_namespace.h +++ b/src/vnet/session/application_namespace.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -70,8 +70,7 @@ const u8 *app_namespace_id (app_namespace_t * app_ns); const u8 *app_namespace_id_from_index (u32 index); u32 app_namespace_index_from_id (const u8 * ns_id); void app_namespaces_init (void); -clib_error_t *vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * - a); +int vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a); u32 app_namespace_get_fib_index (app_namespace_t * app_ns, u8 fib_proto); session_table_t *app_namespace_get_local_table (app_namespace_t * app_ns); diff --git a/src/vnet/session/application_worker.c b/src/vnet/session/application_worker.c new file mode 100644 index 000000000000..7fdf71d9aeeb --- /dev/null +++ b/src/vnet/session/application_worker.c @@ -0,0 +1,1165 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +/** + * Pool of workers associated to apps + */ +static app_worker_t *app_workers; + +static inline u64 +application_client_local_connect_key (local_session_t * ls) +{ + return (((u64) ls->app_wrk_index) << 32 | (u64) ls->session_index); +} + +static inline void +application_client_local_connect_key_parse (u64 key, u32 * app_wrk_index, + u32 * session_index) +{ + *app_wrk_index = key >> 32; + *session_index = key & 0xFFFFFFFF; +} + +local_session_t * +app_worker_local_session_alloc (app_worker_t * app_wrk) +{ + local_session_t *s; + pool_get (app_wrk->local_sessions, s); + clib_memset (s, 0, sizeof (*s)); + s->app_wrk_index = app_wrk->wrk_index; + s->session_index = s - app_wrk->local_sessions; + s->session_type = session_type_from_proto_and_ip (TRANSPORT_PROTO_NONE, 0); + return s; +} + +void +app_worker_local_session_free (app_worker_t * app_wrk, local_session_t * s) +{ + pool_put (app_wrk->local_sessions, s); + if (CLIB_DEBUG) + clib_memset (s, 0xfc, sizeof (*s)); +} + +local_session_t * +app_worker_get_local_session (app_worker_t * app_wrk, u32 session_index) +{ + if (pool_is_free_index (app_wrk->local_sessions, session_index)) + return 0; + return pool_elt_at_index (app_wrk->local_sessions, session_index); +} + +local_session_t * +app_worker_get_local_session_from_handle (session_handle_t handle) +{ + app_worker_t *server_wrk; + u32 session_index, server_wrk_index; + local_session_parse_handle (handle, &server_wrk_index, &session_index); + server_wrk = app_worker_get_if_valid (server_wrk_index); + if (!server_wrk) + return 0; + return app_worker_get_local_session (server_wrk, session_index); +} + +void +app_worker_local_sessions_free (app_worker_t * app_wrk) +{ + u32 index, server_wrk_index, session_index; + u64 handle, *handles = 0; + app_worker_t *server_wrk; + segment_manager_t *sm; + local_session_t *ls; + int i; + + /* + * Local sessions + */ + if (app_wrk->local_sessions) + { + /* *INDENT-OFF* */ + pool_foreach 
(ls, app_wrk->local_sessions, ({ + app_worker_local_session_disconnect (app_wrk->wrk_index, ls); + })); + /* *INDENT-ON* */ + } + + /* + * Local connects + */ + vec_reset_length (handles); + /* *INDENT-OFF* */ + hash_foreach (handle, index, app_wrk->local_connects, ({ + vec_add1 (handles, handle); + })); + /* *INDENT-ON* */ + + for (i = 0; i < vec_len (handles); i++) + { + application_client_local_connect_key_parse (handles[i], + &server_wrk_index, + &session_index); + server_wrk = app_worker_get_if_valid (server_wrk_index); + if (server_wrk) + { + ls = app_worker_get_local_session (server_wrk, session_index); + /* The lookup returns 0 when the index is free in the server's pool; + * skip such stale connect keys instead of passing a null session */ + if (ls) + app_worker_local_session_disconnect (app_wrk->wrk_index, ls); + } + } + /* handles is a scratch vector built above; release it before returning */ + vec_free (handles); + + sm = segment_manager_get (app_wrk->local_segment_manager); + sm->app_wrk_index = SEGMENT_MANAGER_INVALID_APP_INDEX; + segment_manager_del (sm); +} + +app_worker_t * +app_worker_alloc (application_t * app) +{ + app_worker_t *app_wrk; + pool_get (app_workers, app_wrk); + clib_memset (app_wrk, 0, sizeof (*app_wrk)); + app_wrk->wrk_index = app_wrk - app_workers; + app_wrk->app_index = app->app_index; + app_wrk->wrk_map_index = ~0; + app_wrk->connects_seg_manager = APP_INVALID_SEGMENT_MANAGER_INDEX; + app_wrk->first_segment_manager = APP_INVALID_SEGMENT_MANAGER_INDEX; + app_wrk->local_segment_manager = APP_INVALID_SEGMENT_MANAGER_INDEX; + APP_DBG ("New app %v worker %u", app_get_name (app), app_wrk->wrk_index); + return app_wrk; +} + +app_worker_t * +app_worker_get (u32 wrk_index) +{ + return pool_elt_at_index (app_workers, wrk_index); +} + +app_worker_t * +app_worker_get_if_valid (u32 wrk_index) +{ + if (pool_is_free_index (app_workers, wrk_index)) + return 0; + return pool_elt_at_index (app_workers, wrk_index); +} + +void +app_worker_free (app_worker_t * app_wrk) +{ + application_t *app = application_get (app_wrk->app_index); + vnet_unlisten_args_t _a, *a = &_a; + u64 handle, *handles = 0; + segment_manager_t *sm; + u32 sm_index; + int i; + app_listener_t *al; + session_t *ls; + + /* +
Listener cleanup + */ + + /* *INDENT-OFF* */ + hash_foreach (handle, sm_index, app_wrk->listeners_table, ({ + ls = listen_session_get_from_handle (handle); + al = app_listener_get (app, ls->al_index); + vec_add1 (handles, app_listener_handle (al)); + sm = segment_manager_get (sm_index); + sm->app_wrk_index = SEGMENT_MANAGER_INVALID_APP_INDEX; + })); + /* *INDENT-ON* */ + + for (i = 0; i < vec_len (handles); i++) + { + a->app_index = app->app_index; + a->wrk_map_index = app_wrk->wrk_map_index; + a->handle = handles[i]; + /* seg manager is removed when unbind completes */ + (void) vnet_unlisten (a); + } + /* The handle list only drove the unlisten calls above; release it */ + vec_free (handles); + + /* + * Connects segment manager cleanup + */ + + if (app_wrk->connects_seg_manager != APP_INVALID_SEGMENT_MANAGER_INDEX) + { + sm = segment_manager_get (app_wrk->connects_seg_manager); + sm->app_wrk_index = SEGMENT_MANAGER_INVALID_APP_INDEX; + segment_manager_init_del (sm); + } + + /* If first segment manager is used by a listener */ + if (app_wrk->first_segment_manager != APP_INVALID_SEGMENT_MANAGER_INDEX + && app_wrk->first_segment_manager != app_wrk->connects_seg_manager) + { + sm = segment_manager_get (app_wrk->first_segment_manager); + sm->first_is_protected = 0; + sm->app_wrk_index = SEGMENT_MANAGER_INVALID_APP_INDEX; + /* .. and has no fifos, e.g.
it might be used for redirected sessions, + * remove it */ + if (!segment_manager_has_fifos (sm)) + segment_manager_del (sm); + } + + /* + * Local sessions + */ + app_worker_local_sessions_free (app_wrk); + + pool_put (app_workers, app_wrk); + if (CLIB_DEBUG) + clib_memset (app_wrk, 0xfe, sizeof (*app_wrk)); +} + +application_t * +app_worker_get_app (u32 wrk_index) +{ + app_worker_t *app_wrk; + app_wrk = app_worker_get_if_valid (wrk_index); + if (!app_wrk) + return 0; + return application_get_if_valid (app_wrk->app_index); +} + +static segment_manager_t * +app_worker_alloc_segment_manager (app_worker_t * app_wrk) +{ + segment_manager_t *sm = 0; + + /* If the first segment manager is not in use, don't allocate a new one */ + if (app_wrk->first_segment_manager != APP_INVALID_SEGMENT_MANAGER_INDEX + && app_wrk->first_segment_manager_in_use == 0) + { + sm = segment_manager_get (app_wrk->first_segment_manager); + app_wrk->first_segment_manager_in_use = 1; + return sm; + } + + sm = segment_manager_new (); + sm->app_wrk_index = app_wrk->wrk_index; + + return sm; +} + +int +app_worker_start_listen (app_worker_t * app_wrk, + app_listener_t * app_listener) +{ + segment_manager_t *sm; + session_t *ls; + + if (clib_bitmap_get (app_listener->workers, app_wrk->wrk_map_index)) + return VNET_API_ERROR_ADDRESS_IN_USE; + + app_listener->workers = clib_bitmap_set (app_listener->workers, + app_wrk->wrk_map_index, 1); + + if (app_listener->session_index == SESSION_INVALID_INDEX) + return 0; + + ls = session_get (app_listener->session_index, 0); + + /* Allocate segment manager. All sessions derived out of a listen session + * have fifos allocated by the same segment manager. 
*/ + if (!(sm = app_worker_alloc_segment_manager (app_wrk))) + return -1; + + /* Keep track of the segment manager for the listener or this worker */ + hash_set (app_wrk->listeners_table, listen_session_get_handle (ls), + segment_manager_index (sm)); + + if (session_transport_service_type (ls) == TRANSPORT_SERVICE_CL) + { + if (!ls->rx_fifo && session_alloc_fifos (sm, ls)) + return -1; + } + return 0; +} + +int +app_worker_stop_listen (app_worker_t * app_wrk, app_listener_t * al) +{ + session_handle_t handle; + segment_manager_t *sm; + uword *sm_indexp; + + if (!clib_bitmap_get (al->workers, app_wrk->wrk_map_index)) + return 0; + + if (al->session_index != SESSION_INVALID_INDEX) + { + session_t *ls; + + ls = listen_session_get (al->session_index); + handle = listen_session_get_handle (ls); + + sm_indexp = hash_get (app_wrk->listeners_table, handle); + if (PREDICT_FALSE (!sm_indexp)) + { + clib_warning ("listener handle was removed %llu!", handle); + return -1; + } + + sm = segment_manager_get (*sm_indexp); + if (app_wrk->first_segment_manager == *sm_indexp) + { + /* Delete sessions but don't remove segment manager */ + app_wrk->first_segment_manager_in_use = 0; + segment_manager_del_sessions (sm); + } + else + { + segment_manager_init_del (sm); + } + hash_unset (app_wrk->listeners_table, handle); + } + + if (al->local_index != SESSION_INVALID_INDEX) + { + local_session_t *ll, *ls; + application_t *app; + + app = application_get (app_wrk->app_index); + ll = application_get_local_listen_session (app, al->local_index); + + /* *INDENT-OFF* */ + pool_foreach (ls, app_wrk->local_sessions, ({ + if (ls->listener_index == ll->session_index) + app_worker_local_session_disconnect (app_wrk->app_index, ls); + })); + /* *INDENT-ON* */ + } + + clib_bitmap_set_no_check (al->workers, app_wrk->wrk_map_index, 0); + if (clib_bitmap_is_zero (al->workers)) + app_listener_cleanup (al); + + return 0; +} + +int +app_worker_own_session (app_worker_t * app_wrk, session_t * s) +{ + 
segment_manager_t *sm; + svm_fifo_t *rxf, *txf; + + if (s->session_state == SESSION_STATE_LISTENING) + return application_change_listener_owner (s, app_wrk); + + s->app_wrk_index = app_wrk->wrk_index; + + rxf = s->rx_fifo; + txf = s->tx_fifo; + + if (!rxf || !txf) + return 0; + + s->rx_fifo = 0; + s->tx_fifo = 0; + + sm = app_worker_get_or_alloc_connect_segment_manager (app_wrk); + if (session_alloc_fifos (sm, s)) + return -1; + + if (!svm_fifo_is_empty (rxf)) + { + clib_memcpy_fast (s->rx_fifo->data, rxf->data, rxf->nitems); + s->rx_fifo->head = rxf->head; + s->rx_fifo->tail = rxf->tail; + s->rx_fifo->cursize = rxf->cursize; + } + + if (!svm_fifo_is_empty (txf)) + { + clib_memcpy_fast (s->tx_fifo->data, txf->data, txf->nitems); + s->tx_fifo->head = txf->head; + s->tx_fifo->tail = txf->tail; + s->tx_fifo->cursize = txf->cursize; + } + + segment_manager_dealloc_fifos (rxf->segment_index, rxf, txf); + + return 0; +} + +int +app_worker_connect_session (app_worker_t * app, session_endpoint_t * sep, + u32 api_context) +{ + int rv; + + /* Make sure we have a segment manager for connects */ + app_worker_alloc_connects_segment_manager (app); + + if ((rv = session_open (app->wrk_index, sep, api_context))) + return rv; + + return 0; +} + +int +app_worker_alloc_connects_segment_manager (app_worker_t * app_wrk) +{ + segment_manager_t *sm; + + if (app_wrk->connects_seg_manager == APP_INVALID_SEGMENT_MANAGER_INDEX) + { + sm = app_worker_alloc_segment_manager (app_wrk); + if (sm == 0) + return -1; + app_wrk->connects_seg_manager = segment_manager_index (sm); + } + return 0; +} + +segment_manager_t * +app_worker_get_connect_segment_manager (app_worker_t * app) +{ + ASSERT (app->connects_seg_manager != (u32) ~ 0); + return segment_manager_get (app->connects_seg_manager); +} + +segment_manager_t * +app_worker_get_or_alloc_connect_segment_manager (app_worker_t * app_wrk) +{ + if (app_wrk->connects_seg_manager == (u32) ~ 0) + app_worker_alloc_connects_segment_manager (app_wrk); + 
return segment_manager_get (app_wrk->connects_seg_manager); +} + +segment_manager_t * +app_worker_get_listen_segment_manager (app_worker_t * app, + session_t * listener) +{ + uword *smp; + smp = hash_get (app->listeners_table, listen_session_get_handle (listener)); + ASSERT (smp != 0); + return segment_manager_get (*smp); +} + +session_t * +app_worker_first_listener (app_worker_t * app_wrk, u8 fib_proto, + u8 transport_proto) +{ + session_t *listener; + u64 handle; + u32 sm_index; + u8 sst; + + sst = session_type_from_proto_and_ip (transport_proto, + fib_proto == FIB_PROTOCOL_IP4); + + /* *INDENT-OFF* */ + hash_foreach (handle, sm_index, app_wrk->listeners_table, ({ + listener = listen_session_get_from_handle (handle); + if (listener->session_type == sst + && listener->enqueue_epoch != SESSION_PROXY_LISTENER_INDEX) + return listener; + })); + /* *INDENT-ON* */ + + return 0; +} + +session_t * +app_worker_proxy_listener (app_worker_t * app_wrk, u8 fib_proto, + u8 transport_proto) +{ + session_t *listener; + u64 handle; + u32 sm_index; + u8 sst; + + sst = session_type_from_proto_and_ip (transport_proto, + fib_proto == FIB_PROTOCOL_IP4); + + /* *INDENT-OFF* */ + hash_foreach (handle, sm_index, app_wrk->listeners_table, ({ + listener = listen_session_get_from_handle (handle); + if (listener->session_type == sst + && listener->enqueue_epoch == SESSION_PROXY_LISTENER_INDEX) + return listener; + })); + /* *INDENT-ON* */ + + return 0; +} + +/** + * Send an API message to the external app, to map new segment + */ +int +app_worker_add_segment_notify (u32 app_wrk_index, u64 segment_handle) +{ + app_worker_t *app_wrk = app_worker_get (app_wrk_index); + application_t *app = application_get (app_wrk->app_index); + return app->cb_fns.add_segment_callback (app_wrk->api_client_index, + segment_handle); +} + +u8 +app_worker_application_is_builtin (app_worker_t * app_wrk) +{ + return app_wrk->app_is_builtin; +} + +static inline int +app_enqueue_evt (svm_msg_q_t * mq, svm_msg_q_msg_t * 
msg, u8 lock) +{ + if (PREDICT_FALSE (svm_msg_q_is_full (mq))) + { + clib_warning ("evt q full"); + svm_msg_q_free_msg (mq, msg); + if (lock) + svm_msg_q_unlock (mq); + return -1; + } + + if (lock) + { + svm_msg_q_add_and_unlock (mq, msg); + return 0; + } + + /* Even when not locking the ring, we must wait for queue mutex */ + if (svm_msg_q_add (mq, msg, SVM_Q_WAIT)) + { + clib_warning ("msg q add returned"); + return -1; + } + return 0; +} + +static inline int +app_send_io_evt_rx (app_worker_t * app_wrk, session_t * s, u8 lock) +{ + session_event_t *evt; + svm_msg_q_msg_t msg; + svm_msg_q_t *mq; + + if (PREDICT_FALSE (s->session_state != SESSION_STATE_READY + && s->session_state != SESSION_STATE_LISTENING)) + { + /* Session is closed so app will never clean up. Flush rx fifo */ + if (s->session_state == SESSION_STATE_CLOSED) + svm_fifo_dequeue_drop_all (s->rx_fifo); + return 0; + } + + if (app_worker_application_is_builtin (app_wrk)) + { + application_t *app = application_get (app_wrk->app_index); + return app->cb_fns.builtin_app_rx_callback (s); + } + + if (svm_fifo_has_event (s->rx_fifo) || svm_fifo_is_empty (s->rx_fifo)) + return 0; + + mq = app_wrk->event_queue; + if (lock) + svm_msg_q_lock (mq); + + if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))) + { + clib_warning ("evt q rings full"); + if (lock) + svm_msg_q_unlock (mq); + return -1; + } + + msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); + ASSERT (!svm_msg_q_msg_is_invalid (&msg)); + + evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); + evt->fifo = s->rx_fifo; + evt->event_type = FIFO_EVENT_APP_RX; + + (void) svm_fifo_set_event (s->rx_fifo); + + if (app_enqueue_evt (mq, &msg, lock)) + return -1; + return 0; +} + +static inline int +app_send_io_evt_tx (app_worker_t * app_wrk, session_t * s, u8 lock) +{ + svm_msg_q_t *mq; + session_event_t *evt; + svm_msg_q_msg_t msg; + + if (app_worker_application_is_builtin (app_wrk)) + return 0; + + mq = app_wrk->event_queue; 
+ if (lock) + svm_msg_q_lock (mq); + + if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))) + { + clib_warning ("evt q rings full"); + if (lock) + svm_msg_q_unlock (mq); + return -1; + } + + msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); + ASSERT (!svm_msg_q_msg_is_invalid (&msg)); + + evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); + evt->event_type = FIFO_EVENT_APP_TX; + evt->fifo = s->tx_fifo; + + return app_enqueue_evt (mq, &msg, lock); +} + +/* *INDENT-OFF* */ +typedef int (app_send_evt_handler_fn) (app_worker_t *app, + session_t *s, + u8 lock); +static app_send_evt_handler_fn * const app_send_evt_handler_fns[3] = { + app_send_io_evt_rx, + 0, + app_send_io_evt_tx, +}; +/* *INDENT-ON* */ + +/** + * Send event to application + * + * Logic from queue perspective is non-blocking. If there's + * not enough space to enqueue a message, we return. + */ +int +app_worker_send_event (app_worker_t * app, session_t * s, u8 evt_type) +{ + ASSERT (app && evt_type <= FIFO_EVENT_APP_TX); + return app_send_evt_handler_fns[evt_type] (app, s, 0 /* lock */ ); +} + +/** + * Send event to application + * + * Logic from queue perspective is blocking. However, if queue is full, + * we return. 
+ */ +int +app_worker_lock_and_send_event (app_worker_t * app, session_t * s, + u8 evt_type) +{ + return app_send_evt_handler_fns[evt_type] (app, s, 1 /* lock */ ); +} + +segment_manager_t * +app_worker_get_local_segment_manager (app_worker_t * app_worker) +{ + return segment_manager_get (app_worker->local_segment_manager); +} + +segment_manager_t * +app_worker_get_local_segment_manager_w_session (app_worker_t * app_wrk, + local_session_t * ls) +{ + session_t *listener; + if (application_local_session_listener_has_transport (ls)) + { + listener = listen_session_get (ls->listener_index); + return app_worker_get_listen_segment_manager (app_wrk, listener); + } + return segment_manager_get (app_wrk->local_segment_manager); +} + +int +app_worker_local_session_cleanup (app_worker_t * client_wrk, + app_worker_t * server_wrk, + local_session_t * ls) +{ + svm_fifo_segment_private_t *seg; + session_t *listener; + segment_manager_t *sm; + u64 client_key; + u8 has_transport; + + /* Retrieve listener transport type as it is the one that decides where + * the fifos are allocated */ + has_transport = application_local_session_listener_has_transport (ls); + if (!has_transport) + sm = app_worker_get_local_segment_manager_w_session (server_wrk, ls); + else + { + listener = listen_session_get (ls->listener_index); + sm = app_worker_get_listen_segment_manager (server_wrk, listener); + } + + seg = segment_manager_get_segment (sm, ls->svm_segment_index); + if (client_wrk) + { + client_key = application_client_local_connect_key (ls); + hash_unset (client_wrk->local_connects, client_key); + } + + if (!has_transport) + { + application_t *server = application_get (server_wrk->app_index); + u64 segment_handle = segment_manager_segment_handle (sm, seg); + server->cb_fns.del_segment_callback (server_wrk->api_client_index, + segment_handle); + if (client_wrk) + { + application_t *client = application_get (client_wrk->app_index); + client->cb_fns.del_segment_callback 
(client_wrk->api_client_index, + segment_handle); + } + segment_manager_del_segment (sm, seg); + } + + app_worker_local_session_free (server_wrk, ls); + + return 0; +} + +static void +application_local_session_fix_eventds (svm_msg_q_t * sq, svm_msg_q_t * cq) +{ + int fd; + + /* + * segment manager initializes only the producer eventds, since vpp is + * typically the producer. But for local sessions, we also pass to the + * apps the mqs they listen on for events from peer apps, so they are also + * consumer fds. + */ + fd = svm_msg_q_get_producer_eventfd (sq); + svm_msg_q_set_consumer_eventfd (sq, fd); + fd = svm_msg_q_get_producer_eventfd (cq); + svm_msg_q_set_consumer_eventfd (cq, fd); +} + +int +app_worker_local_session_connect (app_worker_t * client_wrk, + app_worker_t * server_wrk, + local_session_t * ll, u32 opaque) +{ + u32 seg_size, evt_q_sz, evt_q_elts, margin = 16 << 10; + u32 round_rx_fifo_sz, round_tx_fifo_sz, sm_index; + segment_manager_properties_t *props, *cprops; + int rv, has_transport, seg_index; + svm_fifo_segment_private_t *seg; + application_t *server, *client; + segment_manager_t *sm; + local_session_t *ls; + svm_msg_q_t *sq, *cq; + u64 segment_handle; + + ls = app_worker_local_session_alloc (server_wrk); + server = application_get (server_wrk->app_index); + client = application_get (client_wrk->app_index); + + props = application_segment_manager_properties (server); + cprops = application_segment_manager_properties (client); + evt_q_elts = props->evt_q_size + cprops->evt_q_size; + evt_q_sz = segment_manager_evt_q_expected_size (evt_q_elts); + round_rx_fifo_sz = 1 << max_log2 (props->rx_fifo_size); + round_tx_fifo_sz = 1 << max_log2 (props->tx_fifo_size); + seg_size = round_rx_fifo_sz + round_tx_fifo_sz + evt_q_sz + margin; + + has_transport = session_has_transport ((session_t *) ll); + if (!has_transport) + { + /* Local sessions don't have backing transport */ + ls->port = ll->port; + sm = app_worker_get_local_segment_manager (server_wrk); + } 
+ else + { + session_t *sl = (session_t *) ll; + transport_connection_t *tc; + tc = listen_session_get_transport (sl); + ls->port = tc->lcl_port; + sm = app_worker_get_listen_segment_manager (server_wrk, sl); + } + + seg_index = segment_manager_add_segment (sm, seg_size); + if (seg_index < 0) + { + clib_warning ("failed to add new cut-through segment"); + return seg_index; + } + seg = segment_manager_get_segment_w_lock (sm, seg_index); + sq = segment_manager_alloc_queue (seg, props); + cq = segment_manager_alloc_queue (seg, cprops); + + if (props->use_mq_eventfd) + application_local_session_fix_eventds (sq, cq); + + ls->server_evt_q = pointer_to_uword (sq); + ls->client_evt_q = pointer_to_uword (cq); + rv = segment_manager_try_alloc_fifos (seg, props->rx_fifo_size, + props->tx_fifo_size, + &ls->rx_fifo, &ls->tx_fifo); + if (rv) + { + clib_warning ("failed to add fifos in cut-through segment"); + segment_manager_segment_reader_unlock (sm); + goto failed; + } + sm_index = segment_manager_index (sm); + ls->rx_fifo->ct_session_index = ls->session_index; + ls->tx_fifo->ct_session_index = ls->session_index; + ls->rx_fifo->segment_manager = sm_index; + ls->tx_fifo->segment_manager = sm_index; + ls->rx_fifo->segment_index = seg_index; + ls->tx_fifo->segment_index = seg_index; + ls->svm_segment_index = seg_index; + ls->listener_index = ll->session_index; + ls->client_wrk_index = client_wrk->wrk_index; + ls->client_opaque = opaque; + ls->listener_session_type = ll->session_type; + ls->session_state = SESSION_STATE_READY; + + segment_handle = segment_manager_segment_handle (sm, seg); + if ((rv = server->cb_fns.add_segment_callback (server_wrk->api_client_index, + segment_handle))) + { + clib_warning ("failed to notify server of new segment"); + segment_manager_segment_reader_unlock (sm); + goto failed; + } + segment_manager_segment_reader_unlock (sm); + if ((rv = server->cb_fns.session_accept_callback ((session_t *) ls))) + { + clib_warning ("failed to send accept cut-through 
notify to server"); + goto failed; + } + if (server->flags & APP_OPTIONS_FLAGS_IS_BUILTIN) + app_worker_local_session_connect_notify (ls); + + return 0; + +failed: + if (!has_transport) + segment_manager_del_segment (sm, seg); + return rv; +} + +int +app_worker_local_session_connect_notify (local_session_t * ls) +{ + svm_fifo_segment_private_t *seg; + app_worker_t *client_wrk, *server_wrk; + segment_manager_t *sm; + application_t *client; + int rv, is_fail = 0; + u64 segment_handle; + u64 client_key; + + client_wrk = app_worker_get (ls->client_wrk_index); + server_wrk = app_worker_get (ls->app_wrk_index); + client = application_get (client_wrk->app_index); + + sm = app_worker_get_local_segment_manager_w_session (server_wrk, ls); + seg = segment_manager_get_segment_w_lock (sm, ls->svm_segment_index); + segment_handle = segment_manager_segment_handle (sm, seg); + if ((rv = client->cb_fns.add_segment_callback (client_wrk->api_client_index, + segment_handle))) + { + clib_warning ("failed to notify client %u of new segment", + ls->client_wrk_index); + segment_manager_segment_reader_unlock (sm); + app_worker_local_session_disconnect (ls->client_wrk_index, ls); + is_fail = 1; + } + else + { + segment_manager_segment_reader_unlock (sm); + } + + client->cb_fns.session_connected_callback (client_wrk->wrk_index, + ls->client_opaque, + (session_t *) ls, is_fail); + + client_key = application_client_local_connect_key (ls); + hash_set (client_wrk->local_connects, client_key, client_key); + return 0; +} + +int +app_worker_local_session_disconnect (u32 app_index, local_session_t * ls) +{ + app_worker_t *client_wrk, *server_wrk; + u8 is_server = 0, is_client = 0; + application_t *app; + + app = application_get_if_valid (app_index); + if (!app) + return 0; + + client_wrk = app_worker_get_if_valid (ls->client_wrk_index); + server_wrk = app_worker_get (ls->app_wrk_index); + + if (server_wrk->app_index == app_index) + is_server = 1; + else if (client_wrk && client_wrk->app_index == 
app_index) + is_client = 1; + + if (!is_server && !is_client) + { + clib_warning ("app %u is neither client nor server for session 0x%lx", + app_index, application_local_session_handle (ls)); + return VNET_API_ERROR_INVALID_VALUE; + } + + if (ls->session_state == SESSION_STATE_CLOSED) + return app_worker_local_session_cleanup (client_wrk, server_wrk, ls); + + if (app_index == ls->client_wrk_index) + { + mq_send_local_session_disconnected_cb (ls->app_wrk_index, ls); + } + else + { + if (!client_wrk) + { + return app_worker_local_session_cleanup (client_wrk, server_wrk, + ls); + } + else if (ls->session_state < SESSION_STATE_READY) + { + application_t *client = application_get (client_wrk->app_index); + client->cb_fns.session_connected_callback (client_wrk->wrk_index, + ls->client_opaque, + (session_t *) ls, + 1 /* is_fail */ ); + ls->session_state = SESSION_STATE_CLOSED; + return app_worker_local_session_cleanup (client_wrk, server_wrk, + ls); + } + else + { + mq_send_local_session_disconnected_cb (client_wrk->wrk_index, ls); + } + } + + ls->session_state = SESSION_STATE_CLOSED; + + return 0; +} + +int +app_worker_local_session_disconnect_w_index (u32 app_wrk_index, u32 ls_index) +{ + app_worker_t *app_wrk; + local_session_t *ls; + app_wrk = app_worker_get (app_wrk_index); + ls = app_worker_get_local_session (app_wrk, ls_index); + return app_worker_local_session_disconnect (app_wrk_index, ls); +} + +u8 * +format_app_worker_listener (u8 * s, va_list * args) +{ + app_worker_t *app_wrk = va_arg (*args, app_worker_t *); + u64 handle = va_arg (*args, u64); + u32 sm_index = va_arg (*args, u32); + int verbose = va_arg (*args, int); + session_t *listener; + const u8 *app_name; + u8 *str; + + if (!app_wrk) + { + if (verbose) + s = format (s, "%-40s%-25s%=10s%-15s%-15s%-10s", "Connection", "App", + "Wrk", "API Client", "ListenerID", "SegManager"); + else + s = format (s, "%-40s%-25s%=10s", "Connection", "App", "Wrk"); + + return s; + } + + app_name = 
application_name_from_index (app_wrk->app_index); + listener = listen_session_get_from_handle (handle); + str = format (0, "%U", format_stream_session, listener, verbose); + + if (verbose) + { + char buf[32]; + sprintf (buf, "%u(%u)", app_wrk->wrk_map_index, app_wrk->wrk_index); + s = format (s, "%-40s%-25s%=10s%-15u%-15u%-10u", str, app_name, + buf, app_wrk->api_client_index, handle, sm_index); + } + else + s = format (s, "%-40s%-25s%=10u", str, app_name, app_wrk->wrk_map_index); + + return s; +} + +u8 * +format_app_worker (u8 * s, va_list * args) +{ + app_worker_t *app_wrk = va_arg (*args, app_worker_t *); + u32 indent = 1; + + s = format (s, "%U wrk-index %u app-index %u map-index %u " + "api-client-index %d\n", format_white_space, indent, + app_wrk->wrk_index, app_wrk->app_index, app_wrk->wrk_map_index, + app_wrk->api_client_index); + return s; +} + +void +app_worker_format_connects (app_worker_t * app_wrk, int verbose) +{ + svm_fifo_segment_private_t *fifo_segment; + vlib_main_t *vm = vlib_get_main (); + segment_manager_t *sm; + const u8 *app_name; + u8 *s = 0; + + /* Header */ + if (!app_wrk) + { + if (verbose) + vlib_cli_output (vm, "%-40s%-20s%-15s%-10s", "Connection", "App", + "API Client", "SegManager"); + else + vlib_cli_output (vm, "%-40s%-20s", "Connection", "App"); + return; + } + + if (app_wrk->connects_seg_manager == (u32) ~ 0) + return; + + app_name = application_name_from_index (app_wrk->app_index); + + /* Across all fifo segments */ + sm = segment_manager_get (app_wrk->connects_seg_manager); + + /* *INDENT-OFF* */ + segment_manager_foreach_segment_w_lock (fifo_segment, sm, ({ + svm_fifo_t *fifo; + u8 *str; + + fifo = svm_fifo_segment_get_fifo_list (fifo_segment); + while (fifo) + { + u32 session_index, thread_index; + session_t *session; + + session_index = fifo->master_session_index; + thread_index = fifo->master_thread_index; + + session = session_get (session_index, thread_index); + str = format (0, "%U", format_stream_session, session, 
verbose); + + if (verbose) + s = format (s, "%-40s%-20s%-15u%-10u", str, app_name, + app_wrk->api_client_index, app_wrk->connects_seg_manager); + else + s = format (s, "%-40s%-20s", str, app_name); + + vlib_cli_output (vm, "%v", s); + vec_reset_length (s); + vec_free (str); + + fifo = fifo->next; + } + vec_free (s); + })); + /* *INDENT-ON* */ +} + +void +app_worker_format_local_sessions (app_worker_t * app_wrk, int verbose) +{ + vlib_main_t *vm = vlib_get_main (); + app_worker_t *client_wrk; + local_session_t *ls; + transport_proto_t tp; + u8 *conn = 0; + + /* Header */ + if (app_wrk == 0) + { + vlib_cli_output (vm, "%-40s%-15s%-20s", "Connection", "ServerApp", + "ClientApp"); + return; + } + + if (!pool_elts (app_wrk->local_sessions) + && !pool_elts (app_wrk->local_connects)) + return; + + /* *INDENT-OFF* */ + pool_foreach (ls, app_wrk->local_sessions, ({ + tp = session_type_transport_proto(ls->listener_session_type); + conn = format (0, "[L][%U] *:%u", format_transport_proto_short, tp, + ls->port); + client_wrk = app_worker_get (ls->client_wrk_index); + vlib_cli_output (vm, "%-40v%-15u%-20u", conn, ls->app_index, + client_wrk->app_index); + vec_reset_length (conn); + })); + /* *INDENT-ON* */ + + vec_free (conn); +} + +void +app_worker_format_local_connects (app_worker_t * app, int verbose) +{ + vlib_main_t *vm = vlib_get_main (); + u32 app_wrk_index, session_index; + app_worker_t *server_wrk; + local_session_t *ls; + u64 client_key; + u64 value; + + /* Header */ + if (app == 0) + { + if (verbose) + vlib_cli_output (vm, "%-40s%-15s%-20s%-10s", "Connection", "App", + "Peer App", "SegManager"); + else + vlib_cli_output (vm, "%-40s%-15s%-20s", "Connection", "App", + "Peer App"); + return; + } + + if (!app->local_connects) + return; + + /* *INDENT-OFF* */ + hash_foreach (client_key, value, app->local_connects, ({ + application_client_local_connect_key_parse (client_key, &app_wrk_index, + &session_index); + server_wrk = app_worker_get (app_wrk_index); + ls = 
app_worker_get_local_session (server_wrk, session_index); + vlib_cli_output (vm, "%-40s%-15s%-20s", "TODO", ls->app_wrk_index, + ls->client_wrk_index); + })); + /* *INDENT-ON* */ +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/mma_16.h b/src/vnet/session/mma_16.h index 3e2e84d4a8bd..3bd712d05b86 100644 --- a/src/vnet/session/mma_16.h +++ b/src/vnet/session/mma_16.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: diff --git a/src/vnet/session/mma_40.h b/src/vnet/session/mma_40.h index 773b7f08cca2..2857fd445fe7 100644 --- a/src/vnet/session/mma_40.h +++ b/src/vnet/session/mma_40.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: diff --git a/src/vnet/session/mma_template.c b/src/vnet/session/mma_template.c index 9dd7c76cb066..e66f2919720c 100644 --- a/src/vnet/session/mma_template.c +++ b/src/vnet/session/mma_template.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: diff --git a/src/vnet/session/mma_template.h b/src/vnet/session/mma_template.h index fd5e3ea57176..dc3545a4ffef 100644 --- a/src/vnet/session/mma_template.h +++ b/src/vnet/session/mma_template.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c index e48e951722d2..638f078e20da 100644 --- a/src/vnet/session/segment_manager.c +++ b/src/vnet/session/segment_manager.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -379,7 +379,7 @@ void segment_manager_del_sessions (segment_manager_t * sm) { svm_fifo_segment_private_t *fifo_segment; - stream_session_t *session; + session_t *session; svm_fifo_t *fifo; ASSERT (pool_elts (sm->segments) != 0); @@ -399,7 +399,7 @@ segment_manager_del_sessions (segment_manager_t * sm) if (fifo->ct_session_index != SVM_FIFO_INVALID_SESSION_INDEX) { svm_fifo_t *next = fifo->next; - application_local_session_disconnect_w_index (sm->app_wrk_index, + app_worker_local_session_disconnect_w_index (sm->app_wrk_index, fifo->ct_session_index); fifo = next; continue; @@ -599,6 +599,9 @@ segment_manager_dealloc_fifos (u32 segment_index, svm_fifo_t * rx_fifo, svm_fifo_segment_private_t *fifo_segment; segment_manager_t *sm; + if (!rx_fifo || !tx_fifo) + return; + /* It's possible to have no segment manager if the session was removed * as result of a detach. 
*/ if (!(sm = segment_manager_get_if_valid (rx_fifo->segment_manager))) diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h index 657a1fcc2319..15fd067053f5 100644 --- a/src/vnet/session/segment_manager.h +++ b/src/vnet/session/segment_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index c56712bbf873..a94a0c408824 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: @@ -20,12 +20,10 @@ #include #include #include -#include #include #include session_manager_main_t session_manager_main; -extern transport_proto_vft_t *tp_vfts; static inline int session_send_evt_to_thread (void *data, void *args, u32 thread_index, @@ -72,7 +70,7 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index, break; case FIFO_EVENT_BUILTIN_TX: case FIFO_EVENT_DISCONNECT: - evt->session_handle = session_handle ((stream_session_t *) data); + evt->session_handle = session_handle ((session_t *) data); break; default: clib_warning ("evt unhandled!"); @@ -98,8 +96,7 @@ session_send_io_evt_to_thread_custom (void *data, u32 thread_index, } int -session_send_ctrl_evt_to_thread (stream_session_t * s, - session_evt_type_t evt_type) +session_send_ctrl_evt_to_thread (session_t * s, session_evt_type_t evt_type) { /* only event supported for now is disconnect */ ASSERT (evt_type == FIFO_EVENT_DISCONNECT); @@ -120,7 +117,7 @@ session_send_rpc_evt_to_thread (u32 thread_index, void *fp, void *rpc_args) } static void -session_program_transport_close (stream_session_t * s) +session_program_transport_close (session_t * s) { u32 thread_index = vlib_get_thread_index (); session_manager_worker_t *wrk; @@ -140,11 +137,11 @@ session_program_transport_close (stream_session_t * s) session_send_ctrl_evt_to_thread (s, FIFO_EVENT_DISCONNECT); } -stream_session_t * +session_t * session_alloc (u32 thread_index) { session_manager_worker_t *wrk = &session_manager_main.wrk[thread_index]; - stream_session_t *s; + session_t *s; u8 will_expand = 0; pool_get_aligned_will_expand (wrk->sessions, will_expand, CLIB_CACHE_LINE_BYTES); @@ -166,7 +163,7 @@ session_alloc (u32 thread_index) } void -session_free (stream_session_t * s) +session_free (session_t * s) { pool_put (session_manager_main.wrk[s->thread_index].sessions, s); if (CLIB_DEBUG) @@ -174,10 +171,10 @@ session_free (stream_session_t * s) } void -session_free_w_fifos 
(stream_session_t * s) +session_free_w_fifos (session_t * s) { - segment_manager_dealloc_fifos (s->svm_segment_index, s->server_rx_fifo, - s->server_tx_fifo); + segment_manager_dealloc_fifos (s->svm_segment_index, s->rx_fifo, + s->tx_fifo); session_free (s); } @@ -187,7 +184,7 @@ session_free_w_fifos (stream_session_t * s) * Transport connection must still be valid. */ static void -session_delete (stream_session_t * s) +session_delete (session_t * s) { int rv; @@ -199,7 +196,7 @@ session_delete (stream_session_t * s) } int -session_alloc_fifos (segment_manager_t * sm, stream_session_t * s) +session_alloc_fifos (segment_manager_t * sm, session_t * s) { svm_fifo_t *server_rx_fifo = 0, *server_tx_fifo = 0; u32 fifo_segment_index; @@ -216,16 +213,16 @@ session_alloc_fifos (segment_manager_t * sm, stream_session_t * s) server_tx_fifo->master_session_index = s->session_index; server_tx_fifo->master_thread_index = s->thread_index; - s->server_rx_fifo = server_rx_fifo; - s->server_tx_fifo = server_tx_fifo; + s->rx_fifo = server_rx_fifo; + s->tx_fifo = server_tx_fifo; s->svm_segment_index = fifo_segment_index; return 0; } -static stream_session_t * +static session_t * session_alloc_for_connection (transport_connection_t * tc) { - stream_session_t *s; + session_t *s; u32 thread_index = tc->thread_index; ASSERT (thread_index == vlib_get_thread_index () @@ -244,9 +241,9 @@ session_alloc_for_connection (transport_connection_t * tc) static int session_alloc_and_init (segment_manager_t * sm, transport_connection_t * tc, - u8 alloc_fifos, stream_session_t ** ret_s) + u8 alloc_fifos, session_t ** ret_s) { - stream_session_t *s; + session_t *s; int rv; s = session_alloc_for_connection (tc); @@ -301,7 +298,7 @@ session_enqueue_discard_chain_bytes (vlib_main_t * vm, vlib_buffer_t * b, * Enqueue buffer chain tail */ always_inline int -session_enqueue_chain_tail (stream_session_t * s, vlib_buffer_t * b, +session_enqueue_chain_tail (session_t * s, vlib_buffer_t * b, u32 offset, u8 
is_in_order) { vlib_buffer_t *chain_b; @@ -332,7 +329,7 @@ session_enqueue_chain_tail (stream_session_t * s, vlib_buffer_t * b, continue; if (is_in_order) { - rv = svm_fifo_enqueue_nowait (s->server_rx_fifo, len, data); + rv = svm_fifo_enqueue_nowait (s->rx_fifo, len, data); if (rv == len) { written += rv; @@ -355,8 +352,7 @@ session_enqueue_chain_tail (stream_session_t * s, vlib_buffer_t * b, } else { - rv = svm_fifo_enqueue_with_offset (s->server_rx_fifo, offset, len, - data); + rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, len, data); if (rv) { clib_warning ("failed to enqueue multi-buffer seg"); @@ -393,14 +389,14 @@ session_enqueue_stream_connection (transport_connection_t * tc, vlib_buffer_t * b, u32 offset, u8 queue_event, u8 is_in_order) { - stream_session_t *s; + session_t *s; int enqueued = 0, rv, in_order_off; s = session_get (tc->s_index, tc->thread_index); if (is_in_order) { - enqueued = svm_fifo_enqueue_nowait (s->server_rx_fifo, + enqueued = svm_fifo_enqueue_nowait (s->rx_fifo, b->current_length, vlib_buffer_get_current (b)); if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) @@ -414,7 +410,7 @@ session_enqueue_stream_connection (transport_connection_t * tc, } else { - rv = svm_fifo_enqueue_with_offset (s->server_rx_fifo, offset, + rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, b->current_length, vlib_buffer_get_current (b)); if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && !rv)) @@ -442,18 +438,18 @@ session_enqueue_stream_connection (transport_connection_t * tc, } int -session_enqueue_dgram_connection (stream_session_t * s, +session_enqueue_dgram_connection (session_t * s, session_dgram_hdr_t * hdr, vlib_buffer_t * b, u8 proto, u8 queue_event) { int enqueued = 0, rv, in_order_off; - ASSERT (svm_fifo_max_enqueue (s->server_rx_fifo) + ASSERT (svm_fifo_max_enqueue (s->rx_fifo) >= b->current_length + sizeof (*hdr)); - svm_fifo_enqueue_nowait (s->server_rx_fifo, sizeof (session_dgram_hdr_t), + svm_fifo_enqueue_nowait 
(s->rx_fifo, sizeof (session_dgram_hdr_t), (u8 *) hdr); - enqueued = svm_fifo_enqueue_nowait (s->server_rx_fifo, b->current_length, + enqueued = svm_fifo_enqueue_nowait (s->rx_fifo, b->current_length, vlib_buffer_get_current (b)); if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && enqueued >= 0)) { @@ -483,12 +479,12 @@ u8 stream_session_no_space (transport_connection_t * tc, u32 thread_index, u16 data_len) { - stream_session_t *s = session_get (tc->s_index, thread_index); + session_t *s = session_get (tc->s_index, thread_index); if (PREDICT_FALSE (s->session_state != SESSION_STATE_READY)) return 1; - if (data_len > svm_fifo_max_enqueue (s->server_rx_fifo)) + if (data_len > svm_fifo_max_enqueue (s->rx_fifo)) return 1; return 0; @@ -497,25 +493,49 @@ stream_session_no_space (transport_connection_t * tc, u32 thread_index, u32 session_tx_fifo_max_dequeue (transport_connection_t * tc) { - stream_session_t *s = session_get (tc->s_index, tc->thread_index); - if (!s->server_tx_fifo) + session_t *s = session_get (tc->s_index, tc->thread_index); + if (!s->tx_fifo) return 0; - return svm_fifo_max_dequeue (s->server_tx_fifo); + return svm_fifo_max_dequeue (s->tx_fifo); } int stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer, u32 offset, u32 max_bytes) { - stream_session_t *s = session_get (tc->s_index, tc->thread_index); - return svm_fifo_peek (s->server_tx_fifo, offset, max_bytes, buffer); + session_t *s = session_get (tc->s_index, tc->thread_index); + return svm_fifo_peek (s->tx_fifo, offset, max_bytes, buffer); } u32 stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes) { - stream_session_t *s = session_get (tc->s_index, tc->thread_index); - return svm_fifo_dequeue_drop (s->server_tx_fifo, max_bytes); + session_t *s = session_get (tc->s_index, tc->thread_index); + return svm_fifo_dequeue_drop (s->tx_fifo, max_bytes); +} + +static inline int +session_notify_subscribers (u32 app_index, session_t * s, + svm_fifo_t * f, 
session_evt_type_t evt_type) +{ + app_worker_t *app_wrk; + application_t *app; + int i; + + app = application_get (app_index); + if (!app) + return -1; + + for (i = 0; i < f->n_subscribers; i++) + { + app_wrk = application_get_worker (app, f->subscribers[i]); + if (!app_wrk) + continue; + if (app_worker_lock_and_send_event (app_wrk, s, evt_type)) + return -1; + } + + return 0; } /** @@ -527,12 +547,12 @@ stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes) * @return 0 on success or negative number if failed to send notification. */ static inline int -session_enqueue_notify (stream_session_t * s) +session_enqueue_notify (session_t * s) { - app_worker_t *app; + app_worker_t *app_wrk; - app = app_worker_get_if_valid (s->app_wrk_index); - if (PREDICT_FALSE (!app)) + app_wrk = app_worker_get_if_valid (s->app_wrk_index); + if (PREDICT_FALSE (!app_wrk)) { SESSION_DBG ("invalid s->app_index = %d", s->app_wrk_index); return 0; @@ -541,26 +561,40 @@ session_enqueue_notify (stream_session_t * s) /* *INDENT-OFF* */ SESSION_EVT_DBG(SESSION_EVT_ENQ, s, ({ ed->data[0] = FIFO_EVENT_APP_RX; - ed->data[1] = svm_fifo_max_dequeue (s->server_rx_fifo); + ed->data[1] = svm_fifo_max_dequeue (s->rx_fifo); })); /* *INDENT-ON* */ - return app_worker_lock_and_send_event (app, s, FIFO_EVENT_APP_RX); + if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s, + FIFO_EVENT_APP_RX))) + return -1; + + if (PREDICT_FALSE (svm_fifo_n_subscribers (s->rx_fifo))) + return session_notify_subscribers (app_wrk->app_index, s, + s->rx_fifo, FIFO_EVENT_APP_RX); + + return 0; } int -session_dequeue_notify (stream_session_t * s) +session_dequeue_notify (session_t * s) { - app_worker_t *app; + app_worker_t *app_wrk; - app = app_worker_get_if_valid (s->app_wrk_index); - if (PREDICT_FALSE (!app)) + app_wrk = app_worker_get_if_valid (s->app_wrk_index); + if (PREDICT_FALSE (!app_wrk)) return -1; - if (app_worker_lock_and_send_event (app, s, FIFO_EVENT_APP_TX)) + if (PREDICT_FALSE 
(app_worker_lock_and_send_event (app_wrk, s, + FIFO_EVENT_APP_TX))) return -1; - svm_fifo_clear_tx_ntf (s->server_tx_fifo); + if (PREDICT_FALSE (s->tx_fifo->n_subscribers)) + return session_notify_subscribers (app_wrk->app_index, s, + s->tx_fifo, FIFO_EVENT_APP_TX); + + svm_fifo_clear_tx_ntf (s->tx_fifo); + return 0; } @@ -576,7 +610,7 @@ int session_manager_flush_enqueue_events (u8 transport_proto, u32 thread_index) { session_manager_worker_t *wrk = session_manager_get_worker (thread_index); - stream_session_t *s; + session_t *s; int i, errors = 0; u32 *indices; @@ -620,17 +654,17 @@ void stream_session_init_fifos_pointers (transport_connection_t * tc, u32 rx_pointer, u32 tx_pointer) { - stream_session_t *s; + session_t *s; s = session_get (tc->s_index, tc->thread_index); - svm_fifo_init_pointers (s->server_rx_fifo, rx_pointer); - svm_fifo_init_pointers (s->server_tx_fifo, tx_pointer); + svm_fifo_init_pointers (s->rx_fifo, rx_pointer); + svm_fifo_init_pointers (s->tx_fifo, tx_pointer); } int session_stream_connect_notify (transport_connection_t * tc, u8 is_fail) { u32 opaque = 0, new_ti, new_si; - stream_session_t *new_s = 0; + session_t *new_s = 0; segment_manager_t *sm; app_worker_t *app_wrk; application_t *app; @@ -716,14 +750,13 @@ static void session_switch_pool (void *cb_args) { session_switch_pool_args_t *args = (session_switch_pool_args_t *) cb_args; - transport_proto_t tp; - stream_session_t *s; + session_t *s; ASSERT (args->thread_index == vlib_get_thread_index ()); s = session_get (args->session_index, args->thread_index); - s->server_tx_fifo->master_session_index = args->new_session_index; - s->server_tx_fifo->master_thread_index = args->new_thread_index; - tp = session_get_transport_proto (s); - tp_vfts[tp].cleanup (s->connection_index, s->thread_index); + s->tx_fifo->master_session_index = args->new_session_index; + s->tx_fifo->master_thread_index = args->new_thread_index; + transport_cleanup (session_get_transport_proto (s), s->connection_index, + 
s->thread_index); session_free (s); clib_mem_free (cb_args); } @@ -733,10 +766,9 @@ session_switch_pool (void *cb_args) */ int session_dgram_connect_notify (transport_connection_t * tc, - u32 old_thread_index, - stream_session_t ** new_session) + u32 old_thread_index, session_t ** new_session) { - stream_session_t *new_s; + session_t *new_s; session_switch_pool_args_t *rpc_args; /* @@ -744,8 +776,8 @@ session_dgram_connect_notify (transport_connection_t * tc, */ new_s = session_clone_safe (tc->s_index, old_thread_index); new_s->connection_index = tc->c_index; - new_s->server_rx_fifo->master_session_index = new_s->session_index; - new_s->server_rx_fifo->master_thread_index = new_s->thread_index; + new_s->rx_fifo->master_session_index = new_s->session_index; + new_s->rx_fifo->master_thread_index = new_s->thread_index; new_s->session_state = SESSION_STATE_READY; session_lookup_add_connection (tc, session_handle (new_s)); @@ -772,7 +804,7 @@ stream_session_accept_notify (transport_connection_t * tc) { app_worker_t *app_wrk; application_t *app; - stream_session_t *s; + session_t *s; s = session_get (tc->s_index, tc->thread_index); app_wrk = app_worker_get_if_valid (s->app_wrk_index); @@ -795,7 +827,7 @@ session_transport_closing_notify (transport_connection_t * tc) { app_worker_t *app_wrk; application_t *app; - stream_session_t *s; + session_t *s; s = session_get (tc->s_index, tc->thread_index); if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) @@ -819,14 +851,14 @@ session_transport_closing_notify (transport_connection_t * tc) void session_transport_delete_notify (transport_connection_t * tc) { - stream_session_t *s; + session_t *s; /* App might've been removed already */ if (!(s = session_get_if_valid (tc->s_index, tc->thread_index))) return; /* Make sure we don't try to send anything more */ - svm_fifo_dequeue_drop_all (s->server_tx_fifo); + svm_fifo_dequeue_drop_all (s->tx_fifo); switch (s->session_state) { @@ -874,7 +906,7 @@ session_transport_delete_notify 
(transport_connection_t * tc) void session_transport_closed_notify (transport_connection_t * tc) { - stream_session_t *s; + session_t *s; if (!(s = session_get_if_valid (tc->s_index, tc->thread_index))) return; @@ -896,11 +928,11 @@ session_transport_closed_notify (transport_connection_t * tc) void session_transport_reset_notify (transport_connection_t * tc) { - stream_session_t *s; + session_t *s; app_worker_t *app_wrk; application_t *app; s = session_get (tc->s_index, tc->thread_index); - svm_fifo_dequeue_drop_all (s->server_tx_fifo); + svm_fifo_dequeue_drop_all (s->tx_fifo); if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) return; s->session_state = SESSION_STATE_TRANSPORT_CLOSING; @@ -913,17 +945,17 @@ session_transport_reset_notify (transport_connection_t * tc) * Accept a stream session. Optionally ping the server by callback. */ int -stream_session_accept (transport_connection_t * tc, u32 listener_index, +session_stream_accept (transport_connection_t * tc, u32 listener_index, u8 notify) { - stream_session_t *s, *listener; + session_t *s, *listener; app_worker_t *app_wrk; segment_manager_t *sm; int rv; /* Find the server */ listener = listen_session_get (listener_index); - app_wrk = application_listener_select_worker (listener, 0); + app_wrk = application_listener_select_worker (listener); sm = app_worker_get_listen_segment_manager (app_wrk, listener); if ((rv = session_alloc_and_init (sm, tc, 1, &s))) @@ -949,19 +981,19 @@ session_open_cl (u32 app_wrk_index, session_endpoint_t * rmt, u32 opaque) transport_endpoint_cfg_t *tep; segment_manager_t *sm; app_worker_t *app_wrk; - stream_session_t *s; + session_t *s; application_t *app; int rv; tep = session_endpoint_to_transport_cfg (rmt); - rv = tp_vfts[rmt->transport_proto].open (tep); + rv = transport_connect (rmt->transport_proto, tep); if (rv < 0) { SESSION_DBG ("Transport failed to open connection."); return VNET_API_ERROR_SESSION_CONNECT; } - tc = tp_vfts[rmt->transport_proto].get_half_open ((u32) 
rv); + tc = transport_get_half_open (rmt->transport_proto, (u32) rv); /* For dgram type of service, allocate session and fifos now. */ @@ -989,14 +1021,14 @@ session_open_vc (u32 app_wrk_index, session_endpoint_t * rmt, u32 opaque) int rv; tep = session_endpoint_to_transport_cfg (rmt); - rv = tp_vfts[rmt->transport_proto].open (tep); + rv = transport_connect (rmt->transport_proto, tep); if (rv < 0) { SESSION_DBG ("Transport failed to open connection."); return VNET_API_ERROR_SESSION_CONNECT; } - tc = tp_vfts[rmt->transport_proto].get_half_open ((u32) rv); + tc = transport_get_half_open (rmt->transport_proto, (u32) rv); /* If transport offers a stream service, only allocate session once the * connection has been established. @@ -1024,7 +1056,7 @@ session_open_app (u32 app_wrk_index, session_endpoint_t * rmt, u32 opaque) sep->app_wrk_index = app_wrk_index; sep->opaque = opaque; - return tp_vfts[rmt->transport_proto].open (tep_cfg); + return transport_connect (rmt->transport_proto, tep_cfg); } typedef int (*session_open_service_fn) (u32, session_endpoint_t *, u32); @@ -1053,7 +1085,8 @@ static session_open_service_fn session_open_srv_fns[TRANSPORT_N_SERVICES] = { int session_open (u32 app_wrk_index, session_endpoint_t * rmt, u32 opaque) { - transport_service_type_t tst = tp_vfts[rmt->transport_proto].service_type; + transport_service_type_t tst; + tst = transport_protocol_service_type (rmt->transport_proto); return session_open_srv_fns[tst] (app_wrk_index, rmt, opaque); } @@ -1066,7 +1099,7 @@ session_open (u32 app_wrk_index, session_endpoint_t * rmt, u32 opaque) * @param sep Local endpoint to be listened on. 
*/ int -session_listen (stream_session_t * ls, session_endpoint_cfg_t * sep) +session_listen (session_t * ls, session_endpoint_cfg_t * sep) { transport_connection_t *tc; transport_endpoint_t *tep; @@ -1075,7 +1108,7 @@ session_listen (stream_session_t * ls, session_endpoint_cfg_t * sep) /* Transport bind/listen */ tep = session_endpoint_to_transport (sep); s_index = ls->session_index; - tc_index = tp_vfts[sep->transport_proto].bind (s_index, tep); + tc_index = transport_start_listen (sep->transport_proto, s_index, tep); if (tc_index == (u32) ~ 0) return -1; @@ -1085,8 +1118,8 @@ session_listen (stream_session_t * ls, session_endpoint_cfg_t * sep) ls->connection_index = tc_index; /* Add to the main lookup table after transport was initialized */ - tc = tp_vfts[sep->transport_proto].get_listener (tc_index); - session_lookup_add_connection (tc, s_index); + tc = transport_get_listener (sep->transport_proto, tc_index); + session_lookup_add_connection (tc, listen_session_get_handle (ls)); return 0; } @@ -1096,25 +1129,20 @@ session_listen (stream_session_t * ls, session_endpoint_cfg_t * sep) * @param s Session to stop listening on. It must be in state LISTENING. */ int -session_stop_listen (stream_session_t * s) +session_stop_listen (session_t * s) { transport_proto_t tp = session_get_transport_proto (s); transport_connection_t *tc; + if (s->session_state != SESSION_STATE_LISTENING) - { - clib_warning ("not a listening session"); - return -1; - } + return -1; - tc = tp_vfts[tp].get_listener (s->connection_index); + tc = transport_get_listener (tp, s->connection_index); if (!tc) - { - clib_warning ("no transport"); - return VNET_API_ERROR_ADDRESS_NOT_IN_USE; - } + return VNET_API_ERROR_ADDRESS_NOT_IN_USE; session_lookup_del_connection (tc); - tp_vfts[tp].unbind (s->connection_index); + transport_stop_listen (tp, s->connection_index); return 0; } @@ -1125,7 +1153,7 @@ session_stop_listen (stream_session_t * s) * requests are served before transport is notified. 
*/ void -session_close (stream_session_t * s) +session_close (session_t * s) { if (!s) return; @@ -1139,7 +1167,7 @@ session_close (stream_session_t * s) /* Session already closed. Clear the tx fifo */ if (s->session_state == SESSION_STATE_CLOSED) - svm_fifo_dequeue_drop_all (s->server_tx_fifo); + svm_fifo_dequeue_drop_all (s->tx_fifo); return; } @@ -1155,7 +1183,7 @@ session_close (stream_session_t * s) * Must be called from the session's thread. */ void -session_transport_close (stream_session_t * s) +session_transport_close (session_t * s) { /* If transport is already closed, just free the session */ if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED) @@ -1170,13 +1198,13 @@ session_transport_close (stream_session_t * s) * point, either after sending everything or after a timeout, call delete * notify. This will finally lead to the complete cleanup of the session. */ - if (svm_fifo_max_dequeue (s->server_tx_fifo)) + if (svm_fifo_max_dequeue (s->tx_fifo)) s->session_state = SESSION_STATE_CLOSED_WAITING; else s->session_state = SESSION_STATE_CLOSED; - tp_vfts[session_get_transport_proto (s)].close (s->connection_index, - s->thread_index); + transport_close (session_get_transport_proto (s), s->connection_index, + s->thread_index); } /** @@ -1187,41 +1215,19 @@ session_transport_close (stream_session_t * s) * closed. */ void -session_transport_cleanup (stream_session_t * s) +session_transport_cleanup (session_t * s) { s->session_state = SESSION_STATE_CLOSED; /* Delete from main lookup table before we axe the the transport */ session_lookup_del_session (s); - tp_vfts[session_get_transport_proto (s)].cleanup (s->connection_index, - s->thread_index); + transport_cleanup (session_get_transport_proto (s), s->connection_index, + s->thread_index); /* Since we called cleanup, no delete notification will come. So, make * sure the session is properly freed. 
*/ session_free_w_fifos (s); } -transport_service_type_t -session_transport_service_type (stream_session_t * s) -{ - transport_proto_t tp; - tp = session_get_transport_proto (s); - return transport_protocol_service_type (tp); -} - -transport_tx_fn_type_t -session_transport_tx_fn_type (stream_session_t * s) -{ - transport_proto_t tp; - tp = session_get_transport_proto (s); - return transport_protocol_tx_fn_type (tp); -} - -u8 -session_tx_is_dgram (stream_session_t * s) -{ - return (session_transport_tx_fn_type (s) == TRANSPORT_TX_DGRAM); -} - /** * Allocate event queues in the shared-memory segment * @@ -1237,7 +1243,7 @@ session_vpp_event_queues_allocate (session_manager_main_t * smm) u32 evt_q_length = 2048, evt_size = sizeof (session_event_t); ssvm_private_t *eqs = &smm->evt_qs_segment; api_main_t *am = &api_main; - u64 eqs_size = 64 << 20; + uword eqs_size = 64 << 20; pid_t vpp_pid = getpid (); void *oldheap; int i; @@ -1274,7 +1280,7 @@ session_vpp_event_queues_allocate (session_manager_main_t * smm) svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = { {evt_q_length, evt_size, 0} , - {evt_q_length << 1, 256, 0} + {evt_q_length >> 1, 256, 0} }; cfg->consumer_pid = 0; cfg->n_rings = 2; @@ -1348,32 +1354,29 @@ session_register_transport (transport_proto_t transport_proto, } transport_connection_t * -session_get_transport (stream_session_t * s) +session_get_transport (session_t * s) { - transport_proto_t tp; if (s->session_state != SESSION_STATE_LISTENING) - { - tp = session_get_transport_proto (s); - return tp_vfts[tp].get_connection (s->connection_index, - s->thread_index); - } - return 0; + return transport_get_connection (session_get_transport_proto (s), + s->connection_index, s->thread_index); + else + return transport_get_listener (session_get_transport_proto (s), + s->connection_index); } transport_connection_t * -listen_session_get_transport (stream_session_t * s) +listen_session_get_transport (session_t * s) { - transport_proto_t tp = 
session_get_transport_proto (s); - return tp_vfts[tp].get_listener (s->connection_index); + return transport_get_listener (session_get_transport_proto (s), + s->connection_index); } int -listen_session_get_local_session_endpoint (stream_session_t * listener, +listen_session_get_local_session_endpoint (session_t * listener, session_endpoint_t * sep) { - transport_proto_t tp = session_get_transport_proto (listener); transport_connection_t *tc; - tc = tp_vfts[tp].get_listener (listener->connection_index); + tc = listen_session_get_transport (listener); if (!tc) { clib_warning ("no transport"); @@ -1633,6 +1636,9 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input) ; else if (unformat (input, "evt_qs_memfd_seg")) smm->evt_qs_use_memfd_seg = 1; + else if (unformat (input, "evt_qs_seg_size %U", unformat_memory_size, + &smm->evt_qs_segment_size)) + ; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index 2bbc380282cc..721119f00970 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: @@ -15,72 +15,13 @@ #ifndef __included_session_h__ #define __included_session_h__ -#include +#include #include -#include #include #include -#include +#include -#define HALF_OPEN_LOOKUP_INVALID_VALUE ((u64)~0) -#define INVALID_INDEX ((u32)~0) #define SESSION_PROXY_LISTENER_INDEX ((u8)~0 - 1) -#define SESSION_LOCAL_HANDLE_PREFIX 0x7FFFFFFF - -/* TODO decide how much since we have pre-data as well */ -#define MAX_HDRS_LEN 100 /* Max number of bytes for headers */ - -typedef enum -{ - FIFO_EVENT_APP_RX, - SESSION_IO_EVT_CT_RX, - FIFO_EVENT_APP_TX, - SESSION_IO_EVT_CT_TX, - SESSION_IO_EVT_TX_FLUSH, - FIFO_EVENT_DISCONNECT, - FIFO_EVENT_BUILTIN_RX, - FIFO_EVENT_BUILTIN_TX, - FIFO_EVENT_RPC, - SESSION_CTRL_EVT_BOUND, - SESSION_CTRL_EVT_ACCEPTED, - SESSION_CTRL_EVT_ACCEPTED_REPLY, - SESSION_CTRL_EVT_CONNECTED, - SESSION_CTRL_EVT_CONNECTED_REPLY, - SESSION_CTRL_EVT_DISCONNECTED, - SESSION_CTRL_EVT_DISCONNECTED_REPLY, - SESSION_CTRL_EVT_RESET, - SESSION_CTRL_EVT_RESET_REPLY, - SESSION_CTRL_EVT_REQ_WORKER_UPDATE, - SESSION_CTRL_EVT_WORKER_UPDATE, - SESSION_CTRL_EVT_WORKER_UPDATE_REPLY, -} session_evt_type_t; - -static inline const char * -fifo_event_type_str (session_evt_type_t et) -{ - switch (et) - { - case FIFO_EVENT_APP_RX: - return "FIFO_EVENT_APP_RX"; - case FIFO_EVENT_APP_TX: - return "FIFO_EVENT_APP_TX"; - case FIFO_EVENT_DISCONNECT: - return "FIFO_EVENT_DISCONNECT"; - case FIFO_EVENT_BUILTIN_RX: - return "FIFO_EVENT_BUILTIN_RX"; - case FIFO_EVENT_RPC: - return "FIFO_EVENT_RPC"; - default: - return "UNKNOWN FIFO EVENT"; - } -} - -typedef enum -{ - SESSION_MQ_IO_EVT_RING, - SESSION_MQ_CTRL_EVT_RING, - SESSION_MQ_N_RINGS -} session_mq_rings_e; #define foreach_session_input_error \ _(NO_SESSION, "No session drops") \ @@ -102,63 +43,10 @@ typedef enum SESSION_N_ERROR, } session_error_t; -typedef struct -{ - void *fp; - void *arg; -} session_rpc_args_t; - -typedef u64 session_handle_t; - -/* *INDENT-OFF* */ -typedef struct -{ - u8 
event_type; - u8 postponed; - union - { - svm_fifo_t *fifo; - session_handle_t session_handle; - session_rpc_args_t rpc_args; - struct - { - u8 data[0]; - }; - }; -} __clib_packed session_event_t; -/* *INDENT-ON* */ - -#define SESSION_MSG_NULL { } - -typedef struct session_dgram_pre_hdr_ -{ - u32 data_length; - u32 data_offset; -} session_dgram_pre_hdr_t; - -/* *INDENT-OFF* */ -typedef CLIB_PACKED (struct session_dgram_header_ -{ - u32 data_length; - u32 data_offset; - ip46_address_t rmt_ip; - ip46_address_t lcl_ip; - u16 rmt_port; - u16 lcl_port; - u8 is_ip4; -}) session_dgram_hdr_t; -/* *INDENT-ON* */ - -#define SESSION_CONN_ID_LEN 37 -#define SESSION_CONN_HDR_LEN 45 - -STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8), - "session conn id wrong length"); - typedef struct session_tx_context_ { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - stream_session_t *s; + session_t *s; transport_proto_vft_t *transport_vft; transport_connection_t *tc; vlib_buffer_t *b; @@ -181,7 +69,7 @@ typedef struct session_manager_worker_ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); /** Worker session pool */ - stream_session_t *sessions; + session_t *sessions; /** vpp event message queue for worker */ svm_msg_q_t *vpp_event_queue; @@ -266,7 +154,7 @@ typedef struct session_manager_main_ /** Session ssvm segment configs*/ uword session_baseva; uword session_va_space_size; - u32 evt_qs_segment_size; + uword evt_qs_segment_size; u8 evt_qs_use_memfd_seg; /** Session table size parameters */ @@ -303,25 +191,10 @@ extern vlib_node_registration_t session_queue_process_node; #define SESSION_Q_PROCESS_FLUSH_FRAMES 1 #define SESSION_Q_PROCESS_STOP 2 -/* - * Session manager function - */ -always_inline session_manager_main_t * -vnet_get_session_manager_main () -{ - return &session_manager_main; -} - -always_inline session_manager_worker_t * -session_manager_get_worker (u32 thread_index) -{ - return &session_manager_main.wrk[thread_index]; -} - always_inline u8 
stream_session_is_valid (u32 si, u8 thread_index) { - stream_session_t *s; + session_t *s; s = pool_elt_at_index (session_manager_main.wrk[thread_index].sessions, si); if (s->thread_index != thread_index || s->session_index != si /* || s->server_rx_fifo->master_session_index != si @@ -332,12 +205,12 @@ stream_session_is_valid (u32 si, u8 thread_index) return 1; } -stream_session_t *session_alloc (u32 thread_index); -int session_alloc_fifos (segment_manager_t * sm, stream_session_t * s); -void session_free (stream_session_t * s); -void session_free_w_fifos (stream_session_t * s); +session_t *session_alloc (u32 thread_index); +int session_alloc_fifos (segment_manager_t * sm, session_t * s); +void session_free (session_t * s); +void session_free_w_fifos (session_t * s); -always_inline stream_session_t * +always_inline session_t * session_get (u32 si, u32 thread_index) { ASSERT (stream_session_is_valid (si, thread_index)); @@ -345,7 +218,7 @@ session_get (u32 si, u32 thread_index) si); } -always_inline stream_session_t * +always_inline session_t * session_get_if_valid (u64 si, u32 thread_index) { if (thread_index >= vec_len (session_manager_main.wrk)) @@ -360,33 +233,7 @@ session_get_if_valid (u64 si, u32 thread_index) si); } -always_inline session_handle_t -session_handle (stream_session_t * s) -{ - return ((u64) s->thread_index << 32) | (u64) s->session_index; -} - -always_inline u32 -session_index_from_handle (session_handle_t handle) -{ - return handle & 0xFFFFFFFF; -} - -always_inline u32 -session_thread_from_handle (session_handle_t handle) -{ - return handle >> 32; -} - -always_inline void -session_parse_handle (session_handle_t handle, u32 * index, - u32 * thread_index) -{ - *index = session_index_from_handle (handle); - *thread_index = session_thread_from_handle (handle); -} - -always_inline stream_session_t * +always_inline session_t * session_get_from_handle (session_handle_t handle) { session_manager_main_t *smm = &session_manager_main; @@ -395,7 +242,7 @@ 
session_get_from_handle (session_handle_t handle) return pool_elt_at_index (smm->wrk[thread_index].sessions, session_index); } -always_inline stream_session_t * +always_inline session_t * session_get_from_handle_if_valid (session_handle_t handle) { u32 session_index, thread_index; @@ -403,63 +250,19 @@ session_get_from_handle_if_valid (session_handle_t handle) return session_get_if_valid (session_index, thread_index); } -always_inline u8 -session_handle_is_local (session_handle_t handle) -{ - if ((handle >> 32) == SESSION_LOCAL_HANDLE_PREFIX) - return 1; - return 0; -} - -always_inline transport_proto_t -session_type_transport_proto (session_type_t st) -{ - return (st >> 1); -} - -always_inline u8 -session_type_is_ip4 (session_type_t st) -{ - return (st & 1); -} - -always_inline transport_proto_t -session_get_transport_proto (stream_session_t * s) -{ - return (s->session_type >> 1); -} - -always_inline fib_protocol_t -session_get_fib_proto (stream_session_t * s) +always_inline u64 +session_segment_handle (session_t * s) { - u8 is_ip4 = s->session_type & 1; - return (is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); -} + svm_fifo_t *f; -always_inline session_type_t -session_type_from_proto_and_ip (transport_proto_t proto, u8 is_ip4) -{ - return (proto << 1 | is_ip4); -} + if (s->session_state == SESSION_STATE_LISTENING) + return SESSION_INVALID_HANDLE; -always_inline u64 -session_segment_handle (stream_session_t * s) -{ - svm_fifo_t *f = s->server_rx_fifo; + f = s->rx_fifo; return segment_manager_make_segment_handle (f->segment_manager, f->segment_index); } -always_inline u8 -session_has_transport (stream_session_t * s) -{ - return (session_get_transport_proto (s) != TRANSPORT_PROTO_NONE); -} - -transport_service_type_t session_transport_service_type (stream_session_t *); -transport_tx_fn_type_t session_transport_tx_fn_type (stream_session_t *); -u8 session_tx_is_dgram (stream_session_t * s); - /** * Acquires a lock that blocks a session pool from expanding. 
* @@ -493,7 +296,7 @@ session_pool_remove_peeker (u32 thread_index) * * Caller should drop the peek 'lock' as soon as possible. */ -always_inline stream_session_t * +always_inline session_t * session_get_from_handle_safe (u64 handle) { u32 thread_index = session_thread_from_handle (handle); @@ -513,62 +316,15 @@ session_get_from_handle_safe (u64 handle) } always_inline u32 -transport_max_rx_enqueue (transport_connection_t * tc) -{ - stream_session_t *s = session_get (tc->s_index, tc->thread_index); - return svm_fifo_max_enqueue (s->server_rx_fifo); -} - -always_inline u32 -transport_max_tx_dequeue (transport_connection_t * tc) -{ - stream_session_t *s = session_get (tc->s_index, tc->thread_index); - return svm_fifo_max_dequeue (s->server_tx_fifo); -} - -always_inline u32 -transport_rx_fifo_size (transport_connection_t * tc) -{ - stream_session_t *s = session_get (tc->s_index, tc->thread_index); - return s->server_rx_fifo->nitems; -} - -always_inline u32 -transport_tx_fifo_size (transport_connection_t * tc) -{ - stream_session_t *s = session_get (tc->s_index, tc->thread_index); - return s->server_tx_fifo->nitems; -} - -always_inline u8 -transport_rx_fifo_has_ooo_data (transport_connection_t * tc) -{ - stream_session_t *s = session_get (tc->c_index, tc->thread_index); - return svm_fifo_has_ooo_data (s->server_rx_fifo); -} - -always_inline f64 -transport_dispatch_period (u32 thread_index) -{ - return session_manager_main.wrk[thread_index].dispatch_period; -} - -always_inline f64 -transport_time_now (u32 thread_index) -{ - return session_manager_main.wrk[thread_index].last_vlib_time; -} - -always_inline u32 -session_get_index (stream_session_t * s) +session_get_index (session_t * s) { return (s - session_manager_main.wrk[s->thread_index].sessions); } -always_inline stream_session_t * +always_inline session_t * session_clone_safe (u32 session_index, u32 thread_index) { - stream_session_t *old_s, *new_s; + session_t *old_s, *new_s; u32 current_thread_index = 
vlib_get_thread_index (); /* If during the memcpy pool is reallocated AND the memory allocator @@ -586,15 +342,35 @@ session_clone_safe (u32 session_index, u32 thread_index) return new_s; } -transport_connection_t *session_get_transport (stream_session_t * s); +int session_open (u32 app_index, session_endpoint_t * tep, u32 opaque); +int session_listen (session_t * s, session_endpoint_cfg_t * sep); +int session_stop_listen (session_t * s); +void session_close (session_t * s); +void session_transport_close (session_t * s); +void session_transport_cleanup (session_t * s); +int session_send_io_evt_to_thread (svm_fifo_t * f, + session_evt_type_t evt_type); +int session_dequeue_notify (session_t * s); +int session_send_io_evt_to_thread_custom (void *data, u32 thread_index, + session_evt_type_t evt_type); +void session_send_rpc_evt_to_thread (u32 thread_index, void *fp, + void *rpc_args); +transport_connection_t *session_get_transport (session_t * s); -u32 session_tx_fifo_max_dequeue (transport_connection_t * tc); -int -session_enqueue_stream_connection (transport_connection_t * tc, - vlib_buffer_t * b, u32 offset, - u8 queue_event, u8 is_in_order); -int session_enqueue_dgram_connection (stream_session_t * s, +u8 *format_stream_session (u8 * s, va_list * args); +uword unformat_stream_session (unformat_input_t * input, va_list * args); +uword unformat_transport_connection (unformat_input_t * input, + va_list * args); + +/* + * Interface to transport protos + */ + +int session_enqueue_stream_connection (transport_connection_t * tc, + vlib_buffer_t * b, u32 offset, + u8 queue_event, u8 is_in_order); +int session_enqueue_dgram_connection (session_t * s, session_dgram_hdr_t * hdr, vlib_buffer_t * b, u8 proto, u8 queue_event); @@ -605,8 +381,7 @@ u32 stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes); int session_stream_connect_notify (transport_connection_t * tc, u8 is_fail); int session_dgram_connect_notify (transport_connection_t * tc, u32 
old_thread_index, - stream_session_t ** new_session); -int session_dequeue_notify (stream_session_t * s); + session_t ** new_session); void stream_session_init_fifos_pointers (transport_connection_t * tc, u32 rx_pointer, u32 tx_pointer); @@ -615,60 +390,81 @@ void session_transport_closing_notify (transport_connection_t * tc); void session_transport_delete_notify (transport_connection_t * tc); void session_transport_closed_notify (transport_connection_t * tc); void session_transport_reset_notify (transport_connection_t * tc); -int stream_session_accept (transport_connection_t * tc, u32 listener_index, +int session_stream_accept (transport_connection_t * tc, u32 listener_index, u8 notify); -int session_open (u32 app_index, session_endpoint_t * tep, u32 opaque); -int session_listen (stream_session_t * s, session_endpoint_cfg_t * sep); -int session_stop_listen (stream_session_t * s); -void session_close (stream_session_t * s); -void session_transport_close (stream_session_t * s); -void session_transport_cleanup (stream_session_t * s); -int session_send_io_evt_to_thread (svm_fifo_t * f, - session_evt_type_t evt_type); -int session_send_io_evt_to_thread_custom (void *data, u32 thread_index, - session_evt_type_t evt_type); -void session_send_rpc_evt_to_thread (u32 thread_index, void *fp, - void *rpc_args); - -ssvm_private_t *session_manager_get_evt_q_segment (void); - -u8 *format_stream_session (u8 * s, va_list * args); -uword unformat_stream_session (unformat_input_t * input, va_list * args); -uword unformat_transport_connection (unformat_input_t * input, - va_list * args); - +u32 session_tx_fifo_max_dequeue (transport_connection_t * tc); void session_register_transport (transport_proto_t transport_proto, const transport_proto_vft_t * vft, u8 is_ip4, u32 output_node); -always_inline void -transport_add_tx_event (transport_connection_t * tc) +always_inline u32 +transport_max_rx_enqueue (transport_connection_t * tc) { - stream_session_t *s = session_get (tc->s_index, 
tc->thread_index); - if (svm_fifo_has_event (s->server_tx_fifo)) - return; - session_send_io_evt_to_thread (s->server_tx_fifo, FIFO_EVENT_APP_TX); + session_t *s = session_get (tc->s_index, tc->thread_index); + return svm_fifo_max_enqueue (s->rx_fifo); } -clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en); +always_inline u32 +transport_max_tx_dequeue (transport_connection_t * tc) +{ + session_t *s = session_get (tc->s_index, tc->thread_index); + return svm_fifo_max_dequeue (s->tx_fifo); +} -always_inline svm_msg_q_t * -session_manager_get_vpp_event_queue (u32 thread_index) +always_inline u32 +transport_rx_fifo_size (transport_connection_t * tc) { - return session_manager_main.wrk[thread_index].vpp_event_queue; + session_t *s = session_get (tc->s_index, tc->thread_index); + return s->rx_fifo->nitems; } -int session_manager_flush_enqueue_events (u8 proto, u32 thread_index); -int session_manager_flush_all_enqueue_events (u8 transport_proto); +always_inline u32 +transport_tx_fifo_size (transport_connection_t * tc) +{ + session_t *s = session_get (tc->s_index, tc->thread_index); + return s->tx_fifo->nitems; +} + +always_inline u8 +transport_rx_fifo_has_ooo_data (transport_connection_t * tc) +{ + session_t *s = session_get (tc->s_index, tc->thread_index); /* s_index: session pool index */ + return svm_fifo_has_ooo_data (s->rx_fifo); +} + +always_inline f64 +transport_dispatch_period (u32 thread_index) +{ + return session_manager_main.wrk[thread_index].dispatch_period; +} + +always_inline f64 +transport_time_now (u32 thread_index) +{ + return session_manager_main.wrk[thread_index].last_vlib_time; +} + +always_inline void +transport_add_tx_event (transport_connection_t * tc) +{ + session_t *s = session_get (tc->s_index, tc->thread_index); + if (svm_fifo_has_event (s->tx_fifo)) + return; + session_send_io_evt_to_thread (s->tx_fifo, FIFO_EVENT_APP_TX); +} + +/* + * Listen sessions + */ always_inline u64 -listen_session_get_handle (stream_session_t * s) +listen_session_get_handle 
(session_t * s) { ASSERT (s->session_state == SESSION_STATE_LISTENING); return session_handle (s); } -always_inline stream_session_t * +always_inline session_t * listen_session_get_from_handle (session_handle_t handle) { return session_get_from_handle (handle); @@ -681,35 +477,55 @@ listen_session_parse_handle (session_handle_t handle, u32 * index, session_parse_handle (handle, index, thread_index); } -always_inline stream_session_t * +always_inline session_t * listen_session_new (u8 thread_index, session_type_t type) { - stream_session_t *s; + session_t *s; s = session_alloc (thread_index); s->session_type = type; s->session_state = SESSION_STATE_LISTENING; return s; } -always_inline stream_session_t * +always_inline session_t * listen_session_get (u32 index) { return session_get (index, 0); } always_inline void -listen_session_del (stream_session_t * s) +listen_session_del (session_t * s) { session_free (s); } -transport_connection_t *listen_session_get_transport (stream_session_t * s); +transport_connection_t *listen_session_get_transport (session_t * s); int -listen_session_get_local_session_endpoint (stream_session_t * listener, +listen_session_get_local_session_endpoint (session_t * listener, session_endpoint_t * sep); -void session_flush_frames_main_thread (vlib_main_t * vm); +/* + * Session manager functions + */ + +always_inline session_manager_main_t * +vnet_get_session_manager_main () +{ + return &session_manager_main; +} + +always_inline session_manager_worker_t * +session_manager_get_worker (u32 thread_index) +{ + return &session_manager_main.wrk[thread_index]; +} + +always_inline svm_msg_q_t * +session_manager_get_vpp_event_queue (u32 thread_index) +{ + return session_manager_main.wrk[thread_index].vpp_event_queue; +} always_inline u8 session_manager_is_enabled () @@ -723,7 +539,12 @@ do { \ return clib_error_return(0, "session layer is not enabled"); \ } while (0) +int session_manager_flush_enqueue_events (u8 proto, u32 thread_index); +int 
session_manager_flush_all_enqueue_events (u8 transport_proto); +void session_flush_frames_main_thread (vlib_main_t * vm); +ssvm_private_t *session_manager_get_evt_q_segment (void); void session_node_enable_disable (u8 is_en); +clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en); #endif /* __included_session_h__ */ diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index 2d20d5f697b8..015b29f81d2d 100755 --- a/src/vnet/session/session_api.c +++ b/src/vnet/session/session_api.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Copyright (c) 2015-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -198,16 +199,16 @@ send_app_cut_through_registration_add (u32 api_client_index, } static int -send_session_accept_callback (stream_session_t * s) +send_session_accept_callback (session_t * s) { app_worker_t *server_wrk = app_worker_get (s->app_wrk_index); - transport_proto_vft_t *tp_vft; vl_api_accept_session_t *mp; vl_api_registration_t *reg; transport_connection_t *tc; - stream_session_t *listener; + session_t *listener; svm_msg_q_t *vpp_queue; application_t *server; + app_listener_t *al; server = application_get (server_wrk->app_index); reg = @@ -223,13 +224,14 @@ send_session_accept_callback (stream_session_t * s) mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_ACCEPT_SESSION); mp->context = server_wrk->wrk_index; - mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo); - mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo); + mp->server_rx_fifo = pointer_to_uword (s->rx_fifo); + mp->server_tx_fifo = pointer_to_uword (s->tx_fifo); if (session_has_transport (s)) { listener = listen_session_get (s->listener_index); - mp->listener_handle = listen_session_get_handle 
(listener); + al = app_listener_get (server, listener->al_index); + mp->listener_handle = app_listener_handle (al); if (application_is_proxy (server)) { listener = @@ -241,8 +243,8 @@ send_session_accept_callback (stream_session_t * s) vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); mp->vpp_event_queue_address = pointer_to_uword (vpp_queue); mp->handle = session_handle (s); - tp_vft = transport_protocol_get_vft (session_get_transport_proto (s)); - tc = tp_vft->get_connection (s->connection_index, s->thread_index); + tc = transport_get_connection (session_get_transport_proto (s), + s->connection_index, s->thread_index); mp->port = tc->rmt_port; mp->is_ip4 = tc->is_ip4; clib_memcpy_fast (&mp->ip, &tc->rmt_ip, sizeof (tc->rmt_ip)); @@ -254,22 +256,16 @@ send_session_accept_callback (stream_session_t * s) if (application_local_session_listener_has_transport (ls)) { listener = listen_session_get (ls->listener_index); - mp->listener_handle = listen_session_get_handle (listener); + al = app_listener_get (server, listener->al_index); + mp->listener_handle = app_listener_handle (al); mp->is_ip4 = session_type_is_ip4 (listener->session_type); } else { ll = application_get_local_listen_session (server, ls->listener_index); - if (ll->transport_listener_index != ~0) - { - listener = listen_session_get (ll->transport_listener_index); - mp->listener_handle = listen_session_get_handle (listener); - } - else - { - mp->listener_handle = application_local_session_handle (ll); - } + al = app_listener_get (server, ll->al_index); + mp->listener_handle = app_listener_handle (al); mp->is_ip4 = session_type_is_ip4 (ll->listener_session_type); } mp->handle = application_local_session_handle (ls); @@ -283,7 +279,7 @@ send_session_accept_callback (stream_session_t * s) } static void -send_session_disconnect_callback (stream_session_t * s) +send_session_disconnect_callback (session_t * s) { app_worker_t *app_wrk = app_worker_get (s->app_wrk_index); 
vl_api_disconnect_session_t *mp; @@ -305,7 +301,7 @@ send_session_disconnect_callback (stream_session_t * s) } static void -send_session_reset_callback (stream_session_t * s) +send_session_reset_callback (session_t * s) { app_worker_t *app_wrk = app_worker_get (s->app_wrk_index); vl_api_registration_t *reg; @@ -327,7 +323,7 @@ send_session_reset_callback (stream_session_t * s) int send_session_connected_callback (u32 app_wrk_index, u32 api_context, - stream_session_t * s, u8 is_fail) + session_t * s, u8 is_fail) { vl_api_connect_session_reply_t *mp; transport_connection_t *tc; @@ -365,8 +361,8 @@ send_session_connected_callback (u32 app_wrk_index, u32 api_context, clib_memcpy_fast (mp->lcl_ip, &tc->lcl_ip, sizeof (tc->lcl_ip)); mp->is_ip4 = tc->is_ip4; mp->lcl_port = tc->lcl_port; - mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo); - mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo); + mp->server_rx_fifo = pointer_to_uword (s->rx_fifo); + mp->server_tx_fifo = pointer_to_uword (s->tx_fifo); } else { @@ -375,8 +371,8 @@ send_session_connected_callback (u32 app_wrk_index, u32 api_context, mp->lcl_port = ls->port; mp->vpp_event_queue_address = ls->server_evt_q; mp->client_event_queue_address = ls->client_evt_q; - mp->server_rx_fifo = pointer_to_uword (s->server_tx_fifo); - mp->server_tx_fifo = pointer_to_uword (s->server_rx_fifo); + mp->server_rx_fifo = pointer_to_uword (s->tx_fifo); + mp->server_tx_fifo = pointer_to_uword (s->rx_fifo); } done: @@ -415,17 +411,17 @@ mq_try_lock_and_alloc_msg (svm_msg_q_t * app_mq, svm_msg_q_msg_t * msg) } static int -mq_send_session_accepted_cb (stream_session_t * s) +mq_send_session_accepted_cb (session_t * s) { app_worker_t *app_wrk = app_worker_get (s->app_wrk_index); svm_msg_q_msg_t _msg, *msg = &_msg; svm_msg_q_t *vpp_queue, *app_mq; - transport_proto_vft_t *tp_vft; transport_connection_t *tc; - stream_session_t *listener; + session_t *listener; session_accepted_msg_t *mp; session_event_t *evt; application_t *app; 
+ app_listener_t *al; app = application_get (app_wrk->app_index); app_mq = app_wrk->event_queue; @@ -436,15 +432,17 @@ mq_send_session_accepted_cb (stream_session_t * s) clib_memset (evt, 0, sizeof (*evt)); evt->event_type = SESSION_CTRL_EVT_ACCEPTED; mp = (session_accepted_msg_t *) evt->data; + clib_memset (mp, 0, sizeof (*mp)); mp->context = app->app_index; - mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo); - mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo); + mp->server_rx_fifo = pointer_to_uword (s->rx_fifo); + mp->server_tx_fifo = pointer_to_uword (s->tx_fifo); mp->segment_handle = session_segment_handle (s); if (session_has_transport (s)) { listener = listen_session_get (s->listener_index); - mp->listener_handle = listen_session_get_handle (listener); + al = app_listener_get (app, listener->al_index); + mp->listener_handle = app_listener_handle (al); if (application_is_proxy (app)) { listener = @@ -456,8 +454,8 @@ mq_send_session_accepted_cb (stream_session_t * s) vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); mp->vpp_event_queue_address = pointer_to_uword (vpp_queue); mp->handle = session_handle (s); - tp_vft = transport_protocol_get_vft (session_get_transport_proto (s)); - tc = tp_vft->get_connection (s->connection_index, s->thread_index); + tc = transport_get_connection (session_get_transport_proto (s), + s->connection_index, s->thread_index); mp->port = tc->rmt_port; mp->is_ip4 = tc->is_ip4; clib_memcpy_fast (&mp->ip, &tc->rmt_ip, sizeof (tc->rmt_ip)); @@ -476,21 +474,15 @@ mq_send_session_accepted_cb (stream_session_t * s) if (application_local_session_listener_has_transport (ls)) { listener = listen_session_get (ls->listener_index); - mp->listener_handle = listen_session_get_handle (listener); + al = app_listener_get (app, listener->al_index); + mp->listener_handle = app_listener_handle (al); mp->is_ip4 = session_type_is_ip4 (listener->session_type); } else { ll = application_get_local_listen_session (app, 
ls->listener_index); - if (ll->transport_listener_index != ~0) - { - listener = listen_session_get (ll->transport_listener_index); - mp->listener_handle = listen_session_get_handle (listener); - } - else - { - mp->listener_handle = application_local_session_handle (ll); - } + al = app_listener_get (app, ll->al_index); + mp->listener_handle = app_listener_handle (al); mp->is_ip4 = session_type_is_ip4 (ll->listener_session_type); } mp->handle = application_local_session_handle (ls); @@ -505,10 +497,10 @@ mq_send_session_accepted_cb (stream_session_t * s) return 0; } -static void -mq_send_session_disconnected_cb (stream_session_t * s) +static inline void +mq_send_session_close_evt (app_worker_t * app_wrk, session_handle_t sh, + session_evt_type_t evt_type) { - app_worker_t *app_wrk = app_worker_get (s->app_wrk_index); svm_msg_q_msg_t _msg, *msg = &_msg; session_disconnected_msg_t *mp; svm_msg_q_t *app_mq; @@ -519,58 +511,77 @@ mq_send_session_disconnected_cb (stream_session_t * s) return; evt = svm_msg_q_msg_data (app_mq, msg); clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_DISCONNECTED; + evt->event_type = evt_type; mp = (session_disconnected_msg_t *) evt->data; - mp->handle = session_handle (s); + mp->handle = sh; mp->context = app_wrk->api_client_index; svm_msg_q_add_and_unlock (app_mq, msg); } +static inline void +mq_notify_close_subscribers (u32 app_index, session_handle_t sh, + svm_fifo_t * f, session_evt_type_t evt_type) +{ + app_worker_t *app_wrk; + application_t *app; + int i; + + app = application_get (app_index); + if (!app) + return; + + for (i = 0; i < f->n_subscribers; i++) + { + if (!(app_wrk = application_get_worker (app, f->subscribers[i]))) + continue; + mq_send_session_close_evt (app_wrk, sh, evt_type); /* forward caller's event type */ + } +} + +static void +mq_send_session_disconnected_cb (session_t * s) +{ + app_worker_t *app_wrk = app_worker_get (s->app_wrk_index); + session_handle_t sh = session_handle (s); + + 
mq_send_session_close_evt (app_wrk, session_handle (s), + SESSION_CTRL_EVT_DISCONNECTED); + + if (svm_fifo_n_subscribers (s->rx_fifo)) + mq_notify_close_subscribers (app_wrk->app_index, sh, s->rx_fifo, + SESSION_CTRL_EVT_DISCONNECTED); +} + void mq_send_local_session_disconnected_cb (u32 app_wrk_index, local_session_t * ls) { app_worker_t *app_wrk = app_worker_get (app_wrk_index); - svm_msg_q_msg_t _msg, *msg = &_msg; - session_disconnected_msg_t *mp; - svm_msg_q_t *app_mq; - session_event_t *evt; + session_handle_t sh = application_local_session_handle (ls); - app_mq = app_wrk->event_queue; - if (mq_try_lock_and_alloc_msg (app_mq, msg)) - return; - evt = svm_msg_q_msg_data (app_mq, msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_DISCONNECTED; - mp = (session_disconnected_msg_t *) evt->data; - mp->handle = application_local_session_handle (ls); - mp->context = app_wrk->api_client_index; - svm_msg_q_add_and_unlock (app_mq, msg); + mq_send_session_close_evt (app_wrk, sh, SESSION_CTRL_EVT_DISCONNECTED); + + if (svm_fifo_n_subscribers (ls->rx_fifo)) + mq_notify_close_subscribers (app_wrk->app_index, sh, ls->rx_fifo, + SESSION_CTRL_EVT_DISCONNECTED); } static void -mq_send_session_reset_cb (stream_session_t * s) +mq_send_session_reset_cb (session_t * s) { - app_worker_t *app = app_worker_get (s->app_wrk_index); - svm_msg_q_msg_t _msg, *msg = &_msg; - session_reset_msg_t *mp; - svm_msg_q_t *app_mq; - session_event_t *evt; + app_worker_t *app_wrk = app_worker_get (s->app_wrk_index); + session_handle_t sh = session_handle (s); - app_mq = app->event_queue; - if (mq_try_lock_and_alloc_msg (app_mq, msg)) - return; - evt = svm_msg_q_msg_data (app_mq, msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_RESET; - mp = (session_reset_msg_t *) evt->data; - mp->handle = session_handle (s); - svm_msg_q_add_and_unlock (app_mq, msg); + mq_send_session_close_evt (app_wrk, sh, SESSION_CTRL_EVT_RESET); + + if 
(svm_fifo_n_subscribers (s->rx_fifo)) + mq_notify_close_subscribers (app_wrk->app_index, sh, s->rx_fifo, + SESSION_CTRL_EVT_RESET); } static int mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, - stream_session_t * s, u8 is_fail) + session_t * s, u8 is_fail) { svm_msg_q_msg_t _msg, *msg = &_msg; session_connected_msg_t *mp; @@ -595,6 +606,7 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, clib_memset (evt, 0, sizeof (*evt)); evt->event_type = SESSION_CTRL_EVT_CONNECTED; mp = (session_connected_msg_t *) evt->data; + clib_memset (mp, 0, sizeof (*mp)); mp->context = api_context; if (is_fail) @@ -617,8 +629,8 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, clib_memcpy_fast (mp->lcl_ip, &tc->lcl_ip, sizeof (tc->lcl_ip)); mp->is_ip4 = tc->is_ip4; mp->lcl_port = tc->lcl_port; - mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo); - mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo); + mp->server_rx_fifo = pointer_to_uword (s->rx_fifo); + mp->server_tx_fifo = pointer_to_uword (s->tx_fifo); } else { @@ -636,8 +648,8 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, mp->vpp_event_queue_address = pointer_to_uword (vpp_mq); mp->client_event_queue_address = ls->client_evt_q; mp->server_event_queue_address = ls->server_evt_q; - mp->server_rx_fifo = pointer_to_uword (s->server_tx_fifo); - mp->server_tx_fifo = pointer_to_uword (s->server_rx_fifo); + mp->server_rx_fifo = pointer_to_uword (s->tx_fifo); + mp->server_tx_fifo = pointer_to_uword (s->rx_fifo); } done: @@ -655,11 +667,12 @@ mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context, svm_msg_q_msg_t _msg, *msg = &_msg; svm_msg_q_t *app_mq, *vpp_evt_q; transport_connection_t *tc; - stream_session_t *ls = 0; session_bound_msg_t *mp; app_worker_t *app_wrk; session_event_t *evt; application_t *app; + app_listener_t *al; + session_t *ls = 0; app_wrk = app_worker_get (app_wrk_index); app = application_get (app_wrk->app_index); @@ -686,7 
+699,8 @@ mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context, mp->handle = handle; if (application_has_global_scope (app)) { - ls = listen_session_get_from_handle (handle); + al = app_listener_get_w_handle (handle); + ls = app_listener_get_session (al); tc = listen_session_get_transport (ls); mp->lcl_port = tc->lcl_port; mp->lcl_is_ip4 = tc->is_ip4; @@ -700,12 +714,13 @@ mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context, mp->lcl_is_ip4 = session_type_is_ip4 (local->session_type); } + vpp_evt_q = session_manager_get_vpp_event_queue (0); + mp->vpp_evt_q = pointer_to_uword (vpp_evt_q); + if (ls && session_transport_service_type (ls) == TRANSPORT_SERVICE_CL) { - mp->rx_fifo = pointer_to_uword (ls->server_rx_fifo); - mp->tx_fifo = pointer_to_uword (ls->server_tx_fifo); - vpp_evt_q = session_manager_get_vpp_event_queue (0); - mp->vpp_evt_q = pointer_to_uword (vpp_evt_q); + mp->rx_fifo = pointer_to_uword (ls->rx_fifo); + mp->tx_fifo = pointer_to_uword (ls->tx_fifo); } done: @@ -742,7 +757,6 @@ vl_api_application_attach_t_handler (vl_api_application_attach_t * mp) ssvm_private_t *segp, *evt_q_segment; vnet_app_attach_args_t _a, *a = &_a; vl_api_registration_t *reg; - clib_error_t *error = 0; u8 fd_flags = 0; reg = vl_api_client_index_to_registration (mp->client_index); @@ -781,10 +795,9 @@ vl_api_application_attach_t_handler (vl_api_application_attach_t * mp) mp->namespace_id_len); } - if ((error = vnet_application_attach (a))) + if ((rv = vnet_application_attach (a))) { - rv = clib_error_get_code (error); - clib_error_report (error); + clib_warning ("attach returned: %d", rv); vec_free (a->namespace_id); goto done; } @@ -868,9 +881,9 @@ static void vl_api_bind_uri_t_handler (vl_api_bind_uri_t * mp) { transport_connection_t *tc = 0; - vnet_bind_args_t _a, *a = &_a; + vnet_listen_args_t _a, *a = &_a; vl_api_bind_uri_reply_t *rmp; - stream_session_t *s; + session_t *s; application_t *app = 0; svm_msg_q_t *vpp_evt_q; app_worker_t *app_wrk; @@ -911,8 
+924,8 @@ vl_api_bind_uri_t_handler (vl_api_bind_uri_t * mp) clib_memcpy_fast (rmp->lcl_ip, &tc->lcl_ip, sizeof(tc->lcl_ip)); if (session_transport_service_type (s) == TRANSPORT_SERVICE_CL) { - rmp->rx_fifo = pointer_to_uword (s->server_rx_fifo); - rmp->tx_fifo = pointer_to_uword (s->server_tx_fifo); + rmp->rx_fifo = pointer_to_uword (s->rx_fifo); + rmp->tx_fifo = pointer_to_uword (s->tx_fifo); vpp_evt_q = session_manager_get_vpp_event_queue (0); rmp->vpp_evt_q = pointer_to_uword (vpp_evt_q); } @@ -935,7 +948,7 @@ vl_api_unbind_uri_t_handler (vl_api_unbind_uri_t * mp) { vl_api_unbind_uri_reply_t *rmp; application_t *app; - vnet_unbind_args_t _a, *a = &_a; + vnet_unlisten_args_t _a, *a = &_a; int rv; if (session_manager_is_enabled () == 0) @@ -966,7 +979,6 @@ vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp) vl_api_connect_session_reply_t *rmp; vnet_connect_args_t _a, *a = &_a; application_t *app; - clib_error_t *error = 0; int rv = 0; if (session_manager_is_enabled () == 0) @@ -982,11 +994,8 @@ vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp) a->uri = (char *) mp->uri; a->api_context = mp->context; a->app_index = app->app_index; - if ((error = vnet_connect_uri (a))) - { - rv = clib_error_get_code (error); - clib_error_report (error); - } + if ((rv = vnet_connect_uri (a))) + clib_warning ("connect_uri returned: %d", rv); } else { @@ -1067,7 +1076,7 @@ vl_api_reset_session_reply_t_handler (vl_api_reset_session_reply_t * mp) vnet_disconnect_args_t _a = { 0 }, *a = &_a; app_worker_t *app_wrk; application_t *app; - stream_session_t *s; + session_t *s; u32 index, thread_index; app = application_lookup (mp->context); @@ -1109,7 +1118,7 @@ vl_api_accept_session_reply_t_handler (vl_api_accept_session_reply_t * mp) { vnet_disconnect_args_t _a = { 0 }, *a = &_a; local_session_t *ls; - stream_session_t *s; + session_t *s; /* Server isn't interested, kill the session */ if (mp->retval) @@ -1122,14 +1131,14 @@ vl_api_accept_session_reply_t_handler 
(vl_api_accept_session_reply_t * mp) if (session_handle_is_local (mp->handle)) { - ls = application_get_local_session_from_handle (mp->handle); + ls = app_worker_get_local_session_from_handle (mp->handle); if (!ls || ls->app_wrk_index != mp->context) { clib_warning ("server %u doesn't own local handle %llu", mp->context, mp->handle); return; } - if (application_local_session_connect_notify (ls)) + if (app_worker_local_session_connect_notify (ls)) return; ls->session_state = SESSION_STATE_READY; } @@ -1160,16 +1169,16 @@ vl_api_map_another_segment_reply_t_handler (vl_api_map_another_segment_reply_t static void vl_api_bind_sock_t_handler (vl_api_bind_sock_t * mp) { + vnet_listen_args_t _a, *a = &_a; + transport_connection_t *tc = 0; vl_api_bind_sock_reply_t *rmp; - vnet_bind_args_t _a, *a = &_a; - int rv = 0; - clib_error_t *error; + svm_msg_q_t *vpp_evt_q; application_t *app = 0; app_worker_t *app_wrk; - stream_session_t *s; - transport_connection_t *tc = 0; ip46_address_t *ip46; - svm_msg_q_t *vpp_evt_q; + app_listener_t *al; + session_t *s; + int rv = 0; if (session_manager_is_enabled () == 0) { @@ -1195,11 +1204,8 @@ vl_api_bind_sock_t_handler (vl_api_bind_sock_t * mp) a->app_index = app->app_index; a->wrk_map_index = mp->wrk_index; - if ((error = vnet_bind (a))) - { - rv = clib_error_get_code (error); - clib_error_report (error); - } + if ((rv = vnet_listen (a))) + clib_warning ("listen returned: %d", rv); done: /* *INDENT-OFF* */ @@ -1211,13 +1217,14 @@ vl_api_bind_sock_t_handler (vl_api_bind_sock_t * mp) rmp->lcl_is_ip4 = mp->is_ip4; if (app && application_has_global_scope (app)) { - s = listen_session_get_from_handle (a->handle); + al = app_listener_get_w_handle (a->handle); + s = app_listener_get_session (al); tc = listen_session_get_transport (s); clib_memcpy_fast (rmp->lcl_ip, &tc->lcl_ip, sizeof (tc->lcl_ip)); if (session_transport_service_type (s) == TRANSPORT_SERVICE_CL) { - rmp->rx_fifo = pointer_to_uword (s->server_rx_fifo); - rmp->tx_fifo = 
pointer_to_uword (s->server_tx_fifo); + rmp->rx_fifo = pointer_to_uword (s->rx_fifo); + rmp->tx_fifo = pointer_to_uword (s->tx_fifo); vpp_evt_q = session_manager_get_vpp_event_queue (0); rmp->vpp_evt_q = pointer_to_uword (vpp_evt_q); } @@ -1239,9 +1246,8 @@ static void vl_api_unbind_sock_t_handler (vl_api_unbind_sock_t * mp) { vl_api_unbind_sock_reply_t *rmp; - vnet_unbind_args_t _a, *a = &_a; + vnet_unlisten_args_t _a, *a = &_a; application_t *app; - clib_error_t *error; int rv = 0; if (session_manager_is_enabled () == 0) @@ -1256,11 +1262,8 @@ vl_api_unbind_sock_t_handler (vl_api_unbind_sock_t * mp) a->app_index = app->app_index; a->handle = mp->handle; a->wrk_map_index = mp->wrk_index; - if ((error = vnet_unbind (a))) - { - rv = clib_error_get_code (error); - clib_error_report (error); - } + if ((rv = vnet_unlisten (a))) + clib_warning ("unlisten returned: %d", rv); } done: @@ -1273,7 +1276,6 @@ vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp) vl_api_connect_session_reply_t *rmp; vnet_connect_args_t _a, *a = &_a; application_t *app = 0; - clib_error_t *error = 0; int rv = 0; if (session_manager_is_enabled () == 0) @@ -1306,11 +1308,8 @@ vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp) a->api_context = mp->context; a->app_index = app->app_index; a->wrk_map_index = mp->wrk_index; - if ((error = vnet_connect (a))) - { - rv = clib_error_get_code (error); - clib_error_report (error); - } + if ((rv = vnet_connect (a))) + clib_warning ("connect returned: %d", rv); vec_free (a->sep_ext.hostname); } else @@ -1339,7 +1338,6 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp) int rv = 0, fds[SESSION_N_FD_TYPE], n_fds = 0; vl_api_app_worker_add_del_reply_t *rmp; vl_api_registration_t *reg; - clib_error_t *error = 0; application_t *app; u8 fd_flags = 0; @@ -1366,11 +1364,10 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp) .api_client_index = mp->client_index, .is_add = mp->is_add }; - error = 
vnet_app_worker_add_del (&args); - if (error) + rv = vnet_app_worker_add_del (&args); + if (rv) { - rv = clib_error_get_code (error); - clib_error_report (error); + clib_warning ("app worker add/del returned: %d", rv); goto done; } @@ -1420,7 +1417,6 @@ static void vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp) { vl_api_app_namespace_add_del_reply_t *rmp; - clib_error_t *error = 0; u32 appns_index = 0; u8 *ns_id = 0; int rv = 0; @@ -1446,13 +1442,8 @@ vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp) .ip6_fib_id = clib_net_to_host_u32 (mp->ip6_fib_id), .is_add = 1 }; - error = vnet_app_namespace_add_del (&args); - if (error) - { - rv = clib_error_get_code (error); - clib_error_report (error); - } - else + rv = vnet_app_namespace_add_del (&args); + if (!rv) { appns_index = app_namespace_index_from_id (ns_id); if (appns_index == APP_NAMESPACE_INVALID_INDEX) @@ -1478,7 +1469,6 @@ vl_api_session_rule_add_del_t_handler (vl_api_session_rule_add_del_t * mp) vl_api_session_rule_add_del_reply_t *rmp; session_rule_add_del_args_t args; session_rule_table_add_del_args_t *table_args = &args.table_args; - clib_error_t *error; u8 fib_proto; int rv = 0; @@ -1503,12 +1493,9 @@ vl_api_session_rule_add_del_t_handler (vl_api_session_rule_add_del_t * mp) clib_memset (&table_args->rmt.fp_addr, 0, sizeof (table_args->rmt.fp_addr)); ip_set (&table_args->lcl.fp_addr, mp->lcl_ip, mp->is_ip4); ip_set (&table_args->rmt.fp_addr, mp->rmt_ip, mp->is_ip4); - error = vnet_session_rule_add_del (&args); - if (error) - { - rv = clib_error_get_code (error); - clib_error_report (error); - } + rv = vnet_session_rule_add_del (&args); + if (rv) + clib_warning ("rule add del returned: %d", rv); vec_free (table_args->tag); REPLY_MACRO (VL_API_SESSION_RULE_ADD_DEL_REPLY); } diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c index 9da7f5b46b85..e92d432c8a85 100755 --- a/src/vnet/session/session_cli.c +++ 
b/src/vnet/session/session_cli.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -18,27 +18,25 @@ u8 * format_session_fifos (u8 * s, va_list * args) { - stream_session_t *ss = va_arg (*args, stream_session_t *); + session_t *ss = va_arg (*args, session_t *); int verbose = va_arg (*args, int); session_event_t _e, *e = &_e; u8 found; - if (!ss->server_rx_fifo || !ss->server_tx_fifo) + if (!ss->rx_fifo || !ss->tx_fifo) return s; - s = format (s, " Rx fifo: %U", format_svm_fifo, ss->server_rx_fifo, - verbose); - if (verbose > 2 && ss->server_rx_fifo->has_event) + s = format (s, " Rx fifo: %U", format_svm_fifo, ss->rx_fifo, verbose); + if (verbose > 2 && ss->rx_fifo->has_event) { - found = session_node_lookup_fifo_event (ss->server_rx_fifo, e); + found = session_node_lookup_fifo_event (ss->rx_fifo, e); s = format (s, " session node event: %s\n", found ? "found" : "not found"); } - s = format (s, " Tx fifo: %U", format_svm_fifo, ss->server_tx_fifo, - verbose); - if (verbose > 2 && ss->server_tx_fifo->has_event) + s = format (s, " Tx fifo: %U", format_svm_fifo, ss->tx_fifo, verbose); + if (verbose > 2 && ss->tx_fifo->has_event) { - found = session_node_lookup_fifo_event (ss->server_tx_fifo, e); + found = session_node_lookup_fifo_event (ss->tx_fifo, e); s = format (s, " session node event: %s\n", found ? 
"found" : "not found"); } @@ -56,15 +54,27 @@ format_session_fifos (u8 * s, va_list * args) u8 * format_stream_session (u8 * s, va_list * args) { - stream_session_t *ss = va_arg (*args, stream_session_t *); + session_t *ss = va_arg (*args, session_t *); int verbose = va_arg (*args, int); u32 tp = session_get_transport_proto (ss); u8 *str = 0; - if (verbose == 1 && ss->session_state >= SESSION_STATE_ACCEPTING) - str = format (0, "%-10u%-10u", - svm_fifo_max_dequeue (ss->server_rx_fifo), - svm_fifo_max_dequeue (ss->server_tx_fifo)); + if (ss->session_state >= SESSION_STATE_TRANSPORT_CLOSED) + { + s = format (s, "[%u:%u] CLOSED", ss->thread_index, ss->session_index); + return s; + } + + if (verbose == 1) + { + u8 post_accept = ss->session_state >= SESSION_STATE_ACCEPTING; + u8 hasf = post_accept | session_tx_is_dgram (ss); + u32 rxf, txf; + + rxf = hasf ? svm_fifo_max_dequeue (ss->rx_fifo) : 0; + txf = hasf ? svm_fifo_max_dequeue (ss->tx_fifo) : 0; + str = format (0, "%-10u%-10u", rxf, txf); + } if (ss->session_state >= SESSION_STATE_ACCEPTING) { @@ -77,8 +87,8 @@ format_stream_session (u8 * s, va_list * args) } else if (ss->session_state == SESSION_STATE_LISTENING) { - s = format (s, "%-40U%v", format_transport_listen_connection, - tp, ss->connection_index, str); + s = format (s, "%U%v", format_transport_listen_connection, + tp, ss->connection_index, verbose, str); if (verbose > 1) s = format (s, "\n%U", format_session_fifos, ss, verbose); } @@ -152,10 +162,10 @@ unformat_stream_session_id (unformat_input_t * input, va_list * args) uword unformat_stream_session (unformat_input_t * input, va_list * args) { - stream_session_t **result = va_arg (*args, stream_session_t **); + session_t **result = va_arg (*args, session_t **); u32 lcl_port = 0, rmt_port = 0, fib_index = 0; ip46_address_t lcl, rmt; - stream_session_t *s; + session_t *s; u8 proto = ~0; u8 is_ip4 = 0; @@ -219,10 +229,10 @@ static clib_error_t * show_session_command_fn (vlib_main_t * vm, unformat_input_t * 
input, vlib_cli_command_t * cmd) { - u8 *str = 0, one_session = 0, do_listeners = 0, sst, do_elog = 0; + u8 one_session = 0, do_listeners = 0, sst, do_elog = 0; session_manager_main_t *smm = &session_manager_main; u32 transport_proto = ~0, track_index; - stream_session_t *pool, *s; + session_t *pool, *s; transport_connection_t *tc; app_worker_t *app_wrk; int verbose = 0, i; @@ -255,7 +265,7 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, if (one_session) { - str = format (0, "%U", format_stream_session, s, 3); + u8 *str = format (0, "%U", format_stream_session, s, 3); if (do_elog && s->session_state != SESSION_STATE_LISTENING) { elog_main_t *em = &vm->elog_main; @@ -270,13 +280,14 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, dt, track_index); } vlib_cli_output (vm, "%v", str); + vec_free (str); return 0; } if (do_listeners) { sst = session_type_from_proto_and_ip (transport_proto, 1); - vlib_cli_output (vm, "%-40s%-24s", "Listener", "App"); + vlib_cli_output (vm, "%-50s%-24s", "Listener", "App"); /* *INDENT-OFF* */ pool_foreach (s, smm->wrk[0].sessions, ({ if (s->session_state != SESSION_STATE_LISTENING @@ -284,7 +295,7 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, continue; app_wrk = app_worker_get (s->app_wrk_index); app_name = application_name_from_index (app_wrk->app_index); - vlib_cli_output (vm, "%U%-25v%", format_stream_session, s, 1, + vlib_cli_output (vm, "%U%-25v%", format_stream_session, s, 0, app_name); })); /* *INDENT-ON* */ @@ -293,42 +304,46 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, for (i = 0; i < vec_len (smm->wrk); i++) { - u32 once_per_pool; - pool = smm->wrk[i].sessions; + u32 once_per_pool = 1, n_closed = 0; - once_per_pool = 1; + pool = smm->wrk[i].sessions; + if (!pool_elts (pool)) + { + vlib_cli_output (vm, "Thread %d: no sessions", i); + continue; + } - if (pool_elts (pool)) + if (!verbose) { + vlib_cli_output (vm, "Thread %d: %d 
sessions", i, pool_elts (pool)); + continue; + } - vlib_cli_output (vm, "Thread %d: %d active sessions", - i, pool_elts (pool)); - if (verbose) - { - if (once_per_pool && verbose == 1) - { - str = format (str, "%-50s%-15s%-10s%-10s", - "Connection", "State", "Rx-f", "Tx-f"); - vlib_cli_output (vm, "%v", str); - vec_reset_length (str); - once_per_pool = 0; - } - - /* *INDENT-OFF* */ - pool_foreach (s, pool, - ({ - vec_reset_length (str); - str = format (str, "%U", format_stream_session, s, verbose); - vlib_cli_output (vm, "%v", str); - })); - /* *INDENT-ON* */ - } + if (once_per_pool && verbose == 1) + { + vlib_cli_output (vm, "%s%-50s%-15s%-10s%-10s", i ? "\n" : "", + "Connection", "State", "Rx-f", "Tx-f"); + once_per_pool = 0; } + + /* *INDENT-OFF* */ + pool_foreach (s, pool, ({ + if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED) + { + n_closed += 1; + continue; + } + vlib_cli_output (vm, "%U", format_stream_session, s, verbose); + })); + /* *INDENT-ON* */ + + if (!n_closed) + vlib_cli_output (vm, "Thread %d: active sessions %u", i, + pool_elts (pool) - n_closed); else - vlib_cli_output (vm, "Thread %d: no active sessions", i); - vec_reset_length (str); + vlib_cli_output (vm, "Thread %d: active sessions %u closed %u", i, + pool_elts (pool) - n_closed, n_closed); } - vec_free (str); return 0; } @@ -337,13 +352,14 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, VLIB_CLI_COMMAND (vlib_cli_show_session_command) = { .path = "show session", - .short_help = "show session [verbose [nnn]]", + .short_help = "show session [verbose [n]] [listeners ] " + "[ [elog]]", .function = show_session_command_fn, }; /* *INDENT-ON* */ static int -clear_session (stream_session_t * s) +clear_session (session_t * s) { app_worker_t *server_wrk = app_worker_get (s->app_wrk_index); application_t *server = application_get (server_wrk->app_index); @@ -359,7 +375,7 @@ clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input, u32 thread_index = 0, 
clear_all = 0; session_manager_worker_t *wrk; u32 session_index = ~0; - stream_session_t *session; + session_t *session; if (!smm->is_enabled) { @@ -420,7 +436,7 @@ show_session_fifo_trace_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - stream_session_t *s = 0; + session_t *s = 0; u8 is_rx = 0, *str = 0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) @@ -449,8 +465,8 @@ show_session_fifo_trace_command_fn (vlib_main_t * vm, } str = is_rx ? - svm_fifo_dump_trace (str, s->server_rx_fifo) : - svm_fifo_dump_trace (str, s->server_tx_fifo); + svm_fifo_dump_trace (str, s->rx_fifo) : + svm_fifo_dump_trace (str, s->tx_fifo); vlib_cli_output (vm, "%v", str); return 0; @@ -469,7 +485,7 @@ static clib_error_t * session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - stream_session_t *s = 0; + session_t *s = 0; u8 is_rx = 0, *str = 0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) @@ -496,8 +512,8 @@ session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input, } str = is_rx ? - svm_fifo_replay (str, s->server_rx_fifo, 0, 1) : - svm_fifo_replay (str, s->server_tx_fifo, 0, 1); + svm_fifo_replay (str, s->rx_fifo, 0, 1) : + svm_fifo_replay (str, s->tx_fifo, 0, 1); vlib_cli_output (vm, "%v", str); return 0; diff --git a/src/vnet/session/session_debug.h b/src/vnet/session/session_debug.h index 559f0bd6ed17..2912ae3828c2 100644 --- a/src/vnet/session/session_debug.h +++ b/src/vnet/session/session_debug.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c index 931c3d0f9e2f..854da7fdd3c9 100644 --- a/src/vnet/session/session_lookup.c +++ b/src/vnet/session/session_lookup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -29,11 +29,6 @@ #include #include -/** - * External vector of per transport virtual functions table - */ -extern transport_proto_vft_t *tp_vfts; - /** * Network namespace index (i.e., fib index) to session lookup table. We * should have one per network protocol type but for now we only support IP4/6 @@ -338,11 +333,11 @@ session_lookup_del_connection (transport_connection_t * tc) } int -session_lookup_del_session (stream_session_t * s) +session_lookup_del_session (session_t * s) { - transport_proto_t tp = session_get_transport_proto (s); transport_connection_t *ts; - ts = tp_vfts[tp].get_connection (s->connection_index, s->thread_index); + ts = transport_get_connection (session_get_transport_proto (s), + s->connection_index, s->thread_index); return session_lookup_del_connection (ts); } @@ -371,7 +366,7 @@ session_lookup_action_to_handle (u32 action_index) } } -static stream_session_t * +static session_t * session_lookup_app_listen_session (u32 app_index, u8 fib_proto, u8 transport_proto) { @@ -384,7 +379,7 @@ session_lookup_app_listen_session (u32 app_index, u8 fib_proto, fib_proto, transport_proto); } -static stream_session_t * +static session_t * session_lookup_action_to_session (u32 action_index, u8 fib_proto, u8 transport_proto) { @@ -396,7 +391,7 @@ session_lookup_action_to_session (u32 action_index, u8 fib_proto, } /** UNUSED */ -stream_session_t * +session_t * session_lookup_rules_table_session4 (session_table_t * 
st, u8 proto, ip4_address_t * lcl, u16 lcl_port, ip4_address_t * rmt, u16 rmt_port) @@ -412,7 +407,7 @@ session_lookup_rules_table_session4 (session_table_t * st, u8 proto, } /** UNUSED */ -stream_session_t * +session_t * session_lookup_rules_table_session6 (session_table_t * st, u8 proto, ip6_address_t * lcl, u16 lcl_port, ip6_address_t * rmt, u16 rmt_port) @@ -614,7 +609,7 @@ session_lookup_local_endpoint (u32 table_index, session_endpoint_t * sep) return SESSION_INVALID_HANDLE; } -static inline stream_session_t * +static inline session_t * session_lookup_listener4_i (session_table_t * st, ip4_address_t * lcl, u16 lcl_port, u8 proto, u8 use_wildcard) { @@ -655,7 +650,7 @@ session_lookup_listener4_i (session_table_t * st, ip4_address_t * lcl, return 0; } -stream_session_t * +session_t * session_lookup_listener4 (u32 fib_index, ip4_address_t * lcl, u16 lcl_port, u8 proto) { @@ -666,7 +661,7 @@ session_lookup_listener4 (u32 fib_index, ip4_address_t * lcl, u16 lcl_port, return session_lookup_listener4_i (st, lcl, lcl_port, proto, 0); } -static stream_session_t * +static session_t * session_lookup_listener6_i (session_table_t * st, ip6_address_t * lcl, u16 lcl_port, u8 proto, u8 ip_wildcard) { @@ -698,7 +693,7 @@ session_lookup_listener6_i (session_table_t * st, ip6_address_t * lcl, return 0; } -stream_session_t * +session_t * session_lookup_listener6 (u32 fib_index, ip6_address_t * lcl, u16 lcl_port, u8 proto) { @@ -712,7 +707,7 @@ session_lookup_listener6 (u32 fib_index, ip6_address_t * lcl, u16 lcl_port, /** * Lookup listener, exact or proxy (inaddr_any:0) match */ -stream_session_t * +session_t * session_lookup_listener (u32 table_index, session_endpoint_t * sep) { session_table_t *st; @@ -812,12 +807,10 @@ session_lookup_half_open_handle (transport_connection_t * tc) transport_connection_t * session_lookup_half_open_connection (u64 handle, u8 proto, u8 is_ip4) { - u32 sst; - if (handle != HALF_OPEN_LOOKUP_INVALID_VALUE) { - sst = session_type_from_proto_and_ip 
(proto, is_ip4); - return tp_vfts[sst].get_half_open (handle & 0xFFFFFFFF); + u32 sst = session_type_from_proto_and_ip (proto, is_ip4); + return transport_get_half_open (sst, handle & 0xFFFFFFFF); } return 0; } @@ -856,7 +849,7 @@ session_lookup_connection_wt4 (u32 fib_index, ip4_address_t * lcl, { session_table_t *st; session_kv4_t kv4; - stream_session_t *s; + session_t *s; u32 action_index; int rv; @@ -877,8 +870,8 @@ session_lookup_connection_wt4 (u32 fib_index, ip4_address_t * lcl, return 0; } s = session_get (kv4.value & 0xFFFFFFFFULL, thread_index); - return tp_vfts[proto].get_connection (s->connection_index, - thread_index); + return transport_get_connection (proto, s->connection_index, + thread_index); } /* @@ -886,7 +879,7 @@ session_lookup_connection_wt4 (u32 fib_index, ip4_address_t * lcl, */ rv = clib_bihash_search_inline_16_8 (&st->v4_half_open_hash, &kv4); if (rv == 0) - return tp_vfts[proto].get_half_open (kv4.value & 0xFFFFFFFF); + return transport_get_half_open (proto, kv4.value & 0xFFFFFFFF); /* * Check the session rules table @@ -902,7 +895,7 @@ session_lookup_connection_wt4 (u32 fib_index, ip4_address_t * lcl, } if ((s = session_lookup_action_to_session (action_index, FIB_PROTOCOL_IP4, proto))) - return tp_vfts[proto].get_listener (s->connection_index); + return transport_get_listener (proto, s->connection_index); return 0; } @@ -911,7 +904,7 @@ session_lookup_connection_wt4 (u32 fib_index, ip4_address_t * lcl, */ s = session_lookup_listener4_i (st, lcl, lcl_port, proto, 1); if (s) - return tp_vfts[proto].get_listener (s->connection_index); + return transport_get_listener (proto, s->connection_index); return 0; } @@ -938,7 +931,7 @@ session_lookup_connection4 (u32 fib_index, ip4_address_t * lcl, { session_table_t *st; session_kv4_t kv4; - stream_session_t *s; + session_t *s; u32 action_index; int rv; @@ -954,8 +947,8 @@ session_lookup_connection4 (u32 fib_index, ip4_address_t * lcl, if (rv == 0) { s = session_get_from_handle (kv4.value); - 
return tp_vfts[proto].get_connection (s->connection_index, - s->thread_index); + return transport_get_connection (proto, s->connection_index, + s->thread_index); } /* @@ -963,7 +956,7 @@ session_lookup_connection4 (u32 fib_index, ip4_address_t * lcl, */ rv = clib_bihash_search_inline_16_8 (&st->v4_half_open_hash, &kv4); if (rv == 0) - return tp_vfts[proto].get_half_open (kv4.value & 0xFFFFFFFF); + return transport_get_half_open (proto, kv4.value & 0xFFFFFFFF); /* * Check the session rules table @@ -976,7 +969,7 @@ session_lookup_connection4 (u32 fib_index, ip4_address_t * lcl, return 0; if ((s = session_lookup_action_to_session (action_index, FIB_PROTOCOL_IP4, proto))) - return tp_vfts[proto].get_listener (s->connection_index); + return transport_get_listener (proto, s->connection_index); return 0; } @@ -985,7 +978,7 @@ session_lookup_connection4 (u32 fib_index, ip4_address_t * lcl, */ s = session_lookup_listener4_i (st, lcl, lcl_port, proto, 1); if (s) - return tp_vfts[proto].get_listener (s->connection_index); + return transport_get_listener (proto, s->connection_index); return 0; } @@ -1003,13 +996,13 @@ session_lookup_connection4 (u32 fib_index, ip4_address_t * lcl, * * Typically used by dgram connections */ -stream_session_t * +session_t * session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl, ip4_address_t * rmt, u16 lcl_port, u16 rmt_port, u8 proto) { session_table_t *st; session_kv4_t kv4; - stream_session_t *s; + session_t *s; u32 action_index; int rv; @@ -1079,7 +1072,7 @@ session_lookup_connection_wt6 (u32 fib_index, ip6_address_t * lcl, u8 * result) { session_table_t *st; - stream_session_t *s; + session_t *s; session_kv6_t kv6; u32 action_index; int rv; @@ -1099,14 +1092,14 @@ session_lookup_connection_wt6 (u32 fib_index, ip6_address_t * lcl, return 0; } s = session_get (kv6.value & 0xFFFFFFFFULL, thread_index); - return tp_vfts[proto].get_connection (s->connection_index, - thread_index); + return transport_get_connection (proto, 
s->connection_index, + thread_index); } /* Try half-open connections */ rv = clib_bihash_search_inline_48_8 (&st->v6_half_open_hash, &kv6); if (rv == 0) - return tp_vfts[proto].get_half_open (kv6.value & 0xFFFFFFFF); + return transport_get_half_open (proto, kv6.value & 0xFFFFFFFF); /* Check the session rules table */ action_index = session_rules_table_lookup6 (&st->session_rules[proto], lcl, @@ -1120,14 +1113,14 @@ session_lookup_connection_wt6 (u32 fib_index, ip6_address_t * lcl, } if ((s = session_lookup_action_to_session (action_index, FIB_PROTOCOL_IP6, proto))) - return tp_vfts[proto].get_listener (s->connection_index); + return transport_get_listener (proto, s->connection_index); return 0; } /* If nothing is found, check if any listener is available */ s = session_lookup_listener6_i (st, lcl, lcl_port, proto, 1); if (s) - return tp_vfts[proto].get_listener (s->connection_index); + return transport_get_listener (proto, s->connection_index); return 0; } @@ -1154,7 +1147,7 @@ session_lookup_connection6 (u32 fib_index, ip6_address_t * lcl, u8 proto) { session_table_t *st; - stream_session_t *s; + session_t *s; session_kv6_t kv6; u32 action_index; int rv; @@ -1168,14 +1161,14 @@ session_lookup_connection6 (u32 fib_index, ip6_address_t * lcl, if (rv == 0) { s = session_get_from_handle (kv6.value); - return tp_vfts[proto].get_connection (s->connection_index, - s->thread_index); + return transport_get_connection (proto, s->connection_index, + s->thread_index); } /* Try half-open connections */ rv = clib_bihash_search_inline_48_8 (&st->v6_half_open_hash, &kv6); if (rv == 0) - return tp_vfts[proto].get_half_open (kv6.value & 0xFFFFFFFF); + return transport_get_half_open (proto, kv6.value & 0xFFFFFFFF); /* Check the session rules table */ action_index = session_rules_table_lookup6 (&st->session_rules[proto], lcl, @@ -1186,14 +1179,14 @@ session_lookup_connection6 (u32 fib_index, ip6_address_t * lcl, return 0; if ((s = session_lookup_action_to_session (action_index, 
FIB_PROTOCOL_IP6, proto))) - return tp_vfts[proto].get_listener (s->connection_index); + return transport_get_listener (proto, s->connection_index); return 0; } /* If nothing is found, check if any listener is available */ s = session_lookup_listener6_i (st, lcl, lcl_port, proto, 1); if (s) - return tp_vfts[proto].get_listener (s->connection_index); + return transport_get_listener (proto, s->connection_index); return 0; } @@ -1211,13 +1204,13 @@ session_lookup_connection6 (u32 fib_index, ip6_address_t * lcl, * * Typically used by dgram connections */ -stream_session_t * +session_t * session_lookup_safe6 (u32 fib_index, ip6_address_t * lcl, ip6_address_t * rmt, u16 lcl_port, u16 rmt_port, u8 proto) { session_table_t *st; session_kv6_t kv6; - stream_session_t *s; + session_t *s; u32 action_index; int rv; @@ -1247,7 +1240,7 @@ session_lookup_safe6 (u32 fib_index, ip6_address_t * lcl, ip6_address_t * rmt, return 0; } -clib_error_t * +int vnet_session_rule_add_del (session_rule_add_del_args_t * args) { app_namespace_t *app_ns = app_namespace_get (args->appns_index); @@ -1255,29 +1248,26 @@ vnet_session_rule_add_del (session_rule_add_del_args_t * args) session_table_t *st; u32 fib_index; u8 fib_proto; - clib_error_t *error; + int rv = 0; if (!app_ns) - return clib_error_return_code (0, VNET_API_ERROR_APP_INVALID_NS, 0, - "invalid app ns"); + return VNET_API_ERROR_APP_INVALID_NS; + if (args->scope > 3) - return clib_error_return_code (0, VNET_API_ERROR_INVALID_VALUE, 0, - "invalid scope"); + return VNET_API_ERROR_INVALID_VALUE; + if (args->transport_proto != TRANSPORT_PROTO_TCP && args->transport_proto != TRANSPORT_PROTO_UDP) - return clib_error_return_code (0, VNET_API_ERROR_INVALID_VALUE, 0, - "invalid transport proto"); + return VNET_API_ERROR_INVALID_VALUE; + if ((args->scope & SESSION_RULE_SCOPE_GLOBAL) || args->scope == 0) { fib_proto = args->table_args.rmt.fp_proto; fib_index = app_namespace_get_fib_index (app_ns, fib_proto); st = session_table_get_for_fib_index 
(fib_proto, fib_index); srt = &st->session_rules[args->transport_proto]; - if ((error = session_rules_table_add_del (srt, &args->table_args))) - { - clib_error_report (error); - return error; - } + if ((rv = session_rules_table_add_del (srt, &args->table_args))) + return rv; } if (args->scope & SESSION_RULE_SCOPE_LOCAL) { @@ -1286,9 +1276,9 @@ vnet_session_rule_add_del (session_rule_add_del_args_t * args) args->table_args.lcl_port = 0; st = app_namespace_get_local_table (app_ns); srt = &st->session_rules[args->transport_proto]; - error = session_rules_table_add_del (srt, &args->table_args); + rv = session_rules_table_add_del (srt, &args->table_args); } - return error; + return rv; } /** @@ -1316,7 +1306,7 @@ format_ip4_session_lookup_kvp (u8 * s, va_list * args) clib_bihash_kv_16_8_t *kvp = va_arg (*args, clib_bihash_kv_16_8_t *); u32 is_local = va_arg (*args, u32), app_wrk_index, session_index; v4_connection_key_t *key = (v4_connection_key_t *) kvp->key; - stream_session_t *session; + session_t *session; app_worker_t *app_wrk; const u8 *app_name; u8 *str = 0; @@ -1395,7 +1385,7 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, u8 fib_proto, is_add = 1, *ns_id = 0; u8 *tag = 0; app_namespace_t *app_ns; - clib_error_t *error; + int rv; clib_memset (&lcl_ip, 0, sizeof (lcl_ip)); clib_memset (&rmt_ip, 0, sizeof (rmt_ip)); @@ -1492,9 +1482,11 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, .appns_index = appns_index, .scope = scope, }; - error = vnet_session_rule_add_del (&args); + if ((rv = vnet_session_rule_add_del (&args))) + return clib_error_return (0, "rule add del returned %u", rv); + vec_free (tag); - return error; + return 0; } /* *INDENT-OFF* */ diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h index 212a11833a3f..8d7701da19e6 100644 --- a/src/vnet/session/session_lookup.h +++ b/src/vnet/session/session_lookup.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. 
+ * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -17,10 +17,11 @@ #define SRC_VNET_SESSION_SESSION_LOOKUP_H_ #include -#include -#include +#include #include +#define HALF_OPEN_LOOKUP_INVALID_VALUE ((u64)~0) + typedef enum session_lookup_result_ { SESSION_LOOKUP_RESULT_NONE, @@ -28,12 +29,12 @@ typedef enum session_lookup_result_ SESSION_LOOKUP_RESULT_FILTERED } session_lookup_result_t; -stream_session_t *session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl, - ip4_address_t * rmt, u16 lcl_port, - u16 rmt_port, u8 proto); -stream_session_t *session_lookup_safe6 (u32 fib_index, ip6_address_t * lcl, - ip6_address_t * rmt, u16 lcl_port, - u16 rmt_port, u8 proto); +session_t *session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl, + ip4_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto); +session_t *session_lookup_safe6 (u32 fib_index, ip6_address_t * lcl, + ip6_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto); transport_connection_t *session_lookup_connection_wt4 (u32 fib_index, ip4_address_t * lcl, ip4_address_t * rmt, @@ -58,27 +59,26 @@ transport_connection_t *session_lookup_connection6 (u32 fib_index, ip6_address_t * rmt, u16 lcl_port, u16 rmt_port, u8 proto); -stream_session_t *session_lookup_listener4 (u32 fib_index, - ip4_address_t * lcl, u16 lcl_port, - u8 proto); -stream_session_t *session_lookup_listener6 (u32 fib_index, - ip6_address_t * lcl, u16 lcl_port, - u8 proto); -stream_session_t *session_lookup_listener (u32 table_index, - session_endpoint_t * sep); +session_t *session_lookup_listener4 (u32 fib_index, + ip4_address_t * lcl, u16 lcl_port, + u8 proto); +session_t *session_lookup_listener6 (u32 fib_index, + ip6_address_t * lcl, u16 lcl_port, + u8 proto); +session_t *session_lookup_listener (u32 table_index, + session_endpoint_t * sep); int 
session_lookup_add_connection (transport_connection_t * tc, u64 value); int session_lookup_del_connection (transport_connection_t * tc); u64 session_lookup_endpoint_listener (u32 table_index, session_endpoint_t * sepi, u8 use_rules); u64 session_lookup_local_endpoint (u32 table_index, session_endpoint_t * sep); -stream_session_t *session_lookup_global_session_endpoint (session_endpoint_t - *); +session_t *session_lookup_global_session_endpoint (session_endpoint_t *); int session_lookup_add_session_endpoint (u32 table_index, session_endpoint_t * sep, u64 value); int session_lookup_del_session_endpoint (u32 table_index, session_endpoint_t * sep); -int session_lookup_del_session (stream_session_t * s); +int session_lookup_del_session (session_t * s); int session_lookup_del_half_open (transport_connection_t * tc); int session_lookup_add_half_open (transport_connection_t * tc, u64 value); u64 session_lookup_half_open_handle (transport_connection_t * tc); @@ -96,7 +96,7 @@ void session_lookup_dump_rules_table (u32 fib_index, u8 fib_proto, void session_lookup_dump_local_rules_table (u32 fib_index, u8 fib_proto, u8 transport_proto); -enum _session_rule_scope +typedef enum _session_rule_scope { SESSION_RULE_SCOPE_GLOBAL = 1, SESSION_RULE_SCOPE_LOCAL = 2, @@ -123,7 +123,7 @@ typedef struct _session_rule_add_del_args u8 transport_proto; } session_rule_add_del_args_t; -clib_error_t *vnet_session_rule_add_del (session_rule_add_del_args_t * args); +int vnet_session_rule_add_del (session_rule_add_del_args_t * args); void session_lookup_set_tables_appns (app_namespace_t * app_ns); void session_lookup_init (void); diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index f5e5efeaf0f9..2a8d97200691 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. 
* Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -29,10 +29,10 @@ session_mq_accepted_reply_handler (void *data) { session_accepted_reply_msg_t *mp = (session_accepted_reply_msg_t *) data; vnet_disconnect_args_t _a = { 0 }, *a = &_a; - stream_session_state_t old_state; + session_state_t old_state; app_worker_t *app_wrk; local_session_t *ls; - stream_session_t *s; + session_t *s; /* Server isn't interested, kill the session */ if (mp->retval) @@ -45,7 +45,7 @@ session_mq_accepted_reply_handler (void *data) if (session_handle_is_local (mp->handle)) { - ls = application_get_local_session_from_handle (mp->handle); + ls = app_worker_get_local_session_from_handle (mp->handle); if (!ls) { clib_warning ("unknown local handle 0x%lx", mp->handle); @@ -58,7 +58,7 @@ session_mq_accepted_reply_handler (void *data) mp->context, mp->handle); return; } - if (application_local_session_connect_notify (ls)) + if (app_worker_local_session_connect_notify (ls)) return; ls->session_state = SESSION_STATE_READY; } @@ -77,7 +77,7 @@ session_mq_accepted_reply_handler (void *data) old_state = s->session_state; s->session_state = SESSION_STATE_READY; - if (!svm_fifo_is_empty (s->server_rx_fifo)) + if (!svm_fifo_is_empty (s->rx_fifo)) app_worker_lock_and_send_event (app_wrk, s, FIFO_EVENT_APP_RX); /* Closed while waiting for app to reply. 
Resend disconnect */ @@ -97,7 +97,7 @@ session_mq_reset_reply_handler (void *data) vnet_disconnect_args_t _a = { 0 }, *a = &_a; session_reset_reply_msg_t *mp; app_worker_t *app_wrk; - stream_session_t *s; + session_t *s; application_t *app; u32 index, thread_index; @@ -144,7 +144,7 @@ session_mq_disconnected_handler (void *data) session_disconnected_msg_t *mp; app_worker_t *app_wrk; session_event_t *evt; - stream_session_t *s; + session_t *s; application_t *app; int rv = 0; @@ -216,7 +216,7 @@ session_mq_worker_update_handler (void *data) app_worker_t *app_wrk; u32 owner_app_wrk_map; session_event_t *evt; - stream_session_t *s; + session_t *s; application_t *app; app = application_lookup (mp->client_index); @@ -269,18 +269,18 @@ session_mq_worker_update_handler (void *data) evt->event_type = SESSION_CTRL_EVT_WORKER_UPDATE_REPLY; rmp = (session_worker_update_reply_msg_t *) evt->data; rmp->handle = mp->handle; - rmp->rx_fifo = pointer_to_uword (s->server_rx_fifo); - rmp->tx_fifo = pointer_to_uword (s->server_tx_fifo); + rmp->rx_fifo = pointer_to_uword (s->rx_fifo); + rmp->tx_fifo = pointer_to_uword (s->tx_fifo); rmp->segment_handle = session_segment_handle (s); svm_msg_q_add (app_wrk->event_queue, msg, SVM_Q_WAIT); /* * Retransmit messages that may have been lost */ - if (!svm_fifo_is_empty (s->server_tx_fifo)) - session_send_io_evt_to_thread (s->server_tx_fifo, FIFO_EVENT_APP_TX); + if (s->tx_fifo && !svm_fifo_is_empty (s->tx_fifo)) + session_send_io_evt_to_thread (s->tx_fifo, FIFO_EVENT_APP_TX); - if (!svm_fifo_is_empty (s->server_rx_fifo)) + if (s->rx_fifo && !svm_fifo_is_empty (s->rx_fifo)) app_worker_lock_and_send_event (app_wrk, s, FIFO_EVENT_APP_RX); if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) @@ -337,7 +337,7 @@ enum static void session_tx_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node, u32 next_index, u32 * to_next, u16 n_segs, - stream_session_t * s, u32 n_trace) + session_t * s, u32 n_trace) { session_queue_trace_t *t; vlib_buffer_t 
*b; @@ -379,14 +379,12 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx, *n_bufs -= 1; chain_bi0 = wrk->tx_buffers[*n_bufs]; - _vec_len (wrk->tx_buffers) = *n_bufs; - chain_b = vlib_get_buffer (vm, chain_bi0); chain_b->current_data = 0; data = vlib_buffer_get_current (chain_b); if (peek_data) { - n_bytes_read = svm_fifo_peek (ctx->s->server_tx_fifo, + n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo, ctx->tx_offset, len_to_deq, data); ctx->tx_offset += n_bytes_read; } @@ -394,7 +392,7 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx, { if (ctx->transport_vft->tx_type == TRANSPORT_TX_DGRAM) { - svm_fifo_t *f = ctx->s->server_tx_fifo; + svm_fifo_t *f = ctx->s->tx_fifo; session_dgram_hdr_t *hdr = &ctx->hdr; u16 deq_now; deq_now = clib_min (hdr->data_length - hdr->data_offset, @@ -411,7 +409,7 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx, } } else - n_bytes_read = svm_fifo_dequeue_nowait (ctx->s->server_tx_fifo, + n_bytes_read = svm_fifo_dequeue_nowait (ctx->s->tx_fifo, len_to_deq, data); } ASSERT (n_bytes_read == len_to_deq); @@ -434,20 +432,6 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx, ctx->left_to_snd -= left_from_seg; } -always_inline int -session_output_try_get_buffers (vlib_main_t * vm, - session_manager_worker_t * wrk, - u32 thread_index, u16 * n_bufs, u32 wanted) -{ - u32 n_alloc; - vec_validate_aligned (wrk->tx_buffers, wanted - 1, CLIB_CACHE_LINE_BYTES); - n_alloc = vlib_buffer_alloc (vm, &wrk->tx_buffers[*n_bufs], - wanted - *n_bufs); - *n_bufs += n_alloc; - _vec_len (wrk->tx_buffers) = *n_bufs; - return n_alloc; -} - always_inline void session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, vlib_buffer_t * b, u16 * n_bufs, u8 peek_data) @@ -463,12 +447,12 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED; b->current_data = 0; - data0 = vlib_buffer_make_headroom (b, 
MAX_HDRS_LEN); + data0 = vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN); len_to_deq = clib_min (ctx->left_to_snd, ctx->deq_per_first_buf); if (peek_data) { - n_bytes_read = svm_fifo_peek (ctx->s->server_tx_fifo, ctx->tx_offset, + n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo, ctx->tx_offset, len_to_deq, data0); ASSERT (n_bytes_read > 0); /* Keep track of progress locally, transport is also supposed to @@ -480,7 +464,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, if (ctx->transport_vft->tx_type == TRANSPORT_TX_DGRAM) { session_dgram_hdr_t *hdr = &ctx->hdr; - svm_fifo_t *f = ctx->s->server_tx_fifo; + svm_fifo_t *f = ctx->s->tx_fifo; u16 deq_now; u32 offset; @@ -505,7 +489,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, } else { - n_bytes_read = svm_fifo_dequeue_nowait (ctx->s->server_tx_fifo, + n_bytes_read = svm_fifo_dequeue_nowait (ctx->s->tx_fifo, len_to_deq, data0); ASSERT (n_bytes_read > 0); } @@ -530,7 +514,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, } always_inline u8 -session_tx_not_ready (stream_session_t * s, u8 peek_data) +session_tx_not_ready (session_t * s, u8 peek_data) { if (peek_data) { @@ -569,7 +553,7 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx, u32 max_segs, u8 peek_data) { u32 n_bytes_per_buf, n_bytes_per_seg; - ctx->max_dequeue = svm_fifo_max_dequeue (ctx->s->server_tx_fifo); + ctx->max_dequeue = svm_fifo_max_dequeue (ctx->s->tx_fifo); if (peek_data) { /* Offset in rx fifo from where to peek data */ @@ -590,7 +574,7 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx, ctx->max_len_to_snd = 0; return; } - svm_fifo_peek (ctx->s->server_tx_fifo, 0, sizeof (ctx->hdr), + svm_fifo_peek (ctx->s->tx_fifo, 0, sizeof (ctx->hdr), (u8 *) & ctx->hdr); ASSERT (ctx->hdr.data_length > ctx->hdr.data_offset); ctx->max_dequeue = ctx->hdr.data_length - ctx->hdr.data_offset; @@ -621,13 +605,14 @@ 
session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx, ctx->max_len_to_snd = max_segs * ctx->snd_mss; } - n_bytes_per_buf = VLIB_BUFFER_DATA_SIZE; - ASSERT (n_bytes_per_buf > MAX_HDRS_LEN); - n_bytes_per_seg = MAX_HDRS_LEN + ctx->snd_mss; + n_bytes_per_buf = vlib_buffer_get_default_data_size (vm); + ASSERT (n_bytes_per_buf > TRANSPORT_MAX_HDRS_LEN); + n_bytes_per_seg = TRANSPORT_MAX_HDRS_LEN + ctx->snd_mss; ctx->n_bufs_per_seg = ceil ((f64) n_bytes_per_seg / n_bytes_per_buf); ctx->deq_per_buf = clib_min (ctx->snd_mss, n_bytes_per_buf); ctx->deq_per_first_buf = clib_min (ctx->snd_mss, - n_bytes_per_buf - MAX_HDRS_LEN); + n_bytes_per_buf - + TRANSPORT_MAX_HDRS_LEN); } always_inline int @@ -636,9 +621,8 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, session_event_t * e, int *n_tx_packets, u8 peek_data) { - u32 next_index, next0, next1, *to_next, n_left_to_next; + u32 next_index, next0, next1, *to_next, n_left_to_next, n_left, pbi; u32 n_trace = vlib_get_trace_count (vm, node), n_bufs_needed = 0; - u32 thread_index = vm->thread_index, n_left, pbi; session_manager_main_t *smm = &session_manager_main; session_tx_context_t *ctx = &wrk->ctx; transport_proto_t tp; @@ -677,7 +661,7 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, } /* Allow enqueuing of a new event */ - svm_fifo_unset_event (ctx->s->server_tx_fifo); + svm_fifo_unset_event (ctx->s->tx_fifo); /* Check how much we can pull. 
*/ session_tx_set_dequeue_params (vm, ctx, VLIB_FRAME_SIZE - *n_tx_packets, @@ -686,21 +670,16 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_FALSE (!ctx->max_len_to_snd)) return SESSION_TX_NO_DATA; - n_bufs = vec_len (wrk->tx_buffers); n_bufs_needed = ctx->n_segs_per_evt * ctx->n_bufs_per_seg; - - /* - * Make sure we have at least one full frame of buffers ready - */ - if (n_bufs < n_bufs_needed) + vec_validate_aligned (wrk->tx_buffers, n_bufs_needed - 1, + CLIB_CACHE_LINE_BYTES); + n_bufs = vlib_buffer_alloc (vm, wrk->tx_buffers, n_bufs_needed); + if (PREDICT_FALSE (n_bufs < n_bufs_needed)) { - session_output_try_get_buffers (vm, wrk, thread_index, &n_bufs, - ctx->n_bufs_per_seg * VLIB_FRAME_SIZE); - if (PREDICT_FALSE (n_bufs < n_bufs_needed)) - { - vec_add1 (wrk->pending_event_vector, *e); - return SESSION_TX_NO_BUFFERS; - } + if (n_bufs) + vlib_buffer_free (vm, wrk->tx_buffers, n_bufs); + vec_add1 (wrk->pending_event_vector, *e); + return SESSION_TX_NO_BUFFERS; } /* @@ -783,8 +762,11 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_FALSE (n_trace > 0)) session_tx_trace_frame (vm, node, next_index, to_next, ctx->n_segs_per_evt, ctx->s, n_trace); - - _vec_len (wrk->tx_buffers) = n_bufs; + if (PREDICT_FALSE (n_bufs)) + { + clib_warning ("not all buffers consumed"); + vlib_buffer_free (vm, wrk->tx_buffers, n_bufs); + } *n_tx_packets += ctx->n_segs_per_evt; transport_connection_update_tx_stats (ctx->tc, ctx->max_len_to_snd); vlib_put_next_frame (vm, node, next_index, n_left_to_next); @@ -792,18 +774,18 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, /* If we couldn't dequeue all bytes mark as partially read */ ASSERT (ctx->left_to_snd == 0); if (ctx->max_len_to_snd < ctx->max_dequeue) - if (svm_fifo_set_event (ctx->s->server_tx_fifo)) + if (svm_fifo_set_event (ctx->s->tx_fifo)) vec_add1 (wrk->pending_event_vector, *e); if (!peek_data && 
ctx->transport_vft->tx_type == TRANSPORT_TX_DGRAM) { /* Fix dgram pre header */ if (ctx->max_len_to_snd < ctx->max_dequeue) - svm_fifo_overwrite_head (ctx->s->server_tx_fifo, (u8 *) & ctx->hdr, + svm_fifo_overwrite_head (ctx->s->tx_fifo, (u8 *) & ctx->hdr, sizeof (session_dgram_pre_hdr_t)); /* More data needs to be read */ - else if (svm_fifo_max_dequeue (ctx->s->server_tx_fifo) > 0) - if (svm_fifo_set_event (ctx->s->server_tx_fifo)) + else if (svm_fifo_max_dequeue (ctx->s->tx_fifo) > 0) + if (svm_fifo_set_event (ctx->s->tx_fifo)) vec_add1 (wrk->pending_event_vector, *e); } return SESSION_TX_OK; @@ -831,17 +813,17 @@ session_tx_fifo_dequeue_internal (vlib_main_t * vm, session_manager_worker_t * wrk, session_event_t * e, int *n_tx_pkts) { - stream_session_t *s = wrk->ctx.s; + session_t *s = wrk->ctx.s; application_t *app; if (PREDICT_FALSE (s->session_state == SESSION_STATE_CLOSED)) return 0; app = application_get (s->t_app_index); - svm_fifo_unset_event (s->server_tx_fifo); + svm_fifo_unset_event (s->tx_fifo); return app->cb_fns.builtin_app_tx_callback (s); } -always_inline stream_session_t * +always_inline session_t * session_event_get_session (session_event_t * e, u8 thread_index) { return session_get_if_valid (e->fifo->master_session_index, thread_index); @@ -921,7 +903,7 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, for (i = 0; i < n_events; i++) { - stream_session_t *s; /* $$$ prefetch 1 ahead maybe */ + session_t *s; /* $$$ prefetch 1 ahead maybe */ session_event_t *e; u8 need_tx_ntf; @@ -950,7 +932,7 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, &n_tx_packets); if (PREDICT_TRUE (rv == SESSION_TX_OK)) { - need_tx_ntf = svm_fifo_needs_tx_ntf (s->server_tx_fifo, + need_tx_ntf = svm_fifo_needs_tx_ntf (s->tx_fifo, wrk->ctx.max_len_to_snd); if (PREDICT_FALSE (need_tx_ntf)) session_dequeue_notify (s); @@ -972,8 +954,7 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, * and the tx queue is 
still not empty, try to wait for some * dispatch cycles */ if (!e->postponed - || (e->postponed < 200 - && svm_fifo_max_dequeue (s->server_tx_fifo))) + || (e->postponed < 200 && svm_fifo_max_dequeue (s->tx_fifo))) { e->postponed += 1; vec_add1 (wrk->pending_disconnects, *e); @@ -986,7 +967,7 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, s = session_event_get_session (e, thread_index); if (PREDICT_FALSE (!s || s->session_state >= SESSION_STATE_CLOSING)) continue; - svm_fifo_unset_event (s->server_rx_fifo); + svm_fifo_unset_event (s->rx_fifo); app_wrk = app_worker_get (s->app_wrk_index); app = application_get (app_wrk->app_index); app->cb_fns.builtin_app_rx_callback (s); @@ -1057,7 +1038,7 @@ dump_thread_0_event_queue (void) u32 my_thread_index = vm->thread_index; session_event_t _e, *e = &_e; svm_msg_q_ring_t *ring; - stream_session_t *s0; + session_t *s0; svm_msg_q_msg_t *msg; svm_msg_q_t *mq; int i, index; @@ -1111,7 +1092,7 @@ dump_thread_0_event_queue (void) static u8 session_node_cmp_event (session_event_t * e, svm_fifo_t * f) { - stream_session_t *s; + session_t *s; switch (e->event_type) { case FIFO_EVENT_APP_RX: @@ -1129,7 +1110,7 @@ session_node_cmp_event (session_event_t * e, svm_fifo_t * f) clib_warning ("session has event but doesn't exist!"); break; } - if (s->server_rx_fifo == f || s->server_tx_fifo == f) + if (s->rx_fifo == f || s->tx_fifo == f) return 1; break; default: @@ -1248,7 +1229,6 @@ VLIB_REGISTER_NODE (session_queue_process_node) = }; /* *INDENT-ON* */ - /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/session/session_rules_table.c b/src/vnet/session/session_rules_table.c index 6304606ca431..7f5c7bb3bbb2 100644 --- a/src/vnet/session/session_rules_table.c +++ b/src/vnet/session/session_rules_table.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. 
* Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -388,7 +388,7 @@ session_rules_table_lookup6 (session_rules_table_t * srt, * * @return 0 if success, clib_error_t error otherwise */ -clib_error_t * +int session_rules_table_add_del (session_rules_table_t * srt, session_rule_table_add_del_args_t * args) { @@ -398,8 +398,7 @@ session_rules_table_add_del (session_rules_table_t * srt, ri_from_tag = session_rules_table_rule_for_tag (srt, args->tag); if (args->is_add && ri_from_tag != SESSION_RULES_TABLE_INVALID_INDEX) - return clib_error_return_code (0, VNET_API_ERROR_INVALID_VALUE, 0, - "tag exists"); + return VNET_API_ERROR_INVALID_VALUE; if (fib_proto == FIB_PROTOCOL_IP4) { @@ -510,8 +509,7 @@ session_rules_table_add_del (session_rules_table_t * srt, } } else - return clib_error_return_code (0, VNET_API_ERROR_INVALID_VALUE_2, 0, - "invalid fib proto"); + return VNET_API_ERROR_INVALID_VALUE_2; return 0; } diff --git a/src/vnet/session/session_rules_table.h b/src/vnet/session/session_rules_table.h index 9088afcca276..8679cb8a0c70 100644 --- a/src/vnet/session/session_rules_table.h +++ b/src/vnet/session/session_rules_table.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: @@ -111,9 +111,8 @@ void session_rules_table_show_rule (vlib_main_t * vm, ip46_address_t * lcl_ip, u16 lcl_port, ip46_address_t * rmt_ip, u16 rmt_port, u8 is_ip4); -clib_error_t *session_rules_table_add_del (session_rules_table_t * srt, - session_rule_table_add_del_args_t * - args); +int session_rules_table_add_del (session_rules_table_t * srt, + session_rule_table_add_del_args_t * args); u8 *session_rules_table_rule_tag (session_rules_table_t * srt, u32 ri, u8 is_ip4); void session_rules_table_init (session_rules_table_t * srt); diff --git a/src/vnet/session/session_table.c b/src/vnet/session/session_table.c index c74e290bda76..da3e4cd40965 100644 --- a/src/vnet/session/session_table.c +++ b/src/vnet/session/session_table.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: diff --git a/src/vnet/session/session_table.h b/src/vnet/session/session_table.h index f7f8c91b26cc..bf33d50dda65 100644 --- a/src/vnet/session/session_table.h +++ b/src/vnet/session/session_table.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: diff --git a/src/vnet/session/session_types.h b/src/vnet/session/session_types.h new file mode 100644 index 000000000000..846af25c7773 --- /dev/null +++ b/src/vnet/session/session_types.h @@ -0,0 +1,491 @@ +/* + * Copyright (c) 2017-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SRC_VNET_SESSION_SESSION_TYPES_H_ +#define SRC_VNET_SESSION_SESSION_TYPES_H_ + +#include +#include + +#define SESSION_LOCAL_HANDLE_PREFIX 0x7FFFFFFF +#define SESSION_LISTENER_PREFIX 0x5FFFFFFF + +#define foreach_session_endpoint_fields \ + foreach_transport_endpoint_cfg_fields \ + _(u8, transport_proto) \ + +typedef struct _session_endpoint +{ +#define _(type, name) type name; + foreach_session_endpoint_fields +#undef _ +} session_endpoint_t; + +typedef struct _session_endpoint_cfg +{ +#define _(type, name) type name; + foreach_session_endpoint_fields +#undef _ + u32 app_wrk_index; + u32 opaque; + u8 *hostname; +} session_endpoint_cfg_t; + +#define SESSION_IP46_ZERO \ +{ \ + .ip6 = { \ + { 0, 0, }, \ + }, \ +} + +#define TRANSPORT_ENDPOINT_NULL \ +{ \ + .sw_if_index = ENDPOINT_INVALID_INDEX, \ + .ip = SESSION_IP46_ZERO, \ + .fib_index = ENDPOINT_INVALID_INDEX, \ + .is_ip4 = 0, \ + .port = 0, \ +} +#define SESSION_ENDPOINT_NULL \ +{ \ + .sw_if_index = ENDPOINT_INVALID_INDEX, \ + .ip = SESSION_IP46_ZERO, \ + .fib_index = ENDPOINT_INVALID_INDEX, \ + .is_ip4 = 0, \ + .port = 0, \ + .peer = TRANSPORT_ENDPOINT_NULL, \ + .transport_proto = 0, \ +} +#define SESSION_ENDPOINT_CFG_NULL \ +{ \ + .sw_if_index = ENDPOINT_INVALID_INDEX, \ + .ip = SESSION_IP46_ZERO, \ + .fib_index = ENDPOINT_INVALID_INDEX, \ + .is_ip4 = 0, \ + .port = 0, \ + .peer = TRANSPORT_ENDPOINT_NULL, \ + .transport_proto = 0, \ + .app_wrk_index = ENDPOINT_INVALID_INDEX, \ + .opaque = ENDPOINT_INVALID_INDEX, \ + .hostname = 0, \ +} + +#define 
session_endpoint_to_transport(_sep) ((transport_endpoint_t *)_sep) +#define session_endpoint_to_transport_cfg(_sep) \ + ((transport_endpoint_cfg_t *)_sep) + +always_inline u8 +session_endpoint_fib_proto (session_endpoint_t * sep) +{ + return sep->is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; +} + +static inline u8 +session_endpoint_is_local (session_endpoint_t * sep) +{ + return (ip_is_zero (&sep->ip, sep->is_ip4) + || ip_is_local_host (&sep->ip, sep->is_ip4)); +} + +static inline u8 +session_endpoint_is_zero (session_endpoint_t * sep) +{ + return ip_is_zero (&sep->ip, sep->is_ip4); +} + +typedef u8 session_type_t; +typedef u64 session_handle_t; + +/* + * Application session state + */ +typedef enum +{ + SESSION_STATE_LISTENING, + SESSION_STATE_CONNECTING, + SESSION_STATE_ACCEPTING, + SESSION_STATE_READY, + SESSION_STATE_OPENED, + SESSION_STATE_TRANSPORT_CLOSING, + SESSION_STATE_CLOSING, + SESSION_STATE_CLOSED_WAITING, + SESSION_STATE_TRANSPORT_CLOSED, + SESSION_STATE_CLOSED, + SESSION_STATE_N_STATES, +} session_state_t; + +typedef struct generic_session_ +{ + svm_fifo_t *rx_fifo; /**< rx fifo */ + svm_fifo_t *tx_fifo; /**< tx fifo */ + session_type_t session_type; /**< session type */ + volatile u8 session_state; /**< session state */ + u32 session_index; /**< index in owning pool */ +} generic_session_t; + +typedef struct session_ +{ + /** fifo pointers. 
Once allocated, these do not move */ + svm_fifo_t *rx_fifo; + svm_fifo_t *tx_fifo; + + /** Type */ + session_type_t session_type; + + /** State */ + volatile u8 session_state; + + /** Session index in per_thread pool */ + u32 session_index; + + /** App worker pool index */ + u32 app_wrk_index; + + u8 thread_index; + + /** To avoid n**2 "one event per frame" check */ + u64 enqueue_epoch; + + /** svm segment index where fifos were allocated */ + u32 svm_segment_index; + + /** Transport specific */ + u32 connection_index; + + union + { + /** Parent listener session if the result of an accept */ + u32 listener_index; + + /** Application index if a listener */ + u32 app_index; + }; + + union + { + /** Transport app index for apps acting as transports */ + u32 t_app_index; + + /** App listener index */ + u32 al_index; + + /** Opaque, for general use */ + u32 opaque; + }; + + CLIB_CACHE_LINE_ALIGN_MARK (pad); +} session_t; + +always_inline session_type_t +session_type_from_proto_and_ip (transport_proto_t proto, u8 is_ip4) +{ + return (proto << 1 | is_ip4); +} + +always_inline transport_proto_t +session_type_transport_proto (session_type_t st) +{ + return (st >> 1); +} + +always_inline u8 +session_type_is_ip4 (session_type_t st) +{ + return (st & 1); +} + +always_inline transport_proto_t +session_get_transport_proto (session_t * s) +{ + return (s->session_type >> 1); +} + +always_inline fib_protocol_t +session_get_fib_proto (session_t * s) +{ + u8 is_ip4 = s->session_type & 1; + return (is_ip4 ? 
FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); +} + +always_inline u8 +session_has_transport (session_t * s) +{ + return (session_get_transport_proto (s) != TRANSPORT_PROTO_NONE); +} + +static inline transport_service_type_t +session_transport_service_type (session_t * s) +{ + transport_proto_t tp; + tp = session_get_transport_proto (s); + return transport_protocol_service_type (tp); +} + +static inline transport_tx_fn_type_t +session_transport_tx_fn_type (session_t * s) +{ + transport_proto_t tp; + tp = session_get_transport_proto (s); + return transport_protocol_tx_fn_type (tp); +} + +static inline u8 +session_tx_is_dgram (session_t * s) +{ + return (session_transport_tx_fn_type (s) == TRANSPORT_TX_DGRAM); +} + +always_inline session_handle_t +session_handle (session_t * s) +{ + return ((u64) s->thread_index << 32) | (u64) s->session_index; +} + +always_inline u32 +session_index_from_handle (session_handle_t handle) +{ + return handle & 0xFFFFFFFF; +} + +always_inline u32 +session_thread_from_handle (session_handle_t handle) +{ + return handle >> 32; +} + +always_inline void +session_parse_handle (session_handle_t handle, u32 * index, + u32 * thread_index) +{ + *index = session_index_from_handle (handle); + *thread_index = session_thread_from_handle (handle); +} + +always_inline u8 +session_handle_is_local (session_handle_t handle) +{ + if ((handle >> 32) == SESSION_LOCAL_HANDLE_PREFIX) + return 1; + return 0; +} + +typedef struct local_session_ +{ + /** fifo pointers. Once allocated, these do not move */ + svm_fifo_t *rx_fifo; + svm_fifo_t *tx_fifo; + + /** Type */ + session_type_t session_type; + + /** State */ + volatile u8 session_state; + + /** Session index */ + u32 session_index; + + /** Server index */ + u32 app_wrk_index; + + /** Port for connection. 
Overlaps thread_index/enqueue_epoch */ + u16 port; + + /** Partly overlaps enqueue_epoch */ + u8 pad_epoch[7]; + + /** Segment index where fifos were allocated */ + u32 svm_segment_index; + + /** Transport listener index. Overlaps connection index */ + u32 transport_listener_index; + + union + { + u32 listener_index; + u32 app_index; + }; + + u32 al_index; + + /** Has transport embedded when listener not purely local */ + session_type_t listener_session_type; + + /** + * Client data + */ + u32 client_wrk_index; + u32 client_opaque; + + u64 server_evt_q; + u64 client_evt_q; + + CLIB_CACHE_LINE_ALIGN_MARK (pad); +} local_session_t; + +always_inline u32 +local_session_id (local_session_t * ls) +{ + ASSERT (ls->session_index < (2 << 16)); + u32 app_or_wrk_index; + + if (ls->session_state == SESSION_STATE_LISTENING) + { + ASSERT (ls->app_index < (2 << 16)); + app_or_wrk_index = ls->app_index; + } + else + { + ASSERT (ls->app_wrk_index < (2 << 16)); + app_or_wrk_index = ls->app_wrk_index; + } + + return ((u32) app_or_wrk_index << 16 | (u32) ls->session_index); +} + +always_inline void +local_session_parse_id (u32 ls_id, u32 * app_or_wrk, u32 * session_index) +{ + *app_or_wrk = ls_id >> 16; + *session_index = ls_id & 0xFF; +} + +always_inline void +local_session_parse_handle (session_handle_t handle, u32 * app_or_wrk_index, + u32 * session_index) +{ + u32 bottom; + ASSERT ((handle >> 32) == SESSION_LOCAL_HANDLE_PREFIX); + bottom = (handle & 0xFFFFFFFF); + local_session_parse_id (bottom, app_or_wrk_index, session_index); +} + +always_inline session_handle_t +application_local_session_handle (local_session_t * ls) +{ + return ((u64) SESSION_LOCAL_HANDLE_PREFIX << 32) + | (u64) local_session_id (ls); +} + +typedef enum +{ + FIFO_EVENT_APP_RX, + SESSION_IO_EVT_CT_RX, + FIFO_EVENT_APP_TX, + SESSION_IO_EVT_CT_TX, + SESSION_IO_EVT_TX_FLUSH, + FIFO_EVENT_DISCONNECT, + FIFO_EVENT_BUILTIN_RX, + FIFO_EVENT_BUILTIN_TX, + FIFO_EVENT_RPC, + SESSION_CTRL_EVT_BOUND, + 
SESSION_CTRL_EVT_ACCEPTED, + SESSION_CTRL_EVT_ACCEPTED_REPLY, + SESSION_CTRL_EVT_CONNECTED, + SESSION_CTRL_EVT_CONNECTED_REPLY, + SESSION_CTRL_EVT_DISCONNECTED, + SESSION_CTRL_EVT_DISCONNECTED_REPLY, + SESSION_CTRL_EVT_RESET, + SESSION_CTRL_EVT_RESET_REPLY, + SESSION_CTRL_EVT_REQ_WORKER_UPDATE, + SESSION_CTRL_EVT_WORKER_UPDATE, + SESSION_CTRL_EVT_WORKER_UPDATE_REPLY, +} session_evt_type_t; + +static inline const char * +fifo_event_type_str (session_evt_type_t et) +{ + switch (et) + { + case FIFO_EVENT_APP_RX: + return "FIFO_EVENT_APP_RX"; + case FIFO_EVENT_APP_TX: + return "FIFO_EVENT_APP_TX"; + case FIFO_EVENT_DISCONNECT: + return "FIFO_EVENT_DISCONNECT"; + case FIFO_EVENT_BUILTIN_RX: + return "FIFO_EVENT_BUILTIN_RX"; + case FIFO_EVENT_RPC: + return "FIFO_EVENT_RPC"; + default: + return "UNKNOWN FIFO EVENT"; + } +} + +typedef enum +{ + SESSION_MQ_IO_EVT_RING, + SESSION_MQ_CTRL_EVT_RING, + SESSION_MQ_N_RINGS +} session_mq_rings_e; + +typedef struct +{ + void *fp; + void *arg; +} session_rpc_args_t; + +/* *INDENT-OFF* */ +typedef struct +{ + u8 event_type; + u8 postponed; + union + { + svm_fifo_t *fifo; + session_handle_t session_handle; + session_rpc_args_t rpc_args; + struct + { + u8 data[0]; + }; + }; +} __clib_packed session_event_t; +/* *INDENT-ON* */ + +#define SESSION_MSG_NULL { } + +typedef struct session_dgram_pre_hdr_ +{ + u32 data_length; + u32 data_offset; +} session_dgram_pre_hdr_t; + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct session_dgram_header_ +{ + u32 data_length; + u32 data_offset; + ip46_address_t rmt_ip; + ip46_address_t lcl_ip; + u16 rmt_port; + u16 lcl_port; + u8 is_ip4; +}) session_dgram_hdr_t; +/* *INDENT-ON* */ + +#define SESSION_CONN_ID_LEN 37 +#define SESSION_CONN_HDR_LEN 45 + +STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8), + "session conn id wrong length"); +#endif /* SRC_VNET_SESSION_SESSION_TYPES_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style 
"gnu") + * End: + */ diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c index c2d21bf28f91..2c4efe15806a 100644 --- a/src/vnet/session/transport.c +++ b/src/vnet/session/transport.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -13,7 +13,7 @@ * limitations under the License. */ -#include +#include #include #include @@ -125,14 +125,13 @@ u8 * format_transport_listen_connection (u8 * s, va_list * args) { u32 transport_proto = va_arg (*args, u32); - u32 listen_index = va_arg (*args, u32); transport_proto_vft_t *tp_vft; tp_vft = transport_protocol_get_vft (transport_proto); if (!tp_vft) return s; - s = format (s, "%U", tp_vft->format_listener, listen_index); + s = (tp_vft->format_listener) (s, args); return s; } @@ -272,6 +271,37 @@ transport_protocol_tx_fn_type (transport_proto_t tp) return tp_vfts[tp].tx_type; } +void +transport_cleanup (transport_proto_t tp, u32 conn_index, u8 thread_index) +{ + tp_vfts[tp].cleanup (conn_index, thread_index); +} + +int +transport_connect (transport_proto_t tp, transport_endpoint_cfg_t * tep) +{ + return tp_vfts[tp].connect (tep); +} + +void +transport_close (transport_proto_t tp, u32 conn_index, u8 thread_index) +{ + tp_vfts[tp].close (conn_index, thread_index); +} + +u32 +transport_start_listen (transport_proto_t tp, u32 session_index, + transport_endpoint_t * tep) +{ + return tp_vfts[tp].start_listen (session_index, tep); +} + +u32 +transport_stop_listen (transport_proto_t tp, u32 conn_index) +{ + return tp_vfts[tp].stop_listen (conn_index); +} + u8 transport_protocol_is_cl (transport_proto_t tp) { diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h index 952f97d9ac33..8500e9d24457 100644 --- a/src/vnet/session/transport.h +++ 
b/src/vnet/session/transport.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Cisco and/or its affiliates. + * Copyright (c) 2017-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -13,165 +13,194 @@ * limitations under the License. */ -#ifndef VNET_VNET_URI_TRANSPORT_H_ -#define VNET_VNET_URI_TRANSPORT_H_ +#ifndef SRC_VNET_SESSION_TRANSPORT_H_ +#define SRC_VNET_SESSION_TRANSPORT_H_ #include -#include -#include +#include -typedef struct _transport_stats +/* + * Transport protocol virtual function table + */ +/* *INDENT-OFF* */ +typedef struct _transport_proto_vft +{ + /* + * Setup + */ + u32 (*start_listen) (u32 session_index, transport_endpoint_t * lcl); + u32 (*stop_listen) (u32 conn_index); + int (*connect) (transport_endpoint_cfg_t * rmt); + void (*close) (u32 conn_index, u32 thread_index); + void (*cleanup) (u32 conn_index, u32 thread_index); + clib_error_t *(*enable) (vlib_main_t * vm, u8 is_en); + + /* + * Transmission + */ + + u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b); + u16 (*send_mss) (transport_connection_t * tc); + u32 (*send_space) (transport_connection_t * tc); + u32 (*tx_fifo_offset) (transport_connection_t * tc); + void (*update_time) (f64 time_now, u8 thread_index); + void (*flush_data) (transport_connection_t *tconn); + + /* + * Connection retrieval + */ + transport_connection_t *(*get_connection) (u32 conn_idx, u32 thread_idx); + transport_connection_t *(*get_listener) (u32 conn_index); + transport_connection_t *(*get_half_open) (u32 conn_index); + + /* + * Format + */ + u8 *(*format_connection) (u8 * s, va_list * args); + u8 *(*format_listener) (u8 * s, va_list * args); + u8 *(*format_half_open) (u8 * s, va_list * args); + + /* + * Properties + */ + transport_tx_fn_type_t tx_type; + transport_service_type_t service_type; +} transport_proto_vft_t; +/* *INDENT-ON* 
*/ + +extern transport_proto_vft_t *tp_vfts; + +#define transport_proto_foreach(VAR, BODY) \ +do { \ + for (VAR = 0; VAR < vec_len (tp_vfts); VAR++) \ + if (tp_vfts[VAR].push_header != 0) \ + do { BODY; } while (0); \ +} while (0) + +int transport_connect (transport_proto_t tp, transport_endpoint_cfg_t * tep); +void transport_close (transport_proto_t tp, u32 conn_index, u8 thread_index); +u32 transport_start_listen (transport_proto_t tp, u32 session_index, + transport_endpoint_t * tep); +u32 transport_stop_listen (transport_proto_t tp, u32 conn_index); +void transport_cleanup (transport_proto_t tp, u32 conn_index, + u8 thread_index); + +static inline transport_connection_t * +transport_get_connection (transport_proto_t tp, u32 conn_index, + u8 thread_index) { - u64 tx_bytes; -} transport_stats_t; + return tp_vfts[tp].get_connection (conn_index, thread_index); +} -typedef struct _spacer +static inline transport_connection_t * +transport_get_listener (transport_proto_t tp, u32 conn_index) { - u64 bucket; - u32 max_burst_size; - f32 tokens_per_period; - u64 last_update; -} spacer_t; + return tp_vfts[tp].get_listener (conn_index); +} -/* - * Protocol independent transport properties associated to a session - */ -typedef struct _transport_connection +static inline transport_connection_t * +transport_get_half_open (transport_proto_t tp, u32 conn_index) { - /** Connection ID */ - union - { - /* - * Network connection ID tuple - */ - struct - { - ip46_address_t rmt_ip; /**< Remote IP */ - ip46_address_t lcl_ip; /**< Local IP */ - u16 rmt_port; /**< Remote port */ - u16 lcl_port; /**< Local port */ - u8 is_ip4; /**< Flag if IP4 connection */ - u8 proto; /**< Protocol id */ - u32 fib_index; /**< Network namespace */ - }; - /* - * Opaque connection ID - */ - u8 opaque_conn_id[42]; - }; - - u32 s_index; /**< Parent session index */ - u32 c_index; /**< Connection index in transport pool */ - u32 thread_index; /**< Worker-thread index */ - - /*fib_node_index_t rmt_fei; - 
dpo_id_t rmt_dpo; */ - - u8 flags; /**< Transport specific flags */ - transport_stats_t stats; /**< Transport connection stats */ - spacer_t pacer; /**< Simple transport pacer */ + return tp_vfts[tp].get_half_open (conn_index); +} + +void transport_register_protocol (transport_proto_t transport_proto, + const transport_proto_vft_t * vft, + fib_protocol_t fib_proto, u32 output_node); +transport_proto_vft_t *transport_protocol_get_vft (transport_proto_t tp); +void transport_update_time (f64 time_now, u8 thread_index); + +int transport_alloc_local_port (u8 proto, ip46_address_t * ip); +int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt, + ip46_address_t * lcl_addr, + u16 * lcl_port); +void transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port); +void transport_enable_disable (vlib_main_t * vm, u8 is_en); +void transport_init (void); +always_inline u32 +transport_elog_track_index (transport_connection_t * tc) +{ #if TRANSPORT_DEBUG - elog_track_t elog_track; /**< Event logging */ - u32 cc_stat_tstamp; /**< CC stats timestamp */ + return tc->elog_track.track_index_plus_one - 1; +#else + return ~0; #endif +} - /** Macros for 'derived classes' where base is named "connection" */ -#define c_lcl_ip connection.lcl_ip -#define c_rmt_ip connection.rmt_ip -#define c_lcl_ip4 connection.lcl_ip.ip4 -#define c_rmt_ip4 connection.rmt_ip.ip4 -#define c_lcl_ip6 connection.lcl_ip.ip6 -#define c_rmt_ip6 connection.rmt_ip.ip6 -#define c_lcl_port connection.lcl_port -#define c_rmt_port connection.rmt_port -#define c_proto connection.proto -#define c_fib_index connection.fib_index -#define c_s_index connection.s_index -#define c_c_index connection.c_index -#define c_is_ip4 connection.is_ip4 -#define c_thread_index connection.thread_index -#define c_elog_track connection.elog_track -#define c_cc_stat_tstamp connection.cc_stat_tstamp -#define c_rmt_fei connection.rmt_fei -#define c_rmt_dpo connection.rmt_dpo -#define c_opaque_id 
connection.opaque_conn_id -#define c_stats connection.stats -#define c_pacer connection.pacer -#define c_flags connection.flags -} transport_connection_t; - -#define TRANSPORT_CONNECTION_F_IS_TX_PACED 1 << 0 - -typedef enum _transport_proto -{ - TRANSPORT_PROTO_TCP, - TRANSPORT_PROTO_UDP, - TRANSPORT_PROTO_SCTP, - TRANSPORT_PROTO_NONE, - TRANSPORT_PROTO_TLS, - TRANSPORT_PROTO_UDPC, - TRANSPORT_N_PROTO -} transport_proto_t; - -u8 *format_transport_proto (u8 * s, va_list * args); -u8 *format_transport_proto_short (u8 * s, va_list * args); -u8 *format_transport_connection (u8 * s, va_list * args); -u8 *format_transport_listen_connection (u8 * s, va_list * args); -u8 *format_transport_half_open_connection (u8 * s, va_list * args); - -uword unformat_transport_proto (unformat_input_t * input, va_list * args); - -#define foreach_transport_endpoint_fields \ - _(ip46_address_t, ip) /**< ip address in net order */ \ - _(u16, port) /**< port in net order */ \ - _(u8, is_ip4) /**< set if ip4 */ \ - _(u32, sw_if_index) /**< interface endpoint is associated with */ \ - _(u32, fib_index) /**< fib table endpoint is associated with */ \ - -typedef struct transport_endpoint_ -{ -#define _(type, name) type name; - foreach_transport_endpoint_fields -#undef _ -} transport_endpoint_t; +void transport_connection_tx_pacer_reset (transport_connection_t * tc, + u32 rate_bytes_per_sec, + u32 initial_bucket, u64 time_now); +/** + * Initialize tx pacer for connection + * + * @param tc transport connection + * @param rate_bytes_per_second initial byte rate + * @param burst_bytes initial burst size in bytes + */ +void transport_connection_tx_pacer_init (transport_connection_t * tc, + u32 rate_bytes_per_sec, + u32 initial_bucket); -#define foreach_transport_endpoint_cfg_fields \ - foreach_transport_endpoint_fields \ - _(transport_endpoint_t, peer) \ +/** + * Update tx pacer pacing rate + * + * @param tc transport connection + * @param bytes_per_sec new pacing rate + */ +void 
transport_connection_tx_pacer_update (transport_connection_t * tc, + u64 bytes_per_sec); -typedef struct transport_endpoint_pair_ -{ -#define _(type, name) type name; - foreach_transport_endpoint_cfg_fields -#undef _ -} transport_endpoint_cfg_t; +/** + * Get maximum tx burst allowed for transport connection + * + * @param tc transport connection + * @param time_now current cpu time as returned by @ref clib_cpu_time_now + * @param mss transport's mss + */ +u32 transport_connection_snd_space (transport_connection_t * tc, + u64 time_now, u16 mss); -typedef clib_bihash_24_8_t transport_endpoint_table_t; +u32 transport_connection_tx_pacer_burst (transport_connection_t * tc, + u64 time_now); -#define ENDPOINT_INVALID_INDEX ((u32)~0) +/** + * Initialize period for tx pacers + * + * Defines a unit of time with respect to number of cpu cycles that is to + * be used by all tx pacers. + */ +void transport_init_tx_pacers_period (void); +/** + * Check if transport connection is paced + */ always_inline u8 -transport_connection_fib_proto (transport_connection_t * tc) +transport_connection_is_tx_paced (transport_connection_t * tc) { - return tc->is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; + return (tc->flags & TRANSPORT_CONNECTION_F_IS_TX_PACED); } -always_inline u8 -transport_endpoint_fib_proto (transport_endpoint_t * tep) -{ - return tep->is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; -} +u8 *format_transport_pacer (u8 * s, va_list * args); -int transport_alloc_local_port (u8 proto, ip46_address_t * ip); -int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt, - ip46_address_t * lcl_addr, - u16 * lcl_port); -void transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port); -u8 transport_protocol_is_cl (transport_proto_t tp); -void transport_init (void); +/** + * Update tx byte stats for transport connection + * + * If tx pacing is enabled, this also updates pacer bucket to account for the + * amount of bytes that have been sent. 
+ * + * @param tc transport connection + * @param pkts packets recently sent + * @param bytes bytes recently sent + */ +void transport_connection_update_tx_stats (transport_connection_t * tc, + u32 bytes); + +void +transport_connection_tx_pacer_update_bytes (transport_connection_t * tc, + u32 bytes); -#endif /* VNET_VNET_URI_TRANSPORT_H_ */ +#endif /* SRC_VNET_SESSION_TRANSPORT_H_ */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/session/transport_types.h b/src/vnet/session/transport_types.h new file mode 100644 index 000000000000..d309c581db8d --- /dev/null +++ b/src/vnet/session/transport_types.h @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef VNET_VNET_URI_TRANSPORT_TYPES_H_ +#define VNET_VNET_URI_TRANSPORT_TYPES_H_ + +#include +#include +#include + +#define TRANSPORT_MAX_HDRS_LEN 100 /* Max number of bytes for headers */ + +typedef enum transport_dequeue_type_ +{ + TRANSPORT_TX_PEEK, /**< reliable transport protos */ + TRANSPORT_TX_DEQUEUE, /**< unreliable transport protos */ + TRANSPORT_TX_INTERNAL, /**< apps acting as transports */ + TRANSPORT_TX_DGRAM, /**< datagram mode */ + TRANSPORT_TX_N_FNS +} transport_tx_fn_type_t; + +typedef enum transport_service_type_ +{ + TRANSPORT_SERVICE_VC, /**< virtual circuit service */ + TRANSPORT_SERVICE_CL, /**< connectionless service */ + TRANSPORT_SERVICE_APP, /**< app transport service */ + TRANSPORT_N_SERVICES +} transport_service_type_t; + +typedef struct _transport_stats +{ + u64 tx_bytes; +} transport_stats_t; + +typedef struct _spacer +{ + u64 bucket; + u32 max_burst_size; + f32 tokens_per_period; + u64 last_update; +} spacer_t; + +/* + * Protocol independent transport properties associated to a session + */ +typedef struct _transport_connection +{ + /** Connection ID */ + union + { + /* + * Network connection ID tuple + */ + struct + { + ip46_address_t rmt_ip; /**< Remote IP */ + ip46_address_t lcl_ip; /**< Local IP */ + u16 rmt_port; /**< Remote port */ + u16 lcl_port; /**< Local port */ + u8 is_ip4; /**< Flag if IP4 connection */ + u8 proto; /**< Protocol id */ + u32 fib_index; /**< Network namespace */ + }; + /* + * Opaque connection ID + */ + u8 opaque_conn_id[42]; + }; + + u32 s_index; /**< Parent session index */ + u32 c_index; /**< Connection index in transport pool */ + u32 thread_index; /**< Worker-thread index */ + + /*fib_node_index_t rmt_fei; + dpo_id_t rmt_dpo; */ + + u8 flags; /**< Transport specific flags */ + transport_stats_t stats; /**< Transport connection stats */ + spacer_t pacer; /**< Simple transport pacer */ + +#if TRANSPORT_DEBUG + elog_track_t elog_track; /**< Event logging */ + u32 cc_stat_tstamp; /**< CC stats 
timestamp */ +#endif + + /** Macros for 'derived classes' where base is named "connection" */ +#define c_lcl_ip connection.lcl_ip +#define c_rmt_ip connection.rmt_ip +#define c_lcl_ip4 connection.lcl_ip.ip4 +#define c_rmt_ip4 connection.rmt_ip.ip4 +#define c_lcl_ip6 connection.lcl_ip.ip6 +#define c_rmt_ip6 connection.rmt_ip.ip6 +#define c_lcl_port connection.lcl_port +#define c_rmt_port connection.rmt_port +#define c_proto connection.proto +#define c_fib_index connection.fib_index +#define c_s_index connection.s_index +#define c_c_index connection.c_index +#define c_is_ip4 connection.is_ip4 +#define c_thread_index connection.thread_index +#define c_elog_track connection.elog_track +#define c_cc_stat_tstamp connection.cc_stat_tstamp +#define c_rmt_fei connection.rmt_fei +#define c_rmt_dpo connection.rmt_dpo +#define c_opaque_id connection.opaque_conn_id +#define c_stats connection.stats +#define c_pacer connection.pacer +#define c_flags connection.flags +} transport_connection_t; + +#define TRANSPORT_CONNECTION_F_IS_TX_PACED 1 << 0 + +typedef enum _transport_proto +{ + TRANSPORT_PROTO_TCP, + TRANSPORT_PROTO_UDP, + TRANSPORT_PROTO_SCTP, + TRANSPORT_PROTO_NONE, + TRANSPORT_PROTO_TLS, + TRANSPORT_PROTO_UDPC, + TRANSPORT_N_PROTO +} transport_proto_t; + +u8 *format_transport_proto (u8 * s, va_list * args); +u8 *format_transport_proto_short (u8 * s, va_list * args); +u8 *format_transport_connection (u8 * s, va_list * args); +u8 *format_transport_listen_connection (u8 * s, va_list * args); +u8 *format_transport_half_open_connection (u8 * s, va_list * args); + +uword unformat_transport_proto (unformat_input_t * input, va_list * args); + +#define foreach_transport_endpoint_fields \ + _(ip46_address_t, ip) /**< ip address in net order */ \ + _(u16, port) /**< port in net order */ \ + _(u8, is_ip4) /**< set if ip4 */ \ + _(u32, sw_if_index) /**< interface endpoint is associated with */ \ + _(u32, fib_index) /**< fib table endpoint is associated with */ \ + +typedef struct 
transport_endpoint_ +{ +#define _(type, name) type name; + foreach_transport_endpoint_fields +#undef _ +} transport_endpoint_t; + +#define foreach_transport_endpoint_cfg_fields \ + foreach_transport_endpoint_fields \ + _(transport_endpoint_t, peer) \ + +typedef struct transport_endpoint_pair_ +{ +#define _(type, name) type name; + foreach_transport_endpoint_cfg_fields +#undef _ +} transport_endpoint_cfg_t; + +typedef clib_bihash_24_8_t transport_endpoint_table_t; + +#define ENDPOINT_INVALID_INDEX ((u32)~0) + +always_inline u8 +transport_connection_fib_proto (transport_connection_t * tc) +{ + return tc->is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; +} + +always_inline u8 +transport_endpoint_fib_proto (transport_endpoint_t * tep) +{ + return tep->is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; +} + +u8 transport_protocol_is_cl (transport_proto_t tp); +transport_service_type_t transport_protocol_service_type (transport_proto_t); +transport_tx_fn_type_t transport_protocol_tx_fn_type (transport_proto_t tp); + +#endif /* VNET_VNET_URI_TRANSPORT_TYPES_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/srp/node.c b/src/vnet/srp/node.c index cf9bd31c5130..a852f61ab4df 100644 --- a/src/vnet/srp/node.c +++ b/src/vnet/srp/node.c @@ -333,8 +333,7 @@ srp_topology_packet (vlib_main_t * vm, u32 sw_if_index, u8 ** contents) u32 * to_next; u32 bi = ~0; - if (vlib_buffer_add_data (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX, - /* buffer to append to */ &bi, + if (vlib_buffer_add_data (vm, /* buffer to append to */ &bi, *contents, vec_len (*contents))) { /* complete or partial buffer allocation failure */ @@ -628,9 +627,8 @@ static void tx_ips_packet (srp_interface_t * si, = ~ip_csum_fold (ip_incremental_checksum (0, &i->control, sizeof (i[0]) - STRUCT_OFFSET_OF (srp_ips_header_t, control))); - if (vlib_buffer_add_data (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX, - /* buffer to append to */ &bi, - 
i, sizeof (i[0]))) + if (vlib_buffer_add_data (vm, /* buffer to append to */ &bi, i, + sizeof (i[0]))) { /* complete or partial allocation failure */ if (bi != ~0) diff --git a/src/vnet/syslog/syslog.c b/src/vnet/syslog/syslog.c index 1cffe03d768b..462c5deeecd7 100644 --- a/src/vnet/syslog/syslog.c +++ b/src/vnet/syslog/syslog.c @@ -163,7 +163,6 @@ syslog_msg_send (syslog_msg_t * syslog_msg) u32 bi, msg_len, *to_next; u8 *tmp; vlib_buffer_t *b; - vlib_buffer_free_list_t *fl; vlib_frame_t *f; int i; @@ -171,9 +170,6 @@ syslog_msg_send (syslog_msg_t * syslog_msg) return -1; b = vlib_get_buffer (vm, bi); - clib_memset (vnet_buffer (b), 0, sizeof (*vnet_buffer (b))); - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vlib_buffer_init_for_free_list (b, fl); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); /* one message per UDP datagram RFC5426 3.1. */ diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 037abdc652d0..81f209b5d7cf 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -920,8 +920,12 @@ static u8 * format_tcp_listener_session (u8 * s, va_list * args) { u32 tci = va_arg (*args, u32); + u32 verbose = va_arg (*args, u32); tcp_connection_t *tc = tcp_listener_get (tci); - return format (s, "%U", format_tcp_connection_id, tc); + s = format (s, "%-50U", format_tcp_connection_id, tc); + if (verbose) + s = format (s, "%-15U", format_tcp_state, tc->state); + return s; } static u8 * @@ -1169,13 +1173,13 @@ tcp_session_flush_data (transport_connection_t * tconn) /* *INDENT-OFF* */ const static transport_proto_vft_t tcp_proto = { .enable = vnet_tcp_enable_disable, - .bind = tcp_session_bind, - .unbind = tcp_session_unbind, + .start_listen = tcp_session_bind, + .stop_listen = tcp_session_unbind, .push_header = tcp_session_push_header, .get_connection = tcp_session_get_transport, .get_listener = tcp_session_get_listener, .get_half_open = tcp_half_open_session_get_transport, - .open = tcp_session_open, + .connect = tcp_session_open, .close = 
tcp_session_close, .cleanup = tcp_session_cleanup, .send_mss = tcp_session_send_mss, @@ -1472,7 +1476,7 @@ tcp_main_enable (vlib_main_t * vm) tcp_initialize_timer_wheels (tm); tcp_initialize_iss_seed (tm); - tm->bytes_per_buffer = VLIB_BUFFER_DATA_SIZE; + tm->bytes_per_buffer = vlib_buffer_get_default_data_size (vm); return error; } diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 8266f1ff4491..68750ce373f2 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -612,7 +612,6 @@ tcp_half_open_connection_get (u32 conn_index) return tc; } -void tcp_make_ack (tcp_connection_t * ts, vlib_buffer_t * b); void tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b); void tcp_make_synack (tcp_connection_t * ts, vlib_buffer_t * b); void tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index f1f945e10920..da03cababb7f 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -1798,7 +1798,7 @@ static int tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b, u16 data_len) { - stream_session_t *s0; + session_t *s0; int rv, offset; ASSERT (seq_gt (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt)); @@ -1828,15 +1828,15 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b, s0 = session_get (tc->c_s_index, tc->c_thread_index); /* Get the newest segment from the fifo */ - newest = svm_fifo_newest_ooo_segment (s0->server_rx_fifo); + newest = svm_fifo_newest_ooo_segment (s0->rx_fifo); if (newest) { - offset = ooo_segment_offset (s0->server_rx_fifo, newest); + offset = ooo_segment_offset (s0->rx_fifo, newest); ASSERT (offset <= vnet_buffer (b)->tcp.seq_number - tc->rcv_nxt); start = tc->rcv_nxt + offset; - end = start + ooo_segment_length (s0->server_rx_fifo, newest); + end = start + ooo_segment_length (s0->rx_fifo, newest); tcp_update_sack_list (tc, start, end); - svm_fifo_newest_ooo_segment_reset (s0->server_rx_fifo); + 
svm_fifo_newest_ooo_segment_reset (s0->rx_fifo); TCP_EVT_DBG (TCP_EVT_CC_SACKS, tc); } } @@ -3147,7 +3147,7 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, child0->rto = TCP_RTO_MIN; TCP_EVT_DBG (TCP_EVT_SYN_RCVD, child0, 1); - if (stream_session_accept (&child0->connection, lc0->c_s_index, + if (session_stream_accept (&child0->connection, lc0->c_s_index, 0 /* notify */ )) { tcp_connection_cleanup (child0); @@ -3368,6 +3368,13 @@ tcp_input_lookup_buffer (vlib_buffer_t * b, u8 thread_index, u32 * error, *error = TCP_ERROR_LENGTH; return 0; } + if (PREDICT_FALSE + (ip6_address_is_link_local_unicast (&ip6->dst_address))) + { + ip4_main_t *im = &ip4_main; + fib_index = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (b)->sw_if_index[VLIB_RX]); + } tc = session_lookup_connection_wt6 (fib_index, &ip6->dst_address, &ip6->src_address, tcp->dst_port, diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index f0fe25495a1b..9f38851b026e 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -14,7 +14,6 @@ */ #include -#include #include vlib_node_registration_t tcp4_output_node; @@ -464,41 +463,6 @@ tcp_init_mss (tcp_connection_t * tc) tc->snd_mss -= TCP_OPTION_LEN_TIMESTAMP; } -static int -tcp_alloc_tx_buffers (tcp_worker_ctx_t * wrk, u16 * n_bufs, u32 wanted) -{ - vlib_main_t *vm = vlib_get_main (); - u32 n_alloc; - - ASSERT (wanted > *n_bufs); - vec_validate_aligned (wrk->tx_buffers, wanted - 1, CLIB_CACHE_LINE_BYTES); - n_alloc = vlib_buffer_alloc (vm, &wrk->tx_buffers[*n_bufs], - wanted - *n_bufs); - *n_bufs += n_alloc; - _vec_len (wrk->tx_buffers) = *n_bufs; - return n_alloc; -} - -always_inline int -tcp_get_free_buffer_index (tcp_worker_ctx_t * wrk, u32 * bidx) -{ - u16 n_bufs = vec_len (wrk->tx_buffers); - - TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL (wrk->vm->thread_index); - - if (PREDICT_FALSE (!n_bufs)) - { - if (!tcp_alloc_tx_buffers (wrk, &n_bufs, VLIB_FRAME_SIZE)) - { - *bidx = ~0; - return -1; - } - } - *bidx = 
wrk->tx_buffers[--n_bufs]; - _vec_len (wrk->tx_buffers) = n_bufs; - return 0; -} - static void * tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) { @@ -512,27 +476,26 @@ tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) vnet_buffer (b)->tcp.flags = 0; /* Leave enough space for headers */ - return vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN); } static void * tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b) { ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - b->flags &= VLIB_BUFFER_NON_DEFAULT_FREELIST; b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; b->total_length_not_including_first_buffer = 0; b->current_data = 0; vnet_buffer (b)->tcp.flags = 0; VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); /* Leave enough space for headers */ - return vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN); } /** * Prepare ACK */ -static void +static inline void tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state, u8 flags) { @@ -557,12 +520,9 @@ tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state, /** * Convert buffer to ACK */ -void +static inline void tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b) { - vlib_main_t *vm = vlib_get_main (); - - tcp_reuse_buffer (vm, b); tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK); TCP_EVT_DBG (TCP_EVT_ACK_SENT, tc); tc->rcv_las = tc->rcv_nxt; @@ -574,13 +534,7 @@ tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b) void tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b) { - vlib_main_t *vm = vlib_get_main (); - u8 flags = 0; - - tcp_reuse_buffer (vm, b); - - flags = TCP_FLAG_FIN | TCP_FLAG_ACK; - tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, flags); + tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK); /* Reset flags, make sure ack is sent */ vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK; @@ -617,15 +571,12 @@ 
tcp_make_syn (tcp_connection_t * tc, vlib_buffer_t * b) void tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b) { - vlib_main_t *vm = vlib_get_main (); tcp_options_t _snd_opts, *snd_opts = &_snd_opts; u8 tcp_opts_len, tcp_hdr_opts_len; tcp_header_t *th; u16 initial_wnd; clib_memset (snd_opts, 0, sizeof (*snd_opts)); - tcp_reuse_buffer (vm, b); - initial_wnd = tcp_initial_window_to_advertise (tc); tcp_opts_len = tcp_make_synack_options (tc, snd_opts); tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); @@ -840,7 +791,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, ip6_header_t *ih6, *pkt_ih6; fib_protocol_t fib_proto; - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) return; b = vlib_get_buffer (vm, bi); @@ -916,7 +867,7 @@ tcp_send_reset (tcp_connection_t * tc) u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len; u8 flags; - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) return; b = vlib_get_buffer (vm, bi); tcp_init_buffer (vm, b); @@ -999,7 +950,7 @@ tcp_send_syn (tcp_connection_t * tc) tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, tc->rto * TCP_TO_TIMER_TICK); - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) return; b = vlib_get_buffer (vm, bi); @@ -1024,12 +975,12 @@ tcp_send_synack (tcp_connection_t * tc) vlib_buffer_t *b; u32 bi; - /* Get buffer */ - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) return; tc->rtt_ts = tcp_time_now_us (tc->c_thread_index); b = vlib_get_buffer (vm, bi); + tcp_init_buffer (vm, b); tcp_make_synack (tc, b); tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4); } @@ -1093,7 +1044,7 @@ tcp_send_fin (tcp_connection_t * tc) if (fin_snt) tc->snd_nxt = tc->snd_una; - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE 
(!vlib_buffer_alloc (vm, &bi, 1))) { /* Out of buffers so program fin retransmit ASAP */ tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, 1); @@ -1243,13 +1194,10 @@ tcp_send_ack (tcp_connection_t * tc) vlib_buffer_t *b; u32 bi; - /* Get buffer */ - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) return; b = vlib_get_buffer (vm, bi); tcp_init_buffer (vm, b); - - /* Fill in the ACK */ tcp_make_ack (tc, b); tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4); } @@ -1337,12 +1285,12 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t ** b) { u32 bytes_per_buffer = vnet_get_tcp_main ()->bytes_per_buffer; - u32 bi, seg_size; vlib_main_t *vm = wrk->vm; + u32 bi, seg_size; int n_bytes = 0; u8 *data; - seg_size = max_deq_bytes + MAX_HDRS_LEN; + seg_size = max_deq_bytes + TRANSPORT_MAX_HDRS_LEN; /* * Prepare options @@ -1356,7 +1304,7 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, /* Easy case, buffer size greater than mss */ if (PREDICT_TRUE (seg_size <= bytes_per_buffer)) { - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) return 0; *b = vlib_get_buffer (vm, bi); data = tcp_init_buffer (vm, *b); @@ -1371,30 +1319,28 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, /* Split mss into multiple buffers */ else { - u32 chain_bi = ~0, n_bufs_per_seg; - u16 n_peeked, len_to_deq, available_bufs; + u32 chain_bi = ~0, n_bufs_per_seg, n_bufs; + u16 n_peeked, len_to_deq; vlib_buffer_t *chain_b, *prev_b; int i; /* Make sure we have enough buffers */ n_bufs_per_seg = ceil ((double) seg_size / bytes_per_buffer); - available_bufs = vec_len (wrk->tx_buffers); - if (n_bufs_per_seg > available_bufs) + vec_validate_aligned (wrk->tx_buffers, n_bufs_per_seg - 1, + CLIB_CACHE_LINE_BYTES); + n_bufs = vlib_buffer_alloc (vm, wrk->tx_buffers, n_bufs_per_seg); + if 
(PREDICT_FALSE (n_bufs != n_bufs_per_seg)) { - tcp_alloc_tx_buffers (wrk, &available_bufs, VLIB_FRAME_SIZE); - if (n_bufs_per_seg > available_bufs) - { - *b = 0; - return 0; - } + if (n_bufs) + vlib_buffer_free (vm, wrk->tx_buffers, n_bufs); + return 0; } - (void) tcp_get_free_buffer_index (wrk, &bi); - ASSERT (bi != (u32) ~ 0); - *b = vlib_get_buffer (vm, bi); + *b = vlib_get_buffer (vm, wrk->tx_buffers[--n_bufs]); data = tcp_init_buffer (vm, *b); n_bytes = stream_session_peek_bytes (&tc->connection, data, offset, - bytes_per_buffer - MAX_HDRS_LEN); + bytes_per_buffer - + TRANSPORT_MAX_HDRS_LEN); b[0]->current_length = n_bytes; b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; b[0]->total_length_not_including_first_buffer = 0; @@ -1405,8 +1351,7 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, { prev_b = chain_b; len_to_deq = clib_min (max_deq_bytes, bytes_per_buffer); - tcp_get_free_buffer_index (wrk, &chain_bi); - ASSERT (chain_bi != (u32) ~ 0); + chain_bi = wrk->tx_buffers[--n_bufs]; chain_b = vlib_get_buffer (vm, chain_bi); chain_b->current_data = 0; data = vlib_buffer_get_current (chain_b); @@ -1428,6 +1373,12 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, tcp_push_hdr_i (tc, *b, tc->state, /* compute opts */ 0, /* burst */ 0); if (seq_gt (tc->snd_nxt, tc->snd_una_max)) tc->snd_una_max = tc->snd_nxt; + + if (PREDICT_FALSE (n_bufs)) + { + clib_warning ("not all buffers consumed"); + vlib_buffer_free (vm, wrk->tx_buffers, n_bufs); + } } ASSERT (n_bytes > 0); @@ -1639,7 +1590,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, tc->rto * TCP_TO_TIMER_TICK); - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) return; b = vlib_get_buffer (vm, bi); @@ -1663,7 +1614,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); tc->rtt_ts = 0; - if (PREDICT_FALSE 
(tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) { tcp_retransmit_timer_force_update (tc); return; @@ -1750,7 +1701,7 @@ tcp_timer_persist_handler (u32 index) /* * Try to force the first unsent segment (or buffer) */ - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) { tcp_persist_timer_set (tc); return; @@ -1760,9 +1711,10 @@ tcp_timer_persist_handler (u32 index) tcp_validate_txf_size (tc, offset); tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state); - max_snd_bytes = clib_min (tc->snd_mss, tm->bytes_per_buffer - MAX_HDRS_LEN); - n_bytes = stream_session_peek_bytes (&tc->connection, data, offset, - max_snd_bytes); + max_snd_bytes = + clib_min (tc->snd_mss, tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN); + n_bytes = + stream_session_peek_bytes (&tc->connection, data, offset, max_snd_bytes); b->current_length = n_bytes; ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT) || tc->snd_nxt == tc->snd_una_max diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c index 34de539b2951..d78dbcfc79cc 100644 --- a/src/vnet/tls/tls.c +++ b/src/vnet/tls/tls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Cisco and/or its affiliates. + * Copyright (c) 2018-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: @@ -51,40 +51,40 @@ tls_get_available_engine (void) } int -tls_add_vpp_q_rx_evt (stream_session_t * s) +tls_add_vpp_q_rx_evt (session_t * s) { - if (svm_fifo_set_event (s->server_rx_fifo)) - session_send_io_evt_to_thread (s->server_rx_fifo, FIFO_EVENT_APP_RX); + if (svm_fifo_set_event (s->rx_fifo)) + session_send_io_evt_to_thread (s->rx_fifo, FIFO_EVENT_APP_RX); return 0; } int -tls_add_vpp_q_builtin_rx_evt (stream_session_t * s) +tls_add_vpp_q_builtin_rx_evt (session_t * s) { - if (svm_fifo_set_event (s->server_rx_fifo)) - session_send_io_evt_to_thread (s->server_rx_fifo, FIFO_EVENT_BUILTIN_RX); + if (svm_fifo_set_event (s->rx_fifo)) + session_send_io_evt_to_thread (s->rx_fifo, FIFO_EVENT_BUILTIN_RX); return 0; } int -tls_add_vpp_q_tx_evt (stream_session_t * s) +tls_add_vpp_q_tx_evt (session_t * s) { - if (svm_fifo_set_event (s->server_tx_fifo)) - session_send_io_evt_to_thread (s->server_tx_fifo, FIFO_EVENT_APP_TX); + if (svm_fifo_set_event (s->tx_fifo)) + session_send_io_evt_to_thread (s->tx_fifo, FIFO_EVENT_APP_TX); return 0; } int -tls_add_vpp_q_builtin_tx_evt (stream_session_t * s) +tls_add_vpp_q_builtin_tx_evt (session_t * s) { - if (svm_fifo_set_event (s->server_tx_fifo)) + if (svm_fifo_set_event (s->tx_fifo)) session_send_io_evt_to_thread_custom (s, s->thread_index, FIFO_EVENT_BUILTIN_TX); return 0; } static inline int -tls_add_app_q_evt (app_worker_t * app, stream_session_t * app_session) +tls_add_app_q_evt (app_worker_t * app, session_t * app_session) { return app_worker_lock_and_send_event (app, app_session, FIFO_EVENT_APP_RX); } @@ -178,7 +178,7 @@ tls_ctx_half_open_index (tls_ctx_t * ctx) } void -tls_notify_app_enqueue (tls_ctx_t * ctx, stream_session_t * app_session) +tls_notify_app_enqueue (tls_ctx_t * ctx, session_t * app_session) { app_worker_t *app; app = app_worker_get_if_valid (app_session->app_wrk_index); @@ -189,7 +189,7 @@ tls_notify_app_enqueue (tls_ctx_t * ctx, stream_session_t * app_session) int 
tls_notify_app_accept (tls_ctx_t * ctx) { - stream_session_t *app_listener, *app_session; + session_t *app_listener, *app_session; segment_manager_t *sm; app_worker_t *app_wrk; application_t *app; @@ -206,7 +206,7 @@ tls_notify_app_accept (tls_ctx_t * ctx) app = application_get (app_wrk->app_index); lctx = tls_listener_ctx_get (ctx->listener_ctx_index); - app_session = session_alloc (vlib_get_thread_index ()); + app_session = session_get (ctx->c_s_index, ctx->c_thread_index); app_session->app_wrk_index = ctx->parent_app_index; app_session->connection_index = ctx->tls_ctx_handle; @@ -221,7 +221,6 @@ tls_notify_app_accept (tls_ctx_t * ctx) TLS_DBG (1, "failed to allocate fifos"); return rv; } - ctx->c_s_index = app_session->session_index; ctx->app_session_handle = session_handle (app_session); session_lookup_add_connection (&ctx->connection, session_handle (app_session)); @@ -231,8 +230,8 @@ tls_notify_app_accept (tls_ctx_t * ctx) int tls_notify_app_connected (tls_ctx_t * ctx, u8 is_failed) { - int (*cb_fn) (u32, u32, stream_session_t *, u8); - stream_session_t *app_session; + int (*cb_fn) (u32, u32, session_t *, u8); + session_t *app_session; segment_manager_t *sm; app_worker_t *app_wrk; application_t *app; @@ -251,7 +250,7 @@ tls_notify_app_connected (tls_ctx_t * ctx, u8 is_failed) goto failed; sm = app_worker_get_connect_segment_manager (app_wrk); - app_session = session_alloc (vlib_get_thread_index ()); + app_session = session_get (ctx->c_s_index, ctx->c_thread_index); app_session->app_wrk_index = ctx->parent_app_index; app_session->connection_index = ctx->tls_ctx_handle; app_session->session_type = @@ -261,7 +260,6 @@ tls_notify_app_connected (tls_ctx_t * ctx, u8 is_failed) if (session_alloc_fifos (sm, app_session)) goto failed; - ctx->app_session_handle = session_handle (app_session); app_session->session_state = SESSION_STATE_CONNECTING; if (cb_fn (ctx->parent_app_index, ctx->parent_app_api_context, app_session, 0 /* not failed */ )) @@ -271,9 +269,7 @@ 
tls_notify_app_connected (tls_ctx_t * ctx, u8 is_failed) return -1; } - /* parent_app_api_context should not be overwitten before used, - * so defer setting c_s_index */ - ctx->c_s_index = app_session->session_index; + ctx->app_session_handle = session_handle (app_session); app_session->session_state = SESSION_STATE_READY; session_lookup_add_connection (&ctx->connection, session_handle (app_session)); @@ -345,13 +341,13 @@ tls_ctx_init_client (tls_ctx_t * ctx) } static inline int -tls_ctx_write (tls_ctx_t * ctx, stream_session_t * app_session) +tls_ctx_write (tls_ctx_t * ctx, session_t * app_session) { return tls_vfts[ctx->tls_ctx_engine].ctx_write (ctx, app_session); } static inline int -tls_ctx_read (tls_ctx_t * ctx, stream_session_t * tls_session) +tls_ctx_read (tls_ctx_t * ctx, session_t * tls_session) { return tls_vfts[ctx->tls_ctx_engine].ctx_read (ctx, tls_session); } @@ -363,7 +359,7 @@ tls_ctx_handshake_is_over (tls_ctx_t * ctx) } void -tls_session_reset_callback (stream_session_t * s) +tls_session_reset_callback (session_t * s) { clib_warning ("called..."); } @@ -382,9 +378,9 @@ tls_del_segment_callback (u32 client_index, u64 segment_handle) } void -tls_session_disconnect_callback (stream_session_t * tls_session) +tls_session_disconnect_callback (session_t * tls_session) { - stream_session_t *app_session; + session_t *app_session; tls_ctx_t *ctx; app_worker_t *app_wrk; application_t *app; @@ -403,9 +399,9 @@ tls_session_disconnect_callback (stream_session_t * tls_session) } int -tls_session_accept_callback (stream_session_t * tls_session) +tls_session_accept_callback (session_t * tls_session) { - stream_session_t *tls_listener; + session_t *tls_listener, *app_session; tls_ctx_t *lctx, *ctx; u32 ctx_handle; @@ -422,6 +418,12 @@ tls_session_accept_callback (stream_session_t * tls_session) ctx->tls_session_handle = session_handle (tls_session); ctx->listener_ctx_index = tls_listener->opaque; + /* Preallocate app session. 
Avoids allocating a session post handshake + * on tls_session rx and potentially invalidating the session pool */ + app_session = session_alloc (ctx->c_thread_index); + app_session->session_state = SESSION_STATE_CLOSED; + ctx->c_s_index = app_session->session_index; + TLS_DBG (1, "Accept on listener %u new connection [%u]%x", tls_listener->opaque, vlib_get_thread_index (), ctx_handle); @@ -429,7 +431,7 @@ tls_session_accept_callback (stream_session_t * tls_session) } int -tls_app_tx_callback (stream_session_t * app_session) +tls_app_tx_callback (session_t * app_session) { tls_ctx_t *ctx; if (PREDICT_FALSE (app_session->session_state == SESSION_STATE_CLOSED)) @@ -440,7 +442,7 @@ tls_app_tx_callback (stream_session_t * app_session) } int -tls_app_rx_callback (stream_session_t * tls_session) +tls_app_rx_callback (session_t * tls_session) { tls_ctx_t *ctx; @@ -451,8 +453,9 @@ tls_app_rx_callback (stream_session_t * tls_session) int tls_session_connected_callback (u32 tls_app_index, u32 ho_ctx_index, - stream_session_t * tls_session, u8 is_fail) + session_t * tls_session, u8 is_fail) { + session_t *app_session; tls_ctx_t *ho_ctx, *ctx; u32 ctx_handle; @@ -460,7 +463,7 @@ tls_session_connected_callback (u32 tls_app_index, u32 ho_ctx_index, if (is_fail) { - int (*cb_fn) (u32, u32, stream_session_t *, u8), rv = 0; + int (*cb_fn) (u32, u32, session_t *, u8), rv = 0; u32 wrk_index, api_context; app_worker_t *app_wrk; application_t *app; @@ -496,6 +499,12 @@ tls_session_connected_callback (u32 tls_app_index, u32 ho_ctx_index, tls_session->opaque = ctx_handle; tls_session->session_state = SESSION_STATE_READY; + /* Preallocate app session. 
Avoids allocating a session post handshake + * on tls_session rx and potentially invalidating the session pool */ + app_session = session_alloc (ctx->c_thread_index); + app_session->session_state = SESSION_STATE_CLOSED; + ctx->c_s_index = app_session->session_index; + return tls_ctx_init_client (ctx); } @@ -520,10 +529,10 @@ tls_connect (transport_endpoint_cfg_t * tep) tls_engine_type_t engine_type; tls_main_t *tm = &tls_main; app_worker_t *app_wrk; - clib_error_t *error; application_t *app; tls_ctx_t *ctx; u32 ctx_index; + int rv; sep = (session_endpoint_cfg_t *) tep; app_wrk = app_worker_get (sep->app_wrk_index); @@ -554,8 +563,8 @@ tls_connect (transport_endpoint_cfg_t * tep) cargs->sep.transport_proto = TRANSPORT_PROTO_TCP; cargs->app_index = tm->app_index; cargs->api_context = ctx_index; - if ((error = vnet_connect (cargs))) - return clib_error_get_code (error); + if ((rv = vnet_connect (cargs))) + return rv; TLS_DBG (1, "New connect request %u engine %d", ctx_index, engine_type); return 0; @@ -577,13 +586,13 @@ tls_disconnect (u32 ctx_handle, u32 thread_index) u32 tls_start_listen (u32 app_listener_index, transport_endpoint_t * tep) { - vnet_bind_args_t _bargs, *args = &_bargs; + vnet_listen_args_t _bargs, *args = &_bargs; app_worker_t *app_wrk; tls_main_t *tm = &tls_main; session_handle_t tls_handle; session_endpoint_cfg_t *sep; - stream_session_t *tls_listener; - stream_session_t *app_listener; + session_t *tls_listener; + session_t *app_listener; tls_engine_type_t engine_type; application_t *app; tls_ctx_t *lctx; @@ -603,7 +612,7 @@ tls_start_listen (u32 app_listener_index, transport_endpoint_t * tep) clib_memset (args, 0, sizeof (*args)); args->app_index = tm->app_index; args->sep_ext = *sep; - if (vnet_bind (args)) + if (vnet_listen (args)) return -1; tls_handle = args->handle; @@ -632,15 +641,16 @@ tls_stop_listen (u32 lctx_index) { tls_engine_type_t engine_type; tls_ctx_t *lctx; + int rv; lctx = tls_listener_ctx_get (lctx_index); - vnet_unbind_args_t a 
= { + vnet_unlisten_args_t a = { .handle = lctx->tls_session_handle, .app_index = tls_main.app_index, .wrk_map_index = 0 /* default wrk */ }; - if (vnet_unbind (&a)) - clib_warning ("unbind returned"); + if ((rv = vnet_unlisten (&a))) + clib_warning ("unlisten returned %d", rv); engine_type = lctx->tls_ctx_engine; tls_vfts[engine_type].ctx_stop_listen (lctx); @@ -696,7 +706,7 @@ format_tls_connection (u8 * s, va_list * args) s = format (s, "%-50U", format_tls_ctx, ctx, thread_index); if (verbose) { - stream_session_t *ts; + session_t *ts; ts = session_get_from_handle (ctx->app_session_handle); s = format (s, "state: %-7u", ts->session_state); if (verbose > 1) @@ -730,12 +740,12 @@ format_tls_half_open (u8 * s, va_list * args) /* *INDENT-OFF* */ const static transport_proto_vft_t tls_proto = { - .open = tls_connect, + .connect = tls_connect, .close = tls_disconnect, - .bind = tls_start_listen, + .start_listen = tls_start_listen, + .stop_listen = tls_stop_listen, .get_connection = tls_connection_get, .get_listener = tls_listener_get, - .unbind = tls_stop_listen, .tx_type = TRANSPORT_TX_INTERNAL, .service_type = TRANSPORT_SERVICE_APP, .format_connection = format_tls_connection, diff --git a/src/vnet/tls/tls.h b/src/vnet/tls/tls.h index 09f1bdc7b075..ac0b39b82956 100644 --- a/src/vnet/tls/tls.h +++ b/src/vnet/tls/tls.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Cisco and/or its affiliates. + * Copyright (c) 2018-2019 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -13,8 +13,9 @@ * limitations under the License. 
*/ - #include +#include +#include #include #ifndef SRC_VNET_TLS_TLS_H_ @@ -39,7 +40,10 @@ typedef CLIB_PACKED (struct tls_cxt_id_ { u32 parent_app_index; - session_handle_t app_session_handle; + union { + session_handle_t app_session_handle; + u32 parent_app_api_ctx; + }; session_handle_t tls_session_handle; u32 ssl_ctx; u32 listener_ctx_index; @@ -67,7 +71,7 @@ typedef struct tls_ctx_ #define tls_ctx_handle c_c_index /* Temporary storage for session open opaque. Overwritten once * underlying tcp connection is established */ -#define parent_app_api_context c_s_index +#define parent_app_api_context c_tls_ctx_id.parent_app_api_ctx u8 is_passive_close; u8 resume; @@ -98,32 +102,25 @@ typedef struct tls_engine_vft_ tls_ctx_t *(*ctx_get_w_thread) (u32 ctx_index, u8 thread_index); int (*ctx_init_client) (tls_ctx_t * ctx); int (*ctx_init_server) (tls_ctx_t * ctx); - int (*ctx_read) (tls_ctx_t * ctx, stream_session_t * tls_session); - int (*ctx_write) (tls_ctx_t * ctx, stream_session_t * app_session); + int (*ctx_read) (tls_ctx_t * ctx, session_t * tls_session); + int (*ctx_write) (tls_ctx_t * ctx, session_t * app_session); u8 (*ctx_handshake_is_over) (tls_ctx_t * ctx); int (*ctx_start_listen) (tls_ctx_t * ctx); int (*ctx_stop_listen) (tls_ctx_t * ctx); } tls_engine_vft_t; -typedef enum tls_engine_type_ -{ - TLS_ENGINE_NONE, - TLS_ENGINE_MBEDTLS, - TLS_ENGINE_OPENSSL, - TLS_N_ENGINES -} tls_engine_type_t; - tls_main_t *vnet_tls_get_main (void); void tls_register_engine (const tls_engine_vft_t * vft, tls_engine_type_t type); -int tls_add_vpp_q_rx_evt (stream_session_t * s); -int tls_add_vpp_q_tx_evt (stream_session_t * s); -int tls_add_vpp_q_builtin_tx_evt (stream_session_t * s); -int tls_add_vpp_q_builtin_rx_evt (stream_session_t * s); +int tls_add_vpp_q_rx_evt (session_t * s); +int tls_add_vpp_q_tx_evt (session_t * s); +int tls_add_vpp_q_builtin_tx_evt (session_t * s); +int tls_add_vpp_q_builtin_rx_evt (session_t * s); int tls_notify_app_accept (tls_ctx_t * ctx); int 
tls_notify_app_connected (tls_ctx_t * ctx, u8 is_failed); -void tls_notify_app_enqueue (tls_ctx_t * ctx, stream_session_t * app_session); +void tls_notify_app_enqueue (tls_ctx_t * ctx, session_t * app_session); #endif /* SRC_VNET_TLS_TLS_H_ */ + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/tls/tls_test.h b/src/vnet/tls/tls_test.h new file mode 100644 index 000000000000..ad9c8ba2d266 --- /dev/null +++ b/src/vnet/tls/tls_test.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#ifndef SRC_VNET_TLS_TLS_TEST_H_ +#define SRC_VNET_TLS_TLS_TEST_H_ + +/* + * TLS server cert and keys to be used for testing only + */ +static const char test_srv_crt_rsa[] = + "-----BEGIN CERTIFICATE-----\r\n" + "MIID5zCCAs+gAwIBAgIJALeMYCEHrTtJMA0GCSqGSIb3DQEBCwUAMIGJMQswCQYD\r\n" + "VQQGEwJVUzELMAkGA1UECAwCQ0ExETAPBgNVBAcMCFNhbiBKb3NlMQ4wDAYDVQQK\r\n" + "DAVDaXNjbzEOMAwGA1UECwwFZmQuaW8xFjAUBgNVBAMMDXRlc3R0bHMuZmQuaW8x\r\n" + "IjAgBgkqhkiG9w0BCQEWE3ZwcC1kZXZAbGlzdHMuZmQuaW8wHhcNMTgwMzA1MjEx\r\n" + "NTEyWhcNMjgwMzAyMjExNTEyWjCBiTELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNB\r\n" + "MREwDwYDVQQHDAhTYW4gSm9zZTEOMAwGA1UECgwFQ2lzY28xDjAMBgNVBAsMBWZk\r\n" + "LmlvMRYwFAYDVQQDDA10ZXN0dGxzLmZkLmlvMSIwIAYJKoZIhvcNAQkBFhN2cHAt\r\n" + "ZGV2QGxpc3RzLmZkLmlvMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA\r\n" + "4C1k8a1DuStgggqT4o09fP9sJ2dC54bxhS/Xk2VEfaIZ222WSo4X/syRVfVy9Yah\r\n" + "cpI1zJ/RDxaZSFhgA+nPZBrFMsrULkrdAOpOVj8eDEp9JuWdO2ODSoFnCvLxcYWB\r\n" + "Yc5kHryJpEaGJl1sFQSesnzMFty/59ta0stk0Fp8r5NhIjWvSovGzPo6Bhz+VS2c\r\n" + "ebIZh4x1t2hHaFcgm0qJoJ6DceReWCW8w+yOVovTolGGq+bpb2Hn7MnRSZ2K2NdL\r\n" + "+aLXpkZbS/AODP1FF2vTO1mYL290LO7/51vJmPXNKSDYMy5EvILr5/VqtjsFCwRL\r\n" + "Q4jcM/+GeHSAFWx4qIv0BwIDAQABo1AwTjAdBgNVHQ4EFgQUWa1SOB37xmT53tZQ\r\n" + "aXuLLhRI7U8wHwYDVR0jBBgwFoAUWa1SOB37xmT53tZQaXuLLhRI7U8wDAYDVR0T\r\n" + "BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAoUht13W4ya27NVzQuCMvqPWL3VM4\r\n" + "3xbPFk02FaGz/WupPu276zGlzJAZrbuDcQowwwU1Ni1Yygxl96s1c2M5rHDTrOKG\r\n" + "rK0hbkSFBo+i6I8u4HiiQ4rYmG0Hv6+sXn3of0HsbtDPGgWZoipPWDljPYEURu3e\r\n" + "3HRe/Dtsj9CakBoSDzs8ndWaBR+f4sM9Tk1cjD46Gq2T/qpSPXqKxEUXlzhdCAn4\r\n" + "twub17Bq2kykHpppCwPg5M+v30tHG/R2Go15MeFWbEJthFk3TZMjKL7UFs7fH+x2\r\n" + "wSonXb++jY+KmCb93C+soABBizE57g/KmiR2IxQ/LMjDik01RSUIaM0lLA==\r\n" + "-----END CERTIFICATE-----\r\n"; +static const u32 test_srv_crt_rsa_len = sizeof (test_srv_crt_rsa); + +static const char test_srv_key_rsa[] = + "-----BEGIN PRIVATE KEY-----\r\n" + 
"MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDgLWTxrUO5K2CC\r\n" + "CpPijT18/2wnZ0LnhvGFL9eTZUR9ohnbbZZKjhf+zJFV9XL1hqFykjXMn9EPFplI\r\n" + "WGAD6c9kGsUyytQuSt0A6k5WPx4MSn0m5Z07Y4NKgWcK8vFxhYFhzmQevImkRoYm\r\n" + "XWwVBJ6yfMwW3L/n21rSy2TQWnyvk2EiNa9Ki8bM+joGHP5VLZx5shmHjHW3aEdo\r\n" + "VyCbSomgnoNx5F5YJbzD7I5Wi9OiUYar5ulvYefsydFJnYrY10v5otemRltL8A4M\r\n" + "/UUXa9M7WZgvb3Qs7v/nW8mY9c0pINgzLkS8guvn9Wq2OwULBEtDiNwz/4Z4dIAV\r\n" + "bHioi/QHAgMBAAECggEBAMzGipP8+oT166U+NlJXRFifFVN1DvdhG9PWnOxGL+c3\r\n" + "ILmBBC08WQzmHshPemBvR6DZkA1H23cV5JTiLWrFtC00CvhXsLRMrE5+uWotI6yE\r\n" + "iofybMroHvD6/X5R510UX9hQ6MHu5ShLR5VZ9zXHz5MpTmB/60jG5dLx+jgcwBK8\r\n" + "LuGv2YB/WCUwT9QJ3YU2eaingnXtz/MrFbkbltrqlnBdlD+kTtw6Yac9y1XuuQXc\r\n" + "BPeulLNDuPolJVWbUvDBZrpt2dXTgz8ws1sv+wCNE0xwQJsqW4Nx3QkpibUL9RUr\r\n" + "CVbKlNfa9lopT6nGKlgX69R/uH35yh9AOsfasro6w0ECgYEA82UJ8u/+ORah+0sF\r\n" + "Q0FfW5MTdi7OAUHOz16pUsGlaEv0ERrjZxmAkHA/VRwpvDBpx4alCv0Hc39PFLIk\r\n" + "nhSsM2BEuBkTAs6/GaoNAiBtQVE/hN7awNRWVmlieS0go3Y3dzaE9IUMyj8sPOFT\r\n" + "5JdJ6BM69PHKCkY3dKdnnfpFEuECgYEA68mRpteunF1mdZgXs+WrN+uLlRrQR20F\r\n" + "ZyMYiUCH2Dtn26EzA2moy7FipIIrQcX/j+KhYNGM3e7MU4LymIO29E18mn8JODnH\r\n" + "sQOXzBTsf8A4yIVMkcuQD3bfb0JiUGYUPOidTp2N7IJA7+6Yc3vQOyb74lnKnJoO\r\n" + "gougPT2wS+cCgYAn7muzb6xFsXDhyW0Tm6YJYBfRS9yAWEuVufINobeBZPSl2cN1\r\n" + "Jrnw+HlrfTNbrJWuJmjtZJXUXQ6cVp2rUbjutNyRV4vG6iRwEXYQ40EJdkr1gZpi\r\n" + "CHQhuShuuPih2MNAy7EEbM+sXrDjTBR3bFqzuHPzu7dp+BshCFX3lRfAAQKBgGQt\r\n" + "K5i7IhCFDjb/+3IPLgOAK7mZvsvZ4eXD33TQ2eZgtut1PXtBtNl17/b85uv293Fm\r\n" + "VDISVcsk3eLNS8zIiT6afUoWlxAwXEs0v5WRfjl4radkGvgGiJpJYvyeM67877RB\r\n" + "EDSKc/X8ESLfOB44iGvZUEMG6zJFscx9DgN25iQZAoGAbyd+JEWwdVH9/K3IH1t2\r\n" + "PBkZX17kNWv+iVM1WyFjbe++vfKZCrOJiyiqhDeEqgrP3AuNMlaaduC3VRC3G5oV\r\n" + "Mj1tlhDWQ/qhvKdCKNdIVQYDE75nw+FRWV8yYkHAnXYW3tNoweDIwixE0hkPR1bc\r\n" + "oEjPLVNtx8SOj/M4rhaPT3I=\r\n" "-----END PRIVATE KEY-----\r\n"; +static const u32 test_srv_key_rsa_len = sizeof (test_srv_key_rsa); + +#endif /* SRC_VNET_TLS_TLS_TEST_H_ */ + 
+/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c index 7674ef2e5d61..694cffa6f5f3 100644 --- a/src/vnet/udp/udp.c +++ b/src/vnet/udp/udp.c @@ -318,9 +318,9 @@ udp_session_get_half_open (u32 conn_index) /* *INDENT-OFF* */ const static transport_proto_vft_t udp_proto = { - .bind = udp_session_bind, - .open = udp_open_connection, - .unbind = udp_session_unbind, + .start_listen = udp_session_bind, + .connect = udp_open_connection, + .stop_listen = udp_session_unbind, .push_header = udp_push_header, .get_connection = udp_session_get, .get_listener = udp_session_get_listener, @@ -362,9 +362,9 @@ udpc_connection_listen (u32 session_index, transport_endpoint_t * lcl) /* *INDENT-OFF* */ const static transport_proto_vft_t udpc_proto = { - .bind = udpc_connection_listen, - .open = udpc_connection_open, - .unbind = udp_session_unbind, + .start_listen = udpc_connection_listen, + .stop_listen = udp_session_unbind, + .connect = udpc_connection_open, .push_header = udp_push_header, .get_connection = udp_session_get, .get_listener = udp_session_get_listener, diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c index ad469f696672..1adefab23f49 100644 --- a/src/vnet/udp/udp_input.c +++ b/src/vnet/udp/udp_input.c @@ -104,7 +104,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, ip4_header_t *ip40; ip6_header_t *ip60; u8 *data0; - stream_session_t *s0; + session_t *s0; udp_connection_t *uc0, *child0, *new_uc0; transport_connection_t *tc0; int wrote0; @@ -207,7 +207,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, child0->c_rmt_port = udp0->src_port; child0->c_is_ip4 = is_ip4; - if (stream_session_accept (&child0->connection, + if (session_stream_accept (&child0->connection, tc0->s_index, 1)) { error0 = UDP_ERROR_CREATE_SESSION; @@ -229,7 +229,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, 
if (!uc0->is_connected) { - if (svm_fifo_max_enqueue (s0->server_rx_fifo) + if (svm_fifo_max_enqueue (s0->rx_fifo) < b0->current_length + sizeof (session_dgram_hdr_t)) { error0 = UDP_ERROR_FIFO_FULL; @@ -255,8 +255,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - if (svm_fifo_max_enqueue (s0->server_rx_fifo) - < b0->current_length) + if (svm_fifo_max_enqueue (s0->rx_fifo) < b0->current_length) { error0 = UDP_ERROR_FIFO_FULL; goto trace0; diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c index e002a248ce41..103b6496a34d 100644 --- a/src/vnet/unix/tuntap.c +++ b/src/vnet/unix/tuntap.c @@ -243,7 +243,7 @@ tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) tuntap_main_t *tm = &tuntap_main; vlib_buffer_t *b; u32 bi; - const uword buffer_size = VLIB_BUFFER_DATA_SIZE; + const uword buffer_size = vlib_buffer_get_default_data_size (vm); u16 thread_index = vm->thread_index; /** Make sure we have some RX buffers. */ @@ -490,7 +490,7 @@ tuntap_config (vlib_main_t * vm, unformat_input_t * input) u8 *name; int flags = IFF_TUN | IFF_NO_PI; int is_enabled = 0, is_ether = 0, have_normal_interface = 0; - const uword buffer_size = VLIB_BUFFER_DATA_SIZE; + const uword buffer_size = vlib_buffer_get_default_data_size (vm); while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index 927bcab9e92a..31d9c3444c23 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -77,6 +76,7 @@ #include #include #include +#include /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/vxlan-gbp/decap.c b/src/vnet/vxlan-gbp/decap.c index 613cb0123199..6c14ef79531a 100644 --- a/src/vnet/vxlan-gbp/decap.c +++ b/src/vnet/vxlan-gbp/decap.c @@ -334,7 +334,7 @@ vxlan_gbp_input (vlib_main_t * vm, } vnet_buffer2 (b0)->gbp.flags = vxlan_gbp_get_gpflags 
(vxlan_gbp0); - vnet_buffer2 (b0)->gbp.src_epg = vxlan_gbp_get_sclass (vxlan_gbp0); + vnet_buffer2 (b0)->gbp.sclass = vxlan_gbp_get_sclass (vxlan_gbp0); if (PREDICT_FALSE @@ -369,7 +369,7 @@ vxlan_gbp_input (vlib_main_t * vm, } vnet_buffer2 (b1)->gbp.flags = vxlan_gbp_get_gpflags (vxlan_gbp1); - vnet_buffer2 (b1)->gbp.src_epg = vxlan_gbp_get_sclass (vxlan_gbp1); + vnet_buffer2 (b1)->gbp.sclass = vxlan_gbp_get_sclass (vxlan_gbp1); vnet_update_l2_len (b0); vnet_update_l2_len (b1); @@ -473,7 +473,7 @@ vxlan_gbp_input (vlib_main_t * vm, (rx_counter, thread_index, stats_t0->sw_if_index, 1, len0); } vnet_buffer2 (b0)->gbp.flags = vxlan_gbp_get_gpflags (vxlan_gbp0); - vnet_buffer2 (b0)->gbp.src_epg = vxlan_gbp_get_sclass (vxlan_gbp0); + vnet_buffer2 (b0)->gbp.sclass = vxlan_gbp_get_sclass (vxlan_gbp0); /* Required to make the l2 tag push / pop code work on l2 subifs */ vnet_update_l2_len (b0); diff --git a/src/vnet/vxlan-gbp/encap.c b/src/vnet/vxlan-gbp/encap.c index f1b839ce316a..f8fc9b4b9980 100644 --- a/src/vnet/vxlan-gbp/encap.c +++ b/src/vnet/vxlan-gbp/encap.c @@ -260,9 +260,9 @@ vxlan_gbp_encap_inline (vlib_main_t * vm, vxlan_gbp0->gpflags = vnet_buffer2 (b0)->gbp.flags; vxlan_gbp1->gpflags = vnet_buffer2 (b1)->gbp.flags; vxlan_gbp0->sclass = - clib_host_to_net_u16 (vnet_buffer2 (b0)->gbp.src_epg); + clib_host_to_net_u16 (vnet_buffer2 (b0)->gbp.sclass); vxlan_gbp1->sclass = - clib_host_to_net_u16 (vnet_buffer2 (b1)->gbp.src_epg); + clib_host_to_net_u16 (vnet_buffer2 (b1)->gbp.sclass); if (csum_offload) { @@ -324,7 +324,7 @@ vxlan_gbp_encap_inline (vlib_main_t * vm, vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->tunnel_index = t0 - vxm->tunnels; tr->vni = t0->vni; - tr->sclass = vnet_buffer2 (b0)->gbp.src_epg; + tr->sclass = vnet_buffer2 (b0)->gbp.sclass; tr->flags = vnet_buffer2 (b0)->gbp.flags; } @@ -334,7 +334,7 @@ vxlan_gbp_encap_inline (vlib_main_t * vm, vlib_add_trace (vm, node, b1, sizeof (*tr)); tr->tunnel_index = t1 - vxm->tunnels; tr->vni = t1->vni; - 
tr->sclass = vnet_buffer2 (b1)->gbp.src_epg; + tr->sclass = vnet_buffer2 (b1)->gbp.sclass; tr->flags = vnet_buffer2 (b1)->gbp.flags; } @@ -426,7 +426,7 @@ vxlan_gbp_encap_inline (vlib_main_t * vm, /* set source class and gpflags */ vxlan_gbp0->gpflags = vnet_buffer2 (b0)->gbp.flags; vxlan_gbp0->sclass = - clib_host_to_net_u16 (vnet_buffer2 (b0)->gbp.src_epg); + clib_host_to_net_u16 (vnet_buffer2 (b0)->gbp.sclass); if (csum_offload) { @@ -469,7 +469,7 @@ vxlan_gbp_encap_inline (vlib_main_t * vm, vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->tunnel_index = t0 - vxm->tunnels; tr->vni = t0->vni; - tr->sclass = vnet_buffer2 (b0)->gbp.src_epg; + tr->sclass = vnet_buffer2 (b0)->gbp.sclass; tr->flags = vnet_buffer2 (b0)->gbp.flags; } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, diff --git a/src/vpp-api/python/CMakeLists.txt b/src/vpp-api/python/CMakeLists.txt index 52215ccafec5..a81a6f66f63a 100644 --- a/src/vpp-api/python/CMakeLists.txt +++ b/src/vpp-api/python/CMakeLists.txt @@ -12,7 +12,6 @@ # limitations under the License. 
find_package(PythonInterp 2.7) -find_package(PythonLibs 2.7) if(PYTHONINTERP_FOUND) install( diff --git a/src/vpp/CMakeLists.txt b/src/vpp/CMakeLists.txt index 7de8c7a90897..d14aa6145ea3 100644 --- a/src/vpp/CMakeLists.txt +++ b/src/vpp/CMakeLists.txt @@ -116,3 +116,6 @@ add_vpp_executable(vpp_prometheus_export LINK_LIBRARIES vppapiclient vppinfra svm vlibmemoryclient DEPENDS api_headers ) + +install(FILES conf/startup.conf DESTINATION etc/vpp COMPONENT vpp) +install(FILES conf/80-vpp.conf DESTINATION etc/sysctl.d COMPONENT vpp) diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index ed769d284fc3..b5ff3ccad715 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -59,6 +59,7 @@ #include #include +#include #define vl_typedefs /* define message structures */ #include @@ -698,7 +699,7 @@ format_arp_event (u8 * s, va_list * args) vl_api_ip4_arp_event_t *event = va_arg (*args, vl_api_ip4_arp_event_t *); s = format (s, "pid %d: ", ntohl (event->pid)); - s = format (s, "resolution for %U", format_ip4_address, &event->address); + s = format (s, "resolution for %U", format_vl_api_ip4_address, event->ip); return s; } @@ -708,7 +709,7 @@ format_nd_event (u8 * s, va_list * args) vl_api_ip6_nd_event_t *event = va_arg (*args, vl_api_ip6_nd_event_t *); s = format (s, "pid %d: ", ntohl (event->pid)); - s = format (s, "resolution for %U", format_ip6_address, event->address); + s = format (s, "resolution for %U", format_vl_api_ip6_address, event->ip); return s; } diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 3725dd370d42..ab98954abbca 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +45,7 @@ #include #include +#include #include #include @@ -536,72 +539,6 @@ static void *vl_api_bd_ip_mac_dump_t_print FINISH; } -static void *vl_api_tap_connect_t_print - 
(vl_api_tap_connect_t * mp, void *handle) -{ - u8 *s; - u8 null_mac[6]; - - clib_memset (null_mac, 0, sizeof (null_mac)); - - s = format (0, "SCRIPT: tap_connect "); - s = format (s, "tapname %s ", mp->tap_name); - if (mp->use_random_mac) - s = format (s, "random-mac "); - if (mp->tag[0]) - s = format (s, "tag %s ", mp->tag); - if (memcmp (mp->mac_address, null_mac, 6)) - s = format (s, "mac %U ", format_ethernet_address, mp->mac_address); - if (mp->ip4_address_set) - s = format (s, "address %U/%d ", format_ip4_address, mp->ip4_address, - mp->ip4_mask_width); - if (mp->ip6_address_set) - s = format (s, "address %U/%d ", format_ip6_address, mp->ip6_address, - mp->ip6_mask_width); - FINISH; -} - -static void *vl_api_tap_modify_t_print - (vl_api_tap_modify_t * mp, void *handle) -{ - u8 *s; - u8 null_mac[6]; - - clib_memset (null_mac, 0, sizeof (null_mac)); - - s = format (0, "SCRIPT: tap_modify "); - s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); - s = format (s, "tapname %s ", mp->tap_name); - if (mp->use_random_mac) - s = format (s, "random-mac "); - - if (memcmp (mp->mac_address, null_mac, 6)) - s = format (s, "mac %U ", format_ethernet_address, mp->mac_address); - - FINISH; -} - -static void *vl_api_tap_delete_t_print - (vl_api_tap_delete_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: tap_delete "); - s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); - - FINISH; -} - -static void *vl_api_sw_interface_tap_dump_t_print - (vl_api_sw_interface_tap_dump_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_tap_dump "); - - FINISH; -} - static void *vl_api_tap_create_v2_t_print (vl_api_tap_create_v2_t * mp, void *handle) { @@ -662,6 +599,49 @@ static void *vl_api_sw_interface_tap_v2_dump_t_print FINISH; } +static void *vl_api_virtio_pci_create_t_print + (vl_api_virtio_pci_create_t * mp, void *handle) +{ + u8 *s; + u8 null_mac[6]; + + clib_memset (null_mac, 0, sizeof (null_mac)); + + s = format (0, "SCRIPT: 
virtio_pci_create "); + s = format (s, "pci_addr %U ", format_vlib_pci_addr, ntohl (mp->pci_addr)); + if (memcmp (mp->mac_address, null_mac, 6)) + s = format (s, "mac-address %U ", + format_ethernet_address, mp->mac_address); + if (mp->tx_ring_sz) + s = format (s, "tx-ring-size %u ", ntohs (mp->tx_ring_sz)); + if (mp->rx_ring_sz) + s = format (s, "rx-ring-size %u ", ntohs (mp->rx_ring_sz)); + if (mp->features) + s = format (s, "features 0x%llx ", clib_net_to_host_u64 (mp->features)); + FINISH; +} + +static void *vl_api_virtio_pci_delete_t_print + (vl_api_virtio_pci_delete_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: virtio_pci_delete "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + +static void *vl_api_sw_interface_virtio_pci_dump_t_print + (vl_api_sw_interface_virtio_pci_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_virtio_pci_dump "); + + FINISH; +} + static void *vl_api_bond_create_t_print (vl_api_bond_create_t * mp, void *handle) { @@ -919,11 +899,10 @@ static void *vl_api_proxy_arp_add_del_t_print s = format (0, "SCRIPT: proxy_arp_add_del "); s = format (s, "%U - %U ", - format_ip4_address, mp->proxy.low_address, - format_ip4_address, mp->proxy.hi_address); + format_vl_api_ip4_address, mp->proxy.low, + format_vl_api_ip4_address, mp->proxy.hi); - if (mp->proxy.vrf_id) - s = format (s, "vrf %d ", ntohl (mp->proxy.vrf_id)); + s = format (s, "table %d ", ntohl (mp->proxy.table_id)); if (mp->is_add == 0) s = format (s, "del "); @@ -1042,31 +1021,21 @@ static void *vl_api_ip_neighbor_add_del_t_print (vl_api_ip_neighbor_add_del_t * mp, void *handle) { u8 *s; - u8 null_mac[6]; - - clib_memset (null_mac, 0, sizeof (null_mac)); s = format (0, "SCRIPT: ip_neighbor_add_del "); - s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + s = format (s, "sw_if_index %d ", ntohl (mp->neighbor.sw_if_index)); - if (mp->is_static) + if (IP_API_NEIGHBOR_FLAG_STATIC & ntohl 
(mp->neighbor.flags)) s = format (s, "is_static "); - if (mp->is_no_adj_fib) + if (IP_API_NEIGHBOR_FLAG_NO_FIB_ENTRY & ntohl (mp->neighbor.flags)) s = format (s, "is_no_fib_entry "); - if (memcmp (mp->mac_address, null_mac, 6)) - s = format (s, "mac %U ", format_ethernet_address, mp->mac_address); + s = format (s, "mac %U ", format_vl_api_mac_address, + &mp->neighbor.mac_address); - if (mp->is_ipv6) - s = - format (s, "dst %U ", format_ip6_address, - (ip6_address_t *) mp->dst_address); - else - s = - format (s, "dst %U ", format_ip4_address, - (ip4_address_t *) mp->dst_address); + s = format (s, "dst %U ", format_vl_api_address, &mp->neighbor.ip_address); if (mp->is_add == 0) s = format (s, "del "); @@ -1074,6 +1043,7 @@ static void *vl_api_ip_neighbor_add_del_t_print FINISH; } + static void *vl_api_create_vlan_subif_t_print (vl_api_create_vlan_subif_t * mp, void *handle) { @@ -1291,8 +1261,7 @@ static void *vl_api_sw_interface_ip6nd_ra_prefix_t_print s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); - s = format (s, "%U/%d ", format_ip6_address, mp->address, - mp->address_length); + s = format (s, "%U ", format_vl_api_prefix, &mp->prefix); s = format (s, "val_life %d ", ntohl (mp->val_lifetime)); @@ -2239,10 +2208,7 @@ static void *vl_api_ip_probe_neighbor_t_print s = format (0, "SCRIPT: ip_probe_neighbor "); s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); - if (mp->is_ipv6) - s = format (s, "address %U ", format_ip6_address, &mp->dst_address); - else - s = format (s, "address %U ", format_ip4_address, &mp->dst_address); + s = format (s, "address %U ", format_vl_api_address, &mp->dst); FINISH; } @@ -2285,7 +2251,7 @@ static void *vl_api_want_ip4_arp_events_t_print s = format (0, "SCRIPT: want_ip4_arp_events "); s = format (s, "pid %d address %U ", ntohl (mp->pid), - format_ip4_address, &mp->address); + format_ip4_address, mp->ip); if (mp->enable_disable == 0) s = format (s, "del "); @@ -2299,7 +2265,7 @@ static void 
*vl_api_want_ip6_nd_events_t_print s = format (0, "SCRIPT: want_ip6_nd_events "); s = format (s, "pid %d address %U ", ntohl (mp->pid), - format_ip6_address, mp->address); + format_vl_api_ip6_address, mp->ip); if (mp->enable_disable == 0) s = format (s, "del "); @@ -2889,12 +2855,7 @@ static void *vl_api_ip_source_and_port_range_check_add_del_t_print int i; s = format (0, "SCRIPT: ip_source_and_port_range_check_add_del "); - if (mp->is_ipv6) - s = format (s, "%U/%d ", format_ip6_address, mp->address, - mp->mask_length); - else - s = format (s, "%U/%d ", format_ip4_address, mp->address, - mp->mask_length); + s = format (s, "%U ", format_vl_api_prefix, &mp->prefix); for (i = 0; i < mp->number_of_ranges; i++) { @@ -3684,14 +3645,9 @@ static void *vl_api_ip_container_proxy_add_del_t_print { u8 *s; s = format (0, "SCRIPT: ip_container_proxy_add_del "); - if (mp->is_ip4) - s = format (s, "is_add %d address %U/%d sw_if_index %d", - mp->is_add, format_ip4_address, - (ip4_address_t *) mp->ip, mp->plen, mp->sw_if_index); - else - s = format (s, "is_add %d address %U/%d sw_if_index %d", - mp->is_add, format_ip6_address, - (ip6_address_t *) mp->ip, mp->plen, mp->sw_if_index); + s = format (s, "is_add %d prefix %U sw_if_index %d", + mp->is_add, format_vl_api_prefix, mp->sw_if_index); + FINISH; } @@ -3702,9 +3658,8 @@ static void *vl_api_qos_record_enable_disable_t_print s = format (0, "SCRIPT: qos_record_enable_disable "); s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); - s = - format (s, "input_source %U ", format_qos_source, - ntohl (mp->input_source)); + s = format (s, "input_source %U ", format_qos_source, + ntohl (mp->input_source)); if (!mp->enable) s = format (s, "disable "); @@ -3741,10 +3696,6 @@ _(SW_INTERFACE_SET_MPLS_ENABLE, sw_interface_set_mpls_enable) \ _(SW_INTERFACE_SET_VPATH, sw_interface_set_vpath) \ _(SW_INTERFACE_SET_VXLAN_BYPASS, sw_interface_set_vxlan_bypass) \ _(SW_INTERFACE_SET_GENEVE_BYPASS, sw_interface_set_geneve_bypass) \ -_(TAP_CONNECT, 
tap_connect) \ -_(TAP_MODIFY, tap_modify) \ -_(TAP_DELETE, tap_delete) \ -_(SW_INTERFACE_TAP_DUMP, sw_interface_tap_dump) \ _(BOND_CREATE, bond_create) \ _(BOND_DELETE, bond_delete) \ _(BOND_ENSLAVE, bond_enslave) \ diff --git a/src/vpp/api/types.c b/src/vpp/api/types.c index 0a48711c611f..e36b8dd6781f 100644 --- a/src/vpp/api/types.c +++ b/src/vpp/api/types.c @@ -47,6 +47,26 @@ format_vl_api_address_union (u8 * s, va_list * args) return s; } +u8 * +format_vl_api_ip4_address (u8 * s, va_list * args) +{ + const vl_api_ip4_address_t *addr = va_arg (*args, vl_api_ip4_address_t *); + + s = format (s, "%U", format_ip4_address, addr); + + return s; +} + +u8 * +format_vl_api_ip6_address (u8 * s, va_list * args) +{ + const vl_api_ip6_address_t *addr = va_arg (*args, vl_api_ip6_address_t *); + + s = format (s, "%U", format_ip6_address, addr); + + return s; +} + u8 * format_vl_api_prefix (u8 * s, va_list * args) { @@ -58,6 +78,14 @@ format_vl_api_prefix (u8 * s, va_list * args) return s; } +u8 * +format_vl_api_mac_address (u8 * s, va_list * args) +{ + vl_api_mac_address_t *mac = va_arg (*args, vl_api_mac_address_t *); + + return (format (s, "%U", format_ethernet_address, mac)); +} + uword unformat_vl_api_mac_address (unformat_input_t * input, va_list * args) { @@ -81,11 +109,34 @@ unformat_vl_api_address (unformat_input_t * input, va_list * args) return (1); } -u8 * -format_vl_api_mac_address (u8 * s, va_list * args) +uword +unformat_vl_api_ip4_address (unformat_input_t * input, va_list * args) { - vl_api_mac_address_t *mac = va_arg (*args, vl_api_mac_address_t *); + vl_api_ip4_address_t *ip = va_arg (*args, vl_api_ip4_address_t *); - return (format (s, "%U", format_ethernet_address, mac)); + if (unformat (input, "%U", unformat_ip4_address, ip)) + return (1); + return (0); +} + +uword +unformat_vl_api_ip6_address (unformat_input_t * input, va_list * args) +{ + vl_api_ip6_address_t *ip = va_arg (*args, vl_api_ip6_address_t *); + + if (unformat (input, "%U", 
unformat_ip6_address, ip)) + return (1); + return (0); +} + +uword +unformat_vl_api_prefix (unformat_input_t * input, va_list * args) +{ + vl_api_prefix_t *pfx = va_arg (*args, vl_api_prefix_t *); + + if (unformat (input, "%U/%d", unformat_vl_api_address, &pfx->address, + &pfx->address_length)) + return (1); + return (0); } diff --git a/src/vpp/api/types.h b/src/vpp/api/types.h index 9a45639d030f..cf1b26d9bfc6 100644 --- a/src/vpp/api/types.h +++ b/src/vpp/api/types.h @@ -28,8 +28,13 @@ const vl_api_address_t VL_API_ZERO_ADDRESS; extern uword unformat_vl_api_mac_address (unformat_input_t * input, va_list * args); extern uword unformat_vl_api_address (unformat_input_t * input, va_list * args); +extern uword unformat_vl_api_ip4_address (unformat_input_t * input, va_list * args); +extern uword unformat_vl_api_ip6_address (unformat_input_t * input, va_list * args); +extern uword unformat_vl_api_prefix (unformat_input_t * input, va_list * args); extern u8 *format_vl_api_address (u8 * s, va_list * args); +extern u8 *format_vl_api_ip4_address (u8 * s, va_list * args); +extern u8 *format_vl_api_ip6_address (u8 * s, va_list * args); extern u8 *format_vl_api_address_union (u8 * s, va_list * args); extern u8 *format_vl_api_prefix (u8 * s, va_list * args); extern u8 *format_vl_api_mprefix (u8 * s, va_list * args); diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 378a3b3a5fa8..a2a677926e1d 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -26,7 +26,6 @@ option version = "1.1.0"; * If you're looking for interface APIs, please * see .../src/vnet/{interface.api,interface_api.c} * IP APIs: see .../src/vnet/ip/{ip.api, ip_api.c} - * TAP APIs: see .../src/vnet/unix/{tap.api, tap_api.c} * VXLAN APIs: see .../src/vnet/vxlan/{vxlan.api, vxlan_api.c} * GENEVE APIs: see .../src/vnet/geneve/{geneve.api, geneve_api.c} * LLDP APIs: see .../src/vnet/lldp/{lldp.api, lldp_api.c} diff --git a/src/vpp/conf/startup.conf b/src/vpp/conf/startup.conf index 
7b4f69d51c71..610f2486a5d5 100644 --- a/src/vpp/conf/startup.conf +++ b/src/vpp/conf/startup.conf @@ -69,6 +69,17 @@ cpu { # scheduler-priority 50 } +# buffers { + ## Increase number of buffers allocated, needed only in scenarios with + ## large number of interfaces and worker threads. Value is per numa node. + ## Default is 16384 (8192 if running unprivileged) + # buffers-per-numa 128000 + + ## Size of buffer data area + ## Default is 2048 + # default data-size 2048 +# } + # dpdk { ## Change default settings for all interfaces # dev default { @@ -127,11 +138,6 @@ cpu { ## disables Jumbo MTU support # no-multi-seg - ## Increase number of buffers allocated, needed only in scenarios with - ## large number of interfaces and worker threads. Value is per CPU socket. - ## Default is 16384 - # num-mbufs 128000 - ## Change hugepages allocation per-socket, needed only if there is need for ## larger number of mbufs. Default is 256M on each detected CPU socket # socket-mem 2048,2048 diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c index 41f70d0c8f19..6819ae6a3a42 100644 --- a/src/vpp/vnet/main.c +++ b/src/vpp/vnet/main.c @@ -57,11 +57,13 @@ vpp_find_plugin_path () return; *p = 0; - s = format (0, "%s/lib/vpp_plugins", path); + s = format (0, "%s/lib/" CLIB_TARGET_TRIPLET "/vpp_plugins:" + "%s/lib/vpp_plugins", path, path); vec_add1 (s, 0); vlib_plugin_path = (char *) s; - s = format (0, "%s/lib/vpp_api_test_plugins", path); + s = format (0, "%s/lib/" CLIB_TARGET_TRIPLET "/vpp_api_test_plugins:" + "%s/lib/vpp_api_test_plugins", path, path); vec_add1 (s, 0); vat_plugin_path = (char *) s; } diff --git a/src/vppinfra/cache.h b/src/vppinfra/cache.h index e8e89ba85563..e2c406d84326 100644 --- a/src/vppinfra/cache.h +++ b/src/vppinfra/cache.h @@ -58,6 +58,7 @@ #define CLIB_CACHE_LINE_BYTES (1 << CLIB_LOG2_CACHE_LINE_BYTES) #define CLIB_CACHE_LINE_ALIGN_MARK(mark) u8 mark[0] __attribute__((aligned(CLIB_CACHE_LINE_BYTES))) +#define CLIB_CACHE_LINE_ROUND(x) ((x +
CLIB_CACHE_LINE_BYTES - 1) & ~(CLIB_CACHE_LINE_BYTES - 1)) /* Default cache line fill buffers. */ #ifndef CLIB_N_PREFETCHES diff --git a/src/vppinfra/config.h.in b/src/vppinfra/config.h.in index 58a973144e02..a7a22a6a992a 100644 --- a/src/vppinfra/config.h.in +++ b/src/vppinfra/config.h.in @@ -21,4 +21,6 @@ #endif #define USE_DLMALLOC @DLMALLOC@ + +#define CLIB_TARGET_TRIPLET "@CMAKE_C_COMPILER_TARGET@" #endif diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h index 6dc0a23e4676..ee199e2cf325 100644 --- a/src/vppinfra/cpu.h +++ b/src/vppinfra/cpu.h @@ -16,6 +16,7 @@ #ifndef included_clib_cpu_h #define included_clib_cpu_h +#include #include /* @@ -44,12 +45,8 @@ if (clib_cpu_supports_ ## arch()) \ return & fn ## _ ##arch; -#define CLIB_MULTIARCH_SELECT_FN(fn,...) \ - __VA_ARGS__ void * fn ## _multiarch_select(void) \ -{ \ - foreach_march_variant(CLIB_MULTIARCH_ARCH_CHECK, fn) \ - return & fn; \ -} +/* FIXME to be removed */ +#define CLIB_MULTIARCH_SELECT_FN(fn,...) #ifdef CLIB_MARCH_VARIANT #define __CLIB_MULTIARCH_FN(a,b) a##_##b @@ -61,6 +58,62 @@ #define CLIB_MARCH_SFX CLIB_MULTIARCH_FN +typedef struct _clib_march_fn_registration +{ + void *function; + int priority; + struct _clib_march_fn_registration *next; + char *name; +} clib_march_fn_registration; + +static_always_inline void * +clib_march_select_fn_ptr (clib_march_fn_registration * r) +{ + void *rv = 0; + int last_prio = -1; + + while (r) + { + if (last_prio < r->priority) + { + last_prio = r->priority; + rv = r->function; + } + r = r->next; + } + return rv; +} + +#define CLIB_MARCH_FN_POINTER(fn) \ + clib_march_select_fn_ptr (fn##_march_fn_registrations); + +#define _CLIB_MARCH_FN_REGISTRATION(fn) \ +static clib_march_fn_registration \ +CLIB_MARCH_SFX(fn##_march_fn_registration) = \ +{ \ + .name = CLIB_MARCH_VARIANT_STR \ +}; \ +\ +static void __clib_constructor \ +fn##_march_register () \ +{ \ + clib_march_fn_registration *r; \ + r = & CLIB_MARCH_SFX (fn##_march_fn_registration); \ + r->priority = 
CLIB_MARCH_FN_PRIORITY(); \ + r->next = fn##_march_fn_registrations; \ + r->function = CLIB_MARCH_SFX (fn); \ + fn##_march_fn_registrations = r; \ +} + +#ifdef CLIB_MARCH_VARIANT +#define CLIB_MARCH_FN_REGISTRATION(fn) \ +extern clib_march_fn_registration *fn##_march_fn_registrations; \ +_CLIB_MARCH_FN_REGISTRATION(fn) +#else +#define CLIB_MARCH_FN_REGISTRATION(fn) \ +clib_march_fn_registration *fn##_march_fn_registrations = 0; \ +_CLIB_MARCH_FN_REGISTRATION(fn) +#endif #define foreach_x86_64_flags \ _ (sse3, 1, ecx, 0) \ _ (ssse3, 1, ecx, 9) \ @@ -99,6 +152,22 @@ _ (asimddp, 20) \ _ (sha512, 21) \ _ (sve, 22) +static inline u32 +clib_get_current_cpu_id () +{ + unsigned cpu, node; + syscall (__NR_getcpu, &cpu, &node, 0); + return cpu; +} + +static inline u32 +clib_get_current_numa_node () +{ + unsigned cpu, node; + syscall (__NR_getcpu, &cpu, &node, 0); + return node; +} + #if defined(__x86_64__) #include "cpuid.h" diff --git a/src/vppinfra/error_bootstrap.h b/src/vppinfra/error_bootstrap.h index 248d389d0f2e..18d2176812da 100644 --- a/src/vppinfra/error_bootstrap.h +++ b/src/vppinfra/error_bootstrap.h @@ -91,6 +91,10 @@ do { \ #define STATIC_ASSERT_OFFSET_OF(s, e, o) \ STATIC_ASSERT (STRUCT_OFFSET_OF(s,e) == o, "Offset of " #s "." #e " must be " # o) +#define STATIC_ASSERT_FITS_IN(s, e, o) \ + STATIC_ASSERT (STRUCT_OFFSET_OF(s,e) <= (o - sizeof(((s *)0)->e)), \ + #s "." #e " does not fit into " # o " bytes") + /* Assert without allocating memory. 
*/ #define ASSERT_AND_PANIC(truth) \ do { \ diff --git a/src/vppinfra/pmalloc.c b/src/vppinfra/pmalloc.c index 41309dd1d99e..5662a36939ec 100644 --- a/src/vppinfra/pmalloc.c +++ b/src/vppinfra/pmalloc.c @@ -138,6 +138,9 @@ alloc_chunk_from_page (clib_pmalloc_main_t * pm, clib_pmalloc_page_t * pp, pp->n_free_chunks = a->subpages_per_page; } + if (pp->n_free_blocks < n_blocks) + return 0; + alloc_chunk_index = pp->first_chunk_index; next_chunk: diff --git a/src/vppinfra/pool.h b/src/vppinfra/pool.h index 10262e90ec79..747a7170800c 100644 --- a/src/vppinfra/pool.h +++ b/src/vppinfra/pool.h @@ -352,18 +352,21 @@ do { \ typeof (P) _pool_var (new) = 0; \ pool_header_t * _pool_var (ph), * _pool_var (new_ph); \ u32 _pool_var (n) = pool_len (P); \ - _pool_var (new) = _vec_resize (_pool_var (new), _pool_var (n), \ - _pool_var (n) * sizeof ((P)[0]), \ - pool_aligned_header_bytes, (A)); \ - clib_memcpy_fast (_pool_var (new), (P), \ - _pool_var (n) * sizeof ((P)[0])); \ - _pool_var (ph) = pool_header (P); \ - _pool_var (new_ph) = pool_header (_pool_var (new)); \ - _pool_var (new_ph)->free_bitmap = \ - clib_bitmap_dup (_pool_var (ph)->free_bitmap); \ - _pool_var (new_ph)->free_indices = \ - vec_dup (_pool_var (ph)->free_indices); \ - _pool_var (new_ph)->max_elts = _pool_var (ph)->max_elts; \ + if ((P)) \ + { \ + _pool_var (new) = _vec_resize (_pool_var (new), _pool_var (n), \ + _pool_var (n) * sizeof ((P)[0]), \ + pool_aligned_header_bytes, (A)); \ + clib_memcpy_fast (_pool_var (new), (P), \ + _pool_var (n) * sizeof ((P)[0])); \ + _pool_var (ph) = pool_header (P); \ + _pool_var (new_ph) = pool_header (_pool_var (new)); \ + _pool_var (new_ph)->free_bitmap = \ + clib_bitmap_dup (_pool_var (ph)->free_bitmap); \ + _pool_var (new_ph)->free_indices = \ + vec_dup (_pool_var (ph)->free_indices); \ + _pool_var (new_ph)->max_elts = _pool_var (ph)->max_elts; \ + } \ _pool_var (new); \ }) diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h index 42f7890f3d06..d9cd8fe1af9d 100644 --- 
a/src/vppinfra/string.h +++ b/src/vppinfra/string.h @@ -213,74 +213,6 @@ memset_s_inline (void *s, rsize_t smax, int c, rsize_t n) */ #define clib_memset(s,c,n) memset_s_inline(s,n,c,n) -/* - * Copy 64 bytes of data to 4 destinations - * this function is typically used in quad-loop case when whole cacheline - * needs to be copied to 4 different places. First it reads whole cacheline - * to 1/2/4 SIMD registers and then it writes data to 4 destinations. - */ - -static_always_inline void -clib_memcpy64_x4 (void *d0, void *d1, void *d2, void *d3, void *s) -{ -#if defined (__AVX512F__) - __m512i r0 = _mm512_loadu_si512 (s); - - _mm512_storeu_si512 (d0, r0); - _mm512_storeu_si512 (d1, r0); - _mm512_storeu_si512 (d2, r0); - _mm512_storeu_si512 (d3, r0); - -#elif defined (__AVX2__) - __m256i r0 = _mm256_loadu_si256 ((__m256i *) (s + 0 * 32)); - __m256i r1 = _mm256_loadu_si256 ((__m256i *) (s + 1 * 32)); - - _mm256_storeu_si256 ((__m256i *) (d0 + 0 * 32), r0); - _mm256_storeu_si256 ((__m256i *) (d0 + 1 * 32), r1); - - _mm256_storeu_si256 ((__m256i *) (d1 + 0 * 32), r0); - _mm256_storeu_si256 ((__m256i *) (d1 + 1 * 32), r1); - - _mm256_storeu_si256 ((__m256i *) (d2 + 0 * 32), r0); - _mm256_storeu_si256 ((__m256i *) (d2 + 1 * 32), r1); - - _mm256_storeu_si256 ((__m256i *) (d3 + 0 * 32), r0); - _mm256_storeu_si256 ((__m256i *) (d3 + 1 * 32), r1); - -#elif defined (__SSSE3__) - __m128i r0 = _mm_loadu_si128 ((__m128i *) (s + 0 * 16)); - __m128i r1 = _mm_loadu_si128 ((__m128i *) (s + 1 * 16)); - __m128i r2 = _mm_loadu_si128 ((__m128i *) (s + 2 * 16)); - __m128i r3 = _mm_loadu_si128 ((__m128i *) (s + 3 * 16)); - - _mm_storeu_si128 ((__m128i *) (d0 + 0 * 16), r0); - _mm_storeu_si128 ((__m128i *) (d0 + 1 * 16), r1); - _mm_storeu_si128 ((__m128i *) (d0 + 2 * 16), r2); - _mm_storeu_si128 ((__m128i *) (d0 + 3 * 16), r3); - - _mm_storeu_si128 ((__m128i *) (d1 + 0 * 16), r0); - _mm_storeu_si128 ((__m128i *) (d1 + 1 * 16), r1); - _mm_storeu_si128 ((__m128i *) (d1 + 2 * 16), r2); - 
_mm_storeu_si128 ((__m128i *) (d1 + 3 * 16), r3); - - _mm_storeu_si128 ((__m128i *) (d2 + 0 * 16), r0); - _mm_storeu_si128 ((__m128i *) (d2 + 1 * 16), r1); - _mm_storeu_si128 ((__m128i *) (d2 + 2 * 16), r2); - _mm_storeu_si128 ((__m128i *) (d2 + 3 * 16), r3); - - _mm_storeu_si128 ((__m128i *) (d3 + 0 * 16), r0); - _mm_storeu_si128 ((__m128i *) (d3 + 1 * 16), r1); - _mm_storeu_si128 ((__m128i *) (d3 + 2 * 16), r2); - _mm_storeu_si128 ((__m128i *) (d3 + 3 * 16), r3); - -#else - clib_memcpy_fast (d0, s, 64); - clib_memcpy_fast (d1, s, 64); - clib_memcpy_fast (d2, s, 64); - clib_memcpy_fast (d3, s, 64); -#endif -} - static_always_inline void clib_memset_u64 (void *p, u64 val, uword count) { diff --git a/test/Makefile b/test/Makefile index 8db34820a391..a032318dbcaf 100644 --- a/test/Makefile +++ b/test/Makefile @@ -98,7 +98,6 @@ $(GET_PIP_SCRIPT): $(PIP_INSTALL_DONE): $(GET_PIP_SCRIPT) @virtualenv $(VENV_PATH) -p $(PYTHON_INTERP) - @bash -c "source $(VENV_PATH)/bin/activate && $(PYTHON_INTERP) $(GET_PIP_SCRIPT)" @bash -c "source $(VENV_PATH)/bin/activate && $(PYTHON_INTERP) -m pip install $(PYTHON_DEPENDS)" @touch $@ @@ -179,10 +178,10 @@ retest: verify-test-dir sanity reset shell: verify-test-dir $(PAPI_INSTALL_DONE) @echo "source $(VENV_PATH)/bin/activate;\ echo '***';\ - echo _BUILD_DIR=$(VPP_BUILD_DIR);\ - echo _BIN=$(VPP_BIN);\ - echo _PLUGIN_PATH=$(VPP_PLUGIN_PATH);\ - echo _INSTALL_PATH=$(VPP_INSTALL_PATH);\ + echo VPP_BUILD_DIR=$(VPP_BUILD_DIR);\ + echo VPP_BIN=$(VPP_BIN);\ + echo VPP_PLUGIN_PATH=$(VPP_PLUGIN_PATH);\ + echo VPP_INSTALL_PATH=$(VPP_INSTALL_PATH);\ echo EXTERN_TESTS=$(EXTERN_TESTS);\ echo EXTERN_PLUGINS=$(EXTERN_PLUGINS);\ echo EXTERN_COV_DIR=$(EXTERN_COV_DIR);\ @@ -204,9 +203,9 @@ wipe: reset @rm -f $(PAPI_INSTALL_FLAGS) doc: verify-test-dir $(PIP_PATCH_DONE) - @virtualenv $(VENV_PATH) -p $(PYTHON_INTERP) - @bash -c "source $(VENV_PATH)/bin/activate && $(PYTHON_INTERP) -m pip install sphinx sphinx-rtd-theme" - @bash -c "source 
$(VENV_PATH)/bin/activate && make -C doc WS_ROOT=$(WS_ROOT) BR=$(BR) NO_VPP_PAPI=1 html" + @virtualenv $(VENV_PATH) -p python3 + @bash -c "source $(VENV_PATH)/bin/activate && python3 -m pip install sphinx sphinx-rtd-theme" + @bash -c "source $(VENV_PATH)/bin/activate && make -C doc WS_ROOT=$(WS_ROOT) BR=$(BR) html" .PHONY: wipe-doc @@ -214,11 +213,11 @@ wipe-doc: @make -C doc wipe BR=$(BR) cov: wipe-cov reset ext verify-test-dir $(PAPI_INSTALL_DONE) - @lcov --zerocounters --directory $(_BUILD_DIR) + @lcov --zerocounters --directory $(VPP_BUILD_DIR) @test -z "$(EXTERN_COV_DIR)" || lcov --zerocounters --directory $(EXTERN_COV_DIR) $(call retest-func) @mkdir $(BUILD_COV_DIR) - @lcov --capture --directory $(_BUILD_DIR) --output-file $(BUILD_COV_DIR)/coverage.info + @lcov --capture --directory $(VPP_BUILD_DIR) --output-file $(BUILD_COV_DIR)/coverage.info @test -z "$(EXTERN_COV_DIR)" || lcov --capture --directory $(EXTERN_COV_DIR) --output-file $(BUILD_COV_DIR)/extern-coverage.info @genhtml $(BUILD_COV_DIR)/coverage.info --output-directory $(BUILD_COV_DIR)/html @test -z "$(EXTERN_COV_DIR)" || genhtml $(BUILD_COV_DIR)/extern-coverage.info --output-directory $(BUILD_COV_DIR)/extern-html @@ -233,10 +232,10 @@ wipe-cov: wipe .PHONY: checkstyle checkstyle: verify-test-dir - @virtualenv $(VENV_PATH) -p $(PYTHON_INTERP) - @bash -c "source $(VENV_PATH)/bin/activate && $(PYTHON_INTERP) -m pip install pycodestyle" + @virtualenv $(VENV_PATH) -p python3 + @bash -c "source $(VENV_PATH)/bin/activate && python3 -m pip install pycodestyle" @bash -c "source $(VENV_PATH)/bin/activate &&\ - pycodestyle --show-source --ignore=W504,E126,E241,E226,E305,E704,E741,E722 -v $(WS_ROOT)/test/*.py ||\ + pycodestyle --show-source --ignore=W504,E126,E241,E226,E305,E704,E741,E722 --exclude=$(WS_ROOT)/test/_*.py -v $(WS_ROOT)/test/*.py ||\ (echo \"*******************************************************************\" &&\ echo \"* Test framework PEP8 compliance check FAILED \" &&\ echo 
\"*******************************************************************\" &&\ @@ -260,6 +259,7 @@ help: @echo "" @echo "Arguments controlling test runs:" @echo " V=[0|1|2] - set test verbosity level" + @echo " 0=ERROR, 1=INFO, 2=DEBUG" @echo " TEST_JOBS=[|auto] - use parallel processes for test execution or automatic discovery of maximum acceptable processes (default: 1)" @echo " CACHE_OUTPUT=[0|1] - cache VPP stdout/stderr and log as one block after test finishes (default: 1)" @echo " FAILFAST=[0|1] - fail fast if 1, complete all tests if 0" diff --git a/test/doc/conf.py b/test/doc/conf.py index ec8958ea2f0a..98cd7ab306db 100644 --- a/test/doc/conf.py +++ b/test/doc/conf.py @@ -20,7 +20,6 @@ import sys sys.path.insert(0, os.path.abspath('..')) - # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. @@ -33,6 +32,9 @@ extensions = [ 'sphinx.ext.autodoc', ] +autodoc_mock_imports = ['objgraph', + 'pympler', + 'vpp_papi'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -122,7 +124,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -#html_theme = 'alabaster' +# html_theme = 'alabaster' html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme @@ -149,8 +151,8 @@ # html_logo = None # The name of an image file (relative to this directory) to use as a favicon of -# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. +# the docs. This file should be a Windows icon file (.ico) being 16x16 or +# 32x32 pixels large. 
# # html_favicon = None diff --git a/test/ext/vom_test.cpp b/test/ext/vom_test.cpp index d72c4db97488..c259bb071df4 100644 --- a/test/ext/vom_test.cpp +++ b/test/ext/vom_test.cpp @@ -1773,7 +1773,9 @@ BOOST_AUTO_TEST_CASE(test_routing) { HW::item hw_neighbour(true, rc_t::OK); mac_address_t mac_n({0,1,2,4,5,6}); neighbour *ne = new neighbour(itf1, nh_10, mac_n); - ADD_EXPECT(neighbour_cmds::create_cmd(hw_neighbour, hw_ifh.data(), mac_n, nh_10)); + ADD_EXPECT(neighbour_cmds::create_cmd(hw_neighbour, hw_ifh.data(), + mac_n, nh_10, + neighbour::flags_t::STATIC)); TRY_CHECK_RC(OM::write(ian, *ne)); /* @@ -1829,7 +1831,9 @@ BOOST_AUTO_TEST_CASE(test_routing) { delete mp1; delete mp2; - ADD_EXPECT(neighbour_cmds::delete_cmd(hw_neighbour, hw_ifh.data(), mac_n, nh_10)); + ADD_EXPECT(neighbour_cmds::delete_cmd(hw_neighbour, hw_ifh.data(), + mac_n, nh_10, + neighbour::flags_t::STATIC)); ADD_EXPECT(route::ip_route_cmds::delete_cmd(hw_route_dvr, 0, pfx_6, *path_l2)); ADD_EXPECT(route::ip_route_cmds::delete_cmd(hw_route_5_2, 1, pfx_5, *path_11)); ADD_EXPECT(route::ip_route_cmds::delete_cmd(hw_route_5_2, 1, pfx_5, *path_12)); diff --git a/test/framework.py b/test/framework.py index ea637eba9ebd..5b29ac47cf4a 100644 --- a/test/framework.py +++ b/test/framework.py @@ -391,6 +391,7 @@ def setUpClass(cls): Perform class setup before running the testcase Remove shared memory files, start vpp and connect the vpp-api """ + super(VppTestCase, cls).setUpClass() gc.collect() # run garbage collection first random.seed() cls.logger = get_logger(cls.__name__) @@ -532,7 +533,7 @@ def quit(cls): stderr_log(single_line_delim) stderr_log('VPP output to stderr while running %s:', cls.__name__) stderr_log(single_line_delim) - vpp_output = "".join(str(cls.vpp_stderr_deque)) + vpp_output = "".join(cls.vpp_stderr_deque) with open(cls.tempdir + '/vpp_stderr.txt', 'w') as f: f.write(vpp_output) stderr_log('\n%s', vpp_output) @@ -550,6 +551,7 @@ def tearDownClass(cls): def tearDown(self): """ Show 
various debug prints after each test """ + super(VppTestCase, self).tearDown() self.logger.debug("--- tearDown() for %s.%s(%s) called ---" % (self.__class__.__name__, self._testMethodName, self._testMethodDoc)) @@ -576,6 +578,7 @@ def tearDown(self): def setUp(self): """ Clear trace before running each test""" + super(VppTestCase, self).setUp() self.reporter.send_keep_alive(self) self.logger.debug("--- setUp() for %s.%s(%s) called ---" % (self.__class__.__name__, self._testMethodName, @@ -1034,7 +1037,8 @@ class VppTestResult(unittest.TestResult): core_crash_test_cases_info = set() current_test_case_info = None - def __init__(self, stream, descriptions, verbosity, runner): + def __init__(self, stream=None, descriptions=None, verbosity=None, + runner=None): """ :param stream File descriptor to store where to report test results. Set to the standard error stream by default. @@ -1279,12 +1283,12 @@ def resultclass(self): def __init__(self, keep_alive_pipe=None, descriptions=True, verbosity=1, result_pipe=None, failfast=False, buffer=False, - resultclass=None, print_summary=True): + resultclass=None, print_summary=True, **kwargs): # ignore stream setting here, use hard-coded stdout to be in sync # with prints from VppTestCase methods ... 
super(VppTestRunner, self).__init__(sys.stdout, descriptions, verbosity, failfast, buffer, - resultclass) + resultclass, **kwargs) KeepAliveReporter.pipe = keep_alive_pipe self.orig_stream = self.stream diff --git a/test/hook.py b/test/hook.py index a8f37c7a35b2..cddb603e46ec 100644 --- a/test/hook.py +++ b/test/hook.py @@ -6,6 +6,10 @@ import ipaddress from subprocess import check_output, CalledProcessError from util import check_core_path, get_core_path +try: + text_type = unicode +except NameError: + text_type = str class Hook(object): @@ -13,8 +17,9 @@ class Hook(object): Generic hooks before/after API/CLI calls """ - def __init__(self, logger): - self.logger = logger + def __init__(self, test): + self.test = test + self.logger = test.logger def before_api(self, api_name, api_args): """ @@ -32,6 +37,7 @@ def _friendly_format(val): return '{!s} ({!s})'.format(val, ':'.join(['{:02x}'.format( ord(x)) for x in val])) try: + # we don't call test_type(val) because it is a packed value. return '{!s} ({!s})'.format(val, str( ipaddress.ip_address(val))) except ipaddress.AddressValueError: @@ -74,13 +80,12 @@ class VppDiedError(Exception): class PollHook(Hook): """ Hook which checks if the vpp subprocess is alive """ - def __init__(self, testcase): - super(PollHook, self).__init__(testcase.logger) - self.testcase = testcase + def __init__(self, test): + super(PollHook, self).__init__(test) def on_crash(self, core_path): self.logger.error("Core file present, debug with: gdb %s %s" % - (self.testcase.vpp_bin, core_path)) + (self.test.vpp_bin, core_path)) check_core_path(self.logger, core_path) self.logger.error("Running `file %s':" % core_path) try: @@ -96,27 +101,27 @@ def poll_vpp(self): Poll the vpp status and throw an exception if it's not running :raises VppDiedError: exception if VPP is not running anymore """ - if self.testcase.vpp_dead: + if self.test.vpp_dead: # already dead, nothing to do return - self.testcase.vpp.poll() - if self.testcase.vpp.returncode is not 
None: + self.test.vpp.poll() + if self.test.vpp.returncode is not None: signaldict = dict( (k, v) for v, k in reversed(sorted(signal.__dict__.items())) if v.startswith('SIG') and not v.startswith('SIG_')) - if self.testcase.vpp.returncode in signaldict: - s = signaldict[abs(self.testcase.vpp.returncode)] + if self.test.vpp.returncode in signaldict: + s = signaldict[abs(self.test.vpp.returncode)] else: s = "unknown" msg = "VPP subprocess died unexpectedly with returncode %d [%s]." \ - % (self.testcase.vpp.returncode, s) + % (self.test.vpp.returncode, s) self.logger.critical(msg) - core_path = get_core_path(self.testcase.tempdir) + core_path = get_core_path(self.test.tempdir) if os.path.isfile(core_path): self.on_crash(core_path) - self.testcase.vpp_dead = True + self.test.vpp_dead = True raise VppDiedError(msg) def before_api(self, api_name, api_args): @@ -146,11 +151,11 @@ def before_cli(self, cli): class StepHook(PollHook): """ Hook which requires user to press ENTER before doing any API/CLI """ - def __init__(self, testcase): + def __init__(self, test): self.skip_stack = None self.skip_num = None self.skip_count = 0 - super(StepHook, self).__init__(testcase) + super(StepHook, self).__init__(test) def skip(self): if self.skip_stack is None: diff --git a/test/lisp.py b/test/lisp.py index a1f0c1675a05..2dace8f2f745 100644 --- a/test/lisp.py +++ b/test/lisp.py @@ -64,7 +64,7 @@ def sw_if_index(self): @property def priority(self): - return self.priority + return self._priority @property def weight(self): @@ -184,6 +184,10 @@ def query_vpp_config(self): mapping = self.get_lisp_mapping_dump_entry() return mapping + def object_id(self): + return 'lisp-mapping-[%s]-%s-%s-%s' % ( + self.vni, self.eid, self.priority, self.weight) + class VppLocalMapping(VppLispMapping): """ LISP Local mapping """ diff --git a/test/requirements.txt b/test/requirements.txt index 52dca1718524..b05cc74bc1a8 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -10,4 +10,4 @@ 
pycodestyle # MIT (Expat license) https://pypi scapy==2.4.0; python_version >= '2.7' or python_version >= '3.4' # GPL2 https://github.com/secdev/scapy/blob/master/LICENSE six # MIT subprocess32 # PSF -syslog_rfc5424_parser>=0.2.0 # ISC +syslog_rfc5424_parser>=0.3.0 # ISC diff --git a/test/template_ipsec.py b/test/template_ipsec.py index d35cf420d373..77461d4397ff 100644 --- a/test/template_ipsec.py +++ b/test/template_ipsec.py @@ -8,60 +8,71 @@ from framework import VppTestCase, VppTestRunner from util import ppp +from vpp_papi import VppEnum class IPsecIPv4Params(object): + addr_type = socket.AF_INET addr_any = "0.0.0.0" addr_bcast = "255.255.255.255" addr_len = 32 is_ipv6 = 0 - remote_tun_if_host = '1.1.1.1' - scapy_tun_sa_id = 10 - scapy_tun_spi = 1001 - vpp_tun_sa_id = 20 - vpp_tun_spi = 1000 + def __init__(self): + self.remote_tun_if_host = '1.1.1.1' + + self.scapy_tun_sa_id = 10 + self.scapy_tun_spi = 1001 + self.vpp_tun_sa_id = 20 + self.vpp_tun_spi = 1000 - scapy_tra_sa_id = 30 - scapy_tra_spi = 2001 - vpp_tra_sa_id = 40 - vpp_tra_spi = 2000 + self.scapy_tra_sa_id = 30 + self.scapy_tra_spi = 2001 + self.vpp_tra_sa_id = 40 + self.vpp_tra_spi = 2000 - auth_algo_vpp_id = 2 # internal VPP enum value for SHA1_96 - auth_algo = 'HMAC-SHA1-96' # scapy name - auth_key = 'C91KUR9GYMm5GfkEvNjX' + self.auth_algo_vpp_id = (VppEnum.vl_api_ipsec_integ_alg_t. + IPSEC_API_INTEG_ALG_SHA1_96) + self.auth_algo = 'HMAC-SHA1-96' # scapy name + self.auth_key = 'C91KUR9GYMm5GfkEvNjX' - crypt_algo_vpp_id = 1 # internal VPP enum value for AES_CBC_128 - crypt_algo = 'AES-CBC' # scapy name - crypt_key = 'JPjyOWBeVEQiMe7h' + self.crypt_algo_vpp_id = (VppEnum.vl_api_ipsec_crypto_alg_t. 
+ IPSEC_API_CRYPTO_ALG_AES_CBC_128) + self.crypt_algo = 'AES-CBC' # scapy name + self.crypt_key = 'JPjyOWBeVEQiMe7h' class IPsecIPv6Params(object): + addr_type = socket.AF_INET6 addr_any = "0::0" addr_bcast = "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff" addr_len = 128 is_ipv6 = 1 - remote_tun_if_host = '1111:1111:1111:1111:1111:1111:1111:1111' - scapy_tun_sa_id = 50 - scapy_tun_spi = 3001 - vpp_tun_sa_id = 60 - vpp_tun_spi = 3000 + def __init__(self): + self.remote_tun_if_host = '1111:1111:1111:1111:1111:1111:1111:1111' - scapy_tra_sa_id = 70 - scapy_tra_spi = 4001 - vpp_tra_sa_id = 80 - vpp_tra_spi = 4000 + self.scapy_tun_sa_id = 50 + self.scapy_tun_spi = 3001 + self.vpp_tun_sa_id = 60 + self.vpp_tun_spi = 3000 - auth_algo_vpp_id = 4 # internal VPP enum value for SHA_256_128 - auth_algo = 'SHA2-256-128' # scapy name - auth_key = 'C91KUR9GYMm5GfkEvNjX' + self.scapy_tra_sa_id = 70 + self.scapy_tra_spi = 4001 + self.vpp_tra_sa_id = 80 + self.vpp_tra_spi = 4000 - crypt_algo_vpp_id = 3 # internal VPP enum value for AES_CBC_256 - crypt_algo = 'AES-CBC' # scapy name - crypt_key = 'JPjyOWBeVEQiMe7hJPjyOWBeVEQiMe7h' + self.auth_algo_vpp_id = (VppEnum.vl_api_ipsec_integ_alg_t. + IPSEC_API_INTEG_ALG_SHA_256_128) + self.auth_algo = 'SHA2-256-128' # scapy name + self.auth_key = 'C91KUR9GYMm5GfkEvNjX' + + self.crypt_algo_vpp_id = (VppEnum.vl_api_ipsec_crypto_alg_t. 
+ IPSEC_API_CRYPTO_ALG_AES_CBC_256) + self.crypt_algo = 'AES-CBC' # scapy name + self.crypt_key = 'JPjyOWBeVEQiMe7hJPjyOWBeVEQiMe7h' class TemplateIpsec(VppTestCase): @@ -82,39 +93,48 @@ class TemplateIpsec(VppTestCase): |tun_if| -------> |VPP| ------> |pg1| ------ --- --- """ - ipv4_params = IPsecIPv4Params() - ipv6_params = IPsecIPv6Params() - params = {ipv4_params.addr_type: ipv4_params, - ipv6_params.addr_type: ipv6_params} - payload = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + def ipsec_select_backend(self): + """ empty method to be overloaded when necessary """ + pass + + def setUp(self): + super(TemplateIpsec, self).setUp() - tun_spd_id = 1 - tra_spd_id = 2 + self.ipv4_params = IPsecIPv4Params() + self.ipv6_params = IPsecIPv6Params() + self.params = {self.ipv4_params.addr_type: self.ipv4_params, + self.ipv6_params.addr_type: self.ipv6_params} - vpp_esp_protocol = 1 - vpp_ah_protocol = 0 + self.payload = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"\ + "XXXXXXXXXXXXXXXXXXXXX" - @classmethod - def ipsec_select_backend(cls): - """ empty method to be overloaded when necessary """ - pass + self.tun_spd_id = 1 + self.tra_spd_id = 2 - @classmethod - def setUpClass(cls): - super(TemplateIpsec, cls).setUpClass() - cls.create_pg_interfaces(range(3)) - cls.interfaces = list(cls.pg_interfaces) - for i in cls.interfaces: + self.vpp_esp_protocol = (VppEnum.vl_api_ipsec_proto_t. + IPSEC_API_PROTO_ESP) + self.vpp_ah_protocol = (VppEnum.vl_api_ipsec_proto_t. 
+ IPSEC_API_PROTO_AH) + + self.create_pg_interfaces(range(3)) + self.interfaces = list(self.pg_interfaces) + for i in self.interfaces: i.admin_up() i.config_ip4() i.resolve_arp() i.config_ip6() i.resolve_ndp() - cls.ipsec_select_backend() + self.ipsec_select_backend() def tearDown(self): super(TemplateIpsec, self).tearDown() + + for i in self.interfaces: + i.admin_down() + i.unconfig_ip4() + i.unconfig_ip6() + if not self.vpp_dead: self.vapi.cli("show hardware") @@ -158,15 +178,14 @@ def configure_sa_tun(self, params): src=self.tun_if.local_addr[params.addr_type])) return vpp_tun_sa, scapy_tun_sa - @classmethod - def configure_sa_tra(cls, params): - params.scapy_tra_sa = SecurityAssociation(cls.encryption_type, + def configure_sa_tra(self, params): + params.scapy_tra_sa = SecurityAssociation(self.encryption_type, spi=params.vpp_tra_spi, crypt_algo=params.crypt_algo, crypt_key=params.crypt_key, auth_algo=params.auth_algo, auth_key=params.auth_key) - params.vpp_tra_sa = SecurityAssociation(cls.encryption_type, + params.vpp_tra_sa = SecurityAssociation(self.encryption_type, spi=params.scapy_tra_spi, crypt_algo=params.crypt_algo, crypt_key=params.crypt_key, @@ -361,6 +380,11 @@ def test_tun_basic44(self, count=1): self.logger.info(self.vapi.ppcli("show error")) self.logger.info(self.vapi.ppcli("show ipsec")) + if (hasattr(p, "spd_policy_in_any")): + pkts = p.spd_policy_in_any.get_stats()['packets'] + self.assertEqual(pkts, count, + "incorrect SPD any policy: expected %d != %d" % + (count, pkts)) self.assert_packet_counter_equal(self.tun4_encrypt_node_name, count) self.assert_packet_counter_equal(self.tun4_decrypt_node_name, count) diff --git a/test/test_acl_plugin_conns.py b/test/test_acl_plugin_conns.py index d3c8581396d8..b6c47373b8ff 100644 --- a/test/test_acl_plugin_conns.py +++ b/test/test_acl_plugin_conns.py @@ -142,6 +142,10 @@ def setUpClass(cls): i.resolve_arp() i.resolve_ndp() + @classmethod + def tearDownClass(cls): + super(ACLPluginConnTestCase, 
cls).tearDownClass() + def tearDown(self): """Run standard test teardown and log various show commands """ diff --git a/test/test_bfd.py b/test/test_bfd.py index f40bcc65c229..a450a3355274 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -69,6 +69,10 @@ def setUpClass(cls): super(BFDAPITestCase, cls).tearDownClass() raise + @classmethod + def tearDownClass(cls): + super(BFDAPITestCase, cls).tearDownClass() + def setUp(self): super(BFDAPITestCase, self).setUp() self.factory = AuthKeyFactory() @@ -302,7 +306,6 @@ def test_set_del_udp_echo_source(self): self.assertFalse(echo_source.have_usable_ip6) -@unittest.skipUnless(running_extended_tests, "part of extended tests") class BFDTestSession(object): """ BFD session as seen from test framework side """ @@ -692,6 +695,10 @@ def setUpClass(cls): super(BFD4TestCase, cls).tearDownClass() raise + @classmethod + def tearDownClass(cls): + super(BFD4TestCase, cls).tearDownClass() + def setUp(self): super(BFD4TestCase, self).setUp() self.factory = AuthKeyFactory() @@ -1498,6 +1505,10 @@ def setUpClass(cls): super(BFD6TestCase, cls).tearDownClass() raise + @classmethod + def tearDownClass(cls): + super(BFD6TestCase, cls).tearDownClass() + def setUp(self): super(BFD6TestCase, self).setUp() self.factory = AuthKeyFactory() @@ -1687,6 +1698,14 @@ class BFDFIBTestCase(VppTestCase): vpp_session = None test_session = None + @classmethod + def setUpClass(cls): + super(BFDFIBTestCase, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(BFDFIBTestCase, cls).tearDownClass() + def setUp(self): super(BFDFIBTestCase, self).setUp() self.create_pg_interfaces(range(1)) @@ -1805,6 +1824,10 @@ def setUpClass(cls): super(BFDSHA1TestCase, cls).tearDownClass() raise + @classmethod + def tearDownClass(cls): + super(BFDSHA1TestCase, cls).tearDownClass() + def setUp(self): super(BFDSHA1TestCase, self).setUp() self.factory = AuthKeyFactory() @@ -2038,6 +2061,10 @@ def setUpClass(cls): super(BFDAuthOnOffTestCase, 
cls).tearDownClass() raise + @classmethod + def tearDownClass(cls): + super(BFDAuthOnOffTestCase, cls).tearDownClass() + def setUp(self): super(BFDAuthOnOffTestCase, self).setUp() self.factory = AuthKeyFactory() @@ -2245,6 +2272,10 @@ def setUpClass(cls): super(BFDCLITestCase, cls).tearDownClass() raise + @classmethod + def tearDownClass(cls): + super(BFDCLITestCase, cls).tearDownClass() + def setUp(self): super(BFDCLITestCase, self).setUp() self.factory = AuthKeyFactory() diff --git a/test/test_classifier.py b/test/test_classifier.py index f865cb9f6d54..8d4bde8e730d 100644 --- a/test/test_classifier.py +++ b/test/test_classifier.py @@ -267,7 +267,7 @@ def create_classify_table(self, key, mask, data_offset=0): miss_next_index=0, current_data_flag=1, current_data_offset=data_offset) - self.assertIsNotNone(r, msg='No response msg for add_del_table') + self.assertIsNotNone(r, 'No response msg for add_del_table') self.acl_tbl_idx[key] = r.new_table_index def create_classify_session(self, table_index, match, pbr_option=0, @@ -288,7 +288,7 @@ def create_classify_session(self, table_index, match, pbr_option=0, opaque_index=0, action=pbr_option, metadata=vrfid) - self.assertIsNotNone(r, msg='No response msg for add_del_session') + self.assertIsNotNone(r, 'No response msg for add_del_session') def input_acl_set_interface(self, intf, table_index, is_add=1): """Configure Input ACL interface @@ -302,7 +302,7 @@ def input_acl_set_interface(self, intf, table_index, is_add=1): is_add, intf.sw_if_index, ip4_table_index=table_index) - self.assertIsNotNone(r, msg='No response msg for acl_set_interface') + self.assertIsNotNone(r, 'No response msg for acl_set_interface') def output_acl_set_interface(self, intf, table_index, is_add=1): """Configure Output ACL interface @@ -316,7 +316,7 @@ def output_acl_set_interface(self, intf, table_index, is_add=1): is_add, intf.sw_if_index, ip4_table_index=table_index) - self.assertIsNotNone(r, msg='No response msg for acl_set_interface') + 
self.assertIsNotNone(r, 'No response msg for acl_set_interface') # Tests split to different test case classes because of issue reported in diff --git a/test/test_classifier_ip6.py b/test/test_classifier_ip6.py index cbcf5c472579..f6d12c7b3db2 100644 --- a/test/test_classifier_ip6.py +++ b/test/test_classifier_ip6.py @@ -236,7 +236,7 @@ def create_classify_table(self, key, mask, data_offset=0): miss_next_index=0, current_data_flag=1, current_data_offset=data_offset) - self.assertIsNotNone(r, msg='No response msg for add_del_table') + self.assertIsNotNone(r, 'No response msg for add_del_table') self.acl_tbl_idx[key] = r.new_table_index def create_classify_session(self, table_index, match, vrfid=0, is_add=1): @@ -254,7 +254,7 @@ def create_classify_session(self, table_index, match, vrfid=0, is_add=1): binascii.unhexlify(match), opaque_index=0, metadata=vrfid) - self.assertIsNotNone(r, msg='No response msg for add_del_session') + self.assertIsNotNone(r, 'No response msg for add_del_session') def input_acl_set_interface(self, intf, table_index, is_add=1): """Configure Input ACL interface @@ -268,7 +268,7 @@ def input_acl_set_interface(self, intf, table_index, is_add=1): is_add, intf.sw_if_index, ip6_table_index=table_index) - self.assertIsNotNone(r, msg='No response msg for acl_set_interface') + self.assertIsNotNone(r, 'No response msg for acl_set_interface') def output_acl_set_interface(self, intf, table_index, is_add=1): """Configure Output ACL interface @@ -282,7 +282,7 @@ def output_acl_set_interface(self, intf, table_index, is_add=1): is_add, intf.sw_if_index, ip6_table_index=table_index) - self.assertIsNotNone(r, msg='No response msg for acl_set_interface') + self.assertIsNotNone(r, 'No response msg for acl_set_interface') class TestClassifierIP6(TestClassifier): diff --git a/test/test_classify_l2_acl.py b/test/test_classify_l2_acl.py index d9557ee7ef22..5c9ccc9ae8d4 100644 --- a/test/test_classify_l2_acl.py +++ b/test/test_classify_l2_acl.py @@ -209,7 +209,7 @@ 
def create_classify_table(self, key, mask, data_offset=0, is_add=1): miss_next_index=0, current_data_flag=1, current_data_offset=data_offset) - self.assertIsNotNone(r, msg='No response msg for add_del_table') + self.assertIsNotNone(r, 'No response msg for add_del_table') self.acl_tbl_idx[key] = r.new_table_index def create_classify_session(self, intf, table_index, match, @@ -229,7 +229,7 @@ def create_classify_session(self, intf, table_index, match, table_index, binascii.unhexlify(match), hit_next_index=hit_next_index) - self.assertIsNotNone(r, msg='No response msg for add_del_session') + self.assertIsNotNone(r, 'No response msg for add_del_session') def input_acl_set_interface(self, intf, table_index, is_add=1): """Configure Input ACL interface @@ -243,7 +243,7 @@ def input_acl_set_interface(self, intf, table_index, is_add=1): is_add, intf.sw_if_index, l2_table_index=table_index) - self.assertIsNotNone(r, msg='No response msg for acl_set_interface') + self.assertIsNotNone(r, 'No response msg for acl_set_interface') def output_acl_set_interface(self, intf, table_index, is_add=1): """Configure Output ACL interface @@ -257,7 +257,7 @@ def output_acl_set_interface(self, intf, table_index, is_add=1): is_add, intf.sw_if_index, l2_table_index=table_index) - self.assertIsNotNone(r, msg='No response msg for acl_set_interface') + self.assertIsNotNone(r, 'No response msg for acl_set_interface') def create_hosts(self, count, start=0): """ diff --git a/test/test_container.py b/test/test_container.py index 66b7748630f4..56644bce610e 100644 --- a/test/test_container.py +++ b/test/test_container.py @@ -37,6 +37,10 @@ def setUpClass(cls): i.resolve_arp() i.resolve_ndp() + @classmethod + def tearDownClass(cls): + super(ContainerIntegrationTestCase, cls).tearDownClass() + def tearDown(self): """Run standard test teardown and log various show commands """ diff --git a/test/test_dhcp.py b/test/test_dhcp.py index 3d00f1b84680..d1af9fd9e7de 100644 --- a/test/test_dhcp.py +++ 
b/test/test_dhcp.py @@ -360,6 +360,16 @@ def verify_dhcp6_advert(self, pkt, intf, peer): # not sure why this is not decoding # adv = pkt[DHCP6_Advertise] + def wait_for_no_route(self, address, length, + n_tries=50, s_time=1): + while (n_tries): + if not find_route(self, address, length): + return True + n_tries = n_tries - 1 + self.sleep(s_time) + + return False + def test_dhcp_proxy(self): """ DHCPv4 Proxy """ @@ -1052,8 +1062,7 @@ def test_dhcp6_proxy(self): nd_entry = VppNeighbor(self, self.pg1.sw_if_index, self.pg1.remote_hosts[1].mac, - self.pg1.remote_hosts[1].ip6, - af=AF_INET6) + self.pg1.remote_hosts[1].ip6) nd_entry.add_vpp_config() # @@ -1267,9 +1276,10 @@ def test_dhcp_client(self): # remove the left over ARP entry self.vapi.ip_neighbor_add_del(self.pg3.sw_if_index, - mac_pton(self.pg3.remote_mac), + self.pg3.remote_mac, self.pg3.remote_ip4, is_add=0) + # # remove the DHCP config # @@ -1423,7 +1433,7 @@ def test_dhcp_client(self): # remove the left over ARP entry self.vapi.ip_neighbor_add_del(self.pg3.sw_if_index, - mac_pton(self.pg3.remote_mac), + self.pg3.remote_mac, self.pg3.remote_ip4, is_add=0) @@ -1530,20 +1540,15 @@ def test_dhcp_client(self): # remove the left over ARP entry self.vapi.ip_neighbor_add_del(self.pg3.sw_if_index, - mac_pton(self.pg3.remote_mac), + self.pg3.remote_mac, self.pg3.remote_ip4, is_add=0) # - # Sleep for the lease time - # - self.sleep(lease_time+1) - - # - # And now the route should be gone + # the route should be gone after the lease expires # - self.assertFalse(find_route(self, self.pg3.local_ip4, 32)) - self.assertFalse(find_route(self, self.pg3.local_ip4, 24)) + self.assertTrue(self.wait_for_no_route(self.pg3.local_ip4, 32)) + self.assertTrue(self.wait_for_no_route(self.pg3.local_ip4, 24)) # # remove the DHCP config diff --git a/test/test_flowprobe.py b/test/test_flowprobe.py index bb5062755f93..c7b1f722fe0f 100644 --- a/test/test_flowprobe.py +++ b/test/test_flowprobe.py @@ -158,6 +158,10 @@ class VppTestCase) before 
running the test case, set test case related super(MethodHolder, cls).tearDownClass() raise + @classmethod + def tearDownClass(cls): + super(MethodHolder, cls).tearDownClass() + def create_stream(self, src_if=None, dst_if=None, packets=None, size=None, ip_ver='v4'): """Create a packet stream to tickle the plugin @@ -325,6 +329,14 @@ def wait_for_cflow_packet(self, collector_intf, set_id=2, timeout=1, class Flowprobe(MethodHolder): """Template verification, timer tests""" + @classmethod + def setUpClass(cls): + super(Flowprobe, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(Flowprobe, cls).tearDownClass() + def test_0001(self): """ timer less than template timeout""" self.logger.info("FFP_TEST_START_0001") @@ -461,6 +473,14 @@ def test_cflow_packet(self): class Datapath(MethodHolder): """collect information on Ethernet, IP4 and IP6 datapath (no timers)""" + @classmethod + def setUpClass(cls): + super(Datapath, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(Datapath, cls).tearDownClass() + def test_templatesL2(self): """ verify template on L2 datapath""" self.logger.info("FFP_TEST_START_0000") @@ -828,6 +848,14 @@ def test_0002(self): class DisableIPFIX(MethodHolder): """Disable IPFIX""" + @classmethod + def setUpClass(cls): + super(DisableIPFIX, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(DisableIPFIX, cls).tearDownClass() + def test_0001(self): """ disable IPFIX after first packets""" self.logger.info("FFP_TEST_START_0001") @@ -869,6 +897,14 @@ def test_0001(self): class ReenableIPFIX(MethodHolder): """Re-enable IPFIX""" + @classmethod + def setUpClass(cls): + super(ReenableIPFIX, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(ReenableIPFIX, cls).tearDownClass() + def test_0011(self): """ disable IPFIX after first packets and re-enable after few packets """ @@ -891,7 +927,7 @@ def test_0011(self): self.wait_for_cflow_packet(self.collector, templates[1]) 
self.collector.get_capture(4) - # disble IPFIX + # disable IPFIX ipfix.disable_exporter() self.vapi.cli("ipfix flush") self.pg_enable_capture([self.collector]) @@ -930,6 +966,14 @@ def test_0011(self): class DisableFP(MethodHolder): """Disable Flowprobe feature""" + @classmethod + def setUpClass(cls): + super(DisableFP, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(DisableFP, cls).tearDownClass() + def test_0001(self): """ disable flowprobe feature after first packets""" self.logger.info("FFP_TEST_START_0001") @@ -970,6 +1014,14 @@ def test_0001(self): class ReenableFP(MethodHolder): """Re-enable Flowprobe feature""" + @classmethod + def setUpClass(cls): + super(ReenableFP, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(ReenableFP, cls).tearDownClass() + def test_0001(self): """ disable flowprobe feature after first packets and re-enable after few packets """ diff --git a/test/test_gbp.py b/test/test_gbp.py index 51df8d9e936f..fd8b79b26b6b 100644 --- a/test/test_gbp.py +++ b/test/test_gbp.py @@ -282,7 +282,7 @@ class VppGbpEndpointGroup(VppObject): GBP Endpoint Group """ - def __init__(self, test, epg, rd, bd, uplink, + def __init__(self, test, epg, sclass, rd, bd, uplink, bvi, bvi_ip4, bvi_ip6=None): self._test = test self.uplink = uplink @@ -292,10 +292,14 @@ def __init__(self, test, epg, rd, bd, uplink, self.epg = epg self.bd = bd self.rd = rd + self.sclass = sclass + if 0 == self.sclass: + self.sclass = 0xffff def add_vpp_config(self): self._test.vapi.gbp_endpoint_group_add( self.epg, + self.sclass, self.bd.bd.bd_id, self.rd.rd_id, self.uplink.sw_if_index if self.uplink else INDEX_INVALID) @@ -324,10 +328,12 @@ class VppGbpBridgeDomain(VppObject): GBP Bridge Domain """ - def __init__(self, test, bd, bvi, uu_flood=None, learn=True): + def __init__(self, test, bd, bvi, uu_fwd=None, + bm_flood=None, learn=True): self._test = test self.bvi = bvi - self.uu_flood = uu_flood + self.uu_fwd = uu_fwd + self.bm_flood = bm_flood 
self.bd = bd e = VppEnum.vl_api_gbp_bridge_domain_flags_t @@ -341,7 +347,8 @@ def add_vpp_config(self): self.bd.bd_id, self.learn, self.bvi.sw_if_index, - self.uu_flood.sw_if_index if self.uu_flood else INDEX_INVALID) + self.uu_fwd.sw_if_index if self.uu_fwd else INDEX_INVALID, + self.bm_flood.sw_if_index if self.bm_flood else INDEX_INVALID) self._test.registry.register(self, self._test.logger) def remove_vpp_config(self): @@ -731,23 +738,23 @@ def test_gbp(self): # 3 EPGs, 2 of which share a BD. # 2 NAT EPGs, one for floating-IP subnets, the other for internet # - epgs = [VppGbpEndpointGroup(self, 220, rd0, gbd1, self.pg4, + epgs = [VppGbpEndpointGroup(self, 220, 0, rd0, gbd1, self.pg4, self.loop0, "10.0.0.128", "2001:10::128"), - VppGbpEndpointGroup(self, 221, rd0, gbd1, self.pg5, + VppGbpEndpointGroup(self, 221, 0, rd0, gbd1, self.pg5, self.loop0, "10.0.1.128", "2001:10:1::128"), - VppGbpEndpointGroup(self, 222, rd0, gbd2, self.pg6, + VppGbpEndpointGroup(self, 222, 0, rd0, gbd2, self.pg6, self.loop1, "10.0.2.128", "2001:10:2::128"), - VppGbpEndpointGroup(self, 333, rd20, gbd20, self.pg7, + VppGbpEndpointGroup(self, 333, 0, rd20, gbd20, self.pg7, self.loop2, "11.0.0.128", "3001::128"), - VppGbpEndpointGroup(self, 444, rd20, gbd20, self.pg8, + VppGbpEndpointGroup(self, 444, 0, rd20, gbd20, self.pg8, self.loop2, "11.0.0.129", "3001::129")] @@ -1484,12 +1491,20 @@ def test_gbp_learn_l2(self): self.pg4.config_ip4() self.pg4.resolve_arp() + # + # Add a mcast destination VXLAN-GBP tunnel for B&M traffic + # + tun_bm = VppVxlanGbpTunnel(self, self.pg4.local_ip4, + "239.1.1.1", 88, + mcast_itf=self.pg4) + tun_bm.add_vpp_config() + # # a GBP bridge domain with a BVI and a UU-flood interface # bd1 = VppBridgeDomain(self, 1) bd1.add_vpp_config() - gbd1 = VppGbpBridgeDomain(self, bd1, self.loop0, self.pg3) + gbd1 = VppGbpBridgeDomain(self, bd1, self.loop0, self.pg3, tun_bm) gbd1.add_vpp_config() self.logger.info(self.vapi.cli("sh bridge 1 detail")) @@ -1502,12 +1517,12 @@ 
def test_gbp_learn_l2(self): # # The Endpoint-group in which we are learning endpoints # - epg_220 = VppGbpEndpointGroup(self, 220, rd1, gbd1, + epg_220 = VppGbpEndpointGroup(self, 220, 112, rd1, gbd1, None, self.loop0, "10.0.0.128", "2001:10::128") epg_220.add_vpp_config() - epg_330 = VppGbpEndpointGroup(self, 330, rd1, gbd1, + epg_330 = VppGbpEndpointGroup(self, 330, 113, rd1, gbd1, None, self.loop1, "10.0.1.128", "2001:11::128") @@ -1569,7 +1584,7 @@ def test_gbp_learn_l2(self): IP(src=self.pg2.remote_hosts[1].ip4, dst=self.pg2.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=99, gpid=220, flags=0x88) / + VXLAN(vni=99, gpid=112, flags=0x88) / Ether(src=l['mac'], dst=ep.mac) / IP(src=l['ip'], dst=ep.ip4.address) / UDP(sport=1234, dport=1234) / @@ -1618,7 +1633,7 @@ def test_gbp_learn_l2(self): IP(src=self.pg2.remote_hosts[1].ip4, dst=self.pg2.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=99, gpid=220, flags=0x88, gpflags="D") / + VXLAN(vni=99, gpid=112, flags=0x88, gpflags="D") / Ether(src=l['mac'], dst=ep.mac) / IP(src=l['ip'], dst=ep.ip4.address) / UDP(sport=1234, dport=1234) / @@ -1641,7 +1656,7 @@ def test_gbp_learn_l2(self): IP(src=self.pg2.remote_hosts[1].ip4, dst=self.pg2.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=99, gpid=220, flags=0x88) / + VXLAN(vni=99, gpid=112, flags=0x88) / Ether(src=l['mac'], dst=ep.mac) / IP(src=l['ip'], dst=ep.ip4.address) / UDP(sport=1234, dport=1234) / @@ -1670,7 +1685,7 @@ def test_gbp_learn_l2(self): self.assertEqual(rx[IP].dst, self.pg2.remote_hosts[1].ip4) self.assertEqual(rx[UDP].dport, 48879) # the UDP source port is a random value for hashing - self.assertEqual(rx[VXLAN].gpid, 220) + self.assertEqual(rx[VXLAN].gpid, 112) self.assertEqual(rx[VXLAN].vni, 99) self.assertTrue(rx[VXLAN].flags.G) self.assertTrue(rx[VXLAN].flags.Instance) @@ -1693,7 +1708,7 @@ def test_gbp_learn_l2(self): IP(src=self.pg2.remote_hosts[1].ip4, dst=self.pg2.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=99, 
gpid=330, flags=0x88, gpflags='A') / + VXLAN(vni=99, gpid=113, flags=0x88, gpflags='A') / Ether(src=l['mac'], dst=ep.mac) / IP(src=l['ip'], dst=ep.ip4.address) / UDP(sport=1234, dport=1234) / @@ -1726,7 +1741,7 @@ def test_gbp_learn_l2(self): IP(src=self.pg2.remote_hosts[1].ip4, dst=self.pg2.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=99, gpid=330, flags=0x88, gpflags='A') / + VXLAN(vni=99, gpid=113, flags=0x88, gpflags='A') / Ether(src=l['mac'], dst=ep.mac) / IP(src=l['ip'], dst=ep.ip4.address) / UDP(sport=1234, dport=1234) / @@ -1773,18 +1788,7 @@ def test_gbp_learn_l2(self): IP(dst="10.0.0.133", src=ep.ip4.address) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100)) - rxs = self.send_and_expect(ep.itf, [p_uu], gbd1.uu_flood) - - # - # Add a mcast destination VXLAN-GBP tunnel for B&M traffic - # - tun_bm = VppVxlanGbpTunnel(self, self.pg4.local_ip4, - "239.1.1.1", 88, - mcast_itf=self.pg4) - tun_bm.add_vpp_config() - bp_bm = VppBridgeDomainPort(self, bd1, tun_bm, - port_type=L2_PORT_TYPE.NORMAL) - bp_bm.add_vpp_config() + rxs = self.send_and_expect(ep.itf, [p_uu], gbd1.uu_fwd) self.logger.info(self.vapi.cli("sh bridge 1 detail")) @@ -1794,6 +1798,18 @@ def test_gbp_learn_l2(self): Raw('\xa5' * 100)) rxs = self.send_and_expect_only(ep.itf, [p_bm], tun_bm.mcast_itf) + for rx in rxs: + self.assertEqual(rx[IP].src, self.pg4.local_ip4) + self.assertEqual(rx[IP].dst, "239.1.1.1") + self.assertEqual(rx[UDP].dport, 48879) + # the UDP source port is a random value for hashing + self.assertEqual(rx[VXLAN].gpid, 112) + self.assertEqual(rx[VXLAN].vni, 88) + self.assertTrue(rx[VXLAN].flags.G) + self.assertTrue(rx[VXLAN].flags.Instance) + self.assertFalse(rx[VXLAN].gpflags.A) + self.assertFalse(rx[VXLAN].gpflags.D) + # # Check v6 Endpoints # @@ -1804,7 +1820,7 @@ def test_gbp_learn_l2(self): IP(src=self.pg2.remote_hosts[1].ip4, dst=self.pg2.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=99, gpid=330, flags=0x88, gpflags='A') / + VXLAN(vni=99, gpid=113, 
flags=0x88, gpflags='A') / Ether(src=l['mac'], dst=ep.mac) / IPv6(src=l['ip6'], dst=ep.ip6.address) / UDP(sport=1234, dport=1234) / @@ -1907,7 +1923,7 @@ def test_gbp_learn_vlan_l2(self): # # The Endpoint-group in which we are learning endpoints # - epg_220 = VppGbpEndpointGroup(self, 220, rd1, gbd1, + epg_220 = VppGbpEndpointGroup(self, 220, 441, rd1, gbd1, None, self.loop0, "10.0.0.128", "2001:10::128") @@ -1945,7 +1961,7 @@ def test_gbp_learn_vlan_l2(self): IP(src=self.pg2.remote_hosts[1].ip4, dst=self.pg2.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=99, gpid=220, flags=0x88) / + VXLAN(vni=99, gpid=441, flags=0x88) / Ether(src=l['mac'], dst=ep.mac) / IP(src=l['ip'], dst=ep.ip4.address) / UDP(sport=1234, dport=1234) / @@ -1993,7 +2009,7 @@ def test_gbp_learn_vlan_l2(self): self.assertEqual(rx[IP].dst, self.pg3.remote_ip4) self.assertEqual(rx[UDP].dport, 48879) # the UDP source port is a random value for hashing - self.assertEqual(rx[VXLAN].gpid, 220) + self.assertEqual(rx[VXLAN].gpid, 441) self.assertEqual(rx[VXLAN].vni, 116) self.assertTrue(rx[VXLAN].flags.G) self.assertTrue(rx[VXLAN].flags.Instance) @@ -2087,7 +2103,7 @@ def test_gbp_learn_l3(self): # # The Endpoint-group in which we are learning endpoints # - epg_220 = VppGbpEndpointGroup(self, 220, rd1, gbd1, + epg_220 = VppGbpEndpointGroup(self, 220, 441, rd1, gbd1, None, self.loop0, "10.0.0.128", "2001:10::128") @@ -2123,7 +2139,7 @@ def test_gbp_learn_l3(self): IP(src=self.pg2.remote_hosts[1].ip4, dst=self.pg2.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=101, gpid=220, flags=0x88) / + VXLAN(vni=101, gpid=441, flags=0x88) / Ether(src=l['mac'], dst="00:00:00:11:11:11") / IP(src=l['ip'], dst=ep.ip4.address) / UDP(sport=1234, dport=1234) / @@ -2160,7 +2176,7 @@ def test_gbp_learn_l3(self): self.assertEqual(rx[IP].dst, self.pg2.remote_hosts[1].ip4) self.assertEqual(rx[UDP].dport, 48879) # the UDP source port is a random value for hashing - self.assertEqual(rx[VXLAN].gpid, 220) + 
self.assertEqual(rx[VXLAN].gpid, 441) self.assertEqual(rx[VXLAN].vni, 101) self.assertTrue(rx[VXLAN].flags.G) self.assertTrue(rx[VXLAN].flags.Instance) @@ -2190,7 +2206,7 @@ def test_gbp_learn_l3(self): IP(src=self.pg2.remote_hosts[1].ip4, dst=self.pg2.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=101, gpid=220, flags=0x88) / + VXLAN(vni=101, gpid=441, flags=0x88) / Ether(src=l['mac'], dst="00:00:00:11:11:11") / IPv6(src=l['ip6'], dst=ep.ip6.address) / UDP(sport=1234, dport=1234) / @@ -2233,7 +2249,7 @@ def test_gbp_learn_l3(self): self.assertEqual(rx[IP].dst, self.pg2.remote_hosts[1].ip4) self.assertEqual(rx[UDP].dport, 48879) # the UDP source port is a random value for hashing - self.assertEqual(rx[VXLAN].gpid, 220) + self.assertEqual(rx[VXLAN].gpid, 441) self.assertEqual(rx[VXLAN].vni, 101) self.assertTrue(rx[VXLAN].flags.G) self.assertTrue(rx[VXLAN].flags.Instance) @@ -2281,7 +2297,7 @@ def test_gbp_learn_l3(self): self.assertEqual(rx[IP].dst, self.pg4.remote_ip4) self.assertEqual(rx[UDP].dport, 48879) # the UDP source port is a random value for hashing - self.assertEqual(rx[VXLAN].gpid, 220) + self.assertEqual(rx[VXLAN].gpid, 441) self.assertEqual(rx[VXLAN].vni, 114) self.assertTrue(rx[VXLAN].flags.G) self.assertTrue(rx[VXLAN].flags.Instance) @@ -2297,10 +2313,10 @@ def test_gbp_learn_l3(self): # arriving on an unknown TEP p = (Ether(src=self.pg2.remote_mac, dst=self.pg2.local_mac) / - IP(src=self.pg2.remote_hosts[1].ip4, + IP(src=self.pg2.remote_hosts[2].ip4, dst=self.pg2.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=101, gpid=220, flags=0x88) / + VXLAN(vni=101, gpid=441, flags=0x88) / Ether(src=l['mac'], dst="00:00:00:11:11:11") / IP(src=l['ip'], dst=ep.ip4.address) / UDP(sport=1234, dport=1234) / @@ -2312,7 +2328,7 @@ def test_gbp_learn_l3(self): tep1_sw_if_index = find_vxlan_gbp_tunnel( self, self.pg2.local_ip4, - self.pg2.remote_hosts[1].ip4, + self.pg2.remote_hosts[2].ip4, vx_tun_l3.vni) self.assertNotEqual(INDEX_INVALID, 
tep1_sw_if_index) @@ -2372,7 +2388,7 @@ def test_gbp_learn_l3(self): self.assertEqual(rx[IP].dst, self.pg2.remote_hosts[1].ip4) self.assertEqual(rx[UDP].dport, 48879) # the UDP source port is a random value for hashing - self.assertEqual(rx[VXLAN].gpid, 220) + self.assertEqual(rx[VXLAN].gpid, 441) self.assertEqual(rx[VXLAN].vni, 101) self.assertTrue(rx[VXLAN].flags.G) self.assertTrue(rx[VXLAN].flags.Instance) @@ -2425,7 +2441,7 @@ def test_gbp_learn_l3(self): IP(src=self.pg2.remote_hosts[1].ip4, dst=self.pg2.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=101, gpid=220, flags=0x88) / + VXLAN(vni=101, gpid=441, flags=0x88) / Ether(src=l['mac'], dst="00:00:00:11:11:11") / IP(src=learnt[1]['ip'], dst=ep.ip4.address) / UDP(sport=1234, dport=1234) / @@ -2519,17 +2535,17 @@ def test_gbp_redirect(self): # # The Endpoint-groups in which we are learning endpoints # - epg_220 = VppGbpEndpointGroup(self, 220, rd1, gbd1, + epg_220 = VppGbpEndpointGroup(self, 220, 440, rd1, gbd1, None, gbd1.bvi, "10.0.0.128", "2001:10::128") epg_220.add_vpp_config() - epg_221 = VppGbpEndpointGroup(self, 221, rd1, gbd2, + epg_221 = VppGbpEndpointGroup(self, 221, 441, rd1, gbd2, None, gbd2.bvi, "10.0.1.128", "2001:11::128") epg_221.add_vpp_config() - epg_222 = VppGbpEndpointGroup(self, 222, rd1, gbd1, + epg_222 = VppGbpEndpointGroup(self, 222, 442, rd1, gbd1, None, gbd1.bvi, "10.0.2.128", "2001:12::128") @@ -2557,12 +2573,12 @@ def test_gbp_redirect(self): # # EPGs in which the service endpoints exist # - epg_320 = VppGbpEndpointGroup(self, 320, rd1, gbd3, + epg_320 = VppGbpEndpointGroup(self, 320, 550, rd1, gbd3, None, gbd1.bvi, "12.0.0.128", "4001:10::128") epg_320.add_vpp_config() - epg_321 = VppGbpEndpointGroup(self, 321, rd1, gbd4, + epg_321 = VppGbpEndpointGroup(self, 321, 551, rd1, gbd4, None, gbd2.bvi, "12.0.1.128", "4001:11::128") @@ -2915,7 +2931,7 @@ def test_gbp_redirect(self): IP(src=self.pg7.remote_ip4, dst=self.pg7.local_ip4) / UDP(sport=1234, dport=48879) / - 
VXLAN(vni=444, gpid=221, flags=0x88) / + VXLAN(vni=444, gpid=441, flags=0x88) / Ether(src="00:22:22:22:22:33", dst=str(self.router_mac)) / IP(src="10.0.0.88", dst=ep1.ip4.address) / UDP(sport=1234, dport=1234) / @@ -2933,7 +2949,7 @@ def test_gbp_redirect(self): IP(src=self.pg7.remote_ip4, dst=self.pg7.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=444, gpid=221, flags=0x88) / + VXLAN(vni=444, gpid=441, flags=0x88) / Ether(src="00:22:22:22:22:33", dst=str(self.router_mac)) / IPv6(src="2001:10::88", dst=ep1.ip6.address) / UDP(sport=1234, dport=1234) / @@ -3052,18 +3068,26 @@ def test_gbp_l3_out(self): self.pg7.config_ip4() self.pg7.resolve_arp() + # + # a multicast vxlan-gbp tunnel for broadcast in the BD + # + tun_bm = VppVxlanGbpTunnel(self, self.pg7.local_ip4, + "239.1.1.1", 88, + mcast_itf=self.pg7) + tun_bm.add_vpp_config() + # # a GBP external bridge domains for the EPs # bd1 = VppBridgeDomain(self, 1) bd1.add_vpp_config() - gbd1 = VppGbpBridgeDomain(self, bd1, self.loop0) + gbd1 = VppGbpBridgeDomain(self, bd1, self.loop0, None, tun_bm) gbd1.add_vpp_config() # # The Endpoint-groups in which the external endpoints exist # - epg_220 = VppGbpEndpointGroup(self, 220, rd1, gbd1, + epg_220 = VppGbpEndpointGroup(self, 220, 113, rd1, gbd1, None, gbd1.bvi, "10.0.0.128", "2001:10::128") @@ -3079,7 +3103,7 @@ def test_gbp_l3_out(self): l3o_1 = VppGbpSubnet( self, rd1, "10.0.0.0", 24, VppEnum.vl_api_gbp_subnet_type_t.GBP_API_SUBNET_L3_OUT, - epg=200) + epg=220) l3o_1.add_vpp_config() # @@ -3092,18 +3116,7 @@ def test_gbp_l3_out(self): ext_itf.add_vpp_config() # - # a multicast vxlan-gbp tunnel for broadcast in the BD - # - tun_bm = VppVxlanGbpTunnel(self, self.pg7.local_ip4, - "239.1.1.1", 88, - mcast_itf=self.pg7) - tun_bm.add_vpp_config() - bp_bm = VppBridgeDomainPort(self, bd1, tun_bm, - port_type=L2_PORT_TYPE.NORMAL) - bp_bm.add_vpp_config() - - # - # an unicast vxlan-gbp for inter-BD traffic + # an unicast vxlan-gbp for inter-RD traffic # vx_tun_l3 = 
VppGbpVxlanTunnel( self, 444, rd1.rd_id, @@ -3136,7 +3149,7 @@ def test_gbp_l3_out(self): self.assertTrue(rx[VXLAN].flags.G) self.assertTrue(rx[VXLAN].flags.Instance) # policy was applied to the original IP packet - self.assertEqual(rx[VXLAN].gpid, 200) + self.assertEqual(rx[VXLAN].gpid, 113) self.assertTrue(rx[VXLAN].gpflags.A) self.assertFalse(rx[VXLAN].gpflags.D) @@ -3175,7 +3188,7 @@ def test_gbp_l3_out(self): IP(src=self.pg7.remote_ip4, dst=self.pg7.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=444, gpid=220, flags=0x88) / + VXLAN(vni=444, gpid=113, flags=0x88) / Ether(src=self.pg0.remote_mac, dst=str(self.router_mac)) / IP(src="10.0.0.101", dst="10.0.0.1") / UDP(sport=1234, dport=1234) / @@ -3203,7 +3216,7 @@ def test_gbp_l3_out(self): IP(src=self.pg7.remote_ip4, dst=self.pg7.local_ip4) / UDP(sport=1234, dport=48879) / - VXLAN(vni=444, gpid=220, flags=0x88) / + VXLAN(vni=444, gpid=113, flags=0x88) / Ether(src=self.pg0.remote_mac, dst=str(self.router_mac)) / IP(src="10.0.0.101", dst="10.220.0.1") / UDP(sport=1234, dport=1234) / @@ -3211,102 +3224,6 @@ def test_gbp_l3_out(self): rxs = self.send_and_expect(self.pg7, p * 1, self.pg0) - # - # another external subnet, this time in a different EPG - # - ip_200 = VppIpRoute(self, "10.200.0.0", 24, - [VppRoutePath(eep.ip4.address, - eep.epg.bvi.sw_if_index)], - table_id=t4.table_id) - ip_200.add_vpp_config() - - l3o_200 = VppGbpSubnet( - self, rd1, "10.200.0.0", 24, - VppEnum.vl_api_gbp_subnet_type_t.GBP_API_SUBNET_L3_OUT, - epg=200) - l3o_200.add_vpp_config() - - p = (Ether(src=self.pg7.remote_mac, - dst=self.pg7.local_mac) / - IP(src=self.pg7.remote_ip4, - dst=self.pg7.local_ip4) / - UDP(sport=1234, dport=48879) / - VXLAN(vni=444, gpid=220, flags=0x88) / - Ether(src=self.pg0.remote_mac, dst=str(self.router_mac)) / - IP(src="10.0.0.101", dst="10.200.0.1") / - UDP(sport=1234, dport=1234) / - Raw('\xa5' * 100)) - - # - # packets dropped due to lack of contract. 
- # - rxs = self.send_and_assert_no_replies(self.pg7, p * 1) - - # - # from the the subnet in EPG 220 beyond the external to remote - # - p4 = (Ether(src=self.pg0.remote_mac, dst=str(self.router_mac)) / - Dot1Q(vlan=100) / - IP(src="10.220.0.1", dst=rep.ip4.address) / - UDP(sport=1234, dport=1234) / - Raw('\xa5' * 100)) - - rxs = self.send_and_expect(self.pg0, p4 * 1, self.pg7) - - for rx in rxs: - self.assertEqual(rx[Ether].src, self.pg7.local_mac) - self.assertEqual(rx[Ether].dst, self.pg7.remote_mac) - self.assertEqual(rx[IP].src, self.pg7.local_ip4) - self.assertEqual(rx[IP].dst, self.pg7.remote_ip4) - self.assertEqual(rx[VXLAN].vni, 444) - self.assertTrue(rx[VXLAN].flags.G) - self.assertTrue(rx[VXLAN].flags.Instance) - self.assertTrue(rx[VXLAN].gpflags.A) - self.assertFalse(rx[VXLAN].gpflags.D) - - # - # from the the subnet in EPG 200 beyond the external to remote - # dropped due to no contract - # - p4 = (Ether(src=self.pg0.remote_mac, dst=str(self.router_mac)) / - Dot1Q(vlan=100) / - IP(src="10.200.0.1", dst=rep.ip4.address) / - UDP(sport=1234, dport=1234) / - Raw('\xa5' * 100)) - - rxs = self.send_and_assert_no_replies(self.pg0, p4 * 1) - - # - # add a contract - # - acl = VppGbpAcl(self) - rule = acl.create_rule(permit_deny=1, proto=17) - rule2 = acl.create_rule(is_ipv6=1, permit_deny=1, proto=17) - acl_index = acl.add_vpp_config([rule, rule2]) - c1 = VppGbpContract( - self, 200, 220, acl_index, - [VppGbpContractRule( - VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT, - []), - VppGbpContractRule( - VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT, - [])], - [ETH_P_IP, ETH_P_IPV6]) - c1.add_vpp_config() - - rxs = self.send_and_expect(self.pg0, p4 * 1, self.pg7) - - for rx in rxs: - self.assertEqual(rx[Ether].src, self.pg7.local_mac) - self.assertEqual(rx[Ether].dst, self.pg7.remote_mac) - self.assertEqual(rx[IP].src, self.pg7.local_ip4) - self.assertEqual(rx[IP].dst, self.pg7.remote_ip4) - self.assertEqual(rx[VXLAN].vni, 444) - 
self.assertTrue(rx[VXLAN].flags.G) - self.assertTrue(rx[VXLAN].flags.Instance) - self.assertTrue(rx[VXLAN].gpflags.A) - self.assertFalse(rx[VXLAN].gpflags.D) - # # cleanup # diff --git a/test/test_ip6.py b/test/test_ip6.py index 930d556a876a..bfb5506e3fe9 100644 --- a/test/test_ip6.py +++ b/test/test_ip6.py @@ -26,6 +26,7 @@ from vpp_neighbor import find_nbr, VppNeighbor from vpp_pg_interface import is_ipv6_misc from vpp_sub_interface import VppSubInterface, VppDot1QSubint +from ipaddress import IPv6Network, IPv4Network AF_INET6 = socket.AF_INET6 @@ -445,7 +446,6 @@ def test_ns(self): self.pg0.sw_if_index, self.pg0.remote_hosts[2].mac, self.pg0.remote_hosts[2].ip6, - af=AF_INET6, is_no_fib_entry=1) nd_entry.add_vpp_config() @@ -454,8 +454,7 @@ def test_ns(self): # self.assertTrue(find_nbr(self, self.pg0.sw_if_index, - self.pg0._remote_hosts[2].ip6, - inet=AF_INET6)) + self.pg0._remote_hosts[2].ip6)) self.assertFalse(find_route(self, self.pg0._remote_hosts[2].ip6, 128, @@ -483,8 +482,7 @@ def test_ns(self): # self.assertTrue(find_nbr(self, self.pg0.sw_if_index, - self.pg0._remote_hosts[2].ip6_ll, - inet=AF_INET6)) + self.pg0._remote_hosts[2].ip6_ll)) self.assertFalse(find_route(self, self.pg0._remote_hosts[2].ip6_ll, 128, @@ -511,8 +509,7 @@ def test_ns(self): # self.assertTrue(find_nbr(self, self.pg0.sw_if_index, - self.pg0._remote_hosts[3].ip6_ll, - inet=AF_INET6)) + self.pg0._remote_hosts[3].ip6_ll)) self.assertFalse(find_route(self, self.pg0._remote_hosts[3].ip6_ll, 128, @@ -532,14 +529,12 @@ def test_ns_duplicates(self): ns_pg1 = VppNeighbor(self, self.pg1.sw_if_index, self.pg1.remote_hosts[1].mac, - self.pg1.remote_hosts[1].ip6, - af=AF_INET6) + self.pg1.remote_hosts[1].ip6) ns_pg1.add_vpp_config() ns_pg2 = VppNeighbor(self, self.pg2.sw_if_index, self.pg2.remote_mac, - self.pg1.remote_hosts[1].ip6, - af=AF_INET6) + self.pg1.remote_hosts[1].ip6) ns_pg2.add_vpp_config() # @@ -755,7 +750,7 @@ def test_rs(self): # # Configure The RA to announce the links prefix # 
- self.pg0.ip6_ra_prefix(self.pg0.local_ip6n, + self.pg0.ip6_ra_prefix(self.pg0.local_ip6, self.pg0.local_ip6_prefix_len) # @@ -781,7 +776,7 @@ def test_rs(self): # Change the prefix info to not off-link # L-flag is clear # - self.pg0.ip6_ra_prefix(self.pg0.local_ip6n, + self.pg0.ip6_ra_prefix(self.pg0.local_ip6, self.pg0.local_ip6_prefix_len, off_link=1) @@ -801,7 +796,7 @@ def test_rs(self): # Change the prefix info to not off-link, no-autoconfig # L and A flag are clear in the advert # - self.pg0.ip6_ra_prefix(self.pg0.local_ip6n, + self.pg0.ip6_ra_prefix(self.pg0.local_ip6, self.pg0.local_ip6_prefix_len, off_link=1, no_autoconfig=1) @@ -822,7 +817,7 @@ def test_rs(self): # Change the flag settings back to the defaults # L and A flag are set in the advert # - self.pg0.ip6_ra_prefix(self.pg0.local_ip6n, + self.pg0.ip6_ra_prefix(self.pg0.local_ip6, self.pg0.local_ip6_prefix_len) opt = ICMPv6NDOptPrefixInfo( @@ -841,7 +836,7 @@ def test_rs(self): # Change the prefix info to not off-link, no-autoconfig # L and A flag are clear in the advert # - self.pg0.ip6_ra_prefix(self.pg0.local_ip6n, + self.pg0.ip6_ra_prefix(self.pg0.local_ip6, self.pg0.local_ip6_prefix_len, off_link=1, no_autoconfig=1) @@ -862,7 +857,7 @@ def test_rs(self): # Use the reset to defults option to revert to defaults # L and A flag are clear in the advert # - self.pg0.ip6_ra_prefix(self.pg0.local_ip6n, + self.pg0.ip6_ra_prefix(self.pg0.local_ip6, self.pg0.local_ip6_prefix_len, use_default=1) @@ -881,7 +876,7 @@ def test_rs(self): # # Advertise Another prefix. 
With no L-flag/A-flag # - self.pg0.ip6_ra_prefix(self.pg1.local_ip6n, + self.pg0.ip6_ra_prefix(self.pg1.local_ip6, self.pg1.local_ip6_prefix_len, off_link=1, no_autoconfig=1) @@ -911,7 +906,7 @@ def test_rs(self): # Remove the first refix-info - expect the second is still in the # advert # - self.pg0.ip6_ra_prefix(self.pg0.local_ip6n, + self.pg0.ip6_ra_prefix(self.pg0.local_ip6, self.pg0.local_ip6_prefix_len, is_no=1) @@ -930,7 +925,7 @@ def test_rs(self): # # Remove the second prefix-info - expect no prefix-info i nthe adverts # - self.pg0.ip6_ra_prefix(self.pg1.local_ip6n, + self.pg0.ip6_ra_prefix(self.pg1.local_ip6, self.pg1.local_ip6_prefix_len, is_no=1) @@ -1064,11 +1059,13 @@ def test_rd_send_router_solicitation(self): self.pg1.local_mac) def verify_prefix_info(self, reported_prefix, prefix_option): - prefix = socket.inet_pton(socket.AF_INET6, - prefix_option.getfieldval("prefix")) - self.assert_equal(reported_prefix.dst_address, prefix) - self.assert_equal(reported_prefix.dst_address_length, - prefix_option.getfieldval("prefixlen")) + prefix = IPv6Network( + unicode(prefix_option.getfieldval("prefix") + + "/" + + str(prefix_option.getfieldval("prefixlen"))), + strict=False) + self.assert_equal(reported_prefix.prefix.network_address, + prefix.network_address) L = prefix_option.getfieldval("L") A = prefix_option.getfieldval("A") option_flags = (L << 7) | (A << 6) @@ -1366,8 +1363,7 @@ def test_nd_proxy(self): # self.assertTrue(find_nbr(self, self.pg1.sw_if_index, - self.pg0._remote_hosts[2].ip6, - inet=AF_INET6)) + self.pg0._remote_hosts[2].ip6)) # # ... and we can route traffic to it @@ -1428,8 +1424,7 @@ def test_nd_proxy(self): self.assertTrue(find_nbr(self, self.pg2.sw_if_index, - self.pg0._remote_hosts[3].ip6, - inet=AF_INET6)) + self.pg0._remote_hosts[3].ip6)) # # hosts can communicate. 
pg2->pg1 @@ -1469,12 +1464,10 @@ def test_nd_proxy(self): self.assertFalse(find_nbr(self, self.pg2.sw_if_index, - self.pg0._remote_hosts[3].ip6, - inet=AF_INET6)) + self.pg0._remote_hosts[3].ip6)) self.assertFalse(find_nbr(self, self.pg1.sw_if_index, - self.pg0._remote_hosts[2].ip6, - inet=AF_INET6)) + self.pg0._remote_hosts[2].ip6)) # # no longer proxy-ing... diff --git a/test/test_ipip.py b/test/test_ipip.py index e4a893bca516..cb2eaafb5fc0 100644 --- a/test/test_ipip.py +++ b/test/test_ipip.py @@ -250,6 +250,7 @@ def setUpClass(cls): cls.interfaces = list(cls.pg_interfaces) def setUp(self): + super(TestIPIP6, self).setUp() for i in self.interfaces: i.admin_up() i.config_ip4() diff --git a/test/test_ipsec_ah.py b/test/test_ipsec_ah.py index 928cd53c1f10..f8add0d3c9ca 100644 --- a/test/test_ipsec_ah.py +++ b/test/test_ipsec_ah.py @@ -6,6 +6,11 @@ from framework import VppTestRunner from template_ipsec import TemplateIpsec, IpsecTraTests, IpsecTunTests from template_ipsec import IpsecTcpTests +from vpp_ipsec import VppIpsecSA, VppIpsecSpd, VppIpsecSpdEntry,\ + VppIpsecSpdItfBinding +from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip import DpoProto +from vpp_papi import VppEnum class TemplateIpsecAh(TemplateIpsec): @@ -29,37 +34,45 @@ class TemplateIpsecAh(TemplateIpsec): --- --- --- """ - encryption_type = AH - - @classmethod - def setUpClass(cls): - super(TemplateIpsecAh, cls).setUpClass() - cls.tun_if = cls.pg0 - cls.tra_if = cls.pg2 - cls.logger.info(cls.vapi.ppcli("show int addr")) - cls.vapi.ipsec_spd_add_del(cls.tun_spd_id) - cls.vapi.ipsec_interface_add_del_spd(cls.tun_spd_id, - cls.tun_if.sw_if_index) - cls.vapi.ipsec_spd_add_del(cls.tra_spd_id) - cls.vapi.ipsec_interface_add_del_spd(cls.tra_spd_id, - cls.tra_if.sw_if_index) - for _, p in cls.params.items(): - cls.config_ah_tra(p) - cls.configure_sa_tra(p) - cls.logger.info(cls.vapi.ppcli("show ipsec")) - for _, p in cls.params.items(): - cls.config_ah_tun(p) - 
cls.logger.info(cls.vapi.ppcli("show ipsec")) - for _, p in cls.params.items(): - src = socket.inet_pton(p.addr_type, p.remote_tun_if_host) - cls.vapi.ip_add_del_route(src, p.addr_len, - cls.tun_if.remote_addr_n[p.addr_type], - is_ipv6=p.is_ipv6) - - @classmethod - def config_ah_tun(cls, params): + def setUp(self): + super(TemplateIpsecAh, self).setUp() + + self.encryption_type = AH + self.tun_if = self.pg0 + self.tra_if = self.pg2 + self.logger.info(self.vapi.ppcli("show int addr")) + + self.tra_spd = VppIpsecSpd(self, self.tra_spd_id) + self.tra_spd.add_vpp_config() + VppIpsecSpdItfBinding(self, self.tra_spd, + self.tra_if).add_vpp_config() + self.tun_spd = VppIpsecSpd(self, self.tun_spd_id) + self.tun_spd.add_vpp_config() + VppIpsecSpdItfBinding(self, self.tun_spd, + self.tun_if).add_vpp_config() + + for _, p in self.params.items(): + self.config_ah_tra(p) + self.configure_sa_tra(p) + self.logger.info(self.vapi.ppcli("show ipsec")) + for _, p in self.params.items(): + self.config_ah_tun(p) + self.logger.info(self.vapi.ppcli("show ipsec")) + for _, p in self.params.items(): + d = DpoProto.DPO_PROTO_IP6 if p.is_ipv6 else DpoProto.DPO_PROTO_IP4 + VppIpRoute(self, p.remote_tun_if_host, p.addr_len, + [VppRoutePath(self.tun_if.remote_addr[p.addr_type], + 0xffffffff, + proto=d)], + is_ip6=p.is_ipv6).add_vpp_config() + + def tearDown(self): + super(TemplateIpsecAh, self).tearDown() + if not self.vpp_dead: + self.vapi.cli("show hardware") + + def config_ah_tun(self, params): addr_type = params.addr_type - is_ipv6 = params.is_ipv6 scapy_tun_sa_id = params.scapy_tun_sa_id scapy_tun_spi = params.scapy_tun_spi vpp_tun_sa_id = params.vpp_tun_sa_id @@ -71,56 +84,69 @@ def config_ah_tun(cls, params): remote_tun_if_host = params.remote_tun_if_host addr_any = params.addr_any addr_bcast = params.addr_bcast - cls.vapi.ipsec_sad_add_del_entry(scapy_tun_sa_id, scapy_tun_spi, - auth_algo_vpp_id, auth_key, - crypt_algo_vpp_id, crypt_key, - cls.vpp_ah_protocol, - 
cls.tun_if.local_addr_n[addr_type], - cls.tun_if.remote_addr_n[addr_type], - is_tunnel=1, is_tunnel_ipv6=is_ipv6) - cls.vapi.ipsec_sad_add_del_entry(vpp_tun_sa_id, vpp_tun_spi, - auth_algo_vpp_id, auth_key, - crypt_algo_vpp_id, crypt_key, - cls.vpp_ah_protocol, - cls.tun_if.remote_addr_n[addr_type], - cls.tun_if.local_addr_n[addr_type], - is_tunnel=1, is_tunnel_ipv6=is_ipv6) - l_startaddr = r_startaddr = socket.inet_pton(addr_type, addr_any) - l_stopaddr = r_stopaddr = socket.inet_pton(addr_type, addr_bcast) - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, vpp_tun_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, is_ipv6=is_ipv6, - protocol=socket.IPPROTO_AH) - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, vpp_tun_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, is_outbound=0, - is_ipv6=is_ipv6, - protocol=socket.IPPROTO_AH) - l_startaddr = l_stopaddr = socket.inet_pton(addr_type, - remote_tun_if_host) - r_startaddr = r_stopaddr = cls.pg1.remote_addr_n[addr_type] - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, vpp_tun_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, priority=10, policy=3, - is_outbound=0, is_ipv6=is_ipv6) - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, scapy_tun_sa_id, - r_startaddr, r_stopaddr, l_startaddr, - l_stopaddr, priority=10, policy=3, - is_ipv6=is_ipv6) - r_startaddr = r_stopaddr = cls.pg0.local_addr_n[addr_type] - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, vpp_tun_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, priority=20, policy=3, - is_outbound=0, is_ipv6=is_ipv6) - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, scapy_tun_sa_id, - r_startaddr, r_stopaddr, l_startaddr, - l_stopaddr, priority=20, policy=3, - is_ipv6=is_ipv6) - - @classmethod - def config_ah_tra(cls, params): + e = VppEnum.vl_api_ipsec_spd_action_t + + VppIpsecSA(self, scapy_tun_sa_id, scapy_tun_spi, + auth_algo_vpp_id, auth_key, + crypt_algo_vpp_id, crypt_key, + self.vpp_ah_protocol, + 
self.tun_if.local_addr[addr_type], + self.tun_if.remote_addr[addr_type]).add_vpp_config() + VppIpsecSA(self, vpp_tun_sa_id, vpp_tun_spi, + auth_algo_vpp_id, auth_key, + crypt_algo_vpp_id, crypt_key, + self.vpp_ah_protocol, + self.tun_if.remote_addr[addr_type], + self.tun_if.local_addr[addr_type]).add_vpp_config() + + params.spd_policy_in_any = VppIpsecSpdEntry(self, self.tun_spd, + vpp_tun_sa_id, + addr_any, addr_bcast, + addr_any, addr_bcast, + socket.IPPROTO_AH) + params.spd_policy_in_any.add_vpp_config() + params.spd_policy_out_any = VppIpsecSpdEntry(self, self.tun_spd, + vpp_tun_sa_id, + addr_any, addr_bcast, + addr_any, addr_bcast, + socket.IPPROTO_AH, + is_outbound=0) + params.spd_policy_out_any.add_vpp_config() + + VppIpsecSpdEntry(self, self.tun_spd, vpp_tun_sa_id, + remote_tun_if_host, + remote_tun_if_host, + self.pg1.remote_addr[addr_type], + self.pg1.remote_addr[addr_type], + 0, priority=10, + policy=e.IPSEC_API_SPD_ACTION_PROTECT, + is_outbound=0).add_vpp_config() + VppIpsecSpdEntry(self, self.tun_spd, scapy_tun_sa_id, + self.pg1.remote_addr[addr_type], + self.pg1.remote_addr[addr_type], + remote_tun_if_host, + remote_tun_if_host, + 0, policy=e.IPSEC_API_SPD_ACTION_PROTECT, + priority=10).add_vpp_config() + + VppIpsecSpdEntry(self, self.tun_spd, vpp_tun_sa_id, + remote_tun_if_host, + remote_tun_if_host, + self.pg0.local_addr[addr_type], + self.pg0.local_addr[addr_type], + 0, priority=20, + policy=e.IPSEC_API_SPD_ACTION_PROTECT, + is_outbound=0).add_vpp_config() + VppIpsecSpdEntry(self, self.tun_spd, scapy_tun_sa_id, + self.pg0.local_addr[addr_type], + self.pg0.local_addr[addr_type], + remote_tun_if_host, + remote_tun_if_host, + 0, policy=e.IPSEC_API_SPD_ACTION_PROTECT, + priority=20).add_vpp_config() + + def config_ah_tra(self, params): addr_type = params.addr_type - is_ipv6 = params.is_ipv6 scapy_tra_sa_id = params.scapy_tra_sa_id scapy_tra_spi = params.scapy_tra_spi vpp_tra_sa_id = params.vpp_tra_sa_id @@ -131,44 +157,46 @@ def config_ah_tra(cls, 
params): crypt_key = params.crypt_key addr_any = params.addr_any addr_bcast = params.addr_bcast - cls.vapi.ipsec_sad_add_del_entry(scapy_tra_sa_id, scapy_tra_spi, - auth_algo_vpp_id, auth_key, - crypt_algo_vpp_id, crypt_key, - cls.vpp_ah_protocol, is_tunnel=0, - is_tunnel_ipv6=0, - use_anti_replay=1) - cls.vapi.ipsec_sad_add_del_entry(vpp_tra_sa_id, vpp_tra_spi, - auth_algo_vpp_id, auth_key, - crypt_algo_vpp_id, crypt_key, - cls.vpp_ah_protocol, is_tunnel=0, - is_tunnel_ipv6=0, - use_anti_replay=1) - l_startaddr = r_startaddr = socket.inet_pton(addr_type, addr_any) - l_stopaddr = r_stopaddr = socket.inet_pton(addr_type, addr_bcast) - cls.vapi.ipsec_spd_add_del_entry(cls.tra_spd_id, vpp_tra_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, is_ipv6=is_ipv6, - protocol=socket.IPPROTO_AH) - cls.vapi.ipsec_spd_add_del_entry(cls.tra_spd_id, scapy_tra_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, is_outbound=0, - is_ipv6=is_ipv6, - protocol=socket.IPPROTO_AH) - l_startaddr = l_stopaddr = cls.tra_if.local_addr_n[addr_type] - r_startaddr = r_stopaddr = cls.tra_if.remote_addr_n[addr_type] - cls.vapi.ipsec_spd_add_del_entry(cls.tra_spd_id, vpp_tra_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, priority=10, policy=3, - is_outbound=0, is_ipv6=is_ipv6) - cls.vapi.ipsec_spd_add_del_entry(cls.tra_spd_id, scapy_tra_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, priority=10, - policy=3, is_ipv6=is_ipv6) - - def tearDown(self): - super(TemplateIpsecAh, self).tearDown() - if not self.vpp_dead: - self.vapi.cli("show hardware") + flags = (VppEnum.vl_api_ipsec_sad_flags_t. 
+ IPSEC_API_SAD_FLAG_USE_ANTI_REPLAY) + e = VppEnum.vl_api_ipsec_spd_action_t + + VppIpsecSA(self, scapy_tra_sa_id, scapy_tra_spi, + auth_algo_vpp_id, auth_key, + crypt_algo_vpp_id, crypt_key, + self.vpp_ah_protocol, + flags=flags).add_vpp_config() + VppIpsecSA(self, vpp_tra_sa_id, vpp_tra_spi, + auth_algo_vpp_id, auth_key, + crypt_algo_vpp_id, crypt_key, + self.vpp_ah_protocol, + flags=flags).add_vpp_config() + + VppIpsecSpdEntry(self, self.tra_spd, vpp_tra_sa_id, + addr_any, addr_bcast, + addr_any, addr_bcast, + socket.IPPROTO_AH).add_vpp_config() + VppIpsecSpdEntry(self, self.tra_spd, scapy_tra_sa_id, + addr_any, addr_bcast, + addr_any, addr_bcast, + socket.IPPROTO_AH, + is_outbound=0).add_vpp_config() + + VppIpsecSpdEntry(self, self.tra_spd, vpp_tra_sa_id, + self.tra_if.local_addr[addr_type], + self.tra_if.local_addr[addr_type], + self.tra_if.remote_addr[addr_type], + self.tra_if.remote_addr[addr_type], + 0, priority=10, + policy=e.IPSEC_API_SPD_ACTION_PROTECT, + is_outbound=0).add_vpp_config() + VppIpsecSpdEntry(self, self.tra_spd, scapy_tra_sa_id, + self.tra_if.local_addr[addr_type], + self.tra_if.local_addr[addr_type], + self.tra_if.remote_addr[addr_type], + self.tra_if.remote_addr[addr_type], + 0, policy=e.IPSEC_API_SPD_ACTION_PROTECT, + priority=10).add_vpp_config() class TestIpsecAh1(TemplateIpsecAh, IpsecTraTests, IpsecTunTests): diff --git a/test/test_ipsec_api.py b/test/test_ipsec_api.py index fed996e6a593..8aea42ab3df7 100644 --- a/test/test_ipsec_api.py +++ b/test/test_ipsec_api.py @@ -1,47 +1,58 @@ import unittest from framework import VppTestCase, VppTestRunner -from template_ipsec import TemplateIpsec +from template_ipsec import TemplateIpsec, IPsecIPv4Params +from vpp_papi import VppEnum class IpsecApiTestCase(VppTestCase): """ IPSec API tests """ - @classmethod - def setUpClass(cls): - super(IpsecApiTestCase, cls).setUpClass() - cls.create_pg_interfaces([0]) - cls.pg0.config_ip4() - cls.pg0.admin_up() + def setUp(self): + super(IpsecApiTestCase, 
self).setUp() + self.create_pg_interfaces([0]) + self.pg0.config_ip4() + self.pg0.admin_up() + + self.vpp_esp_protocol = (VppEnum.vl_api_ipsec_proto_t. + IPSEC_API_PROTO_ESP) + self.vpp_ah_protocol = (VppEnum.vl_api_ipsec_proto_t. + IPSEC_API_PROTO_AH) + self.ipv4_params = IPsecIPv4Params() + + def tearDown(self): + self.pg0.unconfig_ip4() + self.pg0.admin_down() + super(IpsecApiTestCase, self).tearDown() def test_backend_dump(self): """ backend dump """ d = self.vapi.ipsec_backend_dump() self.assert_equal(len(d), 2, "number of ipsec backends in dump") - self.assert_equal(d[0].protocol, TemplateIpsec.vpp_ah_protocol, + self.assert_equal(d[0].protocol, self.vpp_ah_protocol, "ipsec protocol in dump entry") self.assert_equal(d[0].index, 0, "index in dump entry") self.assert_equal(d[0].active, 1, "active flag in dump entry") - self.assert_equal(d[1].protocol, TemplateIpsec.vpp_esp_protocol, + self.assert_equal(d[1].protocol, self.vpp_esp_protocol, "ipsec protocol in dump entry") self.assert_equal(d[1].index, 0, "index in dump entry") self.assert_equal(d[1].active, 1, "active flag in dump entry") def test_select_valid_backend(self): """ select valid backend """ - self.vapi.ipsec_select_backend(TemplateIpsec.vpp_ah_protocol, 0) - self.vapi.ipsec_select_backend(TemplateIpsec.vpp_esp_protocol, 0) + self.vapi.ipsec_select_backend(self.vpp_ah_protocol, 0) + self.vapi.ipsec_select_backend(self.vpp_esp_protocol, 0) def test_select_invalid_backend(self): """ select invalid backend """ with self.vapi.assert_negative_api_retval(): - self.vapi.ipsec_select_backend(TemplateIpsec.vpp_ah_protocol, 200) + self.vapi.ipsec_select_backend(self.vpp_ah_protocol, 200) with self.vapi.assert_negative_api_retval(): - self.vapi.ipsec_select_backend(TemplateIpsec.vpp_esp_protocol, 200) + self.vapi.ipsec_select_backend(self.vpp_esp_protocol, 200) def test_select_backend_in_use(self): """ attempt to change backend while sad configured """ - params = TemplateIpsec.ipv4_params + params = 
self.ipv4_params addr_type = params.addr_type is_ipv6 = params.is_ipv6 scapy_tun_sa_id = params.scapy_tun_sa_id @@ -51,27 +62,25 @@ def test_select_backend_in_use(self): crypt_algo_vpp_id = params.crypt_algo_vpp_id crypt_key = params.crypt_key - self.vapi.ipsec_sad_add_del_entry(scapy_tun_sa_id, scapy_tun_spi, + self.vapi.ipsec_sad_entry_add_del(scapy_tun_sa_id, scapy_tun_spi, auth_algo_vpp_id, auth_key, crypt_algo_vpp_id, crypt_key, - TemplateIpsec.vpp_ah_protocol, - self.pg0.local_addr_n[addr_type], - self.pg0.remote_addr_n[addr_type], - is_tunnel=1, is_tunnel_ipv6=is_ipv6) + self.vpp_ah_protocol, + self.pg0.local_addr[addr_type], + self.pg0.remote_addr[addr_type]) with self.vapi.assert_negative_api_retval(): self.vapi.ipsec_select_backend( - protocol=TemplateIpsec.vpp_ah_protocol, index=0) + protocol=self.vpp_ah_protocol, index=0) - self.vapi.ipsec_sad_add_del_entry(scapy_tun_sa_id, scapy_tun_spi, + self.vapi.ipsec_sad_entry_add_del(scapy_tun_sa_id, scapy_tun_spi, auth_algo_vpp_id, auth_key, crypt_algo_vpp_id, crypt_key, - TemplateIpsec.vpp_ah_protocol, - self.pg0.local_addr_n[addr_type], - self.pg0.remote_addr_n[addr_type], - is_tunnel=1, is_tunnel_ipv6=is_ipv6, + self.vpp_ah_protocol, + self.pg0.local_addr[addr_type], + self.pg0.remote_addr[addr_type], is_add=0) self.vapi.ipsec_select_backend( - protocol=TemplateIpsec.vpp_ah_protocol, index=0) + protocol=self.vpp_ah_protocol, index=0) if __name__ == '__main__': diff --git a/test/test_ipsec_esp.py b/test/test_ipsec_esp.py index d22f965a31b8..ba67b60a08ed 100644 --- a/test/test_ipsec_esp.py +++ b/test/test_ipsec_esp.py @@ -5,6 +5,11 @@ from framework import VppTestRunner from template_ipsec import IpsecTraTests, IpsecTunTests from template_ipsec import TemplateIpsec, IpsecTcpTests +from vpp_ipsec import VppIpsecSpd, VppIpsecSpdEntry, VppIpsecSA,\ + VppIpsecSpdItfBinding +from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip import DpoProto +from vpp_papi import VppEnum class 
TemplateIpsecEsp(TemplateIpsec): @@ -38,37 +43,47 @@ class TemplateIpsecEsp(TemplateIpsec): --- --- --- """ - encryption_type = ESP - - @classmethod - def setUpClass(cls): - super(TemplateIpsecEsp, cls).setUpClass() - cls.tun_if = cls.pg0 - cls.tra_if = cls.pg2 - cls.logger.info(cls.vapi.ppcli("show int addr")) - cls.vapi.ipsec_spd_add_del(cls.tra_spd_id) - cls.vapi.ipsec_interface_add_del_spd(cls.tra_spd_id, - cls.tra_if.sw_if_index) - for _, p in cls.params.items(): - cls.config_esp_tra(p) - cls.configure_sa_tra(p) - cls.logger.info(cls.vapi.ppcli("show ipsec")) - cls.vapi.ipsec_spd_add_del(cls.tun_spd_id) - cls.vapi.ipsec_interface_add_del_spd(cls.tun_spd_id, - cls.tun_if.sw_if_index) - for _, p in cls.params.items(): - cls.config_esp_tun(p) - cls.logger.info(cls.vapi.ppcli("show ipsec")) - for _, p in cls.params.items(): - src = socket.inet_pton(p.addr_type, p.remote_tun_if_host) - cls.vapi.ip_add_del_route( - src, p.addr_len, cls.tun_if.remote_addr_n[p.addr_type], - is_ipv6=p.is_ipv6) - - @classmethod - def config_esp_tun(cls, params): + def setUp(self): + super(TemplateIpsecEsp, self).setUp() + self.encryption_type = ESP + self.tun_if = self.pg0 + self.tra_if = self.pg2 + self.logger.info(self.vapi.ppcli("show int addr")) + + self.tra_spd = VppIpsecSpd(self, self.tra_spd_id) + self.tra_spd.add_vpp_config() + VppIpsecSpdItfBinding(self, self.tra_spd, + self.tra_if).add_vpp_config() + + for _, p in self.params.items(): + self.config_esp_tra(p) + self.configure_sa_tra(p) + self.logger.info(self.vapi.ppcli("show ipsec")) + + self.tun_spd = VppIpsecSpd(self, self.tun_spd_id) + self.tun_spd.add_vpp_config() + VppIpsecSpdItfBinding(self, self.tun_spd, + self.tun_if).add_vpp_config() + + for _, p in self.params.items(): + self.config_esp_tun(p) + self.logger.info(self.vapi.ppcli("show ipsec")) + + for _, p in self.params.items(): + d = DpoProto.DPO_PROTO_IP6 if p.is_ipv6 else DpoProto.DPO_PROTO_IP4 + VppIpRoute(self, p.remote_tun_if_host, p.addr_len, + 
[VppRoutePath(self.tun_if.remote_addr[p.addr_type], + 0xffffffff, + proto=d)], + is_ip6=p.is_ipv6).add_vpp_config() + + def tearDown(self): + super(TemplateIpsecEsp, self).tearDown() + if not self.vpp_dead: + self.vapi.cli("show hardware") + + def config_esp_tun(self, params): addr_type = params.addr_type - is_ipv6 = params.is_ipv6 scapy_tun_sa_id = params.scapy_tun_sa_id scapy_tun_spi = params.scapy_tun_spi vpp_tun_sa_id = params.vpp_tun_sa_id @@ -80,58 +95,69 @@ def config_esp_tun(cls, params): remote_tun_if_host = params.remote_tun_if_host addr_any = params.addr_any addr_bcast = params.addr_bcast - cls.vapi.ipsec_sad_add_del_entry(scapy_tun_sa_id, scapy_tun_spi, - auth_algo_vpp_id, auth_key, - crypt_algo_vpp_id, crypt_key, - cls.vpp_esp_protocol, - cls.tun_if.local_addr_n[addr_type], - cls.tun_if.remote_addr_n[addr_type], - is_tunnel=1, is_tunnel_ipv6=is_ipv6) - cls.vapi.ipsec_sad_add_del_entry(vpp_tun_sa_id, vpp_tun_spi, - auth_algo_vpp_id, auth_key, - crypt_algo_vpp_id, crypt_key, - cls.vpp_esp_protocol, - cls.tun_if.remote_addr_n[addr_type], - cls.tun_if.local_addr_n[addr_type], - is_tunnel=1, is_tunnel_ipv6=is_ipv6) - l_startaddr = r_startaddr = socket.inet_pton(addr_type, addr_any) - l_stopaddr = r_stopaddr = socket.inet_pton(addr_type, addr_bcast) - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, scapy_tun_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, is_ipv6=is_ipv6, - protocol=socket.IPPROTO_ESP) - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, scapy_tun_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, is_outbound=0, - protocol=socket.IPPROTO_ESP, - is_ipv6=is_ipv6) - l_startaddr = l_stopaddr = socket.inet_pton(addr_type, - remote_tun_if_host) - r_startaddr = r_stopaddr = cls.pg1.remote_addr_n[addr_type] - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, vpp_tun_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, priority=10, policy=3, - is_ipv6=is_ipv6, is_outbound=0) - 
cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, scapy_tun_sa_id, - r_startaddr, r_stopaddr, l_startaddr, - l_stopaddr, priority=10, policy=3, - is_ipv6=is_ipv6) - l_startaddr = l_stopaddr = socket.inet_pton(addr_type, - remote_tun_if_host) - r_startaddr = r_stopaddr = cls.pg0.local_addr_n[addr_type] - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, vpp_tun_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, priority=20, policy=3, - is_outbound=0, is_ipv6=is_ipv6) - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, scapy_tun_sa_id, - r_startaddr, r_stopaddr, l_startaddr, - l_stopaddr, priority=20, policy=3, - is_ipv6=is_ipv6) - - @classmethod - def config_esp_tra(cls, params): + e = VppEnum.vl_api_ipsec_spd_action_t + + VppIpsecSA(self, scapy_tun_sa_id, scapy_tun_spi, + auth_algo_vpp_id, auth_key, + crypt_algo_vpp_id, crypt_key, + self.vpp_esp_protocol, + self.tun_if.local_addr[addr_type], + self.tun_if.remote_addr[addr_type]).add_vpp_config() + VppIpsecSA(self, vpp_tun_sa_id, vpp_tun_spi, + auth_algo_vpp_id, auth_key, + crypt_algo_vpp_id, crypt_key, + self.vpp_esp_protocol, + self.tun_if.remote_addr[addr_type], + self.tun_if.local_addr[addr_type]).add_vpp_config() + + params.spd_policy_in_any = VppIpsecSpdEntry(self, self.tun_spd, + scapy_tun_sa_id, + addr_any, addr_bcast, + addr_any, addr_bcast, + socket.IPPROTO_ESP) + params.spd_policy_in_any.add_vpp_config() + params.spd_policy_out_any = VppIpsecSpdEntry(self, self.tun_spd, + scapy_tun_sa_id, + addr_any, addr_bcast, + addr_any, addr_bcast, + socket.IPPROTO_ESP, + is_outbound=0) + params.spd_policy_out_any.add_vpp_config() + + VppIpsecSpdEntry(self, self.tun_spd, vpp_tun_sa_id, + remote_tun_if_host, remote_tun_if_host, + self.pg1.remote_addr[addr_type], + self.pg1.remote_addr[addr_type], + 0, + priority=10, + policy=e.IPSEC_API_SPD_ACTION_PROTECT, + is_outbound=0).add_vpp_config() + VppIpsecSpdEntry(self, self.tun_spd, scapy_tun_sa_id, + self.pg1.remote_addr[addr_type], + 
self.pg1.remote_addr[addr_type], + remote_tun_if_host, remote_tun_if_host, + 0, + policy=e.IPSEC_API_SPD_ACTION_PROTECT, + priority=10).add_vpp_config() + + VppIpsecSpdEntry(self, self.tun_spd, vpp_tun_sa_id, + remote_tun_if_host, remote_tun_if_host, + self.pg0.local_addr[addr_type], + self.pg0.local_addr[addr_type], + 0, + priority=20, + policy=e.IPSEC_API_SPD_ACTION_PROTECT, + is_outbound=0).add_vpp_config() + VppIpsecSpdEntry(self, self.tun_spd, scapy_tun_sa_id, + self.pg0.local_addr[addr_type], + self.pg0.local_addr[addr_type], + remote_tun_if_host, remote_tun_if_host, + 0, + policy=e.IPSEC_API_SPD_ACTION_PROTECT, + priority=20).add_vpp_config() + + def config_esp_tra(self, params): addr_type = params.addr_type - is_ipv6 = params.is_ipv6 scapy_tra_sa_id = params.scapy_tra_sa_id scapy_tra_spi = params.scapy_tra_spi vpp_tra_sa_id = params.vpp_tra_sa_id @@ -142,37 +168,46 @@ def config_esp_tra(cls, params): crypt_key = params.crypt_key addr_any = params.addr_any addr_bcast = params.addr_bcast - cls.vapi.ipsec_sad_add_del_entry(scapy_tra_sa_id, scapy_tra_spi, - auth_algo_vpp_id, auth_key, - crypt_algo_vpp_id, crypt_key, - cls.vpp_esp_protocol, is_tunnel=0, - use_anti_replay=1) - cls.vapi.ipsec_sad_add_del_entry(vpp_tra_sa_id, vpp_tra_spi, - auth_algo_vpp_id, auth_key, - crypt_algo_vpp_id, crypt_key, - cls.vpp_esp_protocol, is_tunnel=0, - use_anti_replay=1) - l_startaddr = r_startaddr = socket.inet_pton(addr_type, addr_any) - l_stopaddr = r_stopaddr = socket.inet_pton(addr_type, addr_bcast) - cls.vapi.ipsec_spd_add_del_entry(cls.tra_spd_id, vpp_tra_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, is_ipv6=is_ipv6, - protocol=socket.IPPROTO_ESP) - cls.vapi.ipsec_spd_add_del_entry(cls.tra_spd_id, vpp_tra_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, is_outbound=0, - is_ipv6=is_ipv6, - protocol=socket.IPPROTO_ESP) - l_startaddr = l_stopaddr = cls.tra_if.local_addr_n[addr_type] - r_startaddr = r_stopaddr = cls.tra_if.remote_addr_n[addr_type] - 
cls.vapi.ipsec_spd_add_del_entry(cls.tra_spd_id, vpp_tra_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, priority=10, policy=3, - is_outbound=0, is_ipv6=is_ipv6) - cls.vapi.ipsec_spd_add_del_entry(cls.tra_spd_id, scapy_tra_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, priority=10, policy=3, - is_ipv6=is_ipv6) + flags = (VppEnum.vl_api_ipsec_sad_flags_t. + IPSEC_API_SAD_FLAG_USE_ANTI_REPLAY) + e = VppEnum.vl_api_ipsec_spd_action_t + + VppIpsecSA(self, scapy_tra_sa_id, scapy_tra_spi, + auth_algo_vpp_id, auth_key, + crypt_algo_vpp_id, crypt_key, + self.vpp_esp_protocol, + flags=flags).add_vpp_config() + VppIpsecSA(self, vpp_tra_sa_id, vpp_tra_spi, + auth_algo_vpp_id, auth_key, + crypt_algo_vpp_id, crypt_key, + self.vpp_esp_protocol, + flags=flags).add_vpp_config() + + VppIpsecSpdEntry(self, self.tra_spd, vpp_tra_sa_id, + addr_any, addr_bcast, + addr_any, addr_bcast, + socket.IPPROTO_ESP).add_vpp_config() + VppIpsecSpdEntry(self, self.tra_spd, vpp_tra_sa_id, + addr_any, addr_bcast, + addr_any, addr_bcast, + socket.IPPROTO_ESP, + is_outbound=0).add_vpp_config() + + VppIpsecSpdEntry(self, self.tra_spd, vpp_tra_sa_id, + self.tra_if.local_addr[addr_type], + self.tra_if.local_addr[addr_type], + self.tra_if.remote_addr[addr_type], + self.tra_if.remote_addr[addr_type], + 0, priority=10, + policy=e.IPSEC_API_SPD_ACTION_PROTECT, + is_outbound=0).add_vpp_config() + VppIpsecSpdEntry(self, self.tra_spd, scapy_tra_sa_id, + self.tra_if.local_addr[addr_type], + self.tra_if.local_addr[addr_type], + self.tra_if.remote_addr[addr_type], + self.tra_if.remote_addr[addr_type], + 0, policy=e.IPSEC_API_SPD_ACTION_PROTECT, + priority=10).add_vpp_config() class TestIpsecEsp1(TemplateIpsecEsp, IpsecTraTests, IpsecTunTests): diff --git a/test/test_ipsec_nat.py b/test/test_ipsec_nat.py index e9efa032a13e..cdb9cb438f22 100644 --- a/test/test_ipsec_nat.py +++ b/test/test_ipsec_nat.py @@ -7,6 +7,11 @@ from scapy.layers.ipsec import SecurityAssociation, ESP from util import 
ppp, ppc from template_ipsec import TemplateIpsec +from vpp_ipsec import VppIpsecSA, VppIpsecSpd, VppIpsecSpdEntry,\ + VppIpsecSpdItfBinding +from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip import DpoProto +from vpp_papi import VppEnum class IPSecNATTestCase(TemplateIpsec): @@ -31,20 +36,28 @@ class IPSecNATTestCase(TemplateIpsec): icmp_id_in = 6305 icmp_id_out = 6305 - @classmethod - def setUpClass(cls): - super(IPSecNATTestCase, cls).setUpClass() - cls.tun_if = cls.pg0 - cls.vapi.ipsec_spd_add_del(cls.tun_spd_id) - cls.vapi.ipsec_interface_add_del_spd(cls.tun_spd_id, - cls.tun_if.sw_if_index) - p = cls.ipv4_params - cls.config_esp_tun(p) - cls.logger.info(cls.vapi.ppcli("show ipsec")) - src = socket.inet_pton(p.addr_type, p.remote_tun_if_host) - cls.vapi.ip_add_del_route(src, p.addr_len, - cls.tun_if.remote_addr_n[p.addr_type], - is_ipv6=p.is_ipv6) + def setUp(self): + super(IPSecNATTestCase, self).setUp() + self.tun_if = self.pg0 + + self.tun_spd = VppIpsecSpd(self, self.tun_spd_id) + self.tun_spd.add_vpp_config() + VppIpsecSpdItfBinding(self, self.tun_spd, + self.tun_if).add_vpp_config() + + p = self.ipv4_params + self.config_esp_tun(p) + self.logger.info(self.vapi.ppcli("show ipsec")) + + d = DpoProto.DPO_PROTO_IP6 if p.is_ipv6 else DpoProto.DPO_PROTO_IP4 + VppIpRoute(self, p.remote_tun_if_host, p.addr_len, + [VppRoutePath(self.tun_if.remote_addr[p.addr_type], + 0xffffffff, + proto=d)], + is_ip6=p.is_ipv6).add_vpp_config() + + def tearDown(self): + super(IPSecNATTestCase, self).tearDown() def create_stream_plain(self, src_mac, dst_mac, src_ip, dst_ip): return [ @@ -131,8 +144,7 @@ def verify_capture_encrypted(self, capture, sa): ppp("Unexpected or invalid encrypted packet:", packet)) raise - @classmethod - def config_esp_tun(cls, params): + def config_esp_tun(self, params): addr_type = params.addr_type scapy_tun_sa_id = params.scapy_tun_sa_id scapy_tun_spi = params.scapy_tun_spi @@ -144,50 +156,62 @@ def config_esp_tun(cls, params): crypt_key = 
params.crypt_key addr_any = params.addr_any addr_bcast = params.addr_bcast - cls.vapi.ipsec_sad_add_del_entry(scapy_tun_sa_id, scapy_tun_spi, - auth_algo_vpp_id, auth_key, - crypt_algo_vpp_id, crypt_key, - cls.vpp_esp_protocol, - cls.pg1.remote_addr_n[addr_type], - cls.tun_if.remote_addr_n[addr_type], - udp_encap=1) - cls.vapi.ipsec_sad_add_del_entry(vpp_tun_sa_id, vpp_tun_spi, - auth_algo_vpp_id, auth_key, - crypt_algo_vpp_id, crypt_key, - cls.vpp_esp_protocol, - cls.tun_if.remote_addr_n[addr_type], - cls.pg1.remote_addr_n[addr_type], - udp_encap=1) - l_startaddr = r_startaddr = socket.inet_pton(addr_type, addr_any) - l_stopaddr = r_stopaddr = socket.inet_pton(addr_type, addr_bcast) - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, scapy_tun_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, - protocol=socket.IPPROTO_ESP) - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, scapy_tun_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, is_outbound=0, - protocol=socket.IPPROTO_ESP) - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, scapy_tun_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, remote_port_start=4500, - remote_port_stop=4500, - protocol=socket.IPPROTO_UDP) - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, scapy_tun_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, remote_port_start=4500, - remote_port_stop=4500, - protocol=socket.IPPROTO_UDP, - is_outbound=0) - l_startaddr = l_stopaddr = cls.tun_if.remote_addr_n[addr_type] - r_startaddr = r_stopaddr = cls.pg1.remote_addr_n[addr_type] - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, vpp_tun_sa_id, - l_startaddr, l_stopaddr, r_startaddr, - r_stopaddr, priority=10, policy=3, - is_outbound=0) - cls.vapi.ipsec_spd_add_del_entry(cls.tun_spd_id, scapy_tun_sa_id, - r_startaddr, r_stopaddr, l_startaddr, - l_stopaddr, priority=10, policy=3) + flags = (VppEnum.vl_api_ipsec_sad_flags_t. 
+ IPSEC_API_SAD_FLAG_UDP_ENCAP) + e = VppEnum.vl_api_ipsec_spd_action_t + + VppIpsecSA(self, scapy_tun_sa_id, scapy_tun_spi, + auth_algo_vpp_id, auth_key, + crypt_algo_vpp_id, crypt_key, + self.vpp_esp_protocol, + self.pg1.remote_addr[addr_type], + self.tun_if.remote_addr[addr_type], + flags=flags).add_vpp_config() + VppIpsecSA(self, vpp_tun_sa_id, vpp_tun_spi, + auth_algo_vpp_id, auth_key, + crypt_algo_vpp_id, crypt_key, + self.vpp_esp_protocol, + self.tun_if.remote_addr[addr_type], + self.pg1.remote_addr[addr_type], + flags=flags).add_vpp_config() + + VppIpsecSpdEntry(self, self.tun_spd, scapy_tun_sa_id, + addr_any, addr_bcast, + addr_any, addr_bcast, + socket.IPPROTO_ESP).add_vpp_config() + VppIpsecSpdEntry(self, self.tun_spd, scapy_tun_sa_id, + addr_any, addr_bcast, + addr_any, addr_bcast, + socket.IPPROTO_ESP, + is_outbound=0).add_vpp_config() + VppIpsecSpdEntry(self, self.tun_spd, scapy_tun_sa_id, + addr_any, addr_bcast, + addr_any, addr_bcast, + socket.IPPROTO_UDP, + remote_port_start=4500, + remote_port_stop=4500).add_vpp_config() + VppIpsecSpdEntry(self, self.tun_spd, scapy_tun_sa_id, + addr_any, addr_bcast, + addr_any, addr_bcast, + socket.IPPROTO_UDP, + remote_port_start=4500, + remote_port_stop=4500, + is_outbound=0).add_vpp_config() + VppIpsecSpdEntry(self, self.tun_spd, vpp_tun_sa_id, + self.tun_if.remote_addr[addr_type], + self.tun_if.remote_addr[addr_type], + self.pg1.remote_addr[addr_type], + self.pg1.remote_addr[addr_type], + 0, priority=10, + policy=e.IPSEC_API_SPD_ACTION_PROTECT, + is_outbound=0).add_vpp_config() + VppIpsecSpdEntry(self, self.tun_spd, scapy_tun_sa_id, + self.pg1.remote_addr[addr_type], + self.pg1.remote_addr[addr_type], + self.tun_if.remote_addr[addr_type], + self.tun_if.remote_addr[addr_type], + 0, policy=e.IPSEC_API_SPD_ACTION_PROTECT, + priority=10).add_vpp_config() def test_ipsec_nat_tun(self): """ IPSec/NAT tunnel test case """ diff --git a/test/test_ipsec_tun_if_esp.py b/test/test_ipsec_tun_if_esp.py index 
e10e2a3cfcbc..2734908e7a16 100644 --- a/test/test_ipsec_tun_if_esp.py +++ b/test/test_ipsec_tun_if_esp.py @@ -4,6 +4,7 @@ from framework import VppTestRunner from template_ipsec import TemplateIpsec, IpsecTun4Tests, IpsecTcpTests from vpp_ipsec_tun_interface import VppIpsecTunInterface +from vpp_ip_route import VppIpRoute, VppRoutePath class TemplateIpsecTunIfEsp(TemplateIpsec): @@ -11,12 +12,11 @@ class TemplateIpsecTunIfEsp(TemplateIpsec): encryption_type = ESP - @classmethod - def setUpClass(cls): - super(TemplateIpsecTunIfEsp, cls).setUpClass() - cls.tun_if = cls.pg0 - def setUp(self): + super(TemplateIpsecTunIfEsp, self).setUp() + + self.tun_if = self.pg0 + p = self.ipv4_params tun_if = VppIpsecTunInterface(self, self.pg0, p.vpp_tun_spi, p.scapy_tun_spi, p.crypt_algo_vpp_id, @@ -26,8 +26,10 @@ def setUp(self): tun_if.add_vpp_config() tun_if.admin_up() tun_if.config_ip4() - src4 = socket.inet_pton(socket.AF_INET, p.remote_tun_if_host) - self.vapi.ip_add_del_route(src4, 32, tun_if.remote_ip4n) + + VppIpRoute(self, p.remote_tun_if_host, 32, + [VppRoutePath(tun_if.remote_ip4, + 0xffffffff)]).add_vpp_config() def tearDown(self): if not self.vpp_dead: diff --git a/test/test_jvpp.py b/test/test_jvpp.py index b716af392056..9a58ca42383f 100644 --- a/test/test_jvpp.py +++ b/test/test_jvpp.py @@ -17,6 +17,14 @@ class TestJVpp(VppTestCase): """ JVPP Core Test Case """ + @classmethod + def setUpClass(cls): + super(TestJVpp, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(TestJVpp, cls).tearDownClass() + def invoke_for_jvpp_core(self, api_jar_name, test_class_name): self.jvpp_connection_test(api_jar_name=api_jar_name, test_class_name=test_class_name, diff --git a/test/test_l2bd_arp_term.py b/test/test_l2bd_arp_term.py index 2321aa7c094f..6d01daacc2ff 100644 --- a/test/test_l2bd_arp_term.py +++ b/test/test_l2bd_arp_term.py @@ -163,15 +163,13 @@ def inttoip4(ip): return '%s.%s.%s.%s' % (o1, o2, o3, o4) def arp_event_host(self, e): - return 
Host(mac=':'.join(['%02x' % ord(char) for char in e.new_mac]), - ip4=self.inttoip4(e.address)) + return Host(str(e.mac), ip4=str(e.ip)) def arp_event_hosts(self, evs): return {self.arp_event_host(e) for e in evs} def nd_event_host(self, e): - return Host(mac=':'.join(['%02x' % ord(char) for char in e.new_mac]), - ip6=inet_ntop(AF_INET6, e.address)) + return Host(str(e.mac), ip6=str(e.ip)) def nd_event_hosts(self, evs): return {self.nd_event_host(e) for e in evs} @@ -439,7 +437,7 @@ def test_l2bd_arp_term_11(self): def test_l2bd_arp_term_12(self): """ L2BD ND term - send NS packets verify reports """ - self.vapi.want_ip6_nd_events(address=inet_pton(AF_INET6, "::0")) + self.vapi.want_ip6_nd_events(ip="::") dst_host = self.ip6_host(50, 50, "00:00:11:22:33:44") self.bd_add_del(1, is_add=1) self.set_bd_flags(1, arp_term=True, flood=False, @@ -475,8 +473,7 @@ def test_l2bd_arp_term_13(self): def test_l2bd_arp_term_14(self): """ L2BD ND term - disable ip4 arp events,send ns, verify no events """ - self.vapi.want_ip6_nd_events(enable_disable=0, - address=inet_pton(AF_INET6, "::0")) + self.vapi.want_ip6_nd_events(enable_disable=0, ip="::") dst_host = self.ip6_host(50, 50, "00:00:11:22:33:44") macs = self.mac_list(range(10, 15)) hosts = self.ip6_hosts(5, 1, macs) diff --git a/test/test_nat.py b/test/test_nat.py index c64359a4db74..0d74cb60f87a 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -23,6 +23,7 @@ from syslog_rfc5424_parser.constants import SyslogFacility, SyslogSeverity from vpp_papi_provider import SYSLOG_SEVERITY from io import BytesIO +from vpp_papi import VppEnum class MethodHolder(VppTestCase): @@ -48,12 +49,13 @@ def clear_nat44(self): is_add=0) for intf in [self.pg7, self.pg8]: - neighbors = self.vapi.ip_neighbor_dump(intf.sw_if_index) - for n in neighbors: - self.vapi.ip_neighbor_add_del(intf.sw_if_index, - n.mac_address, - n.ip_address, - is_add=0) + self.vapi.ip_neighbor_add_del( + intf.sw_if_index, + intf.remote_mac, + intf.remote_ip4, + 
flags=(VppEnum.vl_api_ip_neighbor_flags_t. + IP_API_NEIGHBOR_FLAG_STATIC), + is_add=0) if self.pg7.has_ip4_config: self.pg7.unconfig_ip4() @@ -1043,6 +1045,10 @@ def verify_syslog_apmap(self, data, is_add=True): message = data.decode('utf-8') try: message = SyslogMessage.parse(message) + except ParseError as e: + self.logger.error(e) + raise + else: self.assertEqual(message.severity, SyslogSeverity.info) self.assertEqual(message.appname, 'NAT') self.assertEqual(message.msgid, 'APMADD' if is_add else 'APMDEL') @@ -1057,13 +1063,15 @@ def verify_syslog_apmap(self, data, is_add=True): self.assertEqual(sd_params.get('PROTO'), "%d" % IP_PROTOS.tcp) self.assertTrue(sd_params.get('SSUBIX') is not None) self.assertEqual(sd_params.get('SVLAN'), '0') - except ParseError as e: - self.logger.error(e) def verify_syslog_sess(self, data, is_add=True, is_ip6=False): message = data.decode('utf-8') try: message = SyslogMessage.parse(message) + except ParseError as e: + self.logger.error(e) + raise + else: self.assertEqual(message.severity, SyslogSeverity.info) self.assertEqual(message.appname, 'NAT') self.assertEqual(message.msgid, 'SADD' if is_add else 'SDEL') @@ -1085,8 +1093,6 @@ def verify_syslog_sess(self, data, is_add=True, is_ip6=False): self.assertEqual(sd_params.get('XDADDR'), self.pg1.remote_ip4) self.assertEqual(sd_params.get('XDPORT'), "%d" % self.tcp_external_port) - except ParseError as e: - self.logger.error(e) def verify_mss_value(self, pkt, mss): """ @@ -2960,14 +2966,18 @@ def test_vrf_feature_independent(self): def test_dynamic_ipless_interfaces(self): """ NAT44 interfaces without configured IP address """ - self.vapi.ip_neighbor_add_del(self.pg7.sw_if_index, - mac_pton(self.pg7.remote_mac), - self.pg7.remote_ip4n, - is_static=1) - self.vapi.ip_neighbor_add_del(self.pg8.sw_if_index, - mac_pton(self.pg8.remote_mac), - self.pg8.remote_ip4n, - is_static=1) + self.vapi.ip_neighbor_add_del( + self.pg7.sw_if_index, + self.pg7.remote_mac, + self.pg7.remote_ip4, + 
flags=(VppEnum.vl_api_ip_neighbor_flags_t. + IP_API_NEIGHBOR_FLAG_STATIC)) + self.vapi.ip_neighbor_add_del( + self.pg8.sw_if_index, + self.pg8.remote_mac, + self.pg8.remote_ip4, + flags=(VppEnum.vl_api_ip_neighbor_flags_t. + IP_API_NEIGHBOR_FLAG_STATIC)) self.vapi.ip_add_del_route(dst_address=self.pg7.remote_ip4n, dst_address_length=32, @@ -3002,14 +3012,18 @@ def test_dynamic_ipless_interfaces(self): def test_static_ipless_interfaces(self): """ NAT44 interfaces without configured IP address - 1:1 NAT """ - self.vapi.ip_neighbor_add_del(self.pg7.sw_if_index, - mac_pton(self.pg7.remote_mac), - self.pg7.remote_ip4n, - is_static=1) - self.vapi.ip_neighbor_add_del(self.pg8.sw_if_index, - mac_pton(self.pg8.remote_mac), - self.pg8.remote_ip4n, - is_static=1) + self.vapi.ip_neighbor_add_del( + self.pg7.sw_if_index, + self.pg7.remote_mac, + self.pg7.remote_ip4, + flags=(VppEnum.vl_api_ip_neighbor_flags_t. + IP_API_NEIGHBOR_FLAG_STATIC)) + self.vapi.ip_neighbor_add_del( + self.pg8.sw_if_index, + self.pg8.remote_mac, + self.pg8.remote_ip4, + flags=(VppEnum.vl_api_ip_neighbor_flags_t. + IP_API_NEIGHBOR_FLAG_STATIC)) self.vapi.ip_add_del_route(dst_address=self.pg7.remote_ip4n, dst_address_length=32, @@ -3048,14 +3062,18 @@ def test_static_with_port_ipless_interfaces(self): self.udp_port_out = 30607 self.icmp_id_out = 30608 - self.vapi.ip_neighbor_add_del(self.pg7.sw_if_index, - mac_pton(self.pg7.remote_mac), - self.pg7.remote_ip4n, - is_static=1) - self.vapi.ip_neighbor_add_del(self.pg8.sw_if_index, - mac_pton(self.pg8.remote_mac), - self.pg8.remote_ip4n, - is_static=1) + self.vapi.ip_neighbor_add_del( + self.pg7.sw_if_index, + self.pg7.remote_mac, + self.pg7.remote_ip4, + flags=(VppEnum.vl_api_ip_neighbor_flags_t. + IP_API_NEIGHBOR_FLAG_STATIC)) + self.vapi.ip_neighbor_add_del( + self.pg8.sw_if_index, + self.pg8.remote_mac, + self.pg8.remote_ip4, + flags=(VppEnum.vl_api_ip_neighbor_flags_t. 
+ IP_API_NEIGHBOR_FLAG_STATIC)) self.vapi.ip_add_del_route(dst_address=self.pg7.remote_ip4n, dst_address_length=32, @@ -8165,6 +8183,9 @@ def verify_syslog_apmadd(self, data, isaddr, isport, xsaddr, xsport, message = data.decode('utf-8') try: message = SyslogMessage.parse(message) + except ParseError as e: + self.logger.error(e) + else: self.assertEqual(message.severity, SyslogSeverity.info) self.assertEqual(message.appname, 'NAT') self.assertEqual(message.msgid, 'APMADD') @@ -8179,8 +8200,6 @@ def verify_syslog_apmadd(self, data, isaddr, isport, xsaddr, xsport, self.assertEqual(sd_params.get('PROTO'), "%d" % proto) self.assertTrue(sd_params.get('SSUBIX') is not None) self.assertEqual(sd_params.get('SV6ENC'), sv6enc) - except ParseError as e: - self.logger.error(e) def test_dslite(self): """ Test DS-Lite """ diff --git a/test/test_neighbor.py b/test/test_neighbor.py index d551c94584a5..c378cff4e13e 100644 --- a/test/test_neighbor.py +++ b/test/test_neighbor.py @@ -7,6 +7,7 @@ from vpp_neighbor import VppNeighbor, find_nbr from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, \ VppIpTable, DpoProto +from vpp_papi import VppEnum from scapy.packet import Raw from scapy.layers.l2 import Ether, ARP, Dot1Q @@ -693,8 +694,8 @@ def test_proxy_mirror_arp(self): # # Configure Proxy ARP for the subnet on PG0addresses on pg0 # - self.vapi.proxy_arp_add_del(self.pg0._local_ip4n_subnet, - self.pg0._local_ip4n_bcast) + self.vapi.proxy_arp_add_del(self.pg0._local_ip4_subnet, + self.pg0._local_ip4_bcast) # Make pg2 un-numbered to pg0 # @@ -731,8 +732,8 @@ def test_proxy_mirror_arp(self): # cleanup # self.pg2.set_proxy_arp(0) - self.vapi.proxy_arp_add_del(self.pg0._local_ip4n_subnet, - self.pg0._local_ip4n_bcast, + self.vapi.proxy_arp_add_del(self.pg0._local_ip4_subnet, + self.pg0._local_ip4_bcast, is_add=0) def test_proxy_arp(self): @@ -971,11 +972,7 @@ def test_arp_vrrp(self): UDP(sport=1234, dport=1234) / Raw()) - self.pg0.add_stream(p0) - 
self.pg_enable_capture(self.pg_interfaces) - self.pg_start() - - rx1 = self.pg1.get_capture(1) + rx1 = self.send_and_expect(self.pg0, [p0], self.pg1) self.verify_arp_req(rx1[0], self.pg1.local_mac, @@ -992,20 +989,14 @@ def test_arp_vrrp(self): hwsrc="00:00:5e:00:01:09", pdst=self.pg1.local_ip4, psrc=self.pg1.remote_ip4)) - self.pg1.add_stream(p1) - self.pg_enable_capture(self.pg_interfaces) - self.pg_start() + self.send_and_assert_no_replies(self.pg1, p1, "ARP reply") # # IP packet destined for pg1 remote host arrives on pg0 again. # VPP should have an ARP entry for that address now and the packet # should be sent out pg1. # - self.pg0.add_stream(p0) - self.pg_enable_capture(self.pg_interfaces) - self.pg_start() - - rx1 = self.pg1.get_capture(1) + rx1 = self.send_and_expect(self.pg0, [p0], self.pg1) self.verify_ip(rx1[0], self.pg1.local_mac, @@ -1380,9 +1371,43 @@ def test_arp_incomplete(self): # self.assertLess(len(rx), 64) + def test_arp_forus(self): + """ ARP for for-us """ + + # + # Test that VPP responds with ARP requests to addresses that + # are connected and local routes. 
+ # Use one of the 'remote' addresses in the subnet as a local address + # The intention of this route is that it then acts like a secondardy + # address added to an interface + # + self.pg0.generate_remote_hosts(2) + + forus = VppIpRoute(self, self.pg0.remote_hosts[1].ip4, 32, + [VppRoutePath(self.pg0.remote_hosts[1].ip4, + self.pg0.sw_if_index)], + is_local=1) + forus.add_vpp_config() + + p = (Ether(dst="ff:ff:ff:ff:ff:ff", + src=self.pg0.remote_mac) / + ARP(op="who-has", + hwdst=self.pg0.local_mac, + hwsrc=self.pg0.remote_mac, + pdst=self.pg0.remote_hosts[1].ip4, + psrc=self.pg0.remote_ip4)) + + rx = self.send_and_expect(self.pg0, [p], self.pg0) + + self.verify_arp_resp(rx[0], + self.pg0.local_mac, + self.pg0.remote_mac, + self.pg0.remote_hosts[1].ip4, + self.pg0.remote_ip4) + class NeighborStatsTestCase(VppTestCase): - """ ARP Test Case """ + """ ARP/ND Counters """ def setUp(self): super(NeighborStatsTestCase, self).setUp() @@ -1455,14 +1480,12 @@ def test_nd_stats(self): nd1 = VppNeighbor(self, self.pg0.sw_if_index, self.pg0.remote_hosts[1].mac, - self.pg0.remote_hosts[1].ip6, - af=AF_INET6) + self.pg0.remote_hosts[1].ip6) nd1.add_vpp_config() nd2 = VppNeighbor(self, self.pg0.sw_if_index, self.pg0.remote_hosts[2].mac, - self.pg0.remote_hosts[2].ip6, - af=AF_INET6) + self.pg0.remote_hosts[2].ip6) nd2.add_vpp_config() p1 = (Ether(dst=self.pg1.local_mac, diff --git a/test/test_punt.py b/test/test_punt.py index d57a847ef0c3..7959b9818375 100644 --- a/test/test_punt.py +++ b/test/test_punt.py @@ -119,6 +119,14 @@ class TestPuntSocket(VppTestCase): portsCheck = dict() nr_packets = 256 + @classmethod + def setUpClass(cls): + super(TestPuntSocket, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(TestPuntSocket, cls).tearDownClass() + @classmethod def setUpConstants(cls): cls.extra_vpp_punt_config = [ @@ -135,6 +143,7 @@ def setUp(self): def tearDown(self): del self.sock_servers[:] + super(TestPuntSocket, self).tearDown() def 
socket_client_create(self, sock_name, id=None): thread = serverSocketThread(id, sock_name, self.portsCheck) @@ -149,6 +158,14 @@ def socket_client_close(self): class TestIP4PuntSocket(TestPuntSocket): """ Punt Socket for IPv4 """ + @classmethod + def setUpClass(cls): + super(TestIP4PuntSocket, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(TestIP4PuntSocket, cls).tearDownClass() + def setUp(self): super(TestIP4PuntSocket, self).setUp() @@ -385,6 +402,14 @@ def test_punt_socket_traffic_multi_ports_single_socket(self): class TestIP6PuntSocket(TestPuntSocket): """ Punt Socket for IPv6""" + @classmethod + def setUpClass(cls): + super(TestIP6PuntSocket, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(TestIP6PuntSocket, cls).tearDownClass() + def setUp(self): super(TestIP6PuntSocket, self).setUp() diff --git a/test/test_srv6.py b/test/test_srv6.py index 47832e28903f..46660f5bbb09 100644 --- a/test/test_srv6.py +++ b/test/test_srv6.py @@ -1231,7 +1231,7 @@ def test_SRv6_T_Insert_Classifier(self): match_n_vectors=(len(mask) - 1) // 32 + 1, current_data_flag=1, skip_n_vectors=2) # data offset - self.assertIsNotNone(r, msg='No response msg for add_del_table') + self.assertIsNotNone(r, 'No response msg for add_del_table') table_index = r.new_table_index # add the source routign node as a ip6 inacl netxt node @@ -1247,7 +1247,7 @@ def test_SRv6_T_Insert_Classifier(self): hit_next_index=inacl_next_node_index, action=3, metadata=0) # sr policy index - self.assertIsNotNone(r, msg='No response msg for add_del_session') + self.assertIsNotNone(r, 'No response msg for add_del_session') # log the classify table used in the steering policy self.logger.info(self.vapi.cli("show classify table")) @@ -1257,7 +1257,7 @@ def test_SRv6_T_Insert_Classifier(self): sw_if_index=self.pg3.sw_if_index, ip6_table_index=table_index) self.assertIsNotNone(r, - msg='No response msg for input_acl_set_interface') + 'No response msg for input_acl_set_interface') # 
log the ip6 inacl self.logger.info(self.vapi.cli("show inacl type ip6")) @@ -1292,7 +1292,7 @@ def test_SRv6_T_Insert_Classifier(self): sw_if_index=self.pg3.sw_if_index, ip6_table_index=table_index) self.assertIsNotNone(r, - msg='No response msg for input_acl_set_interface') + 'No response msg for input_acl_set_interface') # log the ip6 inacl after cleaning self.logger.info(self.vapi.cli("show inacl type ip6")) @@ -1313,13 +1313,13 @@ def test_SRv6_T_Insert_Classifier(self): 0, table_index, binascii.unhexlify(match)) - self.assertIsNotNone(r, msg='No response msg for add_del_session') + self.assertIsNotNone(r, 'No response msg for add_del_session') r = self.vapi.classify_add_del_table( 0, binascii.unhexlify(mask), table_index=table_index) - self.assertIsNotNone(r, msg='No response msg for add_del_table') + self.assertIsNotNone(r, 'No response msg for add_del_table') self.logger.info(self.vapi.cli("show classify table")) diff --git a/test/test_syslog.py b/test/test_syslog.py index 241787d75922..5c697ee1e44e 100644 --- a/test/test_syslog.py +++ b/test/test_syslog.py @@ -18,7 +18,7 @@ def setUpClass(cls): super(TestSyslog, cls).setUpClass() try: - cls.create_pg_interfaces(range(1)) + cls.pg0, = cls.create_pg_interfaces(range(1)) cls.pg0.admin_up() cls.pg0.config_ip4() cls.pg0.resolve_arp() @@ -35,7 +35,7 @@ def syslog_generate(self, facility, severity, appname, msgid, sd=None, :param facility: facility value :param severity: severity level :param appname: application name that originate message - :param msgid: message indetifier + :param msgid: message identifier :param sd: structured data (optional) :param msg: free-form message (optional) """ @@ -71,7 +71,7 @@ def syslog_verify(self, data, facility, severity, appname, msgid, sd=None, :param facility: facility value :param severity: severity level :param appname: application name that originate message - :param msgid: message indetifier + :param msgid: message identifier :param sd: structured data (optional) :param 
msg: free-form message (optional) """ @@ -80,6 +80,10 @@ def syslog_verify(self, data, facility, severity, appname, msgid, sd=None, sd = {} try: message = SyslogMessage.parse(message) + except ParseError as e: + self.logger.error(e) + raise + else: self.assertEqual(message.facility, facility) self.assertEqual(message.severity, severity) self.assertEqual(message.appname, appname) @@ -88,8 +92,6 @@ def syslog_verify(self, data, facility, severity, appname, msgid, sd=None, self.assertEqual(message.sd, sd) self.assertEqual(message.version, 1) self.assertEqual(message.hostname, self.pg0.local_ip4) - except ParseError as e: - self.logger.error(e) def test_syslog(self): """ Syslog Protocol test """ diff --git a/test/test_vapi.py b/test/test_vapi.py index ebd189cd7c89..7bb815d77f30 100644 --- a/test/test_vapi.py +++ b/test/test_vapi.py @@ -12,6 +12,14 @@ class VAPITestCase(VppTestCase): """ VAPI test """ + @classmethod + def setUpClass(cls): + super(VAPITestCase, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(VAPITestCase, cls).tearDownClass() + def test_vapi_c(self): """ run C VAPI tests """ var = "TEST_DIR" diff --git a/test/test_vcl.py b/test/test_vcl.py index 34cf0e2a391d..bd7eb76f07e4 100644 --- a/test/test_vcl.py +++ b/test/test_vcl.py @@ -32,7 +32,15 @@ def __init__(self, build_dir, appname, args, logger, env={}): class VCLTestCase(VppTestCase): """ VCL Test Class """ - def __init__(self, methodName): + @classmethod + def setUpClass(cls): + super(VCLTestCase, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(VCLTestCase, cls).tearDownClass() + + def setUp(self): var = "VPP_BUILD_DIR" self.build_dir = os.getenv(var, None) if self.build_dir is None: @@ -51,7 +59,7 @@ def __init__(self, methodName): if os.path.isfile("/tmp/ldp_server_af_unix_socket"): os.remove("/tmp/ldp_server_af_unix_socket") - super(VCLTestCase, self).__init__(methodName) + super(VCLTestCase, self).setUp() def cut_thru_setup(self): 
self.vapi.session_enable_disable(is_enabled=1) @@ -222,6 +230,14 @@ def validateResults(self, worker_client, worker_server, timeout): class LDPCutThruTestCase(VCLTestCase): """ LDP Cut Thru Tests """ + @classmethod + def setUpClass(cls): + super(LDPCutThruTestCase, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(LDPCutThruTestCase, cls).tearDownClass() + def setUp(self): super(LDPCutThruTestCase, self).setUp() @@ -290,6 +306,14 @@ def test_ldp_cut_thru_bi_dir_nsock(self): class VCLCutThruTestCase(VCLTestCase): """ VCL Cut Thru Tests """ + @classmethod + def setUpClass(cls): + super(VCLCutThruTestCase, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(VCLCutThruTestCase, cls).tearDownClass() + def setUp(self): super(VCLCutThruTestCase, self).setUp() @@ -339,6 +363,14 @@ def test_vcl_cut_thru_bi_dir_nsock(self): class LDPThruHostStackEcho(VCLTestCase): """ LDP Thru Host Stack Echo """ + @classmethod + def setUpClass(cls): + super(LDPThruHostStackEcho, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(LDPThruHostStackEcho, cls).tearDownClass() + def setUp(self): super(LDPThruHostStackEcho, self).setUp() @@ -362,6 +394,14 @@ def test_ldp_thru_host_stack_echo(self): class VCLThruHostStackEcho(VCLTestCase): """ VCL Thru Host Stack Echo """ + @classmethod + def setUpClass(cls): + super(VCLThruHostStackEcho, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(VCLThruHostStackEcho, cls).tearDownClass() + def setUp(self): super(VCLThruHostStackEcho, self).setUp() @@ -385,6 +425,14 @@ def tearDown(self): class VCLThruHostStackBidirNsock(VCLTestCase): """ VCL Thru Host Stack Bidir Nsock """ + @classmethod + def setUpClass(cls): + super(VCLThruHostStackBidirNsock, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(VCLThruHostStackBidirNsock, cls).tearDownClass() + def setUp(self): super(VCLThruHostStackBidirNsock, self).setUp() @@ -414,6 +462,14 @@ def 
test_vcl_thru_host_stack_bi_dir_nsock(self): class LDPThruHostStackBidirNsock(VCLTestCase): """ LDP Thru Host Stack Bidir Nsock """ + @classmethod + def setUpClass(cls): + super(LDPThruHostStackBidirNsock, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(LDPThruHostStackBidirNsock, cls).tearDownClass() + def setUp(self): super(LDPThruHostStackBidirNsock, self).setUp() @@ -449,6 +505,14 @@ def test_ldp_thru_host_stack_bi_dir_nsock(self): class LDPThruHostStackNsock(VCLTestCase): """ LDP Thru Host Stack Nsock """ + @classmethod + def setUpClass(cls): + super(LDPThruHostStackNsock, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(LDPThruHostStackNsock, cls).tearDownClass() + def setUp(self): super(LDPThruHostStackNsock, self).setUp() @@ -481,6 +545,14 @@ def test_ldp_thru_host_stack_uni_dir_nsock(self): class VCLThruHostStackNsock(VCLTestCase): """ VCL Thru Host Stack Nsock """ + @classmethod + def setUpClass(cls): + super(VCLThruHostStackNsock, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(VCLThruHostStackNsock, cls).tearDownClass() + def setUp(self): super(VCLThruHostStackNsock, self).setUp() @@ -513,6 +585,14 @@ def test_vcl_thru_host_stack_uni_dir_nsock(self): class LDPThruHostStackIperf(VCLTestCase): """ LDP Thru Host Stack Iperf """ + @classmethod + def setUpClass(cls): + super(LDPThruHostStackIperf, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(LDPThruHostStackIperf, cls).tearDownClass() + def setUp(self): super(LDPThruHostStackIperf, self).setUp() @@ -544,6 +624,14 @@ def test_ldp_thru_host_stack_iperf3(self): class LDPIpv6CutThruTestCase(VCLTestCase): """ LDP IPv6 Cut Thru Tests """ + @classmethod + def setUpClass(cls): + super(LDPIpv6CutThruTestCase, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(LDPIpv6CutThruTestCase, cls).tearDownClass() + def setUp(self): super(LDPIpv6CutThruTestCase, self).setUp() @@ -619,6 +707,14 @@ def 
test_ldp_ipv6_cut_thru_bi_dir_nsock(self): class VCLIpv6CutThruTestCase(VCLTestCase): """ VCL IPv6 Cut Thru Tests """ + @classmethod + def setUpClass(cls): + super(VCLIpv6CutThruTestCase, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(VCLIpv6CutThruTestCase, cls).tearDownClass() + def setUp(self): super(VCLIpv6CutThruTestCase, self).setUp() @@ -674,6 +770,14 @@ def test_vcl_ipv6_cut_thru_bi_dir_nsock(self): class VCLIpv6ThruHostStackEcho(VCLTestCase): """ VCL IPv6 Thru Host Stack Echo """ + @classmethod + def setUpClass(cls): + super(VCLIpv6ThruHostStackEcho, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(VCLIpv6ThruHostStackEcho, cls).tearDownClass() + def setUp(self): super(VCLIpv6ThruHostStackEcho, self).setUp() diff --git a/test/test_vom.py b/test/test_vom.py index 222d72c1238b..a27d5310f521 100644 --- a/test/test_vom.py +++ b/test/test_vom.py @@ -12,6 +12,14 @@ class VOMTestCase(VppTestCase): """ VPP Object Model Test """ + @classmethod + def setUpClass(cls): + super(VOMTestCase, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(VOMTestCase, cls).tearDownClass() + def test_vom_cpp(self): """ run C++ VOM tests """ var = "TEST_DIR" diff --git a/test/test_vxlan6.py b/test/test_vxlan6.py index fd89f05b03b4..5ae8e7e12687 100644 --- a/test/test_vxlan6.py +++ b/test/test_vxlan6.py @@ -171,6 +171,10 @@ def setUpClass(cls): super(TestVxlan6, cls).tearDownClass() raise + @classmethod + def tearDownClass(cls): + super(TestVxlan6, cls).tearDownClass() + # Method to define VPP actions before tear down of the test case. # Overrides tearDown method in VppTestCase class. # @param self The object pointer. 
diff --git a/test/test_vxlan_gpe.py b/test/test_vxlan_gpe.py index 7c2df4cf38a7..f635bb1d077a 100644 --- a/test/test_vxlan_gpe.py +++ b/test/test_vxlan_gpe.py @@ -221,6 +221,10 @@ def setUpClass(cls): super(TestVxlanGpe, cls).tearDownClass() raise + @classmethod + def tearDownClass(cls): + super(TestVxlanGpe, cls).tearDownClass() + @unittest.skip("test disabled for vxlan-gpe") def test_mcast_flood(self): """ inherited from BridgeDomain """ diff --git a/test/util.py b/test/util.py index 9652b803f1df..cf45c85a5f4b 100644 --- a/test/util.py +++ b/test/util.py @@ -19,15 +19,9 @@ def ppp(headline, packet): """ Return string containing the output of scapy packet.show() call. """ - o = BytesIO() - old_stdout = sys.stdout - sys.stdout = o - print(headline) - hexdump(packet) - print("") - packet.show() - sys.stdout = old_stdout - return o.getvalue() + return '%s\n%s\n\n%s\n' % (headline, + hexdump(packet, dump=True), + packet.show(dump=True)) def ppc(headline, capture, limit=10): diff --git a/test/vpp_interface.py b/test/vpp_interface.py index 719f77b36c7a..58384d2eeb9f 100644 --- a/test/vpp_interface.py +++ b/test/vpp_interface.py @@ -5,7 +5,7 @@ from six import moves from util import Host, mk_ll_addr -from vpp_papi import mac_pton, mac_ntop +from vpp_papi import mac_ntop class VppInterface(object): @@ -273,10 +273,9 @@ def configure_ipv4_neighbors(self): :param vrf_id: The FIB table / VRF ID. (Default value = 0) """ for host in self._remote_hosts: - macn = mac_pton(host.mac) - ipn = host.ip4n - self.test.vapi.ip_neighbor_add_del( - self.sw_if_index, macn, ipn) + self.test.vapi.ip_neighbor_add_del(self.sw_if_index, + host.mac, + host.ip4) def config_ip6(self): """Configure IPv6 address on the VPP interface.""" @@ -304,10 +303,9 @@ def configure_ipv6_neighbors(self): :param vrf_id: The FIB table / VRF ID. 
(Default value = 0) """ for host in self._remote_hosts: - macn = mac_pton(host.mac) - ipn = host.ip6n - self.test.vapi.ip_neighbor_add_del( - self.sw_if_index, macn, ipn, is_ipv6=1) + self.test.vapi.ip_neighbor_add_del(self.sw_if_index, + host.mac, + host.ip6) def unconfig(self): """Unconfigure IPv6 and IPv4 address on the VPP interface.""" diff --git a/test/vpp_ip.py b/test/vpp_ip.py index fe985fb901ce..8b7ea222a67a 100644 --- a/test/vpp_ip.py +++ b/test/vpp_ip.py @@ -7,6 +7,10 @@ from ipaddress import ip_address from socket import AF_INET, AF_INET6 from vpp_papi import VppEnum +try: + text_type = unicode +except NameError: + text_type = str _log = logging.getLogger(__name__) @@ -26,7 +30,7 @@ class DpoProto: class VppIpAddressUnion(): def __init__(self, addr): self.addr = addr - self.ip_addr = ip_address(unicode(self.addr)) + self.ip_addr = ip_address(text_type(self.addr)) def encode(self): if self.version == 6: @@ -191,8 +195,8 @@ def __init__(self, saddr, gaddr, len): self.saddr = saddr self.gaddr = gaddr self.len = len - self.ip_saddr = ip_address(unicode(self.saddr)) - self.ip_gaddr = ip_address(unicode(self.gaddr)) + self.ip_saddr = ip_address(text_type(self.saddr)) + self.ip_gaddr = ip_address(text_type(self.gaddr)) if self.ip_saddr.version != self.ip_gaddr.version: raise ValueError('Source and group addresses must be of the ' 'same address family.') diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py index 5a6598eb0124..b3d12938d998 100644 --- a/test/vpp_ip_route.py +++ b/test/vpp_ip_route.py @@ -279,13 +279,13 @@ def __init__( is_dvr=0, next_hop_id=0xffffffff, proto=DpoProto.DPO_PROTO_IP4): + self.proto = proto self.nh_itf = nh_sw_if_index self.nh_table_id = nh_table_id self.nh_via_label = nh_via_label self.nh_labels = labels self.weight = 1 self.rpf_id = rpf_id - self.proto = proto if self.proto is DpoProto.DPO_PROTO_IP6: self.nh_addr = inet_pton(AF_INET6, nh_addr) elif self.proto is DpoProto.DPO_PROTO_IP4: @@ -391,8 +391,7 @@ def modify(self, 
paths, is_local=0, self.is_prohibit = is_prohibit def add_vpp_config(self): - if self.is_local or self.is_unreach or \ - self.is_prohibit or self.is_drop: + if self.is_unreach or self.is_prohibit or self.is_drop: r = self._test.vapi.ip_add_del_route( self.dest_addr, self.dest_addr_len, @@ -421,6 +420,7 @@ def add_vpp_config(self): next_hop_id=path.next_hop_id, is_ipv6=self.is_ip6, is_dvr=path.is_dvr, + is_local=self.is_local, is_resolve_host=path.is_resolve_host, is_resolve_attached=path.is_resolve_attached, is_source_lookup=path.is_source_lookup, @@ -430,8 +430,7 @@ def add_vpp_config(self): self._test.registry.register(self, self._test.logger) def remove_vpp_config(self): - if self.is_local or self.is_unreach or \ - self.is_prohibit or self.is_drop: + if self.is_unreach or self.is_prohibit or self.is_drop: self._test.vapi.ip_add_del_route( self.dest_addr, self.dest_addr_len, diff --git a/test/vpp_ipsec.py b/test/vpp_ipsec.py new file mode 100644 index 000000000000..917574ee9770 --- /dev/null +++ b/test/vpp_ipsec.py @@ -0,0 +1,254 @@ +from vpp_object import * +from ipaddress import ip_address +from vpp_papi import VppEnum + +try: + text_type = unicode +except NameError: + text_type = str + + +class VppIpsecSpd(VppObject): + """ + VPP SPD DB + """ + + def __init__(self, test, id): + self.test = test + self.id = id + + def add_vpp_config(self): + self.test.vapi.ipsec_spd_add_del(self.id) + self.test.registry.register(self, self.test.logger) + + def remove_vpp_config(self): + self.test.vapi.ipsec_spd_add_del(self.id, is_add=0) + + def __str__(self): + return self.object_id() + + def object_id(self): + return "ipsec-spd-%d" % self.id + + def query_vpp_config(self): + spds = self.test.vapi.ipsec_spds_dump() + for spd in spds: + if spd.spd_id == self.id: + return True + return False + + +class VppIpsecSpdItfBinding(VppObject): + """ + VPP SPD DB to interface binding + (i.e. 
this SPD is used on this interfce) + """ + + def __init__(self, test, spd, itf): + self.test = test + self.spd = spd + self.itf = itf + + def add_vpp_config(self): + self.test.vapi.ipsec_interface_add_del_spd(self.spd.id, + self.itf.sw_if_index) + self.test.registry.register(self, self.test.logger) + + def remove_vpp_config(self): + self.test.vapi.ipsec_interface_add_del_spd(self.spd.id, + self.itf.sw_if_index, + is_add=0) + + def __str__(self): + return self.object_id() + + def object_id(self): + return "bind-%s-to-%s" % (self.spd.id, self.itf) + + def query_vpp_config(self): + bs = self.test.vapi.ipsec_spd_interface_dump() + for b in bs: + if b.sw_if_index == self.itf.sw_if_index: + return True + return False + + +class VppIpsecSpdEntry(VppObject): + """ + VPP SPD DB Entry + """ + + def __init__(self, test, spd, sa_id, + local_start, local_stop, + remote_start, remote_stop, + proto, + priority=100, + policy=None, + is_outbound=1, + remote_port_start=0, + remote_port_stop=65535, + local_port_start=0, + local_port_stop=65535): + self.test = test + self.spd = spd + self.sa_id = sa_id + self.local_start = ip_address(text_type(local_start)) + self.local_stop = ip_address(text_type(local_stop)) + self.remote_start = ip_address(text_type(remote_start)) + self.remote_stop = ip_address(text_type(remote_stop)) + self.proto = proto + self.is_outbound = is_outbound + self.priority = priority + if not policy: + self.policy = (VppEnum.vl_api_ipsec_spd_action_t. 
+ IPSEC_API_SPD_ACTION_BYPASS) + else: + self.policy = policy + self.is_ipv6 = (0 if self.local_start.version == 4 else 1) + self.local_port_start = local_port_start + self.local_port_stop = local_port_stop + self.remote_port_start = remote_port_start + self.remote_port_stop = remote_port_stop + + def add_vpp_config(self): + rv = self.test.vapi.ipsec_spd_entry_add_del( + self.spd.id, + self.sa_id, + self.local_start, + self.local_stop, + self.remote_start, + self.remote_stop, + protocol=self.proto, + is_ipv6=self.is_ipv6, + is_outbound=self.is_outbound, + priority=self.priority, + policy=self.policy, + local_port_start=self.local_port_start, + local_port_stop=self.local_port_stop, + remote_port_start=self.remote_port_start, + remote_port_stop=self.remote_port_stop) + self.stat_index = rv.stat_index + self.test.registry.register(self, self.test.logger) + + def remove_vpp_config(self): + self.test.vapi.ipsec_spd_entry_add_del( + self.spd.id, + self.sa_id, + self.local_start, + self.local_stop, + self.remote_start, + self.remote_stop, + protocol=self.proto, + is_ipv6=self.is_ipv6, + is_outbound=self.is_outbound, + priority=self.priority, + policy=self.policy, + local_port_start=self.local_port_start, + local_port_stop=self.local_port_stop, + remote_port_start=self.remote_port_start, + remote_port_stop=self.remote_port_stop, + is_add=0) + + def __str__(self): + return self.object_id() + + def object_id(self): + return "spd-entry-%d-%d-%d-%d-%d-%d" % (self.spd.id, + self.priority, + self.policy, + self.is_outbound, + self.is_ipv6, + self.remote_port_start) + + def query_vpp_config(self): + ss = self.test.vapi.ipsec_spd_dump(self.spd.id) + for s in ss: + if s.entry.sa_id == self.sa_id and \ + s.entry.is_outbound == self.is_outbound and \ + s.entry.priority == self.priority and \ + s.entry.policy == self.policy and \ + s.entry.remote_address_start == self.remote_start and \ + s.entry.remote_port_start == self.remote_port_start: + return True + return False + + def 
get_stats(self): + c = self.test.statistics.get_counter("/net/ipsec/policy") + return c[0][self.stat_index] + + +class VppIpsecSA(VppObject): + """ + VPP SAD Entry + """ + + def __init__(self, test, id, spi, + integ_alg, integ_key, + crypto_alg, crypto_key, + proto, + tun_src=None, tun_dst=None, + flags=None): + e = VppEnum.vl_api_ipsec_sad_flags_t + self.test = test + self.id = id + self.spi = spi + self.integ_alg = integ_alg + self.integ_key = integ_key + self.crypto_alg = crypto_alg + self.crypto_key = crypto_key + self.proto = proto + + self.tun_src = tun_src + self.tun_dst = tun_dst + if not flags: + self.flags = e.IPSEC_API_SAD_FLAG_NONE + else: + self.flags = flags + if (tun_src): + self.tun_src = ip_address(text_type(tun_src)) + self.flags = self.flags | e.IPSEC_API_SAD_FLAG_IS_TUNNEL + if (self.tun_src.version == 6): + self.flags = self.flags | e.IPSEC_API_SAD_FLAG_IS_TUNNEL_V6 + if (tun_dst): + self.tun_dst = ip_address(text_type(tun_dst)) + + def add_vpp_config(self): + self.test.vapi.ipsec_sad_entry_add_del( + self.id, + self.spi, + self.integ_alg, + self.integ_key, + self.crypto_alg, + self.crypto_key, + self.proto, + (self.tun_src if self.tun_src else []), + (self.tun_dst if self.tun_dst else []), + flags=self.flags) + self.test.registry.register(self, self.test.logger) + + def remove_vpp_config(self): + self.test.vapi.ipsec_sad_entry_add_del( + self.id, + self.spi, + self.integ_alg, + self.integ_key, + self.crypto_alg, + self.crypto_key, + self.proto, + (self.tun_src if self.tun_src else []), + (self.tun_dst if self.tun_dst else []), + flags=self.flags, + is_add=0) + + def __str__(self): + return self.object_id() + + def object_id(self): + return "ipsec-sa-%d" % self.id + + def query_vpp_config(self): + bs = self.test.vapi.ipsec_sa_dump() + for b in bs: + if b.entry.sad_id == self.id: + return True + return False diff --git a/test/vpp_neighbor.py b/test/vpp_neighbor.py index 7815a286fef7..7391447ff9b6 100644 --- a/test/vpp_neighbor.py +++ 
b/test/vpp_neighbor.py @@ -4,25 +4,22 @@ object abstractions for ARP and ND """ -from socket import inet_pton, inet_ntop, AF_INET, AF_INET6 +from ipaddress import ip_address from vpp_object import * -from vpp_papi import mac_pton +from vpp_papi import mac_pton, VppEnum -def find_nbr(test, sw_if_index, ip_addr, is_static=0, inet=AF_INET, mac=None): +def find_nbr(test, sw_if_index, nbr_addr, is_static=0, mac=None): + ip_addr = ip_address(unicode(nbr_addr)) + e = VppEnum.vl_api_ip_neighbor_flags_t nbrs = test.vapi.ip_neighbor_dump(sw_if_index, - is_ipv6=1 if AF_INET6 == inet else 0) - if inet == AF_INET: - s = 4 - else: - s = 16 - nbr_addr = inet_pton(inet, ip_addr) + is_ipv6=(6 is ip_addr.version)) for n in nbrs: - if nbr_addr == n.ip_address[:s] \ - and is_static == n.is_static: + if ip_addr == n.neighbor.ip_address and \ + is_static == (n.neighbor.flags & e.IP_API_NEIGHBOR_FLAG_STATIC): if mac: - if n.mac_address == mac_pton(mac): + if mac == str(n.neighbor.mac_address): return True else: return True @@ -35,25 +32,26 @@ class VppNeighbor(VppObject): """ def __init__(self, test, sw_if_index, mac_addr, nbr_addr, - af=AF_INET, is_static=False, is_no_fib_entry=0): + is_static=False, is_no_fib_entry=False): self._test = test self.sw_if_index = sw_if_index - self.mac_addr = mac_pton(mac_addr) - self.af = af - self.is_static = is_static - self.is_no_fib_entry = is_no_fib_entry + self.mac_addr = mac_addr self.nbr_addr = nbr_addr - self.nbr_addr_n = inet_pton(af, nbr_addr) + + e = VppEnum.vl_api_ip_neighbor_flags_t + self.flags = e.IP_API_NEIGHBOR_FLAG_NONE + if is_static: + self.flags |= e.IP_API_NEIGHBOR_FLAG_STATIC + if is_no_fib_entry: + self.flags |= e.IP_API_NEIGHBOR_FLAG_NO_FIB_ENTRY def add_vpp_config(self): r = self._test.vapi.ip_neighbor_add_del( self.sw_if_index, self.mac_addr, - self.nbr_addr_n, + self.nbr_addr, is_add=1, - is_ipv6=1 if AF_INET6 == self.af else 0, - is_static=self.is_static, - is_no_adj_fib=self.is_no_fib_entry) + flags=self.flags) 
self.stats_index = r.stats_index self._test.registry.register(self, self._test.logger) @@ -61,17 +59,19 @@ def remove_vpp_config(self): self._test.vapi.ip_neighbor_add_del( self.sw_if_index, self.mac_addr, - self.nbr_addr_n, - is_ipv6=1 if AF_INET6 == self.af else 0, + self.nbr_addr, is_add=0, - is_static=self.is_static) + flags=self.flags) + + def is_static(self): + e = VppEnum.vl_api_ip_neighbor_flags_t + return (self.flags & e.IP_API_NEIGHBOR_FLAG_STATIC) def query_vpp_config(self): return find_nbr(self._test, self.sw_if_index, self.nbr_addr, - self.is_static, - self.af) + self.is_static()) def __str__(self): return self.object_id() diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 1ca9a89a2eaa..249288b4f7f1 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -1,25 +1,12 @@ -import fnmatch import os import time from collections import deque from six import moves -from vpp_papi import mac_pton +from vpp_papi import VPP, mac_pton from hook import Hook from vpp_l2 import L2_PORT_TYPE -# Sphinx creates auto-generated documentation by importing the python source -# files and collecting the docstrings from them. The NO_VPP_PAPI flag allows -# the vpp_papi_provider.py file to be importable without having to build -# the whole vpp api if the user only wishes to generate the test documentation. 
- -try: - from vpp_papi import VPP -except ImportError: - if not os.getenv("NO_VPP_PAPI") == 1: - raise - pass - # from vnet/vnet/mpls/mpls_types.h MPLS_IETF_MAX_LABEL = 0xfffff MPLS_LABEL_INVALID = MPLS_IETF_MAX_LABEL + 1 @@ -69,7 +56,7 @@ class VppPapiProvider(object): _zero, _negative = range(2) def __init__(self, name, shm_prefix, test_class, read_timeout): - self.hook = Hook("vpp-papi-provider") + self.hook = Hook(test_class) self.name = name self.shm_prefix = shm_prefix self.test_class = test_class @@ -350,9 +337,9 @@ def set_ip_flow_hash(self, 'reverse': reverse, 'is_ipv6': is_ip6}) - def ip6_nd_proxy(self, address, sw_if_index, is_del=0): + def ip6_nd_proxy(self, ip, sw_if_index, is_del=0): return self.api(self.papi.ip6nd_proxy_add_del, - {'address': address, + {'ip': ip, 'sw_if_index': sw_if_index, 'is_del': is_del}) @@ -380,8 +367,10 @@ def ip6_sw_interface_ra_prefix(self, pref_lifetime=0xffffffff): return self.api(self.papi.sw_interface_ip6nd_ra_prefix, {'sw_if_index': sw_if_index, - 'address': address, - 'address_length': address_length, + 'prefix': { + 'address': address, + 'address_length': address_length, + }, 'use_default': use_default, 'no_advertise': no_advertise, 'off_link': off_link, @@ -503,16 +492,16 @@ def bd_ip_mac_dump(self, bd_id): return self.api(self.papi.bd_ip_mac_dump, {'bd_id': bd_id}) - def want_ip4_arp_events(self, enable_disable=1, address=0): + def want_ip4_arp_events(self, enable_disable=1, ip="0.0.0.0"): return self.api(self.papi.want_ip4_arp_events, {'enable_disable': enable_disable, - 'address': address, + 'ip': ip, 'pid': os.getpid(), }) - def want_ip6_nd_events(self, enable_disable=1, address=0): + def want_ip6_nd_events(self, enable_disable=1, ip="::"): return self.api(self.papi.want_ip6_nd_events, {'enable_disable': enable_disable, - 'address': address, + 'ip': ip, 'pid': os.getpid(), }) def want_ip6_ra_events(self, enable_disable=1): @@ -1018,32 +1007,28 @@ def ip6_fib_dump(self): def ip_neighbor_add_del(self, 
sw_if_index, mac_address, - dst_address, + ip_address, is_add=1, - is_ipv6=0, - is_static=0, - is_no_adj_fib=0, - ): + flags=0): """ Add neighbor MAC to IPv4 or IPv6 address. :param sw_if_index: :param mac_address: :param dst_address: :param is_add: (Default value = 1) - :param is_ipv6: (Default value = 0) - :param is_static: (Default value = 0) - :param is_no_adj_fib: (Default value = 0) + :param flags: (Default value = 0/NONE) """ return self.api( self.papi.ip_neighbor_add_del, - {'sw_if_index': sw_if_index, - 'is_add': is_add, - 'is_ipv6': is_ipv6, - 'is_static': is_static, - 'is_no_adj_fib': is_no_adj_fib, - 'mac_address': mac_address, - 'dst_address': dst_address - } + { + 'is_add': is_add, + 'neighbor': { + 'sw_if_index': sw_if_index, + 'flags': flags, + 'mac_address': mac_address, + 'ip_address': ip_address + } + } ) def ip_neighbor_dump(self, @@ -1063,9 +1048,9 @@ def ip_neighbor_dump(self, ) def proxy_arp_add_del(self, - low_address, - hi_address, - vrf_id=0, + low, + hi, + table_id=0, is_add=1): """ Config Proxy Arp Range. 
@@ -1078,9 +1063,9 @@ def proxy_arp_add_del(self, self.papi.proxy_arp_add_del, {'proxy': { - 'vrf_id': vrf_id, - 'low_address': low_address, - 'hi_address': hi_address, + 'table_id': table_id, + 'low': low, + 'hi': hi, }, 'is_add': is_add}) @@ -3359,6 +3344,9 @@ def ipsec_spd_add_del(self, spd_id, is_add=1): self.papi.ipsec_spd_add_del, { 'spd_id': spd_id, 'is_add': is_add}) + def ipsec_spds_dump(self): + return self.api(self.papi.ipsec_spds_dump, {}) + def ipsec_interface_add_del_spd(self, spd_id, sw_if_index, is_add=1): """ IPSEC interface SPD add/del - \ Wrapper to associate/disassociate SPD to interface in VPP @@ -3375,7 +3363,12 @@ def ipsec_interface_add_del_spd(self, spd_id, sw_if_index, is_add=1): self.papi.ipsec_interface_add_del_spd, {'spd_id': spd_id, 'sw_if_index': sw_if_index, 'is_add': is_add}) - def ipsec_sad_add_del_entry(self, + def ipsec_spd_interface_dump(self, spd_index=None): + return self.api(self.papi.ipsec_spd_interface_dump, + {'spd_index': spd_index if spd_index else 0, + 'spd_index_valid': 1 if spd_index else 0}) + + def ipsec_sad_entry_add_del(self, sad_id, spi, integrity_algorithm, @@ -3385,12 +3378,8 @@ def ipsec_sad_add_del_entry(self, protocol, tunnel_src_address='', tunnel_dst_address='', - is_tunnel=1, - is_tunnel_ipv6=0, - is_add=1, - udp_encap=0, - use_anti_replay=0, - use_extended_sequence_number=0): + flags=0, + is_add=1): """ IPSEC SA add/del :param sad_id: security association ID :param spi: security param index of the SA in decimal @@ -3407,27 +3396,35 @@ def ipsec_sad_add_del_entry(self, crypto and ipsec algorithms """ return self.api( - self.papi.ipsec_sad_add_del_entry, - {'sad_id': sad_id, - 'spi': spi, - 'tunnel_src_address': tunnel_src_address, - 'tunnel_dst_address': tunnel_dst_address, - 'protocol': protocol, - 'integrity_algorithm': integrity_algorithm, - 'integrity_key_length': len(integrity_key), - 'integrity_key': integrity_key, - 'crypto_algorithm': crypto_algorithm, - 'crypto_key_length': len(crypto_key) if 
crypto_key is not None - else 0, - 'crypto_key': crypto_key, - 'is_add': is_add, - 'is_tunnel': is_tunnel, - 'is_tunnel_ipv6': is_tunnel_ipv6, - 'udp_encap': udp_encap, - 'use_extended_sequence_number': use_extended_sequence_number, - 'use_anti_replay': use_anti_replay}) + self.papi.ipsec_sad_entry_add_del, + { + 'is_add': is_add, + 'entry': + { + 'sad_id': sad_id, + 'spi': spi, + 'tunnel_src': tunnel_src_address, + 'tunnel_dst': tunnel_dst_address, + 'protocol': protocol, + 'integrity_algorithm': integrity_algorithm, + 'integrity_key': { + 'length': len(integrity_key), + 'data': integrity_key, + }, + 'crypto_algorithm': crypto_algorithm, + 'crypto_key': { + 'length': len(crypto_key), + 'data': crypto_key, + }, + 'flags': flags, + } + }) + + def ipsec_sa_dump(self, sa_id=None): + return self.api(self.papi.ipsec_sa_dump, + {'sa_id': sa_id if sa_id else 0xffffffff}) - def ipsec_spd_add_del_entry(self, + def ipsec_spd_entry_add_del(self, spd_id, sa_id, local_address_start, @@ -3466,24 +3463,33 @@ def ipsec_spd_add_del_entry(self, :param is_add: (Default value = 1) """ return self.api( - self.papi.ipsec_spd_add_del_entry, - {'spd_id': spd_id, - 'sa_id': sa_id, - 'local_address_start': local_address_start, - 'local_address_stop': local_address_stop, - 'remote_address_start': remote_address_start, - 'remote_address_stop': remote_address_stop, - 'local_port_start': local_port_start, - 'local_port_stop': local_port_stop, - 'remote_port_start': remote_port_start, - 'remote_port_stop': remote_port_stop, - 'is_add': is_add, - 'protocol': protocol, - 'policy': policy, - 'priority': priority, - 'is_outbound': is_outbound, - 'is_ipv6': is_ipv6, - 'is_ip_any': is_ip_any}) + self.papi.ipsec_spd_entry_add_del, + { + 'is_add': is_add, + 'entry': + { + 'spd_id': spd_id, + 'sa_id': sa_id, + 'local_address_start': local_address_start, + 'local_address_stop': local_address_stop, + 'remote_address_start': remote_address_start, + 'remote_address_stop': remote_address_stop, + 
'local_port_start': local_port_start, + 'local_port_stop': local_port_stop, + 'remote_port_start': remote_port_start, + 'remote_port_stop': remote_port_stop, + 'protocol': protocol, + 'policy': policy, + 'priority': priority, + 'is_outbound': is_outbound, + 'is_ip_any': is_ip_any + } + }) + + def ipsec_spd_dump(self, spd_id, sa_id=0xffffffff): + return self.api(self.papi.ipsec_spd_dump, + {'spd_id': spd_id, + 'sa_id': sa_id}) def ipsec_tunnel_if_add_del(self, local_ip, remote_ip, local_spi, remote_spi, crypto_alg, local_crypto_key, @@ -3599,7 +3605,7 @@ def gbp_endpoint_dump(self): return self.api(self.papi.gbp_endpoint_dump, {'_no_type_conversion': True}) - def gbp_endpoint_group_add(self, epg, bd, + def gbp_endpoint_group_add(self, epg, sclass, bd, rd, uplink_sw_if_index): """ GBP endpoint group Add """ return self.api(self.papi.gbp_endpoint_group_add, @@ -3608,7 +3614,8 @@ def gbp_endpoint_group_add(self, epg, bd, 'uplink_sw_if_index': uplink_sw_if_index, 'bd_id': bd, 'rd_id': rd, - 'epg_id': epg + 'epg_id': epg, + 'sclass': sclass }}) def gbp_endpoint_group_del(self, epg): @@ -3622,7 +3629,8 @@ def gbp_endpoint_group_dump(self): def gbp_bridge_domain_add(self, bd_id, flags, bvi_sw_if_index, - uu_fwd_sw_if_index): + uu_fwd_sw_if_index, + bm_flood_sw_if_index): """ GBP bridge-domain Add """ return self.api(self.papi.gbp_bridge_domain_add, {'bd': @@ -3630,6 +3638,7 @@ def gbp_bridge_domain_add(self, bd_id, flags, 'flags': flags, 'bvi_sw_if_index': bvi_sw_if_index, 'uu_fwd_sw_if_index': uu_fwd_sw_if_index, + 'bm_flood_sw_if_index': bm_flood_sw_if_index, 'bd_id': bd_id }})