<?xml version="1.0" encoding="utf-8"?>
<!-- 
     draft-rfcxml-general-template-standard-00
  
     This template includes examples of the most commonly used features of RFCXML with comments 
     explaining how to customise them. This template can be quickly turned into an I-D by editing 
     the examples provided. Look for [REPLACE], [REPLACE/DELETE], [CHECK] and edit accordingly.
     Note - 'DELETE' means delete the element or attribute, not just the contents.
     
     Documentation is at https://authors.ietf.org/en/templates-and-schemas
-->
<?xml-model href="rfc7991bis.rnc"?>  <!-- Required for schema validation and schema-aware editing -->
<!-- <?xml-stylesheet type="text/xsl" href="rfc2629.xslt" ?> -->
<!-- This third-party XSLT can be enabled for direct transformations in XML processors, including most browsers -->


<!DOCTYPE rfc [
  <!ENTITY nbsp    "&#160;">
  <!ENTITY zwsp   "&#8203;">
  <!ENTITY nbhy   "&#8209;">
  <!ENTITY wj     "&#8288;">
]>
<!-- If further character entities are required then they should be added to the DOCTYPE above.
     Use of an external entity file is not recommended. -->

<rfc
  xmlns:xi="http://www.w3.org/2001/XInclude"
  category="info"
  docName="draft-samizadeh-bmwg-cni-benchmarking-01"
  ipr="trust200902"
  obsoletes=""
  updates=""
  submissionType="IETF"
  xml:lang="en"
  version="3">
  
<!-- [REPLACE] 
       * docName with name of your draft
     [CHECK] 
       * category should be one of std, bcp, info, exp, historic
       * ipr should be one of trust200902, noModificationTrust200902, noDerivativesTrust200902, pre5378Trust200902
       * updates can be an RFC number as NNNN
       * obsoletes can be an RFC number as NNNN 
-->

  <front>
    <title abbrev="CNI Telco-Cloud Benchmarking">CNI Telco-Cloud Benchmarking Considerations</title>
    <!--  [REPLACE/DELETE] abbrev. The abbreviated title is required if the full title is longer than 39 characters -->

    <seriesInfo name="Internet-Draft" value="draft-samizadeh-bmwg-cni-benchmarking-01"/>
   
    <author fullname="Tina Samizadeh" initials="T." surname="Samizadeh">
      <!-- [CHECK]
             * initials should not include an initial for the surname
             * role="editor" is optional -->
    <!-- Can have more than one author -->
      
    <!-- all of the following elements are optional -->
      <organization>fortiss GmbH</organization>
      <address>
        <postal>
          <!-- Reorder these if your country does things differently -->
          <street>Guerickestr. 25</street>
          <city>Munich</city>
          <code>80805</code>
          <country>DE</country>
          <!-- Uses two letter country code -->
        </postal>        
        <email>samizadeh@fortiss.org</email>  
        <!-- Can have more than one <email> element -->
       
      </address>
    </author>

    <author fullname="George Koukis" initials="G." surname="Koukis">
      <!-- [CHECK]
             * initials should not include an initial for the surname
             * role="editor" is optional -->
    <!-- Can have more than one author -->
      
    <!-- all of the following elements are optional -->
      <organization>ATHENA RC</organization>
      <address>
        <postal>
          <!-- Reorder these if your country does things differently -->
          <street>University Campus South Entrance</street>
          <city>Xanthi</city>
          <code>67100</code>
          <country>Greece</country>
          <!-- Uses two letter country code -->
        </postal>        
        <email>George.Koukis@athenarc.gr</email>  
        <!-- Can have more than one <email> element -->
      </address>
    </author>
    
     <author fullname="Rute C. Sofia" initials="R. C." surname="Sofia">
      <!-- [CHECK]
             * initials should not include an initial for the surname
             * role="editor" is optional -->
    <!-- Can have more than one author -->
      
    <!-- all of the following elements are optional -->
      <organization>fortiss GmbH</organization>
      <address>
        <postal>
          <!-- Reorder these if your country does things differently -->
          <street>Guerickestr. 25</street>
          <city>Munich</city>
          <code>80805</code>
          <country>DE</country>
          <!-- Uses two letter country code -->
        </postal>        
        <email>sofia@fortiss.org</email>  
        <!-- Can have more than one <email> element -->
        <uri>https://www.rutesofia.com</uri>
      </address>
    </author>
    
     <author fullname="Lefteris Mamatas" initials="L." surname="Mamatas">
      <!-- [CHECK]
             * initials should not include an initial for the surname
             * role="editor" is optional -->
    <!-- Can have more than one author -->
      
    <!-- all of the following elements are optional -->
      <organization>University of Macedonia</organization>
      <address>
        <postal>
          <!-- Reorder these if your country does things differently -->
          <street>Egnatias 156</street>
          <city>Thessaloniki</city>
          <code>54636</code>
          <country>Greece</country>
          <!-- Uses two letter country code -->
        </postal>        
        <email>emamatas@uom.edu.gr</email>  
        <!-- Can have more than one <email> element -->
      </address>
    </author>
    
      <author fullname="Vassilis Tsaoussidis" initials="V." surname="Tsaoussidis">
      <!-- [CHECK]
             * initials should not include an initial for the surname
             * role="editor" is optional -->
    <!-- Can have more than one author -->
      
    <!-- all of the following elements are optional -->
      <organization>ATHENA RC</organization>
      <address>
        <postal>
          <!-- Reorder these if your country does things differently -->
          <street>University Campus South Entrance</street>
          <city>Xanthi</city>
          <code>67100</code>
          <country>Greece</country>
          <!-- Uses two letter country code -->
        </postal>        
        <email>vassilis.tsaoussidis@gmail.com</email>  
        <!-- Can have more than one <email> element -->
      </address>
    </author>
   

   
     <date day="20" month="October" year="2025" />
    <!-- On draft submission:
         * If only the current year is specified, the current day and month will be used.
         * If the month and year are both specified and are the current ones, the current day will
           be used
         * If the year is not the current one, it is necessary to specify at least a month and day="1" will be used.
    -->


    <area>Operations and Management Area</area>
    <workgroup>Benchmarking Methodology Working Group</workgroup>
    <keyword>Internet-Draft</keyword>
    <keyword>CNI</keyword>
    <keyword>SDN</keyword>
    <keyword>Edge-Cloud</keyword>

    <abstract>
      <t> 
      This document investigates benchmarking methodologies for Kubernetes Container Network Interfaces (CNIs) in Edge-to-Cloud environments. It defines performance, scalability, and observability metrics relevant to CNIs, and aligns with the goals of the IETF Benchmarking Methodology Working Group (BMWG). The document surveys current practices, introduces a repeatable benchmarking framework (e.g., CODEF), and proposes a path toward standardized, vendor-neutral benchmarking procedures for evaluating CNIs in microservice-oriented, distributed infrastructures.
     </t>
   </abstract>
    
  </front>

  <middle>
    
    <section>
      <name>Introduction</name>
      <t>     
     This document presents an initial exploration of benchmarking methodologies for Kubernetes Container Network Interfaces (CNIs) in Edge-to-Cloud environments. It evaluates the performance characteristics of common Kubernetes networking plugins such as Multus, Calico, Cilium, and Flannel within the scope of container orchestration platforms. The draft aims to align with the principles of the IETF Benchmarking Methodology Working Group (BMWG) by proposing a framework for repeatable, comparable, and vendor-neutral benchmarking of CNIs. Emphasis is placed on performance aspects relevant to Software Defined Networking (SDN) architectures and distributed deployments. The goal is to inform the development of formal benchmarking procedures tailored to CNIs in heterogeneous infrastructure scenarios.
</t>
    </section>
      
      <section>
        <name>Requirements Language</name>
        <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL",
          "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT
          RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
          interpreted as described in BCP 14 <xref target="RFC2119"/>
          <xref target="RFC8174"/> when, and only when, they appear in
          all capitals, as shown here.</t>
      </section>
      <!-- [CHECK] The 'Requirements Language' section is optional -->

<section anchor="statement">
  <name>Problem Statement and Alignment with BMWG Goals</name>
  <t>
    BMWG proposes and debates methodologies and metrics to evaluate performance characteristics of networking devices and systems in a repeatable, vendor-neutral, and interoperable manner. While multiple Kubernetes CNI solutions exist and are critical to Kubernetes networking (and, by extension, to telco-cloud networking), there is currently no standardized methodology for benchmarking their performance, resource utilization, or behavior under varying operational conditions. The absence of such standards leads to non-reproducible, vendor-specific results that are difficult to compare or rely on for deployment decisions in edge-cloud contexts.
  </t>

  <t>
    This document aligns with BMWG goals by proposing benchmarking considerations for Kubernetes Container Network Interface (CNI) plugins that adhere to the following principles:
  </t>

  <ul>
    <li>Repeatability and Reproducibility: The draft emphasizes deterministic test environments by leveraging clean-slate container orchestration through automation frameworks such as the experimental open-source Cognitive Decentralised Edge Cloud (CODECO) <xref target="codeco_d10"/> and the Experimentation Framework (CODEF) <xref target="codef"/>. Test cases are repeatable across deployments, and variability in underlying infrastructure (e.g., bare metal vs. virtualized environments) is explicitly documented to preserve reproducibility, following BMWG best practices <xref target="RFC2544"/> and <xref target="RFC7312"/>.</li>

    <li>Vendor-Neutral Evaluation: The proposed approach includes a diverse set of CNIs from multiple vendors and open-source communities, avoiding platform-specific optimizations. CNIs are evaluated under the same environmental and workload conditions to provide fair comparisons, consistent with BMWG's commitment to vendor-agnostic test procedures.</li>

    <li>Metrics-Based Assessment: The document adopts classical benchmarking metrics including latency, throughput, jitter, and resource consumption (CPU, memory), extending them with CNI-relevant attributes such as pod network initialization time and observability overhead. These metrics are aligned with performance evaluation goals outlined in <xref target="RFC1242"/>, <xref target="RFC2285"/>, and more recent benchmarking efforts for virtualized environments <xref target="RFC8172"/>.</li>

    <li>Applicability to Emerging Architectures: The targeted environment includes Edge-to-Cloud deployments, which represent modern distributed system architectures. While BMWG has historically focused on network appliances, this work extends those principles to the networking aspects of containerized and software-defined infrastructures, continuing the evolution of benchmarking methods to address dynamic, microservice-based platforms.</li>

    <li>Traffic and Control Plane Separation: Following BMWG precedent (e.g., <xref target="RFC6808"/>), the methodology distinguishes between control-plane operations (e.g., pod deployment and CNI setup latency) and data-plane behavior (e.g., packet forwarding performance), allowing comprehensive benchmarking of CNIs across operational dimensions.</li>

    <li>Scalability and Stress Testing: The methodology incorporates stress and scalability scenarios, consistent with goals in <xref target="ietf-bmwg-07"/>, to uncover performance degradation points and assess operational resilience of CNIs under heavy load and fault conditions.</li>


    <li>Model Reference: CNIs in Kubernetes follow the models described in <xref target="RFC6808"/>.</li>
  </ul>

  <t>
    This alignment ensures that future extensions of this document toward a formal benchmarking specification can be scoped within the BMWG charter and contribute to standardized practices for container network evaluation.
  </t>

  <section anchor="abbreviations">
    <name>Abbreviations</name>
    
      <ul>
        <li>CNI: Container Network Interface</li>
        <li>SUT: System Under Test</li>
        <li>DUT: Device Under Test</li>
        <li>SDN: Software Defined Networking</li>
        <li>OVS: Open vSwitch</li>
        <li>OVN: Open Virtual Network</li>
        <li>RTT: Round-Trip Time</li>
        <li>eBPF: Extended Berkeley Packet Filter</li>
        <li>ENI: Elastic Network Interface</li>
        <li>QoE: Quality of Experience</li>
      </ul>
    
  </section>

  <section anchor="metrics-scope">
    <name>Scope of Metrics</name>
    <t>
      The core benchmarking metrics in this document, such as latency, throughput, jitter, packet loss, and pod lifecycle time, are aligned with BMWG practices. Additional metrics such as resource usage, energy efficiency, and operational ease are included to reflect real-world operator concerns but are considered informational and outside the core BMWG scope.
    </t>
  </section>
</section>


 

<section anchor="CNI-BenchP">
<name>CNI Benchmarking Key Aspects</name>
<t>
While several performance-benchmarking suites are already available from CNI providers <xref target="cilium-bench"/>, the open-source community <xref target="TNSM21-cni"/>, and also in the IETF BMWG <xref target="ietf-bmwg-07"/>, a comprehensive CNI evaluation SHOULD incorporate relevant performance metrics, scalability aspects and identify bottlenecks. This section provides a view on relevant aspects to ensure reliable and replicable performance evaluation, considering aspects that are relevant from a telco-cloud perspective.
</t>

     
<section anchor="CNI-Performance">
<name>Core Performance Metrics for CNI Benchmarking </name>
<t>
Considering the architecture of microservice-based applications, microservices may interact with each other and external services. Having containerized applications and orchestration platforms like Kubernetes, there is a continuous need to address communication and networking as Kubernetes doesn't handle networking itself. Moreover, communication between containers is extremely important to meet QoS requirements of applications. To evaluate the performance of CNIs there are several metrics that should be taken into account including network throughput, end-to-end latency, pod setup and deletion times, CPU and Memory utilization, etc. 

This section defines the core benchmarking metrics used to assess the performance of Container Network Interface (CNI) plugins in Kubernetes environments. The metrics conform to the standard benchmarking framework set forth in <xref target="RFC2544"/>, <xref target="RFC1242"/>, <xref target="RFC8172"/>, and are extended where necessary to include container-specific control-plane considerations. Measurements MUST be conducted under controlled conditions as described in Section 8, and SHOULD include both steady-state and dynamic workloads.
</t>

<section anchor="CNI-QoS-data-plane">
<name>Data Plane Performance Metrics</name>
<t> Benchmarking Quality of Service (QoS) for CNI plugins typically focuses on traditional performance metrics such as one-way latency, round-trip delay, packet loss, jitter, and achievable data rates under varied network conditions. These metrics are fundamental to assessing the efficiency and responsiveness of a CNI in both intra-cluster and inter-cluster communication scenarios. To ensure comprehensive evaluation, the benchmarking methodology SHOULD include tests using multiple transport protocols, primarily TCP and UDP. This is essential, as CNI plugins may exhibit significantly different performance profiles depending on the protocol type due to variations in connection setup, flow control, and packet processing overhead.  For TCP, two key test modes are RECOMMENDED:</t>
 <ul > 
 <li>TCP_RR (Request/Response): Measures the rate at which application-layer request/response pairs can be exchanged over a persistent TCP connection. This reflects transaction latency under connection reuse scenarios.</li>
  <li>TCP_CRR (Connect/Request/Response): Assesses the rate at which new TCP connections can be established, used for a request/response exchange, and torn down. This test exposes connection setup overhead and potential scalability bottlenecks.</li>
 </ul> 
<t>For UDP, the benchmark SHOULD include UDP_RR testing, which captures round-trip time (RTT), latency variation (jitter), and packet loss characteristics under lightweight, connectionless exchanges.</t>

<t>In all tests, the benchmarking suite MUST include a representative range of payload sizes, including at least 64 bytes, 512 bytes, and 1500 bytes. If supported by the underlying network and CNI plugin, jumbo frames (e.g., MTU > 1500 bytes) SHOULD also be tested to expose potential fragmentation penalties and their impact on latency, jitter, and throughput.</t>

<t>These metrics evaluate the efficiency of packet forwarding and transport under varying traffic patterns, and are REQUIRED:</t>
 <ul> 
  <li>One-Way Latency (ms) SHOULD be measured using timestamped probes <xref target="RFC1242"/>.</li>
  <li>RTT (ms) SHOULD be measured via TCP_RR, TCP_CRR, and UDP_RR test modes <xref target="RFC2544"/>.</li>
  <li>Throughput (Mbps or Gbps) SHOULD be assessed via the highest sustained rate of successful packet delivery for the CNI without packet loss <xref target="RFC2544"/>.</li>
  <li>Packet loss rate (%) SHOULD be considered for reliability and congestion tolerance of the CNI <xref target="RFC2544"/>.</li>
  <li>Jitter MAY be relevant to assess variability. High jitter may indicate queuing inefficiencies or variable path latency <xref target="RFC5481"/>.</li>
  <li>Packet size variability SHALL be evaluated using a representative set of frame sizes (64B, 512B, 1500B). If jumbo frames (>1500B) are supported, testing MUST include these cases to expose fragmentation overheads <xref target="RFC2544"/>.</li>
  <li>Concurrent flow handling SHOULD be measured using concurrent connections and sustained request/response patterns for both TCP and UDP <xref target="RFC2285"/>.</li>
  </ul>
</section>

<section anchor="CNI-QoS-control-plane">
<name>Control Plane Performance Metrics</name>
<t> These metrics evaluate the responsiveness of the CNI plugin and Kubernetes components during pod and network lifecycle operations and are REQUIRED:</t>
 
 <ul> 
  <li>Pod initialization time (s) SHOULD be measured from kubelet interaction to completion of CNI ADD operation <xref target="RFC8172"/>.</li>
  <li>Pod deletion time (s) SHOULD be measured to understand issues with tear down <xref target="RFC8172"/>.</li>
  <li>CNI plugin deployment time (s) SHOULD be assessed, to understand the duration required for each CNI plugin to be fully deployed across the whole network (cluster nodes).</li>
  </ul>
</section>

<section anchor="CNI-QoS-system-plane">
<name>System Resource Performance Metrics</name>
<t>These metrics are essential in resource-constrained environments (e.g., edge deployments) where efficiency impacts scalability and are RECOMMENDED:</t>
 
 <ul> 
  <li>CPU/GPU utilization SHOULD be reported per node and per CNI process <xref target="RFC8172"/>.</li>
  <li>Memory utilization (MB/GB) measurements MUST consider average and peak memory used by the CNI <xref target="RFC8172"/>.</li>
  <li>CNIs SHOULD be evaluated under varying load conditions (idle, low-traffic, high traffic).</li>
  </ul>
  <t> The CPU and memory footprint of a Container Network Interface (CNI) plugin has substantial implications for workload density and system scalability, especially in resource-constrained or heterogeneous environments. In modern Edge-to-Cloud deployments, often comprising diverse processor architectures (e.g., ARM64, AMD64) and variable memory constraints, resource efficiency is critical to maximizing node utilization and sustaining performance. 

The architectural design of a CNI directly affects its resource profile. CNIs with extensive feature sets and complex data-plane capabilities such as policy enforcement, encryption, overlay encapsulation (e.g., VXLAN, IP-in-IP), or eBPF/XDP acceleration tend to exhibit higher CPU and memory consumption. For example, CNIs that perform user-space packet processing typically incur higher overhead, as each packet traverses the kernel-user boundary multiple times, resulting in increased CPU cycles and memory copies <xref target="RFC8172"/>. In contrast, in-kernel eBPF-based processing can reduce such overhead by executing directly in the Linux kernel <xref target="RFC9315"/>. 

In cloud-native deployments, CNIs that manage external interfaces (e.g., Elastic Network Interfaces (ENIs) in public cloud environments) may also introduce persistent memory usage due to API caching, state tracking, and metadata management <xref target="aws-vpc-cni-docs"/>. These variabilities are further amplified under dynamic workloads. It is frequently observed that a CNI optimized for high-throughput TCP bulk traffic may perform suboptimally under UDP-heavy traffic, high pod churn, or policy-intensive workloads. These behavioral differences necessitate a systematic and multi-dimensional benchmarking approach. 

 Accordingly, a robust benchmarking methodology SHOULD assess each CNI under at least three operating states: idle, low-traffic (and low load), high traffic (and high load). Such profiling enables the identification of baseline resource usage, saturation thresholds, and degradation points ("performance peaks"). Measurements SHOULD be taken at both the node level (e.g., using Prometheus <xref target="prometheus-docs"/>) and at the container or pod level (e.g., using cAdvisor <xref target="cadvisor-docs"/>). These practices are consistent with recommendations for virtualized and cloud-native benchmarking environments as described in <xref target="RFC8172"/>. </t>
</section>
</section>

<section anchor="CNI-QoS-optional">
<name>Extended  Performance Metrics (Optional) </name>
<t>While outside the core BMWG scope, these metrics reflect real-world operator needs and may be included for extended analysis, in particular for edge-cloud heterogeneous and resource constrained scenarios. As such, the following metrics are RECOMMENDED:</t>
 
 <ul> 
  <li>Policy enforcement delay (ms)</li>
  <li>Telemetry overhead.</li>
  <li>Power and energy consumption (J per bit). Where applicable, node- or pod-level energy usage MAY be reported using tools such as Kepler. Results SHOULD include error margins due to estimation variance, or energy models.</li>
  </ul>
  
 <t>While not core to BMWG benchmarking, and currently non-normative, energy metrics MAY be collected where relevant. Tools such as Kepler MAY be used, but results SHOULD be accompanied by a disclaimer about accuracy limitations in virtualized environments, and also on issues related with the applied energy models. A related discussion on energy metrics and energy-sensitivity can be found in IETF GREEN, <xref target="draft-ea-ds"/>, and in the IRTF NMRG <xref target="I-D.irtf-nmrg-energy-aware"/>, as well as in IRTF SUSTAIN.</t>
</section>

<section anchor="CNI-QoE">
<name>Extended Quality of Experience for DevOps and Developers (Optional)</name>
<t>
Quality of Experience (QoE) benchmarking for Container Network Interface (CNI) plugins extends beyond conventional network performance metrics such as latency and throughput. It focuses on assessing operational usability, deployment efficiency, and portability, i.e., factors that directly affect the user experience of platform administrators, DevOps engineers, and developers. For instance, time to deploy or configure the CNI, ease of troubleshooting, and impact of the CNI on application performance are examples of QoE parameters.
</t>
<t>
Key QoE indicators are OPTIONAL and MAY include:
</t>
<ul>
<li>Deployment time, the time required to install or upgrade a CNI plugin using declarative tooling (e.g., Helm charts, YAML manifests).</li>
<li>Configuration simplicity, the extent to which configuration is automated, validated, and integrated with Kubernetes-native workflows.</li>
<li>Troubleshooting tooling, the presence of purpose-built CLI utilities that simplify diagnostics, expose internal CNI state, and reduce reliance on low-level log inspection or manual kubectl commands.</li> 
</ul> 
<t> For example, CNI-specific command-line interfaces such as cilium and calicoctl provide capabilities such as one-command installation, real-time policy and connectivity status, and automated diagnostics. The cilium status --verbose command provides IPAM allocations, agent health, and datapath metrics, while the calicoctl node diags command generates complete diagnostic bundles for analysis.</t>

<t>CNI integration with Kubernetes distribution CLIs (e.g., k3s, MicroK8s) further improves QoE by streamlining lifecycle operations. For instance, MicroK8s leverages snap-based add-ons that can enable or disable CNIs via a single command, reducing complexity and configuration drift. Although these attributes are not part of the core benchmarking metrics defined by BMWG, their inclusion is RECOMMENDED to reflect practical DevOps concerns and enhance the applicability of CNI benchmarking results in production environments. </t>

</section>





<section anchor="CNI-interoperability">
<name>Interoperability and Scalability</name>
<t>
To ensure comprehensive benchmarking coverage, scalability and stress-testing phases SHOULD be incorporated into the evaluation methodology. These phases are essential to identify the performance ceilings of a given CNI plugin and to assess its behavior under saturation conditions, including whether key observability features remain functional. Such assessments are consistent with guidance outlined in <xref target="RFC8239"/> and extend benchmarking scope beyond nominal operation to failure and recovery modes.

Stress tests SHOULD simulate high-load scenarios by concurrently scaling multiple Kubernetes components. This includes initiating rapid pod-creation bursts, deploying multiple concurrent services and network policies, and triggering controlled resource exhaustion events (e.g., CPU throttling, memory pressure, disk I/O contention). Furthermore, network issues such as increased latency, jitter, or packet loss SHOULD be introduced using tools like <xref target="tc-netem"/> to assess the CNI's robustness under adverse network conditions.

The use of orchestration tools such as Kube-Burner <xref target="kube-burner"/> and chaos engineering frameworks (e.g., Chaos Mesh or Litmus) is RECOMMENDED to coordinate scalable and repeatable test scenarios. Network performance metrics during stress tests MAY be collected with traffic generators such as iperf3, netperf, or k6 <xref target="iperf3"/> <xref target="k6"/>. Benchmark results SHOULD include degradation thresholds, error rates, recovery latency, and metrics export consistency under stress to support the evaluation of CNI resilience and operational observability.
</t>
</section>

<section anchor="CNI-observability">
<name>Observability and Bottleneck Detection</name>
<t>
Observability is critical in identifying performance bottlenecks that may arise due to CNI behavior under stress conditions. Benchmarking SHOULD assess the ability of CNIs to expose metrics such as packet drops, queue lengths, or flow counts through standard telemetry interfaces (e.g., Prometheus, OpenTelemetry). Effective bottleneck detection tools and visibility into the data path are essential for root cause analysis. CNIs that provide native observability tooling (e.g., Cilium Hubble) SHOULD be benchmarked for the overhead and fidelity of these features.
In federated or multi-cluster environments, observability becomes a distributed operation spanning multiple control and data planes. Benchmarking MUST therefore evaluate how CNIs and associated telemetry systems aggregate, synchronize, and correlate metrics across clusters. This includes measuring propagation delays, timestamp alignment, and aggregation accuracy when telemetry data flows through federated collectors/monitoring backends (e.g., Prometheus-Thanos, Cortex). Benchmarks SHOULD also assess the ability to localize inter-cluster bottlenecks such as congested tunnels, gateway saturation, or asymmetric routing, distinguishing local-cluster from cross-cluster traffic degradation.
</t>
</section>

<section anchor="kubernetes-cni-topologies">
<name>Kubernetes CNI topologies</name>
<t>
Kubernetes CNI topologies refer to patterns of network connectivity in a Kubernetes environment used for testing or benchmarking CNIs <xref target="Kubernetes-docs"/>.
</t>
<ul>
<li>Highly-coupled container-to-container communications</li>
<li>Pod-to-Pod communications</li>
<li>Pod-to-Service communications</li>
<li>External-to-Service communications</li>
</ul>

<t>
The benchmarking network topology must operate as an isolated test environment and MUST NOT connect to any devices that could forward test traffic into a production network or incorrectly route it to the test management network <xref target="RFC8456"/> and <xref target="RFC8204"/>.
</t>

</section>
</section>
<section anchor="cni_multi-cluster">
<name>CNI Behavior in Federated and Multi-Cluster Environments</name>
<t>
While existing works such as <xref target="RFC8172"/>  and <xref target="ietf-bmwg-07"/> provide benchmarking methodologies for virtualized and containerized infrastructures, their scope does not extend to CNI behavior in multi-cluster or federated deployments. Architectural drafts like <xref target="draft-dwon-t2trg-multiedge-arch"/>, <xref target="draft-si-service-mesh-dta"/>, and <xref target="draft-ietf-wimse-workload-identity-practices"/> discuss aspects of multi-cluster operations and security, but do not specify CNI-focused, measurable performance parameters and considerations. Similarly, <xref target="draft-contreras-nmrg-interconnection-intents"/> introduces the notion of multi-cluster service deployment and intent-based interconnection, yet it does not cover CNI-level performance benchmarking across federated clusters.
</t>
<section anchor="overview_of_federated">
<name>Overview of Federated Networking</name>
<t>
Federated and multi-cluster environments extend the scope of container networking beyond single operational domains. These architectures enable scalability, geographical distribution, isolation, and service proximity to end users, which are key properties for multi-domain cloud-native infrastructures. Federated CNI benchmarking is particularly relevant to Telco-Cloud and 6G scenarios, where workloads are distributed between cloud and (far-)edge IoT domains, introducing additional considerations compared to single-cluster deployments.

In such environments, multiple clusters operate as autonomous domains while being interconnected through federation layers or multi-cluster networking mechanisms. Examples include popular third-party solutions such as Submariner, Liqo, Karmada, and Open Cluster Management (OCM), which provide network connectivity, service discovery, and workload scheduling across clusters. In this context, CNIs are often extended by multi-cluster gateways or overlays to facilitate inter-cluster pod-to-pod and service-to-service communication. Such interconnections can rely on encapsulation protocols (e.g., VXLAN, IPsec, WireGuard) or Layer-7 service meshes (e.g., Istio, Linkerd, Consul, Open Service Mesh) based on Envoy proxies and sidecars.
</t>  
</section>
<section anchor="bench_federated">
<name>Benchmarking Considerations for CNIs in Federated Environments</name>
<t>Benchmarking CNIs in federated deployments MUST explicitly reflect how (i) architectural choices, (ii) topology and connectivity, 
     (iii) overlay and tunneling mechanisms, (iv) synchronization, and (v) security enforcement affect network behavior for both data-plane and 
     control-plane operations. The following factors are key:</t>
<ul>
<li>Federation and Topology Models: CNIs may operate under hub-and-spoke <xref target="RFC4364"/>, <xref target="RFC7024"/>, neighboring, full-mesh <xref target="RFC4271"/>, <xref target="RFC9181"/>, or hierarchical <xref target="RFC7426"/> topologies. Each model introduces distinct path lengths, potential bottlenecks, and security concerns. Benchmarks SHOULD quantify metrics such as latency, jitter, and packet loss across these models.</li>
<li>Overlay, Encapsulation, and Encryption Mechanisms: CNIs may rely on native multi-cluster extensions (e.g., Cilium ClusterMesh) or external overlays (e.g., Submariner tunnels) with optional encryption (e.g., IPsec, WireGuard). Tests SHOULD measure the combined encapsulation and cryptographic overhead, including per-packet header size, MTU effects, CPU utilization, and throughput reduction compared to unencrypted baselines.</li>
<li>Routing, Policy, and Synchronization Behavior: CNIs synchronize endpoints, routes, and network policies across clusters. Benchmarking SHOULD measure propagation delay, convergence time, and consistency under dynamic conditions such as node joins, removals, or policy updates. Resource utilization (CPU, memory, and bandwidth) during synchronization SHOULD also be recorded.</li>
<li>Cross-Cluster Connectivity and Load Balancing: Evaluation SHOULD include one-way and RTT latency, throughput, and packet loss between pods located in different clusters. When multi-cluster services distribute requests, benchmarks SHOULD assess fairness as well as responsiveness to endpoint or cluster failures that influence path selection and recovery behavior.</li>
<li>Quality of Service (QoS) and Policy Enforcement: CNIs that implement QoS tagging or traffic shaping (e.g., Cilium's eBPF/EDT-based pacing, Calico's DSCP marking and policy-driven shaping, Antrea's TrafficControl, or Kube-OVN's QoS queues) SHOULD be evaluated for their ability to maintain SLA/SLO across clusters and overlays. Benchmarks SHOULD also verify that isolation and access-control policies (e.g., deny/allow rules) remain consistent across domains.</li>
<li>Resiliency and Recovery Performance: Benchmarking SHOULD assess CNI behavior during multi-cluster fault conditions, including inter-cluster link loss, control-plane failures, restarts, or topology reconfiguration. Measurements SHOULD include reconvergence time, packet loss, and recovery time to steady-state. Benchmarks SHOULD also evaluate route re-establishment latency and transient traffic interruption duration to characterize the CNI's overall fault-tolerance behavior.</li>
</ul>


</section>
</section>

<section anchor="CODEF">
<name>Best Practice Operational Example: CODEF</name>
<t>
CODEF is an open-source, modular benchmarking environment that supports the evaluation of containerized workloads in edge-to-cloud infrastructures. CODEF adopts a microservice-based architecture to streamline experimentation through abstraction, automation, and reproducibility. CODEF is logically divided into four functional layers, each implemented as an independent containerized microservice: Infrastructure Manager, Resource Manager, Experiment Controller, and Results Processor, as represented in Figure 1. This modular design ensures extensibility and facilitates integration with diverse technologies across the experimentation pipeline. </t>


  <figure>
        <name>CODEF and its components.</name>
       <artset>
        <!-- This <artset> currently contains a single ASCII-art <artwork>; an SVG alternative may be added alongside it -->
         <artwork type="ascii-art">
<![CDATA[
  +-------------------------------------------+
  |  CODECO Experimentation Framework (CODEF) |
  +-------------------------------------------+
              |
              v
  +------------------------------------+     
  |  Experiment and Cluster Definition |
  +------------------------------------+                                          
              |
              v
  +------------------------+
  |   Experiment Manager   |
  +------------------------+
          |                   Container                Systems
          | Deploy VMs+OS +---------------+     +-------------------+
          +-------------> | Infrastr Mgrs |---> | physical,VM,cloud |
          |               +---------------+     +-------------------+
          | Deploy Resource Managers per node
          |
          |          Containers
          |      +---------------+    +----------+
          |----> | Resource MgrA |<-->|  Master  |      SW / App
          |      +---------------+    +----------+    +---------+
          |----> | Resource MgrB |<-->|  Worker1 |<-->| Ansible |
          |      +---------------+    +----------+    +---------+
          |----> | Resource MgrC |<-->|  WorkerX |
          |      +---------------+    +----------+
          |
          |                   Container
          | Execute Exper +----------------+    +------------+
          +-------------> | Experiment Ctr |<-->| Iteration, |
          |               +----------------+    | Metrics    |
          |                                     +------------+
          |                      Container      
          | Output Results +-------------------+    +-------------+
          +------------->  | Results Processor |<-->| Processing, |
                           +-------------------+    | Stats, LaTeX|
                                                    +-------------+
]]>
</artwork>

        </artset>
     </figure>
<ul>
<li>The Infrastructure Manager layer provisions cluster resources across heterogeneous environments, including bare-metal nodes, hypervisor-based virtual machines (e.g., VirtualBox, XCP-ng), and public or academic cloud testbeds (e.g., AWS, CloudLab, EdgeNet).</li>
<li>The Resource Manager deploys software components on each node using parameterized Ansible playbooks. A dedicated instance of the Resource Manager operates per node to guarantee consistent, automated software setup.</li>
<li>The Experiment Controller coordinates workload execution, manages experimental iterations, collects measurement data, and invokes benchmarks. </li>
<li>The Results Processor performs statistical analysis and post-processing to generate structured outputs, including visualization and reporting artifacts.</li>
</ul>
<t>
CODEF supports full automation of the experimentation lifecycle, from cluster instantiation to metric analysis. Each cluster is provisioned from clean operating system images to ensure consistency, repeatability, and environmental isolation across benchmark runs. This approach eliminates state leakage between tests and enhances comparability. The framework also provides low-level parameterization options for various networking and security configurations. These include tunneling and encapsulation mechanisms (e.g., VXLAN, Geneve, IP-in-IP), encryption protocols (e.g., IPsec, WireGuard), and Linux kernel-based datapath acceleration features (e.g., eBPF and XDP). Such flexibility supports the emulation of production-grade deployments across a wide range of container network interfaces (CNIs) and infrastructure types.
</t>

<section anchor="CODEF-Bench">
<name>CODEF Benchmarking and CNI Support</name>
<t>CODEF addresses the need for repeatable, infrastructure-agnostic benchmarking across the edge-to-cloud continuum. It supports a broad spectrum of third-party CNI plugins, including Antrea <xref target="antrea"/>, Calico <xref target="calico"/>, Cilium <xref target="cilium"/>, Flannel <xref target="flannel"/>, Weave Net <xref target="weavenet"/>, Kube-Router <xref target="kube-router"/>, Kube-OVN <xref target="kube-ovn"/>, and Multus, as well as emerging solutions such as L2S-M <xref target="L2S-M"/>. These CNIs can be deployed and benchmarked across multiple Kubernetes distributions, including upstream Kubernetes (vanilla), lightweight variants such as K3s, K0s, and MicroK8s, and production-grade clusters.

Each CNI plugin employs distinct architectural strategies at the network layer, such as underlay versus overlay models, use of encapsulation protocols (e.g., VXLAN, Geneve), encryption mechanisms (e.g., WireGuard, IPsec), and programmable datapaths (e.g., eBPF/XDP). Additionally, the degree of support for network policy enforcement, observability, and integration with Kubernetes-native APIs varies significantly across implementations. These differences introduce variability in performance, scalability, and resource utilization depending on workload and deployment characteristics.

CODEF enables the consistent application of benchmarking procedures across this heterogeneity by offering a unified, declarative methodology. It abstracts infrastructure-specific details and enforces environmental consistency through repeatable provisioning, workload orchestration, and result normalization. Accordingly, any benchmarking methodology targeting CNIs in diverse Kubernetes environments SHOULD account for these dimensions: CNI architecture, Kubernetes distribution, infrastructure type, and test scenario configuration to ensure meaningful, comparable, and reproducible results.

</t>
</section>

<section anchor="CODEF-Env">
<name>Environment Configuration Aspects</name>
<t>

In addition to the functional differences among CNI plugin implementations, benchmarking methodologies SHOULD account for the architectural and physical characteristics of the deployment environment. Key variables include the type of infrastructure such as virtualized environments (e.g., VM or hypervisor-based) versus bare-metal deployments and the test topology, including intra-node (same host) versus inter-node (across hosts) communication. Benchmarks SHOULD also distinguish between distributions designed for general-purpose Kubernetes (e.g., vanilla K8s) and those optimized for constrained edge deployments (e.g., MicroK8s, K3s).

Hardware heterogeneity introduces further variability. Performance results can be significantly influenced by CPU architecture (e.g., x86_64 vs. ARM), number of cores and threads, memory speed and hierarchy, cache layout, NUMA topology, and network interface characteristics (e.g., NIC model, offload capabilities, and firmware version). Low-level system configuration options, including MTU size, tunneling mode (e.g., VXLAN, IP-in-IP), and kernel datapath tuning (e.g., eBPF or XDP parameters), MAY also affect observed performance.

Empirical results from experiments conducted with CODEF under a variety of scenarios including intra- and inter-cluster configurations, hardware with diverse specifications, and a range of Kubernetes distributions demonstrated measurable performance differences across CNI plugins. Notably, significant disparities were observed not only between different CNI implementations, but also within the same CNI when deployed on different Kubernetes distributions or system architectures.

Contrary to expectation, deploying lightweight CNI plugins on edge-optimized distributions does not always result in improved efficiency. In some cases, plugins reduce their resource footprint by sacrificing performance (e.g., selecting a simpler encapsulation mechanism), while others achieve better throughput when paired with more capable general-purpose distributions at the expense of increased overhead. These trade-offs SHOULD be explicitly captured in benchmarking outcomes.

Importantly, the optimal CNI and distribution pairing is often workload-dependent. A configuration that appears suboptimal in terms of raw resource usage MAY outperform a lightweight alternative for certain traffic patterns, application behaviors, or network policies. As such, benchmarking methodologies intended for heterogeneous edge-cloud scenarios, in particular mobile scenarios and IoT scenarios, where embedded devices are a main part of the overall networking infrastructure, SHOULD incorporate these dimensions and evaluate plugin behavior across representative workloads and system conditions.
</t>
</section>

<section anchor="CODEF-measurement">
<name>Measurement Tools</name>
<t>
CODEF relies on Ansible playbooks to provision a suite of software tools supporting both workload generation and measurement. Benchmarking configurations may include lightweight and comprehensive traffic generators such as <xref target="iperf3"/>, <xref target="netperf"/>, and <xref target="sockperf"/>, as well as the <xref target="k8s-bench-suite"/>. These tools enable detailed measurements of network bandwidth, packet throughput, latency, and fragmentation behavior across TCP and UDP protocols, with varying message sizes.

Resource usage metrics such as CPU load, memory consumption, and disk utilization are collected at both node and container granularity. Observability stacks based on Prometheus and Grafana are integrated for real-time metric capture, historical trend visualization, and alerting capabilities. These facilities support traceability of system behavior during experiments and assist in identifying anomalous performance characteristics.

For scalability and resilience benchmarking, CODEF integrates load and stress testing tools such as the CNCF <xref target="kube-burner"/> and chaos engineering platforms (e.g., Chaos Mesh or Litmus). These tools simulate dynamic workloads, rapid pod scaling, and fault injection to evaluate system performance under adverse or bursty conditions. Such orchestrated testing scenarios are essential to reveal bottlenecks, performance degradation points, and recovery latency under operational stress.

Power consumption profiling is optionally supported through empirical estimation models or telemetry-based measurement frameworks such as <xref target="kepler"/>. However, their accuracy SHOULD be evaluated critically, as results may vary depending on the availability and quality of hardware-level counters (e.g., Intel RAPL) and the characteristics of the execution platform, particularly in virtualized or non-Intel environments.
</t>
</section>
</section>

<section anchor="CNI-Methodology">
<name>Kubernetes CNI Benchmarking Telco-Cloud Methodology</name>
<t>
 This section defines a set of best practice guidelines for benchmarking Kubernetes CNI plugins in telco-cloud and edge-cloud environments. The approach is aligned with IETF BMWG, emphasizing reproducibility, transparency, and comparability.
The benchmarking recommendations presented herein aim to be applicable across a wide range of deployment scenarios, Kubernetes distributions, and CNI implementations. While selected operational workflows and experiences from CODEF are considered to illustrate practical implementation of these best practices, the methodology itself is designed to remain tool-agnostic and aligned with standardized benchmarking guidance.

The practices focus on controlled environment setup, test repeatability, performance metric collection, observability, and result reporting. Attention is given to relevant characteristics for telco and edge environments, including resource constraints, deployment diversity, and protocol behavior under stress. The goal is to provide a consistent and extensible benchmarking methodology for CNIs operating in dynamic, distributed, and microservice-oriented infrastructure environments.

</t>

<section anchor="CODEF-Test">
<name>Controlled Test Environments</name>
<t>
Benchmarking SHOULD be conducted in isolated testbeds with no extraneous traffic or workloads. The following practices help reduce environmental noise and increase determinism:</t>
<ul>
<li>Use bare-metal or dedicated VMs for benchmarking to avoid cross-tenant interference.</li>
<li>Ensure consistent CPU pinning and disable power-saving features or CPU frequency scaling to stabilize performance measurements.</li>
<li>Synchronize clocks across test nodes using NTP or PTP for accurate latency and jitter measurement.</li>
</ul>
</section>

<section anchor="CODEF-Test1">
<name>Standardized Test Configurations</name>
<t>
Benchmarking SHOULD adhere to pre-defined configurations to enable comparability across CNIs and platforms, aligning with <xref target="RFC2544"/> and <xref target="RFC6815"/>. The following elements MUST be documented:
</t>
<ul>
<li>Kubernetes version and distribution.</li>
<li>CNI plugin version and configuration parameters.</li>
<li>Kernel version and system tunables (e.g., MTU size, sysctl options).</li>
<li>CPU model, memory size, and network interface type.</li>
</ul>
</section>

<section anchor="CODEF-repeatability">
<name>Test Repeatability and Statistical Significance</name>
<t>
Each experiment SHOULD be repeated a minimum of five times. For latency and throughput metrics, results MUST be reported using:
</t>
<ul>
<li>Minimum, average (median), and maximum values.</li>
<li>At least the 90th and 95th percentile values.</li>
</ul>

<t>
Furthermore, adequate warm-up times when starting test runs, and cool-down periods between test runs SHOULD be included to prevent thermal bias or residual resource contention. Where possible, automation frameworks (e.g., CODEF, Ansible) SHOULD be used to ensure that each experiment is launched from a clean state.
</t>
</section>

<section anchor="CODEF-traffic">
<name>Traffic Generators, Traffic Models and Load Profiles</name>
<t>
Traffic generators MUST support multiple transport protocols (e.g., TCP, UDP) and varying packet sizes as well as inter-arrival packet rates. Benchmarking tools such as iperf3, netperf, and sockperf are RECOMMENDED. For realistic CNI evaluation:
</t>
<ul>
<li>TCP_RR, TCP_CRR, and UDP_RR SHOULD be used to measure latency, jitter, and throughput.</li>
<li>Multiple flows and concurrent connections SHOULD be tested to simulate microservice interactions.</li>
</ul>
<t>
Benchmarks SHOULD include traffic profiles reflecting real-world microservice communications, such as:</t>
<ul>
<li>Short-lived TCP connections (request/response).</li>
<li>Persistent streaming (large payloads, high throughput).</li>
<li>Burst UDP traffic for latency and packet loss analysis.</li>
</ul>
</section>

<section anchor="CODEF-workload-sim">
<name>Workload Simulation, Emulation, and Stress Testing</name>
<t>
To evaluate performance under real-world loads, benchmarking MUST include scenarios with:
</t>
<ul>
<li>Low, average, and high pod churn rates (creation/deletion).</li>
<li>Concurrent service access and policy enforcement.</li>
<li>Synthetic network and node failures.</li>
</ul>
<t>
Tools such as kube-burner, chaos-mesh, and tc-netem are RECOMMENDED to orchestrate these scenarios, aligning with stress test guidance in <xref target="RFC8239"/>.</t>
</section>


<section anchor="CODEF-observability">
<name>Observability and Resource Instrumentation</name>
<t>
CNIs SHOULD expose internal metrics (e.g., policy hits, flow counts, packet drops). Benchmarks MUST capture:
</t>
<ul>
<li>CPU and memory usage per CNI pod/process (e.g., via Prometheus).</li>
<li>NIC statistics.</li>
<li>Network path visibility (e.g., using Cilium Hubble or Calico flow logs).</li>
</ul>

<t>
Experimental and open-source examples of how such metrics can be captured at the node and network level can be found in the CODECO project <xref target="codeco_d10"/> and the respective code <xref target="codeco_d12"/>. Resource metrics MUST be collected at both node-level and pod-level granularity.
</t>
</section>

<section anchor="CODEF-result">
<name>Result Reporting and Output Format</name>
<t>
Benchmarking outputs SHOULD:
</t>
<ul>
<li>Use machine-readable formats (e.g., JSON, YAML, YANG).</li>
<li>Clearly label all test parameters and metrics.</li>
<li>Include system logs, configuration manifests, and tool versions.</li>
</ul>
<t>
A common results schema SHOULD be developed to support comparative analysis and long-term reproducibility, in line with goals in <xref target="RFC6815"/>.</t>
</section>

</section>


<section anchor="IANA">
<name>IANA Considerations</name>

<t>This document has no IANA considerations.</t>

</section>

 
<section anchor="security-considerations">
<name>Security Considerations</name>
<t>
Benchmarking tools and automation frameworks may introduce risk vectors such as elevated container privileges or misconfigured network policies. Experiments involving stress tests or fault injection should be performed in isolated environments. Benchmarking outputs SHOULD NOT expose sensitive cluster configuration or node-level details.
</t>

</section>

  </middle>
  
  

  <back>
    <references>
      <name>References</name>
      <references>
        <name>Normative References</name>
        
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.7312.xml"/> 
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2285.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2544.xml"/> 
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.1242.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8172.xml"/> 
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.6808.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8239.xml"/> 
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.6815.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5481.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9315.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.4364.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8204.xml"/> 
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.7024.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8456.xml"/> 
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9181.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.7426.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.4271.xml"/>
        <!-- The recommended and simplest way to include a well known reference -->
        
      </references>
 
      <references>
        <name>Informative References</name>
     
        <reference anchor="codef" target="https://gitlab.eclipse.org/eclipse-research-labs/codeco-project/experimentation-framework-and-demonstrations/experimentation-framework">
        <!-- [REPLACE/DELETE] Example minimum reference -->
          <front>
            <title>CODECO Experimental Framework </title>
            <author initials="G." surname="Koukis et al.">
              <organization/>
            </author>
                <author>
      <organization>CODECO Consortium</organization>
    </author>
            <date year="2024"/>
            <!-- [CHECK] -->
          </front>
        </reference>
        
        <reference anchor="codeco_d12" target="https://doi.org/10.5281/zenodo.12819424">
        <!-- [REPLACE/DELETE] Example minimum reference -->
          <front>
            <title>CODECO D12 - Basic Operation Components and Toolkit version 2.0.</title>
            <author initials="G." surname="Samaras et al.">
              <organization/>
            </author>
                <author>
      <organization>CODECO Consortium</organization>
    </author>
            <date year="2024"/>
            <!-- [CHECK] -->
          </front>
        </reference>
        
        <reference anchor="draft-ea-ds" target="https://datatracker.ietf.org/doc/draft-sofia-green-energy-aware-diffserv/">
        <!-- [REPLACE/DELETE] Example minimum reference -->
          <front>
            <title>Energy-aware Differentiated Services (EA-DS). IETF draft draft-sofia-green-energy-aware-diffserv-00, active </title>
            <author initials="R." surname="C. Sofia et al.">
              <organization/>
            </author>
            <date year="2025"/>
            <!-- [CHECK] -->
          </front>
        </reference>
           
        
       <reference anchor="codeco_d10" target="https://doi.org/10.5281/zenodo.12819444">
  <front>
    <title>CODECO Deliverable D10: Technological Guidelines, Reference Architecture, and Open-source Ecosystem Design</title>
    <author initials="R." surname="C. Sofia et al." fullname="Rute C. Sofia"/>
    <author>
      <organization>CODECO Consortium</organization>
    </author>
    <date year="2024"/>
  </front>
  <seriesInfo name="CODECO" value="D10"/>
</reference>
        
        <reference anchor="ietf-bmwg-07" target="https://datatracker.ietf.org/doc/draft-ietf-bmwg-containerized-infra/07/">
        <!-- [REPLACE/DELETE] Example minimum reference -->
          <front>
            <title>Considerations for Benchmarking Network Performance in Containerized Infrastructures, draft-ietf-bmwg-containerized-infra-07,  active</title>
            <author initials="T." surname="Ngoc et al.">
              <organization/>
            </author>
            <date year="2025"/>
            <!-- [CHECK] -->
          </front>
        </reference>
  
        <reference anchor="antrea" target="https://antrea.io">
        <front>
        <title>Antrea CNI</title>
        <author>
        <organization>Antrea Project</organization>
        </author>
        <date year="2024"/>
        </front>
        </reference>

       <reference anchor="calico" target="https://www.tigera.io/project-calico/">
         <front>
          <title>Project Calico</title>
          <author>
             <organization>Tigera, Inc.</organization>
          </author>
          <date year="2024"/>
      </front>

      </reference>

<reference anchor="cilium" target="https://cilium.io">
  <front>
    <title>Cilium: eBPF-based Networking, Security, and Observability</title>
    <author>
      <organization>Cilium Authors</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>


<reference anchor="Kubernetes-docs" target="https://kubernetes.io/docs/concepts/cluster-administration/networking/">
  <front>
    <title>Kubernetes Documents-Cluster Networking</title>
    <author>
      <organization>Kubernetes Authors</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>


<reference anchor="flannel" target="https://github.com/flannel-io/flannel">
  <front>
    <title>Flannel CNI Plugin</title>
    <author>
      <organization>flannel-io</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="kube-ovn" target="https://github.com/kubeovn/kube-ovn">
  <front>
    <title>Kube-OVN: A Cloud-Native SDN for Kubernetes</title>
    <author>
      <organization>Kube-OVN Project</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="kube-router" target="https://github.com/cloudnativelabs/kube-router">
  <front>
    <title>Kube-Router: All-in-One CNI, Service Proxy, and Network Policy </title>
    <author>
      <organization>Kube-Router Community</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="weavenet" target="https://github.com/weaveworks/weave">
  <front>
    <title>Weave Net: Fast, Simple Networking for Kubernetes</title>
    <author>
      <organization>Weaveworks (archived)</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="cilium-bench" target="https://docs.cilium.io/en/latest/operations/performance/">
  <front>
    <title>Cilium Benchmarking Tools</title>
    <author>
      <organization>Cilium Authors</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="TNSM21-cni" target="https://arxiv.org/abs/2401.07674">
  <front>
    <title>Benchmarking Kubernetes Container Network Interfaces: Methodology, Metrics, and Observations</title>
    <author initials="G." surname="Koukis et al.">
   </author>
    <date year="2024" month="January"/>
  </front>
</reference>

<reference anchor="aws-vpc-cni-docs" target="https://docs.aws.amazon.com/eks/latest/userguide/pod-networking.html">
  <front>
    <title>Amazon EKS Pod Networking with the AWS VPC CNI</title>
    <author>
      <organization>Amazon Web Services</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="prometheus-docs" target="https://prometheus.io/docs/introduction/overview/">
  <front>
    <title>Prometheus Monitoring System Overview</title>
    <author>
      <organization>Prometheus Authors</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="cadvisor-docs" target="https://github.com/google/cadvisor">
  <front>
    <title>cAdvisor: Container Advisor</title>
    <author>
      <organization>Google</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="tc-netem" target="https://man7.org/linux/man-pages/man8/tc-netem.8.html">
  <front>
    <title>tc-netem: Network Emulation</title>
    <author>
      <organization>Linux Foundation</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="kube-burner" target="https://github.com/cloud-bulldozer/kube-burner">
  <front>
    <title>Kube-Burner: Kubernetes Performance and Scalability Tool</title>
    <author>
      <organization>Cloud-Bulldozer Project</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="iperf3" target="https://iperf.fr/">
  <front>
    <title>iPerf3: Network Bandwidth Measurement Tool</title>
    <author>
      <organization>ESnet / Lawrence Berkeley National Lab</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="k6" target="https://k6.io/docs/">
  <front>
    <title>k6: Modern Load Testing Tool</title>
    <author>
      <organization>Grafana Labs</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="L2S-M" target="https://github.com/Networks-it-uc3m/L2S-M">
  <front>
    <title>L2S-M: Lightweight Layer 2 Switching for Microservice Networks</title>
     <author>
      <organization>Universidad Carlos 3 de Madrid</organization>
    </author>
    <date year="2023" month="September"/>
  </front>

</reference>

<reference anchor="netperf" target="https://hewlettpackard.github.io/netperf/">
  <front>
    <title>Netperf: Network Performance Benchmark</title>
    <author>
      <organization>Hewlett Packard Enterprise</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="sockperf" target="https://github.com/Mellanox/sockperf">
  <front>
    <title>SockPerf: RDMA and TCP/UDP Latency Benchmark</title>
    <author>
      <organization>NVIDIA Mellanox</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<reference anchor="k8s-bench-suite" target="https://github.com/cnf-testsuite/testsuite">
  <front>
    <title>Kubernetes Bench-Suite</title>
    <author>
      <organization>CNCF CNF Test Suite</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>



<!-- Bibliographic entry for the Kepler project; cite with target="kepler".
     NOTE(review): target now points at the project's documentation site; confirm
     the URL resolves before submission. -->
<reference anchor="kepler" target="https://sustainable-computing.io/">
  <front>
    <!-- Kepler is the acronym of "Kubernetes-based Efficient Power Level Exporter". -->
    <title>Kepler: Kubernetes-based Efficient Power Level Exporter</title>
    <author>
      <organization>CNCF</organization>
    </author>
    <date year="2024"/>
  </front>
</reference>

<!-- Internet-Draft reference; cite with target="I-D.irtf-nmrg-energy-aware".
     The URI is carried by the target attribute because the <format> element is
     deprecated in RFCXML v3.
     NOTE(review): confirm the draft name, revision and author list against the
     Datatracker before submission. -->
<reference anchor="I-D.irtf-nmrg-energy-aware" target="https://datatracker.ietf.org/doc/draft-irtf-nmrg-energy-aware/04/">
  <front>
    <title>Energy-Aware Networked Systems for a Sustainable Future</title>
    <author initials="L." surname="Chiaraviglio" fullname="Luca Chiaraviglio"/>
    <author initials="K." surname="Pentikousis" fullname="Konstantinos Pentikousis"/>
    <author initials="D." surname="Kutscher" fullname="Dirk Kutscher"/>
    <author initials="C." surname="Pignataro" fullname="Carlos Pignataro"/>
    <date month="March" year="2024"/>
  </front>
  <seriesInfo name="Internet-Draft" value="draft-irtf-nmrg-energy-aware-04"/>
</reference>





<!-- Internet-Draft reference; cite with target="draft-dwon-t2trg-multiedge-arch".
     The draft name and revision live in <seriesInfo> rather than in the title; the
     former status word "Expired" is not carried over because it goes stale. -->
<reference anchor="draft-dwon-t2trg-multiedge-arch" target="https://datatracker.ietf.org/doc/draft-dwon-t2trg-multiedge-arch/">
  <front>
    <title>Multi-Edge Architecture for the Internet of Things (IoT)</title>
    <!-- NOTE(review): "et al." is not part of a surname. Once the author list is
         confirmed, give each co-author a separate <author> element. -->
    <author initials="J." surname="Dwon et al.">
      <organization/>
    </author>
    <date year="2025"/>
  </front>
  <seriesInfo name="Internet-Draft" value="draft-dwon-t2trg-multiedge-arch-02"/>
</reference>

<!-- Internet-Draft reference; cite with target="draft-si-service-mesh-dta".
     The draft name and revision live in <seriesInfo> rather than in the title; the
     former status word "Expired" is not carried over because it goes stale. -->
<reference anchor="draft-si-service-mesh-dta" target="https://datatracker.ietf.org/doc/draft-si-service-mesh-dta/">
  <front>
    <title>Service Mesh-based Data Transfer Architecture</title>
    <!-- NOTE(review): "et al." is not part of a surname. Once the author list is
         confirmed, give each co-author a separate <author> element. -->
    <author initials="Z." surname="Si et al.">
      <organization/>
    </author>
    <date year="2025"/>
  </front>
  <seriesInfo name="Internet-Draft" value="draft-si-service-mesh-dta-01"/>
</reference>

<!-- Internet-Draft reference; cite with target="draft-ietf-wimse-workload-identity-practices".
     The draft name and revision live in <seriesInfo> rather than in the title; the
     former status word "active" is not carried over because it goes stale. -->
<reference anchor="draft-ietf-wimse-workload-identity-practices" target="https://datatracker.ietf.org/doc/draft-ietf-wimse-workload-identity-practices/">
  <front>
    <title>Workload Identity Best Practices</title>
    <!-- NOTE(review): "et al." is not part of a surname. Confirm the author list
         and title against the Datatracker, then give each co-author a separate
         <author> element. -->
    <author initials="M." surname="Richardson et al.">
      <organization/>
    </author>
    <date year="2025"/>
  </front>
  <seriesInfo name="Internet-Draft" value="draft-ietf-wimse-workload-identity-practices-00"/>
</reference>

<!-- Internet-Draft reference; cite with target="draft-contreras-nmrg-interconnection-intents".
     The draft name and revision live in <seriesInfo> rather than in the title; the
     former status word "Expired" is not carried over because it goes stale. -->
<reference anchor="draft-contreras-nmrg-interconnection-intents" target="https://datatracker.ietf.org/doc/draft-contreras-nmrg-interconnection-intents/">
  <front>
    <title>Interconnection Intents for Network Services</title>
    <!-- NOTE(review): "et al." is not part of a surname. Once the author list is
         confirmed, give each co-author a separate <author> element. -->
    <author initials="L." surname="Contreras et al.">
      <organization/>
    </author>
    <date year="2025"/>
  </front>
  <seriesInfo name="Internet-Draft" value="draft-contreras-nmrg-interconnection-intents-05"/>
</reference>




      </references>
    </references>
    
       <section anchor="Acknowledgements" numbered="false">
      <!-- [REPLACE/DELETE] an Acknowledgements section is optional -->
      <name>Acknowledgements</name>
      <t>
      This work has been funded by the European Commission in the context of the Horizon Europe CODECO project under grant number 101092696, and by SGC under grant agreement no. M-0626, project SemComIIoT.
      </t>
      <t>We thank Minh-Ngoc Tran for his contributions towards alignment with the draft <xref target="ietf-bmwg-07"/>, and for suggesting the removal of the former Section 4, which provided a CNI summary only.</t>
     
    </section>
      <section anchor="Appendix-A" numbered="false">
      <name>Appendix A. Change Log</name>
     <t>
     Changes since draft-samizadeh-bmwg-cni-benchmarking-00:
     </t>
     <ul>
     <li>Sections 4 and 5 were removed.</li>
     <li>Added details about CNI Behavior in Federated and Multi-Cluster Environments.</li>
     <li>Added details about Observability and Bottleneck Detection in multi-cluster or federated environments.</li>
     <li>Revised references to Kubernetes network model and IETF drafts.</li>
     <li>Minor editorial updates and formatting corrections.</li>      
 </ul>
     
    </section>
    
 </back>
</rfc>
