<?xml version="1.0" encoding="UTF-8"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
    which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
    There has to be one entity for each item to be referenced. 
    An alternate method (rfc include) is described in the references. -->

<!ENTITY RFC1918 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1918.xml">
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2544 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2544.xml">
<!ENTITY RFC4814 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4814.xml">
<!ENTITY RFC5180 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5180.xml">
<!ENTITY RFC6146 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6146.xml">
<!ENTITY RFC6890 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6890.xml">
<!ENTITY RFC7599 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7599.xml">
<!ENTITY RFC8174 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8174.xml">
<!ENTITY RFC8219 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8219.xml">
<!ENTITY RFC9411 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.9411.xml">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs), 
    please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
    (Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space 
    (using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="info" docName="draft-ietf-bmwg-benchmarking-stateful-05" ipr="trust200902">

  <front>
    <!-- The abbreviated title is used in the page header - it is only necessary if the 
          full title is longer than 39 characters -->

    <title abbrev="Benchmarking Stateful NATxy Gateways">Benchmarking Methodology
    for Stateful NATxy Gateways using RFC 4814 Pseudorandom Port Numbers</title>

    <!-- add 'role="editor"' below for the editors if appropriate -->

    <!-- Another author who claims to be an editor -->

    <author fullname="Gábor Lencse" initials="G." surname="Lencse">
      <organization>Széchenyi István University</organization>
      <address>
        <postal>
          <street>Egyetem tér 1.</street>
          <!-- Reorder these if your country does things differently -->
          <city>Győr</city>
          <region></region>
          <code>H-9026</code>
          <country>Hungary</country>
        </postal>
        <phone></phone>
        <email>lencse@sze.hu</email>
        <uri></uri>
      </address>
    </author>

    <author fullname="Keiichi Shima" initials="K." surname="Shima">
      <organization>SoftBank Corp.</organization>
      <address>
        <postal>
          <street>1-7-1 Kaigan</street>
          <city>Minato-ku</city>
          <region>Tokyo</region>
          <code>105-7529</code>
          <country>Japan</country>
        </postal>
        <phone></phone>
        <email>shima@wide.ad.jp</email>
        <uri>https://softbank.co.jp/</uri>
      </address>
    </author>

    <date year="2024" />

    <!-- Meta-data Declarations -->

    <area>Operations and Management Area</area>

    <workgroup>Benchmarking Methodology Working Group</workgroup>

    <!-- WG name at the upperleft corner of the doc,
          IETF is fine for individual submissions.  
    If this element is not present, the default is "Network Working Group",
          which is used by the RFC Editor as a nod to the history of the IETF. -->

    <keyword>Benchmarking, Stateful NATxy, Measurement Procedure, Throughput, Frame Loss Rate, Latency, PDV</keyword>

    <!-- Keywords will be incorporated into HTML output
          files in a meta tag but they have no effect on text or nroff
          output. If you submit your draft to the RFC Editor, the
          keywords will be used for the search engine. -->

    <abstract>
      <t>RFC 2544 has defined a benchmarking methodology for network
      interconnect devices. RFC 5180 addressed IPv6 specificities and it also
      provided a technology update but excluded IPv6 transition technologies.
      RFC 8219 addressed IPv6 transition technologies, including stateful NAT64.
	  However, none of them discussed how to apply RFC 4814 pseudorandom port numbers
	  to any stateful NATxy (NAT44, NAT64, NAT66) technologies.
	  We discuss why using pseudorandom port numbers with stateful NATxy gateways is a 
	  difficult problem. We recommend a solution limiting the port number ranges and using 
	  two test phases (phase 1 and phase 2). We show how the 
	  classic performance measurement procedures (e.g. throughput, frame loss rate, 
	  latency, etc.) can be carried out. We also define new performance metrics and 
	  measurement procedures for maximum connection establishment rate, connection 
	  tear-down rate, and connection tracking table capacity measurements.
	  </t>
    </abstract>
  </front>

  <middle>
    <section anchor="intro" title="Introduction">
      <t><xref target="RFC2544"/> has defined a comprehensive benchmarking 
	  methodology for network interconnect devices, which is still in use. It was 
	  mainly IP version independent, but it used IPv4 in its examples. 
	  <xref target="RFC5180"/> addressed IPv6 specificities
      and also added technology updates, but declared IPv6 transition technologies
      out of its scope. <xref target="RFC8219"/> addressed the IPv6 transition 
	  technologies, including stateful NAT64. It has reused several benchmarking
      procedures from <xref target="RFC2544"/> (e.g. throughput, frame loss rate), 
	  it has redefined the latency measurement and added further ones, e.g. the PDV 
	  (packet delay variation) measurement.</t>  
	  <t>However, none of them discussed how to apply <xref target="RFC4814"/> 
	  pseudorandom port numbers when benchmarking stateful NATxy (NAT44, NAT64, NAT66) 
	  gateways. We are not aware of any other RFCs that address this question.
	  </t> 
      <t>First, we discuss why using pseudorandom port numbers with stateful NATxy 
	  gateways is a hard problem. 
	  </t>
	  <t>Then we recommend a solution.
	  </t>
		  
      <section title="Requirements Language">
        <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL",
          "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT
          RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
          interpreted as described in BCP 14 <xref target="RFC2119"/>
          <xref target="RFC8174"/> when, and only when, they appear in
          all capitals, as shown here.</t>
      </section>
      <!-- [CHECK] The 'Requirements Language' section is optional -->
	  
	  
    </section>

    <section anchor="problem" title="Pseudorandom Port Numbers and Stateful Translation">
    <t>In its appendix, <xref target="RFC2544"/> has defined a frame format 
	for test frames including specific source and destination port numbers.
    <xref target="RFC4814"/> recommends using pseudorandom and
    uniformly distributed values for both source and destination port
    numbers. However, stateful NATxy (NAT44, NAT64, NAT66) solutions use the 
	port numbers to identify connections. The usage of pseudorandom port
	numbers causes different problems depending on the direction.
	<list style="symbols">
	  <t> As for the client-to-server direction, pseudorandom source and 
	  destination port numbers could be used, however, this approach would 
	  be a denial of service attack against the stateful NATxy gateway, 
	  because it would exhaust its connection tracking table capacity. To that end,
	  let us see some calculations using the recommendations of RFC 4814:
	  <list style="symbols">
	    <t>The recommended source port range is: 1024-65535, thus its size is: 64512.</t>
	    <t>The recommended destination port range is: 1-49151, thus its size is: 49151.</t>
	    <t>The number of source and destination port number combinations is:
	    3,170,829,312.</t>
	  </list>
      We note that the usage of different source and destination IP addresses 
	  further increases the number of connection tracking table entries.</t>
	  <t>As for the server-to-client direction, the stateful DUT (Device Under Test) would drop any 
	  packets that do not belong to an existing connection, therefore, the 
	  direct usage of pseudorandom port numbers from the above-mentioned ranges 
	  is not feasible.</t>
	</list>
	</t>
    </section>


    <section anchor="setup_term" title="Test Setup and Terminology">
	  
	  <t>Section 12 of <xref target="RFC2544"/> requires testing first using 
	  a single protocol source and destination address pair and then also using 
	  multiple protocol addresses. We follow the same approach: first, we use a 
	  single source and destination IP address pair, and then we explain how to 
	  use multiple IP addresses.</t>	  

      <section anchor="setup_term_single" title="When Testing with a Single IP Address Pair">

	    <t>Our methodology works with any IP versions to benchmark stateful NATxy 
		gateways, where x and y are in {4, 6}. To facilitate an easy understanding, 
		we use two typical examples: stateful NAT44 and stateful NAT64. </t>
		
	    <t>The Test Setup for the well-known stateful NAT44 (also called NAPT: 
		Network Address and Port Translation) solution is shown in 
		<xref target="test_setup_sfnat44"/>.</t>

	    <t>Note: We are fully aware of <xref target="RFC6890"/> special purpose 
		IP address ranges. The <xref target="RFC1918"/> private IP addresses are 
		used to facilitate an easy understanding of the example. And we consider the 
		usage of the IP addresses reserved for benchmarking absolutely legitimate.</t>

        <figure anchor="test_setup_sfnat44" align="center" title="Test setup for benchmarking
		stateful NAT44 gateways">
          <preamble></preamble>

          <artwork align="left"><![CDATA[
              +--------------------------------------+
     10.0.0.2 |Initiator                    Responder| 198.19.0.2
+-------------|                Tester                |<------------+
| private IPv4|                         [state table]| public IPv4 |
|             +--------------------------------------+             |
|                                                                  |
|             +--------------------------------------+             |
|    10.0.0.1 |                 DUT:                 | 198.19.0.1  |
+------------>|        Stateful NAT44 gateway        |-------------+
  private IPv4|     [connection tracking table]      | public IPv4
              +--------------------------------------+

            ]]></artwork>

        <postamble></postamble>
        </figure>

	    <t> The Test Setup for the also widely used stateful NAT64 <xref target="RFC6146"/> 
		solution is shown in <xref target="test_setup_sfnat64"/>.</t>
		
        <figure anchor="test_setup_sfnat64" align="center" title="Test setup for benchmarking
		stateful NAT64 gateways">
          <preamble></preamble>

          <artwork align="left"><![CDATA[
              +--------------------------------------+
    2001:2::2 |Initiator                    Responder| 198.19.0.2
+-------------|                Tester                |<------------+
| IPv6 address|                         [state table]| IPv4 address|
|             +--------------------------------------+             |
|                                                                  |
|             +--------------------------------------+             |
|   2001:2::1 |                 DUT:                 | 198.19.0.1  |
+------------>|        Stateful NAT64 gateway        |-------------+
  IPv6 address|     [connection tracking table]      | IPv4 address
              +--------------------------------------+
            ]]></artwork>

        <postamble></postamble>
        </figure>	  		
		
	    <t>As for transport layer protocol, <xref target="RFC2544"/> recommended 
		testing with UDP, and it was kept also in <xref target="RFC8219"/>. For 
		the general recommendation, we also keep UDP, thus the port numbers in the 
		following text are to be understood as UDP port numbers. We discuss the 
		limitations of this approach in <xref target="udp_or_tcp"/>.</t>

	    <t>We define the most important elements of our proposed benchmarking system as follows.
	    <list style="symbols">
	    <t>Connection tracking table: The stateful NATxy gateway uses a connection 
		tracking table to be able to perform the stateful translation in the server to
		client direction. Its size, policy, and content are unknown to the Tester.</t>
		<t>Four tuple: The four numbers that identify a connection are source IP address, 
		source port number, destination IP address, destination port number.</t>
		<t>State table: The Responder of the Tester extracts the four tuple from each received
		test frame and stores it in its state table. Recommendation is given for writing and 
		reading order of the state table in <xref target="st_wr_order"/>.</t>
		<t>Initiator: The port of the Tester that may initiate a connection through the 
		stateful DUT in the client-to-server direction. Theoretically, it can use 
		any source and destination port numbers from the ranges recommended by 
		<xref target="RFC4814"/>: if the used four tuple does not belong to an existing 
		connection, the DUT will register a new connection into its connection tracking table.</t>
		<t>Responder: The port of the Tester that may not initiate a connection through 
		the stateful DUT in the server-to-client direction. It may send only frames that 
		belong to an existing connection. To that end, it uses four tuples that have been 
		previously extracted from the received test frames and stored in its state table.</t>
		<t>Test phase 1: Test frames are sent only by the Initiator to the 
		Responder through the DUT to fill both the connection tracking table of the DUT 
		and the state table of the Responder. This is a newly introduced operation phase 
		for stateful NATxy benchmarking. The necessity of this test phase is explained in 
		<xref target="prelim"/>.</t>
		<t>Test phase 2: The measurement procedures defined by <xref target="RFC8219"/> 
		(e.g. throughput, latency, etc.) are performed in this test phase after the completion 
		of test phase 1. Test frames are sent as required (e.g. bidirectional 
		test or unidirectional test in any of the two directions).</t>
		</list>
		</t>
		<t>
		One further definition is used in the text of this document:
		<list style="symbols">
		<t> Black box testing: It is a testing approach when the Tester is not aware of the 
		details of the internal structure and operation of the DUT. It can send input to the 
		DUT and observe the output of the DUT.</t>
		</list>
		</t> 
      </section>
	  
      <section anchor="setup_term_multiple" title="When Testing with Multiple IP Addresses">
	  
	  <t>We make considerations regarding the necessary and available IP addresses.</t>

	  <t>In <xref target="test_setup_sfnat44"/>, we used the single 198.19.0.1 IPv4 address 
	  on the WAN side port of the stateful NAT44 gateway. However, in practice, not a single 
	  IP address, but an IP address range is assigned to the WAN side port of the stateful 
	  NAT44 gateways. Its required size depends on the number of client nodes and on the type 
	  of the stateful NAT44 algorithm. (The traditional algorithm always replaces 
	  the source port number, when a new connection is established. Thus it requires a larger 
	  range than the extended algorithm, which replaces the source port number only when it 
	  is necessary. Please refer to Table 1 and Table 2 of <xref target="LEN2015"/>.)</t>  
	  
	  <t>When router testing is done, section 12 of <xref target="RFC2544"/> requires 
	  testing first using a single source and destination IP address pair, and then 
	  using destination IP addresses from 256 different networks. The 16-23 bits of 
	  the 198.18.0.0/24 and 198.19.0.0/24 addresses can be used to express the 256 networks.
	  As we do not do router testing, we do not need to use multiple destination networks, 
	  therefore, these bits are available for expressing multiple IP addresses that belong 
	  to the same "/16" network. Moreover, both the 198.18.0.0/16 and the 198.19.0.0/16 
	  networks can be used on the right side of the test setup as private IP addresses 
	  from the 10.0.0.0/16 network are used on its left side.</t>

       <figure anchor="test_setup_sfnat44_multi" align="center" title="Test setup for benchmarking
		stateful NAT44 gateways using multiple IPv4 addresses">
          <preamble></preamble>

          <artwork align="left"><![CDATA[
10.0.0.2/16 - 10.0.255.254/16      198.19.0.0/15 - 198.19.255.254/15
           \  +--------------------------------------+  /
            \ |Initiator                    Responder| /
+-------------|                Tester                |<------------+
| private IPv4|                         [state table]| public IPv4 |
|             +--------------------------------------+             |
|                                                                  |
|             +--------------------------------------+             |
| 10.0.0.1/16 |                 DUT:                 | public IPv4 |
+------------>|        Stateful NAT44 gateway        |-------------+
  private IPv4|     [connection tracking table]      | \
              +--------------------------------------+  \
                                   198.18.0.1/15 - 198.18.255.255/15												 
            ]]></artwork>

        <postamble></postamble>
        </figure>

	    <t>A possible solution for assigning multiple IPv4 addresses is shown in 
		<xref target="test_setup_sfnat44_multi"/>. On the left side, the private IP 
		address range is abundantly large. (We used the 16-31 bits to generate nearly 64k 
		potential different source addresses, but the 8-15 bits are also available 
		if needed.) On the right side, the 198.18.0.0/15 network is used, and it was 
		cut into two equal parts. (Asymmetric division is also possible, if needed.)</t>
			
	    <t>We note that these are the potential address ranges. We discuss the actual 
		address ranges to be used in <xref target="restr_port_range"/>.</t>	
			
	    <t>In the case of stateful NAT64, a single "/64" IPv6 prefix contains a high 
		number of bits to express different IPv6 addresses. <xref target="test_setup_sfnat64_multi"/> 
		shows an example, where we used bits 96-111 for that purpose.
		</t>	

       <figure anchor="test_setup_sfnat64_multi" align="center" title="Test setup for benchmarking
		stateful NAT64 gateways using multiple IPv6 and IPv4 addresses">
          <preamble></preamble>

          <artwork align="left"><![CDATA[
2001:2::[0000-ffff]:0002/64       198.19.0.0/15 - 198.19.255.254/15      
           \  +--------------------------------------+  /
  IPv6      \ |Initiator                    Responder| /
+-------------|                Tester                |<------------+
| addresses   |                         [state table]| public IPv4 |
|             +--------------------------------------+             |
|                                                                  |
|             +--------------------------------------+             |
| 2001:2::1/64|                 DUT:                 | public IPv4 |
+------------>|        Stateful NAT64 gateway        |-------------+
 IPv6 address |     [connection tracking table]      | \
              +--------------------------------------+  \
                                   198.18.0.1/15 - 198.18.255.255/15       
            ]]></artwork>

        <postamble></postamble>
        </figure>
		
      </section>
	  
	  
    </section>
	
    <section anchor="method" title="Recommended Benchmarking Method">
	
	  <section anchor="restr_port_range" title="Restricted Number of Network Flows">

	  <t>When a single IP address pair is used for testing, the number of network 
	  flows is determined by the number of source port number and destination port 
	  number combinations.</t>
	  
	  <t>The Initiator SHOULD use restricted ranges for source and destination port 
	  numbers to avoid the denial-of-service-attack-like event against the 
	  connection tracking table of the DUT described in <xref target="problem"/>. 
	  If it is possible, the size of the source port number range SHOULD be larger (e.g. in the order 
	  of a few times ten thousand), whereas the size of the destination port number 
	  range SHOULD be smaller (may vary from a few to several hundreds or thousands 
	  as needed). 
	  The rationale is that source and destination port numbers that can be observed in 
	  the Internet traffic are not symmetrical. Whereas source port numbers may be random, 
	  there are a few very popular destination port numbers (e.g. 443, 80, etc., 
	  see <xref target="IIR2020"/>), and others hardly occur. And we have
	  found that their role is also asymmetric in the Linux kernel routing hash 
	  function <xref target="LEN2020"/>.</t>
	  <t>However, in some special cases, the size of the source port range is limited. E.g. 
	  when benchmarking the CE and BR of a MAP-T <xref target="RFC7599"/> system together (as a compound system 
	  performing stateful NAT44), then the source port range is limited to the number of 
	  source port numbers assigned to each subscriber. (It could be as low as 2048 ports.) </t>
	  
	  <t>When multiple IP addresses are used, then the port number ranges should be even 
	  more restricted, as the number of potential network flows is the product of the size 
	  of the source IP address range, the size of the source port number range, the size of the 
	  destination IP address range, and the size of the destination port number range. 
	  And our method requires the enumeration of all their possible combinations in test 
	  phase 1 as described in <xref target="ctrl_conntrack"/>.</t>	  

	  <t>The number of network flows can be used as a parameter. The performance of the 
	  stateful NATxy gateway MAY be examined as a function of this parameter as described 
	  in <xref target="sc_net_flows"/>.</t>	  
	  </section>

	  <section anchor="prelim" title="Test Phase 1">
		<t>Test phase 1 serves two purposes:
		<list style="numbers">
		  <t>The connection tracking table of the DUT is filled. It is important, 
		  because its maximum connection establishment rate may be lower than its maximum
		  frame forwarding rate (that is throughput).</t>
		  <t>The state table of the Responder is filled with valid four tuples. It is 
		  a precondition for the Responder to be able to transmit frames that belong to 
		  connections that exist in the connection tracking table of the DUT.</t>
		</list>				
		Whereas the above two things are always necessary before test phase 2, 
		test phase 1 can be used without test phase 2. It is done so 
		when the maximum connection establishment rate is measured (as described in 
		<xref target="meas_max_conn_est_rate"/>).
		</t>
	    <t>Test phase 1 MUST be performed before all tests performed in 
		test phase 2. The following things happen in test phase 1:
		<list style="numbers">
		  <t>The Initiator sends test frames to the Responder through the DUT at a 
		  specific frame rate.</t>
		  <t>The DUT performs the stateful translation of the test frames and it also 
		  stores the new connections in its connection tracking table.</t>
		  <t>The Responder receives the translated test frames and updates its state 
		  table with the received four tuples. The Responder transmits no test frames 
		  during test phase 1.</t>
		</list>  
		</t>
		<t>When test phase 1 is performed in preparation for 
		test phase 2, the applied frame rate SHOULD be safely lower than 
		the maximum connection establishment rate. (It implies that maximum connection 
		establishment rate measurement MUST be performed first.)
		Please refer to <xref target="ctrl_conntrack"/> for further conditions regarding 
		timeout and the enumeration of all possible four tuples.</t>
	  </section>
	  
	  <section anchor="consider_stateful" title="Consideration of the Cases of Stateful Operation">
		<t>We consider the most important events that may happen during the operation 
		of a stateful NATxy gateway, and the Actions of the gateway as follows.
		<list style="numbers">
		  <t>EVENT: A packet not belonging to an existing connection arrives in the client-to-server 
		  direction. ACTION: A new connection is registered into the connection tracking 
		  table and the packet is translated and forwarded.</t>
		  <t>EVENT: A packet not belonging to an existing connection arrives in the server-to-client 
		  direction. ACTION: The packet is discarded.</t>		  
		  <t>EVENT: A packet belonging to an existing connection arrives (in any direction). 
		   ACTION: The packet is translated and forwarded and the timeout counter of the corresponding 
		  connection tracking table entry is reset.</t>
		  <t>EVENT: A connection tracking table entry times out.  ACTION: The entry is deleted from 
		  the connection tracking table.</t>
		</list>
		</t>
		<t>Due to "black box" testing, the Tester is not able to directly examine (or delete) the entries 
		of the connection tracking table. But the entries can be and MUST be controlled by setting 
		an appropriate timeout value and carefully selecting the port numbers of the packets
		(as described in <xref target="ctrl_conntrack"/>) to be able to produce meaningful and 
		repeatable measurement results.
		</t>
		<t>We aim to support the measurement of the following performance characteristics 
		of a stateful NATxy gateway:
		<list style="numbers">
		  <t>maximum connection establishment rate</t>
		  <t>all "classic" performance metrics like throughput, frame loss rate, latency, etc.</t>		  
		  <t>connection tear-down rate</t>
		  <t>connection tracking table capacity</t>
		</list>
		</t>
	  </section>
	  
	  <section anchor="ctrl_conntrack" title="Control of the Connection Tracking Table Entries">
		<t>It is necessary to control the connection tracking table entries 
		of the DUT to achieve clear conditions for the measurements. We can simply 
		achieve the following two extreme situations:
		<list style="numbers">
		  <t>All frames create a new entry in the connection tracking table of the DUT and no 
		  old entries are deleted during the test. This is required for measuring the maximum 
		  connection establishment rate.</t>
		  <t>No new entries are created in the connection tracking table of the DUT and no old
		  ones are deleted during the test. This is ideal for the measurements to be executed in phase 2, 
		  like throughput, latency, etc.</t>
		</list>	
		</t>

		<t>From this point, we use the following two assumptions:
		<list style="numbers">
		  <t>The connection tracking table of the stateful NATxy is large enough to store all 
		  connections defined by the different four tuples.</t>
		  <t>Each experiment is started with an empty connection tracking table. (It can be ensured
		  by deleting its content before the experiment.)</t>
		</list>	
		</t>		
		
		<t>The first extreme situation can be achieved by 
		<list style="symbols">
		  <t>using different four tuples for every single test frame in test phase 1 and</t>
		  <t> setting the UDP timeout of the NATxy gateway to a value higher than the length of 
		  test phase 1.</t>
		</list>			  
		</t>
		
		<t>The second extreme situation can be achieved by 
		<list style="symbols">
		  <t>enumerating all possible four tuples in test phase 1 and</t>
		  <t>setting the UDP timeout of the NATxy gateway to a value higher than the length of 
		  test phase 1 plus the gap between the two phases plus the length of test phase 2.</t>
		</list>
		</t>
				
		<t>
		<xref target="RFC4814"/> requires pseudorandom port numbers, which we believe is a good 
		approximation of the distribution of the source port numbers that a NATxy gateway on the 
		Internet may face.
		</t>
		
		<t>
		We note that although the enumeration of all possible four tuples 
		is not a requirement for the first extreme situation and the usage of 
		different four tuples in test phase 1 is not a 
		requirement for the second extreme situation, pseudorandom 
		enumeration of all possible four tuples in test phase 1
		is a good solution in both cases. It may be generated in a computationally 
		efficient way by preparing a random permutation of all the previously 
		enumerated possible four tuples using Durstenfeld's random shuffle 
		algorithm <xref target="DUST1964"/>.
		</t>
		
		<t>The enumeration of the four tuples in increasing or decreasing order 
		(or in any other specific order) MAY be used as an additional measurement. 
		</t>
	  </section>
	  
	  <section anchor="meas_max_conn_est_rate" title="Measurement of the Maximum Connection Establishment Rate">
	    <t>The maximum connection establishment rate is an important characteristic of
		the stateful NATxy gateway and its determination is necessary for the safe 
		execution of test phase 1 (without frame loss) before test phase 2.
		</t>
		<t>The measurement procedure of the maximum connection establishment rate is 
		very similar to the throughput measurement procedure defined in 
		<xref target="RFC2544"/>.
		</t>
		<t>Procedure: The Initiator sends a specific number of test frames using all
		different four tuples at a specific rate through the DUT. 
		The Responder counts the frames that are successfully translated by the DUT. 
		If the count of offered frames is equal to the count of received
		frames, the rate of the offered stream is raised and the test is rerun.  
		If fewer frames are received than were transmitted, the rate of the offered 
		stream is reduced and the test is rerun.
		</t>
		<t>The maximum connection establishment rate is the fastest rate at which 
		the count of test frames successfully translated by the DUT is equal to the number 
		of test frames sent to it by the Initiator.
		</t>
		<t>Note: In practice, we RECOMMEND the usage of binary search.</t>				
	  </section>

	  <section anchor="validation_of_conn" title="Validation of Connection Establishment">
	    <t>Due to "black box" testing, the entries of the connection tracking table of 
		the DUT may not be directly examined, but the presence of the connections can be 
		checked easily by sending frames from the Responder to the Initiator in 
		test phase 2 using all four tuples stored in the state table of the Tester 
		(at a low enough frame rate). The arrival of all test frames indicates that the 
		connections are indeed present.
		</t>

		<t>Procedure: When all the desired N test frames have been sent by the Initiator 
		to the Responder at frame rate R in test phase 1 for the maximum connection 
		establishment rate measurement, and the Responder has successfully received all 
		the N frames, the establishment of the connections is checked in test 
		phase 2 as follows:
		<list style="symbols">
		  <t>The Responder sends test frames to the Initiator at frame rate r=R*alpha, 
		  for the duration of N/r using a different four tuple from its state table for 
		  each test frame.</t> 
		  <t>The Initiator counts the received frames, and if all N frames have arrived 
		  then the R frame rate of the maximum connection establishment rate measurement 
		  (performed in test phase 1) is raised for the next iteration, 
		  otherwise lowered (as well as in the case when test frames were missing 
		  in test phase 1).</t>
		</list>
		</t>
		<t>Notes:		  
		  <list style="symbols">
		    <t>The alpha is a kind of "safety factor"; it aims to make sure that 
			the frame rate used for the validation is not too high, and the test may fail only 
			if at least one connection is not present in the connection 
			tracking table of the DUT. (So alpha should be typically less than 1, e.g. 
			0.8 or 0.5.)
			</t>
			<t>The duration of N/r and the frame rate of r means that N frames are sent for validation.</t>
			<t>The order of four tuple selection is arbitrary provided that all four tuples MUST be used.</t>
			<t>Please refer to <xref target="meas_contr_capacity"/> for a short analysis 
			of the operation of the measurement and what problems may occur.</t>
		  </list>
		</t>
	  </section>

	  
	  <section anchor="real_test" title="Test Phase 2">
	    <t>As for the traffic direction, there are three possible cases during 
	    test phase 2:
	    <list style="symbols">
		  <t>bidirectional traffic: The Initiator sends test frames to the Responder and 
		  the Responder sends test frames to the Initiator.</t>
		  <t>unidirectional traffic from the Initiator to the Responder: The Initiator 
		  sends test frames to the Responder but the Responder does not send test frames to 
		  the Initiator.</t>
		  <t>unidirectional traffic from the Responder to the Initiator: The Responder 
		  sends test frames to the Initiator but the Initiator does not send test frames to 
		  the Responder.</t>
		</list>
		</t>
		<t>If the Initiator sends test frames, then it uses pseudorandom source port numbers and 
		destination port numbers from the restricted port number ranges. (If it uses multiple 
		source and/or destination IP addresses, then their ranges are also limited.) 
		The Responder receives the test frames, updates its state table, and processes the test 
		frames as required by the given measurement procedure (e.g. only counts them for the
		throughput test, handles timestamps for latency or PDV tests, etc.).
		</t>
		<t>If the Responder sends test frames, then it uses the four tuples from its state 
		table. The reading order of the state table may follow different policies (discussed
		in <xref target="st_wr_order"/>). The Initiator receives the test frames and 
		processes them as required by the given measurement procedure.
		</t>
		<t>
		As for the actual measurement procedures, we RECOMMEND to use the updated ones 
		from Section 7 of <xref target="RFC8219"/>.
		</t>
	  </section>

	  <section anchor="meas_conn_tear_down_rate" title="Measurement of the Connection Tear-down Rate">	  
		<t>Connection tear-down can cause significant load for the NATxy gateway. 
		The connection tear-down performance can be measured as follows:
	    <list style="numbers">
		  <t>Load a certain number of connections (N) into the connection 
		  tracking table of the DUT (in the same way as done to measure the 
		  maximum connection establishment rate).</t>
		  <t>Record TimestampA.</t>
		  <t>Delete the content of the connection tracking table of the DUT.</t>
		  <t>Record TimestampB.</t>
  		</list>
		The connection tear-down rate can be computed as:
		</t>
        <t>connection tear-down rate = N / ( TimestampB - TimestampA)
        </t>
		<t>The connection tear-down rate SHOULD be measured for various values of N.
		</t>
        <t>We assume that the content of the connection tracking table may be deleted
		by an out-of-band control mechanism specific to the given NATxy gateway implementation. 
		(E.g. by removing the appropriate kernel module under Linux.)
		</t>
        <t>We are aware that the performance of removing the entire content of the connection 
		tracking table at one time may be different from removing all the entries one by one. 
		</t>
		
	  </section>

	  <section anchor="meas_contr_capacity" title="Measurement of the Connection Tracking Table Capacity">	  
		<t>The connection tracking table capacity is an important metric of stateful 
		NATxy gateways. Its measurement is not easy, because an elementary 
		step of a validated maximum connection establishment rate measurement (defined in 
		<xref target="validation_of_conn"/>) may have only a few distinct observable outcomes, 
		but some of them may have different root causes: 
	    <list style="numbers">
		  <t>During test phase 1, the number of test frames received by the 
		  Responder is less than the number of test frames sent by the Initiator. 
		  It may have different root causes, including:
		  <list style="numbers">
		    <t>The R frame sending rate was higher than the maximum connection 
			establishment rate. (Note that now the maximum connection 
			establishment rate is considered unknown because we cannot measure the 
			maximum connection establishment rate without our assumption 1 in 
			<xref target="ctrl_conntrack"/>!)
			This root cause may be eliminated by lowering the R rate and re-executing 
			the test. (This step may be performed multiple times, while R>0.)</t>
			<t>The capacity of the connection tracking table of the DUT has been 
			  exhausted. (And either the DUT does not want to delete connections 
			  or the deletion of the connections makes it slower. This case is not
			  investigated further in test phase 1.)</t>
		  </list>
	      </t>
		  <t>During test phase 1, the number of test frames received by the 
		  Responder equals the number of test frames sent by the Initiator. 
		  In this case, the connections are validated in test phase 2. 
		  The validation may have two kinds of observable results:
		  <list style="numbers">
		    <t>The number of validation frames received by the Initiator 
			equals the number of validation frames sent by the Responder. 
			(It proves that the capacity of the connection tracking table of 
			the DUT is enough and both R and r were chosen properly.)</t>
			<t>The number of validation frames received by the Initiator 
			is less than the number of validation frames sent by the Responder. 
			This phenomenon may have various root causes:
			<list style="numbers">
			  <t>The capacity of the connection tracking table of the DUT has been 
			  exhausted. (It does not matter, whether some existing connections are 
			  discarded and new ones are stored, or the new connections are discarded.
			  Some connections are lost anyway, and it makes validation fail.)</t>
			  <t>The R frame sending rate used by the Initiator was too high in
			  test phase 1 and thus some connections were not established, 
			  even though all test frames arrived at the Responder. This root cause 
			  may be eliminated by lowering the R rate and re-executing the test. 
			  (This step may be performed multiple times, while R>0.)</t>
			  <t>The r frame sending rate used by the Responder was too high in 
			  test phase 2 and thus some test frames did not arrive at the Initiator, even 
			  though all connections were present in the connection tracking table of the DUT. 
			  This root cause may be eliminated by lowering the r rate and re-executing the test. 
			  (This step may be performed multiple times, while r>0.)</t>
			</list>
			And here is the problem: as the above three root causes are indistinguishable, 
			it is not easy to decide, whether R or r should be decreased.
			</t>
		  </list>
		  </t>
		</list>		
		</t>

		<t>We have some experience with benchmarking stateful NATxy gateways. When we tested 
		iptables with a very high number of connections, the 256GB RAM of the DUT was 
		exhausted and it stopped responding. Such a situation may make the connection 
		tracking table capacity	measurements rather inconvenient. We include this 
		possibility in our recommended measurement procedure, but we do not address the detection 
		and elimination of such a situation. (E.g. how the algorithm can reset the DUT.)
		</t>

		<t>For the connection tracking table size measurement, first we need a safe 
		number: C0. It is a precondition, that C0 number of connections can surely be 
		stored in the connection tracking table of the DUT. Using C0, one can determine 
		the maximum connection establishment rate using C0 number of connections. 
		It is done with a binary search using validation. The result is R0. The values 
		C0 and R0 will serve as "safe" starting values for the following two searches.
		</t>

		<t>First, we perform an exponential search to find the order of magnitude of the 
		connection tracking table capacity. The search stops if the DUT collapses OR 
		the maximum connection establishment rate severely drops (e.g. to its one tenth)
		due to doubling the number of connections.
		</t>

		<t>Then, the result of the exponential search gives the order of magnitude of 
		the size of the connection tracking table. Before disclosing the possible algorithms to
		determine the exact size of the connection tracking table, we consider three possible 
		replacement policies for the NATxy gateway:
	    <list style="numbers">
		  <t>The gateway does not delete any live connections until their timeout expires.</t>
		  <t>The gateway replaces the live connections according to LRU (least recently used) policy.</t>
		  <t>The gateway does a garbage collection when its connection tracking table is full 
		  and a frame with a new four tuple arrives. During the garbage collection, it deletes the K 
		  least recently used connections, where K is greater than 1.</t>
  		</list>		
		Now, we examine what happens and how many validation frames arrive in the three cases. 
		Let the size of the connection tracking table be S, and the number of test 
		frames sent in test phase 1 be N, where S is less than N.
	    <list style="numbers">
		  <t>The connections defined by the first S test frames are registered into 
		  the connection tracking table of the DUT, and the last N-S connections are lost. 
		  (It is another question if the last N-S test frames are translated and 
		  forwarded in test phase 1 or simply dropped.) During validation, the validation 
		  frames with four tuples corresponding to the first S test frames will arrive at the 
		  Initiator and the other N-S validation frames will be lost.</t>
		  <t>All connections are registered into the connection tracking table of the DUT, 
		  but the first N-S connections are replaced (and thus lost). During validation, 
		  the validation frames with four tuples corresponding to the last S test frames 
		  will arrive at the Initiator, and the other N-S validation frames will be lost.</t>
		  <t>Depending on the values of K, S, and N, fewer than S connections may survive.
		  In the worst case, only S-K+1 validation frames arrive, even though, the size of 
		  the connection tracking table is S.</t>
  		</list>
		If we know that the stateful NATxy gateway uses the first or second replacement 
		policy and we also know that both R and r rates are low enough, then the final
		step of determining the size of the connection tracking table is simple. If the Responder 
		sent N validation frames and the Initiator received N' of them, then the size of the 
		connection tracking table is N'.
 		</t>
		
		<t>In the general case, we perform a binary search to find the exact value of the connection 
		tracking table capacity within E error. The search chooses the lower half of 
		the interval if the DUT collapses OR the maximum connection establishment 
		rate severely drops (e.g. to its half); otherwise, it chooses the higher half. 
		The search stops if the size of the interval is less than or equal to the E error.
		</t>		

		<t>The algorithms for the general case are defined using C-like pseudocode in 
		<xref target="meas_contr_capacity_algo"/>. In practice, these algorithms may 
		be made more efficient in a way that the binary search for the maximum 
		connection establishment rate stops, if an elementary test fails at a rate 
		under RS*beta or RS*gamma during the exponential search or during the final 
		binary search for the capacity of the connection tracking table, respectively. 
		(This saves a high amount of execution time by eliminating the long-lasting tests at 
		low rates.)		
		</t>

        <figure anchor="meas_contr_capacity_algo" align="center" title="Measurement of the Connection Tracking Table Capacity">
          <preamble></preamble>

          <artwork align="left"><![CDATA[
// The binary_search_for_maximum_connection_establishment_rate(c,r) 
// function performs a binary search for the maximum connection 
// establishment rate in the [0, r] interval using c number of 
// connections.

// This is an exponential search for finding the order of magnitude 
// of the connection tracking table capacity
// Variables:
//   C0 and R0 are beginning safe values for the connection tracking 
//     table size and connection establishment rate, respectively
//   CS and RS are their currently used safe values
//   CT and RT are their values for the current examination
//   beta is a factor expressing an unacceptable drop in R (e.g. 
//     beta=0.1)
R0=binary_search_for_maximum_connection_establishment_rate(C0,maxrate);
for ( CS=C0, RS=R0;  1; CS=CT, RS=RT )
{
  CT=2*CS;
  RT=binary_search_for_maximum_connection_establishment_rate(CT,RS);
  if ( DUT_collapsed || RT < RS*beta )
    break;
}
// Here the size of the connection tracking table is between CS and CT

// This is the final binary search for finding the connection tracking  
// table capacity within E error
// Variables:
//   CS and RS are the safe values for connection tracking table size 
//     and connection establishment rate, respectively
//   C and R are the values for the current examination
//   gamma is a factor expressing an unacceptable drop in R 
//     (e.g. gamma=0.5)
for ( D=CT-CS;  D>E; D=CT-CS )
{
  C=(CS+CT)/2;
  R=binary_search_for_maximum_connection_establishment_rate(C,RS);
  if ( DUT_collapsed || R < RS*gamma)
    CT=C; // take the lower half of the interval
  else
    CS=C,RS=R; // take the upper half of the interval
}
// Here the size of the connection tracking table is CS within E error
            ]]></artwork>

        <postamble></postamble>
        </figure>
		
	  </section>
	  
	  <section anchor="st_wr_order" title="Writing and Reading Order of the State Table">	  
		<t>As for the writing policy of the state table of the Responder, we RECOMMEND round robin, 
		because it ensures that its entries are automatically kept fresh and consistent with 
		that of the connection tracking table of the DUT.
		</t>
		<t>The Responder can read its state table in various orders, for example:
	    <list style="symbols">
		  <t>pseudorandom</t>
		  <t>round-robin</t>
		</list>
		</t>
		<t>
		We RECOMMEND pseudorandom to follow the spirit of <xref target="RFC4814"/>. 
		Round-robin may be used as a computationally cheaper alternative. 
		</t>
	  </section>
	    
    </section>	

	<section anchor="meas_scalability" title="Scalability Measurements">
	  <t>As for scalability measurements, we do not define any new type of performance metrics, 
	  but we recommend to perform measurement series through which the value of one or more parameter(s) 
	  is/are changed to discover how the various values of the given parameter(s) influence 
	  the performance of the DUT.
	  </t>
	  
	  <section anchor="sc_net_flows" title="Scalability Against the Number of Network Flows">
	
	    <t>The scalability measurements aim to quantify how the performance 
		of the stateful NATxy gateways degrades with the increase of the number of 
		network flows.</t>
		
		<t>As for the actual values for the number of network flows to be used during 
		the measurement series, we RECOMMEND to use some representative values from 
		the range of the potential number of network flows the DUT may be faced with 
		during its intended usage.</t>
		
		<t>It is important, how the given number of network flows are generated. The sizes
		of the ranges of the source and destination IP addresses and port numbers are 
		essential parameters to be reported together with the results. Please see also 
		<xref target="reporting_format"/> about the reporting format.</t>
		
		<t>If a single IP address pair is used, then we RECOMMEND to use	  
		<list style="symbols">
		  <t>a fixed, larger source port number range (e.g., a few times 10,000)</t>
		  <t>a variable size destination port number range (e.g. 10; 100; 1,000; etc.), 
		  where its expedient granularity depends on the purpose.</t>
	    </list>	
        </t>
      </section>	
	  
	  <section anchor="sc_cpu_cores" title="Scalability Against the Number of CPU Cores">	  

	    <t>Stateful NATxy gateways are often implemented in software that is not bound 
		to specific hardware but can be executed by commodity servers. To facilitate 
		the comparison of their performance, it can be useful to determine
	    <list style="symbols">
	      <t>the performance of the various implementations using a single core of a well-known CPU</t>
		  <t>the scale-up of the performance of the various implementations with the number of CPU cores.</t>
	    </list>  
	    </t>

	    <t>If the number of the available CPU cores is a power of two, then we recommend 
		to perform the tests with 1, 2, 4, 8, 16, etc. number of active CPU cores of the DUT.</t>

      </section>	
	
    </section>		
	
	<section anchor="reporting_format" title="Reporting Format">

	  <t>Measurements MUST be executed multiple times to achieve statistically reliable results. 
	  The report of the results MUST contain the number of repetitions of the measurements. 
	  We RECOMMEND median as the summarizing function of the results complemented with the 
	  first percentile and the 99th percentile as indices of the dispersion of the results. 
	  Average and standard deviation MAY also be reported.
	  </t>
	  
	  <t>All parameters and settings that may influence the performance of the DUT MUST be 
	  reported. Some of them may be specific to the given NATxy gateway implementation, like the 
	  "hashsize" (hash table size) and "nf_conntrack_max" (number of connection tracking 
	  table entries) values for iptables or the limit of the number of states for OpenBSD PF 
	  (set by the "set limit states number" command in the pf.conf file).
	  </t>
	  
	  
       <figure anchor="iptables-conn-scale" align="center" title="Example table: Maximum connection establishment rate of iptables against the number of sessions">
       <preamble></preamble>
       <artwork align="left"><![CDATA[
number of sessions (req.)            0.4M       4M     40M     400M
source port numbers (req.)         40,000   40,000  40,000   40,000
destination port numbers (req.)        10      100   1,000   10,000
"hashsize" (i.s.)                    2^17     2^20    2^23     2^27
"nf_conntrack_max" (i.s.)            2^20     2^23    2^26     2^30
num. sessions / "hashsize" (i.s.)    3.05     3.81    4.77     2.98
number of experiments (req.)           10       10      10       10 
error of binary search (req.)       1,000    1,000   1,000    1,000
connections/s median (req.)
connections/s 1st perc. (req.)
connections/s 99th perc. (req.)
          ]]></artwork>

       <postamble></postamble>
       </figure>
		
	  <t><xref target="iptables-conn-scale"/> shows  an example of table headings for 
	  reporting the measurement results for the scalability of the iptables stateful NAT44 
	  implementation against the number of sessions. We have indicated the always required fields
	  (req.) and the implementation-specific ones (i.s.).
	  In row 6, we also added a computed value, the number of sessions per hashsize ratio, which 
	  helps the reader to interpret the achieved maximum connection establishment rate. 
	  (A lower value results in shorter linked lists hanging on the entries of the hash 
	  table thus facilitating higher performance. The ratio is varying, because the number of 
	  sessions is always a power of 10, whereas the hash table size is a power of 2.)
	  To reflect the accuracy of the results, we have also added the value of the "error" of the
	  binary search, which expresses the stopping criterion for the binary search. The binary 
	  search stops, when the difference between the "higher limit" and "lower limit" of the 
	  binary search is less than or equal to "error".

	  </t>		  
	
	  <t> The table MUST be complemented with reporting the relevant parameters of the 
	  DUT. If the DUT is a general-purpose computer and some software NATxy gateway implementation is tested, 
	  then the hardware description SHOULD include: computer type, CPU type, and number of active CPU cores, 
	  memory type, size and speed, network interface card type (reflecting also the speed), 
	  the fact that direct cable connections were used or the type of the switch used for 
	  interconnecting the Tester and the DUT. Operating system type and version, kernel version, and the version 
	  of the NATxy gateway implementation (including last commit date and number if applicable) SHOULD also be given.
	  </t>
	  
	</section>



    <section anchor="impl_exp" title="Implementation and Experience">
	  <t>The stateful extension of siitperf <xref target="SIITPERF"/> is an implementation of this concept.
	  It is documented in this (open access) paper <xref target="LEN2022"/>. 
	  </t>
	  <t> The proposed benchmarking methodology has been validated by performing 
	  benchmarking measurements with three radically different stateful NAT64 
	  implementations (Jool, tayga+iptables, OpenBSD PF) in (open access) paper 
	  <xref target="LEN2023"/>.
	  </t>
	  <t>Our further experience with this methodology using siitperf for measuring the 
	  scalability of the iptables stateful NAT44 and Jool stateful NAT64 
	  implementations is described in 
	  <xref target="I-D.lencse-v6ops-transition-scalability"/>.
	  </t>	  
    </section>

	
    <section anchor="udp_or_tcp" title="Limitations of using UDP as Transport Layer Protocol">
	  <t>Stateful NATxy solutions handle TCP and UDP differently, e.g. iptables uses 30s 
	  timeout for UDP and 60s timeout for TCP. Thus benchmarking results produced using UDP do not 
      necessarily characterize the performance of a NATxy gateway well enough when it 
	  is used for forwarding Internet traffic. As for the given example, timeout values of the DUT may 
      be adjusted, but it requires extra consideration. 	  
	  </t> 
	  <t>Other differences in handling UDP or TCP are also possible. Thus we recommend that 
	  further investigations be performed in this field.
	  </t>
	  <t>As a mitigation of this problem, we recommend that testing with protocols using TCP 
	  (like HTTP and HTTPS) be performed as described in 
	  <xref target="RFC9411"/>. This approach also solves the potential 
	  problem that protocol helpers may be present in the stateful DUT. 
	  </t>
    </section>

   <section anchor="Acknowledgements" title="Acknowledgements">
     <t>The authors would like to thank Al Morton, Sarah Banks, Edwin Cordeiro, Lukasz Bromirski, 
	  Sándor Répás, Tamás Hetényi, Timothy Winters, Eduard Vasilenko, Minh Ngoc Tran, Paolo Volpato, 
	  Zeqi Lai, and Bertalan Kovács for their comments.</t>
	 <t>This work was supported by the Japan Trust International Research Cooperation Program 
	 of the National Institute of Information and Communications Technology (NICT), Japan.</t>
   </section>

   <!-- Possibly a 'Contributors' section ... -->

   <section anchor="IANA" title="IANA Considerations">
     <t>This document does not make any request to IANA.</t>
   </section>

   <section anchor="Security" title="Security Considerations">
     <t>We have no further security considerations beyond that of <xref target="RFC8219"/>. 
	 Perhaps they should be cited here so that they be applied not only for the 
	 benchmarking of IPv6 transition technologies but also for the benchmarking of 
	 stateful NATxy gateways.</t>
   </section>
 </middle>

 <!--  *****BACK MATTER ***** -->

 <back>
   <!-- References split into informative and normative -->

   <!-- There are 2 ways to insert reference entries from the citation libraries:
    1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
    2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
       (for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")

    Both are cited textually in the same manner: by using xref elements.
    If you use the PI option, xml2rfc will, by default, try to find included files in the same
    directory as the including file. You can also define the XML_LIBRARY environment variable
    with a value containing a set of directories to search.  These can be either in the local
    filing system or remote ones accessed by http (http://domain/dir/... ).-->

   <references title="Normative References">
    <!--?rfc include="http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml"?-->

    &RFC2119;
	&RFC1918;
	&RFC2544;
    &RFC4814;
	&RFC5180;
	&RFC6146;
	&RFC6890;
	&RFC7599;	
    &RFC8174;
	&RFC8219;
	&RFC9411;

   </references>

   <references title="Informative References">
     <!-- Here we use entities that we defined at the beginning. -->

    <?rfc include='reference.I-D.lencse-v6ops-transition-scalability'?>
	
    <reference anchor="DUST1964" 
    target="https://dl.acm.org/doi/10.1145/364520.364540">
      <front>
        <title>Algorithm 235: Random permutation
        </title>
        <author initials="R." surname="Durstenfeld">
          <organization></organization>
		</author>
        <date day="" month="July" year="1964"/>
      </front>
      <seriesInfo name="" value="Communications of the ACM, vol. 7, no. 7, p.420."/>
      <seriesInfo name="DOI" value="10.1145/364520.364540"/>
    </reference>
	
    <reference anchor="IIR2020" 
    target="https://www.iij.ad.jp/en/dev/iir/pdf/iir_vol49_report_EN.pdf">
      <front>
        <title>Periodic observation report: Internet trends as seen from IIJ infrastructure - 2020
        </title>

        <author initials="T." surname="Kurahashi">
          <organization></organization>
        </author>
        <author initials="Y." surname="Matsuzaki">
          <organization></organization>
        </author>
        <author initials="T." surname="Sasaki">
          <organization></organization>
        </author>
        <author initials="T." surname="Saito">
          <organization></organization>		  
        </author>
        <author initials="F." surname="Tsutsuji">
          <organization></organization>
        </author>
        <date day="" month="Dec" year="2020"/>
      </front>
      <seriesInfo name="" value="Internet Infrastructure Review, vol. 49"/>
    </reference>

    <reference anchor="LEN2015"  
    target="http://www.hit.bme.hu/~lencse/publications/e98-b_8_1580.pdf">
      <front>
        <title>Estimation of the Port Number Consumption of Web Browsing
        </title>

        <author initials="G." surname="Lencse">
          <organization></organization>
        </author>

        <date day="1" month="8" year="2015"/>
      </front>
      <seriesInfo name="" value="IEICE Transactions on Communications, vol. E98-B, no. 8. pp. 1580-1588"/>    
      <seriesInfo name="DOI" value="10.1587/transcom.E98.B.1580"/>
    </reference>
	

    <reference anchor="LEN2020" 
    target="http://www.hit.bme.hu/~lencse/publications/291-1113-1-PB.pdf">
      <front>
        <title>Adding RFC 4814 Random Port Feature to Siitperf: Design, Implementation and Performance Estimation
        </title>

        <author initials="G." surname="Lencse">
          <organization></organization>
        </author>
        <date day="" month="" year="2020"/>
      </front>
      <seriesInfo name="" value="International Journal of Advances in Telecommunications, Electrotechnics, Signals and Systems, vol 9, no 3, pp. 18-26."/>
      <seriesInfo name="DOI" value="10.11601/ijates.v9i3.291"/>
    </reference>


    <reference anchor="LEN2022"  
    target="http://www.hit.bme.hu/~lencse/publications/ECC-2022-SFNAT64xy-Tester-published.pdf">
      <front>
        <title>Design and Implementation of a Software Tester for Benchmarking Stateful NAT64xy Gateways: 
		Theory and Practice of Extending Siitperf for Stateful Tests
        </title>

        <author initials="G." surname="Lencse">
          <organization></organization>
        </author>

        <date day="" month="" year="2022"/>
      </front>
      <seriesInfo name="" value="Computer Communications, vol. 172, no. 1, pp. 75-88, August 1, 2022"/>    
      <seriesInfo name="DOI" value="10.1016/j.comcom.2022.05.028"/>
    </reference>

    <reference anchor="LEN2023"  
    target="http://www.hit.bme.hu/~lencse/publications/ECC-2023-BM-SFNAT64-published.pdf">
      <front>
        <title>Benchmarking methodology for stateful NAT64 gateways
        </title>

        <author initials="G." surname="Lencse">
          <organization></organization>
        </author>
        <author initials="K." surname="Shima">
          <organization></organization>
        </author>		
        <author initials="K." surname="Cho">
          <organization></organization>
        </author>		

        <date day="" month="" year="2023"/>
      </front>
      <seriesInfo name="" value="Computer Communications, vol. 210, no. 1, pp. 256-272, October 1, 2023"/>    
      <seriesInfo name="DOI" value="10.1016/j.comcom.2023.08.009"/>
    </reference>	   
	
    <reference anchor="SIITPERF" 
    target="https://github.com/lencsegabor/siitperf">
      <front>
        <title>Siitperf: An RFC 8219 compliant SIIT and stateful NAT64/NAT44 tester written in C++ using DPDK
        </title>

        <author initials="G." surname="Lencse">
          <organization></organization>
        </author>

        <date day="" month="" year="2019-2023" />
      </front>
      <seriesInfo name="" value="source code"/>
      <seriesInfo name="" value="available from GitHub"/>
    </reference>
	<!-- 	-->
	
   </references>

   <section anchor="change_log" title="Change Log">
    <section title="00">
      <t>Initial version.
      </t>
    </section>
    <section title="01">
      <t>Updates based on the comments received on the BMWG mailing list and minor corrections.
      </t>
    </section>   
    <section title="02">
      <t><xref target="ctrl_conntrack"/> was completely re-written. As a consequence, 
	  the occurrences of the now undefined "mostly different" source port number destination 
	  port number combinations were deleted from <xref target="meas_max_conn_est_rate"/>, 
	  too.
      </t>
    </section>  	  
    <section title="03">
      <t>Added <xref target="consider_stateful"/> about the consideration of the
	  cases of stateful operation.
      </t>
      <t>Consistency checking. Removal of some parts obsoleted by the previous re-writing 
	  of <xref target="ctrl_conntrack"/>. 
      </t>
      <t>Added <xref target="meas_conn_tear_down_rate"/> about the method for measuring connection tear-down rate.
      </t>
      <t>Updates for <xref target="impl_exp"/> about the implementation and experience.
      </t>
    </section>
    <section title="04">
      <t>Update of the abstract.
      </t>
      <t>Added <xref target="validation_of_conn"/> about validation of connection establishment.
      </t>
      <t>Added <xref target="meas_contr_capacity"/> about the method for measuring connection tracking table capacity.
      </t>
      <t>Consistency checking and corrections. 
      </t>
    </section>
    <section title="00 - WG item">
      <t>Added measurement setup for Stateful NAT64 gateways.
      </t>
      <t>Consistency checking and corrections. 
      </t>
    </section>    	
    <section title="01">
      <t>Added Section 4.5.1 about typical types of measurement series and reporting format.
      </t> 
    </section>
    <section title="02">
      <t>Added the usage of multiple IP addresses.</t>
	  <t>Section 4.5.1 was removed and split into two Sections: 
	  <xref target="meas_scalability"/> about scalability measurements and 
	  <xref target="reporting_format"/> about reporting format.
      </t> 
    </section>	
    <section title="03">
      <t>Updated the usage of multiple IP addresses.</t>
	  <t>Test phases were renamed as follows:
	  <list style="symbols">
		<t>preliminary test phase --> test phase 1</t>
		<t>real test phase --> test phase 2.</t>
	  </list>	  
      </t> 
    </section>	
    <section title="04">
      <t>Minor updates to <xref target="setup_term_multiple"/> and <xref target="impl_exp"/>.</t> 
    </section>	
    <section title="05">
      <t>Minor updates addressing WGLC nits (adding the definition of "black box", and 
	  performing a high amount of grammatical corrections).</t> 
    </section>	
  </section>
  </back>
</rfc>