<?xml version="1.0" encoding="utf-8"?>
<?xml-model href="rfc7991bis.rnc"?>  <!-- Required for schema validation and schema-aware editing -->
<!-- <?xml-stylesheet type="text/xsl" href="rfc2629.xslt" ?> -->
<!-- This third-party XSLT can be enabled for direct transformations in XML processors, including most browsers -->


<!DOCTYPE rfc [
  <!ENTITY nbsp    "&#160;">
  <!ENTITY zwsp   "&#8203;">
  <!ENTITY nbhy   "&#8209;">
  <!ENTITY wj     "&#8288;">
  <!-- <!ENTITY rfc2119 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml"> -->
  <!-- <!ENTITY rfc8174 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml"> -->
  <!ENTITY INT "inband telemetry">
  <!-- <!ENTITY I-D.ietf-avtcore-cc-feedback-message SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-avtcore-cc-feedback-message.xml"> -->
]>
<!-- If further character entities are required then they should be added to the DOCTYPE above.
     Use of an external entity file is not recommended. -->

<rfc
  xmlns:xi="http://www.w3.org/2001/XInclude"
  category="info"
  docName="draft-miao-ccwg-hpcc-info-03"
  ipr="trust200902"
  obsoletes=""
  updates=""
  submissionType="IETF"
  xml:lang="en"
  version="3">


<!-- <?xml version="1.0" encoding="US-ASCII"?>
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!ENTITY rfc2119 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY rfc8174 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml">
<!ENTITY INT "inband telemetry">
<!ENTITY I-D.ietf-avtcore-cc-feedback-message SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-avtcore-cc-feedback-message.xml">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<?rfc toc="yes" ?>
<?rfc compact="yes" ?>
<?rfc symrefs="yes" ?>
<rfc category="info" 
     docName="draft-miao-ccwg-hpcc-info"
     ipr="trust200902">
  What is the category field value -->


  <front>
    <title abbrev="HPCC++">
    Inband Telemetry for HPCC++
    </title>
    
    
    <author fullname="Rui Miao" initials="R." surname="Miao">
      <organization>Meta</organization>

      <address>
        <postal>
          <street>1 Hacker Way</street>
          <city>Menlo Park</city>
          <region>CA</region>
          <code>94025</code>
          <country>USA</country>
        </postal>
	      <email>rmiao@meta.com</email>
      </address>
    </author>


    <author fullname="Surendra Anubolu" initials="S" surname="Anubolu">
      <organization abbrev="Broadcom Inc">Broadcom, Inc.</organization>
      <address>
        <postal>
          <street>1320 Ridder Park</street>
          <city>San Jose</city>
          <region>CA</region>
          <code>95131</code>
          <country>USA</country>
        </postal>
        <email>surendra.anubolu@broadcom.com</email>
      </address>
    </author>

    <author fullname="Rong Pan" initials="R" surname="Pan">
      <organization abbrev="AMD">AMD</organization>
      <address>
        <postal>	
          <street>2485 Augustine Dr.</street>
          <city>Santa Clara</city>
          <region>CA</region>
          <code>95054</code>
          <country>USA</country>
        </postal>
	<email>Rong.Pan@amd.com</email>
      </address>
    </author>


    <author fullname="Jeongkeun Lee" initials="J" surname="Lee">
      <organization abbrev="Google">Google</organization>

      <address>
        <postal>
          <street>Headquarters 1600 Amphitheatre Parkway</street>
          <city>Mountain View</city>
          <region>CA</region>
          <code>94043</code>
          <country>USA</country>
        </postal>
        <email>leejk@google.com</email>
      </address>
    </author>

    <author fullname="Barak Gafni" initials="B. " surname="Gafni">
      <organization>NVIDIA</organization>
      <address>
        <postal>
          <street>350 Oakmead Parkway, Suite 100</street>
          <city>Sunnyvale</city>
          <region>CA</region>
          <code>94085</code>
          <country>USA</country>
        </postal>

        <email>gbarak@NVIDIA.com</email>
      </address>
    </author>

    <author fullname="Jeff Tantsura" initials="J. " surname="Tantsura">
      <organization>NVIDIA</organization>
      <address>
        <postal>
          <country>USA</country>
        </postal>
        <email>jefftant.ietf@gmail.com</email>
      </address>
    </author>

  <author fullname="Allister Alemania" initials="A. " surname="Alemania">
      <organization>Intel</organization>
      <address>
        <postal>
          <street>2200 Mission College Blvd</street>
          <city>Santa Clara</city>
          <region>CA</region>
          <code>95054</code>
          <country>USA</country>
        </postal>
        <email>allister.alemania@intel.com</email>
      </address>
      
    </author>
    <author fullname="Yuval Shpigelman" initials="Y. " surname="Shpigelman">
      <organization>NVIDIA</organization>
      <address>
        <postal>
          <street>Haim Hazaz 3A</street>
          <city>Netanya</city>
          <region></region>
          <code>4247417</code>
          <country>Israel</country>
        </postal>

        <email>yuvals@nvidia.com</email>
      </address>
    </author>

    
    <date year="2024" />

    <area>TSV</area>

    <keyword>Data Center Networking</keyword>

    <keyword>Congestion Control</keyword>

    <abstract>
    <t>Congestion control (CC) is the key to achieving ultra-low latency,
high bandwidth and network stability in high-speed networks. 
However, the existing high-speed CC schemes have inherent limitations for reaching these goals.</t>

    <t>In this document, we describe
HPCC++ (Enhanced High Precision Congestion Control), a new high-speed CC
mechanism which achieves the three goals simultaneously. HPCC++
leverages inband telemetry to obtain precise link load
information and controls traffic precisely. By addressing challenges
such as delayed signaling during congestion and overreaction to the congestion signaling using inband and granular telemetry, HPCC++ can quickly 
converge to utilize all the available bandwidth while avoiding congestion, and can maintain near-zero
in-network queues for ultra-low latency. HPCC++ is also fair and
easy to deploy in hardware, implementable with commodity NICs and switches.</t>
   </abstract>
   
   </front>

    <middle>
    <section anchor="sec-intro" title="Introduction">

    <t>The link speed in data center networks has grown from 1Gbps to
      100Gbps in the past decade, and this growth is continuing. Ultra-low 
      latency and high bandwidth, which are demanded by more
      and more applications, are two critical requirements in today's and
      future high-speed networks. </t>

    <t>Given that traditional software-based network stacks in hosts
      can no longer sustain the critical latency and bandwidth requirements as described in <xref target="Zhu-SIGCOMM2015"> </xref>, 
      offloading network stacks into hardware is an inevitable
      direction in high-speed networks. 
      As an example, large-scale networks with RDMA (remote direct memory access)
      often use hardware-offloading solutions.
      In some cases, the RDMA networks still face fundamental challenges
      to reconcile low latency, high bandwidth utilization, and high stability.</t>

    <t>This document describes a new congestion control mechanism, HPCC++ (Enhanced High Precision Congestion Control), 
      for large-scale, high-speed networks. The key idea behind HPCC++ is to leverage the precise link load 
      information signaled through &INT; to compute accurate flow rate updates. Unlike
      existing approaches that often require a large number of iterations
      to find the proper flow rates, HPCC++ requires only one rate update
      step in most cases. Using precise information from &INT; enables
      HPCC++ to address the limitations in current congestion control schemes. First,
      HPCC++ senders can quickly ramp up flow rates for high utilization
      and ramp down flow rates for congestion avoidance. Second, HPCC++
      senders can quickly adjust the flow rates to keep each link's output rate slightly lower than 
      the link's capacity, preventing queues from being built-up as well as preserving 
      high link utilization. Finally, since sending rates are computed precisely based on direct
      measurements at switches, HPCC++ requires merely three independent
      parameters that are used to tune fairness and efficiency. </t>

      <t>HPCC++ is an enhanced version of <xref target="SIGCOMM-HPCC"></xref>. 
      HPCC++ takes into account system constraints and aims to reduce the design 
      overhead and further improve the performance. A detailed specification of HPCC++ can be found in 
      <xref target="draft-miao-ccwg-hpcc"></xref>.</t>
      
      <t>This document describes the architecture changes in switches 
      and end-hosts to support the needed transmission of inband telemetry and its consumption, 
      which improves the efficiency in handling network congestion. </t>

    </section>

<section anchor = "sec-implementation-telemery"
	 title ="Inband telemetry padding at the network switches">
   <t>HPCC++ only relies on packets to share information across senders,
   receivers, and switches. 
   The switch should capture &INT; information that includes link load (txBytes, qlen, ts) and 
    link spec (switch_ID, port_ID, B) at the egress port.
    Note that each switch should record all of this information in a single snapshot to achieve a precise link load
    estimate. Inside a data center, the path length is often no more than 5 hops. 
   The overhead of the inband telemetry padding for HPCC++ is considered to be low.</t>


  <t> As long as the above requirement is met, HPCC++ is open to a variety of inband telemetry format standards, 
  which are orthogonal to the HPCC++ algorithm. 
  Although this document does not mandate a particular &INT; header format or encapsulation, 
  we provide concrete implementation specifications using standard inband telemetry protocols, 
  including IFA <xref target="I-D.ietf-kumar-ippm-ifa"></xref>, IETF IOAM <xref target="RFC9179"></xref>,
  and P4.org INT <xref target="P4-INT"></xref>. 
  In fact, the emerging inband telemetry protocols inform the evolution for a broader range of protocols and network functions, 
  where this document leverages the trend to propose the architecture change to support in-network functions like congestion control with high efficiency.
  </t>


  <section anchor = "sec-implementation-ifa"
	 title ="Inband telemetry on IFA2.0">
   <t>For more details, please refer to IFA <xref target="I-D.ietf-kumar-ippm-ifa"></xref>.</t>
   
    <figure anchor="fig-ifa-header"
               title="Example IFA header">
      <artwork><![CDATA[

    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |  lns  |  deviceID                             |     rsvd      |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   | Speed |     rsvd      |          rxTimestampSec               |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |       egressPort              |         ingressPort           |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                        rxTimeStampNs                          |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                         residenceTime                         |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                            txBytes                            |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |       rsvd                    |      Queue Length             |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                             rsvd                              |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

  ]]></artwork>
	    </figure>
  <t><xref target="fig-ifa-header"></xref> shows the packet format of the
	  INT metadata after the UDP and IFA metadata headers. 
	  The field lns is the local name space and defines the format of the metadata. The field deviceID is a 20-bit field that uniquely identifies the device in the network. 
	  The Speed field is an encoded field with the following encoding for port speed:
	  0 - 10G, 1 - 25G, 2 - 40G, 3 - 50G, 4 - 100G, 5 - 200G, 6 - 400G.
	  The field cn is the congestion field and denotes if the packet experienced congestion.

  </t>
   </section>


  <section anchor = "sec-implementation-ioam"
	 title ="Inband telemetry on IOAM">
   
   <t>IOAM is the technology adopted by IETF to be used for in-situ telemetry. For the use of HPCC++ we would discuss the IOAM trace option as part of the IOAM architecture. IOAM trace supports both 
	   Pre-allocated and Incremental trace Options, meaning that a node in the network may either write data into an already-allocated space in the packet, or may add the data as an extension to the IOAM header, respectively. 
	   An IOAM data header has a modular design, where the data types written by a node are determined based on the IOAM trace header instruction list.
	   For the full description of the IOAM header design please refer to IETF IOAM <xref target="RFC9179"></xref> specification.
	  In order to fulfill the requirements set by the HPCC++ architecture we would suggest to use the below trace types:
	  </t>
	  
	  <ul bare="false" empty="false" indent="3" spacing="normal">
          <li>Hop_Lim and node_id Short</li>
		  <li>Ingress_if_id and egress_if_id Short</li>
		  <li>Queue Depth</li>
		  <li>Timestamp Fraction: To be used as egress timestamp rather than an ingress timestamp</li>
		  <li>Transmitted Bytes</li>
        </ul>
	  <t>Note that Transmitted Bytes trace type is defined in <xref target="I-D.draft-gafni-ippm-ioam-additional-data-fields"></xref> 
    as a suggested extension to <xref target="RFC9179"></xref>.
	</t>
	  <t>When using the above trace types, the IOAM data header would be constructed as follows:</t>
			  
	  <figure anchor="fig-ioam-data"
               title="Example of an IOAM data header">
   <artwork><![CDATA[
 
    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |   Hop_Lim     |              node_id                          |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |     ingress_if_id             |         egress_if_id          |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                       queue depth                             |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                   timestamp fraction                          |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                           tx_bytes                            |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+


  ]]></artwork>
          </figure>
	


   </section>


  <section anchor = "sec-implementation-p4"
	 title ="Inband telemetry on P4.org INT">

     <figure anchor="fig-p4int"
               title="Example P4.org INT v2.1 per-hop metadata header">
   <artwork><![CDATA[

    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                        Node ID (Nth hop)                      |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |      Ingress Interface ID     |      Egress Interface ID      | 
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |    Queue ID   |               Queue occupancy                 |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                        Egress timestamp                       |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                    Egress timestamp (cont'd)                  |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                 Egress interface Tx utilization               |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                        Node ID (N-1th hop)                    |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                              ...                              |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                        Node ID (1st hop)                      |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                              ...                              |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+


  ]]></artwork>
          </figure>

  <t><xref target="fig-p4int"></xref> shows the per-hop metadata format of the
	  P4.org INT-MD mode (following INT v2.1 spec). 
	  Each hop switch along the path adds its Node ID for the sender to be
	  able to track the path and detect a path change event. If a path change is detected, the sender throws away
	  the existing status records of the flow and builds up new records.
	  
	  Queue occupancy (24 bits) is the current buffer occupancy of the egress port and queue
	  that the flow is going through. Egress timestamp (8 bytes) is used by the HPCC++
	  algorithm to eventually compute interface utilization. Since P4.org INT reports
	  Egress TX utilization in-band, the Egress timestamp is not mandatory but optional.
	  
	  HPCC++ algorithm today doesn't require Ingress Interface ID. P4.org INT defines Ingress
	  and Egress Interface IDs as one metadata instruction. We keep the Ingress ID for a future use.
</t>


   </section>

   

</section>

 <section anchor = "sec-implementation-csig"
	 title ="Inband telemetry on CSIG">
     <figure anchor="fig-csig"
               title="CSIG Compact Header Format">
   <artwork><![CDATA[

    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |             TPID              |  T  |R|    S    |      LM     |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

   |0-15|  TPID  : IEEE allocated Tag Protocol ID for 4 Byte CSIG tag
   |16-18| T     : Signal Type (0:min(ABW), 1: min(ABW/C), 2:max(PD))
   |19|    R     : Reserved
   |20-24| S     : Signal Value: Bucketed (32 configurable buckets)
   |25-31| LM    : Locator Metadata of bottleneck device / port

  ]]></artwork>
          </figure>

  <t>
	  CSIG (Congestion SIGnaling) <xref target="I-D.ietf-ravi-ippm-csig"></xref> is a 
	  compact in-band telemetry solution that carries a fixed-size aggregate metric computed
	  over the hop devices. CSIG supports two formats: compact and expanded formats. 
	  The expanded format is to be further defined in IETF. This section discusses how to support HPCC++ 
	  with the 2B compact format as shown in <xref target="fig-csig"></xref>. 
	  Note: there is no fundamental difference between the 2B format and the expanded 6B format, 
	  as both use Type-Value format, carrying one signal in a given packet.

	  The Locator Metadata (LM) field can encode link capacity and/or relative location of the bottleneck link 
	  such as Clos stage and link orientation (up or down), from which one may look up the speed of the link.

	  The Type-Value format of CSIG Signal allows the sender to choose one signal type of interest 
	  for each packet to collect over the hop devices. The CSIG draft today suggests 3 signal types:
 	</t>
	  <ul spacing="normal">
	    <li>Minimum Available Bandwidth - min(ABW)</li>
	    <li>Maximum Link Utilization - min(ABW/B), where B is the link capacity</li>
	    <li>Maximum Per-Hop Delay - max(PD)</li>
	  </ul>

   

	<section anchor = "sec-implementation-csig-how"
	 title ="How to use CSIG for HPCC++">

  	<t>	
	In the HPCC++ algorithm, the per-hop normalized inflight bytes u’ in line 5 is a sum of two terms u1, u2 as follows:
	</t>
	
	<figure><artwork><![CDATA[
	u’ = u1 + u2, where
	u1 = min(ack.L[i].qlen,L[i].qlen) / (ack.L[i].B*T)
	u2 = txRate / ack.L[i].B
	]]></artwork></figure>

	<t>
	u1 is essentially qlen divided by BDP and u2 is a link utilization. 
	The min(current qlen, previous qlen) in u1 is for smoothing out noise and its benefit is marginal 
	as described in the HPCC++ requirement section.
</t>
<t>
	Compared to INT/IFA/IOAM discussed above that stack up multiple metadata from all hop devices in
	each packet, CSIG carries only one signal (either u1 or u2) per packet. 
	We argue that’s acceptable for lossy networks as follows. 
	(The case for lossless networks will be discussed in a later revision.)
</t>
<t>
	In lossy networks not using PFC, queueing happens only when the link is 100% utilized. 
	It’s easy to see that when u1 (qlen/BDP) is non-zero, u2 (link utilization) is 100%. 
	Likewise, when u2 is less than 100%, u1 is zero. This mutual-exclusiveness between u1 and u2 
	by definition makes CSIG, which carries only one signal (either for u1 or u2) per packet, 
	a viable signaling mechanism for HPCC++. The sender of HPCC++ with CSIG can decide which 
	of the two signals to collect from each packet based on its protocol state. 
</t>
<t>
	Note that because u1’s qlen is an instantaneous value while u2’s link utilization is calculated
	over a measurement time window, it is possible to momentarily have u1 &gt; 0 and u2 &lt; 100%.
	In the case of HPCC++ with conventional INT/IFA/IOAM, the measurement window for 
	link utilization is determined by the delta between current timestamp and previous timestamp
	observed by each connection and the delta varies packet by packet. 
	HPCC++ sums up u1 and u2 ignoring such a temporary incongruity. 
	With CSIG, u2 is estimated by the switches
	and the measurement window is a network-wide config parameter e.g., a multiple of the network RTT. 
</t>
<t>
	When a path changes, triggered by either routing changes or by end-host driven path migration 
	(such as PLB), two consecutive packets are assumed to go through the same path 
	for the algorithm to have its inputs consistently - u1 and u2.

	With CSIG, HPCC++ can obtain max(u') by collecting max(u1) or max(u2) per packet as follows.

	max(u2): the for-loop from line 3 of HPCC++ looks for the max(u'), 
	and if there is no queueing happening throughout the path, it returns max(u2). 
	max(u2) is equivalent to min(ABW/B) in CSIG, hence we can directly use min(ABW/B).

	max(u1): in computing max(u1), one straightforward way is to introduce 
	a new signal type of max(qlen/B) in CSIG. qlen/B can be interpreted as an ‘expected’ 
	sojourn time for the packet in the tail. 
	HPCC++ assumes ~100% of the link capacity B is used for HPCC++ traffic. 
	max(qlen/B) is maximum per-hop ‘expected’ delay. 
	Note that T (= congestion-free RTT) in u1 is a network-wide config parameter in HPCC++,
	not specific to each hop. Once CSIG collects max(qlen/B), 
	the end host can easily calculate u1 = max(qlen/B) / T for HPCC++.
</t>
<t>
	Another way is to use the max(PD) signal in CSIG to approximate max(qlen/B). 
	The reason why this is an approximation is twofold: first, the per-hop delay in CSIG
	is the delay experienced by the signal-carrying packet while qlen in HPCC++ is the 
	length of the queue observed at the dequeue time of the CSIG-carrying packet,
	hence the effect of queueing applies to the packet at the tail, not to the packet that carries the CSIG signal.
	Second, B in qlen/B of HPCC++ is the whole link capacity while the per-hop delay
	in CSIG reflects the per-queue drain rate affected by other traffic classes
	depending on the queueing mechanism in use (strict priority or WDRR).

	The smoothing effect of min(current qlen, previous qlen) in u1 can be supported by CSIG
	as long as the bottleneck point doesn’t change between the previous packet and the current packet. 
	</t>

	</section>

   </section>


<section anchor = "sec-iana"
	 title = "IANA Considerations">

<t>This document makes no request of IANA.</t>

</section>

<section anchor = "sec-acknowledgments"
	 title = "Acknowledgments">

<t>
The authors would like to thank RTGWG members for their valuable review
comments and helpful input to this specification. </t>

</section> 

<section title="Contributors"  anchor="sec-contributors">

<t>The following individuals have contributed to the implementation
  and evaluation of the proposed scheme, and therefore have helped
  to validate and substantially improve this specification:
  Pedro Y. Segura, Roberto P. Cebrian, Robert Southworth and Md Ashiqur Rahman. </t>

</section>

<section title="Security Considerations"  anchor="Security">
 <t>TBD</t>
</section>

</middle>

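<back>
    <!-- The reference anchored as "RFC9179" is actually RFC 9197 (IOAM Data Fields);
         render its citation label with the correct RFC number without renaming the anchor. -->
    <displayreference target="RFC9179" to="RFC9197"/>
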

     <references title="Normative References">
 
     <!-- &rfc2119;  RFCs -->
     <!-- &rfc8174;   - Ambiguity of Uppercase vs Lowercase -->
     </references>

    <references title="Informative References">
      <!-- &I-D.ietf-avtcore-cc-feedback-message;  -->
      

<reference anchor="Zhu-SIGCOMM2015"
		 target="">
        <front>
          <title>
	    Congestion Control for Large-Scale RDMA Deployments
          </title>

          <author fullname="Yibo Zhu" initials="Y." surname="Zhu">
            <organization></organization>
          </author>

          <author fullname="Haggai Eran" initials="H." surname="Eran">
            <organization></organization>
          </author>

          <author fullname="Daniel Firestone" initials="D." surname="Firestone">
            <organization></organization>
          </author>

          <author fullname="Chuanxiong Guo" initials="C." surname="Guo">
            <organization></organization>
          </author>

          <author fullname="Marina Lipshteyn" initials="M." surname="Lipshteyn">
            <organization></organization>
          </author>

          <author fullname="Yehonatan Liron" initials="Y." surname="Liron">
            <organization></organization>
          </author>

          <author fullname="Jitendra Padhye" initials="J." surname="Padhye">
            <organization></organization>
          </author>

          <author fullname="Shachar Raindel" initials="S." surname="Raindel">
            <organization></organization>
          </author>

          <author fullname="Mohammad Haj Yahia" initials="M. H." surname="Yahia">
            <organization></organization>
          </author>

          <author fullname="Ming Zhang" initials="M." surname="Zhang">
            <organization></organization>
          </author>

          <date month="August" year="2015" />
        </front>

        <seriesInfo name="ACM SIGCOMM"
		    value = "London, United Kingdom"/>

      </reference>
      
      <reference anchor="P4-INT"
		 target="https://github.com/p4lang/p4-applications/blob/master/docs/INT_v2_0.pdf">
        <front>
          <title>
	    In-band Network Telemetry (INT) Dataplane Specification, v2.0
          </title>

          <author >
            <organization></organization>
          </author>
          <date month="February" year="2020" />
        </front>
      </reference>

      <reference anchor="RFC9179"
		 target="https://datatracker.ietf.org/doc/html/rfc9197">
        <front>
          <title>
	    Data Fields for In Situ Operations, Administration, and Maintenance (IOAM)
          </title>

          <author >
            <organization></organization>
          </author>
          <date month="May" year="2022" />
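        </front>
        <!-- The document cited here is RFC 9197; the anchor "RFC9179" is kept so existing xrefs still resolve. -->
        <seriesInfo name="RFC" value="9197"/>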
      </reference>

	    <reference anchor="I-D.draft-gafni-ippm-ioam-additional-data-fields"
		 target="https://datatracker.ietf.org/doc/html/draft-gafni-ippm-ioam-additional-data-fields-00">
        <front>
          <title>
	    Additional data fields for IOAM Trace Option Types
          </title>

          <author >
            <organization></organization>
          </author>
          <date month="May" year="2021" />
        </front>
      </reference>

      <reference anchor="I-D.ietf-kumar-ippm-ifa"
		 target="https://tools.ietf.org/html/draft-kumar-ippm-ifa-08">
        <front>
          <title>
	    Inband Flow Analyzer
          </title>

          <author >
            <organization></organization>
          </author>
          <date month="February" year="2019" />
        </front>
      </reference>

      <reference anchor="draft-miao-ccwg-hpcc"
		 target="">
        <front>
          <title>HPCC++: Enhanced High Precision Congestion Control
          </title>

                    <author fullname="Rui Miao" initials="R." surname="Miao">
            <organization></organization>
          </author>
          <date year="2024" />
        </front>
</reference>

<reference anchor="I-D.ietf-ravi-ippm-csig"
		 target="https://datatracker.ietf.org/doc/draft-ravi-ippm-csig/">
        <front>
          <title>
	    Congestion Signaling (CSIG)
          </title>

          <author >
            <organization></organization>
          </author>
          <date month="February" year="2024" />
        </front>
      </reference>

      <reference anchor="SIGCOMM-HPCC"
		 target="">
        <front>
          <title>
	    HPCC: High Precision Congestion Control
          </title>

          <author fullname="Yuliang Li" initials="Y." surname="Li">
            <organization></organization>
          </author>

          <author fullname="Rui Miao" initials="R." surname="Miao">
            <organization></organization>
          </author>

          <author fullname="Hongqiang Harry Liu" initials="H." surname="Liu">
            <organization></organization>
          </author>

          <author fullname="Yan Zhuang" initials="Y." surname="Zhuang">
            <organization></organization>
          </author>

          <author fullname="Fei Feng" initials="F." surname="Feng">
            <organization></organization>
          </author>

          <author fullname="Lingbo Tang" initials="L." surname="Tang">
            <organization></organization>
          </author>

          <author fullname="Zheng Cao" initials="Z." surname="Cao">
            <organization></organization>
          </author>

          <author fullname="Ming Zhang" initials="M." surname="Zhang">
            <organization></organization>
          </author>

          <author fullname="Frank Kelly" initials="F." surname="Kelly">
            <organization></organization>
          </author>

          <author fullname="Mohammad Alizadeh" initials="M." surname="Alizadeh">
            <organization></organization>
          </author>

          <author fullname="Minlan Yu" initials="M." surname="Yu">
            <organization></organization>
          </author>

          <date month="August" year="2019" />
        </front>

        <seriesInfo name="ACM SIGCOMM"
		    value = "Beijing, China"/>

      </reference>


    

     </references>
  </back>
</rfc>