<?xml version="1.0" encoding="US-ASCII"?>
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!ENTITY RFC2119 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC8174 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml">
<!ENTITY RFC7432 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7432.xml">
<!ENTITY RFC8365 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8365.xml">
<!ENTITY RFC8584 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8584.xml">
<!ENTITY RFC9136 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.9136.xml">
<!ENTITY RFC9252 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.9252.xml">
<!ENTITY RFC4364 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4364.xml">
<!ENTITY RFC8214 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8214.xml">
<!ENTITY RFC7348 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7348.xml">
<!ENTITY RFC8926 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8926.xml">
<!ENTITY RFC7510 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7510.xml">
<!ENTITY RFC8986 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8986.xml">
<!ENTITY RFC9012 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.9012.xml">
<!ENTITY RFC7938 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7938.xml">
<!ENTITY RFC9469 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.9469.xml">
<!ENTITY RFC9573 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.9573.xml">
<!ENTITY RFC9746 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.9746.xml">
<!ENTITY RFC9135 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.9135.xml">
<!ENTITY I-D.ietf-bess-rfc7432bis SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-bess-rfc7432bis.xml">
<!ENTITY I-D.ietf-bess-evpn-virtual-eth-segment SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-bess-evpn-virtual-eth-segment.xml">
<!ENTITY I-D.ietf-bess-evpn-unequal-lb SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-bess-evpn-unequal-lb.xml">
<!ENTITY I-D.ietf-bess-evpn-ip-aliasing SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-bess-evpn-ip-aliasing.xml">
<!ENTITY I-D.ietf-bess-evpn-fast-reroute SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.ietf-bess-evpn-fast-reroute.xml">
]>
<?rfc toc="yes"?>
<?rfc tocompact="yes"?>
<?rfc tocdepth="3"?>
<?rfc tocindent="yes"?>
<?rfc symrefs="yes"?>
<?rfc sortrefs="yes"?>
<?rfc comments="yes"?>
<?rfc inline="yes"?>
<?rfc compact="yes"?>
<?rfc subcompact="no"?>
<rfc category="std" docName="draft-rabnag-bess-evpn-anycast-aliasing-04"
     ipr="trust200902" submissionType="IETF">
  <!---->

  <?rfc strict="yes"?>

  <?rfc compact="yes"?>

  <?rfc subcompact="no"?>

  <?rfc symrefs="yes"?>

  <?rfc sortrefs="no"?>

  <?rfc text-list-symbols="-o+*"?>

  <?rfc toc="yes"?>

  <front>
    <title abbrev="EVPN Anycast Multihoming">EVPN Anycast Multi-Homing</title>

    <author fullname="Jorge Rabadan" initials="J." role="editor"
            surname="Rabadan">
      <organization>Nokia</organization>

      <address>
        <postal>
          <street>520 Almanor Avenue</street>

          <city>Sunnyvale</city>

          <region>CA</region>

          <code>94085</code>

          <country>USA</country>
        </postal>

        <email>jorge.rabadan@nokia.com</email>
      </address>
    </author>

    <author fullname="Kiran Nagaraj" initials="K." surname="Nagaraj">
      <organization>Nokia</organization>

      <address>
        <postal>
          <street>520 Almanor Avenue</street>

          <city>Sunnyvale</city>

          <region>CA</region>

          <code>94085</code>

          <country>USA</country>
        </postal>

        <email>kiran.nagaraj@nokia.com</email>
      </address>
    </author>

    <author fullname="Alex Nichol" initials="A." surname="Nichol">
      <organization>Arista</organization>

      <address>
        <postal>
          <street/>

          <city/>

          <region/>

          <code/>

          <country/>
        </postal>

        <phone/>

        <facsimile/>

        <email>anichol@arista.com</email>

        <uri/>
      </address>
    </author>

    <author fullname="Ali Sajassi" initials="A." surname="Sajassi">
      <organization>Cisco Systems</organization>

      <address>
        <postal>
          <street/>

          <city/>

          <region/>

          <code/>

          <country/>
        </postal>

        <phone/>

        <facsimile/>

        <email>sajassi@cisco.com</email>

        <uri/>
      </address>
    </author>

    <author fullname="Wen Lin" initials="W." surname="Lin">
      <organization>Juniper Networks</organization>

      <address>
        <postal>
          <street/>

          <city/>

          <region/>

          <code/>

          <country/>
        </postal>

        <phone/>

        <facsimile/>

        <email>wlin@juniper.net</email>

        <uri/>
      </address>
    </author>

    <author fullname="Jeff Tantsura" initials="J." surname="Tantsura">
      <organization>Nvidia</organization>

      <address>
        <postal>
          <street/>

          <city/>

          <region/>

          <code/>

          <country/>
        </postal>

        <phone/>

        <facsimile/>

        <email>jefftant.ietf@gmail.com</email>

        <uri/>
      </address>
    </author>

    <date day="7" month="July" year="2025"/>

    <workgroup>BESS Workgroup</workgroup>

    <abstract>
      <t>The current Ethernet Virtual Private Network (EVPN) all-active
      multi-homing procedures in Network Virtualization Over Layer-3 (NVO3)
      networks provide the required Split Horizon filtering, Designated
      Forwarder Election and Aliasing functions that the network needs in
      order to handle the traffic to and from the multi-homed CE in an
      efficient way. In particular, the Aliasing function supports load
      balancing of unicast traffic from remote Network Virtualization Edge
      (NVE) devices to NVEs that are multi-homed to the same CE, regardless of
      whether the CE&rsquo;s MAC/IP information has been learned on all those
      NVEs. This document introduces an optional enhancement to the EVPN
      multi-homing Aliasing function, referred to as EVPN Anycast
      Multi-homing. This optimization is specific to EVPN deployments over
      NVO3 tunnels (i.e., IP-based tunnels) and may offer benefits in typical
      data center designs, which are discussed herein.</t>
    </abstract>
  </front>

  <middle>
    <section anchor="sect-1" title="Introduction">
      <t>Ethernet Virtual Private Network (EVPN) is the de-facto standard
      control plane in Network Virtualization Over Layer-3 (NVO3) networks
      deployed in multi-tenant Data Centers <xref target="RFC8365"/><xref
      target="RFC9469"/>. EVPN enables Network Virtualization Edge (NVE)
      auto-discovery, tenant MAC/IP dissemination, and advanced capabilities
      required by NVO3 networks, such as
      all-active multi-homing. The current EVPN all-active multi-homing
      procedures in NVO3 networks provide the required Split Horizon
      filtering, Designated Forwarder Election and Aliasing functions that the
      network needs in order to handle the traffic to and from the multi-homed
      CE in an efficient way. In particular, the Aliasing function supports
      load balancing of unicast traffic from remote NVEs to NVEs that are
      multi-homed to the same CE, regardless of whether the CE&rsquo;s MAC/IP
      information has been learned on all those NVEs. This document introduces
      an optional enhancement to the EVPN multi-homing Aliasing function,
      referred to as EVPN Anycast Multi-homing. This optimization is specific
      to EVPN deployments over NVO3 tunnels (i.e., IP-based tunnels) and may
      offer benefits in typical data center designs, which are discussed
      herein.</t>

      <section anchor="sect-1.1" title="Terminology and Conventions">
        <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
        "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and
        "OPTIONAL" in this document are to be interpreted as described in BCP
        14 <xref target="RFC2119"/> <xref target="RFC8174"/> when, and only
        when, they appear in all capitals, as shown here.</t>

        <t><list style="symbols">
            <t>A-D per EVI route: EVPN route type 1, Auto-Discovery per EVPN
            Instance route. Route used for aliasing or backup signaling in
            EVPN multi-homing procedures <xref target="RFC7432"/>.</t>

            <t>A-D per ES route: EVPN route type 1, Auto-Discovery per
            Ethernet Segment route. Route used for mass withdraw in EVPN
            multi-homing procedures <xref target="RFC7432"/>.</t>

            <t>BUM traffic: Broadcast, Unknown unicast and Multicast
            traffic.</t>

            <t>CE: Customer Edge, e.g., a host, router, or switch.</t>

            <t>Clos: a multistage network topology described in <xref
            target="CLOS1953"/>, where all the edge nodes (or Leaf routers)
            are connected to all the core nodes (or Spines). Typically used in
            Data Centers.</t>

            <t>ECMP: Equal Cost Multi-Path.</t>

            <t>ES: Ethernet Segment. When a Tenant System (TS) is connected to
            one or more NVEs via a set of Ethernet links, then that set of
            links is referred to as an 'Ethernet segment'. Each ES is
            represented by a unique Ethernet Segment Identifier (ESI) in the
            NVO3 network and the ESI is used in EVPN routes that are specific
            to that ES.</t>

            <t>EVI: or EVPN Instance. It is a Layer-2 Virtual Network that
            uses an EVPN control-plane to exchange reachability information
            among the member NVEs. It corresponds to a set of MAC-VRFs of the
            same tenant. See MAC-VRF in this section.</t>

            <t>GENEVE: Generic Network Virtualization Encapsulation, an NVO3
            encapsulation defined in <xref target="RFC8926"/>.</t>

            <t>IP-VRF: an IP Virtual Routing and Forwarding table, as defined
            in <xref target="RFC4364"/>. It stores IP Prefixes that are part
            of the tenant's IP space, and are distributed among NVEs of the
            same tenant by EVPN. Route Distinguisher (RD) and Route Target(s)
            (RTs) are required properties of an IP-VRF. An IP-VRF is
            instantiated in an NVE for a given tenant, if the NVE is attached
            to multiple subnets of the tenant and local
            inter-subnet-forwarding is required across those subnets.</t>

            <t>IRB: Integrated Routing and Bridging interface. It refers to
            the logical interface that connects a Broadcast Domain instance
            (or a BT, Bridge Table) to an IP-VRF and allows forwarding of
            packets with a destination in a different subnet.</t>

            <t>MAC-VRF: a MAC Virtual Routing and Forwarding table, as defined
            in <xref target="RFC7432"/>. The instantiation of an EVI (EVPN
            Instance) in an NVE. Route Distinguisher (RD) and Route Target(s)
            (RTs) are required properties of a MAC-VRF and they are normally
            different from the ones defined in the associated IP-VRF (if the
            MAC-VRF has an IRB interface).</t>

            <t>MPLS and non-MPLS NVO3 tunnels: refer to Multi-Protocol Label
            Switching (or the absence of it) Network Virtualization Overlay
            tunnels. Network Virtualization Overlay tunnels use an IP
            encapsulation for overlay frames, where the source IP address
            identifies the ingress NVE and the destination IP address the
            egress NVE.</t>

            <t>NLRI: BGP Network Layer Reachability Information.</t>

            <t>NVE: Network Virtualization Edge device, a network entity that
            sits at the edge of an underlay network and implements Layer-2
            and/or Layer-3 network virtualization functions. The
            network-facing side of the NVE uses the underlying Layer-3 network
            to tunnel tenant frames to and from other NVEs. The tenant-facing
            side of the NVE sends and receives Ethernet frames to and from
            individual Tenant Systems. In this document, an NVE could be
            implemented as a virtual switch within a hypervisor, a switch or a
            router, and runs EVPN in the control-plane. This document uses the
            terms NVE and "Leaf router" interchangeably.</t>

            <t>NVO3 tunnels: Network Virtualization Over Layer-3 tunnels. In
            this document, NVO3 tunnels refer to a way to encapsulate tenant
            frames or packets into IP packets whose IP Source Addresses (SA)
            or Destination Addresses (DA) belong to the underlay IP address
            space, and identify NVEs connected to the same underlay network.
            Examples of NVO3 tunnel encapsulations are VXLAN <xref
            target="RFC7348"/>, GENEVE <xref target="RFC8926"/> or MPLSoUDP
            <xref target="RFC7510"/>.</t>

            <t>SBD: Supplementary Broadcast Domain <xref
            target="RFC9136"/>.</t>

            <t>SRv6: Segment routing with an IPv6 data plane, <xref
            target="RFC8986"/>.</t>

            <t>TS: Tenant System. A physical or virtual system that can play
            the role of a host or a forwarding element such as a router,
            switch, firewall, etc. It belongs to a single tenant and connects
            to one or more Broadcast Domains of that tenant.</t>

            <t>VNI: Virtual Network Identifier. Irrespective of the NVO3
            encapsulation, the tunnel header always includes a VNI that is
            added at the ingress NVE (based on the mapping table lookup) and
            identifies the BT at the egress NVE. This VNI is called VNI in
            VXLAN or GENEVE, VSID in NVGRE or Label in MPLSoGRE or MPLSoUDP.
            This document will refer to VNI as a generic Virtual Network
            Identifier for any NVO3 encapsulation.</t>

            <t>VTEP: VXLAN Tunnel End Point. A loopback IP address of the
            destination NVE that is used in the outer destination IP address
            of VXLAN packets directed to that NVE.</t>

            <t>VXLAN: Virtual eXtensible Local Area Network, an NVO3
            encapsulation defined in <xref target="RFC7348"/>.</t>
          </list></t>
      </section>

      <section anchor="sect-1.2" title="Problem Statement">
        <t><xref target="Figure1"/> depicts the typical Clos topology in
        multi-tenant Data Centers, only simplified to show three Leaf routers
        and two Spines, forming a 3-stage Clos topology. The NVEs or Leaf
        routers run EVPN for NVO3 tunnels, as in <xref target="RFC8365"/>.
        This document assumes VXLAN is used as the NVO3 tunnel encapsulation,
        given its widespread adoption in multi-tenant data center
        environments. The diagram below serves as a reference throughout the
        document. It is important to note that in large-scale data center
        deployments, the number of Tenant Systems, leaf routers, and spine
        layers may be significantly higher than what is depicted in <xref
        target="Figure1"/>.</t>

        <t><figure anchor="Figure1"
            title="Simplified Clos topology in Data Centers">
            <artwork><![CDATA[          +-------+   +-------+
          |Spine-1|   |Spine-2|
          |       |   |       |
          +-------+   +-------+
           |  |  |     |  |  |
       +---+  |  |     |  |  +---+
       |      |  |     |  |      |
       |  +------------+  |      |
       |  |   |  |        |      |
       |  |   |  +------------+  |
       |  |   |           |   |  |
       |  |   +---+  +----+   |  |
   L1  |  |    L2 |  |     L3 |  |
    +-------+   +-------+   +-------+
    | +---+ |   | +---+ |   | +---+ |
    | |BD1| |   | |BD1| |   | |BD1| |
    | +---+ |   | +---+ |   | +---+ |
    +-------+   +-------+   +-------+
       | |         | |          |
       | +---+ +---+ |          |
       |     | |     |          |
       |    +---+    |        +---+
       |    |TS1|    |        |TS3|
       |    +---+    |        +---+
       |    ES-1     |
       +-----+ +-----+
             | |
            +---+
            |TS2|
            +---+
            ES-2

]]></artwork>
          </figure></t>

        <t>In the example of <xref target="Figure1"/> the Tenant Systems TS1
        and TS2 are multi-homed to Leaf routers L1 and L2, and Ethernet
        Segments Identifiers ESI-1 and ESI-2 are the representation of TS1 and
        TS2 Ethernet Segments in the EVPN control plane for the Split Horizon
        filtering, Designated Forwarder and Aliasing functions <xref
        target="RFC8365"/>.</t>

        <t>Taking Tenant Systems TS1 and TS3 as an example, the EVPN
        all-active multi-homing procedures guarantee that, when TS3 sends
        unicast traffic to TS1, Leaf L3 does per-flow load balancing towards
        Leaf routers L1 and L2. As explained in <xref target="RFC7432"/> and
        <xref target="RFC8365"/> this is possible due to L1 and/or L2 Leaf
        routers advertising TS1's MAC address in an EVPN MAC/IP Advertisement
        route that includes ESI-1 in the Ethernet Segment Identifier field.
        When the route is imported into Leaf L3, TS1&rsquo;s MAC address is
        programmed with a destination associated with the ESI-1 next-hop list.
        This ESI-1 next-hop list is created based on the reception of the EVPN
        A-D per ES and A-D per EVI routes for ESI-1 that are received from
        Leaf routers L1 and L2.</t>

        <t>Assuming the Ethernet Segment ES-1 links are operationally active,
        Leaf routers L1 and L2 advertise the EVPN A-D per ES/EVI routes for
        ESI-1. Leaf L3 then adds L1 and L2 to its next-hop list for ESI-1. As
        a result, unicast flows from TS3 to TS1 are load-balanced across L1
        and L2. This ESI-1 next-hop list in Leaf L3 is referred to as the
        &ldquo;overlay ECMP set&rdquo; for ESI-1. In addition, once Leaf L3
        selects one of the next hops in the overlay ECMP set (e.g., L1), it
        performs a route lookup for L1&rsquo;s address in the base router
        route table. This lookup yields a list of two next hops &mdash;
        Spine-1 and Spine-2 &mdash; which is referred to as the
        &ldquo;underlay ECMP set.&rdquo; Therefore, for any given unicast flow
        to TS1, Leaf L3 performs per-flow load balancing at two levels: it
        first selects a next hop from the overlay ECMP set (e.g., L1), and
        then selects a next hop from the underlay ECMP set (e.g.,
        Spine-1).</t>

        <t>While aliasing <xref target="RFC7432"/> offers an efficient way to
        load balance unicast traffic across Leaf routers attached to the same
        all-active Ethernet Segment, it introduces challenges in very large
        data centers where the number of Ethernet Segments and Leaf routers is
        substantial:</t>

        <t><list style="letters">
            <t>Control Plane Scale: In a large data center environment, the
            number of multi-homed compute nodes can grow substantially into
            the thousands. Each compute node requires a unique Ethernet
            Segment (ES) and hosts dozens of EVIs per ES. Under the aliasing
            model defined in <xref target="RFC7432"/>, there is a requirement
            to advertise EVPN A-D per EVI routes for every active EVI on each
            Ethernet Segment. As a result, the volume of EVPN state that Route
            Reflectors, Data Center Gateways, and Leaf routers must process
            becomes significant, and it only increases as additional Ethernet
            Segments, Broadcast Domains, and Leaf routers are deployed.
            Eliminating the need to advertise these EVPN A-D per EVI routes
            would therefore provide a substantial benefit in reducing overall
            route scale and processing overhead.</t>

            <t>Convergence and Processing overhead: In accordance with <xref
            target="RFC8365"/> each node in an Ethernet Segment operates as an
            independent VTEP and therefore acts as a separate EVPN next hop.
            In a typical data center leaf-spine topology, this results in ECMP
            being applied both in the underlay ECMP set and in the overlay
            ECMP set. As a consequence, convergence at scale during a failure
            can be slow and CPU intensive. All leaf routers must process the
            overlay state changes triggered by the withdrawal of EVPN route(s)
            at the point of failure and update their overlay ECMP sets
            accordingly. By performing load balancing solely within the
            underlay ECMP set, it is possible to significantly reduce this
            network-wide state churn and processing overhead. This approach
            also enables faster convergence at scale by limiting
            re-convergence to only the intermediate spine nodes.</t>

            <t>Inefficient underlay forwarding during a failure: Another
            consequence of using ECMP with the overlay ECMP set is the
            potential for in-flight packets sent by remote leaf routers to be
            rerouted inefficiently. For example, suppose the link between L1
            and Spine-1 (shown in <xref target="Figure1"/>) fails. In-flight
            VXLAN packets already sent from L3 with the destination VTEP set
            to L1 arrive at Spine-1 and are rerouted along a suboptimal path
            &mdash; for instance, through L2 -&gt; Spine-2 -&gt; L1 -&gt; TS1
            &mdash; even though they could have been forwarded directly via L2
            -&gt; TS1, since TS1 is also connected to Leaf L2. Once the
            underlay routing protocol converges, all VXLAN packets destined
            for VTEP L1 are correctly forwarded to Spine-2, and Leaf L3
            removes Spine-1 from the underlay ECMP set for Leaf L1.</t>
          </list></t>

        <t>There are existing proprietary multi-chassis Link Aggregation Group
        implementations, collectively and commonly known as MC-LAG, that
        attempt to address the challenges described above by using the concept
        of "Anycast VTEPs". This involves assigning a shared loopback IP
        address that the leaf routers connected to the same multi-homed tenant
        system use to terminate VXLAN packets. For example, in <xref
        target="Figure1"/>, if Leaf routers L1 and L2 were to use an Anycast
        VTEP address (e.g., anycast-IP1), they could identify VXLAN packets
        destined for multi-homed tenant systems using that shared address:</t>

        <t><list style="symbols">
            <t>Leaf L3 would not need to create an overlay ECMP set for
            packets destined to TS1 or TS2, since the use of anycast-IP1 in
            the underlay ECMP set guarantees per-flow load balancing across
            the two leaf routers.</t>

            <t>In the same failure scenario described earlier &mdash; where
            the link between L1 and Spine-1 fails &mdash; Spine-1 would
            reroute VXLAN packets directly to Leaf L2. This is possible
            because L2 also advertises the anycast-IP1 address that Leaf L3
            uses to forward packets to TS1 or TS2.</t>

            <t>Additionally, if Leaf routers L1 and L2 used proprietary MC-LAG
            techniques, no EVPN A-D per EVI routes would be required. As a
            result, the number of EVPN routes would be significantly reduced
            in a large-scale data center.</t>
          </list>However, the use of proprietary MC-LAG technologies in EVPN
        NVO3 networks is being abandoned due to the superior capabilities
        offered by EVPN Multi-Homing. These include features such as mass
        withdraw <xref target="RFC7432"/>, advanced Designated Forwarding
        election <xref target="RFC8584"/> or weighted load balancing <xref
        target="I-D.ietf-bess-evpn-unequal-lb"/>, among others.</t>
      </section>

      <section anchor="sect-1.3" title="Solution Overview">
        <t>This document specifies an EVPN Anycast Multi-Homing extension that
        can be used as an alternative to EVPN aliasing (<xref
        target="RFC7432"/>). The EVPN Anycast Multi-Homing procedures
        described here may optionally replace per-flow overlay ECMP load
        balancing with simplified per-flow underlay ECMP load balancing. This
        approach works similarly to proprietary MC-LAG solutions but provides
        a standardized method that retains the superior advantages of EVPN
        Multi-Homing &mdash; such as Designated Forwarder Election, Split
        Horizon filtering, and the mass withdraw function (all described in
        <xref target="RFC8365"/> and <xref target="RFC7432"/>).</t>

        <t>The solution uses A-D per ES routes to advertise the Anycast VTEP
        address to be used when sending traffic to the Ethernet Segment, and
        it suppresses the use of A-D per EVI routes for Ethernet Segments
        configured in this mode. This design addresses the challenges outlined
        in <xref target="sect-1.2"/>.</t>

        <t>The solution is applicable to all NVO3 tunnels and even to IP
        tunnels in general. While VXLAN is often used as an example in this
        document due to its widespread adoption in multi-tenant data centers,
        the examples and procedures are equally valid for any NVO3 or IP
        tunnel type.</t>
      </section>
    </section>

    <section anchor="sect-2" title="BGP EVPN Extensions">
      <t>This specification makes use of two BGP extensions that are used
      along with the A-D per ES routes <xref target="RFC7432"/>.</t>

      <t>The first extension is the flag "A" or "Anycast Multi-homing mode",
      which IANA is requested to allocate in bit 2 of the EVPN ESI
      Multihoming Attributes registry for the 1-octet Flags field in the ESI
      Label Extended Community, as follows:</t>

      <figure anchor="Figure2" title="ESI Label Extended Community and Flags">
        <artwork><![CDATA[   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   | Type=0x06     | Sub-Type=0x01 | Flags(1 octet)|  Reserved=0   |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |  Reserved=0   |          ESI Label                            |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

Flags field:

        0 1 2 3 4 5 6 7
       +-+-+-+-+-+-+-+-+
       |SHT|A|     |RED|   
       +-+-+-+-+-+-+-+-+

]]></artwork>
      </figure>

      <t>Where the following Flags are defined:</t>

      <texttable style="headers" suppress-title="true" title="Flags Field">
        <ttcol>Name</ttcol>

        <ttcol>Meaning</ttcol>

        <ttcol>Reference</ttcol>

        <c>RED</c>

        <c>Multihomed redundancy mode</c>

        <c><xref target="I-D.ietf-bess-rfc7432bis"/></c>

        <c>SHT</c>

        <c>Split Horizon type</c>

        <c><xref target="RFC9746"/></c>

        <c>A</c>

        <c>Anycast Multi-homing mode</c>

        <c>This document</c>
      </texttable>

      <t>When the NVE advertises an A-D per ES route with the A flag set, it
      indicates the Ethernet Segment is working in Anycast Multi-homing mode.
      The A flag is set only if RED = 00 (All-Active redundancy mode), and
      MUST NOT be set if RED is different from 00.</t>

      <t>The second extension that this document introduces is the encoding of
      the "Anycast VTEP" address in the BGP Tunnel Encapsulation Attribute,
      Tunnel Egress Endpoint Sub-TLV (code point 6) <xref target="RFC9012"/>,
      that is advertised along with the A-D per ES routes for an Ethernet
      Segment working in Anycast Multi-homing mode. The "Anycast VTEP" in the
      BGP Tunnel Encapsulation Attribute can also be advertised along with
      EVPN IP Prefix routes for multi-homed IP subnets. Refer to <xref
      target="RFC9012"/> for the error handling procedures related to the BGP
      Tunnel Encapsulation Attribute. For NVO3 tunnel types (e.g., VXLAN,
      GENEVE), the &lsquo;Anycast VTEP&rsquo; MUST be encoded in the BGP
      Tunnel Encapsulation Attribute and advertised with the A-D per ES
      routes. However, when using SRv6 encapsulation, the BGP Tunnel
      Encapsulation Attribute is not applicable. Refer to <xref
      target="sect-6"/> for details about SRv6.</t>
    </section>

    <section anchor="sect-3" title="Anycast Multi-Homing Solution">
      <t>This document proposes an optional "EVPN Anycast Multi-homing"
      procedure that provides a solution to optimize network behavior if the
      challenges described in <xref target="sect-1.2"/> become significant.
      The description uses the terms "Ingress NVE" and "Egress NVE". In this
      document, Egress NVE refers to an NVE that is attached to a group of
      Ethernet Segments operating in Anycast Multi-homing mode. Ingress NVE
      refers to the NVE transmitting unicast traffic to a MAC address
      associated with a remote Ethernet Segment that also operates in Anycast
      Multi-Homing mode. In addition, the concepts of Unicast VTEP and Anycast
      VTEP are introduced:<list style="symbols">
          <t>A Unicast VTEP is a loopback IP address unique within the data
          center fabric and owned by a single NVE that terminates VXLAN (or
          other NVO3) traffic.</t>

          <t>An Anycast VTEP is a loopback IP address shared among NVEs
          attached to the same group of Ethernet Segments in Anycast
          Multi-Homing mode and is used to terminate VXLAN (or NVO3) traffic
          on those NVEs.</t>
        </list>An Anycast VTEP in this document MUST NOT be used as the BGP
      next hop of any EVPN route NLRI. This restriction is necessary because
      the Multi-Homing procedures require the originator of EVPN routes to be
      uniquely identified by their NLRI next hops.</t>

      <t>The solution consists of the following modifications of the <xref
      target="RFC7432"/> EVPN Aliasing function:</t>

      <t><list style="numbers">
          <t>The <xref target="RFC8365"/> Designated Forwarder and Split
          Horizon filtering procedures remain unmodified. However, the
          Aliasing procedure is modified in this Anycast Multi-homing
          mode.</t>

          <t>The forwarding of BUM traffic and related procedures are not
          modified by this document. Only the procedures related to the
          forwarding of unicast traffic to a remote Ethernet Segment are
          updated.</t>

          <t>Any two or more Egress NVEs attached to the same Ethernet Segment
          working in Anycast Multi-homing mode MUST use the same VNI or label
          to identify the Broadcast Domain associated with that Ethernet
          Segment. For non-MPLS NVO3 tunnels, using the same VNI is implicit
          if global VNIs are used (<xref target="RFC8365"/> section 5.1.1). If
          locally significant values are used for the VNIs, at least all the
          Egress NVEs sharing Ethernet Segments MUST use the same VNI for the
          Broadcast Domain. For MPLS NVO3 tunnels, the Egress NVEs sharing
          Anycast Multi-homing Ethernet Segments MUST use Domain-wide Common
          Block labels <xref target="RFC9573"/> so that all can be configured
          with the same unicast label for the same Broadcast Domain. Note that
          this requirement only affects unicast labels (i.e., the labels
          advertised in EVPN MAC/IP Advertisement routes) and does not affect
          the Ingress Replication labels for BUM traffic, which are advertised
          via EVPN Inclusive Multicast Ethernet Tag routes.</t>

          <t>The default behavior for an Egress NVE attached to an Ethernet
          Segment follows <xref target="RFC8365"/>. The Anycast Multi-homing
          mode MUST be explicitly configured for a given all-active Ethernet
          Segment. When the Egress NVE is configured to follow the Anycast
          Multi-homing behavior for at least one Ethernet Segment, the Egress
          NVE:<list style="letters">
              <t>Is configured with an Anycast VTEP address. A single Anycast
              VTEP address is allocated for all the Anycast Multi-homing
              Ethernet
              Segments shared among the same group of Egress NVEs. This is the
              only additional address whose reachability needs to be
              advertised in the underlay routing protocol. If "m" Egress NVEs
              are attached to the same "n" Ethernet Segments, all the "m"
              Egress NVEs advertise the same Anycast VTEP address in the A-D
              per ES routes for those "n" Ethernet Segments.</t>

              <t>Is assumed to advertise reachability for the Anycast VTEP in
              the underlay routing protocol, either by announcing an exact
              match route for the Anycast VTEP address (using a /32 mask for
              IPv4 or a /128 mask for IPv6) or by advertising a shorter prefix
              that includes the Anycast VTEP IP address.</t>

              <t>Advertises EVPN A-D per ES routes for each Ethernet Segment
              with:<list style="symbols">
                  <t>an "Anycast Multi-homing mode" flag that indicates to the
                  remote NVEs that the EVPN MAC/IP Advertisement routes with
                  matching Ethernet Segment Identifier are resolved by only
                  A-D per ES routes for the Ethernet Segment. In other words,
                  this flag signals to the ingress NVE that no A-D per EVI
                  routes are advertised for the Ethernet Segment.</t>

                  <t>an Anycast VTEP that identifies the Ethernet Segment and
                  is encoded in a BGP tunnel encapsulation attribute <xref
                  target="RFC9012"/> attached to the route.</t>
                </list></t>

              <t>Does not modify the procedures for the EVPN MAC/IP
              Advertisement routes.</t>

              <t>Suppresses the advertisement of the A-D per EVI routes for
              the Ethernet Segment configured in Anycast Multi-homing
              mode.</t>

              <t>In case of a failure on the Ethernet Segment link, the Egress
              NVE withdraws the A-D per ES route(s), as well as the ES route
              for the Ethernet Segment. The Egress NVE cannot withdraw the
              Anycast VTEP address from the underlay routing protocol as long
              as there is at least one Ethernet Segment left that makes use of
              the Anycast VTEP. The Anycast VTEP address is withdrawn from the
              Egress NVE only if the entire Egress NVE fails or all Ethernet
              Segments associated with the Anycast VTEP go down.</t>

              <t>Unicast traffic for a failed local Ethernet Segment may still
              be attracted by the Egress NVE, given that the Anycast VTEP
              address is still advertised in the underlay routing protocol. In
              this case, the Egress NVE SHOULD support the procedures in <xref
              target="sect-4"/> so that unicast traffic can be rerouted to
              another Egress NVE attached to the Ethernet Segment.</t>
            </list></t>

          <t>The Ingress NVE that supports this document:<list style="letters">
              <t>Follows the regular <xref target="RFC8365"/> Aliasing
              procedures for the Ethernet Segments of the received A-D per
              ES routes without the Anycast Multi-homing mode flag set.</t>

              <t>Identifies the imported EVPN A-D per ES routes with the
              Anycast Multi-homing flag set and processes them for Anycast
              Multi-homing.</t>

              <t>Upon receiving and importing (on a Broadcast Domain) an EVPN
              MAC/IP Advertisement route for MAC-1 with a non-zero Ethernet
              Segment Identifier ESI-1, the NVE searches for an A-D per ES
              route with the same ESI-1 imported into the same Broadcast
              Domain. If at least one A-D per ES route for ESI-1 is present,
              the NVE checks whether the Anycast Multi-Homing flag is set.
              <list style="symbols">
                  <t>If the flag is not set, the ingress NVE follows the
                  procedures defined in <xref target="RFC8365"/>.</t>

                  <t>If the Anycast Multi-Homing flag is set, the ingress NVE
                  programs MAC-1 to be associated with destination ESI-1.</t>
                </list>The ESI-1 destination is resolved to the Ethernet
              Segment Anycast VTEP, which is derived from the A-D per ES
              routes, along with the VNI (e.g., VNI-1) received in the MAC/IP
              Advertisement route.</t>

              <t>When the ingress NVE receives a frame with destination MAC
              address MAC-1 on any of the Attachment Circuits of the Broadcast
              Domain, the MAC lookup resolves to ESI-1 as the destination. The
              frame is then encapsulated into a VXLAN (or other NVO3) packet
              with the destination VTEP set to the Anycast VTEP and the VNI
              set to VNI-1. Because all Egress NVEs attached to the Ethernet
              Segment have previously advertised reachability to the Anycast
              VTEP, the ingress NVE creates an underlay ECMP set for the
              Anycast VTEP (assuming multiple equal-cost underlay paths). As a
              result, per-flow load balancing is achieved.</t>

              <t>The Ingress NVE MUST NOT use an Anycast VTEP as the outer
              source IP address of the VXLAN (or NVO3) tunnel, unless the
              ingress NVE also functions as an egress NVE that re-encapsulates
              the traffic into a tunnel for the purpose of Fast Reroute (see
              <xref target="sect-4"/>).</t>

              <t>The reception of one or more MP_UNREACH_NLRI messages for the
              A-D per ES routes for Ethernet Segment Identifier ESI-1 does not
              change the programming of the MAC addresses associated to ESI-1
              as long as there is at least one valid A-D per ES route for
              ESI-1 in the Broadcast Domain. The reception of the
              MP_UNREACH_NLRI
              message for the last A-D per ES route for ESI-1 triggers the
              mass withdraw procedures for all MAC entries pointing at ESI-1.
              As an OPTIONAL optimization, if an ingress node receives an
              MP_UNREACH_NLRI message for the A-D per ES route from one of the
              NVEs on the Ethernet Segment - and only one NVE remains active
              on that Ethernet Segment - the ingress node may update the
              Ethernet Segment destination resolution from the Anycast VTEP to
              the Unicast VTEP, derived from the next hop of the MAC/IP
              Advertisement route.</t>
            </list></t>

          <t>The procedures on the Ingress NVE for Anycast Multi-homing assume
          that all the Egress NVEs attached to the same Ethernet Segment
          advertise the same Anycast Multi-homing flag value and Anycast VTEP
          in their A-D per ES routes for the Ethernet Segment. If there is any
          inconsistency in either of these values, the ingress NVE falls back
          to the <xref target="RFC8365"/> behavior, meaning the MAC address is
          programmed with the Unicast VTEP derived from the next hop of the
          MAC/IP Advertisement route.</t>
        </list>Non-upgraded NVEs ignore the Anycast Multi-homing flag value
      and the BGP tunnel encapsulation attribute.</t>

      <section anchor="sect-3.1" title="Anycast Multi-Homing Example">
        <t>Consider the example in <xref target="Figure1"/> where three Leaf
        routers run EVPN over VXLAN tunnels. Suppose Leaf routers L1, L2 and
        L3 support Anycast Multi-homing as described in <xref
        target="sect-3"/>, and Ethernet Segments ES-1 and ES-2 are configured
        as Anycast Ethernet Segments in all-active mode, using the Anycast
        VTEP IP12.</t>

        <t>Leaf routers L1 and L2 each advertise an A-D per ES route for ESI-1
        and an A-D per ES route for ESI-2 (in addition to the ES routes). Both
        routes include the Anycast Multi-homing flag set and the same Anycast
        VTEP IP12. Upon receiving MAC/IP Advertisement routes for the two
        Ethernet
        Segments, Leaf L3 programs the MAC addresses associated with their
        respective destination Ethernet Segment. Therefore, when sending
        unicast packets to Tenant Systems TS1 or TS2, L3 uses the Anycast VTEP
        address as the outer IP destination. All A-D per EVI routes for ES-1
        and ES-2 are suppressed.</t>

        <t>Suppose only Leaf L1 learns TS1's MAC address; hence, only L1
        advertises a MAC/IP Advertisement route for the TS1 MAC with ESI-1. In
        that case:<list style="symbols">
            <t>Leaf L3 has the Anycast VTEP IP12 programmed in its routing
            table, associated with an underlay ECMP set composed of Spine-1
            and Spine-2. The TS1 MAC address is programmed with the
            destination ESI-1, resolved to Anycast VTEP IP12.</t>

            <t>When Tenant System TS3 sends unicast traffic to TS1, Leaf L3
            encapsulates the frames into VXLAN packets with the destination
            VTEP set to Anycast VTEP IP12. Leaf L3 can perform per-flow load
            balancing using only the underlay ECMP set, without needing to
            create an overlay ECMP set.</t>

            <t>Spine-1 and Spine-2 also create underlay ECMP-sets for Anycast
            VTEP IP12 with next hops L1 and L2. Therefore, in case of:<list
                style="symbols">
                <t>A failure of the link between L1 and Spine-1, Spine-1
                immediately removes L1 from the ECMP set for IP12, and packets
                are rerouted faster than when regular aliasing is used.</t>

                <t>A failure of the link between TS1 and L1, Leaf L1 sends an
                MP_UNREACH_NLRI for the A-D per ES route for ESI-1. Upon
                receiving this message, Leaf L3 does not change the resolution
                of the ESI-1 destination, because the A-D per ES route for
                ESI-1 from L2 remains active. Packets sent to TS1 that arrive
                at Leaf L1 are &ldquo;fast-rerouted&rdquo; to Leaf L2 as
                described in <xref target="sect-4"/>.</t>
              </list></t>

            <t>As per <xref target="sect-3"/>, point 5f, Leaf L3 can
            optionally be configured to change the resolution of the ESI-1
            destination to the unicast VTEP (derived from the MAC/IP
            Advertisement route) upon receiving an MP_UNREACH_NLRI for the A-D
            per ES route from L1. Even so, in-flight packets destined for TS1
            and arriving at Leaf L1 are still &ldquo;fast-rerouted&rdquo; to
            Leaf L2.</t>
          </list></t>
      </section>
    </section>

    <section anchor="sect-4"
             title="EVPN Fast Reroute Extensions For Anycast Multi-Homing">
      <t>The procedures in <xref target="sect-3"/> may result in situations
      where known unicast traffic destined for an Anycast VTEP of an Ethernet
      Segment arrives at an Egress NVE whose Ethernet Segment link is in a
      failed state. In that case, the Egress NVE SHOULD re-encapsulate the
      traffic into an NVO3 tunnel following the procedures described in <xref
      target="I-D.ietf-bess-evpn-fast-reroute"/>, section 7.1, with the
      following modifications:<list style="numbers">
          <t>The Egress NVEs in this document do not advertise A-D per EVI
          routes, therefore there is no signaling of specific redirect labels
          or VNIs. The Egress NVE uses the global VNI or Domain-wide Common
          Block label of the Ethernet Segment NVEs when re-encapsulating the
          traffic into an NVO3 tunnel (<xref target="sect-3"/>, point 3).</t>

          <t>Additionally, when rerouting traffic, the Egress NVE uses the
          Anycast VTEP of the Ethernet Segment as the outer source IP address
          of the NVO3 tunnel. Note this is the only scenario in this document
          where using the Anycast VTEP as the source IP address is permitted.
          Receiving NVO3-encapsulated packets with a local Anycast VTEP
          indicates that those packets have been "fast-rerouted". Therefore,
          they MUST NOT be forwarded into another tunnel.</t>
        </list></t>
    </section>

    <section anchor="sect-5"
             title="Applicability of Anycast Multi-Homing to Inter-Subnet Forwarding">
      <t>Anycast Multi-Homing can also be applied to inter-subnet forwarding
      scenarios. The diagram in <xref target="Figure3"/> illustrates such a
      scenario where Anycast Multi-Homing is used. This diagram serves as a
      reference throughout this section.</t>

      <t><figure anchor="Figure3"
          title="Anycast Multi-Homing for Inter-Subnet Forwarding">
          <artwork><![CDATA[       +-------+   +-------+      
       |Spine-1|   |Spine-2|      
       |       |   |       |      
       +-------+   +-------+      
        |  |  |     |  |  |       
    +---+  |  |     |  |  +---+   
    |      |  |     |  |      |   
    |  +------------+  |      |   
    |  |   |  |        |      |   
    |  |   |  +------------+  |   
    |  |   |           |   |  |   
    |  |   +---+  +----+   |  |   
 L1 |  |     L2|  |      L3|  |   
 +-------+   +-------+   +-------+
 |+-----+|   |+-----+|   |+-----+|
 ||IPVRF||   ||IPVRF||   ||IPVRF||
 |+--+--+|   |+--+--+|   |+--+--+|
 |   |IRB|   |   |IRB|   |   |IRB|
 | +-+-+ |   | +-+-+ |   | +-+-+ |
 | |BD1| |   | |BD1| |   | |BD3| |
 | ++--+ |   | +--++ |   | +-+-+ |
 +--|----+   +----|--+   +---+---+
    |     SN1     |          | SN3   
    |             |          |    
    |     ES-1    |          |    
    +-----+ +-----+          |    
          | |                |    
         +---+             +-|-+  
         |TS1|             |TS3|  
         +---+             +---+
         IP11              IP31

]]></artwork>
        </figure></t>

      <section title="Anycast Multi-Homing and Multi-Homed IP Prefixes">
        <t>Multi-homed IP subnets (subnets attached to two or more Leaf
        routers) can also benefit from Anycast Multi-Homing. These multi-homed
        IP subnets are advertised via EVPN IP Prefix routes, as described in
        <xref target="RFC9136"/>, section 4.4.</t>

        <t>Not all the challenges described in <xref target="sect-1.2"/> apply
        to the Anycast Multi-homing scenario for IP subnets. For example,
        multi-homed IP subnets advertised via EVPN IP Prefix routes do not
        require the use of Ethernet Segments, so the challenge described in
        <xref target="sect-1.2"/> (a) does not arise here. However, using
        Anycast VTEPs for EVPN IP Prefix routes advertised from the Leaf
        routers attached to the same IP subnet can still improve the
        convergence, reduce processing overhead, and address inefficient
        underlay forwarding, as explained in <xref target="sect-1.2"/> (b) and
        (c).</t>

        <t>The solution consists of the following modifications of the
        IP-VRF-to-IP-VRF model (<xref target="RFC9136"/> section 4.4):<list
            style="numbers">
            <t>Similar to <xref target="sect-3"/> bullet 3, any two or more
            Egress NVEs attached to the same IP subnet working in Anycast
            Multi-homing mode MUST use the same VNI or label to identify the
            IP-VRF (or SBD) associated with that IP subnet. The same
            considerations described in that bullet also apply to EVPN IP
            Prefix routes for multi-homed IP subnets.</t>

            <t>The use of Anycast VTEPs by Egress NVEs multi-homed to the same
            IP subnet is as follows:<list style="letters">
                <t>A single Anycast VTEP is shared by the group of NVEs
                associated with the same IP subnet(s) and these NVEs advertise
                reachability for the Anycast VTEP in the underlay routing
                protocol, as explained in <xref target="sect-3"/>, bullet
                4b.</t>

                <t>In an Interface-less IP-VRF-to-IP-VRF model (<xref
                target="RFC9136"/> section 4.4.1), the Anycast VTEP is encoded
                in a BGP tunnel encapsulation attribute <xref
                target="RFC9012"/> and advertised together with the EVPN IP
                Prefix route for the IP subnet. The presence of the Anycast
                VTEP in the IP Prefix route indicates to the ingress NVE that
                the prefix is using Anycast Multi-Homing. Additionally, the
                same MAC address SHOULD be encoded in the EVPN Router&rsquo;s
                MAC extended community of the IP Prefix routes advertised for
                the same multi-homed IP subnet by all the Egress NVEs using
                Anycast Multi-Homing.</t>

                <t>In an Interface-ful IP-VRF-to-IP-VRF with SBD IRB model
                (<xref target="RFC9136"/> section 4.4.2), the Anycast VTEP is
                advertised along with the IP Prefix route and also with the
                EVPN MAC/IP Advertisement route for the SBD IRB interface. In
                this model, when Anycast Multi-Homing is used, all Egress NVEs
                attached to the same IP subnet MUST use Anycast SBD IRB IP and
                MAC addresses. Specifically, the SBD IRB is configured with an
                Anycast MAC and IP address that are shared by all Egress NVEs
                operating in Anycast Multi-Homing mode. The IP Prefix routes
                for multi-homed IP subnets are advertised using these Anycast
                Gateway IP addresses in the Gateway IP field.</t>

                <t>In an Interface-ful IP-VRF-to-IP-VRF with Unnumbered SBD
                IRB model (<xref target="RFC9136"/> section 4.4.3), Anycast
                Multi-Homing operates similarly to the interface-ful SBD IRB
                model (bullet c). The difference is that the SBD IRBs do not
                have an IP address, and the Anycast SBD MAC address is used as
                the overlay index for IP Prefix resolution.</t>
              </list></t>

            <t>The Ingress NVEs supporting this document operate as
            follows:<list style="letters">
                <t>Upon receiving and importing an IP Prefix route with an
                Anycast VTEP, the Ingress NVE checks whether the same prefix
                has been received from another egress NVE and whether that IP
                Prefix route contains a matching Anycast VTEP. If both
                conditions are met, the prefix is programmed to use Anycast
                forwarding. If multiple IP Prefix routes for the same prefix
                exist but their Anycast VTEPs do not match, the IP Prefix
                routes are processed as described in <xref target="RFC9136"/>
                and MUST NOT be programmed to use Anycast forwarding. In the
                interface-less model, this means the prefix is programmed
                using the next hop from the IP Prefix route. In the
                interface-ful models, the prefix is resolved to the EVPN
                MAC/IP Advertisement routes associated with the non-Anycast
                SBD IRB.</t>

                <t>When using Anycast forwarding, regardless of the
                IP-VRF-to-IP-VRF implemented model, the Ingress NVE
                encapsulates the packets destined for a multi-homed IP subnet
                into a VXLAN (or other NVO3) packet with the destination VTEP
                set to the Anycast VTEP and the VNI set to the VNI of the
                IP-VRF (Interface-less model) or the SBD (Interface-ful
                models). Because all Egress NVEs attached to the multi-homed
                IP subnet have previously advertised reachability to the
                Anycast VTEP, the ingress NVE creates an underlay ECMP set for
                the Anycast VTEP (assuming multiple equal-cost underlay
                paths).</t>

                <t>The Ingress NVE MUST NOT use an Anycast VTEP as the outer
                source IP address of the tunnel, unless the ingress NVE also
                functions as an egress NVE that re-encapsulates the traffic
                into a tunnel for the purpose of Fast Reroute (<xref
                target="sect-4"/>). </t>
              </list></t>
          </list></t>

        <t>In the example shown in <xref target="Figure3"/>, IP subnet SN1 is
        multi-homed to Leaf routers L1 and L2. Assuming the interface-less
        model (<xref target="RFC9136"/>, Section 4.4.1) and following the
        procedure described above, L1 and L2 advertise SN1 in IP Prefix routes
        that use the same Anycast VTEP IP12. The ingress NVE, L3, programs SN1
        with VTEP IP12 as the destination. Packets destined for SN1 are then
        load-balanced based on the underlay ECMP set associated with Anycast
        VTEP IP12. </t>

        <t>Although <xref target="Figure3"/> assumes that SN1 is associated
        with IRB interfaces, Anycast Multi-Homing can also be used when SN1 is
        associated with non-IRB Layer 3 interfaces. It is important to note
        that if SN1 is associated with IRB interfaces, a link failure between
        TS1 and L1 does not trigger the advertisement of an MP_UNREACH_NLRI
        message for SN1. As a result, L1 continues to attract traffic destined
        for SN1, which must then be fast-rerouted to L2.</t>
      </section>

      <section title="Anycast Multi-Homing and EVPN IP Aliasing">
        <t>IP Aliasing is described in <xref
        target="I-D.ietf-bess-evpn-ip-aliasing"/> and leverages Ethernet
        Segments to provide fast convergence multi-homing for host routes
        (<xref target="I-D.ietf-bess-evpn-ip-aliasing"/> sections 1.1 and 1.2)
        or IP Prefix routes (<xref target="I-D.ietf-bess-evpn-ip-aliasing"/>
        section 1.3) programmed in an IP-VRF. Anycast Multi-homing can also be
        applied to these Ethernet Segments to address all the challenges
        described in <xref target="sect-1.2"/>, but specifically in the
        context of IP-VRFs rather than MAC-VRFs. The procedures described in
        <xref target="sect-3"/> and <xref target="sect-4"/> of this document
        apply to IP Aliasing, with the following considerations:<list
            style="numbers">
            <t>The Egress NVEs attached to the Anycast Multi-homing Ethernet
            Segments:<list style="letters">
                <t>Advertise both sets of Ethernet A-D per ES and IP A-D per
                ES routes with the Anycast Multi-homing mode flag and the
                Anycast VTEP. </t>

                <t>Suppress the advertisement of both Ethernet A-D per EVI and
                IP A-D per EVI routes for Ethernet Segments configured in
                Anycast Multi-homing mode.</t>

                <t>Include the EVPN Router's MAC Extended Community along with
                the IP A-D per ES routes if the encapsulation used between the
                PEs for inter-subnet forwarding is an Ethernet NVO tunnel
                <xref target="RFC9136"/>. When advertised with the IP A-D per
                ES routes, the EVPN Router's MAC extended community SHOULD
                contain the same MAC address value in all the IP A-D per ES
                routes advertised by all the Egress NVEs attached to the same
                Anycast Multi-homing Ethernet Segment. </t>

                <t>Apply the same procedures to IP A-D per ES routes as those
                described for Ethernet A-D per ES routes in <xref
                target="sect-3"/> for Egress NVEs. </t>
              </list></t>

            <t>The Ingress NVEs:<list style="letters">
                <t>Upon receiving and importing an EVPN MAC/IP Advertisement
                route (<xref target="RFC9135"/>) or an IP Prefix route (<xref
                target="RFC9136"/>) with a non-zero Ethernet Segment
                Identifier (ESI), the NVE searches for an IP A-D per ES route
                with the same ESI imported into the same IP-VRF. If at least
                one IP A-D per ES route for the ESI is present, the NVE checks
                whether the Anycast Multi-Homing flag is set.<list
                    style="symbols">
                    <t>If the flag is not set, the ingress NVE follows the
                    procedures described in <xref
                    target="I-D.ietf-bess-evpn-ip-aliasing"/>.</t>

                    <t>If the flag is set, the ingress NVE programs the host
                    route entry (from the MAC/IP Advertisement route with two
                    VNIs or from the EVPN IP Prefix route) or the IP Prefix
                    entry (from the EVPN IP Prefix route) to be associated
                    with the ES destination that uses an Anycast VTEP.</t>
                  </list></t>

                <t>Other than the above, apply the same procedures to IP A-D
                per ES routes as those described for Ethernet A-D per ES
                routes in <xref target="sect-3"/> for Ingress NVEs.</t>
              </list></t>
          </list></t>
      </section>
    </section>

    <section anchor="sect-6"
             title="Applicability of Anycast Multi-Homing to SRv6 tunnels">
      <t>To be added.</t>
    </section>

    <section anchor="sect-7" title="Operational Considerations">
      <t>&ldquo;Underlay convergence&rdquo; &mdash; that is, convergence
      handled by the underlay routing protocol in the event of a failure
      &mdash; is generally considered faster than &ldquo;overlay
      convergence,&rdquo; where EVPN processes network convergence when
      failures occur. </t>

      <t>The use of Anycast Multi-Homing is especially valuable in scenarios
      where the operator aims to optimize convergence. In this model, a node
      failure affecting an Ethernet Segment Egress NVE simply results in the
      underlay routing protocol rerouting traffic to another Egress NVE that
      advertises the same Anycast VTEP. This underlay rerouting to a different
      owner of the Anycast VTEP is extremely fast and efficient, particularly
      in data center designs that use BGP in the underlay and follow the
      Autonomous System allocation recommended in <xref target="RFC7938"/> for
      loop protection. </t>

      <t>To illustrate this, consider a link failure between L1 and Spine-1,
      as shown in <xref target="Figure1"/>. If Spine-1 and Spine-2 are
      assigned the same Autonomous System Number for their underlay BGP
      peering sessions and no &ldquo;Allowas-in&rdquo; is configured (per
      <xref target="RFC7938"/>), packets destined for the Anycast VTEP IP12
      received by Spine-1 are immediately rerouted to L2 when the L1-Spine-1
      link fails. In contrast, if unicast VTEPs are used (as in regular
      all-active Ethernet Segments), in-flight packets destined for the
      unicast VTEP on L1 that arrive at Spine-1 would be dropped if the
      L1-Spine-1 link is unavailable. This example highlights how Anycast
      Multi-Homing achieves significantly faster convergence.</t>

      <t>Another benefit of Anycast Multi-homing is the reduction of EVPN
      control plane pressure (due to the suppression of the A-D per EVI
      routes).</t>

      <t>However, operators should consider the following operational factors
      before deploying this solution:</t>

      <t><list style="numbers">
          <t>Troubleshooting Anycast Multi-Homing Ethernet Segments differs
          from troubleshooting regular all-active Ethernet Segments. In
          traditional setups, operators rely on the withdrawal of an A-D per
          EVI route as an indication that the Ethernet Segment has failed in
          the specific Broadcast Domain associated with that route. With
          Anycast Multi-Homing, however, the suppression of A-D per EVI routes
          means that logical failures affecting only a subset of Broadcast
          Domains on the Ethernet Segment &mdash; while others remain
          operational &mdash; are more difficult to detect.</t>

          <t>Anycast Multi-homing Ethernet Segments MUST NOT be used in the
          following cases:<list style="letters">
              <t>If the Ethernet Segment multi-homing redundancy mode is not
              All-Active mode.</t>

              <t>If the Ethernet Segment is used on EVPN VPWS Attachment
              Circuits <xref target="RFC8214"/>.</t>

              <t>If the Attachment Circuit Influenced Designated Forwarder
              capability is needed in the Ethernet Segment <xref
              target="RFC8584"/>.</t>

              <t>If advanced multi-homing features that make use of the
              signaling in EVPN A-D per EVI routes are needed. An example
              would be per EVI mass withdraw <xref target="RFC8365"/>.</t>

              <t>If unequal load balancing is needed <xref
              target="I-D.ietf-bess-evpn-unequal-lb"/>.</t>

              <t>If the tunnels used by EVPN in the Broadcast Domains
              associated with the Ethernet Segment are not IP tunnels (i.e.,
              they are not NVO3 tunnels).</t>

              <t>If the NVEs attached to the Ethernet Segment do not use the
              same VNI or label to identify the same Broadcast Domain.</t>
            </list></t>

          <t>Using the procedure in <xref target="sect-3"/> may result in
          packets being permanently fast-rerouted in the event of a link
          failure. To illustrate this, consider three Egress NVEs &mdash; L1,
          L2, and L3 &mdash; attached to ES-1. In this scenario, a failure of
          ES-1 on L1 does not prevent the network from continuing to send
          packets to L1 with the Anycast VTEP as the destination. When L1
          receives these packets, it re-encapsulates them and forwards them
          to, for instance, L2. This rerouting persists for as long as ES-1
          remains in a failed state on L1. In such cases, operators may
          consider deploying direct inter-node links between the Egress NVEs
          to optimize fast reroute forwarding. In the example above, rerouted
          packets are handled more efficiently if L1, L2, and L3 are directly
          connected.</t>
        </list></t>
    </section>

    <section anchor="sect-8" title="Security Considerations">
      <t>To be added.</t>
    </section>

    <section anchor="sect-9" title="IANA Considerations">
      <t>IANA is requested to allocate the flag "A" or "Anycast Multi-homing
      mode" in bit 2 of the EVPN ESI Multihoming Attributes registry for the
      1-octet Flags field in the ESI Label Extended Community.</t>
    </section>

    <section title="Contributors">
      <t>In addition to the authors listed on the front page, the following
      co-authors have also contributed to previous versions of this
      document:</t>

      <t>Nick Morris, Verizon</t>

      <t>nicklous.morris@verizonwireless.com</t>
    </section>

    <section title="Acknowledgments">
      <t/>
    </section>

    <section anchor="sect-12"
             title="Annex - Potential Multi Ethernet Segment Anycast Multi-Homing optimizations">
      <t>This section is here for documentation purposes only, and it will be
      removed from the document before publication. While these procedures
      were initially included in the document, they introduce additional
      complexity and are therefore excluded, as they undermine the primary
      goal of using anycast VTEPs, which is to simplify EVPN operations.
      However, the section is included as an annex for completeness.</t>

      <t>As described in <xref target="sect-7"/>, the use of Anycast
      Multi-Homing may mean that packets are permanently fast-rerouted in case
      of a link failure. Some potential additional extensions on the Ingress
      NVE may mitigate the permanent "fast rerouting", as follows:</t>

      <t><list style="numbers">
          <t>On the Ingress NVEs, an "anycast-aliasing-threshold" and a
          "collect-timer" can be configured. The "anycast-aliasing-threshold"
          represents the number of active Egress NVEs per Ethernet Segment
          under which the ingress PE no longer uses the Anycast VTEP address
          to resolve the Ethernet Segment destination (and uses the Unicast
          VTEP instead, derived from the MAC/IP Advertisement route next hop).
          The "collect-timer" is triggered upon the creation of the Ethernet
          Segment destination, and it is needed to settle on the number of
          Egress NVEs for the Ethernet Segment against which the
          "anycast-aliasing-threshold" is compared.</t>

          <t>Upon expiration of the "collect-timer", the Ingress NVE computes
          the number of Egress NVEs for the Ethernet Segment based on the next
          hop count of the received A-D per ES routes. If the number of Egress
          NVEs for the Ethernet Segment is greater than or equal to the
          "anycast-aliasing-threshold" integer, the Ethernet Destination is
          resolved to the Anycast VTEP address. If lower than the threshold,
          the Ethernet Destination is resolved to the unicast VTEP
          address.</t>
        </list>In most of the use cases in multi-tenant Data Centers, there
      are two Leaf routers per rack that share all the Ethernet Segments of
      Tenant Systems in the rack. In this case, the
      "anycast-aliasing-threshold" is set to 2, and in case of link failure on
      the Ethernet Segment, this limits the amount of "fast-rerouted" traffic
      to only the in-flight packets.</t>

      <t>As an example, consider <xref target="Figure1"/>. Suppose Leaf router
      L3 supports these additional extensions. Leaf routers L1 and L2 both
      advertise an A-D per ES route for ESI-1, and an A-D per ES route for
      ESI-2. Both routes will carry the Anycast Multi-homing flag set and the
      same Anycast VTEP IP12. Following the described procedure, Leaf L3 is
      configured with anycast-aliasing-threshold = 2 and collect-timer = t.
      Upon receiving MAC/IP Advertisement routes for the two Ethernet Segments
      and the expiration of "t" seconds, Leaf L3 determines that the number of
      NVEs for ESI-1 and ESI-2 is equal to the threshold. Therefore, when
      sending unicast packets to Tenant Systems TS1 or TS2, L3 uses the
      Anycast VTEP address as the outer IP address. Suppose now that the link
      TS1-L1 fails. Leaf L1 then sends an MP_UNREACH_NLRI for the A-D per ES
      route for ESI-1. Upon reception of the message, Leaf L3 changes the
      resolution of the ESI-1 destination from the Anycast VTEP to the Unicast
      VTEP derived from the MAC/IP Advertisement route next hop. Packets sent
      to Tenant System TS2 (on ES-2) still use the Anycast VTEP. In-flight
      packets sent to TS1 but still arriving at Leaf L1 are "fast-rerouted" to
      Leaf L2 as per <xref target="sect-4"/>.</t>

      <t>Another potential optimization is to use different Anycast VTEPs per
      ES. The proposal in <xref target="sect-3"/> uses a shared VTEP for all
      the Ethernet Segments in a common Egress NVE group. In case the number
      of Egress NVEs sharing the group of Ethernet Segments is limited to two,
      an alternative proposal is to use a different Anycast VTEP per Ethernet
      Segment, while allocating all those Anycast VTEP addresses from the same
      subnet. A single IP Prefix for such a subnet is announced in the underlay
      routing protocol by the Egress NVEs. The benefit of this proposal is
      that, in case of link failure in one individual Ethernet Segment, e.g.,
      link TS1-L1 in <xref target="Figure1"/>, Leaf L2 detects the failure
      (based on the withdrawal of the A-D per ES and ES routes) and can
      immediately announce the specific Anycast VTEP address (/32 or /128)
      into the underlay. Based on a Longest Prefix Match when routing NVO3
      packets, Spines can immediately reroute packets (with destination the
      Anycast VTEP for ESI-1) to Leaf L2. This may reduce the number of
      fast-rerouted VXLAN packets and spare the Ingress NVE from having to
      change the resolution of the Ethernet Segment destination from the
      Anycast VTEP to the Unicast VTEP.</t>
    </section>
  </middle>

  <back>
    <references title="Normative References">
      &RFC2119;

      &RFC8174;

      &RFC7432;

      &RFC8365;

      &I-D.ietf-bess-rfc7432bis;

      &RFC9573;

      &RFC8584;

      &RFC9012;

      &RFC9135;

      &RFC9136;
    </references>

    <references title="Informative References">
      &RFC7348;

      &RFC8926;

      &RFC4364;

      &RFC7510;

      &RFC8986;

      &RFC8214;

      &RFC7938;

      &RFC9469;

      &I-D.ietf-bess-evpn-ip-aliasing;

      &I-D.ietf-bess-evpn-unequal-lb;

      &I-D.ietf-bess-evpn-fast-reroute;

      &RFC9746;

      <reference anchor="CLOS1953">
        <front>
          <title>A Study of Non-Blocking Switching Networks</title>

          <author fullname="C. Clos" initials="C." surname="Clos">
            <organization>The Bell System Technical Journal, Vol. 32(2), DOI
            10.1002/j.1538- 7305.1953.tb01433.x</organization>
          </author>

          <date month="March" year="1953"/>
        </front>
      </reference>
    </references>
  </back>
</rfc>
