<?xml version="1.0" encoding="US-ASCII"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
     which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
     There has to be one entity for each item to be referenced.
     An alternate method (rfc include) is described in the references. -->
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC3688 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3688.xml">
<!ENTITY RFC6020 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6020.xml">
<!ENTITY RFC7950 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7950.xml">
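<!-- NOTE(review): the entity below is named RFC7149 but loads the RFC 8632 reference file; confirm which entity name the references section uses before renaming it. -->
<!ENTITY RFC7149 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8632.xml">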
<!ENTITY RFC8345 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8345.xml">
<!ENTITY RFC9375 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.9375.xml">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs),
     please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
     (Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space
     (using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="std" docName="draft-feng-opsawg-incident-management-01"
     ipr="trust200902">
  <front>
    <title abbrev="Incident Management">Incident Management for Network
    Services</title>

    <author fullname="Chong Feng" initials="C." role="editor" surname="Feng">
      <organization>Huawei</organization>

      <address>
        <postal>
          <street>101 Software Avenue, Yuhua District</street>

          <city>Nanjing</city>

          <region>Jiangsu</region>

          <code>210012</code>

          <country>China</country>
        </postal>

        <email>frank.fengchong@huawei.com</email>
      </address>
    </author>

    <author fullname="Tong Hu" initials="T." surname="Hu">
      <organization abbrev="CMCC">China Mobile (Hangzhou) Information
      Technology Co., Ltd</organization>

      <address>
        <postal>
          <street>Building A01, 1600 Yuhangtang Road, Wuchang Street, Yuhang
          District</street>

          <city>Hangzhou</city>

          <region>ZheJiang</region>

          <code>311121</code>

          <country>China</country>
        </postal>

        <email>hutong@cmhi.chinamobile.com</email>
      </address>
    </author>

    <author fullname="Luis Miguel Contreras Murillo" initials="LM."
            surname="Contreras">
      <organization>Telefonica I+D</organization>

      <address>
        <postal>
          <street/>

          <city>Madrid</city>

          <country>Spain</country>
        </postal>

        <email>luismiguel.contrerasmurillo@telefonica.com</email>
      </address>
    </author>

    <author fullname="Thomas Graf" initials="T." surname="Graf">
      <organization>Swisscom</organization>

      <address>
        <postal>
          <street>Binzring 17</street>

          <city>Zurich</city>

          <code>8045</code>

          <country>Switzerland</country>
        </postal>

        <email>thomas.graf@swisscom.com</email>
      </address>
    </author>

    <author fullname="Qin Wu" initials="Q." surname="Wu">
      <organization>Huawei</organization>

      <address>
        <postal>
          <street>101 Software Avenue, Yuhua District</street>

          <city>Nanjing</city>

          <region>Jiangsu</region>

          <code>210012</code>

          <country>China</country>
        </postal>

        <email>bill.wu@huawei.com</email>
      </address>
    </author>

    <author fullname="Chaode Yu" initials="C." surname="Yu">
      <organization>Huawei</organization>

      <address>
        <email>yuchaode@huawei.com</email>
      </address>
    </author>

    <author fullname="Nigel Davis" initials="N." surname="Davis">
      <organization>Ciena</organization>

      <address>
        <email>ndavis@ciena.com</email>
      </address>
    </author>

    <date year="2023"/>

    <area>ops</area>

    <workgroup>OPSAWG</workgroup>

    <keyword>Incident Lifecycle Management</keyword>

    <abstract>
      <t>A network incident refers to an unexpected interruption of a network
      service, degradation of network service quality, or sub-health of a
      network service. Different data sources including alarms, metrics and
      other anomaly information can be aggregated into a small number of incidents
      by correlation analysis and service impact analysis.</t>

      <t>This document also defines YANG modules to support the incident
      lifecycle management. The YANG modules are meant to provide a standard
      way to report, diagnose, and resolve incidents for the sake of enhanced
      network services.</t>
    </abstract>
  </front>

  <middle>
    <section anchor="Introduction" title="Introduction">
      <t><xref target="RFC8969"/> defines a framework for Automating Service and Network
      Management with YANG to support full life cycle network management. A set of
      YANG data models has already been developed in the IETF for network
      performance monitoring and fault monitoring. For example, the YANG <xref target="RFC7950"/> data
      model for alarm management <xref target="RFC8632"/> defines a standard interface for
      alarm management, and the data model for Network and VPN Service Performance
      Monitoring <xref target="RFC9375"/> defines a standard interface for network performance
      management. In addition, the distributed tracing mechanism defined in
      <xref target="W3C-Trace-Context"/> can also be used to analyze and debug operations,
      such as configuration transactions, across multiple distributed
      systems.</t>

      <t>However, these YANG data models for network maintenance are based on
      specific data source information and manage alarms and performance
      metrics data separately in various different management systems. In
      addition, the frequency and quantity of alarms and performance metrics
      data reported to the Operating Support System (OSS) have increased
      dramatically (in many cases by multiple orders of magnitude) with the
      growth of service types and complexity, and greatly overwhelm OSS
      platforms; with known dependency relations between faults, alarms and
      events, the traditional solutions, e.g., data compression, are
      time-consuming and labor-intensive and usually rely on maintenance
      engineers' experience for data analysis, which results in low processing
      efficiency, inaccurate root cause identification and duplicated tickets.
      It is also difficult to assess the impact of alarms, performance
      metrics and other anomaly data on network services.</t>

      <t>To address these challenges, an incident-centric solution is proposed
      for network level root cause analysis, service impact analysis and
      network troubleshooting, which can span across multiple layers and
      multiple domains. A network incident refers to an unexpected
      interruption of a network service, degradation of network service
      quality, or sub-health of a network service. Different data sources
      including alarms, metrics and other anomaly information can be
      aggregated into a small number of incidents by correlation analysis and
      service impact analysis. For example, when the protocols related to
      an interface fail to work properly because the interface is down, a large
      number of alarms may be reported to the upper layer management system and
      aggregated into one or a few incidents when some network services may be
      affected by this incident (e.g., L3VPN services related to the
      interface will become unavailable). An incident may also be raised
      through the analysis of some network performance metrics. For example,
      as described in SAIN <xref target="I-D.ietf-opsawg-service-assurance-architecture"/>,
      network services can be decomposed into sub-services, and some metrics
      are monitored for each sub-service; symptoms will occur if
      services/sub-services are unhealthy (after analyzing metrics), and these
      symptoms may raise an incident when they cause degradation of the network
      services.</t>

      <t>In addition, Artificial Intelligence (AI) and Machine Learning (ML)
      play an important role in the processing of large amounts of data with
      complex correlations. For example, a Neural Network Algorithm or a Hierarchy
      Aggregation Algorithm can be used to replace manual alarm correlation.
      Through online and offline learning, these algorithms can be
      continuously optimized to improve the efficiency of fault diagnosis.</t>

      <t>This document defines the concepts, requirements, and architecture of
      incident management. The document also defines a YANG data model for
      incident lifecycle management, which improves troubleshooting
      efficiency, ensures network service quality, and improves network
      automation <xref target="RFC8969"/>.</t>
    </section>

    <section anchor="terminology" title="Terminology">
      <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
      "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and
      "OPTIONAL" in this document are to be interpreted as described in BCP 14
      <xref target="RFC2119"/> <xref target="RFC8174"/> when, and only when,
      they appear in all capitals, as shown here.</t>

      <t>The following terms are defined in <xref target="RFC8632"/> and are not
      redefined here: <list style="symbols">
          <t>alarm</t>
        </list></t>

      <t>The following terms are defined in this document: <list
          style="hanging">
          <t hangText="Incident: ">An unexpected interruption of a network
          service, degradation of network service quality, or sub-health of a
          network service.<vspace blankLines="1"/></t>

          <t hangText="Incident management: ">Lifecycle management of
          incidents including incident identification, reporting, acknowledgement,
          diagnosis, and resolution.</t>

          <t hangText="Incident management system: ">An entity which
          implements incident management. It includes an incident management server
          and an incident management client.</t>

          <t hangText="Incident management server: ">An entity which provides
          some functions of incident management. For example, it can detect an
          incident and perform incident diagnosis, resolution,
          prediction, etc.</t>

          <t hangText="Incident management client: ">An entity which can
          manage incidents. For example, it can receive incident
          notifications, query the information of incidents, instruct the
          incident management server to diagnose, resolve, etc.</t>
        </list></t>
    </section>

    <section title="Sample Use Cases">
      <section title="Incident-Based Trouble Tickets dispatching">
        <t>Traditionally, the dispatching of trouble tickets is mostly based
        on alarm data analysis and needs to involve operators' maintenance
        engineers. These operators' maintenance engineers are able to monitor
        and detect that alarms are associated with the same network fault.
        Therefore, they can correlate these alarms to the same trouble ticket,
        but only with a low degree of automation. If there are more alarms, then the
        human costs for network maintenance increase accordingly.</t>

        <t>Some operators preconfigure a whitelist and adopt some
        coarse-granularity association rules for alarm management. This seems to
        improve fault management automation. However, some trouble tickets
        could be missed if the filtering conditions are too tight. If the
        filtering conditions are too loose, multiple trouble tickets would be
        dispatched for the same fault.</t>

        <t>It is hard to achieve a perfect balance between the automation and
        duplicated trouble tickets under the traditional working situations.
        However, with the help of incident management, massive numbers of alarms can
        be aggregated into a few incidents, and multiple trouble tickets will be
        saved. At the same time, incident management can keep high accuracy
        and automation. This could be an answer to this pain point of
        traditional trouble ticket dispatching.</t>
      </section>

      <section title="Fault Locating">
        <t>Currently, to accomplish fault isolation and fault localization,
        maintenance experts need to correlate topology data and service data
        together with a huge amount of alarm data at different layers (e.g.,
        optical layer, packet layer) to do the analysis. Sometimes
        cooperation from the construction engineers who work on site is
        required to attempt configuration changes on devices and then
        further investigate the corresponding root cause. Sometimes
        cooperation between different operation teams is required to locate
        the fault at either the optical layer or the packet layer.</t>

        <t>For example, for a common cable interruption, maintenance experts
        need to identify the root cause alarm from a large number of alarms, and
        then trace the root cause alarm in the network segment by segment.
        Next, site engineers perform tests at the source station to locate the
        interruption and locate the faulty optical exchange station. Then they
        move to the located optical exchange station to replace or splice
        fibers. During the whole process, multiple people are needed inside
        and outside the site.</t>

        <t>With the help of incident management, the system can automatically
        locate the faulty segment, and eliminate the need for manual analysis.
        By cooperating with the integrated Optical time-domain reflectometer
        (OTDR) within the equipment, we can determine the target optical
        exchange station before site visits. Multiple site visits and time are
        saved.</t>
      </section>

      <section title="Fault Labelling">
        <t>Fiber cutover is a common maintenance scenario for Operators.
        During the cutover process, maintenance experts must identify affected
        devices based on the cutover object and their experience. They will
        give these devices a mark to inform other maintenance engineers that
        it is not necessary to dispatch trouble tickets before the ending of
        cutover.</t>

        <t>However, relying on human experience makes mistakes very
        likely. For example, some devices may be left unmarked and
        some devices may be marked incorrectly. If devices are left
        unmarked, some trouble tickets will be dispatched during cutover, which
        are not actually needed. If devices are wrongly marked, some faults
        not related to this cutover will be missed.</t>

        <t>With incident management, maintenance experts only need to mark the
        cutover objects and do not need to mark the devices that would be
        affected. Because of the alarm aggregation capabilities and knowing
        the relationship between root cause alarm and correlative alarm, the
        fault management system can automatically identify correlative alarms,
        without dispatching any trouble tickets to the affected devices.</t>
      </section>

      <section title="Energy Conservation">
        <t>With the global trend of energy conservation, emission reduction
        and safety management, more and more enterprises have joined the
        energy conservation and emission reduction ranks and adopted measures
        to turn off the power during non-working hours, making due
        contributions to the green earth. However, this proactive power-off
        measure periodically generates a large number of alarms on the
        network, and the traditional Operation and Management system cannot
        effectively identify such non-real faults caused by the enterprise
        users. Operators need to manually identify and rectify faults based on
        the expert experience, wasting a large number of human resources.</t>

        <t>Incident management can intelligently identify faults caused by
        periodic power-off on the tenant side and directly identify faults. As
        a result, operators do not need to dispatch trouble tickets for such
        faults anymore, which can help to reduce human resource costs.</t>
      </section>
    </section>

    <section title="Incident Management Architecture">
      <figure anchor="figure_4" title="Incident Management Architecture">
        <artwork>

            +----------------------+-------------------+
            |                                          |
            |            Incident Management Client    |
            |                                          |
            |                                          |
            +------------+---------+---------+---------+
               ^         |         |         |
               |Incident |Incident |Incident |Incident
               |Report   |Ack      |Diagnose |Resolve
               |         |         |         |
               |         V         V         V
            +--+-------------------+---------+----------+
            |                                           |
            |                                           |
            |            Incident Management Server     |
            |                                           |
            |                                           |
            |                                           |
            |                                           |
            +----------------------+-----+--+-----------+
                  ^       ^Abnormal         ^
                  |Alarm  |Operations       |Metrics
                  |Report |Report           |/Telemetry
                  |       |                 V
     +--------+-+-+-------+--------------++------------------+
     |                                                       |
     |                     Network                           |
     |                                                       |
     +------------------------------------+------------------+

            </artwork>
      </figure>

      <t><xref target="figure_4"/> illustrates the incident management
      architecture. Two key components for the incident management are
      incident management client and incident management server.</t>

      <t>The incident management server can be deployed in network analytics
      platforms or controllers and provides functionalities such as incident
      identification, reporting, diagnosis, resolution, and querying for incident
      lifecycle management.</t>

      <t>Incident management client can be deployed in the network OSS or
      other business systems of operators and invokes the functionalities
      provided by incident management server to meet the business requirements
      of fault management.</t>

      <t>A typical workflow of incident management is as follows: <list
          style="symbols">
          <t>Some alarms, abnormal operations, or network performance metrics
          are reported from the network. The incident management server receives
          these alarms/abnormal operations/metrics and tries to analyze the
          correlation among them. If an incident is identified, it will be
          reported to the client. The impact on network services will also be
          analyzed, and the incident will be updated accordingly.</t>

          <t>The incident management client receives the incident raised by the server
          and acknowledges it. The client may invoke the 'incident diagnose' rpc to
          diagnose this incident to find the root causes.</t>

          <t>If the root causes have been found, the client can resolve this
          incident by invoking the 'incident resolve' rpc operation,
          dispatching a ticket or using other functions (e.g., routing
          calculation, configuration).</t>
        </list></t>

      <section title="Interworking with Alarm Management">
        <figure anchor="figure_5" title="Interworking with alarm management">
          <artwork>
   +-----------------------------+
   |         OSS                 |
   |+-------+      +-----------+ |
   ||alarm  |      | incident  | |
   ||handler|      |  client   | |
   |+-------+      +-----------+ |
   +---^---------------^---------+
       |               |
       |alarm          |incident
   +---|---------------|---------+
   |   |  controller   |         |
   |   |               |         |
   |+--+---++      +-----------+ |
   ||alarm  |      |           | |
   ||process+-----&gt;|  incident | |
   ||       |alarm |   server  | |
   |+------++      +-----------+ |
   |   ^              ^          |
   +---+--------------|----------+
       |alarm         | metrics/trace/etc.
       |              |
   +---+--------------+----------+
   |         network             |
   |                             |
   +-----------------------------+
                </artwork>
        </figure>

        <t>The YANG model for alarm management <xref target="RFC8632"/> defines
        a standard interface to manage the lifecycle of alarms. Alarms
        represent the undesirable state of network resources. The alarm data model
        also defines the root causes and impacted services fields, but there
        may be insufficient information to determine them in lower layer
        systems (mainly at the device level), so alarms do not always tell the
        status of services or the root causes. As described in <xref
        target="RFC8632"/>, alarm management acts as a starting point for
        high-level fault management. Incident management, in contrast, often works at
        the network level, so it is possible to have enough information to
        perform correlation and service impact analysis. Alarms can work as
        one of the data sources of incident management and may be aggregated into
        a small number of incidents by correlation analysis; network service
        impact and root causes may be determined during incident processing.</t>

        <t>An incident also contains some related alarms; if needed, users can
        query the information of alarms through the alarm management interface <xref
        target="RFC8632"/>. In some cases, e.g., a cutover scenario, the incident
        server may use the alarm management interface <xref target="RFC8632"/> to
        shelve some alarms.</t>

        <t>Alarm management may keep the original process: alarms are reported
        from the network to the network controller or analytics and then reported to
        the upper layer system (e.g., OSS). The upper layer system may store these
        alarms and provide the information for fault analysis (e.g., deeper
        analysis based on incidents).</t>

        <t>Compared with alarm management, incident management provides not
        only incident reporting but also diagnosis and resolution functions,
        it's possible to support self-healing and may be helpful for
        single-domain closed-loop control.</t>

        <t>Incident management is not a substitute for alarm management.
        Instead, they can work together to implement fault management.</t>
      </section>

      <section title="Interworking with SAIN">
        <figure anchor="figure_6" title="Interworking with SAIN">
          <artwork>
          +----------------+
          | incident client|
          +----------------+
                  ^
                  |incident
          +-------+--------+
          |incident server |
          +----------------+
                  ^
                  |symptoms
          +-------+--------+
          |     SAIN       |
          |                |
          +----------------+
                  ^
                  |metrics
    +-------------+-------------+
    |                           |
    |         network           |
    |                           |
    +---------------------------+

                </artwork>
        </figure>

        <t>SAIN <xref
        target="I-D.ietf-opsawg-service-assurance-architecture"/> defines the
        architecture of network service assurance. A network service can be
        decomposed into some sub-services, and some metrics can be monitored
        for sub-services. For example, a tunnel service can be decomposed into
        some peer tunnel interface sub-services and IP connectivity
        sub-service. If some metrics are evaluated to indicate that a
        specific sub-service is unhealthy, some symptoms will be present. The incident server
        may identify the incident based on symptoms, and then report it to
        the upper layer system. So, SAIN can be one way to identify incidents:
        services, sub-services and metrics can be preconfigured via APIs
        defined by the service assurance YANG model <xref
        target="I-D.ietf-opsawg-service-assurance-yang"/> and an incident will be
        reported if symptoms match the condition of the incident.</t>
      </section>

      <section title="Relationship with RFC8969">
        <t><xref target="RFC8969"/> defines a framework for network
        automation using YANG, this framework breaks down YANG modules into
        three layers, service layer, network layer and device layer, and
        contains service deployment, service optimization/assurance, and
        service diagnosis. Incident management works at the network layer and aggregates
        alarms, metrics and other information from the device layer, which is helpful
        for providing service assurance. Incident diagnosis may also be one
        way of service diagnosis.</t>
      </section>

      <section title="Relationship with Trace Context">
        <t>W3C defines a common trace context <xref
        target="W3C-Trace-Context"/> for distributed system tracing, <xref
        target="I-D.rogaglia-netconf-trace-ctx-extension"/> defines a NETCONF
        extension for <xref target="W3C-Trace-Context"/>, and <xref
        target="I-D.quilbeuf-opsawg-configuration-tracing"/> defines a
        mechanism for configuration tracing. If some errors occur while
        services are being deployed, these errors can be easily identified by
        distributed system tracing, and an incident should be reported.</t>
      </section>
    </section>

    <section title="Functional Interface Requirements between the Client and the Server">
      <section title="Incident Identification">
        <figure anchor="figure_1" title="Incident Identification">
          <artwork>
        +--------------+
     +--|  Incident1   |
     |  +--+-----------+
     |     |  +-----------+
     |     +--+  alarm1   |
     |     |  +-----------+
     |     |
     |     |  +-----------+
     |     +--+  alarm2   |
     |     |  +-----------+
     |     |
     |     |  +-----------+
     |     +--+  alarm3   |
     |        +-----------+
     |  +--------------+
     +--|  Incident2   |
     |  +--+-----------+
     |     |  +-----------+
     |     +--+  metric1  |
     |     |  +-----------+
     |     |  +-----------+
     |     +--+  metric2  |
     |        +-----------+
     |
     |  +--------------+
     +--|  Incident3   |
        +--+-----------+
           |  +-----------+
           +--+ alarm1    |
           |  +-----------+
           |
           |  +-----------+
           +--| metric1   |
              +-----------+
                </artwork>
        </figure>

        <t>As described in <xref target="figure_1"/>, multiple alarms,
        metrics, or a hybrid of both can be aggregated into an incident after
        analysis.</t>

        <t>The incident management server MUST be capable of identifying
        incidents. Multiple alarms, metrics and other information are reported
        to the incident server, and the server must analyze them and find out the
        correlations among them. If the correlations match the incident rules,
        an incident will be identified and reported to the client. Service impact
        analysis will be performed if an incident is identified, and the content
        of the incident will be updated if impacted network services are
        detected.</t>

        <t>AI/ML may be used to identify the incident. Expert systems and
        online learning can help AI to identify the correlation of alarms,
        metrics and other information by time-based correlation algorithms,
        topology-based correlation algorithms, etc. For example, if an interface is
        down, then many protocol alarms will be reported, and AI will infer that these
        alarms are correlated. These correlations will be put into the
        knowledge base, and the incident will be identified faster according
        to the knowledge base next time.</t>

        <t>As mentioned above, SAIN is another way to implement incident
        identification. Observation timestamp defined in <xref
        target="I-D.tgraf-yang-push-observation-time"/> and trace context
        defined in <xref target="W3C-Trace-Context"/> may be helpful for
        incident identification.</t>

        <t><figure anchor="figure_2"
            title="Example 1 of Incident Identification">
            <artwork>
                     +----------------------+
                     |                      |
                     |     Orchestrator     |
                     |                      |
                     +----+-----------------+
                          ^VPN A Unavailable
                          |
                      +---+----------------+
                      |                    |
                      |     Controller     |
                      |                    |
                      |                    |
                      +-+-+-+-----+--+-----+
                      ^ ^            ^
                  IGP | |Interface   |IGP Peer
                 Down | |Down        | Abnormal
                      | |            |
     VPN A            | |            |
    +-----------------+-+------------+------------------+
    | \  +---+       ++-++         +-+-+        +---+  /|
    |  \ |   |       |   |         |   |        |   | / |
    |   \|PE1+-------| P1+X--------|P2 +--------|PE2|/  |
    |    +---+       +---+         +---+        +---+   |
    +---------------------------------------------------+

                    </artwork>
          </figure></t>

        <t>As described in <xref target="figure_2"/>, VPN A is deployed from
        PE1 to PE2. If an interface of P1 goes down, many alarms are
        triggered, such as interface down, IGP down, and IGP peer abnormal
        from P2. These alarms are aggregated and analyzed by the controller, and
        the incident 'VPN A unavailable' is triggered by the controller.</t>

        <t><figure anchor="figure_3"
            title="Example 2 of Incident Identification">
            <artwork>
                    +----------------------+
                    |                      |
                    |     Orchestrator     |
                    |                      |
                    +----+-----------------+
                         ^VPN A Degradation
                         |
                     +---+----------------+
                     |                    |
                     |     Controller     |
                     |                    |
                     |                    |
                     +-+-+-+-----+--+-----+
                       ^            ^
                       |Packet      |Path Delay
                       |Loss        |
                       |            |
    VPN A              |            |
   +-------------------+------------+-------------------+
   | \  +---+       ++-++         +-+-+        +---+  / |
   |  \ |   |       |   |         |   |        |   | /  |
   |   \|PE1+-------|P1 +---------|P2 +--------|PE2|/   |
   |    +---+       +---+         +---+        +---+    |
   +----------------------------------------------------+

                </artwork>
          </figure></t>

        <t>As described in <xref target="figure_3"/>, the controller collects
        network metrics from the network elements. If it finds that the packet
        loss of P1 and the path delay of P2 exceed their thresholds, an
        incident 'VPN A degradation' may be triggered after analysis.</t>
      </section>

      <section title="Incident Diagnosis">
        <t>After an incident is reported to the incident management client,
        the client MAY diagnose the incident to determine the root cause. Some
        diagnosis operations may affect the running network services. The
        client can choose not to perform that diagnosis operation after
        determining the impact is not trivial. The incident management server
        can also perform self-diagnosis. However, the self-diagnosis MUST NOT
        affect the running network services. Possible diagnosis methods
        include link reachability detection, link quality detection, alarm/log
        analysis, and short-term fine-grained monitoring of network quality
        metrics, etc.</t>
      </section>

      <section title="Incident Resolution">
        <t>After the root cause is diagnosed, the client MAY resolve the
        incident. The client MAY choose to resolve the incident by invoking
        other functions, such as a routing calculation function or a
        configuration function, by dispatching a ticket, or by asking the
        server to resolve it. Generally, the client would attempt to directly
        resolve the root cause. If the root cause cannot be resolved, an
        alternative solution SHOULD be used. For example, if an incident is
        caused by a physical component failure that cannot be automatically
        resolved, the standby link can be used to bypass the faulty
        component.</t>

        <t>The incident server will monitor the status of the incident. If the
        faults are fixed, the server will update the status of the incident to
        'cleared' and report the updated incident to the client.</t>

        <t>Incident resolution may affect the running network services. The
        client can choose not to perform those operations after determining
        the impact is not trivial.</t>
      </section>
    </section>

    <section title="Incident Data Model Concepts">
      <section title="Identifying the Incident Instance">
        <t>An incident ID is used as the identifier of an incident instance.
        When an incident instance is identified, a new incident ID is created.
        The incident ID MUST be unique in the whole system.</t>
      </section>

      <section title="The Incident Lifecycle">
        <section title="Incident Instance Lifecycle">
          <t>From an incident instance perspective, an incident can have the
          following lifecycle: 'raised', 'updated', 'cleared'. When an
          incident is generated, the status is 'raised'. If the status changes
          after the incident is generated (for example, because of
          self-diagnosis, a diagnosis command issued by the client, or any
          other condition that changes the status without reaching the
          'cleared' level), the status changes to 'updated'. When an incident
          is successfully resolved, the status changes to 'cleared'.</t>
        </section>

        <section title="Operator Incident Lifecycle">
          <t>From an operator perspective, the lifecycle of an incident
          instance includes 'acknowledged', 'diagnosed', and 'resolved'. When
          an incident instance is generated, the operator SHOULD acknowledge
          the incident. The operator then attempts to diagnose the incident
          (for example, to find out the root cause and the affected
          components). Diagnosis is not mandatory: if the root cause and
          affected components are known when the incident is generated,
          diagnosis is not required. After locating the root cause and
          affected components, the operator can try to resolve the
          incident.</t>
        </section>
      </section>
    </section>

    <section title="Incident Data Model">
      <section title="Overview">
        <figure>
          <artwork>
module: ietf-incident
  +--ro incidents
     +--ro incident* [incident-id]
        +--ro incident-id string
        +--ro csn uint64
        +--ro service-instance* string
        +--ro name string
        +--ro type enumeration
        +--ro domain identityref
        +--ro priority int:incident-priority
        +--ro status? enumeration
        +--ro ack-status? enumeration
        +--ro category identityref
        +--ro detail? string
        +--ro resolve-advice? string
        +--ro sources
        ...
        +--ro root-causes
        ...
        +--ro root-events
        ...
        +--ro events
        ...
        +--ro raise-time? yang:date-and-time
        +--ro occur-time? yang:date-and-time
        +--ro clear-time? yang:date-and-time
        +--ro ack-time? yang:date-and-time
        +--ro last-updated? yang:date-and-time
  rpcs:
    +---x incident-acknowledge
    ...
    +---x incident-diagnose
    ...
    +---x incident-resolve

  notifications:
    +---n incident-notification
       +--ro incident-id?
               -&gt; /inc:incidents/inc:incident/inc:incident-id
       ...
       +--ro time? yang:date-and-time
          </artwork>
        </figure>
      </section>

      <section title="Incident Notifications">
        <figure>
          <artwork>
  notifications:
    +---n incident-notification
       +--ro incident-id?
               -&gt; /inc:incidents/inc:incident/inc:incident-id
       +--ro csn uint64
       +--ro service-instance* string
       +--ro name string
       +--ro type enumeration
       +--ro domain identityref
       +--ro priority int:incident-priority
       +--ro status? enumeration
       +--ro ack-status? enumeration
       +--ro category identityref
       +--ro detail? string
       +--ro resolve-advice? string
       +--ro sources
       |  +--ro source* [node]
       |     +--ro node -&gt; /nw:networks/nw:network/nw:node/nw:node-id
       |     +--ro resource* [name]
       |        +--ro name al:resource
       +--ro root-causes
       |  +--ro root-cause* [node]
       |     +--ro node -&gt; /nw:networks/nw:network/nw:node/nw:node-id
       |     +--ro resource* [name]
       |     |  +--ro name al:resource
       |     |  +--ro cause-name? string
       |     |  +--ro detail? string
       |     +--ro cause-name? string
       |     +--ro detail? string
       +--ro root-events
       |  +--ro root-event* [type event-id]
       |     +--ro type -&gt; ../../../events/event/type
       |     +--ro event-id leafref
       +--ro events
       |  +--ro event* [type event-id]
       |     +--ro type enumeration
       |     +--ro event-id string
       |     +--ro (event-type-info)?
       |        +--:(alarm)
       |        |  +--ro alarm
       |        |     +--ro resource? leafref
       |        |     +--ro alarm-type-id? leafref
       |        |     +--ro alarm-type-qualifier? leafref
       |        +--:(notification)
       |        +--:(log)
       |        +--:(KPI)
       |        +--:(unknown)
       +--ro time? yang:date-and-time
                </artwork>
        </figure>

        <t>A general notification, incident-notification, is provided here.
        When an incident instance is identified, the notification will be
        sent. After a notification is generated, if the incident management
        server performs self-diagnosis or the client uses the interfaces
        provided by the incident management server to deliver diagnosis and
        resolution actions, an updated notification is triggered; for
        example, the root cause objects and affected objects are updated. When
        an incident is successfully resolved, the status of the incident is
        set to 'cleared'.</t>
      </section>

      <section title="Incident Acknowledge">
        <figure>
          <artwork>
    +---x incident-acknowledge
    |  +---w input
    |  |  +---w incident-id*
    |  |          -&gt; /inc:incidents/inc:incident/inc:incident-id
                </artwork>
        </figure>

        <t>After an incident is generated, updated, or cleared, the operator
        needs to confirm the incident to ensure that the client knows about
        the incident. (In some scenarios where automatic diagnosis and
        resolution are supported, the status of an incident may be updated
        multiple times, or the incident may even be resolved
        automatically.)</t>

        <t>The incident-acknowledge rpc can confirm multiple incidents at a
        time.</t>
      </section>

      <section title="Incident Diagnose">
        <figure>
          <artwork>
    +---x incident-diagnose
    |  +---w input
    |  |  +---w incident-id*
    |  |          -&gt; /inc:incidents/inc:incident/inc:incident-id
                </artwork>
        </figure>

        <t>After an incident is generated, the incident-diagnose rpc can be
        used to diagnose the incident and locate the root causes. Diagnosis
        can be performed through detection tasks such as BFD detection, flow
        detection, telemetry collection, short-term threshold alarm,
        configuration error check, or test packet injection.</t>

        <t>After the diagnosis is performed, an incident update notification
        will be triggered to report the latest status of the incident.</t>
      </section>

      <section title="Incident Resolution">
        <figure>
          <artwork>
    +---x incident-resolve
       +---w input
       |  +---w incident-id*
       |          -&gt; /inc:incidents/inc:incident/inc:incident-id
                </artwork>
        </figure>

        <t>After the root causes and impacts are determined, incident-resolve
        rpc can be used to resolve the incident (if the server can resolve it).
        How to resolve an incident instance is out of the scope of this
        document.</t>

        <t>The incident-resolve rpc allows multiple incident instances to be
        resolved at a time. If an incident instance is successfully resolved,
        a notification will be triggered to update the incident status to
        'cleared'. If the incident content is changed during this process, a
        notification update will be triggered.</t>
      </section>
    </section>

      <section title="Incident Management YANG Module">
          <figure>
              <artwork><![CDATA[
   <CODE BEGINS>
       file="ietf-incident-types@2023-05-16.yang"
module ietf-incident-types {
  yang-version 1.1;
  namespace "urn:ietf:params:xml:ns:yang:ietf-incident-types";
  prefix int;

  // Needed by the node-ref typedef at the end of this module.
  import ietf-network {
    prefix nw;
    reference
      "RFC 8345: A YANG Data Model for Network Topologies";
  }

  organization
    "IETF OPSAWG Working Group";
  contact
    "WG Web:   <https://datatracker.ietf.org/wg/opsawg/>
     WG List:  <mailto:opsawg@ietf.org>
     Author:   Chong Feng  <mailto:frank.fengchong@huawei.com>
     Author:   Tong Hu  <mailto:hutong@cmhi.chinamobile.com>
     Author:   Luis Miguel Contreras Murillo <mailto:
               luismiguel.contrerasmurillo@telefonica.com>
     Author:   Thomas Graf   <mailto:thomas.graf@swisscom.com>
     Author:   Qin Wu   <mailto:bill.wu@huawei.com>
     Author:   Chaode Yu   <mailto:yuchaode@huawei.com>
     Author:   Nigel Davis   <mailto:ndavis@ciena.com>";

  description
    "This module defines the identities and typedefs for
     incident management.

     Copyright (c) 2023 IETF Trust and the persons identified as
     authors of the code.  All rights reserved.

     Redistribution and use in source and binary forms, with or
     without modification, is permitted pursuant to, and subject
     to the license terms contained in, the Revised BSD License
     set forth in Section 4.c of the IETF Trust's Legal Provisions
     Relating to IETF Documents
     (https://trustee.ietf.org/license-info).

     This version of this YANG module is part of RFC XXXX; see the
     RFC itself for full legal notices.";

  revision 2023-05-16 {
    description
      "Initial version.";
    reference
      "RFC XXXX: Incident Management for Network Services";
  }

  // Identities: incident domain hierarchy.
  //
  //   incident-domain
  //     +-- single-domain
  //     |     +-- access    +-- ran
  //     |     +-- transport +-- otn
  //     |     +-- ip        +-- ptn
  //     +-- cross-domain

  identity incident-domain {
    description
      "The abstract identity to indicate the domain of
       an incident.";
  }

  identity single-domain {
    base incident-domain;
    description
      "Single domain.";
  }

  identity access {
    base single-domain;
    description
      "Access domain.";
  }

  identity ran {
    base access;
    description
      "Radio access network domain.";
  }

  identity transport {
    base single-domain;
    description
      "Transport domain.";
  }

  identity otn {
    base transport;
    description
      "Optical transport network domain.";
  }

  identity ip {
    base single-domain;
    description
      "IP domain.";
  }

  identity ptn {
    base ip;
    description
      "Packet transport network domain.";
  }

  identity cross-domain {
    base incident-domain;
    description
      "Cross domain.";
  }

  // Identities: incident category hierarchy.

  identity incident-category {
    description
      "The abstract identity for incident category.";
  }

  identity device {
    base incident-category;
    description
      "Device category.";
  }

  // NOTE(review): "enviorment" is a misspelling of "environment".
  // The identity name is left unchanged here because renaming it
  // would change the module's published interface; fix it
  // deliberately in a future revision.
  identity power-enviorment {
    base device;
    description
      "Power system category.";
  }

  identity device-hardware {
    base device;
    description
      "Hardware of device category.";
  }

  identity device-software {
    base device;
    description
      "Software of device category.";
  }

  identity line {
    base device-hardware;
    description
      "Line card category.";
  }

  identity maintenance {
    base incident-category;
    description
      "Maintenance category.";
  }

  identity network {
    base incident-category;
    description
      "Network category.";
  }

  identity protocol {
    base incident-category;
    description
      "Protocol category.";
  }

  identity overlay {
    base incident-category;
    description
      "Overlay category.";
  }

  identity vm {
    base incident-category;
    description
      "VM category.";
  }

  // Typedefs

  typedef incident-priority {
    type enumeration {
      enum critical {
        description
          "The incident MUST be handled immediately.";
      }
      enum high {
        description
          "The incident should be handled as soon as
           possible.";
      }
      enum medium {
        description
          "Network services are not affected, or the
           services are slightly affected, but corrective
           measures need to be taken.";
      }
      enum low {
        description
          "Potential or imminent service-affecting
           incidents are detected, but services are
           not affected currently.";
      }
    }
    description
      "Defines the priority of an incident.";
  }

  typedef node-ref {
    type leafref {
      path "/nw:networks/nw:network/nw:node/nw:node-id";
    }
    description
      "Reference to a network node.";
  }
}
   <CODE ENDS>
]]>
              </artwork>
          </figure>
          <figure>
              <artwork><![CDATA[
   <CODE BEGINS>
       file="ietf-incident@2023-05-16.yang"
module ietf-incident {
  yang-version 1.1;
  namespace "urn:ietf:params:xml:ns:yang:ietf-incident";
  prefix inc;

  import ietf-yang-types {
    prefix yang;
    reference
      "RFC 6991: Common YANG Data Types";
  }

  import ietf-alarms {
    prefix al;
    reference
      "RFC 8632: A YANG Data Model for Alarm Management";
  }

  import ietf-incident-types {
    prefix int;
    reference
      "draft-feng-opsawg-incident-management: Incident
       Management for Network Services";
  }

  organization
    "IETF OPSAWG Working Group";
  contact
    "WG Web:   <https://datatracker.ietf.org/wg/opsawg/>
     WG List:  <mailto:opsawg@ietf.org>
     Author:   Chong Feng  <mailto:frank.fengchong@huawei.com>
     Author:   Tong Hu  <mailto:hutong@cmhi.chinamobile.com>
     Author:   Luis Miguel Contreras Murillo <mailto:
               luismiguel.contrerasmurillo@telefonica.com>
     Author:   Thomas Graf   <mailto:thomas.graf@swisscom.com>
     Author:   Qin Wu   <mailto:bill.wu@huawei.com>
     Author:   Chaode Yu   <mailto:yuchaode@huawei.com>
     Author:   Nigel Davis   <mailto:ndavis@ciena.com>";

  description
    "This module defines the interfaces for incident management
     lifecycle.

     This module is intended for the following use cases:
     * incident lifecycle management:
       - incident report: report incident instance to client
                          when an incident instance is detected.
       - incident acknowledge: acknowledge an incident instance.
       - incident diagnose: diagnose an incident instance.
       - incident resolve: resolve an incident instance.

     Copyright (c) 2023 IETF Trust and the persons identified as
     authors of the code.  All rights reserved.

     Redistribution and use in source and binary forms, with or
     without modification, is permitted pursuant to, and subject
     to the license terms contained in, the Revised BSD License
     set forth in Section 4.c of the IETF Trust's Legal Provisions
     Relating to IETF Documents
     (https://trustee.ietf.org/license-info).

     This version of this YANG module is part of RFC XXXX; see the
     RFC itself for full legal notices.";

  // Revisions are listed newest first.
  revision 2023-05-16 {
    description
      "Move identities and typedefs to an independent YANG
       module. Update some definitions of the data model.";
    reference
      "RFC XXXX: Incident Management for Network Services";
  }
  revision 2023-03-13 {
    description
      "Initial version.";
    reference
      "RFC XXXX: Incident Management for Network Services";
  }

  //groupings
  // A network node together with the list of its resources.
  // The using list is expected to key on 'node'.
  grouping resources-info {
    description
      "the grouping which defines the network
       resources of a node.";

    leaf node {
      type int:node-ref;
      description
        "reference to a network node.";
    }

    list resource {
      key "name";
      description
        "the resources of a network node.";

      leaf name {
        type al:resource;
        description
          "network resource name.";
      }
    }
  }

  // Timestamps of an incident instance. All leaves are optional:
  // none of them carries 'mandatory true'.
  grouping incident-time-info {
    description
      "the grouping defines incident time information.";

    leaf raise-time {
      type yang:date-and-time;
      description
        "the time when an incident instance is raised.";
    }
    leaf occur-time {
      type yang:date-and-time;
      description
        "the time when an incident instance occurred.
         It is the occurrence time of the first event during
         incident detection.";
    }
    leaf clear-time {
      type yang:date-and-time;
      description
        "the time when an incident instance is
         resolved.";
    }
    leaf ack-time {
      type yang:date-and-time;
      description
        "the time when an incident instance is
         acknowledged.";
    }
    leaf last-updated {
      type yang:date-and-time;
      description
        "the latest time when an incident instance is
         updated.";
    }
  }

  // Common body of an incident. It is used both by the
  // /incidents/incident list and by the incident-notification.
  grouping incident-info {
    description
      "the grouping defines the information of an
       incident.";

    leaf csn {
      type uint64;
      mandatory true;
      description
        "The sequence number of the incident instance.";
    }
    leaf-list service-instance {
      type string;
      description
        "the related network service instances of
         the incident instance.";
    }
    leaf name {
      type string;
      mandatory true;
      description
        "the name of an incident.";
    }
    leaf type {
      type enumeration {
        enum fault {
          description
            "It indicates the type of the incident
             is a fault, for example an interface
             fails to work.";
        }
        enum potential-risk {
          description
            "It indicates the type of the incident
             is a potential risk, for example high
             CPU rate may cause a fault in the
             future.";
        }
      }
      mandatory true;
      description
        "The type of an incident.";
    }
    leaf domain {
      type identityref {
        base int:incident-domain;
      }
      mandatory true;
      description
        "the domain of an incident.";
    }
    leaf priority {
      type int:incident-priority;
      mandatory true;
      description
        "the priority of an incident instance.";
    }
    leaf status {
      type enumeration {
        enum raised {
          description
            "an incident instance is raised.";
        }
        enum updated {
          description
            "the information of an incident instance
             is updated.";
        }
        enum cleared {
          description
            "an incident is cleared.";
        }
      }
      default "raised";
      description
        "The status of an incident instance.";
    }
    leaf ack-status {
      type enumeration {
        enum acknowledged {
          description
            "The incident has been acknowledged by user.";
        }
        enum unacknowledged {
          description
            "The incident hasn't been acknowledged.";
        }
      }
      default "unacknowledged";
      description
        "the acknowledge status of an incident.";
    }
    leaf category {
      type identityref {
        base int:incident-category;
      }
      mandatory true;
      description
        "The category of an incident.";
    }
    leaf detail {
      type string;
      description
        "detail information of this incident.";
    }
    leaf resolve-advice {
      type string;
      description
        "The advice to resolve this incident.";
    }

    container sources {
      description
        "The source components.";
      list source {
        key "node";
        min-elements 1;
        description
          "The source components of incident.";
        uses resources-info;
      }
    }

    container root-causes {
      description
        "The root cause objects.";
      list root-cause {
        key "node";
        description
          "the root causes of incident.";

        // Local grouping: it is applied twice below, once per
        // resource and once on the node entry itself.
        grouping root-cause-info {
          description
            "The information of root cause.";
          leaf cause-name {
            type string;
            description
              "the name of cause.";
          }
          leaf detail {
            type string;
            description
              "the detail information of the cause.";
          }
        }

        uses resources-info {
          augment "resource" {
            description
              "augment root cause information.";
            // if root cause object is a resource of a node
            uses root-cause-info;
          }
        }
        // if root cause object is a node
        uses root-cause-info;
      }
    }

    // Both key leaves of root-event are leafrefs into the 'events'
    // list below, whose key is also "type event-id".
    container root-events {
      description
        "the root events of the incident.";
      list root-event {
        key "type event-id";
        description
          "the root event of the incident.";
        leaf type {
          type leafref {
            path "../../../events/event/type";
          }
          description
            "the event type.";
        }
        leaf event-id {
          type leafref {
            path "../../../events/event[type = current()/../type]"
               + "/event-id";
          }
          description
            "the event identifier, such as uuid,
             sequence number, etc.";
        }
      }
    }

    container events {
      description
        "related events.";
      list event {
        key "type event-id";
        description
          "related events.";
        leaf type {
          // NOTE(review): this enum has 'inform' but no
          // 'notification' or 'log', while the choice below has
          // cases 'notification' and 'log' but no 'inform'.
          // Confirm the intended mapping before publication.
          type enumeration {
            enum alarm {
              description
                "alarm type.";
            }
            enum inform {
              description
                "inform type.";
            }
            enum KPI {
              description
                "KPI type.";
            }
            enum unknown {
              description
                "unknown type.";
            }
          }
          description
            "event type.";
        }
        leaf event-id {
          type string;
          description
            "the event identifier, such as uuid,
             sequence number, etc.";
        }
        choice event-type-info {
          description
            "event type information.";
          case alarm {
            when "type = 'alarm'";
            // The three leaves below mirror the key of the
            // ietf-alarms alarm list; each later leaf narrows the
            // match using the leaves before it.
            container alarm {
              description
                "alarm type event.";
              leaf resource {
                type leafref {
                  path "/al:alarms/al:alarm-list/al:alarm"
                     + "/al:resource";
                }
                description
                  "network resource.";
                reference
                  "RFC 8632: A YANG Data Model for Alarm
                   Management";
              }
              leaf alarm-type-id {
                type leafref {
                  path "/al:alarms/al:alarm-list/al:alarm"
                     + "[al:resource = current()/../resource]"
                     + "/al:alarm-type-id";
                }
                description
                  "alarm type id.";
                reference
                  "RFC 8632: A YANG Data Model for Alarm
                   Management";
              }
              leaf alarm-type-qualifier {
                type leafref {
                  path "/al:alarms/al:alarm-list/al:alarm"
                     + "[al:resource = current()/../resource]"
                     + "[al:alarm-type-id = current()/.."
                     + "/alarm-type-id]/al:alarm-type-qualifier";
                }
                description
                  "alarm type qualifier.";
                reference
                  "RFC 8632: A YANG Data Model for Alarm
                   Management";
              }
            }
          }
          // The remaining cases are placeholders with no data
          // nodes yet.
          case notification {
            // TODO
          }
          case log {
            // TODO
          }
          case KPI {
            // TODO
          }
          case unknown {
            // TODO
          }
        }
      }
    }
  }

  //data definitions
  // Read-only (config false) list of incident instances,
  // keyed by incident-id.
  container incidents {
    config false;
    description
      "the information of incidents.";

    list incident {
      key "incident-id";
      description
        "the information of incident.";

      leaf incident-id {
        type string;
        description
          "the identifier of an incident instance.";
      }

      uses incident-info;
      uses incident-time-info;
    }
  }

  // notifications
  // Carries the incident-info body plus a reference to the
  // incident list entry and a single 'time' leaf.
  notification incident-notification {
    description
      "incident notification. It will be triggered when
       the incident is raised, updated or cleared.";

    leaf incident-id {
      type leafref {
        path "/inc:incidents/inc:incident/inc:incident-id";
      }
      description
        "the identifier of an incident instance.";
    }

    uses incident-info;

    leaf time {
      type yang:date-and-time;
      description
        "occur time of an incident instance.";
    }
  }
  // rpcs
  // Input is a leaf-list, so one call can name several incidents.
  rpc incident-acknowledge {
    description
      "This rpc can be used to acknowledge the specified
       incidents.";

    input {
      leaf-list incident-id {
        type leafref {
          path "/inc:incidents/inc:incident/inc:incident-id";
        }
        description
          "the identifier of an incident instance.";
      }
    }
  }
  // No output is defined; per the description, results are
  // delivered through the incident notification.
  rpc incident-diagnose {
    description
      "This rpc can be used to diagnose the specified
       incidents. The result of diagnosis will be reported
       by incident notification.";

    input {
      leaf-list incident-id {
        type leafref {
          path "/inc:incidents/inc:incident/inc:incident-id";
        }
        description "the identifier of an incident instance.";
      }
    }
  }

  // No output is defined; per the description, results are
  // delivered through the incident notification.
  rpc incident-resolve {
    description
      "This rpc can be used to resolve the specified
       incidents. The result of resolution will be reported
       by incident notification.";

    input {
      leaf-list incident-id {
        type leafref {
          path "/inc:incidents/inc:incident/inc:incident-id";
        }
        description "the identifier of an incident instance.";
      }
    }
  }
}
   <CODE ENDS>
]]>
              </artwork>
          </figure>
      </section>

    <section anchor="IANA" title="IANA Considerations">
      <section title="The &quot;IETF XML&quot; Registry">
        <t>This document registers two XML namespace URNs in the 'IETF XML
        registry', following the format defined in <xref
        target="RFC3688"/>.</t>

        <figure>
          <artwork>URI: urn:ietf:params:xml:ns:yang:ietf-incident-types
Registrant Contact: The IESG.
XML: N/A, the requested URIs are XML namespaces.

URI: urn:ietf:params:xml:ns:yang:ietf-incident
Registrant Contact: The IESG.
XML: N/A, the requested URIs are XML namespaces.</artwork>
        </figure>
      </section>

      <section title="The &quot;YANG Module Names&quot; Registry">
        <t>This document registers two module names in the 'YANG Module Names'
        registry, defined in <xref target="RFC6020"/>.</t>

        <figure>
          <artwork>name: ietf-incident-types
prefix: int
namespace: urn:ietf:params:xml:ns:yang:ietf-incident-types
RFC: XXXX

name: ietf-incident
prefix: inc
namespace: urn:ietf:params:xml:ns:yang:ietf-incident
RFC: XXXX
// RFC Ed.: replace XXXX and remove this comment</artwork>
        </figure>
      </section>
    </section>

    <section anchor="security" title="Security Considerations">
      <t>The YANG modules specified in this document define a schema for data
      that is designed to be accessed via network management protocols such as
      NETCONF [RFC6241] or RESTCONF [RFC8040]. The lowest NETCONF layer is the
      secure transport layer, and the mandatory-to-implement secure transport
      is Secure Shell (SSH) [RFC6242]. The lowest RESTCONF layer is HTTPS, and
      the mandatory-to-implement secure transport is TLS [RFC8446].</t>

      <t>The Network Configuration Access Control Model (NACM) [RFC8341]
      provides the means to restrict access for particular NETCONF or RESTCONF
      users to a preconfigured subset of all available NETCONF or RESTCONF
      protocol operations and content.</t>

      <t>There are a number of data nodes defined in this YANG module that are
      writable/creatable/deletable (i.e., config true, which is the default).
      These data nodes may be considered sensitive or vulnerable in some
      network environments. Write operations (e.g., edit-config) to these data
      nodes without proper protection can have a negative effect on network
      operations. These are the subtrees and data nodes and their
      sensitivity/vulnerability:</t>

      <t>Some of the readable data nodes in this YANG module may be considered
      sensitive or vulnerable in some network environments. It is thus
      important to control read access (e.g., via get, get-config, or
      notification) to these data nodes. These are the subtrees and data nodes
      and their sensitivity/vulnerability:</t>

      <t>Some of the RPC operations in this YANG module may be considered
      sensitive or vulnerable in some network environments. It is thus
      important to control access to these operations. These are the
      operations and their sensitivity/vulnerability:</t>
    </section>

      <section title="Contributors">
          <t><figure>
              <artwork>
Aihua Guo
Futurewei Technologies
aihuaguo.ietf@gmail.com</artwork>
          </figure></t>
          <t>
              <figure>
                  <artwork>
Zhidong Yin
Huawei
yinzhidong@huawei.com
                  </artwork>
              </figure>
          </t>
          <t>
              <figure>
                  <artwork>
Guoxiang Liu
Huawei
liuguoxiang@huawei.com
                  </artwork>
              </figure>
          </t>
          <t>
              <figure>
                  <artwork>
Kaichun Wu
Huawei
wukaichun@huawei.com
                  </artwork>
              </figure>
          </t>
          <t>
              <figure>
                  <artwork>
Yanlei Zheng
China Unicom
zhengyanlei@chinaunicom.cn
                  </artwork>
              </figure>
          </t>
          <t>
              <figure>
                  <artwork>
Yunbin Xu
CAICT
xuyunbin@caict.ac.cn
                  </artwork>
              </figure>
          </t>
      </section>

    <section title="Acknowledgments">
      <t>The authors would like to thank Mohamed Boucadair,
      Robert Wilton, Benoit Claise, Oscar Gonzalez de Dios, Mahesh
      Jethanandani, Balazs Lengyel, Bo Wu, Qiufang Ma, Haomian Zheng, and
      Yuan Yao for their valuable comments and great input to this work.</t>
    </section>
  </middle>

  <back>
    <references title="Normative References">
      <?rfc include="reference.RFC.2119.xml"?>

      <?rfc include="reference.RFC.3688.xml"?>

      <?rfc include="reference.RFC.6020.xml"?>

      <?rfc include="reference.RFC.7950.xml"?>

      <?rfc include="reference.RFC.8174.xml"?>
    </references>

    <references title="Informative References">
      <?rfc include="reference.RFC.8345.xml"?>

      <?rfc include="reference.RFC.8632.xml"?>

      <?rfc include='reference.RFC.8969.xml'?>

      <?rfc include="reference.RFC.9375.xml"?>

      <?rfc include="reference.I-D.ietf-opsawg-service-assurance-architecture"?>

      <?rfc include="reference.I-D.ietf-opsawg-service-assurance-yang"?>

      <reference anchor="I-D.tgraf-yang-push-observation-time"
                 target="https://www.ietf.org/archive/id/draft-tgraf-yang-push-observation-time-00.html">
        <front>
          <title>Support of Network Observation Timestamping in YANG
          Notifications</title>

          <author fullname="Thomas Graf" initials="T" surname="Graf">
            <organization>Swisscom</organization>
          </author>

          <author fullname="Benoit Claise" initials="B" surname="Claise">
            <organization>Huawei</organization>
          </author>

          <author fullname="Alex Huang Feng" initials="A."
                  surname="Huang Feng">
            <organization>INSA-Lyon</organization>
          </author>

          <date day="6" month="March" year="2023"/>
        </front>
      </reference>

      <reference anchor="W3C-Trace-Context"
                 target="https://www.w3.org/TR/2021/REC-trace-context-1-20211123/">
        <front>
          <title>W3C Recommendation on Trace Context</title>

          <author>
            <organization>W3C</organization>
          </author>

          <date day="23" month="November" year="2021"/>
        </front>
      </reference>

      <reference anchor="I-D.rogaglia-netconf-trace-ctx-extension"
                 target="https://www.ietf.org/archive/id/draft-rogaglia-netconf-trace-ctx-extension-02.html">
        <front>
          <title>NETCONF Extension to support Trace Context
          propagation</title>

          <author fullname="Roque Gagliano" initials="R" surname="Gagliano">
            <organization>Cisco</organization>
          </author>

          <author fullname="Kristian Larsson" initials="K" surname="Larsson">
            <organization>Deutsche Telekom</organization>
          </author>

          <author fullname="Jan Lindblad" initials="J." surname="Lindblad">
            <organization>Cisco</organization>
          </author>

          <date day="13" month="March" year="2023"/>
        </front>
      </reference>

      <reference anchor="I-D.quilbeuf-opsawg-configuration-tracing"
                 target="https://www.ietf.org/archive/id/draft-quilbeuf-opsawg-configuration-tracing-01.html">
        <front>
          <title>External Transaction ID for Configuration Tracing</title>

          <author fullname="Jean Quilbeuf" initials="J" surname="Quilbeuf">
            <organization>Huawei</organization>
          </author>

          <author fullname="Benoit Claise" initials="B" surname="Claise">
            <organization>Huawei</organization>
          </author>

          <author fullname="Thomas Graf" initials="T." surname="Graf">
            <organization>Swisscom</organization>
          </author>

          <author fullname="Diego R. Lopez" initials="D." surname="Lopez">
            <organization>Telefonica I+D</organization>
          </author>

          <author fullname="Qiong Sun" initials="Q." surname="Sun">
            <organization>China Telecom</organization>
          </author>

          <date day="13" month="March" year="2023"/>
        </front>
      </reference>
    </references>

    <section title="Changes between revisions">
      <t>[[RFC editor: please remove this section before publication.]]</t>

      <t>v00 - v01 <list style="symbols">
          <t>Modify the introduction.</t>
		  
          <t>Rename incident agent to incident server.</t>

          <t>Add the interworking with alarm management.</t>

          <t>Add the interworking with SAIN.</t>

          <t>Add the relationship with RFC8969.</t>

          <t>Add the relationship with observation timestamp and trace
          context.</t>

          <t>Clarify the incident identification process.</t>

          <t>Modify the workflow of incident diagnosis and resolution.</t>

          <t>Remove identities and typedefs from ietf-incident YANG module,
          and create a new YANG module called ietf-incident-types.</t>

          <t>Modify ietf-incident YANG module, for example, modify
          incident-diagnose rpc and incident-resolve rpc.</t>
        </list></t>
    </section>
  </back>
</rfc>
