<?xml version='1.0' encoding='utf-8'?>
<!-- This template is for creating an Internet Draft using xml2rfc,
    which is available here: https://xml2rfc.tools.ietf.org/. -->

<!-- https://docs.google.com/document/d/1KFzCj7FYKaSrtuWzx5GiYgMLB5IZs4tcuvDQtajMR4g/edit -->

<!DOCTYPE rfc SYSTEM "rfc2629-xhtml.ent">
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs), 
    please see http://xml.resource.org/authoring/README.html. -->
<rfc
        xmlns:xi="http://www.w3.org/2001/XInclude"
        category="info"
        docName="draft-ihsan-nmrg-rl-vne-ps-01"
        ipr="trust200902"
        obsoletes=""
        updates=""
        submissionType="IETF"
        xml:lang="en"
        tocInclude="true"
        tocDepth="4"
        symRefs="true"
        sortRefs="true"
        version="3">
    <!-- xml2rfc v2v3 conversion 2.38.1 -->
    <!-- category values: std, bcp, info, exp, and historic
      ipr values: trust200902, noModificationTrust200902, noDerivativesTrust200902,
         or pre5378Trust200902
      you can add the attributes updates="NNNN" and obsoletes="NNNN"
      they will automatically be output with "(if approved)" -->

    <!-- ***** FRONT MATTER ***** -->

    <front>
        <!-- The abbreviated title is used in the page header - it is only necessary if the
            full title is longer than 39 characters -->
        <title abbrev="ML-based Virtual Network Embedding">
            Reinforcement Learning-Based Virtual Network Embedding: Problem Statement
        </title>
        <seriesInfo name="Internet-Draft" value="draft-ihsan-nmrg-rl-vne-ps-01"/>
        <!-- add 'role="editor"' below for the editors if appropriate -->

        <!-- Another author who claims to be an editor -->

        <author fullname="Ihsan Ullah" initials="I." surname="Ullah">
            <organization>KOREATECH</organization>
            <address>
                <postal>
                    <street>1600, Chungjeol-ro, Byeongcheon-myeon, Dongnam-gu</street>
                    <!-- Reorder these if your country does things differently -->
                    <city>Cheonan</city>
                    <region>Chungcheongnam-do</region>
                    <code>31253</code>
                    <country>Republic of Korea</country>
                </postal>
                <email>ihsan@koreatech.ac.kr</email>
                <!-- uri and facsimile elements may also be added -->
            </address>
        </author>
        <author fullname="Youn-Hee Han" initials="Y-H." surname="Han">
            <organization>KOREATECH</organization>
            <address>
                <postal>
                    <street>1600, Chungjeol-ro, Byeongcheon-myeon, Dongnam-gu</street>
                    <!-- Reorder these if your country does things differently -->
                    <city>Cheonan</city>
                    <region>Chungcheongnam-do</region>
                    <code>31253</code>
                    <country>Republic of Korea</country>
                </postal>
                <email>yhhan@koreatech.ac.kr</email>
                <!-- uri and facsimile elements may also be added -->
            </address>
        </author>
        <author fullname="TaeYeon Kim" initials="TY." surname="Kim">
            <organization>ETRI</organization>
            <address>
                <postal>
                    <street>218 Gajeong-ro, Yuseong-gu</street>
                    <!-- Reorder these if your country does things differently -->
                    <city>Daejeon</city>
                    <code>34129</code>
                    <country>Republic of Korea</country>
                </postal>
                <email>tykim@etri.re.kr</email>
                <!-- uri and facsimile elements may also be added -->
            </address>
        </author>


        <date year="2021"/>
        <!-- If the month and year are both specified and are the current ones, xml2rfc will fill
            in the current day for you. If only the current year is specified, xml2rfc will fill
         in the current day and month for you. If the year is not the current one, it is
         necessary to specify at least a month (xml2rfc assumes day="1" if not specified for the
         purpose of calculating the expiry date).  With drafts it is normally sufficient to
         specify just the year. -->

        <!-- Meta-data Declarations -->

        <area>General</area>
        <workgroup>Internet Engineering Task Force</workgroup>
        <!-- WG name at the upperleft corner of the doc,
            IETF is fine for individual submissions.
         If this element is not present, the default is "Network Working Group",
            which is used by the RFC Editor as a nod to the history of the IETF. -->

        <keyword>virtual network embedding</keyword>
        <keyword>machine learning</keyword>
        <!-- Keywords will be incorporated into HTML output
            files in a meta tag but they have no effect on text or nroff
            output. If you submit your draft to the RFC Editor, the
            keywords will be used for the search engine. -->

        <abstract>
            <t> In Network virtualization (NV) technology, Virtual Network Embedding (VNE) is an algorithm used to map a virtual network to the substrate network.
                VNE is the core orientation of NV, which has a great impact on the performance of virtual networks and the resource utilization of the substrate network.
                An efficient embedding algorithm can maximize the acceptance ratio of virtual networks to increase the revenue for Internet service providers.
                Several works have appeared on the design of VNE solutions; however, it has become a challenging issue for researchers.
                To solve the VNE problem, we believe that reinforcement learning (RL) can play a vital role in making the VNE algorithm more intelligent and efficient.
                Moreover, RL has been merged with deep learning techniques to develop adaptive models with effective strategies for various complex problems.
                In RL, agents can learn desired behaviors (e.g., optimal VNE strategies), and after learning and completing training, an agent can embed a virtual network onto the substrate network very quickly and efficiently.
                RL can reduce the complexity of the VNE algorithm; however, it is difficult to apply RL techniques directly to VNE problems, and more research is needed.
                In this document, we present a problem statement to motivate researchers toward solving the VNE problem using deep reinforcement learning.

            </t>
        </abstract>
    </front>
    <middle>
        <section numbered="true" toc="default">
            <name>Introduction and Scope</name>
            <t> Recently, Network virtualization (NV) technology has received a lot of attention from academics and industry.
           It allows multiple heterogeneous virtual networks to share resources on the same substrate network (SN) <xref target="RFC7364"/>, <xref target="ASNVT2020"/>.
           The current large-size fixed substrate network architecture is no longer efficient and not extendable due to network ossification.
           To overcome these limitations, traditional Internet Service Providers (ISPs) are divided into two independent parts which work together.
           One is the Service Providers (SPs), who create and own a number of VNs, and the other one is the Infrastructure Providers (InPs), who own the SN devices and links as underlying resources.
           SPs generate and construct the customized Virtual Network Requests (VNRs), and lease the resources from InPs based on those requests.
           In addition, two types of mediators can enter into the industry domain for better coordination of SPs and InPs.
           One is the Virtual Network Providers (VNPs) who assemble and coordinate diverse virtual resources from one or more InPs, the other one is the Virtual Network Operators (VNOs) who create, manage, and operate the VN according to the demand of the SPs.
           VNPs and VNOs could enable efficient use of the physical network and increase the commercial revenue of both SPs and InPs. NV can increase network agility, flexibility and scalability while creating significant cost savings.
           Greater network workload mobility, increased availability of network resources with good performance, and automated operations, are all the benefits of NV.
            </t>
            <t>
          Virtual Network Embedding (VNE) <xref target="VNESURV2013"/> is one of the main techniques and strategies used to map a virtual network to the substrate network.
          A VNE algorithm has two main parts, Node embedding: where virtual nodes of a VN have to be mapped to the SN nodes, and Link embedding: where virtual links between the virtual nodes have to be mapped to the physical paths in the substrate network.
          It has been proven to be NP-Hard, and both node and link embeddings have become challenging for the researchers.
          A virtual node and link should be efficiently embedded into a given SN, so that more VNR can be accepted with minimum cost.
          The distance of the virtual nodes from each other in a given SN contributes greatly to link failures and causes the rejection of VNRs.
          Hence, an efficient and intelligent technique is required for the VNE problem to reduce VNR rejections <xref target="ENViNE2021"/>.
          From the perspective of the InPs, an efficient VNE performs better mostly in terms of revenue, acceptance ratio, and revenue-to-cost ratio.

           </t>
          <t>Figure 1 shows an example of two virtual network requests, VNR1 and VNR2, to be embedded in the given substrate network.
          VNR1 contains three virtual nodes (a, b, and c) with CPU demands (15, 30, and 10) respectively, and the links between the virtual nodes a-b, b-c, and c-a with bandwidth demands 15, 20, and 35 respectively.
          Similarly, VNR2 contains virtual nodes and links with CPU and bandwidth demands respectively.
          The purpose of the VNE algorithm is to map the virtual nodes and links of the VNRs to the physical nodes and links of the given substrate network, as shown in Figure 1 <xref target="ENViNE2021"/>.
</t>
            <figure>
        <name>Substrate network with embedded virtual network, VNR1 and VNR2</name>
                <artwork align='center'>
           +----+                +----+         +----+          +----+
           | a  |                | d  |         | e  |          | f  |
           | 15 |                | 25 |__ _25___| 30 |__ _35_ __| 45 |
           +----+                +----+         +----+          +----+
          /      \                \                                 /
        15        35               30                              20
        /          \                \                             /
  +----+            +----+           +----+                 +----+
  | b  |            | c  |           | g  |                 | h  |
  | 30 |__ _20_ __ _| 10 |           | 15 |__ _ __10__ __ __| 35 |
  +----+            +----+           +----+                 +----+

           (VNR1)                                 (VNR2)
             ||   Embedding                         ||    Embedding
             VV                                     VV

        +----+              +----+       +----+                  +----+
 .......| a  |......35......| c  |       | d  |........25........| e  |
:  _____| 15 |              | 10 |_______| 25 |          ________| 30 |
: |     +----+              +----+       +----+         |        +----+
: |   A      |                | :   B      | :          |   C      |  :
: |   50     |__ ___50__ __ __| :   60     |_:_ __30 _ _|   40     |  :
: +__________+                +_:_________+  :          +__________+  :
:      |                        :     |      :                |       :
15     |                        :     |      :                |      35
:     40                       20     60     :               50       :
:      |                        :     |     30                |       :
:      |                       _:_____|_     :                |       :
+----:..............20........|.:       |    :                |   +----+
| b  | |   +----+.....30......|.........|....:                |   | f  |
| 30 |_|___| g  |             |       +----+                __|___| 45 |
+----+     | 15 |.....10......|.......| h  |........20.....|......+----+
 |   D     +____+             |    E  | 35 |               |     F    |
 |   50     |__ __ __ 70 _____|    40 +____+ ___ __ 50_ ___|     60   |
 +__________+                 +_________+                  +__________+

                  </artwork>
                </figure>

      <t>
          Recently, artificial intelligence and machine learning technologies have been widely used to solve networking problems <xref target='SUR2018'/>, <xref target='MLCNM2018'/>, <xref target='MVNNML2021'/>.
           There has been a surge in research efforts, especially in reinforcement learning (RL), which has contributed much to many complex tasks, e.g., video games and auto-driving.
           The main goal of RL is to learn better policies for sequential decision-making problems (e.g., VNE) and solve them very efficiently.
      </t>
      <t>
          Problems such as node ordering, pattern matching, and network feature extraction can all be simplified by graph-related theories and techniques.
          Graph neural network (GNN) is a new type of ML model architecture that can aggregate graph features (degrees, distance to specific nodes, node connectivity, etc.) on nodes <xref target='DVNEGCN2021'/>.
<!--          Several variants of GNN were proposed but the most known one is GCN (Graph Convolutional Network) which generalizes the convolutional operation from euclidean data (images and grid) to non-euclidean data (graph).-->
          The model can be used to cluster nodes and links according to the physical nodes and physical links attribute characteristics (CPU, storage, bandwidth, delay, etc.), and it is highly suitable for graph structures of any topological form. Hence, GNN is useful to find the best VNE strategy by intelligent agent training, and the organic combination of VNE and GCN has a good prerequisite.
      </t>
      <t>
          Designing and applying RL techniques directly into VNE problems is not yet trivial, but may face several challenges. This document describes the problems.
          Several works have appeared on the design of VNE solutions using RL, which focuses on how to interact with the environment to achieve maximum cumulative return <xref target="VNEQS2021"/>, <xref target="NRRL2020"/>, <xref target="MVNE2020"/>, <xref target="CDVNE2020"/>, <xref target="PPRL2020"/>, <xref target="RLVNEWSN2020"/>, <xref target="QLDC2019"/>, <xref target="VNFFG2020"/>, <xref target="VNEGCN2020"/>, <xref target="NFVDeep2019"/>, <xref target="DeepViNE2019"/>, <xref target="VNETD2019"/>, <xref target="RDAM2018"/>, <xref target="MOQL2018"/>, <xref target="ZTORCH2018"/>, <xref target="NeuroViNE2018"/>, <xref target="QVNE2020"/>.
          This document outlines the problems encountered when designing and applying RL-based VNE solutions.
          Section 2 describes how to design RL-based VNE solutions. Section 3 gives terminology, and Section 4 describes the problem space details.
      </t>
         </section>
            <section anchor="terminology" numbered="true" toc="default">
            <name>Reinforcement Learning-based VNE Solutions</name>

            <t> As we have discussed, RL has been studied in various fields (such as game, control system, operation research, information theory, multi-agent system, network system, etc.) and shows better performance than humans.
                Unlike deep learning, RL trains a policy model by receiving rewards through interaction with the environment without training label data.
                </t><t>
                Recently, there have been several attempts to solve VNE problems using RL.
                When applying RL-based algorithms to solve VNE problems, the RL agent automatically learns without human intervention through interaction with the environment.
                Once the agent has completed the learning process, it can generate the most appropriate embedding decision (action) based on the state of the network.
                Based on the embedding or action, the agent gets a reward from the environment to adaptively train its policy for future actions.
                The RL agent obtains the most optimized model based on the reward function defined according to each objective (revenue, cost, revenue-to-cost ratio, and acceptance ratio).
                The optimal RL policy model provides the VNE strategy appropriately according to the objective of the network operator.
                </t>
              <t>
                Figure 2 shows the virtual network embedding solution based on RL algorithm.
                The RL is divided into a training process and an inference process.
                In the training process, state information is composed of various substrate networks and VNRs (Environment), which are used as suitable inputs for RL models through feature extraction.
                After that, the RL model is updated by model updater using a feature extracted state and reward.
                In the inference process, using the trained RL model, the embedding result is provided to the operating network in real time.
                </t>
                <t>
                  The following figure shows the details of the RL-method-based virtual network embedding solution.
                </t>
                <figure>
                    <name>Two processes for RL method based VNE</name>
                    <artwork align='center'>
RL Model Training Process
+--------------------------------------------------------------------+
| Training Environment                                               |
| +-------------------+         RL-based VNE Agent                   |
| | +---------+       |         +----------------------------------+ |
| | | +---------+     |         |                   Action         | |
| | | | +----------+  |&lt;----------------------------------+     | |
| | + | | Substrate|  |         |                         |        | |
| |   | | Networks |  |         |  +----------+      +----------+  | |
| |   + +----------+  |  State  |  | Feature  |      |    RL    |  | |
| |                   |----------->|Extraction|----->|   Model  |  | |
| | +--------+        |         |  +----------+      | (Policy) |  | |
| | | +---------+     |         |       |            +----------+  | |
| | + | +---------+   |         |       |   +---------+     A      | |
| |   + |  VNRs   |   | Reward  |       +-->|  Model  |     |      | |
| |     +---------+   |-------------------->| Updater |-----+      | |
| +-------------------+         |           +---------+            | |
|                               +----------------------------------+ |
+--------------------------------------------------------------------+
                                  |
Inference Process                 |
+---------------------------------V----------------------------------+
|                         + - - - - - - - +                          |
| Operating Network       |   RL Model    |    Trained RL Model      |
| (Inference Environment) |   Training    |------------------+       |
| +-------------------+   |   Process     |                  |       |
| |   +-----------+   |   + - - - - - - - +                  |       |
| |   |           |   |         RL-based VNE Agent           |       |
| |   | Substrate |   |         +----------------------------|-----+ |
| |   |  Network  |   |         |                   Action   |     | |
| |   |           |   |&lt;--------------------------------+   |     | |
| |   +-----------+   |         |                        |   V     | |
| | +---------+       |         |  +------------+     +---------+  | |
| | | +---------+     | State   |  |  Feature   |     | Trained |  | |
| | + | +----------+  |----------->| Extraction |---->|   RL    |  | |
| |   + |   VNRs   |  |         |  +------------+     |  Model  |  | |
| |     +----------+  |         |                     +---------+  | |
| +-------------------+         +----------------------------------+ |
+--------------------------------------------------------------------+
                    </artwork>
                </figure>
        </section>

        <section numbered="true" toc="default">
            <name>Terminology</name>

            <dl newline="true" spacing="normal" indent="3">
                <dt>Network Virtualization</dt>
                <dd>Network virtualization is the process of combining hardware and software network resources and network functionality into a single, software-based administrative entity, a virtual network <xref target="RFC7364"/>.</dd>
            </dl>

            <dl newline="true" spacing="normal" indent="3">
                <dt>Virtual Network Embedding (VNE)</dt>
                <dd>Virtual Network Embedding (VNE) <xref target="VNESURV2013"/> is one of the main techniques used to map a virtual network to the substrate network.
                        </dd>
            </dl>
            <dl newline="true" spacing="normal" indent="3">
                <dt>Substrate Network (SN)</dt>
                <dd>The underlying physical network which contains the resources such as CPU and bandwidth for virtual networks is called substrate network.</dd>
            </dl>

            <dl newline="true" spacing="normal" indent="3">
                <dt>Virtual Network Request (VNR)</dt>
                <dd>Virtual Network Request is a complete single Virtual network request containing virtual nodes and virtual links.</dd>
            </dl>

            <dl newline="true" spacing="normal" indent="3">
                <dt>Agent</dt>
                <dd>In RL, an agent is the component that makes the decision and takes an action (i.e., embedding decision).</dd>
            </dl>
             <dl newline="true" spacing="normal" indent="3">
                <dt>State</dt>
                <dd>State is a representation (e.g., remaining SN capacity and requested VN resource) of the current environment, and it tells the agent what situation it is in currently.</dd>
            </dl>

            <dl newline="true" spacing="normal" indent="3">
                <dt>Action</dt>
                <dd>Actions (i.e., node and link embedding) are behavior an RL agent can do to change the states of the environment.</dd>
            </dl>
            <dl newline="true" spacing="normal" indent="3">
                <dt>Policy</dt>
                <dd>A policy defines an agent's way of behaving at a given time.
                        It is a mapping from perceived states of environment to actions to be taken when in those states.
                        It is usually implemented as a deep learning model because the state and action spaces are too large to be completely known.</dd>
            </dl>

            <dl newline="true" spacing="normal" indent="3">
                <dt>Reward</dt>
                <dd>A reward is the feedback which the environment provides to the agent for taking actions that lead to good outcomes (e.g., achieving the objective of the network operator).</dd>
            </dl>
            <dl newline="true" spacing="normal" indent="3">
                <dt>Environment</dt>
                <dd>An environment is the agent’s world in which it lives and interacts.
                    The agent can interact with the environment by performing some action but cannot influence the rules of the environment by those actions.</dd>
            </dl>

        </section>

        <section anchor="problem_statements" numbered="true" toc="default">
            <name>Problem Space</name>
            <t>RL contains three main components: state representation, action space, and reward description.
                For solving a VNE problem, we need to consider how to design the three main RL components.
                In addition, a specific RL algorithm, training environment, sim2real gap, and generalization are also important issues that should be considered and addressed.
                We will describe each one in detail as follows.</t>
                 <section numbered="true" toc="default">
                    <name>State Representation</name>
                    <t>The way to understand and observe the VNE problem is crucial for an RL agent to establish a thorough knowledge of the network status and generate efficient embedding decisions.
                        Therefore, it is essential to firstly design the state representation that serves as the input to the agent.
                        The state representation is the information which an agent can receive from the environment, and consists of a set of values representing the current situation in the environment.
                        Based on the state representation, the RL agent selects the most appropriate action through its policy model.
                        In the VNE problem, an RL agent needs to know the information of the overall SN entities and their current status in order to use the resources of the nodes and edges of the substrate network.
                        Also it must know the requirements of the VNR.
                        Therefore, in the VNE problem, the state usually should represent the current resource state of the nodes and edges of the substrate network (i.e., CPU, memory, storage, bandwidth, delay, loss rate, etc.) and the requirements of the virtual nodes and links of the VNR.
                        The collected status information is used as raw input, or refined status information through the feature extraction process is used as input for the RL agent.
                        The state representation may vary depending on the operator's objective and VNE strategy.
                        The method of determining such feature extraction and representation greatly affects the performance of the agent.
                    </t>
                 </section>
            <section numbered="true" toc="default">
                    <name>Action Space</name>
                    <t>In RL, an action represents a decision that an RL agent can take based on current state representation.
                        The set of all possible actions is called an action space.
                        In the VNE problems, actions are generally divided into node embedding and link embedding. The action for node embedding means the VNR’s nodes are assigned to which nodes in the SN.

                        Also, for link embedding, the action represents the selected paths between the selected substrate network nodes from the node embedding result.
                        If the policy model of the RL agent is well trained, it will select the embedding result to maximize the reward appropriate for the operator's objectives.
                        The output actions generated from the agent will indicate the adjustment of allocated resources.

                        It is noted that, at each point of time step, an RL algorithm may decide to 1) embed each virtual node onto substrate nodes and then embed each virtual link onto substrate paths separately, or 2) embed the given whole VNR onto substrate nodes and links in the SN at once.
                        In the former case, at every single step, a learning agent focuses on exactly one virtual node from the current VNR, and it generates a certain substrate node to host the virtual node.
                        Link embedding is then performed separately in the same time step.
                        To solve the VNE problem efficiently, the mappings of virtual nodes and links are considered together, although they are performed separately.
                        Link mapping is considered more complex than node mapping, because a virtual link can be mapped onto a physical path with multiple hops.
                        On the other hand, at every single step, a learning agent can try to embed the given whole VNR, i.e., all virtual nodes and links in the given VNR, onto a subset of SN components.
                        The whole VNR embedding should be handled as a graph embedding, so that the action space is huge and the design of the RL algorithm is usually more difficult than the one with each node and link embedding.

                    </t>
                 </section>
                   <section numbered="true" toc="default">
                    <name>Reward Description</name>
                    <t>Designing rewards is an important issue for an RL algorithm.
                       In general, the reward is the benefit that an RL agent follows when performing its determined action.
                       Reward is an immediate value that evaluates only the current state and action.
                       The value of reward depends on success or failure of each step.
                       In order to select the action that gives the best results in the long run, an RL agent needs to select the action with the highest cumulative reward.
                       The reward is calculated through the reward function according to the objective of the environment, and even in the same environment, it may be different depending on the operator’s objective.
                       Based on the given reward the agent can evaluate the effectiveness to improve the policy.
                       Hence, the reward function plays an important role in the training process of RL.
                       In the VNE problem, the overall objectives are to reduce VNR rejections, embed VNRs with minimum cost, maximize the revenue, and increase the utilization of physical resources.
                       Reward function should be designed to achieve one or multiple ones of these objectives.
                       Each objective and its correspondent reward design are outlined as follows:

                    </t>

                      <dl newline="true" spacing="normal" indent="3">
                      <dt>Revenue</dt>
                      <dd>Revenue is the sum of the virtual resources requested by the VN, and calculated to determine the total cost of the resources.
                        Typically, a successful action (e.g., VNR is embedded without violation) is treated to be a good reward which also increases the revenue.
                        Otherwise, a failed action (e.g., VNR is rejected) leads that the agent will receive a negative reward as well as decreasing the revenue.</dd>
                      </dl>
                       <dl newline="true" spacing="normal" indent="3">
                      <dt>Acceptance Ratio</dt>
                      <dd>Acceptance ratio is the ratio measured by the number of successfully embedded virtual network requests divided by total number of virtual network requests.
                          To achieve a high acceptance ratio, the agent is trying to embed maximum VNR and get a good reward. Getting a good reward is usually proportional to the acceptance ratio.</dd>
                      </dl>

                      <dl newline="true" spacing="normal" indent="3">
                      <dt>Revenue-to-cost ratio</dt>
                      <dd>To balance and compare the cost of resources for embedding VNR, the revenue is divided by cost.
                          Revenue-to-cost ratio compares the embedding algorithms with respect to their embedding results in terms of the cost and revenue.
                          Since most VNOs are primarily interested in this objective, a reward function should be designed to relate to this performance metric.</dd>
                      </dl>

                    </section>

         <section anchor="use_cases" numbered="true" toc="default">
            <name>Policy and RL Algorithms</name>
            <t>The policy is the strategy that the agent employs to determine the next action based on the current state.
               It maps states to actions that promise the highest reward.
               Therefore, an RL agent updates its policy repeatedly in the learning phase to maximize the expected cumulative reward.
               Unlike supervised learning, in which each sample has a corresponding label indicating the preferred output of the learning model, an RL agent relies on reward signals to evaluate the effectiveness of actions and further improve the policy.
               From the perspective of RL, the goal of VNE is to find an optimal policy to embed a VNR onto the given SN in any state at any time.
               There are two types of RL algorithms: on-policy and off-policy.
               In on-policy RL algorithms,  the (behaviour) policy of the exploration step to select an action and the policy to learn are the same.
               On-policy algorithms work with a single policy, and require any observations (state, action, reward, next state) to have been generated using that policy.
               Representative on-policy algorithms include A2C, A3C, TRPO, and PPO. On the other hand, off-policy RL algorithms work with two policies.
               These are a policy being learned, called the target policy, and the policy being followed that generates the observations, called the behaviour policy.
               In off-policy RL algorithms, the learning policy and the behaviour policy are not necessarily the same. It allows the use of exploratory policies for collecting the experience, since learning and behavior policies are separated.
               In the VNE problem, various experiences can be accumulated by extracting embedding results using various behavior policies. Representative off-policy algorithms include Q-learning, DQN, DDPG, and SAC.
               RL algorithms can also be classified into model-based and model-free algorithms. In model-based RL algorithms, an RL agent learns its optimal behavior indirectly by learning a model of the environment, taking actions and observing the outcomes, which include the next state and the immediate reward.
               The models predict the outcomes of actions. The model is used instead of the environment or in addition to interaction with it to learn optimal policies.
               This becomes, however, impractical when the state and action space is large. Unlike model-based algorithms, model-free RL algorithms learn directly by trial and error with the environment and do not require the relatively large memory.
               Since data efficiency or safety is very important even in VNE problems, the use of model-based algorithms can be actively considered. However, since it is not easy to build a good model that mimics a real network environment, a model-free RL algorithm may be more suitable for VNE problems.
               In conclusion, a good RL algorithm selection plays an important role in solving the VNE problem, and VNE performance metrics vary depending on the selected RL algorithm.

            </t>
              </section>
                <section  numbered="true" toc="default">
                    <name>Training Environment</name>
                    <t>Simulation is the use of software to simulate an interacting environment that is difficult to actually execute and test.
                        An RL algorithm learns by iteratively interacting with the environment. However, in the real environment, various variables such as failure and component consumption exist.
                        Therefore, it is necessary to learn through a simulation that simulates the real environment.
                        In order to solve the VNE problem, we need to use a network simulator similar to the real environment because it is difficult to repeatedly experiment with real network environments using an RL algorithm, and it is very challenging and overwhelming to directly apply an RL algorithm to real-world environments.
                        When solving VNE problems, a network simulation environment similar to a real network is required. The network simulation environment should have a general SN environment and VNR required by the operator.
                        The SN has nodes and links between nodes, and each has capacity such as CPU and Bandwidth.
                        In the case of VNR, there are virtual nodes and links required by the operator, and each must have its own requirements.
                    </t>
                </section>
                    <section  numbered="true" toc="default">
                    <name>Sim2Real Gap</name>
                    <t>An RL algorithm iteratively learns through a simulation environment to train a model of the desired policy.
                        The trained model is then applied to the real environment and/or tuned more for adapting to the real one.
                         However, when the model trained in the simulation is applied to the real environment, the sim2real gap problem arises. The simulation environment does not perfectly match the real environment, which often causes the tuning process to fail and leads to poor performance of the model.
                        The sim2real gap is caused by the difference between the simulation and the real environment.
                        It is because the simulation environment cannot perfectly simulate the real environment, and there are many variables in the real environment.
                        In a real network environment for VNE, the SN's nodes and links may fail due to external factors, or capacity such as CPU may change suddenly.
                        In order to solve this problem, the simulation environment should be more robust or the trained RL model should be generalized.
                        To reduce the gap between the simulation and real network environments, we need to train our model with an efficient and large number of VNRs and keep training the agent so that it does not depend only on previous memorization.
                    </t>
                  </section>
                <section  numbered="true" toc="default">
                        <name> Generalization</name>
                        <t>Generalization refers to the trained model's ability to adapt properly to previously unseen new observations.
                            An RL algorithm tries to learn a model that optimizes some objective with the purpose of performing well on data that has never been seen by the model during training.
                            In terms of VNE problems, the generalization is a measure of how the agent’s policy model performs on predicting unseen VNR.
                            The RL agent not only has to memorize all the previous variance of the VNR but also to learn and explore more possible variance.
                            It is important to have good and efficient training data for VNR with good variance and train the model with all possible VNRs.
                        </t>
                </section>
                </section>

        <section anchor="IANA" numbered="true" toc="default">
            <name>IANA Considerations</name>
            <t>This memo includes no request to IANA.</t>
            <t>All drafts are required to have an IANA considerations section (see
                <xref target="RFC5226" format="default">Guidelines for Writing an IANA Considerations Section in RFCs
                </xref>
                for a guide). If the draft does not require IANA to do
                anything, the section contains an explicit statement that this is the
                case (as above). If there are no requirements for IANA, the section will
                be removed during conversion into an RFC by the RFC Editor.
            </t>
        </section>
        <section anchor="Security" numbered="true" toc="default">
            <name>Security Considerations</name>
            <t>All drafts are required to have a security considerations section.
                See <xref target="RFC3552" format="default">RFC 3552</xref> for a guide.

            </t>
        </section>

    </middle>
    <!--  *****BACK MATTER ***** -->

    <back>
        <!-- References split into informative and normative -->

        <!-- There are 2 ways to insert reference entries from the citation libraries:
         1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
         2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
            (for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")

         Both are cited textually in the same manner: by using xref elements.
         If you use the PI option, xml2rfc will, by default, try to find included files in the same
         directory as the including file. You can also define the XML_LIBRARY environment variable
         with a value containing a set of directories to search.  These can be either in the local
         filing system or remote ones accessed by http (http://domain/dir/... ).-->

        <references>
            <name>Informative References</name>
            <!-- Here we use entities that we defined at the beginning. -->
               <reference anchor="ASNVT2020" target="https://doi.org/10.1145/3379444">
                <front>
                    <title>A Survey of Network Virtualization Techniques for Internet of Things using SDN and NFV</title>
                    <seriesInfo name="DOI" value="10.1145/3379444"/>
                    <author initials="Kashif" surname="Sharif" fullname="Kashif Sharif">
                        <organization/>
                        </author>
                    <author initials="Fan" surname="Li" fullname="Fan Li">
                        <organization/>
                        </author>
                    <author initials="Zohaib" surname="Latif" fullname="Zohaib Latif">
                        <organization/>
                        </author>
                     <author initials="MM" surname="Karim" fullname="M. M. Karim">
                        <organization/>
                        </author>
                    <author initials="Sujit" surname="Biswas" fullname="Sujit Biswas">
                        <organization/>
                        </author>
                    <date year="2020" month="April"/>
                 </front>
            </reference>

             <reference anchor="VNESURV2013" target="https://doi.org/10.1109/SURV.2013.013013.00155">
                <front>
                    <title>Virtual Network Embedding: A Survey</title>
                    <seriesInfo name="DOI" value="10.1109/SURV.2013.013013.00155"/>
                    <author initials="A." surname="Fischer" fullname="Andreas Fischer">
                        <organization/>
                        </author>
                    <author initials="Juan Felipe" surname="Botero" fullname="Juan Felipe Botero">
                        <organization/>
                        </author>
                    <author initials="M." surname="Till Beck" fullname="Michael Till Beck">
                        <organization/>
                        </author>
                    <author initials="H." surname="De Meer" fullname="Hermann de Meer">
                        <organization/>
                        </author>
                    <author initials="X." surname="Hesselbach" fullname="Xavier Hesselbach">
                        <organization/>
                        </author>
                    <date year="2013"/>
                 </front>
            </reference>

            <reference anchor="ENViNE2021" target="https://ieeexplore.ieee.org/document/9415185">
                <front>
                    <title>Ego Network-Based Virtual Network Embedding Scheme for Revenue Maximization</title>
                    <seriesInfo name="DOI" value="10.1109/ICAIIC51459.2021.9415185"/>
                    <author initials="IHSAN" surname="ULLAH" fullname="Ihsan Ullah">
                        <organization/>
                    </author>
                    <author initials="Hyun-Kyo" surname="Lim" fullname="Hyun-Kyo Lim">
                        <organization/>
                    </author>
                    <author initials="Youn-Hee" surname="Han" fullname="Youn-Hee Han">
                        <organization/>
                    </author>
                   <date year="2021" month="April"/>
                    </front>
            </reference>

            <reference anchor="SUR2018" target="https://link.springer.com/article/10.1186/s13174-018-0087-2">
                <front>
                    <title>A Comprehensive survey on Machine Learning for Networking: Evolution, Applications and Research Opportunities</title>
                    <seriesInfo name="DOI" value="10.1186/s13174-018-0087-2"/>
                    <author initials="Raouf" surname="Boutaba" fullname="Raouf Boutaba">
                        <organization/>
                    </author>
                    <author initials="Mohammad" surname="Salahuddin" fullname="Mohammad A. Salahuddin">
                        <organization/>
                    </author>
                    <author initials="Noura" surname="Limam" fullname=" Noura Limam">
                        <organization/>
                    </author>
                    <author initials="Sara" surname="Ayoubi" fullname=" Sara Ayoubi">
                        <organization/>
                    </author>
                    <author initials="Nashid" surname="Shahriar" fullname="Nashid Shahriar">
                        <organization/>
                    </author>
                    <author initials="Felipe" surname="Estrada-Solano" fullname="Felipe Estrada-Solano">
                        <organization/>
                    </author>
                    <author initials="Oscar" surname="M. Caicedo " fullname="Oscar M. Caicedo ">
                        <organization/>
                    </author>
                   <date year="2018" month="June"/>
                    </front>
            </reference>

             <reference anchor="DVNEGCN2021" target="https://ieeexplore.ieee.org/document/9475485">
                <front>
                    <title>Dynamic Virtual Network Embedding Algorithm based on Graph Convolution Neural Network and Reinforcement Learning</title>
                    <seriesInfo name="DOI" value="10.1109/JIOT.2021.3095094"/>
                    <author initials="Peiying" surname="Zhang" fullname="Peiying Zhang">
                        <organization/>
                    </author>
                    <author initials="Chao" surname="Wang" fullname="Chao Wang">
                        <organization/>
                    </author>
                    <author initials="NeeraJ" surname="Kumar" fullname="Neeraj Kumar">
                        <organization/>
                    </author>
                    <author initials="Weishan" surname="Zhang" fullname="Weishan Zhang">
                        <organization/>
                    </author>
                    <author initials="Lei" surname="Liu" fullname="Lei Liu">
                        <organization/>
                    </author>

                   <date year="2021" month="July"/>
                    </front>
            </reference>

               <reference anchor="MLCNM2018" target="https://ieeexplore.ieee.org/document/8255757">
                <front>
                    <title>Machine Learning for Cognitive Network Management</title>
                    <seriesInfo name="DOI" value="10.1109/MCOM.2018.1700560"/>
                    <author initials="Sara" surname="Ayoubi" fullname="Sara Ayoubi">
                        <organization/>
                    </author>
                    <author initials="Limam" surname="Noura" fullname="Noura Limam">
                        <organization/>
                    </author>
                    <author initials="Mohammad" surname="Salahuddin" fullname="Mohammad A. Salahuddin">
                        <organization/>
                    </author>
                    <author initials="Nashid" surname="Shahriar" fullname="Nashid Shahriar">
                        <organization/>
                    </author>
                    <author initials="NRaouf" surname="Boutaba" fullname="Raouf Boutaba">
                        <organization/>
                    </author>
                    <author initials="Felipe" surname="Estrada-Solano" fullname="Felipe Estrada-Solano">
                        <organization/>
                    </author>
                    <author initials="Oscar" surname="M. Caicedo " fullname="Oscar M. Caicedo ">
                        <organization/>
                    </author>
                   <date year="2018" month="Jan"/>
                    </front>
            </reference>

        <reference anchor="MVNNML2021" target="https://www.semanticscholar.org/paper/Managing-Virtualized-Networks-and-Services-with-Boutaba-Shahriar/48b8fc73c1609d4632d7db5e67e373a62a3cc1f6">
                <front>
                    <title>Managing Virtualized Networks and Services with Machine Learning</title>
                    <seriesInfo name="DOI" value="48b8fc73c1609d4632d7db5e67e373a62a3cc1f6"/>
                    <author initials="Raouf" surname="Boutaba" fullname="Raouf Boutaba">
                        <organization/>
                    </author>
                    <author initials="Nashid" surname="Shahriar" fullname="Nashid Shahriar">
                        <organization/>
                    </author>
                    <author initials="Mohammad" surname="A" fullname="Salahuddin">
                        <organization/>
                    </author>
                    <author initials="Noura" surname="Limam" fullname="Limam">
                        <organization/>
                    </author>
                    <date year="2021" month="Jan"/>
                    </front>
            </reference>

            <reference anchor="VNEQS2021" target="https://link.springer.com/article/10.1007/s00607-020-00883-w">
                <front>
                    <title>VNE Solution for Network Differentiated QoS and Security Requirements: From the Perspective of Deep Reinforcement Learning</title>
                    <seriesInfo name="DOI" value="10.1007/s00607-020-00883-w"/>
                    <author initials="Chao" surname="Wang" fullname="Chao Wang">
                        <organization/>
                    </author>
                    <author initials=" Ranbir Singh" surname="Batth" fullname=" Ranbir Singh Batth">
                        <organization/>
                    </author>
                    <author initials="Peiying" surname="Zhang" fullname="Peiying Zhang">
                        <organization/>
                    </author>
                    <author initials="Gagangeet" surname="Aujla" fullname="Gagangeet Singh Aujla">
                        <organization/>
                    </author>
                     <author initials="Youxiang" surname="Duan " fullname="Youxiang Duan ">
                        <organization/>
                    </author>
                     <author initials="Lihua" surname="Ren" fullname="Lihua Ren ">
                        <organization/>
                    </author>
                    <date year="2021" month="Jan"/>
                    </front>
            </reference>


            <reference anchor="NRRL2020" target="https://ieeexplore.ieee.org/document/9109671">
                <front>
                    <title>Network Resource Allocation Strategy Based on Deep Reinforcement Learning</title>
                    <seriesInfo name="DOI" value="10.1109/OJCS.2020.3000330"/>
                    <author initials="" surname="" fullname="Shidong Zhang; Chao Wang; Junsan Zhang; Youxiang Duan; Xinhong You;  Peiying Zhang">
                        <organization/>
                    </author>

                    <date year="2020" month="June"/>
                    </front>
            </reference>

             <reference anchor="MVNE2020" target="https://doi.org/10.1002/cpe.6020">
                <front>
                    <title>Modeling on Virtual Network Embedding using Reinforcement Learning</title>
                    <seriesInfo name="DOI" value="10.1002/cpe.6020"/>
                    <author initials="" surname="" fullname="Cong Wang, Fanghui Zheng, Guangcong Zheng, Sancheng Peng, Zejie Tian, Yujia Guo, Guorui Li, Ying Yuan">
                        <organization/>
                    </author>

                    <date year="2020" month="Sep"/>
                    </front>
            </reference>

            <reference anchor="CDVNE2020" target="https://ieeexplore.ieee.org/document/8982091">
                <front>
                    <title>A Continuous-Decision Virtual Network Embedding Scheme Relying on Reinforcement Learning</title>
                    <seriesInfo name="DOI" value="10.1109/TNSM.2020.2971543"/>
                    <author initials="" surname="" fullname="Haipeng Yao; Sihan Ma; Jingjing Wang; Peiying Zhang; Chunxiao Jiang; Song Guo">
                        <organization/>
                    </author>

                    <date year="2020" month="Feb"/>
                    </front>
            </reference>

            <reference anchor="PPRL2020" target="https://ieeexplore.ieee.org/document/8982091">
                <front>
                    <title>A Privacy-Preserving Reinforcement Learning Algorithm for Multi-Domain Virtual Network Embedding</title>
                    <seriesInfo name="DOI" value="10.1109/TNSM.2020.2971543"/>
                    <author initials="" surname="" fullname="Davide Andreoletti; Tanya Velichkova; Giacomo Verticale; Massimo Tornatore; Silvia Giordano">
                        <organization/>
                    </author>

                    <date year="2020" month="Sep"/>
                    </front>
            </reference>

         <reference anchor="RLVNEWSN2020" target="https://ieeexplore.ieee.org/document/9253442">
                <front>
                    <title>Reinforcement Learning for Virtual Network Embedding in Wireless Sensor Networks</title>
                    <seriesInfo name="DOI" value="10.1109/WiMob50308.2020.9253442"/>
                    <author initials="" surname="" fullname="Haitham Afifi; Holger Karl">
                        <organization/>
                    </author>

                    <date year="2020" month="Oct"/>
                    </front>
            </reference>

            <reference anchor="QLDC2019" target="https://link.springer.com/article/10.1007/s00521-019-04376-6">
                <front>
                    <title>A Q-Learning-Based Approach for Virtual Network Embedding in Data Center</title>
                    <seriesInfo name="DOI" value="10.1007/s00521-019-04376-6"/>
                    <author initials="" surname="" fullname="Ying Yuan, Zejie Tian, Cong Wang, Fanghui Zheng, Yanxia Lv ">
                        <organization/>
                    </author>
                    <date year="2019" month="July"/>
                    </front>
            </reference>

           <reference anchor="VNFFG2020" target="https://www.rfc-editor.org/info/rfc2629">
                <front>
                    <title>Evolutionary Actor-Multi-Critic Model for VNF-FG Embedding</title>
                    <seriesInfo name="DOI" value="10.1109/CCNC46108.2020.9045434"/>
                    <author initials="P.T" surname="Anh Quang" fullname="Pham Tran Anh Quang">
                        <organization/>
                    </author>
                    <author initials="Y." surname="Hadjadj-Aoul" fullname="Yassine Hadjadj-Aoul">
                        <organization/>
                    </author>
                    <author initials="A." surname="Outtagarts" fullname="Abdelkader Outtagarts">
                        <organization/>
                    </author>
                    <date year="2020" month="Jan"/>
                  </front>
            </reference>

            <reference anchor="VNEGCN2020" target="https://ieeexplore.ieee.org/document/9060910">
                <front>
                    <title>Automatic Virtual Network Embedding: A Deep Reinforcement Learning Approach With Graph Convolutional Networks</title>
                    <seriesInfo name="DOI" value="10.1109/JSAC.2020.2986662"/>
                    <author initials="Z." surname="Yan" fullname="Zhongxia Yan">
                        <organization/>
                    </author>
                    <author initials="J." surname="Ge" fullname="Jingguo Ge">
                        <organization/>
                    </author>
                    <author initials="Y." surname="Wu" fullname="Yulei Wu">
                        <organization/>
                    </author>
                    <author initials="L." surname="Li" fullname="L. Li">
                        <organization/>
                    </author>
                    <author initials="T." surname="Li" fullname="T. Li">
                        <organization/>
                    </author>
                    <date year="2020" month="April"/>
                                    </front>
            </reference>

             <reference anchor="NFVDeep2019" target="https://doi.org/10.1145/3326285.3329056">
                <front>
                    <title>NFVdeep: Adaptive Online Service Function Chain Deployment with Deep Reinforcement Learning</title>
                    <seriesInfo name="DOI" value="10.1145/3326285.3329056"/>
                    <author initials="Y." surname="Xiao" fullname="Y. Xiao">
                        <organization/>
                    </author>
                    <author initials="Q." surname="Zhang" fullname="Q. Zhang">
                        <organization/>
                    </author>
                    <author initials="F." surname="Liu" fullname="F. Liu">
                        <organization/>
                    </author>
                    <author initials="J." surname="Wang" fullname="J. Wang">
                        <organization/>
                    </author>
                    <author initials="M." surname="Zhao" fullname="M. Zhao">
                        <organization/>
                    </author>

                    <author initials="Z." surname="Zhang" fullname="Z. Zhang">
                        <organization/>
                    </author>

                    <author initials="J." surname="Zhang" fullname="J. Zhang">
                        <organization/>
                    </author>
                    <date year="2019" month="June"/>
                    </front>
            </reference>

            <reference anchor="DeepViNE2019" target="https://ieeexplore.ieee.org/document/8845171">
                <front>
                    <title>DeepViNE: Virtual Network Embedding with Deep Reinforcement Learning</title>
                    <seriesInfo name="DOI" value="10.1109/INFCOMW.2019.8845171"/>
                    <author initials="M." surname="Dolati" fullname="M. Dolati">
                        <organization/>
                    </author>
                    <author initials="S. B." surname="Hassanpour" fullname="S. B. Hassanpour">
                        <organization/>
                    </author>
                    <author initials="M." surname="Ghaderi" fullname="M. Ghaderi">
                        <organization/>
                    </author>
                    <author initials="A." surname="Khonsari" fullname="A. Khonsari">
                        <organization/>
                    </author>
                    <date year="2019" month="Sep"/>
                   </front>
            </reference>

            <reference anchor="VNETD2019" target="https://doi.org/10.1016/j.comnet.2019.05.004">
                <front>
                    <title>VNE-TD: A Virtual Network Embedding Algorithm Based on Temporal-Difference Learning</title>
                    <seriesInfo name="DOI" value="10.1016/j.comnet.2019.05.004"/>
                    <author initials="S." surname="Wang" fullname="S. Wang">
                        <organization/>
                    </author>
                    <author initials="J." surname="Bi" fullname="J. Bi">
                        <organization/>
                    </author>
                    <author initials="A." surname="V.Vasilakos" fullname="A. V. Vasilakos">
                        <organization/>
                    </author>
                    <author initials="Q." surname="Fan" fullname="Q. Fan">
                        <organization/>
                    </author>
                    <date year="2019" month="Oct"/>
                    </front>
            </reference>

             <reference anchor="RDAM2018" target="https://ieeexplore.ieee.org/document/8469054">
                <front>
                    <title>RDAM: A Reinforcement Learning Based Dynamic Attribute Matrix Representation for Virtual Network Embedding</title>
                    <seriesInfo name="DOI" value="10.1109/TETC.2018.2871549"/>
                    <author initials="" surname="" fullname="Haipeng Yao; Bo Zhang; Peiying Zhang; Sheng Wu; Chunxiao Jiang; Song Guo">
                        <organization/>
                    </author>

                    <date year="2018" month="Sep"/>
                    </front>
            </reference>

            <reference anchor="MOQL2018" target="https://jwcn-eurasipjournals.springeropen.com/articles/10.1186/s13638-018-1170-x">
                <front>
                    <title>Multi-Objective Virtual Network Embedding Algorithm Based on Q-learning and Curiosity-Driven</title>
                    <seriesInfo name="DOI" value="10.1109/TETC.2018.2871549"/>
                    <author initials="" surname="" fullname="Mengyang He, Lei Zhuang, Shuaikui Tian, Guoqing Wang, Kunli Zhang ">
                        <organization/>
                    </author>
                    <date year="2018" month="June"/>
                    </front>
            </reference>

            <reference anchor="ZTORCH2018" target="https://ieeexplore.ieee.org/document/8450000">
                <front>
                    <title>Z-TORCH: An Automated NFV Orchestration and Monitoring Solution</title>
                    <seriesInfo name="DOI" value="10.1109/TNSM.2018.2867827"/>
                    <seriesInfo name="RFC" value="3552"/>
                    <seriesInfo name="BCP" value="72"/>
                    <author initials="V." surname="Sciancalepore" fullname="V. Sciancalepore">
                        <organization/>
                    </author>
                    <author initials="X." surname="Chen" fullname="X. Chen">
                        <organization/>
                    </author>
                    <author initials="F. Z." surname="Yousaf" fullname="F. Z. Yousaf">
                        <organization/>
                    </author>
                    <author initials="X." surname="Costa-Perez" fullname="X. Costa-Perez">
                        <organization/>
                    </author>
                    <date year="2018" month="August"/>
                    </front>
            </reference>

            <reference anchor="NeuroViNE2018" target="https://ieeexplore.ieee.org/document/8486263">
                <front>
                    <title>NeuroViNE: A Neural Preprocessor for Your Virtual Network Embedding Algorithm</title>
                    <seriesInfo name="DOI" value=" 10.1109/INFOCOM.2018.8486263"/>
                    <author initials="" surname="" fullname="Andreas Blenk; Patrick Kalmbach; Johannes Zerwas; Michael Jarschel; Stefan Schmid; Wolfgang Kellerer">
                        <organization/>
                    </author>
                    <date year="2018" month="June"/>
                    </front>
            </reference>

             <reference anchor="QVNE2020" target="https://link.springer.com/article/10.1007/s00521-019-04376-6">
                <front>
                    <title>A Q-learning-Based Approach for Virtual Network Embedding in Data Center</title>
                    <seriesInfo name="DOI" value="10.1007/s00521-019-04376-6"/>
                    <author initials="Y." surname="Yuan" fullname="Y. Yuan">
                        <organization/>
                    </author>
                    <author initials="Z." surname="Tian" fullname="Z. Tian">
                        <organization/>
                    </author>
                    <author initials="C." surname="Wang" fullname="C. Wang">
                        <organization/>
                    </author>
                    <author initials="F." surname="Zheng" fullname="F. Zheng">
                        <organization/>
                    </author>
                    <author initials="Y." surname="Lv" fullname="Y. Lv">
                     <organization/>
                    </author>
                    <date year="2020" month="July"/>
                   </front>
            </reference>

             <reference anchor="RFC7364" target="https://https://datatracker.ietf.org/doc/rfc7364/">
                <front>
                    <title>Problem Statement: Overlays for Network Virtualization</title>
                        <author initials="P.T" surname="Thomas" fullname="Thomas Narten">
                        <organization/>
                    </author>
                    <author initials="Y." surname="Eric" fullname="Eric Gray">
                        <organization/>
                    </author>
                    <author initials="A." surname="David" fullname="David Black">
                        <organization/>
                    </author>
                     <author initials="A." surname="Luyuan" fullname="Luyuan Fang">
                        <organization/>
                    </author>
                    <author initials="A." surname="Larry" fullname="Larry Kreeger">
                        <organization/>
                    </author>
                    <author initials="A." surname="Maria Napierala" fullname="Maria Napierala">
                        <organization/>
                    </author>
                    <date year="2015" month="October"/>
                   </front>
            </reference>
            <reference anchor="RFC3552" target="https://www.rfc-editor.org/info/rfc3552">
                <front>
                    <title>Guidelines for Writing RFC Text on Security Considerations</title>
                    <seriesInfo name="DOI" value="10.17487/RFC3552"/>
                    <seriesInfo name="RFC" value="3552"/>
                    <seriesInfo name="BCP" value="72"/>
                    <author initials="E." surname="Rescorla" fullname="E. Rescorla">
                        <organization/>
                    </author>
                    <author initials="B." surname="Korver" fullname="B. Korver">
                        <organization/>
                    </author>
                    <date year="2003" month="July"/>
                   </front>
            </reference>

            <reference anchor="RFC5226" target="https://www.rfc-editor.org/info/rfc5226">
                <front>
                    <title>Guidelines for Writing an IANA Considerations Section in RFCs</title>
                    <seriesInfo name="DOI" value="10.17487/RFC5226"/>
                    <seriesInfo name="RFC" value="5226"/>
                    <author initials="T." surname="Narten" fullname="T. Narten">
                        <organization/>
                    </author>
                    <author initials="H." surname="Alvestrand" fullname="H. Alvestrand">
                        <organization/>
                    </author>
                    <date year="2008" month="May"/>
                    <abstract>
                        <t>Many protocols make use of identifiers consisting of constants and other well-known
                            values. Even after a protocol has been defined and deployment has begun, new values may
                            need to be assigned (e.g., for a new option type in DHCP, or a new encryption or
                            authentication transform for IPsec). To ensure that such quantities have consistent
                            values and interpretations across all implementations, their assignment must be
                            administered by a central authority. For IETF protocols, that role is provided by the
                            Internet Assigned Numbers Authority (IANA).
                        </t>
                        <t>In order for IANA to manage a given namespace prudently, it needs guidelines describing
                            the conditions under which new values can be assigned or when modifications to existing
                            values can be made. If IANA is expected to play a role in the management of a namespace,
                            IANA must be given clear and concise instructions describing that role. This document
                            discusses issues that should be considered in formulating a policy for assigning values
                            to a namespace and provides guidelines for authors on the specific text that must be
                            included in documents that place demands on IANA.
                        </t>
                        <t>This document obsoletes RFC 2434. This document specifies an Internet Best Current
                            Practices for the Internet Community, and requests discussion and suggestions for
                            improvements.
                        </t>
                    </abstract>
                </front>
            </reference>
            <!-- A reference written by by an organization not a person. -->
        </references>
<!--        <section anchor="app-additionala" numbered="true" toc="default">-->
<!--            <name>Acknowledgments</name>-->
<!--            <t>This becomes an Appendix.</t>-->
<!--        </section>-->
<!--        <section anchor="app-additionalb" numbered="true" toc="default">-->
<!--            <name>Contributors</name>-->
<!--            <t>This becomes an Appendix.</t>-->
<!--        </section>-->
        <!-- Change Log

    v00 2006-03-15  EBD   Initial version

    v01 2006-04-03  EBD   Moved PI location back to position 1 -
                         v3.1 of XMLmind is better with them at this location.
    v02 2007-03-07  AH    removed extraneous nested_list attribute,
                         other minor corrections
    v03 2007-03-09  EBD   Added comments on null IANA sections and fixed heading capitalization.
                         Modified comments around figure to reflect non-implementation of
                         figure indent control.  Put in reference using anchor="DOMINATION".
                         Fixed up the date specification comments to reflect current truth.
    v04 2007-03-09 AH     Major changes: shortened discussion of PIs,
                         added discussion of rfc include.
    v05 2007-03-10 EBD    Added preamble to C program example to tell about ABNF and alternative
                         images. Removed meta-characters from comments (causes problems).

    v06 2010-04-01 TT     Changed ipr attribute values to latest ones. Changed date to
                         year only, to be consistent with the comments. Updated the
                         IANA guidelines reference from the I-D to the finished RFC.
    v07 2020-01-21 HL    Converted the template to use XML schema version 3.
        -->
    </back>
</rfc>
