Browse Source

ospf: Convert MAX_AGE LSA list to tree

Store the MaxAge LSA list in a tree instead of a linked list for efficient access.
Walking the list can be quite inefficient in some large systems and under certain tests.

ospfd keeps the LSAs that have reached MaxAge in a separate linked list so
that a remover/walker thread can delete them. When a new LSA is installed,
the old LSA is ejected, and on ejection the MaxAge LSA list is traversed to
make sure the old LSA is also removed from that list if it is present there.

When a large number (> 5K) of MaxAge LSAs are bombarding the system, walking
this list takes significant time, causing timers to fire and actions to be
taken, such as expiring neighbors due to expiry of the DeadInterval (especially
when the timer is really low, <= 12s), creating a spiral of instability.

Storing the MaxAge LSAs in a tree instead of a list mitigates this problem.

Signed-off-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
Reviewed-by: Ayan Banerjee <ayan@cumulusnetworks.com>
Reviewed-by: Scott Feldman <sfeldma@cumulusnetworks.com>
Reviewed-by: Shrijeet Mukherjee <shm@cumulusnetworks.com>
Signed-off-by: Scott Feldman <sfeldma@cumulusnetworks.com>
Dinesh Dutt 7 years ago
parent
commit
91e6a0e5ca
6 changed files with 83 additions and 32 deletions
  1. 43 9
      ospfd/ospf_lsa.c
  2. 12 9
      ospfd/ospf_lsdb.c
  3. 1 0
      ospfd/ospf_lsdb.h
  4. 14 9
      ospfd/ospf_vty.c
  5. 12 4
      ospfd/ospfd.c
  6. 1 1
      ospfd/ospfd.h

+ 43 - 9
ospfd/ospf_lsa.c

@@ -2828,7 +2828,7 @@ ospf_maxage_lsa_remover (struct thread *thread)
 {
   struct ospf *ospf = THREAD_ARG (thread);
   struct ospf_lsa *lsa;
-  struct listnode *node, *nnode;
+  struct route_node *rn;
   int reschedule = 0;
 
   ospf->t_maxage = NULL;
@@ -2839,8 +2839,13 @@ ospf_maxage_lsa_remover (struct thread *thread)
   reschedule = !ospf_check_nbr_status (ospf);
 
   if (!reschedule)
-    for (ALL_LIST_ELEMENTS (ospf->maxage_lsa, node, nnode, lsa))
+    for (rn = route_top(ospf->maxage_lsa); rn; rn = route_next(rn))
       {
+	if ((lsa = rn->info) == NULL)
+	  {
+	    continue;
+	  }
+
         if (lsa->retransmit_counter > 0)
           {
             reschedule = 1;
@@ -2893,13 +2898,22 @@ ospf_maxage_lsa_remover (struct thread *thread)
 void
 ospf_lsa_maxage_delete (struct ospf *ospf, struct ospf_lsa *lsa)
 {
-  struct listnode *n;
+  struct route_node *rn;
+  struct prefix_ls lsa_prefix;
 
-  if ((n = listnode_lookup (ospf->maxage_lsa, lsa)))
+  ls_prefix_set (&lsa_prefix, lsa);
+
+  if ((rn = route_node_lookup(ospf->maxage_lsa,
+			      (struct prefix *)&lsa_prefix)))
     {
-      list_delete_node (ospf->maxage_lsa, n);
-      UNSET_FLAG(lsa->flags, OSPF_LSA_IN_MAXAGE);
-      ospf_lsa_unlock (&lsa); /* maxage_lsa */
+      if (rn->info == lsa)
+	{
+	  UNSET_FLAG(lsa->flags, OSPF_LSA_IN_MAXAGE);
+	  ospf_lsa_unlock (&lsa); /* maxage_lsa */
+	  rn->info = NULL;
+	  route_unlock_node (rn); /* route_node_lookup */
+	}
+	  route_unlock_node (rn); /* route_node_lookup */
     }
 }
 
@@ -2911,6 +2925,9 @@ ospf_lsa_maxage_delete (struct ospf *ospf, struct ospf_lsa *lsa)
 void
 ospf_lsa_maxage (struct ospf *ospf, struct ospf_lsa *lsa)
 {
+  struct prefix_ls lsa_prefix;
+  struct route_node *rn;
+
   /* When we saw a MaxAge LSA flooded to us, we put it on the list
      and schedule the MaxAge LSA remover. */
   if (CHECK_FLAG(lsa->flags, OSPF_LSA_IN_MAXAGE))
@@ -2921,8 +2938,25 @@ ospf_lsa_maxage (struct ospf *ospf, struct ospf_lsa *lsa)
       return;
     }
 
-  listnode_add (ospf->maxage_lsa, ospf_lsa_lock (lsa));
-  SET_FLAG(lsa->flags, OSPF_LSA_IN_MAXAGE);
+  ls_prefix_set (&lsa_prefix, lsa);
+  if ((rn = route_node_get (ospf->maxage_lsa,
+			    (struct prefix *)&lsa_prefix)) != NULL)
+    {
+      if (rn->info != NULL)
+	{
+	  route_unlock_node (rn);
+	}
+      else
+	{
+	  rn->info = ospf_lsa_lock(lsa);
+	  SET_FLAG(lsa->flags, OSPF_LSA_IN_MAXAGE);
+	}
+    }
+  else
+    {
+      zlog_err("Unable to allocate memory for maxage lsa\n");
+      assert(0);
+    }
 
   if (IS_DEBUG_OSPF (lsa, LSA_FLOODING))
     zlog_debug ("LSA[%s]: MaxAge LSA remover scheduled.", dump_lsa_key (lsa));

+ 12 - 9
ospfd/ospf_lsdb.c

@@ -72,13 +72,16 @@ ospf_lsdb_cleanup (struct ospf_lsdb *lsdb)
     route_table_finish (lsdb->type[i].db);
 }
 
-static void
-lsdb_prefix_set (struct prefix_ls *lp, struct ospf_lsa *lsa)
+void
+ls_prefix_set (struct prefix_ls *lp, struct ospf_lsa *lsa)
 {
-  lp->family = 0;
-  lp->prefixlen = 64;
-  lp->id = lsa->data->id;
-  lp->adv_router = lsa->data->adv_router;
+  if (lp && lsa && lsa->data)
+    {
+      lp->family = 0;
+      lp->prefixlen = 64;
+      lp->id = lsa->data->id;
+      lp->adv_router = lsa->data->adv_router;
+    }
 }
 
 static void
@@ -115,7 +118,7 @@ ospf_lsdb_add (struct ospf_lsdb *lsdb, struct ospf_lsa *lsa)
   struct route_node *rn;
 
   table = lsdb->type[lsa->data->type].db;
-  lsdb_prefix_set (&lp, lsa);
+  ls_prefix_set (&lp, lsa);
   rn = route_node_get (table, (struct prefix *)&lp);
   
   /* nothing to do? */
@@ -167,7 +170,7 @@ ospf_lsdb_delete (struct ospf_lsdb *lsdb, struct ospf_lsa *lsa)
   
   assert (lsa->data->type < OSPF_MAX_LSA);
   table = lsdb->type[lsa->data->type].db;
-  lsdb_prefix_set (&lp, lsa);
+  ls_prefix_set (&lp, lsa);
   if ((rn = route_node_lookup (table, (struct prefix *) &lp)))
     {
       if (rn->info == lsa)
@@ -218,7 +221,7 @@ ospf_lsdb_lookup (struct ospf_lsdb *lsdb, struct ospf_lsa *lsa)
   struct ospf_lsa *find;
 
   table = lsdb->type[lsa->data->type].db;
-  lsdb_prefix_set (&lp, lsa);
+  ls_prefix_set (&lp, lsa);
   rn = route_node_lookup (table, (struct prefix *) &lp);
   if (rn)
     {

+ 1 - 0
ospfd/ospf_lsdb.h

@@ -66,6 +66,7 @@ extern struct ospf_lsdb *ospf_lsdb_new (void);
 extern void ospf_lsdb_init (struct ospf_lsdb *);
 extern void ospf_lsdb_free (struct ospf_lsdb *);
 extern void ospf_lsdb_cleanup (struct ospf_lsdb *);
+extern void ls_prefix_set (struct prefix_ls *lp, struct ospf_lsa *lsa);
 extern void ospf_lsdb_add (struct ospf_lsdb *, struct ospf_lsa *);
 extern void ospf_lsdb_delete (struct ospf_lsdb *, struct ospf_lsa *);
 extern void ospf_lsdb_delete_all (struct ospf_lsdb *);

+ 14 - 9
ospfd/ospf_vty.c

@@ -4035,21 +4035,26 @@ show_ip_ospf_database_summary (struct vty *vty, struct ospf *ospf, int self)
 static void
 show_ip_ospf_database_maxage (struct vty *vty, struct ospf *ospf)
 {
-  struct listnode *node;
+  struct route_node *rn;
   struct ospf_lsa *lsa;
 
   vty_out (vty, "%s                MaxAge Link States:%s%s",
            VTY_NEWLINE, VTY_NEWLINE, VTY_NEWLINE);
 
-  for (ALL_LIST_ELEMENTS_RO (ospf->maxage_lsa, node, lsa))
+  for (rn = route_top (ospf->maxage_lsa); rn; rn = route_next (rn))
     {
-      vty_out (vty, "Link type: %d%s", lsa->data->type, VTY_NEWLINE);
-      vty_out (vty, "Link State ID: %s%s",
-               inet_ntoa (lsa->data->id), VTY_NEWLINE);
-      vty_out (vty, "Advertising Router: %s%s",
-               inet_ntoa (lsa->data->adv_router), VTY_NEWLINE);
-      vty_out (vty, "LSA lock count: %d%s", lsa->lock, VTY_NEWLINE);
-      vty_out (vty, "%s", VTY_NEWLINE);
+      struct ospf_lsa *lsa;
+
+      if ((lsa = rn->info) != NULL)
+	{
+	  vty_out (vty, "Link type: %d%s", lsa->data->type, VTY_NEWLINE);
+	  vty_out (vty, "Link State ID: %s%s",
+		   inet_ntoa (lsa->data->id), VTY_NEWLINE);
+	  vty_out (vty, "Advertising Router: %s%s",
+		   inet_ntoa (lsa->data->adv_router), VTY_NEWLINE);
+	  vty_out (vty, "LSA lock count: %d%s", lsa->lock, VTY_NEWLINE);
+	  vty_out (vty, "%s", VTY_NEWLINE);
+	}
     }
 }
 

+ 12 - 4
ospfd/ospfd.c

@@ -201,7 +201,7 @@ ospf_new (void)
 
   /* MaxAge init. */
   new->maxage_delay = OSFP_LSA_MAXAGE_REMOVE_DELAY_DEFAULT;
-  new->maxage_lsa = list_new ();
+  new->maxage_lsa = route_table_init();
   new->t_maxage_walker =
     thread_add_timer (master, ospf_lsa_maxage_walker,
                       new, OSPF_LSA_MAXAGE_CHECK_INTERVAL);
@@ -502,10 +502,18 @@ ospf_finish_final (struct ospf *ospf)
   ospf_lsdb_delete_all (ospf->lsdb);
   ospf_lsdb_free (ospf->lsdb);
 
-  for (ALL_LIST_ELEMENTS (ospf->maxage_lsa, node, nnode, lsa))
-    ospf_lsa_unlock (&lsa); /* maxage_lsa */
+  for (rn = route_top (ospf->maxage_lsa); rn; rn = route_next (rn))
+    {
+      struct ospf_lsa *lsa;
 
-  list_delete (ospf->maxage_lsa);
+      if ((lsa = rn->info) != NULL)
+	{
+	  ospf_lsa_unlock (&lsa);
+	  rn->info = NULL;
+	}
+      route_unlock_node (rn);
+    }
+  route_table_finish (ospf->maxage_lsa);
 
   if (ospf->old_table)
     ospf_route_table_free (ospf->old_table);

+ 1 - 1
ospfd/ospfd.h

@@ -248,7 +248,7 @@ struct ospf
   /* Time stamps. */
   struct timeval ts_spf;		/* SPF calculation time stamp. */
 
-  struct list *maxage_lsa;              /* List of MaxAge LSA for deletion. */
+  struct route_table *maxage_lsa;       /* List of MaxAge LSA for deletion. */
   int redistribute;                     /* Num of redistributed protocols. */
 
   /* Threads. */