Browse Source

zebra: atomic FIB updates

This commit updates the kernel API so that route changes are
atomically updated using change/replaces messages instead
of first sending a withdraw followed with update.

Same for zclient updates, changes are sent as single ADD
instead of DELETE + ADD.

Signed-off-by: Timo Teräs <timo.teras@iki.fi>
Timo Teräs 3 years ago
parent
commit
0abf6796c3
6 changed files with 171 additions and 307 deletions
  1. 1 13
      zebra/kernel_null.c
  2. 4 4
      zebra/kernel_socket.c
  3. 1 8
      zebra/rt.h
  4. 18 22
      zebra/rt_netlink.c
  5. 26 48
      zebra/rt_socket.c
  6. 121 212
      zebra/zebra_rib.c

+ 1 - 13
zebra/kernel_null.c

@@ -30,19 +30,7 @@
 #include "zebra/connected.h"
 #include "zebra/rib.h"
 
-int kernel_add_ipv4 (struct prefix *a, struct rib *b) { return 0; }
-#ifdef HAVE_SYS_WEAK_ALIAS_PRAGMA
-#pragma weak kernel_delete_ipv4 = kernel_add_ipv4
-#else
-int kernel_delete_ipv4 (struct prefix *a, struct rib *b) { return 0; }
-#endif
-
-int kernel_add_ipv6 (struct prefix *a, struct rib *b) { return 0; }
-#ifdef HAVE_SYS_WEAK_ALIAS_PRAGMA
-#pragma weak kernel_delete_ipv6 = kernel_add_ipv6
-#else
-int kernel_delete_ipv6 (struct prefix *a, struct rib *b) { return 0; }
-#endif
+int kernel_route_rib (struct prefix *a, struct rib *old, struct rib *new) { return 0; }
 
 int kernel_add_route (struct prefix_ipv4 *a, struct in_addr *b, int c, int d)
 { return 0; }

+ 4 - 4
zebra/kernel_socket.c

@@ -861,7 +861,7 @@ rtm_read (struct rt_msghdr *rtm)
     return;
 #endif
 
-  if ((rtm->rtm_type == RTM_ADD) && ! (flags & RTF_UP))
+  if ((rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE) && ! (flags & RTF_UP))
     return;
 
   /* This is connected route. */
@@ -1072,14 +1072,14 @@ rtm_write (int message,
 
   ifp = if_lookup_by_index (index);
 
-  if (gate && message == RTM_ADD)
+  if (gate && (message == RTM_ADD || message == RTM_CHANGE))
     msg.rtm.rtm_flags |= RTF_GATEWAY;
 
   /* When RTF_CLONING is unavailable on BSD, should we set some
    * other flag instead?
    */
 #ifdef RTF_CLONING
-  if (! gate && message == RTM_ADD && ifp &&
+  if (! gate && (message == RTM_ADD || message == RTM_CHANGE) && ifp &&
       (ifp->flags & IFF_POINTOPOINT) == 0)
     msg.rtm.rtm_flags |= RTF_CLONING;
 #endif /* RTF_CLONING */
@@ -1104,7 +1104,7 @@ rtm_write (int message,
 
   if (mask)
     msg.rtm.rtm_addrs |= RTA_NETMASK;
-  else if (message == RTM_ADD) 
+  else if (message == RTM_ADD || message == RTM_CHANGE)
     msg.rtm.rtm_flags |= RTF_HOST;
 
   /* Tagging route with flags */

+ 1 - 8
zebra/rt.h

@@ -27,16 +27,9 @@
 #include "if.h"
 #include "zebra/rib.h"
 
-extern int kernel_add_ipv4 (struct prefix *, struct rib *);
-extern int kernel_delete_ipv4 (struct prefix *, struct rib *);
+extern int kernel_route_rib (struct prefix *, struct rib *, struct rib *);
 extern int kernel_add_route (struct prefix_ipv4 *, struct in_addr *, int, int);
 extern int kernel_address_add_ipv4 (struct interface *, struct connected *);
 extern int kernel_address_delete_ipv4 (struct interface *, struct connected *);
 
-#ifdef HAVE_IPV6
-extern int kernel_add_ipv6 (struct prefix *, struct rib *);
-extern int kernel_delete_ipv6 (struct prefix *, struct rib *);
-
-#endif /* HAVE_IPV6 */
-
 #endif /* _ZEBRA_RT_H */

+ 18 - 22
zebra/rt_netlink.c

@@ -1607,8 +1607,7 @@ _netlink_route_debug(
 
 /* Routing table change via netlink interface. */
 static int
-netlink_route_multipath (int cmd, struct prefix *p, struct rib *rib,
-                         int family)
+netlink_route_multipath (int cmd, struct prefix *p, struct rib *rib)
 {
   int bytelen;
   struct sockaddr_nl snl;
@@ -1616,6 +1615,7 @@ netlink_route_multipath (int cmd, struct prefix *p, struct rib *rib,
   int recursing;
   int nexthop_num;
   int discard;
+  int family = PREFIX_FAMILY(p);
   const char *routedesc;
 
   struct
@@ -1632,7 +1632,7 @@ netlink_route_multipath (int cmd, struct prefix *p, struct rib *rib,
   bytelen = (family == AF_INET ? 4 : 16);
 
   req.n.nlmsg_len = NLMSG_LENGTH (sizeof (struct rtmsg));
-  req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
+  req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST;
   req.n.nlmsg_type = cmd;
   req.r.rtm_family = family;
   req.r.rtm_table = rib->table;
@@ -1803,30 +1803,26 @@ skip:
 }
 
 int
-kernel_add_ipv4 (struct prefix *p, struct rib *rib)
+kernel_route_rib (struct prefix *p, struct rib *old, struct rib *new)
 {
-  return netlink_route_multipath (RTM_NEWROUTE, p, rib, AF_INET);
-}
+  int ret;
 
-int
-kernel_delete_ipv4 (struct prefix *p, struct rib *rib)
-{
-  return netlink_route_multipath (RTM_DELROUTE, p, rib, AF_INET);
-}
+  if (!old && new)
+    return netlink_route_multipath (RTM_NEWROUTE, p, new);
+  if (old && !new)
+    return netlink_route_multipath (RTM_DELROUTE, p, old);
 
-#ifdef HAVE_IPV6
-int
-kernel_add_ipv6 (struct prefix *p, struct rib *rib)
-{
-  return netlink_route_multipath (RTM_NEWROUTE, p, rib, AF_INET6);
-}
+  /* Replace, can be done atomically if metric does not change;
+   * netlink uses [prefix, tos, priority] to identify prefix */
+  if (old->metric == new->metric)
+    return netlink_route_multipath (RTM_NEWROUTE, p, new);
 
-int
-kernel_delete_ipv6 (struct prefix *p, struct rib *rib)
-{
-  return netlink_route_multipath (RTM_DELROUTE, p, rib, AF_INET6);
+  /* Add + delete so the prefix does not disappear temporarily */
+  ret = netlink_route_multipath (RTM_NEWROUTE, p, new);
+  if (netlink_route_multipath (RTM_DELROUTE, p, old) < 0)
+    ret = -1;
+  return ret;
 }
-#endif /* HAVE_IPV6 */
 
 /* Interface address modification. */
 static int

+ 26 - 48
zebra/rt_socket.c

@@ -216,34 +216,6 @@ kernel_rtm_ipv4 (int cmd, struct prefix *p, struct rib *rib, int family)
   return 0; /*XXX*/
 }
 
-int
-kernel_add_ipv4 (struct prefix *p, struct rib *rib)
-{
-  int route;
-
-  if (zserv_privs.change(ZPRIVS_RAISE))
-    zlog (NULL, LOG_ERR, "Can't raise privileges");
-  route = kernel_rtm_ipv4 (RTM_ADD, p, rib, AF_INET);
-  if (zserv_privs.change(ZPRIVS_LOWER))
-    zlog (NULL, LOG_ERR, "Can't lower privileges");
-
-  return route;
-}
-
-int
-kernel_delete_ipv4 (struct prefix *p, struct rib *rib)
-{
-  int route;
-
-  if (zserv_privs.change(ZPRIVS_RAISE))
-    zlog (NULL, LOG_ERR, "Can't raise privileges");
-  route = kernel_rtm_ipv4 (RTM_DELETE, p, rib, AF_INET);
-  if (zserv_privs.change(ZPRIVS_LOWER))
-    zlog (NULL, LOG_ERR, "Can't lower privileges");
-
-  return route;
-}
-
 #ifdef HAVE_IPV6
 
 #ifdef SIN6_LEN
@@ -273,8 +245,7 @@ sin6_masklen (struct in6_addr mask)
 
 /* Interface between zebra message and rtm message. */
 static int
-kernel_rtm_ipv6_multipath (int cmd, struct prefix *p, struct rib *rib,
-			   int family)
+kernel_rtm_ipv6 (int cmd, struct prefix *p, struct rib *rib, int family)
 {
   struct sockaddr_in6 *mask;
   struct sockaddr_in6 sin_dest, sin_mask, sin_gate;
@@ -369,7 +340,7 @@ kernel_rtm_ipv6_multipath (int cmd, struct prefix *p, struct rib *rib,
 #if 0
       if (error)
 	{
-	  zlog_info ("kernel_rtm_ipv6_multipath(): nexthop %d add error=%d.",
+	  zlog_info ("kernel_rtm_ipv6(): nexthop %d add error=%d.",
 	    nexthop_num, error);
 	}
 #else
@@ -383,38 +354,45 @@ kernel_rtm_ipv6_multipath (int cmd, struct prefix *p, struct rib *rib,
   if (nexthop_num == 0)
     {
       if (IS_ZEBRA_DEBUG_KERNEL)
-	zlog_debug ("kernel_rtm_ipv6_multipath(): No useful nexthop.");
+	zlog_debug ("kernel_rtm_ipv6(): No useful nexthop.");
       return 0;
     }
 
   return 0; /*XXX*/
 }
 
+#endif
+
 int
-kernel_add_ipv6 (struct prefix *p, struct rib *rib)
+kernel_route_rib (struct prefix *p, struct rib *old, struct rib *new)
 {
-  int route;
-
-  if (zserv_privs.change(ZPRIVS_RAISE))
-    zlog (NULL, LOG_ERR, "Can't raise privileges");
-  route =  kernel_rtm_ipv6_multipath (RTM_ADD, p, rib, AF_INET6);
-  if (zserv_privs.change(ZPRIVS_LOWER))
-    zlog (NULL, LOG_ERR, "Can't lower privileges");
+  struct rib *rib;
+  int route = 0, cmd;
 
-  return route;
-}
+  if (!old && new)
+    cmd = RTM_ADD;
+  else if (old && !new)
+    cmd = RTM_DELETE;
+  else
+    cmd = RTM_CHANGE;
 
-int
-kernel_delete_ipv6 (struct prefix *p, struct rib *rib)
-{
-  int route;
+  rib = new ? new : old;
 
   if (zserv_privs.change(ZPRIVS_RAISE))
     zlog (NULL, LOG_ERR, "Can't raise privileges");
-  route =  kernel_rtm_ipv6_multipath (RTM_DELETE, p, rib, AF_INET6);
+
+  switch (PREFIX_FAMILY(p))
+    {
+    case AF_INET:
+      route = kernel_rtm_ipv4 (cmd, p, rib, AF_INET);
+      break;
+    case AF_INET6:
+      route = kernel_rtm_ipv6 (cmd, p, rib, AF_INET6);
+      break;
+    }
+
   if (zserv_privs.change(ZPRIVS_LOWER))
     zlog (NULL, LOG_ERR, "Can't lower privileges");
 
   return route;
 }
-#endif /* HAVE_IPV6 */

+ 121 - 212
zebra/zebra_rib.c

@@ -1103,49 +1103,8 @@ nexthop_active_update (struct route_node *rn, struct rib *rib, int set)
 
 
 
-static void
-rib_install_kernel (struct route_node *rn, struct rib *rib)
-{
-  int ret = 0;
-  struct nexthop *nexthop, *tnexthop;
-  rib_table_info_t *info = rn->table->info;
-  int recursing;
-
-  if (info->safi != SAFI_UNICAST)
-    {
-      for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
-        SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
-      return;
-    }
-
-  /*
-   * Make sure we update the FPM any time we send new information to
-   * the kernel.
-   */
-  zfpm_trigger_update (rn, "installing in kernel");
-  switch (PREFIX_FAMILY (&rn->p))
-    {
-    case AF_INET:
-      ret = kernel_add_ipv4 (&rn->p, rib);
-      break;
-#ifdef HAVE_IPV6
-    case AF_INET6:
-      ret = kernel_add_ipv6 (&rn->p, rib);
-      break;
-#endif /* HAVE_IPV6 */
-    }
-
-  /* This condition is never met, if we are using rt_socket.c */
-  if (ret < 0)
-    {
-      for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
-	UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
-    }
-}
-
-/* Uninstall the route from kernel. */
 static int
-rib_uninstall_kernel (struct route_node *rn, struct rib *rib)
+rib_update_kernel (struct route_node *rn, struct rib *old, struct rib *new)
 {
   int ret = 0;
   struct nexthop *nexthop, *tnexthop;
@@ -1154,31 +1113,31 @@ rib_uninstall_kernel (struct route_node *rn, struct rib *rib)
 
   if (info->safi != SAFI_UNICAST)
     {
-      for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
-        SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
-      return ret;
+      if (new)
+        for (ALL_NEXTHOPS_RO(new->nexthop, nexthop, tnexthop, recursing))
+          SET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
+      if (old)
+        for (ALL_NEXTHOPS_RO(old->nexthop, nexthop, tnexthop, recursing))
+          UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
+      return 0;
     }
 
   /*
    * Make sure we update the FPM any time we send new information to
    * the kernel.
    */
-  zfpm_trigger_update (rn, "uninstalling from kernel");
+  zfpm_trigger_update (rn, "updating in kernel");
 
-  switch (PREFIX_FAMILY (&rn->p))
-    {
-    case AF_INET:
-      ret = kernel_delete_ipv4 (&rn->p, rib);
-      break;
-#ifdef HAVE_IPV6
-    case AF_INET6:
-      ret = kernel_delete_ipv6 (&rn->p, rib);
-      break;
-#endif /* HAVE_IPV6 */
-    }
+  ret = kernel_route_rib (&rn->p, old, new);
 
-  for (ALL_NEXTHOPS_RO(rib->nexthop, nexthop, tnexthop, recursing))
-    UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
+  /* This condition is never met, if we are using rt_socket.c */
+  if (ret < 0 && new)
+      for (ALL_NEXTHOPS_RO(new->nexthop, nexthop, tnexthop, recursing))
+        UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
+
+  if (old)
+    for (ALL_NEXTHOPS_RO(old->nexthop, nexthop, tnexthop, recursing))
+      UNSET_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB);
 
   return ret;
 }
@@ -1196,7 +1155,7 @@ rib_uninstall (struct route_node *rn, struct rib *rib)
 
       redistribute_delete (&rn->p, rib);
       if (! RIB_SYSTEM_ROUTE (rib))
-	rib_uninstall_kernel (rn, rib);
+	rib_update_kernel (rn, rib, NULL);
       UNSET_FLAG (rib->flags, ZEBRA_FLAG_SELECTED);
     }
 }
@@ -1261,15 +1220,56 @@ rib_gc_dest (struct route_node *rn)
   return 1;
 }
 
+/* Check if 'alternate' RIB entry is better than 'current'. */
+static struct rib *
+rib_choose_best (struct rib *current, struct rib *alternate)
+{
+  if (current == NULL)
+    return alternate;
+
+  /* filter route selection in following order:
+   * - connected beats other types
+   * - lower distance beats higher
+   * - lower metric beats higher for equal distance
+   * - last, hence oldest, route wins tie break.
+   */
+
+  /* Connected routes. Pick the last connected
+   * route of the set of lowest metric connected routes.
+   */
+  if (alternate->type == ZEBRA_ROUTE_CONNECT)
+    {
+      if (current->type != ZEBRA_ROUTE_CONNECT
+          || alternate->metric <= current->metric)
+        return alternate;
+
+      return current;
+    }
+
+  if (current->type == ZEBRA_ROUTE_CONNECT)
+    return current;
+
+  /* higher distance loses */
+  if (alternate->distance < current->distance)
+    return alternate;
+  if (current->distance < alternate->distance)
+    return current;
+
+  /* metric tie-breaks equal distance */
+  if (alternate->metric <= current->metric)
+    return alternate;
+
+  return current;
+}
+
 /* Core function for processing routing information base. */
 static void
 rib_process (struct route_node *rn)
 {
   struct rib *rib;
   struct rib *next;
-  struct rib *fib = NULL;
-  struct rib *select = NULL;
-  struct rib *del = NULL;
+  struct rib *old_fib = NULL;
+  struct rib *new_fib = NULL;
   int installed = 0;
   struct nexthop *nexthop = NULL, *tnexthop;
   int recursing;
@@ -1279,32 +1279,18 @@ rib_process (struct route_node *rn)
 
   info = rn->table->info;
 
-  RNODE_FOREACH_RIB_SAFE (rn, rib, next)
+  RNODE_FOREACH_RIB (rn, rib)
     {
       /* Currently installed rib. */
       if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED))
         {
-          assert (fib == NULL);
-          fib = rib;
+          assert (old_fib == NULL);
+          old_fib = rib;
         }
-      
-      /* Unlock removed routes, so they'll be freed, bar the FIB entry,
-       * which we need to do do further work with below.
-       */
+
+      /* Skip deleted entries from selection */
       if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
-        {
-          if (rib != fib)
-            {
-              if (IS_ZEBRA_DEBUG_RIB)
-                rnode_debug (rn, "rn %p, removing rib %p",
-                            (void *)rn, (void *)rib);
-              rib_unlink (rn, rib);
-            }
-          else
-            del = rib;
-          
-          continue;
-        }
+        continue;
       
       /* Skip unreachable nexthop. */
       if (! nexthop_active_update (rn, rib, 0))
@@ -1314,150 +1300,73 @@ rib_process (struct route_node *rn)
       if (rib->distance == DISTANCE_INFINITY)
         continue;
 
-      /* Newly selected rib, the common case. */
-      if (!select)
-        {
-          select = rib;
-          continue;
-        }
-      
-      /* filter route selection in following order:
-       * - connected beats other types
-       * - lower distance beats higher
-       * - lower metric beats higher for equal distance
-       * - last, hence oldest, route wins tie break.
-       */
-      
-      /* Connected routes. Pick the last connected
-       * route of the set of lowest metric connected routes.
-       */
-      if (rib->type == ZEBRA_ROUTE_CONNECT)
-        {
-          if (select->type != ZEBRA_ROUTE_CONNECT
-              || rib->metric <= select->metric)
-            select = rib;
-          continue;
-        }
-      else if (select->type == ZEBRA_ROUTE_CONNECT)
-        continue;
-      
-      /* higher distance loses */
-      if (rib->distance > select->distance)
-        continue;
-      
-      /* lower wins */
-      if (rib->distance < select->distance)
-        {
-          select = rib;
-          continue;
-        }
-      
-      /* metric tie-breaks equal distance */
-      if (rib->metric <= select->metric)
-        select = rib;
+      new_fib = rib_choose_best(new_fib, rib);
     } /* RNODE_FOREACH_RIB_SAFE */
 
   /* After the cycle is finished, the following pointers will be set:
-   * select --- the winner RIB entry, if any was found, otherwise NULL
-   * fib    --- the SELECTED RIB entry, if any, otherwise NULL
-   * del    --- equal to fib, if fib is queued for deletion, NULL otherwise
-   * rib    --- NULL
+   * old_fib --- RIB entry currently having SELECTED
+   * new_fib --- RIB entry that is newly SELECTED
    */
 
-  /* Same RIB entry is selected. Update FIB and finish. */
-  if (select && select == fib)
-    {
-      if (IS_ZEBRA_DEBUG_RIB)
-	rnode_debug (rn, "Updating existing route, select %p, fib %p",
-                     (void *)select, (void *)fib);
-      if (CHECK_FLAG (select->status, RIB_ENTRY_CHANGED))
-        {
-          if (info->safi == SAFI_UNICAST)
-	    zfpm_trigger_update (rn, "updating existing route");
-
-          redistribute_delete (&rn->p, select);
-          if (! RIB_SYSTEM_ROUTE (select))
-            rib_uninstall_kernel (rn, select);
-
-          /* Set real nexthop. */
-          nexthop_active_update (rn, select, 1);
-  
-          if (! RIB_SYSTEM_ROUTE (select))
-            rib_install_kernel (rn, select);
-          redistribute_add (&rn->p, select);
-        }
-      else if (! RIB_SYSTEM_ROUTE (select))
-        {
-          /* Housekeeping code to deal with 
-             race conditions in kernel with linux
-             netlink reporting interface up before IPv4 or IPv6 protocol
-             is ready to add routes.
-             This makes sure the routes are IN the kernel.
-           */
-
-          for (ALL_NEXTHOPS_RO(select->nexthop, nexthop, tnexthop, recursing))
-            if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))
-            {
-              installed = 1;
-              break;
-            }
-          if (! installed) 
-            rib_install_kernel (rn, select);
-        }
-      goto end;
-    }
+  /* Set real nexthops. */
+  if (new_fib)
+    nexthop_active_update (rn, new_fib, 1);
 
-  /* At this point we either haven't found the best RIB entry or it is
-   * different from what we currently intend to flag with SELECTED. In both
-   * cases, if a RIB block is present in FIB, it should be withdrawn.
-   */
-  if (fib)
+  /* Update kernel if FIB entry has changed */
+  if (old_fib != new_fib
+      || (new_fib && CHECK_FLAG (new_fib->status, RIB_ENTRY_CHANGED)))
     {
-      if (IS_ZEBRA_DEBUG_RIB)
-        rnode_debug (rn, "Removing existing route, fib %p", (void *)fib);
+        if (old_fib && old_fib != new_fib)
+          {
+            if (! new_fib)
+              redistribute_delete (&rn->p, old_fib);
 
-      if (info->safi == SAFI_UNICAST)
-        zfpm_trigger_update (rn, "removing existing route");
+            if (! RIB_SYSTEM_ROUTE (old_fib) && (! new_fib || RIB_SYSTEM_ROUTE (new_fib)))
+              rib_update_kernel (rn, old_fib, NULL);
+            UNSET_FLAG (old_fib->flags, ZEBRA_FLAG_SELECTED);
+          }
 
-      redistribute_delete (&rn->p, fib);
-      if (! RIB_SYSTEM_ROUTE (fib))
-	rib_uninstall_kernel (rn, fib);
-      UNSET_FLAG (fib->flags, ZEBRA_FLAG_SELECTED);
+        if (new_fib)
+          {
+            /* Install new or replace existing FIB entry */
+            SET_FLAG (new_fib->flags, ZEBRA_FLAG_SELECTED);
+            redistribute_add (&rn->p, new_fib);
 
-      /* Set real nexthop. */
-      nexthop_active_update (rn, fib, 1);
-    }
+            if (! RIB_SYSTEM_ROUTE (new_fib))
+              rib_update_kernel (rn, old_fib, new_fib);
+          }
 
-  /* Regardless of some RIB entry being SELECTED or not before, now we can
-   * tell, that if a new winner exists, FIB is still not updated with this
-   * data, but ready to be.
-   */
-  if (select)
+        if (info->safi == SAFI_UNICAST)
+          zfpm_trigger_update (rn, "updating existing route");
+    }
+  else if (old_fib == new_fib && new_fib && ! RIB_SYSTEM_ROUTE (new_fib))
     {
-      if (IS_ZEBRA_DEBUG_RIB)
-        rnode_debug (rn, "Adding route, select %p", (void *)select);
-
-      if (info->safi == SAFI_UNICAST)
-        zfpm_trigger_update (rn, "new route selected");
-
-      /* Set real nexthop. */
-      nexthop_active_update (rn, select, 1);
-
-      if (! RIB_SYSTEM_ROUTE (select))
-        rib_install_kernel (rn, select);
-      SET_FLAG (select->flags, ZEBRA_FLAG_SELECTED);
-      redistribute_add (&rn->p, select);
+      /* Housekeeping code to deal with race conditions in kernel with
+       * linux netlink reporting interface up before IPv4 or IPv6 protocol
+       * is ready to add routes. This makes sure routes are IN the kernel.
+       */
+      for (ALL_NEXTHOPS_RO(new_fib->nexthop, nexthop, tnexthop, recursing))
+        if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))
+          {
+            installed = 1;
+            break;
+          }
+      if (! installed)
+        rib_update_kernel (rn, NULL, new_fib);
     }
 
-  /* FIB route was removed, should be deleted */
-  if (del)
+  /* Remove all RIB entries queued for removal */
+  RNODE_FOREACH_RIB_SAFE (rn, rib, next)
     {
-      if (IS_ZEBRA_DEBUG_RIB)
-        rnode_debug (rn, "Deleting fib %p, rn %p", (void *)del, (void *)rn);
-      rib_unlink (rn, del);
+      if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
+        {
+          if (IS_ZEBRA_DEBUG_RIB)
+            rnode_debug (rn, "rn %p, removing rib %p",
+                        (void *)rn, (void *)rib);
+          rib_unlink (rn, rib);
+        }
     }
 
-end:
   if (IS_ZEBRA_DEBUG_RIB_Q)
     rnode_debug (rn, "rn %p dequeued", (void *)rn);
 
@@ -3082,7 +2991,7 @@ rib_sweep_table (struct route_table *table)
 	  if (rib->type == ZEBRA_ROUTE_KERNEL && 
 	      CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELFROUTE))
 	    {
-	      ret = rib_uninstall_kernel (rn, rib);
+	      ret = rib_update_kernel (rn, rib, NULL);
 	      if (! ret)
                 rib_delnode (rn, rib);
 	    }
@@ -3165,7 +3074,7 @@ rib_close_table (struct route_table *table)
             zfpm_trigger_update (rn, NULL);
 
 	  if (! RIB_SYSTEM_ROUTE (rib))
-	    rib_uninstall_kernel (rn, rib);
+	    rib_update_kernel (rn, rib, NULL);
         }
 }