bgp_nht.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612
  1. /* BGP Nexthop tracking
  2. * Copyright (C) 2013 Cumulus Networks, Inc.
  3. *
  4. * This file is part of GNU Zebra.
  5. *
  6. * GNU Zebra is free software; you can redistribute it and/or modify it
  7. * under the terms of the GNU General Public License as published by the
  8. * Free Software Foundation; either version 2, or (at your option) any
  9. * later version.
  10. *
  11. * GNU Zebra is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with GNU Zebra; see the file COPYING. If not, write to the Free
  18. * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  19. * 02111-1307, USA.
  20. */
  21. #include <zebra.h>
  22. #include "command.h"
  23. #include "thread.h"
  24. #include "prefix.h"
  25. #include "zclient.h"
  26. #include "stream.h"
  27. #include "network.h"
  28. #include "log.h"
  29. #include "memory.h"
  30. #include "nexthop.h"
  31. #include "filter.h"
  32. #include "bgpd/bgpd.h"
  33. #include "bgpd/bgp_table.h"
  34. #include "bgpd/bgp_route.h"
  35. #include "bgpd/bgp_attr.h"
  36. #include "bgpd/bgp_nexthop.h"
  37. #include "bgpd/bgp_debug.h"
  38. #include "bgpd/bgp_nht.h"
  39. #include "bgpd/bgp_fsm.h"
  40. #include "bgpd/bgp_zebra.h"
  41. extern struct zclient *zclient;
  42. extern struct bgp_table *bgp_nexthop_cache_table[AFI_MAX];
  43. static void register_nexthop(struct bgp_nexthop_cache *bnc);
  44. static void unregister_nexthop (struct bgp_nexthop_cache *bnc);
  45. static void evaluate_paths(struct bgp_nexthop_cache *bnc);
  46. static int make_prefix(int afi, struct bgp_info *ri, struct prefix *p);
  47. static void path_nh_map(struct bgp_info *path, struct bgp_nexthop_cache *bnc,
  48. int keep);
  49. int
  50. bgp_nexthop_check (struct bgp_info *path, int connected)
  51. {
  52. struct bgp_nexthop_cache *bnc = path->nexthop;
  53. if (!bnc)
  54. return 0;
  55. if (BGP_DEBUG(nht, NHT))
  56. {
  57. char buf[INET6_ADDRSTRLEN];
  58. zlog_debug("%s: NHT checking %s",
  59. __FUNCTION__,
  60. bnc_str (bnc, buf, INET6_ADDRSTRLEN));
  61. }
  62. if (connected && !(CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)))
  63. return 0;
  64. return (bgp_zebra_num_connects() == 0 ||
  65. CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID));
  66. }
  67. /* Helper to get the rn for the appropriate nexthop for path or peer.
  68. * returns the locked rn - caller must bump down the refcnt.
  69. *
  70. * may return NULL in error cases.
  71. */
  72. static
  73. struct bgp_node *
  74. bgp_get_nexthop_rn (struct bgp_info *path, struct peer *peer)
  75. {
  76. struct prefix p;
  77. afi_t afi;
  78. assert (path || peer);
  79. if (!(path || peer))
  80. return NULL;
  81. if (path)
  82. {
  83. afi = family2afi (path->net->p.family);
  84. if (make_prefix(afi, path, &p) < 0)
  85. return NULL;
  86. }
  87. else
  88. {
  89. afi = family2afi(peer->su.sa.sa_family);
  90. if (afi == AFI_IP)
  91. {
  92. p.family = AF_INET;
  93. p.prefixlen = IPV4_MAX_BITLEN;
  94. p.u.prefix4 = peer->su.sin.sin_addr;
  95. }
  96. else if (afi == AFI_IP6)
  97. {
  98. p.family = AF_INET6;
  99. p.prefixlen = IPV6_MAX_BITLEN;
  100. p.u.prefix6 = peer->su.sin6.sin6_addr;
  101. }
  102. else
  103. return NULL;
  104. }
  105. return bgp_node_get (bgp_nexthop_cache_table[afi], &p);
  106. }
  107. static
  108. struct bgp_nexthop_cache *
  109. bgp_find_nexthop (struct bgp_info *path, struct peer *peer)
  110. {
  111. struct bgp_nexthop_cache *bnc = NULL;
  112. struct bgp_node *rn = bgp_get_nexthop_rn (path, peer);
  113. if (!rn)
  114. return NULL;
  115. bnc = rn->info;
  116. bgp_unlock_node (rn);
  117. return bnc;
  118. }
  119. static void
  120. bgp_unlink_nexthop_check (struct bgp_nexthop_cache *bnc)
  121. {
  122. if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info)
  123. {
  124. if (BGP_DEBUG(nht, NHT))
  125. {
  126. char buf[INET6_ADDRSTRLEN];
  127. zlog_debug("bgp_unlink_nexthop: freeing bnc %s",
  128. bnc_str (bnc, buf, INET6_ADDRSTRLEN));
  129. }
  130. unregister_nexthop(bnc);
  131. bnc->node->info = NULL;
  132. bgp_unlock_node (bnc->node);
  133. bnc->node = NULL;
  134. bnc_free (bnc);
  135. }
  136. }
  137. void
  138. bgp_unlink_nexthop (struct bgp_info *path)
  139. {
  140. struct bgp_nexthop_cache *bnc = path->nexthop;
  141. if (!bnc)
  142. return;
  143. if (BGP_DEBUG(nht, NHT))
  144. {
  145. char buf[INET6_ADDRSTRLEN];
  146. zlog_debug("%s: NHT unlinking %s",
  147. __FUNCTION__, bnc_str (bnc, buf, INET6_ADDRSTRLEN));
  148. }
  149. path_nh_map(path, NULL, 0);
  150. bgp_unlink_nexthop_check (bnc);
  151. }
  152. void
  153. bgp_unlink_nexthop_by_peer (struct peer *peer)
  154. {
  155. struct bgp_nexthop_cache *bnc = bgp_find_nexthop (NULL, peer);
  156. if (!bnc)
  157. return;
  158. if (BGP_DEBUG(nht, NHT))
  159. zlog_debug("%s: NHT unlinking %s",
  160. __FUNCTION__, peer->host);
  161. bnc->nht_info = NULL;
  162. bgp_unlink_nexthop_check (bnc);
  163. }
  164. int
  165. bgp_ensure_nexthop (struct bgp_info *ri, struct peer *peer,
  166. int connected)
  167. {
  168. struct bgp_node *rn;
  169. struct bgp_nexthop_cache *bnc;
  170. rn = bgp_get_nexthop_rn (ri, peer);
  171. if (!rn)
  172. {
  173. zlog_debug("%s: NHT could not ensure, failed to get rn!",
  174. __FUNCTION__);
  175. return 0;
  176. }
  177. if (!rn->info)
  178. {
  179. bnc = bnc_new();
  180. rn->info = bnc;
  181. bnc->node = rn;
  182. bgp_lock_node(rn);
  183. if (connected)
  184. SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
  185. }
  186. bnc = rn->info;
  187. bgp_unlock_node (rn);
  188. if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
  189. register_nexthop(bnc);
  190. if (ri)
  191. {
  192. path_nh_map(ri, bnc, 1); /* updates NHT ri list reference */
  193. if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
  194. (bgp_info_extra_get(ri))->igpmetric = bnc->metric;
  195. else if (ri->extra)
  196. ri->extra->igpmetric = 0;
  197. }
  198. else if (peer)
  199. bnc->nht_info = (void *)peer; /* NHT peer reference */
  200. if (BGP_DEBUG(nht, NHT))
  201. {
  202. char buf[INET6_ADDRSTRLEN];
  203. zlog_debug("%s: NHT ensured %s",
  204. __FUNCTION__, bnc_str (bnc, buf, INET6_ADDRSTRLEN));
  205. }
  206. return (bgp_zebra_num_connects() == 0 ||
  207. CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID));
  208. }
  209. void
  210. bgp_parse_nexthop_update (void)
  211. {
  212. struct stream *s;
  213. struct bgp_node *rn;
  214. struct bgp_nexthop_cache *bnc;
  215. struct nexthop *nexthop;
  216. struct nexthop *oldnh;
  217. struct nexthop *nhlist_head = NULL;
  218. struct nexthop *nhlist_tail = NULL;
  219. uint32_t metric;
  220. u_char nexthop_num;
  221. struct prefix p;
  222. int i;
  223. s = zclient->ibuf;
  224. memset(&p, 0, sizeof(struct prefix));
  225. p.family = stream_getw(s);
  226. p.prefixlen = stream_getc(s);
  227. switch (p.family)
  228. {
  229. case AF_INET:
  230. p.u.prefix4.s_addr = stream_get_ipv4 (s);
  231. break;
  232. case AF_INET6:
  233. stream_get(&p.u.prefix6, s, 16);
  234. break;
  235. default:
  236. break;
  237. }
  238. rn = bgp_node_lookup(bgp_nexthop_cache_table[family2afi(p.family)], &p);
  239. if (!rn || !rn->info)
  240. {
  241. if (BGP_DEBUG(nht, NHT))
  242. {
  243. char buf[INET6_ADDRSTRLEN];
  244. prefix2str(&p, buf, INET6_ADDRSTRLEN);
  245. zlog_debug("parse nexthop update(%s): rn not found", buf);
  246. }
  247. if (rn)
  248. bgp_unlock_node (rn);
  249. return;
  250. }
  251. bnc = rn->info;
  252. bgp_unlock_node (rn);
  253. bnc->last_update = bgp_clock();
  254. bnc->change_flags = 0;
  255. metric = stream_getl (s);
  256. nexthop_num = stream_getc (s);
  257. /* debug print the input */
  258. if (BGP_DEBUG(nht, NHT))
  259. {
  260. char buf[INET6_ADDRSTRLEN];
  261. prefix2str(&p, buf, INET6_ADDRSTRLEN);
  262. zlog_debug("parse nexthop update(%s): metric=%d, #nexthop=%d", buf,
  263. metric, nexthop_num);
  264. }
  265. if (metric != bnc->metric)
  266. bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
  267. if(nexthop_num != bnc->nexthop_num)
  268. bnc->change_flags |= BGP_NEXTHOP_CHANGED;
  269. if (nexthop_num)
  270. {
  271. bnc->flags |= BGP_NEXTHOP_VALID;
  272. bnc->metric = metric;
  273. bnc->nexthop_num = nexthop_num;
  274. for (i = 0; i < nexthop_num; i++)
  275. {
  276. nexthop = nexthop_new();
  277. nexthop->type = stream_getc (s);
  278. switch (nexthop->type)
  279. {
  280. case ZEBRA_NEXTHOP_IPV4:
  281. nexthop->gate.ipv4.s_addr = stream_get_ipv4 (s);
  282. break;
  283. case ZEBRA_NEXTHOP_IFINDEX:
  284. case ZEBRA_NEXTHOP_IFNAME:
  285. nexthop->ifindex = stream_getl (s);
  286. break;
  287. case ZEBRA_NEXTHOP_IPV4_IFINDEX:
  288. case ZEBRA_NEXTHOP_IPV4_IFNAME:
  289. nexthop->gate.ipv4.s_addr = stream_get_ipv4 (s);
  290. nexthop->ifindex = stream_getl (s);
  291. break;
  292. #ifdef HAVE_IPV6
  293. case ZEBRA_NEXTHOP_IPV6:
  294. stream_get (&nexthop->gate.ipv6, s, 16);
  295. break;
  296. case ZEBRA_NEXTHOP_IPV6_IFINDEX:
  297. case ZEBRA_NEXTHOP_IPV6_IFNAME:
  298. stream_get (&nexthop->gate.ipv6, s, 16);
  299. nexthop->ifindex = stream_getl (s);
  300. break;
  301. #endif
  302. default:
  303. /* do nothing */
  304. break;
  305. }
  306. if (nhlist_tail)
  307. {
  308. nhlist_tail->next = nexthop;
  309. nhlist_tail = nexthop;
  310. }
  311. else
  312. {
  313. nhlist_tail = nexthop;
  314. nhlist_head = nexthop;
  315. }
  316. /* No need to evaluate the nexthop if we have already determined
  317. * that there has been a change.
  318. */
  319. if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
  320. continue;
  321. for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
  322. if (nexthop_same_no_recurse(oldnh, nexthop))
  323. break;
  324. if (!oldnh)
  325. bnc->change_flags |= BGP_NEXTHOP_CHANGED;
  326. }
  327. bnc_nexthop_free(bnc);
  328. bnc->nexthop = nhlist_head;
  329. }
  330. else
  331. {
  332. bnc->flags &= ~BGP_NEXTHOP_VALID;
  333. UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
  334. bnc_nexthop_free(bnc);
  335. bnc->nexthop = NULL;
  336. }
  337. evaluate_paths(bnc);
  338. }
  339. /**
  340. * make_prefix - make a prefix structure from the path (essentially
  341. * path's node.
  342. */
  343. static int
  344. make_prefix (int afi, struct bgp_info *ri, struct prefix *p)
  345. {
  346. memset (p, 0, sizeof (struct prefix));
  347. switch (afi)
  348. {
  349. case AFI_IP:
  350. p->family = AF_INET;
  351. p->prefixlen = IPV4_MAX_BITLEN;
  352. p->u.prefix4 = ri->attr->nexthop;
  353. break;
  354. #ifdef HAVE_IPV6
  355. case AFI_IP6:
  356. if (ri->attr->extra->mp_nexthop_len == 16
  357. && IN6_IS_ADDR_LINKLOCAL (&ri->attr->extra->mp_nexthop_global))
  358. return -1;
  359. p->family = AF_INET6;
  360. p->prefixlen = IPV6_MAX_BITLEN;
  361. p->u.prefix6 = ri->attr->extra->mp_nexthop_global;
  362. break;
  363. #endif
  364. default:
  365. break;
  366. }
  367. return 0;
  368. }
  369. /**
  370. * sendmsg_nexthop -- Format and send a nexthop register/Unregister
  371. * command to Zebra.
  372. * ARGUMENTS:
  373. * struct bgp_nexthop_cache *bnc -- the nexthop structure.
  374. * int command -- either ZEBRA_NEXTHOP_REGISTER or ZEBRA_NEXTHOP_UNREGISTER
  375. * RETURNS:
  376. * void.
  377. */
  378. static void
  379. sendmsg_nexthop (struct bgp_nexthop_cache *bnc, int command)
  380. {
  381. struct stream *s;
  382. struct prefix *p;
  383. int ret;
  384. /* Check socket. */
  385. if (!zclient || zclient->sock < 0)
  386. {
  387. zlog_debug("%s: Can't send NH register, Zebra client not established",
  388. __FUNCTION__);
  389. return;
  390. }
  391. p = &(bnc->node->p);
  392. s = zclient->obuf;
  393. stream_reset (s);
  394. zclient_create_header (s, command, VRF_DEFAULT);
  395. if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
  396. stream_putc(s, 1);
  397. else
  398. stream_putc(s, 0);
  399. stream_putw(s, PREFIX_FAMILY(p));
  400. stream_putc(s, p->prefixlen);
  401. switch (PREFIX_FAMILY(p))
  402. {
  403. case AF_INET:
  404. stream_put_in_addr (s, &p->u.prefix4);
  405. break;
  406. case AF_INET6:
  407. stream_put(s, &(p->u.prefix6), 16);
  408. break;
  409. default:
  410. break;
  411. }
  412. stream_putw_at (s, 0, stream_get_endp (s));
  413. ret = zclient_send_message(zclient);
  414. /* TBD: handle the failure */
  415. if (ret < 0)
  416. zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
  417. if (command == ZEBRA_NEXTHOP_REGISTER)
  418. SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
  419. else if (command == ZEBRA_NEXTHOP_UNREGISTER)
  420. UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
  421. return;
  422. }
  423. /**
  424. * register_nexthop - register a nexthop with Zebra for notification
  425. * when the route to the nexthop changes.
  426. * ARGUMENTS:
  427. * struct bgp_nexthop_cache *bnc -- the nexthop structure.
  428. * RETURNS:
  429. * void.
  430. */
  431. static void
  432. register_nexthop (struct bgp_nexthop_cache *bnc)
  433. {
  434. /* Check if we have already registered */
  435. if (bnc->flags & BGP_NEXTHOP_REGISTERED)
  436. return;
  437. sendmsg_nexthop(bnc, ZEBRA_NEXTHOP_REGISTER);
  438. }
  439. /**
  440. * unregister_nexthop -- Unregister the nexthop from Zebra.
  441. * ARGUMENTS:
  442. * struct bgp_nexthop_cache *bnc -- the nexthop structure.
  443. * RETURNS:
  444. * void.
  445. */
  446. static void
  447. unregister_nexthop (struct bgp_nexthop_cache *bnc)
  448. {
  449. /* Check if we have already registered */
  450. if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
  451. return;
  452. sendmsg_nexthop(bnc, ZEBRA_NEXTHOP_UNREGISTER);
  453. }
  454. /**
  455. * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
  456. * ARGUMENTS:
  457. * struct bgp_nexthop_cache *bnc -- the nexthop structure.
  458. * RETURNS:
  459. * void.
  460. */
  461. static void
  462. evaluate_paths (struct bgp_nexthop_cache *bnc)
  463. {
  464. struct bgp_node *rn;
  465. struct bgp_info *path;
  466. struct bgp *bgp = bgp_get_default();
  467. int afi;
  468. struct peer *peer = (struct peer *)bnc->nht_info;
  469. LIST_FOREACH(path, &(bnc->paths), nh_thread)
  470. {
  471. if (!(path->type == ZEBRA_ROUTE_BGP &&
  472. path->sub_type == BGP_ROUTE_NORMAL))
  473. continue;
  474. rn = path->net;
  475. afi = family2afi(rn->p.family);
  476. /* Path becomes valid/invalid depending on whether the nexthop
  477. * reachable/unreachable.
  478. */
  479. if ((CHECK_FLAG(path->flags, BGP_INFO_VALID) ? 1 : 0) !=
  480. (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) ? 1 : 0))
  481. {
  482. if (CHECK_FLAG (path->flags, BGP_INFO_VALID))
  483. {
  484. bgp_aggregate_decrement (bgp, &rn->p, path,
  485. afi, SAFI_UNICAST);
  486. bgp_info_unset_flag (rn, path, BGP_INFO_VALID);
  487. }
  488. else
  489. {
  490. bgp_info_set_flag (rn, path, BGP_INFO_VALID);
  491. bgp_aggregate_increment (bgp, &rn->p, path,
  492. afi, SAFI_UNICAST);
  493. }
  494. }
  495. /* Copy the metric to the path. Will be used for bestpath computation */
  496. if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
  497. (bgp_info_extra_get(path))->igpmetric = bnc->metric;
  498. else if (path->extra)
  499. path->extra->igpmetric = 0;
  500. if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_METRIC_CHANGED) ||
  501. CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CHANGED))
  502. SET_FLAG(path->flags, BGP_INFO_IGP_CHANGED);
  503. bgp_process(bgp, rn, afi, SAFI_UNICAST);
  504. }
  505. if (peer && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED))
  506. {
  507. if (BGP_DEBUG(nht, NHT))
  508. zlog_debug("%s: Updating peer (%s) status with NHT", __FUNCTION__, peer->host);
  509. SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
  510. }
  511. RESET_FLAG(bnc->change_flags);
  512. }
  513. /**
  514. * path_nh_map - make or break path-to-nexthop association.
  515. * ARGUMENTS:
  516. * path - pointer to the path structure
  517. * bnc - pointer to the nexthop structure
  518. * make - if set, make the association. if unset, just break the existing
  519. * association.
  520. */
  521. static void
  522. path_nh_map (struct bgp_info *path, struct bgp_nexthop_cache *bnc, int make)
  523. {
  524. if (path->nexthop)
  525. {
  526. LIST_REMOVE(path, nh_thread);
  527. path->nexthop->path_count--;
  528. path->nexthop = NULL;
  529. }
  530. if (make)
  531. {
  532. LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
  533. path->nexthop = bnc;
  534. path->nexthop->path_count++;
  535. }
  536. }