177 lines
5.2 KiB
Diff
177 lines
5.2 KiB
Diff
From 6779e5da698feb9b9e02411859ad81885ba46c01 Mon Sep 17 00:00:00 2001
|
|
From: Maria Matejka <mq@ucw.cz>
|
|
Date: Fri, 20 Dec 2024 11:28:00 +0100
|
|
Subject: [PATCH] BGP: fix locking order error on dynamic protocol spawn
|
|
|
|
We missed that the protocol spawner violates the prescribed
|
|
locking order. When the rtable level is locked, no new protocol can be
|
|
started, thus we need to:
|
|
|
|
* create the protocol from a clean mainloop context
|
|
* in protocol start hook, take the socket
|
|
|
|
Testsuite: cf-bgp-autopeer
|
|
Fixes: #136
|
|
|
|
Thanks to Job Snijders <job@fastly.com> for reporting:
|
|
https://trubka.network.cz/pipermail/bird-users/2024-December/017980.html
|
|
---
|
|
nest/proto.c | 19 +++++++++++++++++++
|
|
nest/protocol.h | 2 ++
|
|
proto/bgp/bgp.c | 46 +++++++++++++++++++++++++++++++++++-----------
|
|
3 files changed, 56 insertions(+), 11 deletions(-)
|
|
|
|
diff --git a/nest/proto.c b/nest/proto.c
|
|
index dded84f51..678697d69 100644
|
|
--- a/nest/proto.c
|
|
+++ b/nest/proto.c
|
|
@@ -1867,6 +1867,25 @@ proto_spawn(struct proto_config *cf, uint disabled)
|
|
return p;
|
|
}
|
|
|
|
+bool
|
|
+proto_disable(struct proto *p)
|
|
+{
|
|
+ ASSERT_DIE(birdloop_inside(&main_birdloop));
|
|
+ bool changed = !p->disabled;
|
|
+ p->disabled = 1;
|
|
+ proto_rethink_goal(p);
|
|
+ return changed;
|
|
+}
|
|
+
|
|
+bool
|
|
+proto_enable(struct proto *p)
|
|
+{
|
|
+ ASSERT_DIE(birdloop_inside(&main_birdloop));
|
|
+ bool changed = p->disabled;
|
|
+ p->disabled = 0;
|
|
+ proto_rethink_goal(p);
|
|
+ return changed;
|
|
+}
|
|
|
|
/**
|
|
* DOC: Graceful restart recovery
|
|
diff --git a/nest/protocol.h b/nest/protocol.h
|
|
index 25ed6f553..cf7ecb898 100644
|
|
--- a/nest/protocol.h
|
|
+++ b/nest/protocol.h
|
|
@@ -78,6 +78,8 @@ void proto_build(struct protocol *); /* Called from protocol to register itself
|
|
void protos_preconfig(struct config *);
|
|
void protos_commit(struct config *new, struct config *old, int type);
|
|
struct proto * proto_spawn(struct proto_config *cf, uint disabled);
|
|
+bool proto_disable(struct proto *p);
|
|
+bool proto_enable(struct proto *p);
|
|
void protos_dump_all(struct dump_request *);
|
|
|
|
#define GA_UNKNOWN 0 /* Attribute not recognized */
|
|
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
|
|
index 5fc2b5fff..3170e3a42 100644
|
|
--- a/proto/bgp/bgp.c
|
|
+++ b/proto/bgp/bgp.c
|
|
@@ -378,8 +378,6 @@ bgp_startup(struct bgp_proto *p)
|
|
if (p->postponed_sk)
|
|
{
|
|
/* Apply postponed incoming connection */
|
|
- sk_reloop(p->postponed_sk, p->p.loop);
|
|
-
|
|
bgp_setup_conn(p, &p->incoming_conn);
|
|
bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
|
|
bgp_send_open(&p->incoming_conn);
|
|
@@ -583,6 +581,9 @@ bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len
|
|
static void
|
|
bgp_down(struct bgp_proto *p)
|
|
{
|
|
+ /* Check that the dynamic BGP socket has been picked up */
|
|
+ ASSERT_DIE(p->postponed_sk == NULL);
|
|
+
|
|
if (bgp_start_state(p) > BSS_PREPARE)
|
|
{
|
|
bgp_setup_auth(p, 0);
|
|
@@ -617,8 +618,8 @@ bgp_decision(void *vp)
|
|
bgp_down(p);
|
|
}
|
|
|
|
-static struct bgp_proto *
|
|
-bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip)
|
|
+static void
|
|
+bgp_spawn(struct bgp_proto *pp, struct birdsock *sk)
|
|
{
|
|
struct symbol *sym;
|
|
char fmt[SYM_MAX_LEN];
|
|
@@ -635,9 +636,16 @@ bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip)
|
|
cfg_mem = NULL;
|
|
|
|
/* Just pass remote_ip to bgp_init() */
|
|
- ((struct bgp_config *) sym->proto)->remote_ip = remote_ip;
|
|
+ ((struct bgp_config *) sym->proto)->remote_ip = sk->daddr;
|
|
+
|
|
+ /* Create the protocol disabled initially */
|
|
+ SKIP_BACK_DECLARE(struct bgp_proto, p, p, proto_spawn(sym->proto, 1));
|
|
|
|
- return (void *) proto_spawn(sym->proto, 0);
|
|
+ /* Pass the socket */
|
|
+ p->postponed_sk = sk;
|
|
+
|
|
+ /* And enable the protocol */
|
|
+ proto_enable(&p->p);
|
|
}
|
|
|
|
void
|
|
@@ -1489,10 +1497,15 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
|
|
/* For dynamic BGP, spawn new instance and postpone the socket */
|
|
if (bgp_is_dynamic(p))
|
|
{
|
|
- p = bgp_spawn(p, sk->daddr);
|
|
- p->postponed_sk = sk;
|
|
- rmove(sk, p->p.pool);
|
|
- goto leave;
|
|
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
|
|
+
|
|
+ /* The dynamic protocol must be in the START state */
|
|
+ ASSERT_DIE(p->p.proto_state == PS_START);
|
|
+ birdloop_leave(p->p.loop);
|
|
+
|
|
+ /* Now we have a clean mainloop */
|
|
+ bgp_spawn(p, sk);
|
|
+ return 0;
|
|
}
|
|
|
|
rmove(sk, p->p.pool);
|
|
@@ -1806,7 +1819,6 @@ bgp_start(struct proto *P)
|
|
p->incoming_conn.state = BS_IDLE;
|
|
p->neigh = NULL;
|
|
p->bfd_req = NULL;
|
|
- p->postponed_sk = NULL;
|
|
p->gr_ready = 0;
|
|
p->gr_active_num = 0;
|
|
|
|
@@ -1848,6 +1860,16 @@ bgp_start(struct proto *P)
|
|
channel_graceful_restart_lock(&c->c);
|
|
}
|
|
|
|
+ /* Now it's the last chance to move the postponed socket to this BGP,
|
|
+ * as bgp_start is the only hook running from main loop. */
|
|
+ if (p->postponed_sk)
|
|
+ {
|
|
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
|
|
+ rmove(p->postponed_sk, p->p.pool);
|
|
+ sk_reloop(p->postponed_sk, p->p.loop);
|
|
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
|
|
+ }
|
|
+
|
|
/*
|
|
* Before attempting to create the connection, we need to lock the port,
|
|
* so that we are the only instance attempting to talk with that neighbor.
|
|
@@ -1999,6 +2021,8 @@ bgp_init(struct proto_config *CF)
|
|
p->remote_ip = cf->remote_ip;
|
|
p->remote_as = cf->remote_as;
|
|
|
|
+ p->postponed_sk = NULL;
|
|
+
|
|
/* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */
|
|
if (cf->c.parent)
|
|
cf->remote_ip = IPA_NONE;
|
|
--
|
|
GitLab
|
|
|