diff --git a/pkg/frr/configure.go b/pkg/frr/configure.go index d5d8782..7b0e175 100644 --- a/pkg/frr/configure.go +++ b/pkg/frr/configure.go @@ -43,7 +43,7 @@ func (m *Manager) Configure(in Configuration, nm *nl.Manager, nwopCfg *config.Co for j := range in.VRFs[i].Import { for k := range in.VRFs[i].Import[j].Items { if in.VRFs[i].Import[j].Items[k].Action != "deny" { - return false, fmt.Errorf("only deny rules are allowed in import prefix-lists of mgmt VRFs") + return false, &ConfigurationError{fmt.Errorf("only deny rules are allowed in import prefix-lists of mgmt VRFs")} } // Swap deny to permit, this will be a prefix-list called from a deny route-map in.VRFs[i].Import[j].Items[k].Action = "permit" @@ -58,7 +58,7 @@ func (m *Manager) Configure(in Configuration, nm *nl.Manager, nwopCfg *config.Co currentConfig, err := os.ReadFile(m.ConfigPath) if err != nil { - return false, fmt.Errorf("error reading configuration file: %w", err) + return false, &ConfigurationError{fmt.Errorf("error reading configuration file: %w", err)} } targetConfig, err := render(m.configTemplate, frrConfig) @@ -72,7 +72,7 @@ func (m *Manager) Configure(in Configuration, nm *nl.Manager, nwopCfg *config.Co if !bytes.Equal(currentConfig, targetConfig) { err = os.WriteFile(m.ConfigPath, targetConfig, frrPermissions) if err != nil { - return false, fmt.Errorf("error writing configuration file: %w", err) + return false, &ConfigurationError{fmt.Errorf("error writing configuration file: %w", err)} } return true, nil @@ -187,3 +187,11 @@ func applyCfgReplacements(frrConfig []byte, replacements []config.Replacement) [ } return frrConfig } + +type ConfigurationError struct { + err error +} + +func (r *ConfigurationError) Error() string { + return fmt.Sprintf("FRR configuration error: %s", r.err.Error()) +} diff --git a/pkg/reconciler/layer3.go b/pkg/reconciler/layer3.go index f66e91d..60533a7 100644 --- a/pkg/reconciler/layer3.go +++ b/pkg/reconciler/layer3.go @@ -2,6 +2,7 @@ package reconciler import ( "context" + "errors" "fmt" "net" "sort" @@ -65,6 +66,15 @@ func (r *reconcile) reconcileLayer3(l3vnis []networkv1alpha1.VRFRouteConfigurati return allConfigs[i].VNI < allConfigs[j].VNI }) + // Create FRR configuration and reload it + err = r.configureFRR(allConfigs) + if err != nil { + if !errors.Is(err, &frr.ConfigurationError{}) { + return err + } + r.Logger.Error(err, "failed to configure FRR") + } + created, deletedVRF, err := r.reconcileL3Netlink(l3Configs) if err != nil { r.Logger.Error(err, "error reconciling Netlink") @@ -75,54 +85,40 @@ func (r *reconcile) reconcileLayer3(l3vnis []networkv1alpha1.VRFRouteConfigurati if err != nil { return err } - reloadTwice := deletedVRF || deletedTaas - // We wait here for two seconds to let FRR settle after updating netlink devices - time.Sleep(defaultSleep) - - err = r.configureFRR(allConfigs, reloadTwice) - if err != nil { - return err + // When a BGP VRF is deleted there is a leftover running configuration after reload + // A second reload fixes this. + if deletedVRF || deletedTaas { + if err := r.reloadFRR(); err != nil { + return fmt.Errorf("failed to reload FRR: %w", err) + } } - // Make sure that all created netlink VRFs are up after FRR reload + // We wait here for two seconds to let FRR settle after updating netlink devices time.Sleep(defaultSleep) + for _, info := range created { if err := r.netlinkManager.UpL3(info); err != nil { - r.Logger.Error(err, "error setting L3 to state UP") - return fmt.Errorf("error setting L3 to state UP: %w", err) + r.Logger.Error(err, "error setting L3 to state UP", "interface", info) } } return nil } -func (r *reconcile) configureFRR(vrfConfigs []frr.VRFConfiguration, reloadTwice bool) error { +func (r *reconcile) configureFRR(vrfConfigs []frr.VRFConfiguration) error { changed, err := r.frrManager.Configure(frr.Configuration{ VRFs: vrfConfigs, ASN: r.config.ServerASN, }, r.netlinkManager, r.config) if err != nil { - r.Logger.Error(err, "error updating FRR configuration") return fmt.Errorf("error updating FRR configuration: %w", err) } - if changed || r.dirtyFRRConfig { + if changed { err := r.reloadFRR() if err != nil { - r.dirtyFRRConfig = true return err } - - // When a BGP VRF is deleted there is a leftover running configuration after reload - // A second reload fixes this. - if reloadTwice { - err := r.reloadFRR() - if err != nil { - r.dirtyFRRConfig = true - return err - } - } - r.dirtyFRRConfig = false } return nil } @@ -249,6 +245,7 @@ func (r *reconcile) reconcileL3Netlink(vrfConfigs []frr.VRFConfiguration) ([]nl. } // Check for VRFs that are configured on the host but no longer in Kubernetes + preexisting := []nl.VRFInformation{} toDelete := []nl.VRFInformation{} for i := range existing { stillExists := false @@ -256,6 +253,9 @@ func (r *reconcile) reconcileL3Netlink(vrfConfigs []frr.VRFConfiguration) ([]nl. if vrfConfigs[j].Name == existing[i].Name && vrfConfigs[j].VNI == existing[i].VNI { stillExists = true existing[i].MTU = vrfConfigs[j].MTU + if !existing[i].MarkForDelete { + preexisting = append(preexisting, existing[i]) + } break } } @@ -266,6 +266,13 @@ func (r *reconcile) reconcileL3Netlink(vrfConfigs []frr.VRFConfiguration) ([]nl. } } + // Make sure that all previously configured L3 interfaces are up + for _, info := range preexisting { + if err := r.netlinkManager.UpL3(info); err != nil { + r.Logger.Error(err, "failed to set L3 up", "interface", info.Name) + } + } + // Check for VRFs that are in Kubernetes but not yet configured on the host toCreate := prepareVRFsToCreate(vrfConfigs, existing) diff --git a/pkg/reconciler/reconciler.go b/pkg/reconciler/reconciler.go index 9f7dd94..2c430f0 100644 --- a/pkg/reconciler/reconciler.go +++ b/pkg/reconciler/reconciler.go @@ -28,8 +28,6 @@ type Reconciler struct { healthChecker *healthcheck.HealthChecker debouncer *debounce.Debouncer - - dirtyFRRConfig bool } type reconcile struct {