diff --git a/charts/kubernetes/Chart.yaml b/charts/kubernetes/Chart.yaml index 6fe837b7..166cbae5 100644 --- a/charts/kubernetes/Chart.yaml +++ b/charts/kubernetes/Chart.yaml @@ -4,8 +4,8 @@ description: A Helm chart for deploying Unikorn Kubernetes Service type: application -version: v0.2.19 -appVersion: v0.2.19 +version: v0.2.20 +appVersion: v0.2.20 icon: https://raw.githubusercontent.com/unikorn-cloud/assets/main/images/logos/dark-on-light/icon.png diff --git a/charts/kubernetes/templates/applications.yaml b/charts/kubernetes/templates/applications.yaml index 0fb91a1e..71fb73fc 100644 --- a/charts/kubernetes/templates/applications.yaml +++ b/charts/kubernetes/templates/applications.yaml @@ -84,6 +84,11 @@ spec: parameters: - name: hostpathMapper.enabled value: 'true' + - name: autoDeletePersistentVolumeClaims + value: 'true' + # This will be valid from v0.20 onward + #- name: controlPlane.statefulSet.persistence.volumeClaim.retentionPolicy + # value: Delete --- apiVersion: unikorn-cloud.org/v1alpha1 kind: HelmApplication diff --git a/go.mod b/go.mod index 77884f36..bdd3f1a1 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( github.com/prometheus/client_golang v1.19.0 github.com/spdx/tools-golang v0.5.3 github.com/spf13/pflag v1.0.5 - github.com/unikorn-cloud/core v0.1.49 + github.com/unikorn-cloud/core v0.1.52 github.com/unikorn-cloud/identity v0.2.11 github.com/unikorn-cloud/rbac v0.1.1 github.com/unikorn-cloud/region v0.1.9 diff --git a/go.sum b/go.sum index be331a8e..939606dd 100644 --- a/go.sum +++ b/go.sum @@ -175,8 +175,8 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= -github.com/unikorn-cloud/core v0.1.49 
h1:ahAxrzvBnBICi+qN/AmTqKRJHpxl958gKVfBO3lz4G8= -github.com/unikorn-cloud/core v0.1.49/go.mod h1:cP39UQN7aSmsfjQuSMsworI4oBIwx4oA4u20CbPpfZw= +github.com/unikorn-cloud/core v0.1.52 h1:J8C8MBT3vYa5LUGHoWgzJ+UMPKcTtm9BfKgVKkG3f3w= +github.com/unikorn-cloud/core v0.1.52/go.mod h1:cP39UQN7aSmsfjQuSMsworI4oBIwx4oA4u20CbPpfZw= github.com/unikorn-cloud/identity v0.2.11 h1:q6mkJ3qTRjwhlvLS9Jv0I4wlJhnsbJZHu2rbNdnXBYk= github.com/unikorn-cloud/identity v0.2.11/go.mod h1:4KHNdHiIKpKERD0slunDDXhdC59M7eiN+Y1wSfHbQwQ= github.com/unikorn-cloud/rbac v0.1.1 h1:5QB3YzwG0FaH80FirdwZLm8hwsxLN0iPCY9i1VVZjXY= diff --git a/pkg/provisioners/managers/cluster/provisioner.go b/pkg/provisioners/managers/cluster/provisioner.go index d3febea1..97b02d42 100644 --- a/pkg/provisioners/managers/cluster/provisioner.go +++ b/pkg/provisioners/managers/cluster/provisioner.go @@ -218,8 +218,40 @@ func (p *Provisioner) getProvisioner(ctx context.Context) (provisioners.Provisio return provisioner, nil } +// managerReady gates cluster creation on the manager being up and ready. +// Due to https://github.com/argoproj/argo-cd/issues/18041 Argo will break +// quite spectacularly if you try to install an application when the requisite +// CRDs are not present yet. As a result we need to provision the implicit +// manager serially, and that takes a long time. So long that the request times +// out, so we essentially have to defer cluster creation until we know the +// manager is working and Argo isn't going to fail. 
+func (p *Provisioner) managerReady(ctx context.Context) error { + cli := coreclient.StaticClientFromContext(ctx) + + manager := &unikornv1.ClusterManager{} + + if err := cli.Get(ctx, client.ObjectKey{Namespace: p.cluster.Namespace, Name: p.cluster.Spec.ClusterManagerID}, manager); err != nil { + return err + } + + condition, err := manager.StatusConditionRead(unikornv1core.ConditionAvailable) + if err != nil { + return err + } + + if condition.Reason != unikornv1core.ConditionReasonProvisioned { + return provisioners.ErrYield + } + + return nil +} + // Provision implements the Provision interface. func (p *Provisioner) Provision(ctx context.Context) error { + if err := p.managerReady(ctx); err != nil { + return err + } + provisioner, err := p.getProvisioner(ctx) if err != nil { return err diff --git a/pkg/provisioners/managers/clustermanager/provisioner.go b/pkg/provisioners/managers/clustermanager/provisioner.go index ff1aed59..ae7e3a7f 100644 --- a/pkg/provisioners/managers/clustermanager/provisioner.go +++ b/pkg/provisioners/managers/clustermanager/provisioner.go @@ -26,7 +26,6 @@ import ( unikornv1core "github.com/unikorn-cloud/core/pkg/apis/unikorn/v1alpha1" coreclient "github.com/unikorn-cloud/core/pkg/client" "github.com/unikorn-cloud/core/pkg/provisioners" - "github.com/unikorn-cloud/core/pkg/provisioners/concurrent" "github.com/unikorn-cloud/core/pkg/provisioners/remotecluster" "github.com/unikorn-cloud/core/pkg/provisioners/serial" unikornv1 "github.com/unikorn-cloud/kubernetes/pkg/apis/unikorn/v1alpha1" @@ -122,7 +121,9 @@ func (p *Provisioner) getClusterManagerProvisioner() provisioners.Provisioner { remoteClusterManager := remotecluster.New(vcluster.NewRemoteCluster(p.clusterManager.Namespace, p.clusterManager.Name, &p.clusterManager), true) - clusterAPIProvisioner := concurrent.New("cluster-api", + // Workaround for https://github.com/argoproj/argo-cd/issues/18041 + // This should be a concurrent provision, but alas no. 
+ clusterAPIProvisioner := serial.New("cluster-api", certmanager.New(apps.certManager), clusterapi.New(apps.clusterAPI), ) diff --git a/pkg/server/handler/clustermanager/client.go b/pkg/server/handler/clustermanager/client.go index ceb215fb..15c2a8b0 100644 --- a/pkg/server/handler/clustermanager/client.go +++ b/pkg/server/handler/clustermanager/client.go @@ -28,7 +28,6 @@ import ( "github.com/unikorn-cloud/core/pkg/server/conversion" "github.com/unikorn-cloud/core/pkg/server/errors" "github.com/unikorn-cloud/core/pkg/util" - "github.com/unikorn-cloud/core/pkg/util/retry" unikornv1 "github.com/unikorn-cloud/kubernetes/pkg/apis/unikorn/v1alpha1" "github.com/unikorn-cloud/kubernetes/pkg/openapi" "github.com/unikorn-cloud/kubernetes/pkg/server/handler/applicationbundle" @@ -57,19 +56,6 @@ func NewClient(client client.Client) *Client { } } -var ( - // ErrResourceDeleting is raised when the resource is being deleted. - ErrResourceDeleting = goerrors.New("resource is being deleted") - - // ErrNamespaceUnset is raised when the namespace hasn't been created - // yet. - ErrNamespaceUnset = goerrors.New("resource namespace is unset") - - // ErrApplicationBundle is raised when no suitable application - // bundle is found. - ErrApplicationBundle = goerrors.New("no application bundle found") -) - // CreateImplicit is called when a cluster creation call is made and a control plane is not specified. func (c *Client) CreateImplicit(ctx context.Context, organizationID, projectID string) (*unikornv1.ClusterManager, error) { log := log.FromContext(ctx) @@ -108,31 +94,6 @@ func (c *Client) CreateImplicit(ctx context.Context, organizationID, projectID s return nil, err } - waitCtx, cancel := context.WithCancel(ctx) - defer cancel() - - // Allow a grace period for the project to become active to avoid client - // errors and retries. The namespace creation should be ostensibly instant - // and likewise show up due to non-blocking yields. 
- callback := func() error { - if _, err := c.get(waitCtx, resource.Namespace, resource.Name); err != nil { - // Short cut deleting errors. - if goerrors.Is(err, ErrResourceDeleting) { - cancel() - - return nil - } - - return err - } - - return nil - } - - if err := retry.Forever().DoWithContext(waitCtx, callback); err != nil { - return nil, err - } - return resource, nil }