Skip to content

Commit

Permalink
Fix ArgoCD
Browse files Browse the repository at this point in the history
Argo will now crap out fatally when it sees a resource type without the
corresponding CRD being installed yet, so we need to wait for
cert-manager to be installed before installing CAPI.  Then we need to
wait for all that to be working before creating the Kubernetes cluster.
Then I discovered (again) the vcluster PVC is not cleaned up.
  • Loading branch information
spjmurray committed Jun 21, 2024
1 parent 981b3c8 commit cab8ce7
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 46 deletions.
4 changes: 2 additions & 2 deletions charts/kubernetes/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ description: A Helm chart for deploying Unikorn Kubernetes Service

type: application

version: v0.2.19
appVersion: v0.2.19
version: v0.2.20
appVersion: v0.2.20

icon: https://raw.githubusercontent.com/unikorn-cloud/assets/main/images/logos/dark-on-light/icon.png

Expand Down
5 changes: 5 additions & 0 deletions charts/kubernetes/templates/applications.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ spec:
parameters:
- name: hostpathMapper.enabled
value: 'true'
- name: autoDeletePersistentVolumeClaims
value: 'true'
# This will be valid from v0.20 onward
#- name: controlPlane.statefulSet.persistence.volumeClaim.retentionPolicy
# value: Delete
---
apiVersion: unikorn-cloud.org/v1alpha1
kind: HelmApplication
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ require (
github.com/prometheus/client_golang v1.19.0
github.com/spdx/tools-golang v0.5.3
github.com/spf13/pflag v1.0.5
github.com/unikorn-cloud/core v0.1.49
github.com/unikorn-cloud/core v0.1.52
github.com/unikorn-cloud/identity v0.2.11
github.com/unikorn-cloud/rbac v0.1.1
github.com/unikorn-cloud/region v0.1.9
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/unikorn-cloud/core v0.1.49 h1:ahAxrzvBnBICi+qN/AmTqKRJHpxl958gKVfBO3lz4G8=
github.com/unikorn-cloud/core v0.1.49/go.mod h1:cP39UQN7aSmsfjQuSMsworI4oBIwx4oA4u20CbPpfZw=
github.com/unikorn-cloud/core v0.1.52 h1:J8C8MBT3vYa5LUGHoWgzJ+UMPKcTtm9BfKgVKkG3f3w=
github.com/unikorn-cloud/core v0.1.52/go.mod h1:cP39UQN7aSmsfjQuSMsworI4oBIwx4oA4u20CbPpfZw=
github.com/unikorn-cloud/identity v0.2.11 h1:q6mkJ3qTRjwhlvLS9Jv0I4wlJhnsbJZHu2rbNdnXBYk=
github.com/unikorn-cloud/identity v0.2.11/go.mod h1:4KHNdHiIKpKERD0slunDDXhdC59M7eiN+Y1wSfHbQwQ=
github.com/unikorn-cloud/rbac v0.1.1 h1:5QB3YzwG0FaH80FirdwZLm8hwsxLN0iPCY9i1VVZjXY=
Expand Down
32 changes: 32 additions & 0 deletions pkg/provisioners/managers/cluster/provisioner.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,40 @@ func (p *Provisioner) getProvisioner(ctx context.Context) (provisioners.Provisio
return provisioner, nil
}

// managerReady reports whether the cluster manager referenced by this cluster
// has finished provisioning, so that cluster creation can be held back until
// then.
//
// Background: because of https://github.com/argoproj/argo-cd/issues/18041,
// Argo fails badly when an application is installed before the CRDs it
// depends on exist. The implicit manager therefore has to be provisioned
// serially, which takes long enough that the originating request would time
// out. Cluster creation is deferred instead, until the manager is known to be
// working.
//
// It returns nil when the manager's "available" condition has the provisioned
// reason, provisioners.ErrYield when it does not, and otherwise whatever error
// the manager lookup or the condition read produced.
func (p *Provisioner) managerReady(ctx context.Context) error {
	// The manager lives in the cluster's namespace and is named by the
	// manager ID recorded in the cluster spec.
	key := client.ObjectKey{
		Namespace: p.cluster.Namespace,
		Name:      p.cluster.Spec.ClusterManagerID,
	}

	var manager unikornv1.ClusterManager

	staticClient := coreclient.StaticClientFromContext(ctx)

	if err := staticClient.Get(ctx, key, &manager); err != nil {
		return err
	}

	available, err := manager.StatusConditionRead(unikornv1core.ConditionAvailable)
	if err != nil {
		return err
	}

	if available.Reason == unikornv1core.ConditionReasonProvisioned {
		return nil
	}

	// Not provisioned yet: yield rather than fail.
	return provisioners.ErrYield
}

// Provision implements the Provision interface.
func (p *Provisioner) Provision(ctx context.Context) error {
if err := p.managerReady(ctx); err != nil {
return err
}

provisioner, err := p.getProvisioner(ctx)
if err != nil {
return err
Expand Down
5 changes: 3 additions & 2 deletions pkg/provisioners/managers/clustermanager/provisioner.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
unikornv1core "github.com/unikorn-cloud/core/pkg/apis/unikorn/v1alpha1"
coreclient "github.com/unikorn-cloud/core/pkg/client"
"github.com/unikorn-cloud/core/pkg/provisioners"
"github.com/unikorn-cloud/core/pkg/provisioners/concurrent"
"github.com/unikorn-cloud/core/pkg/provisioners/remotecluster"
"github.com/unikorn-cloud/core/pkg/provisioners/serial"
unikornv1 "github.com/unikorn-cloud/kubernetes/pkg/apis/unikorn/v1alpha1"
Expand Down Expand Up @@ -122,7 +121,9 @@ func (p *Provisioner) getClusterManagerProvisioner() provisioners.Provisioner {

remoteClusterManager := remotecluster.New(vcluster.NewRemoteCluster(p.clusterManager.Namespace, p.clusterManager.Name, &p.clusterManager), true)

clusterAPIProvisioner := concurrent.New("cluster-api",
// Workaround for https://github.com/argoproj/argo-cd/issues/18041
// This should be a concurrent provision, but cert-manager has to be
// installed before cluster-api, so they are provisioned serially.
clusterAPIProvisioner := serial.New("cluster-api",
certmanager.New(apps.certManager),
clusterapi.New(apps.clusterAPI),
)
Expand Down
39 changes: 0 additions & 39 deletions pkg/server/handler/clustermanager/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
"github.com/unikorn-cloud/core/pkg/server/conversion"
"github.com/unikorn-cloud/core/pkg/server/errors"
"github.com/unikorn-cloud/core/pkg/util"
"github.com/unikorn-cloud/core/pkg/util/retry"
unikornv1 "github.com/unikorn-cloud/kubernetes/pkg/apis/unikorn/v1alpha1"
"github.com/unikorn-cloud/kubernetes/pkg/openapi"
"github.com/unikorn-cloud/kubernetes/pkg/server/handler/applicationbundle"
Expand Down Expand Up @@ -57,19 +56,6 @@ func NewClient(client client.Client) *Client {
}
}

// Sentinel errors declared by this package; match them with goerrors.Is.
var (
	// ErrResourceDeleting is raised when the resource is being deleted.
	ErrResourceDeleting = goerrors.New("resource is being deleted")

	// ErrNamespaceUnset is raised when the resource's namespace hasn't
	// been created yet.
	ErrNamespaceUnset = goerrors.New("resource namespace is unset")

	// ErrApplicationBundle is raised when no suitable application
	// bundle is found.
	ErrApplicationBundle = goerrors.New("no application bundle found")
)

// CreateImplicit is called when a cluster creation call is made and a control plane is not specified.
func (c *Client) CreateImplicit(ctx context.Context, organizationID, projectID string) (*unikornv1.ClusterManager, error) {
log := log.FromContext(ctx)
Expand Down Expand Up @@ -108,31 +94,6 @@ func (c *Client) CreateImplicit(ctx context.Context, organizationID, projectID s
return nil, err
}

waitCtx, cancel := context.WithCancel(ctx)
defer cancel()

// Allow a grace period for the project to become active to avoid client
// errors and retries. The namespace creation should be ostensibly instant
// and likewise show up due to non-blocking yields.
callback := func() error {
if _, err := c.get(waitCtx, resource.Namespace, resource.Name); err != nil {
// Short cut deleting errors.
if goerrors.Is(err, ErrResourceDeleting) {
cancel()

return nil
}

return err
}

return nil
}

if err := retry.Forever().DoWithContext(waitCtx, callback); err != nil {
return nil, err
}

return resource, nil
}

Expand Down

0 comments on commit cab8ce7

Please sign in to comment.