Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add TLS certificates API support #170

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions cmd/fastly-exporter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ func main() {
serviceBlocklist stringslice
metricAllowlist stringslice
metricBlocklist stringslice
certificateRefresh time.Duration
datacenterRefresh time.Duration
productRefresh time.Duration
serviceRefresh time.Duration
Expand All @@ -62,6 +63,7 @@ func main() {
fs.Var(&serviceBlocklist, "service-blocklist", "if set, don't include services whose names match this regex (repeatable)")
fs.Var(&metricAllowlist, "metric-allowlist", "if set, only export metrics whose names match this regex (repeatable)")
fs.Var(&metricBlocklist, "metric-blocklist", "if set, don't export metrics whose names match this regex (repeatable)")
fs.DurationVar(&certificateRefresh, "certificate-refresh", 10*time.Minute, "how often to poll api.fastly.com for updated certificates metadata (10m–24h)")
fs.DurationVar(&datacenterRefresh, "datacenter-refresh", 10*time.Minute, "how often to poll api.fastly.com for updated datacenter metadata (10m–1h)")
fs.DurationVar(&productRefresh, "product-refresh", 10*time.Minute, "how often to poll api.fastly.com for updated product metadata (10m–24h)")
fs.DurationVar(&serviceRefresh, "service-refresh", 1*time.Minute, "how often to poll api.fastly.com for updated service metadata (15s–10m)")
Expand Down Expand Up @@ -124,6 +126,14 @@ func main() {
})

{
// Clamp -certificate-refresh to the supported 10m–24h window, warning the
// operator whenever the requested value had to be adjusted.
if certificateRefresh < 10*time.Minute {
	level.Warn(logger).Log("msg", "-certificate-refresh cannot be shorter than 10m; setting it to 10m")
	certificateRefresh = 10 * time.Minute
}
if certificateRefresh > 24*time.Hour {
	// The flag name in this message previously read "-certificaate-refresh".
	level.Warn(logger).Log("msg", "-certificate-refresh cannot be longer than 24h; setting it to 24h")
	certificateRefresh = 24 * time.Hour
}
if datacenterRefresh < 10*time.Minute {
level.Warn(logger).Log("msg", "-datacenter-refresh cannot be shorter than 10m; setting it to 10m")
datacenterRefresh = 10 * time.Minute
Expand Down Expand Up @@ -269,6 +279,11 @@ func main() {
serviceCache = api.NewServiceCache(apiClient, token, serviceCacheOptions...)
}

var certificateCache *api.CertificateCache
{
enabled := !metricNameFilter.Blocked(prometheus.BuildFQName(namespace, deprecatedSubsystem, "cert_expiry_timestamp_seconds"))
certificateCache = api.NewCertificateCache(apiClient, token, enabled)
}
var datacenterCache *api.DatacenterCache
{
enabled := !metricNameFilter.Blocked(prometheus.BuildFQName(namespace, deprecatedSubsystem, "datacenter_info"))
Expand All @@ -288,6 +303,14 @@ func main() {
}
return nil
})
if certificateCache.Enabled() {
g.Go(func() error {
if err := certificateCache.Refresh(context.Background()); err != nil {
level.Warn(logger).Log("during", "initial fetch of certificates", "err", err, "msg", "certificate labels unavailable, will retry")
}
return nil
})
}
if datacenterCache.Enabled() {
g.Go(func() error {
if err := datacenterCache.Refresh(context.Background()); err != nil {
Expand All @@ -307,6 +330,15 @@ func main() {
}

var defaultGatherers prometheus.Gatherers
if certificateCache.Enabled() {
certs, err := certificateCache.Gatherer(namespace, deprecatedSubsystem)
if err != nil {
level.Error(apiLogger).Log("during", "create certificate gatherer", "err", err)
os.Exit(1)
}
defaultGatherers = append(defaultGatherers, certs)
}

if datacenterCache.Enabled() {
dcs, err := datacenterCache.Gatherer(namespace, deprecatedSubsystem)
if err != nil {
Expand Down Expand Up @@ -351,6 +383,31 @@ func main() {
}

var g run.Group
// only setup the ticker if the certificateCache is enabled.
if certificateCache.Enabled() {

// Every certificateRefresh, ask the api.CertificateCache to refresh
// metadata from the api.fastly.com/tls/certificates endpoint.
var (
ctx, cancel = context.WithCancel(context.Background())
ticker = time.NewTicker(certificateRefresh)
)
g.Add(func() error {
for {
select {
case <-ticker.C:
if err := certificateCache.Refresh(ctx); err != nil {
level.Warn(apiLogger).Log("during", "certificate refresh", "err", err, "msg", "the certificate info metrics may be stale")
}
case <-ctx.Done():
return ctx.Err()
}
}
}, func(error) {
ticker.Stop()
cancel()
})
}
// only setup the ticker if the datacenterCache is enabled.
if datacenterCache.Enabled() {

Expand Down
154 changes: 154 additions & 0 deletions pkg/api/certificate_cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package api

import (
"context"
"encoding/json"
"fmt"
"net/http"
"sort"
"sync"
"time"

"github.com/prometheus/client_golang/prometheus"
)

// maxCertificatesPageSize is the page size requested from the
// api.fastly.com/tls/certificates endpoint. Refresh fetches a single page of
// this many results.
// NOTE(review): this file describes the value as the largest page size the
// endpoint accepts — confirm against the Fastly API reference.
const maxCertificatesPageSize = 1000

// Certificates is the payload that CertificateCache.Refresh decodes from the
// api.fastly.com/tls/certificates response.
type Certificates struct {
// Certificate is the list of entries found under the response's "data" key.
Certificate []Certificate `json:"data"`
}

// Certificate is one entry of the "data" list in the TLS certificates
// response.
type Certificate struct {
// Attributes holds the fields decoded from the entry's "attributes" object.
Attributes Attributes `json:"attributes"`
// Id is the entry's "id" value; the collector exports it as the "id" label.
Id string `json:"id"`
}

// Attributes carries the per-certificate fields that are exported as metric
// labels, plus the expiry timestamp used as the metric value.
type Attributes struct {
// CN is decoded from "issued_to" and exported as the "cn" label. Refresh
// sorts the cached certificates by this field.
CN string `json:"issued_to"`
// Name is decoded from "name" and exported as the "name" label.
Name string `json:"name"`
// Issuer is decoded from "issuer" and exported as the "issuer" label.
Issuer string `json:"issuer"`
// Not_after is the raw "not_after" timestamp string. The collector parses
// it with the layout "2006-01-02T15:04:05.000Z" to compute the metric value.
Not_after string `json:"not_after"`
// SN is decoded from "serial_number" and exported as the "sn" label.
SN string `json:"serial_number"`
}

// CertificateCache polls api.fastly.com/tls/certificates and maintains a local cache
// of the returned metadata. That information is exposed as Prometheus metrics.
type CertificateCache struct {
// client executes the HTTP request built by Refresh.
client HTTPClient
// token is sent as the Fastly-Key request header.
token string
// enabled gates Refresh: when false, Refresh returns immediately.
enabled bool

// mtx guards certs, which Refresh writes and Certificates reads.
mtx sync.Mutex
// certs is the most recent successfully decoded response.
certs Certificates
}

// NewCertificateCache builds a CertificateCache that holds no certificates
// yet. The client performs the API requests, token is sent as the Fastly-Key
// header, and enabled decides whether Refresh does any work. Call Refresh to
// populate the cache.
func NewCertificateCache(client HTTPClient, token string, enabled bool) *CertificateCache {
	cache := new(CertificateCache)
	cache.client = client
	cache.token = token
	cache.enabled = enabled
	return cache
}

// Refresh replaces the cached certificates with metadata retrieved from the
// Fastly API. It is a no-op returning nil when the cache is disabled.
//
// An error is returned when the request cannot be built or executed, when the
// API answers with a status other than 200 OK, or when the response body
// cannot be decoded. In all of those cases the previous cache content is kept.
func (c *CertificateCache) Refresh(ctx context.Context) error {
	if !c.enabled {
		return nil
	}

	// TODO: Implement additional requests for next pages if there are more
	// TLS certificates than maxCertificatesPageSize
	endpoint := fmt.Sprintf("https://api.fastly.com/tls/certificates?page%%5Bnumber%%5D=1&page%%5Bsize%%5D=%d&sort=created_at", maxCertificatesPageSize)

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return fmt.Errorf("error constructing API certificates request: %w", err)
	}
	req.Header.Set("Fastly-Key", c.token)
	req.Header.Set("Accept", "application/json")

	resp, err := c.client.Do(req)
	if err != nil {
		return fmt.Errorf("error executing API certificates request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return NewError(resp)
	}

	var fetched Certificates
	if err := json.NewDecoder(resp.Body).Decode(&fetched); err != nil {
		return fmt.Errorf("error decoding API certificates response: %w", err)
	}

	// Order the entries by common name before publishing them. The local
	// slice shares its backing array with fetched.Certificate, so sorting it
	// sorts the payload that is stored below.
	entries := fetched.Certificate
	sort.Slice(entries, func(a, b int) bool {
		return entries[a].Attributes.CN < entries[b].Attributes.CN
	})

	// Swap in the new data only after everything above succeeded.
	c.mtx.Lock()
	c.certs = fetched
	c.mtx.Unlock()

	return nil
}

// Certificates returns a copy of the currently cached certificates.
//
// The certificate list is duplicated into a fresh slice, so callers may
// reorder or modify the result without affecting the cache. A plain struct
// copy would still share the slice's backing array with the cache. A nil
// list (nothing fetched yet) is returned as nil.
func (c *CertificateCache) Certificates() Certificates {
	c.mtx.Lock()
	defer c.mtx.Unlock()

	certs := c.certs
	if c.certs.Certificate != nil {
		certs.Certificate = make([]Certificate, len(c.certs.Certificate))
		copy(certs.Certificate, c.certs.Certificate)
	}
	return certs
}

// Gatherer builds a dedicated Prometheus registry containing one collector
// that reports the cached Fastly certificates. Each certificate becomes a
// sample of the <namespace>_<subsystem>_cert_expiry_timestamp_seconds gauge,
// labelled with cn, name, id, issuer and sn.
//
// An error is returned if the collector cannot be registered.
func (c *CertificateCache) Gatherer(namespace, subsystem string) (prometheus.Gatherer, error) {
	desc := prometheus.NewDesc(
		prometheus.BuildFQName(namespace, subsystem, "cert_expiry_timestamp_seconds"),
		"Metadata about Fastly certificates.",
		[]string{"cn", "name", "id", "issuer", "sn"},
		prometheus.Labels{},
	)

	reg := prometheus.NewRegistry()
	err := reg.Register(&certificateCollector{desc: desc, cache: c})
	if err != nil {
		return nil, fmt.Errorf("registering certificate collector: %w", err)
	}

	return reg, nil
}

// Enabled reports whether the CertificateCache was constructed as enabled.
// When it returns false, Refresh is a no-op.
func (c *CertificateCache) Enabled() bool {
return c.enabled
}

// certificateCollector is the collector that Gatherer registers; it turns the
// certificates held by a CertificateCache into Prometheus metrics.
type certificateCollector struct {
// desc is the descriptor sent by Describe and used for every emitted metric.
desc *prometheus.Desc
// cache is read on every Collect call to obtain the current certificates.
cache *CertificateCache
}

// Describe sends the collector's single metric descriptor on ch.
func (c *certificateCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- c.desc
}

// Collect emits one gauge sample per cached certificate on ch. The sample
// value is the certificate's not_after time as a Unix timestamp in seconds,
// and the label values are, in order: cn, name, id, issuer, sn.
//
// Certificates whose not_after value cannot be parsed are skipped. Ignoring
// the parse error would publish the zero time (-62135596800) as if it were a
// real expiry date.
func (c *certificateCollector) Collect(ch chan<- prometheus.Metric) {
	for _, cert := range c.cache.Certificates().Certificate {
		// RFC 3339 parsing accepts the "2006-01-02T15:04:05.000Z" form used
		// so far, and also tolerates a missing or differently sized
		// fractional-seconds field.
		notAfter, err := time.Parse(time.RFC3339, cert.Attributes.Not_after)
		if err != nil {
			// No meaningful expiry can be reported for this certificate.
			continue
		}

		ch <- prometheus.MustNewConstMetric(
			c.desc,
			prometheus.GaugeValue,
			float64(notAfter.Unix()),
			cert.Attributes.CN,
			cert.Attributes.Name,
			cert.Id,
			cert.Attributes.Issuer,
			cert.Attributes.SN,
		)
	}
}
Loading