/
renewer.go
168 lines (150 loc) · 5.04 KB
/
renewer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
package ca
import (
"context"
"sync"
"time"
"github.com/docker/go-events"
"github.com/docker/swarmkit/connectionbroker"
"github.com/docker/swarmkit/log"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
// RenewTLSExponentialBackoff is the backoff configuration handed to
// events.NewExponentialBackoff by TLSRenewer.Start. The resulting backoff
// supplies the wait between renewal attempts while the current certificate is
// expired or while a forced renewal has not yet succeeded.
var RenewTLSExponentialBackoff = events.ExponentialBackoffConfig{
	Base:   5 * time.Second,
	Factor: 5 * time.Second,
	Max:    time.Hour,
}
// TLSRenewer renews TLS certificates, either on its own schedule or when a
// renewal is explicitly requested through Renew.
type TLSRenewer struct {
	// s is the security configuration whose certificate is renewed.
	s *SecurityConfig
	// connBroker is passed through to RenewTLSConfigNow on every renewal.
	connBroker *connectionbroker.Broker
	// rootPaths is passed through to RenewTLSConfigNow on every renewal.
	rootPaths CertPaths
	// renew carries forced-renewal requests from Renew to the Start loop.
	renew chan struct{}

	// mu guards expectedRole.
	mu sync.Mutex
	// expectedRole, when non-empty, is the role a renewed certificate must
	// carry before the renewal is treated as successful.
	expectedRole string
}
// NewTLSRenewer builds a TLSRenewer for the given security configuration,
// connection broker and certificate paths. The returned renewer is idle until
// Start is called on it.
func NewTLSRenewer(s *SecurityConfig, connBroker *connectionbroker.Broker, rootPaths CertPaths) *TLSRenewer {
	renewer := new(TLSRenewer)
	renewer.s = s
	renewer.connBroker = connBroker
	renewer.rootPaths = rootPaths
	// A buffer of one lets a single renewal request be queued without
	// blocking the caller of Renew.
	renewer.renew = make(chan struct{}, 1)
	return renewer
}
// SetExpectedRole records the role that a renewed certificate is expected to
// carry. When a forced renewal yields a certificate with a different role,
// the renewal is retried with exponential backoff until the roles agree.
func (t *TLSRenewer) SetExpectedRole(role string) {
	t.mu.Lock()
	defer t.mu.Unlock()

	t.expectedRole = role
}
// Renew asks the TLSRenewer to renew the certificate (nearly) immediately
// rather than waiting for the next scheduled renewal. It never blocks.
func (t *TLSRenewer) Renew() {
	var request struct{}

	select {
	default:
		// The send would block (for example, a request is already queued),
		// so there is nothing more to do.
	case t.renew <- request:
		// Request handed over to the renewal loop.
	}
}
// Start launches a goroutine that continuously monitors whether the local
// certificates need renewing, either by issuing them locally if key-material
// is available, or by requesting them from a remote CA.
//
// Each pass of the loop:
//
//  1. reads the validity window of the current certificate and derives the
//     wait before the next renewal attempt: an exponential backoff when the
//     certificate is expired or a forced renewal has not yet succeeded, a
//     value from calculateRandomExpiry otherwise, and 5 minutes when the
//     validity could not be read;
//  2. waits for that duration, for a forced renewal requested through Renew,
//     or for ctx to be cancelled;
//  3. calls RenewTLSConfigNow and publishes the outcome on the returned
//     channel.
//
// The returned channel is unbuffered and is closed when ctx is cancelled and
// the goroutine exits. A renewal that succeeds but yields a role other than
// the one set with SetExpectedRole is not published; the loop retries instead.
func (t *TLSRenewer) Start(ctx context.Context) <-chan CertificateUpdate {
	updates := make(chan CertificateUpdate)

	go func() {
		var (
			retry      time.Duration
			forceRetry bool
		)
		expBackoff := events.NewExponentialBackoff(RenewTLSExponentialBackoff)
		defer close(updates)

		// The module tag is the same on every iteration, so apply it once
		// instead of re-wrapping the context on each pass.
		ctx = log.WithModule(ctx, "tls")

		for {
			// Rebuilt on every pass because the node's role can change after
			// a renewal.
			log := log.G(ctx).WithFields(logrus.Fields{
				"node.id":   t.s.ClientTLSCreds.NodeID(),
				"node.role": t.s.ClientTLSCreds.Role(),
			})

			// Our starting default will be 5 minutes
			retry = 5 * time.Minute

			// Since the expiration of the certificate is managed remotely we should update our
			// retry timer on every iteration of this loop.
			// Retrieve the current certificate expiration information.
			validFrom, validUntil, err := readCertValidity(t.s.KeyReader())
			if err != nil {
				// We failed to read the expiration, let's stick with the starting default
				log.WithError(err).Errorf("failed to read the expiration of the TLS certificate in: %s", t.s.KeyReader().Target())

				select {
				case updates <- CertificateUpdate{Err: errors.New("failed to read certificate expiration")}:
				case <-ctx.Done():
					log.Info("shutting down certificate renewal routine")
					return
				}
			} else if validUntil.Before(time.Now()) {
				// If we have an expired certificate, try to renew immediately: the hope that this is a temporary clock skew, or
				// we can issue our own TLS certs.
				log.Warn("the current TLS certificate is expired, so an attempt to renew it will be made immediately")
				// retry immediately(ish) with exponential backoff
				retry = expBackoff.Proceed(nil)
			} else if forceRetry {
				// A forced renewal was requested, but did not succeed yet.
				// retry immediately(ish) with exponential backoff
				retry = expBackoff.Proceed(nil)
			} else {
				// Random retry time between 50% and 80% of the total time to expiration
				retry = calculateRandomExpiry(validFrom, validUntil)
			}

			log.WithFields(logrus.Fields{
				"time": time.Now().Add(retry),
			}).Debugf("next certificate renewal scheduled for %v from now", retry)

			// Use an explicit timer rather than time.After: retry can be very
			// long, and a timer that loses the select must be stopped so it
			// does not stay pending until it would have fired.
			renewTimer := time.NewTimer(retry)
			select {
			case <-renewTimer.C:
				log.Info("renewing certificate")
			case <-t.renew:
				renewTimer.Stop()
				forceRetry = true
				log.Info("forced certificate renewal")

				// Pause briefly before attempting the renewal,
				// to give the CA a chance to reconcile the
				// desired role.
				pause := time.NewTimer(500 * time.Millisecond)
				select {
				case <-pause.C:
				case <-ctx.Done():
					pause.Stop()
					log.Info("shutting down certificate renewal routine")
					return
				}
			case <-ctx.Done():
				renewTimer.Stop()
				log.Info("shutting down certificate renewal routine")
				return
			}

			// A renewal error is not fatal: it is reported on the updates
			// channel and the loop simply tries again later.
			var certUpdate CertificateUpdate
			if err := RenewTLSConfigNow(ctx, t.s, t.connBroker, t.rootPaths); err != nil {
				certUpdate.Err = err
				expBackoff.Failure(nil, nil)
			} else {
				newRole := t.s.ClientTLSCreds.Role()
				t.mu.Lock()
				expectedRole := t.expectedRole
				t.mu.Unlock()
				if expectedRole != "" && expectedRole != newRole {
					// The certificate was renewed but does not carry the
					// expected role yet: count it as a failure and go around
					// again without publishing an update.
					expBackoff.Failure(nil, nil)
					continue
				}

				certUpdate.Role = newRole
				expBackoff.Success(nil)
				forceRetry = false
			}

			select {
			case updates <- certUpdate:
			case <-ctx.Done():
				log.Info("shutting down certificate renewal routine")
				return
			}
		}
	}()

	return updates
}