import jakarta.enterprise.inject.Instance;
import jakarta.inject.Inject;
import lombok.Getter;
+ import lombok.ToString;
import org.eclipse.microprofile.config.inject.ConfigProperty;
import org.eclipse.microprofile.context.ManagedExecutor;
import org.eclipse.microprofile.context.ThreadContext;
@@ -76,7 +77,7 @@ public class KafkaSqlUpgraderManager {
* <p>
* However, since we have heartbeat messages, this time does not have to be too long.
*/
- @ConfigProperty(name = "registry.kafkasql.upgrade-lock-timeout", defaultValue = "10s")
+ @ConfigProperty(name = "registry.kafkasql.upgrade-lock-timeout", defaultValue = "80s")
@Info(category = "store", description = "How long should KafkaSQL upgrader manager hold the lock before it's assumed to have failed. " +
"There is a tradeoff between giving the upgrade process enough time and recovering from a failed upgrade. " +
"You may need to increase this value if your Kafka cluster is very busy.", availableSince = "2.5.9.Final")
@@ -128,6 +129,8 @@ public class KafkaSqlUpgraderManager {

private long sequence;

+ private Instant kafkaClock;
+
private volatile boolean localTryLocked;
private Instant localTryLockedTimestamp;
private volatile boolean upgrading;
@@ -165,8 +168,9 @@ public synchronized void init() {
// We need to keep in mind that multiple nodes might start at the same time.
// Upgrader runs only once on startup, so this can be set once.
localUpgraderUUID = UUID.randomUUID().toString();
+ log.debug("UUID of this upgrader is {}", localUpgraderUUID);

- waitHeartbeatEmitter = new WaitHeartbeatEmitter(scale(lockTimeout, 1.1f), submitter, log, threadContext);
+ waitHeartbeatEmitter = new WaitHeartbeatEmitter(scale(lockTimeout, 0.26f), submitter, log, threadContext);

// Produce a bootstrap message to know when we are up-to-date with the topic. We don't know the version yet.
submitter.send(UpgraderKey.create(true), UpgraderValue.create(ActionType.UPGRADE_BOOTSTRAP, localUpgraderUUID, null));
@@ -192,9 +196,12 @@ public synchronized void read(Instant currentTimestamp, MessageKey key, MessageV
}

if (key instanceof UpgraderKey) {
+ log.debug("Reading UpgraderKey {}", key);
+ log.debug("Reading UpgraderValue {} {}", ((UpgraderValue) value).getAction(), value);
// Update our lock map
var upgraderValue = (UpgraderValue) value;
- updateLockMap(currentTimestamp, upgraderValue);
+ processMessage(currentTimestamp, upgraderValue);
+ log.debug("Lock map state: {}", lockMap);
}
}

@@ -218,8 +225,10 @@ public synchronized void read(Instant currentTimestamp, MessageKey key, MessageV
var slip = Duration.between(initTimestamp, currentTimestamp).abs().toMillis();
if (slip > scale(lockTimeout, 0.25f).toMillis()) {
log.warn("We detected a significant time difference ({} ms) between a moment when a Kafka message is produced (local time), " +
- "and it's creation timestamp reported by Kafka at the moment it is consumed. If this causes issues during KafkaSQL storage upgrade, " +
- "consider increasing 'registry.kafkasql.upgrade-lock-timeout' config value (currently {} ms).", slip, lockTimeout);
+ "and its creation timestamp reported by Kafka at the moment it is consumed. " +
+ "This might happen when Kafka is configured with message.timestamp.type=LogAppendTime. " +
+ "If this causes issues during KafkaSQL storage upgrade, " +
+ "consider increasing 'registry.kafkasql.upgrade-lock-timeout' config value (currently {}).", slip, lockTimeout);
}
switchState(State.WAIT);
}
@@ -256,9 +265,8 @@ public synchronized void read(Instant currentTimestamp, MessageKey key, MessageV
// Nobody tried to upgrade yet, or failed, we should try.
if (localTryLocked) {
// We have tried to lock, eventually it might be our turn to go, but we have to check for our own timeout
- var now = Instant.now();
- if (lockMap.get(localUpgraderUUID).latestLockTimestamp != null && now.isAfter(lockMap.get(localUpgraderUUID).latestLockTimestamp.plus(lockTimeout)) &&
- localTryLockedTimestamp != null && now.isAfter(localTryLockedTimestamp.plus(scale(lockTimeout, 1.5f)))) { // We need to prevent loop here, so we keep a local timestamp as well
+ if (lockMap.get(localUpgraderUUID).latestLockTimestamp != null && kafkaClock.isAfter(lockMap.get(localUpgraderUUID).latestLockTimestamp.plus(lockTimeout)) &&
+ localTryLockedTimestamp != null && Instant.now().isAfter(localTryLockedTimestamp.plus(scale(lockTimeout, 1.5f)))) { // We need to prevent loop here, so we keep a local timestamp as well
// Our own lock has timed out, we can try again
localTryLocked = false;
switchState(State.TRY_LOCK);
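Note that the rewritten condition mixes two clocks on purpose: the age of our lock record, which is built from Kafka messages, is measured against the new kafkaClock, while the retry backoff uses the local wall clock so that message round-trip delays cannot create a tight retry loop. A rough restatement with hypothetical helper names (they do not exist in the class; scale refers to the class's static scale(Duration, float) helper), illustrative only:

// Illustrative only; these helpers just restate the condition above.
static boolean lockTimedOutOnKafkaClock(Instant kafkaClock, Instant latestLockTimestamp, Duration lockTimeout) {
    // The lock record is observed through Kafka messages, so its age is measured on the kafka clock.
    return latestLockTimestamp != null && kafkaClock.isAfter(latestLockTimestamp.plus(lockTimeout));
}

static boolean localRetryBackoffElapsed(Instant localTryLockedTimestamp, Duration lockTimeout) {
    // Our own backoff is purely local, so the wall clock is enough and prevents a tight retry loop.
    return localTryLockedTimestamp != null
            && Instant.now().isAfter(localTryLockedTimestamp.plus(scale(lockTimeout, 1.5f)));
}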
@@ -282,11 +290,10 @@ public synchronized void read(Instant currentTimestamp, MessageKey key, MessageV
// We've got the lock, but we may have sent an unlock message
if (localTryLocked) {
// We got the lock, but first check if we have enough time.
- var now = Instant.now();
- if (now.isAfter(lockMap.get(localUpgraderUUID).latestLockTimestamp.plus(scale(lockTimeout, 0.5f)))) {
+ if (kafkaClock.isAfter(lockMap.get(localUpgraderUUID).latestLockTimestamp.plus(scale(lockTimeout, 0.5f)))) {
// We should unlock and wait, then try again
log.warn("We've got the lock but we don't have enough time ({} ms remaining). Unlocking.",
- Duration.between(lockMap.get(localUpgraderUUID).latestLockTimestamp, now).toMillis());
+ Duration.between(lockMap.get(localUpgraderUUID).latestLockTimestamp, kafkaClock).toMillis());
submitter.send(UpgraderKey.create(true), UpgraderValue.create(ActionType.UPGRADE_ABORT_AND_UNLOCK, localUpgraderUUID, targetVersion));
localTryLocked = false;
// No need to send heartbeat, since we're expecting to read the unlock message
@@ -488,14 +495,46 @@ private LockRecord computeActiveLock() {
var r = lockMap.values().stream()
.filter(rr -> rr.targetVersion != null &&
rr.targetVersion == targetVersion &&
- rr.tryLocked &&
- !rr.isTimedOut(Instant.now(), lockTimeout))
+ rr.tryLocked)
+ .filter(rr -> {
+ var to = rr.isTimedOut(kafkaClock, lockTimeout);
+ if (to) {
+ log.debug("Lock of upgrader {} has timed out.", rr.upgraderUUID);
+ }
+ return !to;
+ })
.min(Comparator.comparingLong(rr -> rr.tryLockSequence));
return r.orElse(null);
}


- private void updateLockMap(Instant timestamp, UpgraderValue value) {
+ private void processMessage(Instant timestamp, UpgraderValue value) {
+ /*
+ * There are two main ways a Kafka message gets a timestamp:
+ * - By the broker when a message is put into the log (LogAppendTime), or
+ * - By a client/producer when a message is created (CreateTime)
+ * based on topic configuration https://kafka.apache.org/documentation/#log.message.timestamp.type
+ *
+ * The first case is better for synchronisation, and we try to set it by default in
+ * io.apicurio.registry.storage.impl.kafkasql.KafkaSqlRegistryStorage.autoCreateTopics.
+ *
+ * However, we have to handle the second case as well, so we have to ensure that:
+ * - the kafka clock does not go back in time, and
+ * - we use a big enough lock timeout to handle potentially large round-trip times.
+ */
+ if (kafkaClock == null || timestamp.isAfter(kafkaClock)) {
+ kafkaClock = timestamp;
+ } else {
+ var slip = Duration.between(timestamp, kafkaClock).abs().toMillis();
+ if (slip > scale(lockTimeout, 0.25f).toMillis()) {
+ log.warn("Ignoring significantly antedated timestamp {}, current kafka clock is {}. " +
+ "This might happen when Kafka is configured with message.timestamp.type=CreateTime. " +
+ "If this causes issues during KafkaSQL storage upgrade, " +
+ "consider increasing 'registry.kafkasql.upgrade-lock-timeout' config value (currently {}).", timestamp, kafkaClock, lockTimeout);
+ } else {
+ log.debug("Ignoring antedated timestamp {}, current kafka clock is {}.", timestamp, kafkaClock);
+ }
+ }
if (value.getUpgraderUUID() == null) {
return;
}
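The comment above refers to the topic-level setting message.timestamp.type. A minimal, self-contained sketch of creating a topic with LogAppendTime via the Kafka admin client; the topic name, partition count, replication factor and broker address are placeholders, and this is not the registry's actual autoCreateTopics code:

import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.kafka.clients.admin.Admin;
import org.apache.kafka.clients.admin.NewTopic;
import org.apache.kafka.common.config.TopicConfig;

public class CreateUpgraderTopicSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // placeholder broker address

        try (Admin admin = Admin.create(props)) {
            // With LogAppendTime the broker assigns the timestamp on append,
            // which is what the upgrader's "kafka clock" prefers.
            NewTopic topic = new NewTopic("kafkasql-journal", 1, (short) 1)
                    .configs(Map.of(TopicConfig.MESSAGE_TIMESTAMP_TYPE_CONFIG, "LogAppendTime"));
            admin.createTopics(List.of(topic)).all().get();
        }
    }
}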
@@ -559,6 +598,7 @@ private enum State {
}


+ @ToString
private static class LockRecord {
// UUID of the upgrader
String upgraderUUID;
@@ -608,7 +648,7 @@ private UpgraderManagerHandle(KafkaSqlSubmitter submitter, String localUpgraderU
*/
public synchronized void heartbeat() {
var now = Instant.now();
- if (lastHeartbeat == null || now.isAfter(lastHeartbeat.plus(scale(lockTimeout, 0.35f)))) {
+ if (lastHeartbeat == null || now.isAfter(lastHeartbeat.plus(scale(lockTimeout, 0.25f)))) {
log.debug("Sending lock heartbeat.");
submitter.send(UpgraderKey.create(true), UpgraderValue.create(ActionType.UPGRADE_LOCK_HEARTBEAT, localUpgraderUUID, null));
lastHeartbeat = now;
@@ -617,7 +657,7 @@ public synchronized void heartbeat() {


private synchronized boolean isTimedOut() {
- return Instant.now().isAfter(lastHeartbeat.plus(scale(lockTimeout, 0.85f)));
+ return Instant.now().isAfter(lastHeartbeat.plus(scale(lockTimeout, 0.75f)));
}
}

@@ -697,13 +737,13 @@ public static Duration scale(Duration original, float scale) {

/* This is the current lock timeout schematic:
*
- * Lock timeout:          |------------------------------| 100% = 10s (default)
- * Wait heartbeat:        |                              |  . 110% = 11s
- * Lock heartbeat:        |          .                   |    35% = 3.5s
- * Too late to upgrade:   |               .              |    50% = 5s
- * Upgrader timeout*:     |                         .    |    85% = 8.5s
+ * Lock timeout:          |------------------------------| 100%
+ * Wait heartbeat:        |        .                     |   26%
+ * Lock heartbeat:        |        .                     |   25%
+ * Too late to upgrade:   |               .              |   50%
+ * Upgrader timeout*:     |                       .      |   75%
*
* * This is the longest time upgrader can block without sending a heartbeat,
- * assuming that the heartbeat Kafka message is stored within ~15% of lock timeout (1.5s by default).
+ * assuming that the heartbeat Kafka message is stored within ~25% of lock timeout.
*/
}
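To make the revised percentages concrete with the new 80s default: assuming scale(Duration, float) simply multiplies the duration by the factor (an assumption about the helper named in the hunk header above, shown only to derive the numbers), the intervals work out as follows:

import java.time.Duration;

class LockTimeoutMath {
    // Assumed implementation of scale(Duration, float); the real one lives in KafkaSqlUpgraderManager.
    static Duration scale(Duration original, float scale) {
        return Duration.ofMillis((long) (original.toMillis() * scale));
    }

    public static void main(String[] args) {
        Duration lockTimeout = Duration.ofSeconds(80); // the new default
        System.out.println("wait heartbeat   = " + scale(lockTimeout, 0.26f).toMillis() + " ms"); // 20800 ms
        System.out.println("lock heartbeat   = " + scale(lockTimeout, 0.25f).toMillis() + " ms"); // 20000 ms
        System.out.println("too late         = " + scale(lockTimeout, 0.5f).toMillis() + " ms");  // 40000 ms
        System.out.println("upgrader timeout = " + scale(lockTimeout, 0.75f).toMillis() + " ms"); // 60000 ms
    }
}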