From 784a1314a64d05d55c9618748c21b8945235f3fe Mon Sep 17 00:00:00 2001
From: Oskar Szwajkowski
Date: Wed, 14 Aug 2024 15:28:05 +0200
Subject: [PATCH] Use exponential backoff retry strategy for glue client

Default strategy was not good enough for handling concurrent modifications,
which led to flaky behavior in concurrent modifications tests like
TestIcebergGlueCatalogConnectorSmokeTest#testDeleteRowsConcurrently
---
 .../plugin/hive/metastore/glue/GlueMetastoreModule.java     | 4 ++++
 .../trino/plugin/hive/metastore/glue/v1/GlueClientUtil.java | 5 ++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueMetastoreModule.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueMetastoreModule.java
index c8b439385d28..319b2c74724e 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueMetastoreModule.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueMetastoreModule.java
@@ -36,6 +36,7 @@
 import software.amazon.awssdk.http.apache.ProxyConfiguration;
 import software.amazon.awssdk.regions.Region;
 import software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain;
+import software.amazon.awssdk.retries.api.BackoffStrategy;
 import software.amazon.awssdk.services.glue.GlueClient;
 import software.amazon.awssdk.services.glue.GlueClientBuilder;
 import software.amazon.awssdk.services.glue.model.ConcurrentModificationException;
@@ -131,6 +132,9 @@ public static GlueClient createGlueClient(GlueHiveMetastoreConfig config, OpenTe
                         .build().newExecutionInterceptor())
                 .retryStrategy(retryBuilder -> retryBuilder
                         .retryOnException(throwable -> throwable instanceof ConcurrentModificationException)
+                        .backoffStrategy(BackoffStrategy.exponentialDelay(
+                                java.time.Duration.ofMillis(20),
+                                java.time.Duration.ofMillis(1500)))
                         .maxAttempts(config.getMaxGlueErrorRetries())));
 
         Optional staticCredentialsProvider = getStaticCredentialsProvider(config);
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueClientUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueClientUtil.java
index 9a795f8cb765..1172ea9ea4f7 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueClientUtil.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueClientUtil.java
@@ -18,8 +18,10 @@
 import com.amazonaws.client.builder.AwsClientBuilder;
 import com.amazonaws.handlers.RequestHandler2;
 import com.amazonaws.metrics.RequestMetricCollector;
+import com.amazonaws.retry.PredefinedBackoffStrategies.ExponentialBackoffStrategy;
 import com.amazonaws.retry.PredefinedRetryPolicies;
 import com.amazonaws.retry.RetryPolicy;
+import com.amazonaws.retry.RetryPolicy.BackoffStrategy;
 import com.amazonaws.retry.RetryPolicy.RetryCondition;
 import com.amazonaws.services.glue.AWSGlueAsync;
 import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder;
@@ -45,11 +47,12 @@ public static AWSGlueAsync createAsyncGlueClient(
         RetryCondition customRetryCondition = (requestContext, exception, retriesAttempted) ->
                 defaultRetryPolicy.getRetryCondition().shouldRetry(requestContext, exception, retriesAttempted)
                         || exception instanceof ConcurrentModificationException;
+        BackoffStrategy customBackoffStrategy = new ExponentialBackoffStrategy(20, 1500);
 
         RetryPolicy glueRetryPolicy = RetryPolicy.builder()
                 .withRetryMode(defaultRetryPolicy.getRetryMode())
                 .withRetryCondition(customRetryCondition)
-                .withBackoffStrategy(defaultRetryPolicy.getBackoffStrategy())
+                .withBackoffStrategy(customBackoffStrategy)
                 .withFastFailRateLimiting(defaultRetryPolicy.isFastFailRateLimiting())
                 .withMaxErrorRetry(config.getMaxGlueErrorRetries())
                 .build();