 package org.apache.kyuubi.spark.connector.hive
 
 import java.lang.{Boolean => JBoolean, Long => JLong}
+import java.net.URI
 
 import scala.util.Try
 
 import org.apache.hadoop.fs.{FileStatus, Path}
 import org.apache.hadoop.hive.ql.plan.{FileSinkDesc, TableDesc}
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
 import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTablePartition}
 import org.apache.spark.sql.connector.catalog.TableChange
 import org.apache.spark.sql.connector.catalog.TableChange._
 import org.apache.spark.sql.execution.command.CommandUtils
-import org.apache.spark.sql.execution.command.CommandUtils.{calculateMultipleLocationSizes, calculateSingleLocationSize}
 import org.apache.spark.sql.execution.datasources.{PartitionDirectory, PartitionedFile}
 import org.apache.spark.sql.hive.execution.HiveFileFormat
 import org.apache.spark.sql.internal.SQLConf
@@ -82,7 +82,28 @@ object HiveConnectorUtils extends Logging {
       isSplitable: JBoolean,
       maxSplitBytes: JLong,
       partitionValues: InternalRow): Seq[PartitionedFile] =
-    Try { // SPARK-42821: 4.0.0-preview2
+    Try { // SPARK-42821, SPARK-51185: Spark 4.0
+      val fileStatusWithMetadataClz = DynClasses.builder()
+        .impl("org.apache.spark.sql.execution.datasources.FileStatusWithMetadata")
+        .buildChecked()
+      DynMethods
+        .builder("splitFiles")
+        .impl(
+          "org.apache.spark.sql.execution.PartitionedFileUtil",
+          fileStatusWithMetadataClz,
+          classOf[Path],
+          classOf[Boolean],
+          classOf[Long],
+          classOf[InternalRow])
+        .buildChecked()
+        .invokeChecked[Seq[PartitionedFile]](
+          null,
+          file,
+          filePath,
+          isSplitable,
+          maxSplitBytes,
+          partitionValues)
+    }.recover { case _: Exception => // SPARK-42821: 4.0.0-preview2
       val fileStatusWithMetadataClz = DynClasses.builder()
         .impl("org.apache.spark.sql.execution.datasources.FileStatusWithMetadata")
         .buildChecked()
@@ -192,19 +213,41 @@ object HiveConnectorUtils extends Logging {
       file.asInstanceOf[FileStatus].getPath
     }.get
 
+  private def calculateMultipleLocationSizes(
+      sparkSession: SparkSession,
+      tid: TableIdentifier,
+      paths: Seq[Option[URI]]): Seq[Long] = {
+
+    val sparkSessionClz = DynClasses.builder()
+      .impl("org.apache.spark.sql.classic.SparkSession") // SPARK-49700 (4.0.0)
+      .impl("org.apache.spark.sql.SparkSession")
+      .build()
+
+    val calculateMultipleLocationSizesMethod =
+      DynMethods.builder("calculateMultipleLocationSizes")
+        .impl(
+          CommandUtils.getClass,
+          sparkSessionClz,
+          classOf[TableIdentifier],
+          classOf[Seq[Option[URI]]])
+        .buildChecked(CommandUtils)
+
+    calculateMultipleLocationSizesMethod
+      .invokeChecked[Seq[Long]](sparkSession, tid, paths)
+  }
+
   def calculateTotalSize(
       spark: SparkSession,
       catalogTable: CatalogTable,
       hiveTableCatalog: HiveTableCatalog): (BigInt, Seq[CatalogTablePartition]) = {
     val sessionState = spark.sessionState
     val startTime = System.nanoTime()
     val (totalSize, newPartitions) = if (catalogTable.partitionColumnNames.isEmpty) {
-      (
-        calculateSingleLocationSize(
-          sessionState,
-          catalogTable.identifier,
-          catalogTable.storage.locationUri),
-        Seq())
+      val tableSize = CommandUtils.calculateSingleLocationSize(
+        sessionState,
+        catalogTable.identifier,
+        catalogTable.storage.locationUri)
+      (tableSize, Seq())
     } else {
       // Calculate table size as a sum of the visible partitions. See SPARK-21079
       val partitions = hiveTableCatalog.listPartitions(catalogTable.identifier)
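
Editorial note, not part of the patch: the new private helper keeps the same name as the
`CommandUtils.calculateMultipleLocationSizes` import it replaces, presumably so that existing
call sites further down in `calculateTotalSize` resolve to the version-tolerant wrapper
unchanged. An illustrative call site under that assumption (variable names are hypothetical):

// Sum the sizes of all visible partition locations via the reflective wrapper defined above.
val paths = partitions.map(_.storage.locationUri)
val partitionSizes = calculateMultipleLocationSizes(spark, catalogTable.identifier, paths)
val partitionedTotalSize = BigInt(partitionSizes.sum)
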
@@ -402,7 +445,13 @@ object HiveConnectorUtils extends Logging {
     new StructType(newFields)
   }
 
-  def withSQLConf[T](pairs: (String, String)*)(f: => T): T = {
+  // This is a fork of Spark's withSQLConf, renamed to avoid linkage issues in
+  // cross-version cases.
+  // For example, SPARK-46227 (4.0.0) moves `withSQLConf` from SQLHelper to SQLConfHelper,
+  // so classes that extend SQLConfHelper link against the super class's method when
+  // compiled with Spark 4.0, and a linkage error then occurs when the jar runs on lower
+  // Spark versions.
+  def withSparkSQLConf[T](pairs: (String, String)*)(f: => T): T = {
     val conf = SQLConf.get
     val (keys, values) = pairs.unzip
     val currentValues = keys.map { key =>
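
Editorial note, not part of the patch: a usage sketch of the renamed helper; the configuration
keys and the block body are illustrative. Mirroring Spark's withSQLConf, the captured current
values are restored after the block runs, so the overrides are scoped to the block.

// Evaluate a block with temporary SQL conf overrides on the active session's SQLConf.
val rows = HiveConnectorUtils.withSparkSQLConf(
  "spark.sql.caseSensitive" -> "true",
  "spark.sql.hive.convertMetastoreParquet" -> "false") {
  spark.sql("SELECT 1 AS id").collect()
}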