Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions python/pyspark/sql/connect/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ def pyspark_types_to_proto_types(data_type: DataType) -> pb2.DataType:
ret = pb2.DataType()
if isinstance(data_type, NullType):
ret.null.CopyFrom(pb2.DataType.NULL())
elif isinstance(data_type, CharType):
ret.char.length = data_type.length
elif isinstance(data_type, VarcharType):
ret.var_char.length = data_type.length
elif isinstance(data_type, StringType):
ret.string.collation = data_type.collation
elif isinstance(data_type, BooleanType):
Expand Down
44 changes: 44 additions & 0 deletions python/pyspark/sql/tests/test_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1402,6 +1402,50 @@ def my_udf(input_val):
result_type = df_result.schema["result"].dataType
self.assertEqual(result_type, StringType("fr"))

def test_udf_with_char_varchar_return_type(self):
    """Check that UDFs declared with char/varchar return types are rejected.

    Every return type exercised here contains ``char(n)`` or ``varchar(n)``,
    either at the top level or nested inside an array, map, or struct.
    Selecting a UDF declared with any of them must raise an exception whose
    message matches one of the two alternatives in ``expected_error``.
    """
    # Building blocks: a DDL type string paired with a value the UDF returns.
    char_type, char_value = "char(10)", "a"
    varchar_type, varchar_value = "varchar(8)", "a"
    array_with_char_type, array_with_char_type_value = "array<char(5)>", ["a", "b"]
    array_with_varchar_type, array_with_varchar_value = "array<varchar(12)>", ["a", "b"]
    map_type, map_value = f"map<{char_type}, {varchar_type}>", {"a": "b"}
    struct_type, struct_value = (
        f"struct<f1: {char_type}, f2: {varchar_type}>",
        {"f1": "a", "f2": "b"},
    )

    pairs = [
        (char_type, char_value),
        (varchar_type, varchar_value),
        (array_with_char_type, array_with_char_type_value),
        (array_with_varchar_type, array_with_varchar_value),
        (map_type, map_value),
        (struct_type, struct_value),
        # NOTE(review): the values of the next two entries are formatted
        # strings, not real dict objects. Presumably acceptable because the
        # UDF is expected to be rejected regardless of the value -- confirm.
        (
            f"struct<f1: {array_with_char_type}, f2: {array_with_varchar_type}, "
            f"f3: {map_type}>",
            f"{{'f1': {array_with_char_type_value}, 'f2': {array_with_varchar_value}, "
            f"'f3': {map_value}}}",
        ),
        (
            f"map<{array_with_char_type}, {array_with_varchar_type}>",
            f"{{{array_with_char_type_value}: {array_with_varchar_value}}}",
        ),
        (f"array<{struct_type}>", [struct_value, struct_value]),
    ]

    # Two alternative messages are accepted; the pattern is loop-invariant,
    # so it is built once here.
    expected_error = (
        "(Please use a different output data type for your UDF or DataFrame|"
        "Invalid return type with Arrow-optimized Python UDF)"
    )

    for return_type, return_value in pairs:
        # subTest keeps the remaining return types running after a failure
        # and names the offending type in the test report.
        with self.subTest(return_type=return_type):
            with self.assertRaisesRegex(Exception, expected_error):

                @udf(return_type)
                def my_udf():
                    return return_value

                self.spark.range(1).select(my_udf().alias("result")).show()


class UDFTests(BaseUDFTestsMixin, ReusedSQLTestCase):
@classmethod
Expand Down