Skip to content

Commit 2f840cd

Browse files
committed
remove todo, tweak ci, typing adjustments
1 parent 8a7fbaa commit 2f840cd

File tree

6 files changed

+60
-49
lines changed

6 files changed

+60
-49
lines changed

.github/workflows/pythonpackage.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -34,21 +34,21 @@ jobs:
3434
run: poetry run pylint functional
3535
- name: black
3636
run: poetry run black --check --diff --color functional
37-
if: always()
37+
if: success() || failure()
3838
- name: Test with pytest
3939
run: poetry run pytest --cov=functional --cov-report=xml
40-
if: always()
40+
if: success() || failure()
4141
- name: mypy
4242
run: poetry run mypy --warn-unused-configs --check-untyped-defs --warn-redundant-casts --warn-unused-ignores --extra-checks functional
43-
if: always()
43+
if: success() || failure()
4444
- uses: tsuyoshicho/action-mypy@v4
4545
with:
4646
github_token: ${{ secrets.GITHUB_TOKEN }}
4747
reporter: github-pr-review
4848
level: warning
4949
execute_command: poetry run mypy --strict
5050
filter_mode: nofilter
51-
if: matrix.python-version == '3.11' # run only on latest to avoid duplicate warnings
51+
if: (success() || failure()) && matrix.python-version == '3.11' # run only on latest to avoid duplicate warnings
5252
- uses: codecov/codecov-action@v1
5353
with:
5454
file: ./coverage.xml

functional/io.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55
import gzip
66
import io
77
import lzma
8-
from os import PathLike
8+
from pathlib import Path
99
from typing import Any, Optional
1010

1111
from typing_extensions import TypeAlias
1212

13-
# from typeshed
14-
StrOrBytesPath: TypeAlias = str | bytes | PathLike
13+
# adapted from typeshed
14+
StrOrBytesPath: TypeAlias = str | bytes | Path
1515
FileDescriptorOrPath: TypeAlias = int | StrOrBytesPath
1616

1717
WRITE_MODE = "wt"
@@ -28,7 +28,7 @@ class ReusableFile:
2828
# pylint: disable=too-many-instance-attributes
2929
def __init__(
3030
self,
31-
path: FileDescriptorOrPath,
31+
path: StrOrBytesPath,
3232
delimiter: Optional[str] = None,
3333
mode: str = "r",
3434
buffering: int = -1,
@@ -91,7 +91,7 @@ class CompressedFile(ReusableFile):
9191
# pylint: disable=too-many-instance-attributes
9292
def __init__(
9393
self,
94-
path: FileDescriptorOrPath,
94+
path: StrOrBytesPath,
9595
delimiter: Optional[str] = None,
9696
mode: str = "rt",
9797
buffering: int = -1,
@@ -122,7 +122,7 @@ class GZFile(CompressedFile):
122122
# pylint: disable=too-many-instance-attributes
123123
def __init__(
124124
self,
125-
path: FileDescriptorOrPath,
125+
path: StrOrBytesPath,
126126
delimiter: Optional[str] = None,
127127
mode: str = "rt",
128128
buffering: int = -1,
@@ -177,7 +177,7 @@ class BZ2File(CompressedFile):
177177
# pylint: disable=too-many-instance-attributes
178178
def __init__(
179179
self,
180-
path: FileDescriptorOrPath,
180+
path: StrOrBytesPath,
181181
delimiter: Optional[str] = None,
182182
mode: str = "rt",
183183
buffering: int = -1,
@@ -299,7 +299,7 @@ def get_read_function(filename: FileDescriptorOrPath, disable_compression: bool)
299299

300300

301301
def universal_write_open(
302-
path: FileDescriptorOrPath,
302+
path: StrOrBytesPath,
303303
mode: str,
304304
buffering: int = -1,
305305
encoding: Optional[str] = None,

functional/pipeline.py

+22-20
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
from __future__ import annotations
6+
import builtins
67

78
import collections
89
import csv
@@ -23,6 +24,7 @@
2324
Optional,
2425
TypeVar,
2526
Union,
27+
cast,
2628
overload,
2729
)
2830

@@ -31,7 +33,7 @@
3133

3234
from functional import transformations
3335
from functional.execution import ExecutionEngine, ExecutionStrategies
34-
from functional.io import WRITE_MODE, FileDescriptorOrPath, universal_write_open
36+
from functional.io import WRITE_MODE, StrOrBytesPath, universal_write_open
3537
from functional.lineage import Lineage
3638
from functional.util import (
3739
SupportsRichComparisonT,
@@ -173,7 +175,7 @@ def __nonzero__(self) -> bool:
173175
"""
174176
return self.len() != 0
175177

176-
def __getitem__(self, item: int) -> T | Sequence: # TODO
178+
def __getitem__(self, item: int) -> T | Sequence:
177179
"""
178180
Gets item at given index.
179181
@@ -212,7 +214,7 @@ def __add__(self, other) -> Sequence[T]:
212214
else:
213215
return Sequence(self.sequence + other, no_wrap=self.no_wrap)
214216

215-
def _evaluate(self) -> Iterable[str]:
217+
def _evaluate(self) -> Iterable:
216218
"""
217219
Creates and returns an iterator which applies all the transformations in the lineage
218220
@@ -1483,7 +1485,7 @@ def to_dict(
14831485
...
14841486

14851487
def to_dict(
1486-
self: Sequence[tuple[U, V]], default: Optional[Callable[[], V]] = None
1488+
self: Sequence[tuple[U, V]], default: Callable[[], V] | V | None = None
14871489
) -> dict[U, V] | collections.defaultdict[U, V]:
14881490
"""
14891491
Converts sequence of (Key, Value) pairs to a dictionary.
@@ -1505,7 +1507,7 @@ def to_dict(
15051507
return dictionary
15061508
else:
15071509
return collections.defaultdict(
1508-
default if callable(default) else lambda: default, dictionary
1510+
default if callable(default) else lambda: cast(V, default), dictionary
15091511
)
15101512

15111513
@overload
@@ -1541,19 +1543,19 @@ def dict(
15411543
# pylint: disable=too-many-locals
15421544
def to_file(
15431545
self,
1544-
path: FileDescriptorOrPath,
1545-
delimiter=None,
1546-
mode="wt",
1547-
buffering=-1,
1548-
encoding=None,
1549-
errors=None,
1550-
newline=None,
1551-
compresslevel=9,
1552-
format=None,
1553-
check=-1,
1554-
preset=None,
1555-
filters=None,
1556-
compression=None,
1546+
path: StrOrBytesPath,
1547+
delimiter: Optional[str] = None,
1548+
mode: str = "wt",
1549+
buffering: int = -1,
1550+
encoding: Optional[str] = None,
1551+
errors: Optional[str] = None,
1552+
newline: Optional[str] = None,
1553+
compresslevel: int = 9,
1554+
format: Optional[int] = None,
1555+
check: int = -1,
1556+
preset: Optional[int] = None,
1557+
filters: Optional[Iterable[builtins.dict]] = None,
1558+
compression: Optional[str] = None,
15571559
):
15581560
"""
15591561
Saves the sequence to a file by executing str(self) which becomes str(self.to_list()). If
@@ -1594,9 +1596,9 @@ def to_file(
15941596

15951597
def to_jsonl(
15961598
self,
1597-
path: FileDescriptorOrPath,
1599+
path: StrOrBytesPath,
15981600
mode: str = "wb",
1599-
compression: Optional[bool] = None,
1601+
compression: Optional[str] = None,
16001602
):
16011603
"""
16021604
Saves the sequence to a jsonl file. Each element is mapped using json.dumps then written

functional/streams.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import builtins
44
import csv as csvapi
55
import json as jsonapi
6+
from pathlib import Path
67
import re
78
import sqlite3 as sqlite3api
89
from itertools import chain
@@ -218,7 +219,7 @@ def csv(
218219
:param fmt_params: options passed to csv.reader
219220
:return: Sequence wrapping csv file
220221
"""
221-
if isinstance(csv_file, FileDescriptorOrPath):
222+
if isinstance(csv_file, (int, str, bytes, Path)):
222223
file_open = get_read_function(csv_file, self.disable_compression)
223224
input_file = file_open(csv_file)
224225
elif hasattr(csv_file, "next") or hasattr(csv_file, "__next__"):
@@ -240,7 +241,7 @@ def csv_dict_reader(
240241
dialect="excel",
241242
**kwds,
242243
):
243-
if isinstance(csv_file, FileDescriptorOrPath):
244+
if isinstance(csv_file, (int, str, bytes, Path)):
244245
file_open = get_read_function(csv_file, self.disable_compression)
245246
input_file = file_open(csv_file)
246247
elif hasattr(csv_file, "next") or hasattr(csv_file, "__next__"):
@@ -273,7 +274,7 @@ def jsonl(self, jsonl_file: FileDescriptorOrPath | Iterable[str]):
273274
:param jsonl_file: path or file containing jsonl content
274275
:return: Sequence wrapping jsonl file
275276
"""
276-
if isinstance(jsonl_file, FileDescriptorOrPath):
277+
if isinstance(jsonl_file, (int, str, bytes, Path)):
277278
file_open = get_read_function(jsonl_file, self.disable_compression)
278279
input_file = file_open(jsonl_file)
279280
else:
@@ -298,7 +299,7 @@ def json(self, json_file: FileDescriptorOrPath | IO) -> Sequence[Any]:
298299
:param json_file: path or file containing json content
299300
:return: Sequence wrapping jsonl file
300301
"""
301-
if isinstance(json_file, FileDescriptorOrPath):
302+
if isinstance(json_file, (str, bytes, Path)):
302303
file_open = get_read_function(json_file, self.disable_compression)
303304
input_file = file_open(json_file)
304305
json_input = jsonapi.load(input_file)

functional/transformations.py

+17-11
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from __future__ import annotations
22

33
import collections
4+
import collections.abc
45
import types
5-
import typing
6-
from collections.abc import Callable, Iterable
6+
from collections.abc import Callable, Iterable, Set
77
from functools import partial
88
from itertools import (
99
accumulate,
@@ -15,20 +15,26 @@
1515
starmap,
1616
takewhile,
1717
)
18-
from typing import NamedTuple, Optional, TypeVar
18+
from typing import TYPE_CHECKING, NamedTuple, Optional, TypeVar
19+
1920
from functional.execution import ExecutionStrategies
21+
from functional.util import identity
22+
23+
if TYPE_CHECKING:
24+
from functional.pipeline import Sequence
2025

2126

2227
class Transformation(NamedTuple):
2328
name: str
24-
function: Optional[Callable[[Iterable], Iterable]]
25-
execution_strategies: set[ExecutionStrategies] = {}
29+
function: Callable[[Iterable], Iterable]
30+
execution_strategies: Set[int] = frozenset()
2631

2732

2833
T = TypeVar("T")
2934

3035
#: Cache transformation
31-
CACHE_T = Transformation("cache", None)
36+
CACHE_T = Transformation("cache", identity)
37
# this identity function is never actually invoked; it only satisfies the type checker
3238

3339

3440
def name(function: Callable) -> str:
@@ -43,13 +49,13 @@ def name(function: Callable) -> str:
4349
return str(function)
4450

4551

46-
def listify(sequence: Iterable[T]) -> typing.Sequence[T]:
52+
def listify(sequence: Iterable[T]) -> collections.abc.Sequence[T]:
4753
"""
4854
Convert an iterable to a list
4955
:param sequence: sequence to convert
5056
:return: list
5157
"""
52-
if isinstance(sequence, typing.Sequence):
58+
if isinstance(sequence, collections.abc.Sequence):
5359
return sequence
5460
return list(sequence)
5561

@@ -107,7 +113,7 @@ def _reverse_iter(iterable: Iterable[T]) -> Iterable[T]:
107113
:return: reversed iterable
108114
"""
109115
try: # avoid a copy if we can
110-
return reversed(iterable)
116+
return reversed(iterable) # type: ignore
111117
except TypeError:
112118
return reversed(list(iterable))
113119

@@ -278,7 +284,7 @@ def tail_t() -> Transformation:
278284
return Transformation("tail", lambda sequence: islice(sequence, 1, None))
279285

280286

281-
def _inits(sequence: Iterable[T], wrap) -> list[typing.Sequence[T]]:
287+
def _inits(sequence: Iterable[T], wrap) -> list[Sequence[T]]:
282288
"""
283289
Implementation for inits_t
284290
:param sequence: sequence to inits
@@ -297,7 +303,7 @@ def inits_t(wrap):
297303
return Transformation("inits", partial(_inits, wrap=wrap))
298304

299305

300-
def _tails(sequence: Iterable[T], wrap) -> list[typing.Sequence[T]]:
306+
def _tails(sequence: Iterable[T], wrap) -> list[Sequence[T]]:
301307
"""
302308
Implementation for tails_t
303309
:param sequence: sequence to tails

functional/util.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from functools import reduce
66
from itertools import chain, count, islice, takewhile
77
from multiprocessing import Pool, cpu_count
8-
from typing import Any, Callable, Optional, Protocol, Sized, TypeVar, Union
8+
from typing import Any, Callable, Optional, Protocol, Sized, TypeVar, Union, cast
99

1010
import dill as serializer # type: ignore
1111
from typing_extensions import TypeAlias
@@ -190,7 +190,9 @@ def lazy_parallelize(
190190
processes = CPU_COUNT
191191
else:
192192
processes = min(processes, CPU_COUNT)
193-
partition_size = partition_size or compute_partition_size(seq, processes)
193+
partition_size = partition_size or compute_partition_size(
194+
cast(Sized, seq), processes
195+
)
194196
with Pool(processes=processes) as pool:
195197
partitions = split_every(partition_size, iter(seq))
196198
packed_partitions = (pack(func, (partition,)) for partition in partitions)

0 commit comments

Comments (0)