Skip to content

Commit fcff677

Browse files
Merge pull request #19 from JustinKurland/JustinKurland-docstring-PEP8
Update expanding.py docstring PEP8
2 parents 22f01a2 + ab8a396 commit fcff677

File tree

1 file changed

+146
-121
lines changed

1 file changed

+146
-121
lines changed

src/pytimetk/core/expanding.py

+146-121
Original file line numberDiff line numberDiff line change
@@ -20,61 +20,73 @@ def augment_expanding(
2020
engine: str = 'pandas',
2121
**kwargs,
2222
) -> pd.DataFrame:
23-
'''Apply one or more Series-based expanding functions to one or more columns of a DataFrame.
24-
23+
'''
24+
Apply one or more Series-based expanding functions to one or more columns of a DataFrame.
25+
2526
Parameters
2627
----------
2728
data : Union[pd.DataFrame, pd.core.groupby.generic.DataFrameGroupBy]
2829
Input data to be processed. Can be a Pandas DataFrame or a GroupBy object.
2930
date_column : str
3031
Name of the datetime column. Data is sorted by this column within each group.
3132
value_column : Union[str, list]
32-
Column(s) to which the expanding window functions should be applied. Can be a single column name or a list.
33+
Column(s) to which the expanding window functions should be applied. Can be
34+
a single column name or a list.
3335
window_func : Union[str, list, Tuple[str, Callable]], optional, default 'mean'
34-
The `window_func` parameter in the `augment_expanding` function specifies the function(s) to be applied to the expanding windows of the value column(s).
35-
36+
The `window_func` parameter in the `augment_expanding` function specifies
37+
the function(s) to be applied to the expanding windows of the value column(s).
38+
3639
1. It can be either:
3740
- A string representing the name of a standard function (e.g., 'mean', 'sum').
38-
41+
3942
2. For custom functions:
40-
- Provide a list of tuples. Each tuple should contain a custom name for the function and the function itself.
41-
- Each custom function should accept a Pandas Series as its input and operate on that series.
42-
Example: ("range", lambda x: x.max() - x.min())
43-
43+
- Provide a list of tuples. Each tuple should contain a custom name for
44+
the function and the function itself.
45+
- Each custom function should accept a Pandas Series as its input and
46+
operate on that series. Example: ("range", lambda x: x.max() - x.min())
47+
4448
(See more Examples below.)
45-
46-
Note: If your function needs to operate on multiple columns (i.e., it requires access to a DataFrame rather than just a Series), consider using the `augment_expanding_apply` function in this library.
49+
50+
Note: If your function needs to operate on multiple columns (i.e., it
51+
requires access to a DataFrame rather than just a Series), consider
52+
using the `augment_expanding_apply` function in this library.
4753
min_periods : int, optional, default None
48-
Minimum observations in the window to have a value. Defaults to the window size. If set, a value will be produced even if fewer observations are present than the window size.
54+
Minimum observations in the window to have a value. Defaults to the window
55+
size. If set, a value will be produced even if fewer observations are
56+
present than the window size.
4957
engine : str, optional, default 'pandas'
50-
Specifies the backend computation library for augmenting expanding window functions.
51-
58+
Specifies the backend computation library for augmenting expanding window
59+
functions.
60+
5261
The options are:
5362
- "pandas" (default): Uses the `pandas` library.
54-
- "polars": Uses the `polars` library, which may offer performance benefits for larger datasets.
55-
63+
- "polars": Uses the `polars` library, which may offer performance
64+
benefits for larger datasets.
65+
5666
**kwargs : additional keyword arguments
57-
Additional arguments passed to the `pandas.Series.expanding` method when using the Pandas engine.
58-
67+
Additional arguments passed to the `pandas.Series.expanding` method when
68+
using the Pandas engine.
69+
5970
Returns
6071
-------
6172
pd.DataFrame
62-
The `augment_expanding` function returns a DataFrame with new columns for each applied function, window size, and value column.
63-
73+
The `augment_expanding` function returns a DataFrame with new columns for
74+
each applied function, window size, and value column.
75+
6476
Examples
6577
--------
66-
78+
6779
```{python}
6880
# Example 1 - Pandas Backend for Expanding Window Functions
6981
# This example demonstrates the use of string-named functions
7082
# on an expanding window using the Pandas backend for computations.
71-
83+
7284
import pytimetk as tk
7385
import pandas as pd
7486
import numpy as np
75-
87+
7688
df = tk.load_dataset("m4_daily", parse_dates = ['date'])
77-
89+
7890
expanded_df = (
7991
df
8092
.groupby('id')
@@ -84,81 +96,82 @@ def augment_expanding(
8496
window_func = [
8597
'mean', # Built-in mean function
8698
'std', # Built-in standard deviation function,
87-
('quantile_75', lambda x: pd.Series(x).quantile(0.75)), # Custom quantile function
88-
99+
('quantile_75', lambda x: pd.Series(x).quantile(0.75)), # Custom quantile function
100+
89101
],
90102
min_periods = 1,
91103
engine = 'pandas', # Utilize pandas for the underlying computations
92-
)
93-
)
104+
)
105+
)
94106
display(expanded_df)
95107
```
96-
97-
98-
```{python}
99-
# Example 2 - Polars Backend for Expanding Window Functions using Built-Ins (538X Faster than Pandas)
100-
# This example demonstrates the use of string-named functions and configurable functions
101-
# using the Polars backend for computations.
102-
# Configurable functions, like pl_quantile, allow the use of specific parameters associated
103-
# with their corresponding polars.Expr.rolling_<function_name> method.
104-
# For instance, pl_quantile corresponds to polars.Expr.rolling_quantile.
105-
106-
import pytimetk as tk
107-
import pandas as pd
108-
import polars as pl
109-
import numpy as np
110-
from pytimetk.utils.polars_helpers import pl_quantile
111-
from pytimetk.utils.pandas_helpers import pd_quantile
112108
113-
df = tk.load_dataset("m4_daily", parse_dates = ['date'])
114-
115-
expanded_df = (
116-
df
117-
.groupby('id')
118-
.augment_expanding(
119-
date_column = 'date',
120-
value_column = 'value',
121-
window_func = [
122-
'mean', # Built-in mean function
123-
'std', # Built-in std function
124-
('quantile_75', pl_quantile(quantile=0.75)), # Configurable with all parameters found in polars.Expr.rolling_quantile
125-
],
126-
min_periods = 1,
127-
engine = 'polars', # Utilize Polars for the underlying computations
128-
)
129-
)
130-
display(expanded_df)
131-
```
132109
133110
```{python}
134-
# Example 3 - Lambda Functions for Expanding Window Functions are faster in Pandas than Polars
135-
# This example demonstrates the use of lambda functions of the form lambda x: x
136-
# Identity lambda functions, while convenient, have signficantly slower performance.
137-
# When using lambda functions the Pandas backend will likely be faster than Polars.
138-
139-
import pytimetk as tk
140-
import pandas as pd
141-
import polars as pl
142-
import numpy as np
143-
144-
df = tk.load_dataset("m4_daily", parse_dates = ['date'])
145-
146-
expanded_df = (
147-
df
148-
.groupby('id')
149-
.augment_expanding(
150-
date_column = 'date',
151-
value_column = 'value',
152-
window_func = [
153-
154-
('range', lambda x: x.max() - x.min()), # Identity lambda function: can be slower, especially in Polars
155-
],
156-
min_periods = 1,
157-
engine = 'pandas', # Utilize pandas for the underlying computations
158-
)
159-
)
160-
display(expanded_df)
161-
```
111+
# Example 2 - Polars Backend for Expanding Window Functions using Built-Ins
112+
# (538X Faster than Pandas)
113+
# This example demonstrates the use of string-named functions and configurable
114+
# functions using the Polars backend for computations. Configurable functions,
115+
# like pl_quantile, allow the use of specific parameters associated with their
116+
# corresponding polars.Expr.rolling_<function_name> method.
117+
# For instance, pl_quantile corresponds to polars.Expr.rolling_quantile.
118+
119+
import pytimetk as tk
120+
import pandas as pd
121+
import polars as pl
122+
import numpy as np
123+
from pytimetk.utils.polars_helpers import pl_quantile
124+
from pytimetk.utils.pandas_helpers import pd_quantile
125+
126+
df = tk.load_dataset("m4_daily", parse_dates = ['date'])
127+
128+
expanded_df = (
129+
df
130+
.groupby('id')
131+
.augment_expanding(
132+
date_column = 'date',
133+
value_column = 'value',
134+
window_func = [
135+
'mean', # Built-in mean function
136+
'std', # Built-in std function
137+
('quantile_75', pl_quantile(quantile=0.75)), # Configurable with all parameters found in polars.Expr.rolling_quantile
138+
],
139+
min_periods = 1,
140+
engine = 'polars', # Utilize Polars for the underlying computations
141+
)
142+
)
143+
display(expanded_df)
144+
```
145+
146+
```{python}
147+
# Example 3 - Lambda Functions for Expanding Window Functions are faster in Pandas than Polars
148+
# This example demonstrates the use of lambda functions of the form lambda x: x
149+
# Identity lambda functions, while convenient, have significantly slower performance.
150+
# When using lambda functions the Pandas backend will likely be faster than Polars.
151+
152+
import pytimetk as tk
153+
import pandas as pd
154+
import polars as pl
155+
import numpy as np
156+
157+
df = tk.load_dataset("m4_daily", parse_dates = ['date'])
158+
159+
expanded_df = (
160+
df
161+
.groupby('id')
162+
.augment_expanding(
163+
date_column = 'date',
164+
value_column = 'value',
165+
window_func = [
166+
167+
('range', lambda x: x.max() - x.min()), # Identity lambda function: can be slower, especially in Polars
168+
],
169+
min_periods = 1,
170+
engine = 'pandas', # Utilize pandas for the underlying computations
171+
)
172+
)
173+
display(expanded_df)
174+
```
162175
'''
163176
# Ensure data is a DataFrame or a GroupBy object
164177
check_dataframe_or_groupby(data)
@@ -463,88 +476,99 @@ def augment_expanding_apply(
463476
window_func: Union[Tuple[str, Callable], List[Tuple[str, Callable]]],
464477
min_periods: Optional[int] = None,
465478
) -> pd.DataFrame:
466-
'''Apply one or more DataFrame-based expanding functions to one or more columns of a DataFrame.
467-
479+
'''
480+
Apply one or more DataFrame-based expanding functions to one or more columns of a DataFrame.
481+
468482
Parameters
469483
----------
470484
data : Union[pd.DataFrame, pd.core.groupby.generic.DataFrameGroupBy]
471485
Input data to be processed. Can be a Pandas DataFrame or a GroupBy object.
472486
date_column : str
473487
Name of the datetime column. Data is sorted by this column within each group.
474488
window_func : Union[Tuple[str, Callable], List[Tuple[str, Callable]]]
475-
The `window_func` parameter in the `augment_expanding_apply` function specifies the function(s) that operate on a expanding window with the consideration of multiple columns.
476-
489+
The `window_func` parameter in the `augment_expanding_apply` function
490+
specifies the function(s) that operate on an expanding window with the
491+
consideration of multiple columns.
492+
477493
The specification can be:
478494
- A tuple where the first element is a string representing the function's name and the second element is the callable function itself.
479495
- A list of such tuples for multiple functions.
480-
481-
Note: For functions targeting only a single value column without the need for contextual data from other columns, consider using the `augment_expanding` function in this library.
482-
min_periods : int, optional, default None
483-
Minimum observations in the window to have a value. Defaults to the window size. If set, a value will be produced even if fewer observations are present than the window size.
484496
497+
Note: For functions targeting only a single value column without the need for
498+
contextual data from other columns, consider using the `augment_expanding`
499+
function in this library.
500+
min_periods : int, optional, default None
501+
Minimum observations in the window to have a value. Defaults to the window
502+
size. If set, a value will be produced even if fewer observations are
503+
present than the window size.
504+
485505
Returns
486506
-------
487507
pd.DataFrame
488-
The `augment_expanding` function returns a DataFrame with new columns for each applied function, window size, and value column.
489-
508+
The `augment_expanding_apply` function returns a DataFrame with new columns
509+
for each applied function, window size, and value column.
510+
490511
Examples
491512
--------
492513
```{python}
493514
import pytimetk as tk
494515
import pandas as pd
495516
import numpy as np
496517
```
497-
518+
498519
```{python}
499-
# Example showcasing the expanding correlation between two columns (`value1` and `value2`).
520+
# Example showcasing the expanding correlation between two columns (`value1` and
521+
# `value2`).
500522
# The correlation requires both columns as input.
501-
523+
502524
# Sample DataFrame with id, date, value1, and value2 columns.
503525
df = pd.DataFrame({
504526
'id': [1, 1, 1, 2, 2, 2],
505527
'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05', '2023-01-06']),
506528
'value1': [10, 20, 29, 42, 53, 59],
507529
'value2': [2, 16, 20, 40, 41, 50],
508-
})
509-
530+
})
531+
510532
# Compute the expanding correlation for each group of 'id'
511533
expanding_df = (
512534
df.groupby('id')
513-
.augment_expanding_apply(
535+
.augment_expanding_apply(
514536
date_column='date',
515537
window_func=[('corr', lambda x: x['value1'].corr(x['value2']))], # Lambda function for correlation
538+
)
516539
)
517-
)
518540
display(expanding_df)
519541
```
520-
542+
521543
```{python}
522-
# expanding Regression Example: Using `value1` as the dependent variable and `value2` and `value3` as the independent variables.
523-
# This example demonstrates how to perform a expanding regression using two independent variables.
524-
544+
# Expanding Regression Example: Using `value1` as the dependent variable and
545+
# `value2` and `value3` as the independent variables.
546+
# This example demonstrates how to perform an expanding regression using two
547+
# independent variables.
548+
525549
# Sample DataFrame with `id`, `date`, `value1`, `value2`, and `value3` columns.
526550
df = pd.DataFrame({
527551
'id': [1, 1, 1, 2, 2, 2],
528552
'date': pd.to_datetime(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05', '2023-01-06']),
529553
'value1': [10, 20, 29, 42, 53, 59],
530554
'value2': [5, 16, 24, 35, 45, 58],
531555
'value3': [2, 3, 6, 9, 10, 13]
532-
})
533-
556+
})
557+
534558
# Define Regression Function to be applied on the expanding window.
535559
def regression(df):
536-
560+
537561
# Required module (scikit-learn) for regression.
538562
from sklearn.linear_model import LinearRegression
539-
563+
540564
model = LinearRegression()
541565
X = df[['value2', 'value3']] # Independent variables
542566
y = df['value1'] # Dependent variable
543567
model.fit(X, y)
544568
ret = pd.Series([model.intercept_, model.coef_[0]], index=['Intercept', 'Slope'])
545-
569+
546570
return ret # Return intercept and slope as a Series
547-
571+
548572
# Compute the expanding regression for each group of `id`
549573
result_df = (
550574
df.groupby('id')
@@ -554,8 +578,9 @@ def regression(df):
554578
)
555579
.dropna()
556580
)
557-
558-
# Format the results to have each regression output (slope and intercept) in separate columns.
581+
582+
# Format the results to have each regression output (slope and intercept) in
583+
# separate columns.
559584
regression_wide_df = pd.concat(result_df['expanding_regression'].to_list(), axis=1).T
560585
regression_wide_df = pd.concat([result_df.reset_index(drop = True), regression_wide_df], axis=1)
561586
display(regression_wide_df)

0 commit comments

Comments
 (0)