Skip to content

Commit 798ce3c

Browse files
committed
fixed issues with flake8 pre-commit
1 parent db9dea7 commit 798ce3c

11 files changed

+85
-44
lines changed

.flake8

+6
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,8 @@
11
[flake8]
22
max-line-length = 120
3+
per-file-ignores =
4+
tests/*: D10, E74
5+
setup.py: D10
6+
docs/conf.py: D100
7+
sobolev_alignment/__init__.py: D104,F401
8+
sobolev_alignment/*: E203

sobolev_alignment/data_normalisation.py

Whitespace-only changes.

sobolev_alignment/feature_analysis.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
1-
"""FEATURE_ANALYSIS"""
1+
"""
2+
Feature analysis.
3+
4+
@author: Soufiane Mourragui
5+
6+
This module contains all the code used in the Taylor expansion for the Gaussian/Matern
7+
kernel.
8+
"""
29

310
import gc
411
import logging
@@ -24,7 +31,8 @@ def higher_order_contribution(
2431
2532
Compute the features corresponding to the Taylor expansion of the kernel, i.e. $x_j e^{-\gamma xx^T}$ for
2633
linear features. Returns a sparse pandas DataFrame containing all the features (columns) by samples (rows).
27-
We here critically rely on the sparsity of the data-matrix to speed up computations. The current implementation is relevant in two cases:
34+
We here critically rely on the sparsity of the data-matrix to speed up computations. The current implementation
35+
is relevant in two cases:
2836
-When dimensionality is small
2937
-When data is sparse.
3038
@@ -54,8 +62,9 @@ def higher_order_contribution(
5462
could lead to crash.
5563
5664
return_matrix: bool, default to False
57-
If True, then returns simply the feature-matrix without feature-naming. In cases when feature names are
58-
not relevant (e.g. computing the proportion of non-linearities), return_matrix=True can help speed-up the process.
65+
If True, then returns simply the feature-matrix without feature-naming. In cases when feature names
66+
are not relevant (e.g. computing the proportion of non-linearities), return_matrix=True can help
67+
speed-up the process.
5968
6069
Returns
6170
-------
@@ -96,7 +105,7 @@ def higher_order_contribution(
96105

97106

98107
def _combination_to_idx(idx, p):
99-
"""Transforms a combination (tuple of feature idx) into an indicative function.
108+
r"""Transform a combination (tuple of feature idx) into an indicative function.
100109
101110
Parameters
102111
----------
@@ -118,7 +127,7 @@ def _combination_to_idx(idx, p):
118127

119128

120129
def basis(x, k, gamma):
121-
"""Computed the basis function for a single gene, except offset term.
130+
r"""Compute the basis function for a single gene, except offset term.
122131
123132
Parameters
124133
----------

sobolev_alignment/generate_artificial_sample.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
GENERATE ARTIFICIAL SAMPLE
2+
Generate artificial samples.
33
44
@author: Soufiane Mourragui
55
@@ -26,7 +26,7 @@ def generate_samples(
2626
return_dist: bool = False,
2727
):
2828
"""
29-
Generates artificial gene expression profiles.
29+
Generate artificial gene expression profiles.
3030
3131
Note to developers: this method has been designed to be used with scvi-tools classes. Other VAE
3232
implementations may break here.
@@ -111,7 +111,7 @@ def parallel_generate_samples(
111111
n_jobs=1,
112112
):
113113
"""
114-
Generates artificial gene expression profiles.
114+
Generate artificial gene expression profiles.
115115
116116
Wrapper that parallelizes generate_samples, running several threads in parallel.
117117
<b>Note to developers</b>: this function needs to be changed if applied to other VAE model

sobolev_alignment/interpolated_features.py

+6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
"""
2+
Compute interpolated features.
3+
4+
@author: Soufiane Mourragui
5+
"""
6+
17
import numpy as np
28
import pandas as pd
39
import scipy

sobolev_alignment/kernel_operations.py

+7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
"""
2+
Kernel operations.
3+
4+
@author: Soufiane Mourragui
5+
6+
Custom scripts for specific matrix operations.
7+
"""
18
import numpy as np
29

310

sobolev_alignment/krr_approx.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ def _save_coefs(self):
255255
self._process_coef_ridge_falkon()
256256

257257
def _process_coef_ridge_sklearn(self):
258-
"""Save and process (i.e. transform to torch.Tensor) the coefficients obtained after kernel ridge regression with scikit-learn implementation."""
258+
"""Save and transform to torch.Tensor KRR coefficients from scikit-learn implementation."""
259259
self.sample_weights_ = torch.Tensor(self.ridge_clf_.dual_coef_)
260260
self.ridge_samples_idx_ = np.arange(self.training_data_.shape[0])
261261

sobolev_alignment/krr_model_selection.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
<h2>Kernel Ridge Regression (KRR) model search</h2>
2+
Kernel Ridge Regression (KRR) model search.
33
44
@author: Soufiane Mourragui
55

sobolev_alignment/multi_krr_approx.py

+14-7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
1-
import torch
1+
"""
2+
Multi KRR approximation.
3+
4+
@author: Soufiane Mourragui
25
3-
# Falkon import
6+
Scripts supporting the naive integration of several KRRs. No gain is provided by such an approach.
7+
"""
8+
9+
import torch
410

511

612
class MultiKRRApprox:
@@ -12,31 +18,32 @@ class MultiKRRApprox:
1218
"""
1319

1420
def __init__(self):
21+
"""Create instance."""
1522
self.krr_regressors = []
1623

1724
def predict(self, X: torch.Tensor):
18-
"""Predict latent factor values given a tensor"""
25+
"""Predict latent factor values given a tensor."""
1926
prediction = [clf.transform(torch.Tensor(X)).detach().numpy() for clf in self.krr_regressors]
2027
prediction = torch.Tensor(prediction)
2128
prediction = torch.mean(prediction, axis=0)
2229

2330
return prediction
2431

2532
def transform(self, X: torch.Tensor):
26-
"""Predict latent factor values given a tensor"""
33+
"""Predict latent factor values given a tensor."""
2734
return self.predict(X)
2835

2936
def anchors(self):
30-
"""Return anchors"""
37+
"""Return anchors."""
3138
return self.anchors_
3239

3340
def process_clfs(self):
34-
"""Process the different classifiers"""
41+
"""Process the different classifiers."""
3542
self.anchors_ = torch.cat([clf.anchors() for clf in self.krr_regressors])
3643
self.sample_weights_ = torch.cat([clf.sample_weights_ for clf in self.krr_regressors])
3744
self.sample_weights_ = 1 / len(self.krr_regressors) * self.sample_weights_
3845
self.kernel_ = self.krr_regressors[0].kernel_
3946

4047
def add_clf(self, clf):
41-
"""Add a classifier"""
48+
"""Add a classifier."""
4249
self.krr_regressors.append(clf)

sobolev_alignment/scvi_model_search.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
<h2>scVI model search</h2>
2+
scVI model search.
33
44
@author: Soufiane Mourragui
55
@@ -132,7 +132,7 @@ def make_objective_function(train_data_an, test_data_an, batch_key=None, model=s
132132

133133
def _objective_function(params):
134134
"""
135-
Objective function
135+
Objective function.
136136
137137
Returns a method which performs, for one set of hyperparameters, the training,
138138
the evaluation on test data and summing up all the results in a dictionary usable
@@ -192,7 +192,7 @@ def _objective_function(params):
192192

193193

194194
def split_dataset(data_an, test_size=0.1):
195-
"""Split between training and testing"""
195+
"""Split between training and testing."""
196196
train_data_df, test_data_df = train_test_split(data_an.to_df(), test_size=test_size)
197197
train_data_an = data_an[train_data_df.index,]
198198
test_data_an = data_an[test_data_df.index,]

sobolev_alignment/sobolev_alignment.py

+29-23
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
"""
2-
<h2>Sobolev Alignment</h2>
2+
Sobolev Alignment.
33
44
@author: Soufiane Mourragui
55
66
References
77
----------
8-
Mourragui et al, Identifying commonalities between cell lines and tumors at the single cell level using Sobolev Alignment of deep generative models, Biorxiv, 2022.
8+
Mourragui et al, Identifying commonalities between cell lines and tumors at the single cell level using
9+
Sobolev Alignment of deep generative models, Biorxiv, 2022.
910
Lopez et al, Deep generative modeling for single-cell transcriptomics, Nature Methods, 2018.
1011
Meanti et al, Kernel methods through the roof: handling billions of points efficiently, NeurIPS, 2020.
1112
"""
@@ -45,7 +46,7 @@
4546

4647
class SobolevAlignment:
4748
"""
48-
Sobolev Alignment implementation
49+
Sobolev Alignment implementation.
4950
5051
Main class for Sobolev Alignment, which wraps all the different operations presented in Sobolev Alignment procedure:
5152
- Model selection (scVI and KRR)
@@ -66,8 +67,8 @@ def __init__(
6667
target_scvi_params: dict = None,
6768
source_krr_params: dict = None,
6869
target_krr_params: dict = None,
69-
n_artificial_samples: int = int(10e5),
70-
n_samples_per_sample_batch: int = 10**6,
70+
n_artificial_samples: int = 10**5,
71+
n_samples_per_sample_batch: int = 10**5,
7172
frac_save_artificial: float = 0.1,
7273
save_mmap: str = None,
7374
log_input: bool = True,
@@ -237,7 +238,7 @@ def fit(
237238
sample_artificial: bool = True,
238239
):
239240
"""
240-
Runs the complete Sobolev Alignment workflow between a source (e.g. cell line) and a target (e.g. tumor) dataset.
241+
Run complete Sobolev Alignment workflow between a source (e.g. cell line) and a target (e.g. tumor) dataset.
241242
242243
Source and target data should be passed as AnnData and potential batch names
243244
(source_batch_name, target_batch_name) should be part of the "obs" element
@@ -593,7 +594,7 @@ def _compute_batch_library_size(self):
593594
}
594595

595596
def _check_same_kernel(self):
596-
"""Same kernel has to be used for source and kernel KRR."""
597+
"""Verify that the same kernel is used for source and target KRR."""
597598
if "kernel" in self.krr_params["source"] or "kernel" in self.krr_params["target"]:
598599
assert self.krr_params["source"]["kernel"] == self.krr_params["target"]["kernel"]
599600
if "kernel_params" in self.krr_params["source"] or "kernel_params" in self.krr_params["target"]:
@@ -797,9 +798,12 @@ def _compute_cross_cosine_sim(self):
797798

798799
def _compute_principal_vectors(self, all_PVs=False):
799800
"""
800-
All_PVs indicate whether the data source with the most PVs should be reduced to the number of PVs of the smallest data-source.
801+
Compute principal vectors by SVD of cosine similarity.
801802
802-
Example: source has 10 factors, target 13. all_PVs=True would yield 13 target PVs, all_PVs=False would yield 10.
803+
all_PVs indicates whether the data source with the most PVs should be reduced to the
804+
number of PVs of the smallest data-source.
805+
Example: source has 10 factors, target 13. all_PVs=True would yield 13 target PVs,
806+
all_PVs=False would yield 10.
803807
"""
804808
cosine_svd = np.linalg.svd(self.cosine_sim, full_matrices=all_PVs)
805809
self.principal_angles = cosine_svd[1]
@@ -815,8 +819,8 @@ def compute_consensus_features(self, X_input: dict, n_similar_pv: int, fit: bool
815819
"""
816820
Project data on interpolated consensus features.
817821
818-
Project the data on interpolated features, i.e., a linear combination of source and target SPVs which best balances the effect of source and target
819-
data.
822+
Project the data on interpolated features, i.e., a linear combination of source and target SPVs which
823+
best balances the effect of source and target data.
820824
821825
Parameters
822826
----------
@@ -825,10 +829,11 @@ def compute_consensus_features(self, X_input: dict, n_similar_pv: int, fit: bool
825829
n_similar_pv: int
826830
Number of top SPVs to project the data on.
827831
fit: bool, default to True
828-
Whether the interpolated times must be computed. If False, will use previously computed times, but will return an error if not previously fitted.
832+
Whether the interpolated times must be computed. If False, will use previously computed times,
833+
but will return an error if not previously fitted.
829834
return_anndata: bool, default to False
830-
Whether the projected consensus features must be formatted as an AnnData with overlapping indices in obs. This allows downstream analysis.
831-
By default, return a DataFrame.
835+
Whether the projected consensus features must be formatted as an AnnData with overlapping
836+
indices in obs. This allows downstream analysis. By default, return a DataFrame.
832837
833838
Returns
834839
-------
@@ -1066,7 +1071,7 @@ def krr_model_selection(
10661071
return self.krr_params
10671072

10681073
def save(self, folder: str = ".", with_krr: bool = True, with_model: bool = True):
1069-
"""Save Sobolev Alignment model"""
1074+
"""Save Sobolev Alignment model."""
10701075
if not os.path.exists(folder) and not os.path.isdir(folder):
10711076
os.mkdir(folder)
10721077

@@ -1217,7 +1222,7 @@ def plot_training_metrics(self, folder: str = "."):
12171222
plt.show()
12181223

12191224
def plot_cosine_similarity(self, folder: str = ".", absolute_cos: bool = False):
1220-
"""Plot cosine similarity"""
1225+
"""Plot cosine similarity."""
12211226
if absolute_cos:
12221227
sns.heatmap(np.abs(self.cosine_sim), cmap="seismic_r", center=0)
12231228
else:
@@ -1330,17 +1335,18 @@ def feature_analysis(self, max_order: int = 1, gene_names: list = None):
13301335
"""
13311336
Launch feature analysis for a trained scVI model.
13321337
1333-
Computes the gene contributions (feature weights) associated with the KRRs which approximate the latent factors and the SPVs.
1334-
Technically, given the kernel machine which approximates a latent factor (KRR), this method computes the weights associated
1335-
with the orthonormal basis in the Gaussian-kernel associated Sobolev space.
1338+
Computes the gene contributions (feature weights) associated with the KRRs which approximate the
1339+
latent factors and the SPVs. Technically, given the kernel machine which approximates a latent factor
1340+
(KRR), this method computes the weights associated with the orthonormal basis in the Gaussian-kernel
1341+
associated Sobolev space.
13361342
13371343
Parameters
13381344
----------
13391345
max_order: int, default to 1
13401346
Order of the features to compute. 1 corresponds to linear features (genes), two to interaction terms.
13411347
gene_names: list of str, default to None
1342-
Names of the genes passed as input to Sobolev Alignment. <b>WARNING</b> Must be in the same order as the input to
1343-
SobolevAlignment.fit
1348+
Names of the genes passed as input to Sobolev Alignment. <b>WARNING</b> Must be in the same order as
1349+
the input to SobolevAlignment.fit
13441350
"""
13451351
# Make kernel parameter
13461352
if (
@@ -1405,7 +1411,7 @@ def feature_analysis(self, max_order: int = 1, gene_names: list = None):
14051411
}
14061412

14071413
def sample_random_vector_(self, data_source, K):
1408-
"""Sample a vector randomly for either source or target"""
1414+
"""Sample a vector randomly for either source or target."""
14091415
n_samples = self.approximate_krr_regressions_[data_source].anchors().shape[0]
14101416
n_factors = self.approximate_krr_regressions_[data_source].anchors().shape[1]
14111417

@@ -1435,7 +1441,7 @@ def sample_random_vector_(self, data_source, K):
14351441
return coefficients
14361442

14371443
def compute_random_direction_(self, K_X, K_Y, K_XY):
1438-
"""Sample randomly two vectors and compute cosine similarity"""
1444+
"""Sample randomly two vectors and compute cosine similarity."""
14391445
# Random samples
14401446
perm_source_sample_coef = self.sample_random_vector_("source", K_X)
14411447
perm_target_sample_coef = self.sample_random_vector_("target", K_Y)

0 commit comments

Comments
 (0)