Skip to content

Commit

Permalink
autopep8
Browse files Browse the repository at this point in the history
  • Loading branch information
quaquel committed Apr 11, 2019
1 parent 01c685b commit 01b15c8
Show file tree
Hide file tree
Showing 34 changed files with 342 additions and 337 deletions.
4 changes: 2 additions & 2 deletions ema_workbench/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@
from .plotting_util import Density, PlotType
from . import pairs_plotting
from .feature_scoring import (get_ex_feature_scores, get_feature_scores_all,
get_rf_feature_scores, get_univariate_feature_scores)
get_rf_feature_scores, get_univariate_feature_scores)
from .scenario_discovery_util import RuleInductionType
from .logistic_regression import Logit
from .logistic_regression import Logit
45 changes: 22 additions & 23 deletions ema_workbench/analysis/clusterer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,29 @@


def CID(xi, xj, ce_i, ce_j):
return np.linalg.norm(xi-xj) * (max(ce_i,ce_j)/min(ce_i, ce_j))
return np.linalg.norm(xi - xj) * (max(ce_i, ce_j) / min(ce_i, ce_j))


def calculate_cid(data, condensed_form=False):
'''calculate the complex invariant distance between all rows
Parameters
----------
data : 2d ndarray
condensed_form : bool, optional
Returns
-------
distances
a 2D ndarray with the distances between all time series, or condensed
form similar to scipy.spatial.distance.pdist
'''
ce = np.sqrt(np.sum(np.diff(data, axis=1)**2, axis=1))

indices = np.arange(0, data.shape[0])
cid = np.zeros((data.shape[0], data.shape[0]))

Expand All @@ -61,48 +62,46 @@ def calculate_cid(data, condensed_form=False):
distance = CID(xi, xj, ce_i, ce_j)
cid[i, j] = distance
cid[j, i] = distance

if not condensed_form:
return cid
else:
return sp.spatial.distance.squareform(cid)
return sp.spatial.distance.squareform(cid)


def plot_dendrogram(distances):
'''plot dendrogram for distances
'''

if distances.ndim == 2:
distances = sp.spatial.distance.squareform(distances)
linked = sp.cluster.hierarchy.linkage(distances) # @UndefinedVariable


fig = plt.figure()
fig = plt.figure()
sp.cluster.hierarchy.dendrogram(linked, # @UndefinedVariable
orientation='top',
distance_sort='descending',
show_leaf_counts=True)
orientation='top',
distance_sort='descending',
show_leaf_counts=True)
return fig


def apply_agglomerative_clustering(distances, n_clusters, linkage='average'):
'''apply agglomerative clustering to the distances
Parameters
----------
distances : ndarray
n_clusters : int
linkage : {'average', 'complete', 'single'}
Returns
-------
1D ndarray with cluster assignment
'''

c = cluster.AgglomerativeClustering(n_clusters=n_clusters,
affinity='precomputed',
linkage=linkage)
clusters = c.fit_predict(distances)
return clusters


8 changes: 4 additions & 4 deletions ema_workbench/analysis/dimensional_stacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,12 @@ def plot_category(ax, axis, i, label, pos, level):

if axis == 0:
rot = 'horizontal'
if (level > 0) & (len(str(label))>4):
if (level > 0) & (len(str(label)) > 4):
rot = 'vertical'
ax.text(i, pos, label, ha='center', va='center', rotation=rot)
else:
rot = 'horizontal'
if (level == 0) & (len(str(label))>4):
if (level == 0) & (len(str(label)) > 4):
rot = 'vertical'
ax.text(pos, i, label, ha='center', va='center', rotation=rot)

Expand Down Expand Up @@ -245,8 +245,8 @@ def plot_index(ax, ax_plot, axis, index, plot_labels=True,
# add values
for p in range(j, nr_levels):
pos = 1 / (2 * nr_levels) + p / (nr_levels)
plot_category(ax, axis, i +offsets[p] * len(index),
entry[p], pos, p)
plot_category(ax, axis, i + offsets[p] * len(index),
entry[p], pos, p)
if axis:
ax_plot.axhline(i, c="w", lw=lw)
else:
Expand Down
13 changes: 5 additions & 8 deletions ema_workbench/analysis/feature_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,12 @@ def _prepare_experiments(experiments):
x_nominal_columns = x_nominal.columns.values

for column in x_nominal_columns:
if np.unique(x[column]).shape==(1,):
if np.unique(x[column]).shape == (1,):
x = x.drop(column, axis=1)
_logger.info(("{} dropped from analysis "
"because only a single category").format(column))
else:
x[column] = x[column].astype('category').cat.codes



return x.values, x.columns.tolist()

Expand Down Expand Up @@ -254,7 +252,7 @@ def get_ex_feature_scores(x, y, mode=RuleInductionType.CLASSIFICATION,
min_samples_leaf : int, optional
defaults to 1 for N=1000 or lower, from there on
proportional to sqrt of N
(see discussion in Jaxa-Rozen & Kwakkel (2018) doi: 10.1016/j.envsoft.2018.06.011)
(see discussion in Jaxa-Rozen & Kwakkel (2018) doi: 10.1016/j.envsoft.2018.06.011)
see http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html
min_weight_fraction_leaf : float, optional
see http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html
Expand All @@ -281,16 +279,15 @@ def get_ex_feature_scores(x, y, mode=RuleInductionType.CLASSIFICATION,
# TODO
# max_features = number of variables/3
#
# min_samples_leaf
# min_samples_leaf
# 1000 - >
# then proportional based on sqrt of N
# i.e. sqrt(N) / sqrt(1000), with 1 as the minimum
if max_features is None:
max_features = int(round(x.shape[1]/3))
max_features = int(round(x.shape[1] / 3))
if min_samples_leaf is None:
min_samples_leaf = max(1,
int(round(math.sqrt(x.shape[0])/math.sqrt(1000))))

int(round(math.sqrt(x.shape[0]) / math.sqrt(1000))))

if mode == RuleInductionType.CLASSIFICATION:
etc = ExtraTreesClassifier
Expand Down
Loading

0 comments on commit 01b15c8

Please sign in to comment.