Commit cb915cb: fix test
1 parent 9df209c
3 files changed: +295 -1 lines changed

test/search_path.py_test (+4)
@@ -0,0 +1,4 @@
# use yapypy run
from traitlets import traitlets

print(traitlets)
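What this tiny test presumably checks is yapypy's module search path: from traitlets import traitlets only succeeds if the import machinery walks sys.path into site-packages and resolves the traitlets.traitlets submodule. A plain-CPython sketch of the same smoke test (the assert is an illustration, not part of the commit):

# Resolving an installed package's submodule proves sys.path lookup works.
from traitlets import traitlets

assert traitlets.__name__ == "traitlets.traitlets"
print(traitlets)  # e.g. <module 'traitlets.traitlets' from '.../site-packages/...'>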

test/test_1.py_test (+289)
@@ -0,0 +1,289 @@
from __future__ import print_function

import math

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow.python.data import Dataset

tf.logging.set_verbosity(tf.logging.ERROR)
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.1f}'.format

california_housing_dataframe = pd.read_csv("https://dl.google.com/mlcc/mledu-datasets/california_housing_train.csv", sep=",")
california_housing_dataframe = shuffle(california_housing_dataframe)


def preprocess_features(california_housing_dataframe):
  """Prepares input features from California housing data set.

  Args:
    california_housing_dataframe: A Pandas DataFrame expected to contain data
      from the California housing data set.
  Returns:
    A DataFrame that contains the features to be used for the model, including
    synthetic features.
  """
  selected_features = california_housing_dataframe[
      ["latitude",
       "longitude",
       "housing_median_age",
       "total_rooms",
       "total_bedrooms",
       "population",
       "households",
       "median_income"]]
  processed_features = selected_features.copy()
  # Create a synthetic feature.
  processed_features["rooms_per_person"] = (
      california_housing_dataframe["total_rooms"] /
      california_housing_dataframe["population"])
  return processed_features


def preprocess_targets(california_housing_dataframe):
  """Prepares target features (i.e., labels) from California housing data set.

  Args:
    california_housing_dataframe: A Pandas DataFrame expected to contain data
      from the California housing data set.
  Returns:
    A DataFrame that contains the target feature.
  """
  output_targets = pd.DataFrame()
  # Scale the target to be in units of thousands of dollars.
  output_targets["median_house_value"] = (
      california_housing_dataframe["median_house_value"] / 1000.0)
  return output_targets


# Choose the first 12000 (out of 17000) examples for training.
training_examples = preprocess_features(california_housing_dataframe.head(12000))
training_targets = preprocess_targets(california_housing_dataframe.head(12000))

# Choose the last 5000 (out of 17000) examples for validation.
validation_examples = preprocess_features(california_housing_dataframe.tail(5000))
validation_targets = preprocess_targets(california_housing_dataframe.tail(5000))

# Double-check that we've done the right thing.
print("Training examples summary:")
display.display(training_examples.describe())
print("Validation examples summary:")
display.display(validation_examples.describe())

print("Training targets summary:")
display.display(training_targets.describe())
print("Validation targets summary:")
display.display(validation_targets.describe())


def construct_feature_columns(input_features):
  """Construct the TensorFlow Feature Columns.

  Args:
    input_features: The names of the numerical input features to use.
  Returns:
    A set of feature columns
  """
  return set([tf.feature_column.numeric_column(my_feature)
              for my_feature in input_features])


def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
  """A custom input function for passing the housing data to the Estimator.

  Args:
    features: pandas DataFrame of features
    targets: pandas DataFrame of targets
    batch_size: Size of batches to be passed to the model
    shuffle: True or False. Whether to shuffle the data.
    num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
  Returns:
    Tuple of (features, labels) for next data batch
  """

  # Convert pandas data into a dict of np arrays.
  features = {key: np.array(value) for key, value in dict(features).items()}

  # Construct a dataset, and configure batching/repeating.
  ds = Dataset.from_tensor_slices((features, targets))  # warning: 2GB limit
  ds = ds.batch(batch_size).repeat(num_epochs)

  # Shuffle the data, if specified.
  if shuffle:
    ds = ds.shuffle(10000)

  # Return the next batch of data.
  features, labels = ds.make_one_shot_iterator().get_next()
  return features, labels


def train_model(
    learning_rate,
    steps,
    batch_size,
    feature_columns,
    training_examples,
    training_targets,
    validation_examples,
    validation_targets):
  """Trains a linear regression model.

  In addition to training, this function also prints training progress information,
  as well as a plot of the training and validation loss over time.

  Args:
    learning_rate: A `float`, the learning rate.
    steps: A non-zero `int`, the total number of training steps. A training step
      consists of a forward and backward pass using a single batch.
    batch_size: A non-zero `int`, the number of examples per batch.
    feature_columns: A `set` specifying the input feature columns to use.
    training_examples: A `DataFrame` containing one or more columns from
      `california_housing_dataframe` to use as input features for training.
    training_targets: A `DataFrame` containing exactly one column from
      `california_housing_dataframe` to use as target for training.
    validation_examples: A `DataFrame` containing one or more columns from
      `california_housing_dataframe` to use as input features for validation.
    validation_targets: A `DataFrame` containing exactly one column from
      `california_housing_dataframe` to use as target for validation.

  Returns:
    A `LinearRegressor` object trained on the training data.
  """

  periods = 10
  steps_per_period = steps / periods

  # Create a linear regressor object.
  my_optimizer = tf.train.FtrlOptimizer(learning_rate=learning_rate)
  my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 5.0)
  linear_regressor = tf.estimator.LinearRegressor(
      feature_columns=feature_columns,
      optimizer=my_optimizer
  )

  training_input_fn = lambda: my_input_fn(training_examples,
                                          training_targets["median_house_value"],
                                          batch_size=batch_size)
  predict_training_input_fn = lambda: my_input_fn(training_examples,
                                                  training_targets["median_house_value"],
                                                  num_epochs=1,
                                                  shuffle=False)
  predict_validation_input_fn = lambda: my_input_fn(validation_examples,
                                                    validation_targets["median_house_value"],
                                                    num_epochs=1,
                                                    shuffle=False)

  # Train the model, but do so inside a loop so that we can periodically assess
  # loss metrics.
  print("Training model...")
  print("RMSE (on training data):")
  training_rmse = []
  validation_rmse = []
  for period in range(0, periods):
    # Train the model, starting from the prior state.
    linear_regressor.train(
        input_fn=training_input_fn,
        steps=steps_per_period
    )
    # Take a break and compute predictions.
    training_predictions = linear_regressor.predict(input_fn=predict_training_input_fn)
    training_predictions = np.array([item['predictions'][0] for item in training_predictions])
    validation_predictions = linear_regressor.predict(input_fn=predict_validation_input_fn)
    validation_predictions = np.array([item['predictions'][0] for item in validation_predictions])

    # Compute training and validation loss.
    training_root_mean_squared_error = math.sqrt(
        metrics.mean_squared_error(training_predictions, training_targets))
    validation_root_mean_squared_error = math.sqrt(
        metrics.mean_squared_error(validation_predictions, validation_targets))
    # Occasionally print the current loss.
    print(" period %02d : %0.2f" % (period, training_root_mean_squared_error))
    # Add the loss metrics from this period to our list.
    training_rmse.append(training_root_mean_squared_error)
    validation_rmse.append(validation_root_mean_squared_error)
  print("Model training finished.")

  # Output a graph of loss metrics over periods.
  plt.ylabel("RMSE")
  plt.xlabel("Periods")
  plt.title("Root Mean Squared Error vs. Periods")
  plt.tight_layout()
  plt.plot(training_rmse, label="training")
  plt.plot(validation_rmse, label="validation")
  plt.legend()

  return linear_regressor


_ = train_model(
    learning_rate=1.0,
    steps=500,
    batch_size=100,
    feature_columns=construct_feature_columns(training_examples),
    training_examples=training_examples,
    training_targets=training_targets,
    validation_examples=validation_examples,
    validation_targets=validation_targets)


def get_quantile_based_boundaries(feature_values, num_buckets):
  # Compute the num_buckets - 1 quantile cut points for this feature.
  boundaries = np.arange(1.0, num_buckets) / num_buckets
  quantiles = feature_values.quantile(boundaries)
  return [quantiles[q] for q in quantiles.keys()]


# Note: this redefinition shadows construct_feature_columns(input_features)
# above; the earlier call to train_model already ran with the first version.
def construct_feature_columns():
  """Construct the TensorFlow Feature Columns.

  Returns:
    A set of feature columns
  """
  households = tf.feature_column.numeric_column("households")
  longitude = tf.feature_column.numeric_column("longitude")
  latitude = tf.feature_column.numeric_column("latitude")
  housing_median_age = tf.feature_column.numeric_column("housing_median_age")
  median_income = tf.feature_column.numeric_column("median_income")
  rooms_per_person = tf.feature_column.numeric_column("rooms_per_person")

  # Divide households into 7 buckets.
  bucketized_households = tf.feature_column.bucketized_column(
      households, boundaries=get_quantile_based_boundaries(
          training_examples["households"], 7))

  # Divide longitude into 10 buckets.
  bucketized_longitude = tf.feature_column.bucketized_column(
      longitude, boundaries=get_quantile_based_boundaries(
          training_examples["longitude"], 10))

  # Divide latitude into 10 buckets.
  bucketized_latitude = tf.feature_column.bucketized_column(
      latitude, boundaries=get_quantile_based_boundaries(
          training_examples["latitude"], 10))

  # Divide housing_median_age into 7 buckets.
  bucketized_housing_median_age = tf.feature_column.bucketized_column(
      housing_median_age, boundaries=get_quantile_based_boundaries(
          training_examples["housing_median_age"], 7))

  # Divide median_income into 7 buckets.
  bucketized_median_income = tf.feature_column.bucketized_column(
      median_income, boundaries=get_quantile_based_boundaries(
          training_examples["median_income"], 7))

  # Divide rooms_per_person into 7 buckets.
  bucketized_rooms_per_person = tf.feature_column.bucketized_column(
      rooms_per_person, boundaries=get_quantile_based_boundaries(
          training_examples["rooms_per_person"], 7))

  # Cross the longitude and latitude buckets into a single feature.
  long_x_lat = tf.feature_column.crossed_column(
      set([bucketized_longitude, bucketized_latitude]), hash_bucket_size=1000)

  feature_columns = set([
      bucketized_longitude,
      bucketized_latitude,
      bucketized_housing_median_age,
      bucketized_households,
      bucketized_median_income,
      bucketized_rooms_per_person,
      long_x_lat,
  ])

  return feature_columns

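A side note on the bucketing helper (this sketch is illustrative and not part of the commit): get_quantile_based_boundaries returns num_buckets - 1 quantile cut points, so each bucketized column above splits its feature into buckets holding roughly equal numbers of examples rather than equal-width ranges. A standalone pandas/numpy sketch with an invented toy series:

# Toy illustration of get_quantile_based_boundaries (no TensorFlow needed).
import numpy as np
import pandas as pd

values = pd.Series([1, 2, 2, 3, 5, 8, 13, 21, 34, 55])
num_buckets = 4
fractions = np.arange(1.0, num_buckets) / num_buckets  # [0.25, 0.5, 0.75]
boundaries = values.quantile(fractions).tolist()
print(boundaries)  # [2.25, 6.5, 19.0]: about len(values)/4 examples per bucket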
yapypy/extended_python/emit_impl/constrains.py (+2 -1)

@@ -68,7 +68,8 @@ def py_emit(node: ast.Yield, ctx: Context):
     >>> def f():
     >>>     yield 1
     >>> self.assertEqual(1, next(f()))
-    >>> yield None
+    >>> def m():
+    >>>     yield None
     """
     if ContextType.Module in ctx.cts:
         exc = SyntaxError()
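For context on the fix (the snippet below is illustrative, not part of the commit): a yield expression is only legal inside a function body, and the emitter above raises SyntaxError when ContextType.Module is in the context, mirroring CPython. The old bare yield None was therefore itself a syntax error at the doctest's top level; wrapping it in def m(): makes the snippet legal again.

# CPython rejects a module-level yield at compile time, which is what the
# doctest's bare `yield None` tripped over:
try:
    compile("yield None", "<doctest>", "exec")
except SyntaxError as err:
    print(err.msg)  # 'yield' outside function

# Wrapped in a function definition it compiles fine, hence the fix:
compile("def m():\n    yield None", "<doctest>", "exec")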
