-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmodelBuilder.js
248 lines (222 loc) · 11.1 KB
/
modelBuilder.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
import * as tf from "https://esm.sh/@tensorflow/tfjs"
// Fallback configuration used whenever a model option is not supplied.
const DEFAULTS = {
  // Three densely connected layers with 5, 3 and 3 units. Every other
  // per-layer setting is left undefined so the model-level values apply.
  architecture: [5, 3, 3].map((units) => ({
    type: "dense",
    units,
    kernelSize: undefined,
    filters: undefined,
    poolSize: undefined,
    strides: undefined,
    activation: undefined,
    bias: undefined,
  })),
  activation: "relu",
  bias: true,
  outputActivation: "softmax",
  optimizer: "sgd",
  lossFunc: "categoricalCrossentropy",
  metrics: ["accuracy"],
  numEpochs: 50,
  batchSize: 4,
  callbacks: {
    // Prints the metrics object received at the end of each epoch.
    onEpochEnd: (epoch, logs) => {
      const serialized = JSON.stringify(logs)
      console.log(`Metrics for epoch ${epoch}: ${serialized}`)
    },
  },
}
// Small stateless helpers shared by the model class.
const utils = {
  /**
   * Fetches a URL and parses the response body as JSON.
   * @param {string} url - The resource to fetch.
   * @param {Object} [opts] - Options forwarded unchanged to `fetch`.
   * @returns {Promise<any>} The parsed JSON body.
   * @throws {Error} When the server answers with a non-2xx status.
   */
  request: async (url, opts) => {
    const res = await fetch(url, opts)
    // Without this check an HTTP error page would be parsed (or fail to
    // parse) as if it were the requested data.
    if (!res.ok) {
      throw new Error(`Request to ${url} failed with status ${res.status} ${res.statusText}`)
    }
    return res.json()
  },

  /**
   * Shuffles an array IN PLACE (Fisher-Yates) and returns the same array.
   * @param {Array<any>} arr - The array to shuffle; it is mutated.
   * @returns {Array<any>} The same array instance, now shuffled.
   */
  shuffleArray: (arr) => {
    for (let i = arr.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [arr[i], arr[j]] = [arr[j], arr[i]]
    }
    return arr
  },

  /**
   * Counts the rows whose arg-max along axis 1 is the same in both inputs.
   * @param {Object} predictedLabels - Value accepted by `tf.argMax` holding the predictions.
   * @param {Object} actualLabels - Value accepted by `tf.argMax` holding the ground truth.
   * @returns {{numCorrectPredictions: number}} The number of matching rows.
   */
  calculateMetrics: (predictedLabels, actualLabels) => {
    // tidy() disposes the intermediate argMax tensors even when the caller
    // is not itself running inside a tidy scope.
    const numCorrectPredictions = tf.tidy(() => {
      const predictions = tf.argMax(predictedLabels, 1).dataSync()
      const groundTruth = tf.argMax(actualLabels, 1).dataSync()
      return predictions.reduce(
        (correctPreds, prediction, index) => (prediction === groundTruth[index] ? correctPreds + 1 : correctPreds),
        0
      )
    })
    return { numCorrectPredictions }
  }
};
/**
 * Class representing a TensorFlow.js model.
 *
 * Intended call order: construct, `loadData()`, `build()`, `train()`, `test()`.
 */
export class TFJSModel {
  /**
   * Creates an instance of the TFJSModel.
   * @param {Object} [options={}] - The model options.
   * @param {number|Array<number>} [options.inputShape] - The shape of a single input sample. When omitted it is taken from the loaded training data when `build()` runs.
   * @param {Array<Object>|Array<number>} [options.architecture=[5,3,3]] - The model architecture configuration. Could be an array of numbers corresponding to the number of units per densely connected layer of the neural network, or an array of objects with more granular properties (`type`, `units`, `kernelSize`, `filters`, `poolSize`, `strides`, `activation`, `bias`).
   * @param {string} [options.activation="relu"] - The activation function to be used in all the hidden layers. Could be one of: 'elu'|'hardSigmoid'|'linear'|'relu'|'relu6'|'selu'|'sigmoid'|'softmax'|'softplus'|'softsign'|'tanh'.
   * @param {boolean} [options.bias=true] - Whether to include a bias term in the layers. An explicit `false` is honoured.
   * @param {string} [options.outputActivation="softmax"] - The activation function for the output layer. Could be one of: 'elu'|'hardSigmoid'|'linear'|'relu'|'relu6'|'selu'|'sigmoid'|'softmax'|'softplus'|'softsign'|'tanh'.
   * @param {string} [options.optimizer="sgd"] - The optimizer to use for training. Could be one of: 'sgd'|'momentum'|'adagrad'|'adadelta'|'adam'|'adamax'|'rmsprop'.
   * @param {string} [options.lossFunc="categoricalCrossentropy"] - The loss function to use for training. Could be one of: 'absoluteDifference'|'computeWeightedLoss'|'cosineDistance'|'hingeLoss'|'huberLoss'|'logLoss'|'meanSquaredError'|'sigmoidCrossEntropy'|'softmaxCrossEntropy'|'binaryAccuracy'|'binaryCrossentropy'|'categoricalAccuracy'|'categoricalCrossentropy'|'cosineProximity'|'meanAbsoluteError'|'meanAbsolutePercentageError'|'meanSquaredError'|'precision'|'recall'|'sparseCategoricalAccuracy'.
   * @param {Array<string>} [options.metrics=["accuracy"]] - The metrics to track during training. Could be one or more of: 'binaryAccuracy'|'binaryCrossentropy'|'categoricalAccuracy'|'categoricalCrossentropy'|'cosineProximity'|'meanAbsoluteError'|'meanAbsolutePercentageError'|'meanSquaredError'|'precision'|'r2Score'|'recall'|'sparseCategoricalAccuracy'.
   * @param {number} [options.numEpochs=50] - The number of iterations to run the training for.
   * @param {number} [options.batchSize=4] - The batch size to be used every epoch.
   * @param {Object} [options.callbacks] - Callbacks to be executed at various points during training. Valid callbacks include: 'onTrainBegin(logs)'|'onTrainEnd(logs)'|'onEpochBegin(epoch, logs)'|'onEpochEnd(epoch, logs)'|'onBatchBegin(batch, logs)'|'onBatchEnd(batch, logs)'|'onYield(epoch, batch, logs)'. Includes the 'onEpochEnd' callback by default.
   */
  constructor({
    inputShape,
    architecture = DEFAULTS.architecture,
    activation = DEFAULTS.activation,
    bias = DEFAULTS.bias,
    outputActivation = DEFAULTS.outputActivation,
    optimizer = DEFAULTS.optimizer,
    lossFunc = DEFAULTS.lossFunc,
    metrics = DEFAULTS.metrics,
    numEpochs = DEFAULTS.numEpochs,
    batchSize = DEFAULTS.batchSize,
    callbacks = DEFAULTS.callbacks
  } = {}) {
    this.inputShape = inputShape
    // `||` also replaces null / empty / zero values with the default.
    this.architecture = architecture || DEFAULTS.architecture
    this.activation = activation || DEFAULTS.activation
    // `??` rather than `||`: `false` is a legitimate value for bias.
    this.bias = bias ?? DEFAULTS.bias
    this.outputActivation = outputActivation || DEFAULTS.outputActivation
    this.optimizer = optimizer || DEFAULTS.optimizer
    this.lossFunc = lossFunc || DEFAULTS.lossFunc
    this.metrics = metrics || DEFAULTS.metrics
    this.numEpochs = numEpochs || DEFAULTS.numEpochs
    this.batchSize = batchSize || DEFAULTS.batchSize
    this.callbacks = callbacks || DEFAULTS.callbacks
    this.model = undefined
    this.dataset = undefined
  }

  /**
   * Preprocesses the data by shuffling it and separating features from labels. Called by default on loading the data.
   * The rows are shuffled on a copy, so the array passed in is left untouched.
   * @param {Array<Object>} data - The input data, one object per row.
   * @param {string} labelName - The name of the label field.
   * @returns {Array<Array<any>>} A pair `[unlabeledData, correspLabels]`: the feature values of every row (in the row's own key order) and the matching labels.
   */
  preprocess(data, labelName) {
    const shuffledData = utils.shuffleArray([...data])
    const unlabeledData = []
    const correspLabels = []
    for (const row of shuffledData) {
      const { [labelName]: label, ...features } = row
      unlabeledData.push(Object.values(features))
      correspLabels.push(label)
    }
    return [unlabeledData, correspLabels]
  }

  /**
   * Loads data from a specified URL and preprocesses it for training/testing.
   * The result is stored on `this.dataset` as four tensors; tensors from a previous load are disposed.
   * @param {Object} params - Parameters for loading data.
   * @param {string} params.datasetURL - The URL to the dataset file.
   * @param {string} params.labelFieldName - The name of the label field.
   * @param {number} [params.trainTestRatio=0.8] - The ratio for splitting data into training and testing sets.
   * @returns {Promise<void>}
   */
  async loadData({ datasetURL, labelFieldName, trainTestRatio = 0.8 }) {
    const data = await utils.request(datasetURL)
    const [unlabeledData, labels] = this.preprocess(data, labelFieldName)
    // Same truncation Array.prototype.slice applies to a fractional index.
    const splitIndex = Math.trunc(data.length * trainTestRatio)
    const nextDataset = {
      'trainingData': tf.tensor(unlabeledData.slice(0, splitIndex)),
      'trainingLabels': tf.tensor(labels.slice(0, splitIndex)),
      'testData': tf.tensor(unlabeledData.slice(splitIndex)),
      'testLabels': tf.tensor(labels.slice(splitIndex))
    }
    // Release the tensors of an earlier load only once the new ones exist.
    const previousDataset = this.dataset
    this.dataset = nextDataset
    if (previousDataset) {
      tf.dispose(previousDataset)
    }
  }

  /**
   * Builds the (uncompiled) sequential model from the input shape and architecture configuration and stores it on `this.model`.
   * Logs an error and leaves `this.model` unchanged when no input shape is given and none can be read from a loaded dataset.
   * @returns {Promise<void>}
   * @throws {Error} When a layer names a type that `tf.layers` does not provide.
   */
  async build() {
    const hasExplicitShape = Array.isArray(this.inputShape) || Number.isFinite(this.inputShape)
    if (!hasExplicitShape) {
      this.inputShape = this.dataset?.trainingData.shape[1]
    }
    if (this.inputShape == null) {
      console.error("The shape of the input data cannot be determined to build the model. Try reloading the dataset or specify the correct shape when calling the function.")
      return
    }
    // Layers expect the shape as an array; a bare number is a 1-D shape.
    const inputShape = Array.isArray(this.inputShape) ? this.inputShape : [this.inputShape]
    const lastIndex = this.architecture.length - 1
    let model
    tf.tidy(() => {
      const layers = this.architecture.map((layer, index) => {
        const type = layer?.type || "dense"
        if (typeof tf.layers[type] !== 'function') {
          throw new Error(`Unknown layer type "${type}" at architecture index ${index}.`)
        }
        const layerArch = {
          // A plain number is shorthand for "dense layer with this many units".
          'units': typeof layer === 'number' ? layer : layer?.units,
          'kernelSize': layer?.kernelSize,
          'filters': layer?.filters,
          'poolSize': layer?.poolSize,
          'strides': layer?.strides,
          'activation': layer?.activation || this.activation,
          // TF.js layers read `useBias`; `??` keeps an explicit per-layer `false`.
          'useBias': layer?.bias ?? this.bias
        }
        if (index === 0) {
          layerArch.inputShape = inputShape
        }
        // Checked independently of the branch above so that a single-layer
        // model (first layer === last layer) also gets the output activation.
        if (index === lastIndex) {
          layerArch.activation = this.outputActivation
        }
        return tf.layers[type](layerArch)
      })
      model = tf.sequential({ layers })
    })
    this.model = model
  }

  /**
   * Compiles the model and trains it on the loaded training data.
   * Logs an error and returns `undefined` when no dataset is loaded or no model is built.
   * @returns {Promise<Object>|undefined} A promise that resolves to the training information, including the values of the final loss and the metrics specified. It rejects if compiling or fitting fails.
   */
  train() {
    if (typeof(this.dataset) === 'undefined') {
      console.error("No dataset loaded! Please load the data first before starting training.")
      return
    }
    if (typeof(this.model) === 'undefined') {
      console.error("No model to train! Please build the model first before starting training.")
      return
    }
    // The async wrapper turns a synchronous compile error into a rejection
    // and lets a failed fit() reject instead of leaving the promise pending.
    return (async () => {
      this.model.compile({
        'optimizer': this.optimizer,
        'loss': this.lossFunc,
        'metrics': this.metrics
      })
      return this.model.fit(this.dataset.trainingData, this.dataset.trainingLabels, {
        'epochs': this.numEpochs,
        'batchSize': this.batchSize,
        'callbacks': this.callbacks
      })
    })()
  }

  /**
   * Validates the model on the test dataset and calculates the proportion of correct predictions.
   * @returns {Promise<number>} A promise that resolves to the proportion of correct predictions (accuracy for now, more metrics could be added later).
   * @throws {Error} (as a rejection) When no model has been built or no dataset has been loaded.
   */
  async test() {
    if (typeof(this.model) === 'undefined') {
      throw new Error("No model to test! Please build and train the model first before testing.")
    }
    if (typeof(this.dataset) === 'undefined') {
      throw new Error("No dataset loaded! Please load the data first before testing.")
    }
    // tidy() disposes the prediction tensor once the ratio has been computed.
    return tf.tidy(() => {
      const predictions = this.model.predict(this.dataset.testData, {
        'batchSize': this.batchSize
      })
      const { numCorrectPredictions } = utils.calculateMetrics(predictions, this.dataset.testLabels)
      // Return the proportion of correct predictions.
      return numCorrectPredictions / this.dataset.testData.shape[0]
    })
  }
}