From 90c0ca36aa82f7a2aeb483348eabc44889814422 Mon Sep 17 00:00:00 2001
From: mnottheone
Date: Wed, 23 Aug 2017 00:17:09 +0530
Subject: [PATCH 01/17] SGD + momentum optimizer added

---
 src/momentumOptimizer.ts | 125 +++++++++++++++++++++++++++++++++++++++
 src/session_test.ts      |  34 +++++++++++
 2 files changed, 159 insertions(+)
 create mode 100644 src/momentumOptimizer.ts

diff --git a/src/momentumOptimizer.ts b/src/momentumOptimizer.ts
new file mode 100644
index 0000000000..7e98a8f006
--- /dev/null
+++ b/src/momentumOptimizer.ts
@@ -0,0 +1,125 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+import {Node} from './graph';
+import {NDArrayMath} from './math/math';
+import {NDArray, Scalar} from './math/ndarray';
+import {Optimizer} from './optimizer';
+import {SessionRuntime} from './session';
+import * as session_util from './session_util';
+import {TensorArrayMap} from './tensor_array_map';
+
+export class MomentumOptimizer extends Optimizer {
+  constructor(private learningRate: number, private momentum:
+    number, specifiedVariableList?: Node[]) {
+    super(specifiedVariableList);
+    if(specifiedVariableList == null)
+    {
+      this.variableVelocities = null;
+    }
+  }
+
+  beforeBatch(
+      math: NDArrayMath, batchSize: number, runtime: SessionRuntime,
+      activationArrayMap: TensorArrayMap, gradientArrayMap: TensorArrayMap) {
+    this.variableNodes = this.specifiedVariableNodes == null ?
+ session_util.getVariableNodesFromEvaluationSet(runtime.nodes) : + this.specifiedVariableNodes; + this.m = Scalar.new(this.momentum); + if (batchSize !== this.prevBatchSize) { + this.prevBatchSize = batchSize; + this.c = Scalar.new(-this.learningRate / batchSize); + + } + if (this.variableVelocities == null){ + this.variableVelocities = new TensorArrayMap(); + this.variableNodes.forEach( + node => { + this.variableVelocities.set(node.output, + NDArray.zeros(node.output.shape)); + }); + } + + + this.variableNodes.forEach( + node => { + this.variableGradients.set(node.output, + NDArray.zeros(node.output.shape)); + }); + } + + afterExample( + math: NDArrayMath, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, gradientArrayMap: TensorArrayMap) { + math.scope((keep) => { + this.variableNodes!.forEach(node => { + + const gradient = gradientArrayMap.get(node.output); + const accumulatedGradient = this.variableGradients.get(node.output); + this.variableGradients.set( + node.output, keep(math.add(gradient, accumulatedGradient))); + accumulatedGradient.dispose(); + }); + }); + } + + afterBatch( + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, gradientArrayMap: TensorArrayMap) { + math.scope((keep) => { + this.variableNodes!.forEach(node => { + const oldVariable = activationArrayMap.get(node.output); + const gradient = this.variableGradients.get(node.output); + const oldVelocity = this.variableVelocities.get(node.output); + const velocity = math.scaledArrayAdd(this.m!, + oldVelocity, this.one!, gradient); + const variable = + math.scaledArrayAdd(this.c!, velocity, this.one!, oldVariable); + this.variableVelocities.set(node.output, keep(velocity)); + activationArrayMap.set(node.output, keep(variable)); + node.data = variable; + + oldVariable.dispose(); + oldVelocity.dispose(); + }); + }); + + this.variableGradients.dispose(); + this.variableGradients = new TensorArrayMap(); + } + + dispose() { + if (this.c != null) { + this.c.dispose(); + } + if (this.m != null) { + this.m.dispose(); + } + this.one.dispose(); + this.variableVelocities.dispose(); + } + + setLearningRate(learningRate: number) { + this.learningRate = learningRate; + } + + setMomentum(momentum: number) { + this.momentum = momentum; + } + + private variableGradients = new TensorArrayMap(); + private variableVelocities = new TensorArrayMap(); + private prevBatchSize: number; + private one = Scalar.new(1); + private c: Scalar; + private m: Scalar; +} \ No newline at end of file diff --git a/src/session_test.ts b/src/session_test.ts index 095af5dad2..7a801d8721 100644 --- a/src/session_test.ts +++ b/src/session_test.ts @@ -20,6 +20,8 @@ import {NDArrayMathGPU} from './math/math_gpu'; import {Array1D, NDArray, Scalar} from './math/ndarray'; import {FeedDictionary, FeedEntry, Session} from './session'; import {SGDOptimizer} from './sgd_optimizer'; +import {MomentumOptimizer} from './momentumOptimizer'; + import * as test_util from './test_util'; @@ -288,6 +290,38 @@ describe('Session', () => { }); }); + it('Safe mode math, math scope train does not throw', () => { + const x = g.placeholder('x', [2]); + const w = g.variable('w', NDArray.zeros([1,2])); + const b = g.variable('b', NDArray.zeros([1])); + const y = g.reduceSum(g.add(g.matmul(w, x), b)); + + const safeMode = true; + const optimizer = new MomentumOptimizer(0.1,0.5); + const math = new NDArrayMathCPU(safeMode); + const session = new Session(g, math); + const inputProvider: InputProvider = { + getNextCopy() { + return 
Array1D.new([2, 4 ]); + }, + disposeCopy(math, example) {} + }; + + math.scope(() => { + // w = reduce_sum(w_1*x_1 + w_2*x_2 + b) + // velocity_w = [momentum* old_vel_w1 + x_1, + // momentum* old_vel_w2 + x_2] = [2,4] + // w = [ w_old - lr*vel_w1, w_old - lr*vel_w2] = [-0.2, -0.4] + session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer); + // velocity_w = [momentum* old_vel_w1 + x_1, + // momentum* old_vel_w2 + x_2] = [3,6] + // w = [ w_old - lr*vel_w1, w_old - lr*vel_w2] = [-0.5, -1.0] + session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer); + const dydw = session.activationArrayMap.get(w).getValues(); + test_util.expectArraysClose(dydw, new Float32Array([-.5,-1.0]), 2e-5); + }); + }); + it('Safe mode math, no math scope train throws', () => { const x = g.placeholder('x', [2]); const y = g.square(x); From 6e7566539848fbee42cc4f70a6a54a097ebde48b Mon Sep 17 00:00:00 2001 From: mnottheone Date: Thu, 24 Aug 2017 01:06:11 +0530 Subject: [PATCH 02/17] momentum optimizer extended from sgd --- src/momentumOptimizer.ts | 69 +++++++++++----------------------------- src/session_test.ts | 8 +++-- src/sgd_optimizer.ts | 10 +++--- 3 files changed, 29 insertions(+), 58 deletions(-) diff --git a/src/momentumOptimizer.ts b/src/momentumOptimizer.ts index 7e98a8f006..1b5b3c6a35 100644 --- a/src/momentumOptimizer.ts +++ b/src/momentumOptimizer.ts @@ -13,19 +13,15 @@ limitations under the License. import {Node} from './graph'; import {NDArrayMath} from './math/math'; import {NDArray, Scalar} from './math/ndarray'; -import {Optimizer} from './optimizer'; +import {SGDOptimizer} from './sgd_optimizer'; import {SessionRuntime} from './session'; import * as session_util from './session_util'; import {TensorArrayMap} from './tensor_array_map'; -export class MomentumOptimizer extends Optimizer { - constructor(private learningRate: number, private momentum: - number, specifiedVariableList?: Node[]) { - super(specifiedVariableList); - if(specifiedVariableList == null) - { - this.variableVelocities = null; - } +export class MomentumOptimizer extends SGDOptimizer { + constructor(protected learningRate: number, + private momentum: number, specifiedVariableList?: Node[]) { + super(learningRate, specifiedVariableList); } beforeBatch( @@ -35,53 +31,34 @@ export class MomentumOptimizer extends Optimizer { session_util.getVariableNodesFromEvaluationSet(runtime.nodes) : this.specifiedVariableNodes; this.m = Scalar.new(this.momentum); + if (batchSize !== this.prevBatchSize) { this.prevBatchSize = batchSize; - this.c = Scalar.new(-this.learningRate / batchSize); - + this.c = Scalar.new(-this.learningRate / batchSize); } - if (this.variableVelocities == null){ - this.variableVelocities = new TensorArrayMap(); - this.variableNodes.forEach( - node => { - this.variableVelocities.set(node.output, - NDArray.zeros(node.output.shape)); - }); - } - - - this.variableNodes.forEach( - node => { - this.variableGradients.set(node.output, - NDArray.zeros(node.output.shape)); - }); - } - - afterExample( - math: NDArrayMath, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, gradientArrayMap: TensorArrayMap) { - math.scope((keep) => { - this.variableNodes!.forEach(node => { - const gradient = gradientArrayMap.get(node.output); - const accumulatedGradient = this.variableGradients.get(node.output); - this.variableGradients.set( - node.output, keep(math.add(gradient, accumulatedGradient))); - accumulatedGradient.dispose(); + if (this.variableVelocities.size() === 0) { + this.variableNodes.forEach(node => { + 
this.variableVelocities.set(node.output, + NDArray.zeros(node.output.shape)); + }); + } + this.variableNodes.forEach(node => { + this.variableGradients.set(node.output, + NDArray.zeros(node.output.shape)); }); - }); } afterBatch( math: NDArrayMath, batchSize: number, runtime: SessionRuntime, activationArrayMap: TensorArrayMap, gradientArrayMap: TensorArrayMap) { math.scope((keep) => { - this.variableNodes!.forEach(node => { + this.variableNodes.forEach(node => { const oldVariable = activationArrayMap.get(node.output); const gradient = this.variableGradients.get(node.output); const oldVelocity = this.variableVelocities.get(node.output); - const velocity = math.scaledArrayAdd(this.m!, - oldVelocity, this.one!, gradient); + const velocity = math.scaledArrayAdd(this.m, + oldVelocity, this.one, gradient); const variable = math.scaledArrayAdd(this.c!, velocity, this.one!, oldVariable); this.variableVelocities.set(node.output, keep(velocity)); @@ -108,18 +85,10 @@ export class MomentumOptimizer extends Optimizer { this.variableVelocities.dispose(); } - setLearningRate(learningRate: number) { - this.learningRate = learningRate; - } - setMomentum(momentum: number) { this.momentum = momentum; } - private variableGradients = new TensorArrayMap(); private variableVelocities = new TensorArrayMap(); - private prevBatchSize: number; - private one = Scalar.new(1); - private c: Scalar; private m: Scalar; } \ No newline at end of file diff --git a/src/session_test.ts b/src/session_test.ts index 7a801d8721..81c2325c7c 100644 --- a/src/session_test.ts +++ b/src/session_test.ts @@ -297,7 +297,7 @@ describe('Session', () => { const y = g.reduceSum(g.add(g.matmul(w, x), b)); const safeMode = true; - const optimizer = new MomentumOptimizer(0.1,0.5); + const optimizer = new MomentumOptimizer(0.1, 0.5); const math = new NDArrayMathCPU(safeMode); const session = new Session(g, math); const inputProvider: InputProvider = { @@ -313,12 +313,14 @@ describe('Session', () => { // momentum* old_vel_w2 + x_2] = [2,4] // w = [ w_old - lr*vel_w1, w_old - lr*vel_w2] = [-0.2, -0.4] session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer); + const dydw = session.activationArrayMap.get(w).getValues(); + test_util.expectArraysClose(dydw, new Float32Array([-.2, -0.4]), 1e-5); // velocity_w = [momentum* old_vel_w1 + x_1, // momentum* old_vel_w2 + x_2] = [3,6] // w = [ w_old - lr*vel_w1, w_old - lr*vel_w2] = [-0.5, -1.0] session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer); - const dydw = session.activationArrayMap.get(w).getValues(); - test_util.expectArraysClose(dydw, new Float32Array([-.5,-1.0]), 2e-5); + const dydw2 = session.activationArrayMap.get(w).getValues(); + test_util.expectArraysClose(dydw2, new Float32Array([-.5, -1.0]), 2e-5); }); }); diff --git a/src/sgd_optimizer.ts b/src/sgd_optimizer.ts index 0014a50992..3b49fccf7d 100644 --- a/src/sgd_optimizer.ts +++ b/src/sgd_optimizer.ts @@ -22,7 +22,7 @@ import * as session_util from './session_util'; import {TensorArrayMap} from './tensor_array_map'; export class SGDOptimizer extends Optimizer { - constructor(private learningRate: number, specifiedVariableList?: Node[]) { + constructor(protected learningRate: number, specifiedVariableList?: Node[]) { super(specifiedVariableList); } @@ -86,8 +86,8 @@ export class SGDOptimizer extends Optimizer { this.learningRate = learningRate; } - private variableGradients = new TensorArrayMap(); - private prevBatchSize: number; - private one = Scalar.new(1); - private c: Scalar; + protected variableGradients = new 
TensorArrayMap(); + protected prevBatchSize: number; + protected one = Scalar.new(1); + protected c: Scalar; } From 282a063807ae5779e6367a9598bba5144a5c7c04 Mon Sep 17 00:00:00 2001 From: Aman Kumar Singh Date: Thu, 24 Aug 2017 01:42:59 +0530 Subject: [PATCH 03/17] momentum optimizer used in model-builder --- demos/model-builder/model-builder.ts | 70 ++++++++++++++-------------- src/index.ts | 2 + 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/demos/model-builder/model-builder.ts b/demos/model-builder/model-builder.ts index 2ce4a9b40b..252981adbc 100644 --- a/demos/model-builder/model-builder.ts +++ b/demos/model-builder/model-builder.ts @@ -19,11 +19,11 @@ import './model-layer'; import '../demo-header'; import '../demo-footer'; +import {PolymerElement, PolymerHTMLElement} from '../polymer-spec'; // tslint:disable-next-line:max-line-length -import {Array1D, Array3D, DataStats, FeedEntry, Graph, GraphRunner, GraphRunnerEventObserver, InCPUMemoryShuffledInputProviderBuilder, InMemoryDataset, MetricReduction, NDArray, NDArrayMath, NDArrayMathCPU, NDArrayMathGPU, Optimizer, Scalar, Session, SGDOptimizer, Tensor, util} from '../deeplearnjs'; +import {Array1D, Array3D, DataStats, FeedEntry, Graph, GraphRunner, GraphRunnerEventObserver, InCPUMemoryShuffledInputProviderBuilder, InMemoryDataset, MetricReduction, NDArray, NDArrayMath, NDArrayMathCPU, NDArrayMathGPU, Optimizer, Scalar, Session, MomentumOptimizer, Tensor, util} from '../deeplearnjs'; import {NDArrayImageVisualizer} from '../ndarray-image-visualizer'; import {NDArrayLogitsVisualizer} from '../ndarray-logits-visualizer'; -import {PolymerElement, PolymerHTMLElement} from '../polymer-spec'; import * as xhr_dataset from '../xhr-dataset'; import {XhrDataset, XhrDatasetConfig} from '../xhr-dataset'; @@ -37,6 +37,7 @@ const DATASETS_CONFIG_JSON = 'model-builder-datasets-config.json'; // TODO(nsthorat): Make these parameters in the UI. const BATCH_SIZE = 64; const LEARNING_RATE = 0.1; +const MOMENTUM = 0.1; /** How often to evaluate the model against test data. */ const EVAL_INTERVAL_MS = 1500; /** How often to compute the cost. Downloading the cost stalls the GPU. */ @@ -185,34 +186,32 @@ export class ModelBuilder extends ModelBuilderPolymer { totalTimeSec.toFixed(1), }; this.graphRunner = new GraphRunner(this.math, this.session, eventObserver); - this.optimizer = new SGDOptimizer(LEARNING_RATE); + this.optimizer = new MomentumOptimizer(LEARNING_RATE, MOMENTUM); // Set up datasets. this.populateDatasets(); - this.querySelector('#dataset-dropdown .dropdown-content') - .addEventListener( - // tslint:disable-next-line:no-any - 'iron-activate', (event: any) => { - // Update the dataset. - const datasetName = event.detail.selected; - this.updateSelectedDataset(datasetName); + this.querySelector('#dataset-dropdown .dropdown-content')!.addEventListener( + // tslint:disable-next-line:no-any + 'iron-activate', (event: any) => { + // Update the dataset. + const datasetName = event.detail.selected; + this.updateSelectedDataset(datasetName); - // TODO(nsthorat): Remember the last model used for each dataset. - this.removeAllLayers(); - }); - this.querySelector('#model-dropdown .dropdown-content') - .addEventListener( - // tslint:disable-next-line:no-any - 'iron-activate', (event: any) => { - // Update the model. - const modelName = event.detail.selected; - this.updateSelectedModel(modelName); - }); + // TODO(nsthorat): Remember the last model used for each dataset. 
+ this.removeAllLayers(); + }); + this.querySelector('#model-dropdown .dropdown-content')!.addEventListener( + // tslint:disable-next-line:no-any + 'iron-activate', (event: any) => { + // Update the model. + const modelName = event.detail.selected; + this.updateSelectedModel(modelName); + }); { const normalizationDropdown = - this.querySelector('#normalization-dropdown .dropdown-content'); + this.querySelector('#normalization-dropdown .dropdown-content')!; // tslint:disable-next-line:no-any normalizationDropdown.addEventListener('iron-activate', (event: any) => { const selectedNormalizationOption = event.detail.selected; @@ -227,20 +226,20 @@ export class ModelBuilder extends ModelBuilderPolymer { this.showTrainStats = false; this.showDatasetStats = false; - const addButton = this.querySelector('#add-layer'); + const addButton = this.querySelector('#add-layer')!; addButton.addEventListener('click', () => this.addLayer()); - const downloadModelButton = this.querySelector('#download-model'); + const downloadModelButton = this.querySelector('#download-model')!; downloadModelButton.addEventListener('click', () => this.downloadModel()); - const uploadModelButton = this.querySelector('#upload-model'); + const uploadModelButton = this.querySelector('#upload-model')!; uploadModelButton.addEventListener('click', () => this.uploadModel()); this.setupUploadModelButton(); - const uploadWeightsButton = this.querySelector('#upload-weights'); + const uploadWeightsButton = this.querySelector('#upload-weights')!; uploadWeightsButton.addEventListener('click', () => this.uploadWeights()); this.setupUploadWeightsButton(); - const stopButton = this.querySelector('#stop'); + const stopButton = this.querySelector('#stop')!; stopButton.addEventListener('click', () => { this.applicationState = ApplicationState.IDLE; this.graphRunner.stopTraining(); @@ -252,13 +251,12 @@ export class ModelBuilder extends ModelBuilderPolymer { this.startTraining(); }); - this.querySelector('#environment-toggle') - .addEventListener('change', (event) => { - this.math = - // tslint:disable-next-line:no-any - (event.target as any).active ? this.mathGPU : this.mathCPU; - this.graphRunner.setMath(this.math); - }); + this.querySelector( + '#environment-toggle')!.addEventListener('change', (event) => { + // tslint:disable-next-line:no-any + this.math = (event.target as any).active ? this.mathGPU : this.mathCPU; + this.graphRunner.setMath(this.math); + }); this.hiddenLayers = []; this.examplesPerSec = 0; @@ -777,7 +775,7 @@ export class ModelBuilder extends ModelBuilderPolymer { // Show and setup the load view button. const fileInput = this.querySelector('#model-file') as HTMLInputElement; fileInput.addEventListener('change', event => { - const file = fileInput.files[0]; + const file = fileInput.files![0]; // Clear out the value of the file chooser. This ensures that if the user // selects the same file, we'll re-read it. fileInput.value = ''; @@ -819,7 +817,7 @@ export class ModelBuilder extends ModelBuilderPolymer { // Show and setup the load view button. const fileInput = this.querySelector('#weights-file') as HTMLInputElement; fileInput.addEventListener('change', event => { - const file = fileInput.files[0]; + const file = fileInput.files![0]; // Clear out the value of the file chooser. This ensures that if the user // selects the same file, we'll re-read it. 
fileInput.value = ''; diff --git a/src/index.ts b/src/index.ts index ad474cfe1b..4ef0a6c9cc 100644 --- a/src/index.ts +++ b/src/index.ts @@ -37,5 +37,7 @@ export {GPGPUContext} from './math/webgl/gpgpu_context'; export {Optimizer} from './optimizer'; export {CostReduction, FeedEntry, Session} from './session'; export {SGDOptimizer} from './sgd_optimizer'; +export {MomentumOptimizer} from './momentumOptimizer'; + // Second level exports. export {conv_util, gpgpu_util, render_ndarray_gpu_util, util, webgl_util}; From 147eb17cff16c62a1ab40fabdcba903d4e2ba338 Mon Sep 17 00:00:00 2001 From: mnottheone Date: Thu, 24 Aug 2017 02:02:17 +0530 Subject: [PATCH 04/17] cleanup --- demos/model-builder/model-builder.ts | 72 ++++++++++++++-------------- src/index.ts | 1 - 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/demos/model-builder/model-builder.ts b/demos/model-builder/model-builder.ts index 252981adbc..7687c8d229 100644 --- a/demos/model-builder/model-builder.ts +++ b/demos/model-builder/model-builder.ts @@ -1,11 +1,8 @@ /* Copyright 2017 Google Inc. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,11 +16,11 @@ import './model-layer'; import '../demo-header'; import '../demo-footer'; -import {PolymerElement, PolymerHTMLElement} from '../polymer-spec'; // tslint:disable-next-line:max-line-length import {Array1D, Array3D, DataStats, FeedEntry, Graph, GraphRunner, GraphRunnerEventObserver, InCPUMemoryShuffledInputProviderBuilder, InMemoryDataset, MetricReduction, NDArray, NDArrayMath, NDArrayMathCPU, NDArrayMathGPU, Optimizer, Scalar, Session, MomentumOptimizer, Tensor, util} from '../deeplearnjs'; import {NDArrayImageVisualizer} from '../ndarray-image-visualizer'; import {NDArrayLogitsVisualizer} from '../ndarray-logits-visualizer'; +import {PolymerElement, PolymerHTMLElement} from '../polymer-spec'; import * as xhr_dataset from '../xhr-dataset'; import {XhrDataset, XhrDatasetConfig} from '../xhr-dataset'; @@ -186,32 +183,34 @@ export class ModelBuilder extends ModelBuilderPolymer { totalTimeSec.toFixed(1), }; this.graphRunner = new GraphRunner(this.math, this.session, eventObserver); - this.optimizer = new MomentumOptimizer(LEARNING_RATE, MOMENTUM); + this.optimizer = MomentumOptimizer(LEARNING_RATE, MOMENTUM); // Set up datasets. this.populateDatasets(); - this.querySelector('#dataset-dropdown .dropdown-content')!.addEventListener( - // tslint:disable-next-line:no-any - 'iron-activate', (event: any) => { - // Update the dataset. - const datasetName = event.detail.selected; - this.updateSelectedDataset(datasetName); + this.querySelector('#dataset-dropdown .dropdown-content') + .addEventListener( + // tslint:disable-next-line:no-any + 'iron-activate', (event: any) => { + // Update the dataset. + const datasetName = event.detail.selected; + this.updateSelectedDataset(datasetName); - // TODO(nsthorat): Remember the last model used for each dataset. - this.removeAllLayers(); - }); - this.querySelector('#model-dropdown .dropdown-content')!.addEventListener( - // tslint:disable-next-line:no-any - 'iron-activate', (event: any) => { - // Update the model. 
- const modelName = event.detail.selected; - this.updateSelectedModel(modelName); - }); + // TODO(nsthorat): Remember the last model used for each dataset. + this.removeAllLayers(); + }); + this.querySelector('#model-dropdown .dropdown-content') + .addEventListener( + // tslint:disable-next-line:no-any + 'iron-activate', (event: any) => { + // Update the model. + const modelName = event.detail.selected; + this.updateSelectedModel(modelName); + }); { const normalizationDropdown = - this.querySelector('#normalization-dropdown .dropdown-content')!; + this.querySelector('#normalization-dropdown .dropdown-content'); // tslint:disable-next-line:no-any normalizationDropdown.addEventListener('iron-activate', (event: any) => { const selectedNormalizationOption = event.detail.selected; @@ -226,20 +225,20 @@ export class ModelBuilder extends ModelBuilderPolymer { this.showTrainStats = false; this.showDatasetStats = false; - const addButton = this.querySelector('#add-layer')!; + const addButton = this.querySelector('#add-layer'); addButton.addEventListener('click', () => this.addLayer()); - const downloadModelButton = this.querySelector('#download-model')!; + const downloadModelButton = this.querySelector('#download-model'); downloadModelButton.addEventListener('click', () => this.downloadModel()); - const uploadModelButton = this.querySelector('#upload-model')!; + const uploadModelButton = this.querySelector('#upload-model'); uploadModelButton.addEventListener('click', () => this.uploadModel()); this.setupUploadModelButton(); - const uploadWeightsButton = this.querySelector('#upload-weights')!; + const uploadWeightsButton = this.querySelector('#upload-weights'); uploadWeightsButton.addEventListener('click', () => this.uploadWeights()); this.setupUploadWeightsButton(); - const stopButton = this.querySelector('#stop')!; + const stopButton = this.querySelector('#stop'); stopButton.addEventListener('click', () => { this.applicationState = ApplicationState.IDLE; this.graphRunner.stopTraining(); @@ -251,12 +250,13 @@ export class ModelBuilder extends ModelBuilderPolymer { this.startTraining(); }); - this.querySelector( - '#environment-toggle')!.addEventListener('change', (event) => { - // tslint:disable-next-line:no-any - this.math = (event.target as any).active ? this.mathGPU : this.mathCPU; - this.graphRunner.setMath(this.math); - }); + this.querySelector('#environment-toggle') + .addEventListener('change', (event) => { + this.math = + // tslint:disable-next-line:no-any + (event.target as any).active ? this.mathGPU : this.mathCPU; + this.graphRunner.setMath(this.math); + }); this.hiddenLayers = []; this.examplesPerSec = 0; @@ -775,7 +775,7 @@ export class ModelBuilder extends ModelBuilderPolymer { // Show and setup the load view button. const fileInput = this.querySelector('#model-file') as HTMLInputElement; fileInput.addEventListener('change', event => { - const file = fileInput.files![0]; + const file = fileInput.files[0]; // Clear out the value of the file chooser. This ensures that if the user // selects the same file, we'll re-read it. fileInput.value = ''; @@ -817,7 +817,7 @@ export class ModelBuilder extends ModelBuilderPolymer { // Show and setup the load view button. const fileInput = this.querySelector('#weights-file') as HTMLInputElement; fileInput.addEventListener('change', event => { - const file = fileInput.files![0]; + const file = fileInput.files[0]; // Clear out the value of the file chooser. This ensures that if the user // selects the same file, we'll re-read it. 
fileInput.value = ''; @@ -837,4 +837,4 @@ export class ModelBuilder extends ModelBuilderPolymer { } } -document.registerElement(ModelBuilder.prototype.is, ModelBuilder); +document.registerElement(ModelBuilder.prototype.is, ModelBuilder); \ No newline at end of file diff --git a/src/index.ts b/src/index.ts index 4ef0a6c9cc..fdf05147a9 100644 --- a/src/index.ts +++ b/src/index.ts @@ -38,6 +38,5 @@ export {Optimizer} from './optimizer'; export {CostReduction, FeedEntry, Session} from './session'; export {SGDOptimizer} from './sgd_optimizer'; export {MomentumOptimizer} from './momentumOptimizer'; - // Second level exports. export {conv_util, gpgpu_util, render_ndarray_gpu_util, util, webgl_util}; From dd4f057f0965976d9eef5bda88aff50f332393e4 Mon Sep 17 00:00:00 2001 From: mnottheone Date: Thu, 24 Aug 2017 02:04:52 +0530 Subject: [PATCH 05/17] -_- --- demos/model-builder/model-builder.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/model-builder/model-builder.ts b/demos/model-builder/model-builder.ts index 7687c8d229..9f621cc652 100644 --- a/demos/model-builder/model-builder.ts +++ b/demos/model-builder/model-builder.ts @@ -183,7 +183,7 @@ export class ModelBuilder extends ModelBuilderPolymer { totalTimeSec.toFixed(1), }; this.graphRunner = new GraphRunner(this.math, this.session, eventObserver); - this.optimizer = MomentumOptimizer(LEARNING_RATE, MOMENTUM); + this.optimizer = new MomentumOptimizer(LEARNING_RATE, MOMENTUM); // Set up datasets. this.populateDatasets(); From 55cb3fd679e27ad0902e0a953e0f8a55420a078a Mon Sep 17 00:00:00 2001 From: mnottheone Date: Thu, 24 Aug 2017 23:50:03 +0530 Subject: [PATCH 06/17] redundant code removed in momentumOptimizer --- src/momentumOptimizer.ts | 31 ++++++++++--------------------- src/session_test.ts | 2 +- 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/src/momentumOptimizer.ts b/src/momentumOptimizer.ts index 1b5b3c6a35..fb9a20af99 100644 --- a/src/momentumOptimizer.ts +++ b/src/momentumOptimizer.ts @@ -15,7 +15,6 @@ import {NDArrayMath} from './math/math'; import {NDArray, Scalar} from './math/ndarray'; import {SGDOptimizer} from './sgd_optimizer'; import {SessionRuntime} from './session'; -import * as session_util from './session_util'; import {TensorArrayMap} from './tensor_array_map'; export class MomentumOptimizer extends SGDOptimizer { @@ -25,28 +24,18 @@ export class MomentumOptimizer extends SGDOptimizer { } beforeBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, gradientArrayMap: TensorArrayMap) { - this.variableNodes = this.specifiedVariableNodes == null ? 
- session_util.getVariableNodesFromEvaluationSet(runtime.nodes) : - this.specifiedVariableNodes; - this.m = Scalar.new(this.momentum); - - if (batchSize !== this.prevBatchSize) { - this.prevBatchSize = batchSize; - this.c = Scalar.new(-this.learningRate / batchSize); - } + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, gradientArrayMap: TensorArrayMap) { + super.beforeBatch(math, batchSize, runtime, + activationArrayMap, gradientArrayMap); - if (this.variableVelocities.size() === 0) { - this.variableNodes.forEach(node => { - this.variableVelocities.set(node.output, - NDArray.zeros(node.output.shape)); - }); + this.m = Scalar.new(this.momentum); + if (this.variableVelocities.size() === 0){ + this.variableNodes.forEach(node => { + this.variableVelocities.set(node.output, + NDArray.zeros(node.output.shape)); + }); } - this.variableNodes.forEach(node => { - this.variableGradients.set(node.output, - NDArray.zeros(node.output.shape)); - }); } afterBatch( diff --git a/src/session_test.ts b/src/session_test.ts index 81c2325c7c..af64cfa3f3 100644 --- a/src/session_test.ts +++ b/src/session_test.ts @@ -302,7 +302,7 @@ describe('Session', () => { const session = new Session(g, math); const inputProvider: InputProvider = { getNextCopy() { - return Array1D.new([2, 4 ]); + return Array1D.new([2, 4]); }, disposeCopy(math, example) {} }; From 6b0a678a8dec5a1010e8fc095e9a9e531d95fcf5 Mon Sep 17 00:00:00 2001 From: mnottheone Date: Fri, 25 Aug 2017 10:15:25 +0530 Subject: [PATCH 07/17] tabs replaced with spaces --- src/momentumOptimizer.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/momentumOptimizer.ts b/src/momentumOptimizer.ts index fb9a20af99..e1c5f36826 100644 --- a/src/momentumOptimizer.ts +++ b/src/momentumOptimizer.ts @@ -27,14 +27,14 @@ export class MomentumOptimizer extends SGDOptimizer { math: NDArrayMath, batchSize: number, runtime: SessionRuntime, activationArrayMap: TensorArrayMap, gradientArrayMap: TensorArrayMap) { super.beforeBatch(math, batchSize, runtime, - activationArrayMap, gradientArrayMap); + activationArrayMap, gradientArrayMap); this.m = Scalar.new(this.momentum); if (this.variableVelocities.size() === 0){ - this.variableNodes.forEach(node => { - this.variableVelocities.set(node.output, - NDArray.zeros(node.output.shape)); - }); + this.variableNodes.forEach(node => { + this.variableVelocities.set(node.output, + NDArray.zeros(node.output.shape)); + }); } } From c4487761d7a9ef7345641230c826949171d775ef Mon Sep 17 00:00:00 2001 From: mnottheone Date: Fri, 25 Aug 2017 10:19:13 +0530 Subject: [PATCH 08/17] space added --- src/momentumOptimizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/momentumOptimizer.ts b/src/momentumOptimizer.ts index e1c5f36826..1a88548bc1 100644 --- a/src/momentumOptimizer.ts +++ b/src/momentumOptimizer.ts @@ -30,7 +30,7 @@ export class MomentumOptimizer extends SGDOptimizer { activationArrayMap, gradientArrayMap); this.m = Scalar.new(this.momentum); - if (this.variableVelocities.size() === 0){ + if (this.variableVelocities.size() === 0) { this.variableNodes.forEach(node => { this.variableVelocities.set(node.output, NDArray.zeros(node.output.shape)); From 366cc4f0b7072fc3aa4e44ae7604058c37f82bdd Mon Sep 17 00:00:00 2001 From: mnottheone Date: Fri, 25 Aug 2017 10:33:28 +0530 Subject: [PATCH 09/17] space added --- src/momentumOptimizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/momentumOptimizer.ts 
b/src/momentumOptimizer.ts index 1a88548bc1..d36a6b9bc4 100644 --- a/src/momentumOptimizer.ts +++ b/src/momentumOptimizer.ts @@ -33,7 +33,7 @@ export class MomentumOptimizer extends SGDOptimizer { if (this.variableVelocities.size() === 0) { this.variableNodes.forEach(node => { this.variableVelocities.set(node.output, - NDArray.zeros(node.output.shape)); + NDArray.zeros(node.output.shape)); }); } } From b0d1a1cd9047bf29aaacc9ffc5d69c0bd7996d8e Mon Sep 17 00:00:00 2001 From: mnottheone Date: Sat, 2 Sep 2017 15:28:33 +0530 Subject: [PATCH 10/17] rmsprop and adagrad optimizer added --- src/adagrad_optimizer.ts | 88 ++++++++++++++++++++++++++++++++++++++++ src/rmsprop_optimizer.ts | 81 ++++++++++++++++++++++++++++++++++++ src/session_test.ts | 85 +++++++++++++++++++++++++++++++++++++- 3 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 src/adagrad_optimizer.ts create mode 100644 src/rmsprop_optimizer.ts diff --git a/src/adagrad_optimizer.ts b/src/adagrad_optimizer.ts new file mode 100644 index 0000000000..c30b02db08 --- /dev/null +++ b/src/adagrad_optimizer.ts @@ -0,0 +1,88 @@ +/* Copyright 2017 Google Inc. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +import {Node} from './graph'; +import {NDArrayMath} from './math/math'; +import {NDArray, Scalar} from './math/ndarray'; +import {SGDOptimizer} from './sgd_optimizer'; +import {SessionRuntime} from './session'; +import {TensorArrayMap, SummedTensorArrayMap} from './tensor_array_map'; + +export class AdagradOptimizer extends SGDOptimizer { + constructor(protected learningRate: number, + protected momentum: number, specifiedVariableList?: Node[]) { + super(learningRate, specifiedVariableList); + } + + beforeBatch( + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { + super.beforeBatch(math, batchSize, runtime, + activationArrayMap, gradientArrayMap); + + this.m = Scalar.new(this.momentum); + this.eps = Scalar.new(1e-6); + if (this.cache.size() === 0) { + this.variableNodes.forEach(node => { + this.cache.set(node.output, + NDArray.zeros(node.output.shape)); + }); + } + } + + afterBatch( + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { + math.scope((keep) => { + this.variableNodes.forEach(node => { + const oldVariable = activationArrayMap.get(node.output); + const gradient = this.variableGradients.get(node.output); + const oldCache = this.cache.get(node.output); + const gradientSquare = math.multiply(gradient, gradient); + const cache = math.add(oldCache, gradientSquare); + const variable = math.scaledArrayAdd(this.c!, + math.divide( gradient, math.sqrt( math.add(cache, this.eps))), + this.one!, oldVariable); + this.cache.set(node.output, keep(cache)); + activationArrayMap.set(node.output, keep(variable)); + node.data = variable; + + oldVariable.dispose(); + 
oldCache.dispose(); + }); + }); + + this.variableGradients.dispose(); + this.variableGradients = new TensorArrayMap(); + } + + dispose() { + if (this.c != null) { + this.c.dispose(); + } + if (this.m != null) { + this.m.dispose(); + } + this.one.dispose(); + this.cache.dispose(); + } + + setMomentum(momentum: number) { + this.momentum = momentum; + } + + protected cache = new TensorArrayMap(); + protected m: Scalar; + protected eps: Scalar; +} diff --git a/src/rmsprop_optimizer.ts b/src/rmsprop_optimizer.ts new file mode 100644 index 0000000000..8ebc59a629 --- /dev/null +++ b/src/rmsprop_optimizer.ts @@ -0,0 +1,81 @@ +/* Copyright 2017 Google Inc. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +import {Node} from './graph'; +import {NDArrayMath} from './math/math'; +import {Scalar} from './math/ndarray'; +import {AdagradOptimizer} from './adagrad_optimizer'; +import {SessionRuntime} from './session'; +import {TensorArrayMap, SummedTensorArrayMap} from './tensor_array_map'; + +export class RmspropOptimizer extends AdagradOptimizer { + constructor(protected learningRate: number, + protected momentum: number, private gamma: number, + specifiedVariableList?: Node[]) { + super(learningRate, momentum, specifiedVariableList); + } + + beforeBatch( + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { + super.beforeBatch(math, batchSize, runtime, + activationArrayMap, gradientArrayMap); + + this.g = Scalar.new(this.gamma); + } + + + afterBatch( + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { + math.scope((keep) => { + this.variableNodes.forEach(node => { + const oldVariable = activationArrayMap.get(node.output); + const gradient = this.variableGradients.get(node.output); + const oldCache = this.cache.get(node.output); + const gradientSquare = math.multiply(gradient, gradient); + const cache = math.scaledArrayAdd(this.g!, oldCache, + math.sub(this.one, this.g)!, gradientSquare); + const variable = math.scaledArrayAdd(this.c!, + math.divide( gradient, math.sqrt( math.add(cache, this.eps))), + this.one!, oldVariable); + this.cache.set(node.output, keep(cache)); + activationArrayMap.set(node.output, keep(variable)); + node.data = variable; + + oldVariable.dispose(); + oldCache.dispose(); + }); + }); + + this.variableGradients.dispose(); + this.variableGradients = new TensorArrayMap(); + } + + dispose() { + if (this.c != null) { + this.c.dispose(); + } + if (this.m != null) { + this.m.dispose(); + } + this.one.dispose(); + this.cache.dispose(); + } + + setMomentum(momentum: number) { + this.momentum = momentum; + } + private g: Scalar; +} diff --git a/src/session_test.ts b/src/session_test.ts index d4c75a1cc8..0703c10237 100644 --- a/src/session_test.ts +++ b/src/session_test.ts @@ -21,6 +21,8 @@ import {Array1D, NDArray, Scalar} from 
'./math/ndarray';
 import {FeedDictionary, FeedEntry, Session} from './session';
 import {SGDOptimizer} from './sgd_optimizer';
 import {MomentumOptimizer} from './momentumOptimizer';
+import {AdagradOptimizer} from './adagrad_optimizer';
+import {RmspropOptimizer} from './rmsprop_optimizer';
 
 import * as test_util from './test_util';
 
@@ -330,7 +332,7 @@ describe('Session', () => {
 
     math.scope(() => {
       // w = reduce_sum(w_1*x_1 + w_2*x_2 + b)
-      // velocity_w = [momentum* old_vel_w1 + x_1, 
+      // velocity_w = [momentum* old_vel_w1 + x_1,
       //            momentum* old_vel_w2 + x_2] = [2,4]
       // w = [ w_old - lr*vel_w1, w_old - lr*vel_w2] = [-0.2, -0.4]
       session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer);
@@ -345,6 +347,87 @@ describe('Session', () => {
     });
   });
 
+
+  it('Safe mode math, math scope train does not throw (adagrad)', () => {
+    const x = g.placeholder('x', [2]);
+    const w = g.variable('w', NDArray.zeros([1,2]));
+    const b = g.variable('b', NDArray.zeros([1]));
+    const y = g.reduceSum(g.add(g.matmul(w, x), b));
+
+    const safeMode = true;
+    const optimizer = new AdagradOptimizer(0.1, 0.5);
+    const math = new NDArrayMathCPU(safeMode);
+    const session = new Session(g, math);
+    const inputProvider: InputProvider = {
+      getNextCopy() {
+        return Array1D.new([2, 4]);
+      },
+      disposeCopy(math, example) {}
+    };
+
+    math.scope(() => {
+      // w = reduce_sum(w_1*x_1 + w_2*x_2 + b)
+      // cache = [old_cache_w1 + grad_w1**2,
+      //            old_cache_w2 + grad_w2**2] = [4,16]
+      // w = [ w1_old - lr*grad_w1/sqrt(cache_w1 + eps),
+      //        w2_old - lr*grad_w2/sqrt(cache_w2 + eps)]
+      //            = [-0.1, -0.1]
+      session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer);
+      const dydw = session.activationArrayMap.get(w).getValues();
+      test_util.expectArraysClose(dydw, new Float32Array([-.1, -0.1]), 1e-5);
+      // cache = [old_cache_w1 + grad_w1**2,
+      //            old_cache_w2 + grad_w2**2] = [8,32]
+      // w = [ w1_old - lr*grad_w1/sqrt(cache_w1 + eps),
+      //        w2_old - lr*grad_w2/sqrt(cache_w2 + eps)]
+      //            = [-0.1707, -0.1707]
+      session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer);
+      const dydw2 = session.activationArrayMap.get(w).getValues();
+      test_util.expectArraysClose(dydw2,
+          new Float32Array([-.1707, -.1707]), 2e-5);
+    });
+  });
+
+  it('Safe mode math, math scope train does not throw (rmsprop)', () => {
+    const x = g.placeholder('x', [2]);
+    const w = g.variable('w', NDArray.zeros([1,2]));
+    const b = g.variable('b', NDArray.zeros([1]));
+    const y = g.reduceSum(g.add(g.matmul(w, x), b));
+    const safeMode = true;
+    const optimizer = new RmspropOptimizer(0.1, 0.5, 0.8);
+    const math = new NDArrayMathCPU(safeMode);
+    const session = new Session(g, math);
+    const inputProvider: InputProvider = {
+      getNextCopy() {
+        return Array1D.new([2, 4]);
+      },
+      disposeCopy(math, example) {}
+    };
+
+    math.scope(() => {
+      // w = reduce_sum(w_1*x_1 + w_2*x_2 + b)
+      // cache = [gamma*old_cache_w1 + (1-gamma)*grad_w1**2,
+      //            gamma*old_cache_w2 + (1-gamma)*grad_w2**2]
+      //            = [0.8, 3.2]
+      // w = [ w1_old - lr*grad_w1/sqrt(cache_w1 + eps),
+      //        w2_old - lr*grad_w2/sqrt(cache_w2 + eps)]
+      //            = [-0.2236, -0.2236]
+      session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer);
+      const dydw = session.activationArrayMap.get(w).getValues();
+      test_util.expectArraysClose(dydw,
+          new Float32Array([-.2236, -0.2236]), 1e-5);
+      // cache = [gamma*old_cache_w1 + (1-gamma)*grad_w1**2,
+      //            gamma*old_cache_w2 + (1-gamma)*grad_w2**2]
+      //            = [1.44, 5.76]
+      // w = [ w1_old - lr*grad_w1/sqrt(cache_w1 + eps),
+      //        w2_old - lr*grad_w2/sqrt(cache_w2 + eps)]
+      //            = [-.39027, -.39027]
+      
session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer); + const dydw2 = session.activationArrayMap.get(w).getValues(); + test_util.expectArraysClose(dydw2, + new Float32Array([-.39027, -.39027]), 2e-5); + }); + }); + it('Safe mode math, no math scope train throws', () => { const x = g.placeholder('x', [2]); const y = g.square(x); From 3324f7baa8db399c2af95cc0acb1852ed201e2ca Mon Sep 17 00:00:00 2001 From: mnottheone Date: Thu, 7 Sep 2017 02:04:46 +0530 Subject: [PATCH 11/17] resolved texture leakage and optimizers inherited from optimizer.ts --- src/adagrad_optimizer.ts | 30 +++++------- src/index.ts | 2 +- ...ntumOptimizer.ts => momentum_optimizer.ts} | 13 ++--- src/optimizer.ts | 48 ++++++++++++++++--- src/rmsprop_optimizer.ts | 44 +++++++++-------- src/session_test.ts | 2 +- src/sgd_optimizer.ts | 45 +---------------- 7 files changed, 85 insertions(+), 99 deletions(-) rename src/{momentumOptimizer.ts => momentum_optimizer.ts} (93%) diff --git a/src/adagrad_optimizer.ts b/src/adagrad_optimizer.ts index c30b02db08..f87c72db6e 100644 --- a/src/adagrad_optimizer.ts +++ b/src/adagrad_optimizer.ts @@ -13,14 +13,16 @@ limitations under the License. import {Node} from './graph'; import {NDArrayMath} from './math/math'; import {NDArray, Scalar} from './math/ndarray'; -import {SGDOptimizer} from './sgd_optimizer'; +import {Optimizer} from './optimizer'; import {SessionRuntime} from './session'; import {TensorArrayMap, SummedTensorArrayMap} from './tensor_array_map'; -export class AdagradOptimizer extends SGDOptimizer { +export class AdagradOptimizer extends Optimizer { constructor(protected learningRate: number, protected momentum: number, specifiedVariableList?: Node[]) { super(learningRate, specifiedVariableList); + this.m = Scalar.new(momentum); + this.eps = Scalar.new(1e-6); } beforeBatch( @@ -30,8 +32,6 @@ export class AdagradOptimizer extends SGDOptimizer { super.beforeBatch(math, batchSize, runtime, activationArrayMap, gradientArrayMap); - this.m = Scalar.new(this.momentum); - this.eps = Scalar.new(1e-6); if (this.cache.size() === 0) { this.variableNodes.forEach(node => { this.cache.set(node.output, @@ -52,7 +52,7 @@ export class AdagradOptimizer extends SGDOptimizer { const gradientSquare = math.multiply(gradient, gradient); const cache = math.add(oldCache, gradientSquare); const variable = math.scaledArrayAdd(this.c!, - math.divide( gradient, math.sqrt( math.add(cache, this.eps))), + math.divide(gradient, math.add(math.sqrt( cache), this.eps)), this.one!, oldVariable); this.cache.set(node.output, keep(cache)); activationArrayMap.set(node.output, keep(variable)); @@ -68,21 +68,13 @@ export class AdagradOptimizer extends SGDOptimizer { } dispose() { - if (this.c != null) { - this.c.dispose(); - } - if (this.m != null) { - this.m.dispose(); - } - this.one.dispose(); + super.dispose(); + this.m.dispose(); + this.eps.dispose(); this.cache.dispose(); } - setMomentum(momentum: number) { - this.momentum = momentum; - } - - protected cache = new TensorArrayMap(); - protected m: Scalar; - protected eps: Scalar; + private cache = new TensorArrayMap(); + private m: Scalar; + private eps: Scalar; } diff --git a/src/index.ts b/src/index.ts index fdf05147a9..ce6156cb1d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -37,6 +37,6 @@ export {GPGPUContext} from './math/webgl/gpgpu_context'; export {Optimizer} from './optimizer'; export {CostReduction, FeedEntry, Session} from './session'; export {SGDOptimizer} from './sgd_optimizer'; -export {MomentumOptimizer} from './momentumOptimizer'; 
+export {MomentumOptimizer} from './momentum_optimizer'; // Second level exports. export {conv_util, gpgpu_util, render_ndarray_gpu_util, util, webgl_util}; diff --git a/src/momentumOptimizer.ts b/src/momentum_optimizer.ts similarity index 93% rename from src/momentumOptimizer.ts rename to src/momentum_optimizer.ts index facff5f1d5..4739fab046 100644 --- a/src/momentumOptimizer.ts +++ b/src/momentum_optimizer.ts @@ -18,9 +18,10 @@ import {SessionRuntime} from './session'; import {TensorArrayMap, SummedTensorArrayMap} from './tensor_array_map'; export class MomentumOptimizer extends SGDOptimizer { - constructor(protected learningRate: number, + constructor(protected learningRate: number, private momentum: number, specifiedVariableList?: Node[]) { super(learningRate, specifiedVariableList); + this.m = Scalar.new(this.momentum); } beforeBatch( @@ -30,7 +31,6 @@ export class MomentumOptimizer extends SGDOptimizer { super.beforeBatch(math, batchSize, runtime, activationArrayMap, gradientArrayMap); - this.m = Scalar.new(this.momentum); if (this.variableVelocities.size() === 0) { this.variableNodes.forEach(node => { this.variableVelocities.set(node.output, @@ -66,13 +66,8 @@ export class MomentumOptimizer extends SGDOptimizer { } dispose() { - if (this.c != null) { - this.c.dispose(); - } - if (this.m != null) { - this.m.dispose(); - } - this.one.dispose(); + super.dispose(); + this.m.dispose(); this.variableVelocities.dispose(); } diff --git a/src/optimizer.ts b/src/optimizer.ts index 0ef31cbd3d..4ef78d04b7 100644 --- a/src/optimizer.ts +++ b/src/optimizer.ts @@ -15,33 +15,69 @@ limitations under the License. import {Node, VariableNode} from './graph'; import {NDArrayMath} from './math/math'; +import {NDArray, Scalar} from './math/ndarray'; import {SessionRuntime} from './session'; +import * as session_util from './session_util'; import {TensorArrayMap, SummedTensorArrayMap} from './tensor_array_map'; export abstract class Optimizer { protected variableNodes: VariableNode[]; protected specifiedVariableNodes: VariableNode[]|null; - constructor(specifiedVariableList?: Node[]) { + constructor(protected learningRate: number, specifiedVariableList?: Node[]) { if (specifiedVariableList != null) { this.specifiedVariableNodes = specifiedVariableList as VariableNode[]; } } - abstract beforeBatch( + beforeBatch( math: NDArrayMath, batchSize: number, runtime: SessionRuntime, activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap): void; + gradientArrayMap: SummedTensorArrayMap){ + this.variableNodes = this.specifiedVariableNodes == null ? 
+ session_util.getVariableNodesFromEvaluationSet(runtime.nodes) : + this.specifiedVariableNodes; + if (batchSize !== this.prevBatchSize) { + if(this.c != null){ + this.c.dispose(); + } + this.prevBatchSize = batchSize; + this.c = Scalar.new(-this.learningRate / batchSize); + } + this.variableNodes.forEach( + node => this.variableGradients.set( + node.output, NDArray.zeros(node.output.shape))); + } - abstract afterExample( + afterExample( math: NDArrayMath, runtime: SessionRuntime, activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap): void; + gradientArrayMap: SummedTensorArrayMap){ + math.scope((keep) => { + this.variableNodes.forEach(node => { + const gradient = gradientArrayMap.get(node.output); + const accumulatedGradient = this.variableGradients.get(node.output); + this.variableGradients.set( + node.output, keep(math.add(gradient, accumulatedGradient))); + accumulatedGradient.dispose(); + }); + }); + } abstract afterBatch( math: NDArrayMath, batchSize: number, runtime: SessionRuntime, activationArrayMap: TensorArrayMap, gradientArrayMap: SummedTensorArrayMap): void; - abstract dispose(): void; + dispose(){ + if (this.c != null) { + this.c.dispose(); + } + this.one.dispose(); + } + + protected variableGradients = new TensorArrayMap(); + protected prevBatchSize: number; + protected one = Scalar.new(1); + protected c: Scalar; } diff --git a/src/rmsprop_optimizer.ts b/src/rmsprop_optimizer.ts index 8ebc59a629..3737da2676 100644 --- a/src/rmsprop_optimizer.ts +++ b/src/rmsprop_optimizer.ts @@ -12,16 +12,19 @@ limitations under the License. import {Node} from './graph'; import {NDArrayMath} from './math/math'; -import {Scalar} from './math/ndarray'; -import {AdagradOptimizer} from './adagrad_optimizer'; +import {NDArray, Scalar} from './math/ndarray'; +import {Optimizer} from './optimizer'; import {SessionRuntime} from './session'; import {TensorArrayMap, SummedTensorArrayMap} from './tensor_array_map'; -export class RmspropOptimizer extends AdagradOptimizer { +export class RmspropOptimizer extends Optimizer { constructor(protected learningRate: number, protected momentum: number, private gamma: number, specifiedVariableList?: Node[]) { - super(learningRate, momentum, specifiedVariableList); + super(learningRate, specifiedVariableList); + this.m = Scalar.new(momentum); + this.eps = Scalar.new(1e-6); + this.g = Scalar.new(this.gamma); } beforeBatch( @@ -30,9 +33,13 @@ export class RmspropOptimizer extends AdagradOptimizer { gradientArrayMap: SummedTensorArrayMap) { super.beforeBatch(math, batchSize, runtime, activationArrayMap, gradientArrayMap); - - this.g = Scalar.new(this.gamma); - } + if (this.cache.size() === 0) { + this.variableNodes.forEach(node => { + this.cache.set(node.output, + NDArray.zeros(node.output.shape)); + }); + } + } afterBatch( @@ -46,10 +53,10 @@ export class RmspropOptimizer extends AdagradOptimizer { const oldCache = this.cache.get(node.output); const gradientSquare = math.multiply(gradient, gradient); const cache = math.scaledArrayAdd(this.g!, oldCache, - math.sub(this.one, this.g)!, gradientSquare); + math.sub(this.one, this.g)!, gradientSquare); const variable = math.scaledArrayAdd(this.c!, - math.divide( gradient, math.sqrt( math.add(cache, this.eps))), - this.one!, oldVariable); + math.divide(gradient, math.add(math.sqrt( cache), this.eps)), + this.one!, oldVariable); this.cache.set(node.output, keep(cache)); activationArrayMap.set(node.output, keep(variable)); node.data = variable; @@ -64,18 +71,15 @@ export class RmspropOptimizer extends 
AdagradOptimizer { } dispose() { - if (this.c != null) { - this.c.dispose(); - } - if (this.m != null) { - this.m.dispose(); - } - this.one.dispose(); + super.dispose(); + this.m.dispose(); + this.eps.dispose(); + this.g.dispose(); this.cache.dispose(); } - setMomentum(momentum: number) { - this.momentum = momentum; - } + private cache = new TensorArrayMap(); + private m: Scalar; + private eps: Scalar; private g: Scalar; } diff --git a/src/session_test.ts b/src/session_test.ts index 0703c10237..2458999eba 100644 --- a/src/session_test.ts +++ b/src/session_test.ts @@ -20,7 +20,7 @@ import {NDArrayMathGPU} from './math/math_gpu'; import {Array1D, NDArray, Scalar} from './math/ndarray'; import {FeedDictionary, FeedEntry, Session} from './session'; import {SGDOptimizer} from './sgd_optimizer'; -import {MomentumOptimizer} from './momentumOptimizer'; +import {MomentumOptimizer} from './momentum_optimizer'; import {AdagradOptimizer} from './adagrad_optimizer'; import {RmspropOptimizer} from './rmsprop_optimizer'; diff --git a/src/sgd_optimizer.ts b/src/sgd_optimizer.ts index 27b5051200..4daead5788 100644 --- a/src/sgd_optimizer.ts +++ b/src/sgd_optimizer.ts @@ -15,46 +15,13 @@ limitations under the License. import {Node} from './graph'; import {NDArrayMath} from './math/math'; -import {NDArray, Scalar} from './math/ndarray'; import {Optimizer} from './optimizer'; import {SessionRuntime} from './session'; -import * as session_util from './session_util'; import {TensorArrayMap, SummedTensorArrayMap} from './tensor_array_map'; export class SGDOptimizer extends Optimizer { constructor(protected learningRate: number, specifiedVariableList?: Node[]) { - super(specifiedVariableList); - } - - beforeBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { - this.variableNodes = this.specifiedVariableNodes == null ? 
- session_util.getVariableNodesFromEvaluationSet(runtime.nodes) : - this.specifiedVariableNodes; - if (batchSize !== this.prevBatchSize) { - this.prevBatchSize = batchSize; - this.c = Scalar.new(-this.learningRate / batchSize); - } - this.variableNodes.forEach( - node => this.variableGradients.set( - node.output, NDArray.zeros(node.output.shape))); - } - - afterExample( - math: NDArrayMath, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { - math.scope((keep) => { - this.variableNodes.forEach(node => { - const gradient = gradientArrayMap.get(node.output); - const accumulatedGradient = this.variableGradients.get(node.output); - this.variableGradients.set( - node.output, keep(math.add(gradient, accumulatedGradient))); - accumulatedGradient.dispose(); - }); - }); + super(learningRate, specifiedVariableList); } afterBatch( @@ -79,18 +46,10 @@ export class SGDOptimizer extends Optimizer { } dispose() { - if (this.c != null) { - this.c.dispose(); - } - this.one.dispose(); + super.dispose(); } setLearningRate(learningRate: number) { this.learningRate = learningRate; } - - protected variableGradients = new TensorArrayMap(); - protected prevBatchSize: number; - protected one = Scalar.new(1); - protected c: Scalar; } From c9b33872f96a119c85a7df51050e42d8b68bfe58 Mon Sep 17 00:00:00 2001 From: mnottheone Date: Sat, 9 Sep 2017 16:05:31 +0530 Subject: [PATCH 12/17] minor changes in optimizers --- src/adagrad_optimizer.ts | 5 ++--- src/momentum_optimizer.ts | 2 +- src/rmsprop_optimizer.ts | 10 +++++----- src/session_test.ts | 4 ++-- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/adagrad_optimizer.ts b/src/adagrad_optimizer.ts index f87c72db6e..681836578c 100644 --- a/src/adagrad_optimizer.ts +++ b/src/adagrad_optimizer.ts @@ -51,13 +51,12 @@ export class AdagradOptimizer extends Optimizer { const oldCache = this.cache.get(node.output); const gradientSquare = math.multiply(gradient, gradient); const cache = math.add(oldCache, gradientSquare); - const variable = math.scaledArrayAdd(this.c!, + const variable = math.scaledArrayAdd(this.c, math.divide(gradient, math.add(math.sqrt( cache), this.eps)), - this.one!, oldVariable); + this.one, oldVariable); this.cache.set(node.output, keep(cache)); activationArrayMap.set(node.output, keep(variable)); node.data = variable; - oldVariable.dispose(); oldCache.dispose(); }); diff --git a/src/momentum_optimizer.ts b/src/momentum_optimizer.ts index 4739fab046..2829927729 100644 --- a/src/momentum_optimizer.ts +++ b/src/momentum_optimizer.ts @@ -51,7 +51,7 @@ export class MomentumOptimizer extends SGDOptimizer { const velocity = math.scaledArrayAdd(this.m, oldVelocity, this.one, gradient); const variable = - math.scaledArrayAdd(this.c!, velocity, this.one!, oldVariable); + math.scaledArrayAdd(this.c, velocity, this.one, oldVariable); this.variableVelocities.set(node.output, keep(velocity)); activationArrayMap.set(node.output, keep(variable)); node.data = variable; diff --git a/src/rmsprop_optimizer.ts b/src/rmsprop_optimizer.ts index 3737da2676..28efdaa968 100644 --- a/src/rmsprop_optimizer.ts +++ b/src/rmsprop_optimizer.ts @@ -17,7 +17,7 @@ import {Optimizer} from './optimizer'; import {SessionRuntime} from './session'; import {TensorArrayMap, SummedTensorArrayMap} from './tensor_array_map'; -export class RmspropOptimizer extends Optimizer { +export class RMSPropOptimizer extends Optimizer { constructor(protected learningRate: number, protected momentum: number, private gamma: number, 
specifiedVariableList?: Node[]) { @@ -52,11 +52,11 @@ export class RmspropOptimizer extends Optimizer { const gradient = this.variableGradients.get(node.output); const oldCache = this.cache.get(node.output); const gradientSquare = math.multiply(gradient, gradient); - const cache = math.scaledArrayAdd(this.g!, oldCache, - math.sub(this.one, this.g)!, gradientSquare); - const variable = math.scaledArrayAdd(this.c!, + const cache = math.scaledArrayAdd(this.g, oldCache, + math.sub(this.one, this.g), gradientSquare); + const variable = math.scaledArrayAdd(this.c, math.divide(gradient, math.add(math.sqrt( cache), this.eps)), - this.one!, oldVariable); + this.one, oldVariable); this.cache.set(node.output, keep(cache)); activationArrayMap.set(node.output, keep(variable)); node.data = variable; diff --git a/src/session_test.ts b/src/session_test.ts index 2458999eba..d1ef705a59 100644 --- a/src/session_test.ts +++ b/src/session_test.ts @@ -22,7 +22,7 @@ import {FeedDictionary, FeedEntry, Session} from './session'; import {SGDOptimizer} from './sgd_optimizer'; import {MomentumOptimizer} from './momentum_optimizer'; import {AdagradOptimizer} from './adagrad_optimizer'; -import {RmspropOptimizer} from './rmsprop_optimizer'; +import {RMSPropOptimizer} from './rmsprop_optimizer'; import * as test_util from './test_util'; @@ -393,7 +393,7 @@ describe('Session', () => { const b = g.variable('b', NDArray.zeros([1])); const y = g.reduceSum(g.add(g.matmul(w, x), b)); const safeMode = true; - const optimizer = new RmspropOptimizer(0.1, 0.5, 0.8); + const optimizer = new RMSPropOptimizer(0.1, 0.5, 0.8); const math = new NDArrayMathCPU(safeMode); const session = new Session(g, math); const inputProvider: InputProvider = { From 133312b378b690efc2d25f605cc95cff7da533b1 Mon Sep 17 00:00:00 2001 From: mnottheone Date: Sat, 16 Sep 2017 20:46:18 +0530 Subject: [PATCH 13/17] formatting done --- src/adagrad_optimizer.ts | 22 ++++---- src/index.ts | 32 +++++------ src/momentum_optimizer.ts | 36 ++++++------- src/optimizer.ts | 62 +++++++++++----------- src/rmsprop_optimizer.ts | 36 ++++++------- src/session_test.ts | 108 +++++++++++++++++++------------------- src/sgd_optimizer.ts | 18 +++---- 7 files changed, 157 insertions(+), 157 deletions(-) diff --git a/src/adagrad_optimizer.ts b/src/adagrad_optimizer.ts index 681836578c..5a5bd87ecd 100644 --- a/src/adagrad_optimizer.ts +++ b/src/adagrad_optimizer.ts @@ -10,12 +10,12 @@ See the License for the specific language governing permissions and limitations under the License. 
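
For context, patch 12 drops the now-redundant non-null assertions (the scalars involved are created in the constructors or in beforeBatch, so they are always set before afterBatch runs) and renames RmspropOptimizer to RMSPropOptimizer. Construction is unchanged; a minimal usage sketch, assuming the graph, session, and inputProvider setup shown in session_test.ts, with the argument order the test uses (learningRate, momentum, gamma):

    // Hypothetical wiring; y, x and inputProvider come from the test setup.
    const optimizer = new RMSPropOptimizer(0.1, 0.5, 0.8);
    session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer);
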
==============================================================================*/ -import {Node} from './graph'; -import {NDArrayMath} from './math/math'; -import {NDArray, Scalar} from './math/ndarray'; -import {Optimizer} from './optimizer'; -import {SessionRuntime} from './session'; -import {TensorArrayMap, SummedTensorArrayMap} from './tensor_array_map'; +import { Node } from './graph'; +import { NDArrayMath } from './math/math'; +import { NDArray, Scalar } from './math/ndarray'; +import { Optimizer } from './optimizer'; +import { SessionRuntime } from './session'; +import { TensorArrayMap, SummedTensorArrayMap } from './tensor_array_map'; export class AdagradOptimizer extends Optimizer { constructor(protected learningRate: number, @@ -41,9 +41,9 @@ export class AdagradOptimizer extends Optimizer { } afterBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { math.scope((keep) => { this.variableNodes.forEach(node => { const oldVariable = activationArrayMap.get(node.output); @@ -52,8 +52,8 @@ export class AdagradOptimizer extends Optimizer { const gradientSquare = math.multiply(gradient, gradient); const cache = math.add(oldCache, gradientSquare); const variable = math.scaledArrayAdd(this.c, - math.divide(gradient, math.add(math.sqrt( cache), this.eps)), - this.one, oldVariable); + math.divide(gradient, math.add(math.sqrt(cache), this.eps)), + this.one, oldVariable); this.cache.set(node.output, keep(cache)); activationArrayMap.set(node.output, keep(variable)); node.data = variable; diff --git a/src/index.ts b/src/index.ts index a42128f035..1cbe812d74 100644 --- a/src/index.ts +++ b/src/index.ts @@ -21,24 +21,24 @@ import * as render_ndarray_gpu_util from './math/webgl/render_ndarray_gpu_util'; import * as webgl_util from './math/webgl/webgl_util'; import * as util from './util'; -export {CheckpointLoader} from './checkpoint_loader'; -export {DataStats, InMemoryDataset} from './dataset'; -export {Graph, Tensor} from './graph'; +export { CheckpointLoader } from './checkpoint_loader'; +export { DataStats, InMemoryDataset } from './dataset'; +export { Graph, Tensor } from './graph'; // tslint:disable-next-line:max-line-length -export {GraphRunner, GraphRunnerEventObserver, MetricReduction} from './graph_runner'; +export { GraphRunner, GraphRunnerEventObserver, MetricReduction } from './graph_runner'; // tslint:disable-next-line:max-line-length -export {ConstantInitializer, Initializer, NDArrayInitializer, OnesInitializer, RandomNormalInitializer, RandomTruncatedNormalInitializer, RandomUniformInitializer, VarianceScalingInitializer, ZerosInitializer} from './initializers'; +export { ConstantInitializer, Initializer, NDArrayInitializer, OnesInitializer, RandomNormalInitializer, RandomTruncatedNormalInitializer, RandomUniformInitializer, VarianceScalingInitializer, ZerosInitializer } from './initializers'; // tslint:disable-next-line:max-line-length -export {InCPUMemoryShuffledInputProviderBuilder, InGPUMemoryShuffledInputProviderBuilder, InputProvider} from './input_provider'; -export {MatrixOrientation, NDArrayMath} from './math/math'; -export {NDArrayMathCPU} from './math/math_cpu'; -export {NDArrayMathGPU} from './math/math_gpu'; +export { InCPUMemoryShuffledInputProviderBuilder, InGPUMemoryShuffledInputProviderBuilder, InputProvider } from 
'./input_provider'; +export { MatrixOrientation, NDArrayMath } from './math/math'; +export { NDArrayMathCPU } from './math/math_cpu'; +export { NDArrayMathGPU } from './math/math_gpu'; // tslint:disable-next-line:max-line-length -export {Array1D, Array2D, Array3D, Array4D, NDArray, Scalar} from './math/ndarray'; -export {GPGPUContext} from './math/webgl/gpgpu_context'; -export {Optimizer} from './optimizer'; -export {CostReduction, FeedEntry, Session} from './session'; -export {SGDOptimizer} from './sgd_optimizer'; -export {MomentumOptimizer} from './momentum_optimizer'; +export { Array1D, Array2D, Array3D, Array4D, NDArray, Scalar } from './math/ndarray'; +export { GPGPUContext } from './math/webgl/gpgpu_context'; +export { Optimizer } from './optimizer'; +export { CostReduction, FeedEntry, Session } from './session'; +export { SGDOptimizer } from './sgd_optimizer'; +export { MomentumOptimizer } from './momentum_optimizer'; // Second level exports. -export {conv_util, gpgpu_util, render_ndarray_gpu_util, util, webgl_util}; +export { conv_util, gpgpu_util, render_ndarray_gpu_util, util, webgl_util }; diff --git a/src/momentum_optimizer.ts b/src/momentum_optimizer.ts index 8ebca9f2f2..1f694db3f7 100644 --- a/src/momentum_optimizer.ts +++ b/src/momentum_optimizer.ts @@ -15,49 +15,49 @@ * ============================================================================= */ -import {Node} from './graph'; -import {NDArrayMath} from './math/math'; -import {NDArray, Scalar} from './math/ndarray'; -import {SessionRuntime} from './session'; -import {SGDOptimizer} from './sgd_optimizer'; -import {SummedTensorArrayMap, TensorArrayMap} from './tensor_array_map'; +import { Node } from './graph'; +import { NDArrayMath } from './math/math'; +import { NDArray, Scalar } from './math/ndarray'; +import { SessionRuntime } from './session'; +import { SGDOptimizer } from './sgd_optimizer'; +import { SummedTensorArrayMap, TensorArrayMap } from './tensor_array_map'; export class MomentumOptimizer extends SGDOptimizer { constructor( - protected learningRate: number, private momentum: number, - specifiedVariableList?: Node[]) { + protected learningRate: number, private momentum: number, + specifiedVariableList?: Node[]) { super(learningRate, specifiedVariableList); this.m = Scalar.new(this.momentum); } beforeBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { super.beforeBatch( - math, batchSize, runtime, activationArrayMap, gradientArrayMap); + math, batchSize, runtime, activationArrayMap, gradientArrayMap); if (this.variableVelocities.size() === 0) { this.variableNodes.forEach(node => { this.variableVelocities.set( - node.output, NDArray.zeros(node.output.shape)); + node.output, NDArray.zeros(node.output.shape)); }); } } afterBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { math.scope((keep) => { this.variableNodes.forEach(node => { const oldVariable = activationArrayMap.get(node.output); const gradient = this.variableGradients.get(node.output); const oldVelocity = this.variableVelocities.get(node.output); const velocity = - 
math.scaledArrayAdd(this.m, oldVelocity, this.one, gradient); + math.scaledArrayAdd(this.m, oldVelocity, this.one, gradient); const variable = - math.scaledArrayAdd(this.c, velocity, this.one, oldVariable); + math.scaledArrayAdd(this.c, velocity, this.one, oldVariable); this.variableVelocities.set(node.output, keep(velocity)); activationArrayMap.set(node.output, keep(variable)); node.data = variable; diff --git a/src/optimizer.ts b/src/optimizer.ts index fcbc15aeac..0e04d4442e 100644 --- a/src/optimizer.ts +++ b/src/optimizer.ts @@ -15,16 +15,16 @@ * ============================================================================= */ -import {Node, VariableNode} from './graph'; -import {NDArrayMath} from './math/math'; -import {NDArray, Scalar} from './math/ndarray'; -import {SessionRuntime} from './session'; +import { Node, VariableNode } from './graph'; +import { NDArrayMath } from './math/math'; +import { NDArray, Scalar } from './math/ndarray'; +import { SessionRuntime } from './session'; import * as session_util from './session_util'; -import {TensorArrayMap, SummedTensorArrayMap} from './tensor_array_map'; +import { TensorArrayMap, SummedTensorArrayMap } from './tensor_array_map'; export abstract class Optimizer { protected variableNodes: VariableNode[]; - protected specifiedVariableNodes: VariableNode[]|null; + protected specifiedVariableNodes: VariableNode[] | null; constructor(protected learningRate: number, specifiedVariableList?: Node[]) { if (specifiedVariableList != null) { @@ -33,45 +33,45 @@ export abstract class Optimizer { } beforeBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap){ + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { this.variableNodes = this.specifiedVariableNodes == null ? 
- session_util.getVariableNodesFromEvaluationSet(runtime.nodes) : - this.specifiedVariableNodes; + session_util.getVariableNodesFromEvaluationSet(runtime.nodes) : + this.specifiedVariableNodes; if (batchSize !== this.prevBatchSize) { - if(this.c != null){ + if (this.c != null) { this.c.dispose(); } this.prevBatchSize = batchSize; this.c = Scalar.new(-this.learningRate / batchSize); } this.variableNodes.forEach( - node => this.variableGradients.set( - node.output, NDArray.zeros(node.output.shape))); + node => this.variableGradients.set( + node.output, NDArray.zeros(node.output.shape))); } afterExample( - math: NDArrayMath, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap){ - math.scope((keep) => { - this.variableNodes.forEach(node => { - const gradient = gradientArrayMap.get(node.output); - const accumulatedGradient = this.variableGradients.get(node.output); - this.variableGradients.set( - node.output, keep(math.add(gradient, accumulatedGradient))); - accumulatedGradient.dispose(); - }); - }); - } + math: NDArrayMath, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { + math.scope((keep) => { + this.variableNodes.forEach(node => { + const gradient = gradientArrayMap.get(node.output); + const accumulatedGradient = this.variableGradients.get(node.output); + this.variableGradients.set( + node.output, keep(math.add(gradient, accumulatedGradient))); + accumulatedGradient.dispose(); + }); + }); + } abstract afterBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap): void; + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap): void; - dispose(){ + dispose() { if (this.c != null) { this.c.dispose(); } diff --git a/src/rmsprop_optimizer.ts b/src/rmsprop_optimizer.ts index 28efdaa968..fff3ae425a 100644 --- a/src/rmsprop_optimizer.ts +++ b/src/rmsprop_optimizer.ts @@ -10,12 +10,12 @@ See the License for the specific language governing permissions and limitations under the License. 
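
To make the shared base-class contract concrete, here is a rough sketch of how a Session is expected to drive these hooks over one batch. This is not library code; acts and grads are stand-ins for the TensorArrayMap and SummedTensorArrayMap instances named in the diff:

    // One training batch, under the Optimizer contract above:
    optimizer.beforeBatch(math, batchSize, runtime, acts, grads);
    // beforeBatch zeroes the gradient accumulators and sets c = -lr / batchSize.
    for (let i = 0; i < batchSize; i++) {
      // ... forward/backward pass fills grads for this example ...
      optimizer.afterExample(math, runtime, acts, grads);  // variableGradients += grads
    }
    optimizer.afterBatch(math, batchSize, runtime, acts, grads);  // subclass applies the update
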
==============================================================================*/ -import {Node} from './graph'; -import {NDArrayMath} from './math/math'; -import {NDArray, Scalar} from './math/ndarray'; -import {Optimizer} from './optimizer'; -import {SessionRuntime} from './session'; -import {TensorArrayMap, SummedTensorArrayMap} from './tensor_array_map'; +import { Node } from './graph'; +import { NDArrayMath } from './math/math'; +import { NDArray, Scalar } from './math/ndarray'; +import { Optimizer } from './optimizer'; +import { SessionRuntime } from './session'; +import { TensorArrayMap, SummedTensorArrayMap } from './tensor_array_map'; export class RMSPropOptimizer extends Optimizer { constructor(protected learningRate: number, @@ -33,19 +33,19 @@ export class RMSPropOptimizer extends Optimizer { gradientArrayMap: SummedTensorArrayMap) { super.beforeBatch(math, batchSize, runtime, activationArrayMap, gradientArrayMap); - if (this.cache.size() === 0) { - this.variableNodes.forEach(node => { - this.cache.set(node.output, - NDArray.zeros(node.output.shape)); - }); - } + if (this.cache.size() === 0) { + this.variableNodes.forEach(node => { + this.cache.set(node.output, + NDArray.zeros(node.output.shape)); + }); } + } afterBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { math.scope((keep) => { this.variableNodes.forEach(node => { const oldVariable = activationArrayMap.get(node.output); @@ -53,10 +53,10 @@ export class RMSPropOptimizer extends Optimizer { const oldCache = this.cache.get(node.output); const gradientSquare = math.multiply(gradient, gradient); const cache = math.scaledArrayAdd(this.g, oldCache, - math.sub(this.one, this.g), gradientSquare); + math.sub(this.one, this.g), gradientSquare); const variable = math.scaledArrayAdd(this.c, - math.divide(gradient, math.add(math.sqrt( cache), this.eps)), - this.one, oldVariable); + math.divide(gradient, math.add(math.sqrt(cache), this.eps)), + this.one, oldVariable); this.cache.set(node.output, keep(cache)); activationArrayMap.set(node.output, keep(variable)); node.data = variable; diff --git a/src/session_test.ts b/src/session_test.ts index 8cd4d9a47e..9a7cb4693f 100644 --- a/src/session_test.ts +++ b/src/session_test.ts @@ -15,16 +15,16 @@ * ============================================================================= */ -import {Graph, Tensor} from './graph'; -import {InputProvider} from './input_provider'; -import {NDArrayMathCPU} from './math/math_cpu'; -import {NDArrayMathGPU} from './math/math_gpu'; -import {Array1D, NDArray, Scalar} from './math/ndarray'; -import {FeedDictionary, FeedEntry, Session} from './session'; -import {SGDOptimizer} from './sgd_optimizer'; -import {MomentumOptimizer} from './momentum_optimizer'; -import {AdagradOptimizer} from './adagrad_optimizer'; -import {RMSPropOptimizer} from './rmsprop_optimizer'; +import { Graph, Tensor } from './graph'; +import { InputProvider } from './input_provider'; +import { NDArrayMathCPU } from './math/math_cpu'; +import { NDArrayMathGPU } from './math/math_gpu'; +import { Array1D, NDArray, Scalar } from './math/ndarray'; +import { FeedDictionary, FeedEntry, Session } from './session'; +import { SGDOptimizer } from './sgd_optimizer'; +import { MomentumOptimizer } from './momentum_optimizer'; +import { AdagradOptimizer } from 
'./adagrad_optimizer'; +import { RMSPropOptimizer } from './rmsprop_optimizer'; import * as test_util from './test_util'; @@ -35,7 +35,7 @@ describe('FeedDictionary', () => { }); it('ctor populates dict from only feed entry', () => { - const e: FeedEntry = {tensor: new Tensor([]), data: NDArray.zeros([1])}; + const e: FeedEntry = { tensor: new Tensor([]), data: NDArray.zeros([1]) }; const d = new FeedDictionary([e]); expect(Object.keys(d.dict).length).toEqual(1); expect(d.dict[e.tensor.id]).toBe(e); @@ -43,10 +43,10 @@ describe('FeedDictionary', () => { it('ctor populates dict from many entries', () => { const entries: FeedEntry[] = [ - {tensor: new Tensor([]), data: NDArray.zeros([1])}, - {tensor: new Tensor([]), data: NDArray.zeros([1])}, - {tensor: new Tensor([]), data: NDArray.zeros([1])}, - {tensor: new Tensor([]), data: NDArray.zeros([1])} + { tensor: new Tensor([]), data: NDArray.zeros([1]) }, + { tensor: new Tensor([]), data: NDArray.zeros([1]) }, + { tensor: new Tensor([]), data: NDArray.zeros([1]) }, + { tensor: new Tensor([]), data: NDArray.zeros([1]) } ]; const d = new FeedDictionary(entries); expect(Object.keys(d.dict).length).toEqual(entries.length); @@ -56,7 +56,7 @@ describe('FeedDictionary', () => { it('add adds entry to map keyed on tensor id', () => { const t = new Tensor([]); const nda = NDArray.zeros([1]); - const fd = new FeedDictionary([{tensor: t, data: nda}]); + const fd = new FeedDictionary([{ tensor: t, data: nda }]); expect(fd.dict[t.id].tensor).toBe(t); expect(fd.dict[t.id].data).toBe(nda); }); @@ -86,14 +86,14 @@ describe('Session', () => { const fc3 = g.add(g.matmul(fc3W, relu2), fc3B); const session = new Session(g, new NDArrayMathCPU()); - session.eval(fc3, [{tensor: input, data: NDArray.zeros([28 * 28])}]); + session.eval(fc3, [{ tensor: input, data: NDArray.zeros([28 * 28]) }]); }); it('y=x^2 + 3: CPU', () => { const x = g.placeholder('x', [2]); const y = g.add(g.square(x), g.constant(3)); const session = new Session(g, new NDArrayMathCPU()); - const yVal = session.eval(y, [{tensor: x, data: Array1D.new([5, 4])}]); + const yVal = session.eval(y, [{ tensor: x, data: Array1D.new([5, 4]) }]); const expected = new Float32Array([28, 19]); test_util.expectArraysClose(yVal.getValues(), expected, 1e-5); }); @@ -105,7 +105,7 @@ describe('Session', () => { const session = new Session(g, math); math.scope(() => { - const yVal = session.eval(y, [{tensor: x, data: Array1D.new([5, 4])}]); + const yVal = session.eval(y, [{ tensor: x, data: Array1D.new([5, 4]) }]); const expected = new Float32Array([28, 19]); test_util.expectArraysClose(yVal.getValues(), expected, 1e-5); }); @@ -120,7 +120,7 @@ describe('Session', () => { math.scope(() => { const yVal = - session.eval(y, [{tensor: xSquared, data: Array1D.new([25, 16])}]); + session.eval(y, [{ tensor: xSquared, data: Array1D.new([25, 16]) }]); const expected = new Float32Array([28, 19]); test_util.expectArraysClose(yVal.getValues(), expected, 1e-5); }); @@ -136,7 +136,7 @@ describe('Session', () => { math.scope(() => { const result = - session.evalAll([y, z], [{tensor: x, data: Array1D.new([5, 4])}]); + session.evalAll([y, z], [{ tensor: x, data: Array1D.new([5, 4]) }]); const expectedY = new Float32Array([28, 19]); const expectedZ = new Float32Array([27, 18]); test_util.expectArraysClose(result[0].getValues(), expectedY, 1e-5); @@ -154,12 +154,12 @@ describe('Session', () => { math.scope(() => { const result1 = - session.eval(y, [{tensor: x, data: Array1D.new([5, 4])}]); + session.eval(y, [{ tensor: x, data: 
Array1D.new([5, 4]) }]); const expectedY = new Float32Array([30, 20]); test_util.expectArraysClose(result1.getValues(), expectedY, 1e-5); const result2 = - session.eval(z, [{tensor: x, data: Array1D.new([5, 4])}]); + session.eval(z, [{ tensor: x, data: Array1D.new([5, 4]) }]); const expectedZ = new Float32Array([31, 21]); test_util.expectArraysClose(result2.getValues(), expectedZ, 1e-5); }); @@ -179,20 +179,20 @@ describe('Session', () => { getNextCopy() { return xs[idx++]; }, - disposeCopy(math, example) {} + disposeCopy(math, example) { } }; // w = x^2 + x + 3 // dw/dx = 2x + 1 - session.train(w, [{tensor: x, data: inputProvider}], 1, optimizer); + session.train(w, [{ tensor: x, data: inputProvider }], 1, optimizer); let dwdx = session.gradientArrayMap.get(x).get(); expect(dwdx).toBe(5); - session.train(w, [{tensor: x, data: inputProvider}], 1, optimizer); + session.train(w, [{ tensor: x, data: inputProvider }], 1, optimizer); dwdx = session.gradientArrayMap.get(x).get(); expect(dwdx).toBe(3); - session.train(w, [{tensor: x, data: inputProvider}], 1, optimizer); + session.train(w, [{ tensor: x, data: inputProvider }], 1, optimizer); dwdx = session.gradientArrayMap.get(x).get(); expect(dwdx).toBe(-1); }); @@ -209,12 +209,12 @@ describe('Session', () => { getNextCopy() { return Array1D.new([2, 4]); }, - disposeCopy(math, example) {} + disposeCopy(math, example) { } }; // w = reduce_sum(x^2 + x + 3) // dw/dx = [2*x_1 + 1, 2*x_2 + 1] - session.train(w, [{tensor: x, data: inputProvider}], 1, optimizer); + session.train(w, [{ tensor: x, data: inputProvider }], 1, optimizer); const dwdx = session.gradientArrayMap.get(x).getValues(); test_util.expectArraysClose(dwdx, new Float32Array([5, 9]), 1e-5); }); @@ -235,7 +235,7 @@ describe('Session', () => { getNextCopy() { return Array1D.new([1, 2]); }, - disposeCopy(math, example) {} + disposeCopy(math, example) { } }; // prediction = reduce_sum((x + b0)^2 + b1) @@ -245,8 +245,8 @@ describe('Session', () => { // Update only b0 const optimizerOnlyB0 = new SGDOptimizer(0.1, [b0.node]); session.train( - cost, [{tensor: x, data: inputProvider}], 2, optimizerOnlyB0, - undefined); + cost, [{ tensor: x, data: inputProvider }], 2, optimizerOnlyB0, + undefined); const b0After1 = session.activationArrayMap.get(b0).getValues(); const b1After1 = session.activationArrayMap.get(b1).getValues(); @@ -256,7 +256,7 @@ describe('Session', () => { // Update both b0 and b1 const optimizerAll = new SGDOptimizer(0.1); session.train( - cost, [{tensor: x, data: inputProvider}], 2, optimizerAll, undefined); + cost, [{ tensor: x, data: inputProvider }], 2, optimizerAll, undefined); const b0After2 = session.activationArrayMap.get(b0).getValues(); const b1After2 = session.activationArrayMap.get(b1).getValues(); @@ -271,8 +271,8 @@ describe('Session', () => { const math = new NDArrayMathCPU(safeMode); const session = new Session(g, math); - expect(() => session.eval(y, [{tensor: x, data: Array1D.new([5, 4])}])) - .toThrowError(); + expect(() => session.eval(y, [{ tensor: x, data: Array1D.new([5, 4]) }])) + .toThrowError(); }); it('Safe mode math, math scope eval does not throw', () => { @@ -283,7 +283,7 @@ describe('Session', () => { const session = new Session(g, math); math.scope(() => { - const yVal = session.eval(y, [{tensor: x, data: Array1D.new([5, 4])}]); + const yVal = session.eval(y, [{ tensor: x, data: Array1D.new([5, 4]) }]); const expected = new Float32Array([25, 16]); test_util.expectArraysClose(yVal.getValues(), expected, 1e-5); }); @@ -303,13 +303,13 @@ 
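
The gradient expectations in these tests follow directly from the comments: with w = x^2 + x + 3, dw/dx = 2x + 1. The scalar feeds themselves are elided above, but the expected values are consistent with feeds of 2, 1, and -1:

    // dw/dx = 2x + 1
    // x =  2  ->  2*2    + 1 =  5
    // x =  1  ->  2*1    + 1 =  3
    // x = -1  ->  2*(-1) + 1 = -1
    // vector case, x = [2, 4]  ->  [5, 9]
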
describe('Session', () => { getNextCopy() { return Array1D.new([2, 4]); }, - disposeCopy(math, example) {} + disposeCopy(math, example) { } }; math.scope(() => { // w = reduce_sum(x^2 + x + 3) // dw/dx = [2*x_1 + 1, 2*x_2 + 1] - session.train(w, [{tensor: x, data: inputProvider}], 1, optimizer); + session.train(w, [{ tensor: x, data: inputProvider }], 1, optimizer); const dwdx = session.gradientArrayMap.get(x).getValues(); test_util.expectArraysClose(dwdx, new Float32Array([5, 9]), 1e-5); }); @@ -317,7 +317,7 @@ describe('Session', () => { it('Safe mode math, math scope train does not throw', () => { const x = g.placeholder('x', [2]); - const w = g.variable('w', NDArray.zeros([1,2])); + const w = g.variable('w', NDArray.zeros([1, 2])); const b = g.variable('b', NDArray.zeros([1])); const y = g.reduceSum(g.add(g.matmul(w, x), b)); @@ -329,7 +329,7 @@ describe('Session', () => { getNextCopy() { return Array1D.new([2, 4]); }, - disposeCopy(math, example) {} + disposeCopy(math, example) { } }; math.scope(() => { @@ -337,13 +337,13 @@ describe('Session', () => { // velocity_w = [momentum* old_vel_w1 + x_1, // momentum* old_vel_w2 + x_2] = [2,4] // w = [ w_old - lr*vel_w1, w_old - lr*vel_w2] = [-0.2, -0.4] - session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer); + session.train(y, [{ tensor: x, data: inputProvider }], 1, optimizer); const dydw = session.activationArrayMap.get(w).getValues(); test_util.expectArraysClose(dydw, new Float32Array([-.2, -0.4]), 1e-5); // velocity_w = [momentum* old_vel_w1 + x_1, // momentum* old_vel_w2 + x_2] = [3,6] // w = [ w_old - lr*vel_w1, w_old - lr*vel_w2] = [-0.5, -1.0] - session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer); + session.train(y, [{ tensor: x, data: inputProvider }], 1, optimizer); const dydw2 = session.activationArrayMap.get(w).getValues(); test_util.expectArraysClose(dydw2, new Float32Array([-.5, -1.0]), 2e-5); }); @@ -352,7 +352,7 @@ describe('Session', () => { it('Safe mode math, math scope train does not throw', () => { const x = g.placeholder('x', [2]); - const w = g.variable('w', NDArray.zeros([1,2])); + const w = g.variable('w', NDArray.zeros([1, 2])); const b = g.variable('b', NDArray.zeros([1])); const y = g.reduceSum(g.add(g.matmul(w, x), b)); @@ -364,7 +364,7 @@ describe('Session', () => { getNextCopy() { return Array1D.new([2, 4]); }, - disposeCopy(math, example) {} + disposeCopy(math, example) { } }; math.scope(() => { @@ -374,7 +374,7 @@ describe('Session', () => { // w = [ w1_old - lr*grad_w1/sqrt(cahce_w2 + eps), // w2_old - lr*grad_w1/sqrt(cahce_w2 + eps)] // = [-0.1, -0.1] - session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer); + session.train(y, [{ tensor: x, data: inputProvider }], 1, optimizer); const dydw = session.activationArrayMap.get(w).getValues(); test_util.expectArraysClose(dydw, new Float32Array([-.1, -0.1]), 1e-5); // cache = [old_cache_w1 + grad_w1**2, @@ -382,7 +382,7 @@ describe('Session', () => { // w = [ w1_old - lr*grad_w1/sqrt(cahce_w2 + eps), // w2_old - lr*grad_w1/sqrt(cahce_w2 + eps)] // = [-0.1707, -0.1707] - session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer); + session.train(y, [{ tensor: x, data: inputProvider }], 1, optimizer); const dydw2 = session.activationArrayMap.get(w).getValues(); test_util.expectArraysClose(dydw2, new Float32Array([-.1707, -.1707]), 2e-5); @@ -391,7 +391,7 @@ describe('Session', () => { it('Safe mode math, math scope train does not throw', () => { const x = g.placeholder('x', [2]); - const w = g.variable('w', 
NDArray.zeros([1,2])); + const w = g.variable('w', NDArray.zeros([1, 2])); const b = g.variable('b', NDArray.zeros([1])); const y = g.reduceSum(g.add(g.matmul(w, x), b)); const safeMode = true; @@ -402,7 +402,7 @@ describe('Session', () => { getNextCopy() { return Array1D.new([2, 4]); }, - disposeCopy(math, example) {} + disposeCopy(math, example) { } }; math.scope(() => { @@ -413,7 +413,7 @@ describe('Session', () => { // w = [ w1_old - lr*grad_w1/sqrt(cahce_w1 + eps), // w2_old - lr*grad_w1/sqrt(cahce_w2 + eps)] // = [-0.2236, -0.2236] - session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer); + session.train(y, [{ tensor: x, data: inputProvider }], 1, optimizer); const dydw = session.activationArrayMap.get(w).getValues(); test_util.expectArraysClose(dydw, new Float32Array([-.2236, -0.2236]), 1e-5); @@ -423,7 +423,7 @@ describe('Session', () => { // w = [ w1_old - lr*grad_w1/sqrt(cahce_w1 + eps), // w2_old - lr*grad_w1/sqrt(cahce_w2 + eps)] // = [-.39027, -.39027] - session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer); + session.train(y, [{ tensor: x, data: inputProvider }], 1, optimizer); const dydw2 = session.activationArrayMap.get(w).getValues(); test_util.expectArraysClose(dydw2, new Float32Array([-.39027, -.39027]), 2e-5); @@ -444,12 +444,12 @@ describe('Session', () => { getNextCopy() { return Array1D.new([2, 4]); }, - disposeCopy(math, example) {} + disposeCopy(math, example) { } }; expect( - () => - session.train(w, [{tensor: x, data: inputProvider}], 1, optimizer)) - .toThrowError(); + () => + session.train(w, [{ tensor: x, data: inputProvider }], 1, optimizer)) + .toThrowError(); }); }); diff --git a/src/sgd_optimizer.ts b/src/sgd_optimizer.ts index 431cdedada..605d4d9d54 100644 --- a/src/sgd_optimizer.ts +++ b/src/sgd_optimizer.ts @@ -15,11 +15,11 @@ * ============================================================================= */ -import {Node} from './graph'; -import {NDArrayMath} from './math/math'; -import {Optimizer} from './optimizer'; -import {SessionRuntime} from './session'; -import {TensorArrayMap, SummedTensorArrayMap} from './tensor_array_map'; +import { Node } from './graph'; +import { NDArrayMath } from './math/math'; +import { Optimizer } from './optimizer'; +import { SessionRuntime } from './session'; +import { TensorArrayMap, SummedTensorArrayMap } from './tensor_array_map'; export class SGDOptimizer extends Optimizer { constructor(protected learningRate: number, specifiedVariableList?: Node[]) { @@ -27,15 +27,15 @@ export class SGDOptimizer extends Optimizer { } afterBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { math.scope((keep) => { this.variableNodes.forEach(node => { const oldVariable = activationArrayMap.get(node.output); const gradient = this.variableGradients.get(node.output); const variable = - math.scaledArrayAdd(this.c, gradient, this.one, oldVariable); + math.scaledArrayAdd(this.c, gradient, this.one, oldVariable); activationArrayMap.set(node.output, keep(variable)); node.data = variable; From 458af00cf97cc180b9ed47465141db4477b7d8d2 Mon Sep 17 00:00:00 2001 From: mnottheone Date: Sat, 16 Sep 2017 20:52:11 +0530 Subject: [PATCH 14/17] license updated --- src/adagrad_optimizer.ts | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git 
a/src/adagrad_optimizer.ts b/src/adagrad_optimizer.ts index 5a5bd87ecd..541e5a2770 100644 --- a/src/adagrad_optimizer.ts +++ b/src/adagrad_optimizer.ts @@ -1,15 +1,19 @@ -/* Copyright 2017 Google Inc. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - +/** + * @license + * Copyright 2017 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ import { Node } from './graph'; import { NDArrayMath } from './math/math'; import { NDArray, Scalar } from './math/ndarray'; From 28abe8c55ce142f1630acbfb0fb46def4df21986 Mon Sep 17 00:00:00 2001 From: mnottheone Date: Sat, 16 Sep 2017 22:10:15 +0530 Subject: [PATCH 15/17] cache -> accumulatedSquaredGradients --- src/adagrad_optimizer.ts | 12 ++++++------ src/rmsprop_optimizer.ts | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/adagrad_optimizer.ts b/src/adagrad_optimizer.ts index 541e5a2770..52e862936b 100644 --- a/src/adagrad_optimizer.ts +++ b/src/adagrad_optimizer.ts @@ -36,9 +36,9 @@ export class AdagradOptimizer extends Optimizer { super.beforeBatch(math, batchSize, runtime, activationArrayMap, gradientArrayMap); - if (this.cache.size() === 0) { + if (this.accumulatedSquaredGradients.size() === 0) { this.variableNodes.forEach(node => { - this.cache.set(node.output, + this.accumulatedSquaredGradients.set(node.output, NDArray.zeros(node.output.shape)); }); } @@ -52,13 +52,13 @@ export class AdagradOptimizer extends Optimizer { this.variableNodes.forEach(node => { const oldVariable = activationArrayMap.get(node.output); const gradient = this.variableGradients.get(node.output); - const oldCache = this.cache.get(node.output); + const oldCache = this.accumulatedSquaredGradients.get(node.output); const gradientSquare = math.multiply(gradient, gradient); const cache = math.add(oldCache, gradientSquare); const variable = math.scaledArrayAdd(this.c, math.divide(gradient, math.add(math.sqrt(cache), this.eps)), this.one, oldVariable); - this.cache.set(node.output, keep(cache)); + this.accumulatedSquaredGradients.set(node.output, keep(cache)); activationArrayMap.set(node.output, keep(variable)); node.data = variable; oldVariable.dispose(); @@ -74,10 +74,10 @@ export class AdagradOptimizer extends Optimizer { super.dispose(); this.m.dispose(); this.eps.dispose(); - this.cache.dispose(); + this.accumulatedSquaredGradients.dispose(); } - 
private cache = new TensorArrayMap(); + private accumulatedSquaredGradients = new TensorArrayMap(); private m: Scalar; private eps: Scalar; } diff --git a/src/rmsprop_optimizer.ts b/src/rmsprop_optimizer.ts index fff3ae425a..cac3c067be 100644 --- a/src/rmsprop_optimizer.ts +++ b/src/rmsprop_optimizer.ts @@ -33,9 +33,9 @@ export class RMSPropOptimizer extends Optimizer { gradientArrayMap: SummedTensorArrayMap) { super.beforeBatch(math, batchSize, runtime, activationArrayMap, gradientArrayMap); - if (this.cache.size() === 0) { + if (this.accumulatedSquaredGradients.size() === 0) { this.variableNodes.forEach(node => { - this.cache.set(node.output, + this.accumulatedSquaredGradients.set(node.output, NDArray.zeros(node.output.shape)); }); } @@ -50,14 +50,14 @@ export class RMSPropOptimizer extends Optimizer { this.variableNodes.forEach(node => { const oldVariable = activationArrayMap.get(node.output); const gradient = this.variableGradients.get(node.output); - const oldCache = this.cache.get(node.output); + const oldCache = this.accumulatedSquaredGradients.get(node.output); const gradientSquare = math.multiply(gradient, gradient); const cache = math.scaledArrayAdd(this.g, oldCache, math.sub(this.one, this.g), gradientSquare); const variable = math.scaledArrayAdd(this.c, math.divide(gradient, math.add(math.sqrt(cache), this.eps)), this.one, oldVariable); - this.cache.set(node.output, keep(cache)); + this.accumulatedSquaredGradients.set(node.output, keep(cache)); activationArrayMap.set(node.output, keep(variable)); node.data = variable; @@ -75,10 +75,10 @@ export class RMSPropOptimizer extends Optimizer { this.m.dispose(); this.eps.dispose(); this.g.dispose(); - this.cache.dispose(); + this.accumulatedSquaredGradients.dispose(); } - private cache = new TensorArrayMap(); + private accumulatedSquaredGradients = new TensorArrayMap(); private m: Scalar; private eps: Scalar; private g: Scalar; From 41b9ff1658a32bff1f9946b2191cf779952523db Mon Sep 17 00:00:00 2001 From: mnottheone Date: Sat, 16 Sep 2017 22:29:27 +0530 Subject: [PATCH 16/17] formatted --- src/adagrad_optimizer.ts | 43 +++++++------- src/momentum_optimizer.ts | 36 ++++++------ src/optimizer.ts | 40 ++++++------- src/rmsprop_optimizer.ts | 75 ++++++++++++------------ src/session_test.ts | 117 +++++++++++++++++++------------------- src/sgd_optimizer.ts | 18 +++--- 6 files changed, 166 insertions(+), 163 deletions(-) diff --git a/src/adagrad_optimizer.ts b/src/adagrad_optimizer.ts index 52e862936b..e352fb330b 100644 --- a/src/adagrad_optimizer.ts +++ b/src/adagrad_optimizer.ts @@ -14,40 +14,41 @@ * limitations under the License. 
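
With the rename in place, the Adagrad step these hunks implement reads, in update-rule form (a sketch, not verbatim library code; c = -learningRate / batchSize comes from the base class and eps = 1e-6 from the constructor):

    // Per variable, with g the gradient accumulated over the batch:
    // accumulatedSquaredGradients += g * g
    // variable += c * g / (sqrt(accumulatedSquaredGradients) + eps)
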
* ============================================================================= */ -import { Node } from './graph'; -import { NDArrayMath } from './math/math'; -import { NDArray, Scalar } from './math/ndarray'; -import { Optimizer } from './optimizer'; -import { SessionRuntime } from './session'; -import { TensorArrayMap, SummedTensorArrayMap } from './tensor_array_map'; +import {Node} from './graph'; +import {NDArrayMath} from './math/math'; +import {NDArray, Scalar} from './math/ndarray'; +import {Optimizer} from './optimizer'; +import {SessionRuntime} from './session'; +import {SummedTensorArrayMap, TensorArrayMap} from './tensor_array_map'; export class AdagradOptimizer extends Optimizer { - constructor(protected learningRate: number, - protected momentum: number, specifiedVariableList?: Node[]) { + constructor( + protected learningRate: number, protected momentum: number, + specifiedVariableList?: Node[]) { super(learningRate, specifiedVariableList); this.m = Scalar.new(momentum); this.eps = Scalar.new(1e-6); } beforeBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { - super.beforeBatch(math, batchSize, runtime, - activationArrayMap, gradientArrayMap); + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { + super.beforeBatch( + math, batchSize, runtime, activationArrayMap, gradientArrayMap); if (this.accumulatedSquaredGradients.size() === 0) { this.variableNodes.forEach(node => { - this.accumulatedSquaredGradients.set(node.output, - NDArray.zeros(node.output.shape)); + this.accumulatedSquaredGradients.set( + node.output, NDArray.zeros(node.output.shape)); }); } } afterBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { math.scope((keep) => { this.variableNodes.forEach(node => { const oldVariable = activationArrayMap.get(node.output); @@ -55,9 +56,9 @@ export class AdagradOptimizer extends Optimizer { const oldCache = this.accumulatedSquaredGradients.get(node.output); const gradientSquare = math.multiply(gradient, gradient); const cache = math.add(oldCache, gradientSquare); - const variable = math.scaledArrayAdd(this.c, - math.divide(gradient, math.add(math.sqrt(cache), this.eps)), - this.one, oldVariable); + const variable = math.scaledArrayAdd( + this.c, math.divide(gradient, math.add(math.sqrt(cache), this.eps)), + this.one, oldVariable); this.accumulatedSquaredGradients.set(node.output, keep(cache)); activationArrayMap.set(node.output, keep(variable)); node.data = variable; diff --git a/src/momentum_optimizer.ts b/src/momentum_optimizer.ts index 1f694db3f7..8ebca9f2f2 100644 --- a/src/momentum_optimizer.ts +++ b/src/momentum_optimizer.ts @@ -15,49 +15,49 @@ * ============================================================================= */ -import { Node } from './graph'; -import { NDArrayMath } from './math/math'; -import { NDArray, Scalar } from './math/ndarray'; -import { SessionRuntime } from './session'; -import { SGDOptimizer } from './sgd_optimizer'; -import { SummedTensorArrayMap, TensorArrayMap } from './tensor_array_map'; +import {Node} from './graph'; +import {NDArrayMath} from './math/math'; +import {NDArray, Scalar} from 
'./math/ndarray'; +import {SessionRuntime} from './session'; +import {SGDOptimizer} from './sgd_optimizer'; +import {SummedTensorArrayMap, TensorArrayMap} from './tensor_array_map'; export class MomentumOptimizer extends SGDOptimizer { constructor( - protected learningRate: number, private momentum: number, - specifiedVariableList?: Node[]) { + protected learningRate: number, private momentum: number, + specifiedVariableList?: Node[]) { super(learningRate, specifiedVariableList); this.m = Scalar.new(this.momentum); } beforeBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { super.beforeBatch( - math, batchSize, runtime, activationArrayMap, gradientArrayMap); + math, batchSize, runtime, activationArrayMap, gradientArrayMap); if (this.variableVelocities.size() === 0) { this.variableNodes.forEach(node => { this.variableVelocities.set( - node.output, NDArray.zeros(node.output.shape)); + node.output, NDArray.zeros(node.output.shape)); }); } } afterBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { math.scope((keep) => { this.variableNodes.forEach(node => { const oldVariable = activationArrayMap.get(node.output); const gradient = this.variableGradients.get(node.output); const oldVelocity = this.variableVelocities.get(node.output); const velocity = - math.scaledArrayAdd(this.m, oldVelocity, this.one, gradient); + math.scaledArrayAdd(this.m, oldVelocity, this.one, gradient); const variable = - math.scaledArrayAdd(this.c, velocity, this.one, oldVariable); + math.scaledArrayAdd(this.c, velocity, this.one, oldVariable); this.variableVelocities.set(node.output, keep(velocity)); activationArrayMap.set(node.output, keep(variable)); node.data = variable; diff --git a/src/optimizer.ts b/src/optimizer.ts index 0e04d4442e..804e1a63f8 100644 --- a/src/optimizer.ts +++ b/src/optimizer.ts @@ -15,16 +15,16 @@ * ============================================================================= */ -import { Node, VariableNode } from './graph'; -import { NDArrayMath } from './math/math'; -import { NDArray, Scalar } from './math/ndarray'; -import { SessionRuntime } from './session'; +import {Node, VariableNode} from './graph'; +import {NDArrayMath} from './math/math'; +import {NDArray, Scalar} from './math/ndarray'; +import {SessionRuntime} from './session'; import * as session_util from './session_util'; -import { TensorArrayMap, SummedTensorArrayMap } from './tensor_array_map'; +import {SummedTensorArrayMap, TensorArrayMap} from './tensor_array_map'; export abstract class Optimizer { protected variableNodes: VariableNode[]; - protected specifiedVariableNodes: VariableNode[] | null; + protected specifiedVariableNodes: VariableNode[]|null; constructor(protected learningRate: number, specifiedVariableList?: Node[]) { if (specifiedVariableList != null) { @@ -33,12 +33,12 @@ export abstract class Optimizer { } beforeBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + 
activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { this.variableNodes = this.specifiedVariableNodes == null ? - session_util.getVariableNodesFromEvaluationSet(runtime.nodes) : - this.specifiedVariableNodes; + session_util.getVariableNodesFromEvaluationSet(runtime.nodes) : + this.specifiedVariableNodes; if (batchSize !== this.prevBatchSize) { if (this.c != null) { this.c.dispose(); @@ -47,29 +47,29 @@ export abstract class Optimizer { this.c = Scalar.new(-this.learningRate / batchSize); } this.variableNodes.forEach( - node => this.variableGradients.set( - node.output, NDArray.zeros(node.output.shape))); + node => this.variableGradients.set( + node.output, NDArray.zeros(node.output.shape))); } afterExample( - math: NDArrayMath, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { + math: NDArrayMath, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { math.scope((keep) => { this.variableNodes.forEach(node => { const gradient = gradientArrayMap.get(node.output); const accumulatedGradient = this.variableGradients.get(node.output); this.variableGradients.set( - node.output, keep(math.add(gradient, accumulatedGradient))); + node.output, keep(math.add(gradient, accumulatedGradient))); accumulatedGradient.dispose(); }); }); } abstract afterBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap): void; + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap): void; dispose() { if (this.c != null) { diff --git a/src/rmsprop_optimizer.ts b/src/rmsprop_optimizer.ts index cac3c067be..4d9eb1ccdc 100644 --- a/src/rmsprop_optimizer.ts +++ b/src/rmsprop_optimizer.ts @@ -1,26 +1,31 @@ -/* Copyright 2017 Google Inc. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ +/** + * @license + * Copyright 2017 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
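
The RMSProp implementation reformatted below differs from Adagrad only in how the accumulator evolves: it keeps a leaky average of squared gradients (gamma from the constructor) rather than a monotone sum, so the effective step size does not shrink forever:

    // accumulatedSquaredGradients = gamma * old + (1 - gamma) * g * g
    // variable += c * g / (sqrt(accumulatedSquaredGradients) + eps)
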
+ * ============================================================================= + */ -import { Node } from './graph'; -import { NDArrayMath } from './math/math'; -import { NDArray, Scalar } from './math/ndarray'; -import { Optimizer } from './optimizer'; -import { SessionRuntime } from './session'; -import { TensorArrayMap, SummedTensorArrayMap } from './tensor_array_map'; +import {Node} from './graph'; +import {NDArrayMath} from './math/math'; +import {NDArray, Scalar} from './math/ndarray'; +import {Optimizer} from './optimizer'; +import {SessionRuntime} from './session'; +import {SummedTensorArrayMap, TensorArrayMap} from './tensor_array_map'; export class RMSPropOptimizer extends Optimizer { - constructor(protected learningRate: number, - protected momentum: number, private gamma: number, - specifiedVariableList?: Node[]) { + constructor( + protected learningRate: number, protected momentum: number, + private gamma: number, specifiedVariableList?: Node[]) { super(learningRate, specifiedVariableList); this.m = Scalar.new(momentum); this.eps = Scalar.new(1e-6); @@ -28,35 +33,35 @@ export class RMSPropOptimizer extends Optimizer { } beforeBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { - super.beforeBatch(math, batchSize, runtime, - activationArrayMap, gradientArrayMap); + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { + super.beforeBatch( + math, batchSize, runtime, activationArrayMap, gradientArrayMap); if (this.accumulatedSquaredGradients.size() === 0) { this.variableNodes.forEach(node => { - this.accumulatedSquaredGradients.set(node.output, - NDArray.zeros(node.output.shape)); + this.accumulatedSquaredGradients.set( + node.output, NDArray.zeros(node.output.shape)); }); } } afterBatch( - math: NDArrayMath, batchSize: number, runtime: SessionRuntime, - activationArrayMap: TensorArrayMap, - gradientArrayMap: SummedTensorArrayMap) { + math: NDArrayMath, batchSize: number, runtime: SessionRuntime, + activationArrayMap: TensorArrayMap, + gradientArrayMap: SummedTensorArrayMap) { math.scope((keep) => { this.variableNodes.forEach(node => { const oldVariable = activationArrayMap.get(node.output); const gradient = this.variableGradients.get(node.output); const oldCache = this.accumulatedSquaredGradients.get(node.output); const gradientSquare = math.multiply(gradient, gradient); - const cache = math.scaledArrayAdd(this.g, oldCache, - math.sub(this.one, this.g), gradientSquare); - const variable = math.scaledArrayAdd(this.c, - math.divide(gradient, math.add(math.sqrt(cache), this.eps)), - this.one, oldVariable); + const cache = math.scaledArrayAdd( + this.g, oldCache, math.sub(this.one, this.g), gradientSquare); + const variable = math.scaledArrayAdd( + this.c, math.divide(gradient, math.add(math.sqrt(cache), this.eps)), + this.one, oldVariable); this.accumulatedSquaredGradients.set(node.output, keep(cache)); activationArrayMap.set(node.output, keep(variable)); node.data = variable; diff --git a/src/session_test.ts b/src/session_test.ts index 9a7cb4693f..a3a3b8bb2f 100644 --- a/src/session_test.ts +++ b/src/session_test.ts @@ -15,17 +15,16 @@ * ============================================================================= */ -import { Graph, Tensor } from './graph'; -import { InputProvider } from './input_provider'; -import { NDArrayMathCPU } from './math/math_cpu'; -import { NDArrayMathGPU } 
from './math/math_gpu'; -import { Array1D, NDArray, Scalar } from './math/ndarray'; -import { FeedDictionary, FeedEntry, Session } from './session'; -import { SGDOptimizer } from './sgd_optimizer'; -import { MomentumOptimizer } from './momentum_optimizer'; -import { AdagradOptimizer } from './adagrad_optimizer'; -import { RMSPropOptimizer } from './rmsprop_optimizer'; - +import {AdagradOptimizer} from './adagrad_optimizer'; +import {Graph, Tensor} from './graph'; +import {InputProvider} from './input_provider'; +import {NDArrayMathCPU} from './math/math_cpu'; +import {NDArrayMathGPU} from './math/math_gpu'; +import {Array1D, NDArray, Scalar} from './math/ndarray'; +import {MomentumOptimizer} from './momentum_optimizer'; +import {RMSPropOptimizer} from './rmsprop_optimizer'; +import {FeedDictionary, FeedEntry, Session} from './session'; +import {SGDOptimizer} from './sgd_optimizer'; import * as test_util from './test_util'; @@ -35,7 +34,7 @@ describe('FeedDictionary', () => { }); it('ctor populates dict from only feed entry', () => { - const e: FeedEntry = { tensor: new Tensor([]), data: NDArray.zeros([1]) }; + const e: FeedEntry = {tensor: new Tensor([]), data: NDArray.zeros([1])}; const d = new FeedDictionary([e]); expect(Object.keys(d.dict).length).toEqual(1); expect(d.dict[e.tensor.id]).toBe(e); @@ -43,10 +42,10 @@ describe('FeedDictionary', () => { it('ctor populates dict from many entries', () => { const entries: FeedEntry[] = [ - { tensor: new Tensor([]), data: NDArray.zeros([1]) }, - { tensor: new Tensor([]), data: NDArray.zeros([1]) }, - { tensor: new Tensor([]), data: NDArray.zeros([1]) }, - { tensor: new Tensor([]), data: NDArray.zeros([1]) } + {tensor: new Tensor([]), data: NDArray.zeros([1])}, + {tensor: new Tensor([]), data: NDArray.zeros([1])}, + {tensor: new Tensor([]), data: NDArray.zeros([1])}, + {tensor: new Tensor([]), data: NDArray.zeros([1])} ]; const d = new FeedDictionary(entries); expect(Object.keys(d.dict).length).toEqual(entries.length); @@ -56,7 +55,7 @@ describe('FeedDictionary', () => { it('add adds entry to map keyed on tensor id', () => { const t = new Tensor([]); const nda = NDArray.zeros([1]); - const fd = new FeedDictionary([{ tensor: t, data: nda }]); + const fd = new FeedDictionary([{tensor: t, data: nda}]); expect(fd.dict[t.id].tensor).toBe(t); expect(fd.dict[t.id].data).toBe(nda); }); @@ -86,14 +85,14 @@ describe('Session', () => { const fc3 = g.add(g.matmul(fc3W, relu2), fc3B); const session = new Session(g, new NDArrayMathCPU()); - session.eval(fc3, [{ tensor: input, data: NDArray.zeros([28 * 28]) }]); + session.eval(fc3, [{tensor: input, data: NDArray.zeros([28 * 28])}]); }); it('y=x^2 + 3: CPU', () => { const x = g.placeholder('x', [2]); const y = g.add(g.square(x), g.constant(3)); const session = new Session(g, new NDArrayMathCPU()); - const yVal = session.eval(y, [{ tensor: x, data: Array1D.new([5, 4]) }]); + const yVal = session.eval(y, [{tensor: x, data: Array1D.new([5, 4])}]); const expected = new Float32Array([28, 19]); test_util.expectArraysClose(yVal.getValues(), expected, 1e-5); }); @@ -105,7 +104,7 @@ describe('Session', () => { const session = new Session(g, math); math.scope(() => { - const yVal = session.eval(y, [{ tensor: x, data: Array1D.new([5, 4]) }]); + const yVal = session.eval(y, [{tensor: x, data: Array1D.new([5, 4])}]); const expected = new Float32Array([28, 19]); test_util.expectArraysClose(yVal.getValues(), expected, 1e-5); }); @@ -120,7 +119,7 @@ describe('Session', () => { math.scope(() => { const yVal = - session.eval(y, 
[{ tensor: xSquared, data: Array1D.new([25, 16]) }]); + session.eval(y, [{tensor: xSquared, data: Array1D.new([25, 16])}]); const expected = new Float32Array([28, 19]); test_util.expectArraysClose(yVal.getValues(), expected, 1e-5); }); @@ -136,7 +135,7 @@ describe('Session', () => { math.scope(() => { const result = - session.evalAll([y, z], [{ tensor: x, data: Array1D.new([5, 4]) }]); + session.evalAll([y, z], [{tensor: x, data: Array1D.new([5, 4])}]); const expectedY = new Float32Array([28, 19]); const expectedZ = new Float32Array([27, 18]); test_util.expectArraysClose(result[0].getValues(), expectedY, 1e-5); @@ -153,13 +152,11 @@ describe('Session', () => { const session = new Session(g, math); math.scope(() => { - const result1 = - session.eval(y, [{ tensor: x, data: Array1D.new([5, 4]) }]); + const result1 = session.eval(y, [{tensor: x, data: Array1D.new([5, 4])}]); const expectedY = new Float32Array([30, 20]); test_util.expectArraysClose(result1.getValues(), expectedY, 1e-5); - const result2 = - session.eval(z, [{ tensor: x, data: Array1D.new([5, 4]) }]); + const result2 = session.eval(z, [{tensor: x, data: Array1D.new([5, 4])}]); const expectedZ = new Float32Array([31, 21]); test_util.expectArraysClose(result2.getValues(), expectedZ, 1e-5); }); @@ -179,20 +176,20 @@ describe('Session', () => { getNextCopy() { return xs[idx++]; }, - disposeCopy(math, example) { } + disposeCopy(math, example) {} }; // w = x^2 + x + 3 // dw/dx = 2x + 1 - session.train(w, [{ tensor: x, data: inputProvider }], 1, optimizer); + session.train(w, [{tensor: x, data: inputProvider}], 1, optimizer); let dwdx = session.gradientArrayMap.get(x).get(); expect(dwdx).toBe(5); - session.train(w, [{ tensor: x, data: inputProvider }], 1, optimizer); + session.train(w, [{tensor: x, data: inputProvider}], 1, optimizer); dwdx = session.gradientArrayMap.get(x).get(); expect(dwdx).toBe(3); - session.train(w, [{ tensor: x, data: inputProvider }], 1, optimizer); + session.train(w, [{tensor: x, data: inputProvider}], 1, optimizer); dwdx = session.gradientArrayMap.get(x).get(); expect(dwdx).toBe(-1); }); @@ -209,12 +206,12 @@ describe('Session', () => { getNextCopy() { return Array1D.new([2, 4]); }, - disposeCopy(math, example) { } + disposeCopy(math, example) {} }; // w = reduce_sum(x^2 + x + 3) // dw/dx = [2*x_1 + 1, 2*x_2 + 1] - session.train(w, [{ tensor: x, data: inputProvider }], 1, optimizer); + session.train(w, [{tensor: x, data: inputProvider}], 1, optimizer); const dwdx = session.gradientArrayMap.get(x).getValues(); test_util.expectArraysClose(dwdx, new Float32Array([5, 9]), 1e-5); }); @@ -235,7 +232,7 @@ describe('Session', () => { getNextCopy() { return Array1D.new([1, 2]); }, - disposeCopy(math, example) { } + disposeCopy(math, example) {} }; // prediction = reduce_sum((x + b0)^2 + b1) @@ -245,8 +242,8 @@ describe('Session', () => { // Update only b0 const optimizerOnlyB0 = new SGDOptimizer(0.1, [b0.node]); session.train( - cost, [{ tensor: x, data: inputProvider }], 2, optimizerOnlyB0, - undefined); + cost, [{tensor: x, data: inputProvider}], 2, optimizerOnlyB0, + undefined); const b0After1 = session.activationArrayMap.get(b0).getValues(); const b1After1 = session.activationArrayMap.get(b1).getValues(); @@ -256,7 +253,7 @@ describe('Session', () => { // Update both b0 and b1 const optimizerAll = new SGDOptimizer(0.1); session.train( - cost, [{ tensor: x, data: inputProvider }], 2, optimizerAll, undefined); + cost, [{tensor: x, data: inputProvider}], 2, optimizerAll, undefined); const b0After2 = 
session.activationArrayMap.get(b0).getValues();
const b1After2 = session.activationArrayMap.get(b1).getValues();
@@ -271,8 +268,8 @@ describe('Session', () => {
const math = new NDArrayMathCPU(safeMode);
const session = new Session(g, math);
- expect(() => session.eval(y, [{ tensor: x, data: Array1D.new([5, 4]) }]))
- .toThrowError();
+ expect(() => session.eval(y, [{tensor: x, data: Array1D.new([5, 4])}]))
+ .toThrowError();
});
it('Safe mode math, math scope eval does not throw', () => {
@@ -283,7 +280,7 @@ describe('Session', () => {
const session = new Session(g, math);
math.scope(() => {
- const yVal = session.eval(y, [{ tensor: x, data: Array1D.new([5, 4]) }]);
+ const yVal = session.eval(y, [{tensor: x, data: Array1D.new([5, 4])}]);
const expected = new Float32Array([25, 16]);
test_util.expectArraysClose(yVal.getValues(), expected, 1e-5);
});
@@ -303,13 +300,13 @@ describe('Session', () => {
getNextCopy() {
return Array1D.new([2, 4]);
},
- disposeCopy(math, example) { }
+ disposeCopy(math, example) {}
};
math.scope(() => {
// w = reduce_sum(x^2 + x + 3)
// dw/dx = [2*x_1 + 1, 2*x_2 + 1]
- session.train(w, [{ tensor: x, data: inputProvider }], 1, optimizer);
+ session.train(w, [{tensor: x, data: inputProvider}], 1, optimizer);
const dwdx = session.gradientArrayMap.get(x).getValues();
test_util.expectArraysClose(dwdx, new Float32Array([5, 9]), 1e-5);
});
@@ -329,7 +326,7 @@ describe('Session', () => {
getNextCopy() {
return Array1D.new([2, 4]);
},
- disposeCopy(math, example) { }
+ disposeCopy(math, example) {}
};
math.scope(() => {
// velocity_w = [momentum * old_vel_w1 + x_1,
// momentum * old_vel_w2 + x_2] = [2,4]
// w = [ w_old - lr*vel_w1, w_old - lr*vel_w2] = [-0.2, -0.4]
- session.train(y, [{ tensor: x, data: inputProvider }], 1, optimizer);
+ session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer);
const dydw = session.activationArrayMap.get(w).getValues();
test_util.expectArraysClose(dydw, new Float32Array([-.2, -0.4]), 1e-5);
// velocity_w = [momentum * old_vel_w1 + x_1,
// momentum * old_vel_w2 + x_2] = [3,6]
// w = [ w_old - lr*vel_w1, w_old - lr*vel_w2] = [-0.5, -1.0]
- session.train(y, [{ tensor: x, data: inputProvider }], 1, optimizer);
+ session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer);
const dydw2 = session.activationArrayMap.get(w).getValues();
test_util.expectArraysClose(dydw2, new Float32Array([-.5, -1.0]), 2e-5);
});
@@ -364,7 +361,7 @@ describe('Session', () => {
getNextCopy() {
return Array1D.new([2, 4]);
},
- disposeCopy(math, example) { }
+ disposeCopy(math, example) {}
};
math.scope(() => {
// cache = [old_cache_w1 + grad_w1**2,
// old_cache_w2 + grad_w2**2] = [4,16]
// w = [ w1_old - lr*grad_w1/sqrt(cache_w1 + eps),
// w2_old - lr*grad_w2/sqrt(cache_w2 + eps)]
// = [-0.1, -0.1]
- session.train(y, [{ tensor: x, data: inputProvider }], 1, optimizer);
+ session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer);
const dydw = session.activationArrayMap.get(w).getValues();
test_util.expectArraysClose(dydw, new Float32Array([-.1, -0.1]), 1e-5);
// cache = [old_cache_w1 + grad_w1**2,
// old_cache_w2 + grad_w2**2] = [8,32]
// w = [ w1_old - lr*grad_w1/sqrt(cache_w1 + eps),
// w2_old - lr*grad_w2/sqrt(cache_w2 + eps)]
// = [-0.1707, -0.1707]
- session.train(y, [{ tensor: x, data: inputProvider }], 1, optimizer);
+ session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer);
const dydw2 = session.activationArrayMap.get(w).getValues();
- test_util.expectArraysClose(dydw2,
- new Float32Array([-.1707, -.1707]), 2e-5);
+ test_util.expectArraysClose(
+ dydw2, new Float32Array([-.1707, -.1707]), 2e-5);
});
});
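The momentum and adagrad expectations in the test comments above can be reproduced with a few lines of plain TypeScript, independent of the NDArrayMath API. This is a minimal sketch assuming lr = 0.1, momentum = 0.5, and a per-step gradient of [2, 4], which is consistent with the expected values; all names below are illustrative, not library code:

  const lr = 0.1;

  // Momentum step: velocity = momentum * velocity + grad; w -= lr * velocity.
  function momentumStep(w: number[], vel: number[], grads: number[], momentum: number) {
    for (let i = 0; i < w.length; i++) {
      vel[i] = momentum * vel[i] + grads[i];
      w[i] -= lr * vel[i];
    }
  }

  // Adagrad step: the cache accumulates squared gradients, and each weight
  // moves by lr * grad / sqrt(cache + eps).
  function adagradStep(w: number[], cache: number[], grads: number[], eps = 1e-6) {
    for (let i = 0; i < w.length; i++) {
      cache[i] += grads[i] * grads[i];
      w[i] -= lr * grads[i] / Math.sqrt(cache[i] + eps);
    }
  }

  const wMom = [0, 0];
  const vel = [0, 0];
  momentumStep(wMom, vel, [2, 4], 0.5);  // wMom ~ [-0.2, -0.4]
  momentumStep(wMom, vel, [2, 4], 0.5);  // wMom ~ [-0.5, -1.0]

  const wAda = [0, 0];
  const cache = [0, 0];
  adagradStep(wAda, cache, [2, 4]);      // wAda ~ [-0.1, -0.1]
  adagradStep(wAda, cache, [2, 4]);      // wAda ~ [-0.1707, -0.1707]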
@@ -402,7 +399,7 @@ describe('Session', () => {
getNextCopy() {
return Array1D.new([2, 4]);
},
- disposeCopy(math, example) { }
+ disposeCopy(math, example) {}
};
math.scope(() => {
@@ -413,20 +410,20 @@ describe('Session', () => {
// w = [ w1_old - lr*grad_w1/sqrt(cache_w1 + eps),
// w2_old - lr*grad_w2/sqrt(cache_w2 + eps)]
// = [-0.2236, -0.2236]
- session.train(y, [{ tensor: x, data: inputProvider }], 1, optimizer);
+ session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer);
const dydw = session.activationArrayMap.get(w).getValues();
- test_util.expectArraysClose(dydw,
- new Float32Array([-.2236, -0.2236]), 1e-5);
+ test_util.expectArraysClose(
+ dydw, new Float32Array([-.2236, -0.2236]), 1e-5);
// cache = [gamma*old_cache_w1 + (1-gamma)*grad_w1**2,
// gamma*old_cache_w2 + (1-gamma)*grad_w2**2]
// = [1.44, 5.76]
// w = [ w1_old - lr*grad_w1/sqrt(cache_w1 + eps),
// w2_old - lr*grad_w2/sqrt(cache_w2 + eps)]
// = [-.39027, -.39027]
- session.train(y, [{ tensor: x, data: inputProvider }], 1, optimizer);
+ session.train(y, [{tensor: x, data: inputProvider}], 1, optimizer);
const dydw2 = session.activationArrayMap.get(w).getValues();
- test_util.expectArraysClose(dydw2,
- new Float32Array([-.39027, -.39027]), 2e-5);
+ test_util.expectArraysClose(
+ dydw2, new Float32Array([-.39027, -.39027]), 2e-5);
});
});
@@ -444,12 +441,12 @@ describe('Session', () => {
getNextCopy() {
return Array1D.new([2, 4]);
},
- disposeCopy(math, example) { }
+ disposeCopy(math, example) {}
};
expect(
- () =>
- session.train(w, [{ tensor: x, data: inputProvider }], 1, optimizer))
- .toThrowError();
+ () =>
+ session.train(w, [{tensor: x, data: inputProvider}], 1, optimizer))
+ .toThrowError();
});
});
diff --git a/src/sgd_optimizer.ts b/src/sgd_optimizer.ts
index 605d4d9d54..a9c213c688 100644
--- a/src/sgd_optimizer.ts
+++ b/src/sgd_optimizer.ts
@@ -15,11 +15,11 @@
* =============================================================================
*/
-import { Node } from './graph';
-import { NDArrayMath } from './math/math';
-import { Optimizer } from './optimizer';
-import { SessionRuntime } from './session';
-import { TensorArrayMap, SummedTensorArrayMap } from './tensor_array_map';
+import {Node} from './graph';
+import {NDArrayMath} from './math/math';
+import {Optimizer} from './optimizer';
+import {SessionRuntime} from './session';
+import {SummedTensorArrayMap, TensorArrayMap} from './tensor_array_map';
export class SGDOptimizer extends Optimizer {
constructor(protected learningRate: number, specifiedVariableList?: Node[]) {
@@ -27,15 +27,15 @@ export class SGDOptimizer extends Optimizer {
}
afterBatch(
- math: NDArrayMath, batchSize: number, runtime: SessionRuntime,
- activationArrayMap: TensorArrayMap,
- gradientArrayMap: SummedTensorArrayMap) {
+ math: NDArrayMath, batchSize: number, runtime: SessionRuntime,
+ activationArrayMap: TensorArrayMap,
+ gradientArrayMap: SummedTensorArrayMap) {
math.scope((keep) => {
this.variableNodes.forEach(node => {
const oldVariable = activationArrayMap.get(node.output);
const gradient = this.variableGradients.get(node.output);
const variable =
- math.scaledArrayAdd(this.c, gradient, this.one, oldVariable);
+ math.scaledArrayAdd(this.c, gradient, this.one, oldVariable);
activationArrayMap.set(node.output, keep(variable));
node.data = variable;
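The rmsprop expectations above follow a leaky cache, cache = gamma * cache + (1 - gamma) * grad^2, instead of adagrad's running sum. The decay gamma = 0.8 used below is an assumption, since the optimizer's construction sits outside this hunk, but it reproduces the expected values with lr = 0.1 and gradients [2, 4]. In the sgd_optimizer.ts hunk just above, math.scaledArrayAdd(this.c, gradient, this.one, oldVariable) likewise evaluates c * gradient + 1 * oldVariable, with this.c presumably a negative scaled learning rate, i.e. a plain SGD step. A minimal plain-number sketch, not library code:

  const lr = 0.1;
  const gamma = 0.8;  // assumed decay, consistent with the expected values
  const eps = 1e-6;
  const w = [0, 0];
  const cache = [0, 0];

  // RMSProp step: decay the cache toward the squared gradient, then scale
  // the weight update by 1 / sqrt(cache + eps).
  function rmspropStep(grads: number[]) {
    for (let i = 0; i < w.length; i++) {
      cache[i] = gamma * cache[i] + (1 - gamma) * grads[i] * grads[i];
      w[i] -= lr * grads[i] / Math.sqrt(cache[i] + eps);
    }
  }

  rmspropStep([2, 4]);  // w ~ [-0.2236, -0.2236]
  rmspropStep([2, 4]);  // w ~ [-0.39027, -0.39027]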
From 506283bca415ec8b075c89ab9efeae9de7be9723 Mon Sep 17 00:00:00 2001
From: mnottheone
Date: Sat, 16 Sep 2017 22:39:34 +0530
Subject: [PATCH 17/17] formatted
---
src/index.ts | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/src/index.ts b/src/index.ts
index 1cbe812d74..4dd8cb27c8 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -21,24 +21,24 @@ import * as render_ndarray_gpu_util from './math/webgl/render_ndarray_gpu_util';
import * as webgl_util from './math/webgl/webgl_util';
import * as util from './util';
-export { CheckpointLoader } from './checkpoint_loader';
-export { DataStats, InMemoryDataset } from './dataset';
-export { Graph, Tensor } from './graph';
+export {CheckpointLoader} from './checkpoint_loader';
+export {DataStats, InMemoryDataset} from './dataset';
+export {Graph, Tensor} from './graph';
// tslint:disable-next-line:max-line-length
-export { GraphRunner, GraphRunnerEventObserver, MetricReduction } from './graph_runner';
+export {GraphRunner, GraphRunnerEventObserver, MetricReduction} from './graph_runner';
// tslint:disable-next-line:max-line-length
-export { ConstantInitializer, Initializer, NDArrayInitializer, OnesInitializer, RandomNormalInitializer, RandomTruncatedNormalInitializer, RandomUniformInitializer, VarianceScalingInitializer, ZerosInitializer } from './initializers';
+export {ConstantInitializer, Initializer, NDArrayInitializer, OnesInitializer, RandomNormalInitializer, RandomTruncatedNormalInitializer, RandomUniformInitializer, VarianceScalingInitializer, ZerosInitializer} from './initializers';
// tslint:disable-next-line:max-line-length
-export { InCPUMemoryShuffledInputProviderBuilder, InGPUMemoryShuffledInputProviderBuilder, InputProvider } from './input_provider';
-export { MatrixOrientation, NDArrayMath } from './math/math';
-export { NDArrayMathCPU } from './math/math_cpu';
-export { NDArrayMathGPU } from './math/math_gpu';
+export {InCPUMemoryShuffledInputProviderBuilder, InGPUMemoryShuffledInputProviderBuilder, InputProvider} from './input_provider';
+export {MatrixOrientation, NDArrayMath} from './math/math';
+export {NDArrayMathCPU} from './math/math_cpu';
+export {NDArrayMathGPU} from './math/math_gpu';
// tslint:disable-next-line:max-line-length
-export { Array1D, Array2D, Array3D, Array4D, NDArray, Scalar } from './math/ndarray';
-export { GPGPUContext } from './math/webgl/gpgpu_context';
-export { Optimizer } from './optimizer';
-export { CostReduction, FeedEntry, Session } from './session';
-export { SGDOptimizer } from './sgd_optimizer';
-export { MomentumOptimizer } from './momentum_optimizer';
+export {Array1D, Array2D, Array3D, Array4D, NDArray, Scalar} from './math/ndarray';
+export {GPGPUContext} from './math/webgl/gpgpu_context';
+export {MomentumOptimizer} from './momentum_optimizer';
+export {Optimizer} from './optimizer';
+export {CostReduction, FeedEntry, Session} from './session';
+export {SGDOptimizer} from './sgd_optimizer';
// Second level exports.
-export { conv_util, gpgpu_util, render_ndarray_gpu_util, util, webgl_util };
+export {conv_util, gpgpu_util, render_ndarray_gpu_util, util, webgl_util};
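With MomentumOptimizer now re-exported from src/index.ts, the optimizer is reachable through the public entry point. A minimal usage sketch, assuming the symbols resolve from './index' as exported above; the graph mirrors the session tests, and all values are illustrative:

  import {Array1D, Graph, InputProvider, MomentumOptimizer, NDArray, NDArrayMathCPU, Session} from './index';

  const g = new Graph();
  const x = g.placeholder('x', [2]);
  const w = g.variable('w', NDArray.zeros([1, 2]));
  const b = g.variable('b', NDArray.zeros([1]));
  const cost = g.reduceSum(g.add(g.matmul(w, x), b));

  const session = new Session(g, new NDArrayMathCPU());
  const optimizer = new MomentumOptimizer(0.1 /* learning rate */, 0.5 /* momentum */);
  const provider: InputProvider = {
    getNextCopy: () => Array1D.new([2, 4]),
    disposeCopy: (math, example) => {}
  };

  // One pass over a single example: gradients accumulate per example, and
  // the velocity-based update is applied at the end of the batch.
  session.train(cost, [{tensor: x, data: provider}], 1, optimizer);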