Hey everyone,
I've been having trouble increasing the simulation duration of my NengoDL simulation because of memory constraints. I'm working on a fairly large model (YOLO v4 tiny), which needs a lot of memory to begin with, but when I specify longer simulation durations, memory consumption goes through the roof and the process gets killed. Note that I'm referring to RAM, not VRAM.
What confuses me is that the increased memory consumption does not come only from the larger dataset (with more tiled timesteps). When analysing the memory allocations with tracemalloc, I found that most of the difference comes from the TensorFlow backend, specifically the ops.py file.
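For reference, the profiling itself is just the standard tracemalloc snapshot comparison (the same pattern as in the full example below), roughly:

import tracemalloc

tracemalloc.start()
s1 = tracemalloc.take_snapshot()
# ... call sim.predict(...) here ...
s2 = tracemalloc.take_snapshot()

# group allocation differences by source line and show the largest ones
for stat in s2.compare_to(s1, "lineno")[:10]:
    print(stat)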
Now for the actual question: is there any way to prevent this? Intuitively, a longer simulation should not increase memory consumption beyond what the larger dataset accounts for.
Below is a minimal example showing how much memory gets allocated for different simulation durations.
Thanks a lot in advance!
import nengo
import numpy as np
import tensorflow as tf
import tracemalloc
import nengo_dl
seed = 0
np.random.seed(seed)
tf.random.set_seed(seed)
(train_images, train_labels), (
    test_images,
    test_labels,
) = tf.keras.datasets.mnist.load_data()
# flatten images and add time dimension
train_images = train_images.reshape((train_images.shape[0], 1, -1))
train_labels = train_labels.reshape((train_labels.shape[0], 1, -1))
test_images = test_images.reshape((test_images.shape[0], 1, -1))
test_labels = test_labels.reshape((test_labels.shape[0], 1, -1))
# input
inp = tf.keras.Input(shape=(28, 28, 1))
# convolutional layers
conv0 = tf.keras.layers.Conv2D(
    filters=32,
    kernel_size=3,
    activation=tf.nn.relu,
)(inp)

conv1 = tf.keras.layers.Conv2D(
    filters=64,
    kernel_size=3,
    strides=2,
    activation=tf.nn.relu,
)(conv0)
# fully connected layer
flatten = tf.keras.layers.Flatten()(conv1)
dense = tf.keras.layers.Dense(units=10)(flatten)
model = tf.keras.Model(inputs=inp, outputs=dense)

def train():
    converter = nengo_dl.Converter(model)

    with nengo_dl.Simulator(converter.net, minibatch_size=200) as sim:
        # run training
        sim.compile(
            optimizer=tf.optimizers.Adam(0.001),
            loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=[tf.metrics.sparse_categorical_accuracy],
        )
        sim.fit(
            {converter.inputs[inp]: train_images},
            {converter.outputs[dense]: train_labels},
            validation_data=(
                {converter.inputs[inp]: test_images},
                {converter.outputs[dense]: test_labels},
            ),
            epochs=2,
        )

        # save the parameters to file
        sim.save_params("./keras_to_snn_params")

def run_network(
    activation,
    params_file="keras_to_snn_params",
    n_steps=30,
    scale_firing_rates=1,
    synapse=None,
    n_test=400,
):
    tracemalloc.start()

    # convert the keras model to a nengo network
    nengo_converter = nengo_dl.Converter(
        model,
        swap_activations={tf.nn.relu: activation},
        scale_firing_rates=scale_firing_rates,
        synapse=synapse,
    )

    # get input/output objects
    nengo_input = nengo_converter.inputs[inp]
    nengo_output = nengo_converter.outputs[dense]
    # pick a sample of neuron indices from the first convolutional layer
    # (only a subset, to save memory; no probe is added in this
    # stripped-down example)
    sample_neurons = np.linspace(
        0,
        np.prod(conv0.shape[1:]),
        1000,
        endpoint=False,
        dtype=np.int32,
    )
    # repeat inputs for some number of timesteps
    tiled_test_images = np.tile(test_images[:n_test], (1, n_steps, 1))

    # set some options to speed up simulation
    with nengo_converter.net:
        nengo_dl.configure_settings(stateful=False)

    # build network, load in trained weights, run inference on test images
    with nengo_dl.Simulator(
        nengo_converter.net, minibatch_size=10, progress_bar=False
    ) as nengo_sim:
        nengo_sim.load_params(params_file)

        s1 = tracemalloc.take_snapshot()
        data = nengo_sim.predict({nengo_input: tiled_test_images})
        s2 = tracemalloc.take_snapshot()

        top_stats = s2.compare_to(s1, "lineno")
        print("Memory:")
        for stat in top_stats[:10]:
            print(stat)

    # compute accuracy on test data, using output of network on
    # last timestep
    predictions = np.argmax(data[nengo_output][:, -1], axis=-1)
    accuracy = (predictions == test_labels[:n_test, 0, 0]).mean()
    print(f"Test accuracy: {100 * accuracy:.2f}%")
train()
run_network(
    activation=nengo.SpikingRectifiedLinear(),
    n_steps=10,
    synapse=0.01,
)

run_network(
    activation=nengo.SpikingRectifiedLinear(),
    n_steps=500,
    synapse=0.01,
)