One of the underlying data structures of TensorFlow (protocol buffers) is capped at 2GB. On certain large networks, this makes nengo_dl.Simulator fail where nengo.Simulator and nengo_ocl.Simulator succeed.
This issue and some workarounds are documented elsewhere (the original link was not preserved).
For a nengo frontend user, it is hard to implement those workarounds, because they have to be applied fairly deep within the backend. Am I missing some config option that allows the use of >2GB connections? If so, that option might benefit from some documentation in the nengo or at least the nengo_dl docs. If not, perhaps that option should exist, or perhaps nengo_dl could catch this exception and rectify it at runtime.
Input (you will need >10GB of RAM to reach the exception in question):
import numpy as np
import nengo
import nengo_dl

transform = np.random.normal(size=(3*10**4, 3*10**4))  # Fails
# transform = np.zeros((3*10**4, 3*10**4))  # Works
print(transform.nbytes / 1e9, 'GB')  # 7.2 GB

# One dense neuron-to-neuron connection using the >2GB transform matrix.
with nengo.Network() as net:
    ens = nengo.Ensemble(transform.shape[0], 1)
    nengo.Connection(ens.neurons, ens.neurons, transform=transform)
sim = nengo_dl.Simulator(net)
Output:
7.2 GB
Build finished in 0:00:01
Optimization finished in 0:00:00
| # Constructing graph | 0:00:00
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(op_type_name, name, **keywords)
464 try:
--> 465 values = ops.convert_to_tensor(
466 values,
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
1340 if ret is None:
-> 1341 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1342
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/tensor_conversion_registry.py in _default_conversion_function(***failed resolving arguments***)
51 del as_ref # Unused.
---> 52 return constant_op.constant(value, dtype, name=name)
53
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
260 """
--> 261 return _constant_impl(value, dtype, shape, name, verify_shape=False,
262 allow_broadcast=True)
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
297 tensor_value.tensor.CopyFrom(
--> 298 tensor_util.make_tensor_proto(
299 value, dtype=dtype, shape=shape, verify_shape=verify_shape,
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
520 if nparray.size * nparray.itemsize >= (1 << 31):
--> 521 raise ValueError(
522 "Cannot create a tensor proto whose content is larger than 2GB.")
ValueError: Cannot create a tensor proto whose content is larger than 2GB.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(op_type_name, name, **keywords)
482 try:
--> 483 observed = ops.convert_to_tensor(
484 values, as_ref=input_arg.is_ref).dtype.name
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
1340 if ret is None:
-> 1341 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1342
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/tensor_conversion_registry.py in _default_conversion_function(***failed resolving arguments***)
51 del as_ref # Unused.
---> 52 return constant_op.constant(value, dtype, name=name)
53
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
260 """
--> 261 return _constant_impl(value, dtype, shape, name, verify_shape=False,
262 allow_broadcast=True)
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
297 tensor_value.tensor.CopyFrom(
--> 298 tensor_util.make_tensor_proto(
299 value, dtype=dtype, shape=shape, verify_shape=verify_shape,
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
520 if nparray.size * nparray.itemsize >= (1 << 31):
--> 521 raise ValueError(
522 "Cannot create a tensor proto whose content is larger than 2GB.")
ValueError: Cannot create a tensor proto whose content is larger than 2GB.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-1-6a480743f0ee> in <module>
11 ens = nengo.Ensemble(transform.shape[0], 1)
12 nengo.Connection(ens.neurons, ens.neurons, transform=transform)
---> 13 sim = nengo_dl.Simulator(net)
~/.local/lib/python3.8/site-packages/nengo_dl/simulator.py in __init__(self, network, dt, seed, model, device, unroll_simulation, minibatch_size, progress_bar)
533 ) as progress:
534 self.graph = tf.Graph()
--> 535 self._build_keras(progress)
536
537 # initialize sim attributes
~/.local/lib/python3.8/site-packages/nengo/utils/magic.py in __call__(self, *args, **kwargs)
179 return self.wrapper(wrapped, instance, args, kwargs)
180 else:
--> 181 return self.wrapper(self.__wrapped__, self.instance, args, kwargs)
182 else:
183 instance = getattr(self.__wrapped__, "__self__", None)
~/.local/lib/python3.8/site-packages/nengo_dl/simulator.py in with_self(wrapped, instance, args, kwargs)
56 instance.tensor_graph.device
57 ):
---> 58 output = wrapped(*args, **kwargs)
59 tf.keras.backend.set_floatx(keras_dtype)
60
~/.local/lib/python3.8/site-packages/nengo_dl/simulator.py in _build_keras(self, progress)
558 inputs = list(self.node_inputs.values()) + [n_steps]
559
--> 560 outputs = self.tensor_graph(
561 inputs,
562 stateful=self.stateful,
~/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
895 # Build layer if applicable (if the `build` method has been
896 # overridden).
--> 897 self._maybe_build(inputs)
898 cast_inputs = self._maybe_cast_inputs(inputs)
899
~/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py in _maybe_build(self, inputs)
2414 # operations.
2415 with tf_utils.maybe_init_scope(self):
-> 2416 self.build(input_shapes) # pylint:disable=not-callable
2417 # We must set also ensure that the layer is marked as built, and the build
2418 # shape is stored since user defined build functions may not be calling
~/.local/lib/python3.8/site-packages/nengo_dl/tensor_graph.py in build(self, input_shape)
259 for sig_type in ("trainable", "non_trainable"):
260 for k, v in self.base_arrays_init[sig_type].items():
--> 261 initializer, shape, dtype = get_initializer(v)
262 assert initializer is not None # params should never be set
263 self.base_params[k] = self.add_weight(
~/.local/lib/python3.8/site-packages/nengo_dl/tensor_graph.py in get_initializer(init_vals)
237 else:
238 val = tf.concat(
--> 239 [
240 tf.zeros(s, dtype)
241 if v is None
~/.local/lib/python3.8/site-packages/nengo_dl/tensor_graph.py in <listcomp>(.0)
240 tf.zeros(s, dtype)
241 if v is None
--> 242 else tf.cast(tf.broadcast_to(v, s), dtype)
243 for v, s in zip(values, shapes)
244 ],
~/.local/lib/python3.8/site-packages/tensorflow/python/ops/gen_array_ops.py in broadcast_to(input, shape, name)
827 # Add nodes to the TensorFlow graph.
828 try:
--> 829 _, _, _op, _outputs = _op_def_library._apply_op_helper(
830 "BroadcastTo", input=input, shape=shape, name=name)
831 except (TypeError, ValueError):
~/.local/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(op_type_name, name, **keywords)
484 values, as_ref=input_arg.is_ref).dtype.name
485 except ValueError as err:
--> 486 raise ValueError(
487 "Tried to convert '%s' to a tensor and failed. Error: %s" %
488 (input_name, err))
ValueError: Tried to convert 'input' to a tensor and failed. Error: Cannot create a tensor proto whose content is larger than 2GB.