You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I'm getting high memory usage (it started at 12 GB and the error occurred at 18 GB):
I'm using the Intel TensorFlow plugin on an Intel Iris Xe GPU.
ERROR:
ResourceExhaustedError Traceback (most recent call last)
Cell In[7], line 89
86 printlogcallback = tf.keras.callbacks.LambdaCallback(on_batch_end=printlog)
88 # treina o modelo
---> 89 History = fold_model.fit(
90 train_generator_fold,
91 batch_size = batch_size,
92 epochs = epochs,
93 callbacks=[printlogcallback],
94 validation_data = (val_generator_fold),
95 verbose = 1 # mostra a barra de progresso
96 )
98 # Suponha que 'model' é o seu modelo treinado
99 save_model(fold_model, f'./modelos_h5/{key}_fold{fold+1}_batches{batch_size}_epochs{epochs}.h5')
File ~\.conda\envs\directml\lib\site-packages\keras\utils\traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File ~\.conda\envs\directml\lib\site-packages\tensorflow\python\eager\execute.py:54, in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
52 try:
53 ctx.ensure_initialized()
---> 54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
ResourceExhaustedError: Graph execution error:
Detected at node 'gradient_tape/model/block_1_pad/Slice_1' defined at (most recent call last):
File "C:\Users\leand\.conda\envs\directml\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Users\leand\.conda\envs\directml\lib\runpy.py", line 86, in _run_code
exec(code, run_globals)
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
app.launch_new_instance()
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
app.start()
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\ipykernel\kernelapp.py", line 711, in start
self.io_loop.start()
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\tornado\platform\asyncio.py", line 215, in start
self.asyncio_loop.run_forever()
File "C:\Users\leand\.conda\envs\directml\lib\asyncio\base_events.py", line 603, in run_forever
self._run_once()
File "C:\Users\leand\.conda\envs\directml\lib\asyncio\base_events.py", line 1909, in _run_once
handle._run()
File "C:\Users\leand\.conda\envs\directml\lib\asyncio\events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
await self.process_one()
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
await dispatch(*args)
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
await result
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\ipykernel\kernelbase.py", line 729, in execute_request
reply_content = await reply_content
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\ipykernel\ipkernel.py", line 411, in do_execute
res = shell.run_cell(
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\ipykernel\zmqshell.py", line 531, in run_cell
return super().run_cell(*args, **kwargs)
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\IPython\core\interactiveshell.py", line 2945, in run_cell
result = self._run_cell(
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\IPython\core\interactiveshell.py", line 3000, in _run_cell
return runner(coro)
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
coro.send(None)
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\IPython\core\interactiveshell.py", line 3203, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\IPython\core\interactiveshell.py", line 3382, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\IPython\core\interactiveshell.py", line 3442, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "C:\Users\leand\AppData\Local\Temp\ipykernel_17272\2278945961.py", line 89, in <module>
History = fold_model.fit(
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\keras\engine\training.py", line 1564, in fit
tmp_logs = self.train_function(iterator)
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\keras\engine\training.py", line 1160, in train_function
return step_function(self, iterator)
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\keras\engine\training.py", line 1146, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\keras\engine\training.py", line 1135, in run_step
outputs = model.train_step(data)
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\keras\engine\training.py", line 997, in train_step
self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
File "C:\Users\leand\.conda\envs\directml\lib\site-packages\keras\optimizers\optimizer_v1.py", line 872, in minimize
grads = tape.gradient(loss, var_list, grad_loss)
Node: 'gradient_tape/model/block_1_pad/Slice_1'
OOM when allocating tensor with shape[8,96,100,100] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator PluggableDevice_0_bfc
[[{{node gradient_tape/model/block_1_pad/Slice_1}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
[Op:__inference_train_function_20953]
MY MODEL:
# BASE MODEL
# https://keras.io/api/applications/
def model(modelo, n_category=9, input_shape=(200, 200, 3)):
    """Build a transfer-learning classifier on top of a Keras application.

    Args:
        modelo: A ``keras.applications`` model constructor
            (e.g. ``MobileNetV2``) — called, not an instance.
        n_category: Number of output classes. Defaults to 9
            (the 9 tomato types from the original script).
        input_shape: Input image shape ``(height, width, channels)``.
            Defaults to 200x200 RGB.

    Returns:
        A ``keras.Model`` mapping input images to softmax class
        probabilities over ``n_category`` classes.
    """
    # Base model: ImageNet-pretrained weights so training does not start
    # from random weights; the original classification head is dropped
    # (include_top=False) so we can attach our own.
    base_model = modelo(
        include_top=False,
        weights="imagenet",
        input_shape=input_shape,
    )

    # New classification head on top of the frozen-architecture base.
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.25)(x)  # light regularization before the output layer

    # Prediction (output) layer: one softmax unit per category.
    prediction_layer = Dense(n_category, activation='softmax')(x)

    # Tie the base model's input to the new output head.
    return Model(
        inputs=base_model.input,      # input comes from the base model
        outputs=prediction_layer,     # output sized by n_category
    )
The text was updated successfully, but these errors were encountered:
leo-smi
changed the title
high memory usage
high memory usage for Padam optimizer
Sep 5, 2023
I'm getting high memory usage (it started at 12 GB and the error occurred at 18 GB):
I'm using the Intel TensorFlow plugin on an Intel Iris Xe GPU.
ERROR:
MY MODEL:
The text was updated successfully, but these errors were encountered: