When I run CUDA_VISIBLE_DEVICES=0,1 torchrun --standalone --nnodes=1 --nproc-per-node=2 cli_demo_sat.py --from_pretrained cogcom-base-17b --local_tokenizer tokenizer --english --fp16 (optionally with --quant 4) and test certain pictures, about 50% of them raise an exception, so I used traceback to print it:
Traceback (most recent call last):
  File "cli_demo_sat.py", line 116, in main
    response, history, cache_image = chat(
  File "/CogCoM/cogcom/utils/chat.py", line 229, in chat
    (output, turns_mems), turns_mems_mask = filling_sequence(
  File "/CogCoM/cogcom/utils/chat.py", line 87, in filling_sequence
    logits, *output_per_layers = model(
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
    return forward_call(*args, **kwargs)
  File "/CogCoM/cogcom/models/cogcom_model.py", line 142, in forward
    return super().forward(input_ids=input_ids, vision_expert_mask=vision_expert_mask, image_embed_mask=image_embed_mask, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/model/base_model.py", line 137, in forward
    return self.transformer(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/model/transformer.py", line 668, in forward
    layer_ret = layer(*args, layer_id=torch.tensor(i), **kw_args, position_ids=position_ids, **output_cross_layer,
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/model/transformer.py", line 390, in forward
    return HOOKS_DEFAULT['layer_forward'](self, hidden_states, mask, *args, **kw_args)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/transformer_defaults.py", line 172, in layer_forward_default
    attention_output = self.attention(attention_input, mask, **kw_args)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
    return forward_call(*args, **kwargs)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/model/transformer.py", line 111, in forward
    return self.hooks['attention_forward'](hidden_states, mask, **kw_args)
  File "/CogCoM/cogcom/models/mixin.py", line 256, in attention_forward
    context_layer = attention_fn(query_layer, key_layer, value_layer, mask, dropout_fn, **kw_args)
  File "/CogCoM/cogcom/models/com_memory.py", line 49, in attention_fn
    return old_impl(q, k, v, mask, dropout_fn, cross_attention=cross_attention, mems=mems, **kw_args)
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/transformer_defaults.py", line 68, in attention_fn_default
    return standard_attention(
  File "/root/miniconda3/envs/cogcom/lib/python3.8/site-packages/sat/transformer_defaults.py", line 43, in standard_attention
    context_layer = torch.matmul(attention_probs, value_layer)
RuntimeError: expected scalar type Half but found Float
I wonder what causes this problem. Also, for some pictures, whether the runtime error occurs depends on whether I pass --quant 4.
Is there a solution to this? Thanks a lot!
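
For what it's worth, the last frame fails at torch.matmul(attention_probs, value_layer) inside standard_attention, and that message is the generic one PyTorch raises when the two matmul operands have different dtypes (Half vs Float). Below is a minimal sketch of the mismatch together with the kind of local cast I imagine might work around it; the shapes are made up and the cast is only my guess, not something from the repo:

```python
import torch

# Sketch only; assumes a CUDA GPU, matching the fp16 inference setup above.
assert torch.cuda.is_available()

# Made-up shapes. torch.matmul requires both operands to share a dtype, so a
# Half (fp16) tensor multiplied by a Float (fp32) tensor raises a dtype
# mismatch, just like the last frame of the traceback.
attention_probs = torch.rand(1, 32, 256, 256, dtype=torch.float16, device="cuda")
value_layer = torch.rand(1, 32, 256, 64, dtype=torch.float32, device="cuda")

try:
    torch.matmul(attention_probs, value_layer)
except RuntimeError as e:
    print(e)  # dtype mismatch between Half and Float

# Unverified workaround idea: align value_layer to attention_probs' dtype
# right before the matmul.
context_layer = torch.matmul(attention_probs, value_layer.to(attention_probs.dtype))
print(context_layer.dtype)  # torch.float16
```

My guess is that value_layer (maybe the part coming from the turns_mems / mems cache handled in com_memory.py) stays in fp32 while the rest of the model runs in fp16, but I have not confirmed that.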