You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
1. I have searched related issues but cannot get the expected help.
2. The bug has not been fixed in the latest version.
3. Please note that if the bug-related issue you submitted lacks corresponding environment info and a minimal reproducible demo, it will be challenging for us to reproduce and resolve the issue, reducing the likelihood of receiving feedback.
Describe the bug
/usr/local/python3.10.5/lib/python3.10/site-packages/torch_npu/utils/storage.py:38: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()
if self.device.type != 'cpu':
/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py:40: UserWarning: AutoNonVariableTypeMode is deprecated and will be removed in 1.10 release. For kernel implementations please use AutoDispatchBelowADInplaceOrView instead, If you are looking for a user facing API to enable running your inference-only workload, please use c10::InferenceMode. Using AutoDispatchBelowADInplaceOrView in user code is under risk of producing silent wrong result in some edge cases. See Note [AutoDispatchBelowAutograd] for more details. (Triggered internally at build/CMakeFiles/torch_npu.dir/compiler_depend.ts:74.)
tmp[is_boi_eoi] = LANGUAGE_TOKEN_TYPE
2024-11-26 17:25:31,408 - lmdeploy - ERROR - request.py:21 - Engine loop failed with error: backend='atbgraph' raised:
RuntimeError: val
While executing %apply_rotary_pos_emb_default : [num_users=2] = call_function[target=torch.ops.lmdeploy.apply_rotary_pos_emb.default](args = (%_unsafe_view_default, %_unsafe_view_default_1, %view_default_1, %view_default_2, None, None), kwargs = {})
Original traceback:
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 504, in forward
hidden_states = self.transformer(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 466, in forward
hidden_states = self.encoder(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 375, in forward
hidden_states, residual = layer(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 317, in forward
layernorm_input = self.self_attention(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 186, in forward
q_rope, k_rope = self.apply_rotary_pos_emb(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/nn/rotary_embedding.py", line 127, in forward
return self.impl.forward(query, key, cos, sin, inplace)
File "/opt/lmdeploy/lmdeploy/pytorch/backends/dlinfer/apply_rotary_emb.py", line 26, in forward
return apply_rotary_pos_emb(query, key, cos, sin, q_embed, k_embed)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/custom_op.py", line 72, in patched_func
return torch_ops_func_with_default(*args, **kwargs)
Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information
You can suppress this exception and fall back to eager by setting:
import torch._dynamo
torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
File "/opt/lmdeploy/lmdeploy/pytorch/engine/request.py", line 17, in _raise_exception_on_finish
task.result()
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 963, in async_loop
await self._async_loop()
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 957, in _async_loop
await __step()
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 945, in __step
raise e
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 939, in __step
raise out
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 873, in _async_loop_background
await self._async_step_background(
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 755, in _async_step_background
output = await self._async_model_forward(
File "/opt/lmdeploy/lmdeploy/utils.py", line 241, in __tmp
return (await func(*args, **kwargs))
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 646, in _async_model_forward
ret = await __forward(inputs)
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 624, in __forward
return await self.model_agent.async_forward(
File "/opt/lmdeploy/lmdeploy/pytorch/engine/model_agent.py", line 303, in async_forward
output = self._forward_impl(inputs,
File "/opt/lmdeploy/lmdeploy/pytorch/engine/model_agent.py", line 270, in _forward_impl
output = model_forward(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/engine/model_agent.py", line 153, in model_forward
output = model(**input_dict)
File "/opt/lmdeploy/lmdeploy/pytorch/backends/graph_runner.py", line 25, in __call__
return self.model(**kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 451, in _fn
return fn(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 921, in catch_errors
return callback(frame, cache_entry, hooks, frame_state, skip=1)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 400, in _convert_frame_assert
return _compile(
File "/usr/local/python3.10.5/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 676, in _compile
guarded_code = compile_inner(code, one_graph, hooks, transform)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 262, in time_wrapper
r = func(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 535, in compile_inner
out_code = transform_code_object(code, transform)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/bytecode_transformation.py", line 1036, in transform_code_object
transformations(instructions, code_options)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 165, in _fn
return fn(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 500, in transform
tracer.run()
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 2149, in run
super().run()
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 810, in run
and self.step()
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 773, in step
getattr(self, inst.opname)(inst)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 2268, in RETURN_VALUE
self.output.compile_subgraph(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 981, in compile_subgraph
self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
File "/usr/local/python3.10.5/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 1178, in compile_and_call_fx_graph
compiled_fn = self.call_user_compiler(gm)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 262, in time_wrapper
r = func(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 1251, in call_user_compiler
raise BackendCompilerFailed(self.compiler_fn, e).with_traceback(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 1232, in call_user_compiler
compiled_fn = compiler_fn(gm, self.example_inputs())
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/repro/after_dynamo.py", line 117, in debug_wrapper
compiled_gm = compiler_fn(gm, example_inputs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/repro/after_dynamo.py", line 117, in debug_wrapper
compiled_gm = compiler_fn(gm, example_inputs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/__init__.py", line 1770, in __call__
return self.compiler_fn(model, inputs, **self.kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/vendor/AtbGraph/__init__.py", line 7, in atbgraph
return compile_fx(gm, fake_input_tensor, "atbgraph")
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/dynamo_bridge/compile_fx.py", line 103, in compile_fx
return compile_fx_210(model, example_inputs, backend, inner_compile)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/dynamo_bridge/compile_fx.py", line 255, in compile_fx_210
return aot_autograd(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/backends/common.py", line 58, in compiler_fn
cg = aot_module_simplified(gm, example_inputs, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 903, in aot_module_simplified
compiled_fn = create_aot_dispatcher_function(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 262, in time_wrapper
r = func(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 628, in create_aot_dispatcher_function
compiled_fn = compiler_fn(flat_fn, fake_flat_args, aot_config, fw_metadata=fw_metadata)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 443, in aot_wrapper_dedupe
return compiler_fn(flat_fn, leaf_flat_args, aot_config, fw_metadata=fw_metadata)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 648, in aot_wrapper_synthetic_base
return compiler_fn(flat_fn, flat_args, aot_config, fw_metadata=fw_metadata)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py", line 119, in aot_dispatch_base
compiled_fw = compiler(fw_module, updated_flat_args)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 262, in time_wrapper
r = func(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/dynamo_bridge/compile_fx.py", line 217, in fw_compiler_base
return inner_compile(
File "/usr/local/python3.10.5/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/dynamo_bridge/compile_fx.py", line 82, in compile_fx_inner
gt.transform()
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/dynamo_bridge/graph.py", line 58, in transform
self.gm = self.backend_opset_transform(self.gm)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/vendor/AtbGraph/opset_convert.py", line 65, in atbgraph_opset_convert
gm = AtenToAtbTransformer(gm).transform()
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/fx/interpreter.py", line 507, in transform
result = super().run(enable_io_processing=False)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/fx/interpreter.py", line 153, in run
raise RuntimeError(*e.args) from e
torch._dynamo.exc.BackendCompilerFailed: backend='atbgraph' raised:
RuntimeError: val
While executing %apply_rotary_pos_emb_default : [num_users=2] = call_function[target=torch.ops.lmdeploy.apply_rotary_pos_emb.default](args = (%_unsafe_view_default, %_unsafe_view_default_1, %view_default_1, %view_default_2, None, None), kwargs = {})
Original traceback:
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 504, in forward
hidden_states = self.transformer(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 466, in forward
hidden_states = self.encoder(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 375, in forward
hidden_states, residual = layer(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 317, in forward
layernorm_input = self.self_attention(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 186, in forward
q_rope, k_rope = self.apply_rotary_pos_emb(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/nn/rotary_embedding.py", line 127, in forward
return self.impl.forward(query, key, cos, sin, inplace)
File "/opt/lmdeploy/lmdeploy/pytorch/backends/dlinfer/apply_rotary_emb.py", line 26, in forward
return apply_rotary_pos_emb(query, key, cos, sin, q_embed, k_embed)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/custom_op.py", line 72, in patched_func
return torch_ops_func_with_default(*args, **kwargs)
Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information
You can suppress this exception and fall back to eager by setting:
import torch._dynamo
torch._dynamo.config.suppress_errors = True
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/usr/local/python3.10.5/lib/python3.10/asyncio/queues.py", line 159, in get
await getter
asyncio.exceptions.CancelledError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/python3.10.5/lib/python3.10/asyncio/tasks.py", line 456, in wait_for
return fut.result()
asyncio.exceptions.CancelledError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/opt/lmdeploy/lmdeploy/pytorch/engine/request.py", line 171, in __no_threadsafe_get
return await asyncio.wait_for(self.resp_que.get(), timeout)
File "/usr/local/python3.10.5/lib/python3.10/asyncio/tasks.py", line 458, in wait_for
raise exceptions.TimeoutError() from exc
asyncio.exceptions.TimeoutError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/python3.10.5/lib/python3.10/site-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
result = await app( # type: ignore[func-returns-value]
File "/usr/local/python3.10.5/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
return await self.app(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/fastapi/applications.py", line 1054, in __call__
await super().__call__(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/applications.py", line 113, in __call__
await self.middleware_stack(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/middleware/errors.py", line 165, in __call__
await self.app(scope, receive, _send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/middleware/cors.py", line 85, in __call__
await self.app(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/routing.py", line 715, in __call__
await self.middleware_stack(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/routing.py", line 735, in app
await route.handle(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/routing.py", line 288, in handle
await self.app(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/routing.py", line 76, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/routing.py", line 73, in app
response = await f(request)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/fastapi/routing.py", line 301, in app
raw_response = await run_endpoint_function(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
return await dependant.call(**values)
File "/opt/lmdeploy/lmdeploy/serve/openai/api_server.py", line 483, in chat_completions_v1
async for res in result_generator:
File "/opt/lmdeploy/lmdeploy/serve/async_engine.py", line 576, in generate
async for outputs in generator.async_stream_infer(
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine_instance.py", line 178, in async_stream_infer
resp = await self.req_sender.async_recv(req_id)
File "/opt/lmdeploy/lmdeploy/pytorch/engine/request.py", line 312, in async_recv
resp: Response = await self._async_resp_get()
File "/opt/lmdeploy/lmdeploy/pytorch/engine/request.py", line 187, in _async_resp_get
return await __no_threadsafe_get()
File "/opt/lmdeploy/lmdeploy/pytorch/engine/request.py", line 175, in __no_threadsafe_get
exit(1)
File "/usr/local/python3.10.5/lib/python3.10/_sitebuiltins.py", line 26, in __call__
raise SystemExit(code)
SystemExit: 1
INFO: 10.161.63.218:40718 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
Checklist
Describe the bug
/usr/local/python3.10.5/lib/python3.10/site-packages/torch_npu/utils/storage.py:38: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()
if self.device.type != 'cpu':
/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py:40: UserWarning: AutoNonVariableTypeMode is deprecated and will be removed in 1.10 release. For kernel implementations please use AutoDispatchBelowADInplaceOrView instead, If you are looking for a user facing API to enable running your inference-only workload, please use c10::InferenceMode. Using AutoDispatchBelowADInplaceOrView in user code is under risk of producing silent wrong result in some edge cases. See Note [AutoDispatchBelowAutograd] for more details. (Triggered internally at build/CMakeFiles/torch_npu.dir/compiler_depend.ts:74.)
tmp[is_boi_eoi] = LANGUAGE_TOKEN_TYPE
2024-11-26 17:25:31,408 - lmdeploy - ERROR - request.py:21 - Engine loop failed with error: backend='atbgraph' raised:
RuntimeError: val
While executing %apply_rotary_pos_emb_default : [num_users=2] = call_function[target=torch.ops.lmdeploy.apply_rotary_pos_emb.default](args = (%_unsafe_view_default, %_unsafe_view_default_1, %view_default_1, %view_default_2, None, None), kwargs = {})
Original traceback:
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 504, in forward
hidden_states = self.transformer(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 466, in forward
hidden_states = self.encoder(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 375, in forward
hidden_states, residual = layer(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 317, in forward
layernorm_input = self.self_attention(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 186, in forward
q_rope, k_rope = self.apply_rotary_pos_emb(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/nn/rotary_embedding.py", line 127, in forward
return self.impl.forward(query, key, cos, sin, inplace)
File "/opt/lmdeploy/lmdeploy/pytorch/backends/dlinfer/apply_rotary_emb.py", line 26, in forward
return apply_rotary_pos_emb(query, key, cos, sin, q_embed, k_embed)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/custom_op.py", line 72, in patched_func
return torch_ops_func_with_default(*args, **kwargs)
Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information
You can suppress this exception and fall back to eager by setting:
import torch._dynamo
torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
File "/opt/lmdeploy/lmdeploy/pytorch/engine/request.py", line 17, in _raise_exception_on_finish
task.result()
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 963, in async_loop
await self._async_loop()
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 957, in _async_loop
await __step()
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 945, in __step
raise e
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 939, in __step
raise out
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 873, in _async_loop_background
await self._async_step_background(
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 755, in _async_step_background
output = await self._async_model_forward(
File "/opt/lmdeploy/lmdeploy/utils.py", line 241, in __tmp
return (await func(*args, **kwargs))
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 646, in _async_model_forward
ret = await __forward(inputs)
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine.py", line 624, in __forward
return await self.model_agent.async_forward(
File "/opt/lmdeploy/lmdeploy/pytorch/engine/model_agent.py", line 303, in async_forward
output = self._forward_impl(inputs,
File "/opt/lmdeploy/lmdeploy/pytorch/engine/model_agent.py", line 270, in _forward_impl
output = model_forward(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/engine/model_agent.py", line 153, in model_forward
output = model(**input_dict)
File "/opt/lmdeploy/lmdeploy/pytorch/backends/graph_runner.py", line 25, in __call__
return self.model(**kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 451, in _fn
return fn(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 921, in catch_errors
return callback(frame, cache_entry, hooks, frame_state, skip=1)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 400, in _convert_frame_assert
return _compile(
File "/usr/local/python3.10.5/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 676, in _compile
guarded_code = compile_inner(code, one_graph, hooks, transform)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 262, in time_wrapper
r = func(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 535, in compile_inner
out_code = transform_code_object(code, transform)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/bytecode_transformation.py", line 1036, in transform_code_object
transformations(instructions, code_options)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 165, in _fn
return fn(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 500, in transform
tracer.run()
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 2149, in run
super().run()
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 810, in run
and self.step()
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 773, in step
getattr(self, inst.opname)(inst)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/symbolic_convert.py", line 2268, in RETURN_VALUE
self.output.compile_subgraph(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 981, in compile_subgraph
self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
File "/usr/local/python3.10.5/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 1178, in compile_and_call_fx_graph
compiled_fn = self.call_user_compiler(gm)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 262, in time_wrapper
r = func(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 1251, in call_user_compiler
raise BackendCompilerFailed(self.compiler_fn, e).with_traceback(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/output_graph.py", line 1232, in call_user_compiler
compiled_fn = compiler_fn(gm, self.example_inputs())
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/repro/after_dynamo.py", line 117, in debug_wrapper
compiled_gm = compiler_fn(gm, example_inputs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/repro/after_dynamo.py", line 117, in debug_wrapper
compiled_gm = compiler_fn(gm, example_inputs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/__init__.py", line 1770, in __call__
return self.compiler_fn(model, inputs, **self.kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/vendor/AtbGraph/__init__.py", line 7, in atbgraph
return compile_fx(gm, fake_input_tensor, "atbgraph")
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/dynamo_bridge/compile_fx.py", line 103, in compile_fx
return compile_fx_210(model, example_inputs, backend, inner_compile)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/dynamo_bridge/compile_fx.py", line 255, in compile_fx_210
return aot_autograd(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/backends/common.py", line 58, in compiler_fn
cg = aot_module_simplified(gm, example_inputs, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 903, in aot_module_simplified
compiled_fn = create_aot_dispatcher_function(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 262, in time_wrapper
r = func(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 628, in create_aot_dispatcher_function
compiled_fn = compiler_fn(flat_fn, fake_flat_args, aot_config, fw_metadata=fw_metadata)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 443, in aot_wrapper_dedupe
return compiler_fn(flat_fn, leaf_flat_args, aot_config, fw_metadata=fw_metadata)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 648, in aot_wrapper_synthetic_base
return compiler_fn(flat_fn, flat_args, aot_config, fw_metadata=fw_metadata)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py", line 119, in aot_dispatch_base
compiled_fw = compiler(fw_module, updated_flat_args)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 262, in time_wrapper
r = func(*args, **kwargs)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/dynamo_bridge/compile_fx.py", line 217, in fw_compiler_base
return inner_compile(
File "/usr/local/python3.10.5/lib/python3.10/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/dynamo_bridge/compile_fx.py", line 82, in compile_fx_inner
gt.transform()
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/dynamo_bridge/graph.py", line 58, in transform
self.gm = self.backend_opset_transform(self.gm)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/dicp/vendor/AtbGraph/opset_convert.py", line 65, in atbgraph_opset_convert
gm = AtenToAtbTransformer(gm).transform()
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/fx/interpreter.py", line 507, in transform
result = super().run(enable_io_processing=False)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/fx/interpreter.py", line 153, in run
raise RuntimeError(*e.args) from e
torch._dynamo.exc.BackendCompilerFailed: backend='atbgraph' raised:
RuntimeError: val
While executing %apply_rotary_pos_emb_default : [num_users=2] = call_function[target=torch.ops.lmdeploy.apply_rotary_pos_emb.default](args = (%_unsafe_view_default, %_unsafe_view_default_1, %view_default_1, %view_default_2, None, None), kwargs = {})
Original traceback:
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 504, in forward
hidden_states = self.transformer(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 466, in forward
hidden_states = self.encoder(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 375, in forward
hidden_states, residual = layer(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 317, in forward
layernorm_input = self.self_attention(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/models/chatglm2.py", line 186, in forward
q_rope, k_rope = self.apply_rotary_pos_emb(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/lmdeploy/lmdeploy/pytorch/nn/rotary_embedding.py", line 127, in forward
return self.impl.forward(query, key, cos, sin, inplace)
File "/opt/lmdeploy/lmdeploy/pytorch/backends/dlinfer/apply_rotary_emb.py", line 26, in forward
return apply_rotary_pos_emb(query, key, cos, sin, q_embed, k_embed)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/dlinfer/graph/custom_op.py", line 72, in patched_func
return torch_ops_func_with_default(*args, **kwargs)
Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information
You can suppress this exception and fall back to eager by setting:
import torch._dynamo
torch._dynamo.config.suppress_errors = True
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/usr/local/python3.10.5/lib/python3.10/asyncio/queues.py", line 159, in get
await getter
asyncio.exceptions.CancelledError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/python3.10.5/lib/python3.10/asyncio/tasks.py", line 456, in wait_for
return fut.result()
asyncio.exceptions.CancelledError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/opt/lmdeploy/lmdeploy/pytorch/engine/request.py", line 171, in __no_threadsafe_get
return await asyncio.wait_for(self.resp_que.get(), timeout)
File "/usr/local/python3.10.5/lib/python3.10/asyncio/tasks.py", line 458, in wait_for
raise exceptions.TimeoutError() from exc
asyncio.exceptions.TimeoutError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/python3.10.5/lib/python3.10/site-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
result = await app( # type: ignore[func-returns-value]
File "/usr/local/python3.10.5/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
return await self.app(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/fastapi/applications.py", line 1054, in __call__
await super().__call__(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/applications.py", line 113, in __call__
await self.middleware_stack(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/middleware/errors.py", line 165, in __call__
await self.app(scope, receive, _send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/middleware/cors.py", line 85, in __call__
await self.app(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/routing.py", line 715, in __call__
await self.middleware_stack(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/routing.py", line 735, in app
await route.handle(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/routing.py", line 288, in handle
await self.app(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/routing.py", line 76, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/starlette/routing.py", line 73, in app
response = await f(request)
File "/usr/local/python3.10.5/lib/python3.10/site-packages/fastapi/routing.py", line 301, in app
raw_response = await run_endpoint_function(
File "/usr/local/python3.10.5/lib/python3.10/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
return await dependant.call(**values)
File "/opt/lmdeploy/lmdeploy/serve/openai/api_server.py", line 483, in chat_completions_v1
async for res in result_generator:
File "/opt/lmdeploy/lmdeploy/serve/async_engine.py", line 576, in generate
async for outputs in generator.async_stream_infer(
File "/opt/lmdeploy/lmdeploy/pytorch/engine/engine_instance.py", line 178, in async_stream_infer
resp = await self.req_sender.async_recv(req_id)
File "/opt/lmdeploy/lmdeploy/pytorch/engine/request.py", line 312, in async_recv
resp: Response = await self._async_resp_get()
File "/opt/lmdeploy/lmdeploy/pytorch/engine/request.py", line 187, in _async_resp_get
return await __no_threadsafe_get()
File "/opt/lmdeploy/lmdeploy/pytorch/engine/request.py", line 175, in __no_threadsafe_get
exit(1)
File "/usr/local/python3.10.5/lib/python3.10/_sitebuiltins.py", line 26, in __call__
raise SystemExit(code)
SystemExit: 1
INFO: 10.161.63.218:40718 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
Reproduction
ASCEND_RT_VISIBLE_DEVICES=2 lmdeploy serve api_server \
/home/nfs/appnfs/xxx/pre_models/ZhipuAI/glm-4v-9b \
--backend pytorch \
--chat-template /opt/lmdeploy/chat_template/glm-4v-9b.json \
--model-name glm-4v-9b \
--device ascend \
--server-name 0.0.0.0 \
--server-port 50055
curl "http://10.161.63.218:50055/v1/chat/completions" \
-H "Content-Type: application/json" \
-d '{
"model": "glm-4v-9b",
"messages": [
{
"role": "user",
"content": [
{ "type": "text", "text": "描述这张图片"},
{ "type": "image_url", "image_url": { "url": "/home/nfs/appnfs/xxx/img-test/cat.png"}}
]
}
],
"max_tokens": 128
}'
Environment
Error traceback
No response
The text was updated successfully, but these errors were encountered: