Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
choices=get_args(TaskOption),
help='The task to use the model for. Each vLLM instance only '
'supports one task, even if the same model can be used for '
'multiple tasks. When the model only supports one task, "auto" '
'multiple tasks. When the model only supports one task, ``"auto"`` '
'can be used to select it; otherwise, you must specify explicitly '
'which task to use.')
parser.add_argument(
Expand All @@ -250,7 +250,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
parser.add_argument(
'--skip-tokenizer-init',
action='store_true',
help='Skip initialization of tokenizer and detokenizer')
help='Skip initialization of tokenizer and detokenizer.')
parser.add_argument(
'--revision',
type=nullable_str,
Expand Down Expand Up @@ -401,7 +401,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
parser.add_argument(
'--worker-use-ray',
action='store_true',
help='Deprecated, use --distributed-executor-backend=ray.')
help='Deprecated, use ``--distributed-executor-backend=ray``.')
parser.add_argument('--pipeline-parallel-size',
'-pp',
type=int,
Expand Down Expand Up @@ -430,7 +430,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
choices=[8, 16, 32, 64, 128],
help='Token block size for contiguous chunks of '
'tokens. This is ignored on neuron devices and '
'set to max-model-len. On CUDA devices, '
'set to ``--max-model-len``. On CUDA devices, '
'only block sizes up to 32 are supported. '
'On HPU devices, block size defaults to 128.')

Expand All @@ -439,12 +439,12 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
action=argparse.BooleanOptionalAction,
default=EngineArgs.enable_prefix_caching,
help="Enables automatic prefix caching. "
"Use --no-enable-prefix-caching to disable explicitly.",
"Use ``--no-enable-prefix-caching`` to disable explicitly.",
)
parser.add_argument('--disable-sliding-window',
action='store_true',
help='Disables sliding window, '
'capping to sliding window size')
'capping to sliding window size.')
parser.add_argument('--use-v2-block-manager',
action='store_true',
default=True,
Expand Down Expand Up @@ -861,7 +861,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
"of the provided names. The model name in the model "
"field of a response will be the first name in this "
"list. If not specified, the model name will be the "
"same as the `--model` argument. Noted that this name(s) "
"same as the ``--model`` argument. Noted that this name(s) "
"will also be used in `model_name` tag content of "
"prometheus metrics, if multiple names provided, metrics "
"tag will take the first one.")
Expand All @@ -881,7 +881,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
default=None,
help="Valid choices are " +
",".join(ALLOWED_DETAILED_TRACE_MODULES) +
". It makes sense to set this only if --otlp-traces-endpoint is"
". It makes sense to set this only if ``--otlp-traces-endpoint`` is"
" set. If set, it will collect detailed traces for the specified "
"modules. This involves use of possibly costly and or blocking "
"operations and hence might have a performance impact.")
Expand Down
60 changes: 30 additions & 30 deletions vllm/entrypoints/openai/cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,29 +79,29 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
parser.add_argument("--host",
type=nullable_str,
default=None,
help="host name")
parser.add_argument("--port", type=int, default=8000, help="port number")
help="Host name.")
parser.add_argument("--port", type=int, default=8000, help="Port number.")
parser.add_argument(
"--uvicorn-log-level",
type=str,
default="info",
choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'],
help="log level for uvicorn")
help="Log level for uvicorn.")
parser.add_argument("--allow-credentials",
action="store_true",
help="allow credentials")
help="Allow credentials.")
parser.add_argument("--allowed-origins",
type=json.loads,
default=["*"],
help="allowed origins")
help="Allowed origins.")
parser.add_argument("--allowed-methods",
type=json.loads,
default=["*"],
help="allowed methods")
help="Allowed methods.")
parser.add_argument("--allowed-headers",
type=json.loads,
default=["*"],
help="allowed headers")
help="Allowed headers.")
parser.add_argument("--api-key",
type=nullable_str,
default=None,
Expand All @@ -115,10 +115,10 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
action=LoRAParserAction,
help="LoRA module configurations in either 'name=path' format"
"or JSON format. "
"Example (old format): 'name=path' "
"Example (old format): ``'name=path'`` "
"Example (new format): "
"'{\"name\": \"name\", \"local_path\": \"path\", "
"\"base_model_name\": \"id\"}'")
"``{\"name\": \"name\", \"local_path\": \"path\", "
"\"base_model_name\": \"id\"}``")
parser.add_argument(
"--prompt-adapters",
type=nullable_str,
Expand All @@ -132,7 +132,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
default=None,
help="The file path to the chat template, "
"or the template in single-line form "
"for the specified model")
"for the specified model.")
parser.add_argument(
'--chat-template-content-format',
type=str,
Expand All @@ -141,38 +141,39 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
help='The format to render message content within a chat template.'
'\n\n'
'* "string" will render the content as a string. '
'Example: "Hello World"\n'
'Example: ``"Hello World"``\n'
'* "openai" will render the content as a list of dictionaries, '
'similar to OpenAI schema. '
'Example: [{"type": "text", "text": "Hello world!"}]')
'Example: ``[{"type": "text", "text": "Hello world!"}]``')
parser.add_argument("--response-role",
type=nullable_str,
default="assistant",
help="The role name to return if "
"`request.add_generation_prompt=true`.")
"``request.add_generation_prompt=true``.")
parser.add_argument("--ssl-keyfile",
type=nullable_str,
default=None,
help="The file path to the SSL key file")
help="The file path to the SSL key file.")
parser.add_argument("--ssl-certfile",
type=nullable_str,
default=None,
help="The file path to the SSL cert file")
help="The file path to the SSL cert file.")
parser.add_argument("--ssl-ca-certs",
type=nullable_str,
default=None,
help="The CA certificates file")
help="The CA certificates file.")
parser.add_argument(
"--ssl-cert-reqs",
type=int,
default=int(ssl.CERT_NONE),
help="Whether client certificate is required (see stdlib ssl module's)"
help="Whether client certificate is required (see stdlib ssl module's)."
)
parser.add_argument(
"--root-path",
type=nullable_str,
default=None,
help="FastAPI root_path when app is behind a path based routing proxy")
help="FastAPI root_path when app is behind a path based routing proxy."
)
parser.add_argument(
"--middleware",
type=nullable_str,
Expand All @@ -182,15 +183,15 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
"We accept multiple --middleware arguments. "
"The value should be an import path. "
"If a function is provided, vLLM will add it to the server "
"using @app.middleware('http'). "
"using ``@app.middleware('http')``. "
"If a class is provided, vLLM will add it to the server "
"using app.add_middleware(). ")
"using ``app.add_middleware()``. ")
parser.add_argument(
"--return-tokens-as-token-ids",
action="store_true",
help="When --max-logprobs is specified, represents single tokens as "
"strings of the form 'token_id:{token_id}' so that tokens that "
"are not JSON-encodable can be identified.")
help="When ``--max-logprobs`` is specified, represents single tokens "
" as strings of the form 'token_id:{token_id}' so that tokens "
"that are not JSON-encodable can be identified.")
parser.add_argument(
"--disable-frontend-multiprocessing",
action="store_true",
Expand All @@ -205,9 +206,8 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
"--enable-auto-tool-choice",
action="store_true",
default=False,
help=
"Enable auto tool choice for supported models. Use --tool-call-parser"
" to specify which parser to use")
help="Enable auto tool choice for supported models. Use "
"``--tool-call-parser`` to specify which parser to use.")

valid_tool_parsers = ToolParserManager.tool_parsers.keys()
parser.add_argument(
Expand All @@ -219,7 +219,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
help=
"Select the tool call parser depending on the model that you're using."
" This is used to parse the model-generated tool call into OpenAI API "
"format. Required for --enable-auto-tool-choice.")
"format. Required for ``--enable-auto-tool-choice``.")

parser.add_argument(
"--tool-parser-plugin",
Expand All @@ -228,7 +228,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
help=
"Special the tool parser plugin write to parse the model-generated tool"
" into OpenAI API format, the name register in this plugin can be used "
"in --tool-call-parser.")
"in ``--tool-call-parser``.")

parser = AsyncEngineArgs.add_cli_args(parser)

Expand All @@ -243,7 +243,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
"--disable-fastapi-docs",
action='store_true',
default=False,
help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint"
help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."
)
parser.add_argument(
"--enable-prompt-tokens-details",
Expand Down
Loading