pymllm.configs.server_config
============================

.. py:module:: pymllm.configs.server_config


Classes
-------

.. autoapisummary::

   pymllm.configs.server_config.ServerConfig


Module Contents
---------------

.. py:class:: ServerConfig

   Centralized runtime configuration for the MLLM server.

   .. py:attribute:: model_path
      :type: Optional[pathlib.Path]
      :value: None

   .. py:attribute:: tokenizer_path
      :type: Optional[pathlib.Path]
      :value: None

   .. py:attribute:: tokenizer_mode
      :type: Literal['auto', 'slow', 'fast']
      :value: 'auto'

   .. py:attribute:: load_format
      :type: Literal['auto', 'safetensors']
      :value: 'auto'

   .. py:attribute:: trust_remote_code
      :type: bool
      :value: False

   .. py:attribute:: download_dir
      :type: Optional[pathlib.Path]
      :value: None

   .. py:attribute:: context_length
      :type: Optional[int]
      :value: None

   .. py:attribute:: dtype
      :type: Literal['auto', 'float16', 'bfloat16', 'float32']
      :value: 'auto'

   .. py:attribute:: host
      :type: str
      :value: '127.0.0.1'

   .. py:attribute:: port
      :type: int
      :value: 30000

   .. py:attribute:: fastapi_root_path
      :type: str
      :value: ''

   .. py:attribute:: api_key
      :type: Optional[str]
      :value: None

   .. py:attribute:: admin_api_key
      :type: Optional[str]
      :value: None

   .. py:attribute:: served_model_name
      :type: Optional[str]
      :value: None

   .. py:attribute:: file_storage_path
      :type: pathlib.Path

   .. py:attribute:: cors_allow_origins
      :type: list[str]
      :value: ['*']

   .. py:attribute:: mem_fraction_static
      :type: Optional[float]
      :value: None

   .. py:attribute:: max_running_requests
      :type: Optional[int]
      :value: 1

   .. py:attribute:: max_queued_requests
      :type: Optional[int]
      :value: None

   .. py:attribute:: max_total_tokens
      :type: Optional[int]
      :value: None

   .. py:attribute:: chunked_prefill_size
      :type: Optional[int]
      :value: None

   .. py:attribute:: max_prefill_tokens
      :type: Optional[int]
      :value: None

   .. py:attribute:: schedule_policy
      :type: Literal['auto', 'fcfs']
      :value: 'fcfs'

   .. py:attribute:: schedule_conservativeness
      :type: float
      :value: 1.0

   .. py:attribute:: sleep_on_idle
      :type: bool
      :value: False

   .. py:attribute:: stream_interval
      :type: int
      :value: 1

   .. py:attribute:: stream_output
      :type: bool
      :value: True

   .. py:attribute:: base_gpu_id
      :type: int
      :value: 0

   .. py:attribute:: attention_backend
      :type: Literal['auto', 'flashinfer']
      :value: 'auto'

   .. py:attribute:: gdn_decode_backend
      :type: Literal['auto', 'flashinfer', 'mllm_kernel', 'pytorch']
      :value: 'auto'

   .. py:attribute:: sampling_backend
      :type: Optional[str]
      :value: None

   .. py:attribute:: disable_cuda_graph
      :type: bool
      :value: False

   .. py:attribute:: enable_torch_compile
      :type: bool
      :value: False

   .. py:attribute:: torch_compile_max_bs
      :type: int
      :value: 32

   .. py:attribute:: random_seed
      :type: Optional[int]
      :value: 42

   .. py:attribute:: reasoning_parser
      :type: Optional[str]
      :value: None

   .. py:attribute:: tool_call_parser
      :type: Optional[str]
      :value: None

   .. py:attribute:: log_level
      :type: Literal['debug', 'info', 'warning', 'error', 'critical']
      :value: 'info'

   .. py:attribute:: enable_metrics
      :type: bool
      :value: False

   .. py:attribute:: show_time_cost
      :type: bool
      :value: False

   .. py:attribute:: decode_log_interval
      :type: int
      :value: 40

   .. py:attribute:: enable_shared_queue
      :type: bool
      :value: False

   .. py:attribute:: disable_radix_cache
      :type: bool
      :value: False

   .. py:attribute:: radix_cache_page_size
      :type: int
      :value: 1

   .. py:attribute:: enable_mamba_cache
      :type: bool
      :value: False

   .. py:attribute:: tensor_transport_mode
      :type: str
      :value: 'default'

   .. py:attribute:: cuda_ipc_pool_size_mb
      :type: int
      :value: 512

   .. py:attribute:: cuda_ipc_recycle_interval
      :type: float
      :value: 0.1

   .. py:attribute:: extra_options
      :type: dict[str, Any]

   .. py:method:: __post_init__()
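
Example
-------

A minimal usage sketch. It assumes ``ServerConfig`` is a dataclass whose
fields can be supplied as keyword arguments (suggested by the presence of
``__post_init__``); the exact constructor behavior and validation performed
in ``__post_init__`` may differ.

.. code-block:: python

   from pathlib import Path

   from pymllm.configs.server_config import ServerConfig

   # Assumption: any field not passed here keeps its documented default
   # (e.g. host='127.0.0.1', schedule_policy='fcfs', random_seed=42).
   config = ServerConfig(
       model_path=Path("/models/my-model"),
       dtype="bfloat16",
       port=8080,
       max_running_requests=4,
   )

   print(config.host)  # '127.0.0.1' (documented default)
   print(config.port)  # 8080 (overridden above)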