pymllm.bench_one_batch
======================

.. py:module:: pymllm.bench_one_batch

.. autoapi-nested-parse::

   SGLang-style one-batch benchmark for pymllm.

   This module intentionally bypasses the HTTP server, tokenizer workers,
   scheduler, and detokenizer. It drives :class:`pymllm.executor.ModelRunner`
   directly to measure one static prefill followed by token-by-token decode.

Attributes
----------

.. autoapisummary::

   pymllm.bench_one_batch.logger

Classes
-------

.. autoapisummary::

   pymllm.bench_one_batch.BenchSetting
   pymllm.bench_one_batch.BenchArgs
   pymllm.bench_one_batch.DecodeState
   pymllm.bench_one_batch.PymllmBenchRunner

Functions
---------

.. autoapisummary::

   pymllm.bench_one_batch.add_bench_args
   pymllm.bench_one_batch.make_parser
   pymllm.bench_one_batch.parse_args
   pymllm.bench_one_batch.generate_settings
   pymllm.bench_one_batch.make_synthetic_input_ids
   pymllm.bench_one_batch.summarize_latencies
   pymllm.bench_one_batch.make_profile_trace_path
   pymllm.bench_one_batch.run_single_setting
   pymllm.bench_one_batch.run_benchmark
   pymllm.bench_one_batch.main

Module Contents
---------------

.. py:data:: logger

.. py:class:: BenchSetting

   .. py:attribute:: batch_size
      :type: int

   .. py:attribute:: input_len
      :type: int

   .. py:attribute:: output_len
      :type: int

.. py:class:: BenchArgs

   .. py:attribute:: run_name
      :type: str
      :value: 'default'

   .. py:attribute:: batch_size
      :type: list[int]
      :value: [1]

   .. py:attribute:: input_len
      :type: list[int]
      :value: [256, 512, 1024]

   .. py:attribute:: output_len
      :type: list[int]
      :value: [128]

   .. py:attribute:: result_filename
      :type: pathlib.Path

   .. py:attribute:: log_decode_step
      :type: int
      :value: 0

   .. py:attribute:: seed
      :type: int
      :value: 42

   .. py:attribute:: profile
      :type: bool
      :value: False

   .. py:attribute:: profile_record_shapes
      :type: bool
      :value: False

   .. py:attribute:: profile_activities
      :type: list[str]
      :value: ['CPU', 'GPU']

   .. py:attribute:: profile_stage
      :type: str
      :value: 'all'

   .. py:attribute:: profile_filename_prefix
      :type: str
      :value: 'pymllm_profile'

   .. py:attribute:: profile_start_step
      :type: Optional[int]
      :value: None

   .. py:attribute:: profile_steps
      :type: int
      :value: 1

   .. py:attribute:: skip_warmup
      :type: bool
      :value: False

.. py:class:: DecodeState

   .. py:attribute:: req_pool_indices
      :type: torch.Tensor

   .. py:attribute:: seq_lens
      :type: torch.Tensor

   .. py:attribute:: mrope_position_deltas
      :type: Optional[torch.Tensor]
      :value: None

.. py:function:: add_bench_args(parser)

.. py:function:: make_parser()

.. py:function:: parse_args(argv = None)

.. py:function:: generate_settings(args)

.. py:function:: make_synthetic_input_ids(*, batch_size, input_len, vocab_size, seed, device)

.. py:function:: summarize_latencies(*, setting, prefill_latency, decode_latencies, run_name, device, dtype, cuda_graph, extra = None)

.. py:function:: make_profile_trace_path(*, output_dir, prefix, run_name, setting, stage, step = None)

.. py:class:: PymllmBenchRunner(runner)

   .. py:attribute:: runner

   .. py:attribute:: device

   .. py:method:: create(cfg)
      :classmethod:

   .. py:method:: clear()

   .. py:method:: extend(input_ids)

   .. py:method:: decode(input_ids, state)

   .. py:method:: shutdown()

.. py:function:: run_single_setting(*, bench_runner, args, setting, seed, record_result)

.. py:function:: run_benchmark(cfg, args)

.. py:function:: main(argv = None)
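``BenchArgs`` holds list-valued ``batch_size``, ``input_len``, and ``output_len`` fields, while ``BenchSetting`` holds one scalar of each, so ``generate_settings`` presumably expands the lists into their Cartesian product. The following is a minimal sketch of that expansion, assuming product semantics; the real ``generate_settings`` takes the parsed ``BenchArgs`` and may filter or order combinations differently.

```python
from dataclasses import dataclass
from itertools import product


@dataclass(frozen=True)
class BenchSetting:
    # Mirrors the documented pymllm.bench_one_batch.BenchSetting fields:
    # one (batch_size, input_len, output_len) combination per setting.
    batch_size: int
    input_len: int
    output_len: int


def expand_settings(batch_sizes, input_lens, output_lens):
    # Assumed behavior of generate_settings: one BenchSetting per element
    # of the Cartesian product of the three list-valued BenchArgs fields.
    return [
        BenchSetting(b, i, o)
        for b, i, o in product(batch_sizes, input_lens, output_lens)
    ]


# With the documented BenchArgs defaults (batch_size=[1],
# input_len=[256, 512, 1024], output_len=[128]) this yields 3 settings.
settings = expand_settings([1], [256, 512, 1024], [128])
```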
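The module docstring says the benchmark measures one static prefill followed by token-by-token decode, which is what ``run_single_setting`` presumably times via ``PymllmBenchRunner.extend`` and ``PymllmBenchRunner.decode``. The sketch below shows that measurement pattern with stand-in callables (``extend_fn``/``decode_fn`` are hypothetical placeholders, not the real runner API), plus a summary in the spirit of ``summarize_latencies``; the real functions take more context (setting, device, dtype, CUDA-graph state) and may synchronize the device between timings.

```python
import statistics
import time


def time_one_batch(extend_fn, decode_fn, output_len):
    # One static prefill over the full prompt, timed as a single call...
    t0 = time.perf_counter()
    extend_fn()
    prefill_latency = time.perf_counter() - t0

    # ...then output_len decode steps, each timed individually so the
    # per-token latency distribution can be summarized afterwards.
    decode_latencies = []
    for _ in range(output_len):
        t0 = time.perf_counter()
        decode_fn()
        decode_latencies.append(time.perf_counter() - t0)
    return prefill_latency, decode_latencies


def summarize(prefill_latency, decode_latencies):
    # Summary in the spirit of summarize_latencies: median per-token decode
    # latency and the decode throughput it implies, in tokens per second.
    median = statistics.median(decode_latencies)
    return {
        "prefill_s": prefill_latency,
        "median_decode_s": median,
        "decode_tok_per_s": 1.0 / median if median > 0 else float("inf"),
    }
```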