Skip to content

vllm.distributed.eplb.policy.abstract

AbstractEplbPolicy

Bases: ABC

Source code in vllm/distributed/eplb/policy/abstract.py
class AbstractEplbPolicy(ABC):
    """Interface for expert-parallelism (EPLB) load-balancing policies.

    Concrete policies decide how logical experts are replicated and
    placed onto physical expert slots across ranks.
    """

    @classmethod
    @abstractmethod
    def rebalance_experts(
        cls,
        weight: torch.Tensor,
        num_replicas: int,
        num_groups: int,
        num_nodes: int,
        num_ranks: int,
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Compute a replica placement for the expert-parallel load balancer.

        Parameters:
            weight: [layers, num_logical_experts], per-expert load
                statistics across all logical experts
            num_replicas: total number of physical experts; must be a
                multiple of `num_ranks`
            num_groups: number of expert groups
            num_nodes: number of server nodes
            num_ranks: number of ranks; must be a multiple of `num_nodes`

        Returns:
            physical_to_logical_map: [layers, num_replicas], the logical
                expert index assigned to each physical replica slot
            logical_to_physical_map: [layers, num_logical_experts, X],
                the physical replica indices backing each logical expert
            expert_count: [layers, num_logical_experts], how many physical
                replicas each logical expert received
        """
        # Abstract: subclasses must provide the actual rebalancing algorithm.
        raise NotImplementedError

rebalance_experts (abstract classmethod)

rebalance_experts(
    weight: Tensor,
    num_replicas: int,
    num_groups: int,
    num_nodes: int,
    num_ranks: int,
) -> tuple[Tensor, Tensor, Tensor]

Entry point for expert-parallelism load balancer.

Parameters:

Name Type Description Default
weight Tensor

[layers, num_logical_experts], the load statistics for all logical experts

required
num_replicas int

number of physical experts, must be a multiple of num_ranks

required
num_groups int

number of expert groups

required
num_nodes int

number of server nodes

required
num_ranks int

number of ranks, must be a multiple of num_nodes

required

Returns:

Name Type Description
physical_to_logical_map Tensor

[layers, num_replicas], the expert index of each replica

logical_to_physical_map Tensor

[layers, num_logical_experts, X], the replica indices for each expert

expert_count Tensor

[layers, num_logical_experts], number of physical replicas for each logical expert

Source code in vllm/distributed/eplb/policy/abstract.py
@classmethod
@abstractmethod
def rebalance_experts(
    cls,
    weight: torch.Tensor,
    num_replicas: int,
    num_groups: int,
    num_nodes: int,
    num_ranks: int,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Compute a replica placement for the expert-parallel load balancer.

    Parameters:
        weight: [layers, num_logical_experts], per-expert load
            statistics across all logical experts
        num_replicas: total number of physical experts; must be a
            multiple of `num_ranks`
        num_groups: number of expert groups
        num_nodes: number of server nodes
        num_ranks: number of ranks; must be a multiple of `num_nodes`

    Returns:
        physical_to_logical_map: [layers, num_replicas], the logical
            expert index assigned to each physical replica slot
        logical_to_physical_map: [layers, num_logical_experts, X],
            the physical replica indices backing each logical expert
        expert_count: [layers, num_logical_experts], how many physical
            replicas each logical expert received
    """
    # Abstract: concrete policies supply the rebalancing algorithm.
    raise NotImplementedError