modules.hl.hl_utils

hl_utils ¶

Classes:

Name	Description
`HL`

Classes¶

HL ¶

HL(models: Module | Iterable[Dict[str, Module | Any]], *, gamma=1.0, flip=-1.0, theta=0.1, beta=1.0, reset_neuron_costate=False, reset_weight_costate=False, local=True)

Parameters:

Name	Type	Description	Default
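`models`	`Module or list of dict`	Either a single model, or a list of parameter groups, each containing: - 'params': the Model or list of parameters - 'gamma', 'beta', 'theta', etc.: Hyperparameters for the group; any key given here overrides the corresponding constructor default for that group.	required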

Methods:

Name	Description
`step`	Perform one optimization step for all parameter groups.
`compute_hamiltonian`	Computes the Hamiltonian for all models.
`zero_grad`	Zeroes the gradients and resets the co-states of every group whose reset flags are set.

Source code in unaiverse/modules/hl/hl_utils.py

def __init__(self, models: torch.nn.Module | Iterable[Dict[str, torch.nn.Module | Any]], *,
             gamma=1., flip=-1., theta=0.1, beta=1., reset_neuron_costate=False, reset_weight_costate=False,
             local=True):
    """Build one parameter group per model and the matching optimizer state.

    Args:
        models: Either a single ``torch.nn.Module`` or an iterable of dicts (parameter groups).
            Each dict must have a 'params' key storing the model; any other key it carries
            ('gamma', 'flip', 'theta', 'beta', 'reset_neuron_costate', 'reset_weight_costate',
            'local') replaces the keyword default below for that group only.
        gamma, flip, theta, beta, reset_neuron_costate, reset_weight_costate, local:
            Group-level defaults, used whenever a group does not provide its own value.
    """

    # Values every group inherits unless it overrides them
    shared = {
        'params': None,
        'gamma': gamma,
        'flip': flip,
        'theta': theta,
        'beta': beta,
        'reset_neuron_costate': reset_neuron_costate,
        'reset_weight_costate': reset_weight_costate,
        'local': local,
    }

    # A bare module is treated as a single group that only specifies 'params'
    if isinstance(models, torch.nn.Module):
        models = [{'params': models}]

    # Merge each user-provided group on top of the shared defaults
    self.param_groups = []
    for entry in models:
        assert 'params' in entry, "Each parameter group must contain a 'params' key storing the model."
        merged = {**shared, **entry}
        self.param_groups.append(merged)

    # One optimizer-state dict per group (this is the optimizer's state, not the model's own state)
    tracked_models = (merged['params'] for merged in self.param_groups)
    self.state = [_init_state_and_costate(tracked) for tracked in tracked_models]

Methods:¶

step ¶

step()

Perform one optimization step for all parameter groups.

Source code in unaiverse/modules/hl/hl_utils.py

@torch.no_grad()
def step(self):
    """Run one update of the costates and weights for every parameter group.

    For each group, the model's ``h`` and its named parameters are stored (by reference) in the
    optimizer state, the gradients obtained through ``_get_grad`` drive the costate updates, and
    the weight costates drive the weight update. The group's 'local' flag decides whether the
    weights are updated before or after the weight costates.
    """

    for group, state in zip(self.param_groups, self.state):
        net = group['params']
        dt = net.delta
        flip = group['flip']

        # Arguments shared by every costate update and by every weight update, respectively
        costate_kwargs = dict(step_size=-dt * flip, decay=-flip * group['theta'], in_place=True)
        weight_kwargs = dict(step_size=-dt * group['beta'], decay=None, in_place=True)

        # Track the model state inside the optimizer state and update the neuron costate from its gradient;
        # the locality of these operations is handled by the model
        state['x']['xi'] = net.h
        grad_xi = _get_grad(net.h)
        _euler_step(state['p']['xi'], grad_xi, **costate_kwargs)

        # Track the network weights (the parameter objects themselves, not copies) and collect their gradients
        grad_w = {}
        for pname, tensor in net.named_parameters():
            state['x']['w'][pname] = tensor
            grad_w[pname] = _get_grad(tensor)

        weights = state['x']['w']
        weight_costates = state['p']['w']

        if not group['local']:

            # Non-local HL: refresh the costates first, then move the weights along the value returned
            # by that costate update
            direction = _euler_step(weight_costates, grad_w, **costate_kwargs)
            _euler_step(weights, direction, **weight_kwargs)
        else:

            # Local HL: move the weights along the old costates, and only then refresh the costates
            direction = weight_costates
            _euler_step(weights, direction, **weight_kwargs)
            _euler_step(weight_costates, grad_w, **costate_kwargs)

compute_hamiltonian ¶

compute_hamiltonian(*potential_terms: Tensor) -> Tensor

Computes the Hamiltonian for all models.

Source code in unaiverse/modules/hl/hl_utils.py

def compute_hamiltonian(self, *potential_terms: torch.Tensor) -> torch.Tensor:
    """Return the Hamiltonian accumulated over all parameter groups.

    Args:
        *potential_terms: One tensor per parameter group, in the same order as the groups.

    Returns:
        A 0-dim tensor, created with the dtype and device of the first potential term, holding the
        sum over groups of ``gamma * potential_term`` plus the real part of the dot product between
        the flattened ``model.dh`` and the flattened neuron costate ``state['p']['xi']``.

    Raises:
        AssertionError: If the number of potential terms differs from the number of groups.
    """

    # Exactly one potential term per model is required
    assert len(potential_terms) == len(self.param_groups), f"A potential term for each model is expected."

    # The accumulator lives where the first potential term lives
    first = potential_terms[0]
    ham = torch.tensor(0., dtype=first.dtype, device=first.device)

    for group, state, potential in zip(self.param_groups, self.state, potential_terms):
        dh_flat = group['params'].dh.view(-1)
        costate_flat = state['p']['xi'].view(-1)
        costate_term = torch.dot(dh_flat, costate_flat).real

        # Accumulate in place, so the accumulator keeps the dtype chosen above
        ham += group['gamma'] * potential + costate_term
    return ham

zero_grad ¶

zero_grad(set_to_none: bool = False) -> None

Zeroes the gradients and resets co-states if needed.

Source code in unaiverse/modules/hl/hl_utils.py

def zero_grad(self, set_to_none: bool = False) -> None:
    """Clear the gradients of every model and reset the costates of groups that ask for it.

    Args:
        set_to_none: Forwarded unchanged to ``_zero_grad`` for the model's ``h`` and for each
            of its parameters.
    """

    # Group flag -> key of the costate (inside state['p']) that the flag resets
    resettable = (
        ('reset_neuron_costate', 'xi'),
        ('reset_weight_costate', 'w'),
    )

    for group, state in zip(self.param_groups, self.state):
        net = group['params']

        # Gradients of the model state first, then of each parameter
        _zero_grad(net.h, set_to_none)
        for tensor in net.parameters():
            _zero_grad(tensor, set_to_none)

        # Reset the costates only where the group's flag requests it
        for flag, costate_key in resettable:
            if group[flag]:
                _zero_inplace(state['p'][costate_key], detach=True)