modules.networks

networks ¶

Classes:

Name	Description
`CSSM`	Linear state-space generator with dense recurrent (A), input (B) and output (C) matrices.
`CDiagR`	Diagonal matrix-based generator with real-valued transformations.
`CDiagC`	Diagonal matrix-based generator with complex-valued transformations.
`CTE`	Antisymmetric Matrix Exponential Generator implementing continuous-time dynamics.
`CTB`	Block Antisymmetric Generator using 2x2 parameterized rotation blocks.
`CTBE`	Antisymmetric Generator with Exact Matrix Exponential Blocks.

Classes¶

CSSM ¶

CSSM(u_shape: tuple[int], d_dim: int, y_dim: int, h_dim: int, sigma: Callable = tanh, project_every: int = 0, local: bool = False, batch_size: int = 1, seed: int = -1)

Bases: ModuleWrapper

Methods:

Name	Description
`adjust_eigs`	Placeholder for eigenvalue adjustment method.
`forward`	Forward pass that updates the hidden state and computes the output.

Source code in unaiverse/modules/networks.py

def __init__(self, u_shape: tuple[int], d_dim: int, y_dim: int, h_dim: int, sigma: Callable = F.tanh,
             project_every: int = 0, local: bool = False, batch_size: int = 1, seed: int = -1):
    """Build the dense linear state-space model (matrices A, B and C).

    Args:
        u_shape: Shape of one input sample; its element count is the flattened input width.
        d_dim: Width of the input descriptor `du`.
        y_dim: Width of the output produced by `C`.
        h_dim: Width of the hidden state.
        sigma: Activation applied to the state before the output mapping.
        project_every: If non-zero, `forward` calls `adjust_eigs` whenever the step counter is a
            multiple of this value.
        local: If True the state update is computed locally in time (kept out of the graph).
        batch_size: Number of rows of the hidden-state buffers and of the zero tensors that
            `forward` substitutes for missing inputs.
        seed: Forwarded unchanged to the base-class constructor.
    """
    super(CSSM, self).__init__(seed=seed)
    device = self.device  # Read right after the base constructor; presumably set there
    u_shape = torch.Size(u_shape)
    u_dim = u_shape.numel()  # Flattened size of one input sample
    du_dim = d_dim
    self.batch_size = batch_size
    self.proc_inputs, self.proc_outputs = get_proc_inputs_and_proc_outputs_for_rnn(u_shape, du_dim, y_dim)

    # Define linear transformation matrices for state update and output mapping
    # NOTE: creation order is kept as-is; each layer draws its initial weights from the RNG
    self.A = torch.nn.Linear(h_dim, h_dim, bias=False, device=device)  # Recurrent weight matrix
    self.B = torch.nn.Linear(u_dim + du_dim, h_dim, bias=False, device=device)  # Input-to-hidden mapping
    self.C = torch.nn.Linear(h_dim, y_dim, bias=False, device=device)  # Hidden-to-output mapping

    # Hidden state initialization (random buffers, one row per batch element)
    self.register_buffer('h_init', torch.randn((batch_size, h_dim), device=device))
    self.register_buffer('h_next', torch.randn((batch_size, h_dim), device=device))
    self.h = None  # State exposed by the last forward call
    self.dh = None  # Finite-difference state derivative from the last forward call
    self.sigma = sigma  # The non-linear activation function

    # Store input dimensions and stepping configuration
    self.u_dim = u_dim
    self.du_dim = du_dim
    self.delta = 1.  # Discrete time step
    self.local = local  # If True the state update is computed locally in time (i.e., kept out from the graph)
    self.forward_count = 0  # Number of forward steps since the last reset
    self.project_every = project_every

Methods:¶

adjust_eigs ¶

adjust_eigs()

Placeholder for eigenvalue adjustment method.

Source code in unaiverse/modules/networks.py

@torch.no_grad()
def adjust_eigs(self):
    """No-op hook: this model applies no eigenvalue projection."""
    return None

forward ¶

forward(u: Tensor, du: Tensor, first: bool = True, last: bool = False)

Forward pass that updates the hidden state and computes the output.

Source code in unaiverse/modules/networks.py

def forward(self, u: torch.Tensor, du: torch.Tensor, first: bool = True, last: bool = False):
    """Advance the hidden state by one step and return the read-out.

    Args:
        u: Input tensor, flattened from dimension 1 onwards; None is replaced by zeros.
        du: Input descriptor tensor; None is replaced by zeros.
        first: If True the state is re-initialised through `init_h` and the step counter is reset.
        last: Not used by this method.

    Returns:
        The output `C(sigma(h))`, where `h` is the old state when `local` is set and the new one otherwise.
    """
    dev = self.device

    # Substitute zero tensors for absent inputs
    if u is None:
        u = torch.zeros((self.batch_size, self.u_dim), device=dev)
    else:
        u = u.flatten(1).to(dev)
    if du is None:
        du = torch.zeros((self.batch_size, self.du_dim), device=dev)
    else:
        du = du.to(dev)

    # Choose the starting state: fresh on the first step, otherwise the one saved last time
    if first:
        h_prev = self.init_h(torch.cat([du, u], dim=1))
        self.forward_count = 0
    else:
        h_prev = self.h_next.data

    # Gradients with respect to the state are tracked from this point on
    h_prev.requires_grad_()

    # Periodic eigenvalue projection (disabled when project_every is 0)
    if self.project_every and self.forward_count % self.project_every == 0:
        self.adjust_eigs()

    # Let the wrapper process the inputs before they enter the update
    du, u = self.handle_inputs(du, u)

    # Linear state update: A h + B [du, u]
    recurrent = self.A(h_prev)
    drive = self.B(torch.cat([du, u], dim=1))
    h_new = recurrent + drive

    # self.h holds the old state in the local version and the new state otherwise;
    # the finite difference (h_new - h_old) / delta is the same in both cases
    self.h = h_prev if self.local else h_new
    self.dh = (h_new - h_prev) / self.delta

    # Read-out through the activation
    y = self.C(self.sigma(self.h))

    # Keep a detached copy of the new state for the next call
    self.h_next.data = h_new.detach()
    self.forward_count += 1

    return y

CDiagR ¶

CDiagR(u_shape: tuple[int], d_dim: int, y_dim: int, h_dim: int, sigma: Callable = lambda x: x, project_every: int = 0, local: bool = False, batch_size: int = 1, seed: int = -1)

Bases: ModuleWrapper

Diagonal matrix-based generator with real-valued transformations.

Methods:

Name	Description
`adjust_eigs`	Normalize the diagonal weight matrix by setting signs.
`forward`	Forward pass with diagonal transformation.

Source code in unaiverse/modules/networks.py

def __init__(self, u_shape: tuple[int], d_dim: int, y_dim: int, h_dim: int, sigma: Callable = lambda x: x,
             project_every: int = 0, local: bool = False, batch_size: int = 1, seed: int = -1):
    """Build the real-valued diagonal generator.

    Args:
        u_shape: Shape of one input sample; its element count is the flattened input width.
        d_dim: Width of the input descriptor `du`.
        y_dim: Width of the output produced by `C`.
        h_dim: Width of the hidden state (and number of diagonal entries).
        sigma: Activation applied to the state before the output mapping (identity by default).
        project_every: If non-zero, `forward` calls `adjust_eigs` whenever the step counter is a
            multiple of this value.
        local: If True the state update is computed locally in time (kept out of the graph).
        batch_size: Number of rows of the hidden-state buffers and of the zero tensors that
            `forward` substitutes for missing inputs.
        seed: Forwarded unchanged to the base-class constructor.
    """
    super(CDiagR, self).__init__(seed=seed)
    device = self.device  # Read right after the base constructor; presumably set there
    u_shape = torch.Size(u_shape)
    u_dim = u_shape.numel()  # Flattened size of one input sample
    du_dim = d_dim
    self.batch_size = batch_size
    self.proc_inputs, self.proc_outputs = get_proc_inputs_and_proc_outputs_for_rnn(u_shape, du_dim, y_dim)

    # Define diagonal transformation and linear layers
    # The (h_dim, 1) weight of `diag` is only used as storage for the diagonal: forward reads
    # `diag.weight` directly and never calls the layer
    self.diag = torch.nn.Linear(in_features=1, out_features=h_dim, bias=False, device=device, dtype=torch.float32)
    self.B = torch.nn.Linear(u_dim + du_dim, h_dim, bias=False, device=device)
    self.C = torch.nn.Linear(h_dim, y_dim, bias=False, device=device)

    # Hidden state initialization (random buffers, one row per batch element)
    self.register_buffer('h_init', torch.randn((batch_size, h_dim), device=device))
    self.register_buffer('h_next', torch.randn((batch_size, h_dim), device=device))
    self.h = None  # State exposed by the last forward call
    self.dh = None  # Finite-difference state derivative from the last forward call
    self.sigma = sigma  # The non-linear activation function

    # Store input dimensions and stepping configuration
    self.u_dim = u_dim
    self.du_dim = du_dim
    self.delta = 1.  # Discrete time step
    self.local = local  # If True the state update is computed locally in time (i.e., kept out from the graph)
    self.forward_count = 0  # Number of forward steps since the last reset
    self.project_every = project_every

Methods:¶

adjust_eigs ¶

adjust_eigs()

Normalize the diagonal weight matrix by setting signs.

Source code in unaiverse/modules/networks.py

@torch.no_grad()
def adjust_eigs(self):
    """Replace every diagonal weight by its sign, in place (zeros stay zero)."""
    weights = self.diag.weight
    weights.copy_(weights.sign())

forward ¶

forward(u: Tensor, du: Tensor, first: bool = True, last: bool = False)

Forward pass with diagonal transformation.

Source code in unaiverse/modules/networks.py

def forward(self, u: torch.Tensor, du: torch.Tensor, first: bool = True, last: bool = False):
    """Advance the hidden state with an element-wise (diagonal) recurrence and return the read-out.

    Args:
        u: Input tensor, flattened from dimension 1 onwards; None is replaced by zeros.
        du: Input descriptor tensor; None is replaced by zeros.
        first: If True the state is re-initialised through `init_h` and the step counter is reset.
        last: Not used by this method.

    Returns:
        The output `C(sigma(h))`, where `h` is the old state when `local` is set and the new one otherwise.
    """
    dev = self.device

    # Substitute zero tensors for absent inputs
    if u is None:
        u = torch.zeros((self.batch_size, self.u_dim), device=dev)
    else:
        u = u.flatten(1).to(dev)
    if du is None:
        du = torch.zeros((self.batch_size, self.du_dim), device=dev)
    else:
        du = du.to(dev)

    # Choose the starting state: fresh on the first step, otherwise the one saved last time
    if first:
        h_prev = self.init_h(torch.cat([du, u], dim=1))
        self.forward_count = 0
    else:
        h_prev = self.h_next.data

    # Gradients with respect to the state are tracked from this point on
    h_prev.requires_grad_()

    # Periodic eigenvalue projection (disabled when project_every is 0)
    if self.project_every and self.forward_count % self.project_every == 0:
        self.adjust_eigs()

    # Let the wrapper process the inputs before they enter the update
    du, u = self.handle_inputs(du, u)

    # The diagonal is stored as the (h_dim, 1) weight of `diag`; flatten it to a vector
    diagonal = self.diag.weight.view(self.diag.out_features)
    scaled = diagonal * h_prev
    drive = self.B(torch.cat([du, u], dim=1))
    h_new = scaled + drive

    # self.h holds the old state in the local version and the new state otherwise;
    # the finite difference (h_new - h_old) / delta is the same in both cases
    self.h = h_prev if self.local else h_new
    self.dh = (h_new - h_prev) / self.delta

    # Read-out through the activation
    y = self.C(self.sigma(self.h))

    # Keep a detached copy of the new state for the next call
    self.h_next.data = h_new.detach()
    self.forward_count += 1

    return y

CDiagC ¶

CDiagC(u_shape: tuple[int], d_dim: int, y_dim: int, h_dim: int, sigma: Callable = lambda x: x, project_every: int = 0, local: bool = False, batch_size: int = 1, seed: int = -1)

Bases: ModuleWrapper

Diagonal matrix-based generator with complex-valued transformations.

Methods:

Name	Description
`adjust_eigs`	Normalize the diagonal weight matrix by dividing by its magnitude.
`forward`	Forward pass with complex-valued transformation.

Source code in unaiverse/modules/networks.py

def __init__(self, u_shape: tuple[int], d_dim: int, y_dim: int, h_dim: int, sigma: Callable = lambda x: x,
             project_every: int = 0, local: bool = False, batch_size: int = 1, seed: int = -1):
    """Build the complex-valued diagonal generator.

    Args:
        u_shape: Shape of one input sample; its element count is the flattened input width.
        d_dim: Width of the input descriptor `du`.
        y_dim: Width of the output produced by `C`.
        h_dim: Width of the hidden state (and number of diagonal entries).
        sigma: Activation applied to the state before the output mapping (identity by default).
        project_every: If non-zero, `forward` calls `adjust_eigs` whenever the step counter is a
            multiple of this value.
        local: If True the state update is computed locally in time (kept out of the graph).
        batch_size: Number of rows of the hidden-state buffers and of the zero tensors that
            `forward` substitutes for missing inputs.
        seed: Forwarded unchanged to the base-class constructor.
    """
    super(CDiagC, self).__init__(seed=seed)
    device = self.device  # Read right after the base constructor; presumably set there
    u_shape = torch.Size(u_shape)
    u_dim = u_shape.numel()  # Flattened size of one input sample
    du_dim = d_dim
    self.batch_size = batch_size
    self.proc_inputs, self.proc_outputs = get_proc_inputs_and_proc_outputs_for_rnn(u_shape, du_dim, y_dim)

    # Define diagonal transformation with complex numbers
    # The (h_dim, 1) weight of `diag` is only used as storage for the diagonal: forward reads
    # `diag.weight` directly and never calls the layer
    self.diag = torch.nn.Linear(in_features=1, out_features=h_dim, bias=False, device=device, dtype=torch.cfloat)
    self.B = torch.nn.Linear(u_dim + du_dim, h_dim, bias=False, device=device, dtype=torch.cfloat)
    self.C = torch.nn.Linear(h_dim, y_dim, bias=False, device=device, dtype=torch.cfloat)

    # Hidden state initialization
    # NOTE(review): these buffers are created without an explicit dtype, whereas diag/B/C are
    # torch.cfloat; forward later overwrites h_next with the (complex) updated state
    self.register_buffer('h_init', torch.randn((batch_size, h_dim), device=device))
    self.register_buffer('h_next', torch.randn((batch_size, h_dim), device=device))
    self.h = None  # State exposed by the last forward call
    self.dh = None  # Finite-difference state derivative from the last forward call
    self.sigma = sigma  # The non-linear activation function

    # Store input dimensions and stepping configuration
    self.u_dim = u_dim
    self.du_dim = du_dim
    self.delta = 1.  # Discrete time step
    self.local = local  # If True the state update is computed locally in time (i.e., kept out from the graph)
    self.forward_count = 0  # Number of forward steps since the last reset
    self.project_every = project_every

Methods:¶

adjust_eigs ¶

adjust_eigs()

Normalize the diagonal weight matrix by dividing by its magnitude.

Source code in unaiverse/modules/networks.py

@torch.no_grad()
def adjust_eigs(self):
    """Divide every diagonal weight by its own magnitude, in place."""
    weights = self.diag.weight
    weights.div_(weights.abs())

forward ¶

forward(u: Tensor, du: Tensor, first: bool = True, last: bool = False)

Forward pass with complex-valued transformation.

Source code in unaiverse/modules/networks.py

def forward(self, u: torch.Tensor, du: torch.Tensor, first: bool = True, last: bool = False):
    """Advance the hidden state with a complex diagonal recurrence and return the real part of the read-out.

    Args:
        u: Input tensor, flattened from dimension 1 onwards; None is replaced by complex zeros.
        du: Input descriptor tensor; None is replaced by complex zeros.
        first: If True the state is re-initialised through `init_h` and the step counter is reset.
        last: Not used by this method.

    Returns:
        The real part of `C(sigma(h))`, where `h` is the old state when `local` is set and the new one
        otherwise.
    """
    dev = self.device

    # Substitute complex zero tensors for absent inputs (provided inputs are only moved, not cast)
    if u is None:
        u = torch.zeros((self.batch_size, self.u_dim), device=dev, dtype=torch.cfloat)
    else:
        u = u.flatten(1).to(dev)
    if du is None:
        du = torch.zeros((self.batch_size, self.du_dim), device=dev, dtype=torch.cfloat)
    else:
        du = du.to(dev)

    # Choose the starting state: fresh on the first step, otherwise the one saved last time
    if first:
        h_prev = self.init_h(torch.cat([du, u], dim=1))
        self.forward_count = 0
    else:
        h_prev = self.h_next.data

    # Gradients with respect to the state are tracked from this point on
    h_prev.requires_grad_()

    # Periodic eigenvalue projection (disabled when project_every is 0)
    if self.project_every and self.forward_count % self.project_every == 0:
        self.adjust_eigs()

    # Let the wrapper process the inputs before they enter the update
    du, u = self.handle_inputs(du, u)

    # The diagonal is stored as the (h_dim, 1) weight of `diag`; flatten it to a vector
    diagonal = self.diag.weight.view(self.diag.out_features)
    scaled = diagonal * h_prev
    drive = self.B(torch.cat([du, u], dim=1))
    h_new = scaled + drive

    # self.h holds the old state in the local version and the new state otherwise;
    # the finite difference (h_new - h_old) / delta is the same in both cases
    self.h = h_prev if self.local else h_new
    self.dh = (h_new - h_prev) / self.delta

    # Read-out through the activation
    y = self.C(self.sigma(self.h))

    # Keep a detached copy of the new state for the next call
    self.h_next.data = h_new.detach()
    self.forward_count += 1

    # Only the real component of the complex output is returned
    return y.real

CTE ¶

CTE(u_shape: tuple[int], d_dim: int, y_dim: int, h_dim: int, delta: float, sigma: Callable = lambda x: x, project_every: int = 0, local: bool = False, cnu_memories: int = 0, batch_size: int = 1, seed: int = -1)

Bases: ModuleWrapper

Antisymmetric Matrix Exponential Generator implementing continuous-time dynamics.

Uses antisymmetric weight matrix with matrix exponential for stable hidden state evolution.

Parameters:

Name	Type	Description	Default
`u_shape`	`tuple[int]`	Input shape (tuple of integers)	required
`d_dim`	`int`	Input descriptor dimension	required
`y_dim`	`int`	Output dimension	required
`h_dim`	`int`	Hidden state dimension	required
`delta`	`float`	Time step for discrete approximation	required
`local`	`bool`	If True, the state update is computed locally in time (kept out of the computation graph)	`False`
`seed`	`int`	Random seed (positive int)	`-1`

Methods:

Name	Description
`adjust_eigs`	Placeholder for eigenvalue adjustment method
`forward`	Forward pass through the system dynamics.

Source code in unaiverse/modules/networks.py

def __init__(self, u_shape: tuple[int], d_dim: int, y_dim: int, h_dim: int, delta: float,
             sigma: Callable = lambda x: x, project_every: int = 0, local: bool = False,
             cnu_memories: int = 0, batch_size: int = 1, seed: int = -1):
    """Build the antisymmetric matrix-exponential generator.

    Args:
        u_shape: Shape of one input sample; its element count is the flattened input width.
        d_dim: Width of the input descriptor `du`.
        y_dim: Width of the output produced by `C`.
        h_dim: Width of the hidden state.
        delta: Time step used in the matrix exponential; also passed as `beta_k` to `LinearCNU`.
        sigma: Activation applied to the state before the output mapping (identity by default).
        project_every: If non-zero, `forward` calls `adjust_eigs` whenever the step counter is a
            multiple of this value.
        local: If True the state update is computed locally in time (kept out of the graph).
        cnu_memories: When positive, the output map is a `LinearCNU` with this many key memory
            units; otherwise a plain linear layer is used.
        batch_size: Number of rows of the hidden-state buffers and of the zero tensors that
            `forward` substitutes for missing inputs.
        seed: Forwarded unchanged to the base-class constructor.
    """
    super(CTE, self).__init__(seed=seed)
    device = self.device  # Read right after the base constructor; presumably set there
    u_shape = torch.Size(u_shape)
    u_dim = u_shape.numel()  # Flattened size of one input sample
    du_dim = d_dim
    self.batch_size = batch_size
    self.proc_inputs, self.proc_outputs = get_proc_inputs_and_proc_outputs_for_rnn(u_shape, du_dim, y_dim)

    # Antisymmetric weight matrix (W - W^T)
    # Only W is stored here; forward builds 0.5 * (W - W^T) from its weight at every step
    self.W = torch.nn.Linear(h_dim, h_dim, bias=False, device=device)
    self.Id = torch.eye(h_dim, device=device)  # Identity matrix (plain attribute, not a registered buffer)

    # Input projection matrix
    self.B = torch.nn.Linear(u_dim + du_dim, h_dim, bias=False, device=device)

    # Output projection matrix
    if cnu_memories <= 0:
        self.C = torch.nn.Linear(h_dim, y_dim, bias=False, device=device)
    else:
        # Keys have the width of the concatenated [du, u] vector that forward feeds to compute_weights
        self.C = LinearCNU(h_dim, y_dim, bias=False, device=device, key_size=u_dim + du_dim,
                           delta=1, beta_k=delta, scramble=False, key_mem_units=cnu_memories, shared_keys=True)

    # Hidden state initialization (random buffers, one row per batch element)
    self.register_buffer('h_init', torch.randn((batch_size, h_dim), device=device))
    self.register_buffer('h_next', torch.randn((batch_size, h_dim), device=device))
    self.h = None  # State exposed by the last forward call
    self.dh = None  # Finite-difference state derivative from the last forward call
    self.sigma = sigma  # The non-linear activation function

    # System parameters
    self.u_dim = u_dim
    self.du_dim = du_dim
    self.delta = delta
    self.local = local  # If True the state update is computed locally in time
    self.forward_count = 0  # Number of forward steps since the last reset
    self.project_every = project_every

Methods:¶

adjust_eigs ¶

adjust_eigs()

Placeholder for eigenvalue adjustment method

Source code in unaiverse/modules/networks.py

@torch.no_grad()
def adjust_eigs(self):
    """No-op hook: this model applies no eigenvalue projection."""
    return None

forward ¶

forward(u: Tensor, du: Tensor, first: bool = True, last: bool = False) -> Tensor

Forward pass through the system dynamics.

Parameters:

Name	Type	Description	Default
`u`	`Tensor`	Input tensor of shape (batch_size, u_dim)	required
`du`	`Tensor`	Input descriptor tensor of shape (batch_size, du_dim)	required
`first`	`bool`	Flag indicating first step (resets hidden state)	`True`
`last`	`bool`	Flag indicating last step (currently unused by this method)	`False`

Returns:

Name	Type	Description
`y`	`Tensor`	Output tensor of shape (batch_size, y_dim)

Source code in unaiverse/modules/networks.py

def forward(self, u: torch.Tensor, du: torch.Tensor, first: bool = True, last: bool = False) -> torch.Tensor:
    """Forward pass through the system dynamics.

    With A = 0.5 * (W - W^T) the state is advanced as
    h_new = exp(A * delta) h + A^{-1} (exp(A * delta) - I) B [du, u].

    A is antisymmetric, hence singular whenever the hidden dimension is odd; the linear solve
    below raises in that case.

    Args:
        u: Input tensor of shape (batch_size, u_dim); None is replaced by zeros
        du: Input descriptor tensor of shape (batch_size, du_dim); None is replaced by zeros
        first: Flag indicating first step (resets hidden state and the step counter)
        last: Flag indicating last step (unused by this method)

    Returns:
        y: Output tensor of shape (batch_size, y_dim)
    """

    # Handle missing inputs
    u = u.flatten(1).to(self.device) if u is not None else (
        torch.zeros((self.batch_size, self.u_dim), device=self.device))
    du = du.to(self.device) if du is not None else (
        torch.zeros((self.batch_size, self.du_dim), device=self.device))

    # Reset hidden state if first step
    if first:
        h = self.init_h(torch.cat([du, u], dim=1))
        self.forward_count = 0
    else:
        h = self.h_next.data

    # Track the gradients on h from here on
    h.requires_grad_()

    # Check if it's time to project the eigenvalues
    if self.project_every:
        if self.forward_count % self.project_every == 0:
            self.adjust_eigs()

    # Pick the output map: either the plain layer, or a linear map whose weights are computed
    # by the CNU from the (not yet processed) concatenated inputs
    if not isinstance(self.C, LinearCNU):
        C = self.C
    else:
        udu = torch.cat([du, u], dim=1)
        weight_C = self.C.compute_weights(udu).view(self.C.out_features, self.C.in_features)

        def C(x):
            return torch.nn.functional.linear(x, weight_C)

    # Handle inputs
    du, u = self.handle_inputs(du, u)

    # Antisymmetric matrix construction
    A = 0.5 * (self.W.weight - self.W.weight.t())
    A_expm = torch.linalg.matrix_exp(A * self.delta)  # Matrix exponential
    rec = F.linear(h, A_expm, self.W.bias)  # Recurrent component

    # Input processing component: A^{-1} (exp(A * delta) - I) applied to B [du, u].
    # The product with the inverse is obtained through a linear solve instead of forming
    # A^{-1} explicitly, which is faster and numerically more stable.
    phi = torch.linalg.solve(A, A_expm - self.Id)
    inp = F.linear(self.B(torch.cat([du, u], dim=1)), phi)

    # Handle locality
    h_new = rec + inp  # Updated hidden state
    if self.local:

        # In the local version we keep track in self.h of the old value of the state
        self.h = h
        self.dh = (h_new - self.h) / self.delta  # (h_new - h_old) / delta
    else:

        # In the non-local version we keep track in self.h of the new value of the state
        self.h = h_new
        self.dh = (self.h - h) / self.delta  # (h_new - h_old) / delta

    # Compute output using a nonlinear activation function
    y = C(self.sigma(self.h))

    # Store a detached copy of the new state for the next iteration (as the sibling generators do)
    self.h_next.data = h_new.detach()
    self.forward_count += 1

    return y

CTB ¶

CTB(u_shape: tuple[int], d_dim: int, y_dim: int, h_dim: int, delta: float = None, alpha: float = 0.0, sigma: Callable = lambda x: x, project_every: int = 0, local: bool = False, batch_size: int = 1, seed: int = -1)

Bases: ModuleWrapper

Block Antisymmetric Generator using 2x2 parameterized rotation blocks.

Implements structured antisymmetric dynamics through learnable rotational frequencies.

Parameters:

Name	Type	Description	Default
`u_shape`	`tuple[int]`	Input shape (tuple of integers)	required
`d_dim`	`int`	Input descriptor dimension	required
`y_dim`	`int`	Output dimension	required
`h_dim`	`int`	Hidden state dimension	required
`delta`	`float`	Time step for discrete approximation	`None`
`alpha`	`float`	Dissipation added on the diagonal (also controls the eigenvalue projections method)	`0.0`

Methods:

Name	Description
`reset_parameters`	Initialize rotational frequencies with uniform distribution
`adjust_eigs`	Adjust eigenvalues to maintain stability
`forward`	Forward pass through block-structured dynamics

Source code in unaiverse/modules/networks.py

def __init__(self, u_shape: tuple[int], d_dim: int, y_dim: int, h_dim: int, delta: float = None,
             alpha: float = 0., sigma: Callable = lambda x: x, project_every: int = 0, local: bool = False,
             batch_size: int = 1, seed: int = -1):
    """Build the block antisymmetric generator made of 2x2 rotation blocks.

    Args:
        u_shape: Shape of one input sample; its element count is the flattened input width.
        d_dim: Width of the input descriptor `du`.
        y_dim: Width of the output produced by `C`.
        h_dim: Width of the hidden state; must be even (one 2x2 block per pair of units).
        delta: Time step of the discrete update. It multiplies tensors in `forward` and
            `adjust_eigs`, so it must be a number by the time those are called.
        alpha: Selects damping and projection method: a positive value is used as a constant
            damping ('const'), 0. selects the 'modulus' projection, -1. selects the 'alpha'
            projection in which `adjust_eigs` recomputes the damping from `omega`.
        sigma: Activation applied to the state before the output mapping (identity by default).
        project_every: If non-zero, `forward` calls `adjust_eigs` whenever the step counter is a
            multiple of this value.
        local: If True the state update is computed locally in time (kept out of the graph).
        batch_size: Number of rows of the hidden-state buffers and of the zero tensors that
            `forward` substitutes for missing inputs.
        seed: Forwarded unchanged to the base-class constructor.

    Raises:
        ValueError: If `alpha` is neither positive, 0. nor -1.
    """
    super(CTB, self).__init__(seed=seed)
    device = self.device
    u_shape = torch.Size(u_shape)
    u_dim = u_shape.numel()
    du_dim = d_dim
    self.batch_size = batch_size
    self.proc_inputs, self.proc_outputs = get_proc_inputs_and_proc_outputs_for_rnn(u_shape, du_dim, y_dim)

    assert h_dim % 2 == 0, "Hidden dimension must be even for 2x2 blocks"
    self.order = h_dim // 2  # Number of 2x2 blocks

    # Learnable rotational frequencies (filled in by reset_parameters below)
    self.omega = torch.nn.Parameter(torch.empty(self.order, device=device))
    self.register_buffer('ones', torch.ones(self.order, requires_grad=False, device=device))

    # Projection matrices
    self.B = torch.nn.Linear(u_dim + du_dim, h_dim, bias=False, device=device)
    self.C = torch.nn.Linear(h_dim, y_dim, bias=False, device=device)

    # Damping configuration
    if alpha > 0.:

        # In this case we want to add the feedback parameter alpha and use it to move eigenvalues on the unit circle
        self.project_method = 'const'
        self.register_buffer('alpha', torch.full_like(self.omega.data, alpha, device=device))
    elif alpha == 0.:

        # This is the case in which we want to divide by the modulus
        self.project_method = 'modulus'
        self.register_buffer('alpha', torch.zeros_like(self.omega.data, device=device))
    elif alpha == -1.:

        # The damping starts at zero and is recomputed from omega by adjust_eigs
        self.project_method = 'alpha'
        self.register_buffer('alpha', torch.zeros_like(self.omega.data, device=device))
    else:

        # Any other value would leave both `project_method` and the `alpha` buffer undefined and
        # only fail later, inside forward; reject it here with an explicit message
        raise ValueError(f"Unsupported alpha={alpha!r}: expected a positive value, 0. or -1.")

    # Hidden state initialization
    self.register_buffer('h_init', torch.randn((batch_size, h_dim), device=device))
    self.register_buffer('h_next', torch.randn((batch_size, h_dim), device=device))
    self.h = None
    self.dh = None
    self.sigma = sigma  # The non-linear activation function

    # System parameters
    self.u_dim = u_dim
    self.du_dim = du_dim
    self.delta = delta
    self.local = local  # If True the state update is computed locally in time (i.e., kept out from the graph)
    self.reset_parameters()
    self.forward_count = 0
    self.project_every = project_every

Methods:¶

reset_parameters ¶

reset_parameters() -> None

Initialize rotational frequencies with uniform distribution

Source code in unaiverse/modules/networks.py

def reset_parameters(self) -> None:
    """Fill the rotational frequencies in place using the default uniform initializer."""
    frequencies = self.omega
    torch.nn.init.uniform_(frequencies)

adjust_eigs ¶

adjust_eigs()

Adjust eigenvalues to maintain stability

Source code in unaiverse/modules/networks.py

@torch.no_grad()
def adjust_eigs(self):
    """Adjust eigenvalues to maintain stability.

    Each 2x2 block applied in `forward` has eigenvalues (ones - delta * alpha) +/- i * delta * omega.
    Depending on `project_method`:

    * 'alpha': the damping is set to alpha = (1 - sqrt(1 - (delta * omega)^2)) / delta, which makes
      (1 - delta * alpha)^2 + (delta * omega)^2 equal to 1. The square root is NaN where
      |delta * omega| > 1.
    * 'modulus': `omega` and `ones` are both divided by sqrt(ones^2 + (delta * omega)^2).
    * any other value (e.g. 'const'): nothing is changed.
    """
    if self.project_method == 'alpha':

        # Compute damping to maintain eigenvalues on unit circle; the division by delta applies
        # to the whole difference (1 - sqrt(...)), not to the square root alone
        root = torch.sqrt(1. - (self.delta * self.omega) ** 2)
        self.alpha.copy_((1. - root) / self.delta)
    elif self.project_method == 'modulus':

        # Normalize by modulus for unit circle stability
        module = torch.sqrt(self.ones ** 2 + (self.delta * self.omega) ** 2)
        self.omega.div_(module)
        self.ones.div_(module)

forward ¶

forward(u: Tensor, du: Tensor, first: bool = True, last: bool = False) -> Tensor

Forward pass through block-structured dynamics

Source code in unaiverse/modules/networks.py

def forward(self, u: torch.Tensor, du: torch.Tensor, first: bool = True, last: bool = False) -> torch.Tensor:
    """Advance the hidden state through the damped 2x2 rotation blocks and return the read-out.

    Args:
        u: Input tensor, flattened from dimension 1 onwards; None is replaced by zeros.
        du: Input descriptor tensor; None is replaced by zeros.
        first: If True the state is re-initialised through `init_h` and the step counter is reset.
        last: Not used by this method.

    Returns:
        The output `C(sigma(h))`, where `h` is the old state when `local` is set and the new one otherwise.
    """
    dev = self.device

    # Substitute zero tensors for absent inputs
    if u is None:
        u = torch.zeros((self.batch_size, self.u_dim), device=dev)
    else:
        u = u.flatten(1).to(dev)
    if du is None:
        du = torch.zeros((self.batch_size, self.du_dim), device=dev)
    else:
        du = du.to(dev)

    # Choose the starting state: fresh on the first step, otherwise the one saved last time
    if first:
        h_prev = self.init_h(torch.cat([du, u], dim=1))
        self.forward_count = 0
    else:
        h_prev = self.h_next.data

    # Gradients with respect to the state are tracked from this point on
    h_prev.requires_grad_()
    pairs = h_prev.view(-1, self.order, 2)  # (batch, blocks, 2)

    # Periodic eigenvalue projection (disabled when project_every is 0)
    if self.project_every and self.forward_count % self.project_every == 0:
        self.adjust_eigs()

    # Let the wrapper process the inputs before they enter the update
    du, u = self.handle_inputs(du, u)

    # Entries of every 2x2 block: [[decay, spin], [-spin, decay]]
    first_comp = pairs[..., 0]
    second_comp = pairs[..., 1]
    decay = self.ones - self.delta * self.alpha
    spin = self.delta * self.omega
    h1 = decay * first_comp + spin * second_comp
    h2 = -spin * first_comp + decay * second_comp

    # Interleave the two components again and add the scaled input projection
    rec = torch.stack([h1, h2], dim=-1).flatten(start_dim=1)
    inp = self.delta * self.B(torch.cat([du, u], dim=1))
    h_new = rec + inp

    # self.h holds the old state in the local version and the new state otherwise;
    # the finite difference (h_new - h_old) / delta is the same in both cases
    self.h = h_prev if self.local else h_new
    self.dh = (h_new - h_prev) / self.delta

    # Read-out through the activation
    y = self.C(self.sigma(self.h))

    # Keep a detached copy of the new state for the next call
    self.h_next.data = h_new.detach()
    self.forward_count += 1

    return y

CTBE ¶

CTBE(u_shape: tuple[int], d_dim: int, y_dim: int, h_dim: int, delta: float, sigma: Callable = lambda x: x, project_every: int = 0, local: bool = False, cnu_memories: int = 0, batch_size: int = 1, seed: int = -1)

Bases: ModuleWrapper

Antisymmetric Generator with Exact Matrix Exponential Blocks.

Implements precise rotational dynamics using trigonometric parameterization.

Parameters:

Name	Type	Description	Default
`u_shape`	`tuple[int]`	Input shape (tuple of integers)	required
`d_dim`	`int`	Input descriptor dimension	required
`y_dim`	`int`	Output dimension	required
`h_dim`	`int`	Hidden state dimension	required
`delta`	`float`	Time step for discrete approximation	required

Methods:

Name	Description
`reset_parameters`	Initialize rotational frequencies
`adjust_eigs`	Placeholder for eigenvalue adjustment
`forward`	Exact matrix exponential forward pass

Source code in unaiverse/modules/networks.py

def __init__(self, u_shape: tuple[int], d_dim: int, y_dim: int, h_dim: int, delta: float,
             sigma: Callable = lambda x: x, project_every: int = 0, local: bool = False,
             cnu_memories: int = 0, batch_size: int = 1, seed: int = -1):
    """Build the block generator that applies exact 2x2 rotations.

    Args:
        u_shape: Shape of one input sample; its element count is the flattened input width.
        d_dim: Width of the input descriptor `du`.
        y_dim: Width of the output produced by `C`.
        h_dim: Width of the hidden state; must be even (one 2x2 block per pair of units).
        delta: Time step multiplying the frequencies in `forward`; also passed as `beta_k` to
            `LinearCNU`.
        sigma: Activation applied to the state before the output mapping (identity by default).
        project_every: If non-zero, `forward` calls `adjust_eigs` whenever the step counter is a
            multiple of this value.
        local: If True the state update is computed locally in time (kept out of the graph).
        cnu_memories: When positive, the output map is a `LinearCNU` with this many key memory
            units; otherwise a plain linear layer is used.
        batch_size: Number of rows of the hidden-state buffers and of the zero tensors that
            `forward` substitutes for missing inputs.
        seed: Forwarded unchanged to the base-class constructor.
    """
    super(CTBE, self).__init__(seed=seed)
    device = self.device  # Read right after the base constructor; presumably set there
    u_shape = torch.Size(u_shape)
    u_dim = u_shape.numel()  # Flattened size of one input sample
    du_dim = d_dim
    self.batch_size = batch_size
    self.proc_inputs, self.proc_outputs = get_proc_inputs_and_proc_outputs_for_rnn(u_shape, du_dim, y_dim)

    assert h_dim % 2 == 0, "Hidden dimension must be even for 2x2 blocks"
    self.order = h_dim // 2  # Number of 2x2 blocks

    # Learnable rotational frequencies (allocated empty, filled in by reset_parameters below)
    self.omega = torch.nn.Parameter(torch.empty(self.order, device=device))
    self.B = torch.nn.Linear(u_dim + du_dim, h_dim, bias=False, device=device)
    if cnu_memories <= 0:
        self.C = torch.nn.Linear(h_dim, y_dim, bias=False, device=device)
    else:
        # Keys have the width of the concatenated [du, u] vector that forward feeds to compute_weights
        self.C = LinearCNU(h_dim, y_dim, bias=False, device=device, key_size=u_dim + du_dim,
                           delta=1, beta_k=delta, scramble=False, key_mem_units=cnu_memories, shared_keys=True)

    # Hidden state initialization (random buffers, one row per batch element)
    self.register_buffer('h_init', torch.randn((batch_size, h_dim), device=device))
    self.register_buffer('h_next', torch.randn((batch_size, h_dim), device=device))
    self.h = None  # State exposed by the last forward call
    self.dh = None  # Finite-difference state derivative from the last forward call
    self.sigma = sigma  # The non-linear activation function

    # System parameters
    self.u_dim = u_dim
    self.du_dim = du_dim
    self.delta = delta
    self.local = local  # If True the state update is computed locally in time (i.e., kept out from the graph)
    self.reset_parameters()
    self.forward_count = 0  # Number of forward steps since the last reset
    self.project_every = project_every

Methods:¶

reset_parameters ¶

reset_parameters() -> None

Initialize rotational frequencies

Source code in unaiverse/modules/networks.py

def reset_parameters(self) -> None:
    """Fill the rotational frequencies in place using the default uniform initializer.

    When `omega` is a `CNUs` instance its `M` attribute is initialised instead of `omega` itself.
    """
    target = self.omega.M if isinstance(self.omega, CNUs) else self.omega
    torch.nn.init.uniform_(target)

adjust_eigs ¶

adjust_eigs()

Placeholder for eigenvalue adjustment

Source code in unaiverse/modules/networks.py

@torch.no_grad()
def adjust_eigs(self):
    """No-op hook: this model applies no eigenvalue projection."""
    return None

forward ¶

forward(u: Tensor, du: Tensor, first: bool = True, last: bool = False) -> Tensor

Exact matrix exponential forward pass

Source code in unaiverse/modules/networks.py

def forward(self, u: torch.Tensor, du: torch.Tensor, first: bool = True, last: bool = False) -> torch.Tensor:
    """Advance the hidden state with exact 2x2 rotations and return the read-out.

    Args:
        u: Input tensor, flattened from dimension 1 onwards; None is replaced by zeros.
        du: Input descriptor tensor; None is replaced by zeros.
        first: If True the state is re-initialised through `init_h` and the step counter is reset.
        last: Not used by this method.

    Returns:
        The output of the read-out map applied to `sigma(h)`, where `h` is the old state when
        `local` is set and the new one otherwise.
    """
    dev = self.device

    # Substitute zero tensors for absent inputs
    if u is None:
        u = torch.zeros((self.batch_size, self.u_dim), device=dev)
    else:
        u = u.flatten(1).to(dev)
    if du is None:
        du = torch.zeros((self.batch_size, self.du_dim), device=dev)
    else:
        du = du.to(dev)

    # Choose the starting state: fresh on the first step, otherwise the one saved last time
    if first:
        h_prev = self.init_h(torch.cat([du, u], dim=1))
        self.forward_count = 0
    else:
        h_prev = self.h_next.data

    # Gradients with respect to the state are tracked from this point on
    h_prev.requires_grad_()
    pairs = h_prev.view(-1, self.order, 2)  # (batch, blocks, 2)

    # Periodic eigenvalue projection (disabled when project_every is 0)
    if self.project_every and self.forward_count % self.project_every == 0:
        self.adjust_eigs()

    # Pick the read-out: the plain layer, or a linear map whose weights the CNU computes
    # from the concatenated inputs as they are before handle_inputs runs
    if isinstance(self.C, LinearCNU):
        udu = torch.cat([du, u], dim=1)
        weight_C = self.C.compute_weights(udu).view(self.C.out_features, self.C.in_features)

        def C(x):
            return torch.nn.functional.linear(x, weight_C)
    else:
        C = self.C

    # Let the wrapper process the inputs, then rebuild the concatenation used by B
    du, u = self.handle_inputs(du, u)
    udu = torch.cat([du, u], dim=1)

    # Rotation angle of every block over one time step
    angle = self.omega * self.delta
    cos_t = torch.cos(angle)
    sin_t = torch.sin(angle)
    cos_m1 = cos_t - 1

    # Rotate each pair of state components
    first_comp = pairs[..., 0]
    second_comp = pairs[..., 1]
    h1 = cos_t * first_comp + sin_t * second_comp
    h2 = -sin_t * first_comp + cos_t * second_comp
    rec = torch.stack([h1, h2], dim=-1).flatten(start_dim=1)

    # Input contribution, computed block by block on the projected input
    u_hat = self.B(udu).view(-1, self.order, 2)
    u_first = u_hat[..., 0]
    u_second = u_hat[..., 1]
    inp1 = (sin_t * u_first - cos_m1 * u_second) / self.omega
    inp2 = (cos_m1 * u_first + sin_t * u_second) / self.omega
    inp = torch.stack([inp1, inp2], dim=-1).flatten(start_dim=1)

    h_new = rec + inp

    # self.h holds the old state in the local version and the new state otherwise;
    # the finite difference (h_new - h_old) / delta is the same in both cases
    self.h = h_prev if self.local else h_new
    self.dh = (h_new - h_prev) / self.delta

    # Read-out through the activation
    y = C(self.sigma(self.h))

    # Keep a detached copy of the new state for the next call
    self.h_next.data = h_new.detach()
    self.forward_count += 1

    return y