diff --git a/dwave/plugins/torch/models/boltzmann_machine.py b/dwave/plugins/torch/models/boltzmann_machine.py index ff8739f..00acb28 100644 --- a/dwave/plugins/torch/models/boltzmann_machine.py +++ b/dwave/plugins/torch/models/boltzmann_machine.py @@ -49,6 +49,13 @@ class GraphRestrictedBoltzmannMachine(torch.nn.Module): """Creates a graph-restricted Boltzmann machine. + The initialization strategy is grounded in + `Hinton's practical guide for RBM training`_, which recommends sampling + weights from a Gaussian distribution with mean 0 and standard deviation 0.01 (for zero-one-valued RBMs). + The scaling factor of :math:`1/\\sqrt{N}` ensures that the energy functional remains extensive + and initializes the GRBM in a paramagnetic regime, consistent with the `Sherrington-Kirkpatrick model`_. + The biases are initialized to zero to ensure extensiveness of the energy functional and to avoid introducing any initial preference for spin configurations. + Args: nodes (Iterable[Hashable]): List of nodes. edges (Iterable[tuple[Hashable, Hashable]]): List of edges. 
@@ -82,8 +89,8 @@ def __init__( self._idx_to_edge = {i: e for i, e in enumerate(self._edges)} self._edge_to_idx = {e: i for i, e in self._idx_to_edge.items()} - self._linear = torch.nn.Parameter(0.05 * (2 * torch.rand(self._n_nodes) - 1)) - self._quadratic = torch.nn.Parameter(5.0 * (2 * torch.rand(self._n_edges) - 1)) + self._linear = torch.nn.Parameter(torch.zeros(self._n_nodes)) + self._quadratic = torch.nn.Parameter(torch.randn(self._n_edges)/self._n_nodes**0.5) edge_idx_i = torch.tensor([self._node_to_idx[i] for i, _ in self._edges]) edge_idx_j = torch.tensor([self._node_to_idx[j] for _, j in self._edges]) diff --git a/releasenotes/notes/gaussian-rbm-init-28fd4d295ef86d77.yaml b/releasenotes/notes/gaussian-rbm-init-28fd4d295ef86d77.yaml new file mode 100644 index 0000000..ea450d5 --- /dev/null +++ b/releasenotes/notes/gaussian-rbm-init-28fd4d295ef86d77.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + Initialize ``GraphRestrictedBoltzmannMachine`` weights using Gaussian + random variables with standard deviation equal to :math:`1/\sqrt{N}`, where :math:`N` + denotes the number of nodes in the GRBM. The weight-initialization strategy is grounded in `Hinton's practical guide for RBM training <https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf>`_, which recommends sampling weights from a Gaussian distribution with mean 0 and standard deviation 0.01 (for zero-one-valued RBMs). The scaling factor of :math:`1/\sqrt{N}` ensures that the energy functional remains extensive and initializes the GRBM in a paramagnetic regime, consistent with the `Sherrington-Kirkpatrick model <https://en.wikipedia.org/wiki/Spin_glass>`_. 
+ + diff --git a/tests/test_dvae_winci2020.py b/tests/test_dvae_winci2020.py index 38dfff7..e22cd39 100644 --- a/tests/test_dvae_winci2020.py +++ b/tests/test_dvae_winci2020.py @@ -78,12 +78,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # are the models themselves latent_dims_list = [1, 2] self.encoders = {i: Encoder(i) for i in latent_dims_list} - # self.decoders is independent of number of latent dims, but we also create a dict to separate - # them + # self.decoders is independent of number of latent dims, but we also create a dict to + # separate them self.decoders = {i: Decoder(latent_features, input_features) for i in latent_dims_list} - - # self.dvaes is a dict whose keys are the numbers of latent dims and the values are the models - # themselves + # self.dvaes is a dict whose keys are the numbers of latent dims and the values are the + # models themselves self.dvaes = {i: DVAE(self.encoders[i], self.decoders[i]) for i in latent_dims_list} @@ -248,19 +247,22 @@ def test_latent_to_discrete(self, n_samples, expected): @parameterized.expand([(i, j) for i in range(1, 3) for j in [0, 1, 5, 1000]]) def test_forward(self, n_latent_dims, n_samples): """Test the forward method.""" + torch.manual_seed(1234) # Set seed for reproducibility of latent_to_discrete sampling expected_latents = self.encoders[n_latent_dims](self.data) expected_discretes = self.dvaes[n_latent_dims].latent_to_discrete( expected_latents, n_samples ) expected_reconstructed_x = self.decoders[n_latent_dims](expected_discretes) + torch.manual_seed(1234) # Set seed again to ensure that the sampling in the forward method + # is the same as in the expected_discretes latents, discretes, reconstructed_x = self.dvaes[n_latent_dims].forward( x=self.data, n_samples=n_samples ) + torch.testing.assert_close(latents, expected_latents) + torch.testing.assert_close(discretes, expected_discretes) + torch.testing.assert_close(reconstructed_x, expected_reconstructed_x) - assert 
torch.equal(reconstructed_x, expected_reconstructed_x) - assert torch.equal(discretes, expected_discretes) - assert torch.equal(latents, expected_latents) if __name__ == "__main__":