Skip to content

Clusterer

Implementation of Self-Organizing Map.

SOM(n_columns=5, n_rows=5, initialcodebook=None, kerneltype=0, maptype='planar', gridtype='rectangular', compactsupport=True, neighborhood='gaussian', std_coeff=0.5, random_state=None, verbose=0)

Bases: BaseEstimator, ClusterMixin

Class to fit and visualize a Self-Organizing Map (SOM).

The implementation uses SOM from Somoclu. Read more in the [user_guide].

Parameters:

Name Type Description Default
n_columns int

The number of columns in the map.

5
n_rows int

The number of rows in the map.

5
initialcodebook ArrayLike | str | None

Define the codebook to start the training. If initialcodebook='pca' then the codebook is initialized from the first subspace spanned by the first two eigenvectors of the correlation matrix.

None
kerneltype int

Specify which kernel to use. If kerneltype=0 use dense CPU kernel. Else if kerneltype=1 use dense GPU kernel if compiled with it.

0
maptype str

Specify the map topology. If maptype='planar' use planar map. Else if maptype='toroid' use toroid map.

'planar'
gridtype str

Specify the grid form of the nodes. If gridtype='rectangular' use rectangular neurons. Else if gridtype='hexagonal' use hexagonal neurons.

'rectangular'
compactsupport bool

Cut off map updates beyond the training radius with the Gaussian neighborhood.

True
neighborhood str

Specify the neighborhood. If neighborhood='gaussian' use Gaussian neighborhood. Else if neighborhood='bubble' use bubble neighborhood function.

'gaussian'
std_coeff float

Set the coefficient `std_coeff` in the Gaussian neighborhood $\exp\left(-\lVert x-y\rVert^2 / \left(2\,(\text{std\_coeff}\cdot\text{radius})^2\right)\right)$.

0.5
random_state int | RandomState | None

Control the randomization of the algorithm by specifying the codebook initialization. It is ignored when initialcodebook is not None.

  • If int, random_state is the seed used by the random number generator.
  • If RandomState instance, random_state is the random number generator.
  • If None, the random number generator is the RandomState instance used by np.random.
None
verbose int

Specify verbosity level (0, 1, or 2).

0
Source code in src/clover/clusterer/_som.py
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def __init__(
    self: Self,
    n_columns: int = 5,
    n_rows: int = 5,
    initialcodebook: npt.ArrayLike | str | None = None,
    kerneltype: int = 0,
    maptype: str = 'planar',
    gridtype: str = 'rectangular',
    compactsupport: bool = True,
    neighborhood: str = 'gaussian',
    std_coeff: float = 0.5,
    random_state: int | np.random.RandomState | None = None,
    verbose: int = 0,
) -> None:
    self.n_columns = n_columns
    self.n_rows = n_rows
    self.initialcodebook = initialcodebook
    self.kerneltype = kerneltype
    self.maptype = maptype
    self.gridtype = gridtype
    self.compactsupport = compactsupport
    self.neighborhood = neighborhood
    self.std_coeff = std_coeff
    self.random_state = random_state
    self.verbose = verbose

fit(X, y=None, **fit_params)

Train the self-organizing map.

Parameters:

Name Type Description Default
X ArrayLike

Training instances to cluster.

required
y ArrayLike | None

Ignored

None
fit_params dict[str, Any]

Parameters to pass to train method of Somoclu object.

{}

Returns:

Type Description
Self

The object itself.

Source code in src/clover/clusterer/_som.py
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
def fit(self: Self, X: npt.ArrayLike, y: npt.ArrayLike | None = None, **fit_params: dict[str, Any]) -> Self:
    """Train the self-organizing map.

    The input is validated as a float32 array and min-max scaled before
    training. After training, the following attributes are set:
    ``random_state_``, ``algorithm_`` (the Somoclu object),
    ``labels_mapping_`` (grid position -> cluster label), ``labels_``
    (cluster label of each sample) and ``neighbors_``.

    Args:
        X:
            Training instances to cluster.
        y:
            Ignored
        fit_params:
            Parameters to pass to train method of Somoclu object.

    Returns:
        The object itself.
    """

    # Check and normalize input data
    X_scaled = minmax_scale(check_array(X, dtype=np.float32))

    # Check random_state
    self.random_state_ = check_random_state(self.random_state)

    # Initialize codebook
    if self.initialcodebook is None:
        if self.random_state is None:
            # No seed requested: let Somoclu draw the random codebook itself
            initialcodebook = None
            initialization = 'random'
        else:
            # Seeded run: draw the codebook here so that it is reproducible
            codebook_size = self.n_columns * self.n_rows * X_scaled.shape[1]
            initialcodebook = self.random_state_.random_sample(
                codebook_size,
            ).astype(np.float32)
            initialization = None
    elif isinstance(self.initialcodebook, str) and self.initialcodebook == 'pca':
        # The isinstance guard keeps array codebooks away from ``== 'pca'``,
        # which would be an elementwise comparison with an ambiguous truth
        # value. The PCA initialization itself is delegated to Somoclu.
        initialcodebook = None
        initialization = 'pca'
    else:
        # User-supplied codebook is forwarded unchanged
        initialcodebook = self.initialcodebook
        initialization = None

    # Create Somoclu object
    self.algorithm_ = Somoclu(
        n_columns=self.n_columns,
        n_rows=self.n_rows,
        initialcodebook=initialcodebook,
        kerneltype=self.kerneltype,
        maptype=self.maptype,
        gridtype=self.gridtype,
        compactsupport=self.compactsupport,
        neighborhood=self.neighborhood,
        std_coeff=self.std_coeff,
        initialization=initialization,
        data=None,
        verbose=self.verbose,
    )

    # Fit Somoclu
    self.algorithm_.train(data=X_scaled, **fit_params)

    # Grid labels: best matching unit of every sample as a hashable tuple
    grid_labels = cast(list[tuple[int, int]], [tuple(grid_label) for grid_label in self.algorithm_.bmus.tolist()])

    # Generate labels mapping
    self.labels_mapping_ = generate_labels_mapping(grid_labels)

    # Generate cluster labels
    self.labels_ = np.array(
        [self.labels_mapping_[grid_label] for grid_label in grid_labels],
    )

    # Generate labels neighbors
    self.neighbors_ = self._generate_neighbors(
        sorted(set(grid_labels)),
        self.labels_mapping_,
    )

    return self

fit_predict(X, y=None, **fit_params)

Train the self-organizing map and assign cluster labels to samples.

Parameters:

Name Type Description Default
X ArrayLike

Training instances to cluster.

required
y ArrayLike | None

Ignored.

None
fit_params dict[str, Any]

Parameters to pass to train method of Somoclu object.

{}

Returns:

Name Type Description
labels NDArray

Index of the cluster each sample belongs to.

Source code in src/clover/clusterer/_som.py
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
def fit_predict(
    self: Self,
    X: npt.ArrayLike,
    y: npt.ArrayLike | None = None,
    **fit_params: dict[str, Any],
) -> npt.NDArray:
    """Train the self-organizing map and assign cluster labels to samples.

    Args:
        X:
            New data to transform.
        y:
            Ignored.
        fit_params:
            Parameters to pass to train method of Somoclu object.

    Returns:
        labels:
            Index of the cluster each sample belongs to.
    """
    return self.fit(X=X, y=None, **fit_params).labels_

extract_topological_neighbors(col, row, gridtype, n_rows, n_columns, bmus)

Return the topological neighbors of a neuron.

Source code in src/clover/clusterer/_som.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def extract_topological_neighbors(
    col: int,
    row: int,
    gridtype: str,
    n_rows: int,
    n_columns: int,
    bmus: list[list[int]],
) -> list[tuple[int, int]]:
    """Return the occupied grid positions adjacent to the neuron at (col, row).

    Candidates are the four positions left, right, above and below the
    neuron. For ``gridtype == 'hexagonal'`` two diagonal positions are added,
    and their column side depends on the parity of ``row``. A candidate is
    kept only if it lies inside the ``n_columns`` x ``n_rows`` grid and its
    ``[col, row]`` pair is present in ``bmus``.
    """

    # Candidates common to both grid types, in left/right/above/below order
    shared_offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))
    candidates = [(col + d_col, row + d_row) for d_col, d_row in shared_offsets]

    # Hexagonal grids add two diagonal candidates on the row-parity side
    if gridtype == 'hexagonal':
        offset = (-1) ** row
        candidates.append((col - offset, row - offset))
        candidates.append((col - offset, row + offset))

    # Keep candidates that are inside the grid and listed in bmus
    neighbors = []
    for cand_col, cand_row in candidates:
        inside_grid = 0 <= cand_col < n_columns and 0 <= cand_row < n_rows
        if inside_grid and [cand_col, cand_row] in bmus:
            neighbors.append((cand_col, cand_row))

    return neighbors

generate_labels_mapping(grid_labels)

Generate a mapping between grid labels and cluster labels.

Source code in src/clover/clusterer/_som.py
19
20
21
22
23
24
25
26
27
28
def generate_labels_mapping(grid_labels: list[tuple[int, int]]) -> dict[tuple[int, int], int]:
    """Map every distinct grid label to a consecutive integer cluster label.

    Distinct grid labels are sorted and numbered from zero in that order.
    """
    return {grid_label: cluster_label for cluster_label, grid_label in enumerate(sorted(set(grid_labels)))}