Base

Base class for distributors.

`BaseDistributor`

Bases: BaseEstimator

The base class for distributors.

A distributor sets the proportion of samples to be generated inside each cluster and between clusters. Warning: This class should not be used directly. Use the derived classes instead.

`fit(X, y, labels=None, neighbors=None)`

Generate the intra-label and inter-label distribution.

Parameters:

Name	Type	Description	Default
`X`	`InputData`	Matrix containing the data which have to be sampled.	required
`y`	`Targets`	Corresponding label for each sample in X.	required
`labels`	`Labels \| None`	Cluster labels of each sample.	`None`
`neighbors`	`Neighbors \| None`	An array that contains all neighboring pairs. Each row is a unique neighboring pair.	`None`

Returns:

Type	Description
`Self`	The object itself.

Source code in src/clover/distribution/base.py

def fit(
    self: Self,
    X: InputData,
    y: Targets,
    labels: Labels | None = None,
    neighbors: Neighbors | None = None,
) -> Self:
    """Generate the intra-label and inter-label distribution.

    Args:
        X:
            Matrix containing the data which have to be sampled.
        y:
            Corresponding label for each sample in X.
        labels:
            Cluster labels of each sample. When `None`, every sample is
            assigned to a single cluster labeled `0`.
        neighbors:
            An array that contains all neighboring pairs. Each row is
            a unique neighboring pair. When `None`, an empty array of
            shape `(0, 2)` is used.

    Returns:
        The object itself.
    """
    # Check data
    X, y = check_X_y(X, y, dtype=None)

    # Set statistics
    counts = Counter(y)
    # Computed once: the maximum does not change while scanning the counts.
    max_count = max(counts.values())
    self.majority_class_labels_ = [
        class_label for class_label, class_label_count in counts.items() if class_label_count == max_count
    ]
    # Keep the fallback one-dimensional so that it has the same shape
    # semantics as the output of `np.unique` (iterable, supports `len`).
    self.unique_cluster_labels_ = np.unique(labels) if labels is not None else np.array([0], dtype=int)
    self.unique_class_labels_ = np.unique(y)
    self.n_samples_ = len(X)

    # Set default attributes
    self.labels_ = np.repeat(0, len(X)) if labels is None else check_array(labels, ensure_2d=False)
    self.neighbors_ = np.empty((0, 2), dtype=int) if neighbors is None else check_array(neighbors, ensure_2d=False)
    # Default intra-cluster distribution: cluster `0` receives the full
    # proportion (1.0) for every class that is not a majority class.
    self.intra_distribution_: IntraDistribution = {
        (0, class_label): 1.0
        for class_label in self.unique_class_labels_
        if class_label not in self.majority_class_labels_
    }
    # Default inter-cluster distribution is empty.
    self.inter_distribution_: InterDistribution = {}

    # Fit distributor
    self._fit(X, y, labels, neighbors)

    # Validate fitting procedure
    self._validate_fitting()

    return self

`fit_distribute(X, y, labels, neighbors)`

Return the intra-label and inter-label distribution.

Parameters:

Name	Type	Description	Default
`X`	`InputData`	Matrix containing the data which have to be sampled.	required
`y`	`Targets`	Corresponding label for each sample in X.	required
`labels`	`Labels \| None`	Cluster labels of each sample.	required
`neighbors`	`Neighbors \| None`	An array that contains all neighboring pairs. Each row is a unique neighboring pair.	required

Returns:

Name	Type	Description
`distributions`	`tuple[IntraDistribution, InterDistribution]`	A tuple with the two distributions.

Source code in src/clover/distribution/base.py

def fit_distribute(
    self: Self,
    X: InputData,
    y: Targets,
    labels: Labels | None,
    neighbors: Neighbors | None,
) -> tuple[IntraDistribution, InterDistribution]:
    """Fit the distributor and return both resulting distributions.

    Args:
        X:
            Matrix containing the data which have to be sampled.
        y:
            Corresponding label for each sample in X.
        labels:
            Labels of each sample.
        neighbors:
            An array that contains all neighboring pairs. Each row is
            a unique neighboring pair.

    Returns:
        distributions:
            A tuple `(intra_distribution, inter_distribution)` read from
            the fitted object.
    """
    # Fitting populates `intra_distribution_` and `inter_distribution_`.
    self.fit(X, y, labels, neighbors)

    intra_distribution = self.intra_distribution_
    inter_distribution = self.inter_distribution_
    return intra_distribution, inter_distribution