"""High-level Python runtime interface."""
import contextlib
import io
import sys
import warnings
import numpy as np
from ..model import minirocket_classifier
# Public API of this module: the names exported by a star-import.
__all__ = [
"classify_train",
"classify_predict",
]
class _LoggerStream(io.TextIOBase):
    """Text stream that forwards written lines to a logger callback.

    Text is buffered until a newline arrives; every complete line that is
    not blank is passed to *logger*. While *logger* runs, ``sys.stdout`` is
    temporarily set to *original_stdout* so that anything the logger prints
    does not come back into this stream.

    Parameters
    ----------
    logger : callable
        Function which accepts a single message string.
    original_stdout : file-like
        Stream installed as ``sys.stdout`` for the duration of each
        *logger* call.
    """

    def __init__(self, logger, original_stdout):
        super().__init__()
        self.logger = logger
        self._original_stdout = original_stdout
        # Text received so far that has not been terminated by a newline.
        self._buf = ""

    def _emit(self, message):
        """Call the logger with *message* while the original stdout is active."""
        # Remember what is installed right now instead of assuming it is this
        # stream: flush() also runs from close() and at finalisation, i.e.
        # possibly after the redirection has ended, and must not hijack
        # sys.stdout in that case.
        previous_stdout = sys.stdout
        sys.stdout = self._original_stdout
        try:
            self.logger(message)
        finally:
            sys.stdout = previous_stdout

    def write(self, s):
        """Buffer *s* and forward every completed non-blank line to the logger.

        Returns
        -------
        int
            Number of characters accepted, which is always ``len(s)``.
        """
        self._buf += s
        while "\n" in self._buf:
            line, self._buf = self._buf.split("\n", 1)
            # Blank lines are dropped; other lines are forwarded unstripped.
            if line.strip():
                self._emit(line)
        return len(s)

    def flush(self):
        """Forward any buffered partial line (stripped) and clear the buffer."""
        pending = self._buf.strip()
        # Clear first so a failing logger cannot cause the same text to be
        # forwarded again by a later flush()/close().
        self._buf = ""
        if pending:
            self._emit(pending)
# (stray "[docs]" source-link marker from the rendered documentation removed)
def classify_train(
    profiles,
    labels,
    cv=5,
    calibration="sigmoid",
    normalize=True,
    n_jobs=None,
    random_state=0,
    logger=lambda x: None,
    n_splits=None,
):
    """Fit a calibrated classifier on labelled profiles.

    Parameters
    ----------
    profiles : heavyedge.ProfileData
        Open h5 file of profiles.
    labels : np.ndarray
        Labels, given in the same order as the profiles.
    cv : int, iterable, or cross-validation generator, default=5
        Cross-validation strategy.
        An integer is taken as the number of folds for stratified k-fold CV.
    calibration : {"sigmoid", "isotonic", "temperature", "sigmoid_ovo", "isotonic_ovo"}
        Calibration method for the classifier.
    normalize : bool, default=True
        Whether to divide each profile by its area under the curve.
        Pass False if *profiles* are already normalized.
    n_jobs : int, default=None
        Number of jobs to run in parallel.
    random_state : int, default=0
        Random seed for reproducibility.
    logger : callable, optional
        Function which accepts a progress message string.
    n_splits : int, optional
        Number of splits for cross-validation.
        When given, it replaces *cv*.

        .. deprecated:: 1.4.0
            The *n_splits* parameter is deprecated and will be removed in a
            future version. Use *cv* instead.

    Returns
    -------
    model
        Trained model object.

    Examples
    --------
    >>> from heavyedge import ProfileData
    >>> from heavyedge_classify.samples import get_sample_path
    >>> from heavyedge_classify.api import classify_train
    >>> import numpy as np
    >>> profiles = ProfileData(get_sample_path("Profiles.h5"))
    >>> labels = np.load(get_sample_path("labels.npy"))
    >>> classify_train(profiles, labels)
    CalibratedClassifierCV(...)
    """
    if n_splits is not None:
        deprecation_message = (
            "n_splits is deprecated and will be removed in a future version. "
            "Use cv instead."
        )
        # stacklevel=2 attributes the warning to the caller of this function.
        warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
        cv = n_splits

    grid = profiles.x()
    data, _, _ = profiles[:]
    if normalize:
        # Scale every row in place so that its trapezoidal area over the
        # shared grid becomes one.
        areas = np.trapezoid(data, grid, axis=1)
        data /= areas[..., np.newaxis]

    classifier_options = {
        "cv": cv,
        "calibration": calibration,
        "n_jobs": n_jobs,
        "verbose": True,
        "random_state": random_state,
    }
    model = minirocket_classifier(**classifier_options)

    # Whatever is written to stdout during fitting is handed to *logger*
    # line by line instead of being printed.
    with contextlib.redirect_stdout(_LoggerStream(logger, sys.stdout)):
        model.fit(data, labels)
    return model
# (stray "[docs]" source-link marker from the rendered documentation removed)
def classify_predict(
    model, profiles, normalize=True, batch_size=None, logger=lambda x: None
):
    """Lazily predict probabilistic labels of profiles with a trained model.

    Parameters
    ----------
    model
        Trained model object. Its ``predict_proba`` method is called once
        per batch.
    profiles : heavyedge.ProfileData
        Open h5 file of profiles.
    normalize : bool, default=True
        Whether to divide each profile by its area under the curve.
        Pass False if *profiles* are already normalized.
    batch_size : int, optional
        Number of profiles loaded per batch.
        If omitted, all data are loaded at once.
    logger : callable, optional
        Function which accepts a progress message string. After each batch
        has been consumed it receives ``"<done>/<total>"``.

    Yields
    ------
    predicted_labels : np.ndarray
        Predicted probabilistic label array of one batch.

    Examples
    --------
    >>> import pickle
    >>> from heavyedge import ProfileData
    >>> from heavyedge_classify.samples import get_sample_path
    >>> from heavyedge_classify.api import classify_predict
    >>> with open(get_sample_path("model.pkl"), "rb") as f:
    ...     model = pickle.load(f)
    >>> profiles = ProfileData(get_sample_path("Profiles.h5"))
    >>> [lab.shape for lab in classify_predict(model, profiles, batch_size=50)]
    [(50, 3), (25, 3)]
    """
    grid = profiles.x()
    total, _ = profiles.shape()

    # Describe the work as (row selection, profiles done afterwards) pairs so
    # that the unbatched and the batched case share a single loop.
    if batch_size is None:
        chunks = [(slice(None), total)]
    else:
        chunks = (
            (slice(start, start + batch_size), min(start + batch_size, total))
            for start in range(0, total, batch_size)
        )

    for rows, done in chunks:
        batch, _, _ = profiles[rows]
        if normalize:
            # In-place scaling so each row has unit trapezoidal area.
            areas = np.trapezoid(batch, grid, axis=1)
            batch /= areas[..., np.newaxis]
        yield model.predict_proba(batch)
        # Progress is reported only once the consumer asks for the next item.
        logger(f"{done}/{total}")