from . import som
from collections import defaultdict, Counter
from operator import itemgetter
import numpy as np
class SomClassifier:
    """Supervised classifier built on top of a self-organizing map (SOM).

    The network is instantiated lazily from the stored constructor
    parameters, trained by :meth:`fit`, then calibrated: every unit that is
    the best-match unit (bmu) of at least one training sample is associated
    with the dominant label among those samples.
    """

    def __init__(self, cls=som.BSom, height=10, width=10, dim=None, **kwargs):
        r"""Store the SOM constructor and its parameters.

        The network itself is not created here (see :attr:`som`).

        :param cls: SOM constructor
        :type cls: :class:`Som` or :class:`BSom`
        :param height: SOM height (default 10)
        :type height: int
        :param width: SOM width (default 10)
        :type width: int
        :param dim: SOM nodes nb of dimensions (if None, detected when
            :meth:`fit` is called)
        :type dim: int
        :param \**kwargs: named parameters for the constructor
        """
        # Fold the explicit arguments into the keyword set that will be
        # handed to the SOM constructor.
        kwargs['h'] = height
        kwargs['w'] = width
        if dim is not None:
            kwargs['d'] = dim
        self._cls = cls           # the network constructor
        self._kwargs = kwargs     # named params for the constructor
        self._som = None          # the SOM network (created on first access)
        self._bmus = None         # bmus of the training data
        self.classifier = None    # unit -> (label, ratio), set by calibration
        self.quant_error = None
        self.topog_error = None

    def setup(self, **kwargs):
        """Add / change SOM constructor parameters.

        :raises AssertionError: if the SOM has already been instantiated.
        """
        # Raised explicitly (rather than with `assert`) so that the guard
        # still works when Python runs with -O.
        if self._som is not None:
            raise AssertionError('cannot setup params after SOM instanciation')
        self._kwargs.update(kwargs)

    def params(self):
        """Get the SOM constructor parameters.

        :returns: the internal parameters dict (not a copy).
        :rtype: dict
        """
        return self._kwargs

    @property
    def som(self):
        """The underlying SOM network, built on first access."""
        if self._som is None:
            self._som = self._cls(**self._kwargs)
        return self._som

    def fit(self, data, labels, **kwargs):
        r"""Train the SOM on the data and calibrate it.

        After the training, `self.quant_error` and `self.topog_error` are
        respectively set.

        :param data: sparse input matrix (ideal dtype is `numpy.float32`)
        :type data: :class:`scipy.sparse.csr_matrix`
        :param labels: the labels associated with data
        :type labels: iterable
        :param \**kwargs: optional parameters for :meth:`train`
        :raises AssertionError: if the SOM dimension differs from
            ``data.shape[1]``.
        """
        # Set the vectors dimension from the data if it was not given.
        if 'd' not in self._kwargs:
            self._kwargs['d'] = data.shape[1]
        if self.som.dim != data.shape[1]:
            raise AssertionError('dimension mismatch')

        # Train the network.
        self.som.train(data, **kwargs)

        # NOTE(review): the two positional flags presumably ask the SOM to
        # also compute the error measures read just below -- confirm
        # against the `bmus` signature of the SOM class.
        bmus = self.som.bmus(data, True, True)

        # Error measures of the training data.
        self.quant_error = self.som.quantization_error
        self.topog_error = self.som.topographic_error

        # Keep the training bmus, they drive the calibration.
        self._bmus = bmus
        self._calibrate(data, labels)

    def _calibrate(self, data, labels):
        """Calibrate the network using `self._bmus`.

        For every unit hit by at least one training sample, store the
        dominant label and the ratio of samples carrying that label.
        `data` is not used; only `self._bmus` and `labels` are read.
        """
        votes = defaultdict(Counter)
        for (i, j), label in zip(self._bmus, labels):
            votes[i, j][label] += 1

        self.classifier = {}
        for unit, counts in votes.items():
            best_label, best_count = max(counts.items(), key=itemgetter(1))
            total = sum(counts.values())
            self.classifier[unit] = best_label, best_count / total

    def _predict_from_bmus(self, bmus, unkown):
        """Map each bmu to its calibrated label.

        Units absent from the calibration get the `unkown` label.
        """
        predicted = []
        for i, j in bmus:
            entry = self.classifier.get((i, j))
            predicted.append(unkown if entry is None else entry[0])
        return np.array(predicted)

    def predict(self, data, unkown=None):
        """Classify data according to previous calibration.

        :param data: sparse input matrix (ideal dtype is `numpy.float32`)
        :type data: :class:`scipy.sparse.csr_matrix`
        :param unkown: the label to attribute if no label is known
        :returns: the labels guessed for data
        :rtype: `numpy.array`
        :raises AssertionError: if the classifier is not calibrated.
        """
        if self.classifier is None:
            raise AssertionError('not calibrated')
        bmus = self.som.bmus(data)
        return self._predict_from_bmus(bmus, unkown)

    def fit_predict(self, data, labels, unkown=None, **kwargs):
        r"""Fit and classify data efficiently.

        The bmus computed during the fit are reused for the prediction.

        :param data: sparse input matrix (ideal dtype is `numpy.float32`)
        :type data: :class:`scipy.sparse.csr_matrix`
        :param labels: the labels associated with data
        :type labels: iterable
        :param unkown: the label to attribute if no label is known
        :param \**kwargs: optional parameters forwarded to :meth:`fit`
        :returns: the labels guessed for data
        :rtype: `numpy.array`
        """
        self.fit(data, labels, **kwargs)
        return self._predict_from_bmus(self._bmus, unkown)

    def get_precision(self):
        """Return the ratio of the dominant label for each unit.

        Units that were not calibrated are left at 0.

        :returns: the ratio part of the dominant label for each unit.
        :rtype: 2D :class:`numpy.ndarray`
        :raises AssertionError: if the classifier is not calibrated.
        """
        if self.classifier is None:
            raise AssertionError('not calibrated')
        arr = np.zeros((self.som.nrows, self.som.ncols))
        for unit, (_label, ratio) in self.classifier.items():
            arr[unit] = ratio
        return arr

    def histogram(self, bmus=None):
        """Return a 2D histogram of bmus.

        :param bmus: the best-match units indexes for underlying data
            (defaults to the bmus of the training data).
        :type bmus: :class:`numpy.ndarray`
        :returns: the computed 2D histogram of bmus.
        :rtype: :class:`numpy.ndarray`
        :raises AssertionError: if `bmus` is None and no training was done.
        """
        if bmus is None:
            if self._bmus is None:
                raise AssertionError('not trained')
            bmus = self._bmus
        arr = np.zeros((self.som.nrows, self.som.ncols))
        for i, j in bmus:
            arr[i, j] += 1
        return arr

    def __repr__(self):
        """Show the class name followed by the SOM constructor parameters."""
        shown = ', '.join('%s=%s' % item for item in self._kwargs.items())
        return '<SomClassifier: ' + shown + '>'