Source code for flexible_linear

# Author:  Markus Kliegl
# License: MIT
"""Regularized linear regression with custom training and regularization costs.

:class:`FlexibleLinearRegression` is a scikit-learn-compatible linear
regression estimator that allows specification of arbitrary
training and regularization cost functions.

For a linear model:

.. math::

   \\textrm{predictions} = X \\cdot W

this model attempts to find :math:`W` by minimizing:

.. math::

   \\min_{W} \\left\\{
       \\textrm{cost}(X \\cdot W - y) + C \\cdot \\textrm{reg\\_cost}(W)
   \\right\\}

for given training data :math:`X, y`. Here :math:`C` is the
regularization strength and :math:`\\textrm{cost}` and
:math:`\\textrm{reg\\_cost}` are customizable cost functions
(e.g., the squared :math:`\\ell^2` norm or the :math:`\\ell^1` norm).

*Note:* In reality, we fit an intercept (bias coefficient) as well.
Think of :math:`X` in the above as having an extra column of 1's.

Ideally, the cost functions should be convex and continuously
differentiable.
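
For example, one might fit and apply the estimator like this (a minimal
sketch on synthetic data)::

    import numpy as np
    from flexible_linear import FlexibleLinearRegression

    rng = np.random.RandomState(0)
    X = rng.randn(100, 3)
    y = np.dot(X, [1.0, -2.0, 0.5]) + 0.1 * rng.randn(100)

    model = FlexibleLinearRegression(C=0.1, cost_func='japanese',
                                     cost_opts={'eta': 0.5})
    model.fit(X, y)
    predictions = model.predict(X)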

We provide some cost functions: see :func:`l1_cost_func`,
:func:`l2_cost_func`, :func:`japanese_cost_func` (or the
:data:`cost_func_dict` dictionary). If you want to use a
custom cost function, it should be of the form::

    def custom_cost_func(z, **opts):
        # <code to compute cost and gradient>
        return cost, gradient

where `cost` is a float, `gradient` is an array of the same
dimensions as `z`, and you may specify any number of keyword
arguments.
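
As an illustration (not part of this module), a simple quartic cost could
be written as::

    def quartic_cost_func(z):
        # cost grows like |z|**4; the gradient is its componentwise derivative
        N = len(z)
        return np.sum(z ** 4) / N, 4.0 * z ** 3 / N

and passed via `cost_func=quartic_cost_func`. Keyword arguments, if any,
are supplied through `cost_opts` (or `reg_cost_opts` for the
regularization cost).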
"""
from __future__ import division
import numpy as np
import scipy.optimize
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted


def l2_cost_func(z):
    """Normalized squared :math:`\\ell^2` cost and gradient

    .. math::

       \\mathrm{cost}(z) = \\frac{1}{2n} ||z||_{\\ell^2}^2
       = \\frac{1}{2n} \\sum_{i=1}^n |z_i|^2 \\,.

    Args:
        z (ndarray): Input vector.

    Returns:
        Tuple[float, ndarray]: The cost and gradient (same shape as `z`).
    """
    N = len(z)
    return 0.5 * np.dot(z, z) / N, z / N


def l1_cost_func(z):
    """Normalized :math:`\\ell^1` cost and gradient

    .. math::

       \\mathrm{cost}(z) = \\frac{1}{n} ||z||_{\\ell^1}
       = \\frac{1}{n} \\sum_{i=1}^n |z_i| \\,.

    .. note:: This cost is not differentiable. For a smooth alternative,
       see :func:`japanese_cost_func`.

    Args:
        z (ndarray): Input vector.

    Returns:
        Tuple[float, ndarray]: The cost and gradient (same shape as `z`).
    """
    N = len(z)
    return np.sum(np.abs(z)) / N, np.sign(z) / N


def japanese_cost_func(z, eta=0.1):
    """'Japanese bracket' cost and gradient

    Computes cost and gradient for the cost function:

    .. math::

       \\mathrm{cost}(z) = \\frac{\\eta^2}{n} \\sum_{i=1}^n \\left(
           \\sqrt{ 1 + \\left( \\frac{z_i}{\\eta} \\right)^2 } - 1
       \\right) \\,.

    This cost function interpolates componentwise between the squared
    :math:`\\ell^2` norm (for :math:`|z_i| \\ll \\eta`) and the
    :math:`\\ell^1` norm (for :math:`|z_i| \\gg \\eta`) and is thus
    useful for reducing the impact of outliers (or when dealing with
    heavy-tailed rather than Gaussian noise). Unlike the
    :math:`\\ell^1` norm, this cost function is smooth.

    The key to understanding this is that the *Japanese bracket*

    .. math::

       \\langle z \\rangle := \\sqrt{ 1 + |z|^2 }

    satisfies these asymptotics:

    .. math::

       \\sqrt{ 1 + |z|^2 } - 1 \\approx
       \\begin{cases}
           \\frac12 |z|^2 & \\text{for $|z| \\ll 1$} \\\\
           |z| & \\text{for $|z| \\gg 1$}
       \\end{cases} \\,.

    Args:
        z (ndarray): Input vector.
        eta (Optional[float]): Positive scale parameter.

    Returns:
        Tuple[float, ndarray]: The cost and gradient (same shape as `z`).
    """
    N = len(z)
    z_norm = z / eta
    z_jap = np.sqrt(1.0 + z_norm * z_norm)  # componentwise Japanese bracket
    cost = eta**2 * np.sum(z_jap - 1.0)
    gradient = z / z_jap
    return cost / N, gradient / N


cost_func_dict = {
    'l2': l2_cost_func,
    'l1': l1_cost_func,
    'japanese': japanese_cost_func,
}
"""Dictionary of implemented cost functions."""


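# Usage note (illustrative): FlexibleLinearRegression below accepts either a
# key from this dictionary (e.g. cost_func='l1') or the callable itself
# (cost_func=l1_cost_func); parameters such as eta for the 'japanese' cost
# are supplied via cost_opts / reg_cost_opts.

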
class FitError(Exception):
    """Exception raised when fitting fails.

    Attributes:
        message (str): Error message.
        res (scipy.optimize.OptimizeResult): Results returned by
            `scipy.optimize.minimize`. See SciPy documentation on
            `OptimizeResult`_ for details.

    .. _OptimizeResult: http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html  # NOQA
    """
    def __init__(self, message, res):
        self.message = message
        self.res = res


class FlexibleLinearRegression(BaseEstimator):
    """Regularized linear regression with custom training/regularization costs.

    Args:
        C (Optional[float]): Nonnegative regularization coefficient.
            (Zero means no regularization.)
        cost_func (Optional[callable or str]): Training cost function.
            If not callable, should be one of 'l1', 'l2', or 'japanese'.
        cost_opts (Optional[dict]): Parameters to pass to `cost_func`.
        reg_cost_func (Optional[callable or str]): Regularization cost
            function. If not callable, should be one of 'l1', 'l2', or
            'japanese'.
        reg_cost_opts (Optional[dict]): Parameters to pass to
            `reg_cost_func`.

    Attributes:
        coef_ (ndarray): Weight vector of shape (n_features+1,).
            (coef_[0] is the intercept coefficient.)
    """
    def __init__(
            self, C=1.0, cost_func='l2', cost_opts=None,
            reg_cost_func='l2', reg_cost_opts=None):
        self.C = C
        self.cost_func = cost_func
        self.cost_opts = cost_opts
        self.reg_cost_func = reg_cost_func
        self.reg_cost_opts = reg_cost_opts

    def _check_cost_func(self, cost_func, cost_opts):
        if not callable(cost_func):
            try:
                cost_func = cost_func_dict[cost_func]
            except KeyError:
                raise ValueError(
                    "Unknown cost function: '{}'".format(cost_func))
        if cost_opts is None:
            cost_opts = {}
        return cost_func, cost_opts

    def fit(self, X, y):
        """Fit the model.

        Args:
            X (ndarray): Training data of shape ``(n_samples, n_features)``.
            y (ndarray): Target values of shape ``(n_samples,)``.

        Returns:
            self

        Raises:
            FitError: If the fitting failed.
        """
        X, y = check_X_y(X, y, y_numeric=True)

        C = self.C
        cost_func, cost_opts = self._check_cost_func(
            self.cost_func, self.cost_opts)
        reg_cost_func, reg_cost_opts = self._check_cost_func(
            self.reg_cost_func, self.reg_cost_opts)

        # add a column of ones to X (for intercept coefficient)
        X = np.hstack((np.ones((X.shape[0], 1), dtype=float), X))

        def objective(W):
            # compute training cost/grad
            cost, outer_grad = cost_func(np.dot(X, W) - y, **cost_opts)
            grad = np.dot(outer_grad, X)  # chain rule

            # add regularization cost/grad (but don't regularize intercept)
            reg_cost, reg_grad = reg_cost_func(W[1:], **reg_cost_opts)
            cost += C * reg_cost
            grad[1:] += C * reg_grad

            return cost, grad

        initial_coef_ = np.zeros(X.shape[1])
        res = scipy.optimize.minimize(
            objective, initial_coef_, jac=True, method='L-BFGS-B')

        if res.success:
            self.coef_ = res.x
        else:
            raise FitError("Fit failed: {}".format(res.message), res=res)

        return self

    def predict(self, X):
        """Predict using the model.

        Args:
            X (ndarray): Data of shape ``(n_samples, n_features)``.

        Returns:
            y (ndarray): Predicted values of shape ``(n_samples,)``.
        """
        check_is_fitted(self, 'coef_')
        X = check_array(X)

        n_features = self.coef_.shape[0] - 1
        if X.shape[1] != n_features:
            raise ValueError(
                "X should have %d features, not %d"
                % (n_features, X.shape[1]))

        y = np.dot(X, self.coef_[1:]) + self.coef_[0]
        return y


def test_estimator():
    from sklearn.utils.estimator_checks import check_estimator
    check_estimator(FlexibleLinearRegression)


def test_gradients():
    z0 = np.random.randn(5)
    for cost_func in cost_func_dict.values():
        func = lambda z: cost_func(z)[0]
        grad = lambda z: cost_func(z)[1]
        assert scipy.optimize.check_grad(func, grad, z0) < 1e-5


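# Illustrative extra check (a sketch, not part of the original module): with
# no regularization and noiseless data, the fit should recover the generating
# coefficients and intercept.
def test_recovers_known_coefficients():
    rng = np.random.RandomState(0)
    X = rng.randn(50, 3)
    true_coef = np.array([1.0, -2.0, 0.5])
    y = np.dot(X, true_coef) + 3.0  # intercept of 3.0, no noise
    model = FlexibleLinearRegression(C=0.0)  # plain least squares
    model.fit(X, y)
    assert abs(model.coef_[0] - 3.0) < 1e-3
    assert np.allclose(model.coef_[1:], true_coef, atol=1e-3)

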
if __name__ == '__main__':
    import nose2
    nose2.main()