Source code for bluebell

import numpy as np

# If you're going to edit the code, some linear algebra is useful.
# Without loss of generality, an ellipse centred at zero is defined in
# "mean-covariance" form by
#
#     x'C¯¹x = 1
#
# C¯¹ must be positive definite, so we can factorise it by
#
#     C¯¹ = UΣ²U' = (UΣ)(ΣU')
#
# using e.g. the SVD.  If we then define
#
#     B = ΣU'
#
# our ellipsoid becomes
#
#     x'C¯¹x = (Bx)'(Bx)
#
# so B maps point x on an ellipsoid to y=Bx on a sphere.
# Specifically, V' rotates the principal axes to the co-ordinate axes
# and Σ rescales them to the unit vectors.  The reverse transformation,
# B¯¹ = (ΣU')¯¹ = UΣ¯¹ maps a point on a sphere to an ellipse.
#
# The SVD and eigenvalue decomposition both factorise C¯¹.  SVD gives
# USV' with S in *decreasing* order. `eigh` gives L, Q such that QLQ¯¹
# with L in *increasing* order.  So L = S[::-1] and, up to a sign, Q =
# U[:,::-1].

[docs] def MVBE(x, tol=1e-3, maxiter=1000): """Return the mean and covariance matrix that define the minimum-volume ellipsoid that bounds the points in ``x`` using algorithm 3.2 of `Moshtagh (2005)`_. .. _Moshtagh (2005): http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.116.7691 Parameters ---------- x: 2-d NumPy array of shape (N, D) tol: float, optional Tolerance parameter for precision of bounding ellipsoid (default=1e-3). maxiter: int, optional Maximum number of iterations to perform (default=1000). Returns ------- mu: 1-d NumPy array of shape (D,) Mean (or center) of the ellipsoid. C: 2-d NumPy array of shape (D,D) Covariance matrix that defines the ellipsoid. Example ------- .. plot:: :include-source: import matplotlib.pyplot as pl import bluebell as bb import bluebell.plot as bbplot x = np.random.rand(10,2) mu, C = bb.MVBE(x) pl.plot(*x.T, 'o') bbplot.cov_ellipse(mu, C, fc='none', ec='k') """ N, D = x.shape I = np.eye(N) Q = np.column_stack((x, np.ones(N))).T u = np.ones(N)/N err = np.inf i = 0 while err > tol and i < maxiter: V = (Q*u).dot(Q.T) Vinv = np.linalg.inv(V) # g = np.array([Qi.dot(Vinv).dot(Qi) for Qi in Q.T]) # slow but definitely correct g = np.sum(Q*(Vinv.dot(Q)), axis=0) # fast j = np.argmax(g) du = I[j]-u alpha = (g[j]-(D+1))/(D+1)/(g[j]-1) err = np.linalg.norm(alpha*du) u = u + alpha*du i += 1 mu = u.dot(x) C = (x.T.dot(np.diag(u)).dot(x)-np.outer(mu, mu))*D return mu, C
[docs] def select_by_relative_chi2(chi2, sigma_inner=1e-7, sigma_outer=np.inf): """Returns a logical array that is ``True`` where ``chi2`` relative to its minimum is within the range defined by ``sigma_inner`` and ``sigma_outer``. i.e. where ``sigma_inner**2 < chi2-np.min(chi2) < sigma_outer**2``.""" relative_chi2 = chi2 - np.nanmin(chi2) return (sigma_inner**2 < relative_chi2) & (relative_chi2 < sigma_outer**2)
[docs] def estimate(x, chi2, sigma_inner=1e-7, sigma_outer=np.inf, tol=1e-3, maxiter=1000): """Estimate uncertainties for a sample of points given their correspond values of chi-squared. Parameters ---------- x: 2-d NumPy array of shape (N, D) Points at which chi-squared has been evaluated. chi2: 1-d NumPy array of shape (N,) Values of chi-squared for the points in ``x``. sigma_inner: float, optional Only use the points that are more than this many sigma from the minimum. sigma_outer: float, optional Only use the points that are less than this many sigma from the minimum. tol: float, optional Tolerance parameter for finding minimum-volume bounding ellipsoid. maxiter: int, optional Maximum number of iterations to use when finding minimum-volume bounding ellipsoid. Returns ------- mu: 1-d NumPy array of shape (D,) Mean (or center) of the minimum-volume bounding ellipsoid around the rescaled points in ``x``. C: 2-d NumPy array of shape (D,D) Covariance matrix that defines the ellipsoid. """ x0 = x[np.nanargmin(chi2)] I = np.where(select_by_relative_chi2(chi2, sigma_inner=sigma_inner, sigma_outer=sigma_outer))[0] z = x0 + (x[I]-x0)/np.sqrt(chi2[I,None]-np.nanmin(chi2)) return MVBE(z[np.all(np.isfinite(z), axis=1)], tol=tol, maxiter=maxiter)
[docs] def propagate(x, y, mu, C, weights=None): """Propagate the uncertainties on ``x`` to other dependent variables ``y``. Returns full mean and uncertainty so that the correlations between main parameters and dependent variables are given. Parameters ---------- x: 2-d NumPy array Points at which function has been evaluated. y: 2-d NumPy array Values of function at points in ``x``. mu: 1-d NumPy array of shape (D,) Mean (or center) of the ellipsoid that characterizes the ``chi2`` values of the points in ``x``. C: 2-d NumPy array of shape (D,D) Covariance matrix of the ellipsoid. weights: 1-d NumPy array, optional Relative weights for the points in the fit. Returns ------- mu: 1-d NumPy array of shape (D,) Mean (or center) of both the underlying parameters ``x`` and the dependent variables ``y``. C: 2-d NumPy array of shape (D,D) Covariance matrix of both the underlying parameters ``x`` and the dependent variables ``y``. """ Q, B = linearize(x, y, weights=weights) A = np.vstack((np.eye(len(x[0])), B)) return np.hstack((mu, Q + B.dot(mu))), (A.dot(C)).dot(A.T)
[docs] def linearize(x, y, weights=None): """Given a set of points ``x`` and the values ``y`` of some (vector) function evaluated at those points, find the parameters of the best-fitting affine map between ``x`` and ``y``. i.e. returns arrays ``Q`` and ``B`` such that ``y == Q + x.dot(B.T)``. Parameters ---------- x: 2-d NumPy array of shape (N, D) Points at which function has been evaluated. y: 2-d NumPy array of shape (N, K) Values of function at points in ``x``. weights: 1-d NumPy array, optional Relative weights for the points in the fit. Returns ------- Q: 1-d NumPy array of shape (K,) Constant offset of affine map from ``x`` to ``y``. B: 2-d NumPy array of shape (K, D) Matrix transformation of affine map from ``x`` to ``y``. """ if weights is None: weights = np.ones(len(x)) x1 = np.hstack((np.ones((len(x),1)), x)) B = np.linalg.lstsq(x1*weights[:,None], y*weights[:,None], rcond=-1)[0] Q = B[0] B = B[1:].T return Q, B
[docs] def linear_optimum(x, y, obs, err, weights=None, rcond=None): """Given a set of points ``x``, the values ``y`` of some (vector) function evaluated at those points, observed values of the function ``obs`` and uncertainties on those observations ``err``, finds the best-fitting parameters an affine model of ``y`` as a function of ``x``. Parameters ---------- x: 2-d NumPy array of shape (N, D) Points at which function has been evaluated. y: 2-d NumPy array of shape (N, K) Values of function at points in ``x``. obs: 1-d NumPy array of shape (D,) Observed values. err: 1-d NumPy array of shape (D,) Uncertainties on observed values. weights: 1-d NumPy array, optional Relative weights for the points in the fit. rcond: float ``rcond`` parameter passed to ``numpy.linalg.lstsq``. """ Q, B = linearize(x, y, weights=weights) Bp = B/err[:,None] yp = (obs-Q)/err return np.linalg.lstsq(Bp, yp, rcond=rcond)[0]
[docs] def uniform_on_unit_sphere(N, D): """Draw ``N`` points uniformly distributed on the surface of a ``D``-dimensional unit sphere. Example ------- .. plot:: :include-source: import matplotlib.pyplot as pl import bluebell as bb import bluebell.plot as bbplot x = bb.uniform_on_unit_sphere(10, 2) pl.plot(*x.T, 'o') bbplot.cov_ellipse(np.zeros(2), np.eye(2), fc='none', ec='k') """ x = np.random.randn(N, D) x = x/np.sqrt(np.sum(x**2, axis=1)).reshape((-1,1)) return x
def sphere_to_ellipsoid(x, mu, C): U, S, VT = np.linalg.svd(np.linalg.inv(C)) return mu + x.dot(np.diag(np.sqrt(1./S))).dot(VT) # (VΣ¯¹x)' = x'Σ¯¹V'
[docs] def uniform_on_ellipsoid(mu, C, N): """Draw ``N`` points uniformly distributed on the surface of the ellipsoid define by mean ``mu`` and covariance ``C``. Example ------- .. plot:: :include-source: import matplotlib.pyplot as pl import bluebell as bb import bluebell.plot as bbplot mu = np.random.rand(2) A = np.random.rand(2,2) C = A.T.dot(A) x = bb.uniform_on_ellipsoid(mu, C, 10) pl.plot(*x.T, 'o') bbplot.cov_ellipse(mu, C, fc='none', ec='k') """ # https://math.stackexchange.com/a/982833 # μ² = (dS'/dS)² = (abc…)²((x/a)²+(y/b)²+(z/c)²+…), # where (x,y,z,…) are points on sphere # μ²_max = (abc…)²(1/a)² = (bc…)² assuming a is shortest axis # μ²/μ²_max = ((x/a)²+(y/b)²+(z/c)²+…)/(1/a²) def iterate(): x = uniform_on_unit_sphere(10000, len(mu)) area = np.sum(x**2*S, axis=1)**0.5 return sphere_to_ellipsoid(x[area/area_max > np.random.rand(len(x))], mu, C) U, S, VT = np.linalg.svd(np.linalg.inv(C)) area_max = S[0]**0.5 y_all = iterate() while len(y_all) < N: y_all = np.vstack((y_all, iterate())) return y_all[:N]
[docs] def uniform_in_ellipsoid(mu, C, N): """Draw ``N`` points uniformly distributed inside the ellipsoid define by mean ``mu`` and covariance ``C``. Example ------- .. plot:: :include-source: import matplotlib.pyplot as pl import bluebell as bb import bluebell.plot as bbplot mu = np.random.rand(2) A = np.random.rand(2,2) C = A.T.dot(A) x = bb.uniform_in_ellipsoid(mu, C, 100) pl.plot(*x.T, 'o') bbplot.cov_ellipse(mu, C, fc='none', ec='k') """ x = mu.reshape((1,-1)) std = np.sqrt(np.diag(C)) while len(x) < N+1: # x_n = mu - std + 2.*std*np.random.rand(len(mu)) # if is_in_ellipsoid(x_n, mu, C): x.append(x_n) x_n = (mu - std).reshape((1,-1)) + 2.*std.reshape((1,-1))*np.random.rand(1000, len(mu)) x = np.vstack((x, x_n[is_in_ellipsoid(x_n, mu, C)])) return np.array(x)[:N]
[docs] def vertices(mu, C): """Return the ``2*D`` points at the ends of each axis of the ellipsoid defined by mean ``mu`` and covariance ``C``. Example ------- .. plot:: :include-source: import matplotlib.pyplot as pl import bluebell as bb import bluebell.plot as bbplot mu = np.random.rand(2) A = np.random.rand(2,2) C = A.T.dot(A) x = bb.vertices(mu, C) pl.plot(*x.T, 'o') bbplot.cov_ellipse(mu, C, fc='none', ec='k') """ # should return VΣ¯¹[I,-I] # transpose is [I;-I]Σ¯¹V' D = len(C) U, S, VT = np.linalg.svd(C) # or S, VT = np.linalg.eigh(C) x = np.diag(np.sqrt(S)).dot(VT) return mu + np.vstack([x, -x])
[docs] def is_in_ellipsoid(x, mu, C): """Returns ``True`` if the point(s) in ``x`` are inside the ellipsoid defined by mean ``mu`` and covariance ``C``.""" return np.sum((x-mu).T*(np.linalg.inv(C).dot((x-mu).T)), axis=0) < 1
[docs] def is_in_simplex(x, s): """Returns ``True`` if the 1-d array ``x`` is in the simplex ``s``, where ``s`` is a 2-d array with dimensions ``(D+1,D)`` (each row is a point).""" D = len(s[0]) A = np.ones((D+1, D+1)) A[:-1] = s.T try: return np.all(np.linalg.solve(A, np.hstack([x, 1.])) >= 0) except np.linalg.LinAlgError: return False
[docs] def discard(x, iterations=1): """Drop points that don't help to define the bounding ellipsoid.""" D = len(x[0]) I = np.ones(len(x), dtype=bool) for iteration in range(iterations): s = np.random.permutation(x[I])[:D+1] # construct a random simplex for i, xi in enumerate(x): if not I[i]: continue elif np.any(np.all(xi == s, axis=1)): continue else: I[i] = not is_in_simplex(xi, s) # discard points inside that simplex if sum(I) == D+1: break return x[I]
[docs] def grid(N, lower, upper): """Create a regular grid with `N` points between `lower` and `upper`.""" D = len(upper) N = N*np.ones(D, dtype=int) grid = np.vstack(list(map(np.ravel, np.meshgrid(*[np.linspace(lower[i], upper[i], N[i]) for i in range(D)])))).T return grid