Module TeachMyAgent.students.openai_baselines.common.math_util
Expand source code
import numpy as np
import scipy.signal
def discount(x, gamma):
"""
computes discounted sums along 0th dimension of x.
inputs
------
x: ndarray
gamma: float
outputs
-------
y: ndarray with same shape as x, satisfying
y[t] = x[t] + gamma*x[t+1] + gamma^2*x[t+2] + ... + gamma^k x[t+k],
where k = len(x) - t - 1
"""
assert x.ndim >= 1
return scipy.signal.lfilter([1],[1,-gamma],x[::-1], axis=0)[::-1]
def explained_variance(ypred,y):
"""
Computes fraction of variance that ypred explains about y.
Returns 1 - Var[y-ypred] / Var[y]
interpretation:
ev=0 => might as well have predicted zero
ev=1 => perfect prediction
ev<0 => worse than just predicting zero
"""
assert y.ndim == 1 and ypred.ndim == 1
vary = np.var(y)
return np.nan if vary==0 else 1 - np.var(y-ypred)/vary
def explained_variance_2d(ypred, y):
assert y.ndim == 2 and ypred.ndim == 2
vary = np.var(y, axis=0)
out = 1 - np.var(y-ypred)/vary
out[vary < 1e-10] = 0
return out
def ncc(ypred, y):
return np.corrcoef(ypred, y)[1,0]
def flatten_arrays(arrs):
return np.concatenate([arr.flat for arr in arrs])
def unflatten_vector(vec, shapes):
i=0
arrs = []
for shape in shapes:
size = np.prod(shape)
arr = vec[i:i+size].reshape(shape)
arrs.append(arr)
i += size
return arrs
def discount_with_boundaries(X, New, gamma):
"""
X: 2d array of floats, time x features
New: 2d array of bools, indicating when a new episode has started
"""
Y = np.zeros_like(X)
T = X.shape[0]
Y[T-1] = X[T-1]
for t in range(T-2, -1, -1):
Y[t] = X[t] + gamma * Y[t+1] * (1 - New[t+1])
return Y
def test_discount_with_boundaries():
gamma=0.9
x = np.array([1.0, 2.0, 3.0, 4.0], 'float32')
starts = [1.0, 0.0, 0.0, 1.0]
y = discount_with_boundaries(x, starts, gamma)
assert np.allclose(y, [
1 + gamma * 2 + gamma**2 * 3,
2 + gamma * 3,
3,
4
])
Functions
def discount(x, gamma)
-
computes discounted sums along 0th dimension of x.
Inputs
x: ndarray gamma: float
Outputs
y: ndarray with same shape as x, satisfying
y[t] = x[t] + gamma*x[t+1] + gamma^2*x[t+2] + ... + gamma^k x[t+k], where k = len(x) - t - 1
Expand source code
def discount(x, gamma): """ computes discounted sums along 0th dimension of x. inputs ------ x: ndarray gamma: float outputs ------- y: ndarray with same shape as x, satisfying y[t] = x[t] + gamma*x[t+1] + gamma^2*x[t+2] + ... + gamma^k x[t+k], where k = len(x) - t - 1 """ assert x.ndim >= 1 return scipy.signal.lfilter([1],[1,-gamma],x[::-1], axis=0)[::-1]
def discount_with_boundaries(X, New, gamma)
-
X: 2d array of floats, time x features New: 2d array of bools, indicating when a new episode has started
Expand source code
def discount_with_boundaries(X, New, gamma): """ X: 2d array of floats, time x features New: 2d array of bools, indicating when a new episode has started """ Y = np.zeros_like(X) T = X.shape[0] Y[T-1] = X[T-1] for t in range(T-2, -1, -1): Y[t] = X[t] + gamma * Y[t+1] * (1 - New[t+1]) return Y
def explained_variance(ypred, y)
-
Computes fraction of variance that ypred explains about y. Returns 1 - Var[y-ypred] / Var[y]
interpretation: ev=0 => might as well have predicted zero ev=1 => perfect prediction ev<0 => worse than just predicting zero
Expand source code
def explained_variance(ypred,y): """ Computes fraction of variance that ypred explains about y. Returns 1 - Var[y-ypred] / Var[y] interpretation: ev=0 => might as well have predicted zero ev=1 => perfect prediction ev<0 => worse than just predicting zero """ assert y.ndim == 1 and ypred.ndim == 1 vary = np.var(y) return np.nan if vary==0 else 1 - np.var(y-ypred)/vary
def explained_variance_2d(ypred, y)
-
Expand source code
def explained_variance_2d(ypred, y): assert y.ndim == 2 and ypred.ndim == 2 vary = np.var(y, axis=0) out = 1 - np.var(y-ypred)/vary out[vary < 1e-10] = 0 return out
def flatten_arrays(arrs)
-
Expand source code
def flatten_arrays(arrs): return np.concatenate([arr.flat for arr in arrs])
def ncc(ypred, y)
-
Expand source code
def ncc(ypred, y): return np.corrcoef(ypred, y)[1,0]
def test_discount_with_boundaries()
-
Expand source code
def test_discount_with_boundaries(): gamma=0.9 x = np.array([1.0, 2.0, 3.0, 4.0], 'float32') starts = [1.0, 0.0, 0.0, 1.0] y = discount_with_boundaries(x, starts, gamma) assert np.allclose(y, [ 1 + gamma * 2 + gamma**2 * 3, 2 + gamma * 3, 3, 4 ])
def unflatten_vector(vec, shapes)
-
Expand source code
def unflatten_vector(vec, shapes): i=0 arrs = [] for shape in shapes: size = np.prod(shape) arr = vec[i:i+size].reshape(shape) arrs.append(arr) i += size return arrs