Parameter Estimation in Probabilistic Models
Given a set of data samples $y_1, y_2, \cdots, y_m$, we want to reconstruct the underlying probability density function that generated the dataset. Assuming the samples are drawn independently from a Gaussian distribution with parameters $\mu$ and $\sigma^2$, the likelihood of the data factorizes as
$$P \left(y_1,y_2,\cdots,y_m \,;\, \mu,\sigma^2 \right) = \prod_{i=1}^{m} P \left(y_i \,;\, \mu,\sigma^2 \right)$$
Maximum likelihood estimation (MLE) is a method of estimating the parameters of a probability distribution by maximizing a likelihood function, so that under the assumed statistical model the observed data is most probable:
$$\mathcal{L} = P(D \mid \theta) = P(D \,;\, \theta)$$
$$\theta_{MLE} = \underset{\theta}{\mathrm{argmax}}\;\; P(D \,;\, \theta)$$
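Because the likelihood is a product of many small probabilities, it is numerically more stable to maximize its logarithm, which has the same maximizer; the code below follows this convention via `np.sum(np.log(y))`:

$$\theta_{MLE} = \underset{\theta}{\mathrm{argmax}}\;\; \log P(D \,;\, \theta) = \underset{\theta}{\mathrm{argmax}}\;\; \sum_{i=1}^{m} \log P\left(y_i \,;\, \theta\right)$$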
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
%matplotlib inline
# MLE of a Gaussian distribution: estimating the mean (variance known)
m = 20
mu = 0
sigma = 5

x = np.random.normal(mu, sigma, [m, 1])    # observed samples
xp = np.linspace(-20, 20, 100)
y0 = np.zeros([m, 1])

muhat = [-5, 0, 5, np.mean(x)]             # candidate estimates of the mean

plt.figure(figsize=(8, 8))
for i in range(4):
    yp = norm.pdf(xp, muhat[i], sigma)     # candidate density
    y = norm.pdf(x, muhat[i], sigma)       # likelihood of each sample
    logL = np.sum(np.log(y))               # log-likelihood

    plt.subplot(4, 1, i+1)
    plt.plot(xp, yp, 'r')
    plt.plot(x, y, 'bo')
    plt.plot(np.hstack([x, x]).T, np.hstack([y, y0]).T, 'k--')
    plt.title(r'$\hat\mu$ = {0:.2f}'.format(muhat[i]), fontsize=15)
    plt.text(-15, 0.06, 'logL = {0:.4f}'.format(logL), fontsize=15)
    plt.axis([-20, 20, 0, 0.11])

plt.tight_layout()
plt.show()
Compare with the closed-form result:
$$\mu_{MLE}=\frac{1}{m}\sum_{i = 1}^{m}x_i$$
# mean is unknown in this example
# variance is known in this example
m = 10
mu = 0
sigma = 5

x = np.random.normal(mu, sigma, [m, 1])

mus = np.arange(-10, 10.5, 0.5)            # grid of candidate means

LOGL = []
for i in range(np.size(mus)):
    y = norm.pdf(x, mus[i], sigma)         # likelihood of each sample
    logL = np.sum(np.log(y))               # log-likelihood
    LOGL.append(logL)

muhat = np.mean(x)                         # closed-form MLE
print(muhat)

plt.figure(figsize=(10, 6))
plt.plot(mus, LOGL, '.')
plt.title(r'$\log (\prod \mathcal{N}(x \mid \mu, \sigma^2))$', fontsize=20)
plt.xlabel(r'$\hat \mu$', fontsize=15)
plt.grid(alpha=0.3)
plt.show()
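As a quick sanity check, the grid point with the largest log-likelihood should land close to the sample mean (the grid resolution of 0.5 limits how closely they agree):

# Grid-based maximizer vs. the closed-form MLE (sample mean)
print(mus[np.argmax(LOGL)])    # best grid point
print(muhat)                   # np.mean(x)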
# mean is known in this example
# variance is unknown in this example
m = 100
mu = 0
sigma = 3

x = np.random.normal(mu, sigma, [m, 1])    # samples

sigmas = np.arange(1, 10, 0.1)             # grid of candidate standard deviations

LOGL = []
for i in range(sigmas.shape[0]):
    y = norm.pdf(x, mu, sigmas[i])         # likelihood
    logL = np.sum(np.log(y))
    LOGL.append(logL)

sigmahat = np.sqrt(np.var(x))              # closed-form MLE (np.var defaults to ddof=0)
print(sigmahat)

plt.figure(figsize=(10, 6))
plt.plot(sigmas, LOGL, '.')
plt.title(r'$\log (\prod \mathcal{N} (x \mid \mu, \sigma^2))$', fontsize=20)
plt.xlabel(r'$\hat \sigma$', fontsize=15)
plt.axis([0, np.max(sigmas), np.min(LOGL), -200])
plt.grid(alpha=0.3)
plt.show()
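The grid maximum again matches the closed-form result; with the mean known, maximizing the log-likelihood with respect to $\sigma^2$ gives

$$\sigma^2_{MLE} = \frac{1}{m}\sum_{i=1}^{m}\left(x_i - \mu\right)^2$$

Note that `np.var(x)` centers at the sample mean rather than the known $\mu$; for $m = 100$ this is a close approximation to the formula above.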
More generally, suppose we observe $y = Cx + v$ with Gaussian measurement noise $v \sim \mathcal{N}\left(0, R\right)$, where $y \in \mathbb{R}^k$. Then

$$\begin{align*}P\left(y \mid x\right) & \sim \mathcal{N}\left(Cx,R\right)\\
&= \frac{1}{\sqrt{\left(2\pi\right)^k\vert R \vert}}\exp\left(-\frac{1}{2}\left(y-Cx\right)^TR^{-1}\left(y-Cx\right)\right)\end{align*}$$
Maximizing the log-likelihood $\ell = \log P\left(y \mid x\right)$ with respect to $x$ only involves the quadratic form in the exponent, since the normalization constant does not depend on $x$:

$$
\begin{align*}
\left(y-Cx\right)^TR^{-1}\left(y-Cx\right) &= y^TR^{-1}y-y^TR^{-1}Cx-x^TC^TR^{-1}y+x^TC^TR^{-1}Cx \\ \\
\implies \frac{d\ell}{dx} & =0=-2C^TR^{-1}y + 2C^TR^{-1}Cx\\
\therefore \;\; \hat{x} &=\left(C^TR^{-1}C\right)^{-1}C^TR^{-1}y
\end{align*}
$$
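As a sanity check, here is a minimal sketch verifying the closed-form $\hat{x}$ against the true state; the particular $C$ and $R$ below are made-up values for illustration only:

# Numerical check of x_hat = (C^T R^-1 C)^-1 C^T R^-1 y
# C and R are arbitrary illustrative choices (3 measurements, 2 states)
C = np.array([[1.0, 0.0],
              [0.0, 1.0],
              [1.0, 1.0]])               # observation matrix
R = np.diag([1.0, 4.0, 9.0])             # measurement noise covariance

x_true = np.array([[2.0], [3.0]])
y = C @ x_true + np.random.multivariate_normal(np.zeros(3), R).reshape(-1, 1)

Rinv = np.linalg.inv(R)
x_hat = np.linalg.inv(C.T @ Rinv @ C) @ (C.T @ Rinv @ y)
print(x_hat)                             # should be close to x_true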
Example of two rulers
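Two rulers measure the same length $x$ with noise standard deviations $\sigma_a$ and $\sigma_b$. This is the observation model above with $C = \begin{bmatrix}1\\1\end{bmatrix}$ and $R = \mathrm{diag}\left(\sigma_a^2, \sigma_b^2\right)$, so the general formula reduces to the inverse-variance weighted average used in the code:

$$\hat{x}_{ML} = \frac{y_a/\sigma_a^2 + y_b/\sigma_b^2}{1/\sigma_a^2 + 1/\sigma_b^2}$$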
x = 5    # true state (length in this example)
a = 1    # sigma of ruler a
b = 2    # sigma of ruler b

YA = []
YB = []
XML = []
for i in range(2000):
    ya = x + np.random.normal(0, a)                      # measurement from ruler a
    yb = x + np.random.normal(0, b)                      # measurement from ruler b
    xml = (1/a**2*ya + 1/b**2*yb)/(1/a**2 + 1/b**2)      # inverse-variance weighted average
    YA.append(ya)
    YB.append(yb)
    XML.append(xml)

plt.figure(figsize=(8, 6))
plt.subplot(3, 1, 1)
plt.hist(YA, 41, density=True)
plt.axis([0, 10, 0, 0.5])
plt.title(r'$y_a$', fontsize=20)
plt.grid(alpha=0.3)
plt.subplot(3, 1, 2)
plt.hist(YB, 41, density=True)
plt.axis([0, 10, 0, 0.5])
plt.title(r'$y_b$', fontsize=20)
plt.grid(alpha=0.3)
plt.subplot(3, 1, 3)
plt.hist(XML, 41, density=True)
plt.axis([0, 10, 0, 0.5])
plt.title(r'$x_{ML}$', fontsize=20)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()
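The fused estimate is tighter than either measurement alone: its theoretical standard deviation is $\left(1/\sigma_a^2 + 1/\sigma_b^2\right)^{-1/2} \approx 0.894$ here, which a quick empirical check confirms:

# Empirical spread of each estimate; XML should be the narrowest
print(np.std(YA), np.std(YB), np.std(XML))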
Example of two GPS receivers
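Here two GPS receivers observe the same 2D position with different noise covariances $R_a$ and $R_b$. The same derivation with $C = \begin{bmatrix}I\\I\end{bmatrix}$ gives the matrix-weighted fusion used in the code:

$$\hat{x}_{ML} = \left(R_a^{-1} + R_b^{-1}\right)^{-1}\left(R_a^{-1}y_a + R_b^{-1}y_b\right)$$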
x = np.array([5, 10]).reshape(-1, 1)   # true position
mu = np.array([0, 0])
Ra = np.array([[9, 1],
               [1, 1]])                # noise covariance of receiver a
Rb = np.array([[1, 1],
               [1, 9]])                # noise covariance of receiver b

RaI = np.linalg.inv(Ra)
RbI = np.linalg.inv(Rb)

YA = []
YB = []
XML = []
for i in range(1000):
    ya = x + np.random.multivariate_normal(mu, Ra).reshape(-1, 1)
    yb = x + np.random.multivariate_normal(mu, Rb).reshape(-1, 1)
    xml = np.linalg.inv(RaI + RbI) @ (RaI @ ya + RbI @ yb)   # matrix-weighted fusion
    YA.append(ya.T)
    YB.append(yb.T)
    XML.append(xml.T)

YA = np.vstack(YA)
YB = np.vstack(YB)
XML = np.vstack(XML)
plt.figure(figsize=(10, 6))
plt.title('Data Fusion', fontsize=15)
plt.plot(YA[:,0], YA[:,1], 'b.', label='Observation 1')
plt.plot(YB[:,0], YB[:,1], 'r.', label='Observation 2')
plt.plot(XML[:,0], XML[:,1], 'k.', label='MLE')
plt.axis('equal')
plt.grid(alpha=0.3)
plt.legend(fontsize=15)
plt.show()
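The MLE cloud is visibly tighter than either observation cloud; its theoretical covariance is $\left(R_a^{-1}+R_b^{-1}\right)^{-1}$, which can be compared against the empirical covariance of the fused estimates:

# Theoretical vs. empirical covariance of the fused estimate
print(np.linalg.inv(RaI + RbI))
print(np.cov(XML.T))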