Physics-informed Neural Networks (PINN)
Why do data-driven "black-box" methods fail?
Physics-Informed Neural Networks (PINNs)
As a running example, consider the one-dimensional diffusion (heat) equation with diffusivity $\lambda$:
$$
\frac{\partial u}{\partial t} - \lambda \frac{\partial^2 u}{\partial x^2} = 0
$$
A PINN represents the solution $u(x,t)$ by a neural network and trains it with a loss that combines a data-misfit term and the PDE residual evaluated at a set of collocation points:
$$
\begin{align*}
\mathcal{L} &= \omega_{\text{data}} \mathcal{L}_{\text{data}} + \omega_{\text{PDE}} \mathcal{L}_{\text{PDE}}\\
\mathcal{L}_{\text{data}} &= \frac{1}{N_{\text{data}}} \sum_{i=1}^{N_{\text{data}}} \left(u(x_i,t_i) - u_i \right)^2 \\
\mathcal{L}_{\text{PDE}} &= \frac{1}{N_{\text{PDE}}} \sum_{j=1}^{N_{\text{PDE}}} \left(\frac{\partial u}{\partial t} - \lambda \frac{\partial^2 u}{\partial x^2} \right)^2\Bigg|_{(x_j,\,t_j)}
\end{align*}
$$
where $u_i$ are the measured values at the data points $(x_i, t_i)$ and the $(x_j, t_j)$ are the collocation points.
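As a minimal sketch (not part of the original code) of how the PDE-residual term can be evaluated, the snippet below uses nested TensorFlow GradientTapes to form $u_t - \lambda u_{xx}$ at collocation points; `net`, `x`, `t`, and `lam` are placeholder names introduced here for illustration:
import tensorflow as tf

def pde_residual_loss(net, x, t, lam):
    # x, t: column tensors of collocation points, shape (N, 1)
    with tf.GradientTape(persistent = True) as tape2:
        tape2.watch([x, t])
        with tf.GradientTape(persistent = True) as tape1:
            tape1.watch([x, t])
            u = net(tf.concat([x, t], axis = 1))   # u(x, t)
        u_t = tape1.gradient(u, t)                 # du/dt
        u_x = tape1.gradient(u, x)                 # du/dx
    u_xx = tape2.gradient(u_x, x)                  # d2u/dx2
    residual = u_t - lam * u_xx
    return tf.reduce_mean(tf.square(residual))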
This is a result first due to Lagaris et al. (1998). The idea is to solve differential equations with neural networks: represent the solution by a neural network and train that network to satisfy the conditions imposed by the differential equation.
Consider a system of ordinary differential equations
$$ u^\prime = f(u,t) $$ with $t \in [0, 1]$ and a known initial condition $u(0) = u_0$.
To solve this, we approximate the solution by a neural network:
$$ \text{NN}(t) \approx u(t) $$ If $\text{NN}(t)$ were the true solution, then $\text{NN}'(t) = f\left(\text{NN}(t),t \right)$ would hold for all $t$. We therefore turn this condition into a loss function:
$${L(\omega)} = \sum_{i} \left(\frac{d \text{NN}(t_i)}{dt}-f\left(\text{NN}(t_i),t_i \right)\right)^2$$
The points $t_i$ can be chosen in many ways: randomly, on a grid, etc. In any case, once this loss function is minimized (with gradients computed by standard reverse-mode automatic differentiation), we have $\frac{d\,\text{NN}(t_i)}{dt} \approx f(\text{NN}(t_i),t_i)$, and thus $\text{NN}(t)$ approximately solves the differential equation.
Note that we still have to handle the initial condition. One simple way to do this is to add an initial condition term to the cost function.
$${L(\omega)} = \sum_{i} \left(\frac{d \text{NN}(t_i)}{dt}-f(\text{NN}(t_i),t_i)\right)^2 + (\text{NN}(0)-u_0)^2 $$
where $\omega$ are the parameters that define the neural network $\text{NN}$ approximating $u$. Thus the problem once again reduces to finding weights that minimize a loss function!
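Before specializing to a concrete example, here is a minimal sketch (not from the original notebook) of this loss for a general right-hand side $f(u, t)$ using TensorFlow; `net`, `f`, `t_points`, and `u0` are placeholder names:
import numpy as np
import tensorflow as tf

def lagaris_loss(net, f, t_points, u0):
    # Collocation points as a column tensor
    t = tf.constant(np.reshape(t_points, (-1, 1)), dtype = tf.float32)
    with tf.GradientTape() as tape:
        tape.watch(t)
        u = net(t)
    du_dt = tape.gradient(u, t)
    # ODE residual and initial-condition residual
    ode_residual = du_dt - f(u, t)
    ic_residual = net(tf.zeros((1, 1))) - u0
    return tf.reduce_mean(tf.square(ode_residual)) + tf.reduce_mean(tf.square(ic_residual))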
As a concrete example, consider the initial value problem
$$\frac{du}{dt} = \cos 2\pi t$$
$$u(0) = 1$$
whose exact solution is
$$u(t) = \frac{1}{2\pi}\sin 2\pi t + 1$$
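Indeed, $\frac{d}{dt}\left(\frac{1}{2\pi}\sin 2\pi t + 1\right) = \cos 2\pi t$ and $u(0) = 1$, so this is the reference against which the trained network will be compared.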
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import random
NN = tf.keras.models.Sequential([
    tf.keras.layers.Input((1,)),
    tf.keras.layers.Dense(units = 32, activation = 'tanh'),
    tf.keras.layers.Dense(units = 32, activation = 'tanh'),
    tf.keras.layers.Dense(units = 32, activation = 'tanh'),
    tf.keras.layers.Dense(units = 1)
])
NN.summary()
optm = tf.keras.optimizers.Adam(learning_rate = 0.001)
For this problem, the loss has an ODE-residual term and an initial-condition term:
$$L_{\text{ODE}} = \frac{1}{n}\sum_{i=1}^{n} \left(\frac{d\,\text{NN}(t_i)}{dt}- \cos 2\pi t_i\right)^2$$
$$L_{\text{IC}} = \left(\text{NN}(0)- 1\right)^2$$
$$L_{\text{Total}} = L_{\text{ODE}} + L_{\text{IC}}$$
def ode_system(t, net):
    # Collocation points as a column tensor
    t = t.reshape(-1, 1)
    t = tf.constant(t, dtype = tf.float32)
    t_0 = tf.zeros((1, 1))
    one = tf.ones((1, 1))

    with tf.GradientTape() as tape:
        tape.watch(t)
        u = net(t)
    u_t = tape.gradient(u, t)

    # ODE residual: d NN/dt - cos(2*pi*t)
    ode_loss = u_t - tf.math.cos(2*np.pi*t)
    # Initial-condition residual: NN(0) - 1
    IC_loss = net(t_0) - one

    square_loss = tf.square(ode_loss) + tf.square(IC_loss)
    total_loss = tf.reduce_mean(square_loss)
    return total_loss
train_t = (np.array([0., 0.025, 0.475, 0.5, 0.525, 0.9, 0.95, 1., 1.05, 1.1, 1.4, 1.45, 1.5, 1.55, 1.6, 1.95, 2.])).reshape(-1, 1)
train_loss_record = []
for itr in range(6000):
    with tf.GradientTape() as tape:
        train_loss = ode_system(train_t, NN)
    train_loss_record.append(train_loss)

    grad_w = tape.gradient(train_loss, NN.trainable_variables)
    optm.apply_gradients(zip(grad_w, NN.trainable_variables))

    if itr % 1000 == 0:
        print(train_loss.numpy())
plt.figure(figsize = (10,8))
plt.plot(train_loss_record)
plt.show()
test_t = np.linspace(0, 2, 100)
train_u = np.sin(2*np.pi*train_t)/(2*np.pi) + 1
true_u = np.sin(2*np.pi*test_t)/(2*np.pi) + 1
pred_u = NN.predict(test_t.reshape(-1, 1)).ravel()
plt.figure(figsize = (10,8))
plt.plot(train_t, train_u, 'ok', label = 'Train')
plt.plot(test_t, true_u, '-k',label = 'True')
plt.plot(test_t, pred_u, '--r', label = 'Prediction')
plt.legend(fontsize = 15)
plt.xlabel('t', fontsize = 15)
plt.ylabel('u', fontsize = 15)
plt.show()
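As a quick, optional check (not in the original notebook), the relative $L^2$ error of the prediction against the exact solution can be computed from the arrays defined above:
# Relative L2 error of the PINN prediction on the test grid
rel_l2 = np.linalg.norm(pred_u - true_u) / np.linalg.norm(true_u)
print(f"Relative L2 error: {rel_l2:.3e}")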
!pip install deepxde
# from deepxde.backend.set_default_backend import set_default_backend
# set_default_backend("tensorflow")
We now solve the same problem with the DeepXDE library:
$$\frac{du}{dt} = \cos 2\pi t$$
$$u(0) = 1$$
$$u(t) = \frac{1}{2\pi}\sin2\pi t + 1$$
import tensorflow as tf
import deepxde as dde
import numpy as np
import matplotlib.pyplot as plt
import math as m
pi = tf.constant(m.pi)
def ode_system(t, u):
    # ODE residual: du/dt - cos(2*pi*t)
    du_t = dde.grad.jacobian(u, t)
    return du_t - tf.math.cos(2*pi*t)

def boundary(t, on_initial):
    # Marks the initial point t = 0
    return on_initial and np.isclose(t[0], 0)
geom = dde.geometry.TimeDomain(0, 2)
ic = dde.IC(geom, lambda t: 1, boundary)
# Reference solution to compute the error
def true_solution(t):
    return np.sin(2*np.pi*t)/(2*np.pi) + 1
data = dde.data.PDE(geom,
ode_system,
ic,
num_domain = 30,
num_boundary = 2,
solution = true_solution,
num_test = 100)
layer_size = [1] + [32] + [1]
activation = "tanh"
initializer = "Glorot uniform"
NN = dde.maps.FNN(layer_size, activation, initializer)
model = dde.Model(data, NN)
model.compile("adam", lr = 0.001)
losshistory, train_state = model.train(epochs = 6000)
dde.saveplot(losshistory, train_state, issave = False, isplot = True)
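To compare the DeepXDE prediction with the exact solution directly, here is a small plotting sketch (not part of the original; it reuses `model`, `true_solution`, and matplotlib from above):
t_plot = np.linspace(0, 2, 200).reshape(-1, 1)
u_pred = model.predict(t_plot)

plt.figure(figsize = (10, 8))
plt.plot(t_plot, true_solution(t_plot), '-k', label = 'True')
plt.plot(t_plot, u_pred, '--r', label = 'DeepXDE prediction')
plt.legend(fontsize = 15)
plt.xlabel('t', fontsize = 15)
plt.ylabel('u', fontsize = 15)
plt.show()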
As a second example, consider a coupled spring-mass system: two masses $m_1, m_2$ connected by springs with stiffnesses $k_1, k_2$ and natural lengths $L_1, L_2$, governed by
$$m_1 \ddot{x}_1 + k_1 (x_1 - L_1) - k_2 (x_2 - x_1 - L_2) = 0$$
$$m_2 \ddot{x}_2 + k_2 (x_2 - x_1 - L_2) = 0$$
with initial conditions
$$x_1(0) = 0.5, \quad x_2(0) = 3.25$$
$$\dot{x}_1(0) = 0, \quad \dot{x}_2(0) = 0$$
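For the reference solution computed later with `scipy.integrate.odeint`, it helps to rewrite the two second-order equations as a first-order system in the state $(x_1, y_1, x_2, y_2)$ with $y_i = \dot{x}_i$ (the damping coefficients $b_1, b_2$ appear in the code below and are set to zero):
$$
\begin{align*}
\dot{x}_1 &= y_1, & \dot{y}_1 &= \frac{-b_1 y_1 - k_1 (x_1 - L_1) + k_2 (x_2 - x_1 - L_2)}{m_1},\\
\dot{x}_2 &= y_2, & \dot{y}_2 &= \frac{-b_2 y_2 - k_2 (x_2 - x_1 - L_2)}{m_2}.
\end{align*}
$$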
import deepxde as dde
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
# Masses
m1 = 1.0
m2 = 1.0
# Spring constants
k1 = 5.0
k2 = 2.0
# Natural lengths
L1 = 0.5
L2 = 0.5
# Initial conditions
# d1_0 and d2_0 are the initial displacements; v1_0 and v2_0 are the initial velocities
d1_0 = 0.5
d2_0 = 3.25
v1_0 = 0.0
v2_0 = 0.0
b1 = 0
b2 = 0
# maximum time to simulate
t_max = 5
def dy(t, x):
    # First derivative dx/dt
    return dde.grad.jacobian(x, t)

def pde(t, x):
    # mass 1 location
    x_1 = x[:, [0]]
    # mass 2 location
    x_2 = x[:, [1]]
    # Second time derivatives of each displacement
    dx1_tt = dde.grad.hessian(x, t, i = 0, j = 0, component = 0)
    dx2_tt = dde.grad.hessian(x, t, i = 0, j = 0, component = 1)
    # Residuals of the two equations of motion
    pde1 = m1 * dx1_tt + k1 * (x_1 - L1) - k2 * (x_2 - x_1 - L2)
    pde2 = m2 * dx2_tt + k2 * (x_2 - x_1 - L2)
    return [pde1, pde2]

def boundary_init(t, on_boundary):
    # Marks the initial point t = 0
    return on_boundary and np.isclose(t[0], 0)
geom = dde.geometry.Interval(0, t_max)
init_d1 = dde.icbc.PointSetBC(np.array([[0.0]]), np.array([d1_0]).reshape(-1, 1), component=0)
init_d2 = dde.icbc.PointSetBC(np.array([[0.0]]), np.array([d2_0]).reshape(-1, 1), component=1)
init_v1 = dde.OperatorBC(geom, lambda x, y, _: dy(x, y[:, [0]]), boundary_init)
init_v2 = dde.OperatorBC(geom, lambda x, y, _: dy(x, y[:, [1]]), boundary_init)
data = dde.data.PDE(geom,
pde,
[init_d1, init_d2, init_v1, init_v2],
num_domain = 2000,
num_boundary = 100,
num_test = 1000)
layer_size = [1] + [20] * 3 + [2]
activation = "tanh"
initializer = "Glorot uniform"
net = dde.maps.FNN(layer_size, activation, initializer)
model = dde.Model(data, net)
model.compile("adam", lr = 1e-3)
losshistory, train_state = model.train(epochs = 5000)
dde.saveplot(losshistory, train_state, issave = False, isplot = False)
dde.optimizers.config.set_LBFGS_options(maxiter=5000)
model.compile("L-BFGS")
losshistory, train_state = model.train()
dde.saveplot(losshistory, train_state, issave = False, isplot = True)
def vectorfield(w, t, p):
    """
    Defines the differential equations for the coupled spring-mass system.

    Arguments:
        w : vector of the state variables:
                w = [x1, y1, x2, y2]
        t : time
        p : vector of the parameters:
                p = [m1, m2, k1, k2, L1, L2, b1, b2]
    """
    x1, y1, x2, y2 = w
    m1, m2, k1, k2, L1, L2, b1, b2 = p

    # Create f = (x1', y1', x2', y2'):
    f = [y1,
         (-b1 * y1 - k1 * (x1 - L1) + k2 * (x2 - x1 - L2)) / m1,
         y2,
         (-b2 * y2 - k2 * (x2 - x1 - L2)) / m2]
    return f
# Use odeint to solve the differential equations defined by the vector field
# ODE solver tolerances
abserr = 1.0e-8
relerr = 1.0e-6

# Time samples at which to evaluate both the reference solution and the PINN.
# A large number of sorted random points is used only to make the plot look smooth.
t = geom.random_points(5000)
t[:, 0].sort()

# Pack up the parameters and initial conditions:
p = [m1, m2, k1, k2, L1, L2, b1, b2]
w0 = [d1_0, v1_0, d2_0, v2_0]
# Call the ODE solver.
wsol = odeint(vectorfield, w0, t[:, 0], args=(p,),
atol=abserr, rtol=relerr)
result = model.predict(t)
usol1 = np.array(result[:, 0])
usol2 = np.array(result[:, 1])
lw = 2
plt.figure(figsize = (12, 6))
plt.plot(t,
wsol[:, 0],
alpha = 1,
label = 'Mass1 GT',
c = 'r',
lw = lw
)
plt.plot(t,
wsol[:, 2],
alpha = 1,
label = 'Mass2 GT',
c = 'b',
lw = lw
)
plt.plot(t[:, 0],
usol1,
alpha = 1,
label = 'Mass1 Prediction',
linestyle='dashed',
c = 'k',
lw = lw
)
plt.plot(t[:, 0],
usol2,
alpha = 1,
label = 'Mass2 Prediction',
linestyle='dashed',
c = 'k',
lw = lw
)
plt.legend(fontsize = 15)
plt.xticks(fontsize = 25)
plt.yticks(fontsize = 25)
plt.xlabel('Time (s)', fontsize = 30)
plt.ylabel('Displacement (m)', fontsize = 30)
plt.show()
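As a final, optional check (not in the original), the agreement between the PINN and the odeint reference can be quantified with a relative $L^2$ error per mass:
# Relative L2 error of the PINN prediction against the odeint reference
err1 = np.linalg.norm(usol1 - wsol[:, 0]) / np.linalg.norm(wsol[:, 0])
err2 = np.linalg.norm(usol2 - wsol[:, 2]) / np.linalg.norm(wsol[:, 2])
print(f"Mass 1 relative L2 error: {err1:.3e}")
print(f"Mass 2 relative L2 error: {err2:.3e}")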