Discrete Signal Processing and Machine Learning
Table of Contents
%%html
<iframe src="https://www.youtube.com/embed/Y_0WVpn5leY"
width="560" height="315" frameborder="0" allowfullscreen></iframe>
Signal processing studies signals and systems
Signal:
systems:
Goals:
A signal $x[n]$ is a function that maps an independent variable to a dependent variable.
In this course, we will focus on discrete-time signals $x[n]$:
(We will only consider a real number signal in this class)
plot
for continuous signals in python
stem
for discrete signals in python
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
#import sounddevice as sd
#from scipy.io import wavfile
#import scipy.ndimage.filters as ndi
#import skimage.util.noise as noise
#t = np.array(np.arange(0,2,0.01))
t = np.arange(0,2,0.01)
x = np.sin(2*np.pi*t)
plt.figure(figsize=(8,4))
plt.plot(t,x,t,np.zeros(len(t)),'k--')
plt.ylim([-1.1, 1.1])
plt.xlabel('t [sec]',fontsize=15)
plt.ylabel('x(t)',fontsize=15)
plt.show()
N = 20
n = np.arange(N)
x = np.sin(2*np.pi/N*n)
plt.figure(figsize=(8,6))
plt.subplot(211), plt.plot(n,x), plt.axis('tight'), plt.axhline(0,c = 'k')
plt.subplot(212), plt.stem(n,x), plt.axis('tight'), plt.axhline(0,c = 'k')
plt.show()
plt.figure(figsize=(8,3))
plt.plot(n,x,'k--')
marker,stem,base = plt.stem(n,x)
plt.setp(marker, markersize=8)
plt.axhline(0,c='k')
plt.ylim([-1.1,1.1])
plt.show()
## random number generation (1D)
m = 1000;
# uniform distribution U(0,1)
x1 = np.random.rand(m)
# uniform distribution U(a,b)
a = 1
b = 5
x2 = a + (b-a)*np.random.rand(m)
# standard normal (Gaussian) distribution N(0,1^2)
x3 = np.random.randn(m)
# normal distribution N(5,2^2)
x4 = 5 + 2.*np.random.randn(m)
# random integers
x5 = np.random.randint(1,7,m)
N = 20
n = np.array(range(N))
x = np.sin(2*np.pi/N*n);
xn = np.sin(2*np.pi/N*n) + 0.1*np.random.randn(N);
plt.figure(figsize=(8,6))
plt.subplot(211), plt.plot(n,x,'k--')
plt.stem(n,x)
plt.ylim([-1.1,1.1])
plt.axhline(0,c='k')
plt.subplot(212), plt.plot(n,x,'k--')
plt.stem(n,xn)
plt.ylim([-1.1,1.1])
plt.axhline(0,c='k')
plt.show()
plt.figure(figsize=(8,3))
plt.stem(n,xn)
plt.ylim([-1.1,1.1])
plt.axhline(0,c='k')
plt.show()
## Statistics
# numerically understand statistics
m = 100;
x = np.random.rand(m) # uniform dist.
plt.figure(figsize=(8,3))
plt.plot(x)
plt.show()
xbar = 1/m*sum(x) # sample mean or
print('xbar = '+str(xbar))
print('mean = '+str(np.mean(x)))
varbar = 1/(m-1)*sum((x-xbar)**2) # sample variance or
print('varbar = '+str(varbar))
print('var = '+str(np.var(x)))
del xbar
s = 0
N = 100
for n in range(N):
s = (1-1/(n+1))*s + 1/(n+1)*x[n]
print('s = '+str(s))
print('mean = '+str(np.mean(x)))
# just to plot
s = 0
S = np.array([])
for n in range(N):
s = (1-1/(n+1))*s + 1/(n+1)*x[n]
S = np.hstack((S,s))
plt.figure(figsize=(8,3))
plt.plot(x)
plt.plot(S,'r')
plt.show()
N = 200
n = np.array(range(N))
x = np.sin(2*np.pi/N*n*4);
xn = np.sin(2*np.pi/N*n*4) + 0.1*np.random.randn(N)
plt.figure(figsize=(8,3))
plt.plot(n,xn,'k')
plt.ylim([-1.1,1.1])
plt.show()
# incorrect
s = 0
S = np.array([])
for n in range(N):
s = (1-1/(n+1))*s + 1/(n+1)*xn[n]
S = np.hstack((S,s))
plt.figure(figsize=(8,3))
plt.plot(xn,'k')
plt.plot(S)
plt.ylim([-1.1,1.1])
plt.show()
N = 200
n = np.array(range(N))
x = np.sin(2*np.pi/N*n*4)
xn = np.sin(2*np.pi/N*n*4) + 0.1*np.random.randn(N)
plt.figure(figsize=(8,3))
plt.plot(n,xn,'k')
plt.ylim([-1.1,1.1])
plt.show()
# window size of 5
y = np.zeros(len(xn))
for k in range(4,N):
y[k] = 1/5*(xn[k] + xn[k-1] + xn[k-2] + xn[k-3] + xn[k-4])
plt.figure(figsize=(8,3))
plt.plot(n,xn,'k',n,y)
plt.ylim([-1.1,1.1])
plt.show()
# window size of 3
y = np.zeros(len(xn))
m = 3
for k in range(2,N):
s = 0
for i in range(m):
s = s + xn[k-i]
y[k] = 1/m*s
plt.figure(figsize=(8,3))
plt.plot(n,xn,'k',n,y)
plt.ylim([-1.1,1.1])
plt.show()
$$
\begin{align*}
y[n-1] &= \frac{x[n-1] + x[n-2] + x[n-3] + x[n-4] + x[n-5]}{5} \\
y[n] &= \frac{x[n] + x[n-1] + x[n-2] + x[n-3] + x[n-4]}{5}\\
&= \frac{x[n]}{5} + \frac{x[n-1] + x[n-2] + x[n-3] + x[n-4]}{5} + \frac{x[n-5]}{5} - \frac{x[n-5]}{5}\\
y[n] &= \frac{x[n]}{5} + y[n-1]- \frac{x[n-5]}{5}\\
\end{align*}
$$
y = np.zeros(len(xn))
m = 7
s = 0
for i in range(m):
s = s + xn[i]
y[m-1] = s/(m-1)
for k in range(m,N):
y[k] = y[k-1] + xn[k]/m - xn[k-m]/m
plt.figure(figsize=(8,3))
plt.plot(n,xn,'k',n,y)
plt.ylim([-1.1,1.1])
plt.show()
Discount factor, $\theta$
Recursive
N = 200
n = np.array(range(N))
x = np.sin(2*np.pi/N*n*4)
xn = np.sin(2*np.pi/N*n*4) + 0.1*np.random.randn(N)
theta = 0.6
y = xn[0]
Y = np.array([y])
for k in range(1,N):
y = (1 - theta)*xn[k] + theta*y
Y = np.hstack((Y,y))
plt.figure(figsize=(8,3))
plt.plot(n,xn,'k',n,Y)
plt.ylim([-1.1,1.1])
plt.show()
$$
y[n] = \text{median}\{x[n], x[n-1], \cdots, x[n-k+1]\}$$
Median filters can do an excellent job of rejecting certain types of noise, in particular, "shot" or impulse noise (outlier in a time series) in which some individual pixels or signals have extreme values.
Remove shot noise (salt & pepper noise)
#plot -s 560,420
N = 200
n = np.array(range(200))
x = np.sin(2*np.pi/N*n*4)
shot_noise = (np.random.random(x.shape) < 0.05).astype(int)
xn = x + shot_noise - shot_noise.mean()
y = np.zeros(xn.shape)
m = 3
for k in range(m, N+1):
A = np.array([])
for i in range(0, m):
A = np.hstack((A, xn[k-i-1]))
y[k-1] = np.median(A)
plt.figure(figsize=(8,6))
plt.subplot(2,1,1), plt.stem(n, xn, markerfmt=" "), plt.ylim([-1.1, 1.1])
plt.subplot(2,1,2), plt.stem(n, y , markerfmt=" "), plt.ylim([-1.1, 1.1])
plt.show()
Fs, x = wavfile.read('./data_files/92002_jcveliz_violin-origional.wav')
x = x/max(x)
sd.play(x, Fs) # play a wave file with sampling rate Fs
# generate an audio signal with a salt and pepper noise
#shot_noise = imnoise(zeros(length(x),1),'salt & pepper',0.05);
shot_noise = (np.random.random(x.shape) < 0.05).astype(int)
#x_noise = x + shot_noise - mean(shot_noise);
x_noise = x + shot_noise - shot_noise.mean()
sd.play(x_noise,Fs)
# apply a linear low-pass filter
h = np.array([1,1,1])/3
x_avg = np.convolve(h,x_noise)
sd.play(x_avg,Fs)
# apply a nonlinear filter
x_median = ndi.median_filter(x_noise,7);
sd.play(x_median,Fs)
# piecewise smooth signal with a bit of noise added
N = 50
n = range(0, N)
s = np.hamming(N) * np.hstack((np.ones(N//2), -np.ones(N//2)))
x = s + 0.1*np.random.randn(N)
plt.figure(figsize=(8,6))
plt.subplot(2,1,1)
mline, sline, bline = plt.stem(n, x, markerfmt=" ")
plt.setp(sline, 'color', 'b', 'linewidth', 4)
plt.title('noisy piecewise smooth signal', fontsize=16)
y = np.zeros(x.shape)
m = 4
for k in range(m, N):
A = np.array([])
for i in range(0, m):
A = np.hstack((A, x[i-k]))
y[k-1] = 1/4 * np.dot(np.array([[1,1,-1,-1]]), A)
plt.subplot(2,1,2)
_mline, _sline, _bline = plt.stem(n, y, markerfmt=" ")
plt.axis([0,50,-1.5,1.5])
plt.setp(_sline, 'color', 'b', 'linewidth', 4)
plt.axis([1, N, -1.5, 1.5])
plt.title('output', fontsize=16)
plt.show()
We will use an acceleration signal only along z-axis.
Arduino code
#include <Wire.h>
const int MPU = 0x68; //Default address of I2C for MPU 6050
int16_t AcX, AcY, AcZ;
void setup() {
Wire.begin(); // Wire library initialization
Wire.beginTransmission(MPU); // Begin transmission to MPU
Wire.write(0x6B); // PWR_MGMT_1 register
Wire.write(0); // MPU-6050 to start mode
Wire.endTransmission(true);
Serial.begin(9600);
}
void loop() {
Wire.beginTransmission(MPU); // Start transfer
Wire.write(0x3B); // register 0x3B (ACCEL_XOUT_H), records data in queue
Wire.endTransmission(false); // Maintain connection
Wire.requestFrom(MPU, 14, true); // Request data to MPU
//Reads byte by byte
AcX = Wire.read() << 8 | Wire.read(); // 0x3B (ACCEL_XOUT_H) & 0x3C (ACCEL_XOUT_L)
AcY = Wire.read() << 8 | Wire.read(); // 0x3D (ACCEL_YOUT_H) & 0x3E (ACCEL_YOUT_L)
AcZ = Wire.read() << 8 | Wire.read(); // 0x3F (ACCEL_ZOUT_H) & 0x40 (ACCEL_ZOUT_L)
//Prints values on Serial
Serial.print(AcX);
Serial.print(",");
Serial.print(AcY);
Serial.print(",");
Serial.println(AcZ);
delay(20);
}
# plot only Z-axis
import serial
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import animation
ser = serial.Serial('COM4', 9600)
leng = 201
fig = plt.figure(figsize=(12, 6))
ax = plt.axes(xlim=(0,leng-1), ylim=(-2, 2))
plt.title('Real-time sensor data')
plt.xlabel('Data points')
plt.ylabel('Acceleration [G]')
graphX, = ax.plot([], [], 'b', label = 'X')
graphY, = ax.plot([], [], 'r', label = 'Y')
graphZ, = ax.plot([], [], 'g', label = 'Z')
ax.legend(loc='upper right')
ax.grid(True)
t = list(range(0, leng))
accX = []
accY = []
accZ = []
for i in range(0, leng):
accX.append(0)
accY.append(0)
accZ.append(0)
def init():
graphX.set_data([], [])
graphY.set_data([], [])
graphZ.set_data([], [])
return graphX, graphY, graphZ
def animate(i):
global t, accX, accY, accZ
while (ser.inWaiting() == 0):
pass
try:
arduinoString = ser.readline().decode("utf-8")
dataArray = arduinoString.split(',')
accX.append(float(dataArray[0])/(32767/2))
accY.append(float(dataArray[1])/(32767/2))
accZ.append(float(dataArray[2])/(32767/2))
accX.pop(0)
accY.pop(0)
accZ.pop(0)
except:
pass
graphZ.set_data(t, accZ)
return graphX, graphY, graphZ
delay = 20
anim = animation.FuncAnimation(fig, animate, init_func=init,
interval=delay, blit=True)
plt.show()
ser.close()
Moving average on acceleration signals
# plot only Z-axis
# moving average to reduce noise
# Noise and smooth
import serial
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import animation
ser = serial.Serial('COM4', 9600)
leng = 201
fig = plt.figure(figsize=(12, 6))
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
graph1, = ax1.plot([], [], 'b', label = 'Noisy')
graph2, = ax2.plot([], [], 'r', label = 'Smooth')
axes = [ax1, ax2]
for ax in axes:
ax.set_xlim(0, leng-1)
ax.set_ylim(-2, 2)
ax.set_ylabel('Acceleration [G]')
ax.legend(loc='upper right')
ax.grid(True)
ax1.set_title('Real-time noisy data')
ax2.set_title('Real-time smooth data')
time = list(range(0, leng))
acc1 = []
smooth = []
for i in range(0, leng):
acc1.append(0)
smooth.append(0)
def init():
graph1.set_data([], [])
graph2.set_data([], [])
return graph1, graph2
def animate(i):
global time, acc1, smooth
while (ser.inWaiting() == 0):
pass
try:
arduinoString = ser.readline().decode("utf-8")
dataArray = (arduinoString.split(','))
acc1.append(float(dataArray[2])/(32767/2))
acc1.pop(0)
m = 5
smooth.append(np.mean(acc1[leng-m:]))
smooth.pop(0)
except:
pass
graph1.set_data(time, acc1)
graph2.set_data(time, smooth)
return graph1, graph2
delay = 20
anim = animation.FuncAnimation(fig, animate, init_func=init,
interval=delay, blit=True)
plt.show()
ser.close()
Edge detection to detect sudden flip of IMU sensor
# plot only Z-axis
# edge detector
import serial
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import animation
ser = serial.Serial('COM4', 9600)
leng = 201
fig = plt.figure(figsize=(12, 6))
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
graph1, = ax1.plot([], [], 'b', label = 'Noisy')
graph2, = ax2.plot([], [], 'r', label = 'Sharp Changes')
axes = [ax1, ax2]
for ax in axes:
ax.set_xlim(0, leng-1)
ax.set_ylim(-2, 2)
ax.set_ylabel('Acceleration [G]')
ax.legend(loc='upper right')
ax.grid(True)
ax1.set_title('Real-time noisy data')
ax2.set_title('Real-time smooth data')
t = list(range(0, leng))
acc1 = []
edge = []
for i in range(0, leng):
acc1.append(0)
edge.append(0)
def init():
graph1.set_data([], [])
graph2.set_data([], [])
return graph1, graph2
def animate(i):
global t, acc1, edge
while (ser.inWaiting() == 0):
pass
try:
arduinoString = ser.readline().decode("utf-8")
dataArray = (arduinoString.split(','))
acc1.append(float(dataArray[2])/(32767/2))
acc1.pop(0)
edge.append((acc1[leng-3] + acc1[leng-2] - acc1[leng-1] - acc1[leng])/4)
edge.pop(0)
except:
pass
graph1.set_data(t, acc1)
graph2.set_data(t, edge)
return graph1, graph2
delay = 0
anim = animation.FuncAnimation(fig, animate, init_func=init,
interval=delay, blit=True)
plt.show()
ser.close()
In 1959, Arthur Samuel defined machine learning as a "Field of study that gives computers the ability to learn without being explicitly programmed"
$\qquad \;$(source: lecture video from The Machine Learning Summer School by Zoubin Ghahramani, Univ. of Cambridge)
Matlab: Signal Processing and Machine Learning Techniques for Sensor Data Analytics
A statistical process for estimating the relationships among variables
The goal is to make quantitative (real valued) predictions on the basis of a (vector of) features or attributes
Begin by considering linear regression (easy to extend to more comlex predictions later on)
Re-cast problem as a least squares
Note
Then we can use a linear algebra
# data points in column vector [input, output]
x = np.array([[0.1, 0.4, 0.7, 1.2, 1.3, 1.7, 2.2, 2.8, 3.0, 4.0, 4.3, 4.4, 4.9]]).T
y = np.array([[0.5, 0.9, 1.1, 1.5, 1.5, 2.0, 2.2, 2.8, 2.7, 3.0, 3.5, 3.7, 3.9]]).T
m = len(y)
A = np.hstack((x, np.ones((m, 1))))
theta = np.linalg.inv(A.T.dot(A)).dot(A.T).dot(y)
## to plot
plt.plot(x, y, 'ko')
# to plot a straight line (fitted line)
xp = np.arange(0, 5, 0.1)
yp = theta[0]*xp + theta[1];
plt.plot(xp, yp, 'r', linewidth=2)
plt.axis('equal')
plt.legend(['data', '$L_{2}$']);
plt.show()
where $y$ is a discrete value
start with binary class problems
We could use linear regression
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')