BLM_4: Spectrogram

BLM_4: Spectrogram#

Spectrograms are the work horse of bioacousticians to visualize complex acoustic data.

In a nutshell, spectrograms are a sequence of power spectra carried out over consecutive small time slices of the acoutsic recording. To compensate for windowing edge effects and to ‘smooth’ temporal appearence, temporal overlap is implemented.

import numpy as np
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 14}) # increase fontsize in figures

Simulation#

fs = 1024   #Hz
nfft=1024
nwin=nfft

#    Generate a 1 second test signal, a 2 Vrms sine wave at fs/8, corrupted by
#    0.001 V**2/Hz of white noise sampled at  1.024 kHz.

N = fs
time = np.arange(N) / fs

# signal description
arms=2
amp = arms*np.sqrt(2)
freq = 30

var = 0.001
noise_power = var * fs / 2 
# The factor of fs/2 converts the per sample noise power (variance) to a noise bandwith of -1/T to 1/T (negative and positive frequencies) 

xx = amp*np.sin(2*np.pi*freq*time)
rms= np.sqrt(np.mean(xx**2))

# add noise to signal
xx += np.random.normal(scale=np.sqrt(noise_power), size=time.shape)

print("Signal RMS : ",amp/np.sqrt(2), rms)

#
yy=np.fft.rfft(xx,nfft)
pp=np.abs(yy)**2

ff=np.arange(int(nfft/2)+1)*fs/nfft

ppmn=np.mean(pp[ff>50])
print("Noise ",ppmn)

fig = plt.figure("figure.figsize",[15,10])

plt.subplot(211)
plt.plot(time,xx)
plt.grid(True)
plt.plot(time,amp/np.sqrt(2)+0*time,'r')

plt.subplot(212)
plt.semilogy(ff, pp)
plt.plot(ff,ppmn+0*ff,'r')
plt.xlabel('Frequency [Hz]')
plt.ylabel('Spectrum [V**2]')
plt.grid(True)
Signal RMS :  2.0 2.0
Noise  503.6593868974355
../_images/0ccbda5dd2ce3bec48f4d1c713b147473ac578ab63f354bdfcc7ea13177c4cde.png
fs = 1024   #Hz
nfft=1024
nwin=nfft

#    Generate a 1 second test signal, a 2 Vrms sine wave at fs/8, corrupted by
#    0.001 V**2/Hz of white noise sampled at  1.024 kHz.

N = fs
time = np.arange(N) / fs

# signal description
arms=2
amp = arms*np.sqrt(2)

freq = 30
bw=200

xx = amp*np.sin(2*np.pi*(freq+bw/2*time)*time)
rms= np.sqrt(np.mean(xx**2))

# add noise to signal
var = 0.001
noise_power = var * fs / 2 
# The factor of fs/2 converts the per sample noise power (variance) to a noise bandwith of -1/T to 1/T (negative and positive frequencies) 


xx += np.random.normal(scale=np.sqrt(noise_power), size=time.shape)

print("Signal RMS : ",amp/np.sqrt(2), rms)

#
yy=np.fft.rfft(xx,nfft)
pp=np.abs(yy)**2

ff=np.arange(int(nfft/2)+1)*fs/nfft

ppmn=np.mean(pp[ff>250])
print("Noise ",ppmn)

fig = plt.figure("figure.figsize",[15,10])

plt.subplot(211)
plt.plot(time,xx)
plt.grid(True)
plt.plot(time,amp/np.sqrt(2)+0*time,'r')

plt.subplot(212)
plt.semilogy(ff, pp)
plt.plot(ff,ppmn+0*ff,'r')
plt.xlabel('Frequency [Hz]')
plt.ylabel('Spectrum [V**2]')
plt.grid(True)
plt.show()
Signal RMS :  2.0 1.9999533593066727
Noise  498.02056006554574
../_images/b148feb98ed67fa04fb777634ee5168649fe8cfadfce55cff202546664e41f9f.png
fig = plt.figure("figure.figsize",[15,10])

plt.specgram(xx,64,1024,noverlap=32)
plt.grid(True)
plt.show()
../_images/749c41cef1f5dfcdbb0a5667ba63f9ffa8f75d6779bf9b8a6e17851917d3d482.png

LFM sweep#

(230)#\[\begin{equation} x(t)= \sin{(2\pi f_0 t)} \end{equation}\]

becomes

(231)#\[\begin{equation} x(t)= \sin{(2\pi(f_0+\frac{B}{2}t)t)}=\sin{(2\pi(f_0 t+\frac{B}{2}t^2))}=\sin{(\phi(t))} \end{equation}\]

Frequency estimation from data

(232)#\[\begin{equation} f(t) = \frac{1}{2\pi}\frac{d}{dt} \phi(t) = f_0+Bt \end{equation}\]

Real Example#

# custom spectrum estimation
def mSpectrum(xx,win,fs,nfft,mode=None):
    # xx: real valued time series if at least nfft samples
    # nfft: length of fft in samples
    # fs: sampling frequency
    # mode: RMS for scaling of spectral peakes to signal RMS
    #       VAR for scaling of spectral mean values to per sample variance
    #       PSD for scaling of power spectral density to (v^2/Hz)
    #       None for power spectrum (FFT output) (Default)
    # output: P power estimates according to mode
    #         ff frequency vector
    nwin=len(win)
    if nfft<nwin: nfft=nwin # force nfft to be at least window length
    nfft=int(2**np.ceil(np.log2(nfft))) # increase nfft to next power of 2
    #
    # copy data to working buffer zero-pad if required
    uu=np.zeros(nfft)
    uu[:nwin]=xx[:nwin]*win
    
    yy=np.fft.rfft(uu,nfft)
    ff=np.arange(int(nfft/2)+1)*fs/nfft

    pp=np.abs(yy)**2
    if mode=="RMS":
        return pp/win.sum()**2, ff
    elif mode=="VAR":
        pp[1:] *=2
        return pp/(nwin*(win*win).sum()), ff
    elif mode=="PSD":
        pp[1:] *=2
        return pp/(fs*(win*win).sum()), ff
    else:
        return pp, ff

Load some data#

fname="../Data/Stenella.wav"

import wave

ich=0

no=0
ns=7

wf = wave.open(fname)
fs=wf.getframerate()
nch=wf.getnchannels()
#
nd=wf.getnframes()
if no*fs < nd:  
    wf.setpos(no*fs)
else:
    print("offset ",no," > ",nd/fs)

nd -= no*fs
#
nx = ns*fs
if nx> nd: nx=nd
xb=wf.readframes(nx)
wf.close()

xx=np.frombuffer(xb, dtype=np.int16).reshape(nx,nch)
if nch>1:
    xx = xx[:,ich]

print("xx= ",np.shape(xx))
tt=np.arange(nx)/fs

fig = plt.figure("figsize",[10,10])
ax1=plt.subplot(211)
plt.plot(tt,xx);
plt.xlim(0,tt[-1])
plt.title('start time: '+str(no)+' s')

# spectrogram
nw=512
nst=int(nw/2)
nfft=int(2*nw)
nfr=int(1+nfft/2)

ww=np.hanning(nw)
n2=int((nx-nw)/nw*2)
#
S = np.zeros((nfr,n2))
for ii in range(n2):
    io=ii*nst
    uu = xx[io:(io+nw),ich]*ww
    S[:,ii],ff = mSpectrum(uu,ww,fs,nfft)
print("S= ",np.shape(S))

# for too many time bins compress (peak picking) for better visualization (emphasize clicks)
if n2>1000:
    M = np.zeros((nfr,1000))
    for ii in range(n2):
        jj = int(ii*1000/n2)
        M[:,jj] = np.maximum(M[:,jj],S[:,ii])
else:
    M = S

mx=10*np.log10(np.max(M))
#cmap='winter'
#cmap='YlGnBu'
cmap='BuGn'
ax2=plt.subplot(212,sharex=ax1)
im=plt.imshow(10*np.log10(M), cmap=cmap,origin='lower', aspect='auto',extent=(0,ns,0,ff[-1]/1000),clim=(mx-60,mx))
plt.xlabel('Time [s]')
plt.ylabel('Frequency [kHz]')

plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)
cax = plt.axes([0.82, 0.1, 0.02, 0.36])
plt.colorbar(cax=cax)
plt.show()
xx=  (308700, 1)
S=  (513, 1203)
../_images/540b3c25098472b04b1a236b6e9c0e52d3a8735b78baa2a3606cd16d42554d6f.png