% CCRMA MIR Workshop 2009
% Lab1.6: Frequency-domain Onset Detection
% Kyogu Lee (June 2009)
%
% Loads a short audio excerpt, computes a hop-size-1 spectrogram and derives
% several onset detection functions from it:
%   e    - frame energy
%   de   - window-smoothed first-order energy difference
%   hfc  - high frequency content (frequency-weighted energy)
%   dhfc - window-smoothed first-order HFC difference
%   sf   - spectral flux (computed by the external function spectralflux)
% All of them are plotted against time in a 3x2 figure.

clear all; close all;

% read audio file
%wavfile = '../wav/T39-piano-8k.wav';
wavfile = '../wav/T08-violin-8k.wav';
if exist('audioread') ~= 0
    % wavread is no longer available in recent MATLAB/Octave releases
    [x,fs] = audioread(wavfile);
else
    [x,fs] = wavread(wavfile);
end

if size(x,2) > size(x,1)
    x = x';                          % make samples run down the rows
end
x = mean(x,2);                       % make mono
x = x(1:min(length(x), 2*fs));       % keep at most the first 2 seconds

% first compute spectrogram
M = 256;
win = hamming(M);
[spec,fi,ti] = specgram(x,M,fs,win,M-1);   % sliding FFT (hopsize=1)
nframe = length(ti);
pow = abs(spec).^2;                  % power spectrum of every frame

% The smoothing below is a correlation of the difference signal with the
% window. conv() with the reversed window and the 'valid' shape computes
% exactly sum(win'.*d(i:i+M-1)) for i = 1..length(d)-M+1, and yields an
% empty result (instead of leaving the variable undefined) when the signal
% is shorter than the window.
smoothker = win(end:-1:1).';

% energy
e = sum(pow,1);

% energy difference
diffe = diff(e);                     % first-order difference
de = conv(diffe, smoothker, 'valid')/M;

% high frequency content (HFC): energy weighted by bin frequency
%hfc = ([0:M/2] * pow)/(M/2+1);      % alternative: weight by bin index
hfc = (fi(:).' * pow)/(M/2+1);

% HFC difference
diffhfc = diff(hfc);
dhfc = conv(diffhfc, smoothker, 'valid')/M;

% spectral difference (SD) or spectral flux
% function [sf,T] = spectralflux(x,fs,Nfft,overlap)
[sf,T] = spectralflux(x,fs,M,M/2);

% display
% Frame-based curves are shifted by M/2 samples so that each value is drawn
% at the centre of its analysis frame.
% NOTE(review): de and dhfc are additionally smoothed over M frames, so their
% centre looks closer to an offset of M than M/2 - confirm before relying on
% the exact alignment between panels.
subplot(321); plot([0:length(x)-1]/fs,x);
title('signal'); ylabel('x(t)');
subplot(322); plot(([0:length(e)-1]+M/2)/fs,e);
title('energy'); ylabel('E(t)');
subplot(323); plot(([0:length(de)-1]+M/2)/fs,de);
title('energy difference'); ylabel('dE(t)');
subplot(324); plot(([0:length(hfc)-1]+M/2)/fs,hfc);
title('high frequency content'); ylabel('HFC(t)');
subplot(325); plot(([0:length(dhfc)-1]+M/2)/fs,dhfc);
title('HFC difference'); ylabel('dHFC(t)');
subplot(326); plot(T,sf);
title('spectral flux'); ylabel('SFlux(t)');
xlabel('time (seconds)');