Source code for correlation

#!/usr/bin/env python
# encoding: utf-8
"""
Provides classes and functions for cross-correlation.
"""

import os
import glob
import time
import cPickle
import logging
import matplotlib.pyplot as plt
import numpy as np
from OP_waveforms import Waveform
from locations_trigger import read_locs_from_file


[docs]class BinaryFile(object):
    """
    Class that reads, writes and stores the binary files containing the
    correlation matrix.

    """
    def __init__(self, filename):
        """
        :param filename: Filename of binary file to be read/written
        :type filename: string

        """
        self.filename = filename

[docs]    def read_binary_file(self):
        """
        Reads the binary file from disk and returns un-pickled data
        :rtype: numpy array ?
        :returns: data read from tile
        """
        with open(self.filename, 'rb') as test:
            my_depickler = cPickle.Unpickler(test)
            result = my_depickler.load()
            test.close()
        return result

[docs]    def write_binary_file(self, input):
        """
        Writes a binary file from disk

        :param input: input data ready to be pickled
        """
        with open(self.filename, 'wb') as test:
            my_pickler = cPickle.Pickler(test)
            my_pickler.dump(input)
            test.close()


[docs]def fourier(x, y, dt):
    """
    Computes cross-correlation and auto-correlation vectors of two signals in
    the frequency domain.

    :param x: first signal
    :param y: second signal
    :param dt: time-step of both signals

    :type x: numpy array
    :type y: numpy array
    :type dt: float

    :rtype: numpy arrays
    :returns: Cxy, Cxx, Cyy, f

    """

    # remove the mean of both signals
    x = x-np.mean(x)
    y = y-np.mean(y)

    # compute fft
    X = np.fft.fft(x)
    Y = np.fft.fft(y)

    # Compute the cross-correlation and auto-correlation vectors
    Cxx, Cyy, Cxy = [], [], []
    Cxy = np.conjugate(X)*Y
    Cxx = np.conjugate(X)*X
    Cyy = np.conjugate(Y)*Y
    f = np.fft.fftfreq(len(x), d=dt)

    return Cxy, Cxx, Cyy, f


[docs]def corr_time(Cxy, Cxx, Cyy, dt, v):
    """
    Returns maximum of cross-correlation and time-lag given frequency domain
    cross- and auto- correlation vectors.

    :param Cxy: frequency domain cross-correlation
    :param Cxx: frequency domain auto-correlation
    :param Cyy: frequency domain auto-correlation
    :param dt: time step
    :param v: If ``True`` plots cross-correlation in the time-domain

    :type Cxy: numpy array
    :type Cxx: numpy array
    :type Cyy: numpy array
    :type dt: float
    :type v: boolean

    :rtype: float
    :returns: value_t, tau_t, respectively the cross-correlation maximum value
        and time-lag
    """

    # get time-domain arrays
    cxy = np.fft.ifft(Cxy)
    cxx = np.fft.ifft(Cxx)
    cyy = np.fft.ifft(Cyy)

    # time vector
    tc = np.arange(-np.floor(len(cxy)/2)*dt, np.floor(len(cxy)/2)*dt+dt, dt)

    # normalize the cross-correlation vector (cf Schwartz inequality)
    cxy = cxy/np.sqrt(cxx[0]*cyy[0])

    # rewrite the cross-correlation vector cxy in proper order
    cxy[len(cxy)/2+1:len(cxy)] = cxy[-1:-len(cxy)/2:-1]    # "positive" part
    cxy[0:len(cxy)/2+1] = cxy[-len(cxy)/2:-len(cxy)-1:-1]  # "negative" part
    if v:
        fig = plt.figure()
        fig.set_facecolor('white')
        plt.plot(cxy, 'k')
        plt.show()

    # get maximum of cross-correlation and time lag
    value_t = np.abs(np.max(cxy))
    tau_t = tc[np.argmax(cxy)]

    return value_t, tau_t


[docs]def corr_freq(f, Cxy, v):
    """
    Computes the time lag entirely in the spectral domain, using the slope of
    the phase spectrum where the amplitude is strong.

    :param f: frequency vector
    :param Cxy: frequency domain cross-correlation vector
    :param v: If ``True`` displays Cxy

    :type f: numpy array
    :type Cxy: numpy array
    :type v: boolean

    :rtype: float
    :returns: time-lag

    """

    # Determine the limits of the amplitude spectrum
    mini, maxi = cum(np.abs(Cxy[0:len(f)/2]), v)
    fmin, fmax = f[mini], f[maxi]
    f_min_max = f[mini:maxi]
    # Plot
    if v:
        display(Cxy, f, f_min_max)
        plt.show()

    # Compute the slope of the phase spectrum where the amplitude is strong...
    # This is only performed after time-realignment - do not unwrap the phase
    phase_min_max = np.angle(Cxy[mini:maxi], deg=False)
    a = sum(f_min_max*phase_min_max)/sum(f_min_max**2)

    # ... and deduce the time delay
    tau_f = a/(2*np.pi)

    return tau_f


[docs]def cum(x,v):
    """
    Determines the minimum and maximum indices of the most significative part of a signal.
    (Here, determines the part of the amplitude spectrum which is useful)

    :param x: signal
    :param v: If ``True`` then plot x and cumulative sum of x

    :type x: numpy array
    :type v: boolean

    :rtype: int
    :returns: mini, maxi

    """

    s = np.cumsum(x)
    p = np.polyfit(range(len(x)), s, deg=1)
    line = p[0]*np.arange(len(x))+p[1]
    l = s-line

    ind_min = np.where(l <= 0)[0][0]
    ind_max = np.where(l >= 0)[0][-1]+1
    mini = np.argmin(l[ind_min:ind_max])+ind_min
    maxi = np.argmax(l[ind_min:ind_max])+ind_min

    if v:
        fig = plt.figure()
        fig.set_facecolor('white')
        plt.plot(x, 'k')
        plt.plot(s, 'y')
        plt.plot(line, 'b')
        plt.plot(l, 'r')
        plt.plot(mini, l[mini], 'mo')
        plt.plot(maxi, l[maxi], 'mo')
        plt.show()

    return mini, maxi


[docs]def waveform(filename):
    """
    Convenience function to read a seismogram and return its data, time-step,
    station name and start-time

    :param filename: File to read
    :type filename: string

    :returns: val, dt, name, tdeb

        * val = seismogram values
        * dt = time-step
        * name = station name
        * tdeb = start-time (UTCDateTime)

    """

    wf = Waveform()
    wf.read_from_file(filename)
    name = wf.station
    val = wf.values
    dt = wf.delta
    tdeb = wf.starttime

    return val, dt, name, tdeb


[docs]def plot_waveform(x, y, dt, tau, ev1, ev2):
    """
    Plots the waveforms.
    On the first plot, both waveforms are superimposed.
    On the second plot, they are plotted separately.

    :param x: waveform
    :param y: waveform
    :param dt: time-step
    :param tau: time-lag
    :param ev1: name of event shown in waveform x
    :param ev2: name of event shown in waveform y

    :type x: numpy array
    :type y: numpy array
    :type dt: float
    :type tau: float
    :type ev1: string
    :type ev2: string

    """

    t = np.arange(len(x))*dt
    fig = plt.figure()
    fig.set_facecolor('white')

    for i in range(len(tau)):
        itau = tau[i]
        ax = fig.add_subplot(len(tau), 1, i+1)
        ax.plot(t, x/np.max(x))
        ax.plot(t+itau, y/np.max(y), 'r')

    plt.suptitle('Event pair: (%s, %s)' % (ev1, ev2))

    fig = plt.figure()
    fig.set_facecolor('white')
    ax1 = fig.add_subplot(211, title="event %s" % ev1)
    ax1.plot(t, x)
    ax2 = fig.add_subplot(212, title="event: %s" % ev2)
    ax2.plot(t, y)


[docs]def display(Cxy, f, f_min_max):
    """
    Displays amplitude and phase of the frequency domain cross-correlation.

    :param Cxy: frequency domain cross-correlation
    :param f: frequency vector
    :param f_min_max: 

    :type Cxy: numpy array
    :type f: numpy array
    :type f_min_max: numpy array

    """
    fig = plt.figure()
    fig.set_facecolor('white')
    ax1 = fig.add_subplot(211, title="Amplitude spectrum")
    ax1.plot(f[0:len(f)/2], np.abs(Cxy[0:len(f)/2]))
    ax2 = fig.add_subplot(212, title="Phase spectrum")
    ax2.plot(f[0:len(f)/2], np.angle(Cxy[0:len(f)/2], deg=False), '+')
    ax2.plot(f_min_max, np.zeros(len(f_min_max)), 'r')


[docs]def correlate(x, y, dt, v, a):
    """
    Performs correlation on two signals.

    :param x: waveform
    :param y: waveform
    :param dt: time-step
    :param v: If ``True`` makes plots
    :param a: ['t' | 'f'] for time-domain or frequency-domain extraction of
        time-lag

    :type x: numpy array
    :type y: numpy array
    :type dt: float
    :type v: boolean
    :type a: char

    :rtype: float
    :returns: The returned values depend on the *a* input parameter:

        * a='t' : returns time-lag and correlation value
        * a='f' : returns time-lag

    """

    Cxy, Cxx, Cyy, f = fourier(x, y, dt)
    if a == 't':
        corr, tau = corr_time(Cxy, Cxx, Cyy, dt, v)
        return tau, corr
    if a == 'f':
        tau = corr_freq(f, Cxy, v)
        return tau


[docs]def do_correlation_setup_and_run(opdict):
    """
    Runs correlation using options contained in a WavelocOptions.opdict
    dictionary.

    Explores and cross-correlates all possible event pairs of the Waveloc location file at all stations.
    Writes the correlation coefficients and time delays in 2-D numpy arrays for each station and saves 
    the final dictionaries into 2 binary files.

    The correlation first takes place in the time domain. If the correlation value is over a given 
    threshold, the correlation is also performed in the frequency domain so that a subsample precision 
    can be obtained on the time delay.
    """

    base_path = opdict['base_path']
    verbose = opdict['verbose']

    # data
    data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    data_glob = opdict['dataglob']
    data_files = glob.glob(os.path.join(data_dir, data_glob))
    data_files.sort()

    # location file
    locdir = os.path.join(base_path, 'out', opdict['outdir'], 'loc')
    locfile = os.path.join(locdir, 'locations.dat')

    # file containing correlation values
    coeff_file = os.path.join(locdir, opdict['xcorr_corr'])
    # file containing time delays
    delay_file = os.path.join(locdir, opdict['xcorr_delay'])

    # threshold and time window
    threshold = float(opdict['xcorr_threshold'])
    t_before = float(opdict['xcorr_before'])
    t_after = float(opdict['xcorr_after'])

    # Read location
    locs = read_locs_from_file(locfile)

    # Create 2 empty dictionnaries
    coeff = {}
    delay = {}

    # ------------------------------------------------------------------------
    # MAIN PROGRAM : Compute the correlation value and the time delay for each
    # possible event pair

    tref = time.time()
    for file in data_files:
        event = 0
        val_all, dt, name, tdeb = waveform(file)
        logging.info(name)

        coeff[name] = []
        delay[name] = []

        for loc_a in locs:
            event = event+1
            stack_time = loc_a['o_time']-tdeb

            start_time = int(round((stack_time-t_before)*1./dt))
            end_time = int(round((stack_time+t_after)*1./dt))
            val1 = val_all[start_time-1:end_time]

            compteur = event-1

            liste, list_tau = [], []
            # Add zeros to liste as we only compute a semi matrix
            liste.extend(np.zeros(event-1))
            list_tau.extend(np.zeros(event-1))

            # for every event occurring after the event "event" ; replace by
            # event-1 if the autocorrelation is considered
            for loc_b in locs[event-1:]:
                compteur = compteur+1
                stack_time_2 = loc_b['o_time']-tdeb
                start_time_2 = int((stack_time_2-t_before)*1./dt)
                end_time_2 = int((stack_time_2+t_after)*1./dt)

                val2 = val_all[start_time_2-1:end_time_2]

                if not val1.any() or not val2.any():
                    liste.append('NaN')
                    list_tau.append('NaN')
                else:
                    tau, value = correlate(val1, val2, dt, verbose, 't')
                    if value > threshold and event != compteur:
                        ntau = int(round(tau*1./dt))
                        val3 = val_all[start_time_2-ntau-1:end_time_2-ntau]

                        tau_t = tau
                        tau = correlate(val1, val3, dt, verbose, 'f')
                        tau = tau_t+tau

                        if verbose:
                            tau_f = tau
                            plot_waveform(val1, val2, dt, [tau_t, tau_f],
                                          event, compteur)
                            print "time: %.4f, %.4f" % (value, tau_t)
                            print "frequency : %.4f, %.4f" % (value, tau_f)
                            print "final delay : %.4f" % tau
                            plt.show()

                    liste.append(round(value*10**2)/10**2)
                    if tau.size:
                        list_tau.append(round(tau*10**4)/10**4)
                    else:
                        list_tau.append('NaN')

            coeff[name].append(liste)
            delay[name].append(list_tau)

    # finished run, print timing info
    print "Elapsed time: ", time.time()-tref

    # Save the results in 2 binary files
    logging.info("Saving coeff and delay files")
    a = BinaryFile(coeff_file)
    a.write_binary_file(coeff)

    b = BinaryFile(delay_file)
    b.write_binary_file(delay)
Navigation

Source code for correlation

Quick search

Navigation