#!/usr/bin/env python
# encoding: utf-8
import os
import h5py
import logging
import glob
import numpy as np
from obspy.core import utcdatetime
from itertools import count, islice
from time import time
from OP_waveforms import read_data_compatible_with_time_dict
from NllGridLib import read_hdr_file
from hdf5_grids import get_interpolated_time_grids
from filters import smooth
def do_migration_setup_and_run(opdict):
"""
Do setup and launch migration.
:param opdict: WavelocOptions.opdict
"""
base_path = opdict['base_path']
runtime = opdict['time']
reloc = opdict['reloc']
# data
data_dir = os.path.join(base_path, 'data', opdict['datadir'])
    # choose the processed-waveform glob: kurtosis gradient if kderiv is set
    # (overridden by the gaussian glob if gauss is also set), else kurtosis
    if opdict['kderiv']:
        data_glob = opdict['gradglob']
        if opdict['gauss']:
            data_glob = opdict['gaussglob']
    else:
        data_glob = opdict['kurtglob']
data_files = glob.glob(os.path.join(data_dir, data_glob))
data_files.sort()
    if len(data_files) == 0:
        msg = 'No data files found for %s and %s' % (data_dir, data_glob)
        logging.error(msg)
        raise UserWarning(msg)
# grids
search_grid_filename = os.path.join(base_path, 'lib',
opdict['search_grid'])
time_grids = get_interpolated_time_grids(opdict)
    # start and end times
starttime = opdict['starttime']
endtime = opdict['endtime']
data_length = opdict['data_length']
data_overlap = opdict['data_overlap']
initial_start_time = utcdatetime.UTCDateTime(starttime)
initial_end_time = initial_start_time+data_length
final_end_time = utcdatetime.UTCDateTime(endtime)
time_shift_secs = data_length-data_overlap
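    # e.g. (illustrative numbers only) data_length=600 s and data_overlap=60 s
    # give windows [t0, t0+600], [t0+540, t0+1140], ... : successive windows
    # start time_shift_secs = 540 s apart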
######### FOR EACH TIME SPAN - DO MIGRATION #############
# start loop over time
start_time = initial_start_time
end_time = initial_end_time
if runtime:
t_ref = time()
while (start_time < final_end_time):
# read data
logging.info("Reading data : %s - %s." % (start_time.isoformat(),
end_time.isoformat()))
data, delta = \
read_data_compatible_with_time_dict(data_files, time_grids,
start_time, end_time)
if reloc:
tr_glob = opdict['kurtglob']
files = glob.glob(os.path.join(data_dir, tr_glob))
traces, delta = \
read_data_compatible_with_time_dict(files, time_grids,
start_time, end_time)
sta_list = sorted(traces)
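            # mute stations whose signal-to-noise ratio (trace maximum over
            # mean absolute amplitude) falls below the reloc_snr threshold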
for staname in sta_list:
snr = np.max(traces[staname])/np.mean(np.abs(traces[staname]))
if snr < opdict['reloc_snr']:
data[staname] = np.zeros(len(data[staname]))
# re-read grid_info at each iteration to make sure it is a clean copy
grid_info = read_hdr_file(search_grid_filename)
        # do migration if we have enough data (3 stations is the bare minimum)
if len(data.keys()) >= 3:
logging.info("Migrating data : %s - %s." % (start_time.isoformat(),
end_time.isoformat()))
do_migration_loop_continuous(opdict, data, delta, start_time,
grid_info, time_grids)
elif len(data.keys()) == 0:
logging.warn('No data found between %s and %s.' %
(start_time.isoformat(), end_time.isoformat()))
else:
logging.warn('Insufficient data found between %s and %s.' %
(start_time.isoformat(), end_time.isoformat()))
# Reset the start and end times to loop again
start_time = start_time+time_shift_secs
end_time = end_time+time_shift_secs
if runtime:
t = time()-t_ref
logging.info("Time for migrating all time slices : %.2f s\n" % (t))
def do_migration_loop_continuous(opdict, data, delta, start_time, grid_info,
                                 time_grids, keep_grid=False,
                                 keep_stacks=True):
"""
Do continuous migration loop. TODO : flesh out this doc-string.
"""
logging.info("Processing time slice %s" % start_time.isoformat())
options_time = opdict['time']
use_ram = opdict['use_ram']
output_dir = os.path.join(opdict['base_path'], 'out', opdict['outdir'])
options_reloc = opdict['reloc']
nx = grid_info['nx']
ny = grid_info['ny']
nz = grid_info['nz']
n_buf = nx*ny*nz
min_npts = min([len(data[key]) for key in data.keys()])
dx = grid_info['dx']
dy = grid_info['dy']
dz = grid_info['dz']
x_orig = grid_info['x_orig']
y_orig = grid_info['y_orig']
z_orig = grid_info['z_orig']
if options_time:
t_ref = time()
# open hdf5 file for stack_grid
if options_reloc:
grid_filename = os.path.join(output_dir, 'grid',
'reloc_stack_grid_%s.hdf5' % start_time)
else:
grid_filename = os.path.join(output_dir, 'grid',
'stack_grid_%s.hdf5' % start_time)
logging.info('Creating grid file %s' % grid_filename)
# if running on small memory machine write to hdf5 file
if not use_ram:
f = h5py.File(grid_filename, 'w')
stack_grid = f.create_dataset('stack_grid', (n_buf, min_npts), 'f',
chunks=(1, min_npts))
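        # chunks of one grid point keep each point's full time series in a
        # single contiguous chunk on disk, matching the row-wise writes done
        # during migration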
stack_grid[...] = 0.
else:
# if running on big memory machine work in ram
stack_grid = np.empty((n_buf, min_npts), dtype='float32', order='F')
stack_grid[...] = 0.
    # DO MIGRATION
    # logging.debug("Data sum = %.6f" % np.sum(data.values()))
stack_shift_time = migrate_4D_stack(data, delta, time_grids, stack_grid,
use_ram)
stack_start_time = start_time-stack_shift_time
n_buf, nt = stack_grid.shape
if options_time:
t = time()-t_ref
logging.info("Time for migrating %d stacks, each of extent %d points :\
%.2f s\n" % (n_buf, nt, t))
if keep_stacks:
if options_time:
t_ref = time()
if options_reloc:
stack_filename = os.path.join(output_dir, 'stack',
'reloc_stack_all_%s.hdf5' %
start_time)
else:
stack_filename = os.path.join(output_dir, 'stack',
'stack_all_%s.hdf5' % start_time)
logging.info('Extracting max_val etc. to %s' % stack_filename)
f_stack = h5py.File(stack_filename, 'w')
# extract maxima
extract_max_values(stack_grid, grid_info, f_stack, use_ram=use_ram)
for name in f_stack:
dset = f_stack[name]
logging.debug('After extract_max_values : %s %f %f' %
(name, np.max(dset), np.sum(dset)))
dset.attrs['start_time'] = stack_start_time.isoformat()
dset.attrs['dt'] = delta
f_stack.close()
# keep the stack_filename
grid_info['stack_file'] = stack_filename
if options_time:
t = time()-t_ref
logging.info("Time for extracting maxima : %.2f s\n" % (t))
if keep_grid:
if not use_ram:
# working on small-ram machine
sg = stack_grid
else:
# working on big-ram machine
        # create the hdf5 file
f = h5py.File(grid_filename, 'w')
sg = f.create_dataset('stack_grid', data=stack_grid)
# add useful attributes to the hdf5 dataset
for key, value in grid_info.iteritems():
sg.attrs[key] = value
sg.attrs['dt'] = delta
sg.attrs['start_time'] = stack_start_time.isoformat()
# keep the grid_filename
grid_info['dat_file'] = grid_filename
# hdf5 files cannot have complex attributes
# so add these to grid_info after having added
# useful info to stack_grid.attrs
grid_info['grid_spacing'] = (dx, dy, dz, delta)
grid_info['grid_orig'] = (x_orig, y_orig, z_orig)
grid_info['grid_shape'] = (nx, ny, nz, nt)
grid_info['start_time'] = stack_start_time
# close the hdf5 file for the grid
if (not use_ram) or keep_grid:
f.close()
# remove the grid file unless you want to keep it
if (not keep_grid) and (not use_ram):
logging.info('Removing grid file %s' % grid_filename)
os.remove(grid_filename)
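

# Read-back sketch (helper name is hypothetical): recover a grid written by
# do_migration_loop_continuous with keep_grid=True, together with the
# attributes attached to the 'stack_grid' dataset above.
def _example_read_stack_grid(grid_filename):
    f = h5py.File(grid_filename, 'r')
    sg = f['stack_grid']
    dt = sg.attrs['dt']               # sampling interval in seconds
    t0 = sg.attrs['start_time']       # isoformat string of stack_start_time
    grid = sg[...]                    # load the full grid into memory
    f.close()
    return grid, dt, t0
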
def migrate_4D_stack(data, delta, time_grids, stack_grid, use_ram=False):
"""
Migrate the 4D stack. Central part of the waveloc process.
TODO : flesh out this doc-string
:param data:
:param delta:
:param time_grids:
:param stack_grid:
:param use_ram:
"""
# save the list of data keys
    # note : the keys of data are all included in the keys of time_grids,
    # but there may be more travel-time grids than data streams
wf_ids = data.keys()
n_wf_ids = len(wf_ids)
n_buf, min_npts = stack_grid.shape
logging.debug("Stack max dimension = %d x %d" % (n_buf, min_npts))
    # initialize the travel-time index array (filled row by row below); the
    # min/max/start/end index arrays are created by the vectorized
    # expressions further down
    i_times = np.zeros((n_wf_ids, n_buf), dtype='int')
# construct grid (n_buf x n_sta) grid of time_indexes for migration
for i in islice(count(0), n_wf_ids):
wf_id = wf_ids[i]
        i_times[i, :] = np.round(time_grids[wf_id].grid_data[:]/delta)
    # find the min and max time indexes for each point
i_min_times = np.min(i_times, 0)
i_max_times = np.max(i_times, 0)
iextreme_min_time = np.min(i_min_times)
iextreme_max_time = np.max(i_max_times)
start_index = i_min_times-iextreme_min_time
stack_shift_time = delta*iextreme_min_time
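    # sample 0 of the stack therefore corresponds to an origin time of
    # start_time - stack_shift_time; the caller undoes this shift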
# find start indexes, end indexes and lengths for each station and point
start_indexes = i_times-i_min_times
end_indexes = i_times-i_max_times+min_npts
n_lens = end_indexes-start_indexes
# keep the shortest length for each point
n_len = np.min(n_lens, 0)
# keep the shortest overall length
shortest_n_len = np.min(n_len)
    # will fix the length of the stack to the shortest possible length given
    # all the previous travel time information
norm_stack_len = shortest_n_len-iextreme_max_time
    if norm_stack_len < 0:
        # fail fast: a negative stack length would corrupt the slicing and
        # resizing below
        logging.error('Data length too short for coherent migration '
                      'across network')
        raise ValueError('Data length too short for coherent migration')
# the actual migration loop
# cannot seem to vectorize this any more... too bad !!
for ib in islice(count(0), n_buf):
# This is ugly, but necessary to avoid memory leak from inner loop
_do_stack(ib, n_wf_ids, wf_ids, stack_grid, data, min_npts, n_lens,
start_indexes, end_indexes, start_index, norm_stack_len)
# clean up what is no longer needed
del i_times, i_min_times, i_max_times, start_indexes, end_indexes
del n_lens, start_index
# resize stack_grid
if use_ram:
stack_grid.resize((n_buf, norm_stack_len), refcheck=False)
else:
stack_grid.resize(norm_stack_len, axis=1)
# end
return stack_shift_time
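

# Toy illustration (the stand-in class and numbers are hypothetical, not
# waveloc API): migrate two synthetic traces onto a one-point grid. Each
# trace is shifted back by its travel time in samples before summing, so
# the stack is aligned on origin time.
class _FakeTimeGrid(object):
    def __init__(self, travel_times):
        self.grid_data = np.array(travel_times)   # seconds, one per point


def _example_migrate_two_stations():
    delta = 0.01                                  # 100 Hz sampling
    data = {'STA1': np.ones(100, dtype='float32'),
            'STA2': np.ones(100, dtype='float32')}
    time_grids = {'STA1': _FakeTimeGrid([0.05]),  # 5-sample travel time
                  'STA2': _FakeTimeGrid([0.10])}  # 10-sample travel time
    stack_grid = np.zeros((1, 100), dtype='float32')
    shift = migrate_4D_stack(data, delta, time_grids, stack_grid,
                             use_ram=True)
    # stack_grid now has shape (1, 85) and shift == 0.05 s (the smallest
    # travel time on the grid)
    return stack_grid, shift
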
def _do_stack(ib, n_wf_ids, wf_ids, stack_grid, data, min_npts, n_lens,
start_indexes, end_indexes, start_index, norm_stack_len):
tmp_stack = np.empty(min_npts, dtype='float32')
tmp_stack[:] = 0.
# stack shifted data from each station
for i in islice(count(0), n_wf_ids):
tmp_stack[0:n_lens[i, ib]] += \
data[wf_ids[i]][start_indexes[i, ib]:end_indexes[i, ib]]
# We need to homogenize, and get everything to start and end at the same
# time
stack_grid[ib, 0:norm_stack_len] = \
tmp_stack[start_index[ib]:start_index[ib]+norm_stack_len]
# cleanup
del tmp_stack