Skip to content

Commit

Permalink
Segy (#270)
Browse files Browse the repository at this point in the history
* initiate rsf IO format

* Added SEGY reader and tests

* Added segyio as a dependency

* remove rsf remnants, fix path name

* Fixed segyio docstring

* add segy partial data read

* fix channel filter

* fix init docstring

---------

Co-authored-by: derrick chambers <[email protected]>
  • Loading branch information
aaronjgirard and d-chambers authored Oct 3, 2023
1 parent daab8f6 commit ad8e216
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 0 deletions.
20 changes: 20 additions & 0 deletions dascore/io/segy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
SEGY format support module.
Notes
-----
- Distance information is not found in most SEGY DAS files so returned
dimensions are "channel" and "time" rather than "distance" and "time".
Examples
--------
import dascore as dc
from dascore.utils.downloader import fetch
# get the path to a segy file.
path = fetch("conoco_segy_1.sgy")
segy_patch = dc.spool(path)[0]
"""

from .core import SegyV2
60 changes: 60 additions & 0 deletions dascore/io/segy/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""IO module for reading SEGY file format support."""
from __future__ import annotations

import segyio

import dascore as dc
from dascore.io.core import FiberIO

from .utils import _get_attrs, _get_coords, _get_filtered_data_and_coords


class SegyV2(FiberIO):
    """FiberIO implementation that reads version 2 of the SEGY format."""

    name = "segy"
    preferred_extensions = ("segy", "sgy")
    # The version is declared explicitly so that support for another SEGY
    # revision can be added later as a separate class in this module.
    version = "2"

    def get_format(self, path) -> tuple[str, str] | bool:
        """
        Determine whether path is a SEGY file.

        Returns
        -------
        The (name, version) tuple of this IO class when segyio opens the
        file without raising, otherwise False.
        """
        try:
            # Opening (and closing) the file is the whole check; nothing
            # needs to be read from it.
            with segyio.open(path, ignore_geometry=True):
                pass
        except Exception:
            return False
        return self.name, self.version

    def read(self, path, time=None, channel=None, **kwargs):
        """
        Read a SEGY file into a spool containing a single patch.

        Parameters
        ----------
        path
            The path of the SEGY file to read.
        time
            Optional selection along the time dimension. It is used both
            to limit the samples taken from each trace and in the final
            patch.select call.
        channel
            Optional selection along the channel dimension. It is used
            both to limit which traces are read and in the final
            patch.select call.
        **kwargs
            Accepted for interface compatibility; not used here.
        """
        with segyio.open(path, ignore_geometry=True) as segy_file:
            full_coords = _get_coords(segy_file)
            attrs = _get_attrs(segy_file, full_coords, path, self)
            data, selected_coords = _get_filtered_data_and_coords(
                segy_file, full_coords, time=time, channel=channel
            )

        selected = dc.Patch(
            coords=selected_coords, data=data, attrs=attrs
        ).select(time=time, channel=channel)
        return dc.spool([selected])

    def scan(self, path) -> list[dc.PatchAttrs]:
        """
        Get the metadata of a SEGY file without loading its trace data.

        Returns
        -------
        A one-element list holding the attrs built from the file's
        coordinates, its path, and this IO class.
        """
        with segyio.open(path, ignore_geometry=True) as segy_file:
            attrs = _get_attrs(segy_file, _get_coords(segy_file), path, self)
        return [attrs]
95 changes: 95 additions & 0 deletions dascore/io/segy/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""Utilities for segy."""

from __future__ import annotations

import datetime

import numpy as np
from segyio import TraceField

import dascore as dc
from dascore.core import get_coord_manager

# --- Reading data, coordinates, and attributes


def _get_filtered_data_and_coords(segy_fi, coords, time=None, channel=None):
    """
    Read trace data from an open segy file, possibly only a subsection.

    Parameters
    ----------
    segy_fi
        The open segy file; its ``header`` (for the trace count) and
        ``trace`` (for the samples) attributes are used.
    coords
        The coordinate manager of the whole file. Its "time" and
        "channel" coordinates are only accessed when the corresponding
        selection is given.
    time
        Selection handed to the time coordinate's ``select``. None means
        every sample of each trace is kept.
    channel
        Selection handed to the channel coordinate's ``select``. None
        means every trace is read.

    Returns
    -------
    A tuple of (data, coords): the selected traces stacked along the
    last axis, and the coordinate manager updated with any narrowed
    coordinates.
    """
    trace_inds = np.arange(len(segy_fi.header), dtype=np.int64)
    sample_slice = slice(None, None)
    traces = segy_fi.trace

    # Narrow the samples taken from each trace.
    if time is not None:
        time_coord = coords.coord_map["time"]
        new_coord, sample_slice = time_coord.select(time)
        coords = coords.update_coords(time=new_coord)

    # Narrow the traces to read. Test against None, exactly as for time:
    # a truthiness test raises for array-valued selections and silently
    # skips falsy ones.
    if channel is not None:
        channel_coord = coords.coord_map["channel"]
        new_coord, channel_inds = channel_coord.select(channel)
        coords = coords.update_coords(channel=new_coord)
        trace_inds = trace_inds[channel_inds]

    # Read only the requested traces, keeping only the requested samples.
    data_list = [traces[ind][sample_slice] for ind in trace_inds]
    return np.stack(data_list, axis=-1), coords


def _get_coords(fi):
    """
    Get the coordinates of the segy file.

    The time coordinate is built from the first trace header: the start
    time comes from its year, julian day, hour, minute, and second
    fields, and the spacing and length come from its sample interval and
    sample count.

    The second dimension is the channel (trace index), not a distance.
    If a user knows the dx, change from channel to distance using
    patch.update_coords after reading.

    Parameters
    ----------
    fi
        The open segy file; its ``header`` attribute is used.

    Returns
    -------
    A coordinate manager with dims ("time", "channel").
    """
    header_0 = fi.header[0]

    # Get the time array from the SEGY trace header.
    starttime = _get_time_from_header(header_0)
    # The SEG-Y trace header stores the sample interval in microseconds,
    # so it is scaled by 1e-6 to get seconds.
    # NOTE(review): assumes dc.to_timedelta64 treats floats as seconds -
    # confirm against its documentation.
    sample_interval_us = header_0[TraceField.TRACE_SAMPLE_INTERVAL]
    dt = dc.to_timedelta64(sample_interval_us / 1_000_000)
    ns = header_0[TraceField.TRACE_SAMPLE_COUNT]
    time_array = starttime + dt * np.arange(ns)

    # Channels are simply numbered by trace index.
    channel = np.arange(len(fi.header))

    coords = get_coord_manager(
        {"time": time_array, "channel": channel}, dims=("time", "channel")
    )
    return coords


def _get_attrs(fi, coords, path, file_io):
    """
    Build the patch attributes describing a segy file.

    The attrs are assembled from the path, the coordinates, and the
    version and name of file_io. The open file handle fi is accepted but
    not currently read.
    """
    format_info = {
        "file_version": file_io.version,
        "file_format": file_io.name,
    }
    return dc.PatchAttrs(path=path, coords=coords, **format_info)


def _get_time_from_header(header):
    """
    Build a datetime64 from the recording-date fields of a trace header.

    The year, day of year, hour, minute, and second fields are joined
    into a dotted string, parsed with strptime, and converted with
    dc.to_datetime64.
    """
    date_fields = (
        TraceField.YearDataRecorded,
        TraceField.DayOfYear,
        TraceField.HourOfDay,
        TraceField.MinuteOfHour,
        TraceField.SecondOfMinute,
    )
    # One strptime directive per field, in the same order as date_fields.
    stamp = ".".join(str(header[field]) for field in date_fields)
    parsed = datetime.datetime.strptime(stamp, "%Y.%j.%H.%M.%S")
    return dc.to_datetime64(parsed)
2 changes: 2 additions & 0 deletions docs/contributors.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,7 @@ A Huge thanks to all of our contributors:

[Eileen Martin](https://github.com/eileenrmartin)

[Aaron Girard](https://github.com/aaronjgirard)

You can find more contributor information
[here](https://github.com/DASDAE/dascore/graphs/contributors)
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ dependencies = [
"tables>=3.7",
"typing_extensions",
"pint",
"segyio",
]

[project.optional-dependencies]
Expand Down Expand Up @@ -104,6 +105,7 @@ TDMS__V4713 = "dascore.io.tdms.core:TDMSFormatterV4713"
TERRA15__V4 = "dascore.io.terra15.core:Terra15FormatterV4"
TERRA15__V5 = "dascore.io.terra15.core:Terra15FormatterV5"
TERRA15__V6 = "dascore.io.terra15.core:Terra15FormatterV6"
SEGY__V2 = "dascore.io.segy.core:SegyV2"
WAV = "dascore.io.wav.core:WavIO"


Expand Down
2 changes: 2 additions & 0 deletions tests/test_io/test_common_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from dascore.io.h5simple import H5Simple
from dascore.io.pickle import PickleIO
from dascore.io.prodml import ProdMLV2_0, ProdMLV2_1
from dascore.io.segy import SegyV2
from dascore.io.tdms import TDMSFormatterV4713
from dascore.io.terra15 import (
Terra15FormatterV4,
Expand Down Expand Up @@ -58,6 +59,7 @@
),
Terra15FormatterV5(): ("terra15_v5_test_file.hdf5",),
Terra15FormatterV6(): ("terra15_v6_test_file.hdf5",),
SegyV2(): ("conoco_segy_1.sgy",),
}

# This tuple is for fiber io which support a write method and can write
Expand Down
1 change: 1 addition & 0 deletions tests/test_io/test_segy/test_segy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Tests for SEGY format."""

0 comments on commit ad8e216

Please sign in to comment.