diff --git a/examples/QuickOverview.py b/examples/QuickOverview.py index 1e724cd..4959709 100644 --- a/examples/QuickOverview.py +++ b/examples/QuickOverview.py @@ -6,7 +6,7 @@ # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.14.4 +# jupytext_version: 1.16.1 # kernelspec: # display_name: Python 3 (ipykernel) # language: python @@ -16,19 +16,14 @@ # %% [markdown] # # Quick Overview # -# Here you can find some quick examples of what you can do with segysak. For more details refer to the [examples](../examples.html). +# Here you can find some quick examples of what you can do with SEGY-SAK. For more details refer to the [examples](../examples.html). # %% [markdown] # The library is imported as *segysak* and the loaded `xarray` objects are compatible with *numpy* and *matplotlib*. # # The cropped volume from the Volve field in the North Sea (made available by Equinor) is used for this example, and # all the examples and data in this documentation are available from the `examples` folder of the -# [Github](https://github.com/trhallam/segysak) respository. - -# %% nbsphinx="hidden" -import warnings - -warnings.filterwarnings("ignore") +# [Github](https://github.com/trhallam/segysak/examples/) respository. # %% import matplotlib.pyplot as plt @@ -44,7 +39,7 @@ # %% [markdown] # A basic operation would be to check the text header included in the SEG-Y file. The *get_segy_texthead* -# function accounts for common encoding issues and returns the header as a text string. +# function accounts for common encoding issues and returns the header as a text string. It is important to check the text header if you are unfamiliar with your data, it may provide important information about the contents/location of trace header values which are needed by SEGY-SAK to successfully load your data into a `xarray.Dataset`. # %% from segysak.segy import get_segy_texthead @@ -59,9 +54,13 @@ # %% from segysak.segy import segy_header_scan +import pandas as pd scan = segy_header_scan(V3D_path) -scan + +with pd.option_context('display.max_rows', 100, 'display.max_columns', 10): + # drop byte locations where the mean is zero, these are likely empty. + display(scan) # %% [markdown] # The header report can also be reduced by filtering blank byte locations. Here we use the standard deviation `std` @@ -71,10 +70,10 @@ # can also see the byte positions of the **local grid** for INLINE_3D in byte *189* and for CROSSLINE_3D in byte *193*. # %% -scan[scan["std"] > 0] +scan[scan["mean"] > 0] # %% [markdown] -# To retreive the raw header content use `segy_header_scrape`. Setting `partial_scan=None` will return the +# To retreive the raw header content (values) use `segy_header_scrape`. Setting `partial_scan=None` will return the # full dataframe of trace header information. # %% @@ -87,16 +86,27 @@ # ## Load SEG-Y data # %% [markdown] -# All SEG-Y (2D, 2D gathers, 3D & 3D gathers) are ingested into `xarray.Dataset` objects through the -# `segy_loader` function. It is best to be explicit about the byte locations of key information but -# `segy_loader` can attempt to guess the shape of your dataset. Some standard byte positions are -# defined in the `well_known_bytes` function and others can be added via pull requests to the Github -# repository if desired. +# All SEG-Y (2D, 2D gathers, 3D & 3D gathers) are ingested into `xarray.Dataset` using the Xarray SEGY engine provided by +# segysak. The engine recognizes the '.segy' and `.sgy' extensions automatically when passed to `xr.open_dataset()`. +# It is important to provide the `dim_bytes_fields` and if required the `extra_bytes_fields` variables. +# +# `dim_byte_fields` specifies the byte locations which determine the orthogonal geometry of your data. This could be CDP for 2D data, +# iline and xline for 3D data, or iline, xline and offset for pre-stack gathers over 3D data. UTM coordinates are not usually valid dimensions, +# because if a survey is rotated relative to the UTM grid, you do not get orthogonal dimensions. +# +# UTM coordinates and other trace header information you require can be loaded via the `extra_bytes_fields` argument. +# +# > Note that the keys you use to label byte fields will be the keys of the dimensions and variables loaded into the dataset. +# +# > Since v0.5 of SEGY-SAK, the vertical dimension is always labelled as `samples`. # %% -from segysak.segy import segy_loader, well_known_byte_locs - -V3D = segy_loader(V3D_path, iline=189, xline=193, cdp_x=73, cdp_y=77, vert_domain="TWT") +import xarray as xr +V3D = xr.open_dataset( + V3D_path, + dim_byte_fields={"iline":189, "xline":193}, + extra_byte_fields={"cdp_x":73, "cdp_y":77} +) V3D # %% [markdown] @@ -110,7 +120,7 @@ # %% fig, ax1 = plt.subplots(ncols=1, figsize=(15, 8)) iline_sel = 10093 -V3D.data.transpose("twt", "iline", "xline", transpose_coords=True).sel( +V3D.data.transpose("samples", "iline", "xline", transpose_coords=True).sel( iline=iline_sel ).plot(yincrease=False, cmap="seismic_r") plt.grid("grey") @@ -120,21 +130,24 @@ # %% [markdown] # ## Saving data to NetCDF4 # -# SEGYSAK offers a convenience utility to make saving to NetCDF4 simple. This is accesssed through the `seisio` accessor on the loaded -# SEG-Y or SEISNC volume. The `to_netcdf` method accepts the same arguments as the `xarray` version. +# SEGYSAK now loads data in a way that is compatable with the standard `to_netcdf` method of an `xarray.Dataset`. To output data to NetCDF4, simply specify the output file and Xarray should take care of the rest. If you have a particularly large SEG-Y file, that will not fit into memory, then you may need to chunk the dataset first prior to output. This will ensure lazy loading and output of data. + +# %% +V3D.to_netcdf("data/V3D.nc") # %% -V3D.seisio.to_netcdf("V3D.SEISNC") +# Here the seismic volume to process one INLINE at a time to the NetCDF4 file. +V3D.chunk({'iline':1, 'xline':-1, 'samples':-1}).to_netcdf("data/V3D_chks.nc") # %% [markdown] # ## Saving data to SEG-Y # -# To return data to SEG-Y after modification use the `segy_writer` function. `segy_writer` takes as argument a SEISNC dataset which -# requires certain attributes be set. You can also specify the byte locations to write header information. - -# %% tags=[] -from segysak.segy import segy_writer +# To return data to SEG-Y after modification use the `seisio` accessor provided by SEGY-SAK. Unlike the `to_netcdf` method. Additional arguments are required by `to_segy` to successfuly write a SEG-Y file with appropriate header information. At a minimum, this should include byte locations for each dimension in the Dataset, but additional variables can be written to trace headers as required. -segy_writer( - V3D, "V3D.segy", trace_header_map=dict(iline=5, xline=21) -) # Petrel Locations +# %% +V3D.seisio.to_segy( + "data/V3D-out.segy", + trace_header_map={"cdp_x":73, "cdp_y":77}, + iline=189, + xline=193, +) diff --git a/examples/example_segy_headers.py b/examples/example_segy_headers.py index 5272093..597e906 100644 --- a/examples/example_segy_headers.py +++ b/examples/example_segy_headers.py @@ -6,7 +6,7 @@ # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.14.4 +# jupytext_version: 1.16.1 # kernelspec: # display_name: Python 3 (ipykernel) # language: python @@ -24,11 +24,6 @@ # # `segy_header_scan` is primarily designed to help quickly asscertain the byte locations of key header information for loading or converting the full SEG-Y file. It does this by just looking at the first *N* traces (1000 by default) and returns the byte location and statistics related to the file. -# %% nbsphinx="hidden" -import warnings - -warnings.filterwarnings("ignore") - # %% from segysak.segy import segy_header_scan @@ -44,7 +39,7 @@ import pandas as pd from IPython.display import display -with pd.option_context("display.max_rows", 89): +with pd.option_context("display.max_rows", 91): display(scan) # %% [markdown] @@ -87,7 +82,7 @@ # %% [markdown] # We can also just plot up the geometry to check that everything looks ok, here the line numbering and coordinates seem to match up, great! -# %% tags=[] +# %% fig, axs = plt.subplots(nrows=2, figsize=(12, 10), sharex=True, sharey=True) scrape.plot( diff --git a/examples/example_segysak_basics.py b/examples/example_segysak_basics.py index a4afe29..a28131f 100644 --- a/examples/example_segysak_basics.py +++ b/examples/example_segysak_basics.py @@ -6,15 +6,13 @@ # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.14.4 +# jupytext_version: 1.16.1 # kernelspec: # display_name: Python 3 (ipykernel) # language: python # name: python3 # --- -# %% - # %% [markdown] # # SEGY-SAK Basics # @@ -160,5 +158,5 @@ # %% [markdown] # Sometimes we need to modify the dimensions because they were read wrong or to scale them. Modify your dimension from the seisnc and then put it back using `assign_coords`. -# %% tags=[] +# %% new_seisnc.assign_coords(iline=new_seisnc.iline * 10, twt=new_seisnc.twt + 1500) diff --git a/examples/example_segysak_segy_vectorisation.py b/examples/example_segysak_segy_vectorisation.py index 52b3f1d..6646e60 100644 --- a/examples/example_segysak_segy_vectorisation.py +++ b/examples/example_segysak_segy_vectorisation.py @@ -6,15 +6,13 @@ # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.14.4 +# jupytext_version: 1.16.1 # kernelspec: # display_name: Python 3 (ipykernel) # language: python # name: python3 # --- -# %% - # %% [markdown] # # SEG-Y to Vector DataFrames and Back # @@ -26,13 +24,13 @@ # %% import pathlib +import xarray as xr from IPython.display import display -from segysak.segy import segy_loader, well_known_byte_locs, segy_writer volve_3d_path = pathlib.Path("data/volve10r12-full-twt-sub3d.sgy") print("3D", volve_3d_path.exists()) -volve_3d = segy_loader(volve_3d_path, **well_known_byte_locs("petrel_3d")) +volve_3d = xr.open_dataset(volve_3d_path, dim_byte_fields={'iline': 5, 'xline': 21}, extra_byte_fields={'cdp_x': 73, 'cdp_y': 77}) # %% [markdown] # ## Vectorisation @@ -56,7 +54,7 @@ # It is possible to return the DataFrame to the Dataset for output to SEGY. To do this the multi-index must be reset. Afterward, `pandas` provides the `to_xarray` method. # %% -volve_3d_df_multi = volve_3d_df_reindex.set_index(["iline", "xline", "twt"]) +volve_3d_df_multi = volve_3d_df_reindex.set_index(["iline", "xline", "samples"]) display(volve_3d_df_multi) volve_3d_ds = volve_3d_df_multi.to_xarray() display(volve_3d_ds) @@ -73,13 +71,23 @@ # The `cdp_x` and `cdp_y` positions must be reduced to 2D along the vertical axis "twt" and set as coordinates. # %% -volve_3d_ds["cdp_x"] = volve_3d_ds["cdp_x"].mean(dim=["twt"]) -volve_3d_ds["cdp_y"] = volve_3d_ds["cdp_y"].mean(dim=["twt"]) +volve_3d_ds["cdp_x"] + +# %% +volve_3d_ds["cdp_x"] = volve_3d_ds["cdp_x"].mean(dim=["samples"]) +volve_3d_ds["cdp_y"] = volve_3d_ds["cdp_y"].mean(dim=["samples"]) volve_3d_ds = volve_3d_ds.set_coords(["cdp_x", "cdp_y"]) volve_3d_ds # %% [markdown] -# Afterwards, use the `segy_writer` utility as normal to return to SEGY. +# Afterwards, use the `to_segy` method as normal to return to SEGY. + +# %% +volve_3d_ds.seisio.to_segy("data/test.segy", iline=189, xline=193, trace_header_map={'cdp_x':181, 'cdp_y':185}) + +# %% [markdown] +# ## Very large datasets +# +# If you have a very large dataset (SEG-Y file), it may be possible to use `ds.to_dask_dataframe()` which can perform operations, including the writing of data in a lazy manner. -# %% tags=[] -segy_writer(volve_3d_ds, "test.segy") +# %%