Skip to content

Commit

Permalink
Merge pull request #27 from sat-utils/develop
Browse files Browse the repository at this point in the history
publish 0.2.1
  • Loading branch information
matthewhanson authored Feb 13, 2020
2 parents 5cf8653 + c74493a commit c5b61dc
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 18 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

### Changed
- Updated metadata fields for STAC 0.9.0-rc2
- Updated boto3-utils to 0.2.0

### Fixed
- Point to correct github tag when linking to STAC collection metadata
Expand Down Expand Up @@ -39,4 +40,4 @@ Initial Release
[Unreleased]: https://github.com/sat-utils/sat-stac-sentinel/compare/0.1.0...HEAD
[v0.2.1]: https://github.com/sat-utils/sat-stac-sentinel/compare/0.2.0...0.2.1
[v0.2.0]: https://github.com/sat-utils/sat-stac-sentinel/compare/0.1.0...0.2.0
[v0.1.0]: https://github.com/sat-utils/sat-stac-sentinel/tree/0.1.0
[v0.1.0]: https://github.com/sat-utils/sat-stac-sentinel/tree/0.1.0
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pyproj==2.4.1
shapely~=1.6.4.post2
boto3-utils~=0.1.3
boto3-utils~=0.2.0
#git+git://github.com/matthewhanson/boto3-utils@develop
xmljson~=0.2.0
requests>=2.18.1
6 changes: 3 additions & 3 deletions stac_sentinel/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def parse_args(args):
parser.add_argument('--prefix', help='Only ingest scenes with a path starting with prefix', default=None)
parser.add_argument('--start_date', help='Only ingest scenes with a Last Modified Date past provided start date', default=None)
parser.add_argument('--end_date', help='Only ingest scenes with a Last Modified Date before provided end date', default=None)
parser.add_argument('--direct_from_s3', help='Get metadata direct from s3 instead of free endpoint', default=False, action='store_true')

# output control
parser.add_argument('--save', help='Save fetch Items as <id>.json files to this folder', default=None)
Expand All @@ -48,16 +49,15 @@ def cli():
args = parse_args(sys.argv[1:])
logging.basicConfig(stream=sys.stdout,
level=args.pop('log') * 10,
datefmt='%Y-%m-%d %H:%M:%S')
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

publish = args.pop('publish', None)

collection_id = args.pop('collection')
savepath = args.pop('save')
if savepath is not None:
makedirs(savepath, exist_ok=True)
for i, item in enumerate(SentinelSTAC.get_aws_archive(collection_id, **args)):
print(item['properties']['datetime'], item['id'])
for item in SentinelSTAC.get_aws_archive(collection_id, **args):
# save items as JSON files
if savepath:
fname = op.join(savepath, '%s.json' % item['id'])
Expand Down
2 changes: 1 addition & 1 deletion stac_sentinel/sentinel-s2-l1c.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
"properties": {
"constellation": "sentinel-2",
"instruments": ["msi"],
"gsd": 10,
"eo:gsd": 10,
"eo:bands": [
{
"name": "B01",
Expand Down
2 changes: 1 addition & 1 deletion stac_sentinel/sentinel-s2-l2a.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
"properties": {
"constellation": "sentinel-2",
"instruments": ["msi"],
"gsd": 10,
"eo:gsd": 10,
"eo:bands": [
{
"name": "B01",
Expand Down
31 changes: 20 additions & 11 deletions stac_sentinel/sentinel.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def kml_to_geometry(cls, filename):
return cls.coordinates_to_geometry(coordinates)

@classmethod
def get_aws_archive(cls, collection, **kwargs):
def get_aws_archive(cls, collection, direct_from_s3=False, **kwargs):
""" Generator function returning the archive of Sentinel data on AWS
Keyword arguments:
prefix -- Process only file keys beginning with this prefix
Expand All @@ -120,16 +120,25 @@ def get_aws_archive(cls, collection, **kwargs):
# get latest AWS inventory for this collection
inventory_url = 's3://sentinel-inventory/%s/%s-inventory' % (collection, collection)
inventory = s3().latest_inventory(inventory_url, **kwargs, suffix=cls.collections[collection])

#import pdb; pdb.set_trace()
# iterate through latest inventory
for i, record in enumerate(inventory):
url = '%s/%s/%s' % (cls.FREE_URL, collection, record['Key'])
logger.debug('Fetching initial metadata: %s' % url)
from datetime import datetime
for i, url in enumerate(inventory):
if (i % 100) == 0:
logger.info('%s records' % i)

try:
# get initial JSON file
r = requests.get(url, stream=True)
base_url = 's3://%s/%s' % (record['Bucket'], op.dirname(record['Key']))
metadata = json.loads(r.text)
if direct_from_s3:
logger.debug('Fetching initial metadata: %s' % url)
metadata = s3().read_json(url, requester_pays=True)
else:
# use free endpoint to access file
parts = s3().urlparse(url)
_url = '%s/%s/%s' % (cls.FREE_URL, collection, parts['key'])
logger.debug('Fetching initial metadata: %s' % _url)
r = requests.get(_url, stream=True)
metadata = json.loads(r.text)

'''
fnames = [f"{base_url}/{a}" for a in md['filenameMap'].values() if 'annotation' in a and 'calibration' not in a]
metadata = {
Expand All @@ -140,11 +149,11 @@ def get_aws_archive(cls, collection, **kwargs):
'''
# transform to STAC Item
sentinel_scene = cls(collection, metadata)
item = sentinel_scene.to_stac(base_url=base_url)
item = sentinel_scene.to_stac(base_url=url)
yield item

except Exception as err:
logger.error('Error creating STAC Item %s: %s' % (record['url'], err))
logger.error('Error creating STAC Item from %s, Error: %s' % (url, err))
continue

def to_stac_from_s1l1c(self, **kwargs):
Expand Down

0 comments on commit c5b61dc

Please sign in to comment.