Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get all quad ids for a ZTF field #76

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 189 additions & 6 deletions tools/get_quad_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,140 @@
import pandas as pd
import json
import sys
import os
import h5py
from tqdm import tqdm
import time


def get_all_ids(
    func,
    catalog,
    field=301,
    ccd_range=16,
    quad_range=4,
    minobs=20,
    limit=10000,
    verbose=2,
    output_dir=os.path.join(os.path.dirname(__file__), "output/"),
):
    '''
    Page through ``func`` for every (ccd, quad) pair of a field and save the results.

    For each CCD in [1, ccd_range] and each quad in [1, quad_range], ``func`` is
    called repeatedly with an increasing ``skip`` until it returns fewer than
    ``limit`` rows.

    Parameters
    ----------
    func : callable
        Function for getting ids for a specific quad of a CCD for a particular
        ZTF field. It is called as
        ``func(catalog, field=, ccd=, quad=, minobs=, skip=, limit=)`` and must
        return a ``(data, time_taken)`` pair where ``data`` supports ``len()``.
    catalog : str
        Catalog containing ids, CCD, quad, and light curves
    field : int
        ZTF field number
    ccd_range : int
        Range of CCD numbers starting from 1 to get the ids. Takes values from [1,16]
    quad_range : int
        Range of CCD quad numbers starting from 1. Takes values from [1,4]
    minobs : int
        Minimum points in the light curve for the object to be selected
    limit : int
        Page size: how many rows to request per call to ``func``. Default is 10000
    verbose : int
        How much output to store. ``data.txt`` is always created in
        ``output_dir``; with ``verbose > 0`` run and per-quad metadata is
        written to it; with ``verbose > 1`` every page is additionally stored
        in one hdf5 file per quad and a progress bar over the CCDs is shown.
    output_dir : str
        Directory path to save output files to (created if missing)

    Returns
    -------
    None
        Results are written to files inside ``output_dir``.

    Raises
    ------
    ValueError
        If ``limit`` is smaller than 1 (paging could never terminate).

    USAGE: get_all_ids(get_field_ids, 'ZTF_sources_20210401', field=301,
                       ccd_range=2, quad_range=4, minobs=5, limit=2000)
    '''
    if limit < 1:
        # With limit <= 0 the "short page" stop condition can never be met.
        raise ValueError("limit must be a positive integer")

    os.makedirs(output_dir, exist_ok=True)

    write_metadata = verbose > 0
    write_hdf5 = verbose > 1

    ccds = range(1, ccd_range + 1)
    if write_hdf5:
        # Only build a progress bar when it is actually going to be displayed.
        ccds = tqdm(ccds)

    # os.path.join keeps the files inside output_dir even when the caller
    # passes a directory without a trailing separator.
    with open(os.path.join(output_dir, "data.txt"), "w") as meta:
        if write_metadata:
            meta.write(
                f"Catalog: {catalog}"
                f"\nMin points: {minobs}"
                f"\nField: {field}"
                f"\nCCD Range: [1,{ccd_range}] "
                f"\nQuad Range: [1,{quad_range}]\n.\n.\n.\n\n"
            )

        for ccd in ccds:
            for quad in range(1, quad_range + 1):
                hf = None
                if write_hdf5:
                    hf = h5py.File(
                        os.path.join(
                            output_dir, f"data_ccd_{ccd:02d}_quad_{quad}.h5"
                        ),
                        'w',
                    )
                try:
                    page = 0
                    total_time = 0
                    while True:
                        data, time_taken = func(
                            catalog,
                            field=field,
                            ccd=ccd,
                            quad=quad,
                            minobs=minobs,
                            skip=page * limit,
                            limit=limit,
                        )
                        if hf is not None:
                            # One dataset per page, tagged with its query time.
                            dset = hf.create_dataset(
                                f"dataset_{page:03d}", data=data
                            )
                            dset.attrs['exec_time'] = time_taken
                        total_time += time_taken
                        if len(data) < limit:
                            # A short page means the quad is exhausted.
                            break
                        page += 1
                finally:
                    # Close the hdf5 file even if func or a write raised.
                    if hf is not None:
                        hf.close()

                if write_metadata:
                    n_ids = len(data) + page * limit
                    meta.write(
                        f"\nCCD: {ccd} Quad: {quad}"
                        f"\nNumber of ids: {n_ids}"
                        f"\nExecution Time: {round(total_time * 1000, 4)} ms\n"
                    )


def gettime(func):
    '''
    Decorator that reports the execution time of func.

    Parameters
    ----------
    func : callable
        Function to be timed.

    Returns
    -------
    callable
        Wrapper that forwards all positional and keyword arguments to ``func``
        and returns ``(result, elapsed)``, where ``elapsed`` is the duration of
        the call in seconds (float).
    '''
    # Imported locally so this decorator does not rely on the module's import list.
    import functools

    @functools.wraps(func)  # keep func's __name__/__doc__ on the wrapper
    def wrap(*args, **kwargs):
        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments the way time.time() can.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        return result, time.perf_counter() - start

    return wrap


@gettime
def get_field_ids(catalog, field=301, ccd=4, quad=3, minobs=20, skip=0, limit=10):
'''Get ids for a specific quad of a CCD for a particular ZTF field.
Parameters
Expand All @@ -28,8 +160,10 @@ def get_field_ids(catalog, field=301, ccd=4, quad=3, minobs=20, skip=0, limit=10
-------
ids : list
A list of ids
time_taken : float
Execution time

USAGE: data = get_field_ids('ZTF_sources_20210401',field=301,ccd=2,quad=3,\
USAGE: data, time_taken = get_field_ids('ZTF_sources_20210401',field=301,ccd=2,quad=3,\
minobs=5,skip=0,limit=20)
'''

Expand All @@ -44,7 +178,7 @@ def get_field_ids(catalog, field=301, ccd=4, quad=3, minobs=20, skip=0, limit=10
"field": {"$eq": field},
"ccd": {"$eq": ccd},
"quad": {"$eq": quad},
"nobs": {"$gt": minobs},
"n": {"$gt": minobs},
},
"projection": {
"_id": 1,
Expand All @@ -71,11 +205,18 @@ def get_field_ids(catalog, field=301, ccd=4, quad=3, minobs=20, skip=0, limit=10
default='ZTF_sources_20210401',
)
parser.add_argument(
"output",
"-output",
action='store',
default='output.txt',
type=argparse.FileType('w'),
help="file to write output to",
)
parser.add_argument(
"-output_dir",
action='store',
default="output/",
help="relative directory path to save output files to",
)

token_help = "put your Fritz token here or in the secrets file.\
You can get it from your Fritz profile page. This becomes\
Expand All @@ -89,6 +230,18 @@ def get_field_ids(catalog, field=301, ccd=4, quad=3, minobs=20, skip=0, limit=10
)
parser.add_argument("-ccd", type=int, default=4, help="ccd number (default 4)")
parser.add_argument("-quad", type=int, default=3, help="quad number (default 3)")
parser.add_argument(
"-ccd_range",
type=int,
default=16,
help="ccd range from 1 to ccd_range (default 16 -> default ccd range [1,16])",
)
parser.add_argument(
"-quad_range",
type=int,
default=4,
help="quad range from 1 to quad_range (default 4 -> default quad range [1,4])",
)
parser.add_argument(
"-minobs",
type=int,
Expand All @@ -99,7 +252,14 @@ def get_field_ids(catalog, field=301, ccd=4, quad=3, minobs=20, skip=0, limit=10
"-skip", type=int, default=0, help="number of rows to skip (default 0)"
)
parser.add_argument(
"-limit", type=int, default=10, help="number of rows to return (default 10)"
"-limit", type=int, default=10000, help="number of rows to return (default 10)"
)
parser.add_argument("-verbose", type=int, default=2, help="how much data to store")
parser.add_argument(
"--all-quads",
action="store_true",
default=False,
help="if passed as argument, get all quads for a particular field",
)

args = parser.parse_args()
Expand All @@ -111,5 +271,28 @@ def get_field_ids(catalog, field=301, ccd=4, quad=3, minobs=20, skip=0, limit=10

# data = get_field_ids(catalog='ZTF_sources_20210401',limit=args.limit)
# print(data)
data = get_field_ids(catalog=args.catalog, limit=args.limit)
pd.DataFrame(data).to_csv(args.output, index=False, header=False)

if args.all_quads:
get_all_ids(
get_field_ids,
catalog=args.catalog,
field=args.field,
ccd_range=args.ccd_range,
quad_range=args.quad_range,
minobs=args.minobs,
limit=args.limit,
verbose=args.verbose,
output_dir=os.path.join(os.path.dirname(__file__), args.output_dir),
)

else:
data, _ = get_field_ids(
catalog=args.catalog,
field=args.field,
ccd=args.ccd,
quad=args.quad,
minobs=args.minobs,
skip=args.skip,
limit=args.limit,
)
pd.DataFrame(data).to_csv(args.output, index=False, header=False)