Skip to content

Commit

Permalink
Merge pull request #158 from SNflows/devel
Browse files Browse the repository at this point in the history
devel 1.2.0 to master
  • Loading branch information
eamjensen authored Jan 25, 2025
2 parents 4334d9e + 99d1a0d commit af09c97
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 285 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,6 @@ jobs:
env:
FLOWS_CONFIG: ${{ secrets.FLOWS_CONFIG }}
FLOWS_API_TOKEN: ${{ secrets.FLOWS_API_TOKEN }}
CASSJOBS_WSID: ${{ secrets.CASSJOBS_WSID }}
CASSJOBS_PASSWORD: ${{ secrets.CASSJOBS_PASSWORD }}
run: |
python -m pip install --upgrade pip wheel
pip install -r requirements.txt
Expand Down
11 changes: 0 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,6 @@ Text coming soon...
# pipeline = False
token = None

# casjobs:
# wsid and password required for run_catalogs.py,
# user registration at
# https://galex.stsci.edu/casjobs/CreateAccount.aspx
# wsid can be found at
# https://galex.stsci.edu/casjobs/changedetails.aspx
# after login
[casjobs]
# wsid = $CASJOBS_WSID/None
# password = $CASJOBS_PASSWORD/None

# database:
# username and password required for run_catalogs.py,
# the user is a registered user in the flows database
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
master-v1.1.1
master-v1.2.0
274 changes: 73 additions & 201 deletions flows/catalogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
from bottleneck import anynan
from tendrils.utils import load_config, query_ztf_id

import mastcasjobs

from .aadc_db import AADC_DB

logger = logging.getLogger(__name__)
Expand All @@ -50,203 +48,6 @@ def floatval(value):
def intval(value):
return None if value == '' else int(value)


# --------------------------------------------------------------------------------------------------
def casjobs_configured(config: configparser.ConfigParser) -> bool:
"""
Check if casjobs credentials are available.
"""
wsid = config.get('casjobs', 'wsid', fallback=os.environ.get("CASJOBS_WSID", None))
passwd = config.get('casjobs', 'password', fallback=os.environ.get("CASJOBS_PASSWORD", None))
if wsid is None or passwd is None:
raise CasjobsError("CasJobs WSID and PASSWORD not in config.ini / 'CASJOBS_WSID' and 'CASJOBS_PASSWORD' not defined as environment variables")
return True

# --------------------------------------------------------------------------------------------------
def query_casjobs_refcat2(coo_centre, radius=24 * u.arcmin):
"""
Uses the CasJobs program to do a cone-search around the position.
Will first attempt to do single large cone-search, and if that
fails because of CasJobs memory limits, will sub-divide the cone
into smaller queries.
Parameters:
coo_centre (:class:`astropy.coordinates.SkyCoord`): Coordinates of centre of search cone.
radius (Angle, optional): Search radius. Default is 24 arcmin.
Returns:
list: List of dicts with the REFCAT2 information.
.. codeauthor:: Rasmus Handberg <[email protected]>
"""
if isinstance(radius, (float, int)):
radius *= u.deg

try:
results = _query_casjobs_refcat2(coo_centre, radius=radius)
except CasjobsMemoryError:
logger.debug("CasJobs failed with memory error. Trying to use smaller radii.")
results = _query_casjobs_refcat2_divide_and_conquer(coo_centre, radius=radius)

# Remove duplicate entries:
results = unique(results, keys='starid')

# Trim away anything outside radius:
ra = [res['ra'] for res in results]
decl = [res['decl'] for res in results]
coords = SkyCoord(ra=ra, dec=decl, unit='deg', frame='icrs')
sep = coords.separation(coo_centre)
mask = sep <= radius

logger.debug("Found %d unique results", np.count_nonzero(mask))
return results[mask]


# --------------------------------------------------------------------------------------------------
def _query_casjobs_refcat2_divide_and_conquer(coo_centre, radius):

# Just put in a stop criterion to avoid infinite recursion:
if radius < 0.04 * u.deg:
raise Exception("Too many subdivides")

# Search central cone:
try:
results = _query_casjobs_refcat2(coo_centre, radius=0.5 * radius)
except CasjobsMemoryError:
logger.debug("CasJobs failed with memory error. Trying to use smaller radii.")
results = _query_casjobs_refcat2_divide_and_conquer(coo_centre, radius=0.5 * radius)

# Search six cones around central cone:
for n in range(6):
# FIXME: The 0.8 here is kind of a guess. There should be an analytic solution
new = SkyCoord(ra=coo_centre.ra.deg + 0.8 * Angle(radius).deg * np.cos(n * 60 * np.pi / 180),
dec=coo_centre.dec.deg + 0.8 * Angle(radius).deg * np.sin(n * 60 * np.pi / 180), unit='deg',
frame='icrs')

try:
results += _query_casjobs_refcat2(new, radius=0.5 * radius)
except CasjobsMemoryError:
logger.debug("CasJobs failed with memory error. Trying to use smaller radii.")
results += _query_casjobs_refcat2_divide_and_conquer(new, radius=0.5 * radius)

return results


# --------------------------------------------------------------------------------------------------
def _query_casjobs_refcat2(coo_centre, radius=24 * u.arcmin):
"""
Uses the CasJobs program to do a cone-search around the position.
Parameters:
coo_centre (:class:`astropy.coordinates.SkyCoord`): Coordinates of centre of search cone.
radius (Angle, optional): Search radius. Default is 24 arcmin.
Returns:
list: List of dicts with the REFCAT2 information.
.. codeauthor:: Rasmus Handberg <[email protected]>
"""

if isinstance(radius, (float, int)):
radius *= u.deg

# prepare refcat2 query to find all objects near position
casjobs_context = "HLSP_ATLAS_REFCAT2"
casjobs_personal_table = "flows"
sql = ("""SELECT r.* INTO {table:s} FROM fGetNearbyObjEq({ra:f}, {dec:f}, {radius:f}) AS n
INNER JOIN {context:s}.refcat2 AS r ON n.objid=r.objid ORDER BY n.distance;"""
.format(table=casjobs_personal_table, context=casjobs_context,
ra=coo_centre.ra.deg, dec=coo_centre.dec.deg, radius=Angle(radius).deg))

config = load_config()
casjobs_configured(config)
jobs = mastcasjobs.MastCasJobs(userid=config.get('casjobs', 'wsid'),
password=config.get('casjobs', 'password'),
context=casjobs_context)

# limited storage space at remote casjobs, drop table, if it exists, before making a new one
for table in jobs.list_tables():
if table == casjobs_personal_table:
jobs.drop_table(casjobs_personal_table) # more graceful than drop_table_if_exists()

# submit job, wait for it to finish and retrieve the results
job_id = jobs.submit(sql, context=casjobs_context, task_name=casjobs_personal_table)
logger.debug("Submitted query '%s' to MAST CasJobs context '%s' with task name '%s'",
sql, casjobs_context, casjobs_personal_table)
code, status = jobs.monitor(job_id, timeout=5)
logger.debug("MAST CasJobs query exited with status %d: %s", code, status)
results = jobs.get_table(casjobs_personal_table)

# Contents of HLSP_ATLAS_REFCAT2.refcat2 data table - i.e. the columns of 'results' just
# above; from https://galex.stsci.edu/casjobs/MyDB.aspx -> HLSP_ATLAS_REFCAT2.refcat2
# -----------------------------------------------------------------------------
# Name Unit Data Type Size Default Value Description
# objid dimensionless bigint 8 Unique object identifier.
# RA degrees float 8 RA from Gaia DR2, J2000, epoch 2015.5
# Dec degrees float 8 Dec from Gaia DR2, J2000, epoch 2015.5
# plx mas real 4 Parallax from Gaia DR2
# dplx mas real 4 Parallax uncertainty
# pmra mas/yr real 4 Proper motion in RA from Gaia DR2
# dpmra mas/yr real 4 Proper motion uncertainty in RA
# pmdec mas/yr real 4 Proper motion in Dec from Gaia DR2
# dpmdec mas/yr real 4 Proper motion uncertainty in Dec
# Gaia mag real 4 Gaia DR2 G magnitude
# dGaia mag real 4 Gaia DR2 G magnitude uncertainty
# BP mag real 4 Gaia G_BP magnitude
# dBP mag real 4 Gaia G_BP magnitude uncertainty
# RP mag real 4 Gaia G_RP magnitude
# dRP mag real 4 Gaia G_RP magnitude uncertainty
# Teff [K] int 4 Gaia stellar effective temperature
# AGaia mag real 4 Gaia estimate of G-band extinction for this star
# dupvar dimensionless int 4 Gaia flags coded as CONSTANT (0), VARIABLE (1), or NOT_AVAILABLE (2) + 4*DUPLICATE
# Ag mag real 4 SFD estimate of total column g-band extinction
# rp1 arc seconds real 4 Radius where cumulative G flux exceeds 0.1x this star
# r1 arc seconds real 4 Radius where cumulative G flux exceeds 1x this star
# r10 arc seconds real 4 Radius where cumulative G flux exceeds 10x this star
# g mag real 4 Pan-STARRS g_P1 magnitude
# dg mag real 4 Pan-STARRS g_P1 magnitude uncertainty
# gchi dimensionless real 4 chi^2/DOF for contributors to g
# gcontrib dimensionless int 4 Bitmap of contributing catalogs to g
# r mag real 4 Pan-STARRS r_P1 magnitude
# dr mag real 4 Pan-STARRS r_P1 magnitude uncertainty
# rchi dimensionless real 4 chi^2/DOF for contributors to r
# rcontrib dimensionless int 4 Bitmap of contributing catalogs to r
# i mag real 4 Pan-STARRS i_P1 magnitude
# di mag real 4 Pan-STARRS i_P1 magnitude uncertainty
# ichi dimensionless real 4 chi^2/DOF for contributors to i
# icontrib dimensionless int 4 Bitmap of contributing catalogs to i
# z mag real 4 Pan-STARRS z_P1 magnitude
# dz mag real 4 Pan-STARRS z_P1 magnitude uncertainty
# zchi dimensionless real 4 chi^2/DOF for contributors to z
# zcontrib dimensionless int 4 Bitmap of contributing catalogs to z
# nstat dimensionless int 4 Count of griz deweighted outliers
# J mag real 4 2MASS J magnitude
# dJ mag real 4 2MASS J magnitude uncertainty
# H mag real 4 2MASS H magnitude
# dH mag real 4 2MASS H magnitude uncertainty
# K mag real 4 2MASS K magnitude
# dK mag real 4 2MASS K magnitude uncertainty

# remove unused columns from results
results.remove_columns(['plx', 'dplx', 'dpmra', 'dpmdec', 'dGaia', 'dBP', 'dRP', 'Teff', 'AGaia', 'Ag',
'rp1', 'r1', 'r10', 'dg', 'gchi', 'gcontrib', 'dr', 'rchi', 'rcontrib',
'di', 'ichi', 'icontrib', 'dz', 'zchi', 'zcontrib', 'nstat', 'dJ', 'dH', 'dK'])

# rename columns to match older java-based implementation which had different naming conventions
# TODO: refactor dependent functions to expect the default namings and get rid of these renamings
# to start: comment out the renamings below and see where the flows pipeline breaks, then fix it
results = Table(results,
names=('starid', 'ra', 'decl', 'pm_ra', 'pm_dec', 'gaia_mag', 'gaia_bp_mag', 'gaia_rp_mag',
'gaia_variability', 'g_mag', 'r_mag', 'i_mag', 'z_mag', 'J_mag', 'H_mag', 'K_mag'))

if not results:
raise CasjobsError("Could not find anything on CasJobs")

logger.debug("Found %d results", len(results))
return results


# --------------------------------------------------------------------------------------------------
def query_apass(coo_centre, radius=24 * u.arcmin):
"""
Expand Down Expand Up @@ -356,7 +157,6 @@ def query_simbad(coo_centre, radius=24 * u.arcmin):

s = Simbad()
s.ROW_LIMIT = 0
s.remove_votable_fields('coordinates')
s.add_votable_fields('ra(d;A;ICRS;J2000)', 'dec(d;D;ICRS;2000)', 'pmra', 'pmdec')
s.add_votable_fields('otype')
s.add_votable_fields('flux(B)', 'flux(V)', 'flux(R)', 'flux(I)', 'flux(J)', 'flux(H)', 'flux(K)')
Expand Down Expand Up @@ -456,6 +256,78 @@ def query_skymapper(coo_centre, radius=24 * u.arcmin):


# --------------------------------------------------------------------------------------------------

def query_local_refcat2(coo_centre, radius=24 * u.arcmin):
"""
Uses refcat.c from https://archive.stsci.edu/hlsp/atlas-refcat2 to do a cone-search around the position.
NOTE: In going from mast casjobs to local refcat.c, we have lost the unique object identifier, 'objid'
----> See https://archive.stsci.edu/hlsps/atlas-refcat2/hlsp_atlas-refcat2_atlas_ccd_all_multi_v1_readme.txt
We have to invent our own objids.
They get assigned incrementally from the value "300,000,000,000,000,000" onwards, which is comfortably
above any objids in our database on 2025-01-25.
TODO: Something less hacky! Assign every entry in the local catalog a unique objid somehow...
Parameters:
coo_centre (:class:`astropy.coordinates.SkyCoord`): Coordinates of centre of search cone.
radius (Angle, optional): Search radius. Default is 24 arcmin.
Returns:
list: List of dicts with the REFCAT2 information.
.. codeauthor:: Erik Jensen
"""

current_max_objid = 300_000_000_000_000_000
with AADC_DB() as db:
db.cursor.execute("SELECT MAX(starid) FROM flows.refcat2;")
for row in db.cursor.fetchall():
if row[0] > current_max_objid:
current_max_objid = row[0]

refcat_output = subprocess.run(["refcat",
str(coo_centre.ra.deg), str(coo_centre.dec.deg),
"-rad", str(Angle(radius).deg),
"-var", "ra,dec,pmra,pmdec,gaia,bp,rp,dupvar,g,r,i,z,j,h,k",
"-nohdr"],
encoding="utf-8", capture_output=True, check=True)

# TODO: feel free to make this less awful!
OBJID = RA = DEC = PMRA = PMDEC = GAIA = BP = RP = DUPVAR = G = R = I = Z = J = H = K = np.array([])
for line in refcat_output.stdout.splitlines():
OBJID = np.append(OBJID, current_max_objid)
current_max_objid += 1

ra, dec, pmra, pmdec, gaia, bp, rp, dupvar, g, r, i, z, j, h, k = line.split()

RA = np.append(RA, ra)
DEC = np.append(DEC, dec)
PMRA = np.append(PMRA, pmra)
PMDEC = np.append(PMDEC, pmdec)
GAIA = np.append(GAIA, gaia)
BP = np.append(BP, bp)
RP = np.append(RP, rp)
DUPVAR = np.append(DUPVAR, dupvar)
G = np.append(G, g)
R = np.append(R, r)
I = np.append(I, i)
Z = np.append(Z, z)
J = np.append(J, j)
H = np.append(H, h)
K = np.append(K, k)

# rename columns to match older java-based implementation which had different naming conventions
# TODO: refactor dependent functions to expect the default namings and get rid of these renamings
# to start: comment out the renamings below and see where the flows pipeline breaks, then fix it
results = Table({'starid': OBJID, 'ra': RA, 'decl': DEC, 'pm_ra': PMRA, 'pm_dec': PMDEC, 'gaia_mag': GAIA,
'gaia_bp_mag': BP, 'gaia_rp_mag': RP, 'gaia_variability': DUPVAR, 'g_mag': G, 'r_mag': R, 'i_mag': I,
'z_mag': Z, 'J_mag': J, 'H_mag': H, 'K_mag': K})

if not results:
raise CasjobsError("Something went wrong in local refcat.c binary")

logger.debug("Found %d results", len(results))
return results

def query_all(coo_centre, radius=24 * u.arcmin, dist_cutoff=2 * u.arcsec):
"""
Query all catalogs (REFCAT2, APASS, SDSS and SkyMapper) and return merged catalog.
Expand All @@ -479,7 +351,7 @@ def query_all(coo_centre, radius=24 * u.arcmin, dist_cutoff=2 * u.arcsec):
"""

# Query the REFCAT2 catalog using CasJobs around the target position:
results = query_casjobs_refcat2(coo_centre, radius=radius)
results = query_local_refcat2(coo_centre, radius=radius)
AT_results = Table(results)
refcat = SkyCoord(ra=AT_results['ra'], dec=AT_results['decl'], unit=u.deg, frame='icrs')

Expand Down
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ pytz >= 2024.2
sep-pjw >= 1.3.5
astroalign >= 2.5.1
networkx >= 3.3
astroquery >= 0.4.7
astroquery == 0.4.7 # TODO version >= 0.4.9 brings changes which messes with catalogs.py:query_simbad() - any expert want to have a crack at this?
tendrils >= 0.3.2
mastcasjobs >= 0.0.6
healpy >= 1.17.3
21 changes: 0 additions & 21 deletions requirements.txt.erik

This file was deleted.

Loading

0 comments on commit af09c97

Please sign in to comment.