Skip to content

Commit

Permalink
Fix wrong display in statistics because the DataFrame was not sorted (#…
Browse files Browse the repository at this point in the history
…574)

* Fix wrong display in statistics because the DataFrame was not sorted

* Keep fixing ordering of the data

* Refactor query to /statistics

* Do not drop indexing field when necessary

* Fill NaN when generating histograms

* Fill NaN when generating histograms -- II

* Actually apply arguments...

* Make the field on which to sort and index more generic
  • Loading branch information
JulienPeloton authored Jan 22, 2024
1 parent c79c0dd commit 7264225
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 113 deletions.
100 changes: 10 additions & 90 deletions apps/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from apps.utils import sine_fit
from apps.utils import class_colors
from apps.utils import request_api
from apps.utils import query_and_order_statistics
from apps.statistics import dic_names

from fink_utils.sso.spins import func_hg, func_hg12, func_hg1g2, func_hg1g2_with_spin
Expand Down Expand Up @@ -3610,18 +3611,7 @@ def plot_stat_evolution(param_name, switch):
else:
param_name_ = param_name

r = request_api(
'/api/v1/statistics',
json={
'date': '',
'output-format': 'json',
'columns': param_name_
}
)

# Format output in a DataFrame
pdf = pd.read_json(r)
pdf = pdf.set_index('key:key')
pdf = query_and_order_statistics(columns=param_name_)
pdf = pdf.fillna(0)

pdf['date'] = [
Expand Down Expand Up @@ -3963,21 +3953,7 @@ def make_daily_card(pdf, color, linecolor, title, description, height='12pc', sc
def hist_sci_raw(dropdown_days):
""" Make an histogram
"""
r = request_api(
'/api/v1/statistics',
json={
'date': '',
'output-format': 'json',
'columns': 'basic:raw,basic:sci'
}
)

# Format output in a DataFrame
pdf = pd.read_json(r)
pdf = pdf.set_index('key:key')
# Remove hbase specific fields
if 'key:time' in pdf.columns:
pdf = pdf.drop(columns=['key:time'])
pdf = query_and_order_statistics(columns='basic:raw,basic:sci')

if dropdown_days is None or dropdown_days == '':
dropdown_days = pdf.index[-1]
Expand All @@ -3992,7 +3968,7 @@ def hist_sci_raw(dropdown_days):
"""

card = make_daily_card(
pdf, color='rgb(158,202,225)', linecolor='rgb(8,48,107)', title='Quality cuts', description=description, norm=norm
pdf[['Received', 'Processed']], color='rgb(158,202,225)', linecolor='rgb(8,48,107)', title='Quality cuts', description=description, norm=norm
)

return card
Expand All @@ -4004,21 +3980,8 @@ def hist_sci_raw(dropdown_days):
def hist_catalogued(dropdown_days):
""" Make an histogram
"""
r = request_api(
'/api/v1/statistics',
json={
'date': '',
'output-format': 'json',
'columns': 'class:Solar System MPC,class:simbad_tot,basic:sci'
}
)

# Format output in a DataFrame
pdf = pd.read_json(r)
pdf = pdf.set_index('key:key')
# Remove hbase specific fields
if 'key:time' in pdf.columns:
pdf = pdf.drop(columns=['key:time'])
pdf = query_and_order_statistics(columns='class:Solar System MPC,class:simbad_tot,basic:sci')
pdf = pdf.fillna(0)

pdf = pdf.rename(columns={'class:Solar System MPC': 'MPC', 'class:simbad_tot': 'SIMBAD'})

Expand Down Expand Up @@ -4047,24 +4010,9 @@ def hist_catalogued(dropdown_days):
def hist_classified(dropdown_days):
""" Make an histogram
"""
r = request_api(
'/api/v1/statistics',
json={
'date': '',
'output-format': 'json',
'columns': 'basic:sci,class:Unknown'
}
)

# Format output in a DataFrame
pdf = pd.read_json(r)
pdf = pdf.set_index('key:key')
# Remove hbase specific fields
if 'key:time' in pdf.columns:
pdf = pdf.drop(columns=['key:time'])
pdf = query_and_order_statistics(columns='basic:sci,class:Unknown')
pdf = pdf.fillna(0)

# In case class:unknown contains NaN (see https://github.com/astrolabsoftware/fink-utils/issues/25)
pdf['class:Unknown'] = pdf['class:Unknown'].replace(np.nan, 0)
pdf['Classified'] = pdf['basic:sci'].astype(int) - pdf['class:Unknown'].astype(int)
pdf = pdf.rename(columns={'class:Unknown': 'Unclassified'})

Expand Down Expand Up @@ -4093,21 +4041,7 @@ def hist_classified(dropdown_days):
def hist_candidates(dropdown_days):
""" Make an histogram
"""
r = request_api(
'/api/v1/statistics',
json={
'date': '',
'output-format': 'json',
'columns': 'class:Solar System candidate,class:SN candidate,class:Early SN Ia candidate,class:Kilonova candidate'
}
)

# Format output in a DataFrame
pdf = pd.read_json(r)
pdf = pdf.set_index('key:key')
# Remove hbase specific fields
if 'key:time' in pdf.columns:
pdf = pdf.drop(columns=['key:time'])
pdf = query_and_order_statistics(columns='class:Solar System candidate,class:SN candidate,class:Early SN Ia candidate,class:Kilonova candidate')

pdf = pdf.rename(
columns={
Expand Down Expand Up @@ -4139,21 +4073,7 @@ def hist_candidates(dropdown_days):
def fields_exposures(dropdown_days):
""" Make an histogram
"""
r = request_api(
'/api/v1/statistics',
json={
'date': '',
'output-format': 'json',
'columns': '*'
}
)

# Format output in a DataFrame
pdf = pd.read_json(r)
pdf = pdf.set_index('key:key')
# Remove hbase specific fields
if 'key:time' in pdf.columns:
pdf = pdf.drop(columns=['key:time'])
pdf = query_and_order_statistics(columns='*')

to_drop = [i for i in pdf.columns if i.startswith('basic:')]
pdf = pdf.drop(columns=to_drop)
Expand Down
28 changes: 5 additions & 23 deletions apps/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from app import app
from apps.utils import loading
from apps.utils import request_api
from apps.utils import query_and_order_statistics

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -119,20 +120,11 @@ def store_stat_query(name):
https://dash.plotly.com/sharing-data-between-callbacks
"""
cols = 'basic:raw,basic:sci,basic:fields,basic:exposures,class:Unknown'

r = request_api(
'/api/v1/statistics',
json={
'date': '',
'output-format': 'json',
'columns': cols
}
pdf = query_and_order_statistics(
columns='basic:raw,basic:sci,basic:fields,basic:exposures,class:Unknown',
drop=False
)

pdf = pd.read_json(r)
pdf = pdf.set_index('key:key', drop=False)

return pdf.to_json()

@app.callback(
Expand Down Expand Up @@ -327,17 +319,7 @@ def daily_stats():
def generate_night_list():
""" Generate the list of available nights (last night first)
"""
r = request_api(
'/api/v1/statistics',
json={
'date': '',
'output-format': 'json',
'columns': ''
}
)

# Format output in a DataFrame
pdf = pd.read_json(r)
pdf = query_and_order_statistics(columns='', drop=False)

labels = list(pdf['key:key'].apply(lambda x: x[4:8] + '-' + x[8:10] + '-' + x[10:12]))

Expand Down
41 changes: 41 additions & 0 deletions apps/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,47 @@ def format_hbase_output(

return pdfs

def query_and_order_statistics(date='', columns='*', index_by='key:key', drop=True):
    """ Query /statistics, and order the resulting dataframe

    The JSON payload is loaded into a DataFrame, sorted in ascending
    order of `index_by`, and then indexed by that same column.

    Parameters
    ----------
    date: str, optional
        Date, forwarded as-is to the API (default is '')
    columns: str, optional
        Column names, forwarded as-is to the API (default is '*')
    index_by: str, optional
        Column name on which to sort and index (default is 'key:key')
    drop: bool, optional
        If True, drop the original column used to index the dataframe,
        so that it is only available as the index. If False, the column
        is kept both as index and as a regular column.
        Default is True.

    Returns
    -------
    pdf: Pandas DataFrame
        DataFrame with statistics data, sorted in ascending order of
        `index_by`. With the default 'key:key' this is intended to be
        oldest (top) to most recent (bottom).
    """
    r = request_api(
        '/api/v1/statistics',
        json={
            'date': date,
            'columns': columns,
            'output-format': 'json'
        }
    )

    # Format output in a DataFrame
    pdf = pd.read_json(r)

    # Sort before indexing: the rows are not guaranteed to come back
    # ordered, and callers rely on positional access (e.g. last row).
    pdf = pdf.sort_values(index_by)
    pdf = pdf.set_index(index_by, drop=drop)

    # Remove hbase specific fields
    if 'key:time' in pdf.columns:
        pdf = pdf.drop(columns=['key:time'])

    return pdf

def isoify_time(t):
try:
tt = Time(t)
Expand Down

0 comments on commit 7264225

Please sign in to comment.