diff --git a/README.md b/README.md index c513509..fa68565 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# Purple Air API +# PurpleAir API -A Python 3.x module to turn data from the PurpleAir/ThingSpeak API into a Pandas DataFrame safely, with many utility methods. +A Python 3.x module to turn data from the PurpleAir/ThingSpeak API into a Pandas DataFrame safely, with many utility methods and clear errors. ![Global Sensor Map with Celsius Scale](maps/sensor_map.png) @@ -48,7 +48,9 @@ print(s) # Sensor 2891 at 10834, Canyon Road, Omaha, Douglas County, Nebraska, ```python from purpleair.network import SensorList p = SensorList() # Initialized 10,812 sensors! -df = p.to_dataframe(sensor_filter='all' channel='a') # Other options include 'outside' and 'useful' +# Other sensor filters include 'outside', 'useful', 'family', 'no_child', and 'column' +df = p.to_dataframe(sensor_filter='all', + channel='parent') ``` Result: @@ -63,6 +65,20 @@ id 53069 47.190197 -122.177992 #2 outside 109.82 79.0 26.111111 ... False 0 109.52 108.72 109.33 116.64 74.52 ``` +### Make a DataFrame from all current sensors that have a 10-minute average PM2.5 value + +```python +from purpleair.network import SensorList +p = SensorList() # Initialized 10,812 sensors! +# If `sensor_filter` is set to 'column' then we must also provide a value for `column` +df_1 = p.to_dataframe(sensor_filter='all', + channel='parent') +df_2 = p.to_dataframe(sensor_filter='column', + channel='parent', + column='10min_avg') # Any key of Channel.as_flat_dict() is a valid column +print(len(df_1), len(df_2)) # 11,071 10,723 +``` + ### Get historical data for parent sensor secondary channel ```python diff --git a/docs/api/channel_methods.md b/docs/api/channel_methods.md index f17947e..7c4ad2f 100644 --- a/docs/api/channel_methods.md +++ b/docs/api/channel_methods.md @@ -2,7 +2,120 @@ ## `setup()` -This converts the JSON metadata to Python class members, exposing data in a Pythonic way. 
+This converts the JSON metadata to Python class properties, exposing data in a Pythonic way. + +## `as_dict() -> dict` + +Return a dictionary representation of a Channel. The data is shaped like this: + +```python +{ + 'meta': { + 'id': a.identifier, + 'parent': None, + 'lat': a.lat, + 'lon': a.lon, + 'name': a.name, + 'location_type': a.location_type + }, + 'data': { + 'pm_2.5': a.current_pm2_5, + 'temp_f': a.current_temp_f, + 'temp_c': a.current_temp_c, + 'humidity': a.current_humidity, + 'pressure': a.current_pressure, + 'p_0_3_um': a.current_p_0_3_um, + 'p_0_5_um': a.current_p_0_5_um, + 'p_1_0_um': a.current_p_1_0_um, + 'p_2_5_um': a.current_p_2_5_um, + 'p_5_0_um': a.current_p_5_0_um, + 'p_10_0_um': a.current_p_10_0_um, + 'pm1_0_cf_1': a.current_pm1_0_cf_1, + 'pm2_5_cf_1': a.current_pm2_5_cf_1, + 'pm10_0_cf_1': a.current_pm10_0_cf_1, + 'pm1_0_atm': a.current_pm1_0_atm, + 'pm2_5_atm': a.current_pm2_5_atm, + 'pm10_0_atm': a.current_pm10_0_atm + }, + 'diagnostic': { + 'last_seen': a.last_seen, + 'model': a.model, + 'hidden': a.hidden, + 'flagged': a.flagged, + 'downgraded': a.downgraded, + 'age': a.age, + 'brightness': a.brightness, + 'hardware': a.hardware, + 'version': a.version, + 'last_update_check': a.last_update_check, + 'created': a.created, + 'uptime': a.uptime, + 'is_owner': a.is_owner + }, + 'statistics': { + '10min_avg': a.m10avg, + '30min_avg': a.m30avg, + '1hour_avg': a.h1ravg, + '6hour_avg': a.h6ravg, + '1day_avg': a.d1avg, + '1week_avg': a.w1avg + } +} +``` + +## `as_flat_dict() -> dict` + +Returns a flat dictionary representation of the Channel data. 
+ +The data is shaped like this: + +```python +{ + 'parent': 0, + 'lat': 0, + 'lon': 0, + 'name': 0, + 'location_type': 0, + 'pm_2.5': 0, + 'temp_f': 0, + 'temp_c': 0, + 'humidity': 0, + 'pressure': 0, + 'p_0_3_um': 0, + 'p_0_5_um': 0, + 'p_1_0_um': 0, + 'p_2_5_um': 0, + 'p_5_0_um': 0, + 'p_10_0_um': 0, + 'pm1_0_cf_1': 0, + 'pm2_5_cf_1': 0, + 'pm10_0_cf_1': 0, + 'pm1_0_atm': 0, + 'pm2_5_atm': 0, + 'pm10_0_atm': 0, + 'last_seen': 0, + 'model': 0, + 'adc': 0, + 'rssi': 0, + 'hidden': 0, + 'flagged': 0, + 'downgraded': 0, + 'age': 0, + 'brightness': 0, + 'hardware': 0, + 'version': 0, + 'last_update_check': 0, + 'created': 0, + 'uptime': 0, + 'is_owner': 0, + '10min_avg': 0, + '30min_avg': 0, + '1hour_avg': 0, + '6hour_avg': 0, + '1day_avg': 0, + '1week_avg': 0 +} +``` ## `get_historical(weeks_to_get: int, thingspeak_field: str) -> pd.DataFrame` diff --git a/docs/api/sensor_methods.md b/docs/api/sensor_methods.md index c299f4d..90ef1f8 100644 --- a/docs/api/sensor_methods.md +++ b/docs/api/sensor_methods.md @@ -164,68 +164,84 @@ Return a dictionary representation of a sensor. 
The data is shaped like this: '30min_avg': a.m30avg, '1hour_avg': a.h1ravg, '6hour_avg': a.h6ravg, + '1day_avg': a.d1avg, '1week_avg': a.w1avg } }, 'child':{ 'meta': { - 'id': a.identifier, - 'parent': None, - 'lat': a.lat, - 'lon': a.lon, - 'name': a.name, - 'location_type': a.location_type + 'id': b.identifier, + 'parent': a.identifier, + 'lat': b.lat, + 'lon': b.lon, + 'name': b.name, + 'location_type': b.location_type }, 'data': { - 'pm_2.5': a.current_pm2_5, - 'temp_f': a.current_temp_f, - 'temp_c': a.current_temp_c, - 'humidity': a.current_humidity, - 'pressure': a.current_pressure, - 'p_0_3_um': a.current_p_0_3_um, - 'p_0_5_um': a.current_p_0_5_um, - 'p_1_0_um': a.current_p_1_0_um, - 'p_2_5_um': a.current_p_2_5_um, - 'p_5_0_um': a.current_p_5_0_um, - 'p_10_0_um': a.current_p_10_0_um, - 'pm1_0_cf_1': a.current_pm1_0_cf_1, - 'pm2_5_cf_1': a.current_pm2_5_cf_1, - 'pm10_0_cf_1': a.current_pm10_0_cf_1, - 'pm1_0_atm': a.current_pm1_0_atm, - 'pm2_5_atm': a.current_pm2_5_atm, - 'pm10_0_atm': a.current_pm10_0_atm + 'pm_2.5': b.current_pm2_5, + 'temp_f': b.current_temp_f, + 'temp_c': b.current_temp_c, + 'humidity': b.current_humidity, + 'pressure': b.current_pressure, + 'p_0_3_um': b.current_p_0_3_um, + 'p_0_5_um': b.current_p_0_5_um, + 'p_1_0_um': b.current_p_1_0_um, + 'p_2_5_um': b.current_p_2_5_um, + 'p_5_0_um': b.current_p_5_0_um, + 'p_10_0_um': b.current_p_10_0_um, + 'pm1_0_cf_1': b.current_pm1_0_cf_1, + 'pm2_5_cf_1': b.current_pm2_5_cf_1, + 'pm10_0_cf_1': b.current_pm10_0_cf_1, + 'pm1_0_atm': b.current_pm1_0_atm, + 'pm2_5_atm': b.current_pm2_5_atm, + 'pm10_0_atm': b.current_pm10_0_atm }, 'diagnostic': { - 'last_seen': a.last_seen, - 'model': a.model, - 'hidden': a.hidden, - 'flagged': a.flagged, - 'downgraded': a.downgraded, - 'age': a.age, - 'brightness': a.brightness, - 'hardware': a.hardware, - 'version': a.version, - 'last_update_check': a.last_update_check, - 'created': a.created, - 'uptime': a.uptime, - 'is_owner': a.is_owner + 'last_seen': b.last_seen, + 
'model': b.model, + 'adc': b.adc, + 'rssi': b.rssi, + 'hidden': b.hidden, + 'flagged': b.flagged, + 'downgraded': b.downgraded, + 'age': b.age, + 'brightness': b.brightness, + 'hardware': b.hardware, + 'version': b.version, + 'last_update_check': b.last_update_check, + 'created': b.created, + 'uptime': b.uptime, + 'is_owner': b.is_owner }, 'statistics': { - '10min_avg': a.m10avg, - '30min_avg': a.m30avg, - '1hour_avg': a.h1ravg, - '6hour_avg': a.h6ravg, - '1week_avg': a.w1avg + '10min_avg': b.m10avg, + '30min_avg': b.m30avg, + '1hour_avg': b.h1ravg, + '6hour_avg': b.h6ravg, + '1day_avg': b.d1avg, + '1week_avg': b.w1avg } } } ``` +## `as_list() -> list` + +Return a list representation of a sensor. The data is shaped the same as `as_dict` except instead of `parent` and `child` keys, the parent is the 0th index and the child is the 1st index (`None` when the sensor has no child): + +```python +[ + {...parent_sensor_data...}, + {...child_sensor_data...}, +] +``` + + ## `as_flat_dict(channel: str) -> dict` Returns a flat dictionary representation of the Sensor data. -`channel` is one of `{'a', 'b'}`. +`channel` is one of `{'parent', 'child'}`. The data is shaped like this: @@ -255,6 +271,8 @@ The data is shaped like this: 'pm10_0_atm': 0, 'last_seen': 0, 'model': 0, + 'adc': 0, + 'rssi': 0, 'hidden': 0, 'flagged': 0, 'downgraded': 0, @@ -270,6 +288,7 @@ The data is shaped like this: '30min_avg': 0, '1hour_avg': 0, '6hour_avg': 0, + '1day_avg': 0, '1week_avg': 0 } ``` diff --git a/docs/api/sensorlist_methods.md b/docs/api/sensorlist_methods.md index 262e789..b891a31 100644 --- a/docs/api/sensorlist_methods.md +++ b/docs/api/sensorlist_methods.md @@ -4,11 +4,11 @@ Automatically run on instantiation. Retrieves the current network data from the PurpleAir API. 
-## `to_dataframe(sensor_group: str, channel: str) -> pd.DataFrame` +## `to_dataframe(sensor_filter: str, channel: str, column: Optional[str] = None, value_filter: Union[str, int, float, None] = None) -> pd.DataFrame` Converts dictionary representation of a list of sensors to a Pandas DataFrame where `sensor_group` determines which group of sensors are used. -`channel` is one of `{'a', 'b'}`. +`channel` is one of `{'parent', 'child'}`. * `'useful'` * Sensors with no faults, as determined by [`is_useful()`](/docs/api/sensor_methods.md#is_useful---bool) @@ -16,5 +16,15 @@ Converts dictionary representation of a list of sensors to a Pandas DataFrame wh * Outdoor sensors only * `'all'` * Do not filter sensors +* `'family'` + * Sensor has both parent and child +* `'no_child'` + * Sensor is parent-only +* `'column'` + * `column` must be a key of the flat channel dictionary (see [`as_flat_dict()`](/docs/api/channel_methods.md#as_flat_dict---dict)), e.g. `'10min_avg'` + * If `value_filter` is not provided: + * Sensor has data in `column`, i.e. no `None` values + * If `value_filter` is provided: + * Sensor has data in `column` that is the same as `value_filter` If `sensor_group` is not in the above set, `to_dataframe()` will raise a `ValueError`. diff --git a/docs/documentation.md b/docs/documentation.md index 256f555..bc15247 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -2,6 +2,8 @@ There are two main components of this program: `SensorList` and `Sensor`. A `SensorList` instance represents the network or a subset of the network of PurpleAir sensors, while a `Sensor` represents a single sensor. +`Sensor`s have up to two channels, a `parent` and an optional `child`, that hold data the sensor generates. + ## SensorList PurpleAir sensor network representation @@ -12,13 +14,9 @@ PurpleAir sensor network representation To parse location of all sensors from coordinates to addresses, pass `SensorList(parse_location=True)`. 
-* Members +* Properties * `all_sensors` * All sensors in the PurpleAir network - * `outside_sensors` - * Outdoor sensors in the PurpleAir network - * `useful_sensors` - * Sensors without faults in the PurpleAir network See [api/sensorlist_methods.md](api/sensorlist_methods.md) for method documentation. @@ -36,7 +34,7 @@ Initialize a new sensor. `parse_location` is an optional boolean parameter to use `geopy` to parse the rough address of the location of the sensor based on the latitude and longitude from the sensor's metadata. -* Members +* Properties * `identifier` * Sensor ID Number * `data` @@ -67,7 +65,7 @@ Representation of a sensor channel, either `a` or `b`. For channel `b` (child) s ### `class Channel(channel_data: dict)` -* Members +* Properties * `channel_data` * metadata in Python dictionary format about the channel * `lat` diff --git a/docs/faq.md b/docs/faq.md index fcdb6ca..e4a3e0a 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -10,6 +10,53 @@ The only fix is to try again or invalidate or delete the cache that `requests_ca This error means there is a problem connecting to the `cache.sqlite` file created by `requests_cache`. The program will still run, but results of API calls will not be cached, so affected programs may hit rate limits. +## Invalid ThingSpeak key + +Provided key does not exist on ThingSpeak. Refer to the [purpleair docs](/docs/purpleair_documentation.md#Field%20descriptions) for valid columns and their meanings. + +## Child `{child_sensor_id}` lists parent `{parent_sensor_id}`, but parent does not exist + +The child sensor requested lists a parent, but the parent does not exist on the PurpleAir network. This is a problem with PurpleAir, not this program. Try removing the `cache.sqlite` file it creates in the project’s root directory. + +## No column name provided to filter on + +`to_dataframe` was invoked with `sensor_filter` set to `'column'` but no value for the `column` parameter was provided. 
It should be invoked like this: + +```python +p = SensorList() +p.to_dataframe(sensor_filter='column', + channel='parent', + column='10min_avg') # Any key of Channel.as_flat_dict() is a valid column +``` + +## Requested column `{column}` does not exist in sensor data + +`to_dataframe` was invoked with `sensor_filter` set to `'column'` and the value for the `column` parameter does not exist as a column. Valid names are the keys returned by a Channel's [`as_flat_dict()`](/docs/api/channel_methods.md#as_flat_dict---dict), e.g. `'10min_avg'` or `'pm_2.5'`. + +## No data for filter set: Column `{column}`, value filter: `{value_filter}` + +`to_dataframe` was invoked with `sensor_filter` set to `'column'` and none of the values in the column denoted by the `column` parameter match the given `value_filter` (or, when no `value_filter` is given, every value in that column is `None`). This means the DataFrame would be empty, so we raise an error here before the user attempts to transform data. + +## Invalid sensor channel: `{channel}`. Must be in `{"parent", "child"}` + +A function that requires a `channel` parameter can only look at channels `parent` and `child`. Since there are no other channels, we raise an error when this occurs. + +## Invalid sensor: no configuration found for `{identifier}` + +The requested sensor does not have any data on PurpleAir. + +## Sensor `{identifier}` created without valid data + +A `Sensor` was created with the `json_data` parameter filled, but the JSON is malformed. + +## Invalid sensor ID + +The given `Sensor()`'s ID is not in valid integer form. + +## More than 2 channels found for `{identifier}` + +PurpleAir reports that a sensor has more than one child. This is a problem with PurpleAir, not this program. Try removing the `cache.sqlite` file it creates in the project’s root directory. + ## No sensor data returned from PurpleAir This error happens if the API fails to return data with a `results` key, where `results` is mapped to a JSON blob of sensors. 
diff --git a/purpleair/channel.py b/purpleair/channel.py index e2f837a..c51849e 100644 --- a/purpleair/channel.py +++ b/purpleair/channel.py @@ -78,12 +78,18 @@ def setup(self) -> None: # Statistics self.pm2_5stats: Optional[dict] = json.loads(self.channel_data['Stats']) \ if 'Stats' in self.channel_data else None - self.m10avg: Optional[float] = self.safe_float('v1') - self.m30avg: Optional[float] = self.safe_float('v2') - self.h1ravg: Optional[float] = self.safe_float('v3') - self.h6ravg: Optional[float] = self.safe_float('v4') - self.d1avg: Optional[float] = self.safe_float('v5') - self.w1avg: Optional[float] = self.safe_float('v6') + self.m10avg: Optional[float] = self.pm2_5stats.get( + 'v1') if self.pm2_5stats else None + self.m30avg: Optional[float] = self.pm2_5stats.get( + 'v2') if self.pm2_5stats else None + self.h1ravg: Optional[float] = self.pm2_5stats.get( + 'v3') if self.pm2_5stats else None + self.h6ravg: Optional[float] = self.pm2_5stats.get( + 'v4') if self.pm2_5stats else None + self.d1avg: Optional[float] = self.pm2_5stats.get( + 'v5') if self.pm2_5stats else None + self.w1avg: Optional[float] = self.pm2_5stats.get( + 'v6') if self.pm2_5stats else None self.last_modified_stats: Optional[datetime] = None last_mod = self.pm2_5stats.get('lastModified') \ if self.pm2_5stats is not None else None @@ -93,7 +99,7 @@ def setup(self) -> None: self.last2_modified: Optional[int] = self.pm2_5stats.get( 'timeSinceModified') if self.pm2_5stats is not None else None - # Thingspeak IDs, if these are missing do not crash, just set to None + # ThingSpeak IDs, if these are missing do not crash, just set to None try: self.tp_primary_channel: Optional[str] = self.channel_data['THINGSPEAK_PRIMARY_ID'] self.tp_primary_key: Optional[str] = self.channel_data['THINGSPEAK_PRIMARY_ID_READ_KEY'] @@ -187,6 +193,78 @@ def get_historical(self, weekly_data.index = weekly_data.pop('entry_id') return weekly_data + def as_dict(self) -> dict: + """ + Returns a dictionary 
representation of the channel data + """ + out_d = { + 'meta': { + 'id': self.identifier, + 'parent': self.parent, + 'lat': self.lat, + 'lon': self.lon, + 'name': self.name, + 'location_type': self.location_type + }, + 'data': { + 'pm_2.5': self.current_pm2_5, + 'temp_f': self.current_temp_f, + 'temp_c': self.current_temp_c, + 'humidity': self.current_humidity, + 'pressure': self.current_pressure, + 'p_0_3_um': self.current_p_0_3_um, + 'p_0_5_um': self.current_p_0_5_um, + 'p_1_0_um': self.current_p_1_0_um, + 'p_2_5_um': self.current_p_2_5_um, + 'p_5_0_um': self.current_p_5_0_um, + 'p_10_0_um': self.current_p_10_0_um, + 'pm1_0_cf_1': self.current_pm1_0_cf_1, + 'pm2_5_cf_1': self.current_pm2_5_cf_1, + 'pm10_0_cf_1': self.current_pm10_0_cf_1, + 'pm1_0_atm': self.current_pm1_0_atm, + 'pm2_5_atm': self.current_pm2_5_atm, + 'pm10_0_atm': self.current_pm10_0_atm + }, + 'diagnostic': { + 'last_seen': self.last_seen, + 'model': self.model, + 'adc': self.adc, + 'rssi': self.rssi, + 'hidden': self.hidden, + 'flagged': self.flagged, + 'downgraded': self.downgraded, + 'age': self.age, + 'brightness': self.brightness, + 'hardware': self.hardware, + 'version': self.version, + 'last_update_check': self.last_update_check, + 'created': self.created, + 'uptime': self.uptime, + 'is_owner': self.is_owner + }, + 'statistics': { + '10min_avg': self.m10avg, + '30min_avg': self.m30avg, + '1hour_avg': self.h1ravg, + '6hour_avg': self.h6ravg, + '1day_avg': self.d1avg, + '1week_avg': self.w1avg + } + } + + return out_d + + def as_flat_dict(self) -> dict: + """ + Returns a flat dictionary representation of channel data + """ + out_d = {} + nested = self.as_dict() + for category in nested: + for prop in nested[category]: + out_d[prop] = nested[category][prop] + return out_d + def __repr__(self): """ String representation of the class diff --git a/purpleair/network.py b/purpleair/network.py index f0cb42d..ef10b4d 100644 --- a/purpleair/network.py +++ b/purpleair/network.py @@ -6,7 +6,7 @@ import 
json import time from json.decoder import JSONDecodeError -from typing import List +from typing import List, Optional, Union import pandas as pd import requests @@ -29,12 +29,6 @@ def __init__(self, parse_location=False): self.all_sensors: List[Sensor] = [] self.generate_sensor_list() # Populate `all_sensors` - # Commonly requested/used filters - self.outside_sensors: List[Sensor] = [ - s for s in self.all_sensors if s.location_type == 'outside'] - self.useful_sensors: List[Sensor] = [ - s for s in self.all_sensors if s.is_useful()] - def get_all_data(self) -> None: """ Get all data from the API @@ -56,7 +50,6 @@ def get_all_data(self) -> None: self.parse_raw_result(data['results']) print(f"Initialized {len(self.data):,} sensors!") - def parse_raw_result(self, flat_sensor_data: dict) -> None: """ O(2n) algorithm to build the network map @@ -112,27 +105,79 @@ def generate_sensor_list(self) -> None: json_data=sensor, parse_location=self.parse_location)) - def to_dataframe(self, sensor_filter: str, channel: str) -> pd.DataFrame: + def filter_column(self, + channel: str, + column: Optional[str], + value_filter: Union[str, int, float, None]) -> pd.DataFrame: """ - Converts dictionary representation of a list of sensors to a Pandas DataFrame - where sensor_group determines which group of sensors are used + Filter sensors by column and value_filter. If only column is passed, we + return rows that are not None. If the value_filter is passed, we only + return rows where the column matches that value. 
""" - if sensor_filter not in {'useful', 'outside', 'all'}: + # Check if there is no column passed + if column is None: + raise ValueError('No column name provided to filter on!') + out_l: List[dict] = [] + for sensor in self.all_sensors: + sensor_data = sensor.as_flat_dict(channel) + if column not in sensor_data: + raise ValueError( + f'Requested column {column} does not exist in sensor data!') + result = sensor_data.get(column) + if value_filter and result != value_filter: + continue + if value_filter and result == value_filter: + out_l.append(sensor_data) + elif result is not None: + # If we do not want to filter the values, we filter out `None`s + out_l.append(sensor_data) + + if len(out_l) == 0: # pylint: disable=line-too-long raise ValueError( - f'{sensor_filter} is an invalid sensor group! Must be in {{"useful", "outside", "all"}}') - if channel not in {'a', 'b'}: - raise ValueError( - f'Invalid sensor channel: {channel}. Must be in {{"a", "b"}}') - - if sensor_filter == 'all': - sensor_data = pd.DataFrame([s.as_flat_dict(channel) - for s in self.all_sensors]) - elif sensor_filter == 'outside': - sensor_data = pd.DataFrame([s.as_flat_dict(channel) - for s in self.outside_sensors]) - elif sensor_filter == 'useful': - sensor_data = pd.DataFrame([s.as_flat_dict(channel) - for s in self.useful_sensors]) + f'No data for filter set: Column {column}, value filter: {value_filter}') + return pd.DataFrame(out_l) + + def to_dataframe(self, + sensor_filter: str, + channel: str, + column: Optional[str] = None, + value_filter: Union[str, int, float, None] = None) -> pd.DataFrame: + """ + Converts dictionary representation of a list of sensors to a Pandas DataFrame + where sensor_group determines which group of sensors are used. + + We do not want to pre-calculate all of the possible filters just by creating an + instance of this class. 
+ + Using lambdas here means instead of immediately generating and storing a result of + some code to the dictionary when we first construct it, we only store a function + that can generate the data we want. + + The dictionary returns this function, which we immediately call. As a result we do + not create these data until we ask for them. + """ + try: + sensor_data: pd.DataFrame = { + 'all': lambda: pd.DataFrame([s.as_flat_dict(channel) + for s in self.all_sensors]), + 'outside': lambda: pd.DataFrame([s.as_flat_dict(channel) + for s in [s for s in self.all_sensors + if s.location_type == 'outside']]), + 'useful': lambda: pd.DataFrame([s.as_flat_dict(channel) + for s in [s for s in self.all_sensors + if s.is_useful()]]), + 'family': lambda: pd.DataFrame([s.as_flat_dict(channel) + for s in [s for s in self.all_sensors + if s.parent and s.child]]), + 'no_child': lambda: pd.DataFrame([s.as_flat_dict(channel) + for s in [s for s in self.all_sensors + if not s.child]]), + 'column': lambda: self.filter_column(channel, column, value_filter) + }[sensor_filter]() + except KeyError as err: + raise KeyError( + f'Invalid sensor filter supplied: {sensor_filter}') from err + sensor_data.index = sensor_data.pop('id') return sensor_data diff --git a/purpleair/sensor.py b/purpleair/sensor.py index 5506091..94bbde7 100644 --- a/purpleair/sensor.py +++ b/purpleair/sensor.py @@ -6,7 +6,7 @@ import json import os from re import sub -from typing import Optional +from typing import Optional, List import requests from geopy.geocoders import Nominatim @@ -25,7 +25,7 @@ def __init__(self, identifier: int, json_data: list = None, parse_location=False self.data: Optional[list] = json_data \ if json_data is not None else self.get_data() - # Validate the data we recieved + # Validate the data we received if not self.data: raise ValueError( f'Invalid sensor: no configuration found for {identifier}') @@ -51,7 +51,7 @@ def get_data(self) -> Optional[list]: """ Get new data if no data is provided 
""" - # Santize ID + # Sanitize ID if not isinstance(self.identifier, int): raise ValueError(f'Invalid sensor ID: {self.identifier}') @@ -141,163 +141,50 @@ def get_location(self) -> None: user_agent = root_ua + sub(r'\/|\\| ', '', user_agent) except OSError: print( - 'Unable to read current direcory name to generate Nominatim user agent!') + 'Unable to read current directory name to generate Nominatim user agent!') user_agent = f'{root_ua}anonymous_github_com_reagentx_purple_air_api' geolocator = Nominatim(user_agent=user_agent) location = geolocator.reverse(f'{self.parent.lat}, {self.parent.lon}') - self.location = location + self.location = str(location) def as_dict(self) -> dict: """ Returns a dictionary representation of the sensor data """ - - # Shorthand names for brevity here - # pylint: disable=invalid-name - a = self.parent - # pylint: disable=invalid-name - b = self.child - out_d = { - 'parent': { - 'meta': { - 'id': a.identifier, - 'parent': None, - 'lat': a.lat, - 'lon': a.lon, - 'name': a.name, - 'location_type': a.location_type - }, - 'data': { - 'pm_2.5': a.current_pm2_5, - 'temp_f': a.current_temp_f, - 'temp_c': a.current_temp_c, - 'humidity': a.current_humidity, - 'pressure': a.current_pressure, - 'p_0_3_um': a.current_p_0_3_um, - 'p_0_5_um': a.current_p_0_5_um, - 'p_1_0_um': a.current_p_1_0_um, - 'p_2_5_um': a.current_p_2_5_um, - 'p_5_0_um': a.current_p_5_0_um, - 'p_10_0_um': a.current_p_10_0_um, - 'pm1_0_cf_1': a.current_pm1_0_cf_1, - 'pm2_5_cf_1': a.current_pm2_5_cf_1, - 'pm10_0_cf_1': a.current_pm10_0_cf_1, - 'pm1_0_atm': a.current_pm1_0_atm, - 'pm2_5_atm': a.current_pm2_5_atm, - 'pm10_0_atm': a.current_pm10_0_atm - }, - 'diagnostic': { - 'last_seen': a.last_seen, - 'model': a.model, - 'hidden': a.hidden, - 'flagged': a.flagged, - 'downgraded': a.downgraded, - 'age': a.age, - 'brightness': a.brightness, - 'hardware': a.hardware, - 'version': a.version, - 'last_update_check': a.last_update_check, - 'created': a.created, - 'uptime': a.uptime, - 
'is_owner': a.is_owner - } - }, - 'child': { - 'meta': { - 'id': b.identifier if b else None, - 'parent': a.identifier if b else None, - 'lat': b.lat if b else None, - 'lon': b.lon if b else None, - 'name': b.name if b else None, - 'location_type': b.location_type if b else None - }, - 'data': { - 'pm_2.5': b.current_pm2_5 if b else None, - 'temp_f': b.current_temp_f if b else None, - 'temp_c': b.current_temp_c if b else None, - 'humidity': b.current_humidity if b else None, - 'pressure': b.current_pressure if b else None, - 'p_0_3_um': b.current_p_0_3_um if b else None, - 'p_0_5_um': b.current_p_0_5_um if b else None, - 'p_1_0_um': b.current_p_1_0_um if b else None, - 'p_2_5_um': b.current_p_2_5_um if b else None, - 'p_5_0_um': b.current_p_5_0_um if b else None, - 'p_10_0_um': b.current_p_10_0_um if b else None, - 'pm1_0_cf_1': b.current_pm1_0_cf_1 if b else None, - 'pm2_5_cf_1': b.current_pm2_5_cf_1 if b else None, - 'pm10_0_cf_1': b.current_pm10_0_cf_1 if b else None, - 'pm1_0_atm': b.current_pm1_0_atm if b else None, - 'pm2_5_atm': b.current_pm2_5_atm if b else None, - 'pm10_0_atm': b.current_pm10_0_atm if b else None - }, - 'diagnostic': { - 'last_seen': b.last_seen if b else None, - 'model': b.model if b else None, - 'hidden': b.hidden if b else None, - 'flagged': b.flagged if b else None, - 'downgraded': b.downgraded if b else None, - 'age': b.age if b else None, - 'brightness': b.brightness if b else None, - 'hardware': b.hardware if b else None, - 'version': b.version if b else None, - 'last_update_check': b.last_update_check if b else None, - 'created': b.created if b else None, - 'uptime': b.uptime if b else None, - 'is_owner': b.is_owner if b else None - } - } + return { + 'parent': self.parent.as_dict(), + 'child': self.child.as_dict() if self.child else None, } - if 'Stats' in a.channel_data and a.channel_data['Stats']: - out_d['parent']['statistics'] = { - '10min_avg': a.m10avg, - '30min_avg': a.m30avg, - '1hour_avg': a.h1ravg, - '6hour_avg': 
a.h6ravg, - '1week_avg': a.w1avg - } - else: - out_d['parent']['statistics'] = { - '10min_avg': None, - '30min_avg': None, - '1hour_avg': None, - '6hour_avg': None, - '1week_avg': None - } - - if b and 'Stats' in b.channel_data and b.channel_data['Stats']: - out_d['child']['statistics'] = { - '10min_avg': b.m10avg if b else None, - '30min_avg': b.m30avg if b else None, - '1hour_avg': b.h1ravg if b else None, - '6hour_avg': b.h6ravg if b else None, - '1week_avg': b.w1avg if b else None - } - else: - out_d['child']['statistics'] = { - '10min_avg': None, - '30min_avg': None, - '1hour_avg': None, - '6hour_avg': None, - '1week_avg': None - } + def as_list(self) -> List[Optional[dict]]: + """ + Returns a list representation of the sensor data + """ + return [ + self.parent.as_dict(), + self.child.as_dict() if self.child else None + ] - return out_d + def resolve_sensor_channel(self, channel: str) -> Optional[Channel]: + """ + Resolves a sensor channel string to the respective Channel object + """ + if channel not in {'parent', 'child'}: + raise ValueError( + f'Invalid sensor channel: {channel}. 
Must be in {{"parent", "child"}}') + choice: Optional[Channel] = self.parent if channel == 'parent' else self.child + return choice def as_flat_dict(self, channel: str) -> dict: """ Returns a flat dictionary representation of the Sensor data """ - channel_map = {'a': 'parent', 'b': 'child'} - if channel not in channel_map: - raise ValueError(f'Invalid sensor channel: {channel}') - out_d = {} - src = self.as_dict() - for data_category in src[channel_map[channel]]: - for data in src[channel_map[channel]][data_category]: - out_d[data] = src[channel_map[channel]][data_category][data] - return out_d + choice = self.resolve_sensor_channel(channel) + if choice is None: + # There is no data for the specified sensor, so fill with `None`s + return {key: None for key in self.parent.as_flat_dict()} + return choice.as_flat_dict() def __repr__(self): """ diff --git a/scripts/plot_map.py b/scripts/plot_map.py index b495360..a181484 100644 --- a/scripts/plot_map.py +++ b/scripts/plot_map.py @@ -14,7 +14,7 @@ # Get PurpleAir data p = SensorList() -df = p.to_dataframe('all', 'a') +df = p.to_dataframe('all', 'parent') # Store the lat and lon coords to plot lat = df['lat'].values diff --git a/scripts/run.py b/scripts/run.py index 61e137c..20b2995 100644 --- a/scripts/run.py +++ b/scripts/run.py @@ -8,10 +8,9 @@ # All Sensors p = SensorList() -print(len(p.useful_sensors)) -s = p.useful_sensors[0] # First confirmed useful sensor +s = p.all_sensors[0] # First sensor found s.get_location() -df = p.to_dataframe(sensor_filter='all', channel='a') +df = p.to_dataframe(sensor_filter='all', channel='parent') print(df.head()) # Single sensor @@ -19,7 +18,7 @@ print(se) print(se.parent) print(se.child) -print(se.as_flat_dict('a')) +print(se.parent.as_flat_dict()) se.get_field('field3') se.get_field('field4') print(se.thingspeak_data.keys()) diff --git a/setup.py b/setup.py index 2413843..b3c41bd 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='purpleair', - version='1.1.2', + 
version='1.2', description='Python API Client to get and transform PurpleAir data.', long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", diff --git a/tests/test_channel.py b/tests/test_channel.py new file mode 100644 index 0000000..6067130 --- /dev/null +++ b/tests/test_channel.py @@ -0,0 +1,147 @@ +import unittest + +from purpleair import sensor + + +class TestChannelMethods(unittest.TestCase): + """ + Tests for Sensor class + """ + + def test_get_historical(self): + """ + Test that we properly get a sensor's historical data + """ + se = sensor.Sensor(2891) + se.parent.get_historical(1, 'primary') + se.parent.get_historical(1, 'secondary') + se.child.get_historical(1, 'primary') + se.child.get_historical(1, 'secondary') + + def test_as_dict(self): + """ + Test that channel dictionary representation works + """ + se = sensor.Sensor(1243) + expected_shape = { + 'meta': { + 'id': 0, + 'parent': 0, + 'lat': 0, + 'lon': 0, + 'name': 0, + 'location_type': 0, + }, + 'data': { + 'pm_2.5': 0, + 'temp_f': 0, + 'temp_c': 0, + 'humidity': 0, + 'pressure': 0, + 'p_0_3_um': 0, + 'p_0_5_um': 0, + 'p_1_0_um': 0, + 'p_2_5_um': 0, + 'p_5_0_um': 0, + 'p_10_0_um': 0, + 'pm1_0_cf_1': 0, + 'pm2_5_cf_1': 0, + 'pm10_0_cf_1': 0, + 'pm1_0_atm': 0, + 'pm2_5_atm': 0, + 'pm10_0_atm': 0, + }, + 'diagnostic': { + 'last_seen': 0, + 'model': 0, + 'adc': 0, + 'rssi': 0, + 'hidden': 0, + 'flagged': 0, + 'downgraded': 0, + 'age': 0, + 'brightness': 0, + 'hardware': 0, + 'version': 0, + 'last_update_check': 0, + 'created': 0, + 'uptime': 0, + 'is_owner': 0, + } + } + # Parent channel + result = se.parent.as_dict() + for key in expected_shape: + self.assertIn(key, result) + resolved_data = result[key] + expected_data = expected_shape[key] + for key_2 in expected_data: + self.assertIn(key_2, resolved_data) + + # Child channel + result = se.child.as_dict() + for key in expected_shape: + self.assertIn(key, result) + resolved_data = result[key] + expected_data = 
expected_shape[key] + for key_2 in expected_data: + self.assertIn(key_2, resolved_data) + + def test_as_flat_dict(self): + """ + Test that channel flat dictionary representation works + """ + se = sensor.Sensor(9234) + expected_shape = { + 'id': 0, + 'parent': 0, + 'lat': 0, + 'lon': 0, + 'name': 0, + 'location_type': 0, + 'pm_2.5': 0, + 'temp_f': 0, + 'temp_c': 0, + 'humidity': 0, + 'pressure': 0, + 'p_0_3_um': 0, + 'p_0_5_um': 0, + 'p_1_0_um': 0, + 'p_2_5_um': 0, + 'p_5_0_um': 0, + 'p_10_0_um': 0, + 'pm1_0_cf_1': 0, + 'pm2_5_cf_1': 0, + 'pm10_0_cf_1': 0, + 'pm1_0_atm': 0, + 'pm2_5_atm': 0, + 'pm10_0_atm': 0, + 'last_seen': 0, + 'model': 0, + 'adc': 0, + 'rssi': 0, + 'hidden': 0, + 'flagged': 0, + 'downgraded': 0, + 'age': 0, + 'brightness': 0, + 'hardware': 0, + 'version': 0, + 'last_update_check': 0, + 'created': 0, + 'uptime': 0, + 'is_owner': 0, + } + # Parent channel + result = se.parent.as_flat_dict() + for key in expected_shape: + self.assertIn(key, result) + + # Child channel + result = se.child.as_flat_dict() + for key in expected_shape: + self.assertIn(key, result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_purpleair.py b/tests/test_purpleair.py index f7062bf..61477c0 100644 --- a/tests/test_purpleair.py +++ b/tests/test_purpleair.py @@ -15,37 +15,100 @@ def test_setup_purpleair(self): p = network.SensorList() self.assertIsInstance(p, network.SensorList) - def test_outside_sensor_filtering(self): + def test_to_dataframe_no_filtering(self): """ - Test that outdoor sensor filter work + Test that not using sensor filters works """ p = network.SensorList() - self.assertLess(len(p.outside_sensors), len(p.all_sensors)) + p.to_dataframe('all', 'parent') + p.to_dataframe('all', 'child') - def test_useful_sensor_filtering(self): + def test_to_dataframe_filtering_outside(self): """ - Test that useful sensor filter works + Test that outside sensor filter works """ p = network.SensorList() - self.assertLess(len(p.useful_sensors), 
len(p.all_sensors)) + p.to_dataframe('outside', 'parent') + p.to_dataframe('outside', 'child') - def test_to_dataframe_filtering(self): + def test_to_dataframe_filtering_useful(self): """ Test that useful sensor filter works """ p = network.SensorList() - self.assertEqual(len(p.to_dataframe('all', 'a')), len(p.all_sensors)) - self.assertEqual(len(p.to_dataframe('all', 'b')), len(p.all_sensors)) - self.assertEqual(len(p.to_dataframe('outside', 'a')), len(p.outside_sensors)) - self.assertEqual(len(p.to_dataframe('outside', 'b')), len(p.outside_sensors)) - self.assertEqual(len(p.to_dataframe('useful', 'a')), len(p.useful_sensors)) - self.assertEqual(len(p.to_dataframe('useful', 'b')), len(p.useful_sensors)) + p.to_dataframe('useful', 'parent') + p.to_dataframe('useful', 'child') + + def test_to_dataframe_filtering_no_child(self): + """ + Test that no_child sensor filter works + """ + p = network.SensorList() + p.to_dataframe('no_child', 'parent') + p.to_dataframe('no_child', 'child') - def test_to_dataframe(self): + def test_to_dataframe_filtering_family(self): + """ + Test that family sensor filter works + """ p = network.SensorList() - df_a = p.to_dataframe(sensor_filter='all', channel='a') - df_b = p.to_dataframe(sensor_filter='all', channel='b') + p.to_dataframe('family', 'parent') + p.to_dataframe('family', 'child') + + def test_to_dataframe_cols(self): + p = network.SensorList() + df_a = p.to_dataframe(sensor_filter='all', channel='parent') + df_b = p.to_dataframe(sensor_filter='all', channel='child') self.assertListEqual(list(df_a.columns), list(df_b.columns)) + +class TestPurpleAirColumnFilters(unittest.TestCase): + """ + Test that the 'column' sensor filter of to_dataframe works + """ + def test_to_dataframe_filtering_no_column(self): + """ + Test that not providing a column fails + """ + with self.assertRaises(ValueError): + p = network.SensorList() + p.to_dataframe('column', 'parent') + p.to_dataframe('column', 'child') + + def 
test_to_dataframe_filtering_bad_column(self): + """ + Test that providing a bad column fails + """ + with self.assertRaises(ValueError): + p = network.SensorList() + p.to_dataframe('column', 'parent', 'fake_col_name') + p.to_dataframe('column', 'child', 'fake_col_name') + + def test_to_dataframe_filtering_no_value(self): + """ + Test that providing a column without a value works + """ + p = network.SensorList() + p.to_dataframe('column', 'parent', 'temp_f') + p.to_dataframe('column', 'child', 'temp_f') + + def test_to_dataframe_filtering_good_value(self): + """ + Test that providing a good value works for parent and fails for child + """ + p = network.SensorList() + p.to_dataframe('column', 'parent', 'location_type', 'outside') + with self.assertRaises(ValueError): + p.to_dataframe('column', 'child', 'location_type', 'outside') + + def test_to_dataframe_filtering_bad_value(self): + """ + Test that providing a bad value fails + """ + with self.assertRaises(ValueError): + p = network.SensorList() + p.to_dataframe('column', 'parent', 'location_type', 1234) + p.to_dataframe('column', 'child', 'location_type', 1234) + if __name__ == '__main__': unittest.main() diff --git a/tests/test_sensor.py b/tests/test_sensor.py index e94983b..8779f79 100644 --- a/tests/test_sensor.py +++ b/tests/test_sensor.py @@ -1,4 +1,3 @@ -from purpleair.sensor import Sensor import unittest from purpleair import sensor @@ -24,14 +23,14 @@ def test_cannot_create_sensor_bad_id(self): Test that we cannot create a sensor without an integer ID """ with self.assertRaises(ValueError): - se = sensor.Sensor('a') + se = sensor.Sensor('parent') def test_cannot_create_sensor_bad_json(self): """ Test that we cannot create a sensor without valid json """ with self.assertRaises(ValueError): - se = sensor.Sensor('1', {'a': 1}) + se = sensor.Sensor('1', {'parent': 1}) def test_create_sensor_no_location(self): """ @@ -98,6 +97,8 @@ def test_as_dict(self): 'diagnostic': { 'last_seen': 0, 'model': 0, + 'adc': 0, + 'rssi': 0, 'hidden': 0, 'flagged': 0, 
'downgraded': 0, @@ -115,6 +116,7 @@ def test_as_dict(self): '30min_avg': 0, '1hour_avg': 0, '6hour_avg': 0, + '1day_avg': 0, '1week_avg': 0 } }, @@ -149,6 +151,8 @@ def test_as_dict(self): 'diagnostic': { 'last_seen': 0, 'model': 0, + 'adc': 0, + 'rssi': 0, 'hidden': 0, 'flagged': 0, 'downgraded': 0, @@ -166,6 +170,7 @@ def test_as_dict(self): '30min_avg': 0, '1hour_avg': 0, '6hour_avg': 0, + '1day_avg': 0, '1week_avg': 0 } } @@ -208,6 +213,8 @@ def test_as_flat_dict(self): 'last_seen': 0, 'model': 0, 'hidden': 0, + 'adc': 0, + 'rssi': 0, 'flagged': 0, 'downgraded': 0, 'age': 0, @@ -222,18 +229,19 @@ def test_as_flat_dict(self): '30min_avg': 0, '1hour_avg': 0, '6hour_avg': 0, + '1day_avg': 0, '1week_avg': 0, } - # Test channel b - src = se.as_flat_dict(channel='a') + # Test parent + src = se.as_flat_dict(channel='parent') for data_category in expected_shape: self.assertIn(data_category, src) for data in src: self.assertNotIsInstance(src[data], dict) - # Test channel a - src = se.as_flat_dict(channel='b') + # Test child + src = se.as_flat_dict(channel='child') for data_category in expected_shape: self.assertIn(data_category, src) for data in src: