openaq_engine.config package

Submodules

openaq_engine.config.model_settings module

class openaq_engine.config.model_settings.BuildFeaturesConfig(TARGET_COL: str = 'value', CATEGORICAL_FEATURES: List[pydantic.types.StrictStr] = <factory>, CORE_FEATURES: List[pydantic.types.StrictStr] = <factory>)[source]

Bases: object

property ALL_MODEL_FEATURES: List[str]

Return all features to be fed into the model

CATEGORICAL_FEATURES: List[StrictStr]
CITY = ''
CORE_FEATURES: List[StrictStr]
COUNTRY = ''
SATELLITE_FEATURES = []
TARGET_COL: str = 'value'
TARGET_VARIABLE = 'pm25'
class openaq_engine.config.model_settings.CohortBuilderConfig(ENTITY_ID_COLS: Sequence[str] = <factory>, DATE_COL: str = 'date.utc', SCHEMA_NAME: str = '', FILTER_DICT: Dict[str, Any] = <factory>)[source]

Bases: object

CITY = ''
COUNTRY = ''
DATE_COL: str = 'date.utc'
ENTITY_ID_COLS: Sequence[str]
FILTER_DICT: Dict[str, Any]
LOCAL_DATA = ''
REGION = 'us-east-1'
S3_BUCKET = None
S3_OUTPUT = None
SCHEMA_NAME: str = ''
SENSOR_TYPE = 'reference grade'
SOURCE = ''
TABLE_NAME = ''
TARGET_VARIABLE = ''
class openaq_engine.config.model_settings.EEConfig(DATE_COL: str = 'timestamp_utc', AOD_IMAGE_COLLECTION: str = 'MODIS/006/MCD19A2_GRANULES', AOD_IMAGE_BAND: Sequence[str] = <factory>, LANDSAT_IMAGE_COLLECTION: str = 'LANDSAT/LC08/C01/T1', LANDSAT_IMAGE_BAND: Sequence[str] = <factory>, NIGHTTIME_LIGHT_IMAGE_COLLECTION: str = 'NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG', NIGHTTIME_LIGHT_IMAGE_BAND: Sequence[str] = <factory>, METEROLOGICAL_IMAGE_COLLECTION: str = 'NOAA/GFS0P25', METEROLOGICAL_IMAGE_BAND: Sequence[str] = <factory>, POPULATION_IMAGE_COLLECTION: str = 'CIESIN/GPWv411/GPW_Basic_Demographic_Characteristics', POPULATION_IMAGE_BAND: Sequence[str] = <factory>, LAND_COVER_IMAGE_COLLECTION: str = 'COPERNICUS/Landcover/100m/Proba-V-C3/Global', LAND_COVER_IMAGE_BAND: Sequence[str] = <factory>)[source]

Bases: object

property ALL_SATELLITES: zip

Return the various satellites to be fed into the model.

AOD_IMAGE_BAND: Sequence[str]
AOD_IMAGE_COLLECTION: str = 'MODIS/006/MCD19A2_GRANULES'
AOD_IMAGE_PERIOD = 2
AOD_IMAGE_RES = 1000
BUCKET_NAME = ''
DATE_COL: str = 'timestamp_utc'
LANDSAT_IMAGE_BAND: Sequence[str]
LANDSAT_IMAGE_COLLECTION: str = 'LANDSAT/LC08/C01/T1'
LANDSAT_PERIOD = 8
LANDSAT_RES = 30
LAND_COVER_IMAGE_BAND: Sequence[str]
LAND_COVER_IMAGE_COLLECTION: str = 'COPERNICUS/Landcover/100m/Proba-V-C3/Global'
LAND_COVER_IMAGE_RES = 100
LAND_COVER_PERIOD = 1500
LOOKBACK_N = 1
METEROLOGICAL_IMAGE_BAND: Sequence[str]
METEROLOGICAL_IMAGE_COLLECTION: str = 'NOAA/GFS0P25'
METEROLOGICAL_IMAGE_PERIOD = 1
METEROLOGICAL_IMAGE_RES = 27830
NIGHTTIME_LIGHT_IMAGE_BAND: Sequence[str]
NIGHTTIME_LIGHT_IMAGE_COLLECTION: str = 'NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG'
NIGHTTIME_LIGHT_PERIOD = 30
NIGHTTIME_LIGHT_RES = 463.83
PATH_TO_PRIVATE_KEY = ''
POPULATION_IMAGE_BAND: Sequence[str]
POPULATION_IMAGE_COLLECTION: str = 'CIESIN/GPWv411/GPW_Basic_Demographic_Characteristics'
POPULATION_IMAGE_RES = 1000
POPULATION_PERIOD = 1100
SERVICE_ACCOUNT = ''
TABLE_NAME = 'cohorts'
class openaq_engine.config.model_settings.HyperparamConfig[source]

Bases: object

MODEL_HYPERPARAMS = {'DTC': {'max_depth': [5, 10, 20, 30, 40]}, 'MLR': {'C': [1, 0.1, 0.01], 'max_iter': [2000], 'penalty': ['l2'], 'solver': ['saga']}, 'MNB': {'alpha': [0, 0.05]}, 'RFR': {'max_depth': [10, 50, 70], 'n_estimators': [500, 800]}, 'XGB': {'learning_rate': [0.1, 0.5, 1], 'max_depth': [5, 150, 200, 250, 300]}}
MODEL_TYPES = ['DTC', 'MNB', 'RFR', 'XGB']
class openaq_engine.config.model_settings.MatrixGeneratorConfig(ID_COLUMN_LIST: Sequence[str] = <factory>)[source]

Bases: object

ALGORITHM = 'RFR'
ID_COLUMN_LIST: Sequence[str]
class openaq_engine.config.model_settings.ModelTrainerConfig[source]

Bases: object

All_MODEL_FEATURES = ['Optical_Depth_047', 'B4', 'B3', 'B2', 'avg_rad', 'temperature_2m_above_ground', 'relative_humidity_2m_above_ground', 'total_precipitation_surface', 'total_cloud_cover_entire_atmosphere', 'u_component_of_wind_10m_above_ground', 'v_component_of_wind_10m_above_ground', 'basic_demographic_characteristics', 'discrete_classification']
ID_COLS_TO_REMOVE = ['location_id', 'cohort', 'cohort_type']
MODEL_NAMES_LIST = ['RFR']
RANDOM_STATE = 99
class openaq_engine.config.model_settings.ModelVisualizerConfig(PLOT: bool = True, PLOT_METRICS: Sequence[str] = <factory>, PLOTS_TABLE_NAME: str = '', PLOTS_SCHEMA_NAME: str = '', RESULTS_TABLE_NAME: str = 'results')[source]

Bases: object

PLOT: bool = True
PLOTS_SCHEMA_NAME: str = ''
PLOTS_TABLE_NAME: str = ''
PLOT_METRICS: Sequence[str]
RESULTS_TABLE_NAME: str = 'results'
class openaq_engine.config.model_settings.TimeSplitterConfig(DATE_COL: str = 'date.utc', TIME_WINDOW_LENGTH: int = 4, WITHIN_WINDOW_SAMPLER: int = 4, WINDOW_COUNT: int = 10, TABLE_NAME: str = '', TRAIN_VALIDATION_DICT: Dict[str, List[Any]] = <factory>)[source]

Bases: object

AWS_ACCESS_KEY = None
AWS_SECRET_ACCESS_KEY = None
CITY = ''
COUNTRY = ''
DATABASE = None
DATE_COL: str = 'date.utc'
LOCAL_DATA = ''
REGION = ''
RESOURCE = s3.ServiceResource()
S3_BUCKET = None
S3_OUTPUT = None
SENSOR_TYPE = 'reference grade'
SOURCE = ''
TABLE_NAME: str = ''
TARGET_VARIABLE = ''
TIME_WINDOW_LENGTH: int = 4
TRAIN_VALIDATION_DICT: Dict[str, List[Any]]
WINDOW_COUNT: int = 10
WITHIN_WINDOW_SAMPLER: int = 4

Module contents