Skip to content

Hourly Model

eemeter.eemeter.models.hourly

Copyright 2014-2024 OpenEEmeter contributors

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

HourlyModel(settings=None)

A class to fit a model to the input meter data.

Attributes:

Name Type Description
settings dict

A dictionary of settings.

baseline_metrics dict

A dictionary of metrics based on input baseline data and model fit.

Parameters:

Name Type Description Default
settings dict | BaseHourlySettings | None

HourlySettings to use (generally left default). Will default to solar model if GHI is given to the fit step.

None
Source code in eemeter/eemeter/models/hourly/model.py
def __init__(
    self,
    settings: dict | _settings.BaseHourlySettings | None = None,
):
    """Set up an unfitted hourly model.

    Args:
        settings: HourlySettings to use (generally left default). Will default to solar model if GHI is given to the fit step.
            A plain dict is converted to a settings object: the solar
            settings class when its "train_features" include "ghi", the
            non-solar class when "train_features" is given without "ghi",
            and the base class when "train_features" is missing or empty.
    """

    # TODO move this logic into HourlySettings init
    if isinstance(settings, dict):
        requested_features = settings.get("train_features")
        if not requested_features:
            settings_cls = _settings.BaseHourlySettings
        elif "ghi" in requested_features:
            settings_cls = _settings.HourlySolarSettings
        else:
            settings_cls = _settings.HourlyNonSolarSettings
        settings = settings_cls(**settings)

    # Fall back to the base settings when nothing was supplied
    self.settings = _settings.BaseHourlySettings() if settings is None else settings

    # Scalers for the features and the target, chosen by the configured method
    if self.settings.scaling_method == _settings.ScalingChoice.STANDARDSCALER:
        self._feature_scaler, self._y_scaler = StandardScaler(), StandardScaler()
    elif self.settings.scaling_method == _settings.ScalingChoice.ROBUSTSCALER:
        self._feature_scaler = RobustScaler(unit_variance=True)
        self._y_scaler = RobustScaler(unit_variance=True)

    self._T_edge_bin_coeffs = None

    # Regression model configured entirely from the elasticnet settings
    enet = self.settings.elasticnet
    self._model = ElasticNet(
        alpha=enet.alpha,
        l1_ratio=enet.l1_ratio,
        fit_intercept=enet.fit_intercept,
        precompute=enet.precompute,
        max_iter=enet.max_iter,
        tol=enet.tol,
        selection=enet.selection,
        random_state=enet._seed,
    )

    # Internal state that stays empty until the model is fit or loaded
    self._T_bin_edges = None
    self._T_edge_bin_rate = None
    self._df_temporal_clusters = None
    self._categorical_features = None
    self._ts_feature_norm = None

    # Work on a copy so the settings' feature list is never shared
    configured_features = self.settings.train_features
    self._ts_features = configured_features.copy() if configured_features else []

    self.is_fitted = False
    self.baseline_metrics = None

    self.warnings: list[EEMeterWarning] = []
    self.disqualification: list[EEMeterWarning] = []

    self.baseline_timezone = None
    self.error = {}
    self.version = __version__

settings = _settings.BaseHourlySettings() instance-attribute

is_fitted = False instance-attribute

baseline_metrics = None instance-attribute

warnings: list[EEMeterWarning] = [] instance-attribute

disqualification: list[EEMeterWarning] = [] instance-attribute

baseline_timezone = None instance-attribute

error = dict() instance-attribute

version = __version__ instance-attribute

fit(baseline_data, ignore_disqualification=False)

Fit the model using baseline data.

Parameters:

Name Type Description Default
baseline_data HourlyBaselineData

HourlyBaselineData object.

required
ignore_disqualification bool

Whether to ignore disqualification errors / warnings.

False

Returns:

Type Description
HourlyModel

The fitted model.

Raises:

Type Description
TypeError

If baseline_data is not an HourlyBaselineData object.

DataSufficiencyError

If the model can't be fit on disqualified baseline data.

Source code in eemeter/eemeter/models/hourly/model.py
def fit(
    self, baseline_data: HourlyBaselineData, ignore_disqualification: bool = False
) -> HourlyModel:
    """Fit the model using baseline data.

    The model stores its own copies of the data object's warning and
    disqualification lists. Issues found while fitting (GHI mismatch, poor
    fit) are therefore recorded on the model only and do not leak back into
    ``baseline_data``, so the same data object can be fit again.

    Args:
        baseline_data: HourlyBaselineData object.
        ignore_disqualification: Whether to ignore disqualification errors / warnings.

    Returns:
        The fitted model.

    Raises:
        TypeError: If baseline_data is not an HourlyBaselineData object.
        DataSufficiencyError: If the model can't be fit on disqualified baseline data.
        ValueError: If the model's features include GHI but the baseline data has no "ghi" column.
    """
    if not isinstance(baseline_data, HourlyBaselineData):
        raise TypeError("baseline_data must be an HourlyBaselineData object")
    baseline_data.log_warnings()
    if baseline_data.disqualification and not ignore_disqualification:
        raise DataSufficiencyError("Can't fit model on disqualified baseline data")

    # The df property is documented as returning a copy, so fetch the columns once.
    baseline_columns = baseline_data.df.columns
    if "ghi" in self._ts_features and "ghi" not in baseline_columns:
        raise ValueError(
            "Model was explicitly set to use GHI, but baseline data does not contain GHI."
        )

    # Copy rather than alias: the appends below must not mutate the lists
    # owned by baseline_data (a poor-fit disqualification would otherwise
    # make the data itself look disqualified on the next fit).
    self.warnings = list(baseline_data.warnings)
    self.disqualification = list(baseline_data.disqualification)

    # No explicit features configured: let the settings pick defaults from the columns.
    if not self._ts_features:
        self.settings = self.settings.add_default_features(baseline_columns)
        self._ts_features = self.settings.train_features.copy()

    if "ghi" in baseline_columns and "ghi" not in self._ts_features:
        model_mismatch_warning = EEMeterWarning(
            qualified_name="eemeter.potential_model_mismatch",
            description=(
                "Model was explicitly set to ignore GHI, but baseline period contained a GHI column."
            ),
            data={},
        )
        model_mismatch_warning.warn()
        self.warnings.append(model_mismatch_warning)

    self._fit(baseline_data)
    if not self._model_fit_is_acceptable():
        model_fit_warning = EEMeterWarning(
            qualified_name="eemeter.model_fit_metrics",
            description="Model disqualified due to poor fit.",
            data={
                "cvrmse_threshold": self.settings.cvrmse_threshold,
                "cvrmse_adj": self.baseline_metrics.cvrmse_adj,
                "pnrmse_threshold": self.settings.pnrmse_threshold,
                "pnrmse_adj": self.baseline_metrics.pnrmse_adj,
            },
        )
        model_fit_warning.warn()
        self.disqualification.append(model_fit_warning)
    return self

predict(reporting_data, ignore_disqualification=False)

Predicts the energy consumption using the fitted model.

Parameters:

Name Type Description Default
reporting_data Union[HourlyBaselineData, HourlyReportingData]

The data used for prediction.

required
ignore_disqualification bool

Whether to ignore model disqualification. Defaults to False.

False

Returns:

Type Description
DataFrame

Dataframe with input data along with predicted energy consumption.

Raises:

Type Description
RuntimeError

If the model is not fitted.

DisqualifiedModelError

If the model is disqualified and ignore_disqualification is False.

TypeError

If the reporting data is not of type HourlyBaselineData or HourlyReportingData.

Source code in eemeter/eemeter/models/hourly/model.py
def predict(
    self,
    reporting_data,
    ignore_disqualification=False,
) -> pd.DataFrame:
    """Predicts the energy consumption using the fitted model.

    Args:
        reporting_data (Union[HourlyBaselineData, HourlyReportingData]): The data used for prediction.
        ignore_disqualification (bool, optional): Whether to ignore model disqualification. Defaults to False.

    Returns:
        Dataframe with input data along with predicted energy consumption.

    Raises:
        RuntimeError: If the model is not fitted.
        TypeError: If the reporting data is not of type HourlyBaselineData or HourlyReportingData.
        ValueError: If the reporting data lacks a feature the model was fit on,
            or uses a different timezone than the baseline data.
        DisqualifiedModelError: If the model is disqualified and ignore_disqualification is False.
    """
    if not self.is_fitted:
        raise RuntimeError("Model must be fit before predictions can be made.")

    # Validate the type before touching .df / .tz, so a wrong argument yields
    # the documented TypeError instead of an AttributeError.
    if not isinstance(reporting_data, (HourlyBaselineData, HourlyReportingData)):
        raise TypeError(
            "reporting_data must be a HourlyBaselineData or HourlyReportingData object"
        )

    # The df property is documented as returning a copy, so fetch the columns once.
    reporting_columns = reporting_data.df.columns

    if missing_features := (set(self._ts_features) - set(reporting_columns)):
        raise ValueError(
            f"Reporting data is missing the following features: {missing_features}"
        )

    if "ghi" in reporting_columns and "ghi" not in self._ts_features:
        model_mismatch_warning = EEMeterWarning(
            qualified_name="eemeter.potential_model_mismatch",
            description=(
                "Reporting data contains GHI, but model was fit without GHI."
            ),
            data={},
        )
        model_mismatch_warning.warn()
        self.warnings.append(model_mismatch_warning)

    # Compared as strings so a stored timezone name and a tz object can match.
    if str(self.baseline_timezone) != str(reporting_data.tz):
        raise ValueError(
            "Reporting data must use the same timezone that the model was initially fit on."
        )

    if self.disqualification and not ignore_disqualification:
        raise DisqualifiedModelError(
            "Attempting to predict using disqualified model without setting ignore_disqualification=True"
        )

    return self._predict(reporting_data)

to_dict()

Returns a dictionary of model parameters.

Returns:

Type Description
dict

Model parameters.

Source code in eemeter/eemeter/models/hourly/model.py
def to_dict(self) -> dict:
    """Returns a dictionary of model parameters.

    Scaler statistics are stored as ``[center, scale]`` pairs: one pair per
    entry of the model's time-series features, plus one pair for the target.
    The "center" is the scaler's ``mean_`` for the standard scaler and its
    ``center_`` for the robust scaler.

    Returns:
        Model parameters.
    """
    scaling = self.settings.scaling_method
    feature_scaler = {}

    if scaling == _settings.ScalingChoice.STANDARDSCALER:
        feature_scaler = {
            name: [self._feature_scaler.mean_[idx], self._feature_scaler.scale_[idx]]
            for idx, name in enumerate(self._ts_features)
        }
        y_scaler = [self._y_scaler.mean_.squeeze(), self._y_scaler.scale_.squeeze()]

    elif scaling == _settings.ScalingChoice.ROBUSTSCALER:
        feature_scaler = {
            name: [self._feature_scaler.center_[idx], self._feature_scaler.scale_[idx]]
            for idx, name in enumerate(self._ts_features)
        }
        y_scaler = [self._y_scaler.center_.squeeze(), self._y_scaler.scale_.squeeze()]

    # Flatten the temporal cluster frame (index included) into a list of rows
    cluster_rows = self._df_temporal_clusters.reset_index().values.tolist()

    model_info = _settings.ModelInfo(
        disqualification=self.disqualification,
        warnings=self.warnings,
        error=self.error,
        baseline_timezone=str(self.baseline_timezone),
        version=self.version,
    )

    serialized = _settings.SerializeModel(
        settings=self.settings,
        temporal_clusters=cluster_rows,
        temperature_bin_edges=self._T_bin_edges,
        temperature_edge_bin_coefficients=self._T_edge_bin_coeffs,
        ts_features=self._ts_features,
        categorical_features=self._categorical_features,
        coefficients=self._model.coef_.tolist(),
        intercept=self._model.intercept_.tolist(),
        feature_scaler=feature_scaler,
        catagorical_scaler=None,
        y_scaler=y_scaler,
        baseline_metrics=self.baseline_metrics,
        info=model_info,
    )

    return serialized.model_dump()

to_json()

Returns a JSON string of model parameters.

Returns:

Type Description
str

Model parameters.

Source code in eemeter/eemeter/models/hourly/model.py
def to_json(self) -> str:
    """Serialize the model parameters to a JSON string.

    The parameters are exactly those produced by ``to_dict``.

    Returns:
        Model parameters.
    """
    model_params = self.to_dict()
    return json.dumps(model_params)

from_dict(data) classmethod

Create an instance of the class from a dictionary (such as one produced from the to_dict method).

Parameters:

Name Type Description Default
data dict

The dictionary containing the model data.

required

Returns:

Type Description
HourlyModel

An instance of the class.

Source code in eemeter/eemeter/models/hourly/model.py
@classmethod
def from_dict(cls, data) -> HourlyModel:
    """Create an instance of the class from a dictionary (such as one produced from the to_dict method).

    Args:
        data (dict): The dictionary containing the model data.

    Returns:
        An instance of the class.
    """
    # Rebuild the settings; the presence of "ghi" selects the solar variant
    settings_data = data.get("settings")
    if "ghi" in settings_data.get("train_features"):
        settings = _settings.HourlySolarSettings(**settings_data)
    else:
        settings = _settings.HourlyNonSolarSettings(**settings_data)

    # Start from a freshly initialized model and restore its state
    model = cls(settings=settings)

    cluster_index_cols = model._temporal_cluster_cols
    model._df_temporal_clusters = pd.DataFrame(
        data.get("temporal_clusters"),
        columns=cluster_index_cols + ["temporal_cluster"],
    ).set_index(cluster_index_cols)

    model._T_bin_edges = np.array(data.get("temperature_bin_edges"))
    # Keys are converted back to int (they are strings after a JSON round trip)
    model._T_edge_bin_coeffs = {
        int(bin_id): coeffs
        for bin_id, coeffs in data.get("temperature_edge_bin_coefficients").items()
    }

    model._ts_features = data.get("ts_features")
    model._categorical_features = data.get("categorical_features")

    # Scaler statistics are stored as [center, scale] pairs
    feature_stats = list(data.get("feature_scaler").values())
    feature_centers = [pair[0] for pair in feature_stats]
    feature_scales = [pair[1] for pair in feature_stats]
    y_stats = data.get("y_scaler")

    # The attribute holding the center differs between the two scaler types
    if settings.scaling_method == _settings.ScalingChoice.STANDARDSCALER:
        center_attr = "mean_"
    elif settings.scaling_method == _settings.ScalingChoice.ROBUSTSCALER:
        center_attr = "center_"
    else:
        center_attr = None

    if center_attr is not None:
        setattr(model._feature_scaler, center_attr, np.array(feature_centers))
        model._feature_scaler.scale_ = np.array(feature_scales)

        setattr(model._y_scaler, center_attr, np.array(y_stats[0]))
        model._y_scaler.scale_ = np.array(y_stats[1])

    # Restore the fitted regression parameters
    model._model.coef_ = np.array(data.get("coefficients"))
    model._model.intercept_ = np.array(data.get("intercept"))

    model.is_fitted = True

    model.baseline_metrics = BaselineMetricsFromDict(data.get("baseline_metrics"))

    # Restore bookkeeping recorded when the model was serialized
    info = _settings.ModelInfo(**data.get("info"))
    model.warnings = info.warnings
    model.disqualification = info.disqualification
    model.error = info.error
    model.baseline_timezone = info.baseline_timezone
    model.version = info.version

    return model

from_json(str_data) classmethod

Create an instance of the class from a JSON string.

Parameters:

Name Type Description Default
str_data

The JSON string representing the object.

required

Returns:

Type Description
HourlyModel

An instance of the class.

Source code in eemeter/eemeter/models/hourly/model.py
@classmethod
def from_json(cls, str_data) -> HourlyModel:
    """Build a model from its JSON representation.

    The string is parsed and the resulting dictionary is handed to
    ``from_dict``.

    Args:
        str_data: The JSON string representing the object.

    Returns:
        An instance of the class.

    """
    parsed = json.loads(str_data)
    return cls.from_dict(parsed)

plot(df_eval)

Plot a model fit with baseline or reporting data.

Parameters:

Name Type Description Default
df_eval HourlyBaselineData | HourlyReportingData

The baseline or reporting data object to plot.

required
Source code in eemeter/eemeter/models/hourly/model.py
def plot(self, df_eval: HourlyBaselineData | HourlyReportingData):
    """Plot a model fit with baseline or reporting data.

    Not implemented: calling this always raises NotImplementedError.

    Args:
        df_eval: The baseline or reporting data object to plot.
    """
    raise NotImplementedError()

HourlyBaselineData(df, is_electricity_data, pv_start=None, **kwargs)

Data class to represent Hourly Baseline Data.

Only baseline data should go into the dataframe input, no blackout data should be input. Checks sufficiency for the data provided as input depending on OpenEEMeter specifications and populates disqualifications and warnings based on it.

Parameters:

Name Type Description Default
df DataFrame

A dataframe having a datetime index or a datetime column with the timezone also being set. It also requires 2 more columns - 'observed' for meter data, and 'temperature' for temperature data. Optionally, column 'ghi' can be included in order to fit on solar data. The temperature column should have values in Fahrenheit. Please convert your temperatures accordingly.

required
is_electricity_data bool

Flag to ascertain if this is electricity data or not. Electricity data values of 0 are set to NaN.

required

Attributes:

Name Type Description
df DataFrame

Immutable dataframe that contains the meter and temperature values for the baseline data period.

disqualification list[EEMeterWarning]

A list of serious issues with the data that can degrade the quality of the model. If you want to go ahead with building the model while ignoring them, set the ignore_disqualification = True flag in the model. By default disqualifications are not ignored.

warnings list[EEMeterWarning]

A list of issues with the data, but none that will severely reduce the quality of the model built.

pv_start date

Solar install date. If left unset, assumed to be at beginning of data.

Source code in eemeter/eemeter/models/hourly/data.py
def __init__(
    self,
    df: pd.DataFrame,
    is_electricity_data: bool,
    pv_start: Union[date, str, None] = None,
    **kwargs: dict,
):
    """Store the input data and run the sufficiency checks on it.

    Args:
        df: Input dataframe; it is passed through ``_set_data`` before being stored.
        is_electricity_data: Whether the meter data is electricity data.
        pv_start: Solar install date; converted to a ``date`` when given.
        **kwargs: Extra options. An "outputs" entry overrides the default
            output columns ("temperature" and "observed").
    """
    self._df = None
    self.tz = None
    self.is_electricity_data = is_electricity_data
    self.warnings = []
    self.disqualification = []

    # TODO copied from HourlyData
    self._to_be_interpolated_columns = []
    self._outputs = []

    # Normalize the install date to a plain date object when provided
    self.pv_start = None if pv_start is None else pd.to_datetime(pv_start).date()

    # TODO not sure why we're keeping this copy, just set the attrs
    self._kwargs = copy.deepcopy(kwargs)
    self._outputs = (
        copy.deepcopy(self._kwargs["outputs"])
        if "outputs" in self._kwargs
        else ["temperature", "observed"]
    )

    self._df = self._set_data(df)

    # Collect the issues found by the sufficiency check, then log everything
    found_disqualification, found_warnings = self._check_data_sufficiency()
    self.disqualification += found_disqualification
    self.warnings += found_warnings
    self.log_warnings()

warnings = [] instance-attribute

disqualification = [] instance-attribute

is_electricity_data = is_electricity_data instance-attribute

tz = None instance-attribute

pv_start = None instance-attribute

df property

Get the corrected input data stored in the class. The actual dataframe is immutable, this returns a copy.

log_warnings()

Logs the warnings and disqualifications associated with the data.

Source code in eemeter/eemeter/models/hourly/data.py
def log_warnings(self):
    """
    Emit every recorded issue: all warnings first, then all disqualifications.
    """
    recorded_issues = [*self.warnings, *self.disqualification]
    for issue in recorded_issues:
        issue.warn()

HourlyReportingData(df, is_electricity_data, pv_start=None, **kwargs)

Data class to represent Hourly Reporting Data.

Only reporting data should go into the dataframe input, no blackout data should be input. Checks sufficiency for the data provided as input depending on OpenEEMeter specifications and populates disqualifications and warnings based on it.

Meter data input is optional for the reporting class.

Parameters:

Name Type Description Default
df DataFrame

A dataframe having a datetime index or a datetime column with the timezone also being set. It also requires a 'temperature' column for temperature data; an 'observed' column for meter data is optional for reporting data. If GHI was provided during the baseline period, it should also be supplied for the reporting period with column name 'ghi'. The temperature column should have values in Fahrenheit. Please convert your temperatures accordingly.

required
is_electricity_data bool

Flag to ascertain if this is electricity data or not. Electricity data values of 0 are set to NaN.

required

Attributes:

Name Type Description
df DataFrame

Immutable dataframe that contains the meter and temperature values for the reporting data period.

disqualification list[EEMeterWarning]

A list of serious issues with the data that can degrade the quality of the model. If you want to go ahead with building the model while ignoring them, set the ignore_disqualification = True flag in the model. By default disqualifications are not ignored.

warnings list[EEMeterWarning]

A list of issues with the data, but none that will severely reduce the quality of the model built.

pv_start date

Solar install date. If left unset, assumed to be at beginning of data.

Source code in eemeter/eemeter/models/hourly/data.py
def __init__(
    self,
    df: pd.DataFrame,
    is_electricity_data: bool,
    pv_start: Union[date, str, None] = None,
    **kwargs: dict,
):
    """Prepare reporting data, where the meter column is optional.

    The input frame is copied so the caller's dataframe is left untouched.
    When it has no "observed" column, one filled with NaN is added before
    the parent initializer runs.

    Args:
        df: Input dataframe for the reporting period.
        is_electricity_data: Whether the meter data is electricity data.
        pv_start: Solar install date, forwarded to the parent initializer.
        **kwargs: Extra options forwarded to the parent initializer.
    """
    reporting_df = df.copy()
    has_meter_column = "observed" in reporting_df.columns
    if not has_meter_column:
        reporting_df["observed"] = np.nan

    super().__init__(reporting_df, is_electricity_data, pv_start, **kwargs)

warnings = [] instance-attribute

disqualification = [] instance-attribute

is_electricity_data = is_electricity_data instance-attribute

tz = None instance-attribute

pv_start = None instance-attribute

df property

Get the corrected input data stored in the class. The actual dataframe is immutable, this returns a copy.

log_warnings()

Logs the warnings and disqualifications associated with the data.

Source code in eemeter/eemeter/models/hourly/data.py
def log_warnings(self):
    """
    Emit every recorded issue: all warnings first, then all disqualifications.
    """
    recorded_issues = [*self.warnings, *self.disqualification]
    for issue in recorded_issues:
        issue.warn()