Source code for microgrid.forecast.forecaster

import numpy as np

from datetime import datetime, timedelta
from sklearn.ensemble import ExtraTreesRegressor


class Forecaster:
    """
    Train-on-demand hourly forecaster for the quantities referenced in a database.

    Every call to :meth:`forecast` fits a new ``ExtraTreesRegressor`` on the
    calendar columns listed in ``_ls_input_`` plus the 48 previous hourly values
    of the requested series, and then predicts the horizon one hour at a time,
    feeding each prediction back as a lagged input for the following hours.
    """

    # Start of the slice of the data frame used as learning set.
    _ls_lower_bound_ = datetime(2014, 1, 1, 0, 0, 0)
    # Earliest and latest date times accepted for dt_from and dt_to.
    _dt_lower_bound_ = datetime(2015, 1, 1, 0, 0, 0)
    _dt_upper_bound_ = datetime(2015, 6, 30, 0, 0, 0)

    # Calendar columns of the data frame used as model inputs. The order must
    # match the order in which forecast() builds the date-time part of a test row.
    _ls_input_ = ['Year', 'Month', 'Day', 'Hour', 'Minutes', 'Seconds', 'IsoDayOfWeek',
                  'IsoWeekNumber']

    def __init__(self, database):
        """
        A Forecaster object allows to generate forecast of any of the uncertain quantities referenced in the database.

        :param database: A :class:`Database` object used for training the forecaster
        """
        self.database = database
        self._predicted_quantities_ = database._output_

    def forecast(self, column, dt_from, dt_to):
        """
        Forecast an uncertain quantity over a specified time range with a hourly resolution.
        Each time a forecast is asked, a new forecaster is trained using all previous values
        of the quantity until dt_from.

        :param column: Name of the series to forecast
        :param dt_from: A date_time object specifying the start of the prediction horizon
        :param dt_to: A date_time object specifying the end of the prediction horizon
        :return: The forecast as a numpy array. Both dt_from and dt_to are truncated to the
                 hour and both ends are included, so the length of the array is the number
                 of whole hours between them plus one
        :raises ValueError: If the column cannot be predicted, if the dates are inconsistent
                 or outside the supported range, or if there is not enough history before
                 dt_from to build the lagged inputs
        """
        dt_from = dt_from.replace(minute=0, second=0, microsecond=0)
        dt_to = dt_to.replace(minute=0, second=0, microsecond=0)

        if column not in self._predicted_quantities_:
            raise ValueError('Cannot predict column %s' % column)
        if dt_from > dt_to:
            raise ValueError("From date time cannot be after to date time")
        if dt_from < Forecaster._dt_lower_bound_:
            raise ValueError('From date cannot be before %s' % Forecaster._dt_lower_bound_)
        if dt_from > Forecaster._dt_upper_bound_:
            raise ValueError('From date cannot be after %s' % Forecaster._dt_upper_bound_)
        if dt_to < Forecaster._dt_lower_bound_:
            raise ValueError('To date cannot be before %s' % Forecaster._dt_lower_bound_)
        if dt_to > Forecaster._dt_upper_bound_:
            raise ValueError('To date cannot be after %s' % Forecaster._dt_upper_bound_)

        shift_history_size = 48
        one_hour = timedelta(hours=1)

        # The learning set stops one hour before the first hour to forecast.
        dt_train_from = Forecaster._ls_lower_bound_
        dt_train_to = dt_from - one_hour

        df = self.database.data_frame
        df_x_ls = df.loc[dt_train_from:dt_train_to].copy()
        df_y_ls = df_x_ls[column]

        # Lagged copies of the target: column "<name>(t-k)" holds the value k rows earlier.
        shift_columns = []
        for tau in range(1, shift_history_size + 1):
            col_name = "%s(t-%d)" % (column, tau)
            df_x_ls[col_name] = df_y_ls.shift(tau)
            shift_columns.append(col_name)

        # Drops the first rows (incomplete lag history) and any row with a missing value.
        df_x_ls = df_x_ls.dropna(axis=0, how='any')

        # `.values` replaces the removed `as_matrix` and exists in every pandas version.
        feature_columns = Forecaster._ls_input_ + shift_columns
        X_train = df_x_ls[feature_columns].values
        y_train = df_x_ls[column].values

        # The first test row needs a full window of past targets; fail early with an
        # explicit message instead of a shape error raised from inside the regressor.
        if len(y_train) < shift_history_size:
            raise ValueError(
                'Not enough history before %s to forecast column %s: %d complete rows, %d required'
                % (dt_from, column, len(y_train), shift_history_size))

        clf = ExtraTreesRegressor(n_estimators=10)
        clf.fit(X_train, y_train)

        date_diff = dt_to - dt_from
        hours = date_diff.days * 24 + date_diff.seconds // 3600

        # Most recent value first, to line up with the (t-1), (t-2), ... feature columns.
        lagged_values = y_train[-shift_history_size:][::-1].copy()

        # dt_from and dt_to are both included, hence hours + 1 predictions.
        y_pred = np.zeros(hours + 1)

        for h in range(hours + 1):
            dt_test = dt_from + h * one_hour
            # Same order as Forecaster._ls_input_.
            dt_infos = np.array([
                dt_test.year,
                dt_test.month,
                dt_test.day,
                dt_test.hour,
                dt_test.minute,
                dt_test.second,
                dt_test.isoweekday(),
                dt_test.isocalendar()[1]
            ])

            X_test = np.concatenate((dt_infos, lagged_values)).reshape(1, -1)

            # predict() returns a length-1 array; take the scalar explicitly.
            y_pred[h] = clf.predict(X_test)[0]

            # The prediction becomes the (t-1) input of the next hour; the oldest
            # value wraps around to index 0 and is overwritten.
            lagged_values = np.roll(lagged_values, 1)
            lagged_values[0] = y_pred[h]

        return y_pred