In [1]:

# import libraries
"""
Author: Chia E. Tungom
Date: 2023-10-10 10:31
email: chemago99@yahoo.com
"""

import numpy as np
import time
import os
from tqdm.auto import tqdm
import json
import pandas as pd
from citylearn.citylearn import CityLearnEnv

"""
This is only a reference script provided to allow you 
to do local evaluation. The evaluator **DOES NOT** 
use this script for orchestrating the evaluations. 
"""

/Users/chemago/opt/anaconda3/envs/CityLearn2023/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm

Out[1]:

'\nThis is only a reference script provided to allow you \nto do local evaluation. The evaluator **DOES NOT** \nuse this script for orchestrating the evaluations. \n'

Configure CityLearn Environment¶

This configuration gives us the data and environment variables that make up the simulation.

The configuration uses a json configuration file that can be modified as needed

In [2]:

data_dir = './data/'
SCHEMA = os.path.join(data_dir, 'schemas/warm_up/schema.json')

env = CityLearnEnv(SCHEMA)

In [3]:

env.observation_names[0][:]

Out[3]:

['day_type',
 'hour',
 'outdoor_dry_bulb_temperature',
 'outdoor_dry_bulb_temperature_predicted_6h',
 'outdoor_dry_bulb_temperature_predicted_12h',
 'outdoor_dry_bulb_temperature_predicted_24h',
 'diffuse_solar_irradiance',
 'diffuse_solar_irradiance_predicted_6h',
 'diffuse_solar_irradiance_predicted_12h',
 'diffuse_solar_irradiance_predicted_24h',
 'direct_solar_irradiance',
 'direct_solar_irradiance_predicted_6h',
 'direct_solar_irradiance_predicted_12h',
 'direct_solar_irradiance_predicted_24h',
 'carbon_intensity',
 'indoor_dry_bulb_temperature',
 'non_shiftable_load',
 'solar_generation',
 'dhw_storage_soc',
 'electrical_storage_soc',
 'net_electricity_consumption',
 'electricity_pricing',
 'electricity_pricing_predicted_6h',
 'electricity_pricing_predicted_12h',
 'electricity_pricing_predicted_24h',
 'cooling_demand',
 'dhw_demand',
 'occupant_count',
 'indoor_dry_bulb_temperature_set_point',
 'power_outage',
 'indoor_dry_bulb_temperature',
 'non_shiftable_load',
 'solar_generation',
 'dhw_storage_soc',
 'electrical_storage_soc',
 'net_electricity_consumption',
 'cooling_demand',
 'dhw_demand',
 'occupant_count',
 'indoor_dry_bulb_temperature_set_point',
 'power_outage',
 'indoor_dry_bulb_temperature',
 'non_shiftable_load',
 'solar_generation',
 'dhw_storage_soc',
 'electrical_storage_soc',
 'net_electricity_consumption',
 'cooling_demand',
 'dhw_demand',
 'occupant_count',
 'indoor_dry_bulb_temperature_set_point',
 'power_outage']

Getting Prediction Variables from environmet¶

The environment is composed of several buildings and each building has it's own load, dhw, and cooling demand

To get the variables from the enviroment, we need to speicy the builign and the variables as follows

env.building["building_name"].energy_simulation."variable_name"
building_name and variable_name are the names of the buildings and variable to predict respectively

The building varables include

non_shiftable_load, dhw_demand, cooling_demand

In [4]:

env.reset()
env.buildings[0].energy_simulation.non_shiftable_load[:4]

print(env.buildings[0].energy_simulation.solar_generation[env.time_step+10])
print(env.buildings[1].energy_simulation.solar_generation[env.time_step+10])
# print(env.buildings[2].energy_simulation.carbon_intnsity.carbon[env.time_step+10])
env.buildings[0].solar_generation #   solar_generation#[env.time_step+10]

564.91034
564.91034

Out[4]:

array([-0.], dtype=float32)

Getting the Environment Data¶

To get the observations of a given building at a given step, use

- `env.buildings["building_name"].observations()`

To get observations for the entire environment, use

- `env.step(action)`

Note that for the entire environment observations, the observations of individual buildings are not exclusively distinguishable but we can see the observations for 'indoor_dry_bulb_temperature', 'non_shiftable_load', 'solar_generation', 'dhw_storage_soc', 'electrical_storage_soc', 'net_electricity_consumption', 'cooling_demand', 'dhw_demand', 'occupant_count', 'indoor_dry_bulb_temperature_set_point', 'power_outage' to be unique to each building and the others being general. These are well alligned from the second building being the last 11*(number of buildings - 1) observations in the array. for the first building, there is a mixup with electricity price.

In [5]:

observations = env.reset()
for i in range(1):
    actions = np.zeros( (1, len(env.buildings) * 3) )
    print("\n ====== BUILDING OBSERVATION ========= \n ", env.buildings[0].observations())
    print("\n ========= ENVIRONMENT OBSERVATION ========= \n ", observations)
    observations, _, done, _ = env.step(actions)

 ====== BUILDING OBSERVATION ========= 
  {'day_type': 5, 'hour': 1, 'outdoor_dry_bulb_temperature': 24.66, 'outdoor_dry_bulb_temperature_predicted_6h': 24.910639, 'outdoor_dry_bulb_temperature_predicted_12h': 38.41596, 'outdoor_dry_bulb_temperature_predicted_24h': 27.611464, 'diffuse_solar_irradiance': 0.0, 'diffuse_solar_irradiance_predicted_6h': 54.625927, 'diffuse_solar_irradiance_predicted_12h': 116.84289, 'diffuse_solar_irradiance_predicted_24h': 0.0, 'direct_solar_irradiance': 0.0, 'direct_solar_irradiance_predicted_6h': 143.32434, 'direct_solar_irradiance_predicted_12h': 1020.7561, 'direct_solar_irradiance_predicted_24h': 0.0, 'carbon_intensity': 0.40248835, 'indoor_dry_bulb_temperature': 23.098652, 'non_shiftable_load': 0.35683933, 'solar_generation': 0.0, 'dhw_storage_soc': 0.0, 'electrical_storage_soc': 0.2, 'net_electricity_consumption': 0.67788136, 'electricity_pricing': 0.02893, 'electricity_pricing_predicted_6h': 0.02893, 'electricity_pricing_predicted_12h': 0.02915, 'electricity_pricing_predicted_24h': 0.02893, 'cooling_demand': 1.1192156, 'dhw_demand': 0.055682074, 'occupant_count': 3.0, 'indoor_dry_bulb_temperature_set_point': 23.222221, 'power_outage': 0}

 ========= ENVIRONMENT OBSERVATION ========= 
  [[5, 1, 24.66, 24.910639, 38.41596, 27.611464, 0.0, 54.625927, 116.84289, 0.0, 0.0, 143.32434, 1020.7561, 0.0, 0.40248835, 23.098652, 0.35683933, 0.0, 0.0, 0.2, 0.67788136, 0.02893, 0.02893, 0.02915, 0.02893, 1.1192156, 0.055682074, 3.0, 23.222221, 0, 24.278513, 0.18733284, 0.0, 0.0, 0.2, 0.18733284, 0.0, 0.0, 1.0, 24.444445, 0, 24.431562, 0.4220805, 0.0, 0.0, 0.2, 0.5631514, 0.5579055, 0.0, 2.0, 24.444445, 0]]

Convert individual building data to dataframe¶

we use pandas to convert the data into a pandas dataframe. we will add extra features so that we can predict future time steps

In [6]:

observations = env.reset()
# create an empty dataframe with column names
print(env.time_step)

# check the decision variable by indexing the building variable at the time step
print(env.buildings[0].energy_simulation.dhw_demand[env.time_step])
print(env.buildings[0].energy_simulation.dhw_demand[env.time_step])
print(env.buildings[0].heating_device)
print(env.buildings[0].observations().keys())

0
0.055682074
0.055682074
<citylearn.energy_model.HeatPump object at 0x7ff5f16ed640>
dict_keys(['day_type', 'hour', 'outdoor_dry_bulb_temperature', 'outdoor_dry_bulb_temperature_predicted_6h', 'outdoor_dry_bulb_temperature_predicted_12h', 'outdoor_dry_bulb_temperature_predicted_24h', 'diffuse_solar_irradiance', 'diffuse_solar_irradiance_predicted_6h', 'diffuse_solar_irradiance_predicted_12h', 'diffuse_solar_irradiance_predicted_24h', 'direct_solar_irradiance', 'direct_solar_irradiance_predicted_6h', 'direct_solar_irradiance_predicted_12h', 'direct_solar_irradiance_predicted_24h', 'carbon_intensity', 'indoor_dry_bulb_temperature', 'non_shiftable_load', 'solar_generation', 'dhw_storage_soc', 'electrical_storage_soc', 'net_electricity_consumption', 'electricity_pricing', 'electricity_pricing_predicted_6h', 'electricity_pricing_predicted_12h', 'electricity_pricing_predicted_24h', 'cooling_demand', 'dhw_demand', 'occupant_count', 'indoor_dry_bulb_temperature_set_point', 'power_outage'])

In [7]:

cols = ['day_type', 'hour', 'outdoor_dry_bulb_temperature', 'outdoor_dry_bulb_temperature_predicted_6h', 
        'outdoor_dry_bulb_temperature_predicted_12h', 'outdoor_dry_bulb_temperature_predicted_24h', 
        'diffuse_solar_irradiance', 'diffuse_solar_irradiance_predicted_6h', 'diffuse_solar_irradiance_predicted_12h', 
        'diffuse_solar_irradiance_predicted_24h', 'direct_solar_irradiance', 'direct_solar_irradiance_predicted_6h', 
        'direct_solar_irradiance_predicted_12h', 'direct_solar_irradiance_predicted_24h', 'carbon_intensity', 
        'indoor_dry_bulb_temperature', 'non_shiftable_load', 'solar_generation', 'dhw_storage_soc', 
        'electrical_storage_soc', 'net_electricity_consumption', 'electricity_pricing', 'electricity_pricing_predicted_6h',
        'electricity_pricing_predicted_12h', 'electricity_pricing_predicted_24h', 'cooling_demand', 'dhw_demand',
        'occupant_count', 'indoor_dry_bulb_temperature_set_point', 'power_outage']

drop = ['day_type', 'hour', 'outdoor_dry_bulb_temperature', 
        'diffuse_solar_irradiance', 'direct_solar_irradiance', 'non_shiftable_load', 'solar_generation', 'dhw_storage_soc', 
        'electrical_storage_soc', 'net_electricity_consumption', 'electricity_pricing', 'cooling_demand', 'dhw_demand',
        'occupant_count', 'indoor_dry_bulb_temperature_set_point']

def building_observation_dataframe(environment, building: int , forward_steps: int = 48, drop_cols: list = [], keep_cols = []):
    """ takes a CityLearn Environment and a given building and builds a dataframe for future 48hrs 
    ======================
    environement: defined CityLearn environment
    building: building number to build dataframe for
    forward step: number of steps to predict for target variable
    drop_cols: columns to drop 
    
    returns a dataframe for a given time instance with future variables """

    generated_df = pd.DataFrame(columns=env.buildings[building].observations().keys())
    generated_df = generated_df.append(pd.Series(environment.buildings[building].observations().values(), index=generated_df.columns), ignore_index=True)
    # generate same repeated rows
    # matrix = [environment.buildings[building].observations().values()]
    # for i in range(1,forward_steps):
    #     matrix.append(environment.buildings[building].observations().values())
    #     # generated_df = generated_df.append(pd.Series(environment.buildings[building].observations().values(), index=generated_df.columns), ignore_index=True)

    # generated_df = pd.DataFrame(matrix, columns=environment.buildings[building].observations().keys())

    generated_df = generated_df.loc[generated_df.index.repeat(forward_steps)].reset_index(drop=True)
    # print(matrix_df)
    # === add deterministic future features ======
    generated_df["day_type"] = [ int(environment.buildings[building].energy_simulation.day_type[env.time_step+i]) for i in range(forward_steps)]
    generated_df["hour"] = [ int(environment.buildings[building].energy_simulation.hour[env.time_step+i]) for i in range(forward_steps)]
    generated_df["step_count"] = [ int(i+1) for i in range(forward_steps)]

    # === add target future features one step ahead=============
    generated_df["future_electric_load"] = [ environment.buildings[building].energy_simulation.non_shiftable_load[env.time_step+i] for i in range(1, forward_steps+1)]
    generated_df["future_cooling_demand"] = [ environment.buildings[building].energy_simulation.cooling_demand[env.time_step+i] for i in range(1, forward_steps+1)]
    generated_df["future_dhw_demand"] = [ environment.buildings[building].energy_simulation.dhw_demand[env.time_step+i] for i in range(1, forward_steps+1)]

    return generated_df


building_observation_dataframe(env, 0)

Out[7]:

	day_type	hour	outdoor_dry_bulb_temperature	outdoor_dry_bulb_temperature_predicted_6h	outdoor_dry_bulb_temperature_predicted_12h	outdoor_dry_bulb_temperature_predicted_24h	diffuse_solar_irradiance_predicted_6h	diffuse_solar_irradiance_predicted_12h	...	electricity_pricing_predicted_24h	cooling_demand	dhw_demand	occupant_count	indoor_dry_bulb_temperature_set_point	step_count	future_electric_load	future_cooling_demand	future_dhw_demand
0	5	1	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	1	0.345078	1.469638	0.159338
1	5	2	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	2	0.338769	1.458372	0.057004
2	5	3	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	3	0.334856	1.337342	0.000000
3	5	4	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	4	0.348607	1.163453	0.000000
4	5	5	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	5	0.346894	1.335715	0.334834
5	5	6	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	6	0.408589	1.238941	0.905819
6	5	7	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	7	0.402513	1.319662	0.000000
7	5	8	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	8	0.452810	4.025016	3.315016
8	5	9	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	9	0.400676	2.458844	0.215396
9	5	10	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	10	0.398331	1.653834	0.000000
10	5	11	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	11	0.399291	3.217214	1.656299
11	5	12	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	12	0.403018	3.049252	0.226496
12	5	13	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	13	0.356134	3.509696	0.000000
13	5	14	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	14	0.356304	4.315030	0.226489
14	5	15	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	15	0.405048	6.009754	1.020970
15	5	16	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	16	0.359361	7.337235	0.452995
16	5	17	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	17	0.773497	4.635886	0.000000
17	5	18	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	18	0.624087	2.425167	0.000000
18	5	19	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	19	0.594565	3.645800	0.452992
19	5	20	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	20	0.528865	8.104759	0.000000
20	5	21	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	21	0.454219	2.925058	0.452995
21	5	22	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	22	0.421510	3.134185	0.000000
22	5	23	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	23	0.356079	6.387691	0.000000
23	5	24	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	24	0.354211	6.112397	0.000000
24	6	1	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	25	0.352739	3.202373	0.000000
25	6	2	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	26	0.351437	1.661196	0.000000
26	6	3	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	27	0.350079	1.363489	0.000000
27	6	4	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	28	1.354694	1.525330	0.000000
28	6	5	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	29	1.365853	1.648455	0.000000
29	6	6	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	30	1.400738	1.782482	0.000000
30	6	7	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	31	0.402513	1.851054	0.000000
31	6	8	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	32	0.355172	0.386763	0.000000
32	6	9	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	33	0.446802	1.073413	0.225318
33	6	10	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	34	1.462315	2.032756	0.000000
34	6	11	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	35	1.662307	2.833847	0.204226
35	6	12	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	36	6.279381	4.397444	0.121406
36	6	13	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	37	3.217829	5.408320	0.139542
37	6	14	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	38	0.446124	5.937724	0.523780
38	6	15	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	39	0.405048	6.220956	0.225315
39	6	16	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	40	1.434183	5.993321	0.000000
40	6	17	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	41	0.433568	6.689406	0.000000
41	6	18	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	42	0.548903	6.047097	0.675951
42	6	19	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	43	0.518428	4.672842	0.225318
43	6	20	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	44	0.528865	5.162632	1.654751
44	6	21	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	45	0.635330	7.165135	0.000000
45	6	22	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	46	0.543542	9.692888	0.000000
46	6	23	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	47	0.374985	8.609599	0.000000
47	6	24	24.66	24.910639	38.415958	27.611464	54.625927	116.842888	...	0.02893	1.119216	0.055682	3.0	23.222221	48	0.354211	6.258777	0.000000

48 rows × 34 columns

Build a Training and Testing Dataset¶

For every building and time step, we are going to build the dataset for every point and use it for training

In [8]:

env.buildings
observations = env.reset()
forward_time = 48


# Compose Dataset

def compose_dataset(env, forward_time):
    env.reset()
    total_time_steps = env.time_steps

    generated_df = pd.DataFrame(columns=env.buildings[0].observations().keys())
    for i in range(total_time_steps - forward_time):
        for building in range(len(env.buildings)):
            df = building_observation_dataframe(env, building, forward_steps = forward_time)
            generated_df = pd.concat([generated_df, df], axis=0, ignore_index=True)

        actions = np.zeros( (1, len(env.buildings) * 3) )
        observations, _, done, _ = env.step(actions)

    return generated_df


compose_dataset(env, forward_time)

Out[8]:

	day_type	hour	outdoor_dry_bulb_temperature	outdoor_dry_bulb_temperature_predicted_6h	outdoor_dry_bulb_temperature_predicted_12h	outdoor_dry_bulb_temperature_predicted_24h	diffuse_solar_irradiance	diffuse_solar_irradiance_predicted_6h	diffuse_solar_irradiance_predicted_12h	diffuse_solar_irradiance_predicted_24h	...	electricity_pricing_predicted_24h	cooling_demand	dhw_demand	occupant_count	indoor_dry_bulb_temperature_set_point	power_outage	step_count	future_electric_load	future_cooling_demand	future_dhw_demand
0	5	1	24.660000	24.910639	38.415958	27.611464	0.0	54.625927	116.842888	0.0	...	0.02893	1.119216	0.055682	3.0	23.222221	0.0	1.0	0.345078	1.469638	0.159338
1	5	2	24.660000	24.910639	38.415958	27.611464	0.0	54.625927	116.842888	0.0	...	0.02893	1.119216	0.055682	3.0	23.222221	0.0	2.0	0.338769	1.458372	0.057004
2	5	3	24.660000	24.910639	38.415958	27.611464	0.0	54.625927	116.842888	0.0	...	0.02893	1.119216	0.055682	3.0	23.222221	0.0	3.0	0.334856	1.337342	0.000000
3	5	4	24.660000	24.910639	38.415958	27.611464	0.0	54.625927	116.842888	0.0	...	0.02893	1.119216	0.055682	3.0	23.222221	0.0	4.0	0.348607	1.163453	0.000000
4	5	5	24.660000	24.910639	38.415958	27.611464	0.0	54.625927	116.842888	0.0	...	0.02893	1.119216	0.055682	3.0	23.222221	0.0	5.0	0.346894	1.335715	0.334834
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
96763	6	19	27.030001	24.359739	36.602211	26.779373	0.0	0.000000	320.126404	0.0	...	0.02893	0.000000	0.000000	2.0	24.444445	0.0	44.0	0.981112	3.581775	0.000000
96764	6	20	27.030001	24.359739	36.602211	26.779373	0.0	0.000000	320.126404	0.0	...	0.02893	0.000000	0.000000	2.0	24.444445	0.0	45.0	1.070560	2.959194	0.000000
96765	6	21	27.030001	24.359739	36.602211	26.779373	0.0	0.000000	320.126404	0.0	...	0.02893	0.000000	0.000000	2.0	24.444445	0.0	46.0	0.816532	2.480101	0.204833
96766	6	22	27.030001	24.359739	36.602211	26.779373	0.0	0.000000	320.126404	0.0	...	0.02893	0.000000	0.000000	2.0	24.444445	0.0	47.0	0.709541	3.568882	1.768555
96767	6	23	27.030001	24.359739	36.602211	26.779373	0.0	0.000000	320.126404	0.0	...	0.02893	0.000000	0.000000	2.0	24.444445	0.0	48.0	0.526549	1.902680	0.000000

96768 rows × 34 columns

In [9]:

class generate_building_data:
    def __init__(self, environment, forward_steps):

        self.environment = environment
        self.forward_steps = forward_steps
        self.total_time_steps = env.time_steps
        self.actions = np.zeros( (1, len(env.buildings) * 3) )


    def building_observation_dataframe(self, building):
        """ takes a given building and builds a dataframe for future 48hrs 
        ======================
        building: building number to build dataframe for
        returns a dataframe for a given time instance with future variables """

        building_df = pd.DataFrame(columns=env.buildings[building].observations().keys())
        building_df = building_df.append(pd.Series(self.environment.buildings[building].observations().values(), index=building_df.columns), ignore_index=True)

        # === generate repeated rows =======================
        building_df = building_df.loc[building_df.index.repeat(self.forward_steps)].reset_index(drop=True)

        # === add deterministic future features ============
        building_df["day_type"] = [ int(self.environment.buildings[building].energy_simulation.day_type[env.time_step+i]) for i in range(self.forward_steps)]
        building_df["hour"] = [ int(self.environment.buildings[building].energy_simulation.hour[env.time_step+i]) for i in range(self.forward_steps)]
        building_df["step_count"] = [ int(i+1) for i in range(self.forward_steps)]

        # === add target future features one step ahead======
        building_df["future_electric_load"] = [ self.environment.buildings[building].energy_simulation.non_shiftable_load[env.time_step+i] for i in range(1, self.forward_steps+1)]
        building_df["future_cooling_demand"] = [ self.environment.buildings[building].energy_simulation.cooling_demand[env.time_step+i] for i in range(1, self.forward_steps+1)]
        building_df["future_dhw_demand"] = [ self.environment.buildings[building].energy_simulation.dhw_demand[env.time_step+i] for i in range(1, self.forward_steps+1)]
        building_df["future_solar_generation"] = [ int(self.environment.buildings[0].energy_simulation.solar_generation[env.time_step+i]) for i in range(self.forward_steps)]
        building_df["building"] = building # can be use to get solar generation data

        return building_df

    def building_data(self, buildings: list, shuffle = True):
        """ takes given buildings and builds a dataset entire simulation period
        ======================
        buildings: buildings numbers to build dataframe for
        returns a dataframe for a simulation period """

        self.environment.reset()
        buildings_dataset = pd.DataFrame(columns=self.environment.buildings[buildings[0]].observations().keys())

        for i in range(self.total_time_steps - forward_time): # cannot sample till the end 
            for building in buildings:
                df = self.building_observation_dataframe(building)
                buildings_dataset = pd.concat([buildings_dataset, df], axis=0, ignore_index=True)
        
        if shuffle:
            buildings_dataset = buildings_dataset.sample(frac=1, random_state=42).reset_index(drop=True)

        return buildings_dataset


    def environment_data(self, shuffle = True):
        """ takes given buildings and builds a dataset entire simulation period
        ======================
        returns a dataframe for a simulation period """

        observations = self.environment.reset()
        cols = self.environment.observation_names[0]
        initial_data = pd.DataFrame(columns=cols)

        for i in range(self.total_time_steps - self.forward_steps): # cannot sample till the end 
            step_df = pd.DataFrame(observations, columns=cols)
            initial_data = pd.concat([initial_data, step_df], axis=0, ignore_index=True)
            observations, _, done, _ = env.step(self.actions)

        # === get entire column data =======================
        # env_data = pd.DataFrame(columns=cols)
        solars = list(initial_data.solar_generation.values)
        carbons = list(initial_data.carbon_intensity.values)
        days = list(initial_data.day_type.values)
        hours = list(initial_data.hour.values)

        for i in range(0, len(initial_data) - self.forward_steps):
            step_df = pd.DataFrame( [ initial_data.iloc[i].values ], columns=cols)
            step_df = step_df.loc[step_df.index.repeat(self.forward_steps)].reset_index(drop=True)
            
            # === add deterministic future features ============
            step_df["day_type"] = days[i+1: i + self.forward_steps + 1]
            step_df["hour"] = hours[i+1: i + self.forward_steps + 1]
            step_df["step_count"] = [ int(i+1) for i in range(self.forward_steps)]

            # === add target future features one step ahead======
            step_df["future_solar_generation"] = solars[i+1: i + self.forward_steps + 1]
            step_df["future_carbon_intensity"] = carbons[i+1: i + self.forward_steps + 1]

            if i == 0:
                env_data = step_df.copy(deep=True)
            else:
                env_data = pd.concat([env_data, step_df], axis=0, ignore_index=True)
        
        if shuffle:
            env_data = env_data.sample(frac=1, random_state=42).reset_index(drop=True)

        return env_data

In [10]:

env.reset()
data = generate_building_data(env, 1)
neighbourhood_data = data.environment_data( shuffle = False)
building_data = data.building_data(buildings = [0,1,2], shuffle = False)

building_data["day_type"] = building_data["day_type"].astype(int)
building_data["hour"] = building_data["hour"].astype(int)
building_data["step_count"] = building_data["step_count"].astype(int)

In [18]:

neighbourhood_data[['solar_generation', "future_solar_generation"]]

Out[18]:

	solar_generation	solar_generation	solar_generation	future_solar_generation
0	0.000000	0.000000	0.000000	[0.0, 0.0, 0.0]
1	0.000000	0.000000	0.000000	[0.0, 0.0, 0.0]
2	0.000000	0.000000	0.000000	[0.0, 0.0, 0.0]
3	0.000000	0.000000	0.000000	[0.0, 0.0, 0.0]
4	0.000000	0.000000	0.000000	[0.0, 0.0, 0.0]
...	...	...	...	...
713	0.507637	0.253818	0.507637	[0.18798004, 0.09399002, 0.18798004]
714	0.187980	0.093990	0.187980	[0.0053076698, 0.0026538349, 0.0053076698]
715	0.005308	0.002654	0.005308	[0.0, 0.0, 0.0]
716	0.000000	0.000000	0.000000	[0.0, 0.0, 0.0]
717	0.000000	0.000000	0.000000	[0.0, 0.0, 0.0]

718 rows × 4 columns

In [19]:

keep = ['day_type', 'hour', 'step_count', 
        'outdoor_dry_bulb_temperature', 'solar_generation', 'dhw_storage_soc',
        'cooling_demand', 'dhw_demand', 'occupant_count',
        'electrical_storage_soc', 'net_electricity_consumption', 
        'non_shiftable_load', 'future_electric_load', "future_dhw_demand", "future_cooling_demand"]


# keep = ['day_type', 'hour', 'step_count', 'non_shiftable_load', 'future_electric_load']

# df['day_type'] = df['day_type'].astype(int)

In [20]:

def getTrainSplitData(df, train_size = 0.7):

    last_index = int(len(df) * train_size)  # Calculate the index corresponding to the last 70% of the data

    train = df.loc[df.index < last_index]
    test = df.loc[df.index >= last_index]

    return train, test

# generated_df['future_electric_load'] = generated_df['future_electric_load'].astype(float).round(5)
train, test = getTrainSplitData(building_data, train_size = 0.6)

train.dtypes

Out[20]:

day_type                                        int64
hour                                            int64
outdoor_dry_bulb_temperature                  float64
outdoor_dry_bulb_temperature_predicted_6h     float64
outdoor_dry_bulb_temperature_predicted_12h    float64
outdoor_dry_bulb_temperature_predicted_24h    float64
diffuse_solar_irradiance                      float64
diffuse_solar_irradiance_predicted_6h         float64
diffuse_solar_irradiance_predicted_12h        float64
diffuse_solar_irradiance_predicted_24h        float64
direct_solar_irradiance                       float64
direct_solar_irradiance_predicted_6h          float64
direct_solar_irradiance_predicted_12h         float64
direct_solar_irradiance_predicted_24h         float64
carbon_intensity                              float64
indoor_dry_bulb_temperature                   float64
non_shiftable_load                            float64
solar_generation                              float64
dhw_storage_soc                               float64
electrical_storage_soc                        float64
net_electricity_consumption                   float64
electricity_pricing                           float64
electricity_pricing_predicted_6h              float64
electricity_pricing_predicted_12h             float64
electricity_pricing_predicted_24h             float64
cooling_demand                                float64
dhw_demand                                    float64
occupant_count                                float64
indoor_dry_bulb_temperature_set_point         float64
power_outage                                  float64
step_count                                      int64
future_electric_load                          float32
future_cooling_demand                         float32
future_dhw_demand                             float32
future_solar_generation                       float64
building                                      float64
dtype: object

In [21]:

class generate_model:
    def __init__(self, dataset, ):
        self.dataset = dataset


    def train_model(self):
        pass

Predict Electric Load¶

In [22]:

from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error

import warnings
# Ignore all warnings
warnings.filterwarnings("ignore")

X_train, y_train = train.drop(columns=["future_cooling_demand"]), train["future_cooling_demand"]
X_test, y_test = test.drop(columns=["future_cooling_demand"]), test["future_cooling_demand"]

reg = XGBRegressor(base_score=0.5, booster='dart',   
                           n_estimators=100,
                           early_stopping_rounds=50,
                           enable_categorical = True,
                           objective='reg:squarederror',
                           max_depth=100,
                           learning_rate=0.1)
reg.fit(X_train, 
        y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        eval_metric='mae',
        verbose=20)

[0]	validation_0-mae:0.52462	validation_1-mae:0.52462
[20]	validation_0-mae:0.06413	validation_1-mae:0.06413
[40]	validation_0-mae:0.00784	validation_1-mae:0.00784
[60]	validation_0-mae:0.00096	validation_1-mae:0.00096
[80]	validation_0-mae:0.00012	validation_1-mae:0.00012
[99]	validation_0-mae:0.00003	validation_1-mae:0.00003

Out[22]:

XGBRegressor(base_score=0.5, booster='dart', callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=50,
             enable_categorical=True, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=100, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=100, n_jobs=None,
             num_parallel_tree=None, random_state=None, ...)

In [23]:

feature_important = reg.get_booster().get_score(importance_type='weight')
keys = list(feature_important.keys())
values = list(feature_important.values())

data = pd.DataFrame(data=values, index=keys, columns=["score"]).sort_values(by = "score", ascending=False)
data#.nlargest(40, columns="score").plot(kind='barh', figsize = (20,10)) ## plot top 40 features

Out[23]:

	score
indoor_dry_bulb_temperature	171.0
non_shiftable_load	4.0

In [24]:

sorted_idx = np.argsort(reg.feature_importances_)[::-1]

for index in sorted_idx:
    print([train.columns[index], reg.feature_importances_[index]])

['indoor_dry_bulb_temperature', 0.9999999]
['non_shiftable_load', 1.1102811e-07]
['diffuse_solar_irradiance_predicted_12h', 0.0]
['carbon_intensity', 0.0]
['direct_solar_irradiance_predicted_24h', 0.0]
['direct_solar_irradiance_predicted_12h', 0.0]
['direct_solar_irradiance_predicted_6h', 0.0]
['direct_solar_irradiance', 0.0]
['diffuse_solar_irradiance_predicted_24h', 0.0]
['future_solar_generation', 0.0]
['diffuse_solar_irradiance', 0.0]
['outdoor_dry_bulb_temperature_predicted_24h', 0.0]
['outdoor_dry_bulb_temperature_predicted_12h', 0.0]
['outdoor_dry_bulb_temperature_predicted_6h', 0.0]
['outdoor_dry_bulb_temperature', 0.0]
['hour', 0.0]
['diffuse_solar_irradiance_predicted_6h', 0.0]
['solar_generation', 0.0]
['future_dhw_demand', 0.0]
['dhw_demand', 0.0]
['future_cooling_demand', 0.0]
['future_electric_load', 0.0]
['step_count', 0.0]
['power_outage', 0.0]
['indoor_dry_bulb_temperature_set_point', 0.0]
['occupant_count', 0.0]
['cooling_demand', 0.0]
['dhw_storage_soc', 0.0]
['electricity_pricing_predicted_24h', 0.0]
['electricity_pricing_predicted_12h', 0.0]
['electricity_pricing_predicted_6h', 0.0]
['electricity_pricing', 0.0]
['net_electricity_consumption', 0.0]
['electrical_storage_soc', 0.0]
['day_type', 0.0]

NeurIPS 2023 Citylearn Challenge

StART Forecasting with XGBoost! TRIAL 001