import numpy as np
import pylab as plt
from scipy import stats
goal = 50 #kWh, goal for the system to reach
s_avg_cc = 0.3 # as decimal percentage of sky, average cloud cover
s_std_cc = 0.1 # as decimal percentage of sky, standard deviation cloud cover
s_irr = 7 # kj/m2, solar irradiance
s_area = 100 # m2, solar panel area
ng_power = 70 # kW, natural gas power generation
ng_avg_uptime = 0.95 # as decimal percentage of an hour, average up-time of equipment
ng_std_uptime = 0.01 # as decimal percentage of an hour, standard deviation up-time of equipment
ng_emissions_per_kWh = 0.4127691 # emissions per kWh when an ng turbine is running
ng_startup_time = 4 # hours, time to start-up natural gas turbine
ng_shutdown_time = 2 # hours, time to shutdown natural gas turbine
reward_function = [0,50,100]
# Create Simulation
def simulation(actions):
reward = []
for idx, ng_on in enumerate(actions):
s_kWh = (s_irr * stats.norm.pdf((idx-12)/2.738612788, 0, 1) / \
stats.norm.pdf((12-12)/2.738612788, 0, 1) ) * \
(1-np.random.normal(s_avg_cc, s_std_cc, 1)[0]) * s_area
ng_kWh = ng_on * np.random.normal(ng_avg_uptime, ng_std_uptime,1)[0] * ng_power
ng_emissions = ng_kWh * ng_emissions_per_kWh
if s_kWh >= goal and ng_emissions < 1:
r = reward_function[2]
elif s_kWh + ng_kWh >= goal:
r = reward_function[1]
else:
r = reward_function[0]
reward.append(r)
return reward
# Test Simulation: score one random 24-hour on/off schedule.
# The original pre-filled an array with np.ones(24) * -1 that was immediately
# overwritten element-by-element -- dead initialization; build the schedule
# directly with the same random draws instead.
actions_guess = [int(round(np.random.uniform(0, 1), 0)) for _ in range(24)]
rewards = simulation(actions_guess)
print(rewards)
[50, 0, 0, 50, 0, 0, 0, 100, 50, 50, 50, 100, 100, 100, 100, 100, 100, 100, 50, 50, 0, 50, 0, 0]
%%time
import itertools

# Enumerate every possible 24-hour on/off schedule: 2**24 = 16,777,216 tuples.
state_space = list(itertools.product((0, 1), repeat=24))
len(state_space)
Wall time: 3.27 s
16777216
%%time
# Remove physically-invalid turbine schedules from state_space.
#
# The turbine needs ng_startup_time (4 h) to start and ng_shutdown_time (2 h)
# to stop, so fast off->on / on->off flips are impossible:
#   * patterns such as (1,0,1) or (0,1,0) anywhere in the schedule are invalid;
#   * a schedule may not BEGIN mid-transition, so prefixes such as (0,1) or
#     (1,0) are invalid.
#
# The original scanned the full 16.7M-state space once per pattern (8 passes,
# ~29 minutes), looped i over all positions for the prefix patterns even
# though states[0:i+len(b)] can only equal b at i == 0, and then popped
# invalid indices in reversed(list(set(indexes))) order -- but set iteration
# order is arbitrary, so pops could land on shifted (wrong) indices.  This
# single-pass rebuild is correct and far faster.

# Patterns invalid anywhere in the 24-hour schedule.
_FORBIDDEN_ANYWHERE = [(1, 0, 0, 0, 1), (1, 0, 0, 1), (1, 0, 1), (0, 1, 0)]
# Patterns invalid only at the start of the schedule.
_FORBIDDEN_PREFIX = [(0, 0, 0, 1), (0, 0, 1), (0, 1), (1, 0)]

def _is_valid_schedule(states):
    """Return True when `states` contains no forbidden transition pattern."""
    for b in _FORBIDDEN_ANYWHERE:
        w = len(b)
        for i in range(len(states) - w + 1):
            if states[i:i + w] == b:
                return False
    for b in _FORBIDDEN_PREFIX:
        if states[:len(b)] == b:
            return False
    return True

_total_states = len(state_space)
state_space = [s for s in state_space if _is_valid_schedule(s)]
print('number of invalid states:', _total_states - len(state_space))
print('number of valid states', len(state_space))
#state_space
finished (1,0,0,0,1) finished (1,0,0,1) finished (1,0,1) finished (0,1,0) finished (0,0,0,1) finished (0,0,1) finished (0,1) finished (1,0) number of invalid states: 16768878 number of valid states 8338 Wall time: 15min 57s
# Per-hour reward trace for every valid schedule (one stochastic pass each).
goal_mapping = [simulation(schedule) for schedule in state_space]
#goal_mapping
%%time
# Score every valid schedule num_iterations times (the simulation is
# stochastic) and report the best schedule per round and on average.
reward_matrix = []
num_iterations = 3
for i in range(num_iterations):
    # Total 24-hour reward of each candidate schedule for this round.
    reward = [round(sum(simulation(state)), 1) for state in state_space]
    reward_matrix.append(reward)
print(np.array(reward_matrix).shape)
# Hoist the argmax over the per-state average -- the original computed the
# same argmax(average(...)) twice.
avg_winner = np.argmax(np.average(reward_matrix, axis=0))
print('average winner was:', avg_winner, \
    'with simulation:', state_space[avg_winner])
for i, round_rewards in enumerate(reward_matrix):
    winner = np.argmax(round_rewards)
    print('round ' + str(i + 1), 'winner was:', winner, \
        'with simulation:', state_space[winner])
(3, 8338) average winner was: 7622 with simulation: (1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1) round 1 winner was: 7622 with simulation: (1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1) round 2 winner was: 7272 with simulation: (1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1) round 3 winner was: 7265 with simulation: (1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1) Wall time: 1min 18s
# Assign a unique node id to every distinct schedule prefix, level by level.
# Key format: 'level<j>_<prefix tuple>', e.g. "level2_(1, 0)".
node_counter = 0
state_lookup = {}
for level in range(1, 25):
    for schedule in state_space:
        key = 'level' + str(level) + '_' + str(schedule[:level])
        if key not in state_lookup:
            state_lookup[key] = node_counter
            node_counter += 1
# For each valid schedule, collect the node ids of its 24 prefixes -- i.e.
# its path through the prefix tree indexed above.
states = [
    [state_lookup['level' + str(j) + '_' + str(i[:j])] for j in range(1, 25)]
    for i in state_space
]
# Reset: reused below to accumulate the graph's edge list.
available_actions = []
# Build the edge list of the prefix-tree graph.  `term_nodes` collects ids of
# terminal (level-24) nodes; 17924 is seeded by hand -- presumably the
# terminal node of the first schedule, which the loop below never appends.
# NOTE(review): verify the magic 17924 against state_lookup.
term_nodes = [17924]
for i in states:
    for j in range(1,25):
        if j == 0:
            # NOTE(review): unreachable -- range(1,25) never yields 0.
            if (0,i[j]) not in available_actions:
                available_actions.append((0,i[j]))
        elif j == 24:
            # Last level: link the synthetic start node (id node_counter) to
            # the two level-1 roots exactly once; on every later pass record
            # the schedule's terminal node instead.
            if (node_counter,0) not in available_actions and (node_counter,1) not in available_actions:
                available_actions.append((node_counter,0))
                available_actions.append((node_counter,1))
            else:
                term_nodes.append(i[-1])
        else:
            # Edge between consecutive prefix nodes of this schedule.
            if (i[j-1],i[j]) not in available_actions:
                available_actions.append((i[j-1],i[j]))
#available_actions
# The edge list doubles as the Q-learning transition list.
points_list = available_actions
# how many points in graph? x points
MATRIX_SIZE = node_counter + 1
# create matrix x*y
R = np.matrix(np.ones(shape=(MATRIX_SIZE, MATRIX_SIZE)))
R *= -1
# assign zeros to paths and 100 to goal-reaching point
reward = -1
for point in points_list:
    #print(point)
    # Find the simulated total reward of whichever terminal node this edge
    # touches.  NOTE(review): when neither endpoint is terminal, `reward`
    # keeps a stale value from a previous iteration, but in that case it is
    # never written into R below (both branches write 0).
    for idx, term_node in enumerate(term_nodes):
        if point[0] == term_node or point[1] == term_node:
            reward = sum(goal_mapping[idx])
    if point[0] in term_nodes:
        R[point] = reward
    else:
        R[point] = 0
    if point[1] in term_nodes:
        R[point[::-1]] = reward
    else:
        # reverse of point
        R[point[::-1]]= 0
# add goal point round trip -- self-loop on each terminal node carries its
# schedule's total reward.
for idx, term_node in enumerate(term_nodes):
    R[term_node,term_node]= sum(goal_mapping[idx])
#R
from numba import jit
@jit(nopython=True)  # compile to machine code on first call, equivalent to @njit
def go_fast_score(Q):
    """Normalize Q to a 0-100 scale relative to its largest entry."""
    return Q / np.max(Q) * 100
# Q table, same shape as R.  NOTE(review): int dtype, so every learned value
# is truncated to an integer in update() below.
Q = np.matrix(np.zeros([MATRIX_SIZE,MATRIX_SIZE], dtype=int))
# learning parameter (discount factor)
gamma = 0.8
# Episodes begin at the synthetic start node (id node_counter).
initial_state = node_counter
def available_actions(state):
    """Node ids reachable from `state`: columns of R[state] with reward >= 0."""
    return np.where(R[state,] >= 0)[1]

available_act = available_actions(initial_state)
def sample_next_action(available_actions_range):
    """Pick one action uniformly at random from the given candidates.

    Bug fix: the original ignored its parameter and read the global
    `available_act` instead.  Every caller passes the candidate list
    explicitly, so use the parameter.
    """
    return int(np.random.choice(available_actions_range, 1))

action = sample_next_action(available_act)
def update(current_state, action, gamma):
    """One Q-learning step: Q[s,a] <- R[s,a] + gamma * max Q[a,:].

    In this graph the chosen `action` is also the next state.  Mutates the
    global Q in place and returns 0 (the scoring path is commented out).
    """
    # Greedy value of the next state; break ties among maxima at random.
    max_index = np.where(Q[action,] == np.max(Q[action,]))[1]
    if max_index.shape[0] > 1:
        max_index = int(np.random.choice(max_index, size = 1))
    else:
        max_index = int(max_index)
    max_value = Q[action, max_index]
    # NOTE(review): Q has int dtype, so the discounted value is truncated.
    Q[current_state, action] = int(R[current_state, action] + gamma * max_value)
    #print('max_value', R[current_state, action] + gamma * max_value)
    #if (np.max(Q) > 0):
    # return(np.sum(go_fast_score(Q)))
    #else:
    return(0)

update(initial_state, action, gamma)
0
%%time
# Training: 2,000,000 Q-learning updates from uniformly random start nodes.
scores = []
for i in range(2000000):
    current_state = np.random.randint(0, int(Q.shape[0]))
    # Refresh the candidate actions and pass them to the sampler.
    available_act = available_actions(current_state)
    action = sample_next_action(available_act)
    # update() mutates Q in place; its return value (always 0) was stored in
    # `score` and never used -- the dead assignment is dropped.
    update(current_state, action, gamma)
    if i % 1000 == 0:
        # Periodic convergence metric: sum of the normalized Q matrix.
        score = np.sum(go_fast_score(Q))
        scores.append(score)
        #print ('Score:', str(score))
print("Trained Q matrix:")
#print(go_fast_score(Q))
Trained Q matrix: Wall time: 3h 16min 33s
# Testing: walk greedily through the learned Q values from the start node
# until a terminal (level-24) node is reached, then plot the training scores.
current_state = node_counter
steps = [current_state]
while current_state not in term_nodes:
    # All columns achieving the row maximum; break ties at random.
    candidates = np.where(Q[current_state,] == np.max(Q[current_state,]))[1]
    if candidates.shape[0] > 1:
        current_state = int(np.random.choice(candidates, size=1))
    else:
        current_state = int(candidates)
    steps.append(current_state)
print("Most efficient path:")
print(steps)
plt.plot(scores)
plt.show()
Most efficient path: [26262, 1, 3, 6, 10, 16, 24, 35, 52, 78, 117, 174, 257, 377, 551, 805, 1178, 1727, 2535, 3722, 5463, 8011, 11740, 17199, 25196]
%%time
# Translate the node ids on the best path back to their 'levelN_(...)' labels.
# Invert state_lookup once (node ids are unique by construction) instead of
# scanning the whole ~33M-entry dict for every step -- the original was
# O(len(steps) * len(state_lookup)).
_node_to_label = {node_id: label for label, node_id in state_lookup.items()}
for step in steps:
    # The synthetic start node has no label; print nothing for it, matching
    # the original scan's behavior.
    if step in _node_to_label:
        print(_node_to_label[step])
level1_(1,) level2_(1, 1) level3_(1, 1, 1) level4_(1, 1, 1, 1) level5_(1, 1, 1, 1, 1) level6_(1, 1, 1, 1, 1, 1) level7_(1, 1, 1, 1, 1, 1, 0) level8_(1, 1, 1, 1, 1, 1, 0, 0) level9_(1, 1, 1, 1, 1, 1, 0, 0, 0) level10_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0) level11_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0) level12_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0) level13_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) level14_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) level15_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0) level16_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) level17_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) level18_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) level19_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) level20_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) level21_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) level22_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1) level23_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1) level24_(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1) Wall time: 270 ms