import os
import importlib
import numpy as np
import matplotlib.pyplot as plt


# Economic parameters shared by the evaluator and the agents it builds.
g = 1    # gain per unit sold
l = 0.5  # loss per unit produced but not sold


def demandDist():
    """Return one random demand, drawn from a geometric distribution (p=0.1)."""
    return np.random.geometric(0.1)


def evaluatePolicy(T, N, agent, g, l, demandDist):
    """Estimate the average total reward an agent earns over a horizon.

    Plays ``N`` episodes of ``T`` periods each. In every period the agent
    chooses a production quantity, one demand value is sampled, and the
    resulting reward is reported back to the agent. The same ``agent``
    object is used for all episodes; it is never reset here.

    Args:
        T: Number of periods in one episode.
        N: Number of episodes to average over.
        agent: Object providing ``decisionRule()`` (quantity to produce)
            and ``receiveReward(reward)``.
        g: Gain per unit sold.
        l: Loss per unit produced but not sold.
        demandDist: Zero-argument callable returning one demand sample.

    Returns:
        The ``np.mean`` of the per-episode reward totals.
    """

    def playEpisode():
        # Accumulate the rewards of one T-period episode.
        total = 0
        for _ in range(T):
            quantity = agent.decisionRule()
            demand = demandDist()
            sold = min(quantity, demand)
            unsold = max(0, quantity - demand)
            payoff = g * sold - l * unsold
            total += payoff
            agent.receiveReward(payoff)
        return total

    episodeTotals = [playEpisode() for _ in range(N)]
    return np.mean(episodeTotals)

# Simulation settings passed to evaluatePolicy for every agent.
T = 365  # periods per episode
N = 100  # episodes averaged per agent

# Every ``.py`` file in the working directory, other than the evaluator
# itself, is treated as a submission module exposing ``AgentK`` and ``AgentU``.
# Entries are visited in sorted order because os.listdir returns them in
# arbitrary order, which would make the output order irreproducible.
for f in sorted(os.listdir(".")):
    moduleName, extension = os.path.splitext(f)
    # Only Python source files can be imported; skip directories such as
    # __pycache__, data files, editor backups, etc. instead of mangling
    # their names and crashing on import.
    if extension != ".py" or f == "evaluator.py":
        continue
    i = importlib.import_module(moduleName)
    # AgentK is constructed with the demand sampler, AgentU without it.
    agentK = i.AgentK(g, l, demandDist)
    scoreK = evaluatePolicy(T, N, agentK, g, l, demandDist)
    agentU = i.AgentU(g, l)
    scoreU = evaluatePolicy(T, N, agentU, g, l, demandDist)
    # One CSV-like line per module: name, AgentK score, AgentU score.
    print(", ".join((moduleName, str(scoreK), str(scoreU))))