In [ ]:

What is the notebook about?

Problem - YouTube

This problem deals with a YouTuber who has to employ someone to edit their videos.

  • Formulate the problem as an MDP
  • Use dynamic programming to find out the optimal policy and optimal values for each month
  • Visualize and explain the results

  • Update the config parameters. You can define the common variables here

Variable Description
AICROWD_DATASET_PATH Path to the file containing test data. This should be an absolute path.
AICROWD_RESULTS_DIR Path to write the output to.
AICROWD_ASSETS_DIR In case your notebook needs additional files (such as model weights), you can add them to a directory and specify the path to that directory here (please specify a relative path). The contents of this directory will be sent to AIcrowd for evaluation.
AICROWD_API_KEY In order to submit your code to AIcrowd, you need to provide your account's API key. This key is available at https://www.aicrowd.com/participants/me

In [6]:
!pip install -U aicrowd-cli > /dev/null

In [7]:
import os

# Dataset archive location: taken from the DATASET_PATH environment variable
# when it is set, otherwise the downloaded zip in the current working directory.
AICROWD_DATASET_PATH = os.getenv(
    "DATASET_PATH",
    os.path.join(os.getcwd(), "61c5aa77-62c0-48c9-afef-96d618708b43_data_youtube.zip"),
)
# Directory the result files are written to (OUTPUTS_DIR overrides the default).
AICROWD_RESULTS_DIR = os.getenv("OUTPUTS_DIR", "results")
# Read the key from the AICROWD_API_KEY environment variable so that it never has
# to be saved inside the notebook; falls back to an empty string as before.
API_KEY = os.getenv("AICROWD_API_KEY", "") # Get your key from https://www.aicrowd.com/participants/me (ctrl + click the link)
In [3]:
# Authenticate the aicrowd CLI with API_KEY, then download the dataset of the
# rl-assignment-2-youtube challenge.
!aicrowd login --api-key $API_KEY
!aicrowd dataset download -c rl-assignment-2-youtube
API Key valid
Saved API Key successfully!
61c5aa77-62c0-48c9-afef-96d618708b43_data_youtube.zip: 100% 6.35k/6.35k [00:00<00:00, 264kB/s]
In [ ]:
# Directory holding the dataset; its 'inputs' and 'targets' sub-folders are read
# by the result-generation and scoring cells further down.
DATASET_DIR = 'data_youtube'

Install packages 🗃

Please add all package installations in this section

In [ ]:

Import packages 💻

In [8]:
import numpy as np
import matplotlib.pyplot as plt 
import os

Prediction Phase

In [9]:
class YouTuberEnv:
  """Parameter container for the YouTuber video-editing problem.

  Every entry of ``_REQUIRED_PARAMS`` is copied from the parameter dict onto
  an attribute of the same name.  The meaning of each value is defined by the
  assignment statement (the ``*_add_*`` entries presumably relate to the
  advertising budget -- confirm against the problem text).
  """

  # Keys that must be present in the dict handed to the constructor.
  _REQUIRED_PARAMS = (
    "low_salary",
    "high_salary",
    "low_quit_prob",
    "high_quit_prob",
    "self_edit_cost",
    "low_add_cost",
    "high_add_cost",
    "low_add_success_prob",
    "high_add_success_prob",
  )

  def __init__(self,kwargs):
    """Store the problem parameters.

    Args:
      kwargs: dict containing every key listed in ``_REQUIRED_PARAMS``.

    Raises:
      AssertionError: if a required key is missing (see ``_verify_params``).
    """
    # Validate first so a missing key is reported with a descriptive message
    # instead of a bare KeyError from the lookups below.
    self._verify_params(kwargs)
    for name in self._REQUIRED_PARAMS:
      setattr(self, name, kwargs[name])

  def _verify_params(self,kwargs):
    """Assert that ``kwargs`` provides every required parameter.

    Note: the checks are ``assert`` statements, so they are skipped when
    Python runs with assertions disabled (``-O``).
    """
    for name in self._REQUIRED_PARAMS:
      assert name in kwargs, "no param for " + name
In [10]:
def MDP(env):
  """Build the MDP description for the given environment.

  The ``actions``, ``rewards`` and ``probabilities`` containers are empty
  placeholders meant to be filled in from ``env``.

  Args:
    env: a ``YouTuberEnv`` holding the problem parameters.

  Returns:
    A ``(mdp, extra_info)`` tuple.  ``mdp`` is a dict with the keys
    ``"states"``, ``"actions"``, ``"rewards"`` and ``"probabilities"``
    (the keys ``DP`` reads); ``extra_info`` is a free-form dict.
  """

  states = [0,1] ### DO NOT MODIFY
  actions = []
  rewards = []
  probabilities = []
  extra_info = {}

  ####### DO NOT EDIT BELOW THIS LINE ########
  # The dict literal was left unclosed; these are the keys consumed by DP().
  mdp = {
    "states": states,
    "actions": actions,
    "rewards": rewards,
    "probabilities": probabilities,
  }
  return mdp, extra_info
In [11]:
def DP(mdp):
  """Compute values and a policy for the MDP over a 12-step horizon.

  As written this is a template: ``values`` is all zeros and ``policy`` is
  filled with random 'L'/'H' entries that are meant to be overwritten by the
  dynamic-programming solution.

  Args:
    mdp: dict with the keys ``"states"``, ``"actions"``, ``"rewards"`` and
      ``"probabilities"``.

  Returns:
    A ``(result, extra_info)`` tuple.  ``result`` is a dict with ``"Values"``
    (array of shape ``(N+1, n_states)``) and ``"Policy"`` (array of shape
    ``(N, n_states)`` containing only 'L' or 'H'); ``extra_info`` is a
    free-form dict.
  """

  states = mdp["states"]
  actions = mdp["actions"]
  rewards = mdp["rewards"]
  probabilities = mdp["probabilities"]

  N = 12 # horizon for 1 year
  n_states = len(states)
  values = np.zeros((N+1, n_states))
  policy = np.random.choice(['L','H'],  size = (N,n_states))
  ### Note: Each value in policy should either be a 'H' or 'L'
  ### Modify the contents of the above 'policy' array
  extra_info = {}

  ####### DO NOT EDIT BELOW THIS LINE ########
  # The dict literal was left unclosed; these are the keys that
  # verify_results() and the scoring code look up.
  result = {
    "Values": values,
    "Policy": policy,
  }
  return result, extra_info
In [12]:
def verify_results(results, N=12):
  """Assert that ``results`` has the structure expected from ``DP``.

  Args:
    results: dict with a ``"Values"`` array of shape ``(N+1, 2)`` and a
      ``"Policy"`` array of shape ``(N, 2)`` containing only 'L' or 'H'.
    N: planning horizon.  Defaults to 12, the horizon used inside ``DP``;
      it is a parameter because ``DP``'s ``N`` is local and not visible here.

  Raises:
    AssertionError: if a key is missing, a shape is wrong, or the policy
      contains a value other than 'L' or 'H'.
  """
  assert "Values" in results
  assert "Policy" in results
  values = results["Values"]
  policy = results["Policy"]
  n_states = 2
  assert np.shape(values) == (N+1,n_states)
  assert np.shape(policy) == (N,n_states)
  unique_values = set(np.unique(policy))
  allowed_values = {'L','H'}
  assert unique_values <= allowed_values

def get_results(kwargs):
  """Run the whole pipeline for one parameter dict.

  Builds the environment from ``kwargs``, formulates the MDP and solves it.

  Returns:
    ``(results, mdp_info, dp_info)``: the dict returned by ``DP`` followed by
    the extra-info dicts returned by ``MDP`` and ``DP`` respectively.
  """
  mdp, mdp_info = MDP(YouTuberEnv(kwargs))
  results, dp_info = DP(mdp)
  return results, mdp_info, dp_info
In [13]:
def get_base_params():
  """Return a fresh dict holding the baseline problem parameters."""
  return {
    "low_salary": 2300,
    "high_salary": 3000,
    "low_quit_prob": 0.6,
    "high_quit_prob": 0.2,
    "self_edit_cost": 4000,
    "low_add_cost": 300,
    "high_add_cost": 600,
    "low_add_success_prob": 0.7,
    "high_add_success_prob": 0.9,
  }

# Run the full pipeline (environment -> MDP -> DP) once on the baseline parameters.
base_params = get_base_params()
results, mdp_info, dp_info = get_results(base_params)
{'Values': array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]]), 'Policy': array([['L', 'H'],
       ['H', 'L'],
       ['H', 'H'],
       ['H', 'H'],
       ['L', 'L'],
       ['H', 'H'],
       ['L', 'L'],
       ['L', 'H'],
       ['L', 'H'],
       ['H', 'L'],
       ['H', 'H'],
       ['L', 'L']], dtype='<U1')}
In [13]:

In [14]:
# Both guards had lost their bodies, which made the cell a syntax error.
# Create the results directory if it is missing.
os.makedirs(AICROWD_RESULTS_DIR, exist_ok=True)
# NOTE(review): creating the inputs directory is the presumed intent of the
# second guard -- confirm that the dataset archive is extracted into
# DATASET_DIR, otherwise this folder stays empty and no results are generated.
os.makedirs(os.path.join(DATASET_DIR, 'inputs'), exist_ok=True)
In [16]:
# Do not edit this cell, generate results with it as is
# For every parameter file in the inputs folder, solve the problem and save the
# result dict as results_<idx>.npy in the results folder.
input_dir = os.path.join(DATASET_DIR, 'inputs')
os.makedirs(AICROWD_RESULTS_DIR, exist_ok=True)

for params_file in os.listdir(input_dir):
  if ".npy" not in params_file:
    continue  # ignore anything that is not a saved parameter file
  # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
  # parameter files that come from a trusted source.
  kwargs = np.load(os.path.join(input_dir, params_file), allow_pickle=True).item()
  results, mdp_info, dp_info = get_results(kwargs)
  # Text after the last '_' with the 4-character '.npy' suffix removed.
  idx = params_file.split('_')[-1][:-4]
  np.save(os.path.join(AICROWD_RESULTS_DIR, 'results_' + idx), results)
In [17]:
# Check your score on the given test cases (There are more private test cases not provided)
# result_folder: where the generated results_<idx>.npy files were saved.
result_folder = AICROWD_RESULTS_DIR
# target_folder: reference targets_<idx>.npy files inside the dataset directory.
target_folder = os.path.join(DATASET_DIR, 'targets')

def check_algo_match(results, targets):
    """Tell whether a submitted solution agrees with the reference solution.

    The two state columns may be labelled in either order, so the submission
    is accepted if it matches the reference as-is or with the reference's
    two columns swapped.  For a given ordering BOTH the values (absolute
    tolerance 1e-1) and the policy (exact equality) have to match.

    Args:
        results: dict with 2-column arrays under 'Values' and 'Policy'.
        targets: dict with the same layout holding the reference solution.

    Returns:
        bool: True when values and policy match under one of the two orders;
        False otherwise, including when the array shapes differ.
    """
    result_values = np.asarray(results['Values'])
    result_policy = np.asarray(results['Policy'])
    target_values = np.asarray(targets['Values'])
    target_policy = np.asarray(targets['Policy'])

    # A submission with the wrong shape can never match; report a miss
    # instead of failing inside the comparison.
    if result_values.shape != target_values.shape:
        return False
    if result_policy.shape != target_policy.shape:
        return False

    def _matches(column_order):
        # Compare the reference, with its columns arranged as requested,
        # against the submission.
        values_ok = np.allclose(target_values[:, column_order], result_values, atol=1e-1)
        policy_ok = (target_policy[:, column_order] == result_policy).all()
        return bool(values_ok and policy_ok)

    # Values AND policy must agree under the same ordering (the swapped-order
    # clause previously used `or`, accepting a match on either one alone).
    return _matches([0, 1]) or _matches([1, 0])

def check_score(target_folder, result_folder):
    """Return the fraction of saved results that match their targets.

    Every file in ``result_folder`` is loaded and compared against
    ``targets_<idx>.npy`` in ``target_folder``, where ``<idx>`` is the text
    after the last '_' in the result file name (4-character suffix removed).

    Args:
        target_folder: directory containing the ``targets_<idx>.npy`` files.
        result_folder: directory containing the saved result files.

    Returns:
        Mean of the per-file match flags, or 0.0 when there are no result
        files.
    """
    match = []
    for out_file in os.listdir(result_folder):
        res_file = os.path.join(result_folder, out_file)
        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only
        # use it on files from a trusted source.
        results = np.load(res_file, allow_pickle=True).item()
        idx = out_file.split('_')[-1][:-4]  # Extract the file number
        target_file = os.path.join(target_folder, f"targets_{idx}.npy")
        # The first element of the loaded target array is the target dict.
        targets = np.load(target_file, allow_pickle=True)[0]
        # Record the outcome; without this the score was the mean of an
        # empty list (nan).
        match.append(check_algo_match(results, targets))
    if not match:
        return 0.0
    return np.mean(match)

# Score the saved results only when the targets folder exists.
if os.path.exists(target_folder):
    print("Shared data Score (normalized to 1):", check_score(target_folder, result_folder))
Shared data Score (normalized to 1): 0.0

Answer the following

Consider a policy where you always pay the employee low income and allocate a high advertising budget. Is it optimal? Justify your answer. (Based on the data provided in the assignment question)

Your answer:

In [ ]:
aicrowd notebook submit \
    -c rl-assignment-2-youtube -a assets
In [ ]:


