diff --git a/README.md b/README.md index b2d5f20d8e9e55969fbc2dfd928dab9c8a4817cb..b786d92ef9bc0c1ea63180c1614a0b59c12930d0 100644 --- a/README.md +++ b/README.md @@ -8,5 +8,7 @@ This repository contains a few simple tasks for practicing work with HyperQueue. ## Exercises First, download and extract [HyperQueue](https://github.com/It4innovations/hyperqueue/releases/download/v0.17.0/hq-v0.17.0-linux-x64.tar.gz). +Then you can try to implement the following exercises: - [Exercise 1](exercise-1) - [Exercise 1b](exercise-1b) +- [Exercise 2](exercise-2) diff --git a/exercise-2/README.md b/exercise-2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..eb7b596261c2c1c81a400ffbef7f732a0fc7f593 --- /dev/null +++ b/exercise-2/README.md @@ -0,0 +1,29 @@ +# Exercise 2 +The goal of this exercise is to execute a computational graph containing dependencies between +individual tasks, using the HyperQueue [Python API](https://it4innovations.github.io/hyperqueue/stable/python/). + +For this task, you will need to have Python 3.6+ and the `hyperqueue` Python package available. You +can install the package from [PyPI](https://pypi.org/project/hyperqueue/): +```bash +# Create a virtual environment +$ python3 -m venv venv +$ source venv/bin/activate +(venv) $ python3 -m pip install -U setuptools wheel pip + +# Install the HyperQueue Python package +(venv) $ python3 -m pip install hyperqueue[all] +``` + +The `configs` directory contains several configurations for training a machine learning model. +Your goal is to train a model for each configuration, and then postprocess the results of all the +trainings into a single result file, which will list the results of the individual trainings, +sorted by accuracy. + +Perform the following tasks: +1) Implement the `train_model_task` function in [`tasks.py`](tasks.py).
+2) Create HQ tasks for training the models and for performing the final postprocessing in +[`main.py`](main.py). + +## Checking the results +After the job is completed, check the contents of the `output/result.txt` file. It should contain +the training results of all model configurations. diff --git a/exercise-2/configs/config-0.json b/exercise-2/configs/config-0.json new file mode 100644 index 0000000000000000000000000000000000000000..44e8eb8ab8784df8a04523e7dd8d2c9ef5de21ee --- /dev/null +++ b/exercise-2/configs/config-0.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.01, + "batch_size": 1 +} \ No newline at end of file diff --git a/exercise-2/configs/config-1.json b/exercise-2/configs/config-1.json new file mode 100644 index 0000000000000000000000000000000000000000..b61c2636694ab8b66d3d86bf8a1d2868379f8721 --- /dev/null +++ b/exercise-2/configs/config-1.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.01, + "batch_size": 2 +} \ No newline at end of file diff --git a/exercise-2/configs/config-10.json b/exercise-2/configs/config-10.json new file mode 100644 index 0000000000000000000000000000000000000000..9f5f002c86157e2a4f1553c10170a3aee43bd7d3 --- /dev/null +++ b/exercise-2/configs/config-10.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.0001, + "batch_size": 1 +} \ No newline at end of file diff --git a/exercise-2/configs/config-11.json b/exercise-2/configs/config-11.json new file mode 100644 index 0000000000000000000000000000000000000000..bf6dfd37d66cb45b79de0708eccdc5d53b795825 --- /dev/null +++ b/exercise-2/configs/config-11.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.0001, + "batch_size": 2 +} \ No newline at end of file diff --git a/exercise-2/configs/config-12.json b/exercise-2/configs/config-12.json new file mode 100644 index 0000000000000000000000000000000000000000..77de27a8209c96c2216f631d821994937599e226 --- /dev/null +++ b/exercise-2/configs/config-12.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.0001, + "batch_size": 4 +} \ No newline at end of file diff
--git a/exercise-2/configs/config-13.json b/exercise-2/configs/config-13.json new file mode 100644 index 0000000000000000000000000000000000000000..8bd4da1f469710eb3bff82949d66ba0b0664bfaa --- /dev/null +++ b/exercise-2/configs/config-13.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.0001, + "batch_size": 8 +} \ No newline at end of file diff --git a/exercise-2/configs/config-14.json b/exercise-2/configs/config-14.json new file mode 100644 index 0000000000000000000000000000000000000000..b9fc88817aa66dd3639e24ea7979468150d7bcbc --- /dev/null +++ b/exercise-2/configs/config-14.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.0001, + "batch_size": 16 +} \ No newline at end of file diff --git a/exercise-2/configs/config-2.json b/exercise-2/configs/config-2.json new file mode 100644 index 0000000000000000000000000000000000000000..5b6226725bda861a91a66ff1d0e3c530f32fc75d --- /dev/null +++ b/exercise-2/configs/config-2.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.01, + "batch_size": 4 +} \ No newline at end of file diff --git a/exercise-2/configs/config-3.json b/exercise-2/configs/config-3.json new file mode 100644 index 0000000000000000000000000000000000000000..608234e89b96b01d857b9f93f23d40f315c1eedc --- /dev/null +++ b/exercise-2/configs/config-3.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.01, + "batch_size": 8 +} \ No newline at end of file diff --git a/exercise-2/configs/config-4.json b/exercise-2/configs/config-4.json new file mode 100644 index 0000000000000000000000000000000000000000..e45ed068ae7b0a9f43cd95d72b6c4bb79bc2807e --- /dev/null +++ b/exercise-2/configs/config-4.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.01, + "batch_size": 16 +} \ No newline at end of file diff --git a/exercise-2/configs/config-5.json b/exercise-2/configs/config-5.json new file mode 100644 index 0000000000000000000000000000000000000000..bacedff111f27625c81f09531d72d49b6a717e79 --- /dev/null +++ b/exercise-2/configs/config-5.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.001, + "batch_size": 1 
+} \ No newline at end of file diff --git a/exercise-2/configs/config-6.json b/exercise-2/configs/config-6.json new file mode 100644 index 0000000000000000000000000000000000000000..9b18b1a9509bfc6419f840ea36651de78c8337dd --- /dev/null +++ b/exercise-2/configs/config-6.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.001, + "batch_size": 2 +} \ No newline at end of file diff --git a/exercise-2/configs/config-7.json b/exercise-2/configs/config-7.json new file mode 100644 index 0000000000000000000000000000000000000000..68ade58e6d9450ceddcf9b29b59634ad1f470f64 --- /dev/null +++ b/exercise-2/configs/config-7.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.001, + "batch_size": 4 +} \ No newline at end of file diff --git a/exercise-2/configs/config-8.json b/exercise-2/configs/config-8.json new file mode 100644 index 0000000000000000000000000000000000000000..5cde25dbcb24b15178f3cac5ff19ce53c3b2fc94 --- /dev/null +++ b/exercise-2/configs/config-8.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.001, + "batch_size": 8 +} \ No newline at end of file diff --git a/exercise-2/configs/config-9.json b/exercise-2/configs/config-9.json new file mode 100644 index 0000000000000000000000000000000000000000..a77d1e9b1899bb42a589e43e34fb5f20f23d51fd --- /dev/null +++ b/exercise-2/configs/config-9.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.001, + "batch_size": 16 +} \ No newline at end of file diff --git a/exercise-2/functions.py b/exercise-2/functions.py new file mode 100644 index 0000000000000000000000000000000000000000..fa37973e83d9af9459ded31a30dd019cbe1f892c --- /dev/null +++ b/exercise-2/functions.py @@ -0,0 +1,26 @@ +""" +You don't need to modify this file. +""" + +import json +import random +import time +from pathlib import Path + + +def train_model(learning_rate: float, batch_size: float, output_path: Path): + """ + "Trains" a machine learning model using the given parameters (`learning_rate` and `batch_size`) + and stores the result into `output_path`. 
+ """ + print(f"Training model using learning_rate={learning_rate} and batch_size={batch_size}") + time.sleep(30 / batch_size) + + accuracy = 0.9 + (random.random() / 10) + + print(f"Model training finished, resulting accuracy: {accuracy}") + with open(output_path, "w") as f: + json.dump({ + "parameters": dict(learning_rate=learning_rate, batch_size=batch_size), + "accuracy": accuracy + }, f, indent=4) diff --git a/exercise-2/main.py b/exercise-2/main.py new file mode 100644 index 0000000000000000000000000000000000000000..b09f1f495d4b121ea6db5c947ad6b9d989804359 --- /dev/null +++ b/exercise-2/main.py @@ -0,0 +1,46 @@ +import glob +import shutil +from pathlib import Path + +from hyperqueue import Job, LocalCluster +from hyperqueue.visualization import visualize_job + +from tasks import postprocess_task, train_model_task + +if __name__ == "__main__": + # Spawn a HQ server + with LocalCluster() as cluster: + # Add a single HyperQueue worker to the server + cluster.start_worker() + + # Create a client and a job + client = cluster.client() + job = Job() + + # Directory where output will be stored + output_dir = Path("output") + shutil.rmtree(output_dir, ignore_errors=True) + output_dir.mkdir(parents=True, exist_ok=True) + + train_tasks = [] + result_files = [] + for (index, config_file) in enumerate(sorted(glob.glob("configs/*.json"))): + # TODO: add a Python function task to the job. The task should execute the + # `train_model_task` function. The path to the config file, and the path to a result + # file should be passed as arguments to the task. Create a unique result path for each + # task (e.g. output/<index>-result.json). + + # TODO: create the final postprocessing task, which should execute the `postprocess_task` + # function. The task should receive a list of results from the model training and a path to + # the final result file (`postprocessing_result_path`) as arguments.
+ + + # Submit the job + submitted = client.submit(job) + + # Visualize the created job using the DOT format. + # You can render the graph using `$ xdot job.dot`. + visualize_job(job, "job.dot") + + # Wait until the job completes + client.wait_for_jobs([submitted]) diff --git a/exercise-2/tasks.py b/exercise-2/tasks.py new file mode 100644 index 0000000000000000000000000000000000000000..511b5f68f9972c92c20842b79a8a4aa362a35930 --- /dev/null +++ b/exercise-2/tasks.py @@ -0,0 +1,31 @@ +import json +from pathlib import Path +from typing import List + +from functions import train_model + + +def train_model_task(config: Path, output_path: Path): + """ + TODO: read the JSON config from `config` and pass its parameters (along with `output_path`) + to the `train_model` function. + """ + + +def postprocess_task(result_files: List[Path], output_file: Path): + results = [] + for result in result_files: + with open(result) as f: + result = json.load(f) + results.append(result) + results = sorted(results, key=lambda r: r["accuracy"], reverse=True) + with open(output_file, "w") as f: + print("Training results sorted by accuracy:", file=f) + for result in results: + learning_rate = result["parameters"]["learning_rate"] + batch_size = result["parameters"]["batch_size"] + accuracy = result["accuracy"] + print( + f"Learning rate={learning_rate}, batch_size={batch_size}: {accuracy * 100.0:.2f}%", + file=f + )