From 6f31fcb7b80331877e8cfdab00b5f0f0ea08a75b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= <berykubik@gmail.com> Date: Wed, 22 Nov 2023 15:22:33 +0100 Subject: [PATCH] Add exercise 2 --- README.md | 2 ++ exercise-2/README.md | 29 +++++++++++++++++++ exercise-2/configs/config-0.json | 4 +++ exercise-2/configs/config-1.json | 4 +++ exercise-2/configs/config-10.json | 4 +++ exercise-2/configs/config-11.json | 4 +++ exercise-2/configs/config-12.json | 4 +++ exercise-2/configs/config-13.json | 4 +++ exercise-2/configs/config-14.json | 4 +++ exercise-2/configs/config-2.json | 4 +++ exercise-2/configs/config-3.json | 4 +++ exercise-2/configs/config-4.json | 4 +++ exercise-2/configs/config-5.json | 4 +++ exercise-2/configs/config-6.json | 4 +++ exercise-2/configs/config-7.json | 4 +++ exercise-2/configs/config-8.json | 4 +++ exercise-2/configs/config-9.json | 4 +++ exercise-2/functions.py | 26 +++++++++++++++++ exercise-2/main.py | 46 +++++++++++++++++++++++++++++++ exercise-2/tasks.py | 31 +++++++++++++++++++++ 20 files changed, 194 insertions(+) create mode 100644 exercise-2/README.md create mode 100644 exercise-2/configs/config-0.json create mode 100644 exercise-2/configs/config-1.json create mode 100644 exercise-2/configs/config-10.json create mode 100644 exercise-2/configs/config-11.json create mode 100644 exercise-2/configs/config-12.json create mode 100644 exercise-2/configs/config-13.json create mode 100644 exercise-2/configs/config-14.json create mode 100644 exercise-2/configs/config-2.json create mode 100644 exercise-2/configs/config-3.json create mode 100644 exercise-2/configs/config-4.json create mode 100644 exercise-2/configs/config-5.json create mode 100644 exercise-2/configs/config-6.json create mode 100644 exercise-2/configs/config-7.json create mode 100644 exercise-2/configs/config-8.json create mode 100644 exercise-2/configs/config-9.json create mode 100644 exercise-2/functions.py create mode 100644 exercise-2/main.py create mode 100644 
exercise-2/tasks.py diff --git a/README.md b/README.md index b2d5f20..b786d92 100644 --- a/README.md +++ b/README.md @@ -8,5 +8,7 @@ This repository contains a few simple tasks for practicing work with HyperQueue. ## Exercises First, download and extract [HyperQueue](https://github.com/It4innovations/hyperqueue/releases/download/v0.17.0/hq-v0.17.0-linux-x64.tar.gz). +Then you can try to implement the following exercises: - [Exercise 1](exercise-1) - [Exercise 1b](exercise-1b) +- [Exercise 2](exercise-2) diff --git a/exercise-2/README.md b/exercise-2/README.md new file mode 100644 index 0000000..eb7b596 --- /dev/null +++ b/exercise-2/README.md @@ -0,0 +1,29 @@ +# Exercise 2 +The goal of this exercise is to execute a computational graph containing dependencies between +individual tasks, using the HyperQueue [Python API](https://it4innovations.github.io/hyperqueue/stable/python/). + +For this task, you will need to have Python 3.6+ and the `hyperqueue` Python package available. You +can install the package from [PyPI](https://pypi.org/project/hyperqueue/): +```bash +# Create a virtual environment +$ python3 -m venv venv +$ source venv/bin/activate +(venv) $ python3 -m pip install -U setuptools wheel pip + +# Install the HyperQueue Python package +(venv) $ python3 -m pip install hyperqueue[all] +``` + +The `configs` directory contains several configurations for training a machine learning model. +Your goal is to train a model for each configuration, and then postprocess the results of all the +trainings into a single result file, which will list the results of the individual trainings, +sorted by accuracy. + +Perform the following tasks: +1) Implement the `train_model_task` function in [`tasks.py`](tasks.py). +2) Create HQ tasks for training the models and for performing the final postprocessing in +[`main.py`](main.py). + +## Checking the results +After the job is completed, check the contents of the `output/result.txt` file. 
It should contain +the training results of all model configurations. diff --git a/exercise-2/configs/config-0.json b/exercise-2/configs/config-0.json new file mode 100644 index 0000000..44e8eb8 --- /dev/null +++ b/exercise-2/configs/config-0.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.01, + "batch_size": 1 +} \ No newline at end of file diff --git a/exercise-2/configs/config-1.json b/exercise-2/configs/config-1.json new file mode 100644 index 0000000..b61c263 --- /dev/null +++ b/exercise-2/configs/config-1.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.01, + "batch_size": 2 +} \ No newline at end of file diff --git a/exercise-2/configs/config-10.json b/exercise-2/configs/config-10.json new file mode 100644 index 0000000..9f5f002 --- /dev/null +++ b/exercise-2/configs/config-10.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.0001, + "batch_size": 1 +} \ No newline at end of file diff --git a/exercise-2/configs/config-11.json b/exercise-2/configs/config-11.json new file mode 100644 index 0000000..bf6dfd3 --- /dev/null +++ b/exercise-2/configs/config-11.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.0001, + "batch_size": 2 +} \ No newline at end of file diff --git a/exercise-2/configs/config-12.json b/exercise-2/configs/config-12.json new file mode 100644 index 0000000..77de27a --- /dev/null +++ b/exercise-2/configs/config-12.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.0001, + "batch_size": 4 +} \ No newline at end of file diff --git a/exercise-2/configs/config-13.json b/exercise-2/configs/config-13.json new file mode 100644 index 0000000..8bd4da1 --- /dev/null +++ b/exercise-2/configs/config-13.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.0001, + "batch_size": 8 +} \ No newline at end of file diff --git a/exercise-2/configs/config-14.json b/exercise-2/configs/config-14.json new file mode 100644 index 0000000..b9fc888 --- /dev/null +++ b/exercise-2/configs/config-14.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.0001, + "batch_size": 16 +} \ No newline at end of file diff 
--git a/exercise-2/configs/config-2.json b/exercise-2/configs/config-2.json new file mode 100644 index 0000000..5b62267 --- /dev/null +++ b/exercise-2/configs/config-2.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.01, + "batch_size": 4 +} \ No newline at end of file diff --git a/exercise-2/configs/config-3.json b/exercise-2/configs/config-3.json new file mode 100644 index 0000000..608234e --- /dev/null +++ b/exercise-2/configs/config-3.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.01, + "batch_size": 8 +} \ No newline at end of file diff --git a/exercise-2/configs/config-4.json b/exercise-2/configs/config-4.json new file mode 100644 index 0000000..e45ed06 --- /dev/null +++ b/exercise-2/configs/config-4.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.01, + "batch_size": 16 +} \ No newline at end of file diff --git a/exercise-2/configs/config-5.json b/exercise-2/configs/config-5.json new file mode 100644 index 0000000..bacedff --- /dev/null +++ b/exercise-2/configs/config-5.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.001, + "batch_size": 1 +} \ No newline at end of file diff --git a/exercise-2/configs/config-6.json b/exercise-2/configs/config-6.json new file mode 100644 index 0000000..9b18b1a --- /dev/null +++ b/exercise-2/configs/config-6.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.001, + "batch_size": 2 +} \ No newline at end of file diff --git a/exercise-2/configs/config-7.json b/exercise-2/configs/config-7.json new file mode 100644 index 0000000..68ade58 --- /dev/null +++ b/exercise-2/configs/config-7.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.001, + "batch_size": 4 +} \ No newline at end of file diff --git a/exercise-2/configs/config-8.json b/exercise-2/configs/config-8.json new file mode 100644 index 0000000..5cde25d --- /dev/null +++ b/exercise-2/configs/config-8.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.001, + "batch_size": 8 +} \ No newline at end of file diff --git a/exercise-2/configs/config-9.json b/exercise-2/configs/config-9.json new file mode 100644 
index 0000000..a77d1e9 --- /dev/null +++ b/exercise-2/configs/config-9.json @@ -0,0 +1,4 @@ +{ + "learning_rate": 0.001, + "batch_size": 16 +} \ No newline at end of file diff --git a/exercise-2/functions.py b/exercise-2/functions.py new file mode 100644 index 0000000..fa37973 --- /dev/null +++ b/exercise-2/functions.py @@ -0,0 +1,26 @@ +""" +You don't need to modify this file. +""" + +import json +import random +import time +from pathlib import Path + + +def train_model(learning_rate: float, batch_size: float, output_path: Path): + """ + "Trains" a machine learning model using the given parameters (`learning_rate` and `batch_size`) + and stores the result into `output_path`. + """ + print(f"Training model using learning_rate={learning_rate} and batch_size={batch_size}") + time.sleep(30 / batch_size) + + accuracy = 0.9 + (random.random() / 10) + + print(f"Model training finished, resulting accuracy: {accuracy}") + with open(output_path, "w") as f: + json.dump({ + "parameters": dict(learning_rate=learning_rate, batch_size=batch_size), + "accuracy": accuracy + }, f, indent=4) diff --git a/exercise-2/main.py b/exercise-2/main.py new file mode 100644 index 0000000..b09f1f4 --- /dev/null +++ b/exercise-2/main.py @@ -0,0 +1,46 @@ +import glob +import shutil +from pathlib import Path + +from hyperqueue import Job, LocalCluster +from hyperqueue.visualization import visualize_job + +from tasks import postprocess_task, train_model_task + +if __name__ == "__main__": + # Spawn a HQ server + with LocalCluster() as cluster: + # Add a single HyperQueue worker to the server + cluster.start_worker() + + # Create a client and a job + client = cluster.client() + job = Job() + + # Directory where output will be stored + output_dir = Path("output") + shutil.rmtree(output_dir, ignore_errors=True) + output_dir.mkdir(parents=True, exist_ok=True) + + train_tasks = [] + result_files = [] + for (index, config_file) in enumerate(sorted(glob.glob("configs/*.json"))): + # TODO: add a Python 
function task to the job. The task should execute the + # `train_model_task` function. The path to the config file, and the path to a result + # file should be passed as arguments to the task. Create a unique result path for each + # task (e.g. output/<index>-result.json). + + # TODO: create the final postprocessing task, which should execute the `postprocess_task` + # function. The task should receive a list of results from the model training and a path to + # the final result file (e.g. `output/result.txt`) as arguments. + + + # Submit the job + submitted = client.submit(job) + + # Visualize the created job using the DOT format. + # You can render the graph using `$ xdot job.dot`. + visualize_job(job, "job.dot") + + # Wait until the job completes + client.wait_for_jobs([submitted]) diff --git a/exercise-2/tasks.py b/exercise-2/tasks.py new file mode 100644 index 0000000..511b5f6 --- /dev/null +++ b/exercise-2/tasks.py @@ -0,0 +1,31 @@ +import json +from pathlib import Path +from typing import List + +from functions import train_model + + +def train_model_task(config: Path, output_path: Path): + """ + TODO: read the JSON config from `config` and pass its parameters (along with `output_path`) + to the `train_model` function. + """ + + +def postprocess_task(result_files: List[Path], output_file: Path): + results = [] + for result in result_files: + with open(result) as f: + result = json.load(f) + results.append(result) + results = sorted(results, key=lambda r: r["accuracy"], reverse=True) + with open(output_file, "w") as f: + print("Training results sorted by accuracy:", file=f) + for result in results: + learning_rate = result["parameters"]["learning_rate"] + batch_size = result["parameters"]["batch_size"] + accuracy = result["accuracy"] + print( + f"Learning rate={learning_rate}, batch_size={batch_size}: {accuracy * 100.0:.2f}%", + file=f + ) -- GitLab