From 6f31fcb7b80331877e8cfdab00b5f0f0ea08a75b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= <berykubik@gmail.com>
Date: Wed, 22 Nov 2023 15:22:33 +0100
Subject: [PATCH] Add exercise 2

---
 README.md                         |  2 ++
 exercise-2/README.md              | 29 +++++++++++++++++++
 exercise-2/configs/config-0.json  |  4 +++
 exercise-2/configs/config-1.json  |  4 +++
 exercise-2/configs/config-10.json |  4 +++
 exercise-2/configs/config-11.json |  4 +++
 exercise-2/configs/config-12.json |  4 +++
 exercise-2/configs/config-13.json |  4 +++
 exercise-2/configs/config-14.json |  4 +++
 exercise-2/configs/config-2.json  |  4 +++
 exercise-2/configs/config-3.json  |  4 +++
 exercise-2/configs/config-4.json  |  4 +++
 exercise-2/configs/config-5.json  |  4 +++
 exercise-2/configs/config-6.json  |  4 +++
 exercise-2/configs/config-7.json  |  4 +++
 exercise-2/configs/config-8.json  |  4 +++
 exercise-2/configs/config-9.json  |  4 +++
 exercise-2/functions.py           | 26 +++++++++++++++++
 exercise-2/main.py                | 46 +++++++++++++++++++++++++++++++
 exercise-2/tasks.py               | 31 +++++++++++++++++++++
 20 files changed, 194 insertions(+)
 create mode 100644 exercise-2/README.md
 create mode 100644 exercise-2/configs/config-0.json
 create mode 100644 exercise-2/configs/config-1.json
 create mode 100644 exercise-2/configs/config-10.json
 create mode 100644 exercise-2/configs/config-11.json
 create mode 100644 exercise-2/configs/config-12.json
 create mode 100644 exercise-2/configs/config-13.json
 create mode 100644 exercise-2/configs/config-14.json
 create mode 100644 exercise-2/configs/config-2.json
 create mode 100644 exercise-2/configs/config-3.json
 create mode 100644 exercise-2/configs/config-4.json
 create mode 100644 exercise-2/configs/config-5.json
 create mode 100644 exercise-2/configs/config-6.json
 create mode 100644 exercise-2/configs/config-7.json
 create mode 100644 exercise-2/configs/config-8.json
 create mode 100644 exercise-2/configs/config-9.json
 create mode 100644 exercise-2/functions.py
 create mode 100644 exercise-2/main.py
 create mode 100644 exercise-2/tasks.py

diff --git a/README.md b/README.md
index b2d5f20..b786d92 100644
--- a/README.md
+++ b/README.md
@@ -8,5 +8,7 @@ This repository contains a few simple tasks for practicing work with HyperQueue.
 ## Exercises
 First, download and extract [HyperQueue](https://github.com/It4innovations/hyperqueue/releases/download/v0.17.0/hq-v0.17.0-linux-x64.tar.gz).
 
+Then you can try to implement the following exercises:
 - [Exercise 1](exercise-1)
 - [Exercise 1b](exercise-1b)
+- [Exercise 2](exercise-2)
diff --git a/exercise-2/README.md b/exercise-2/README.md
new file mode 100644
index 0000000..eb7b596
--- /dev/null
+++ b/exercise-2/README.md
@@ -0,0 +1,29 @@
+# Exercise 2
+The goal of this exercise is to execute a computational graph containing dependencies between
+individual tasks, using the HyperQueue [Python API](https://it4innovations.github.io/hyperqueue/stable/python/).
+
+For this task, you will need to have Python 3.6+ and the `hyperqueue` Python package available. You
+can install the package from [PyPI](https://pypi.org/project/hyperqueue/):
+```bash
+# Create a virtual environment
+$ python3 -m venv venv
+$ source venv/bin/activate
+(venv) $ python3 -m pip install -U setuptools wheel pip
+
+# Install the HyperQueue Python package
+(venv) $ python3 -m pip install "hyperqueue[all]"
+```
+
+The `configs` directory contains several configurations for training a machine learning model.
+Your goal is to train a model for each configuration, and then postprocess the results of all the
+training runs into a single result file, which lists the results of the individual training runs,
+sorted by accuracy.
+
+Perform the following tasks:
+1) Implement the `train_model_task` function in [`tasks.py`](tasks.py).
+2) Create HQ tasks for training the models and for performing the final postprocessing in
+[`main.py`](main.py).
+
+## Checking the results
+After the job is completed, check the contents of the `output/result.txt` file. It should contain
+the training results of all model configurations.
diff --git a/exercise-2/configs/config-0.json b/exercise-2/configs/config-0.json
new file mode 100644
index 0000000..44e8eb8
--- /dev/null
+++ b/exercise-2/configs/config-0.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.01,
+    "batch_size": 1
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-1.json b/exercise-2/configs/config-1.json
new file mode 100644
index 0000000..b61c263
--- /dev/null
+++ b/exercise-2/configs/config-1.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.01,
+    "batch_size": 2
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-10.json b/exercise-2/configs/config-10.json
new file mode 100644
index 0000000..9f5f002
--- /dev/null
+++ b/exercise-2/configs/config-10.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.0001,
+    "batch_size": 1
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-11.json b/exercise-2/configs/config-11.json
new file mode 100644
index 0000000..bf6dfd3
--- /dev/null
+++ b/exercise-2/configs/config-11.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.0001,
+    "batch_size": 2
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-12.json b/exercise-2/configs/config-12.json
new file mode 100644
index 0000000..77de27a
--- /dev/null
+++ b/exercise-2/configs/config-12.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.0001,
+    "batch_size": 4
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-13.json b/exercise-2/configs/config-13.json
new file mode 100644
index 0000000..8bd4da1
--- /dev/null
+++ b/exercise-2/configs/config-13.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.0001,
+    "batch_size": 8
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-14.json b/exercise-2/configs/config-14.json
new file mode 100644
index 0000000..b9fc888
--- /dev/null
+++ b/exercise-2/configs/config-14.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.0001,
+    "batch_size": 16
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-2.json b/exercise-2/configs/config-2.json
new file mode 100644
index 0000000..5b62267
--- /dev/null
+++ b/exercise-2/configs/config-2.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.01,
+    "batch_size": 4
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-3.json b/exercise-2/configs/config-3.json
new file mode 100644
index 0000000..608234e
--- /dev/null
+++ b/exercise-2/configs/config-3.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.01,
+    "batch_size": 8
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-4.json b/exercise-2/configs/config-4.json
new file mode 100644
index 0000000..e45ed06
--- /dev/null
+++ b/exercise-2/configs/config-4.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.01,
+    "batch_size": 16
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-5.json b/exercise-2/configs/config-5.json
new file mode 100644
index 0000000..bacedff
--- /dev/null
+++ b/exercise-2/configs/config-5.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.001,
+    "batch_size": 1
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-6.json b/exercise-2/configs/config-6.json
new file mode 100644
index 0000000..9b18b1a
--- /dev/null
+++ b/exercise-2/configs/config-6.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.001,
+    "batch_size": 2
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-7.json b/exercise-2/configs/config-7.json
new file mode 100644
index 0000000..68ade58
--- /dev/null
+++ b/exercise-2/configs/config-7.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.001,
+    "batch_size": 4
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-8.json b/exercise-2/configs/config-8.json
new file mode 100644
index 0000000..5cde25d
--- /dev/null
+++ b/exercise-2/configs/config-8.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.001,
+    "batch_size": 8
+}
\ No newline at end of file
diff --git a/exercise-2/configs/config-9.json b/exercise-2/configs/config-9.json
new file mode 100644
index 0000000..a77d1e9
--- /dev/null
+++ b/exercise-2/configs/config-9.json
@@ -0,0 +1,4 @@
+{
+    "learning_rate": 0.001,
+    "batch_size": 16
+}
\ No newline at end of file
diff --git a/exercise-2/functions.py b/exercise-2/functions.py
new file mode 100644
index 0000000..fa37973
--- /dev/null
+++ b/exercise-2/functions.py
@@ -0,0 +1,26 @@
+"""
+You don't need to modify this file.
+"""
+
+import json
+import random
+import time
+from pathlib import Path
+
+
+def train_model(learning_rate: float, batch_size: float, output_path: Path):
+    """
+    "Trains" a machine learning model using the given parameters (`learning_rate` and `batch_size`)
+    and stores the result into `output_path`.
+    """
+    print(f"Training model using learning_rate={learning_rate} and batch_size={batch_size}")
+    time.sleep(30 / batch_size)
+
+    accuracy = 0.9 + (random.random() / 10)
+
+    print(f"Model training finished, resulting accuracy: {accuracy}")
+    with open(output_path, "w") as f:
+        json.dump({
+            "parameters": dict(learning_rate=learning_rate, batch_size=batch_size),
+            "accuracy": accuracy
+        }, f, indent=4)
diff --git a/exercise-2/main.py b/exercise-2/main.py
new file mode 100644
index 0000000..b09f1f4
--- /dev/null
+++ b/exercise-2/main.py
@@ -0,0 +1,46 @@
+import glob
+import shutil
+from pathlib import Path
+
+from hyperqueue import Job, LocalCluster
+from hyperqueue.visualization import visualize_job
+
+from tasks import postprocess_task, train_model_task
+
+if __name__ == "__main__":
+    # Spawn a HQ server
+    with LocalCluster() as cluster:
+        # Add a single HyperQueue worker to the server
+        cluster.start_worker()
+
+        # Create a client and a job
+        client = cluster.client()
+        job = Job()
+
+        # Directory where output will be stored
+        output_dir = Path("output")
+        shutil.rmtree(output_dir, ignore_errors=True)
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        train_tasks = []
+        result_files = []
+        for (index, config_file) in enumerate(sorted(glob.glob("configs/*.json"))):
+            # TODO: add a Python function task to the job. The task should compute the
+            # `train_model_task` function. The path to the config file, and the path to a result
+            # file should be passed as arguments to the task. Create a unique result path for each
+            # task (e.g. output/<index>-result.json).
+
+        # TODO: create the final postprocessing task, which should execute the `postprocess_task`
+        # function. The task should receive the list of result files produced by the training
+        # tasks and a path to the final result file (e.g. `output_dir / "result.txt"`) as arguments.
+
+
+        # Submit the job
+        submitted = client.submit(job)
+
+        # Visualize the created job using the DOT format.
+        # You can render the graph using `$ xdot job.dot`.
+        visualize_job(job, "job.dot")
+
+        # Wait until the job completes
+        client.wait_for_jobs([submitted])
diff --git a/exercise-2/tasks.py b/exercise-2/tasks.py
new file mode 100644
index 0000000..511b5f6
--- /dev/null
+++ b/exercise-2/tasks.py
@@ -0,0 +1,31 @@
+import json
+from pathlib import Path
+from typing import List
+
+from functions import train_model
+
+
+def train_model_task(config: Path, output_path: Path):
+    """
+    TODO: read the JSON config from `config` and pass its parameters (along with `output_path`)
+    to the `train_model` function.
+    """
+
+
+def postprocess_task(result_files: List[Path], output_file: Path):
+    results = []
+    for result in result_files:
+        with open(result) as f:
+            result = json.load(f)
+            results.append(result)
+    results = sorted(results, key=lambda r: r["accuracy"], reverse=True)
+    with open(output_file, "w") as f:
+        print("Training results sorted by accuracy:", file=f)
+        for result in results:
+            learning_rate = result["parameters"]["learning_rate"]
+            batch_size = result["parameters"]["batch_size"]
+            accuracy = result["accuracy"]
+            print(
+                f"Learning rate={learning_rate}, batch_size={batch_size}: {accuracy * 100.0:.2f}%",
+                file=f
+            )
-- 
GitLab