diff --git a/12_omp_offload_vadd/README.md b/12_omp_offload_vadd/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d82e941c0a13e0899de0f6c874f7a16c8c92db4d
--- /dev/null
+++ b/12_omp_offload_vadd/README.md
@@ -0,0 +1,14 @@
+
+OpenMP offloading on AMD GPUs
+=============================
+
+This example demonstrates how to use AOMP, AMD's LLVM-based compiler that can build programs using OpenMP offloading.
+
+The `vadd.cpp` source file contains a simple vector addition. On line 35 begins the loop performing the addition, annotated with several OpenMP constructs. The `target` construct makes the code execute on the GPU, and the `map` clause tells OpenMP which data transfers are needed. The `teams` construct creates a league of teams, and `distribute` splits the loop iterations between the teams, much like dividing work between threadblocks in CUDA/HIP. `parallel for` then creates several threads that work together on a team's share of the iterations, just like threads within a threadblock.
+
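+A minimal sketch of such an offloaded vector-add loop (an illustration only, not the exact contents of `vadd.cpp`; the array names and sizes here are placeholders):
+```
+#include <cstdio>
+#include <vector>
+
+int main() {
+    const int n = 1 << 20;
+    std::vector<double> a(n, 1.0), b(n, 2.0), c(n, 0.0);
+    double *pa = a.data(), *pb = b.data(), *pc = c.data();
+
+    // target: run on the GPU; map: copy a and b in, copy c back out
+    // teams distribute: split iterations between teams (~threadblocks)
+    // parallel for: threads within each team share the team's iterations
+    #pragma omp target teams distribute parallel for map(to: pa[0:n], pb[0:n]) map(from: pc[0:n])
+    for (int i = 0; i < n; i++)
+        pc[i] = pa[i] + pb[i];
+
+    printf("c[0] = %f\n", c[0]);
+    return 0;
+}
+```
+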
+The code can be compiled using
+```
+aompcc vadd.cpp -o vadd.x
+```
+
+On machines with a GPU other than the default (Vega, gfx900), either set `export AOMP_GPU=gfx908` or compile with `aompcc --offload-arch gfx908 vadd.cpp -o vadd.x` (both shown here for the AMD Instinct MI100, gfx908).
diff --git a/13_omp_offload_pi/Makefile b/13_omp_offload_pi/Makefile
index 422296dc7b04d6c97cb24f7cfaea3dbc8601d937..efaac82ca10a81c34e9360c40cf6da9d6b0f05b7 100644
--- a/13_omp_offload_pi/Makefile
+++ b/13_omp_offload_pi/Makefile
@@ -2,7 +2,7 @@
 CLANG=/opt/rocm/llvm/bin/clang++
 
 
-.PHONY: compile clean run pi_seq.x pi_omp.x pi_gpu_clang.x pi_gpu_aomp.x pi_hip.x
+.PHONY: compile clean run
 
 
 
@@ -12,11 +12,11 @@ clean:
 	rm -rf *.x
 
 run: compile
-	@echo "Sequential"                &&   ./pi_seq.x       1000000000    &&   echo
-	@echo "OpenMP"                    &&   ./pi_omp.x       10000000000   &&   echo
-	@echo "OpenMP offloading Clang"   &&   ./pi_gpu_clang.x 10000000000   &&   echo
-	@echo "OpenMP offloading AOMP"    &&   ./pi_gpu_aomp.x  10000000000   &&   echo
-	@echo "Classic HIP"               &&   ./pi_hip.x       10000000000   &&   echo
+	@echo "\nSequential, 10x fewer points" &&   ./pi_seq.x       1000000000
+	@echo "\nOpenMP"                       &&   ./pi_omp.x       10000000000
+	@echo "\nOpenMP offloading Clang"      &&   ./pi_gpu_clang.x 10000000000
+	@echo "\nOpenMP offloading AOMP"       &&   ./pi_gpu_aomp.x  10000000000
+	@echo "\nHIP"                          &&   ./pi_hip.x       10000000000
 
 
 
diff --git a/13_omp_offload_pi/README.md b/13_omp_offload_pi/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..214b71879530450cd3e9797b25ac172efc315329
--- /dev/null
+++ b/13_omp_offload_pi/README.md
@@ -0,0 +1,7 @@
+
+OpenMP offloading, comparison
+=============================
+
+This example compares several parallelization techniques applied to a simple algorithm that computes $\pi$ by numerical integration, using the fact that $\pi = \int_0^1 \frac{4}{1+x^2} \; \mathrm{d} x$.
+
+The `pi_seq.cpp` source file contains the sequential version of this algorithm, `pi_omp.cpp` is parallelized using OpenMP, `pi_omp_offload.cpp` uses OpenMP offloading, and `pi_hip.hip.cpp` implements the same algorithm in HIP. Compile the sources with `make` and run them all with `make run`. Notice how many different ways the code is compiled, which commands are used for compilation, and compare the differences in computation time.
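+
+For reference, a minimal sketch of what the offloaded variant might look like (an illustration only; the actual sources in this directory may differ):
+```
+#include <cstdio>
+#include <cstdlib>
+
+int main(int argc, char **argv) {
+    const long n = (argc > 1) ? atol(argv[1]) : 1000000000L;
+    const double h = 1.0 / n;
+    double sum = 0.0;
+
+    // Midpoint-rule integration of 4/(1+x^2) on [0,1], offloaded to the GPU;
+    // the reduction combines the partial sums from all teams and threads
+    #pragma omp target teams distribute parallel for reduction(+:sum) map(tofrom: sum)
+    for (long i = 0; i < n; i++) {
+        double x = (i + 0.5) * h;
+        sum += 4.0 / (1.0 + x * x);
+    }
+
+    printf("pi ~= %.15f\n", sum * h);
+    return 0;
+}
+```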