Commit f7a48e01 authored by Stanislav Bohm's avatar Stanislav Bohm

ENH: new mapping files for "run" task

parent 7caba4cf
......@@ -18,64 +18,13 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='loomrun.proto',
package='loomrun',
serialized_pb=_b('\n\rloomrun.proto\x12\x07loomrun\"X\n\x07MapFile\x12\x10\n\x08\x66ilename\x18\x01 \x02(\t\x12\x13\n\x0binput_index\x18\x02 \x02(\x05\x12\x14\n\x0coutput_index\x18\x03 \x02(\x05\x12\x10\n\x08variable\x18\x04 \x01(\t\"3\n\x03Run\x12\x0c\n\x04\x61rgs\x18\x01 \x03(\t\x12\x1e\n\x04maps\x18\x02 \x03(\x0b\x32\x10.loomrun.MapFileB\x02H\x03')
serialized_pb=_b('\n\rloomrun.proto\x12\x07loomrun\"<\n\x03Run\x12\x0c\n\x04\x61rgs\x18\x01 \x03(\t\x12\x12\n\nmap_inputs\x18\x02 \x03(\t\x12\x13\n\x0bmap_outputs\x18\x03 \x03(\tB\x02H\x03')
)
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
_MAPFILE = _descriptor.Descriptor(
name='MapFile',
full_name='loomrun.MapFile',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='filename', full_name='loomrun.MapFile.filename', index=0,
number=1, type=9, cpp_type=9, label=2,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='input_index', full_name='loomrun.MapFile.input_index', index=1,
number=2, type=5, cpp_type=1, label=2,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='output_index', full_name='loomrun.MapFile.output_index', index=2,
number=3, type=5, cpp_type=1, label=2,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='variable', full_name='loomrun.MapFile.variable', index=3,
number=4, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
oneofs=[
],
serialized_start=26,
serialized_end=114,
)
_RUN = _descriptor.Descriptor(
name='Run',
full_name='loomrun.Run',
......@@ -91,8 +40,15 @@ _RUN = _descriptor.Descriptor(
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='maps', full_name='loomrun.Run.maps', index=1,
number=2, type=11, cpp_type=10, label=3,
name='map_inputs', full_name='loomrun.Run.map_inputs', index=1,
number=2, type=9, cpp_type=9, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='map_outputs', full_name='loomrun.Run.map_outputs', index=2,
number=3, type=9, cpp_type=9, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
......@@ -108,21 +64,12 @@ _RUN = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=116,
serialized_end=167,
serialized_start=26,
serialized_end=86,
)
_RUN.fields_by_name['maps'].message_type = _MAPFILE
DESCRIPTOR.message_types_by_name['MapFile'] = _MAPFILE
DESCRIPTOR.message_types_by_name['Run'] = _RUN
MapFile = _reflection.GeneratedProtocolMessageType('MapFile', (_message.Message,), dict(
DESCRIPTOR = _MAPFILE,
__module__ = 'loomrun_pb2'
# @@protoc_insertion_point(class_scope:loomrun.MapFile)
))
_sym_db.RegisterMessage(MapFile)
Run = _reflection.GeneratedProtocolMessageType('Run', (_message.Message,), dict(
DESCRIPTOR = _RUN,
__module__ = 'loomrun_pb2'
......
......@@ -139,6 +139,8 @@ class Plan(object):
TASK_ARRAY_MAKE = "array/make"
TASK_ARRAY_GET = "array/get"
TASK_RUN = "run"
u64 = struct.Struct("<Q")
def __init__(self):
......@@ -164,9 +166,23 @@ class Plan(object):
def task_split_lines(self, input, start, end):
return self.add(SplitLinesTask(input, start, end))
def task_run(self, args, stdin=None, stdout=None, variable=None):
return self.add(
RunTask(args, stdin=stdin, stdout=stdout, variable=variable))
def task_run(self, args, inputs=(), outputs=(None,)):
if isinstance(args, str):
args = args.split()
task = Task()
task.task_type = self.TASK_RUN
task.inputs = tuple(i for i, fname in inputs)
msg = loomrun_pb2.Run()
msg.args.extend(args)
msg.map_inputs.extend(fname if fname else "+in"
for i, fname in inputs)
msg.map_outputs.extend(fname if fname else "+out"
for fname in outputs)
task.config = msg.SerializeToString()
return self.add(task)
def task_array_make(self, inputs):
task = Task()
......
......@@ -4,6 +4,7 @@
#include "log.h"
#include "types.h"
#include "data/rawdata.h"
#include "data/array.h"
#include <stdlib.h>
#include <sstream>
......@@ -67,6 +68,10 @@ Worker::Worker(uv_loop_t *loop,
add_unpacker(RawData::TYPE_ID,
std::make_unique<SimpleUnpackFactory<RawDataUnpacker>>());
add_unpacker(Array::TYPE_ID,
std::make_unique<SimpleUnpackFactory<ArrayUnpacker>>());
resource_cpus = 1;
}
......
......@@ -2,14 +2,8 @@ package loomrun;
option optimize_for = LITE_RUNTIME;
message MapFile {
required string filename = 1;
required int32 input_index = 2; // -2 = no mapping
required int32 output_index = 3; // -2 = no mapping, -1 = task_id
optional string variable = 4;
}
message Run {
repeated string args = 1;
repeated MapFile maps = 2;
repeated string map_inputs = 2;
repeated string map_outputs = 3;
}
......@@ -175,7 +175,6 @@ void TaskManager::distribute_work(TaskNode::Vector &tasks)
input->add_owner(&load.connection);
}
}
llog->alert("X = {}", task->get_id());
load.connection.send_task(task);
}
}
......
......@@ -32,8 +32,8 @@ void MergeTask::start(DataVector &inputs) {
for (auto& data : inputs) {
char *mem = (*data)->get_raw_data(worker);
assert(mem);
size_t size = (*data)->get_size();
assert(mem || size == 0);
memcpy(dst, mem, size);
dst += size;
}
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -17,26 +17,24 @@ def test_cv_iris(loom_env):
p = loom_env.plan()
a = p.task_open(IRIS_DATA)
b = p.task_run(("sort", "--random-sort", "-"), stdin=a)
b = p.task_run(("sort", "--random-sort", "-"), [(a, None)])
chunks = [p.task_split_lines(b, i * CHUNK_SIZE, (i + 1) * CHUNK_SIZE)
for i in xrange(CHUNKS)]
trainsets = [p.task_merge(chunks[:i] + chunks[i+1:])
trainsets = [p.task_merge(chunks[:i] + chunks[i + 1:])
for i in xrange(CHUNKS)]
models = []
for ts in trainsets:
model = p.task_run("svm-train data")
model.map_file_in(ts, "data")
model.map_file_out("data.model")
model = p.task_run("svm-train data",
[(ts, "data")], ["data.model"])
models.append(model)
predict = []
for chunk, model in zip(chunks, models):
task = p.task_run("svm-predict testdata model out")
task.map_file_in(chunk, "testdata")
task.map_file_in(model, "model")
task = p.task_run("svm-predict testdata model out",
[(chunk, "testdata"), (model, "model")])
predict.append(task)
results = loom_env.submit(p, predict)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment