checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
---
2
2
SHA256:
3
- metadata.gz: a0fedb378361dced41add87745a7bb3a03201e4e2aa4d1d5a743634c9aec5f58
4
- data.tar.gz: 4932a8137f5c73351db21c830c9ae07893f2970bd9cebebab3f2d1b8855efa28
3
+ metadata.gz: 28fd9758cdc95babbcf72463572301d88e9691de7f90c927bd53db60dc2b46b4
4
+ data.tar.gz: a70e2704a22a10f1f2a68843794099dfaeb88d1ad08e0734f39ec1711f5975a3
5
5
SHA512:
6
- metadata.gz: 9e1d2c81c868905a32641f20376af5e9f16ecbbec96dbc61fd21b1168609abc91a5ba720560c23ec390244d8810efc22b08febc8576af53710bb30ca4a405209
7
- data.tar.gz: 57a55a6b535ab9366158db866bab743861579223277d115e723c9e344145f013907e8f2a8adcf5b9920f463c122cf8a5ba3f550d1721e8a9996fa1c5de86d622
6
+ metadata.gz: fcff6081b20d4d551a74eb094b1b6e5e61149a31bf367dbf693c88c12102b0c9fe032fe7c0766e09b1d5b63fc44393de128cc94c7d9a01e6c867814938fdf072
7
+ data.tar.gz: 10c62b40edbad82555426ab3ba2bd514ff7904e5e04c775aa22b5b2f63c0034ba2360a67f26a3e5aa53e160625834cc39e92a0ad78602af8e5ece025bbb0f0f9
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
# Data Forest Changelog
2
2
3
+ ## 0.0.2
4
+
5
+ Leaf nodes that have no dependencies of their own no longer need to be defined explicitly; they are inferred from the dependencies of the other nodes.
6
+
3
7
## 0.0.1
4
8
5
9
Initial version. Allows you to define and execute basic data processing trees.
data/README.md CHANGED
@@ -1,18 +1,17 @@
1
1
# Data Forest
2
2
3
- Define and execute data processing trees.
3
+ Execute data processing jobs, defined as a dependency graph.
4
4
5
5
## Usage
6
6
7
- Define a data forest with a tree:
7
+ Define a data forest with a tree structure:
8
8
9
9
```ruby
10
10
DataForest.define(:my_forest) do
11
11
tree :my_tree do
12
12
root root_node: [:node_one, :node_two]
13
+
13
14
node node_one: :node_three
14
- node :node_two
15
- node :node_three
16
15
end
17
16
end
18
17
```
@@ -114,6 +113,7 @@ bundle exec rake test
114
113
```sh
115
114
bundle exec data-forest -f ./examples/simple_tree.rb run my_forest:simple_tree
116
115
bundle exec data-forest -f ./examples/valid_tree.rb run my_forest:valid_tree
116
+ bundle exec data-forest -f ./examples/mini_tree.rb run my_forest:mini_tree
117
117
```
118
118
119
119
## Publishing
data/lib/data-forest.rb CHANGED
@@ -5,7 +5,7 @@ require 'data-forest/job'
5
5
require 'data-forest/runner'
6
6
7
7
module DataForest
8
- VERSION = '0.0.1'
8
+ VERSION = '0.0.2'
9
9
10
10
FORESTS = {}
11
11
data/lib/data-forest/builder.rb CHANGED
@@ -31,6 +31,16 @@ module DataForest
31
31
@root = nil
32
32
33
33
instance_exec(&block)
34
+
35
+ remaining = []
36
+ @nodes.each do |_, node|
37
+ node.dependencies.each do |dependency|
38
+ remaining.push(dependency)
39
+ end
40
+ end
41
+ remaining -= @nodes.keys
42
+ remaining.each { |node| add_node_to_list(node) }
43
+
34
44
@trees[name] = Tree.new(name, @root, @nodes)
35
45
36
46
@nodes = nil
@@ -77,7 +87,7 @@ module DataForest
77
87
78
88
case args
79
89
when Symbol
80
- dependencies = nil
90
+ dependencies = []
81
91
when Hash
82
92
dependencies = case args[name]
83
93
when Symbol
data/lib/data-forest/forest.rb CHANGED
@@ -5,8 +5,36 @@ module DataForest
5
5
attr_reader :name, :trees
6
6
7
7
def initialize(name, trees)
8
+ validate_name!(name)
9
+ validate_trees!(trees)
10
+
8
11
@name = name
9
12
@trees = trees
10
13
end
14
+
15
+ private
16
+
17
+ def validate_name!(name)
18
+ error = ArgumentError
19
+ message = 'name is not a Symbol'
20
+
21
+ raise(error, message) unless name.is_a?(Symbol)
22
+ end
23
+
24
+ def validate_trees!(trees)
25
+ error = ArgumentError
26
+ message = 'trees is not a Hash'
27
+
28
+ raise(error, message) unless trees.is_a?(Hash)
29
+
30
+ trees.each do |key, value|
31
+ message = "tree key: #{key} is not a Symbol, but a #{key.class.name}"
32
+ raise(error, message) unless key.is_a?(Symbol)
33
+
34
+ message = "tree value: #{value} is not a DataForest::Tree, "\
35
+ "but a #{value.class.name}"
36
+ raise(error, message) unless value.is_a?(DataForest::Tree)
37
+ end
38
+ end
11
39
end
12
40
end
data/lib/data-forest/job.rb CHANGED
@@ -2,10 +2,12 @@
2
2
3
3
module DataForest
4
4
class Job
5
- def initialize
6
- @has_input = false
7
- @input = {}
5
+ def initialize(forest_name, tree_name)
6
+ @forest_name = forest_name
7
+ @tree_name = tree_name
8
8
9
+ @has_input = false
10
+ @input = {}
9
11
@has_output = false
10
12
@output = nil
11
13
end
data/lib/data-forest/node.rb CHANGED
@@ -2,12 +2,44 @@
2
2
3
3
module DataForest
4
4
class Node
5
- attr_reader :dependencies, :job_class
5
+ attr_reader :dependencies, :job_class, :name
6
6
7
7
def initialize(name, job_class, dependencies)
8
+ validate_name!(name)
9
+ validate_job_class!(job_class)
10
+ validate_dependencies!(dependencies)
11
+
8
12
@name = name
9
13
@job_class = job_class
10
14
@dependencies = dependencies
11
15
end
16
+
17
+ private
18
+
19
+ def validate_name!(name)
20
+ error = ArgumentError
21
+ message = 'name is not a Symbol'
22
+
23
+ raise(error, message) unless name.is_a?(Symbol)
24
+ end
25
+
26
+ def validate_job_class!(job)
27
+ error = ArgumentError
28
+ message = "node value: #{job} is not a DataForest::Job"
29
+
30
+ valid = job.is_a?(Class) && job.ancestors.include?(DataForest::Job)
31
+ raise(error, message) unless valid
32
+ end
33
+
34
+ def validate_dependencies!(dependencies)
35
+ error = ArgumentError
36
+ message = "dependencies: #{dependencies} is not an Array"
37
+ raise(error, message) unless dependencies.is_a?(Array)
38
+
39
+ dependencies.each do |dependency|
40
+ message = "dependencies value: #{dependency} is not a Symbol"
41
+ raise(error, message) unless dependency.is_a?(Symbol)
42
+ end
43
+ end
12
44
end
13
45
end
data/lib/data-forest/runner.rb CHANGED
@@ -53,7 +53,7 @@ module DataForest
53
53
54
54
node = tree.nodes[name]
55
55
job_class = node.job_class
56
- job = job_class.new
56
+ job = job_class.new(forest.name, tree.name)
57
57
58
58
if sources
59
59
sources.each do |source|
data/lib/data-forest/tree.rb CHANGED
@@ -5,9 +5,49 @@ module DataForest
5
5
attr_reader :name, :nodes, :root
6
6
7
7
def initialize(name, root, nodes)
8
+ validate_name!(name)
9
+ validate_nodes!(nodes)
10
+ validate_root!(root, nodes)
11
+
8
12
@name = name
9
13
@nodes = nodes
10
14
@root = root
11
15
end
16
+
17
+ private
18
+
19
+ def validate_name!(name)
20
+ error = ArgumentError
21
+ message = 'name is not a Symbol'
22
+
23
+ raise(error, message) unless name.is_a?(Symbol)
24
+ end
25
+
26
+ def validate_nodes!(nodes)
27
+ error = ArgumentError
28
+ message = 'nodes is not a Hash'
29
+
30
+ raise(error, message) unless nodes.is_a?(Hash)
31
+
32
+ nodes.each do |key, value|
33
+ message = "node key: #{key} is not a Symbol, but a #{key.class.name}"
34
+ raise(error, message) unless key.is_a?(Symbol)
35
+
36
+ message = "node value: #{value} is not a DataForest::Node, "\
37
+ "but a #{value.class.name}"
38
+ raise(error, message) unless value.is_a?(DataForest::Node)
39
+ end
40
+ end
41
+
42
+ def validate_root!(root, nodes)
43
+ return if root.nil? && nodes.empty?
44
+
45
+ error = ArgumentError
46
+ message = "root value: #{root} is not a Symbol, but a #{root.class.name}"
47
+ raise(error, message) unless root.is_a?(Symbol)
48
+
49
+ message = "root value: #{root} is not a key of the `nodes` hash"
50
+ raise(error, message) unless nodes.key?(root)
51
+ end
12
52
end
13
53
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
--- !ruby/object:Gem::Specification
2
2
name: data-forest
3
3
version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
platform: ruby
6
6
authors:
7
7
- Codruț Constantin Gușoi
8
8
autorequire:
9
9
bindir: bin
10
10
cert_chain: []
11
- date: 2019-11-29 00:00:00.000000000 Z
11
+ date: 2019-12-02 00:00:00.000000000 Z
12
12
dependencies:
13
13
- !ruby/object:Gem::Dependency
14
14
name: bundler